initial commit

This commit is contained in:
Rasmus Wejlgaard 2026-03-28 12:30:17 +00:00
commit 737d6e0bc1
137 changed files with 25471 additions and 0 deletions

56
.github/workflows/deploy-on-merge.yml vendored Normal file
View file

@ -0,0 +1,56 @@
name: Deploy (on merge)

on:
  push:
    branches:
      - main
    # Terraform changes are handled by the dedicated terraform.yml workflow.
    paths-ignore:
      - 'terraform/**'

# Requires these repository secrets:
#   TAILSCALE_AUTHKEY — Tailscale auth key for mesh access
#   SSH_PRIVATE_KEY   — SSH key authorized on target hosts
#   AGE_SECRET_KEY    — age private key for SOPS decryption

jobs:
  deploy:
    name: Deploy to all
    runs-on: ubuntu-latest
    environment: production
    steps:
      - uses: actions/checkout@v4

      # Join the mesh so the runner can reach hosts on their Tailscale IPs.
      - name: Set up Tailscale
        uses: tailscale/github-action@v3
        with:
          authkey: ${{ secrets.TAILSCALE_AUTHKEY }}

      - name: Set up SSH key
        run: |
          mkdir -p ~/.ssh
          echo "${{ secrets.SSH_PRIVATE_KEY }}" > ~/.ssh/id_ed25519
          chmod 600 ~/.ssh/id_ed25519
          ssh-keyscan -H 100.67.6.27 100.84.65.101 100.122.219.41 100.117.235.28 100.89.206.60 100.115.45.53 >> ~/.ssh/known_hosts 2>/dev/null || true

      - name: Install tools
        run: |
          pip install ansible
          wget -qO /tmp/sops.deb https://github.com/getsops/sops/releases/download/v3.9.4/sops_3.9.4_amd64.deb
          sudo dpkg -i /tmp/sops.deb

      - name: Decrypt secrets
        env:
          SOPS_AGE_KEY: ${{ secrets.AGE_SECRET_KEY }}
        run: |
          # Decrypt all .enc. files to their plaintext counterparts
          find . -name '*.enc.yml' -o -name '*.enc.yaml' -o -name '*.enc.env' | while read f; do
            out="${f/.enc/}"
            sops -d "$f" > "$out"
            echo "Decrypted: $f -> $out"
          done

      - name: Run playbook
        working-directory: ansible/
        env:
          ANSIBLE_HOST_KEY_CHECKING: "false"
        run: ansible-playbook deploy.yml

82
.github/workflows/deploy.yml vendored Normal file
View file

@ -0,0 +1,82 @@
name: Deploy (manual)

on:
  workflow_dispatch:
    inputs:
      target:
        description: 'Target host (e.g. helsinki-a, london-b, all)'
        required: true
        type: string
      playbook:
        description: 'Ansible playbook to run (e.g. site.yml, update.yml)'
        required: true
        type: string
      dry_run:
        description: 'Dry run (--check mode)'
        required: false
        type: boolean
        default: true

# Requires these repository secrets:
#   TAILSCALE_AUTHKEY — Tailscale auth key for mesh access
#   SSH_PRIVATE_KEY   — SSH key authorized on target hosts
#   AGE_SECRET_KEY    — age private key for SOPS decryption

jobs:
  deploy:
    name: Deploy to ${{ inputs.target }}
    runs-on: ubuntu-latest
    environment: production # requires manual approval in repo settings
    steps:
      - uses: actions/checkout@v4

      - name: Set up Tailscale
        uses: tailscale/github-action@v3
        with:
          authkey: ${{ secrets.TAILSCALE_AUTHKEY }}

      - name: Set up SSH key
        run: |
          mkdir -p ~/.ssh
          echo "${{ secrets.SSH_PRIVATE_KEY }}" > ~/.ssh/id_ed25519
          chmod 600 ~/.ssh/id_ed25519
          ssh-keyscan -H 100.67.6.27 100.84.65.101 100.122.219.41 100.117.235.28 100.89.206.60 100.115.45.53 >> ~/.ssh/known_hosts 2>/dev/null || true

      - name: Install tools
        run: |
          pip install ansible
          wget -qO /tmp/sops.deb https://github.com/getsops/sops/releases/download/v3.9.4/sops_3.9.4_amd64.deb
          sudo dpkg -i /tmp/sops.deb

      - name: Decrypt secrets
        env:
          SOPS_AGE_KEY: ${{ secrets.AGE_SECRET_KEY }}
        run: |
          # Decrypt all .enc. files to their plaintext counterparts
          find . -name '*.enc.yml' -o -name '*.enc.yaml' -o -name '*.enc.env' | while read f; do
            out="${f/.enc/}"
            sops -d "$f" > "$out"
            echo "Decrypted: $f -> $out"
          done

      - name: Run playbook
        working-directory: ansible/
        env:
          ANSIBLE_HOST_KEY_CHECKING: "false"
        run: |
          PLAYBOOK="${{ inputs.playbook }}"
          # Normalize: strip prefix/suffix, then re-add as needed.
          # deploy.yml lives at the ansible/ root; everything else under playbooks/.
          PLAYBOOK="${PLAYBOOK#playbooks/}"
          PLAYBOOK="${PLAYBOOK%.yml}.yml"
          if [ "$PLAYBOOK" != "deploy.yml" ]; then
            PLAYBOOK="playbooks/$PLAYBOOK"
          fi
          ARGS=""
          if [ "${{ inputs.target }}" != "all" ]; then
            ARGS="--limit ${{ inputs.target }}"
          fi
          if [ "${{ inputs.dry_run }}" = "true" ]; then
            ARGS="$ARGS --check --diff"
          fi
          ansible-playbook "$PLAYBOOK" $ARGS

34
.github/workflows/lint-ansible.yml vendored Normal file
View file

@ -0,0 +1,34 @@
name: Lint Ansible

on:
  push:
    paths:
      - 'ansible/**'
      - '.github/workflows/lint-ansible.yml'
  pull_request:
    paths:
      - 'ansible/**'
      - '.github/workflows/lint-ansible.yml'

jobs:
  ansible-lint:
    name: ansible-lint
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4

      # Skip the lint action entirely when there is no Ansible YAML yet.
      - name: Check for Ansible files
        id: check
        run: |
          if find ansible/ -name '*.yml' -o -name '*.yaml' | grep -q .; then
            echo "has_files=true" >> "$GITHUB_OUTPUT"
          else
            echo "has_files=false" >> "$GITHUB_OUTPUT"
            echo "No Ansible YAML files found — skipping lint."
          fi

      - name: Run ansible-lint
        if: steps.check.outputs.has_files == 'true'
        uses: ansible/ansible-lint@v25
        with:
          working_directory: ansible/

View file

@ -0,0 +1,32 @@
name: Lint Docker Compose

on:
  push:
    paths:
      - 'ansible/services/**'
      - '.github/workflows/lint-docker-compose.yml'
  pull_request:
    paths:
      - 'ansible/services/**'
      - '.github/workflows/lint-docker-compose.yml'

jobs:
  compose-lint:
    name: docker compose config
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4

      - name: Validate Compose files
        run: |
          found=0
          status=0
          shopt -s globstar nullglob
          for f in ansible/services/**/docker-compose.yml ansible/services/**/docker-compose.yaml ansible/services/**/compose.yml ansible/services/**/compose.yaml; do
            echo "::group::Validating $f"
            # Record failures instead of discarding them (the previous
            # `|| true` meant an invalid Compose file could never fail CI),
            # but still validate every file before exiting.
            docker compose -f "$f" config --quiet 2>&1 || status=1
            echo "::endgroup::"
            found=1
          done
          if [ "$found" -eq 0 ]; then
            echo "No Compose files found — skipping."
          fi
          exit "$status"

113
.github/workflows/terraform.yml vendored Normal file
View file

@ -0,0 +1,113 @@
name: Terraform

on:
  push:
    branches: [main]
    paths:
      - 'terraform/**'
      - '.github/workflows/terraform.yml'
  pull_request:
    paths:
      - 'terraform/**'
      - '.github/workflows/terraform.yml'

# Requires these repository secrets:
#   AGE_SECRET_KEY — age private key for SOPS decryption

jobs:
  # Plan runs on every push/PR; on main the plan artifact is handed to apply.
  plan:
    name: Plan
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4

      - name: Install OpenTofu
        uses: opentofu/setup-opentofu@v1
        with:
          tofu_version: latest

      - name: Install SOPS
        run: |
          wget -qO /tmp/sops.deb https://github.com/getsops/sops/releases/download/v3.9.4/sops_3.9.4_amd64.deb
          sudo dpkg -i /tmp/sops.deb

      - name: Decrypt secrets
        env:
          SOPS_AGE_KEY: ${{ secrets.AGE_SECRET_KEY }}
        run: |
          find . -name '*.enc.yml' -o -name '*.enc.yaml' | while read f; do
            out="${f/.enc/}"
            sops -d "$f" > "$out"
            echo "Decrypted: $f -> $out"
          done

      # Backblaze B2 is S3-compatible; the state backend reads AWS_* vars.
      - name: Set backend credentials
        working-directory: terraform/
        run: |
          echo "AWS_ACCESS_KEY_ID=$(yq '.backblaze_keyID' secrets.yaml)" >> "$GITHUB_ENV"
          echo "AWS_SECRET_ACCESS_KEY=$(yq '.backblaze_applicationKey' secrets.yaml)" >> "$GITHUB_ENV"

      - name: tofu init
        working-directory: terraform/
        run: tofu init

      - name: tofu plan
        working-directory: terraform/
        run: tofu plan -out=tfplan

      - name: Upload plan
        if: github.event_name == 'push' && github.ref == 'refs/heads/main'
        uses: actions/upload-artifact@v4
        with:
          name: tfplan
          path: terraform/tfplan
          retention-days: 1

  # Apply only on main, gated by the production environment approval.
  apply:
    name: Apply
    needs: plan
    if: github.event_name == 'push' && github.ref == 'refs/heads/main'
    runs-on: ubuntu-latest
    environment: production
    steps:
      - uses: actions/checkout@v4

      - name: Install OpenTofu
        uses: opentofu/setup-opentofu@v1
        with:
          tofu_version: latest

      - name: Install SOPS
        run: |
          wget -qO /tmp/sops.deb https://github.com/getsops/sops/releases/download/v3.9.4/sops_3.9.4_amd64.deb
          sudo dpkg -i /tmp/sops.deb

      - name: Decrypt secrets
        env:
          SOPS_AGE_KEY: ${{ secrets.AGE_SECRET_KEY }}
        run: |
          find . -name '*.enc.yml' -o -name '*.enc.yaml' | while read f; do
            out="${f/.enc/}"
            sops -d "$f" > "$out"
            echo "Decrypted: $f -> $out"
          done

      - name: Set backend credentials
        working-directory: terraform/
        run: |
          echo "AWS_ACCESS_KEY_ID=$(yq '.backblaze_keyID' secrets.yaml)" >> "$GITHUB_ENV"
          echo "AWS_SECRET_ACCESS_KEY=$(yq '.backblaze_applicationKey' secrets.yaml)" >> "$GITHUB_ENV"

      - name: tofu init
        working-directory: terraform/
        run: tofu init

      # Apply exactly the plan produced by the plan job — no re-plan drift.
      - name: Download plan
        uses: actions/download-artifact@v4
        with:
          name: tfplan
          path: terraform/

      - name: tofu apply
        working-directory: terraform/
        run: tofu apply -auto-approve tfplan

View file

@ -0,0 +1,35 @@
name: Validate Caddyfile

on:
  push:
    paths:
      - 'ansible/services/caddy/**'
      - '.github/workflows/validate-caddyfile.yml'
  pull_request:
    paths:
      - 'ansible/services/caddy/**'
      - '.github/workflows/validate-caddyfile.yml'

jobs:
  caddy-validate:
    name: caddy validate
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4

      - name: Check for Caddyfile
        id: check
        run: |
          if [ -f ansible/services/caddy/Caddyfile ]; then
            echo "has_file=true" >> "$GITHUB_OUTPUT"
          else
            echo "has_file=false" >> "$GITHUB_OUTPUT"
            echo "No Caddyfile found — skipping."
          fi

      # Fetch the latest release binary (tag name minus the leading "v")
      # and run caddy's built-in config validator.
      - name: Validate Caddyfile
        if: steps.check.outputs.has_file == 'true'
        run: |
          curl -sL "https://github.com/caddyserver/caddy/releases/latest/download/caddy_$(curl -sL https://api.github.com/repos/caddyserver/caddy/releases/latest | jq -r .tag_name | tr -d v)_linux_amd64.tar.gz" | tar xz caddy
          chmod +x caddy
          ./caddy validate --config ansible/services/caddy/Caddyfile --adapter caddyfile

View file

@ -0,0 +1,54 @@
name: Validate Terraform

on:
  push:
    paths:
      - 'terraform/**'
      - '.github/workflows/validate-terraform.yml'
  pull_request:
    paths:
      - 'terraform/**'
      - '.github/workflows/validate-terraform.yml'

jobs:
  tofu-validate:
    name: tofu validate
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4

      - name: Install OpenTofu
        uses: opentofu/setup-opentofu@v1
        with:
          tofu_version: latest

      - name: Install SOPS
        run: |
          wget -qO /tmp/sops.deb https://github.com/getsops/sops/releases/download/v3.9.4/sops_3.9.4_amd64.deb
          sudo dpkg -i /tmp/sops.deb

      - name: Decrypt secrets
        env:
          SOPS_AGE_KEY: ${{ secrets.AGE_SECRET_KEY }}
        run: |
          find . -name '*.enc.yml' -o -name '*.enc.yaml' | while read f; do
            out="${f/.enc/}"
            sops -d "$f" > "$out"
            echo "Decrypted: $f -> $out"
          done

      # Treat every directory containing .tf files as an independent root;
      # -backend=false so validation needs no state credentials.
      - name: Find and validate Terraform roots
        run: |
          found=0
          for dir in $(find terraform/ -name '*.tf' -printf '%h\n' | sort -u); do
            echo "::group::Validating $dir"
            cd "$dir"
            tofu init -backend=false
            tofu validate
            cd "$GITHUB_WORKSPACE"
            echo "::endgroup::"
            found=1
          done
          if [ "$found" -eq 0 ]; then
            echo "No .tf files found — skipping validation."
          fi

53
.gitignore vendored Normal file
View file

@ -0,0 +1,53 @@
# Repo-wide ignore rules, grouped by tool.

# Terraform
*.tfstate
*.tfstate.backup
*.tfstate.*.backup
.terraform/
# NOTE(review): upstream guidance is to COMMIT .terraform.lock.hcl for
# reproducible provider versions — confirm ignoring it is intentional.
.terraform.lock.hcl
crash.log
override.tf
override.tf.json
*_override.tf
*_override.tf.json
# Ansible
*.retry
# Secrets — never commit plaintext secrets
*.key
*.pem
*.crt
*.p12
*.pfx
.vault_pass
.vault-password
secrets.yml
secrets.yaml
vault.yml
vault.yaml
**/secret.env
**/.env.secret
# SOPS (encrypted files are OK, but age keys are not)
keys.txt
*.agekey
# Editor / OS
.vscode/
.idea/
*.swp
*.swo
*~
.DS_Store
Thumbs.db
# Python
__pycache__/
*.pyc
.venv/
venv/
# Misc
*.log
*.bak
*.tmp

3
.sops.yaml Normal file
View file

@ -0,0 +1,3 @@
# SOPS configuration: any *.enc.yml / *.enc.yaml / *.enc.env file is
# encrypted to the fleet's age public key.
creation_rules:
  - path_regex: '\.enc\.(yml|yaml|env)$'
    age: age1r8uh2w2qad2z5sgq9q7l73962q2sp8zz9hdnh6sjuvanxl565vmswn8squ

9
Makefile Normal file
View file

@ -0,0 +1,9 @@
.PHONY: decrypt

# Decrypt all SOPS-encrypted files (*.enc.* -> *.*)
# Skips *.example templates and anything inside .git/.
decrypt:
	@find . -name "*.enc.*" ! -name "*.example" -not -path "./.git/*" | while read f; do \
		out=$$(echo "$$f" | sed 's/\.enc\././'); \
		echo "Decrypting $$f -> $$out"; \
		sops -d "$$f" > "$$out"; \
	done

106
README.md Normal file
View file

@ -0,0 +1,106 @@
# pez-infra
Infrastructure-as-code monorepo for Pez's homelab and cloud fleet. Everything needed to rebuild, configure, and maintain the server infrastructure from scratch.
## Architecture Overview
```
┌─────────────┐
│ Cloudflare │
│ DNS + CDN │
└──────┬──────┘
┌──────▼──────┐
│ helsinki-a │ Hetzner Cloud
│ Caddy proxy│ Reverse proxy + TLS
└──────┬──────┘
┌────────────┼────────────┐
│ Tailscale mesh │
│ │
┌─────────▼──┐ ┌──────▼──────┐ ┌─▼───────────┐
│ london-b │ │ london-a │ │ copenhagen-a │
│ Storage │ │ Monitoring │ │ Gaming │
│ Docker │ │ Prometheus │ │ Minecraft │
│ services │ │ Grafana │ │ WoW (MaNGOS)│
└────────────┘ └─────────────┘ └──────────────┘
┌─────────▼──┐ ┌─────────────┐
│ nuremberg-a│ │copenhagen-c │
│ Mail │ │ (idle) │
│ poste.io │ │ │
└────────────┘ └─────────────┘
```
### Hosts
| Host | Location | OS | Tailscale IP | Role |
|------|----------|-----|-------------|------|
| helsinki-a | Hetzner Cloud | Linux | 100.67.6.27 | Reverse proxy (Caddy), main traffic gateway |
| london-b | London | Linux | 100.84.65.101 | Primary storage (ZFS), Docker services |
| london-a | London | FreeBSD | 100.122.219.41 | Monitoring (Prometheus, Grafana) |
| nuremberg-a | Hetzner Cloud | Alpine Linux | 100.117.235.28 | Mail server (poste.io) |
| copenhagen-a | Copenhagen | Linux | 100.89.206.60 | Gaming servers (Minecraft, WoW/MaNGOS) |
| copenhagen-c | Copenhagen | Linux | 100.115.45.53 | Idle/available |
### Traffic Flow
1. DNS managed by Cloudflare (Terraform)
2. Traffic routes to helsinki-a (Caddy reverse proxy)
3. Caddy forwards to backend services over Tailscale mesh
4. Auth handled by Authelia with LLDAP backend (on london-b)
## Directory Structure
```
pez-infra/
├── ansible/ # Ansible playbooks, roles, inventory, and all managed files
│ ├── roles/ # Ansible roles (caddy, docker, dotfiles, etc.)
│ ├── services/ # Docker Compose definitions and service configs
│ ├── dotfiles/ # Shell config (fish, nvim, tmux, git, etc.)
│ └── scripts/ # Utility and maintenance scripts
└── terraform/ # Terraform/OpenTofu for Cloudflare, DNS, etc.
```
## Getting Started
### Prerequisites
- SSH access to hosts via Tailscale
- `ansible` for configuration management
- `tofu` (OpenTofu) or `terraform` for infrastructure provisioning
- `gh` CLI for GitHub operations
### Working with this repo
1. **Clone:** `git clone git@github.com:RWejlgaard/pez-infra.git`
2. **Services:** Each service has its own directory under `ansible/services/` with a `docker-compose.yml` and config files
3. **Deploy:** Ansible playbooks in `ansible/` handle deployment (see individual playbook docs)
4. **Infrastructure:** Terraform configs in `terraform/` manage DNS, tunnels, and access policies
### Secrets
Secrets are encrypted in-repo using [SOPS](https://github.com/getsops/sops) + [age](https://github.com/FiloSottile/age). Encrypted files use `.enc.` in their extension (e.g. `secrets.enc.yml`). See **[Secrets Management](docs/secrets.md)** for full setup and usage instructions.
Quick start: `./ansible/scripts/sops-setup.sh`
## Documentation
Comprehensive documentation lives in [`docs/`](docs/):
- **[Architecture](docs/architecture.md)** — Network topology, traffic flow, design principles
- **[Networking](docs/networking.md)** — Tailscale mesh, DNS flow, physical networking
- **[Services](docs/services.md)** — Complete service map with ports, auth, and deployment info
- **[Monitoring](docs/monitoring.md)** — Prometheus, Grafana, exporters, status page
- **[Getting Started](docs/getting-started.md)** — How to work with this repo
## Consolidated Repos
This monorepo replaces several standalone repos:
- `pez-ansible` → `ansible/`
- `pez-terraform` → `terraform/`
- `pez-grafana` → `ansible/services/grafana/`
- `pez-proxy` → `ansible/services/caddy/`
- `pez-docs` → `docs/` and per-host documentation
- `server-scripts` → `ansible/scripts/` and `ansible/`

41
ansible/Makefile Normal file
View file

@ -0,0 +1,41 @@
.PHONY: deploy deploy-check deploy-host update-all update-linux update-freebsd docker-status reboot ping deps

# Full fleet deploy
deploy:
	ansible-playbook deploy.yml

# Dry run (check + diff, no changes)
deploy-check:
	ansible-playbook deploy.yml --check --diff

# Deploy single host: make deploy-host HOST=helsinki-a
deploy-host:
	ansible-playbook deploy.yml --limit $(HOST)

# Update all packages across the fleet
update-all:
	ansible-playbook playbooks/update-all.yml

# Update Linux hosts only (apt + apk)
update-linux:
	ansible-playbook playbooks/update-linux.yml

# Update FreeBSD hosts only (pkg)
update-freebsd:
	ansible-playbook playbooks/update-freebsd.yml

# Show Docker container status
docker-status:
	ansible-playbook playbooks/docker-status.yml

# Reboot a specific host: make reboot HOST=copenhagen-c
reboot:
	ansible-playbook playbooks/reboot.yml --limit $(HOST)

# Ping all hosts
ping:
	ansible all -m ping

# Install Ansible Galaxy dependencies
deps:
	ansible-galaxy install -r requirements.yml

73
ansible/README.md Normal file
View file

@ -0,0 +1,73 @@
# Ansible — Deploy & Maintain
One-command deploy playbook for rebuilding hosts from repo state.
## Quick Start
```bash
cd ansible/
# Install dependencies
make deps
# Dry run — see what would change
make deploy-check
# Deploy everything
make deploy
# Deploy a single host
make deploy-host HOST=helsinki-a
```
## Playbooks
| Playbook | Purpose | Usage |
|----------|---------|-------|
| `deploy.yml` | Full host rebuild from repo | `make deploy` or `--limit <host>` |
| `playbooks/update-all.yml` | OS package updates (all hosts) | `make update-all` |
| `playbooks/update-linux.yml` | Linux-only updates (apt + apk) | `make update-linux` |
| `playbooks/update-freebsd.yml` | FreeBSD-only updates (pkg) | `make update-freebsd` |
| `playbooks/docker-status.yml` | Show running containers | `make docker-status` |
| `playbooks/reboot.yml` | Safe reboot with pre-flight | `make reboot HOST=<host>` |
## Deploy Stages
The deploy playbook runs in stages, each independently taggable:
1. **common** — Baseline packages, SSH hardening, fish shell
2. **docker** — Docker engine on container hosts
3. **node-exporter** — Prometheus monitoring agent on all hosts
4. **services** — Per-host service deployment:
- `helsinki-a`: Caddy reverse proxy
- `london-b`: Docker Compose services (Nextcloud, Jellyseer, etc.)
- `nuremberg-a`: poste.io mail
- `copenhagen-a`: Minecraft + MaNGOS systemd services
- `london-a`: Prometheus + Grafana (FreeBSD)
5. **verify** — Post-deploy health check
Run a single stage: `ansible-playbook deploy.yml --tags docker`
## Roles
| Role | Description |
|------|-------------|
| `common` | Base packages, SSH hardening, fish shell |
| `docker` | Docker engine install and setup |
| `docker_services` | Deploy compose files from `services/` |
| `dotfiles` | Shell config from `dotfiles/` |
| `caddy` | Caddy reverse proxy (helsinki-a) |
| `node_exporter` | Prometheus node_exporter |
| `systemd_services` | Custom systemd units from `services/` |
## Inventory
Hosts are grouped by OS and role. All use Tailscale IPs, SSH as root.
Per-host variables in `inventory/host_vars/<hostname>.yml`.
## Safety Notes
- **london-b**: Reboot playbook requires interactive confirmation (critical storage)
- **copenhagen-a**: Reboot includes netplan pre-flight check (static IP verification)
- All playbooks use `ignore_unreachable: true` for fleet operations
- `--check --diff` is your friend — always dry-run first on production

12
ansible/ansible.cfg Normal file
View file

@ -0,0 +1,12 @@
# Fleet-wide Ansible defaults — run from the ansible/ directory.
[defaults]
inventory = inventory/hosts.ini
roles_path = roles
remote_user = root
host_key_checking = False
pipelining = True
gather_facts = True
retry_files_enabled = False
result_format = yaml
# NOTE(review): host key checking is disabled here AND via ssh_args below —
# acceptable on a private Tailscale mesh, but pinned known_hosts would be
# stronger; confirm this trade-off is intentional.
[ssh_connection]
ssh_args = -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null

194
ansible/deploy.yml Normal file
View file

@ -0,0 +1,194 @@
---
# deploy.yml — One-command host rebuild
#
# Rebuilds a host from bare metal to fully configured using repo state.
# Assumes: SSH access via Tailscale, root user, host is in inventory.
#
# Usage:
#   Full fleet:   ansible-playbook deploy.yml
#   Single host:  ansible-playbook deploy.yml --limit helsinki-a
#   Dry run:      ansible-playbook deploy.yml --check --diff
#
# Prerequisites:
#   - Target host has SSH access via Tailscale
#   - Target host has a base OS installed (Debian/Alpine/FreeBSD)
#   - ansible-galaxy install -r requirements.yml

# ──────────────────────────────────────────────
# Stage 1: Common baseline — all hosts
# ──────────────────────────────────────────────
- name: "Stage 1: Common baseline"
  hosts: all
  tags: [common, baseline]
  roles:
    - role: common
    - role: dotfiles

# ──────────────────────────────────────────────
# Stage 2: Docker engine — hosts that run containers
# ──────────────────────────────────────────────
- name: "Stage 2: Docker engine"
  hosts: docker_hosts
  tags: [docker]
  roles:
    - role: docker

# ──────────────────────────────────────────────
# Stage 3: Monitoring agent — all hosts
# ──────────────────────────────────────────────
- name: "Stage 3: Node exporter"
  hosts: all
  tags: [monitoring, node_exporter]
  roles:
    - role: node_exporter

# ──────────────────────────────────────────────
# Stage 4: Per-host services
# ──────────────────────────────────────────────

# helsinki-a: Caddy reverse proxy
- name: "Stage 4a: Caddy (helsinki-a)"
  hosts: helsinki-a
  tags: [services, caddy]
  roles:
    - role: caddy

# london-b: Docker services (storage, apps)
- name: "Stage 4b: Docker services (london-b)"
  hosts: london-b
  tags: [services, london-b]
  roles:
    - role: docker_services

# nuremberg-a: Mail (poste.io via Docker)
- name: "Stage 4c: Mail (nuremberg-a)"
  hosts: nuremberg-a
  tags: [services, mail]
  roles:
    - role: docker_services

# copenhagen-a: Gaming servers
- name: "Stage 4d: Gaming servers (copenhagen-a)"
  hosts: copenhagen-a
  tags: [services, gaming]
  roles:
    - role: docker_services
    - role: systemd_services

# london-a: Monitoring stack (FreeBSD — Prometheus, Grafana)
# Note: london-a uses FreeBSD; monitoring roles handle this via conditionals.
- name: "Stage 4e: Monitoring stack (london-a)"
  hosts: london-a
  tags: [services, monitoring]
  tasks:
    # Stat on the controller so a missing repo file skips the copy cleanly.
    - name: Check for Prometheus config
      delegate_to: localhost
      ansible.builtin.stat:
        path: "{{ playbook_dir }}/services/prometheus/prometheus.yml"
      register: prometheus_config

    - name: Deploy Prometheus config
      ansible.builtin.copy:
        src: "{{ playbook_dir }}/services/prometheus/prometheus.yml"
        dest: /usr/local/etc/prometheus.yml
        mode: '0644'
        backup: true
      when: prometheus_config.stat.exists
      notify: Restart prometheus

    # failed_when: false — best-effort; the rules dir may not exist yet.
    - name: Deploy Prometheus alerting rules
      ansible.builtin.copy:
        src: "{{ playbook_dir }}/services/prometheus/rules/"
        dest: /usr/local/etc/prometheus/rules/
        mode: '0644'
      failed_when: false
      notify: Restart prometheus

    - name: Ensure unified_alerting section exists in Grafana config
      ansible.builtin.lineinfile:
        path: /usr/local/etc/grafana/grafana.ini
        regexp: '^\[unified_alerting\]'
        line: '[unified_alerting]'
      notify: Restart grafana

    - name: Allow provenance status change in Grafana
      ansible.builtin.lineinfile:
        path: /usr/local/etc/grafana/grafana.ini
        regexp: '^allow_prov_status_change'
        insertafter: '^\[unified_alerting\]'
        line: 'allow_prov_status_change = true'
      notify: Restart grafana

    - name: Deploy Grafana dashboards
      ansible.posix.synchronize:
        src: "{{ playbook_dir }}/services/grafana/dashboards/"
        dest: /usr/local/etc/grafana/dashboards/
      failed_when: false

    - name: Ensure provisioning dir exists
      ansible.builtin.file:
        path: "{{ grafana_provisioning_dir }}"
        state: directory
        mode: '0755'

    - name: Ensure alerting dir exists
      ansible.builtin.file:
        path: "{{ grafana_provisioning_dir }}/alerting"
        state: directory
        mode: '0755'

    - name: Deploy Grafana provisioning
      ansible.posix.synchronize:
        src: "{{ playbook_dir }}/services/grafana/provisioning/"
        dest: "{{ grafana_provisioning_dir }}/"
      failed_when: false

    # no_log keeps the PagerDuty key out of the run output.
    - name: Template contact points with PagerDuty key
      ansible.builtin.template:
        src: "{{ playbook_dir }}/services/grafana/provisioning/alerting/contact-points.yml"
        dest: "{{ grafana_provisioning_dir }}/alerting/contact-points.yml"
        mode: '0640'
        owner: root
        group: grafana
      no_log: true
      notify: Restart grafana

  handlers:
    - name: Restart prometheus
      ansible.builtin.service:
        name: prometheus
        state: restarted

    - name: Restart grafana
      ansible.builtin.service:
        name: grafana
        state: restarted

# ──────────────────────────────────────────────
# Stage 5: Verification
# ──────────────────────────────────────────────
- name: "Stage 5: Post-deploy verification"
  hosts: all
  tags: [verify]
  tasks:
    - name: Check SSH is working
      ansible.builtin.ping:

    - name: Gather uptime
      ansible.builtin.command: uptime
      changed_when: false
      register: uptime_result

    # The {{ '{{' }} escapes emit literal Go-template braces for docker ps.
    - name: Check Docker containers (where applicable)
      ansible.builtin.command: docker ps --format "table {{ '{{' }}.Names{{ '}}' }}\t{{ '{{' }}.Status{{ '}}' }}"
      changed_when: false
      register: docker_status
      when: "'docker_hosts' in group_names"
      failed_when: false

    - name: Report host status
      ansible.builtin.debug:
        msg: |
          Host: {{ inventory_hostname }} ({{ host_description | default('no description') }})
          Uptime: {{ uptime_result.stdout }}
          Docker: {{ docker_status.stdout_lines | default(['N/A']) | join('\n') }}

View file

@ -0,0 +1,9 @@
# Test image: exercise the dotfiles Makefile on a clean Alpine base.
FROM alpine:latest
COPY . /workspace
WORKDIR /workspace
# --no-cache avoids persisting the apk index in the image layer
RUN apk add --no-cache make shadow bash sudo
RUN make

View file

@ -0,0 +1,10 @@
# Test image: exercise the dotfiles Makefile on a clean Arch base.
FROM archlinux:latest
# COPY cannot reference paths outside the build context ("../." fails);
# copy the context itself, matching the Alpine Dockerfile.
COPY . /workspace
WORKDIR /workspace
# Single -Syu: refreshing the db and installing without -u risks a
# partial upgrade on a rolling-release distro.
RUN pacman -Syu --noconfirm make sudo git which
RUN make

View file

@ -0,0 +1,10 @@
# Test image: exercise the dotfiles Makefile on a clean Fedora base.
FROM fedora:latest
# COPY cannot reference paths outside the build context ("../." fails);
# copy the context itself, matching the Alpine Dockerfile.
COPY . /workspace
WORKDIR /workspace
RUN dnf -y update
RUN dnf -y install make sudo
RUN make

View file

@ -0,0 +1,9 @@
# Test image: exercise the dotfiles Makefile on a clean Ubuntu base.
FROM ubuntu:latest
# COPY cannot reference paths outside the build context ("../." fails);
# copy the context itself, matching the Alpine Dockerfile.
COPY . /workspace
WORKDIR /workspace
# `apt install --update` is not a valid apt invocation: update the index
# first, then install; noninteractive avoids prompts during the build.
RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y make sudo
RUN make

15
ansible/dotfiles/Makefile Normal file
View file

@ -0,0 +1,15 @@
# Legacy install entry points — the numbered scripts under install-scripts/
# do the actual work (see install.sh for the newer symlink-based flow).
full-install:
	@bash ./install-scripts/01-install-packages.sh
	@bash ./install-scripts/02-move-files.sh
	@fish ./install-scripts/03-fisher-install.fish
	@fish ./install-scripts/04-fish-plugins.fish
	@fish ./install-scripts/05-tmux-plugins.fish
	@fish ./install-scripts/06-vim-setup.fish
	@bash ./install-scripts/07-last-touches.sh
	@exec fish

# Re-copy config files only, then re-exec the shell
refresh:
	@bash ./install-scripts/02-move-files.sh
	@exec fish

# Neither target produces a file; refresh was missing from .PHONY
.PHONY: full-install refresh

View file

@ -0,0 +1,70 @@
# Dotfiles
Shell configuration, editor setup, and terminal config — consolidated from the standalone [dotfiles](https://github.com/RWejlgaard/dotfiles) repo.
## What's here
```
dotfiles/
├── config/
│ ├── fish/ # Fish shell config
│ │ ├── config.fish # Main config (greeting, editor, TERM)
│ │ └── conf.d/ # Auto-sourced by fish
│ │ ├── aliases.fish # OS-aware package manager aliases, k8s shortcuts
│ │ ├── envvars.fish # PATH and env vars
│ │ └── functions.fish # !! expansion, cheat, gitissue
│ ├── tmux/
│ │ └── tmux.conf # Prefix C-a, Alt keybindings, mouse, TPM plugins
│ ├── nvim/
│ │ └── init.lua # Lazy.nvim, LSP (Mason), Copilot, Neo-tree, Treesitter
│ ├── kitty/
│ │ └── kitty.conf # Color scheme, TERM fix
│ └── git/
│ └── gitconfig # user.name/email, gh credential helper
├── install-scripts/ # Numbered install scripts (from upstream dotfiles repo)
│ ├── 01-install-packages.sh # OS-aware package install
│ ├── 02-move-files.sh # Legacy copy-based deploy (use install.sh instead)
│ ├── 03-fisher-install.fish # Fisher plugin manager
│ ├── 04-fish-plugins.fish # Tide prompt
│ ├── 05-tmux-plugins.fish # TPM + plugins
│ ├── 06-vim-setup.fish # Lazy.nvim bootstrap
│ └── 07-last-touches.sh # Set fish as default shell, ~/bin
├── scripts/ # Utility scripts (Gentoo kernel upgrade helpers)
├── install.sh # Main install: symlinks + packages + plugins
└── Makefile # Legacy `make` target (calls install-scripts directly)
```
## Quick start
### Symlinks only (no package install)
```bash
./install.sh --link
```
This creates symlinks from the config files in this directory to their expected locations (`~/.config/fish/`, `~/.tmux.conf`, etc.). Existing files are backed up to `~/.dotfiles-backup/<timestamp>/`.
### Full install (packages + plugins + shell change)
```bash
./install.sh
```
Runs package installation (OS-aware), symlinks configs, installs Fish/Tmux/Neovim plugins, and sets Fish as the default shell.
## Fleet notes
Most servers run Fish as root shell. Current state captured from live fleet (2026-03-22):
| Host | Shell | Git configured | Dotfiles deployed |
|------|-------|----------------|-------------------|
| helsinki-a | fish | Yes (pez@pez.sh) | Yes (full) |
| london-b | fish | Yes (pez@pez.sh) | Partial (fish default, tmux custom) |
| nuremberg-a | fish | No | No |
| london-a | sh (FreeBSD) | No | No |
| copenhagen-a | fish | No (SpigotMC default) | No |
| copenhagen-c | fish | No | No |
## Relationship to upstream
This is a copy of [RWejlgaard/dotfiles](https://github.com/RWejlgaard/dotfiles) consolidated into the monorepo. The upstream repo can be archived once this is verified working. Key difference: `install.sh` here uses **symlinks** instead of copies, so editing configs in the repo takes effect immediately.

View file

@ -0,0 +1,42 @@
# aliases

# Package manager aliases — per-OS install/search shortcuts.
# Fix: `[ uname = "Darwin" ]` compared the literal string "uname" (always
# false); a command substitution is required to test the actual OS name.
if [ (uname) = "Darwin" ]
    alias get="brew install"
    alias search="brew search"
else if [ -f /etc/arch-release ]
    alias get="sudo pacman -S"
    alias search="pacman -Ss"
else if [ -f /etc/lsb-release ]
    alias get="sudo apt install"
    alias search="apt search"
else if [ -f /etc/alpine-release ]
    alias get="apk add"
    alias search="apk search"
else if [ (uname) = "FreeBSD" ]
    alias get="sudo pkg install -y"
    alias search="pkg search"
else if [ -f /etc/gentoo-release ]
    alias get="sudo emerge"
    alias search="emerge --search"
end

alias vim=nvim
alias cat="bat -Pp"
alias k="kubectl"
alias kp="kubectl get pods -A"
alias kc="kubectx"

# Gentoo
alias gentoo-check-update="sudo emerge --sync; and sudo emerge -avuDNp @world | genlop -p"
alias gentoo-upgrade="sudo emerge -avuDN @world"

function gentoo-package-use
    sudo vim /etc/portage/package.use/$argv
end

# Volume control (pipewire)
function vol
    # Silence both streams: redirects apply left to right, so the original
    # `2>&1 > /dev/null` still leaked stderr to the terminal.
    wpctl set-volume @DEFAULT_SINK@ $argv% > /dev/null 2>&1
end

View file

@ -0,0 +1,7 @@
# Environment variables for fish shell
# PATH additions — prepend user-local bin dirs so they win over system binaries.
# (fish provides `export` as a convenience wrapper; equivalent to `set -gx`.)
export PATH="$HOME/bin:$PATH"
export PATH="$HOME/.local/bin:$PATH"
# Other environment variables go here

View file

@ -0,0 +1,20 @@
# Replicate the behavior of `!!` in bash
# $history[1] is the most recent command; the abbr below expands `!!`
# anywhere on the command line into it.
function last_history_item
echo $history[1]
end
abbr -a !! --position anywhere --function last_history_item
# lookup various commands/syntax in a pinch
# Joins the remaining args with '+' as required by cht.sh query URLs.
function cheat --description "help <field> <topic>"
set args (echo $argv[2..-1] | tr ' ' '+')
curl "cht.sh/$argv[1]/$args"
end
# update master and create a branch with value: $1
# WARNING: `git reset --hard` discards any uncommitted local changes.
function gitissue
git reset --hard
git checkout master
git pull origin master
git branch $argv[1]
git checkout $argv[1]
end

View file

@ -0,0 +1,3 @@
# Main fish config — minimal; everything else lives in conf.d/.
export fish_greeting="" # Silence welcome message
export EDITOR=nvim
export TERM=xterm

View file

@ -0,0 +1,17 @@
# Git identity and credential configuration (deployed as ~/.gitconfig).
[user]
name = Rasmus Wejlgaard
email = pez@pez.sh
# The empty `helper =` clears any inherited helper list before adding gh.
[credential "https://github.com"]
helper =
helper = !/usr/bin/gh auth git-credential
[credential "https://gist.github.com"]
helper =
helper = !/usr/bin/gh auth git-credential
[init]
defaultBranch = main
[pull]
rebase = false

View file

@ -0,0 +1,22 @@
# Kitty config file
# Preferred color scheme — colorN is the normal shade, colorN+8 the bright
# variant; here most bright variants mirror the normal shade.
color0 #000000
color8 #555555
color1 #ff0000
color9 #ff0000
color2 #00ff00
color10 #00ff00
color3 #ffff00
color11 #ffff00
color4 #5555ff
color12 #5555ff
color5 #ff00ff
color13 #ff00ff
color6 #00ffff
color14 #00ffff
color7 #ffffff
color15 #ffffff
# For some reason kitty likes using "xterm-kitty" as TERM (this breaks a lot of stuff) so let's set this to xterm
term xterm

View file

@ -0,0 +1,281 @@
-- Bootstrap packer, if it's not installed (first run)
-- NOTE(review): leftover from a packer-based setup — plugins are now managed by
-- lazy.nvim (bootstrapped just below), so this clone is presumably redundant; confirm.
local fn = vim.fn
local install_path = fn.stdpath('data') .. '/site/pack/packer/start/packer.nvim'
if fn.empty(fn.glob(install_path)) > 0 then
Packer_bootstrap = fn.system({ 'git', 'clone', '--depth', '1', 'https://github.com/wbthomason/packer.nvim',
install_path })
end
-- Bootstrap lazy.nvim: clone the stable branch on first run, abort on failure.
local lazypath = vim.fn.stdpath 'data' .. '/lazy/lazy.nvim'
if not (vim.uv or vim.loop).fs_stat(lazypath) then
local lazyrepo = 'https://github.com/folke/lazy.nvim.git'
local out = vim.fn.system { 'git', 'clone', '--filter=blob:none', '--branch=stable', lazyrepo, lazypath }
if vim.v.shell_error ~= 0 then
error('Error cloning lazy.nvim:\n' .. out)
end
end
---@type vim.Option
local rtp = vim.opt.rtp
-- lazy.nvim must be on the runtimepath before require('lazy') below.
rtp:prepend(lazypath)
-- Plugin declarations, managed by lazy.nvim.
require('lazy').setup({
{ 'airblade/vim-gitgutter' }, -- show git changes in the gutter
{ 'hashivim/vim-terraform' }, -- terraform syntax highlighting
{
'junegunn/fzf',
-- lazy.nvim uses `build` (packer's `run` key is silently ignored), so the
-- fzf binary install hook was never running. A leading ':' runs it as an
-- ex-command, per fzf's docs.
build = ':call fzf#install()'
},
{'junegunn/fzf.vim'},
{ 'EdenEast/nightfox.nvim' }, -- nightfox theme
{ 'nvim-treesitter/nvim-treesitter' }, -- treesitter, makes syntax highlighting better
{ 'scrooloose/nerdcommenter' }, -- easy commenting
{ 'tpope/vim-fugitive' }, -- git integration with :G{git cmd}
{ 'itchyny/lightline.vim' }, -- statusline
{ 'wookayin/fzf-ripgrep.vim' }, -- fzf ripgrep integration, for "<leader>/"
{ 'yuki-yano/fzf-preview.vim' }, -- fzf preview
{ 'wbthomason/packer.nvim' }, -- package manager
{ 'fatih/vim-go' }, -- go syntax highlighting
{ "ellisonleao/glow.nvim" }, -- markdown preview using :Glow
{ 'rhysd/git-messenger.vim' }, -- Show git messages under cursor
{ 'onsails/lspkind.nvim' }, -- lsp kind, makes autocomplete look better
{ 'zbirenbaum/copilot.lua' }, -- copilot
{ 'hrsh7th/vim-vsnip' },
{ -- copilot addon for cmp
"zbirenbaum/copilot-cmp",
-- lazy.nvim has no packer-style `after` key; declare the dependency instead
-- so copilot.lua is loaded before copilot-cmp's setup runs.
dependencies = { "zbirenbaum/copilot.lua" },
config = function()
require("copilot_cmp").setup()
end
},
{ 'nvim-lua/plenary.nvim' }, -- lua utility functions
{ 'CopilotC-Nvim/CopilotChat.nvim' }, -- copilot chat
{ -- adds file bars along the top similar to vscode
'romgrk/barbar.nvim',
dependencies = { 'kyazdani42/nvim-web-devicons' }
},
{ -- adds a file explorer similar to vscode
"nvim-neo-tree/neo-tree.nvim",
branch = "v3.x",
dependencies = {
"nvim-lua/plenary.nvim",
"nvim-tree/nvim-web-devicons", -- not strictly required, but recommended
"MunifTanjim/nui.nvim",
}
},
{ -- adds diagnostics for files
"folke/trouble.nvim",
dependencies = "kyazdani42/nvim-web-devicons",
config = function()
require("trouble").setup {}
end
},
{ -- better terminal
's1n7ax/nvim-terminal',
config = function()
vim.o.hidden = true
require('nvim-terminal').setup()
end
},
{ -- mason, easy download and install of LSPs
"williamboman/mason.nvim",
"williamboman/mason-lspconfig.nvim",
"neovim/nvim-lspconfig"
},
{ -- LSP
'VonHeikemen/lsp-zero.nvim',
dependencies = { -- LSP Support
{ 'neovim/nvim-lspconfig' }, { 'williamboman/nvim-lsp-installer' }, -- Autocompletion
{ 'hrsh7th/nvim-cmp' }, { 'hrsh7th/cmp-buffer' }, { 'hrsh7th/cmp-path' }, { 'saadparwaiz1/cmp_luasnip' },
{ 'hrsh7th/cmp-nvim-lsp' }, { 'hrsh7th/cmp-nvim-lua' }, -- Snippets
{ 'L3MON4D3/LuaSnip' }, { 'rafamadriz/friendly-snippets' } }
},
})
-- settings
vim.opt.termguicolors = true
vim.opt.guifont = 'FiraCode Nerd Font:h12'
vim.opt.number = true -- absolute line numbers
-- Indentation: hard tabs render as width 8, editing inserts 4 spaces.
vim.opt.smarttab = true
vim.opt.tabstop = 8
vim.opt.softtabstop = 0
vim.opt.expandtab = true
vim.opt.shiftwidth = 4
vim.opt.backspace = '2' -- legacy "2": backspace over indent, eol, start
vim.opt.laststatus = 2 -- always show the statusline
vim.opt.mouse = 'a' -- mouse enabled in all modes
vim.opt.clipboard = 'unnamed'
vim.opt.scrolloff = 17
-- (removed: a `vim.cmd('set tabstop=8 softtabstop=0 expandtab shiftwidth=4 smarttab')`
-- and a second `vim.opt.number = true` that merely restated the options above)
vim.opt.colorcolumn = '80'
vim.g.terraform_fmt_on_save = true -- vim-terraform: format on save
-- keybindings
local opts = {
noremap = true,
silent = true
}
vim.api.nvim_set_keymap('n', 'nt', ':Neotree toggle<CR>', opts) -- toggle file tree
vim.api.nvim_set_keymap('n', 'qqq', ':qall<CR>', opts) -- quit all windows
vim.api.nvim_set_keymap('n', '<C-f>', ':Files<CR>', opts) -- fzf file picker
vim.api.nvim_set_keymap('n', '<leader>/', ':Rg<CR>', opts) -- ripgrep search
vim.api.nvim_set_keymap('n', '<leader>v', ':vsplit<CR>', opts)
vim.api.nvim_set_keymap('n', '<leader>h', ':split<CR>', opts)
vim.api.nvim_set_keymap('n', '<leader>b', ':Buffers<CR>', opts) -- fzf buffer picker
vim.api.nvim_set_keymap('t', '<leader><ESC>', '<C-\\><C-n>', opts) -- leave terminal mode
vim.api.nvim_set_keymap('n', '<leader>d', ':Trouble diagnostics toggle<CR>', opts)
vim.api.nvim_set_keymap('n', '<leader>g', ':GitMessenger<CR>', opts) -- git message under cursor
-- Route delete/change through the black-hole register so they don't clobber the yank.
vim.api.nvim_set_keymap('n', 'd', '"_d', opts)
vim.api.nvim_set_keymap('v', 'd', '"_d', opts)
vim.api.nvim_set_keymap('n', 'c', '"_c', opts)
vim.api.nvim_set_keymap('v', 'c', '"_c', opts)
-- plugins setup
require("CopilotChat").setup {}
-- Disable copilot.lua's own suggestion/panel UI; completions surface through
-- the copilot-cmp source registered in cmp.setup instead.
require("copilot").setup({
suggestion = {
enabled = false
},
panel = {
enabled = false
}
})
require("nvim-lsp-installer").setup {}
require("neo-tree").setup {
close_on_open = false,
close_if_last_window = true,
window = {
width = 40,
side = "left",
auto_resize = true,
mappings = {
["o"] = "open"
}
},
filesystem = {
-- open in the current window instead of netrw when editing a directory
hijack_netrw_behavior = "open_current"
}
}
-- Open NeoTree on startup when no file is specified
--vim.api.nvim_create_augroup('NeoTreeOnStartup', { clear = true })
--vim.api.nvim_create_autocmd('VimEnter', {
--group = 'NeoTreeOnStartup',
--callback = function()
--if vim.fn.argc() == 0 then
--vim.cmd('Neotree toggle')
--end
--end
--})
-- Language Server
local lsp_zero = require('lsp-zero')
local cmp = require('cmp')
-- Apply lsp-zero's default buffer-local keymaps whenever a server attaches.
lsp_zero.on_attach(function(_, bufnr)
lsp_zero.default_keymaps({
buffer = bufnr
})
end)
require('mason').setup({})
-- Auto-install these servers via mason-lspconfig and wire them up with
-- lsp-zero's default handler.
require('mason-lspconfig').setup({
ensure_installed = {
'bashls',
'dockerls',
'gopls',
'jsonls',
'yamlls',
'pyright',
},
handlers = {
lsp_zero.default_setup,
-- lua_ls gets lsp-zero's Neovim-specific preset (recognizes the `vim` global)
lua_ls = function()
local lua_opts = lsp_zero.nvim_lua_ls()
require('lspconfig').lua_ls.setup(lua_opts)
end
}
})
local lspkind = require('lspkind')
-- Symbol override for the Copilot completion kind.
lspkind.init({
symbol_map = {
Copilot = ""
}
})
-- Highlight for the Copilot completion-kind icon.
vim.api.nvim_set_hl(0, "CmpItemKindCopilot", { --
fg = "#6CC644"
})
-- Completion engine configuration: lspkind-formatted menu, vsnip snippet
-- expansion, and sources ordered copilot > LSP > lua > path, with buffer
-- words as a fallback group.
cmp.setup({
formatting = {
format = lspkind.cmp_format({
mode = 'symbol', -- show only symbol annotations
maxwidth = 70, -- prevent the popup from showing more than provided characters (e.g 50 will not show more than 50 characters)
-- can also be a function to dynamically calculate max width such as
-- maxwidth = function() return math.floor(0.45 * vim.o.columns) end,
ellipsis_char = '...', -- when popup menu exceed maxwidth, the truncated part would show ellipsis_char instead (must define maxwidth first)
show_labelDetails = true, -- show labelDetails in menu. Disabled by default
-- The function below will be called before any actual modifications from lspkind
-- so that you can provide more controls on popup customization. (See [#30](https://github.com/onsails/lspkind-nvim/pull/30))
before = function(_, vim_item)
-- do some customizations ...
return vim_item
end
})
},
snippet = {
-- REQUIRED - you must specify a snippet engine
expand = function(args)
vim.fn["vsnip#anonymous"](args.body) -- For `vsnip` users.
-- require('luasnip').lsp_expand(args.body) -- For `luasnip` users.
-- require('snippy').expand_snippet(args.body) -- For `snippy` users.
-- vim.fn["UltiSnips#Anon"](args.body) -- For `ultisnips` users.
-- vim.snippet.expand(args.body) -- For native neovim snippets (Neovim v0.10+)
end
},
window = {
-- completion = cmp.config.window.bordered(),
-- documentation = cmp.config.window.bordered(),
},
mapping = cmp.mapping.preset.insert({
['<C-b>'] = cmp.mapping.scroll_docs(-4),
['<C-f>'] = cmp.mapping.scroll_docs(4),
['<C-Space>'] = cmp.mapping.complete(),
['<C-e>'] = cmp.mapping.abort(),
['<CR>'] = cmp.mapping.confirm({
select = true
}) -- Accept currently selected item. Set `select` to `false` to only confirm explicitly selected items.
}),
sources = cmp.config.sources({ {
name = 'copilot'
}, {
name = 'nvim_lsp'
}, {
name = 'nvim_lua'
}, {
name = 'path'
} -- For vsnip users.
-- { name = 'luasnip' }, -- For luasnip users.
-- { name = 'ultisnips' }, -- For ultisnips users.
-- { name = 'snippy' }, -- For snippy users.
}, { {
name = 'buffer'
} })
})
-- enable diagnostics for showing in-line
vim.g.diagnostics_active = true
vim.diagnostic.config {
virtual_text = true,
signs = true,
underline = true
}
-- NOTE(review): nightfox is installed above but the built-in 'murphy' scheme
-- is the one applied — confirm which is intended.
vim.cmd('colorscheme murphy')

View file

@ -0,0 +1,88 @@
# Prefix: C-a instead of the default C-b
set -g prefix C-a
unbind-key C-b
bind-key C-a send-prefix
# Splits open in the current pane's directory; M- bindings work without prefix
bind -n M-_ split-window -h -c "#{pane_current_path}"
bind -n M-- split-window -v -c "#{pane_current_path}"
unbind '"'
unbind %
# Alt+arrows: move between panes
bind -n M-Left select-pane -L
bind -n M-Right select-pane -R
bind -n M-Up select-pane -U
bind -n M-Down select-pane -D
bind -n M-[ previous-window
bind -n M-] next-window
bind -n M-\{ swap-pane -U
bind -n M-\} swap-pane -D
set -g mouse on
set-option -g allow-rename off
# Number windows/panes from 1 (matches keyboard layout)
set -g base-index 1
setw -g pane-base-index 1
# Alt+Shift+arrows: resize panes in steps of 5
bind-key -n M-S-Up resize-pane -U 5
bind-key -n M-S-Down resize-pane -D 5
bind-key -n M-S-Left resize-pane -L 5
bind-key -n M-S-Right resize-pane -R 5
bind -n M-q kill-pane
bind -n M-S-q kill-window
# Alt+N: jump directly to window N
bind -n M-1 select-window -t 1
bind -n M-2 select-window -t 2
bind -n M-3 select-window -t 3
bind -n M-4 select-window -t 4
bind -n M-5 select-window -t 5
bind -n M-6 select-window -t 6
bind -n M-7 select-window -t 7
bind -n M-8 select-window -t 8
bind -n M-9 select-window -t 9
bind -n M-Enter new-window
setw -g monitor-activity on
set -g visual-activity on
# No delay after ESC (important for vim)
set -sg escape-time 0
# Plugins
set -g @plugin 'tmux-plugins/tpm'
set -g @plugin 'nhdaly/tmux-better-mouse-mode'
set -g @plugin 'tmux-plugins/tmux-cpu'
# pane borders
set -g pane-border-style 'fg=colour8'
set -g pane-active-border-style 'fg=colour7'
# statusbar
set -g status-position bottom # status bar at the bottom
set -g status-justify centre # center window buttons
set -g status-style 'fg=colour7'
set -g status-left ' #H ' # Hostname on the left
set -g @cpu_percentage_format "%3.0f%%"
set -g @ram_percentage_format "%3.0f%%"
# Set CPU and RAM to be shown in the status bar
# NOTE(review): @cpu_percentage_format above already appends '%' to the script
# output, so the literal '%' after the CPU #() below may render doubled — confirm.
set -g status-right '#[fg=colour7 bg=colour234] CPU: #(~/.tmux/plugins/tmux-cpu/scripts/cpu_percentage.sh)% | RAM: #(~/.tmux/plugins/tmux-cpu/scripts/ram_percentage.sh) | %H:%M:%S '
# Tmux sections needs a preset length
set -g status-right-length 50
set -g status-left-length 30
# window tabs
setw -g window-status-current-style 'fg=colour0 bg=colour7'
setw -g window-status-current-format ' #I #W ' # Window ID, Window Name
setw -g window-status-style 'fg=colour8'
setw -g window-status-format ' #I #W ' # Window ID, Window Name
setw -g window-status-bell-style 'fg=colour234 bg=colour0 bold'
# plugins runs
set-option -g status-interval 5
run -b '~/.tmux/plugins/tpm/tpm'

View file

@ -0,0 +1,73 @@
#!/bin/bash
# Install the base package set on whichever OS/distro this runs on.
# Each distro check below is an independent `if` so a single pass handles
# exactly the platform(s) whose marker file/uname matches.
set -e # exit on error
PACKAGES=(
"tmux"
"neovim"
"git"
"fish"
"curl"
"bat"
"go"
"ripgrep"
)
# if MacOS install Homebrew
if [ "$(uname)" == "Darwin" ]; then
if [ ! -x "$(which brew)" ]; then
/bin/bash -c "$(curl -fsSL https://raw.githubusercontent.com/Homebrew/install/HEAD/install.sh)"
fi
# install packages
brew install "${PACKAGES[@]}"
fi
# if Arch install
if [ -f /etc/arch-release ]; then
# install yay (AUR helper) — makepkg refuses to run as root, hence the EUID check
if [ -z "$(which yay)" ] && [ "$EUID" -ne 0 ]; then
sudo pacman -S --noconfirm base-devel
git clone https://aur.archlinux.org/yay.git
cd yay
makepkg -si --noconfirm
cd ..
rm -rf yay
fi
# install packages
sudo pacman -S --noconfirm "${PACKAGES[@]}"
fi
# if debian or ubuntu install
if [ -f /etc/debian_version ]; then
# replace "go" with "golang" for debian
# (pattern substitution applies per element; only the "go" entry matches here)
PACKAGES=("${PACKAGES[@]/go/golang}")
# install packages
export DEBIAN_FRONTEND=noninteractive
sudo apt update
sudo apt install -y "${PACKAGES[@]}"
fi
# if Alpine install
if [ -f /etc/alpine-release ]; then
# install packages
sudo apk add "${PACKAGES[@]}"
fi
# if freebsd install
if [ "$(uname)" == "FreeBSD" ]; then
# install packages
sudo pkg install -y "${PACKAGES[@]}"
fi
# if RHEL/CentOS/Fedora install
if [ -f /etc/redhat-release ]; then
# install packages
sudo dnf install -y "${PACKAGES[@]}"
fi
# if Gentoo
if [ -f /etc/gentoo-release ]; then
# git needs its full category/name atom on Gentoo
PACKAGES=("${PACKAGES[@]/git/dev-vcs\/git}")
sudo emerge "${PACKAGES[@]}"
fi

View file

@ -0,0 +1,28 @@
#!/bin/bash
# Copy dotfiles from the repo into their runtime locations (run from repo root).
set -e # exit on error

# Destination directories that must exist before copying
for dir in ~/.config/nvim ~/.config/fish ~/.config/fish/conf.d ~/.config/kitty; do
    mkdir -p "$dir"
done

# vim
cp config/vim/init.lua ~/.config/nvim/
# fish
cp config/fish/config.fish ~/.config/fish/
cp config/fish/aliases.fish ~/.config/fish/conf.d/
cp config/fish/functions.fish ~/.config/fish/conf.d/
# Only copy envvars.fish if it doesn't exist.
# This way we can override it with our own configs.
[ -f ~/.config/fish/conf.d/envvars.fish ] || cp config/fish/envvars.fish ~/.config/fish/conf.d/
# tmux
cp config/tmux/tmux.conf ~/.tmux.conf
# kitty
cp config/kitty/kitty.conf ~/.config/kitty/

View file

@ -0,0 +1,4 @@
#!/usr/bin/env fish
# Install fisher (fish plugin manager).
# The old https://git.io/fisher shortlink relies on GitHub's deprecated git.io
# redirect service; use the canonical raw URL from the fisher README instead.
curl -sL https://raw.githubusercontent.com/jorgebucaran/fisher/main/functions/fisher.fish | source && fisher install jorgebucaran/fisher

View file

@ -0,0 +1,15 @@
#!/usr/bin/env fish
# tide — prompt/theme plugin, installed via fisher
fisher install IlanCosman/tide
# setup tide
# Non-interactive configuration: --auto applies the flag values below without prompting.
tide configure \
--auto \
--style=Lean \
--prompt_colors='True color' \
--show_time='24-hour format' \
--lean_prompt_height='One line' \
--prompt_spacing=Compact \
--icons='Few icons' \
--transient=No

View file

@ -0,0 +1,9 @@
#!/usr/bin/env fish
# install tmux plugin manager if not installed
if not test -e ~/.tmux/plugins/tpm
git clone https://github.com/tmux-plugins/tpm ~/.tmux/plugins/tpm
end
# install/update tmux plugins
# Runs tpm's installer directly so plugins are ready before tmux is first started.
env TMUX_PLUGIN_MANAGER_PATH=~/.tmux/plugins ~/.tmux/plugins/tpm/bin/install_plugins

View file

@ -0,0 +1,4 @@
#!/usr/bin/env fish
# With Lazy we don't need any setup, leaving this file here for the future

View file

@ -0,0 +1,29 @@
#!/bin/bash
# Post-install tweaks: register fish as a valid login shell, switch to it,
# and install the Gentoo kernel-upgrade helper where applicable.

fish_path="$(which fish)"
# ensure fish is in shells
if ! grep -q "$fish_path" /etc/shells; then
    echo "$fish_path" | sudo tee -a /etc/shells
fi
# change shell to fish
# ($SHELL and the fish path are now quoted — previously unquoted word-splitting hazards)
if ! [ "$(basename "$SHELL")" == "fish" ]; then
    chsh -s "$fish_path"
fi
# create local bin directory
mkdir -p ~/bin
# Gentoo specific kernel script
if [ -f /etc/gentoo-release ]; then
    init_system=openrc
    echo "Gentoo detected, need clarification on which init system is used."
    echo -e "Which init system?\n\n1> openrc\n2> systemd"
    # Single keypress; answer lands in $REPLY since no variable is named.
    read -p "[1]/2: " -n 1
    echo
    if [ "$REPLY" == "2" ]; then
        init_system=systemd
    fi
    sudo cp scripts/gentoo-kernel-upgrade-$init_system /usr/bin/gentoo-kernel-upgrade
fi

97
ansible/dotfiles/install.sh Executable file
View file

@ -0,0 +1,97 @@
#!/bin/bash
#
# install.sh — Symlink dotfiles to their expected locations.
#
# Usage:
# ./install.sh Full install (packages + symlinks + plugins)
# ./install.sh --link Symlinks only (no package install, no plugin setup)
#
# Safe to re-run: existing files are backed up to ~/.dotfiles-backup/
#
set -euo pipefail
# Repo root = the directory containing this script (works from any CWD)
DOTFILES_DIR="$(cd "$(dirname "$0")" && pwd)"
# Timestamped subdir so repeated runs never overwrite earlier backups
BACKUP_DIR="$HOME/.dotfiles-backup/$(date +%Y%m%d-%H%M%S)"
LINK_ONLY=false
# "${1:-}" keeps set -u happy when no argument is given
if [[ "${1:-}" == "--link" ]]; then
LINK_ONLY=true
fi
# ── helpers ──────────────────────────────────────────────────────────
# Symlink $1 (src) to $2 (dst), backing up any pre-existing dst into
# $BACKUP_DIR first. No-op when dst is already the correct symlink.
backup_and_link() {
local src="$1"
local dst="$2"
# Create parent directory if needed
mkdir -p "$(dirname "$dst")"
# If destination exists and isn't already the right symlink, back it up
# (-L catches dangling symlinks that -e misses)
if [ -e "$dst" ] || [ -L "$dst" ]; then
if [ -L "$dst" ] && [ "$(readlink "$dst")" = "$src" ]; then
return 0 # already correct
fi
mkdir -p "$BACKUP_DIR"
mv "$dst" "$BACKUP_DIR/" 2>/dev/null || true
# Fix: the two log lines below concatenated the paths with no separator
echo " backed up: $dst -> $BACKUP_DIR/"
fi
ln -sf "$src" "$dst"
echo " linked: $dst -> $src"
}
# ── symlinks ─────────────────────────────────────────────────────────
echo "Linking dotfiles..."
# Fish shell
backup_and_link "$DOTFILES_DIR/config/fish/config.fish" "$HOME/.config/fish/config.fish"
backup_and_link "$DOTFILES_DIR/config/fish/conf.d/aliases.fish" "$HOME/.config/fish/conf.d/aliases.fish"
backup_and_link "$DOTFILES_DIR/config/fish/conf.d/envvars.fish" "$HOME/.config/fish/conf.d/envvars.fish"
backup_and_link "$DOTFILES_DIR/config/fish/conf.d/functions.fish" "$HOME/.config/fish/conf.d/functions.fish"
# Tmux
backup_and_link "$DOTFILES_DIR/config/tmux/tmux.conf" "$HOME/.tmux.conf"
# Neovim
backup_and_link "$DOTFILES_DIR/config/nvim/init.lua" "$HOME/.config/nvim/init.lua"
# Kitty
backup_and_link "$DOTFILES_DIR/config/kitty/kitty.conf" "$HOME/.config/kitty/kitty.conf"
# Git
backup_and_link "$DOTFILES_DIR/config/git/gitconfig" "$HOME/.gitconfig"
echo "Done linking."
# --link mode stops here: no package installation, no plugin setup
if [ "$LINK_ONLY" = true ]; then
echo "Symlinks only — skipping packages and plugins."
exit 0
fi
# ── packages ─────────────────────────────────────────────────────────
echo ""
echo "Installing packages..."
bash "$DOTFILES_DIR/install-scripts/01-install-packages.sh"
# ── plugins ──────────────────────────────────────────────────────────
# Fish-based plugin setup only runs when fish was actually installed above
if command -v fish &>/dev/null; then
echo ""
echo "Setting up Fish plugins..."
fish "$DOTFILES_DIR/install-scripts/03-fisher-install.fish"
fish "$DOTFILES_DIR/install-scripts/04-fish-plugins.fish"
fish "$DOTFILES_DIR/install-scripts/05-tmux-plugins.fish"
fish "$DOTFILES_DIR/install-scripts/06-vim-setup.fish"
fi
# ── final touches ────────────────────────────────────────────────────
echo ""
echo "Running final setup..."
bash "$DOTFILES_DIR/install-scripts/07-last-touches.sh"
echo ""
echo "All done. Restart your terminal or run: exec fish"

View file

@ -0,0 +1,30 @@
name: PR Test

# Build the test Dockerfiles for each distro on every PR; images are never pushed.
on:
  pull_request:
    branches:
      - master
      - main

jobs:
  test:
    strategy:
      matrix:
        distro:
          - alpine
          - archlinux
          - fedora
          - ubuntu
    runs-on: ubuntu-latest
    steps:
      # v4 to match the repo's other workflows (checkout@v3 runs on a deprecated Node runtime)
      - uses: actions/checkout@v4
      - name: Set up QEMU
        uses: docker/setup-qemu-action@v3
      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v3
      - name: Only build
        run: |
          docker buildx build --platform linux/amd64 --output "type=image,push=false" -f tests/Dockerfile-${{ matrix.distro }} .

View file

@ -0,0 +1,399 @@
#!/bin/bash
#
# Gentoo Kernel Upgrade Script
# This script helps upgrade the gentoo-kernel-bin package safely.
#
# IMPORTANT: This script modifies boot files. A failed upgrade could
# prevent your system from booting. Ensure you have a backup plan.
#
set -euo pipefail
# Colors for output
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
BLUE='\033[0;34m'
BOLD='\033[1m'
NC='\033[0m' # No Color
# Boot locations — adjust if your EFI partition is mounted elsewhere
EFI_DIR="/boot/efi"
BOOT_DIR="/boot"
# Print functions
info() { echo -e "${BLUE}==>${NC} ${BOLD}$1${NC}"; }
warn() { echo -e "${YELLOW}WARNING:${NC} $1"; }
error() { echo -e "${RED}ERROR:${NC} $1" >&2; }
success() { echo -e "${GREEN}==>${NC} $1"; }
# Check if running as root
# Exits with status 1 unless EUID is 0.
check_root() {
if [[ $EUID -ne 0 ]]; then
error "This script must be run as root"
exit 1
fi
}
# Prompt user for confirmation
# $1: prompt text. Returns 0 only if the user types exactly 'yes'.
confirm() {
local prompt="$1"
local response
echo -e "${YELLOW}${prompt}${NC}"
read -p "Type 'yes' to confirm: " response
[[ "$response" == "yes" ]]
}
# Get the currently running kernel version
get_current_version() {
uname -r
}
# Get the latest installed kernel version from /usr/src
# Prints the highest version-sorted linux-*-gentoo-dist entry (e.g.
# "6.6.30-gentoo-dist"), or an empty string when none are installed.
get_latest_installed_version() {
local latest
latest=$(ls -1d /usr/src/linux-*-gentoo-dist 2>/dev/null | sort -V | tail -1 | sed 's|.*/linux-||')
echo "$latest"
}
# Check for available updates using emerge
# Returns 0 and prints the pending ebuild line(s) when an update exists,
# 1 otherwise. Runs emerge in pretend mode twice (check + capture).
check_for_updates() {
info "Checking for gentoo-kernel-bin updates..."
echo
# Use emerge to check for updates (pretend mode)
if emerge -pvu gentoo-kernel-bin 2>/dev/null | grep -q "gentoo-kernel-bin"; then
local update_info
update_info=$(emerge -pvu gentoo-kernel-bin 2>/dev/null | grep -E "gentoo-kernel-bin|ebuild")
echo "$update_info"
return 0
else
return 1
fi
}
# Install the kernel update
# Prompts, then emerges gentoo-kernel-bin. Returns 1 if the user declines.
install_kernel_update() {
info "Installing gentoo-kernel-bin update..."
echo
echo -e "${BOLD}The following command will be executed:${NC}"
echo -e " ${BLUE}emerge gentoo-kernel-bin${NC}"
echo
if ! confirm "Proceed with kernel package installation?"; then
warn "Installation cancelled by user"
return 1
fi
echo
emerge gentoo-kernel-bin
success "Kernel package installed successfully"
}
# Select the new kernel with eselect
# $1: kernel version string to activate. Falls back to a manual prompt when
# the version can't be located in `eselect kernel list`.
select_kernel() {
local new_version="$1"
info "Available kernel versions:"
eselect kernel list
echo
# Find the number corresponding to the new version
# NOTE(review): if $new_version matches more than one list entry, the sed
# output contains multiple numbers — presumably versions are unique; confirm.
local kernel_num
kernel_num=$(eselect kernel list | grep "$new_version" | sed 's/.*\[\([0-9]*\)\].*/\1/')
if [[ -z "$kernel_num" ]]; then
error "Could not find kernel version $new_version in eselect list"
echo "Please select the kernel manually:"
eselect kernel list
read -p "Enter the number to select: " kernel_num
fi
echo -e "${BOLD}The following command will be executed:${NC}"
echo -e " ${BLUE}eselect kernel set $kernel_num${NC}"
echo
if ! confirm "Set kernel $new_version as active?"; then
warn "Kernel selection cancelled by user"
return 1
fi
eselect kernel set "$kernel_num"
# Verify
info "Current kernel symlink:"
eselect kernel list | grep '\*'
echo
}
# Backup current boot files
# Renames vmlinuz.efi/initramfs.img to *.bak inside $EFI_DIR, overwriting any
# previous backups. NOTE: leaves the shell cd'd into $EFI_DIR afterwards.
backup_boot_files() {
info "Backing up current boot files..."
echo -e "${BOLD}The following operations will be performed in ${EFI_DIR}:${NC}"
echo -e " ${BLUE}mv vmlinuz.efi -> vmlinuz.efi.bak${NC}"
echo -e " ${BLUE}mv initramfs.img -> initramfs.img.bak${NC}"
echo
if [[ -f "${EFI_DIR}/vmlinuz.efi.bak" ]] || [[ -f "${EFI_DIR}/initramfs.img.bak" ]]; then
warn "Existing backup files will be overwritten!"
fi
if ! confirm "Backup current boot files?"; then
warn "Backup cancelled by user"
return 1
fi
cd "${EFI_DIR}"
if [[ -f "vmlinuz.efi" ]]; then
mv vmlinuz.efi vmlinuz.efi.bak
success "Backed up vmlinuz.efi"
fi
if [[ -f "initramfs.img" ]]; then
mv initramfs.img initramfs.img.bak
success "Backed up initramfs.img"
fi
echo
}
# Generate new initramfs
# $1: kernel version. Delegates to the generate_initramfs.sh helper that is
# expected to live in $EFI_DIR.
generate_initramfs() {
local version="$1"
info "Generating new initramfs for kernel ${version}..."
echo
echo -e "${BOLD}The following command will be executed:${NC}"
echo -e " ${BLUE}${EFI_DIR}/generate_initramfs.sh ${version} ${EFI_DIR}/initramfs.img${NC}"
echo
if ! confirm "Generate initramfs?"; then
warn "Initramfs generation cancelled by user"
return 1
fi
bash "${EFI_DIR}/generate_initramfs.sh" "${version}" "${EFI_DIR}/initramfs.img"
success "Initramfs generated successfully"
echo
}
# Copy new kernel to EFI partition
# $1: kernel version; expects ${BOOT_DIR}/vmlinuz-$1 to exist.
copy_kernel() {
local version="$1"
local kernel_source="${BOOT_DIR}/vmlinuz-${version}"
info "Copying new kernel to EFI partition..."
echo
if [[ ! -f "$kernel_source" ]]; then
error "Kernel file not found: $kernel_source"
return 1
fi
echo -e "${BOLD}The following command will be executed:${NC}"
echo -e " ${BLUE}cp ${kernel_source} ${EFI_DIR}/vmlinuz.efi${NC}"
echo
if ! confirm "Copy new kernel to EFI partition?"; then
warn "Kernel copy cancelled by user"
return 1
fi
cp "$kernel_source" "${EFI_DIR}/vmlinuz.efi"
success "Kernel copied successfully"
echo
}
# Cleanup old kernel versions from /boot
# $1 (optional): kernel version to keep; defaults to the running kernel.
# Previously this read `current_version` — a `local` in main() visible here
# only through bash's dynamic scoping, and an unbound-variable error under
# `set -u` if called from anywhere else. A defaulted parameter keeps the
# existing call site working while making the function self-contained.
cleanup_old_versions() {
local current_version="${1:-$(uname -r)}"
info "Checking for old kernel versions to clean up..."
echo
# Find old initramfs and kernel files (excluding current version)
local initramfs_to_delete=()
local kernels_to_delete=()
while IFS= read -r f; do
initramfs_to_delete+=("$f")
done < <(ls ${BOOT_DIR}/initramfs-*.img 2>/dev/null | grep -v "$current_version" || true)
while IFS= read -r f; do
kernels_to_delete+=("$f")
done < <(ls ${BOOT_DIR}/vmlinuz-* 2>/dev/null | grep -v "$current_version" || true)
# Check if there's anything to delete
if [[ ${#initramfs_to_delete[@]} -eq 0 ]] && [[ ${#kernels_to_delete[@]} -eq 0 ]]; then
success "No old kernel versions to clean up"
echo
return 0
fi
echo -e "${BOLD}The following files will be DELETED:${NC}"
echo
for f in "${initramfs_to_delete[@]}" "${kernels_to_delete[@]}"; do
echo -e " ${RED}$f${NC}"
done
echo
warn "This action cannot be undone!"
echo
if ! confirm "Delete these old kernel files?"; then
warn "Cleanup cancelled by user"
return 1
fi
echo
for f in "${initramfs_to_delete[@]}" "${kernels_to_delete[@]}"; do
rm -v "$f"
done
echo
success "Old kernel versions cleaned up"
echo
}
# Print summary of what will be done
# $1: current running version, $2: new version.
print_summary() {
local current="$1"
local new="$2"
echo
echo -e "${BOLD}╔════════════════════════════════════════════════════════════════╗${NC}"
echo -e "${BOLD}║ GENTOO KERNEL UPGRADE SUMMARY ║${NC}"
echo -e "${BOLD}╚════════════════════════════════════════════════════════════════╝${NC}"
echo
echo -e " Current running kernel: ${YELLOW}${current}${NC}"
echo -e " New kernel version: ${GREEN}${new}${NC}"
echo
echo -e "${BOLD}The following steps will be performed:${NC}"
echo " 1. Backup current vmlinuz.efi and initramfs.img"
echo " 2. Generate new initramfs using dracut"
echo " 3. Copy new kernel to EFI partition"
echo
echo -e "${RED}${BOLD}WARNING: This process modifies boot files!${NC}"
echo -e "${RED}If something goes wrong, your system may not boot.${NC}"
echo -e "${RED}Make sure you have a way to recover (live USB, backup, etc.)${NC}"
echo
}
# Main function
# Flow: if an update is available, install it and set up boot files; if not,
# either offer boot-file setup for an already-installed newer kernel, or offer
# cleanup of old versions when the running kernel is the newest.
main() {
echo
echo -e "${BOLD}╔════════════════════════════════════════════════════════════════╗${NC}"
echo -e "${BOLD}║ GENTOO KERNEL UPGRADE HELPER ║${NC}"
echo -e "${BOLD}╚════════════════════════════════════════════════════════════════╝${NC}"
echo
check_root
local current_version
current_version=$(get_current_version)
info "Currently running kernel: ${current_version}"
echo
# Check for updates
if ! check_for_updates; then
success "No updates available for gentoo-kernel-bin"
echo
local latest_installed
latest_installed=$(get_latest_installed_version)
if [[ "$latest_installed" != "$current_version" ]] && [[ -n "$latest_installed" ]]; then
# Installed kernel differs from running - offer to set up boot files
warn "Installed kernel ($latest_installed) differs from running kernel ($current_version)"
echo
if confirm "Would you like to set up boot files for $latest_installed?"; then
print_summary "$current_version" "$latest_installed"
if ! confirm "Proceed with the upgrade process?"; then
echo "Upgrade cancelled."
exit 0
fi
select_kernel "$latest_installed"
backup_boot_files
generate_initramfs "$latest_installed"
copy_kernel "$latest_installed"
echo
success "Kernel upgrade complete!"
echo
echo -e "${BOLD}Next steps:${NC}"
echo " 1. Reboot your system"
echo " 2. Verify the new kernel is running: uname -r"
echo " 3. Run this script again to clean up old versions"
echo
fi
else
# Running kernel is the newest - offer cleanup
cleanup_old_versions
fi
exit 0
fi
echo
if ! confirm "Would you like to install this kernel update?"; then
echo "Update cancelled."
exit 0
fi
echo
install_kernel_update
# Get the new version after installation
local new_version
new_version=$(get_latest_installed_version)
if [[ -z "$new_version" ]]; then
error "Could not determine new kernel version"
exit 1
fi
print_summary "$current_version" "$new_version"
if ! confirm "Proceed with the upgrade process?"; then
echo "Upgrade process cancelled. Kernel package is installed but boot files unchanged."
exit 0
fi
echo
select_kernel "$new_version"
backup_boot_files
generate_initramfs "$new_version"
copy_kernel "$new_version"
echo
success "Kernel upgrade complete!"
echo
echo -e "${BOLD}╔════════════════════════════════════════════════════════════════╗${NC}"
echo -e "${BOLD}║ NEXT STEPS ║${NC}"
echo -e "${BOLD}╚════════════════════════════════════════════════════════════════╝${NC}"
echo
echo " 1. ${YELLOW}Reboot your system${NC}"
echo " 2. Verify the new kernel is running:"
echo " uname -r"
echo " Expected: ${GREEN}${new_version}${NC}"
echo
echo " 3. If the new kernel works correctly, run this script again"
echo " to clean up old versions"
echo
echo " 4. ${RED}If the system fails to boot:${NC}"
echo " - Boot from a live USB"
echo " - Mount your EFI partition"
echo " - Restore backups:"
echo " mv vmlinuz.efi.bak vmlinuz.efi"
echo " mv initramfs.img.bak initramfs.img"
echo
}
main "$@"

View file

@ -0,0 +1,397 @@
#!/bin/bash
#
# Gentoo Kernel Upgrade Script
# This script helps upgrade the gentoo-kernel-bin package safely.
#
# IMPORTANT: This script modifies boot files. A failed upgrade could
# prevent your system from booting. Ensure you have a backup plan.
#
# Fail fast: -e aborts on any error, -u errors on unset variables,
# pipefail propagates failures from any stage of a pipeline.
set -euo pipefail

# Colors for output (raw ANSI escape sequences; print with `echo -e`)
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
BLUE='\033[0;34m'
BOLD='\033[1m'
NC='\033[0m' # No Color

# EFI_DIR: mounted EFI system partition holding the live vmlinuz.efi /
# initramfs.img pair. BOOT_DIR: where kernel packages install images.
EFI_DIR="/boot/efi"
BOOT_DIR="/boot"

# Print functions: info = step banner, warn = non-fatal warning,
# error = message to stderr, success = completed-action note.
info() { echo -e "${BLUE}==>${NC} ${BOLD}$1${NC}"; }
warn() { echo -e "${YELLOW}WARNING:${NC} $1"; }
error() { echo -e "${RED}ERROR:${NC} $1" >&2; }
success() { echo -e "${GREEN}==>${NC} $1"; }
# Terminate the script unless it is being run with root privileges.
check_root() {
    [[ $EUID -eq 0 ]] && return 0
    error "This script must be run as root"
    exit 1
}
# Ask the user to approve an action by typing the literal word 'yes'.
# $1: prompt text shown to the user.
# Returns 0 only if the response is exactly 'yes'; 1 otherwise.
confirm() {
    local prompt="$1"
    local response
    echo -e "${YELLOW}${prompt}${NC}"
    # -r keeps backslashes in the typed input literal instead of treating
    # them as escape characters (shellcheck SC2162).
    read -rp "Type 'yes' to confirm: " response
    [[ "$response" == "yes" ]]
}
# Print the release string of the kernel that is currently booted.
get_current_version() {
    local running
    running=$(uname -r)
    echo "$running"
}
# Print the newest installed kernel version found under /usr/src, or an
# empty string when none is installed.
# Uses compgen glob expansion instead of parsing `ls` output (fragile with
# unusual filenames and flagged by shellcheck SC2012).
get_latest_installed_version() {
    local latest=""
    local matches
    # compgen -G expands the glob; it prints nothing and returns non-zero
    # when there is no match, so guard it for `set -e`.
    matches=$(compgen -G '/usr/src/linux-*-gentoo-dist' || true)
    if [[ -n "$matches" ]]; then
        # Version-sort and keep only the trailing version component.
        latest=$(sort -V <<< "$matches" | tail -1 | sed 's|.*/linux-||')
    fi
    echo "$latest"
}
# Query Portage (pretend mode) for a pending gentoo-kernel-bin update.
# Prints the relevant emerge lines and returns 0 when an update is
# available; returns 1 otherwise.
check_for_updates() {
    info "Checking for gentoo-kernel-bin updates..."
    echo
    # Run emerge once and reuse the output: the original called
    # `emerge -pvu` twice, forcing Portage to resolve the dependency
    # graph twice for the same answer.
    local pretend_output
    pretend_output=$(emerge -pvu gentoo-kernel-bin 2>/dev/null) || pretend_output=""
    if grep -q "gentoo-kernel-bin" <<< "$pretend_output"; then
        grep -E "gentoo-kernel-bin|ebuild" <<< "$pretend_output"
        return 0
    fi
    return 1
}
# Run `emerge gentoo-kernel-bin` after showing the command and getting
# explicit user approval. Returns 1 when the user declines.
install_kernel_update() {
    info "Installing gentoo-kernel-bin update..."
    echo
    echo -e "${BOLD}The following command will be executed:${NC}"
    echo -e " ${BLUE}emerge gentoo-kernel-bin${NC}"
    echo
    if confirm "Proceed with kernel package installation?"; then
        echo
        emerge gentoo-kernel-bin
        success "Kernel package installed successfully"
    else
        warn "Installation cancelled by user"
        return 1
    fi
}
# Point the kernel symlink at the new version via eselect.
# $1: kernel version string to activate (e.g. 6.6.30-gentoo-dist).
# Returns 1 if the user declines the selection.
select_kernel() {
    local new_version="$1"
    info "Available kernel versions:"
    eselect kernel list
    echo
    # Map the version string to its eselect menu number. `head -n 1`
    # guards against several similarly-named entries matching, which would
    # otherwise feed a multi-line value to `eselect kernel set`.
    local kernel_num
    kernel_num=$(eselect kernel list | grep "$new_version" | head -n 1 | sed 's/.*\[\([0-9]*\)\].*/\1/')
    if [[ -z "$kernel_num" ]]; then
        error "Could not find kernel version $new_version in eselect list"
        echo "Please select the kernel manually:"
        eselect kernel list
        # -r keeps backslashes in the typed input literal.
        read -rp "Enter the number to select: " kernel_num
    fi
    echo -e "${BOLD}The following command will be executed:${NC}"
    echo -e " ${BLUE}eselect kernel set $kernel_num${NC}"
    echo
    if ! confirm "Set kernel $new_version as active?"; then
        warn "Kernel selection cancelled by user"
        return 1
    fi
    eselect kernel set "$kernel_num"
    # Show the now-selected entry. `|| true` keeps `set -e -o pipefail`
    # from aborting mid-upgrade if grep unexpectedly finds no '*' line.
    info "Current kernel symlink:"
    eselect kernel list | grep '\*' || true
    echo
}
# Rename the current kernel image and initramfs in the EFI partition to
# *.bak so a failed upgrade can be rolled back.
# Returns 1 when the user declines.
backup_boot_files() {
    info "Backing up current boot files..."
    echo -e "${BOLD}The following operations will be performed in ${EFI_DIR}:${NC}"
    echo -e " ${BLUE}mv vmlinuz.efi -> vmlinuz.efi.bak${NC}"
    echo -e " ${BLUE}mv initramfs.img -> initramfs.img.bak${NC}"
    echo
    if [[ -f "${EFI_DIR}/vmlinuz.efi.bak" ]] || [[ -f "${EFI_DIR}/initramfs.img.bak" ]]; then
        warn "Existing backup files will be overwritten!"
    fi
    if ! confirm "Backup current boot files?"; then
        warn "Backup cancelled by user"
        return 1
    fi
    # Use absolute paths instead of `cd "${EFI_DIR}"`: the original's cd
    # silently changed the working directory for the entire rest of the
    # script, not just this function.
    if [[ -f "${EFI_DIR}/vmlinuz.efi" ]]; then
        mv "${EFI_DIR}/vmlinuz.efi" "${EFI_DIR}/vmlinuz.efi.bak"
        success "Backed up vmlinuz.efi"
    fi
    if [[ -f "${EFI_DIR}/initramfs.img" ]]; then
        mv "${EFI_DIR}/initramfs.img" "${EFI_DIR}/initramfs.img.bak"
        success "Backed up initramfs.img"
    fi
    echo
}
# Build a fresh initramfs for the given kernel version by delegating to
# the generate_initramfs.sh helper stored on the EFI partition.
# $1: kernel version. Returns 1 when the user declines.
generate_initramfs() {
    local version="$1"
    info "Generating new initramfs for kernel ${version}..."
    echo
    echo -e "${BOLD}The following command will be executed:${NC}"
    echo -e " ${BLUE}${EFI_DIR}/generate_initramfs.sh ${version} ${EFI_DIR}/initramfs.img${NC}"
    echo
    if confirm "Generate initramfs?"; then
        bash "${EFI_DIR}/generate_initramfs.sh" "${version}" "${EFI_DIR}/initramfs.img"
        success "Initramfs generated successfully"
        echo
    else
        warn "Initramfs generation cancelled by user"
        return 1
    fi
}
# Install the packaged kernel image for the given version into the EFI
# partition as vmlinuz.efi.
# $1: kernel version whose /boot image should be copied.
# Returns 1 if the image is missing or the user declines.
copy_kernel() {
    local version="$1"
    local image="${BOOT_DIR}/kernel-${version}"
    info "Copying new kernel to EFI partition..."
    echo
    if [[ ! -f "$image" ]]; then
        error "Kernel file not found: $image"
        return 1
    fi
    echo -e "${BOLD}The following command will be executed:${NC}"
    echo -e " ${BLUE}cp ${image} ${EFI_DIR}/vmlinuz.efi${NC}"
    echo
    if confirm "Copy new kernel to EFI partition?"; then
        cp "$image" "${EFI_DIR}/vmlinuz.efi"
        success "Kernel copied successfully"
        echo
    else
        warn "Kernel copy cancelled by user"
        return 1
    fi
}
# Remove stale kernel and initramfs files from /boot, keeping files that
# belong to the kernel version being kept.
# $1 (optional): version to keep; defaults to the running kernel.
#
# The original read the undeclared variable $current_version from the
# caller's scope via bash dynamic scoping — an error under `set -u` when
# the function is called standalone. The explicit parameter with a
# `uname -r` default removes that hidden coupling and stays
# call-compatible with the existing zero-argument call sites.
cleanup_old_versions() {
    local keep_version="${1:-$(uname -r)}"
    info "Checking for old kernel versions to clean up..."
    echo
    local initramfs_to_delete=()
    local kernels_to_delete=()
    local f
    # Globs instead of parsing `ls` output; a non-matching pattern stays
    # literal, which the -e test filters out.
    for f in "${BOOT_DIR}"/initramfs-*.img; do
        [[ -e "$f" ]] || continue
        [[ "$f" == *"$keep_version"* ]] && continue
        initramfs_to_delete+=("$f")
    done
    for f in "${BOOT_DIR}"/kernel-*; do
        [[ -e "$f" ]] || continue
        [[ "$f" == *"$keep_version"* ]] && continue
        kernels_to_delete+=("$f")
    done
    # Nothing matched: report and stop.
    if [[ ${#initramfs_to_delete[@]} -eq 0 ]] && [[ ${#kernels_to_delete[@]} -eq 0 ]]; then
        success "No old kernel versions to clean up"
        echo
        return 0
    fi
    echo -e "${BOLD}The following files will be DELETED:${NC}"
    echo
    for f in "${initramfs_to_delete[@]}" "${kernels_to_delete[@]}"; do
        echo -e " ${RED}$f${NC}"
    done
    echo
    warn "This action cannot be undone!"
    echo
    if ! confirm "Delete these old kernel files?"; then
        warn "Cleanup cancelled by user"
        return 1
    fi
    echo
    for f in "${initramfs_to_delete[@]}" "${kernels_to_delete[@]}"; do
        rm -v "$f"
    done
    echo
    success "Old kernel versions cleaned up"
    echo
}
# Print summary of what will be done.
# $1: currently running kernel version; $2: new kernel version.
# Output only — prints the upgrade plan and a prominent warning; makes no
# changes and always returns 0.
print_summary() {
    local current="$1"
    local new="$2"
    echo
    echo -e "${BOLD}╔════════════════════════════════════════════════════════════════╗${NC}"
    echo -e "${BOLD}║ GENTOO KERNEL UPGRADE SUMMARY ║${NC}"
    echo -e "${BOLD}╚════════════════════════════════════════════════════════════════╝${NC}"
    echo
    echo -e " Current running kernel: ${YELLOW}${current}${NC}"
    echo -e " New kernel version: ${GREEN}${new}${NC}"
    echo
    echo -e "${BOLD}The following steps will be performed:${NC}"
    echo " 1. Backup current vmlinuz.efi and initramfs.img"
    echo " 2. Generate new initramfs using dracut"
    echo " 3. Copy new kernel to EFI partition"
    echo
    echo -e "${RED}${BOLD}WARNING: This process modifies boot files!${NC}"
    echo -e "${RED}If something goes wrong, your system may not boot.${NC}"
    echo -e "${RED}Make sure you have a way to recover (live USB, backup, etc.)${NC}"
    echo
}
# Main function.
#
# Flow:
#   1. Require root; report the running kernel.
#   2. No pending update: if an installed kernel is newer than the running
#      one, offer to finish its boot-file setup; otherwise offer cleanup.
#   3. Pending update: confirm, install, re-detect the newest installed
#      version, then run select/backup/initramfs/copy with per-step
#      confirmation and print a recovery cheat-sheet.
#
# Fix vs. original: the "NEXT STEPS" lines that embed ${YELLOW}/${GREEN}/
# ${RED} used plain `echo`, so the literal text `\033[...` was printed
# instead of colors (the variables hold unexpanded escape sequences).
# Those lines now use `echo -e` like the rest of the script.
main() {
    echo
    echo -e "${BOLD}╔════════════════════════════════════════════════════════════════╗${NC}"
    echo -e "${BOLD}║ GENTOO KERNEL UPGRADE HELPER ║${NC}"
    echo -e "${BOLD}╚════════════════════════════════════════════════════════════════╝${NC}"
    echo
    check_root
    local current_version
    current_version=$(get_current_version)
    info "Currently running kernel: ${current_version}"
    echo
    # Check for updates
    if ! check_for_updates; then
        success "No updates available for gentoo-kernel-bin"
        echo
        local latest_installed
        latest_installed=$(get_latest_installed_version)
        if [[ "$latest_installed" != "$current_version" ]] && [[ -n "$latest_installed" ]]; then
            # Installed kernel differs from running - offer to set up boot files
            warn "Installed kernel ($latest_installed) differs from running kernel ($current_version)"
            echo
            if confirm "Would you like to set up boot files for $latest_installed?"; then
                print_summary "$current_version" "$latest_installed"
                if ! confirm "Proceed with the upgrade process?"; then
                    echo "Upgrade cancelled."
                    exit 0
                fi
                # Each step prompts again before touching anything on disk.
                select_kernel "$latest_installed"
                backup_boot_files
                generate_initramfs "$latest_installed"
                copy_kernel "$latest_installed"
                echo
                success "Kernel upgrade complete!"
                echo
                echo -e "${BOLD}Next steps:${NC}"
                echo " 1. Reboot your system"
                echo " 2. Verify the new kernel is running: uname -r"
                echo " 3. Run this script again to clean up old versions"
                echo
            fi
        else
            # Running kernel is the newest - offer cleanup
            cleanup_old_versions
        fi
        exit 0
    fi
    echo
    if ! confirm "Would you like to install this kernel update?"; then
        echo "Update cancelled."
        exit 0
    fi
    echo
    install_kernel_update
    # Get the new version after installation
    local new_version
    new_version=$(get_latest_installed_version)
    if [[ -z "$new_version" ]]; then
        error "Could not determine new kernel version"
        exit 1
    fi
    print_summary "$current_version" "$new_version"
    if ! confirm "Proceed with the upgrade process?"; then
        echo "Upgrade process cancelled. Kernel package is installed but boot files unchanged."
        exit 0
    fi
    echo
    # Boot-file setup: same four steps, each individually confirmed.
    select_kernel "$new_version"
    backup_boot_files
    generate_initramfs "$new_version"
    copy_kernel "$new_version"
    echo
    success "Kernel upgrade complete!"
    echo
    echo -e "${BOLD}╔════════════════════════════════════════════════════════════════╗${NC}"
    echo -e "${BOLD}║ NEXT STEPS ║${NC}"
    echo -e "${BOLD}╚════════════════════════════════════════════════════════════════╝${NC}"
    echo
    echo -e " 1. ${YELLOW}Reboot your system${NC}"
    echo " 2. Verify the new kernel is running:"
    echo " uname -r"
    echo -e " Expected: ${GREEN}${new_version}${NC}"
    echo
    echo " 3. If the new kernel works correctly, run this script again"
    echo " to clean up old versions"
    echo
    echo -e " 4. ${RED}If the system fails to boot:${NC}"
    echo " - Boot from a live USB"
    echo " - Mount your EFI partition"
    echo " - Restore backups:"
    echo " mv vmlinuz.efi.bak vmlinuz.efi"
    echo " mv initramfs.img.bak initramfs.img"
    echo
}
main "$@"

View file

@ -0,0 +1,16 @@
grafana_pagerduty_integration_key: ENC[AES256_GCM,data:eXfaIsRwfqZm5ROIIFpeyuyk4/4wi3M02Bmgl7/SoRk=,iv:4HcB2WTDTrDADwE/ZVK84l6aIxayzz3e7VFZpVNY3Pg=,tag:4mTmfCP9GV9rGY2ALxWhgQ==,type:str]
sops:
age:
- recipient: age1r8uh2w2qad2z5sgq9q7l73962q2sp8zz9hdnh6sjuvanxl565vmswn8squ
enc: |
-----BEGIN AGE ENCRYPTED FILE-----
YWdlLWVuY3J5cHRpb24ub3JnL3YxCi0+IFgyNTUxOSAvSFY2QWdpZ3lUUTZDYk5F
MVFyMy9rOG1ZejlrQUxPWjFpTlZpS2w1dHlVCmg4VkpYTmhYUk42ZytpOUJ5czg5
VmJUQ2VJKzNtdWhiUGhTTVhndmplWG8KLS0tIHhKWkpWVG5CcnI0NkNwWGRCMDM5
QVRncWVjR25sR0ViY1pCWkFXYVRURFUKyknDYVq9Bzo3Bdi4/dl3Ryj3qkLiGhrW
hlRDiPyTWfMPwffF3SmKCrI60b+Y0QhkZ+/Wym4JuuW1TKQERv4lhw==
-----END AGE ENCRYPTED FILE-----
lastmodified: "2026-03-25T19:50:39Z"
mac: ENC[AES256_GCM,data:EFguQ9aY0I+y0WEkJCAA19uuRtquZVHdRBTjjGr2BoeVrq16iu1dIeZOH3eMj/b1MvI1NF7DoLJEUsxKZDS2YeTsBF46oOmc0VErR7w1BIlET8FhQANhqtPF0OglwYD5gCsxXjbx25TLfOpP4iHPwLY440Gk3BtWR1TMObD4C+c=,iv:/ncyoPfWMjeCSWI4Hfv1Tm5rRH0+ytT3OBlKNyy7k+I=,tag:VYyCwmBTfBHWF9ICZz7kSg==,type:str]
unencrypted_suffix: _unencrypted
version: 3.12.2

View file

@ -0,0 +1,12 @@
---
host_role: gaming
host_description: "Gaming servers (Minecraft, WoW/MaNGOS)"
host_location: "Copenhagen"
docker_services:
- minecraft
# MaNGOS runs as systemd services, not Docker
systemd_services:
- mangos-realmd
- mangos-world

View file

@ -0,0 +1,4 @@
---
host_role: idle
host_description: "Idle/available"
host_location: "Copenhagen"

View file

@ -0,0 +1,6 @@
---
host_role: proxy
host_description: "Reverse proxy (Caddy), main traffic gateway"
host_location: "Hetzner Cloud"
caddy_config_src: "{{ playbook_dir }}/services/caddy/Caddyfile"

View file

@ -0,0 +1,6 @@
---
host_role: monitoring
host_description: "Monitoring stack (Prometheus, Grafana)"
host_location: "London"
ansible_python_interpreter: /usr/local/bin/python3
grafana_provisioning_dir: /usr/local/share/grafana/conf/provisioning

View file

@ -0,0 +1,16 @@
---
host_role: storage
host_description: "Primary storage (ZFS), Docker services"
host_location: "London"
docker_services:
- nextcloud-aio
- jellyseerr
- navidrome
- slskd
- authelia
- forgejo
- bitwarden
- miniflux
- smartctl-exporter
- plex-exporter

View file

@ -0,0 +1,5 @@
---
host_role: mail
host_description: "Mail server (poste.io)"
host_location: "Hetzner Cloud"
ansible_python_interpreter: /usr/bin/python3

View file

@ -0,0 +1,25 @@
# pez-infra fleet inventory
# All hosts accessible via Tailscale IPs over SSH as root.
[linux]
helsinki-a ansible_host=100.67.6.27
london-b ansible_host=100.84.65.101
copenhagen-a ansible_host=100.89.206.60
copenhagen-c ansible_host=100.115.45.53
[alpine]
nuremberg-a ansible_host=100.117.235.28
[freebsd]
london-a ansible_host=100.122.219.41
[docker_hosts]
london-b
nuremberg-a
copenhagen-a
[monitoring]
london-a
[all:vars]
ansible_user=root

View file

@ -0,0 +1,16 @@
---
# Show running Docker containers on all docker hosts.
# Usage: ansible-playbook playbooks/docker-status.yml

- name: Docker container status
  hosts: docker_hosts
  # Keep going when a host is down; reachable hosts still report.
  ignore_unreachable: true

  tasks:
    - name: Run docker ps
      # The {{ '{{' }} / {{ '}}' }} pairs emit literal Go-template braces so
      # Jinja passes docker's --format placeholders through unevaluated.
      ansible.builtin.command: docker ps --format "table {{ '{{' }}.Names{{ '}}' }}\t{{ '{{' }}.Status{{ '}}' }}\t{{ '{{' }}.Ports{{ '}}' }}"
      register: docker_ps
      changed_when: false

    - name: Show containers on {{ inventory_hostname }}
      ansible.builtin.debug:
        msg: "{{ docker_ps.stdout_lines }}"

View file

@ -0,0 +1,95 @@
---
# Deploy monitoring stack to london-a (Prometheus + Grafana).
# Usage: ansible-playbook playbooks/monitoring.yml
# ansible-playbook playbooks/monitoring.yml --check --diff
- name: "Monitoring stack (london-a)"
hosts: london-a
pre_tasks:
- name: Load secrets
ansible.builtin.include_vars:
file: "{{ playbook_dir }}/../group_vars/all/secrets.yaml"
no_log: true
tasks:
- name: Check for Prometheus config
delegate_to: localhost
ansible.builtin.stat:
path: "{{ playbook_dir }}/../services/prometheus/prometheus.yml"
register: prometheus_config
- name: Deploy Prometheus config
ansible.builtin.copy:
src: "{{ playbook_dir }}/../services/prometheus/prometheus.yml"
dest: /usr/local/etc/prometheus.yml
mode: '0644'
backup: true
when: prometheus_config.stat.exists
notify: Restart prometheus
- name: Deploy Prometheus alerting rules
ansible.builtin.copy:
src: "{{ playbook_dir }}/../services/prometheus/rules/"
dest: /usr/local/etc/prometheus/rules/
mode: '0644'
failed_when: false
notify: Restart prometheus
- name: Ensure unified_alerting section exists in Grafana config
ansible.builtin.lineinfile:
path: /usr/local/etc/grafana/grafana.ini
regexp: '^\[unified_alerting\]'
line: '[unified_alerting]'
notify: Restart grafana
- name: Allow provenance status change in Grafana
ansible.builtin.lineinfile:
path: /usr/local/etc/grafana/grafana.ini
regexp: '^allow_prov_status_change'
insertafter: '^\[unified_alerting\]'
line: 'allow_prov_status_change = true'
notify: Restart grafana
- name: Deploy Grafana dashboards
ansible.posix.synchronize:
src: "{{ playbook_dir }}/../services/grafana/dashboards/"
dest: /usr/local/etc/grafana/dashboards/
failed_when: false
- name: Ensure provisioning dir exists
ansible.builtin.file:
path: "{{ grafana_provisioning_dir }}"
state: directory
mode: '0755'
- name: Ensure alerting dir exists
ansible.builtin.file:
path: "{{ grafana_provisioning_dir }}/alerting"
state: directory
mode: '0755'
- name: Deploy Grafana provisioning
ansible.posix.synchronize:
src: "{{ playbook_dir }}/../services/grafana/provisioning/"
dest: "{{ grafana_provisioning_dir }}/"
failed_when: false
- name: Template contact points with PagerDuty key
ansible.builtin.template:
src: "{{ playbook_dir }}/../services/grafana/provisioning/alerting/contact-points.yml"
dest: "{{ grafana_provisioning_dir }}/alerting/contact-points.yml"
mode: '0640'
owner: root
group: grafana
no_log: true
notify: Restart grafana
handlers:
- name: Restart prometheus
ansible.builtin.service:
name: prometheus
state: restarted
- name: Restart grafana
ansible.builtin.service:
name: grafana
state: restarted

View file

@ -0,0 +1,68 @@
---
# Reboot a specific host and wait for it to come back.
# Usage: ansible-playbook playbooks/reboot.yml --limit <hostname>
#
# Safety: copenhagen-a has netplan pre-flight check.
# london-b should NOT be rebooted without manual approval.
- name: Reboot host safely
hosts: all
ignore_unreachable: true
tasks:
- name: SAFETY — refuse unscoped reboot
ansible.builtin.fail:
msg: >
ABORT: You must use --limit <hostname> to reboot a specific host.
Running against all hosts is not allowed.
when: play_hosts | length > 1
- name: SAFETY — london-b requires manual approval
ansible.builtin.pause:
prompt: >
WARNING: london-b is the primary storage server. Rebooting will
take down ZFS pools and all Docker services. Type 'yes' to confirm.
register: london_b_confirm
when: inventory_hostname == 'london-b'
- name: Abort if london-b not confirmed
ansible.builtin.fail:
msg: "Reboot of london-b was not confirmed."
when: inventory_hostname == 'london-b' and london_b_confirm.user_input != 'yes'
- name: Check netplan config (copenhagen-a)
ansible.builtin.command: netplan get all
register: netplan_config
failed_when: false
changed_when: false
when: inventory_hostname == 'copenhagen-a'
- name: Verify copenhagen-a static IP
ansible.builtin.assert:
that:
- "'192.168.0.251' in netplan_config.stdout"
fail_msg: >
ABORT: copenhagen-a netplan doesn't show expected static IP
192.168.0.251. Check netplan config before rebooting.
success_msg: "copenhagen-a netplan OK — static IP present."
when: inventory_hostname == 'copenhagen-a'
- name: Reboot
ansible.builtin.reboot:
reboot_timeout: 300
connect_timeout: 10
pre_reboot_delay: 5
post_reboot_delay: 15
test_command: uptime
- name: Verify SSH is back
ansible.builtin.wait_for_connection:
timeout: 120
- name: Show uptime after reboot
ansible.builtin.command: uptime
register: uptime_result
changed_when: false
- name: Post-reboot uptime
ansible.builtin.debug:
msg: "{{ inventory_hostname }} is back: {{ uptime_result.stdout }}"

View file

@ -0,0 +1,64 @@
---
# Update all hosts (apt for Linux, pkg for FreeBSD, apk for Alpine).
# Usage: ansible-playbook playbooks/update-all.yml
- name: Update Linux hosts (apt)
hosts: linux
ignore_unreachable: true
tags: [update, linux]
tasks:
- name: Apt update + upgrade + autoremove
ansible.builtin.apt:
update_cache: true
upgrade: dist
autoremove: true
autoclean: true
register: apt_result
- name: Show upgrade summary
ansible.builtin.debug:
msg: "{{ apt_result.stdout_lines | default(['No output']) }}"
- name: Check if reboot is required
ansible.builtin.stat:
path: /var/run/reboot-required
register: reboot_required
- name: Notify if reboot needed
ansible.builtin.debug:
msg: "WARNING: REBOOT REQUIRED on {{ inventory_hostname }}"
when: reboot_required.stat.exists
- name: Update Alpine hosts (apk)
hosts: alpine
ignore_unreachable: true
tags: [update, alpine]
tasks:
- name: Apk update + upgrade
community.general.apk:
update_cache: true
upgrade: true
register: apk_result
- name: Show upgrade summary
ansible.builtin.debug:
msg: "{{ apk_result.stdout_lines | default(['No output']) }}"
- name: Update FreeBSD hosts (pkg)
hosts: freebsd
ignore_unreachable: true
tags: [update, freebsd]
tasks:
- name: Pkg update
ansible.builtin.command: pkg update -f
register: pkg_update_result
changed_when: "'Updating' in pkg_update_result.stdout"
- name: Pkg upgrade
ansible.builtin.command: pkg upgrade -y
register: pkg_upgrade_result
changed_when: "'upgraded' in pkg_upgrade_result.stdout or 'installed' in pkg_upgrade_result.stdout"
- name: Show upgrade summary
ansible.builtin.debug:
msg: "{{ pkg_upgrade_result.stdout_lines | default(['No output']) }}"

View file

@ -0,0 +1,24 @@
---
# Update FreeBSD hosts only (pkg).
# Usage: ansible-playbook playbooks/update-freebsd.yml
#
# Equivalent to: ansible-playbook playbooks/update-all.yml --tags freebsd
# This is a convenience wrapper for when you only want FreeBSD hosts.
- name: Update FreeBSD hosts (pkg)
hosts: freebsd
ignore_unreachable: true
tasks:
- name: Pkg update
ansible.builtin.command: pkg update -f
register: pkg_update_result
changed_when: "'Updating' in pkg_update_result.stdout"
- name: Pkg upgrade
ansible.builtin.command: pkg upgrade -y
register: pkg_upgrade_result
changed_when: "'upgraded' in pkg_upgrade_result.stdout or 'installed' in pkg_upgrade_result.stdout"
- name: Show upgrade summary
ansible.builtin.debug:
msg: "{{ pkg_upgrade_result.stdout_lines | default(['No output']) }}"

View file

@ -0,0 +1,46 @@
---
# Update Linux hosts only (apt + Alpine apk).
# Usage: ansible-playbook playbooks/update-linux.yml
#
# Equivalent to: ansible-playbook playbooks/update-all.yml --tags linux,alpine
# This is a convenience wrapper for when you only want Linux hosts.
- name: Update Linux hosts (apt)
hosts: linux
ignore_unreachable: true
tasks:
- name: Apt update + upgrade + autoremove
ansible.builtin.apt:
update_cache: true
upgrade: dist
autoremove: true
autoclean: true
register: apt_result
- name: Show upgrade summary
ansible.builtin.debug:
msg: "{{ apt_result.stdout_lines | default(['No output']) }}"
- name: Check if reboot is required
ansible.builtin.stat:
path: /var/run/reboot-required
register: reboot_required
- name: Notify if reboot needed
ansible.builtin.debug:
msg: "WARNING: REBOOT REQUIRED on {{ inventory_hostname }}"
when: reboot_required.stat.exists
- name: Update Alpine hosts (apk)
hosts: alpine
ignore_unreachable: true
tasks:
- name: Apk update + upgrade
community.general.apk:
update_cache: true
upgrade: true
register: apk_result
- name: Show upgrade summary
ansible.builtin.debug:
msg: "{{ apk_result.stdout_lines | default(['No output']) }}"

11
ansible/requirements.yml Normal file
View file

@ -0,0 +1,11 @@
---
# Ansible Galaxy requirements
# Install: ansible-galaxy install -r requirements.yml
collections:
- name: community.docker
version: ">=3.0.0"
- name: community.general
version: ">=7.0.0"
- name: ansible.posix
version: ">=1.5.0"

View file

@ -0,0 +1,5 @@
---
- name: Reload caddy
ansible.builtin.service:
name: caddy
state: reloaded

View file

@ -0,0 +1,31 @@
---
# Deploy Caddy reverse proxy on helsinki-a.
# Expects Caddyfile in services/caddy/.
- name: Install Caddy
ansible.builtin.apt:
name: caddy
state: present
when: ansible_facts["os_family"] == "Debian"
- name: Check for Caddyfile in repo
delegate_to: localhost
ansible.builtin.stat:
path: "{{ playbook_dir }}/services/caddy/Caddyfile"
register: caddy_caddyfile_src
- name: Deploy Caddyfile
ansible.builtin.copy:
src: "{{ playbook_dir }}/services/caddy/Caddyfile"
dest: /etc/caddy/Caddyfile
mode: '0644'
backup: true
validate: "caddy validate --config %s --adapter caddyfile"
notify: Reload caddy
when: caddy_caddyfile_src.stat.exists
- name: Enable and start Caddy
ansible.builtin.service:
name: caddy
state: started
enabled: true

View file

@ -0,0 +1,5 @@
---
- name: Restart sshd
ansible.builtin.service:
name: sshd
state: restarted

View file

@ -0,0 +1,102 @@
---
# Common baseline for all Linux hosts.
# Installs core packages, configures SSH, sets up the shell environment.
- name: Update apt cache
ansible.builtin.apt:
update_cache: true
cache_valid_time: 3600
when: ansible_facts["os_family"] == "Debian"
- name: Install baseline packages (Debian)
ansible.builtin.apt:
name:
- curl
- wget
- git
- htop
- tmux
- vim
- jq
- unzip
- fish
- rsync
- fail2ban
- ufw
state: present
when: ansible_facts["os_family"] == "Debian"
- name: Install baseline packages (Alpine)
community.general.apk:
name:
- curl
- wget
- git
- htop
- tmux
- vim
- jq
- fish
- rsync
- shadow
- py3-requests
state: present
when: ansible_facts["os_family"] == "Alpine"
- name: Install baseline packages (FreeBSD)
community.general.pkgng:
name:
- curl
- wget
- git
- htop
- tmux
- vim
- jq
- rsync
state: present
when: ansible_facts["os_family"] == "FreeBSD"
- name: Install fish shell
ansible.builtin.package:
name: fish
state: present
when: inventory_hostname != 'london-a'
- name: Get fish shell path
ansible.builtin.command: which fish
changed_when: false
register: common_fish_path
when: inventory_hostname != 'london-a'
- name: Set fish as default shell
ansible.builtin.user:
name: root
shell: "{{ common_fish_path.stdout }}"
when: inventory_hostname != 'london-a'
- name: Ensure SSH directory exists
ansible.builtin.file:
path: /root/.ssh
state: directory
mode: '0700'
- name: Harden SSH config
ansible.builtin.lineinfile:
path: /etc/ssh/sshd_config
regexp: "{{ item.regexp }}"
line: "{{ item.line }}"
state: present
loop:
- { regexp: '^#?PermitRootLogin', line: 'PermitRootLogin prohibit-password' }
- { regexp: '^#?PasswordAuthentication', line: 'PasswordAuthentication no' }
- { regexp: '^#?X11Forwarding', line: 'X11Forwarding no' }
notify: Restart sshd
when: ansible_facts["os_family"] != "FreeBSD"
- name: Enable fail2ban (Debian)
ansible.builtin.service:
name: fail2ban
state: started
enabled: true
when: ansible_facts["os_family"] == "Debian"

View file

@ -0,0 +1,31 @@
---
# Install Docker and docker-compose, start the daemon.
- name: Install Docker (Debian)
ansible.builtin.apt:
name:
- docker.io
- docker-compose
state: present
when: ansible_facts["os_family"] == "Debian"
- name: Install Docker (Alpine)
community.general.apk:
name:
- docker
- docker-cli-compose
state: present
when: ansible_facts["os_family"] == "Alpine"
- name: Enable and start Docker
ansible.builtin.service:
name: docker
state: started
enabled: true
- name: Create docker compose project directories
ansible.builtin.file:
path: "/opt/docker/{{ item }}"
state: directory
mode: '0755'
loop: "{{ docker_services | default([]) }}"

View file

@ -0,0 +1,32 @@
---
# Deploy Docker Compose services from the repo's services/ directory.
# Expects docker_services list in host_vars and compose files in services/<name>/.
- name: Copy docker-compose files
ansible.builtin.copy:
src: "{{ playbook_dir }}/services/{{ item }}/docker-compose.yml"
dest: "/opt/docker/{{ item }}/docker-compose.yml"
mode: '0644'
loop: "{{ docker_services | default([]) }}"
register: docker_services_compose_files
failed_when: false
- name: Copy service config files
ansible.posix.synchronize:
src: "{{ playbook_dir }}/services/{{ item }}/"
dest: "/opt/docker/{{ item }}/"
rsync_opts:
- "--exclude=docker-compose.yml"
- "--exclude=README.md"
- "--exclude=.gitkeep"
loop: "{{ docker_services | default([]) }}"
failed_when: false
- name: Start/update docker compose services
community.docker.docker_compose_v2:
project_src: "/opt/docker/{{ item.item }}"
state: present
pull: policy
loop: "{{ docker_services_compose_files.results | default([]) }}"
when: item is not failed and item is changed
failed_when: false

View file

@ -0,0 +1,24 @@
---
# Deploy dotfiles from the repo's dotfiles/ directory.
# Symlinks config files into the home directory.
- name: Check for dotfiles source
delegate_to: localhost
ansible.builtin.stat:
path: "{{ playbook_dir }}/dotfiles"
register: dotfiles_dir
- name: Copy dotfiles
ansible.builtin.copy:
src: "{{ playbook_dir }}/dotfiles/{{ item.src }}"
dest: "{{ item.dest }}"
mode: '0644'
backup: true
loop:
- { src: 'config/tmux/tmux.conf', dest: '/root/.tmux.conf' }
- { src: 'config/fish/config.fish', dest: '/root/.config/fish/config.fish' }
- { src: 'config/git/gitconfig', dest: '/root/.gitconfig' }
failed_when: false
when:
- dotfiles_dir.stat.exists
- not (inventory_hostname == 'london-a' and item.src is search('fish'))

View file

@ -0,0 +1,48 @@
---
# Install node_exporter for Prometheus monitoring.
# Uses system packages on Linux, pkg on FreeBSD.
- name: Install prometheus-node-exporter (Debian)
ansible.builtin.apt:
name: prometheus-node-exporter
state: present
when: ansible_facts["os_family"] == "Debian"
- name: Install prometheus-node-exporter (Alpine)
community.general.apk:
name: prometheus-node-exporter
state: present
when: ansible_facts["os_family"] == "Alpine"
- name: Enable and start node-exporter (Debian)
ansible.builtin.service:
name: prometheus-node-exporter
state: started
enabled: true
when: ansible_facts["os_family"] == "Debian"
- name: Enable and start node-exporter (Alpine)
ansible.builtin.service:
name: node-exporter
state: started
enabled: true
when: ansible_facts["os_family"] == "Alpine"
- name: Install node_exporter (FreeBSD)
community.general.pkgng:
name: node_exporter
state: present
when: ansible_facts["os_family"] == "FreeBSD"
- name: Enable node_exporter (FreeBSD)
ansible.builtin.lineinfile:
path: /etc/rc.conf
regexp: '^node_exporter_enable='
line: 'node_exporter_enable="YES"'
when: ansible_facts["os_family"] == "FreeBSD"
- name: Start node_exporter (FreeBSD)
ansible.builtin.service:
name: node_exporter
state: started
when: ansible_facts["os_family"] == "FreeBSD"

View file

@ -0,0 +1,4 @@
---
- name: Reload systemd daemon
ansible.builtin.systemd:
daemon_reload: true

View file

@ -0,0 +1,22 @@
---
# Deploy custom systemd unit files from the repo.
# Looks for unit files in services/<name>/<name>.service
- name: Copy systemd unit files
ansible.builtin.copy:
src: "{{ playbook_dir }}/services/{{ item }}/{{ item }}.service"
dest: "/etc/systemd/system/{{ item }}.service"
mode: '0644'
loop: "{{ systemd_services | default([]) }}"
register: systemd_services_unit_files
failed_when: false
notify: Reload systemd daemon
- name: Enable and start systemd services
ansible.builtin.systemd:
name: "{{ item.item }}"
state: started
enabled: true
loop: "{{ systemd_services_unit_files.results | default([]) }}"
when: item is not failed
failed_when: false

View file

@ -0,0 +1,15 @@
#!/bin/bash
# Truncate large Docker container log files
# Deployed on: nuremberg-a
# Cron: 0 3 1 * * /usr/local/bin/docker-log-cleanup.sh

# Directory where Docker keeps per-container json-file logs.
LOG_DIR=/var/lib/docker/containers
# Truncate any log larger than this many megabytes.
MAX_SIZE_MB=100

# -print0 with `read -r -d ''` makes the loop safe for every possible
# filename (the original newline-delimited `find | while read` breaks on
# names containing newlines or leading/trailing whitespace).
find "$LOG_DIR" -name '*-json.log' -print0 | while IFS= read -r -d '' logfile; do
    size_mb=$(du -m "$logfile" | cut -f1)
    if [ "$size_mb" -gt "$MAX_SIZE_MB" ]; then
        echo "$(date): Truncating $logfile (${size_mb}MB)" >> /var/log/docker-log-cleanup.log
        truncate -s 0 "$logfile"
    fi
done

39
ansible/scripts/hdd-backup.sh Executable file
View file

@ -0,0 +1,39 @@
#!/usr/bin/env bash
# Sync /hdd subdirectories to a Backblaze B2 bucket via rclone and email
# a summary report with mutt.
# Fail fast on unexpected errors; individual sync failures are handled
# explicitly below so they are reported rather than aborting the run.
set -euo pipefail

# rclone remote:bucket that receives the backups.
BUCKET="b2:london-b-hdd"
# Top-level /hdd directories to sync.
DIRS=(archive backups stash syncthing ftp)
EMAIL="pez@pez.sh"
SUBJECT="HDD Backup Report - $(date '+%Y-%m-%d %H:%M')"

failures=()
report=""

for dir in "${DIRS[@]}"; do
    src="/hdd/$dir"
    dst="$BUCKET/$dir"
    echo "Syncing $src -> $dst"
    # Capture the exit code explicitly. The original
    # `output=$(rclone ...); rc=$?` aborted the entire script under
    # `set -e` on the first failed sync, so the failure report was never
    # built or mailed.
    rc=0
    output=$(rclone sync "$src" "$dst" -v 2>&1) || rc=$?
    # grep exits 1 when it filters out every line (e.g. empty output);
    # `|| true` keeps that from killing the script under `set -e`.
    output=$(grep -v "Can't follow symlink without -L/--copy-links" <<< "$output" || true)
    [[ $rc -ne 0 ]] && failures+=("$dir")
    report+="=== $dir ===\n$output\n\n"
done

# Get bucket storage usage
bucket_usage=$(rclone size "$BUCKET" 2>&1) || bucket_usage="(failed to retrieve bucket size)"

if [[ ${#failures[@]} -gt 0 ]]; then
    failure_summary="FAILURES: ${failures[*]}"
else
    failure_summary="All syncs completed successfully."
fi

# Assemble the report body and pipe it straight to mutt.
{
    echo -e "Backup completed: $(date '+%Y-%m-%d %H:%M:%S')"
    echo -e "$failure_summary\n"
    echo -e "=== Bucket Usage ===\n$bucket_usage\n"
    #echo -e "=== Sync Output ===\n$report"
} | mutt -s "$SUBJECT" "$EMAIL"

View file

@ -0,0 +1,58 @@
# Services
Version-controlled service definitions across the fleet.
## Directory Structure
```
services/
├── systemd/ # systemd unit files (Linux hosts)
│ ├── copenhagen-a/
│ │ ├── mangos-realmd.service # MaNGOS Zero realm server
│ │ ├── mangos-world.service # MaNGOS Zero world server
│ │ └── cloudflared.service # Cloudflare tunnel (token redacted)
│ └── helsinki-a/
│ ├── caddy.service # Caddy reverse proxy (stock unit)
│ └── thiswebsitedoesnotexist.service # Node.js app on port 3721
└── rc.d/ # FreeBSD rc.conf and rc.d scripts
└── london-a/
└── rc.conf # /etc/rc.conf — all enabled services
```
## Notes
### copenhagen-a (Linux)
| Service | Unit | Status | Notes |
|---------|------|--------|-------|
| MaNGOS realmd | `mangos-realmd.service` | enabled, custom | Realm server for WoW private server. Depends on MariaDB. |
| MaNGOS world | `mangos-world.service` | enabled, custom | World server. Depends on MariaDB and realmd. |
| cloudflared | `cloudflared.service` | enabled, custom | Cloudflare tunnel. **Token redacted** — replace `${CLOUDFLARED_TOKEN}` with the real token on deploy. |
### helsinki-a (Linux)
| Service | Unit | Status | Notes |
|---------|------|--------|-------|
| Caddy | `caddy.service` | enabled, stock | Installed via package manager. Config at `/etc/caddy/Caddyfile`. |
| thiswebsitedoesnotexist | `thiswebsitedoesnotexist.service` | enabled, custom | Node.js app. Env vars in `/opt/thiswebsitedoesnotexist/.env`. |
### london-a (FreeBSD)
No custom rc.d scripts — all services installed via `pkg`. The `rc.conf` captures all enabled services:
| Service | rc.conf variable | Notes |
|---------|-----------------|-------|
| Grafana | `grafana_enable="YES"` | Monitoring dashboards |
| Prometheus | `prometheus_enable="YES"` | Metrics collection |
| node_exporter | `node_exporter_enable="YES"` | Host metrics exporter |
| Tailscale | `tailscaled_enable="YES"` | Mesh VPN |
| cloudflared | `cloudflared_enable="YES"` | Cloudflare tunnel (tunnel ID in rc.conf) |
| InfluxDB | `influxd_enable="YES"` | Time-series database |
| libvirtd | `libvirtd_enable="YES"` | Virtualisation daemon |
| Redis | `redis_enable="YES"` | In-memory data store |
| PostgreSQL | `postgresql_enable="YES"` | Relational database |
## Security
- The cloudflared token on copenhagen-a has been **redacted** in the committed unit file. The live service uses the real token.
- The cloudflare tunnel ID on london-a is committed as-is (it's not a secret — the tunnel token is separate).

View file

@ -0,0 +1,13 @@
# Authelia
SSO authentication portal with LLDAP directory and MariaDB backend.
- **Host:** helsinki-a
- **URL:** https://auth.pez.sh (integrated via Caddy forward_auth)
- **Components:**
- **Authelia** — SSO portal (port 9091, localhost only)
- **LLDAP** — Lightweight LDAP directory (port 3890 LDAP, port 17170 web UI)
- **MariaDB 11** — Session/config storage
- **Config:** `/root/authelia/config/`
- **Secrets:** `/root/authelia/secrets/` (JWT, session, encryption keys, passwords)
- **LDAP base DN:** `dc=pez,dc=sh`

View file

@ -0,0 +1,10 @@
# Example: services/authelia/config.enc.yml
# Authelia secrets — encrypt with: sops -e -i config.enc.yml
# One entry per secret file the compose stack mounts from /root/authelia/secrets/.
---
jwt_secret: CHANGEME
session_secret: CHANGEME
storage_encryption_key: CHANGEME
lldap_admin_password: CHANGEME
# Mounted as /secrets/LLDAP_JWT_SECRET by the lldap container — was missing here.
lldap_jwt_secret: CHANGEME
mariadb_root_password: CHANGEME
mariadb_authelia_password: CHANGEME
oidc_hmac_secret: CHANGEME

View file

@ -0,0 +1,77 @@
# Authelia - SSO/authentication portal with LLDAP + MariaDB
# Host: helsinki-a (100.67.6.27)
# Data: /root/authelia/
# Access: https://auth.pez.sh (via Caddy forward_auth)
services:
  authelia:
    container_name: authelia
    image: docker.io/authelia/authelia:latest
    restart: unless-stopped
    ports:
      # Loopback only — Caddy (reverse_proxy localhost:9091) is the sole entry point.
      - '127.0.0.1:9091:9091'
    environment:
      AUTHELIA_IDENTITY_VALIDATION_RESET_PASSWORD_JWT_SECRET_FILE: /secrets/JWT_SECRET
      AUTHELIA_SESSION_SECRET_FILE: /secrets/SESSION_SECRET
      AUTHELIA_STORAGE_ENCRYPTION_KEY_FILE: /secrets/STORAGE_ENCRYPTION_KEY
      AUTHELIA_STORAGE_MYSQL_PASSWORD_FILE: /secrets/MYSQL_PASSWORD
      TZ: UTC
    volumes:
      - /root/authelia/config:/config
      - /root/authelia/secrets:/secrets
    depends_on:
      # MariaDB must pass its healthcheck before Authelia starts; LLDAP has no
      # healthcheck, so started-only is the strongest guarantee available.
      mariadb:
        condition: service_healthy
      lldap:
        condition: service_started
    networks:
      - authelia

  mariadb:
    container_name: authelia-mariadb
    image: docker.io/library/mariadb:11
    restart: unless-stopped
    environment:
      MYSQL_ROOT_PASSWORD_FILE: /run/secrets/MYSQL_ROOT_PASSWORD
      MYSQL_DATABASE: authelia
      MYSQL_USER: authelia
      MYSQL_PASSWORD_FILE: /run/secrets/MYSQL_PASSWORD
      TZ: UTC
    volumes:
      - /root/authelia/mariadb:/var/lib/mysql
      # Secret files bind-mounted read-only where the _FILE variables expect them.
      - /root/authelia/secrets/MYSQL_ROOT_PASSWORD:/run/secrets/MYSQL_ROOT_PASSWORD:ro
      - /root/authelia/secrets/MYSQL_PASSWORD:/run/secrets/MYSQL_PASSWORD:ro
    networks:
      - authelia
    healthcheck:
      test: ["CMD", "healthcheck.sh", "--connect", "--innodb_initialized"]
      interval: 10s
      timeout: 5s
      retries: 5
      start_period: 30s

  lldap:
    container_name: authelia-lldap
    image: docker.io/lldap/lldap:latest
    restart: unless-stopped
    ports:
      # Web UI: loopback only — Caddy proxies ldap.pez.sh -> 127.0.0.1:17170,
      # so there is no reason to publish it on every interface.
      - '127.0.0.1:17170:17170'
      # LDAP: NOTE(review) still published on all interfaces. Authelia reaches
      # lldap over the compose network, so confirm whether anything external
      # needs raw LDAP; if not, bind this to 127.0.0.1 as well.
      - '3890:3890'
    environment:
      UID: '1000'
      GID: '1000'
      TZ: UTC
      LLDAP_LDAP_BASE_DN: dc=pez,dc=sh
      LLDAP_LDAP_USER_DN: admin
      LLDAP_LDAP_USER_PASS_FILE: /secrets/LLDAP_ADMIN_PASSWORD
      LLDAP_JWT_SECRET_FILE: /secrets/LLDAP_JWT_SECRET
    volumes:
      - /root/authelia/lldap:/data
      - /root/authelia/secrets/LLDAP_ADMIN_PASSWORD:/secrets/LLDAP_ADMIN_PASSWORD:ro
      - /root/authelia/secrets/LLDAP_JWT_SECRET:/secrets/LLDAP_JWT_SECRET:ro
    networks:
      - authelia

networks:
  authelia:
    driver: bridge

View file

@ -0,0 +1,11 @@
# Bitwarden
Self-hosted password manager (unified deployment).
- **Host:** helsinki-a
- **URL:** https://bitwarden.pez.sh
- **Image:** `ghcr.io/bitwarden/self-host:beta` (unified container)
- **Database:** MariaDB 10
- **Admin:** pez@pez.sh
- **Config:** `settings.env` (env file, not committed — contains secrets)
- **Data:** Docker volumes (`bitwarden`, `data`)

View file

@ -0,0 +1,33 @@
# Bitwarden - Self-hosted password manager
# Host: helsinki-a (100.67.6.27)
# Data: Docker volume (bitwarden)
# Access: https://bitwarden.pez.sh (via Caddy reverse proxy)
services:
  bitwarden:
    image: ghcr.io/bitwarden/self-host:beta
    restart: always
    depends_on:
      - db
    env_file:
      # Not committed — contains secrets (see README).
      - settings.env
    ports:
      # Loopback only: Caddy terminates TLS and proxies to localhost:8443
      # (see the bitwarden.pez.sh block in services/caddy/Caddyfile); nothing
      # else should reach the vault directly.
      - "127.0.0.1:8080:8080"
      - "127.0.0.1:8443:8443"
    volumes:
      - bitwarden:/etc/bitwarden

  db:
    image: mariadb:10
    restart: always
    environment:
      MARIADB_USER: bitwarden
      # Supplied from the deploy environment, never committed.
      MARIADB_PASSWORD: "${BW_DB_PASSWORD}"
      MARIADB_DATABASE: bitwarden_vault
      MARIADB_RANDOM_ROOT_PASSWORD: "true"
    volumes:
      - data:/var/lib/mysql

volumes:
  bitwarden:
  data:

View file

@ -0,0 +1,246 @@
# _ __ ___ ____ _ __ _ __ _____ ___ _
# | '_ \ / _ \_ /____| '_ \| '__/ _ \ \/ / | | |
# | |_) | __// /_____| |_) | | | (_) > <| |_| |
# | .__/ \___/___| | .__/|_| \___/_/\_\\__, |
# |_| |_| |___/
#
{
admin 100.67.6.27:2019
metrics {
per_host
}
}
## LONDON-A SERVICES ##
# Grafana
grafana.pez.solutions, grafana.pez.sh {
forward_auth localhost:9091 {
uri /api/authz/forward-auth
copy_headers Remote-User Remote-Groups Remote-Name Remote-Email
}
reverse_proxy 100.122.219.41:3000
}
# Prometheus
prometheus.pez.solutions, prometheus.pez.sh {
forward_auth localhost:9091 {
uri /api/authz/forward-auth
copy_headers Remote-User Remote-Groups Remote-Name Remote-Email
}
reverse_proxy 100.122.219.41:9090
}
# Alertmanager — NOTE(review): proxies to port 3000, which is Grafana's port above;
# Alertmanager's default is 9093. Verify before relying on this vhost.
alertmanager.pez.solutions, alertmanager.pez.sh {
forward_auth localhost:9091 {
uri /api/authz/forward-auth
copy_headers Remote-User Remote-Groups Remote-Name Remote-Email
}
reverse_proxy 100.122.219.41:3000
}
## LONDON-B SERVICES ##
# Jellyfin
jellyfin.pez.solutions, jellyfin.pez.sh {
reverse_proxy 100.84.65.101:8096
}
# Plex
plex.pez.solutions, plex.pez.sh {
reverse_proxy 100.84.65.101:32400
}
# Radarr
radarr.pez.solutions, radarr.pez.sh {
forward_auth localhost:9091 {
uri /api/authz/forward-auth
copy_headers Remote-User Remote-Groups Remote-Name Remote-Email
}
reverse_proxy 100.84.65.101:7878
}
# Sonarr
sonarr.pez.solutions, sonarr.pez.sh {
forward_auth localhost:9091 {
uri /api/authz/forward-auth
copy_headers Remote-User Remote-Groups Remote-Name Remote-Email
}
reverse_proxy 100.84.65.101:8989
}
# Lidarr
lidarr.pez.solutions, lidarr.pez.sh {
forward_auth localhost:9091 {
uri /api/authz/forward-auth
copy_headers Remote-User Remote-Groups Remote-Name Remote-Email
}
reverse_proxy 100.84.65.101:8686
}
# Readarr
readarr.pez.solutions, readarr.pez.sh {
forward_auth localhost:9091 {
uri /api/authz/forward-auth
copy_headers Remote-User Remote-Groups Remote-Name Remote-Email
}
reverse_proxy 100.84.65.101:8787
}
# slskd
soulseek.pez.solutions, soulseek.pez.sh {
forward_auth localhost:9091 {
uri /api/authz/forward-auth
copy_headers Remote-User Remote-Groups Remote-Name Remote-Email
}
reverse_proxy 100.84.65.101:5030
}
# Prowlarr
prowlarr.pez.solutions, prowlarr.pez.sh {
forward_auth localhost:9091 {
uri /api/authz/forward-auth
copy_headers Remote-User Remote-Groups Remote-Name Remote-Email
}
reverse_proxy 100.84.65.101:9696
}
# Transmission
download.pez.solutions, download.pez.sh {
forward_auth localhost:9091 {
uri /api/authz/forward-auth
copy_headers Remote-User Remote-Groups Remote-Name Remote-Email
}
reverse_proxy 100.84.65.101:9091
}
# Overseerr
request.pez.solutions, request.pez.sh {
reverse_proxy 100.84.65.101:5055
}
# Jellyfin Requests
jellyfin-requests.pez.solutions, jellyfin-requests.pez.sh {
reverse_proxy 100.84.65.101:5056
}
#WebDAV
#https://cloud.pez.sh {
# reverse_proxy 100.84.65.101:8080
#}
https://cloud.pez.sh:443 {
reverse_proxy 100.84.65.101:11000 # Adjust to match APACHE_PORT and APACHE_IP_BINDING. See https://github.com/nextcloud/all-in-one/blob/main/reverse-proxy.md#adapting-the-sample-web-server-configurations-below
}
music.pez.sh {
reverse_proxy 100.84.65.101:4533
}
## LONDON-C SERVICES ##
## COPENHAGEN-A SERVICES ##
## COPENHAGEN-B SERVICES ##
## COPENHAGEN-C SERVICES ##
## NUREMBERG-A SERVICES ##
## HELSINKI-A SERVICES ##
# Bitwarden (requires HTTPS tweaking)
https://bitwarden.pez.sh {
reverse_proxy localhost:8443 {
transport http {
tls_insecure_skip_verify
}
}
}
# Authelia (requires HTTPS tweaking)
auth.pez.solutions, auth.pez.sh {
reverse_proxy localhost:9091
}
ldap.pez.sh {
reverse_proxy 127.0.0.1:17170
}
#https://auth.pez.sh {
# reverse_proxy 127.0.0.1:9091 {
# transport http {
# tls_insecure_skip_verify
# }
# }
#}
# Apps dashboard
apps.pez.solutions, apps.pez.sh {
root * /srv/apps
forward_auth localhost:9091 {
uri /api/authz/forward-auth
copy_headers Remote-User Remote-Groups Remote-Name Remote-Email
}
file_server
}
# Pez.solutions
pez.solutions {
root * /srv/pez.solutions
file_server
}
# Cloud.pez.solutions
cloud.pez.solutions {
root * /srv/cloud.pez.solutions
file_server
}
# Pez.sh
pez.sh {
root * /srv/pez.sh
file_server
}
# Pez-signup
signup.pez.solutions {
root * /srv/pez-signup
file_server
}
# Naveen
naveen.pez.sh {
root * /srv/naveen
file_server
}
## HELSINKI-A SERVICES ##
# Status page
status.pez.sh {
root * /srv/status
file_server
}
# Miniflux RSS (backend runs on london-b, 100.84.65.101 — not helsinki-a)
rss.pez.sh {
forward_auth localhost:9091 {
uri /api/authz/forward-auth
copy_headers Remote-User Remote-Groups Remote-Name Remote-Email
}
reverse_proxy 100.84.65.101:8181
}
# Forgejo Git Server (auth handled by Forgejo itself)
git.pez.sh {
reverse_proxy localhost:3000
}
# This Website Does Not Exist
thiswebsitedoesnotexist.pez.sh {
reverse_proxy localhost:3721
}

View file

@ -0,0 +1,198 @@
# Caddyfile template for helsinki-a reverse proxy
#
# Variables (replace before deploying):
# {{HELSINKI_A_IP}} - helsinki-a Tailscale IP (currently 100.67.6.27)
# {{LONDON_A_IP}} - london-a Tailscale IP (currently 100.122.219.41)
# {{LONDON_B_IP}} - london-b Tailscale IP (currently 100.84.65.101)
# {{AUTHELIA_PORT}} - Authelia port (currently 9091)
# {{DOMAIN_PRIMARY}} - Primary domain (currently pez.sh)
# {{DOMAIN_ALT}} - Alternate domain (currently pez.solutions)
#
# Authelia forward_auth pattern: see README.md for details.
{
admin {{HELSINKI_A_IP}}:2019
metrics {
per_host
}
}
# ============================================================
# Snippet: Authelia forward_auth
# Include with `import authelia` inside any site block.
# ============================================================
(authelia) {
forward_auth localhost:{{AUTHELIA_PORT}} {
uri /api/authz/forward-auth
copy_headers Remote-User Remote-Groups Remote-Name Remote-Email
}
}
## LONDON-A SERVICES ##
# Grafana
grafana.{{DOMAIN_ALT}}, grafana.{{DOMAIN_PRIMARY}} {
import authelia
reverse_proxy {{LONDON_A_IP}}:3000
}
# Prometheus
prometheus.{{DOMAIN_ALT}}, prometheus.{{DOMAIN_PRIMARY}} {
import authelia
reverse_proxy {{LONDON_A_IP}}:9090
}
# Alertmanager — NOTE(review): points at port 3000 (Grafana's port); Alertmanager's default is 9093 — confirm.
alertmanager.{{DOMAIN_ALT}}, alertmanager.{{DOMAIN_PRIMARY}} {
import authelia
reverse_proxy {{LONDON_A_IP}}:3000
}
## LONDON-B SERVICES ##
# Jellyfin (no auth — has its own login)
jellyfin.{{DOMAIN_ALT}}, jellyfin.{{DOMAIN_PRIMARY}} {
reverse_proxy {{LONDON_B_IP}}:8096
}
# Plex (no auth — has its own login)
plex.{{DOMAIN_ALT}}, plex.{{DOMAIN_PRIMARY}} {
reverse_proxy {{LONDON_B_IP}}:32400
}
# Radarr
radarr.{{DOMAIN_ALT}}, radarr.{{DOMAIN_PRIMARY}} {
import authelia
reverse_proxy {{LONDON_B_IP}}:7878
}
# Sonarr
sonarr.{{DOMAIN_ALT}}, sonarr.{{DOMAIN_PRIMARY}} {
import authelia
reverse_proxy {{LONDON_B_IP}}:8989
}
# Lidarr
lidarr.{{DOMAIN_ALT}}, lidarr.{{DOMAIN_PRIMARY}} {
import authelia
reverse_proxy {{LONDON_B_IP}}:8686
}
# Readarr
readarr.{{DOMAIN_ALT}}, readarr.{{DOMAIN_PRIMARY}} {
import authelia
reverse_proxy {{LONDON_B_IP}}:8787
}
# slskd (Soulseek)
soulseek.{{DOMAIN_ALT}}, soulseek.{{DOMAIN_PRIMARY}} {
import authelia
reverse_proxy {{LONDON_B_IP}}:5030
}
# Prowlarr
prowlarr.{{DOMAIN_ALT}}, prowlarr.{{DOMAIN_PRIMARY}} {
import authelia
reverse_proxy {{LONDON_B_IP}}:9696
}
# Transmission
download.{{DOMAIN_ALT}}, download.{{DOMAIN_PRIMARY}} {
import authelia
reverse_proxy {{LONDON_B_IP}}:9091
}
# Overseerr (no auth — has its own login)
request.{{DOMAIN_ALT}}, request.{{DOMAIN_PRIMARY}} {
reverse_proxy {{LONDON_B_IP}}:5055
}
# Jellyseerr (no auth — has its own login)
jellyfin-requests.{{DOMAIN_ALT}}, jellyfin-requests.{{DOMAIN_PRIMARY}} {
reverse_proxy {{LONDON_B_IP}}:5056
}
# Nextcloud AIO
https://cloud.{{DOMAIN_PRIMARY}}:443 {
reverse_proxy {{LONDON_B_IP}}:11000
}
# Navidrome
music.{{DOMAIN_PRIMARY}} {
reverse_proxy {{LONDON_B_IP}}:4533
}
# Miniflux RSS
rss.{{DOMAIN_PRIMARY}} {
import authelia
reverse_proxy {{LONDON_B_IP}}:8181
}
## HELSINKI-A SERVICES (localhost) ##
# Bitwarden
https://bitwarden.{{DOMAIN_PRIMARY}} {
reverse_proxy localhost:8443 {
transport http {
tls_insecure_skip_verify
}
}
}
# Authelia portal
auth.{{DOMAIN_ALT}}, auth.{{DOMAIN_PRIMARY}} {
reverse_proxy localhost:{{AUTHELIA_PORT}}
}
# LLDAP web UI
ldap.{{DOMAIN_PRIMARY}} {
reverse_proxy 127.0.0.1:17170
}
# Apps dashboard
apps.{{DOMAIN_ALT}}, apps.{{DOMAIN_PRIMARY}} {
root * /srv/apps
import authelia
file_server
}
# Static sites
{{DOMAIN_ALT}} {
root * /srv/pez.solutions
file_server
}
cloud.{{DOMAIN_ALT}} {
root * /srv/cloud.pez.solutions
file_server
}
{{DOMAIN_PRIMARY}} {
root * /srv/pez.sh
file_server
}
signup.{{DOMAIN_ALT}} {
root * /srv/pez-signup
file_server
}
naveen.{{DOMAIN_PRIMARY}} {
root * /srv/naveen
file_server
}
status.{{DOMAIN_PRIMARY}} {
root * /srv/status
file_server
}
# Forgejo (auth handled by Forgejo itself)
git.{{DOMAIN_PRIMARY}} {
reverse_proxy localhost:3000
}
# This Website Does Not Exist
thiswebsitedoesnotexist.{{DOMAIN_PRIMARY}} {
reverse_proxy localhost:3721
}

View file

@ -0,0 +1,129 @@
# Caddy
Reverse proxy and TLS termination for all homelab services.
Runs on **helsinki-a** (`100.67.6.27`) as a system service (not Docker).
Replaces the standalone `pez-proxy` repo.
## Structure
```
services/caddy/
├── Caddyfile # Live config captured from helsinki-a
├── Caddyfile.template # Templatized version with variable placeholders
└── README.md
```
## How It Works
Helsinki-a sits behind Cloudflare DNS and acts as the single entry point for all
`*.pez.sh` and `*.pez.solutions` traffic. Caddy handles automatic TLS via
Let's Encrypt/ZeroSSL, then reverse-proxies to backend services over the
Tailscale mesh.
### Traffic Flow
```
Internet → Cloudflare → helsinki-a (Caddy) → Tailscale → backend host:port
```
### Admin API
Caddy's admin API listens on `100.67.6.27:2019` (Tailscale-only, not
publicly exposed). Useful for config reloads without downtime:
```bash
caddy reload --config /etc/caddy/Caddyfile
# or via API:
curl http://100.67.6.27:2019/config/
```
### Metrics
Caddy exposes Prometheus metrics with `per_host` granularity. Scraped by
Prometheus on london-a.
## Authelia Forward Auth Pattern
Most admin-facing services are protected by [Authelia](https://www.authelia.com/)
SSO. Authelia runs on helsinki-a itself (`localhost:9091`) alongside an LLDAP
directory and MariaDB backend (see `services/authelia/`).
### How forward_auth Works
Caddy's `forward_auth` directive intercepts every request before it reaches
the upstream. It sends a subrequest to Authelia's verification endpoint:
```
forward_auth localhost:9091 {
uri /api/authz/forward-auth
copy_headers Remote-User Remote-Groups Remote-Name Remote-Email
}
```
**Flow:**
1. Client requests `https://grafana.pez.sh/some/page`
2. Caddy sends a verification subrequest to `localhost:9091/api/authz/forward-auth`
3. Authelia checks the session cookie:
- **Valid session** → returns 200; Caddy copies identity headers (`Remote-User`,
`Remote-Groups`, `Remote-Name`, `Remote-Email`) and forwards to the upstream
- **No/expired session** → returns 401 with redirect; Caddy sends user to
`auth.pez.sh` to log in via Authelia's portal
4. After login, Authelia sets a session cookie and redirects back to the
original URL
### Which Services Use Authelia
| Service | Auth | Reason |
|---------|------|--------|
| Grafana, Prometheus, Alertmanager | Authelia | Admin dashboards |
| Radarr, Sonarr, Lidarr, Readarr | Authelia | Media management |
| Prowlarr, Transmission (download) | Authelia | Download tools |
| slskd (Soulseek) | Authelia | P2P client |
| Miniflux (RSS) | Authelia | RSS reader |
| Apps dashboard | Authelia | Internal apps page |
| Jellyfin, Plex | Own auth | Have built-in user management |
| Overseerr, Jellyseerr | Own auth | Have built-in user management |
| Nextcloud | Own auth | Has built-in user management |
| Navidrome (music) | No auth | Accessible directly — not gated by Caddy forward_auth |
| Bitwarden | Own auth | Has built-in vault auth |
| Forgejo (git) | Own auth | Has built-in user management |
| Authelia portal | N/A | Is the auth system itself |
| LLDAP web UI | N/A | Admin directory management |
### Template Snippet
The template file uses a Caddy snippet to DRY up the auth block:
```
(authelia) {
forward_auth localhost:{{AUTHELIA_PORT}} {
uri /api/authz/forward-auth
copy_headers Remote-User Remote-Groups Remote-Name Remote-Email
}
}
```
Usage in a site block: `import authelia`
## Template Variables
The `Caddyfile.template` replaces hardcoded values with placeholders:
| Variable | Current Value | Description |
|----------|--------------|-------------|
| `{{HELSINKI_A_IP}}` | `100.67.6.27` | helsinki-a Tailscale IP |
| `{{LONDON_A_IP}}` | `100.122.219.41` | london-a Tailscale IP |
| `{{LONDON_B_IP}}` | `100.84.65.101` | london-b Tailscale IP |
| `{{AUTHELIA_PORT}}` | `9091` | Authelia verification port |
| `{{DOMAIN_PRIMARY}}` | `pez.sh` | Primary domain |
| `{{DOMAIN_ALT}}` | `pez.solutions` | Alternate domain |
## Notes
- The live Caddyfile on helsinki-a is at `/etc/caddy/Caddyfile`
- Caddy auto-provisions TLS certificates for all listed domains
- The Alertmanager proxy currently points to port 3000 (same as Grafana) — this may be intentional (Grafana's built-in alerting UI) or a copy-paste issue worth checking
- Commented-out WebDAV block was replaced by the Nextcloud AIO reverse proxy
- Static sites (`pez.sh`, `pez.solutions`, etc.) are served from `/srv/` on helsinki-a

View file

@ -0,0 +1,9 @@
# Forgejo
Self-hosted Git forge (Gitea fork).
- **Host:** helsinki-a
- **URL:** https://git.pez.sh
- **SSH:** git.pez.sh:2222
- **Data:** `/srv/forgejo/data`
- **Registration:** Disabled (private instance)

View file

@ -0,0 +1,26 @@
# Forgejo - Self-hosted Git forge
# Host: helsinki-a (100.67.6.27)
# Data: /srv/forgejo/data
# Access: https://git.pez.sh (via Caddy reverse proxy)
# SSH: git.pez.sh:2222
services:
  forgejo:
    image: codeberg.org/forgejo/forgejo:10
    container_name: forgejo
    restart: unless-stopped
    environment:
      - USER_UID=1000
      - USER_GID=1000
      - FORGEJO__server__ROOT_URL=https://git.pez.sh
      - FORGEJO__server__SSH_DOMAIN=git.pez.sh
      # SSH_PORT is the port advertised in clone URLs. The host maps 2222 -> 22
      # below, so advertise 2222 (matches "SSH: git.pez.sh:2222" above) while
      # SSH_LISTEN_PORT keeps the in-container daemon on 22; with SSH_PORT=22
      # every generated clone URL pointed at the host's own sshd instead.
      - FORGEJO__server__SSH_PORT=2222
      - FORGEJO__server__SSH_LISTEN_PORT=22
      - FORGEJO__server__HTTP_PORT=3000
      - FORGEJO__server__DISABLE_SSH=false
      - FORGEJO__service__DISABLE_REGISTRATION=true
      - FORGEJO__service__REQUIRE_SIGNIN_VIEW=false
    ports:
      # HTTP: loopback only — Caddy proxies git.pez.sh -> localhost:3000.
      - '127.0.0.1:3000:3000'
      # SSH: published on all interfaces at host port 2222.
      - '0.0.0.0:2222:22'
    volumes:
      - /srv/forgejo/data:/data

View file

@ -0,0 +1,62 @@
# Grafana
Grafana dashboards, alerting rules, and provisioning config for the homelab/cloud stack.
Runs on **london-a** (FreeBSD, `100.122.219.41`) as a native service (not Docker).
Migrated from the standalone `pez-grafana` repo.
## Structure
```
services/grafana/
├── dashboards/ # Dashboard JSON files
│ ├── infrastructure.json # Infrastructure overview (linux hosts)
│ ├── living-room-display.json # Kiosk/TV dashboard
│ ├── node-exporter-full.json # Full node exporter metrics
│ └── traffic-slo.json # Traffic / SLO tracking
└── provisioning/ # Grafana provisioning files
├── alerting/
│ ├── contact-points.yml # Alert receivers (PagerDuty, email)
│ ├── notification-policy.yml # Routing: critical → PagerDuty, warning → email
│ ├── rules-critical.yml # Tier 1: pages PagerDuty immediately
│ └── rules-warning.yml # Tier 2: email only
├── dashboards/
│ └── dashboards.yml # Dashboard file provider config
└── datasources/
└── datasources.json # Prometheus datasource (localhost:9090)
```
## Alert Tiers
| Tier | Routing | Examples |
|----------|------------|--------------------------------------------|
| Critical | PagerDuty | Host down, disk >95%, memory >95% |
| Warning | Email | Disk >80%, memory >85%, high load/CPU |
## Deployment
Deployed via the monorepo's `ansible/deploy.yml` (Stage 4e: Monitoring stack).
```bash
cd ansible
ansible-playbook deploy.yml --limit london-a --tags monitoring
```
Provisioning files are synced to `/usr/local/etc/grafana/provisioning/` and dashboards
to `/usr/local/etc/grafana/dashboards/` on london-a. Grafana is restarted after changes.
### Notes
- The old `pez-grafana` repo deployed provisioning to `/usr/local/share/grafana/conf/provisioning/`.
The monorepo uses `/usr/local/etc/grafana/` — verify the correct path on london-a before first deploy.
- PagerDuty integration key is referenced via `${PAGERDUTY_INTEGRATION_KEY}` env var (not stored in repo).
- Grafana password is not committed; pass via `--extra-vars` or env.
## Importing Dashboards Manually
```bash
curl -X POST -H "Content-Type: application/json" \
-u admin:password \
-d "{\"dashboard\": $(cat dashboards/infrastructure.json), \"overwrite\": true}" \
http://localhost:3000/api/dashboards/db
```

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,587 @@
{
"annotations": {
"list": [
{
"builtIn": 1,
"datasource": {
"type": "grafana",
"uid": "-- Grafana --"
},
"enable": true,
"hide": true,
"iconColor": "rgba(0, 211, 255, 1)",
"name": "Annotations & Alerts",
"type": "dashboard"
}
]
},
"editable": true,
"fiscalYearStartMonth": 0,
"graphTooltip": 0,
"links": [],
"panels": [
{
"datasource": {
"type": "prometheus",
"uid": "bezqqznn81wqof"
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "thresholds"
},
"decimals": 3,
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "red",
"value": 0
},
{
"color": "yellow",
"value": 99.9
},
{
"color": "green",
"value": 99.99
}
]
},
"unit": "percent"
},
"overrides": []
},
"gridPos": {
"h": 10,
"w": 4,
"x": 0,
"y": 0
},
"id": 3,
"options": {
"colorMode": "value",
"graphMode": "none",
"justifyMode": "auto",
"orientation": "auto",
"percentChangeColorMode": "standard",
"reduceOptions": {
"calcs": [
"lastNotNull"
],
"fields": "",
"values": false
},
"showPercentChange": false,
"textMode": "auto",
"wideLayout": true
},
"pluginVersion": "12.1.0",
"targets": [
{
"disableTextWrap": false,
"editorMode": "code",
"expr": "clamp_max(\n (sum(caddy_http_response_size_bytes_count{host=~\".*(pez.solutions|pez.sh)\", code!~\"5.*\"}) / (sum(caddy_http_response_size_bytes_count{host=~\".*(pez.solutions|pez.sh)\"}))) * 100,\n 99.999\n)",
"fullMetaSearch": false,
"hide": false,
"includeNullMetadata": true,
"legendFormat": "__auto",
"range": true,
"refId": "A",
"useBackend": false
}
],
"title": "SLI",
"type": "stat"
},
{
"datasource": {
"type": "prometheus",
"uid": "bezqqznn81wqof"
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "thresholds"
},
"decimals": 3,
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "red",
"value": 0
},
{
"color": "yellow",
"value": 99.9
},
{
"color": "green",
"value": 99.99
}
]
},
"unit": "percent"
},
"overrides": []
},
"gridPos": {
"h": 10,
"w": 20,
"x": 4,
"y": 0
},
"id": 4,
"options": {
"colorMode": "value",
"graphMode": "none",
"justifyMode": "center",
"orientation": "auto",
"percentChangeColorMode": "standard",
"reduceOptions": {
"calcs": [
"lastNotNull"
],
"fields": "",
"values": false
},
"showPercentChange": false,
"textMode": "auto",
"wideLayout": true
},
"pluginVersion": "12.1.0",
"targets": [
{
"disableTextWrap": false,
"editorMode": "code",
"expr": "clamp_max(\n (\n sum(\n label_replace(\n caddy_http_response_size_bytes_count{host=~\".*(pez.solutions|pez.sh)\", host!~\"(pez.sh|pez.solutions)\", code!~\"5.*\"},\n \"host_prefix\",\n \"$1\",\n \"host\",\n \"([^.]+)\\\\..*\"\n )\n ) by (host_prefix)\n /\n sum(\n label_replace(\n caddy_http_response_size_bytes_count{host=~\".*(pez.solutions|pez.sh)\", host!~\"(pez.sh|pez.solutions)\"},\n \"host_prefix\",\n \"$1\",\n \"host\",\n \"([^.]+)\\\\..*\"\n )\n ) by (host_prefix)\n ) * 100,\n 99.999\n)",
"fullMetaSearch": false,
"hide": false,
"includeNullMetadata": true,
"legendFormat": "__auto",
"range": true,
"refId": "A",
"useBackend": false
}
],
"title": "SLI by Host",
"type": "stat"
},
{
"datasource": {
"type": "prometheus",
"uid": "bezqqznn81wqof"
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisBorderShow": false,
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"barWidthFactor": 0.6,
"drawStyle": "line",
"fillOpacity": 0,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"insertNulls": false,
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "auto",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": 0
},
{
"color": "red",
"value": 80
}
]
},
"unit": "reqps"
},
"overrides": []
},
"gridPos": {
"h": 11,
"w": 19,
"x": 0,
"y": 10
},
"id": 1,
"options": {
"legend": {
"calcs": [],
"displayMode": "list",
"placement": "bottom",
"showLegend": true
},
"tooltip": {
"hideZeros": false,
"mode": "single",
"sort": "none"
}
},
"pluginVersion": "12.1.0",
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "bezqqznn81wqof"
},
"disableTextWrap": false,
"editorMode": "code",
"expr": "sum(\n label_replace(\n rate(caddy_http_requests_total{handler!=\"metrics\", host=~\".*(pez.solutions|pez.sh)\"}[$__rate_interval]),\n \"host_prefix\",\n \"$1\",\n \"host\",\n \"([^.]+)\\\\..*\"\n )\n) by (host_prefix)",
"fullMetaSearch": false,
"includeNullMetadata": false,
          "legendFormat": "{{host_prefix}}",
"range": true,
"refId": "A",
"useBackend": false
}
],
"title": "Traffic Rate by Service",
"type": "timeseries"
},
{
"datasource": {
"type": "prometheus",
"uid": "bezqqznn81wqof"
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "thresholds"
},
"custom": {
"align": "auto",
"cellOptions": {
"type": "auto"
},
"inspect": false
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": 0
},
{
"color": "red",
"value": 80
}
]
}
},
"overrides": []
},
"gridPos": {
"h": 11,
"w": 5,
"x": 19,
"y": 10
},
"id": 6,
"options": {
"cellHeight": "sm",
"footer": {
"countRows": false,
"fields": "",
"reducer": [
"sum"
],
"show": false
},
"showHeader": true,
"sortBy": [
{
"desc": true,
"displayName": "req/s"
}
]
},
"pluginVersion": "12.1.0",
"targets": [
{
"editorMode": "code",
"exemplar": false,
"expr": "sum(\n label_replace(\n rate(caddy_http_requests_total{handler!=\"metrics\", host=~\".*(pez.solutions|pez.sh)\", host!~\"(pez.sh|pez.solutions)\"}[$__rate_interval]),\n \"host_prefix\",\n \"$1\",\n \"host\",\n \"([^.]+)\\\\..*\"\n )\n) by (host_prefix) > 0",
"format": "table",
"instant": true,
"legendFormat": "__auto",
"range": false,
"refId": "A"
}
],
"title": "Active Services",
"transformations": [
{
"id": "organize",
"options": {
"excludeByName": {
"Time": true
},
"includeByName": {},
"indexByName": {},
"renameByName": {
"Value": "req/s",
"host_prefix": "Service"
}
}
}
],
"type": "table"
},
{
"datasource": {
"type": "prometheus",
"uid": "bezqqznn81wqof"
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisBorderShow": false,
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"barWidthFactor": 0.6,
"drawStyle": "line",
"fillOpacity": 0,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"insertNulls": false,
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "auto",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": 0
},
{
"color": "red",
"value": 80
}
]
},
"unit": "reqps"
},
"overrides": []
},
"gridPos": {
"h": 11,
"w": 12,
"x": 0,
"y": 21
},
"id": 2,
"options": {
"legend": {
"calcs": [],
"displayMode": "list",
"placement": "bottom",
"showLegend": true
},
"tooltip": {
"hideZeros": false,
"mode": "single",
"sort": "none"
}
},
"pluginVersion": "12.1.0",
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "bezqqznn81wqof"
},
"disableTextWrap": false,
"editorMode": "code",
"expr": "sum(rate(caddy_http_response_duration_seconds_count{code!~\"5.*\"}[$__rate_interval]))",
"fullMetaSearch": false,
"includeNullMetadata": false,
"legendFormat": "Good",
"range": true,
"refId": "A",
"useBackend": false
}
],
"title": "Response Codes (Good)",
"type": "timeseries"
},
{
"datasource": {
"type": "prometheus",
"uid": "bezqqznn81wqof"
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisBorderShow": false,
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"axisSoftMin": 0,
"barAlignment": 0,
"barWidthFactor": 0.6,
"drawStyle": "line",
"fillOpacity": 0,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"insertNulls": false,
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "auto",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": 0
},
{
"color": "red",
"value": 80
}
]
},
"unit": "reqps"
},
"overrides": []
},
"gridPos": {
"h": 11,
"w": 12,
"x": 12,
"y": 21
},
"id": 5,
"options": {
"legend": {
"calcs": [],
"displayMode": "list",
"placement": "bottom",
"showLegend": true
},
"tooltip": {
"hideZeros": false,
"mode": "single",
"sort": "none"
}
},
"pluginVersion": "12.1.0",
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "bezqqznn81wqof"
},
"disableTextWrap": false,
"editorMode": "code",
"expr": "sum(rate(caddy_http_response_duration_seconds_count{code=~\"5.*\"}[$__rate_interval])) by (code, host) > 0",
"fullMetaSearch": false,
"includeNullMetadata": false,
"legendFormat": "{{code}} - {{host}}",
"range": true,
"refId": "A",
"useBackend": false
}
],
"title": "Response Codes (Bad)",
"type": "timeseries"
}
],
"preload": false,
"refresh": "5s",
"schemaVersion": 41,
"tags": [],
"templating": {
"list": []
},
"time": {
"from": "now-24h",
"to": "now"
},
"timepicker": {},
"timezone": "browser",
"title": "Traffic / SLO",
"uid": "384f28fe-2435-480f-a0f0-723ccdcf8b3b"
}

View file

@ -0,0 +1,23 @@
apiVersion: 1
# Contact points — defines where alerts are sent.
# PagerDuty integration key is injected at deploy time via the Ansible template
# variable referenced below; do not commit the real key here.
contactPoints:
- orgId: 1
name: PagerDuty
receivers:
- uid: bf0ukmhpefshsc
type: pagerduty
settings:
integrationKey: "{{ grafana_pagerduty_integration_key }}"
disableResolveMessage: false
- orgId: 1
name: email
receivers:
- uid: email-receiver
type: email
settings:
addresses: pez@pez.sh
disableResolveMessage: false

View file

@ -0,0 +1,31 @@
apiVersion: 1
# Notification routing policy.
# Critical alerts (severity=critical) → PagerDuty.
# Warning alerts (severity=warning) → email.
policies:
- orgId: 1
receiver: PagerDuty
group_by:
- alertname
- server
group_wait: 30s
group_interval: 5m
repeat_interval: 4h
routes:
- receiver: PagerDuty
matchers:
- severity = critical
group_wait: 0s
group_interval: 1m
repeat_interval: 1h
continue: false
- receiver: email
matchers:
- severity = warning
group_wait: 2m
group_interval: 10m
repeat_interval: 8h
continue: false

View file

@ -0,0 +1,358 @@
apiVersion: 1
# Tier 1 — Critical alerts. These page PagerDuty.
# Datasource UID: bezqqznn81wqof (Prometheus on london-a)
# All alerts use reduce+threshold (not classic_conditions) so $labels.* works in
# annotations; numeric values must be read via $values.B.Value (B = reduce expr),
# since $value is the full evaluation string, not a number.
groups:
- orgId: 1
name: critical-availability
folder: Alerting
interval: 1m
rules:
- uid: cff6uy1tufj0ge
title: Host Down
condition: C
data:
- refId: A
datasourceUid: bezqqznn81wqof
model:
expr: up{job="node_exporter"}
instant: true
intervalMs: 1000
maxDataPoints: 43200
refId: A
- refId: B
datasourceUid: __expr__
model:
datasource:
type: __expr__
uid: __expr__
expression: A
reducer: last
settings:
mode: ""
refId: B
type: reduce
- refId: C
datasourceUid: __expr__
model:
conditions:
- evaluator:
params: [1]
type: lt
operator:
type: and
query:
params: [C]
reducer:
params: []
type: last
type: query
datasource:
type: __expr__
uid: __expr__
expression: B
refId: C
type: threshold
noDataState: Alerting
execErrState: Alerting
for: 2m
annotations:
summary: "Host {{ $labels.server }} is down"
description: "Node exporter on {{ $labels.server }} ({{ $labels.instance }}) has been unreachable for 2+ minutes."
labels:
severity: critical
isPaused: false
- uid: aff6uy1vxchdse
title: Disk Usage Critical (>95%)
condition: C
data:
- refId: A
datasourceUid: bezqqznn81wqof
model:
expr: |
(
node_filesystem_size_bytes{job="node_exporter", fstype!~"tmpfs|overlay|squashfs|devtmpfs"}
- node_filesystem_avail_bytes{job="node_exporter", fstype!~"tmpfs|overlay|squashfs|devtmpfs"}
)
/ node_filesystem_size_bytes{job="node_exporter", fstype!~"tmpfs|overlay|squashfs|devtmpfs"}
* 100
instant: true
intervalMs: 1000
maxDataPoints: 43200
refId: A
- refId: B
datasourceUid: __expr__
model:
datasource:
type: __expr__
uid: __expr__
expression: A
reducer: last
settings:
mode: ""
refId: B
type: reduce
- refId: C
datasourceUid: __expr__
model:
conditions:
- evaluator:
params: [95]
type: gt
operator:
type: and
query:
params: [C]
reducer:
params: []
type: last
type: query
datasource:
type: __expr__
uid: __expr__
expression: B
refId: C
type: threshold
noDataState: NoData
execErrState: Error
for: 5m
annotations:
summary: "Disk critically full on {{ $labels.server }}"
          description: "Filesystem {{ $labels.mountpoint }} on {{ $labels.server }} is over 95% full (currently {{ $values.B.Value | printf \"%.1f\" }}%)."
labels:
severity: critical
isPaused: false
- uid: aff6uy1xq9udca
title: Memory Usage Critical (>95%)
condition: C
data:
- refId: A
datasourceUid: bezqqznn81wqof
model:
expr: |
(1 - (node_memory_MemAvailable_bytes{job="node_exporter"} / node_memory_MemTotal_bytes{job="node_exporter"})) * 100
instant: true
intervalMs: 1000
maxDataPoints: 43200
refId: A
- refId: B
datasourceUid: __expr__
model:
datasource:
type: __expr__
uid: __expr__
expression: A
reducer: last
settings:
mode: ""
refId: B
type: reduce
- refId: C
datasourceUid: __expr__
model:
conditions:
- evaluator:
params: [95]
type: gt
operator:
type: and
query:
params: [C]
reducer:
params: []
type: last
type: query
datasource:
type: __expr__
uid: __expr__
expression: B
refId: C
type: threshold
noDataState: NoData
execErrState: Error
for: 5m
annotations:
summary: "Memory critically low on {{ $labels.server }}"
description: "Memory usage on {{ $labels.server }} ({{ $labels.instance }}) is above 95% for 5+ minutes."
labels:
severity: critical
isPaused: false
- uid: fff6uy219mo00e
title: SMART Disk Health Failure (london-b)
condition: C
data:
- refId: A
datasourceUid: bezqqznn81wqof
model:
expr: smartctl_device_smart_status{job="smartmontools"}
instant: true
intervalMs: 1000
maxDataPoints: 43200
refId: A
- refId: B
datasourceUid: __expr__
model:
datasource:
type: __expr__
uid: __expr__
expression: A
reducer: last
settings:
mode: ""
refId: B
type: reduce
- refId: C
datasourceUid: __expr__
model:
conditions:
- evaluator:
params: [1]
type: lt
operator:
type: and
query:
params: [C]
reducer:
params: []
type: last
type: query
datasource:
type: __expr__
uid: __expr__
expression: B
refId: C
type: threshold
noDataState: NoData
execErrState: Error
for: 0m
annotations:
summary: "Disk SMART health failure on london-b"
description: "Drive {{ $labels.device }} on london-b reports SMART health failure. Check immediately."
labels:
severity: critical
isPaused: false
- orgId: 1
name: critical-caddy
folder: Alerting
interval: 1m
rules:
- uid: fff6uy1zgpb0gd
title: Caddy Down (helsinki-a)
condition: C
data:
- refId: A
datasourceUid: bezqqznn81wqof
model:
expr: up{job="caddy"}
instant: true
intervalMs: 1000
maxDataPoints: 43200
refId: A
- refId: B
datasourceUid: __expr__
model:
datasource:
type: __expr__
uid: __expr__
expression: A
reducer: last
settings:
mode: ""
refId: B
type: reduce
- refId: C
datasourceUid: __expr__
model:
conditions:
- evaluator:
params: [1]
type: lt
operator:
type: and
query:
params: [C]
reducer:
params: []
type: last
type: query
datasource:
type: __expr__
uid: __expr__
expression: B
refId: C
type: threshold
noDataState: Alerting
execErrState: Alerting
for: 1m
annotations:
summary: "Caddy is down on helsinki-a"
description: "Caddy (main reverse proxy) on helsinki-a unreachable. External services likely down."
labels:
severity: critical
isPaused: false
- orgId: 1
name: critical-services
folder: Alerting
interval: 1m
rules:
- uid: bff6uy2a2rrwgb
title: Plex Down (london-b)
condition: C
data:
- refId: A
datasourceUid: bezqqznn81wqof
model:
expr: up{job="plex"}
instant: true
intervalMs: 1000
maxDataPoints: 43200
refId: A
- refId: B
datasourceUid: __expr__
model:
datasource:
type: __expr__
uid: __expr__
expression: A
reducer: last
settings:
mode: ""
refId: B
type: reduce
- refId: C
datasourceUid: __expr__
model:
conditions:
- evaluator:
params: [1]
type: lt
operator:
type: and
query:
params: [C]
reducer:
params: []
type: last
type: query
datasource:
type: __expr__
uid: __expr__
expression: B
refId: C
type: threshold
noDataState: Alerting
execErrState: Alerting
for: 5m
annotations:
summary: "Plex is down on london-b"
description: "The Plex exporter on london-b has been unreachable for 5+ minutes."
labels:
severity: critical
isPaused: false

View file

@ -0,0 +1,242 @@
apiVersion: 1
# Tier 2 — Warning alerts. These send email only (non-paging).
# Datasource UID: bezqqznn81wqof (Prometheus on london-a)
# All alerts use reduce+threshold (not classic_conditions) so $labels.* works in
# annotations; numeric values must be read via $values.B.Value (B = reduce expr),
# since $value is the full evaluation string, not a number.
groups:
- orgId: 1
name: warning-resources
folder: Alerting
interval: 2m
rules:
- uid: cff6uy23024n4c
title: Disk Usage Warning (>80%)
condition: C
data:
- refId: A
datasourceUid: bezqqznn81wqof
model:
expr: |
(
node_filesystem_size_bytes{job="node_exporter", fstype!~"tmpfs|overlay|squashfs|devtmpfs"}
- node_filesystem_avail_bytes{job="node_exporter", fstype!~"tmpfs|overlay|squashfs|devtmpfs"}
)
/ node_filesystem_size_bytes{job="node_exporter", fstype!~"tmpfs|overlay|squashfs|devtmpfs"}
* 100
instant: true
intervalMs: 1000
maxDataPoints: 43200
refId: A
- refId: B
datasourceUid: __expr__
model:
datasource:
type: __expr__
uid: __expr__
expression: A
reducer: last
settings:
mode: ""
refId: B
type: reduce
- refId: C
datasourceUid: __expr__
model:
conditions:
- evaluator:
params: [80]
type: gt
operator:
type: and
query:
params: [C]
reducer:
params: []
type: last
type: query
datasource:
type: __expr__
uid: __expr__
expression: B
refId: C
type: threshold
noDataState: NoData
execErrState: Error
for: 10m
annotations:
summary: "Disk usage high on {{ $labels.server }}"
          description: "Filesystem {{ $labels.mountpoint }} on {{ $labels.server }} is over 80% full (currently {{ $values.B.Value | printf \"%.1f\" }}%)."
labels:
severity: warning
isPaused: false
- uid: dff6uy24szhmod
title: Memory Usage Warning (>85%)
condition: C
data:
- refId: A
datasourceUid: bezqqznn81wqof
model:
expr: |
(1 - (node_memory_MemAvailable_bytes{job="node_exporter"} / node_memory_MemTotal_bytes{job="node_exporter"})) * 100
instant: true
intervalMs: 1000
maxDataPoints: 43200
refId: A
- refId: B
datasourceUid: __expr__
model:
datasource:
type: __expr__
uid: __expr__
expression: A
reducer: last
settings:
mode: ""
refId: B
type: reduce
- refId: C
datasourceUid: __expr__
model:
conditions:
- evaluator:
params: [85]
type: gt
operator:
type: and
query:
params: [C]
reducer:
params: []
type: last
type: query
datasource:
type: __expr__
uid: __expr__
expression: B
refId: C
type: threshold
noDataState: NoData
execErrState: Error
for: 10m
annotations:
summary: "Memory usage high on {{ $labels.server }}"
description: "Memory usage on {{ $labels.server }} ({{ $labels.instance }}) is above 85% for 10+ minutes."
labels:
severity: warning
isPaused: false
- uid: cff6uy26jey9sd
title: CPU Usage High (>85%)
condition: C
data:
- refId: A
datasourceUid: bezqqznn81wqof
model:
expr: |
100 - (avg by (server, instance) (rate(node_cpu_seconds_total{job="node_exporter", mode="idle"}[5m])) * 100)
instant: true
intervalMs: 1000
maxDataPoints: 43200
refId: A
- refId: B
datasourceUid: __expr__
model:
datasource:
type: __expr__
uid: __expr__
expression: A
reducer: last
settings:
mode: ""
refId: B
type: reduce
- refId: C
datasourceUid: __expr__
model:
conditions:
- evaluator:
params: [85]
type: gt
operator:
type: and
query:
params: [C]
reducer:
params: []
type: last
type: query
datasource:
type: __expr__
uid: __expr__
expression: B
refId: C
type: threshold
noDataState: NoData
execErrState: Error
for: 15m
annotations:
summary: "CPU usage sustained high on {{ $labels.server }}"
          description: "CPU on {{ $labels.server }} has been above 85% for 15+ minutes (currently {{ $values.B.Value | printf \"%.1f\" }}%)."
labels:
severity: warning
isPaused: false
- uid: eff6uy289uewwb
title: System Load High (>2x CPUs)
condition: C
data:
- refId: A
datasourceUid: bezqqznn81wqof
model:
# Compare 15-minute load against number of CPUs
expr: |
node_load15{job="node_exporter"} / on(instance) group_left() count by (instance) (node_cpu_seconds_total{job="node_exporter", mode="idle"})
instant: true
intervalMs: 1000
maxDataPoints: 43200
refId: A
- refId: B
datasourceUid: __expr__
model:
datasource:
type: __expr__
uid: __expr__
expression: A
reducer: last
settings:
mode: ""
refId: B
type: reduce
- refId: C
datasourceUid: __expr__
model:
conditions:
- evaluator:
params: [2]
type: gt
operator:
type: and
query:
params: [C]
reducer:
params: []
type: last
type: query
datasource:
type: __expr__
uid: __expr__
expression: B
refId: C
type: threshold
noDataState: NoData
execErrState: Error
for: 15m
annotations:
summary: "High system load on {{ $labels.server }}"
          description: "15-minute load average on {{ $labels.server }} is {{ $values.B.Value | printf \"%.2f\" }}x the CPU count (threshold: 2x)."
labels:
severity: warning
isPaused: false

View file

@ -0,0 +1,15 @@
apiVersion: 1
# Dashboard provisioning — tells Grafana where to find dashboard JSON files.
# Path is relative to the Grafana installation on london-a (FreeBSD).
providers:
- name: default
orgId: 1
folder: ""
type: file
disableDeletion: false
updateIntervalSeconds: 30
options:
path: /usr/local/etc/grafana/dashboards
foldersFromFilesStructure: false

View file

@ -0,0 +1,18 @@
[
{
"uid": "bezqqznn81wqof",
"name": "prometheus",
"type": "prometheus",
"typeName": "Prometheus",
"typeLogoUrl": "public/plugins/prometheus/img/prometheus_logo.svg",
"access": "proxy",
"url": "http://localhost:9090",
"user": "",
"database": "",
"basicAuth": false,
"isDefault": true,
"jsonData": {
"pdcInjected": false
}
}
]

View file

@ -0,0 +1,8 @@
# Jellyseerr
Media request management (Overseerr fork for Jellyfin/Plex).
- **Host:** london-b
- **URL:** https://requests.pez.sh
- **Port:** 5056 (host) → 5055 (container)
- **Config:** `/var/share/jellyseer/`

View file

@ -0,0 +1,17 @@
# Jellyseerr - Media request management
# Host: london-b (100.84.65.101)
# Data: /var/share/jellyseer
# Access: https://requests.pez.sh (via Caddy reverse proxy on helsinki-a)
services:
jellyseerr:
image: fallenbagel/jellyseerr:latest
container_name: jellyseer
restart: always
ports:
- "5056:5055"
environment:
LOG_LEVEL: debug
TZ: Europe/London
volumes:
- /var/share/jellyseer:/app/config

View file

@ -0,0 +1,16 @@
[Unit]
Description=MaNGOS Zero Realm Server
After=network.target mariadb.service
Requires=mariadb.service
[Service]
Type=simple
User=mangos
Group=mangos
WorkingDirectory=/home/mangos/mangos/zero/bin
ExecStart=/home/mangos/mangos/zero/bin/realmd
Restart=on-failure
RestartSec=5
[Install]
WantedBy=multi-user.target

View file

@ -0,0 +1,16 @@
# NOTE(review): this unit is byte-identical to the realmd unit above (same
# Description and ExecStart=realmd). If this file is meant to be the world
# server (mangosd.service), Description/ExecStart should reference mangosd
# instead — verify before deploying.
[Unit]
Description=MaNGOS Zero Realm Server
After=network.target mariadb.service
Requires=mariadb.service
[Service]
Type=simple
User=mangos
Group=mangos
WorkingDirectory=/home/mangos/mangos/zero/bin
ExecStart=/home/mangos/mangos/zero/bin/realmd
Restart=on-failure
RestartSec=5
[Install]
WantedBy=multi-user.target

View file

@ -0,0 +1,10 @@
# Minecraft
PaperMC server.
- **Host:** copenhagen-a
- **Port:** 25565
- **Memory:** 3GB
- **Data:** Docker volume (`minecraft_minecraftserver`)
- **Java:** OpenJDK 21 (bundled in image)
- **Note:** copenhagen-a also runs a WoW server (MaNGOS Zero) as system services, not Docker

View file

@ -0,0 +1,19 @@
# Minecraft - PaperMC server
# Host: copenhagen-a (100.89.206.60)
# Data: Docker volume (minecraft_minecraftserver)
services:
minecraft:
image: marctv/minecraft-papermc-server:latest
container_name: mcserver
restart: always
ports:
- "25565:25565"
environment:
MEMORYSIZE: 3G
volumes:
- minecraft_data:/data
volumes:
minecraft_data:
name: minecraft_minecraftserver

View file

@ -0,0 +1,10 @@
# Miniflux
Lightweight RSS reader.
- **Host:** london-b
- **URL:** https://rss.pez.sh
- **Database:** PostgreSQL 15 (Alpine)
- **Bind address:** Tailscale IP only (100.84.65.101:8181)
- **Data:** Docker volume (`miniflux-db`)
- **Note:** Passwords templatized — set `MINIFLUX_DB_PASSWORD` and `MINIFLUX_ADMIN_PASSWORD` env vars before deploying

View file

@ -0,0 +1,35 @@
# Miniflux - RSS reader
# Host: london-b (100.84.65.101)
# Data: Docker volume (miniflux-db)
# Access: https://rss.pez.sh (via Caddy reverse proxy on helsinki-a)
services:
miniflux-db:
image: postgres:15-alpine
container_name: miniflux-db
restart: unless-stopped
volumes:
- miniflux-db:/var/lib/postgresql/data
environment:
POSTGRES_DB: miniflux
POSTGRES_USER: miniflux
POSTGRES_PASSWORD: "${MINIFLUX_DB_PASSWORD}"
miniflux:
image: miniflux/miniflux:latest
container_name: miniflux
restart: unless-stopped
depends_on:
- miniflux-db
ports:
- "100.84.65.101:8181:8080"
environment:
DATABASE_URL: "postgres://miniflux:${MINIFLUX_DB_PASSWORD}@miniflux-db/miniflux?sslmode=disable"
RUN_MIGRATIONS: "1"
CREATE_ADMIN: "1"
ADMIN_USERNAME: pez
ADMIN_PASSWORD: "${MINIFLUX_ADMIN_PASSWORD}"
BASE_URL: https://rss.pez.sh
volumes:
miniflux-db:

View file

@ -0,0 +1,9 @@
# Navidrome
Personal music streaming server (Subsonic-compatible).
- **Host:** london-b
- **URL:** https://music.pez.sh
- **Port:** 4533
- **Config:** `/root/navidrome/` (includes `navidrome.toml`)
- **Music library:** `/hdd/music` (on ZFS pool)

View file

@ -0,0 +1,17 @@
# Navidrome - Music streaming server
# Host: london-b (100.84.65.101)
# Data: /root/navidrome (config), /hdd/music (library)
# Access: https://music.pez.sh (via Caddy reverse proxy on helsinki-a)
services:
navidrome:
image: deluan/navidrome:latest
container_name: navidrome
restart: unless-stopped
ports:
- "4533:4533"
environment:
ND_LOGLEVEL: info
volumes:
- /root/navidrome:/data
- /hdd/music:/music

View file

@ -0,0 +1,10 @@
# Nextcloud AIO
All-in-one Nextcloud deployment (self-managed containers).
- **Host:** london-b
- **URL:** https://cloud.pez.sh
- **Admin port:** 8080 (mastercontainer management UI)
- **Apache port:** 11000 (proxied by Caddy on helsinki-a)
- **Data:** Docker volume (`nextcloud_aio_mastercontainer`)
- **Note:** The mastercontainer spawns and manages its own sub-containers (database, redis, apache, etc.)

Some files were not shown because too many files have changed in this diff Show more