- Ansible: basic, with-exporter, with-notifications, enterprise playbooks
- Kubernetes: CronJob, ConfigMap, ServiceMonitor, PVC manifests
- Prometheus: alerting rules (RPO/RTO/failure) and scrape configs
- Terraform: AWS S3 bucket with lifecycle policies
- Scripts: GFS backup rotation and health check (Nagios-compatible)

All playbooks support:
- Scheduled backups with systemd timers
- GFS retention policies
- Prometheus metrics exporter
- SMTP/Slack/webhook notifications
- Encrypted backups with cloud upload
141 lines
3.3 KiB
YAML
141 lines
3.3 KiB
YAML
# Backup CronJob - runs `dbbackup backup cluster` once a day and writes the
# result to the PVC mounted at /backups.
apiVersion: batch/v1
kind: CronJob
metadata:
  name: dbbackup-cluster
  labels:
    app: dbbackup
    component: backup
spec:
  # Daily at 2:00 AM UTC
  schedule: "0 2 * * *"
  # Pin the schedule to UTC. Without an explicit timeZone the schedule is
  # interpreted in the kube-controller-manager's local time zone, which may
  # not be UTC. (spec.timeZone is GA since Kubernetes 1.27.)
  timeZone: "Etc/UTC"

  # Keep last 3 successful and 1 failed job
  successfulJobsHistoryLimit: 3
  failedJobsHistoryLimit: 1

  # Don't run if previous job is still running
  concurrencyPolicy: Forbid

  # Start job within 5 minutes of scheduled time or skip
  startingDeadlineSeconds: 300

  jobTemplate:
    spec:
      # Retry up to 2 times on failure
      backoffLimit: 2

      template:
        metadata:
          labels:
            app: dbbackup
            component: backup
        spec:
          restartPolicy: OnFailure

          # Run as an unprivileged user; fsGroup 1000 is applied to the
          # mounted volume so that user can write to it.
          securityContext:
            runAsNonRoot: true
            runAsUser: 1000
            fsGroup: 1000

          containers:
            - name: dbbackup
              # NOTE(review): ":latest" is a mutable tag; consider pinning a
              # release tag or digest so runs are reproducible.
              image: git.uuxo.net/uuxo/dbbackup:latest
              # Always pull: with a mutable tag, IfNotPresent lets each node
              # keep running whatever image it cached first.
              imagePullPolicy: Always

              # $(COMPRESSION) is expanded by Kubernetes from the container
              # environment; presumably supplied by the dbbackup-config
              # ConfigMap below - verify the key exists there.
              args:
                - backup
                - cluster
                - --compression
                - "$(COMPRESSION)"

              envFrom:
                - configMapRef:
                    name: dbbackup-config
                - secretRef:
                    name: dbbackup-secrets

              env:
                # Matches the mountPath of the backup-storage volume.
                - name: BACKUP_DIR
                  value: /backups

              volumeMounts:
                - name: backup-storage
                  mountPath: /backups

              resources:
                requests:
                  memory: "256Mi"
                  cpu: "100m"
                limits:
                  memory: "2Gi"
                  cpu: "2000m"

          volumes:
            - name: backup-storage
              persistentVolumeClaim:
                claimName: dbbackup-storage
|
|
|
|
---
|
|
# Cleanup CronJob - runs weekly
# Invokes `dbbackup cleanup /backups` against the same PVC the backup job
# writes to.
apiVersion: batch/v1
kind: CronJob
metadata:
  name: dbbackup-cleanup
  labels:
    app: dbbackup
    component: cleanup
spec:
  # Weekly on Sunday at 3:00 AM UTC
  schedule: "0 3 * * 0"
  # Pin the schedule to UTC. Without an explicit timeZone the schedule is
  # interpreted in the kube-controller-manager's local time zone, which may
  # not be UTC. (spec.timeZone is GA since Kubernetes 1.27.)
  timeZone: "Etc/UTC"
  successfulJobsHistoryLimit: 1
  failedJobsHistoryLimit: 1
  # Don't start a new cleanup while the previous one is still running
  concurrencyPolicy: Forbid

  jobTemplate:
    spec:
      template:
        metadata:
          labels:
            app: dbbackup
            component: cleanup
        spec:
          restartPolicy: OnFailure
          # Run as an unprivileged user; fsGroup 1000 is applied to the
          # mounted volume so that user can modify it.
          securityContext:
            runAsNonRoot: true
            runAsUser: 1000
            fsGroup: 1000

          containers:
            - name: dbbackup
              # NOTE(review): ":latest" is a mutable tag; consider pinning a
              # release tag or digest so runs are reproducible.
              image: git.uuxo.net/uuxo/dbbackup:latest

              # $(RETENTION_DAYS) and $(MIN_BACKUPS) are expanded by
              # Kubernetes from the container environment; presumably
              # supplied by the dbbackup-config ConfigMap below - verify
              # both keys exist there.
              args:
                - cleanup
                - /backups
                - --retention-days
                - "$(RETENTION_DAYS)"
                - --min-backups
                - "$(MIN_BACKUPS)"

              envFrom:
                - configMapRef:
                    name: dbbackup-config

              volumeMounts:
                - name: backup-storage
                  mountPath: /backups

              resources:
                requests:
                  memory: "128Mi"
                  cpu: "50m"
                limits:
                  memory: "512Mi"
                  cpu: "500m"

          volumes:
            - name: backup-storage
              persistentVolumeClaim:
                claimName: dbbackup-storage
|