Compare commits

34 Commits

- b99b05fedb
- c5f2c3322c
- 56ad0824c7
- ec65df2976
- 23cc1e0e08
- 7770abab6f
- f6a20f035b
- 28e54d118f
- ab0ff3f28d
- b7dd325c51
- 2ed54141a3
- 495ee31247
- 78e10f5057
- f4a0e2d82c
- f66d19acb0
- 16f377e9b5
- 7e32a0369d
- 120ee33e3b
- 9f375621d1
- 9ad925191e
- 9d8a6e763e
- 63b16eee8b
- 91228552fb
- 9ee55309bd
- 0baf741c0b
- faace7271c
- c3ade7a693
- 52d475506c
- 938ee61686
- 85b61048c0
- 30954cb7c2
- ddf46f190b
- 4c6d44725e
- be69c0e00f
```diff
@@ -1,4 +1,6 @@
# CI/CD Pipeline for dbbackup
# Main repo: Gitea (git.uuxo.net)
# Mirror: GitHub (github.com/PlusOne/dbbackup)
name: CI/CD

on:
@@ -8,9 +10,6 @@ on:
pull_request:
branches: [main, master]

env:
GITEA_URL: https://git.uuxo.net

jobs:
test:
name: Test
@@ -18,26 +17,25 @@ jobs:
container:
image: golang:1.24-bookworm
steps:
- name: Install git
run: apt-get update && apt-get install -y git ca-certificates

- name: Checkout code
env:
TOKEN: ${{ github.token }}
run: |
apt-get update && apt-get install -y -qq git ca-certificates
git config --global --add safe.directory "$GITHUB_WORKSPACE"
git clone --depth 1 --branch ${GITHUB_REF_NAME} ${{ env.GITEA_URL }}/${GITHUB_REPOSITORY}.git .
git init
git remote add origin "https://${TOKEN}@git.uuxo.net/${GITHUB_REPOSITORY}.git"
git fetch --depth=1 origin "${GITHUB_SHA}"
git checkout FETCH_HEAD

- name: Download dependencies
run: go mod download

- name: Run tests with race detection
env:
GOMAXPROCS: 8
run: go test -race -coverprofile=coverage.out -covermode=atomic ./...
- name: Run tests
run: go test -race -coverprofile=coverage.out ./...

- name: Generate coverage report
run: |
go tool cover -func=coverage.out
go tool cover -html=coverage.out -o coverage.html
- name: Coverage summary
run: go tool cover -func=coverage.out | tail -1

lint:
name: Lint
@@ -45,168 +43,119 @@ jobs:
container:
image: golang:1.24-bookworm
steps:
- name: Install git
run: apt-get update && apt-get install -y git ca-certificates

- name: Checkout code
run: |
git config --global --add safe.directory "$GITHUB_WORKSPACE"
git clone --depth 1 --branch ${GITHUB_REF_NAME} ${{ env.GITEA_URL }}/${GITHUB_REPOSITORY}.git .

- name: Install golangci-lint
run: go install github.com/golangci/golangci-lint/cmd/golangci-lint@v1.62.2

- name: Run golangci-lint
env:
GOMAXPROCS: 8
run: golangci-lint run --timeout=5m ./...
TOKEN: ${{ github.token }}
run: |
apt-get update && apt-get install -y -qq git ca-certificates
git config --global --add safe.directory "$GITHUB_WORKSPACE"
git init
git remote add origin "https://${TOKEN}@git.uuxo.net/${GITHUB_REPOSITORY}.git"
git fetch --depth=1 origin "${GITHUB_SHA}"
git checkout FETCH_HEAD

build:
name: Build (${{ matrix.goos }}-${{ matrix.goarch }})
- name: Install and run golangci-lint
run: |
go install github.com/golangci/golangci-lint/cmd/golangci-lint@v1.62.2
golangci-lint run --timeout=5m ./...

build-and-release:
name: Build & Release
runs-on: ubuntu-latest
needs: [test, lint]
if: startsWith(github.ref, 'refs/tags/')
container:
image: golang:1.24-bookworm
strategy:
max-parallel: 8
matrix:
goos: [linux, darwin]
goarch: [amd64, arm64]
steps:
- name: Install git
run: apt-get update && apt-get install -y git ca-certificates

- name: Checkout code
run: |
git config --global --add safe.directory "$GITHUB_WORKSPACE"
git clone --depth 1 --branch ${GITHUB_REF_NAME} ${{ env.GITEA_URL }}/${GITHUB_REPOSITORY}.git .

- name: Build binary
env:
GOOS: ${{ matrix.goos }}
GOARCH: ${{ matrix.goarch }}
CGO_ENABLED: 0
GOMAXPROCS: 8
run: |
BINARY_NAME=dbbackup
go build -ldflags="-s -w" -o dist/${BINARY_NAME}-${{ matrix.goos }}-${{ matrix.goarch }} .

sbom:
name: Generate SBOM
runs-on: ubuntu-latest
needs: [test]
container:
image: golang:1.24-bookworm
steps:
- name: Install git
run: apt-get update && apt-get install -y git ca-certificates

- name: Checkout code
TOKEN: ${{ github.token }}
run: |
apt-get update && apt-get install -y -qq git ca-certificates curl jq
git config --global --add safe.directory "$GITHUB_WORKSPACE"
git clone --depth 1 --branch ${GITHUB_REF_NAME} ${{ env.GITEA_URL }}/${GITHUB_REPOSITORY}.git .
git init
git remote add origin "https://${TOKEN}@git.uuxo.net/${GITHUB_REPOSITORY}.git"
git fetch --depth=1 origin "${GITHUB_SHA}"
git checkout FETCH_HEAD

- name: Install Syft
run: curl -sSfL https://raw.githubusercontent.com/anchore/syft/main/install.sh | sh -s -- -b /usr/local/bin

- name: Generate SBOM
- name: Build all platforms
run: |
syft . -o spdx-json=sbom-spdx.json
syft . -o cyclonedx-json=sbom-cyclonedx.json
mkdir -p release

release:
name: Release
runs-on: ubuntu-latest
needs: [test, lint, build]
if: startsWith(github.ref, 'refs/tags/v')
container:
image: golang:1.24-bookworm
steps:
- name: Install tools
run: |
apt-get update && apt-get install -y git ca-certificates
curl -sSfL https://github.com/goreleaser/goreleaser/releases/download/v2.4.8/goreleaser_Linux_x86_64.tar.gz | tar xz -C /usr/local/bin goreleaser
curl -sSfL https://raw.githubusercontent.com/anchore/syft/main/install.sh | sh -s -- -b /usr/local/bin
# Install cross-compilation tools for CGO
apt-get update && apt-get install -y -qq gcc-aarch64-linux-gnu

- name: Checkout code
run: |
git config --global --add safe.directory "$GITHUB_WORKSPACE"
git clone --branch ${GITHUB_REF_NAME} ${{ env.GITEA_URL }}/${GITHUB_REPOSITORY}.git .
git fetch --tags
# Linux amd64 (with CGO for SQLite)
echo "Building linux/amd64 (CGO enabled)..."
CGO_ENABLED=1 GOOS=linux GOARCH=amd64 go build -ldflags="-s -w" -o release/dbbackup-linux-amd64 .

- name: Run goreleaser
# Linux arm64 (with CGO for SQLite)
echo "Building linux/arm64 (CGO enabled)..."
CC=aarch64-linux-gnu-gcc CGO_ENABLED=1 GOOS=linux GOARCH=arm64 go build -ldflags="-s -w" -o release/dbbackup-linux-arm64 .

# Darwin amd64 (no CGO - cross-compile limitation)
echo "Building darwin/amd64 (CGO disabled)..."
CGO_ENABLED=0 GOOS=darwin GOARCH=amd64 go build -ldflags="-s -w" -o release/dbbackup-darwin-amd64 .

# Darwin arm64 (no CGO - cross-compile limitation)
echo "Building darwin/arm64 (CGO disabled)..."
CGO_ENABLED=0 GOOS=darwin GOARCH=arm64 go build -ldflags="-s -w" -o release/dbbackup-darwin-arm64 .

# FreeBSD amd64 (no CGO - cross-compile limitation)
echo "Building freebsd/amd64 (CGO disabled)..."
CGO_ENABLED=0 GOOS=freebsd GOARCH=amd64 go build -ldflags="-s -w" -o release/dbbackup-freebsd-amd64 .

echo "All builds complete:"
ls -lh release/

- name: Create Gitea Release
env:
GITEA_TOKEN: ${{ secrets.GITEA_TOKEN }}
run: goreleaser release --clean

docker:
name: Build & Push Docker Image
runs-on: ubuntu-latest
needs: [test, lint]
if: github.event_name == 'push' && (github.ref == 'refs/heads/main' || startsWith(github.ref, 'refs/tags/'))
container:
image: docker:24-cli
options: --privileged
services:
docker:
image: docker:24-dind
options: --privileged
steps:
- name: Install dependencies
run: apk add --no-cache git curl

- name: Checkout code
GITEA_TOKEN: ${{ github.token }}
run: |
git config --global --add safe.directory "$GITHUB_WORKSPACE"
git clone --depth 1 --branch ${GITHUB_REF_NAME} ${{ env.GITEA_URL }}/${GITHUB_REPOSITORY}.git .
TAG=${GITHUB_REF#refs/tags/}

- name: Set up Docker Buildx
run: |
docker buildx create --use --name builder --driver docker-container
docker buildx inspect --bootstrap
echo "Creating Gitea release for ${TAG}..."
echo "Debug: GITHUB_REPOSITORY=${GITHUB_REPOSITORY}"
echo "Debug: TAG=${TAG}"

- name: Login to Gitea Registry
if: ${{ secrets.REGISTRY_USER != '' && secrets.REGISTRY_TOKEN != '' }}
run: |
echo "${{ secrets.REGISTRY_TOKEN }}" | docker login git.uuxo.net -u "${{ secrets.REGISTRY_USER }}" --password-stdin
# Simple body without special characters
BODY="Download binaries for your platform"

- name: Build and push
if: ${{ secrets.REGISTRY_USER != '' && secrets.REGISTRY_TOKEN != '' }}
run: |
# Determine tags
if [[ "${GITHUB_REF}" == refs/tags/* ]]; then
VERSION=${GITHUB_REF#refs/tags/}
TAGS="-t git.uuxo.net/uuxo/dbbackup:${VERSION} -t git.uuxo.net/uuxo/dbbackup:latest"
else
TAGS="-t git.uuxo.net/uuxo/dbbackup:${GITHUB_SHA::8} -t git.uuxo.net/uuxo/dbbackup:main"
# Create release via API with simple inline JSON
RESPONSE=$(curl -s -w "\n%{http_code}" -X POST \
-H "Authorization: token ${GITEA_TOKEN}" \
-H "Content-Type: application/json" \
-d '{"tag_name":"'"${TAG}"'","name":"'"${TAG}"'","body":"'"${BODY}"'","draft":false,"prerelease":false}' \
"https://git.uuxo.net/api/v1/repos/${GITHUB_REPOSITORY}/releases")

HTTP_CODE=$(echo "$RESPONSE" | tail -1)
BODY_RESPONSE=$(echo "$RESPONSE" | sed '$d')

echo "HTTP Code: $HTTP_CODE"
echo "Response: $BODY_RESPONSE"

RELEASE_ID=$(echo "$BODY_RESPONSE" | jq -r '.id')

if [ "$RELEASE_ID" = "null" ] || [ -z "$RELEASE_ID" ]; then
echo "Failed to create release"
exit 1
fi

docker buildx build \
--platform linux/amd64,linux/arm64 \
--push \
${TAGS} \
.
# Test 1765481480
echo "Created release ID: $RELEASE_ID"

mirror:
name: Mirror to GitHub
runs-on: ubuntu-latest
needs: [test, lint]
if: github.event_name == 'push' && github.ref == 'refs/heads/main' && vars.MIRROR_ENABLED != 'false'
container:
image: debian:bookworm-slim
volumes:
- /root/.ssh:/root/.ssh:ro
steps:
- name: Install git
run: apt-get update && apt-get install -y --no-install-recommends git openssh-client ca-certificates && rm -rf /var/lib/apt/lists/*
# Upload each binary
echo "Files to upload:"
ls -la release/

- name: Clone and mirror
env:
GIT_SSH_COMMAND: "ssh -i /root/.ssh/id_ed25519 -o UserKnownHostsFile=/dev/null -o StrictHostKeyChecking=no"
run: |
git config --global --add safe.directory "$GITHUB_WORKSPACE"
git clone --mirror ${{ env.GITEA_URL }}/${GITHUB_REPOSITORY}.git repo.git
cd repo.git
git remote add github git@github.com:PlusOne/dbbackup.git
git push --mirror github || git push --force --all github && git push --force --tags github
for file in release/dbbackup-*; do
FILENAME=$(basename "$file")
echo "Uploading $FILENAME..."
UPLOAD_RESPONSE=$(curl -s -X POST \
-H "Authorization: token ${GITEA_TOKEN}" \
-F "attachment=@${file}" \
"https://git.uuxo.net/api/v1/repos/${GITHUB_REPOSITORY}/releases/${RELEASE_ID}/assets?name=${FILENAME}")
echo "Upload response: $UPLOAD_RESPONSE"
done

echo "Gitea release complete!"
echo "GitHub mirror complete!"
```
**.gitignore** (vendored, 3 lines changed)

```diff
@@ -13,7 +13,8 @@ logs/
/dbbackup
/dbbackup_*
!dbbackup.png
bin/
bin/dbbackup_*
bin/*.exe

# Ignore development artifacts
*.swp
```
**CHANGELOG.md** (87 lines changed)

@@ -5,6 +5,93 @@ All notable changes to dbbackup will be documented in this file.

The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).

## [3.42.0] - 2026-01-07 "The Operator"

### Added - 🐧 Systemd Integration & Prometheus Metrics

**Embedded Systemd Installer:**
- New `dbbackup install` command installs dbbackup as a systemd service/timer
- Supports single-database (`--backup-type single`) and cluster (`--backup-type cluster`) modes
- Automatic `dbbackup` user/group creation with proper permissions
- Hardened service units with security features (NoNewPrivileges, ProtectSystem, CapabilityBoundingSet)
- Templated timer units with configurable schedules (daily, weekly, or custom OnCalendar)
- Built-in dry-run mode (`--dry-run`) to preview the installation
- `dbbackup install --status` shows the current installation state
- `dbbackup uninstall` cleanly removes all systemd units and, optionally, configuration

**Prometheus Metrics Support:**
- New `dbbackup metrics export` command writes textfile collector format
- New `dbbackup metrics serve` command runs an HTTP exporter on port 9399
- Metrics: `dbbackup_last_success_timestamp`, `dbbackup_rpo_seconds`, `dbbackup_backup_total`, etc.
- Integration with the node_exporter textfile collector
- Metrics automatically updated via ExecStopPost in service units
- `--with-metrics` flag during install sets up the exporter as a systemd service

**New Commands:**
```bash
# Install as systemd service
sudo dbbackup install --backup-type cluster --schedule daily

# Install with Prometheus metrics
sudo dbbackup install --with-metrics --metrics-port 9399

# Check installation status
dbbackup install --status

# Export metrics for node_exporter
dbbackup metrics export --output /var/lib/dbbackup/metrics/dbbackup.prom

# Run HTTP metrics server
dbbackup metrics serve --port 9399
```

### Technical Details
- Systemd templates embedded with `//go:embed` for a self-contained binary (see the sketch below)
- Templates use ReadWritePaths for security isolation
- Service units include a proper OOMScoreAdjust (-100) to protect backups
- Metrics exporter caches with a 30-second TTL for performance
- Graceful shutdown on SIGTERM for the metrics server
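
As a rough illustration of the embedding approach (not necessarily the project's actual layout), unit templates can be compiled into the binary with `//go:embed`; the directory and file names below are assumptions:

```go
package installer

import (
	"embed"
	"fmt"
)

// Hypothetical layout: unit templates live under templates/ in the source tree.
//
//go:embed templates/*.service templates/*.timer
var unitTemplates embed.FS

// renderUnit returns the raw bytes of an embedded unit template by name.
func renderUnit(name string) ([]byte, error) {
	data, err := unitTemplates.ReadFile("templates/" + name)
	if err != nil {
		return nil, fmt.Errorf("embedded template %q not found: %w", name, err)
	}
	return data, nil
}
```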

---

## [3.41.0] - 2026-01-07 "The Pre-Flight Check"

### Added - 🛡️ Pre-Restore Validation

**Automatic Dump Validation Before Restore:**
- SQL dump files are now validated BEFORE attempting a restore
- Detects truncated COPY blocks that cause "syntax error" failures (see the sketch after this list)
- Catches corrupted backups in seconds instead of wasting 49+ minutes
- Cluster restore pre-validates ALL dumps upfront (fail-fast approach)
- Custom format `.dump` files now validated with `pg_restore --list`
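
A rough sketch of what a `quickValidateSQLDump()`-style check can do, assuming a plain-text (optionally gzipped) pg_dump stream: every `COPY ... FROM stdin;` block must be terminated by a `\.` line, so a dump that ends mid-block is truncated. Package and function names here are illustrative, not the project's actual API.

```go
package validate

import (
	"bufio"
	"compress/gzip"
	"fmt"
	"io"
	"os"
	"strings"
)

// QuickValidateSQLDump scans a .sql or .sql.gz dump and reports a COPY block
// that was never terminated, the signature of a truncated backup.
func QuickValidateSQLDump(path string) error {
	f, err := os.Open(path)
	if err != nil {
		return err
	}
	defer f.Close()

	var r io.Reader = f
	if strings.HasSuffix(path, ".gz") {
		gz, err := gzip.NewReader(f)
		if err != nil {
			return fmt.Errorf("corrupt gzip stream: %w", err)
		}
		defer gz.Close()
		r = gz
	}

	sc := bufio.NewScanner(r)
	sc.Buffer(make([]byte, 0, 1024*1024), 1024*1024) // allow long data rows
	inCopy, table := false, ""
	for sc.Scan() {
		line := sc.Text()
		switch {
		case !inCopy && strings.HasPrefix(line, "COPY ") && strings.HasSuffix(line, "FROM stdin;"):
			inCopy = true
			if f := strings.Fields(line); len(f) > 1 {
				table = f[1]
			}
		case inCopy && line == `\.`: // end-of-data marker
			inCopy = false
		}
	}
	if err := sc.Err(); err != nil {
		return fmt.Errorf("read error (likely truncation): %w", err)
	}
	if inCopy {
		return fmt.Errorf("dump truncated inside COPY block for table %s", table)
	}
	return nil
}
```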

**Improved Error Messages:**
- Clear indication when a dump file is truncated
- Shows which table's COPY block was interrupted
- Displays sample orphaned data for diagnosis
- Provides actionable error messages with the root cause

### Fixed
- **P0: SQL Injection** - Added identifier validation for database names in CREATE/DROP DATABASE to prevent SQL injection attacks; uses safe quoting and regex validation (alphanumeric plus underscore only); see the sketch after this list
- **P0: Data Race** - Fixed concurrent goroutines appending to a shared error slice in the notification manager; now uses mutex synchronization
- **P0: psql ON_ERROR_STOP** - Added `-v ON_ERROR_STOP=1` to psql commands to fail fast on the first error instead of accumulating millions of errors
- **P1: Pipe deadlock** - Fixed a streaming-compression deadlock when pg_dump blocks on a full pipe buffer; now uses a goroutine with proper context timeout handling
- **P1: SIGPIPE handling** - Detect exit code 141 (broken pipe) and report the compressor failure as the root cause
- **P2: .dump validation** - Custom format dumps are now validated with `pg_restore --list` before restore
- **P2: fsync durability** - Added `outFile.Sync()` after streaming compression to prevent truncation on power loss
- Truncated `.sql.gz` dumps no longer waste hours on doomed restores
- "syntax error at or near" errors are now caught before the restore begins
- Cluster restores abort immediately if any dump is corrupted
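
A minimal sketch of the identifier-validation idea behind the first fix, under the stated assumption that names are restricted to alphanumerics plus underscore and then double-quoted; the function name is illustrative:

```go
package db

import (
	"fmt"
	"regexp"
	"strings"
)

// identRe matches the restricted identifier charset described above.
var identRe = regexp.MustCompile(`^[A-Za-z0-9_]+$`)

// SafeDatabaseIdent validates a database name and returns a quoted identifier
// suitable for interpolation into CREATE DATABASE / DROP DATABASE statements.
func SafeDatabaseIdent(name string) (string, error) {
	if !identRe.MatchString(name) {
		return "", fmt.Errorf("invalid database name %q: only [A-Za-z0-9_] allowed", name)
	}
	// Double-quote and escape embedded quotes; none can survive the regex
	// check above, but the extra defense costs nothing.
	return `"` + strings.ReplaceAll(name, `"`, `""`) + `"`, nil
}
```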

### Technical Details
- Integrated `Diagnoser` into the restore pipeline for pre-validation
- Added `quickValidateSQLDump()` for fast integrity checks
- Pre-validation runs on all `.sql.gz` and `.dump` files in cluster archives
- Streaming compression uses a channel-based wait with context cancellation (sketched below)
- Zero performance impact on valid backups (diagnosis is fast)
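
The channel-based wait mentioned above typically looks like the following sketch: `cmd.Wait` runs in its own goroutine so context cancellation can win the select instead of deadlocking on a full pipe. The function and package names are assumptions, not the project's actual code.

```go
package engine

import (
	"context"
	"os/exec"
)

// waitWithContext waits for an external command without blocking forever:
// Wait runs in its own goroutine, so the caller's context can still cancel.
func waitWithContext(ctx context.Context, cmd *exec.Cmd) error {
	done := make(chan error, 1)
	go func() { done <- cmd.Wait() }()

	select {
	case err := <-done:
		return err
	case <-ctx.Done():
		_ = cmd.Process.Kill() // unblock the waiting goroutine
		<-done                 // reap the process to avoid a zombie
		return ctx.Err()
	}
}
```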

---

## [3.40.0] - 2026-01-05 "The Diagnostician"

### Added - 🔍 Restore Diagnostics & Error Reporting

**README.md** (105 lines changed)

@@ -19,6 +19,8 @@ Database backup and restore utility for PostgreSQL, MySQL, and MariaDB.
- Point-in-Time Recovery (PITR) for PostgreSQL and MySQL/MariaDB
- **GFS retention policies**: Grandfather-Father-Son backup rotation
- **Notifications**: SMTP email and webhook alerts
- **Systemd integration**: Install as a service with scheduled timers
- **Prometheus metrics**: Textfile collector and HTTP exporter
- Interactive terminal UI
- Cross-platform binaries
@@ -94,6 +96,7 @@ Database: postgres@localhost:5432 (PostgreSQL)
```
────────────────────────────────
Restore Single Database
Restore Cluster Backup
Diagnose Backup File
List & Manage Backups
────────────────────────────────
View Active Operations
```
@@ -194,6 +197,7 @@ Configuration Settings
```
> Database Type: postgres
CPU Workload Type: balanced
Backup Directory: /root/db_backups
Work Directory: /tmp
Compression Level: 6
Parallel Jobs: 16
Dump Jobs: 8
```
@@ -282,6 +286,10 @@ dbbackup backup single mydb --dry-run
| `drill` | DR drill testing |
| `report` | Compliance report generation |
| `rto` | RTO/RPO analysis |
| `install` | Install as systemd service |
| `uninstall` | Remove systemd service |
| `metrics export` | Export Prometheus metrics to textfile |
| `metrics serve` | Run Prometheus HTTP exporter |

## Global Flags
@@ -671,6 +679,102 @@ dbbackup rto analyze mydb --target-rto 4h --target-rpo 1h
- Compliance status
- Recommendations for improvement

## Systemd Integration

Install dbbackup as a systemd service for automated scheduled backups:

```bash
# Install with Prometheus metrics exporter
sudo dbbackup install --backup-type cluster --with-metrics

# Preview what would be installed
dbbackup install --dry-run --backup-type cluster

# Check installation status
dbbackup install --status

# Uninstall
sudo dbbackup uninstall cluster --purge
```

**Schedule options:**
```bash
--schedule daily              # Every day at midnight (default)
--schedule weekly             # Every Monday at midnight
--schedule "*-*-* 02:00:00"   # Every day at 2am
--schedule "Mon *-*-* 03:00"  # Every Monday at 3am
```

**What gets installed:**
- Systemd service and timer units
- Dedicated `dbbackup` user with security hardening
- Directories: `/var/lib/dbbackup/`, `/etc/dbbackup/`
- Optional: Prometheus HTTP exporter on port 9399

📖 **Full documentation:** [SYSTEMD.md](SYSTEMD.md) - Manual setup, security hardening, multiple instances, troubleshooting

## Prometheus Metrics

Export backup metrics for monitoring with Prometheus:

### Textfile Collector

For integration with node_exporter:

```bash
# Export metrics to textfile
dbbackup metrics export --output /var/lib/node_exporter/textfile_collector/dbbackup.prom

# Export for a specific instance
dbbackup metrics export --instance production --output /var/lib/dbbackup/metrics/production.prom
```

Configure node_exporter:
```bash
node_exporter --collector.textfile.directory=/var/lib/node_exporter/textfile_collector/
```

### HTTP Exporter

Run a dedicated metrics HTTP server:

```bash
# Start metrics server on the default port 9399
dbbackup metrics serve

# Custom port
dbbackup metrics serve --port 9100

# Run as a systemd service (installed via --with-metrics)
sudo systemctl start dbbackup-exporter
```

**Endpoints:**
- `/metrics` - Prometheus exposition format
- `/health` - Health check (returns 200 OK)

**Available metrics:**

| Metric | Type | Description |
|--------|------|-------------|
| `dbbackup_last_success_timestamp` | gauge | Unix timestamp of last successful backup |
| `dbbackup_last_backup_duration_seconds` | gauge | Duration of last backup |
| `dbbackup_last_backup_size_bytes` | gauge | Size of last backup |
| `dbbackup_backup_total` | counter | Total backups by status (success/failure) |
| `dbbackup_rpo_seconds` | gauge | Seconds since last successful backup |
| `dbbackup_backup_verified` | gauge | Whether last backup was verified (1/0) |
| `dbbackup_scrape_timestamp` | gauge | When metrics were collected |

**Labels:** `instance`, `database`, `engine`
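
For orientation, a textfile export looks roughly like standard Prometheus exposition text; the HELP strings and label values below are illustrative, not captured from the tool:

```text
# HELP dbbackup_last_success_timestamp Unix timestamp of the last successful backup
# TYPE dbbackup_last_success_timestamp gauge
dbbackup_last_success_timestamp{instance="production",database="mydb",engine="postgres"} 1.767744e+09
# HELP dbbackup_rpo_seconds Seconds since the last successful backup
# TYPE dbbackup_rpo_seconds gauge
dbbackup_rpo_seconds{instance="production",database="mydb",engine="postgres"} 3600
```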

**Example Prometheus query:**
```promql
# Alert if RPO exceeds 24 hours
dbbackup_rpo_seconds{instance="production"} > 86400

# Backup success rate
sum(rate(dbbackup_backup_total{status="success"}[24h])) / sum(rate(dbbackup_backup_total[24h]))
```
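
The RPO query above can be turned into a standing alert; this is a generic Prometheus alerting-rule sketch (the group and alert names are made up):

```yaml
groups:
  - name: dbbackup
    rules:
      - alert: DBBackupRPOExceeded
        expr: dbbackup_rpo_seconds{instance="production"} > 86400
        for: 15m
        labels:
          severity: warning
        annotations:
          summary: "No successful backup for over 24h on {{ $labels.database }}"
```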

## Configuration

### PostgreSQL Authentication

@@ -754,6 +858,7 @@ Workload types:

## Documentation

- [SYSTEMD.md](SYSTEMD.md) - Systemd installation & scheduling
- [DOCKER.md](DOCKER.md) - Docker deployment
- [CLOUD.md](CLOUD.md) - Cloud storage configuration
- [PITR.md](PITR.md) - Point-in-Time Recovery
**RELEASE_NOTES.md** (new file, 69 lines)

@@ -0,0 +1,69 @@
# v3.41.0 Release Notes

## What's New in v3.41.0

### Features
- **Systemd Integration** - One-command install with `dbbackup install`
- **Prometheus Metrics** - HTTP exporter on port 9399 with `/metrics` and `/health` endpoints
- **Backup Catalog** - SQLite-based tracking of all backup operations
- **Automated CI/CD** - Gitea Actions pipeline with automated releases

### Installation

#### Quick Install (Recommended)
```bash
# Download for your platform
curl -LO https://git.uuxo.net/UUXO/dbbackup/releases/download/v3.41.0/dbbackup-linux-amd64

# Install with systemd service
chmod +x dbbackup-linux-amd64
sudo ./dbbackup-linux-amd64 install --config /path/to/config.yaml
```

#### Available Binaries

| Platform | Architecture | Binary |
|----------|--------------|--------|
| Linux | amd64 | `dbbackup-linux-amd64` |
| Linux | arm64 | `dbbackup-linux-arm64` |
| macOS | Intel | `dbbackup-darwin-amd64` |
| macOS | Apple Silicon | `dbbackup-darwin-arm64` |
| FreeBSD | amd64 | `dbbackup-freebsd-amd64` |

### Systemd Commands
```bash
dbbackup install --config config.yaml   # Install service + timer
dbbackup install --status               # Check service status
dbbackup install --uninstall            # Remove services
```

### Prometheus Metrics
Available at `http://localhost:9399/metrics`:

| Metric | Description |
|--------|-------------|
| `dbbackup_last_backup_timestamp` | Unix timestamp of last backup |
| `dbbackup_last_backup_success` | 1 if successful, 0 if failed |
| `dbbackup_last_backup_duration_seconds` | Duration of last backup |
| `dbbackup_last_backup_size_bytes` | Size of last backup |
| `dbbackup_backup_total` | Total number of backups |
| `dbbackup_backup_errors_total` | Total number of failed backups |

### Security Features
- Hardened systemd service with `ProtectSystem=strict`
- `NoNewPrivileges=true` prevents privilege escalation
- Dedicated `dbbackup` system user (optional)
- Credential files with restricted permissions

### Documentation
- [SYSTEMD.md](SYSTEMD.md) - Complete systemd installation guide
- [README.md](README.md) - Full documentation
- [CHANGELOG.md](CHANGELOG.md) - Version history

### Bug Fixes
- Fixed exporter status detection in `install --status`
- Improved error handling in restore operations
- Better JSON escaping in CI release creation

---

**Full Changelog**: https://git.uuxo.net/UUXO/dbbackup/compare/v3.40.0...v3.41.0
**SYSTEMD.md** (new file, 529 lines)

@@ -0,0 +1,529 @@
# Systemd Integration Guide

This guide covers installing dbbackup as a systemd service for automated scheduled backups.

## Quick Start (Installer)

The easiest way to set up systemd services is using the built-in installer:

```bash
# Install as cluster backup service (daily at midnight)
sudo dbbackup install --backup-type cluster --schedule daily

# Check what would be installed (dry-run)
dbbackup install --dry-run --backup-type cluster

# Check installation status
dbbackup install --status

# Uninstall
sudo dbbackup uninstall cluster --purge
```

## Installer Options

| Flag | Description | Default |
|------|-------------|---------|
| `--instance NAME` | Instance name for named backups | - |
| `--backup-type TYPE` | Backup type: `cluster`, `single`, `sample` | `cluster` |
| `--schedule SPEC` | Timer schedule (see below) | `daily` |
| `--with-metrics` | Install Prometheus metrics exporter | false |
| `--metrics-port PORT` | HTTP port for metrics exporter | 9399 |
| `--dry-run` | Preview changes without applying | false |

### Schedule Format

The `--schedule` option accepts systemd OnCalendar format:

| Value | Description |
|-------|-------------|
| `daily` | Every day at midnight |
| `weekly` | Every Monday at midnight |
| `hourly` | Every hour |
| `*-*-* 02:00:00` | Every day at 2:00 AM |
| `*-*-* 00/6:00:00` | Every 6 hours |
| `Mon *-*-* 03:00` | Every Monday at 3:00 AM |
| `*-*-01 00:00:00` | First day of every month |

Test a schedule with: `systemd-analyze calendar "Mon *-*-* 03:00"`
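
For reference, `systemd-analyze calendar` prints the normalized expression and the next elapse. Representative output (the dates depend on when you run it):

```bash
$ systemd-analyze calendar "Mon *-*-* 03:00"
  Original form: Mon *-*-* 03:00
Normalized form: Mon *-*-* 03:00:00
    Next elapse: Mon 2026-01-12 03:00:00 UTC
       From now: 4 days left
```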

## What Gets Installed

### Directory Structure

```
/etc/dbbackup/
├── dbbackup.conf          # Main configuration
└── env.d/
    └── cluster.conf       # Instance credentials (mode 0600)

/var/lib/dbbackup/
├── catalog/
│   └── backups.db         # SQLite backup catalog
├── backups/               # Default backup storage
└── metrics/               # Prometheus textfile metrics

/var/log/dbbackup/         # Log files

/usr/local/bin/dbbackup    # Binary copy
```

### Systemd Units

**For cluster backups:**
- `/etc/systemd/system/dbbackup-cluster.service` - Backup service
- `/etc/systemd/system/dbbackup-cluster.timer` - Backup scheduler

**For named instances:**
- `/etc/systemd/system/dbbackup@.service` - Template service
- `/etc/systemd/system/dbbackup@.timer` - Template timer

**Metrics exporter (optional):**
- `/etc/systemd/system/dbbackup-exporter.service`

### System User

A dedicated `dbbackup` user and group are created:
- Home: `/var/lib/dbbackup`
- Shell: `/usr/sbin/nologin`
- Purpose: Run backup services with minimal privileges

## Manual Installation

If you prefer to set up systemd services manually without the installer:

### Step 1: Create User and Directories

```bash
# Create system user
sudo useradd --system --home-dir /var/lib/dbbackup --shell /usr/sbin/nologin dbbackup

# Create directories
sudo mkdir -p /etc/dbbackup/env.d
sudo mkdir -p /var/lib/dbbackup/{catalog,backups,metrics}
sudo mkdir -p /var/log/dbbackup

# Set ownership
sudo chown -R dbbackup:dbbackup /var/lib/dbbackup /var/log/dbbackup
sudo chown root:dbbackup /etc/dbbackup
sudo chmod 750 /etc/dbbackup

# Copy binary
sudo cp dbbackup /usr/local/bin/
sudo chmod 755 /usr/local/bin/dbbackup
```

### Step 2: Create Configuration

```bash
# Main configuration
sudo tee /etc/dbbackup/dbbackup.conf << 'EOF'
# DBBackup Configuration
db-type=postgres
host=localhost
port=5432
user=postgres
backup-dir=/var/lib/dbbackup/backups
compression=6
retention-days=30
min-backups=7
EOF

# Instance credentials (secure permissions)
sudo tee /etc/dbbackup/env.d/cluster.conf << 'EOF'
PGPASSWORD=your_secure_password
# Or for MySQL:
# MYSQL_PWD=your_secure_password
EOF
sudo chmod 600 /etc/dbbackup/env.d/cluster.conf
sudo chown dbbackup:dbbackup /etc/dbbackup/env.d/cluster.conf
```

### Step 3: Create Service Unit

```bash
sudo tee /etc/systemd/system/dbbackup-cluster.service << 'EOF'
[Unit]
Description=DBBackup Cluster Backup
Documentation=https://github.com/PlusOne/dbbackup
After=network.target postgresql.service mysql.service
Wants=network.target

[Service]
Type=oneshot
User=dbbackup
Group=dbbackup

# Load configuration
EnvironmentFile=-/etc/dbbackup/env.d/cluster.conf

# Working directory
WorkingDirectory=/var/lib/dbbackup

# Execute backup
ExecStart=/usr/local/bin/dbbackup backup cluster \
    --config /etc/dbbackup/dbbackup.conf \
    --backup-dir /var/lib/dbbackup/backups \
    --allow-root

# Security hardening
NoNewPrivileges=yes
ProtectSystem=strict
ProtectHome=yes
PrivateTmp=yes
PrivateDevices=yes
ProtectKernelTunables=yes
ProtectKernelModules=yes
ProtectControlGroups=yes
RestrictAddressFamilies=AF_UNIX AF_INET AF_INET6
RestrictNamespaces=yes
RestrictRealtime=yes
RestrictSUIDSGID=yes
MemoryDenyWriteExecute=yes
LockPersonality=yes

# Allow write to specific paths
ReadWritePaths=/var/lib/dbbackup /var/log/dbbackup

# Capability restrictions
CapabilityBoundingSet=CAP_DAC_READ_SEARCH CAP_NET_CONNECT
AmbientCapabilities=

# Resource limits
MemoryMax=4G
CPUQuota=80%

# Prevent OOM killer from terminating backups
OOMScoreAdjust=-100

# Logging
StandardOutput=journal
StandardError=journal
SyslogIdentifier=dbbackup

[Install]
WantedBy=multi-user.target
EOF
```

### Step 4: Create Timer Unit

```bash
sudo tee /etc/systemd/system/dbbackup-cluster.timer << 'EOF'
[Unit]
Description=DBBackup Cluster Backup Timer
Documentation=https://github.com/PlusOne/dbbackup

[Timer]
# Run daily at midnight
OnCalendar=daily

# Randomize start time within 15 minutes to avoid thundering herd
RandomizedDelaySec=900

# Run immediately if we missed the last scheduled time
Persistent=true

# Run even if system was sleeping
WakeSystem=false

[Install]
WantedBy=timers.target
EOF
```

### Step 5: Enable and Start

```bash
# Reload systemd
sudo systemctl daemon-reload

# Enable timer (auto-start on boot)
sudo systemctl enable dbbackup-cluster.timer

# Start timer
sudo systemctl start dbbackup-cluster.timer

# Verify timer is active
sudo systemctl status dbbackup-cluster.timer

# View next scheduled run
sudo systemctl list-timers dbbackup-cluster.timer
```

### Step 6: Test Backup

```bash
# Run backup manually
sudo systemctl start dbbackup-cluster.service

# Check status
sudo systemctl status dbbackup-cluster.service

# View logs
sudo journalctl -u dbbackup-cluster.service -f
```

## Prometheus Metrics Exporter (Manual)

### Service Unit

```bash
sudo tee /etc/systemd/system/dbbackup-exporter.service << 'EOF'
[Unit]
Description=DBBackup Prometheus Metrics Exporter
Documentation=https://github.com/PlusOne/dbbackup
After=network.target

[Service]
Type=simple
User=dbbackup
Group=dbbackup

# Working directory
WorkingDirectory=/var/lib/dbbackup

# Start HTTP metrics server
ExecStart=/usr/local/bin/dbbackup metrics serve --port 9399

# Restart on failure
Restart=on-failure
RestartSec=10

# Security hardening
NoNewPrivileges=yes
ProtectSystem=strict
ProtectHome=yes
PrivateTmp=yes
PrivateDevices=yes
ProtectKernelTunables=yes
ProtectKernelModules=yes
ProtectControlGroups=yes
RestrictAddressFamilies=AF_UNIX AF_INET AF_INET6
RestrictNamespaces=yes
RestrictRealtime=yes
RestrictSUIDSGID=yes
LockPersonality=yes

# Catalog access
ReadWritePaths=/var/lib/dbbackup

# Capability restrictions
CapabilityBoundingSet=CAP_NET_BIND_SERVICE
AmbientCapabilities=

# Logging
StandardOutput=journal
StandardError=journal
SyslogIdentifier=dbbackup-exporter

[Install]
WantedBy=multi-user.target
EOF
```

### Enable Exporter

```bash
sudo systemctl daemon-reload
sudo systemctl enable dbbackup-exporter
sudo systemctl start dbbackup-exporter

# Test
curl http://localhost:9399/health
curl http://localhost:9399/metrics
```

### Prometheus Configuration

Add to `prometheus.yml`:

```yaml
scrape_configs:
  - job_name: 'dbbackup'
    static_configs:
      - targets: ['localhost:9399']
    scrape_interval: 60s
```

## Security Hardening

The systemd units include comprehensive security hardening:

| Setting | Purpose |
|---------|---------|
| `NoNewPrivileges=yes` | Prevent privilege escalation |
| `ProtectSystem=strict` | Read-only filesystem except allowed paths |
| `ProtectHome=yes` | Block access to /home, /root, /run/user |
| `PrivateTmp=yes` | Isolated /tmp namespace |
| `PrivateDevices=yes` | No access to physical devices |
| `RestrictAddressFamilies` | Only Unix and IP sockets |
| `MemoryDenyWriteExecute=yes` | Prevent code injection |
| `CapabilityBoundingSet` | Minimal Linux capabilities |
| `OOMScoreAdjust=-100` | Protect backup from OOM killer |
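
You can sanity-check this hardening with systemd's built-in analyzer, which prints a per-directive table and an overall exposure score (lower is better):

```bash
# Print an exposure score for the unit's sandboxing
sudo systemd-analyze security dbbackup-cluster.service
```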

### Database Access

For PostgreSQL with peer authentication:
```bash
# Add dbbackup user to postgres group
sudo usermod -aG postgres dbbackup

# Or create a .pgpass file
sudo -u dbbackup tee /var/lib/dbbackup/.pgpass << EOF
localhost:5432:*:postgres:password
EOF
sudo chmod 600 /var/lib/dbbackup/.pgpass
```

For PostgreSQL with password authentication:
```bash
# Store password in environment file
echo "PGPASSWORD=your_password" | sudo tee /etc/dbbackup/env.d/cluster.conf
sudo chmod 600 /etc/dbbackup/env.d/cluster.conf
```

## Multiple Instances

Run different backup configurations as separate instances:

```bash
# Install multiple instances
sudo dbbackup install --instance production --schedule "*-*-* 02:00:00"
sudo dbbackup install --instance staging --schedule "*-*-* 04:00:00"
sudo dbbackup install --instance analytics --schedule "weekly"

# Manage individually
sudo systemctl status dbbackup@production.timer
sudo systemctl start dbbackup@staging.service
```

Each instance has its own:
- Configuration: `/etc/dbbackup/env.d/<instance>.conf`
- Timer schedule
- Journal logs: `journalctl -u dbbackup@<instance>.service`

## Troubleshooting

### View Logs

```bash
# Real-time logs
sudo journalctl -u dbbackup-cluster.service -f

# Last backup run
sudo journalctl -u dbbackup-cluster.service -n 100

# All dbbackup logs
sudo journalctl -t dbbackup

# Exporter logs
sudo journalctl -u dbbackup-exporter -f
```

### Timer Not Running

```bash
# Check timer status
sudo systemctl status dbbackup-cluster.timer

# List all timers
sudo systemctl list-timers --all | grep dbbackup

# Check if timer is enabled
sudo systemctl is-enabled dbbackup-cluster.timer
```

### Service Fails to Start

```bash
# Check service status
sudo systemctl status dbbackup-cluster.service

# View detailed error
sudo journalctl -u dbbackup-cluster.service -n 50 --no-pager

# Test manually as dbbackup user
sudo -u dbbackup /usr/local/bin/dbbackup backup cluster --config /etc/dbbackup/dbbackup.conf

# Check permissions
ls -la /var/lib/dbbackup/
ls -la /etc/dbbackup/
```

### Permission Denied

```bash
# Fix ownership
sudo chown -R dbbackup:dbbackup /var/lib/dbbackup

# Check SELinux (if enabled)
sudo ausearch -m avc -ts recent

# Check AppArmor (if enabled)
sudo aa-status
```

### Exporter Not Accessible

```bash
# Check if running
sudo systemctl status dbbackup-exporter

# Check port binding
sudo ss -tlnp | grep 9399

# Test locally
curl -v http://localhost:9399/health

# Check firewall
sudo ufw status
sudo iptables -L -n | grep 9399
```

## Uninstallation

### Using Installer

```bash
# Remove cluster backup (keeps config)
sudo dbbackup uninstall cluster

# Remove and purge configuration
sudo dbbackup uninstall cluster --purge

# Remove named instance
sudo dbbackup uninstall production --purge
```

### Manual Removal

```bash
# Stop and disable services
sudo systemctl stop dbbackup-cluster.timer dbbackup-cluster.service dbbackup-exporter
sudo systemctl disable dbbackup-cluster.timer dbbackup-exporter

# Remove unit files
sudo rm /etc/systemd/system/dbbackup-cluster.service
sudo rm /etc/systemd/system/dbbackup-cluster.timer
sudo rm /etc/systemd/system/dbbackup-exporter.service
sudo rm /etc/systemd/system/dbbackup@.service
sudo rm /etc/systemd/system/dbbackup@.timer

# Reload systemd
sudo systemctl daemon-reload

# Optional: Remove user and directories
sudo userdel dbbackup
sudo rm -rf /var/lib/dbbackup
sudo rm -rf /etc/dbbackup
sudo rm -rf /var/log/dbbackup
sudo rm /usr/local/bin/dbbackup
```

## See Also

- [README.md](README.md) - Main documentation
- [DOCKER.md](DOCKER.md) - Docker deployment
- [CLOUD.md](CLOUD.md) - Cloud storage configuration
- [PITR.md](PITR.md) - Point-in-Time Recovery
**bin/README.md** (new file, 98 lines)

@@ -0,0 +1,98 @@
# DB Backup Tool - Pre-compiled Binaries

## Download

**Binaries are distributed via GitHub Releases:**

📦 **https://github.com/PlusOne/dbbackup/releases**

Or build from source:
```bash
git clone https://github.com/PlusOne/dbbackup.git
cd dbbackup
./build_all.sh
```

## Build Information
- **Version**: 3.40.0
- **Build Time**: 2026-01-07_10:55:47_UTC
- **Git Commit**: 495ee31

## Recent Updates (v1.1.0)
- ✅ Fixed TUI progress display with line-by-line output
- ✅ Added interactive configuration settings menu
- ✅ Improved menu navigation and responsiveness
- ✅ Enhanced completion status handling
- ✅ Better CPU detection and optimization
- ✅ Silent mode support for TUI operations

## Available Binaries

### Linux
- `dbbackup_linux_amd64` - Linux 64-bit (Intel/AMD)
- `dbbackup_linux_arm64` - Linux 64-bit (ARM)
- `dbbackup_linux_arm_armv7` - Linux 32-bit (ARMv7)

### macOS
- `dbbackup_darwin_amd64` - macOS 64-bit (Intel)
- `dbbackup_darwin_arm64` - macOS 64-bit (Apple Silicon)

### Windows
- `dbbackup_windows_amd64.exe` - Windows 64-bit (Intel/AMD)
- `dbbackup_windows_arm64.exe` - Windows 64-bit (ARM)

### BSD Systems
- `dbbackup_freebsd_amd64` - FreeBSD 64-bit
- `dbbackup_openbsd_amd64` - OpenBSD 64-bit
- `dbbackup_netbsd_amd64` - NetBSD 64-bit

## Usage

1. Download the appropriate binary for your platform
2. Make it executable (Unix-like systems): `chmod +x dbbackup_*`
3. Run: `./dbbackup_* --help`

## Interactive Mode

Launch the interactive TUI menu for easy configuration and operation:

```bash
# Interactive mode with TUI menu
./dbbackup_linux_amd64

# Features:
# - Interactive configuration settings
# - Real-time progress display
# - Operation history and status
# - CPU detection and optimization
```

## Command Line Mode

Direct command-line usage with line-by-line progress:

```bash
# Show CPU information and optimization settings
./dbbackup_linux_amd64 cpu

# Auto-optimize for your hardware
./dbbackup_linux_amd64 backup cluster --auto-detect-cores

# Manual CPU configuration
./dbbackup_linux_amd64 backup single mydb --jobs 8 --dump-jobs 4

# Line-by-line progress output
./dbbackup_linux_amd64 backup cluster --progress-type line
```

## CPU Detection

All binaries include advanced CPU detection capabilities:
- Automatic core detection for optimal parallelism
- Support for different workload types (CPU-intensive, I/O-intensive, balanced)
- Platform-specific optimizations for Linux, macOS, and Windows
- Interactive CPU configuration in TUI mode

## Support

For issues or questions, please refer to the main project documentation.

```diff
@@ -83,7 +83,8 @@ for platform_config in "${PLATFORMS[@]}"; do
echo -e "${YELLOW}[$current/$total_platforms]${NC} Building for ${BOLD}$description${NC} (${platform})"

# Set environment and build (using export for better compatibility)
export GOOS GOARCH
# CGO_ENABLED=0 creates static binaries without glibc dependency
export CGO_ENABLED=0 GOOS GOARCH
if go build -ldflags "$LDFLAGS" -o "${BIN_DIR}/${binary_name}" . 2>/dev/null; then
# Get file size
if [[ "$OSTYPE" == "darwin"* ]]; then
```
**cmd/install.go** (new file, 239 lines)

@@ -0,0 +1,239 @@
```go
package cmd

import (
	"context"
	"fmt"
	"os"
	"os/exec"
	"os/signal"
	"strings"
	"syscall"

	"dbbackup/internal/installer"

	"github.com/spf13/cobra"
)

var (
	// Install flags
	installInstance    string
	installSchedule    string
	installBackupType  string
	installUser        string
	installGroup       string
	installBackupDir   string
	installConfigPath  string
	installTimeout     int
	installWithMetrics bool
	installMetricsPort int
	installDryRun      bool
	installStatus      bool

	// Uninstall flags
	uninstallPurge bool
)

// installCmd represents the install command
var installCmd = &cobra.Command{
	Use:   "install",
	Short: "Install dbbackup as a systemd service",
	Long: `Install dbbackup as a systemd service with automatic scheduling.

This command creates systemd service and timer units for automated database backups.
It supports both single database and cluster backup modes.

Examples:
  # Interactive installation (will prompt for options)
  sudo dbbackup install

  # Install cluster backup running daily at 2am
  sudo dbbackup install --backup-type cluster --schedule "daily"

  # Install single database backup with custom schedule
  sudo dbbackup install --instance production --backup-type single --schedule "*-*-* 03:00:00"

  # Install with Prometheus metrics exporter
  sudo dbbackup install --with-metrics --metrics-port 9399

  # Check installation status
  dbbackup install --status

  # Dry-run to see what would be installed
  sudo dbbackup install --dry-run

Schedule format (OnCalendar):
  daily            - Every day at midnight
  weekly           - Every Monday at midnight
  *-*-* 02:00:00   - Every day at 2am
  *-*-* 02,14:00   - Twice daily at 2am and 2pm
  Mon *-*-* 03:00  - Every Monday at 3am
`,
	RunE: func(cmd *cobra.Command, args []string) error {
		// Handle --status flag
		if installStatus {
			return runInstallStatus(cmd.Context())
		}

		return runInstall(cmd.Context())
	},
}

// uninstallCmd represents the uninstall command
var uninstallCmd = &cobra.Command{
	Use:   "uninstall [instance]",
	Short: "Uninstall dbbackup systemd service",
	Long: `Uninstall dbbackup systemd service and timer.

Examples:
  # Uninstall default instance
  sudo dbbackup uninstall

  # Uninstall specific instance
  sudo dbbackup uninstall production

  # Uninstall and remove all configuration
  sudo dbbackup uninstall --purge
`,
	RunE: func(cmd *cobra.Command, args []string) error {
		instance := "cluster"
		if len(args) > 0 {
			instance = args[0]
		}
		return runUninstall(cmd.Context(), instance)
	},
}

func init() {
	rootCmd.AddCommand(installCmd)
	rootCmd.AddCommand(uninstallCmd)

	// Install flags
	installCmd.Flags().StringVarP(&installInstance, "instance", "i", "", "Instance name (e.g., production, staging)")
	installCmd.Flags().StringVarP(&installSchedule, "schedule", "s", "daily", "Backup schedule (OnCalendar format)")
	installCmd.Flags().StringVarP(&installBackupType, "backup-type", "t", "cluster", "Backup type: single or cluster")
	installCmd.Flags().StringVar(&installUser, "user", "dbbackup", "System user to run backups")
	installCmd.Flags().StringVar(&installGroup, "group", "dbbackup", "System group for backup user")
	installCmd.Flags().StringVar(&installBackupDir, "backup-dir", "/var/lib/dbbackup/backups", "Directory for backups")
	installCmd.Flags().StringVar(&installConfigPath, "config-path", "/etc/dbbackup/dbbackup.conf", "Path to config file")
	installCmd.Flags().IntVar(&installTimeout, "timeout", 3600, "Backup timeout in seconds")
	installCmd.Flags().BoolVar(&installWithMetrics, "with-metrics", false, "Install Prometheus metrics exporter")
	installCmd.Flags().IntVar(&installMetricsPort, "metrics-port", 9399, "Prometheus metrics port")
	installCmd.Flags().BoolVar(&installDryRun, "dry-run", false, "Show what would be installed without making changes")
	installCmd.Flags().BoolVar(&installStatus, "status", false, "Show installation status")

	// Uninstall flags
	uninstallCmd.Flags().BoolVar(&uninstallPurge, "purge", false, "Also remove configuration files")
}

func runInstall(ctx context.Context) error {
	// Create context with signal handling
	ctx, cancel := signal.NotifyContext(ctx, os.Interrupt, syscall.SIGTERM)
	defer cancel()

	// Expand schedule shortcuts
	schedule := expandSchedule(installSchedule)

	// Create installer
	inst := installer.NewInstaller(log, installDryRun)

	// Set up options
	opts := installer.InstallOptions{
		Instance:       installInstance,
		BackupType:     installBackupType,
		Schedule:       schedule,
		User:           installUser,
		Group:          installGroup,
		BackupDir:      installBackupDir,
		ConfigPath:     installConfigPath,
		TimeoutSeconds: installTimeout,
		WithMetrics:    installWithMetrics,
		MetricsPort:    installMetricsPort,
	}

	// For cluster backup, override instance
	if installBackupType == "cluster" {
		opts.Instance = "cluster"
	}

	return inst.Install(ctx, opts)
}

func runUninstall(ctx context.Context, instance string) error {
	ctx, cancel := signal.NotifyContext(ctx, os.Interrupt, syscall.SIGTERM)
	defer cancel()

	inst := installer.NewInstaller(log, false)
	return inst.Uninstall(ctx, instance, uninstallPurge)
}

func runInstallStatus(ctx context.Context) error {
	inst := installer.NewInstaller(log, false)

	// Check cluster status
	clusterStatus, err := inst.Status(ctx, "cluster")
	if err != nil {
		return err
	}

	fmt.Println()
	fmt.Println("📦 DBBackup Installation Status")
	fmt.Println(strings.Repeat("═", 50))

	if clusterStatus.Installed {
		fmt.Println()
		fmt.Println("🔹 Cluster Backup:")
		fmt.Printf("   Service: %s\n", formatStatus(clusterStatus.Installed, clusterStatus.Active))
		fmt.Printf("   Timer: %s\n", formatStatus(clusterStatus.TimerEnabled, clusterStatus.TimerActive))
		if clusterStatus.NextRun != "" {
			fmt.Printf("   Next run: %s\n", clusterStatus.NextRun)
		}
		if clusterStatus.LastRun != "" {
			fmt.Printf("   Last run: %s\n", clusterStatus.LastRun)
		}
	} else {
		fmt.Println()
		fmt.Println("❌ No systemd services installed")
		fmt.Println()
		fmt.Println("Run 'sudo dbbackup install' to install as a systemd service")
	}

	// Check for exporter
	if _, err := os.Stat("/etc/systemd/system/dbbackup-exporter.service"); err == nil {
		fmt.Println()
		fmt.Println("🔹 Metrics Exporter:")
		// Check if exporter is active using systemctl
		cmd := exec.CommandContext(ctx, "systemctl", "is-active", "dbbackup-exporter")
		if err := cmd.Run(); err == nil {
			fmt.Printf("   Service: ✅ active\n")
		} else {
			fmt.Printf("   Service: ⚪ inactive\n")
		}
	}

	fmt.Println()
	return nil
}

func formatStatus(installed, active bool) string {
	if !installed {
		return "not installed"
	}
	if active {
		return "✅ active"
	}
	return "⚪ inactive"
}

func expandSchedule(schedule string) string {
	shortcuts := map[string]string{
		"hourly":  "*-*-* *:00:00",
		"daily":   "*-*-* 02:00:00",
		"weekly":  "Mon *-*-* 02:00:00",
		"monthly": "*-*-01 02:00:00",
	}

	if expanded, ok := shortcuts[strings.ToLower(schedule)]; ok {
		return expanded
	}
	return schedule
}
```
**cmd/metrics.go** (new file, 138 lines)

@@ -0,0 +1,138 @@
```go
package cmd

import (
	"context"
	"fmt"
	"os"
	"os/signal"
	"syscall"

	"dbbackup/internal/prometheus"

	"github.com/spf13/cobra"
)

var (
	metricsInstance string
	metricsOutput   string
	metricsPort     int
)

// metricsCmd represents the metrics command
var metricsCmd = &cobra.Command{
	Use:   "metrics",
	Short: "Prometheus metrics management",
	Long: `Prometheus metrics management for dbbackup.

Export metrics to a textfile for node_exporter, or run an HTTP server
for direct Prometheus scraping.`,
}

// metricsExportCmd exports metrics to a textfile
var metricsExportCmd = &cobra.Command{
	Use:   "export",
	Short: "Export metrics to textfile",
	Long: `Export Prometheus metrics to a textfile for node_exporter.

The textfile collector in node_exporter can scrape metrics from files
in a designated directory (typically /var/lib/node_exporter/textfile_collector/).

Examples:
  # Export metrics to default location
  dbbackup metrics export

  # Export with custom output path
  dbbackup metrics export --output /var/lib/dbbackup/metrics/dbbackup.prom

  # Export for specific instance
  dbbackup metrics export --instance production --output /var/lib/dbbackup/metrics/production.prom

After export, configure node_exporter with:
  --collector.textfile.directory=/var/lib/dbbackup/metrics/
`,
	RunE: func(cmd *cobra.Command, args []string) error {
		return runMetricsExport(cmd.Context())
	},
}

// metricsServeCmd runs the HTTP metrics server
var metricsServeCmd = &cobra.Command{
	Use:   "serve",
	Short: "Run Prometheus HTTP server",
	Long: `Run an HTTP server exposing Prometheus metrics.

This starts a long-running daemon that serves metrics at /metrics.
Prometheus can scrape this endpoint directly.

Examples:
  # Start server on default port 9399
  dbbackup metrics serve

  # Start server on custom port
  dbbackup metrics serve --port 9100

  # Run as systemd service (installed via 'dbbackup install --with-metrics')
  sudo systemctl start dbbackup-exporter

Endpoints:
  /metrics - Prometheus metrics
  /health  - Health check (returns 200 OK)
  /        - Service info page
`,
	RunE: func(cmd *cobra.Command, args []string) error {
		return runMetricsServe(cmd.Context())
	},
}

func init() {
	rootCmd.AddCommand(metricsCmd)
	metricsCmd.AddCommand(metricsExportCmd)
	metricsCmd.AddCommand(metricsServeCmd)

	// Export flags
	metricsExportCmd.Flags().StringVar(&metricsInstance, "instance", "default", "Instance name for metrics labels")
	metricsExportCmd.Flags().StringVarP(&metricsOutput, "output", "o", "/var/lib/dbbackup/metrics/dbbackup.prom", "Output file path")

	// Serve flags
	metricsServeCmd.Flags().StringVar(&metricsInstance, "instance", "default", "Instance name for metrics labels")
	metricsServeCmd.Flags().IntVarP(&metricsPort, "port", "p", 9399, "HTTP server port")
}

func runMetricsExport(ctx context.Context) error {
	// Open catalog
	cat, err := openCatalog()
	if err != nil {
		return fmt.Errorf("failed to open catalog: %w", err)
	}
	defer cat.Close()

	// Create metrics writer
	writer := prometheus.NewMetricsWriter(log, cat, metricsInstance)

	// Write textfile
	if err := writer.WriteTextfile(metricsOutput); err != nil {
		return fmt.Errorf("failed to write metrics: %w", err)
	}

	log.Info("Exported metrics to textfile", "path", metricsOutput, "instance", metricsInstance)
	return nil
}

func runMetricsServe(ctx context.Context) error {
	// Setup signal handling
	ctx, cancel := signal.NotifyContext(ctx, os.Interrupt, syscall.SIGTERM)
	defer cancel()

	// Open catalog
	cat, err := openCatalog()
	if err != nil {
		return fmt.Errorf("failed to open catalog: %w", err)
	}
	defer cat.Close()

	// Create exporter
	exporter := prometheus.NewExporter(log, cat, metricsInstance, metricsPort)

	// Run server (blocks until context is cancelled)
	return exporter.Serve(ctx)
}
```
@@ -502,7 +502,23 @@ func (e *Engine) BackupCluster(ctx context.Context) error {

			cmd := e.db.BuildBackupCommand(name, dumpFile, options)

			dbCtx, cancel := context.WithTimeout(ctx, 2*time.Hour)
			// Calculate timeout based on database size:
			//   - Minimum 2 hours for small databases
			//   - Add 1 hour per 20GB for large databases
			//   - This allows a ~69GB database up to 6 hours
			timeout := 2 * time.Hour
			if size, err := e.db.GetDatabaseSize(ctx, name); err == nil {
				sizeGB := size / (1024 * 1024 * 1024)
				if sizeGB > 20 {
					extraHours := (sizeGB / 20) + 1
					timeout = time.Duration(2+extraHours) * time.Hour
					mu.Lock()
					e.printf("    Extended timeout: %v (for %dGB database)\n", timeout, sizeGB)
					mu.Unlock()
				}
			}

			dbCtx, cancel := context.WithTimeout(ctx, timeout)
			defer cancel()
			err := e.executeCommand(dbCtx, cmd, dumpFile)
			cancel()
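The sizing rule above, worked through as a standalone sketch (exampleTimeout is a hypothetical helper, not part of the diff):

func exampleTimeout(sizeGB int64) time.Duration {
	timeout := 2 * time.Hour // floor for small databases
	if sizeGB > 20 {
		// integer division: one extra hour per full 20GB, plus one
		timeout = time.Duration(2+(sizeGB/20)+1) * time.Hour
	}
	return timeout
}

// exampleTimeout(15) == 2h (the floor holds); exampleTimeout(69) == 6h, since 69/20+1 = 4 extra hours.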
@@ -1352,20 +1368,53 @@ func (e *Engine) executeWithStreamingCompression(ctx context.Context, cmdArgs []

	// Then start pg_dump
	if err := dumpCmd.Start(); err != nil {
		compressCmd.Process.Kill()
		return fmt.Errorf("failed to start pg_dump: %w", err)
	}

	// Wait for pg_dump to complete
	if err := dumpCmd.Wait(); err != nil {
		return fmt.Errorf("pg_dump failed: %w", err)
	// Wait for pg_dump in a goroutine to handle context timeout properly
	// This prevents deadlock if pipe buffer fills and pg_dump blocks
	dumpDone := make(chan error, 1)
	go func() {
		dumpDone <- dumpCmd.Wait()
	}()

	var dumpErr error
	select {
	case dumpErr = <-dumpDone:
		// pg_dump completed (success or failure)
	case <-ctx.Done():
		// Context cancelled/timeout - kill pg_dump to unblock
		e.log.Warn("Backup timeout - killing pg_dump process")
		dumpCmd.Process.Kill()
		<-dumpDone // Wait for goroutine to finish
		dumpErr = ctx.Err()
	}

	// Close stdout pipe to signal compressor we're done
	// This MUST happen after pg_dump exits to avoid broken pipe
	dumpStdout.Close()

	// Wait for compression to complete
	if err := compressCmd.Wait(); err != nil {
		return fmt.Errorf("compression failed: %w", err)
	compressErr := compressCmd.Wait()

	// Check errors - compressor failure first (it's usually the root cause)
	if compressErr != nil {
		e.log.Error("Compressor failed", "error", compressErr)
		return fmt.Errorf("compression failed (check disk space): %w", compressErr)
	}
	if dumpErr != nil {
		// Check for SIGPIPE (exit code 141) - indicates compressor died first
		if exitErr, ok := dumpErr.(*exec.ExitError); ok && exitErr.ExitCode() == 141 {
			e.log.Error("pg_dump received SIGPIPE - compressor may have failed")
			return fmt.Errorf("pg_dump broken pipe - check disk space and compressor")
		}
		return fmt.Errorf("pg_dump failed: %w", dumpErr)
	}

	// Sync file to disk to ensure durability (prevents truncation on power loss)
	if err := outFile.Sync(); err != nil {
		e.log.Warn("Failed to sync output file", "error", err)
	}

	e.log.Debug("Streaming compression completed", "output", compressedFile)

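The wait-with-timeout pattern above, extracted into a generic sketch (assumes the command has already been started; not part of the diff):

// waitCtx waits for a started *exec.Cmd without blocking past ctx.
func waitCtx(ctx context.Context, cmd *exec.Cmd) error {
	done := make(chan error, 1)
	go func() { done <- cmd.Wait() }()
	select {
	case err := <-done:
		return err // process exited on its own
	case <-ctx.Done():
		_ = cmd.Process.Kill() // unblock Wait
		<-done                 // reap the process
		return ctx.Err()
	}
}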
@@ -126,13 +126,46 @@ func (m *MySQL) ListTables(ctx context.Context, database string) ([]string, erro
	return tables, rows.Err()
}

// validateMySQLIdentifier checks if a database/table name is safe for use in SQL
// Prevents SQL injection by only allowing alphanumeric names with underscores
func validateMySQLIdentifier(name string) error {
	if len(name) == 0 {
		return fmt.Errorf("identifier cannot be empty")
	}
	if len(name) > 64 {
		return fmt.Errorf("identifier too long (max 64 chars): %s", name)
	}
	// Only allow alphanumerics and underscores; must start with a letter or underscore
	for i, c := range name {
		if i == 0 && !((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || c == '_') {
			return fmt.Errorf("identifier must start with letter or underscore: %s", name)
		}
		if !((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || (c >= '0' && c <= '9') || c == '_') {
			return fmt.Errorf("identifier contains invalid character %q: %s", c, name)
		}
	}
	return nil
}

// quoteMySQLIdentifier safely quotes a MySQL identifier
func quoteMySQLIdentifier(name string) string {
	// Escape any backticks by doubling them and wrap in backticks
	return "`" + strings.ReplaceAll(name, "`", "``") + "`"
}

// CreateDatabase creates a new database
func (m *MySQL) CreateDatabase(ctx context.Context, name string) error {
	if m.db == nil {
		return fmt.Errorf("not connected to database")
	}

	query := fmt.Sprintf("CREATE DATABASE IF NOT EXISTS `%s`", name)
	// Validate identifier to prevent SQL injection
	if err := validateMySQLIdentifier(name); err != nil {
		return fmt.Errorf("invalid database name: %w", err)
	}

	// Use safe quoting for identifier
	query := fmt.Sprintf("CREATE DATABASE IF NOT EXISTS %s", quoteMySQLIdentifier(name))
	_, err := m.db.ExecContext(ctx, query)
	if err != nil {
		return fmt.Errorf("failed to create database %s: %w", name, err)

@@ -148,7 +181,13 @@ func (m *MySQL) DropDatabase(ctx context.Context, name string) error {
		return fmt.Errorf("not connected to database")
	}

	query := fmt.Sprintf("DROP DATABASE IF EXISTS `%s`", name)
	// Validate identifier to prevent SQL injection
	if err := validateMySQLIdentifier(name); err != nil {
		return fmt.Errorf("invalid database name: %w", err)
	}

	// Use safe quoting for identifier
	query := fmt.Sprintf("DROP DATABASE IF EXISTS %s", quoteMySQLIdentifier(name))
	_, err := m.db.ExecContext(ctx, query)
	if err != nil {
		return fmt.Errorf("failed to drop database %s: %w", name, err)

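What the MySQL helpers above produce, as an illustrative sketch (hypothetical calls):

func exampleMySQLIdentifiers() {
	fmt.Println(quoteMySQLIdentifier("orders"))    // `orders`
	fmt.Println(quoteMySQLIdentifier("we`ird"))    // `we``ird` — embedded backtick doubled
	fmt.Println(validateMySQLIdentifier("1bad"))   // error: must start with letter or underscore
	fmt.Println(validateMySQLIdentifier("app_db")) // <nil> — accepted
}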
@@ -163,14 +163,47 @@ func (p *PostgreSQL) ListTables(ctx context.Context, database string) ([]string,
	return tables, rows.Err()
}

// validateIdentifier checks if a database/table name is safe for use in SQL
// Prevents SQL injection by only allowing alphanumeric names with underscores
func validateIdentifier(name string) error {
	if len(name) == 0 {
		return fmt.Errorf("identifier cannot be empty")
	}
	if len(name) > 63 {
		return fmt.Errorf("identifier too long (max 63 chars): %s", name)
	}
	// Only allow alphanumerics and underscores; must start with a letter or underscore
	for i, c := range name {
		if i == 0 && !((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || c == '_') {
			return fmt.Errorf("identifier must start with letter or underscore: %s", name)
		}
		if !((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || (c >= '0' && c <= '9') || c == '_') {
			return fmt.Errorf("identifier contains invalid character %q: %s", c, name)
		}
	}
	return nil
}

// quoteIdentifier safely quotes a PostgreSQL identifier
func quoteIdentifier(name string) string {
	// Double any existing double quotes and wrap in double quotes
	return `"` + strings.ReplaceAll(name, `"`, `""`) + `"`
}

// CreateDatabase creates a new database
func (p *PostgreSQL) CreateDatabase(ctx context.Context, name string) error {
	if p.db == nil {
		return fmt.Errorf("not connected to database")
	}

	// Validate identifier to prevent SQL injection
	if err := validateIdentifier(name); err != nil {
		return fmt.Errorf("invalid database name: %w", err)
	}

	// PostgreSQL doesn't support CREATE DATABASE in transactions or prepared statements
	query := fmt.Sprintf("CREATE DATABASE %s", name)
	// Use quoted identifier for safety
	query := fmt.Sprintf("CREATE DATABASE %s", quoteIdentifier(name))
	_, err := p.db.ExecContext(ctx, query)
	if err != nil {
		return fmt.Errorf("failed to create database %s: %w", name, err)

@@ -186,8 +219,14 @@ func (p *PostgreSQL) DropDatabase(ctx context.Context, name string) error {
		return fmt.Errorf("not connected to database")
	}

	// Validate identifier to prevent SQL injection
	if err := validateIdentifier(name); err != nil {
		return fmt.Errorf("invalid database name: %w", err)
	}

	// Force drop connections and drop database
	query := fmt.Sprintf("DROP DATABASE IF EXISTS %s", name)
	// Use quoted identifier for safety
	query := fmt.Sprintf("DROP DATABASE IF EXISTS %s", quoteIdentifier(name))
	_, err := p.db.ExecContext(ctx, query)
	if err != nil {
		return fmt.Errorf("failed to drop database %s: %w", name, err)

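The PostgreSQL variant behaves the same way, with one extra subtlety: the server folds unquoted identifiers to lower case, so quoting also preserves case exactly (illustrative sketch, hypothetical calls):

func examplePGIdentifiers() {
	fmt.Println(quoteIdentifier("MyDB"))     // "MyDB" — case preserved rather than folded to mydb
	fmt.Println(quoteIdentifier(`say "hi"`)) // "say ""hi""" — embedded quotes doubled
}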
11  internal/installer/embed.go  Normal file
@@ -0,0 +1,11 @@
// Package installer provides systemd service installation for dbbackup
package installer

import (
	"embed"
)

// Templates contains embedded systemd unit files
//
//go:embed templates/*.service templates/*.timer
var Templates embed.FS
680  internal/installer/installer.go  Normal file
@@ -0,0 +1,680 @@
// Package installer provides systemd service installation for dbbackup
package installer

import (
	"context"
	"fmt"
	"io"
	"os"
	"os/exec"
	"os/user"
	"path/filepath"
	"runtime"
	"strings"
	"text/template"

	"dbbackup/internal/logger"
)

// Installer handles systemd service installation
type Installer struct {
	log     logger.Logger
	unitDir string // /etc/systemd/system or custom
	dryRun  bool
}

// InstallOptions configures the installation
type InstallOptions struct {
	// Instance name (e.g., "production", "staging")
	Instance string

	// Binary path (auto-detected if empty)
	BinaryPath string

	// Backup configuration
	BackupType string // "single" or "cluster"
	Schedule   string // OnCalendar format, e.g., "daily", "*-*-* 02:00:00"

	// Service user/group
	User  string
	Group string

	// Paths
	BackupDir  string
	ConfigPath string

	// Timeout in seconds (default: 3600)
	TimeoutSeconds int

	// Metrics
	WithMetrics bool
	MetricsPort int
}

// ServiceStatus contains information about installed services
type ServiceStatus struct {
	Installed    bool
	Enabled      bool
	Active       bool
	TimerEnabled bool
	TimerActive  bool
	LastRun      string
	NextRun      string
	ServicePath  string
	TimerPath    string
	ExporterPath string
}

// NewInstaller creates a new Installer
func NewInstaller(log logger.Logger, dryRun bool) *Installer {
	return &Installer{
		log:     log,
		unitDir: "/etc/systemd/system",
		dryRun:  dryRun,
	}
}

// SetUnitDir allows overriding the systemd unit directory (for testing)
func (i *Installer) SetUnitDir(dir string) {
	i.unitDir = dir
}

// Install installs the systemd service and timer
func (i *Installer) Install(ctx context.Context, opts InstallOptions) error {
	// Validate platform
	if runtime.GOOS != "linux" {
		return fmt.Errorf("systemd installation only supported on Linux (current: %s)", runtime.GOOS)
	}

	// Validate prerequisites
	if err := i.validatePrerequisites(); err != nil {
		return err
	}

	// Set defaults
	if err := i.setDefaults(&opts); err != nil {
		return err
	}

	// Create user if needed
	if err := i.ensureUser(opts.User, opts.Group); err != nil {
		return err
	}

	// Create directories
	if err := i.createDirectories(opts); err != nil {
		return err
	}

	// Copy binary to /usr/local/bin (required for ProtectHome=yes)
	if err := i.copyBinary(&opts); err != nil {
		return err
	}

	// Write service and timer files
	if err := i.writeUnitFiles(opts); err != nil {
		return err
	}

	// Reload systemd
	if err := i.systemctl(ctx, "daemon-reload"); err != nil {
		return err
	}

	// Enable timer
	timerName := i.getTimerName(opts)
	if err := i.systemctl(ctx, "enable", timerName); err != nil {
		return err
	}

	// Install metrics exporter if requested
	if opts.WithMetrics {
		if err := i.installExporter(ctx, opts); err != nil {
			i.log.Warn("Failed to install metrics exporter", "error", err)
		}
	}

	i.log.Info("Installation complete",
		"instance", opts.Instance,
		"timer", timerName,
		"schedule", opts.Schedule)

	i.printNextSteps(opts)

	return nil
}

// Uninstall removes the systemd service and timer
func (i *Installer) Uninstall(ctx context.Context, instance string, purge bool) error {
	if runtime.GOOS != "linux" {
		return fmt.Errorf("systemd uninstallation only supported on Linux")
	}

	if err := i.validatePrerequisites(); err != nil {
		return err
	}

	// Determine service names
	var serviceName, timerName string
	if instance == "cluster" || instance == "" {
		serviceName = "dbbackup-cluster.service"
		timerName = "dbbackup-cluster.timer"
	} else {
		serviceName = fmt.Sprintf("dbbackup@%s.service", instance)
		timerName = fmt.Sprintf("dbbackup@%s.timer", instance)
	}

	// Stop and disable timer
	_ = i.systemctl(ctx, "stop", timerName)
	_ = i.systemctl(ctx, "disable", timerName)

	// Stop and disable service
	_ = i.systemctl(ctx, "stop", serviceName)
	_ = i.systemctl(ctx, "disable", serviceName)

	// Remove unit files
	servicePath := filepath.Join(i.unitDir, serviceName)
	timerPath := filepath.Join(i.unitDir, timerName)

	if !i.dryRun {
		os.Remove(servicePath)
		os.Remove(timerPath)
	} else {
		i.log.Info("Would remove", "service", servicePath)
		i.log.Info("Would remove", "timer", timerPath)
	}

	// Also try to remove template units if they exist
	if instance != "cluster" && instance != "" {
		templateService := filepath.Join(i.unitDir, "dbbackup@.service")
		templateTimer := filepath.Join(i.unitDir, "dbbackup@.timer")

		// Only remove templates if no other instances are using them
		if i.canRemoveTemplates() {
			if !i.dryRun {
				os.Remove(templateService)
				os.Remove(templateTimer)
			}
		}
	}

	// Remove exporter
	exporterPath := filepath.Join(i.unitDir, "dbbackup-exporter.service")
	_ = i.systemctl(ctx, "stop", "dbbackup-exporter.service")
	_ = i.systemctl(ctx, "disable", "dbbackup-exporter.service")
	if !i.dryRun {
		os.Remove(exporterPath)
	}

	// Reload systemd
	_ = i.systemctl(ctx, "daemon-reload")

	// Purge config files if requested
	if purge {
		configDirs := []string{
			"/etc/dbbackup",
			"/var/lib/dbbackup",
		}
		for _, dir := range configDirs {
			if !i.dryRun {
				if err := os.RemoveAll(dir); err != nil {
					i.log.Warn("Failed to remove directory", "path", dir, "error", err)
				} else {
					i.log.Info("Removed directory", "path", dir)
				}
			} else {
				i.log.Info("Would remove directory", "path", dir)
			}
		}
	}

	i.log.Info("Uninstallation complete", "instance", instance, "purge", purge)
	return nil
}

// Status returns the current installation status
func (i *Installer) Status(ctx context.Context, instance string) (*ServiceStatus, error) {
	if runtime.GOOS != "linux" {
		return nil, fmt.Errorf("systemd status only supported on Linux")
	}

	status := &ServiceStatus{}

	// Determine service names
	var serviceName, timerName string
	if instance == "cluster" || instance == "" {
		serviceName = "dbbackup-cluster.service"
		timerName = "dbbackup-cluster.timer"
	} else {
		serviceName = fmt.Sprintf("dbbackup@%s.service", instance)
		timerName = fmt.Sprintf("dbbackup@%s.timer", instance)
	}

	// Check service file exists
	status.ServicePath = filepath.Join(i.unitDir, serviceName)
	if _, err := os.Stat(status.ServicePath); err == nil {
		status.Installed = true
	}

	// Check timer file exists
	status.TimerPath = filepath.Join(i.unitDir, timerName)

	// Check exporter
	status.ExporterPath = filepath.Join(i.unitDir, "dbbackup-exporter.service")

	// Check enabled/active status
	if status.Installed {
		status.Enabled = i.isEnabled(ctx, serviceName)
		status.Active = i.isActive(ctx, serviceName)
		status.TimerEnabled = i.isEnabled(ctx, timerName)
		status.TimerActive = i.isActive(ctx, timerName)

		// Get timer info
		status.NextRun = i.getTimerNext(ctx, timerName)
		status.LastRun = i.getTimerLast(ctx, timerName)
	}

	return status, nil
}

// validatePrerequisites checks system requirements
func (i *Installer) validatePrerequisites() error {
	// Check root (skip in dry-run mode)
	if os.Getuid() != 0 && !i.dryRun {
		return fmt.Errorf("installation requires root privileges (use sudo)")
	}

	// Check systemd
	if _, err := exec.LookPath("systemctl"); err != nil {
		return fmt.Errorf("systemctl not found - is this a systemd-based system?")
	}

	// Check for container environment
	if _, err := os.Stat("/.dockerenv"); err == nil {
		i.log.Warn("Running inside Docker container - systemd may not work correctly")
	}

	return nil
}

// setDefaults fills in default values
func (i *Installer) setDefaults(opts *InstallOptions) error {
	// Auto-detect binary path
	if opts.BinaryPath == "" {
		binPath, err := os.Executable()
		if err != nil {
			return fmt.Errorf("failed to detect binary path: %w", err)
		}
		binPath, err = filepath.EvalSymlinks(binPath)
		if err != nil {
			return fmt.Errorf("failed to resolve binary path: %w", err)
		}
		opts.BinaryPath = binPath
	}

	// Default instance
	if opts.Instance == "" {
		opts.Instance = "default"
	}

	// Default backup type
	if opts.BackupType == "" {
		opts.BackupType = "single"
	}

	// Default schedule (daily at 2am)
	if opts.Schedule == "" {
		opts.Schedule = "*-*-* 02:00:00"
	}

	// Default user/group
	if opts.User == "" {
		opts.User = "dbbackup"
	}
	if opts.Group == "" {
		opts.Group = "dbbackup"
	}

	// Default paths
	if opts.BackupDir == "" {
		opts.BackupDir = "/var/lib/dbbackup/backups"
	}
	if opts.ConfigPath == "" {
		opts.ConfigPath = "/etc/dbbackup/dbbackup.conf"
	}

	// Default timeout (1 hour)
	if opts.TimeoutSeconds == 0 {
		opts.TimeoutSeconds = 3600
	}

	// Default metrics port
	if opts.MetricsPort == 0 {
		opts.MetricsPort = 9399
	}

	return nil
}

// ensureUser creates the service user if it doesn't exist
func (i *Installer) ensureUser(username, groupname string) error {
	// Check if user exists
	if _, err := user.Lookup(username); err == nil {
		i.log.Debug("User already exists", "user", username)
		return nil
	}

	if i.dryRun {
		i.log.Info("Would create user", "user", username, "group", groupname)
		return nil
	}

	// Create group first
	groupCmd := exec.Command("groupadd", "--system", groupname)
	if output, err := groupCmd.CombinedOutput(); err != nil {
		// Ignore if group already exists
		if !strings.Contains(string(output), "already exists") {
			i.log.Debug("Group creation output", "output", string(output))
		}
	}

	// Create user
	userCmd := exec.Command("useradd",
		"--system",
		"--shell", "/usr/sbin/nologin",
		"--home-dir", "/var/lib/dbbackup",
		"--gid", groupname,
		username)

	if output, err := userCmd.CombinedOutput(); err != nil {
		if !strings.Contains(string(output), "already exists") {
			return fmt.Errorf("failed to create user %s: %w (%s)", username, err, output)
		}
	}

	i.log.Info("Created system user", "user", username, "group", groupname)
	return nil
}

// createDirectories creates required directories
func (i *Installer) createDirectories(opts InstallOptions) error {
	dirs := []struct {
		path string
		mode os.FileMode
	}{
		{"/etc/dbbackup", 0755},
		{"/etc/dbbackup/env.d", 0700},
		{"/var/lib/dbbackup", 0750},
		{"/var/lib/dbbackup/backups", 0750},
		{"/var/lib/dbbackup/metrics", 0755},
		{"/var/log/dbbackup", 0750},
		{opts.BackupDir, 0750},
	}

	for _, d := range dirs {
		if i.dryRun {
			i.log.Info("Would create directory", "path", d.path, "mode", d.mode)
			continue
		}

		if err := os.MkdirAll(d.path, d.mode); err != nil {
			return fmt.Errorf("failed to create directory %s: %w", d.path, err)
		}

		// Set ownership
		u, err := user.Lookup(opts.User)
		if err == nil {
			var uid, gid int
			fmt.Sscanf(u.Uid, "%d", &uid)
			fmt.Sscanf(u.Gid, "%d", &gid)
			os.Chown(d.path, uid, gid)
		}
	}

	return nil
}

// copyBinary copies the binary to /usr/local/bin for systemd access
// This is required because ProtectHome=yes blocks access to home directories
func (i *Installer) copyBinary(opts *InstallOptions) error {
	const installPath = "/usr/local/bin/dbbackup"

	// Check if binary is already in a system path
	if opts.BinaryPath == installPath {
		return nil
	}

	if i.dryRun {
		i.log.Info("Would copy binary", "from", opts.BinaryPath, "to", installPath)
		opts.BinaryPath = installPath
		return nil
	}

	// Read source binary
	src, err := os.Open(opts.BinaryPath)
	if err != nil {
		return fmt.Errorf("failed to open source binary: %w", err)
	}
	defer src.Close()

	// Create destination
	dst, err := os.OpenFile(installPath, os.O_CREATE|os.O_WRONLY|os.O_TRUNC, 0755)
	if err != nil {
		return fmt.Errorf("failed to create %s: %w", installPath, err)
	}
	defer dst.Close()

	// Copy
	if _, err := io.Copy(dst, src); err != nil {
		return fmt.Errorf("failed to copy binary: %w", err)
	}

	i.log.Info("Copied binary", "from", opts.BinaryPath, "to", installPath)
	opts.BinaryPath = installPath
	return nil
}

// writeUnitFiles renders and writes the systemd unit files
func (i *Installer) writeUnitFiles(opts InstallOptions) error {
	// Prepare template data
	data := map[string]interface{}{
		"User":           opts.User,
		"Group":          opts.Group,
		"BinaryPath":     opts.BinaryPath,
		"BackupType":     opts.BackupType,
		"BackupDir":      opts.BackupDir,
		"ConfigPath":     opts.ConfigPath,
		"TimeoutSeconds": opts.TimeoutSeconds,
		"Schedule":       opts.Schedule,
		"MetricsPort":    opts.MetricsPort,
	}

	// Determine which templates to use
	var serviceTemplate, timerTemplate string
	var serviceName, timerName string

	if opts.BackupType == "cluster" {
		serviceTemplate = "templates/dbbackup-cluster.service"
		timerTemplate = "templates/dbbackup-cluster.timer"
		serviceName = "dbbackup-cluster.service"
		timerName = "dbbackup-cluster.timer"
	} else {
		serviceTemplate = "templates/dbbackup@.service"
		timerTemplate = "templates/dbbackup@.timer"
		serviceName = "dbbackup@.service"
		timerName = "dbbackup@.timer"
	}

	// Write service file
	if err := i.writeTemplateFile(serviceTemplate, serviceName, data); err != nil {
		return fmt.Errorf("failed to write service file: %w", err)
	}

	// Write timer file
	if err := i.writeTemplateFile(timerTemplate, timerName, data); err != nil {
		return fmt.Errorf("failed to write timer file: %w", err)
	}

	return nil
}

// writeTemplateFile reads an embedded template and writes it to the unit directory
func (i *Installer) writeTemplateFile(templatePath, outputName string, data map[string]interface{}) error {
	// Read template
	content, err := Templates.ReadFile(templatePath)
	if err != nil {
		return fmt.Errorf("failed to read template %s: %w", templatePath, err)
	}

	// Parse template
	tmpl, err := template.New(outputName).Parse(string(content))
	if err != nil {
		return fmt.Errorf("failed to parse template %s: %w", templatePath, err)
	}

	// Render template
	var buf strings.Builder
	if err := tmpl.Execute(&buf, data); err != nil {
		return fmt.Errorf("failed to render template %s: %w", templatePath, err)
	}

	// Write file
	outputPath := filepath.Join(i.unitDir, outputName)
	if i.dryRun {
		i.log.Info("Would write unit file", "path", outputPath)
		i.log.Debug("Unit file content", "content", buf.String())
		return nil
	}

	if err := os.WriteFile(outputPath, []byte(buf.String()), 0644); err != nil {
		return fmt.Errorf("failed to write %s: %w", outputPath, err)
	}

	i.log.Info("Created unit file", "path", outputPath)
	return nil
}

// installExporter installs the metrics exporter service
func (i *Installer) installExporter(ctx context.Context, opts InstallOptions) error {
	data := map[string]interface{}{
		"User":        opts.User,
		"Group":       opts.Group,
		"BinaryPath":  opts.BinaryPath,
		"ConfigPath":  opts.ConfigPath,
		"MetricsPort": opts.MetricsPort,
	}

	if err := i.writeTemplateFile("templates/dbbackup-exporter.service", "dbbackup-exporter.service", data); err != nil {
		return err
	}

	if err := i.systemctl(ctx, "daemon-reload"); err != nil {
		return err
	}

	if err := i.systemctl(ctx, "enable", "dbbackup-exporter.service"); err != nil {
		return err
	}

	if err := i.systemctl(ctx, "start", "dbbackup-exporter.service"); err != nil {
		return err
	}

	i.log.Info("Installed metrics exporter", "port", opts.MetricsPort)
	return nil
}

// getTimerName returns the timer unit name for the given options
func (i *Installer) getTimerName(opts InstallOptions) string {
	if opts.BackupType == "cluster" {
		return "dbbackup-cluster.timer"
	}
	return fmt.Sprintf("dbbackup@%s.timer", opts.Instance)
}

// systemctl runs a systemctl command
func (i *Installer) systemctl(ctx context.Context, args ...string) error {
	if i.dryRun {
		i.log.Info("Would run: systemctl", "args", args)
		return nil
	}

	cmd := exec.CommandContext(ctx, "systemctl", args...)
	output, err := cmd.CombinedOutput()
	if err != nil {
		return fmt.Errorf("systemctl %v failed: %w\n%s", args, err, string(output))
	}
	return nil
}

// isEnabled checks if a unit is enabled
func (i *Installer) isEnabled(ctx context.Context, unit string) bool {
	cmd := exec.CommandContext(ctx, "systemctl", "is-enabled", unit)
	return cmd.Run() == nil
}

// isActive checks if a unit is active
func (i *Installer) isActive(ctx context.Context, unit string) bool {
	cmd := exec.CommandContext(ctx, "systemctl", "is-active", unit)
	return cmd.Run() == nil
}

// getTimerNext gets the next run time for a timer
func (i *Installer) getTimerNext(ctx context.Context, timer string) string {
	cmd := exec.CommandContext(ctx, "systemctl", "show", timer, "--property=NextElapseUSecRealtime", "--value")
	output, err := cmd.Output()
	if err != nil {
		return ""
	}
	return strings.TrimSpace(string(output))
}

// getTimerLast gets the last run time for a timer
func (i *Installer) getTimerLast(ctx context.Context, timer string) string {
	cmd := exec.CommandContext(ctx, "systemctl", "show", timer, "--property=LastTriggerUSec", "--value")
	output, err := cmd.Output()
	if err != nil {
		return ""
	}
	return strings.TrimSpace(string(output))
}

// canRemoveTemplates checks if template units can be safely removed
func (i *Installer) canRemoveTemplates() bool {
	// Check if any dbbackup@*.service instances exist
	pattern := filepath.Join(i.unitDir, "dbbackup@*.service")
	matches, _ := filepath.Glob(pattern)

	// Also check for running instances
	cmd := exec.Command("systemctl", "list-units", "--type=service", "--all", "dbbackup@*")
	output, _ := cmd.Output()

	return len(matches) == 0 && !strings.Contains(string(output), "dbbackup@")
}

// printNextSteps prints helpful next steps after installation
func (i *Installer) printNextSteps(opts InstallOptions) {
	timerName := i.getTimerName(opts)
	serviceName := strings.Replace(timerName, ".timer", ".service", 1)

	fmt.Println()
	fmt.Println("✅ Installation successful!")
	fmt.Println()
	fmt.Println("📋 Next steps:")
	fmt.Println()
	fmt.Printf("  1. Edit configuration:   sudo nano %s\n", opts.ConfigPath)
	fmt.Printf("  2. Set credentials:      sudo nano /etc/dbbackup/env.d/%s.conf\n", opts.Instance)
	fmt.Printf("  3. Start the timer:      sudo systemctl start %s\n", timerName)
	fmt.Printf("  4. Verify timer status:  sudo systemctl status %s\n", timerName)
	fmt.Printf("  5. Run backup manually:  sudo systemctl start %s\n", serviceName)
	fmt.Println()
	fmt.Println("📊 View backup logs:")
	fmt.Printf("   journalctl -u %s -f\n", serviceName)
	fmt.Println()

	if opts.WithMetrics {
		fmt.Println("📈 Prometheus metrics:")
		fmt.Printf("   curl http://localhost:%d/metrics\n", opts.MetricsPort)
		fmt.Println()
	}
}
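A minimal sketch of driving this API end to end, using only names defined above (dry-run, so nothing is written; ctx and the logger value are assumed to come from the caller):

inst := installer.NewInstaller(log, true /* dryRun: log actions instead of performing them */)
err := inst.Install(ctx, installer.InstallOptions{
	Instance:    "production",
	BackupType:  "single", // renders dbbackup@.service / dbbackup@.timer
	WithMetrics: true,     // also installs dbbackup-exporter.service
	// Schedule, User, paths, timeout and port fall back to setDefaults values
})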
47  internal/installer/templates/dbbackup-cluster.service  Normal file
@@ -0,0 +1,47 @@
[Unit]
Description=Database Cluster Backup
Documentation=https://github.com/PlusOne/dbbackup
After=network-online.target postgresql.service mysql.service mariadb.service
Wants=network-online.target

[Service]
Type=oneshot
User={{.User}}
Group={{.Group}}

# Security hardening
NoNewPrivileges=yes
ProtectSystem=strict
ProtectHome=read-only
PrivateTmp=yes
ProtectKernelTunables=yes
ProtectKernelModules=yes
ProtectControlGroups=yes
RestrictSUIDSGID=yes
RestrictRealtime=yes
LockPersonality=yes
RemoveIPC=yes
CapabilityBoundingSet=
AmbientCapabilities=

# Directories
ReadWritePaths={{.BackupDir}} /var/lib/dbbackup /var/log/dbbackup

# Network access for cloud uploads
RestrictAddressFamilies=AF_UNIX AF_INET AF_INET6

# Environment
EnvironmentFile=-/etc/dbbackup/env.d/cluster.conf

# Execution - cluster backup (all databases)
ExecStart={{.BinaryPath}} backup cluster --config {{.ConfigPath}}
TimeoutStartSec={{.TimeoutSeconds}}

# Post-backup metrics export
ExecStopPost=-{{.BinaryPath}} metrics export --instance cluster --output /var/lib/dbbackup/metrics/cluster.prom

# OOM protection for large backups
OOMScoreAdjust=-500

[Install]
WantedBy=multi-user.target
11  internal/installer/templates/dbbackup-cluster.timer  Normal file
@@ -0,0 +1,11 @@
[Unit]
Description=Database Cluster Backup Timer
Documentation=https://github.com/PlusOne/dbbackup

[Timer]
OnCalendar={{.Schedule}}
Persistent=true
RandomizedDelaySec=1800

[Install]
WantedBy=timers.target
37  internal/installer/templates/dbbackup-exporter.service  Normal file
@@ -0,0 +1,37 @@
[Unit]
Description=DBBackup Prometheus Metrics Exporter
Documentation=https://github.com/PlusOne/dbbackup
After=network-online.target

[Service]
Type=simple
User={{.User}}
Group={{.Group}}

# Security hardening
NoNewPrivileges=yes
ProtectSystem=strict
ProtectHome=yes
PrivateTmp=yes
ProtectKernelTunables=yes
ProtectKernelModules=yes
ProtectControlGroups=yes
RestrictSUIDSGID=yes
RestrictRealtime=yes
LockPersonality=yes
RemoveIPC=yes

# Read-write access to catalog for metrics collection
ReadWritePaths=/var/lib/dbbackup

# Network for HTTP server
RestrictAddressFamilies=AF_UNIX AF_INET AF_INET6

# Execution
ExecStart={{.BinaryPath}} metrics serve --port {{.MetricsPort}}
ExecReload=/bin/kill -HUP $MAINPID
Restart=on-failure
RestartSec=5

[Install]
WantedBy=multi-user.target
47  internal/installer/templates/dbbackup@.service  Normal file
@@ -0,0 +1,47 @@
[Unit]
Description=Database Backup for %i
Documentation=https://github.com/PlusOne/dbbackup
After=network-online.target postgresql.service mysql.service mariadb.service
Wants=network-online.target

[Service]
Type=oneshot
User={{.User}}
Group={{.Group}}

# Security hardening
NoNewPrivileges=yes
ProtectSystem=strict
ProtectHome=read-only
PrivateTmp=yes
ProtectKernelTunables=yes
ProtectKernelModules=yes
ProtectControlGroups=yes
RestrictSUIDSGID=yes
RestrictRealtime=yes
LockPersonality=yes
RemoveIPC=yes
CapabilityBoundingSet=
AmbientCapabilities=

# Directories
ReadWritePaths={{.BackupDir}} /var/lib/dbbackup /var/log/dbbackup

# Network access for cloud uploads
RestrictAddressFamilies=AF_UNIX AF_INET AF_INET6

# Environment
EnvironmentFile=-/etc/dbbackup/env.d/%i.conf

# Execution
ExecStart={{.BinaryPath}} backup {{.BackupType}} %i --config {{.ConfigPath}}
TimeoutStartSec={{.TimeoutSeconds}}

# Post-backup metrics export
ExecStopPost=-{{.BinaryPath}} metrics export --instance %i --output /var/lib/dbbackup/metrics/%i.prom

# OOM protection for large backups
OOMScoreAdjust=-500

[Install]
WantedBy=multi-user.target
11  internal/installer/templates/dbbackup@.timer  Normal file
@@ -0,0 +1,11 @@
[Unit]
Description=Database Backup Timer for %i
Documentation=https://github.com/PlusOne/dbbackup

[Timer]
OnCalendar={{.Schedule}}
Persistent=true
RandomizedDelaySec=1800

[Install]
WantedBy=timers.target
@@ -69,6 +69,7 @@ func (m *Manager) NotifySync(ctx context.Context, event *Event) error {
	m.mu.RUnlock()

	var errors []error
	var errMu sync.Mutex
	var wg sync.WaitGroup

	for _, n := range notifiers {

@@ -80,7 +81,9 @@ func (m *Manager) NotifySync(ctx context.Context, event *Event) error {
		go func(notifier Notifier) {
			defer wg.Done()
			if err := notifier.Send(ctx, event); err != nil {
				errMu.Lock()
				errors = append(errors, fmt.Errorf("%s: %w", notifier.Name(), err))
				errMu.Unlock()
			}
		}(n)
	}

174  internal/prometheus/exporter.go  Normal file
@@ -0,0 +1,174 @@
// Package prometheus provides Prometheus metrics for dbbackup
package prometheus

import (
	"context"
	"fmt"
	"net/http"
	"sync"
	"time"

	"dbbackup/internal/catalog"
	"dbbackup/internal/logger"
)

// Exporter provides an HTTP endpoint for Prometheus metrics
type Exporter struct {
	log      logger.Logger
	catalog  catalog.Catalog
	instance string
	port     int

	mu          sync.RWMutex
	cachedData  string
	lastRefresh time.Time
	refreshTTL  time.Duration
}

// NewExporter creates a new Prometheus exporter
func NewExporter(log logger.Logger, cat catalog.Catalog, instance string, port int) *Exporter {
	return &Exporter{
		log:        log,
		catalog:    cat,
		instance:   instance,
		port:       port,
		refreshTTL: 30 * time.Second,
	}
}

// Serve starts the HTTP server and blocks until context is cancelled
func (e *Exporter) Serve(ctx context.Context) error {
	mux := http.NewServeMux()

	// /metrics endpoint
	mux.HandleFunc("/metrics", e.handleMetrics)

	// /health endpoint
	mux.HandleFunc("/health", e.handleHealth)

	// / root with info
	mux.HandleFunc("/", e.handleRoot)

	addr := fmt.Sprintf(":%d", e.port)
	srv := &http.Server{
		Addr:         addr,
		Handler:      mux,
		ReadTimeout:  10 * time.Second,
		WriteTimeout: 30 * time.Second,
		IdleTimeout:  60 * time.Second,
	}

	// Start refresh goroutine
	go e.refreshLoop(ctx)

	// Graceful shutdown
	go func() {
		<-ctx.Done()
		e.log.Info("Shutting down metrics server...")
		shutdownCtx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
		defer cancel()
		if err := srv.Shutdown(shutdownCtx); err != nil {
			e.log.Error("Server shutdown error", "error", err)
		}
	}()

	e.log.Info("Starting Prometheus metrics server", "addr", addr)
	if err := srv.ListenAndServe(); err != http.ErrServerClosed {
		return fmt.Errorf("server error: %w", err)
	}

	return nil
}

// handleMetrics handles /metrics endpoint
func (e *Exporter) handleMetrics(w http.ResponseWriter, r *http.Request) {
	e.mu.RLock()
	data := e.cachedData
	e.mu.RUnlock()

	if data == "" {
		// Force refresh if cache is empty
		if err := e.refresh(); err != nil {
			http.Error(w, "Failed to collect metrics", http.StatusInternalServerError)
			return
		}
		e.mu.RLock()
		data = e.cachedData
		e.mu.RUnlock()
	}

	w.Header().Set("Content-Type", "text/plain; version=0.0.4; charset=utf-8")
	w.WriteHeader(http.StatusOK)
	w.Write([]byte(data))
}

// handleHealth handles /health endpoint
func (e *Exporter) handleHealth(w http.ResponseWriter, r *http.Request) {
	w.Header().Set("Content-Type", "application/json")
	w.WriteHeader(http.StatusOK)
	w.Write([]byte(`{"status":"ok","service":"dbbackup-exporter"}`))
}

// handleRoot handles / endpoint
func (e *Exporter) handleRoot(w http.ResponseWriter, r *http.Request) {
	if r.URL.Path != "/" {
		http.NotFound(w, r)
		return
	}

	w.Header().Set("Content-Type", "text/html")
	w.WriteHeader(http.StatusOK)
	w.Write([]byte(`<!DOCTYPE html>
<html>
<head>
<title>DBBackup Exporter</title>
</head>
<body>
<h1>DBBackup Prometheus Exporter</h1>
<p>This is a Prometheus metrics exporter for DBBackup.</p>
<ul>
<li><a href="/metrics">/metrics</a> - Prometheus metrics</li>
<li><a href="/health">/health</a> - Health check</li>
</ul>
</body>
</html>`))
}

// refreshLoop periodically refreshes the metrics cache
func (e *Exporter) refreshLoop(ctx context.Context) {
	ticker := time.NewTicker(e.refreshTTL)
	defer ticker.Stop()

	// Initial refresh
	if err := e.refresh(); err != nil {
		e.log.Error("Initial metrics refresh failed", "error", err)
	}

	for {
		select {
		case <-ctx.Done():
			return
		case <-ticker.C:
			if err := e.refresh(); err != nil {
				e.log.Error("Metrics refresh failed", "error", err)
			}
		}
	}
}

// refresh updates the cached metrics
func (e *Exporter) refresh() error {
	writer := NewMetricsWriter(e.log, e.catalog, e.instance)
	data, err := writer.GenerateMetricsString()
	if err != nil {
		return err
	}

	e.mu.Lock()
	e.cachedData = data
	e.lastRefresh = time.Now()
	e.mu.Unlock()

	e.log.Debug("Refreshed metrics cache")
	return nil
}
245  internal/prometheus/textfile.go  Normal file
@@ -0,0 +1,245 @@
// Package prometheus provides Prometheus metrics for dbbackup
package prometheus

import (
	"context"
	"fmt"
	"os"
	"path/filepath"
	"sort"
	"strings"
	"time"

	"dbbackup/internal/catalog"
	"dbbackup/internal/logger"
)

// MetricsWriter writes metrics in Prometheus text format
type MetricsWriter struct {
	log      logger.Logger
	catalog  catalog.Catalog
	instance string
}

// NewMetricsWriter creates a new MetricsWriter
func NewMetricsWriter(log logger.Logger, cat catalog.Catalog, instance string) *MetricsWriter {
	return &MetricsWriter{
		log:      log,
		catalog:  cat,
		instance: instance,
	}
}

// BackupMetrics holds metrics for a single database
type BackupMetrics struct {
	Database     string
	Engine       string
	LastSuccess  time.Time
	LastDuration time.Duration
	LastSize     int64
	TotalBackups int
	SuccessCount int
	FailureCount int
	Verified     bool
	RPOSeconds   float64
}

// WriteTextfile writes metrics to a Prometheus textfile collector file
func (m *MetricsWriter) WriteTextfile(path string) error {
	metrics, err := m.collectMetrics()
	if err != nil {
		return fmt.Errorf("failed to collect metrics: %w", err)
	}

	output := m.formatMetrics(metrics)

	// Atomic write: write to temp file, then rename
	dir := filepath.Dir(path)
	if err := os.MkdirAll(dir, 0755); err != nil {
		return fmt.Errorf("failed to create directory %s: %w", dir, err)
	}

	tmpPath := path + ".tmp"
	if err := os.WriteFile(tmpPath, []byte(output), 0644); err != nil {
		return fmt.Errorf("failed to write temp file: %w", err)
	}

	if err := os.Rename(tmpPath, path); err != nil {
		os.Remove(tmpPath)
		return fmt.Errorf("failed to rename temp file: %w", err)
	}

	m.log.Debug("Wrote metrics to textfile", "path", path, "databases", len(metrics))
	return nil
}

// collectMetrics gathers metrics from the catalog
func (m *MetricsWriter) collectMetrics() ([]BackupMetrics, error) {
	if m.catalog == nil {
		return nil, fmt.Errorf("catalog not available")
	}

	ctx := context.Background()

	// Get recent backups using Search with limit
	query := &catalog.SearchQuery{
		Limit: 1000,
	}
	entries, err := m.catalog.Search(ctx, query)
	if err != nil {
		return nil, fmt.Errorf("failed to search backups: %w", err)
	}

	// Group by database
	byDB := make(map[string]*BackupMetrics)

	for _, e := range entries {
		key := e.Database
		if key == "" {
			key = "unknown"
		}

		metrics, ok := byDB[key]
		if !ok {
			metrics = &BackupMetrics{
				Database: key,
				Engine:   e.DatabaseType,
			}
			byDB[key] = metrics
		}

		metrics.TotalBackups++

		isSuccess := e.Status == catalog.StatusCompleted || e.Status == catalog.StatusVerified
		if isSuccess {
			metrics.SuccessCount++
			// Track most recent success
			if e.CreatedAt.After(metrics.LastSuccess) {
				metrics.LastSuccess = e.CreatedAt
				metrics.LastDuration = time.Duration(e.Duration * float64(time.Second))
				metrics.LastSize = e.SizeBytes
				metrics.Verified = e.VerifiedAt != nil && e.VerifyValid != nil && *e.VerifyValid
				metrics.Engine = e.DatabaseType
			}
		} else {
			metrics.FailureCount++
		}
	}

	// Calculate RPO for each database
	now := time.Now()
	for _, metrics := range byDB {
		if !metrics.LastSuccess.IsZero() {
			metrics.RPOSeconds = now.Sub(metrics.LastSuccess).Seconds()
		}
	}

	// Convert to slice and sort
	result := make([]BackupMetrics, 0, len(byDB))
	for _, metrics := range byDB {
		result = append(result, *metrics)
	}
	sort.Slice(result, func(i, j int) bool {
		return result[i].Database < result[j].Database
	})

	return result, nil
}

// formatMetrics formats metrics in Prometheus exposition format
func (m *MetricsWriter) formatMetrics(metrics []BackupMetrics) string {
	var b strings.Builder

	// Timestamp of metrics generation
	now := time.Now().Unix()

	// Header comment
	b.WriteString("# DBBackup Prometheus Metrics\n")
	b.WriteString(fmt.Sprintf("# Generated at: %s\n", time.Now().Format(time.RFC3339)))
	b.WriteString(fmt.Sprintf("# Instance: %s\n", m.instance))
	b.WriteString("\n")

	// dbbackup_last_success_timestamp
	b.WriteString("# HELP dbbackup_last_success_timestamp Unix timestamp of last successful backup\n")
	b.WriteString("# TYPE dbbackup_last_success_timestamp gauge\n")
	for _, met := range metrics {
		if !met.LastSuccess.IsZero() {
			b.WriteString(fmt.Sprintf("dbbackup_last_success_timestamp{instance=%q,database=%q,engine=%q} %d\n",
				m.instance, met.Database, met.Engine, met.LastSuccess.Unix()))
		}
	}
	b.WriteString("\n")

	// dbbackup_last_backup_duration_seconds
	b.WriteString("# HELP dbbackup_last_backup_duration_seconds Duration of last successful backup in seconds\n")
	b.WriteString("# TYPE dbbackup_last_backup_duration_seconds gauge\n")
	for _, met := range metrics {
		if met.LastDuration > 0 {
			b.WriteString(fmt.Sprintf("dbbackup_last_backup_duration_seconds{instance=%q,database=%q,engine=%q} %.2f\n",
				m.instance, met.Database, met.Engine, met.LastDuration.Seconds()))
		}
	}
	b.WriteString("\n")

	// dbbackup_last_backup_size_bytes
	b.WriteString("# HELP dbbackup_last_backup_size_bytes Size of last successful backup in bytes\n")
	b.WriteString("# TYPE dbbackup_last_backup_size_bytes gauge\n")
	for _, met := range metrics {
		if met.LastSize > 0 {
			b.WriteString(fmt.Sprintf("dbbackup_last_backup_size_bytes{instance=%q,database=%q,engine=%q} %d\n",
				m.instance, met.Database, met.Engine, met.LastSize))
		}
	}
	b.WriteString("\n")

	// dbbackup_backup_total (counter)
	b.WriteString("# HELP dbbackup_backup_total Total number of backup attempts\n")
	b.WriteString("# TYPE dbbackup_backup_total counter\n")
	for _, met := range metrics {
		b.WriteString(fmt.Sprintf("dbbackup_backup_total{instance=%q,database=%q,status=\"success\"} %d\n",
			m.instance, met.Database, met.SuccessCount))
		b.WriteString(fmt.Sprintf("dbbackup_backup_total{instance=%q,database=%q,status=\"failure\"} %d\n",
			m.instance, met.Database, met.FailureCount))
	}
	b.WriteString("\n")

	// dbbackup_rpo_seconds
	b.WriteString("# HELP dbbackup_rpo_seconds Recovery Point Objective - seconds since last successful backup\n")
	b.WriteString("# TYPE dbbackup_rpo_seconds gauge\n")
	for _, met := range metrics {
		if met.RPOSeconds > 0 {
			b.WriteString(fmt.Sprintf("dbbackup_rpo_seconds{instance=%q,database=%q} %.0f\n",
				m.instance, met.Database, met.RPOSeconds))
		}
	}
	b.WriteString("\n")

	// dbbackup_backup_verified
	b.WriteString("# HELP dbbackup_backup_verified Whether the last backup was verified (1=yes, 0=no)\n")
	b.WriteString("# TYPE dbbackup_backup_verified gauge\n")
	for _, met := range metrics {
		verified := 0
		if met.Verified {
			verified = 1
		}
		b.WriteString(fmt.Sprintf("dbbackup_backup_verified{instance=%q,database=%q} %d\n",
			m.instance, met.Database, verified))
	}
	b.WriteString("\n")

	// dbbackup_scrape_timestamp
	b.WriteString("# HELP dbbackup_scrape_timestamp Unix timestamp when metrics were collected\n")
	b.WriteString("# TYPE dbbackup_scrape_timestamp gauge\n")
	b.WriteString(fmt.Sprintf("dbbackup_scrape_timestamp{instance=%q} %d\n", m.instance, now))

	return b.String()
}

// GenerateMetricsString returns metrics as a string (for HTTP endpoint)
func (m *MetricsWriter) GenerateMetricsString() (string, error) {
	metrics, err := m.collectMetrics()
	if err != nil {
		return "", err
	}
	return m.formatMetrics(metrics), nil
}
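Given the format strings above, the exported textfile would look roughly like this (values invented for illustration):

# HELP dbbackup_last_success_timestamp Unix timestamp of last successful backup
# TYPE dbbackup_last_success_timestamp gauge
dbbackup_last_success_timestamp{instance="default",database="appdb",engine="postgres"} 1733030400
# HELP dbbackup_rpo_seconds Recovery Point Objective - seconds since last successful backup
# TYPE dbbackup_rpo_seconds gauge
dbbackup_rpo_seconds{instance="default",database="appdb"} 5400
# HELP dbbackup_backup_total Total number of backup attempts
# TYPE dbbackup_backup_total counter
dbbackup_backup_total{instance="default",database="appdb",status="success"} 42
dbbackup_backup_total{instance="default",database="appdb",status="failure"} 1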
@@ -252,6 +252,15 @@ func (e *Engine) restorePostgreSQLDumpWithOwnership(ctx context.Context, archive
|
||||
|
||||
// restorePostgreSQLSQL restores from PostgreSQL SQL script
|
||||
func (e *Engine) restorePostgreSQLSQL(ctx context.Context, archivePath, targetDB string, compressed bool) error {
|
||||
// Pre-validate SQL dump to detect truncation BEFORE attempting restore
|
||||
// This saves time by catching corrupted files early (vs 49min failures)
|
||||
if err := e.quickValidateSQLDump(archivePath, compressed); err != nil {
|
||||
e.log.Error("Pre-restore validation failed - dump file appears corrupted",
|
||||
"file", archivePath,
|
||||
"error", err)
|
||||
return fmt.Errorf("dump validation failed: %w - the backup file may be truncated or corrupted", err)
|
||||
}
|
||||
|
||||
// Use psql for SQL scripts
|
||||
var cmd []string
|
||||
|
||||
@@ -262,9 +271,10 @@ func (e *Engine) restorePostgreSQLSQL(ctx context.Context, archivePath, targetDB
|
||||
}
|
||||
|
||||
if compressed {
|
||||
psqlCmd := fmt.Sprintf("psql -U %s -d %s", e.cfg.User, targetDB)
|
||||
// Use ON_ERROR_STOP=1 to fail fast on first error (prevents millions of errors on truncated dumps)
|
||||
psqlCmd := fmt.Sprintf("psql -U %s -d %s -v ON_ERROR_STOP=1", e.cfg.User, targetDB)
|
||||
if hostArg != "" {
|
||||
psqlCmd = fmt.Sprintf("psql %s -U %s -d %s", hostArg, e.cfg.User, targetDB)
|
||||
psqlCmd = fmt.Sprintf("psql %s -U %s -d %s -v ON_ERROR_STOP=1", hostArg, e.cfg.User, targetDB)
|
||||
}
|
||||
// Set PGPASSWORD in the bash command for password-less auth
|
||||
cmd = []string{
|
||||
@@ -279,6 +289,7 @@ func (e *Engine) restorePostgreSQLSQL(ctx context.Context, archivePath, targetDB
|
||||
"-p", fmt.Sprintf("%d", e.cfg.Port),
|
||||
"-U", e.cfg.User,
|
||||
"-d", targetDB,
|
||||
"-v", "ON_ERROR_STOP=1",
|
||||
"-f", archivePath,
|
||||
}
|
||||
} else {
|
||||
@@ -286,6 +297,7 @@ func (e *Engine) restorePostgreSQLSQL(ctx context.Context, archivePath, targetDB
|
||||
"psql",
|
||||
"-U", e.cfg.User,
|
||||
"-d", targetDB,
|
||||
"-v", "ON_ERROR_STOP=1",
|
||||
"-f", archivePath,
|
||||
}
|
||||
}
|
||||
@@ -682,6 +694,69 @@ func (e *Engine) RestoreCluster(ctx context.Context, archivePath string) error {
         return fmt.Errorf("failed to read dumps directory: %w", err)
     }
 
+    // PRE-VALIDATE all SQL dumps BEFORE starting restore
+    // This catches truncated files early instead of failing after hours of work
+    e.log.Info("Pre-validating dump files before restore...")
+    e.progress.Update("Pre-validating dump files...")
+    var corruptedDumps []string
+    diagnoser := NewDiagnoser(e.log, false)
+    for _, entry := range entries {
+        if entry.IsDir() {
+            continue
+        }
+        dumpFile := filepath.Join(dumpsDir, entry.Name())
+        if strings.HasSuffix(dumpFile, ".sql.gz") {
+            result, err := diagnoser.DiagnoseFile(dumpFile)
+            if err != nil {
+                e.log.Warn("Could not validate dump file", "file", entry.Name(), "error", err)
+                continue
+            }
+            if result.IsTruncated || result.IsCorrupted || !result.IsValid {
+                dbName := strings.TrimSuffix(entry.Name(), ".sql.gz")
+                errDetail := "unknown issue"
+                if len(result.Errors) > 0 {
+                    errDetail = result.Errors[0]
+                }
+                corruptedDumps = append(corruptedDumps, fmt.Sprintf("%s: %s", dbName, errDetail))
+                e.log.Error("CORRUPTED dump file detected",
+                    "database", dbName,
+                    "file", entry.Name(),
+                    "truncated", result.IsTruncated,
+                    "errors", result.Errors)
+            }
+        } else if strings.HasSuffix(dumpFile, ".dump") {
+            // Validate custom format dumps using pg_restore --list
+            cmd := exec.Command("pg_restore", "--list", dumpFile)
+            output, err := cmd.CombinedOutput()
+            if err != nil {
+                dbName := strings.TrimSuffix(entry.Name(), ".dump")
+                errDetail := strings.TrimSpace(string(output))
+                if len(errDetail) > 100 {
+                    errDetail = errDetail[:100] + "..."
+                }
+                // Check for truncation indicators
+                if strings.Contains(errDetail, "unexpected end") || strings.Contains(errDetail, "invalid") {
+                    corruptedDumps = append(corruptedDumps, fmt.Sprintf("%s: %s", dbName, errDetail))
+                    e.log.Error("CORRUPTED custom dump file detected",
+                        "database", dbName,
+                        "file", entry.Name(),
+                        "error", errDetail)
+                } else {
+                    e.log.Warn("pg_restore --list warning (may be recoverable)",
+                        "file", entry.Name(),
+                        "error", errDetail)
+                }
+            }
+        }
+    }
+    if len(corruptedDumps) > 0 {
+        operation.Fail("Corrupted dump files detected")
+        e.progress.Fail(fmt.Sprintf("Found %d corrupted dump files - restore aborted", len(corruptedDumps)))
+        return fmt.Errorf("pre-validation failed: %d corrupted dump files detected:\n  %s\n\nThe backup archive appears to be damaged. You need to restore from a different backup.",
+            len(corruptedDumps), strings.Join(corruptedDumps, "\n  "))
+    }
+    e.log.Info("All dump files passed validation")
+
     var failedDBs []string
     totalDBs := 0
 
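
For the .sql.gz branch, the cheapest truncation check is simply decompressing to EOF: compress/gzip surfaces a cut-off stream as io.ErrUnexpectedEOF (or a checksum error). A sketch of that check; the repo's Diagnoser may well do more than this:

package main

import (
    "compress/gzip"
    "fmt"
    "io"
    "os"
)

// gzipIntact reads the whole gzip stream without keeping the data.
// A file truncated mid-stream fails here in seconds, long before a
// multi-hour restore would hit the same corruption.
func gzipIntact(path string) error {
    f, err := os.Open(path)
    if err != nil {
        return err
    }
    defer f.Close()

    zr, err := gzip.NewReader(f)
    if err != nil {
        return fmt.Errorf("not a valid gzip stream: %w", err)
    }
    if _, err := io.Copy(io.Discard, zr); err != nil {
        zr.Close()
        return fmt.Errorf("dump appears truncated or corrupted: %w", err)
    }
    return zr.Close()
}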
@@ -1274,3 +1349,48 @@ func FormatBytes(bytes int64) string {
     }
     return fmt.Sprintf("%.1f %cB", float64(bytes)/float64(div), "KMGTPE"[exp])
 }
+
+// quickValidateSQLDump performs a fast validation of SQL dump files
+// by checking for truncated COPY blocks. This catches corrupted dumps
+// BEFORE attempting a full restore (which could waste 49+ minutes).
+func (e *Engine) quickValidateSQLDump(archivePath string, compressed bool) error {
+    e.log.Debug("Pre-validating SQL dump file", "path", archivePath, "compressed", compressed)
+
+    diagnoser := NewDiagnoser(e.log, false) // non-verbose for speed
+    result, err := diagnoser.DiagnoseFile(archivePath)
+    if err != nil {
+        return fmt.Errorf("diagnosis error: %w", err)
+    }
+
+    // Check for critical issues that would cause restore failure
+    if result.IsTruncated {
+        errMsg := "SQL dump file is TRUNCATED"
+        if result.Details != nil && result.Details.UnterminatedCopy {
+            errMsg = fmt.Sprintf("%s - unterminated COPY block for table '%s' at line %d",
+                errMsg, result.Details.LastCopyTable, result.Details.LastCopyLineNumber)
+            if len(result.Details.SampleCopyData) > 0 {
+                errMsg = fmt.Sprintf("%s (sample orphaned data: %s)", errMsg, result.Details.SampleCopyData[0])
+            }
+        }
+        return fmt.Errorf("%s", errMsg)
+    }
+
+    if result.IsCorrupted {
+        return fmt.Errorf("SQL dump file is corrupted: %v", result.Errors)
+    }
+
+    if !result.IsValid {
+        if len(result.Errors) > 0 {
+            return fmt.Errorf("dump validation failed: %s", result.Errors[0])
+        }
+        return fmt.Errorf("dump file is invalid (unknown reason)")
+    }
+
+    // Log any warnings but don't fail
+    for _, warning := range result.Warnings {
+        e.log.Warn("Dump validation warning", "warning", warning)
+    }
+
+    e.log.Debug("SQL dump validation passed", "path", archivePath)
+    return nil
+}
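
The diff consumes the diagnoser's result without showing its definition. From the fields referenced in quickValidateSQLDump and RestoreCluster, the shape is roughly the following; the field names are exactly those used above, but the type names and layout are inferred, not copied from the repo:

// Inferred from usage only - the real declarations may differ.
type DiagnosisResult struct {
    IsValid     bool
    IsTruncated bool
    IsCorrupted bool
    Errors      []string
    Warnings    []string
    Details     *DiagnosisDetails
}

type DiagnosisDetails struct {
    UnterminatedCopy   bool     // COPY block missing its `\.` terminator
    LastCopyTable      string   // table whose COPY block was cut off
    LastCopyLineNumber int      // line where that COPY block started
    SampleCopyData     []string // orphaned rows found after the truncation point
}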
@@ -20,12 +20,14 @@ type BackupExecutionModel struct {
     logger       logger.Logger
     parent       tea.Model
     ctx          context.Context
+    cancel       context.CancelFunc // Cancel function to stop the operation
     backupType   string
     databaseName string
     ratio        int
     status       string
     progress     int
     done         bool
+    cancelling   bool // True when user has requested cancellation
     err          error
     result       string
     startTime    time.Time
@@ -34,11 +36,14 @@ type BackupExecutionModel struct {
 }
 
 func NewBackupExecution(cfg *config.Config, log logger.Logger, parent tea.Model, ctx context.Context, backupType, dbName string, ratio int) BackupExecutionModel {
+    // Create a cancellable context derived from parent
+    childCtx, cancel := context.WithCancel(ctx)
     return BackupExecutionModel{
         config:       cfg,
         logger:       log,
         parent:       parent,
-        ctx:          ctx,
+        ctx:          childCtx,
+        cancel:       cancel,
         backupType:   backupType,
         databaseName: dbName,
         ratio:        ratio,
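
Both TUI models now derive a child context in their constructors and keep its cancel func, so a keypress can abort the running operation. A self-contained sketch of the same pattern outside Bubble Tea (the names and the sleep stand-in are illustrative):

package main

import (
    "context"
    "errors"
    "fmt"
    "os/exec"
    "time"
)

func main() {
    // Derive a cancellable child context, as NewBackupExecution does.
    ctx, cancel := context.WithCancel(context.Background())
    defer cancel()

    // Stand-in for the user pressing ESC/Ctrl+C two seconds in.
    go func() {
        time.Sleep(2 * time.Second)
        cancel()
    }()

    // exec.CommandContext kills the child process once ctx is cancelled.
    err := exec.CommandContext(ctx, "sleep", "60").Run()
    if errors.Is(ctx.Err(), context.Canceled) {
        fmt.Println("operation cancelled:", err)
        return
    }
    fmt.Println("finished:", err)
}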
@@ -206,9 +211,21 @@ func (m BackupExecutionModel) Update(msg tea.Msg) (tea.Model, tea.Cmd) {
         return m, nil
 
     case tea.KeyMsg:
-        if m.done {
-            switch msg.String() {
-            case "enter", "esc", "q":
+        switch msg.String() {
+        case "ctrl+c", "esc":
+            if !m.done && !m.cancelling {
+                // User requested cancellation - cancel the context
+                m.cancelling = true
+                m.status = "⏹️ Cancelling backup... (please wait)"
+                if m.cancel != nil {
+                    m.cancel()
+                }
+                return m, nil
+            } else if m.done {
+                return m.parent, nil
+            }
+        case "enter", "q":
+            if m.done {
                 return m.parent, nil
             }
         }
@@ -240,7 +257,12 @@ func (m BackupExecutionModel) View() string {
 
     // Status with spinner
     if !m.done {
-        s.WriteString(fmt.Sprintf("  %s %s\n", spinnerFrames[m.spinnerFrame], m.status))
+        if m.cancelling {
+            s.WriteString(fmt.Sprintf("  %s %s\n", spinnerFrames[m.spinnerFrame], m.status))
+        } else {
+            s.WriteString(fmt.Sprintf("  %s %s\n", spinnerFrames[m.spinnerFrame], m.status))
+            s.WriteString("\n  ⌨️ Press Ctrl+C or ESC to cancel\n")
+        }
     } else {
         s.WriteString(fmt.Sprintf("  %s\n\n", m.status))
 
@@ -24,6 +24,7 @@ type RestoreExecutionModel struct {
     logger     logger.Logger
     parent     tea.Model
     ctx        context.Context
+    cancel     context.CancelFunc // Cancel function to stop the operation
     archive    ArchiveInfo
     targetDB   string
     cleanFirst bool
@@ -44,19 +45,23 @@ type RestoreExecutionModel struct {
     spinnerFrames []string
 
     // Results
-    done    bool
-    err     error
-    result  string
-    elapsed time.Duration
+    done       bool
+    cancelling bool // True when user has requested cancellation
+    err        error
+    result     string
+    elapsed    time.Duration
 }
 
 // NewRestoreExecution creates a new restore execution model
 func NewRestoreExecution(cfg *config.Config, log logger.Logger, parent tea.Model, ctx context.Context, archive ArchiveInfo, targetDB string, cleanFirst, createIfMissing bool, restoreType string, cleanClusterFirst bool, existingDBs []string, saveDebugLog bool, workDir string) RestoreExecutionModel {
+    // Create a cancellable context derived from parent
+    childCtx, cancel := context.WithCancel(ctx)
     return RestoreExecutionModel{
         config:     cfg,
         logger:     log,
         parent:     parent,
-        ctx:        ctx,
+        ctx:        childCtx,
+        cancel:     cancel,
         archive:    archive,
         targetDB:   targetDB,
         cleanFirst: cleanFirst,
@@ -274,11 +279,32 @@ func (m RestoreExecutionModel) Update(msg tea.Msg) (tea.Model, tea.Cmd) {
 
     case tea.KeyMsg:
         switch msg.String() {
-        case "ctrl+c", "q":
-            // Always allow quitting
-            return m.parent, tea.Quit
-
-        case "enter", " ", "esc":
+        case "ctrl+c", "esc":
+            if !m.done && !m.cancelling {
+                // User requested cancellation - cancel the context
+                m.cancelling = true
+                m.status = "⏹️ Cancelling restore... (please wait)"
+                m.phase = "Cancelling"
+                if m.cancel != nil {
+                    m.cancel()
+                }
+                return m, nil
+            } else if m.done {
+                return m.parent, nil
+            }
+        case "q":
+            if !m.done && !m.cancelling {
+                m.cancelling = true
+                m.status = "⏹️ Cancelling restore... (please wait)"
+                m.phase = "Cancelling"
+                if m.cancel != nil {
+                    m.cancel()
+                }
+                return m, nil
+            } else if m.done {
+                return m.parent, tea.Quit
+            }
+        case "enter", " ":
             if m.done {
                 return m.parent, nil
             }