Compare commits
209 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| 892f94c277 | |||
| 07420b79c9 | |||
| aa15ef1644 | |||
| dc929d9b87 | |||
| 2ef608880d | |||
| 7f1ebd95a1 | |||
| 887d7f7872 | |||
| 59a3e2ebdf | |||
| 3ba7ce897f | |||
| 91b2ff1af9 | |||
| 9a650e339d | |||
| 056094281e | |||
| c52afc5004 | |||
| 047c3b25f5 | |||
| ba435de895 | |||
| a18947a2a5 | |||
| 875f5154f5 | |||
| ca4ec6e9dc | |||
| a33e09d392 | |||
| 0f7d2bf7c6 | |||
| dee0273e6a | |||
| 89769137ad | |||
| 272b0730a8 | |||
| 487293dfc9 | |||
| b8b5264f74 | |||
| 03e9cd81ee | |||
| 6f3282db66 | |||
| 18b1391ede | |||
| 9395d76b90 | |||
| bfc81bfe7a | |||
| 8b4e141d91 | |||
| c6d15d966a | |||
| 5d3526e8ea | |||
| 19571a99cc | |||
| 9e31f620fa | |||
| c244ad152a | |||
| 0e1ed61de2 | |||
| a47817f907 | |||
| 417d6f7349 | |||
| 5e6887054d | |||
| a0e6db4ee9 | |||
| d558a8d16e | |||
| 31cfffee55 | |||
| d6d2d6f867 | |||
| a951048daa | |||
| 8a104d6ce8 | |||
| a7a5e224ee | |||
| 325ca2aecc | |||
| 49a3704554 | |||
| a21b92f091 | |||
| 3153bf965f | |||
| e972a17644 | |||
| 053259604e | |||
| 6aaffbf47c | |||
| 2b6d5b87a1 | |||
| 257cf6ceeb | |||
| 1a10625e5e | |||
| 071334d1e8 | |||
| 323ccb18bc | |||
| 73fe9ef7fa | |||
| 527435a3b8 | |||
| 6a7cf3c11e | |||
| fd3f8770b7 | |||
| 15f10c280c | |||
| 35a9a6e837 | |||
| 82378be971 | |||
| 9fec2c79f8 | |||
| ae34467b4a | |||
| 379ca06146 | |||
| c9bca42f28 | |||
| c90ec1156e | |||
| 23265a33a4 | |||
| 9b9abbfde7 | |||
| 6282d66693 | |||
| 4486a5d617 | |||
| 75dee1fff5 | |||
| 91d494537d | |||
| 8ffc1ba23c | |||
| 8e8045d8c0 | |||
| 0e94dcf384 | |||
| 33adfbdb38 | |||
| af34eaa073 | |||
| babce7cc83 | |||
| ae8c8fde3d | |||
| 346cb7fb61 | |||
| 18549584b1 | |||
| b1d1d57b61 | |||
| d0e1da1bea | |||
| 343a8b782d | |||
| bc5f7c07f4 | |||
| 821521470f | |||
| 147b9fc234 | |||
| 6f3e81a5a6 | |||
| bf1722c316 | |||
| a759f4d3db | |||
| 7cf1d6f85b | |||
| b305d1342e | |||
| 5456da7183 | |||
| f9ff45cf2a | |||
| 72c06ba5c2 | |||
| a0a401cab1 | |||
| 59a717abe7 | |||
| 490a12f858 | |||
| ea4337e298 | |||
| bbd4f0ceac | |||
| f6f8b04785 | |||
| 670c9af2e7 | |||
| e2cf9adc62 | |||
| 29e089fe3b | |||
| 9396c8e605 | |||
| e363e1937f | |||
| df1ab2f55b | |||
| 0e050b2def | |||
| 62d58c77af | |||
| c5be9bcd2b | |||
| b120f1507e | |||
| dd1db844ce | |||
| 4ea3ec2cf8 | |||
| 9200024e50 | |||
| 698b8a761c | |||
| dd7c4da0eb | |||
| b2a78cad2a | |||
| 5728b465e6 | |||
| bfe99e959c | |||
| 780beaadfb | |||
| 838c5b8c15 | |||
| 9d95a193db | |||
| 3201f0fb6a | |||
| 62ddc57fb7 | |||
| 510175ff04 | |||
| a85ad0c88c | |||
| 4938dc1918 | |||
| 09a917766f | |||
| eeacbfa007 | |||
| 7711a206ab | |||
| ba6e8a2b39 | |||
| ec5e89eab7 | |||
| e24d7ab49f | |||
| 721e53fe6a | |||
| 4e09066aa5 | |||
| 6a24ee39be | |||
| dc6dfd8b2c | |||
| 7b4ab76313 | |||
| c0d92b3a81 | |||
| 8c85d85249 | |||
| e0cdcb28be | |||
| 22a7b9e81e | |||
| c71889be47 | |||
| 222bdbef58 | |||
| f7e9fa64f0 | |||
| f153e61dbf | |||
| d19c065658 | |||
| 8dac5efc10 | |||
| fd5edce5ae | |||
| a7e2c86618 | |||
| b2e0c739e0 | |||
| ad23abdf4e | |||
| 390b830976 | |||
| 7e53950967 | |||
| 59d2094241 | |||
| b1f8c6d646 | |||
| b05c2be19d | |||
| ec33959e3e | |||
| 92402f0fdb | |||
| 682510d1bc | |||
| 83ad62b6b5 | |||
| 55d34be32e | |||
| 1831bd7c1f | |||
| 24377eab8f | |||
| 3e41d88445 | |||
| 5fb88b14ba | |||
| cccee4294f | |||
| 9688143176 | |||
| e821e131b4 | |||
| 15a60d2e71 | |||
| 9c65821250 | |||
| 627061cdbb | |||
| e1a7c57e0f | |||
| 22915102d4 | |||
| 3653ced6da | |||
| 9743d571ce | |||
| c519f08ef2 | |||
| b99b05fedb | |||
| c5f2c3322c | |||
| 56ad0824c7 | |||
| ec65df2976 | |||
| 23cc1e0e08 | |||
| 7770abab6f | |||
| f6a20f035b | |||
| 28e54d118f | |||
| ab0ff3f28d | |||
| b7dd325c51 | |||
| 2ed54141a3 | |||
| 495ee31247 | |||
| 78e10f5057 | |||
| f4a0e2d82c | |||
| f66d19acb0 | |||
| 16f377e9b5 | |||
| 7e32a0369d | |||
| 120ee33e3b | |||
| 9f375621d1 | |||
| 9ad925191e | |||
| 9d8a6e763e | |||
| 63b16eee8b | |||
| 91228552fb | |||
| 9ee55309bd | |||
| 0baf741c0b | |||
| faace7271c | |||
| c3ade7a693 |
@ -1,4 +1,6 @@
|
||||
# CI/CD Pipeline for dbbackup
|
||||
# Main repo: Gitea (git.uuxo.net)
|
||||
# Mirror: GitHub (github.com/PlusOne/dbbackup)
|
||||
name: CI/CD
|
||||
|
||||
on:
|
||||
@ -35,6 +37,90 @@ jobs:
|
||||
- name: Coverage summary
|
||||
run: go tool cover -func=coverage.out | tail -1
|
||||
|
||||
test-integration:
|
||||
name: Integration Tests
|
||||
runs-on: ubuntu-latest
|
||||
needs: [test]
|
||||
container:
|
||||
image: golang:1.24-bookworm
|
||||
services:
|
||||
postgres:
|
||||
image: postgres:15
|
||||
env:
|
||||
POSTGRES_PASSWORD: postgres
|
||||
POSTGRES_DB: testdb
|
||||
ports: ['5432:5432']
|
||||
mysql:
|
||||
image: mysql:8
|
||||
env:
|
||||
MYSQL_ROOT_PASSWORD: mysql
|
||||
MYSQL_DATABASE: testdb
|
||||
ports: ['3306:3306']
|
||||
steps:
|
||||
- name: Checkout code
|
||||
env:
|
||||
TOKEN: ${{ github.token }}
|
||||
run: |
|
||||
apt-get update && apt-get install -y -qq git ca-certificates postgresql-client default-mysql-client
|
||||
git config --global --add safe.directory "$GITHUB_WORKSPACE"
|
||||
git init
|
||||
git remote add origin "https://${TOKEN}@git.uuxo.net/${GITHUB_REPOSITORY}.git"
|
||||
git fetch --depth=1 origin "${GITHUB_SHA}"
|
||||
git checkout FETCH_HEAD
|
||||
|
||||
- name: Wait for databases
|
||||
run: |
|
||||
echo "Waiting for PostgreSQL..."
|
||||
for i in $(seq 1 30); do
|
||||
pg_isready -h postgres -p 5432 && break || sleep 1
|
||||
done
|
||||
echo "Waiting for MySQL..."
|
||||
for i in $(seq 1 30); do
|
||||
mysqladmin ping -h mysql -u root -pmysql --silent && break || sleep 1
|
||||
done
|
||||
|
||||
- name: Build dbbackup
|
||||
run: go build -o dbbackup .
|
||||
|
||||
- name: Test PostgreSQL backup/restore
|
||||
env:
|
||||
PGHOST: postgres
|
||||
PGUSER: postgres
|
||||
PGPASSWORD: postgres
|
||||
run: |
|
||||
# Create test data
|
||||
psql -h postgres -c "CREATE TABLE test_table (id SERIAL PRIMARY KEY, name TEXT);"
|
||||
psql -h postgres -c "INSERT INTO test_table (name) VALUES ('test1'), ('test2'), ('test3');"
|
||||
# Run backup - database name is positional argument
|
||||
mkdir -p /tmp/backups
|
||||
./dbbackup backup single testdb --db-type postgres --host postgres --user postgres --password postgres --backup-dir /tmp/backups --no-config --allow-root
|
||||
# Verify backup file exists
|
||||
ls -la /tmp/backups/
|
||||
|
||||
- name: Test MySQL backup/restore
|
||||
env:
|
||||
MYSQL_HOST: mysql
|
||||
MYSQL_USER: root
|
||||
MYSQL_PASSWORD: mysql
|
||||
run: |
|
||||
# Create test data
|
||||
mysql -h mysql -u root -pmysql testdb -e "CREATE TABLE test_table (id INT AUTO_INCREMENT PRIMARY KEY, name VARCHAR(255));"
|
||||
mysql -h mysql -u root -pmysql testdb -e "INSERT INTO test_table (name) VALUES ('test1'), ('test2'), ('test3');"
|
||||
# Run backup - positional arg is db to backup, --database is connection db
|
||||
mkdir -p /tmp/mysql_backups
|
||||
./dbbackup backup single testdb --db-type mysql --host mysql --port 3306 --user root --password mysql --database testdb --backup-dir /tmp/mysql_backups --no-config --allow-root
|
||||
# Verify backup file exists
|
||||
ls -la /tmp/mysql_backups/
|
||||
|
||||
- name: Test verify-locks command
|
||||
env:
|
||||
PGHOST: postgres
|
||||
PGUSER: postgres
|
||||
PGPASSWORD: postgres
|
||||
run: |
|
||||
./dbbackup verify-locks --host postgres --db-type postgres --no-config --allow-root | tee verify-locks.out
|
||||
grep -q 'max_locks_per_transaction' verify-locks.out
|
||||
|
||||
lint:
|
||||
name: Lint
|
||||
runs-on: ubuntu-latest
|
||||
@ -54,43 +140,106 @@ jobs:
|
||||
|
||||
- name: Install and run golangci-lint
|
||||
run: |
|
||||
go install github.com/golangci/golangci-lint/cmd/golangci-lint@v1.62.2
|
||||
go install github.com/golangci/golangci-lint/v2/cmd/golangci-lint@v2.8.0
|
||||
golangci-lint run --timeout=5m ./...
|
||||
|
||||
build:
|
||||
name: Build
|
||||
build-and-release:
|
||||
name: Build & Release
|
||||
runs-on: ubuntu-latest
|
||||
needs: [test, lint]
|
||||
if: startsWith(github.ref, 'refs/tags/v')
|
||||
container:
|
||||
image: golang:1.24-bookworm
|
||||
strategy:
|
||||
matrix:
|
||||
include:
|
||||
- goos: linux
|
||||
goarch: amd64
|
||||
- goos: linux
|
||||
goarch: arm64
|
||||
- goos: darwin
|
||||
goarch: amd64
|
||||
- goos: darwin
|
||||
goarch: arm64
|
||||
steps:
|
||||
- name: Checkout code
|
||||
env:
|
||||
TOKEN: ${{ github.token }}
|
||||
run: |
|
||||
apt-get update && apt-get install -y -qq git ca-certificates
|
||||
apt-get update && apt-get install -y -qq git ca-certificates curl jq
|
||||
git config --global --add safe.directory "$GITHUB_WORKSPACE"
|
||||
git init
|
||||
git remote add origin "https://${TOKEN}@git.uuxo.net/${GITHUB_REPOSITORY}.git"
|
||||
git fetch --depth=1 origin "${GITHUB_SHA}"
|
||||
git checkout FETCH_HEAD
|
||||
|
||||
- name: Build
|
||||
env:
|
||||
GOOS: ${{ matrix.goos }}
|
||||
GOARCH: ${{ matrix.goarch }}
|
||||
CGO_ENABLED: "0"
|
||||
- name: Build all platforms
|
||||
run: |
|
||||
go build -ldflags="-s -w" -o dbbackup-${GOOS}-${GOARCH} .
|
||||
ls -lh dbbackup-*
|
||||
mkdir -p release
|
||||
|
||||
# Install cross-compilation tools for CGO
|
||||
apt-get update && apt-get install -y -qq gcc-aarch64-linux-gnu
|
||||
|
||||
# Linux amd64 (with CGO for SQLite)
|
||||
echo "Building linux/amd64 (CGO enabled)..."
|
||||
CGO_ENABLED=1 GOOS=linux GOARCH=amd64 go build -ldflags="-s -w" -o release/dbbackup-linux-amd64 .
|
||||
|
||||
# Linux arm64 (with CGO for SQLite)
|
||||
echo "Building linux/arm64 (CGO enabled)..."
|
||||
CC=aarch64-linux-gnu-gcc CGO_ENABLED=1 GOOS=linux GOARCH=arm64 go build -ldflags="-s -w" -o release/dbbackup-linux-arm64 .
|
||||
|
||||
# Darwin amd64 (no CGO - cross-compile limitation)
|
||||
echo "Building darwin/amd64 (CGO disabled)..."
|
||||
CGO_ENABLED=0 GOOS=darwin GOARCH=amd64 go build -ldflags="-s -w" -o release/dbbackup-darwin-amd64 .
|
||||
|
||||
# Darwin arm64 (no CGO - cross-compile limitation)
|
||||
echo "Building darwin/arm64 (CGO disabled)..."
|
||||
CGO_ENABLED=0 GOOS=darwin GOARCH=arm64 go build -ldflags="-s -w" -o release/dbbackup-darwin-arm64 .
|
||||
|
||||
# FreeBSD amd64 (no CGO - cross-compile limitation)
|
||||
echo "Building freebsd/amd64 (CGO disabled)..."
|
||||
CGO_ENABLED=0 GOOS=freebsd GOARCH=amd64 go build -ldflags="-s -w" -o release/dbbackup-freebsd-amd64 .
|
||||
|
||||
echo "All builds complete:"
|
||||
ls -lh release/
|
||||
|
||||
- name: Create Gitea Release
|
||||
env:
|
||||
GITEA_TOKEN: ${{ github.token }}
|
||||
run: |
|
||||
TAG=${GITHUB_REF#refs/tags/}
|
||||
|
||||
echo "Creating Gitea release for ${TAG}..."
|
||||
echo "Debug: GITHUB_REPOSITORY=${GITHUB_REPOSITORY}"
|
||||
echo "Debug: TAG=${TAG}"
|
||||
|
||||
# Simple body without special characters
|
||||
BODY="Download binaries for your platform"
|
||||
|
||||
# Create release via API with simple inline JSON
|
||||
RESPONSE=$(curl -s -w "\n%{http_code}" -X POST \
|
||||
-H "Authorization: token ${GITEA_TOKEN}" \
|
||||
-H "Content-Type: application/json" \
|
||||
-d '{"tag_name":"'"${TAG}"'","name":"'"${TAG}"'","body":"'"${BODY}"'","draft":false,"prerelease":false}' \
|
||||
"https://git.uuxo.net/api/v1/repos/${GITHUB_REPOSITORY}/releases")
|
||||
|
||||
HTTP_CODE=$(echo "$RESPONSE" | tail -1)
|
||||
BODY_RESPONSE=$(echo "$RESPONSE" | sed '$d')
|
||||
|
||||
echo "HTTP Code: $HTTP_CODE"
|
||||
echo "Response: $BODY_RESPONSE"
|
||||
|
||||
RELEASE_ID=$(echo "$BODY_RESPONSE" | jq -r '.id')
|
||||
|
||||
if [ "$RELEASE_ID" = "null" ] || [ -z "$RELEASE_ID" ]; then
|
||||
echo "Failed to create release"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
echo "Created release ID: $RELEASE_ID"
|
||||
|
||||
# Upload each binary
|
||||
echo "Files to upload:"
|
||||
ls -la release/
|
||||
|
||||
for file in release/dbbackup-*; do
|
||||
FILENAME=$(basename "$file")
|
||||
echo "Uploading $FILENAME..."
|
||||
UPLOAD_RESPONSE=$(curl -s -X POST \
|
||||
-H "Authorization: token ${GITEA_TOKEN}" \
|
||||
-F "attachment=@${file}" \
|
||||
"https://git.uuxo.net/api/v1/repos/${GITHUB_REPOSITORY}/releases/${RELEASE_ID}/assets?name=${FILENAME}")
|
||||
echo "Upload response: $UPLOAD_RESPONSE"
|
||||
done
|
||||
|
||||
echo "Gitea release complete!"
|
||||
echo "GitHub mirror complete!"
|
||||
75
.gitea/workflows/ci.yml.bak-20260123
Normal file
75
.gitea/workflows/ci.yml.bak-20260123
Normal file
@ -0,0 +1,75 @@
|
||||
# Backup of .gitea/workflows/ci.yml — created before adding integration-verify-locks job
|
||||
# timestamp: 2026-01-23
|
||||
|
||||
# CI/CD Pipeline for dbbackup (backup copy)
|
||||
# Source: .gitea/workflows/ci.yml
|
||||
# Created: 2026-01-23
|
||||
|
||||
name: CI/CD
|
||||
|
||||
on:
|
||||
push:
|
||||
branches: [main, master, develop]
|
||||
tags: ['v*']
|
||||
pull_request:
|
||||
branches: [main, master]
|
||||
|
||||
jobs:
|
||||
test:
|
||||
name: Test
|
||||
runs-on: ubuntu-latest
|
||||
container:
|
||||
image: golang:1.24-bookworm
|
||||
steps:
|
||||
- name: Checkout code
|
||||
env:
|
||||
TOKEN: ${{ github.token }}
|
||||
run: |
|
||||
apt-get update && apt-get install -y -qq git ca-certificates
|
||||
git config --global --add safe.directory "$GITHUB_WORKSPACE"
|
||||
git init
|
||||
git remote add origin "https://${TOKEN}@git.uuxo.net/${GITHUB_REPOSITORY}.git"
|
||||
git fetch --depth=1 origin "${GITHUB_SHA}"
|
||||
git checkout FETCH_HEAD
|
||||
|
||||
- name: Download dependencies
|
||||
run: go mod download
|
||||
|
||||
- name: Run tests
|
||||
run: go test -race -coverprofile=coverage.out ./...
|
||||
|
||||
- name: Coverage summary
|
||||
run: go tool cover -func=coverage.out | tail -1
|
||||
|
||||
lint:
|
||||
name: Lint
|
||||
runs-on: ubuntu-latest
|
||||
container:
|
||||
image: golang:1.24-bookworm
|
||||
steps:
|
||||
- name: Checkout code
|
||||
env:
|
||||
TOKEN: ${{ github.token }}
|
||||
run: |
|
||||
apt-get update && apt-get install -y -qq git ca-certificates
|
||||
git config --global --add safe.directory "$GITHUB_WORKSPACE"
|
||||
git init
|
||||
git remote add origin "https://${TOKEN}@git.uuxo.net/${GITHUB_REPOSITORY}.git"
|
||||
git fetch --depth=1 origin "${GITHUB_SHA}"
|
||||
git checkout FETCH_HEAD
|
||||
|
||||
- name: Install and run golangci-lint
|
||||
run: |
|
||||
go install github.com/golangci/golangci-lint/v2/cmd/golangci-lint@v2.8.0
|
||||
golangci-lint run --timeout=5m ./...
|
||||
|
||||
build-and-release:
|
||||
name: Build & Release
|
||||
runs-on: ubuntu-latest
|
||||
needs: [test, lint]
|
||||
if: startsWith(github.ref, 'refs/tags/v')
|
||||
container:
|
||||
image: golang:1.24-bookworm
|
||||
steps: |
|
||||
<trimmed for backup>
|
||||
|
||||
7
.gitignore
vendored
7
.gitignore
vendored
@ -13,7 +13,8 @@ logs/
|
||||
/dbbackup
|
||||
/dbbackup_*
|
||||
!dbbackup.png
|
||||
bin/
|
||||
bin/dbbackup_*
|
||||
bin/*.exe
|
||||
|
||||
# Ignore development artifacts
|
||||
*.swp
|
||||
@ -33,3 +34,7 @@ coverage.html
|
||||
# Ignore temporary files
|
||||
tmp/
|
||||
temp/
|
||||
CRITICAL_BUGS_FIXED.md
|
||||
LEGAL_DOCUMENTATION.md
|
||||
LEGAL_*.md
|
||||
legal/
|
||||
|
||||
@ -1,16 +1,16 @@
|
||||
# golangci-lint configuration - relaxed for existing codebase
|
||||
version: "2"
|
||||
|
||||
run:
|
||||
timeout: 5m
|
||||
tests: false
|
||||
|
||||
linters:
|
||||
disable-all: true
|
||||
default: none
|
||||
enable:
|
||||
# Only essential linters that catch real bugs
|
||||
- govet
|
||||
- ineffassign
|
||||
|
||||
linters-settings:
|
||||
settings:
|
||||
govet:
|
||||
disable:
|
||||
- fieldalignment
|
||||
|
||||
715
CHANGELOG.md
715
CHANGELOG.md
@ -5,6 +5,721 @@ All notable changes to dbbackup will be documented in this file.
|
||||
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
|
||||
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
|
||||
|
||||
## [3.42.106] - 2026-01-24
|
||||
|
||||
### Fixed - Cluster Restore Resilience & Performance
|
||||
- **Fixed cluster restore failing on missing roles** - harmless "role does not exist" errors no longer abort restore
|
||||
- Added role-related errors to `isIgnorableError()` with warning log
|
||||
- Removed `ON_ERROR_STOP=1` from psql commands (pre-validation catches real corruption)
|
||||
- Restore now continues gracefully when referenced roles don't exist in target cluster
|
||||
- Previously caused 12h+ restores to fail at 94% completion
|
||||
|
||||
- **Fixed TUI output scrambling in screen/tmux sessions** - added terminal detection
|
||||
- Uses `go-isatty` to detect non-interactive terminals (backgrounded screen sessions, pipes)
|
||||
- Added `viewSimple()` methods for clean line-by-line output without ANSI escape codes
|
||||
- TUI menu now shows warning when running in non-interactive terminal
|
||||
|
||||
### Changed - Consistent Parallel Compression (pgzip)
|
||||
- **Migrated all gzip operations to parallel pgzip** - 2-4x faster compression/decompression on multi-core systems
|
||||
- Systematic audit found 17 files using standard `compress/gzip`
|
||||
- All converted to `github.com/klauspost/pgzip` for consistent performance
|
||||
- **Files updated**:
|
||||
- `internal/backup/`: incremental_tar.go, incremental_extract.go, incremental_mysql.go
|
||||
- `internal/wal/`: compression.go (CompressWALFile, DecompressWALFile, VerifyCompressedFile)
|
||||
- `internal/engine/`: clone.go, snapshot_engine.go, mysqldump.go, binlog/file_target.go
|
||||
- `internal/restore/`: engine.go, safety.go, formats.go, error_report.go
|
||||
- `internal/pitr/`: mysql.go, binlog.go
|
||||
- `internal/dedup/`: store.go
|
||||
- `cmd/`: dedup.go, placeholder.go
|
||||
- **Benefit**: Large backup/restore operations now fully utilize available CPU cores
|
||||
|
||||
## [3.42.105] - 2026-01-23
|
||||
|
||||
### Changed - TUI Visual Cleanup
|
||||
- **Removed ASCII box characters** from backup/restore success/failure banners
|
||||
- Replaced `╔═╗║╚╝` boxes with clean `═══` horizontal line separators
|
||||
- Cleaner, more modern appearance in terminal output
|
||||
- **Consolidated duplicate styles** in TUI components
|
||||
- Unified check status styles (passed/failed/warning/pending) into global definitions
|
||||
- Reduces code duplication across restore preview and diagnose views
|
||||
|
||||
## [3.42.98] - 2025-01-23
|
||||
|
||||
### Fixed - Critical Bug Fixes for v3.42.97
|
||||
- **Fixed CGO/SQLite build issue** - binaries now work when compiled with `CGO_ENABLED=0`
|
||||
- Switched from `github.com/mattn/go-sqlite3` (requires CGO) to `modernc.org/sqlite` (pure Go)
|
||||
- All cross-compiled binaries now work correctly on all platforms
|
||||
- No more "Binary was compiled with 'CGO_ENABLED=0', go-sqlite3 requires cgo to work" errors
|
||||
|
||||
- **Fixed MySQL positional database argument being ignored**
|
||||
- `dbbackup backup single <dbname> --db-type mysql` now correctly uses `<dbname>`
|
||||
- Previously defaulted to 'postgres' regardless of positional argument
|
||||
- Also fixed in `backup sample` command
|
||||
|
||||
## [3.42.97] - 2025-01-23
|
||||
|
||||
### Added - Bandwidth Throttling for Cloud Uploads
|
||||
- **New `--bandwidth-limit` flag for cloud operations** - prevent network saturation during business hours
|
||||
- Works with S3, GCS, Azure Blob Storage, MinIO, Backblaze B2
|
||||
- Supports human-readable formats:
|
||||
- `10MB/s`, `50MiB/s` - megabytes per second
|
||||
- `100KB/s`, `500KiB/s` - kilobytes per second
|
||||
- `1GB/s` - gigabytes per second
|
||||
- `100Mbps` - megabits per second (for network-minded users)
|
||||
- `unlimited` or `0` - no limit (default)
|
||||
- Environment variable: `DBBACKUP_BANDWIDTH_LIMIT`
|
||||
- **Example usage**:
|
||||
```bash
|
||||
# Limit upload to 10 MB/s during business hours
|
||||
dbbackup cloud upload backup.dump --bandwidth-limit 10MB/s
|
||||
|
||||
# Environment variable for all operations
|
||||
export DBBACKUP_BANDWIDTH_LIMIT=50MiB/s
|
||||
```
|
||||
- **Implementation**: Token-bucket style throttling with 100ms windows for smooth rate limiting
|
||||
- **DBA requested feature**: Avoid saturating production network during scheduled backups
|
||||
|
||||
## [3.42.96] - 2025-02-01
|
||||
|
||||
### Changed - Complete Elimination of Shell tar/gzip Dependencies
|
||||
- **All tar/gzip operations now 100% in-process** - ZERO shell dependencies for backup/restore
|
||||
- Removed ALL remaining `exec.Command("tar", ...)` calls
|
||||
- Removed ALL remaining `exec.Command("gzip", ...)` calls
|
||||
- Systematic code audit found and eliminated:
|
||||
- `diagnose.go`: Replaced `tar -tzf` test with direct file open check
|
||||
- `large_restore_check.go`: Replaced `gzip -t` and `gzip -l` with in-process pgzip verification
|
||||
- `pitr/restore.go`: Replaced `tar -xf` with in-process tar extraction
|
||||
- **Benefits**:
|
||||
- No external tool dependencies (works in minimal containers)
|
||||
- 2-4x faster on multi-core systems using parallel pgzip
|
||||
- More reliable error handling with Go-native errors
|
||||
- Consistent behavior across all platforms
|
||||
- Reduced attack surface (no shell spawning)
|
||||
- **Verification**: `strace` and `ps aux` show no tar/gzip/gunzip processes during backup/restore
|
||||
- **Note**: Docker drill container commands still use gunzip for in-container operations (intentional)
|
||||
|
||||
## [Unreleased]
|
||||
|
||||
### Added - Single Database Extraction from Cluster Backups (CLI + TUI)
|
||||
- **Extract and restore individual databases from cluster backups** - selective restore without full cluster restoration
|
||||
- **CLI Commands**:
|
||||
- **List databases**: `dbbackup restore cluster backup.tar.gz --list-databases`
|
||||
- Shows all databases in cluster backup with sizes
|
||||
- Fast scan without full extraction
|
||||
- **Extract single database**: `dbbackup restore cluster backup.tar.gz --database myapp --output-dir /tmp/extract`
|
||||
- Extracts only the specified database dump
|
||||
- No restore, just file extraction
|
||||
- **Restore single database from cluster**: `dbbackup restore cluster backup.tar.gz --database myapp --confirm`
|
||||
- Extracts and restores only one database
|
||||
- Much faster than full cluster restore when you only need one database
|
||||
- **Rename on restore**: `dbbackup restore cluster backup.tar.gz --database myapp --target myapp_test --confirm`
|
||||
- Restore with different database name (useful for testing)
|
||||
- **Extract multiple databases**: `dbbackup restore cluster backup.tar.gz --databases "app1,app2,app3" --output-dir /tmp/extract`
|
||||
- Comma-separated list of databases to extract
|
||||
- **TUI Support**:
|
||||
- Press **'s'** on any cluster backup in archive browser to select individual databases
|
||||
- New **ClusterDatabaseSelector** view shows all databases with sizes
|
||||
- Navigate with arrow keys, select with Enter
|
||||
- Automatic handling when cluster backup selected in single restore mode
|
||||
- Full restore preview and confirmation workflow
|
||||
- **Benefits**:
|
||||
- Faster restores (extract only what you need)
|
||||
- Less disk space usage during restore
|
||||
- Easy database migration/copying
|
||||
- Better testing workflow
|
||||
- Selective disaster recovery
|
||||
|
||||
### Performance - Cluster Restore Optimization
|
||||
- **Eliminated duplicate archive extraction in cluster restore** - saves 30-50% time on large restores
|
||||
- Previously: Archive was extracted twice (once in preflight validation, once in actual restore)
|
||||
- Now: Archive extracted once and reused for both validation and restore
|
||||
- **Time savings**:
|
||||
- 50 GB cluster: ~3-6 minutes faster
|
||||
- 10 GB cluster: ~1-2 minutes faster
|
||||
- Small clusters (<5 GB): ~30 seconds faster
|
||||
- Optimization automatically enabled when `--diagnose` flag is used
|
||||
- New `ValidateAndExtractCluster()` performs combined validation + extraction
|
||||
- `RestoreCluster()` accepts optional `preExtractedPath` parameter to reuse extracted directory
|
||||
- Disk space checks intelligently skipped when using pre-extracted directory
|
||||
- Maintains backward compatibility - works with and without pre-extraction
|
||||
- Log output shows optimization: `"Using pre-extracted cluster directory ... optimization: skipping duplicate extraction"`
|
||||
|
||||
### Improved - Archive Validation
|
||||
- **Enhanced tar.gz validation with stream-based checks**
|
||||
- Fast header-only validation (validates gzip + tar structure without full extraction)
|
||||
- Checks gzip magic bytes (0x1f 0x8b) and tar header signature
|
||||
- Reduces preflight validation time from minutes to seconds on large archives
|
||||
- Falls back to full extraction only when necessary (with `--diagnose`)
|
||||
|
||||
### Added - PostgreSQL lock verification (CLI + preflight)
|
||||
- **`dbbackup verify-locks`** — new CLI command that probes PostgreSQL GUCs (`max_locks_per_transaction`, `max_connections`, `max_prepared_transactions`) and prints total lock capacity plus actionable restore guidance.
|
||||
- **Integrated into preflight checks** — preflight now warns/fails when lock settings are insufficient and provides exact remediation commands and recommended restore flags (e.g. `--jobs 1 --parallel-dbs 1`).
|
||||
- **Implemented in Go (replaces `verify_postgres_locks.sh`)** with robust parsing, sudo/`psql` fallback and unit-tested decision logic.
|
||||
- **Files:** `cmd/verify_locks.go`, `internal/checks/locks.go`, `internal/checks/locks_test.go`, `internal/checks/preflight.go`.
|
||||
- **Why:** Prevents repeated parallel-restore failures by surfacing lock-capacity issues early and providing bulletproof guidance.
|
||||
|
||||
## [3.42.74] - 2026-01-20 "Resource Profile System + Critical Ctrl+C Fix"
|
||||
|
||||
### Critical Bug Fix
|
||||
- **Fixed Ctrl+C not working in TUI backup/restore** - Context cancellation was broken in TUI mode
|
||||
- `executeBackupWithTUIProgress()` and `executeRestoreWithTUIProgress()` created new contexts with `WithCancel(parentCtx)`
|
||||
- When user pressed Ctrl+C, `model.cancel()` was called on parent context but execution had separate context
|
||||
- Fixed by using parent context directly instead of creating new one
|
||||
- Ctrl+C/ESC/q now properly propagate cancellation to running operations
|
||||
- Users can now interrupt long-running TUI operations
|
||||
|
||||
### Added - Resource Profile System
|
||||
- **`--profile` flag for restore operations** with three presets:
|
||||
- **Conservative** (`--profile=conservative`): Single-threaded (`--parallel=1`), minimal memory usage
|
||||
- Best for resource-constrained servers, shared hosting, or when "out of shared memory" errors occur
|
||||
- Automatically enables `LargeDBMode` for better resource management
|
||||
- **Balanced** (default): Auto-detect resources, moderate parallelism
|
||||
- Good default for most scenarios
|
||||
- **Aggressive** (`--profile=aggressive`): Maximum parallelism, all available resources
|
||||
- Best for dedicated database servers with ample resources
|
||||
- **Potato** (`--profile=potato`): Easter egg 🥔, same as conservative
|
||||
- **Profile system applies to both CLI and TUI**:
|
||||
- CLI: `dbbackup restore cluster backup.tar.gz --profile=conservative --confirm`
|
||||
- TUI: Automatically uses conservative profile for safer interactive operation
|
||||
- **User overrides supported**: `--jobs` and `--parallel-dbs` flags override profile settings
|
||||
- **New `internal/config/profile.go`** module:
|
||||
- `GetRestoreProfile(name)` - Returns profile settings
|
||||
- `ApplyProfile(cfg, profile, jobs, parallelDBs)` - Applies profile with overrides
|
||||
- `GetProfileDescription(name)` - Human-readable descriptions
|
||||
- `ListProfiles()` - All available profiles
|
||||
|
||||
### Added - PostgreSQL Diagnostic Tools
|
||||
- **`diagnose_postgres_memory.sh`** - Comprehensive memory and resource analysis script:
|
||||
- System memory overview with usage percentages and warnings
|
||||
- Top 15 memory consuming processes
|
||||
- PostgreSQL-specific memory configuration analysis
|
||||
- Current locks and connections monitoring
|
||||
- Shared memory segments inspection
|
||||
- Disk space and swap usage checks
|
||||
- Identifies other resource consumers (Nessus, Elastic Agent, monitoring tools)
|
||||
- Smart recommendations based on findings
|
||||
- Detects temp file usage (indicator of low work_mem)
|
||||
- **`fix_postgres_locks.sh`** - PostgreSQL lock configuration helper:
|
||||
- Automatically increases `max_locks_per_transaction` to 4096
|
||||
- Shows current configuration before applying changes
|
||||
- Calculates total lock capacity
|
||||
- Provides restart commands for different PostgreSQL setups
|
||||
- References diagnostic tool for comprehensive analysis
|
||||
|
||||
### Added - Documentation
|
||||
- **`RESTORE_PROFILES.md`** - Complete profile guide with real-world scenarios:
|
||||
- Profile comparison table
|
||||
- When to use each profile
|
||||
- Override examples
|
||||
- Troubleshooting guide for "out of shared memory" errors
|
||||
- Integration with diagnostic tools
|
||||
- **`email_infra_team.txt`** - Admin communication template (German):
|
||||
- Analysis results template
|
||||
- Problem identification section
|
||||
- Three solution variants (temporary, permanent, workaround)
|
||||
- Includes diagnostic tool references
|
||||
|
||||
### Changed - TUI Improvements
|
||||
- **TUI mode defaults to conservative profile** for safer operation
|
||||
- Interactive users benefit from stability over speed
|
||||
- Prevents resource exhaustion on shared systems
|
||||
- Can be overridden with environment variable: `export RESOURCE_PROFILE=balanced`
|
||||
|
||||
### Fixed
|
||||
- Context cancellation in TUI backup operations (critical)
|
||||
- Context cancellation in TUI restore operations (critical)
|
||||
- Better error diagnostics for "out of shared memory" errors
|
||||
- Improved resource detection and management
|
||||
|
||||
### Technical Details
|
||||
- Profile system respects explicit user flags (`--jobs`, `--parallel-dbs`)
|
||||
- Conservative profile sets `cfg.LargeDBMode = true` automatically
|
||||
- TUI profile selection logged when `Debug` mode enabled
|
||||
- All profiles support both single and cluster restore operations
|
||||
|
||||
## [3.42.50] - 2026-01-16 "Ctrl+C Signal Handling Fix"
|
||||
|
||||
### Fixed - Proper Ctrl+C/SIGINT Handling in TUI
|
||||
- **Added tea.InterruptMsg handling** - Bubbletea v1.3+ sends `InterruptMsg` for SIGINT signals
|
||||
instead of a `KeyMsg` with "ctrl+c", causing cancellation to not work
|
||||
- **Fixed cluster restore cancellation** - Ctrl+C now properly cancels running restore operations
|
||||
- **Fixed cluster backup cancellation** - Ctrl+C now properly cancels running backup operations
|
||||
- **Added interrupt handling to main menu** - Proper cleanup on SIGINT from menu
|
||||
- **Orphaned process cleanup** - `cleanup.KillOrphanedProcesses()` called on all interrupt paths
|
||||
|
||||
### Changed
|
||||
- All TUI execution views now handle both `tea.KeyMsg` ("ctrl+c") and `tea.InterruptMsg`
|
||||
- Context cancellation properly propagates to child processes via `exec.CommandContext`
|
||||
- No zombie pg_dump/pg_restore/gzip processes left behind on cancellation
|
||||
|
||||
## [3.42.49] - 2026-01-16 "Unified Cluster Backup Progress"
|
||||
|
||||
### Added - Unified Progress Display for Cluster Backup
|
||||
- **Combined overall progress bar** for cluster backup showing all phases:
|
||||
- Phase 1/3: Backing up Globals (0-15% of overall)
|
||||
- Phase 2/3: Backing up Databases (15-90% of overall)
|
||||
- Phase 3/3: Compressing Archive (90-100% of overall)
|
||||
- **Current database indicator** - Shows which database is currently being backed up
|
||||
- **Phase-aware progress tracking** - New fields in backup progress state:
|
||||
- `overallPhase` - Current phase (1=globals, 2=databases, 3=compressing)
|
||||
- `phaseDesc` - Human-readable phase description
|
||||
- **Dual progress bars** for cluster backup:
|
||||
- Overall progress bar showing combined operation progress
|
||||
- Database count progress bar showing individual database progress
|
||||
|
||||
### Changed
|
||||
- Cluster backup TUI now shows unified progress display matching restore
|
||||
- Progress callbacks now include phase information
|
||||
- Better visual feedback during entire cluster backup operation
|
||||
|
||||
## [3.42.48] - 2026-01-15 "Unified Cluster Restore Progress"
|
||||
|
||||
### Added - Unified Progress Display for Cluster Restore
|
||||
- **Combined overall progress bar** showing progress across all restore phases:
|
||||
- Phase 1/3: Extracting Archive (0-60% of overall)
|
||||
- Phase 2/3: Restoring Globals (60-65% of overall)
|
||||
- Phase 3/3: Restoring Databases (65-100% of overall)
|
||||
- **Current database indicator** - Shows which database is currently being restored
|
||||
- **Phase-aware progress tracking** - New fields in progress state:
|
||||
- `overallPhase` - Current phase (1=extraction, 2=globals, 3=databases)
|
||||
- `currentDB` - Name of database currently being restored
|
||||
- `extractionDone` - Boolean flag for phase transition
|
||||
- **Dual progress bars** for cluster restore:
|
||||
- Overall progress bar showing combined operation progress
|
||||
- Phase-specific progress bar (extraction bytes or database count)
|
||||
|
||||
### Changed
|
||||
- Cluster restore TUI now shows unified progress display
|
||||
- Progress callbacks now set phase and current database information
|
||||
- Extraction completion triggers automatic transition to globals phase
|
||||
- Database restore phase shows current database name with spinner
|
||||
|
||||
### Improved
|
||||
- Better visual feedback during entire cluster restore operation
|
||||
- Clear phase indicators help users understand restore progress
|
||||
- Overall progress percentage gives better time estimates
|
||||
|
||||
## [3.42.35] - 2026-01-15 "TUI Detailed Progress"
|
||||
|
||||
### Added - Enhanced TUI Progress Display
|
||||
- **Detailed progress bar in TUI restore** - schollz-style progress bar with:
|
||||
- Byte progress display (e.g., `245 MB / 1.2 GB`)
|
||||
- Transfer speed calculation (e.g., `45 MB/s`)
|
||||
- ETA prediction for long operations
|
||||
- Unicode block-based visual bar
|
||||
- **Real-time extraction progress** - Archive extraction now reports actual bytes processed
|
||||
- **Go-native tar extraction** - Uses Go's `archive/tar` + `compress/gzip` when progress callback is set
|
||||
- **New `DetailedProgress` component** in TUI package:
|
||||
- `NewDetailedProgress(total, description)` - Byte-based progress
|
||||
- `NewDetailedProgressItems(total, description)` - Item count progress
|
||||
- `NewDetailedProgressSpinner(description)` - Indeterminate spinner
|
||||
- `RenderProgressBar(width)` - Generate schollz-style output
|
||||
- **Progress callback API** in restore engine:
|
||||
- `SetProgressCallback(func(current, total int64, description string))`
|
||||
- Allows TUI to receive real-time progress updates from restore operations
|
||||
- **Shared progress state** pattern for Bubble Tea integration
|
||||
|
||||
### Changed
|
||||
- TUI restore execution now shows detailed byte progress during archive extraction
|
||||
- Cluster restore shows extraction progress instead of just spinner
|
||||
- Falls back to shell `tar` command when no progress callback is set (faster)
|
||||
|
||||
### Technical Details
|
||||
- `progressReader` wrapper tracks bytes read through gzip/tar pipeline
|
||||
- Throttled progress updates (every 100ms) to avoid UI flooding
|
||||
- Thread-safe shared state pattern for cross-goroutine progress updates
|
||||
|
||||
## [3.42.34] - 2026-01-14 "Filesystem Abstraction"
|
||||
|
||||
### Added - spf13/afero for Filesystem Abstraction
|
||||
- **New `internal/fs` package** for testable filesystem operations
|
||||
- **In-memory filesystem** for unit testing without disk I/O
|
||||
- **Global FS interface** that can be swapped for testing:
|
||||
```go
|
||||
fs.SetFS(afero.NewMemMapFs()) // Use memory
|
||||
fs.ResetFS() // Back to real disk
|
||||
```
|
||||
- **Wrapper functions** for all common file operations:
|
||||
- `ReadFile`, `WriteFile`, `Create`, `Open`, `Remove`, `RemoveAll`
|
||||
- `Mkdir`, `MkdirAll`, `ReadDir`, `Walk`, `Glob`
|
||||
- `Exists`, `DirExists`, `IsDir`, `IsEmpty`
|
||||
- `TempDir`, `TempFile`, `CopyFile`, `FileSize`
|
||||
- **Testing helpers**:
|
||||
- `WithMemFs(fn)` - Execute function with temp in-memory FS
|
||||
- `SetupTestDir(files)` - Create test directory structure
|
||||
- **Comprehensive test suite** demonstrating usage
|
||||
|
||||
### Changed
|
||||
- Upgraded afero from v1.10.0 to v1.15.0
|
||||
|
||||
## [3.42.33] - 2026-01-14 "Exponential Backoff Retry"
|
||||
|
||||
### Added - cenkalti/backoff for Cloud Operation Retry
|
||||
- **Exponential backoff retry** for all cloud operations (S3, Azure, GCS)
|
||||
- **Retry configurations**:
|
||||
- `DefaultRetryConfig()` - 5 retries, 500ms→30s backoff, 5 min max
|
||||
- `AggressiveRetryConfig()` - 10 retries, 1s→60s backoff, 15 min max
|
||||
- `QuickRetryConfig()` - 3 retries, 100ms→5s backoff, 30s max
|
||||
- **Smart error classification**:
|
||||
- `IsPermanentError()` - Auth/bucket errors (no retry)
|
||||
- `IsRetryableError()` - Timeout/network errors (retry)
|
||||
- **Retry logging** - Each retry attempt is logged with wait duration
|
||||
|
||||
### Changed
|
||||
- S3 simple upload, multipart upload, download now retry on transient failures
|
||||
- Azure simple upload, download now retry on transient failures
|
||||
- GCS upload, download now retry on transient failures
|
||||
- Large file multipart uploads use `AggressiveRetryConfig()` (more retries)
|
||||
|
||||
## [3.42.32] - 2026-01-14 "Cross-Platform Colors"
|
||||
|
||||
### Added - fatih/color for Cross-Platform Terminal Colors
|
||||
- **Windows-compatible colors** - Native Windows console API support
|
||||
- **Color helper functions** in `logger` package:
|
||||
- `Success()`, `Error()`, `Warning()`, `Info()` - Status messages with icons
|
||||
- `Header()`, `Dim()`, `Bold()` - Text styling
|
||||
- `Green()`, `Red()`, `Yellow()`, `Cyan()` - Colored text
|
||||
- `StatusLine()`, `TableRow()` - Formatted output
|
||||
- `DisableColors()`, `EnableColors()` - Runtime control
|
||||
- **Consistent color scheme** across all log levels
|
||||
|
||||
### Changed
|
||||
- Logger `CleanFormatter` now uses fatih/color instead of raw ANSI codes
|
||||
- All progress indicators use fatih/color for `[OK]`/`[FAIL]` status
|
||||
- Automatic color detection (disabled for non-TTY)
|
||||
|
||||
## [3.42.31] - 2026-01-14 "Visual Progress Bars"
|
||||
|
||||
### Added - schollz/progressbar for Enhanced Progress Display
|
||||
- **Visual progress bars** for cloud uploads/downloads with:
|
||||
- Byte transfer display (e.g., `245 MB / 1.2 GB`)
|
||||
- Transfer speed (e.g., `45 MB/s`)
|
||||
- ETA prediction
|
||||
- Color-coded progress with Unicode blocks
|
||||
- **Checksum verification progress** - visual progress while calculating SHA-256
|
||||
- **Spinner for indeterminate operations** - Braille-style spinner when size unknown
|
||||
- New progress types: `NewSchollzBar()`, `NewSchollzBarItems()`, `NewSchollzSpinner()`
|
||||
- Progress bar `Writer()` method for io.Copy integration
|
||||
|
||||
### Changed
|
||||
- Cloud download shows real-time byte progress instead of 10% log messages
|
||||
- Cloud upload shows visual progress bar instead of debug logs
|
||||
- Checksum verification shows progress for large files
|
||||
|
||||
## [3.42.30] - 2026-01-09 "Better Error Aggregation"
|
||||
|
||||
### Added - go-multierror for Cluster Restore Errors
|
||||
- **Enhanced error reporting** - Now shows ALL database failures, not just a count
|
||||
- Uses `hashicorp/go-multierror` for proper error aggregation
|
||||
- Each failed database error is preserved with full context
|
||||
- Bullet-pointed error output for readability:
|
||||
```
|
||||
cluster restore completed with 3 failures:
|
||||
3 database(s) failed:
|
||||
• db1: restore failed: max_locks_per_transaction exceeded
|
||||
• db2: restore failed: connection refused
|
||||
• db3: failed to create database: permission denied
|
||||
```
|
||||
|
||||
### Changed
|
||||
- Replaced string slice error collection with proper `*multierror.Error`
|
||||
- Thread-safe error aggregation with dedicated mutex
|
||||
- Improved error wrapping with `%w` for error chain preservation
|
||||
|
||||
## [3.42.10] - 2026-01-08 "Code Quality"
|
||||
|
||||
### Fixed - Code Quality Issues
|
||||
- Removed deprecated `io/ioutil` usage (replaced with `os`)
|
||||
- Fixed `os.DirEntry.ModTime()` → `file.Info().ModTime()`
|
||||
- Removed unused fields and variables
|
||||
- Fixed ineffective assignments in TUI code
|
||||
- Fixed error strings (no capitalization, no trailing punctuation)
|
||||
|
||||
## [3.42.9] - 2026-01-08 "Diagnose Timeout Fix"
|
||||
|
||||
### Fixed - diagnose.go Timeout Bugs
|
||||
|
||||
**More short timeouts that caused large archive failures:**
|
||||
|
||||
- `diagnoseClusterArchive()`: tar listing 60s → **5 minutes**
|
||||
- `verifyWithPgRestore()`: pg_restore --list 60s → **5 minutes**
|
||||
- `DiagnoseClusterDumps()`: archive listing 120s → **10 minutes**
|
||||
|
||||
**Impact:** These timeouts caused "context deadline exceeded" errors when
|
||||
diagnosing multi-GB backup archives, preventing TUI restore from even starting.
|
||||
|
||||
## [3.42.8] - 2026-01-08 "TUI Timeout Fix"
|
||||
|
||||
### Fixed - TUI Timeout Bugs Causing Backup/Restore Failures
|
||||
|
||||
**ROOT CAUSE of 2-3 month TUI backup/restore failures identified and fixed:**
|
||||
|
||||
#### Critical Timeout Fixes:
|
||||
- **restore_preview.go**: Safety check timeout increased from 60s → **10 minutes**
|
||||
- Large archives (>1GB) take 2+ minutes to diagnose
|
||||
- Users saw "context deadline exceeded" before backup even started
|
||||
- **dbselector.go**: Database listing timeout increased from 15s → **60 seconds**
|
||||
- Busy PostgreSQL servers need more time to respond
|
||||
- **status.go**: Status check timeout increased from 10s → **30 seconds**
|
||||
- SSL negotiation and slow networks caused failures
|
||||
|
||||
#### Stability Improvements:
|
||||
- **Panic recovery** added to parallel goroutines in:
|
||||
- `backup/engine.go:BackupCluster()` - cluster backup workers
|
||||
- `restore/engine.go:RestoreCluster()` - cluster restore workers
|
||||
- Prevents single database panic from crashing entire operation
|
||||
|
||||
#### Bug Fix:
|
||||
- **restore/engine.go**: Fixed variable shadowing `err` → `cmdErr` for exit code detection
|
||||
|
||||
## [3.42.7] - 2026-01-08 "Context Killer Complete"
|
||||
|
||||
### Fixed - Additional Deadlock Bugs in Restore & Engine
|
||||
|
||||
**All remaining cmd.Wait() deadlock bugs fixed across the codebase:**
|
||||
|
||||
#### internal/restore/engine.go:
|
||||
- `executeRestoreWithDecompression()` - gunzip/pigz pipeline restore
|
||||
- `extractArchive()` - tar extraction for cluster restore
|
||||
- `restoreGlobals()` - pg_dumpall globals restore
|
||||
|
||||
#### internal/backup/engine.go:
|
||||
- `createArchive()` - tar/pigz archive creation pipeline
|
||||
|
||||
#### internal/engine/mysqldump.go:
|
||||
- `Backup()` - mysqldump backup operation
|
||||
- `BackupToWriter()` - streaming mysqldump to writer
|
||||
|
||||
**All 6 functions now use proper channel-based context handling with Process.Kill().**
|
||||
|
||||
## [3.42.6] - 2026-01-08 "Deadlock Killer"
|
||||
|
||||
### Fixed - Backup Command Context Handling
|
||||
|
||||
**Critical Bug: pg_dump/mysqldump could hang forever on context cancellation**
|
||||
|
||||
The `executeCommand`, `executeCommandWithProgress`, `executeMySQLWithProgressAndCompression`,
|
||||
and `executeMySQLWithCompression` functions had a race condition where:
|
||||
|
||||
1. A goroutine was spawned to read stderr
|
||||
2. `cmd.Wait()` was called directly
|
||||
3. If context was cancelled, the process was NOT killed
|
||||
4. The goroutine could hang forever waiting for stderr
|
||||
|
||||
**Fix**: All backup execution functions now use proper channel-based context handling:
|
||||
```go
|
||||
// Wait for command with context handling
|
||||
cmdDone := make(chan error, 1)
|
||||
go func() {
|
||||
cmdDone <- cmd.Wait()
|
||||
}()
|
||||
|
||||
select {
|
||||
case cmdErr = <-cmdDone:
|
||||
// Command completed
|
||||
case <-ctx.Done():
|
||||
// Context cancelled - kill process
|
||||
cmd.Process.Kill()
|
||||
<-cmdDone
|
||||
cmdErr = ctx.Err()
|
||||
}
|
||||
```
|
||||
|
||||
**Affected Functions:**
|
||||
- `executeCommand()` - pg_dump for cluster backup
|
||||
- `executeCommandWithProgress()` - pg_dump for single backup with progress
|
||||
- `executeMySQLWithProgressAndCompression()` - mysqldump pipeline
|
||||
- `executeMySQLWithCompression()` - mysqldump pipeline
|
||||
|
||||
**This fixes:** Backup operations hanging indefinitely when cancelled or timing out.
|
||||
|
||||
## [3.42.5] - 2026-01-08 "False Positive Fix"
|
||||
|
||||
### Fixed - Encryption Detection Bug
|
||||
|
||||
**IsBackupEncrypted False Positive:**
|
||||
- **BUG FIX**: `IsBackupEncrypted()` returned `true` for ALL files, blocking normal restores
|
||||
- Root cause: Fallback logic checked if first 12 bytes (nonce size) could be read - always true
|
||||
- Fix: Now properly detects known unencrypted formats by magic bytes:
|
||||
- Gzip: `1f 8b`
|
||||
- PostgreSQL custom: `PGDMP`
|
||||
- Plain SQL: starts with `--`, `SET`, `CREATE`
|
||||
- Returns `false` if no metadata present and format is recognized as unencrypted
|
||||
- Affected file: `internal/backup/encryption.go`
|
||||
|
||||
## [3.42.4] - 2026-01-08 "The Long Haul"
|
||||
|
||||
### Fixed - Critical Restore Timeout Bug
|
||||
|
||||
**Removed Arbitrary Timeouts from Backup/Restore Operations:**
|
||||
- **CRITICAL FIX**: Removed 4-hour timeout that was killing large database restores
|
||||
- PostgreSQL cluster restores of 69GB+ databases no longer fail with "context deadline exceeded"
|
||||
- All backup/restore operations now use `context.WithCancel` instead of `context.WithTimeout`
|
||||
- Operations run until completion or manual cancellation (Ctrl+C)
|
||||
|
||||
**Affected Files:**
|
||||
- `internal/tui/restore_exec.go`: Changed from 4-hour timeout to context.WithCancel
|
||||
- `internal/tui/backup_exec.go`: Changed from 4-hour timeout to context.WithCancel
|
||||
- `internal/backup/engine.go`: Removed per-database timeout in cluster backup
|
||||
- `cmd/restore.go`: CLI restore commands use context.WithCancel
|
||||
|
||||
**exec.Command Context Audit:**
|
||||
- Fixed `exec.Command` without Context in `internal/restore/engine.go:730`
|
||||
- Added proper context handling to all external command calls
|
||||
- Added timeouts only for quick diagnostic/version checks (not restore path):
|
||||
- `restore/version_check.go`: 30s timeout for pg_restore --version check only
|
||||
- `restore/error_report.go`: 10s timeout for tool version detection
|
||||
- `restore/diagnose.go`: 60s timeout for diagnostic functions
|
||||
- `pitr/binlog.go`: 10s timeout for mysqlbinlog --version check
|
||||
- `cleanup/processes.go`: 5s timeout for process listing
|
||||
- `auth/helper.go`: 30s timeout for auth helper commands
|
||||
|
||||
**Verification:**
|
||||
- 54 total `exec.CommandContext` calls verified in backup/restore/pitr path
|
||||
- 0 `exec.Command` without Context in critical restore path
|
||||
- All 14 PostgreSQL exec calls use CommandContext (pg_dump, pg_restore, psql)
|
||||
- All 15 MySQL/MariaDB exec calls use CommandContext (mysqldump, mysql, mysqlbinlog)
|
||||
- All 14 test packages pass
|
||||
|
||||
### Technical Details
|
||||
- Large Object (BLOB/BYTEA) restores are particularly affected by timeouts
|
||||
- 69GB database with large objects can take 5+ hours to restore
|
||||
- Previous 4-hour hard timeout was causing consistent failures
|
||||
- Now: No timeout - runs until complete or user cancels
|
||||
|
||||
## [3.42.1] - 2026-01-07 "Resistance is Futile"
|
||||
|
||||
### Added - Content-Defined Chunking Deduplication
|
||||
|
||||
**Deduplication Engine:**
|
||||
- New `dbbackup dedup` command family for space-efficient backups
|
||||
- Gear hash content-defined chunking (CDC) with 92%+ overlap on shifted data
|
||||
- SHA-256 content-addressed storage - chunks stored by hash
|
||||
- AES-256-GCM per-chunk encryption (optional, via `--encrypt`)
|
||||
- Gzip compression enabled by default
|
||||
- SQLite index for fast chunk lookups
|
||||
- JSON manifests track chunks per backup with full verification
|
||||
|
||||
**Dedup Commands:**
|
||||
```bash
|
||||
dbbackup dedup backup <file> # Create deduplicated backup
|
||||
dbbackup dedup backup <file> --encrypt # With encryption
|
||||
dbbackup dedup restore <id> <output> # Restore from manifest
|
||||
dbbackup dedup list # List all backups
|
||||
dbbackup dedup stats # Show deduplication statistics
|
||||
dbbackup dedup delete <id> # Delete a backup manifest
|
||||
dbbackup dedup gc # Garbage collect unreferenced chunks
|
||||
```
|
||||
|
||||
**Storage Structure:**
|
||||
```
|
||||
<backup-dir>/dedup/
|
||||
chunks/ # Content-addressed chunk files (sharded by hash prefix)
|
||||
manifests/ # JSON manifest per backup
|
||||
chunks.db # SQLite index for fast lookups
|
||||
```
|
||||
|
||||
**Test Results:**
|
||||
- First 5MB backup: 448 chunks, 5MB stored
|
||||
- Modified 5MB file: 448 chunks, only 1 NEW chunk (1.6KB), 100% dedup ratio
|
||||
- Restore with SHA-256 verification
|
||||
|
||||
### Added - Documentation Updates
|
||||
- Prometheus alerting rules added to SYSTEMD.md
|
||||
- Catalog sync instructions for existing backups
|
||||
|
||||
## [3.41.1] - 2026-01-07
|
||||
|
||||
### Fixed
|
||||
- Enabled CGO for Linux builds (required for SQLite catalog)
|
||||
|
||||
## [3.41.0] - 2026-01-07 "The Operator"
|
||||
|
||||
### Added - Systemd Integration & Prometheus Metrics
|
||||
|
||||
**Embedded Systemd Installer:**
|
||||
- New `dbbackup install` command installs as systemd service/timer
|
||||
- Supports single-database (`--backup-type single`) and cluster (`--backup-type cluster`) modes
|
||||
- Automatic `dbbackup` user/group creation with proper permissions
|
||||
- Hardened service units with security features (NoNewPrivileges, ProtectSystem, CapabilityBoundingSet)
|
||||
- Templated timer units with configurable schedules (daily, weekly, or custom OnCalendar)
|
||||
- Built-in dry-run mode (`--dry-run`) to preview installation
|
||||
- `dbbackup install --status` shows current installation state
|
||||
- `dbbackup uninstall` cleanly removes all systemd units and optionally configuration
|
||||
|
||||
**Prometheus Metrics Support:**
|
||||
- New `dbbackup metrics export` command writes textfile collector format
|
||||
- New `dbbackup metrics serve` command runs HTTP exporter on port 9399
|
||||
- Metrics: `dbbackup_last_success_timestamp`, `dbbackup_rpo_seconds`, `dbbackup_backup_total`, etc.
|
||||
- Integration with node_exporter textfile collector
|
||||
- Metrics automatically updated via ExecStopPost in service units
|
||||
- `--with-metrics` flag during install sets up exporter as systemd service
|
||||
|
||||
**New Commands:**
|
||||
```bash
|
||||
# Install as systemd service
|
||||
sudo dbbackup install --backup-type cluster --schedule daily
|
||||
|
||||
# Install with Prometheus metrics
|
||||
sudo dbbackup install --with-metrics --metrics-port 9399
|
||||
|
||||
# Check installation status
|
||||
dbbackup install --status
|
||||
|
||||
# Export metrics for node_exporter
|
||||
dbbackup metrics export --output /var/lib/dbbackup/metrics/dbbackup.prom
|
||||
|
||||
# Run HTTP metrics server
|
||||
dbbackup metrics serve --port 9399
|
||||
```
|
||||
|
||||
### Technical Details
|
||||
- Systemd templates embedded with `//go:embed` for self-contained binary
|
||||
- Templates use ReadWritePaths for security isolation
|
||||
- Service units include proper OOMScoreAdjust (-100) to protect backups
|
||||
- Metrics exporter caches with 30-second TTL for performance
|
||||
- Graceful shutdown on SIGTERM for metrics server
|
||||
|
||||
---
|
||||
|
||||
## [3.41.0] - 2026-01-07 "The Pre-Flight Check"
|
||||
|
||||
### Added - 🛡️ Pre-Restore Validation
|
||||
|
||||
**Automatic Dump Validation Before Restore:**
|
||||
- SQL dump files are now validated BEFORE attempting restore
|
||||
- Detects truncated COPY blocks that cause "syntax error" failures
|
||||
- Catches corrupted backups in seconds instead of wasting 49+ minutes
|
||||
- Cluster restore pre-validates ALL dumps upfront (fail-fast approach)
|
||||
- Custom format `.dump` files now validated with `pg_restore --list`
|
||||
|
||||
**Improved Error Messages:**
|
||||
- Clear indication when dump file is truncated
|
||||
- Shows which table's COPY block was interrupted
|
||||
- Displays sample orphaned data for diagnosis
|
||||
- Provides actionable error messages with root cause
|
||||
|
||||
### Fixed
|
||||
- **P0: SQL Injection** - Added identifier validation for database names in CREATE/DROP DATABASE to prevent SQL injection attacks; uses safe quoting and regex validation (alphanumeric + underscore only)
|
||||
- **P0: Data Race** - Fixed concurrent goroutines appending to shared error slice in notification manager; now uses mutex synchronization
|
||||
- **P0: psql ON_ERROR_STOP** - Added `-v ON_ERROR_STOP=1` to psql commands to fail fast on first error instead of accumulating millions of errors
|
||||
- **P1: Pipe deadlock** - Fixed streaming compression deadlock when pg_dump blocks on full pipe buffer; now uses goroutine with proper context timeout handling
|
||||
- **P1: SIGPIPE handling** - Detect exit code 141 (broken pipe) and report compressor failure as root cause
|
||||
- **P2: .dump validation** - Custom format dumps now validated with `pg_restore --list` before restore
|
||||
- **P2: fsync durability** - Added `outFile.Sync()` after streaming compression to prevent truncation on power loss
|
||||
- Truncated `.sql.gz` dumps no longer waste hours on doomed restores
|
||||
- "syntax error at or near" errors now caught before restore begins
|
||||
- Cluster restores abort immediately if any dump is corrupted
|
||||
|
||||
### Technical Details
|
||||
- Integrated `Diagnoser` into restore pipeline for pre-validation
|
||||
- Added `quickValidateSQLDump()` for fast integrity checks
|
||||
- Pre-validation runs on all `.sql.gz` and `.dump` files in cluster archives
|
||||
- Streaming compression uses channel-based wait with context cancellation
|
||||
- Zero performance impact on valid backups (diagnosis is fast)
|
||||
|
||||
---
|
||||
|
||||
## [3.40.0] - 2026-01-05 "The Diagnostician"
|
||||
|
||||
### Added - 🔍 Restore Diagnostics & Error Reporting
|
||||
|
||||
@ -17,7 +17,7 @@ Be respectful, constructive, and professional in all interactions. We're buildin
|
||||
|
||||
**Bug Report Template:**
|
||||
```
|
||||
**Version:** dbbackup v3.40.0
|
||||
**Version:** dbbackup v3.42.1
|
||||
**OS:** Linux/macOS/BSD
|
||||
**Database:** PostgreSQL 14 / MySQL 8.0 / MariaDB 10.6
|
||||
**Command:** The exact command that failed
|
||||
|
||||
272
QUICK.md
Normal file
272
QUICK.md
Normal file
@ -0,0 +1,272 @@
|
||||
# dbbackup Quick Reference
|
||||
|
||||
Real examples, no fluff.
|
||||
|
||||
## Basic Backups
|
||||
|
||||
```bash
|
||||
# PostgreSQL (auto-detects all databases)
|
||||
dbbackup backup all /mnt/backups/databases
|
||||
|
||||
# Single database
|
||||
dbbackup backup single myapp /mnt/backups/databases
|
||||
|
||||
# MySQL
|
||||
dbbackup backup single gitea --db-type mysql --db-host 127.0.0.1 --db-port 3306 /mnt/backups/databases
|
||||
|
||||
# With compression level (1-9, default 6)
|
||||
dbbackup backup all /mnt/backups/databases --compression-level 9
|
||||
|
||||
# As root (requires flag)
|
||||
sudo dbbackup backup all /mnt/backups/databases --allow-root
|
||||
```
|
||||
|
||||
## PITR (Point-in-Time Recovery)
|
||||
|
||||
```bash
|
||||
# Enable WAL archiving for a database
|
||||
dbbackup pitr enable myapp /mnt/backups/wal
|
||||
|
||||
# Take base backup (required before PITR works)
|
||||
dbbackup pitr base myapp /mnt/backups/wal
|
||||
|
||||
# Check PITR status
|
||||
dbbackup pitr status myapp /mnt/backups/wal
|
||||
|
||||
# Restore to specific point in time
|
||||
dbbackup pitr restore myapp /mnt/backups/wal --target-time "2026-01-23 14:30:00"
|
||||
|
||||
# Restore to latest available
|
||||
dbbackup pitr restore myapp /mnt/backups/wal --target-time latest
|
||||
|
||||
# Disable PITR
|
||||
dbbackup pitr disable myapp
|
||||
```
|
||||
|
||||
## Deduplication
|
||||
|
||||
```bash
|
||||
# Backup with dedup (saves ~60-80% space on similar databases)
|
||||
dbbackup backup all /mnt/backups/databases --dedup
|
||||
|
||||
# Check dedup stats
|
||||
dbbackup dedup stats /mnt/backups/databases
|
||||
|
||||
# Prune orphaned chunks (after deleting old backups)
|
||||
dbbackup dedup prune /mnt/backups/databases
|
||||
|
||||
# Verify chunk integrity
|
||||
dbbackup dedup verify /mnt/backups/databases
|
||||
```
|
||||
|
||||
## Cloud Storage
|
||||
|
||||
```bash
|
||||
# Upload to S3/MinIO
|
||||
dbbackup cloud upload /mnt/backups/databases/myapp_2026-01-23.sql.gz \
|
||||
--provider s3 \
|
||||
--bucket my-backups \
|
||||
--endpoint https://s3.amazonaws.com
|
||||
|
||||
# Upload to MinIO (self-hosted)
|
||||
dbbackup cloud upload backup.sql.gz \
|
||||
--provider s3 \
|
||||
--bucket backups \
|
||||
--endpoint https://minio.internal:9000
|
||||
|
||||
# Upload to Google Cloud Storage
|
||||
dbbackup cloud upload backup.sql.gz \
|
||||
--provider gcs \
|
||||
--bucket my-gcs-bucket
|
||||
|
||||
# Upload to Azure Blob
|
||||
dbbackup cloud upload backup.sql.gz \
|
||||
--provider azure \
|
||||
--bucket mycontainer
|
||||
|
||||
# With bandwidth limit (don't saturate the network)
|
||||
dbbackup cloud upload backup.sql.gz --provider s3 --bucket backups --bandwidth-limit 10MB/s
|
||||
|
||||
# List remote backups
|
||||
dbbackup cloud list --provider s3 --bucket my-backups
|
||||
|
||||
# Download
|
||||
dbbackup cloud download myapp_2026-01-23.sql.gz /tmp/ --provider s3 --bucket my-backups
|
||||
|
||||
# Sync local backup dir to cloud
|
||||
dbbackup cloud sync /mnt/backups/databases --provider s3 --bucket my-backups
|
||||
```
|
||||
|
||||
### Cloud Environment Variables
|
||||
|
||||
```bash
|
||||
# S3/MinIO
|
||||
export AWS_ACCESS_KEY_ID=AKIAXXXXXXXX
|
||||
export AWS_SECRET_ACCESS_KEY=xxxxxxxx
|
||||
export AWS_REGION=eu-central-1
|
||||
|
||||
# GCS
|
||||
export GOOGLE_APPLICATION_CREDENTIALS=/path/to/service-account.json
|
||||
|
||||
# Azure
|
||||
export AZURE_STORAGE_ACCOUNT=mystorageaccount
|
||||
export AZURE_STORAGE_KEY=xxxxxxxx
|
||||
```
|
||||
|
||||
## Encryption
|
||||
|
||||
```bash
|
||||
# Backup with encryption (AES-256-GCM)
|
||||
dbbackup backup all /mnt/backups/databases --encrypt --encrypt-key "my-secret-passphrase"
|
||||
|
||||
# Or use environment variable
|
||||
export DBBACKUP_ENCRYPT_KEY="my-secret-passphrase"
|
||||
dbbackup backup all /mnt/backups/databases --encrypt
|
||||
|
||||
# Restore encrypted backup
|
||||
dbbackup restore /mnt/backups/databases/myapp_2026-01-23.sql.gz.enc myapp_restored \
|
||||
--encrypt-key "my-secret-passphrase"
|
||||
```
|
||||
|
||||
## Catalog (Backup Inventory)
|
||||
|
||||
```bash
|
||||
# Sync local backups to catalog
|
||||
dbbackup catalog sync /mnt/backups/databases
|
||||
|
||||
# List all backups
|
||||
dbbackup catalog list
|
||||
|
||||
# Show gaps (missing daily backups)
|
||||
dbbackup catalog gaps
|
||||
|
||||
# Search backups
|
||||
dbbackup catalog search myapp
|
||||
|
||||
# Export catalog to JSON
|
||||
dbbackup catalog export --format json > backups.json
|
||||
```
|
||||
|
||||
## Restore
|
||||
|
||||
```bash
|
||||
# Restore to new database
|
||||
dbbackup restore /mnt/backups/databases/myapp_2026-01-23.sql.gz myapp_restored
|
||||
|
||||
# Restore to existing database (overwrites!)
|
||||
dbbackup restore /mnt/backups/databases/myapp_2026-01-23.sql.gz myapp --force
|
||||
|
||||
# Restore MySQL
|
||||
dbbackup restore /mnt/backups/databases/gitea_2026-01-23.sql.gz gitea_restored \
|
||||
--db-type mysql --db-host 127.0.0.1
|
||||
|
||||
# Verify restore (restores to temp db, runs checks, drops it)
|
||||
dbbackup verify-restore /mnt/backups/databases/myapp_2026-01-23.sql.gz
|
||||
```
|
||||
|
||||
## Retention & Cleanup
|
||||
|
||||
```bash
|
||||
# Delete backups older than 30 days
|
||||
dbbackup cleanup /mnt/backups/databases --older-than 30d
|
||||
|
||||
# Keep 7 daily, 4 weekly, 12 monthly (GFS)
|
||||
dbbackup cleanup /mnt/backups/databases --keep-daily 7 --keep-weekly 4 --keep-monthly 12
|
||||
|
||||
# Dry run (show what would be deleted)
|
||||
dbbackup cleanup /mnt/backups/databases --older-than 30d --dry-run
|
||||
```
|
||||
|
||||
## Disaster Recovery Drill
|
||||
|
||||
```bash
|
||||
# Full DR test (restores random backup, verifies, cleans up)
|
||||
dbbackup drill /mnt/backups/databases
|
||||
|
||||
# Test specific database
|
||||
dbbackup drill /mnt/backups/databases --database myapp
|
||||
|
||||
# With email report
|
||||
dbbackup drill /mnt/backups/databases --notify admin@example.com
|
||||
```
|
||||
|
||||
## Monitoring & Metrics
|
||||
|
||||
```bash
|
||||
# Prometheus metrics endpoint
|
||||
dbbackup metrics serve --port 9101
|
||||
|
||||
# One-shot status check (for scripts)
|
||||
dbbackup status /mnt/backups/databases
|
||||
echo $? # 0 = OK, 1 = warnings, 2 = critical
|
||||
|
||||
# Generate HTML report
|
||||
dbbackup report /mnt/backups/databases --output backup-report.html
|
||||
```
|
||||
|
||||
## Systemd Timer (Recommended)
|
||||
|
||||
```bash
|
||||
# Install systemd units
|
||||
sudo dbbackup install systemd --backup-path /mnt/backups/databases --schedule "02:00"
|
||||
|
||||
# Creates:
|
||||
# /etc/systemd/system/dbbackup.service
|
||||
# /etc/systemd/system/dbbackup.timer
|
||||
|
||||
# Check timer
|
||||
systemctl status dbbackup.timer
|
||||
systemctl list-timers dbbackup.timer
|
||||
```
|
||||
|
||||
## Common Combinations
|
||||
|
||||
```bash
|
||||
# Full production setup: encrypted, deduplicated, uploaded to S3
|
||||
dbbackup backup all /mnt/backups/databases \
|
||||
--dedup \
|
||||
--encrypt \
|
||||
--compression-level 9
|
||||
|
||||
dbbackup cloud sync /mnt/backups/databases \
|
||||
--provider s3 \
|
||||
--bucket prod-backups \
|
||||
--bandwidth-limit 50MB/s
|
||||
|
||||
# Quick MySQL backup to S3
|
||||
dbbackup backup single shopdb --db-type mysql /tmp/backup && \
|
||||
dbbackup cloud upload /tmp/backup/shopdb_*.sql.gz --provider s3 --bucket backups
|
||||
|
||||
# PITR-enabled PostgreSQL with cloud sync
|
||||
dbbackup pitr enable proddb /mnt/wal
|
||||
dbbackup pitr base proddb /mnt/wal
|
||||
dbbackup cloud sync /mnt/wal --provider gcs --bucket wal-archive
|
||||
```
|
||||
|
||||
## Environment Variables
|
||||
|
||||
| Variable | Description |
|
||||
|----------|-------------|
|
||||
| `DBBACKUP_ENCRYPT_KEY` | Encryption passphrase |
|
||||
| `DBBACKUP_BANDWIDTH_LIMIT` | Cloud upload limit (e.g., `10MB/s`) |
|
||||
| `PGHOST`, `PGPORT`, `PGUSER` | PostgreSQL connection |
|
||||
| `MYSQL_HOST`, `MYSQL_TCP_PORT` | MySQL connection |
|
||||
| `AWS_ACCESS_KEY_ID` | S3/MinIO credentials |
|
||||
| `GOOGLE_APPLICATION_CREDENTIALS` | GCS service account JSON path |
|
||||
| `AZURE_STORAGE_ACCOUNT` | Azure storage account name |
|
||||
|
||||
## Quick Checks
|
||||
|
||||
```bash
|
||||
# What version?
|
||||
dbbackup --version
|
||||
|
||||
# What's installed?
|
||||
dbbackup status
|
||||
|
||||
# Test database connection
|
||||
dbbackup backup single testdb /tmp --dry-run
|
||||
|
||||
# Verify a backup file
|
||||
dbbackup verify /mnt/backups/databases/myapp_2026-01-23.sql.gz
|
||||
```
|
||||
192
README.md
192
README.md
@ -19,6 +19,8 @@ Database backup and restore utility for PostgreSQL, MySQL, and MariaDB.
|
||||
- Point-in-Time Recovery (PITR) for PostgreSQL and MySQL/MariaDB
|
||||
- **GFS retention policies**: Grandfather-Father-Son backup rotation
|
||||
- **Notifications**: SMTP email and webhook alerts
|
||||
- **Systemd integration**: Install as service with scheduled timers
|
||||
- **Prometheus metrics**: Textfile collector and HTTP exporter
|
||||
- Interactive terminal UI
|
||||
- Cross-platform binaries
|
||||
|
||||
@ -54,7 +56,7 @@ Download from [releases](https://git.uuxo.net/UUXO/dbbackup/releases):
|
||||
|
||||
```bash
|
||||
# Linux x86_64
|
||||
wget https://git.uuxo.net/UUXO/dbbackup/releases/download/v3.40.0/dbbackup-linux-amd64
|
||||
wget https://git.uuxo.net/UUXO/dbbackup/releases/download/v3.42.74/dbbackup-linux-amd64
|
||||
chmod +x dbbackup-linux-amd64
|
||||
sudo mv dbbackup-linux-amd64 /usr/local/bin/dbbackup
|
||||
```
|
||||
@ -141,7 +143,7 @@ Backup Execution
|
||||
|
||||
Backup created: cluster_20251128_092928.tar.gz
|
||||
Size: 22.5 GB (compressed)
|
||||
Location: /u01/dba/dumps/
|
||||
Location: /var/backups/postgres/
|
||||
Databases: 7
|
||||
Checksum: SHA-256 verified
|
||||
```
|
||||
@ -192,21 +194,59 @@ r: Restore | v: Verify | i: Info | d: Diagnose | D: Delete | R: Refresh | Esc: B
|
||||
```
|
||||
Configuration Settings
|
||||
|
||||
[SYSTEM] Detected Resources
|
||||
CPU: 8 physical cores, 16 logical cores
|
||||
Memory: 32GB total, 28GB available
|
||||
Recommended Profile: balanced
|
||||
→ 8 cores and 32GB RAM supports moderate parallelism
|
||||
|
||||
[CONFIG] Current Settings
|
||||
Target DB: PostgreSQL (postgres)
|
||||
Database: postgres@localhost:5432
|
||||
Backup Dir: /var/backups/postgres
|
||||
Compression: Level 6
|
||||
Profile: balanced | Cluster: 2 parallel | Jobs: 4
|
||||
|
||||
> Database Type: postgres
|
||||
CPU Workload Type: balanced
|
||||
Backup Directory: /root/db_backups
|
||||
Work Directory: /tmp
|
||||
Resource Profile: balanced (P:2 J:4)
|
||||
Cluster Parallelism: 2
|
||||
Backup Directory: /var/backups/postgres
|
||||
Work Directory: (system temp)
|
||||
Compression Level: 6
|
||||
Parallel Jobs: 16
|
||||
Dump Jobs: 8
|
||||
Parallel Jobs: 4
|
||||
Dump Jobs: 4
|
||||
Database Host: localhost
|
||||
Database Port: 5432
|
||||
Database User: root
|
||||
Database User: postgres
|
||||
SSL Mode: prefer
|
||||
|
||||
s: Save | r: Reset | q: Menu
|
||||
[KEYS] ↑↓ navigate | Enter edit | 'l' toggle LargeDB | 'c' conservative | 'p' recommend | 's' save | 'q' menu
|
||||
```
|
||||
|
||||
**Resource Profiles for Large Databases:**
|
||||
|
||||
When restoring large databases on VMs with limited resources, use the resource profile settings to prevent "out of shared memory" errors:
|
||||
|
||||
| Profile | Cluster Parallel | Jobs | Best For |
|
||||
|---------|------------------|------|----------|
|
||||
| conservative | 1 | 1 | Small VMs (<16GB RAM) |
|
||||
| balanced | 2 | 2-4 | Medium VMs (16-32GB RAM) |
|
||||
| performance | 4 | 4-8 | Large servers (32GB+ RAM) |
|
||||
| max-performance | 8 | 8-16 | High-end servers (64GB+) |
|
||||
|
||||
**Large DB Mode:** Toggle with `l` key. Reduces parallelism by 50% and sets max_locks_per_transaction=8192 for complex databases with many tables/LOBs.
|
||||
|
||||
**Quick shortcuts:** Press `l` to toggle Large DB Mode, `c` for conservative, `p` to show recommendation.
|
||||
|
||||
**Troubleshooting Tools:**
|
||||
|
||||
For PostgreSQL restore issues ("out of shared memory" errors), diagnostic scripts are available:
|
||||
- **diagnose_postgres_memory.sh** - Comprehensive system memory, PostgreSQL configuration, and resource analysis
|
||||
- **fix_postgres_locks.sh** - Automatically increase max_locks_per_transaction to 4096
|
||||
|
||||
See [RESTORE_PROFILES.md](RESTORE_PROFILES.md) for detailed troubleshooting guidance.
|
||||
|
||||
**Database Status:**
|
||||
```
|
||||
Database Status & Health Check
|
||||
@ -246,12 +286,21 @@ dbbackup restore single backup.dump --target myapp_db --create --confirm
|
||||
# Restore cluster
|
||||
dbbackup restore cluster cluster_backup.tar.gz --confirm
|
||||
|
||||
# Restore with resource profile (for resource-constrained servers)
|
||||
dbbackup restore cluster backup.tar.gz --profile=conservative --confirm
|
||||
|
||||
# Restore with debug logging (saves detailed error report on failure)
|
||||
dbbackup restore cluster backup.tar.gz --save-debug-log /tmp/restore-debug.json --confirm
|
||||
|
||||
# Diagnose backup before restore
|
||||
dbbackup restore diagnose backup.dump.gz --deep
|
||||
|
||||
# Check PostgreSQL lock configuration (preflight for large restores)
|
||||
# - warns/fails when `max_locks_per_transaction` is insufficient and prints exact remediation
|
||||
# - safe to run before a restore to determine whether single-threaded restore is required
|
||||
# Example:
|
||||
# dbbackup verify-locks
|
||||
|
||||
# Cloud backup
|
||||
dbbackup backup single mydb --cloud s3://my-bucket/backups/
|
||||
|
||||
@ -271,6 +320,7 @@ dbbackup backup single mydb --dry-run
|
||||
| `restore pitr` | Point-in-Time Recovery |
|
||||
| `restore diagnose` | Diagnose backup file integrity |
|
||||
| `verify-backup` | Verify backup integrity |
|
||||
| `verify-locks` | Check PostgreSQL lock settings and get restore guidance |
|
||||
| `cleanup` | Remove old backups |
|
||||
| `status` | Check connection status |
|
||||
| `preflight` | Run pre-backup checks |
|
||||
@ -284,6 +334,10 @@ dbbackup backup single mydb --dry-run
|
||||
| `drill` | DR drill testing |
|
||||
| `report` | Compliance report generation |
|
||||
| `rto` | RTO/RPO analysis |
|
||||
| `install` | Install as systemd service |
|
||||
| `uninstall` | Remove systemd service |
|
||||
| `metrics export` | Export Prometheus metrics to textfile |
|
||||
| `metrics serve` | Run Prometheus HTTP exporter |
|
||||
|
||||
## Global Flags
|
||||
|
||||
@ -297,6 +351,7 @@ dbbackup backup single mydb --dry-run
|
||||
| `--backup-dir` | Backup directory | ~/db_backups |
|
||||
| `--compression` | Compression level (0-9) | 6 |
|
||||
| `--jobs` | Parallel jobs | 8 |
|
||||
| `--profile` | Resource profile (conservative/balanced/aggressive) | balanced |
|
||||
| `--cloud` | Cloud storage URI | - |
|
||||
| `--encrypt` | Enable encryption | false |
|
||||
| `--dry-run, -n` | Run preflight checks only | false |
|
||||
@ -673,6 +728,102 @@ dbbackup rto analyze mydb --target-rto 4h --target-rpo 1h
|
||||
- Compliance status
|
||||
- Recommendations for improvement
|
||||
|
||||
## Systemd Integration
|
||||
|
||||
Install dbbackup as a systemd service for automated scheduled backups:
|
||||
|
||||
```bash
|
||||
# Install with Prometheus metrics exporter
|
||||
sudo dbbackup install --backup-type cluster --with-metrics
|
||||
|
||||
# Preview what would be installed
|
||||
dbbackup install --dry-run --backup-type cluster
|
||||
|
||||
# Check installation status
|
||||
dbbackup install --status
|
||||
|
||||
# Uninstall
|
||||
sudo dbbackup uninstall cluster --purge
|
||||
```
|
||||
|
||||
**Schedule options:**
|
||||
```bash
|
||||
--schedule daily # Every day at midnight (default)
|
||||
--schedule weekly # Every Monday at midnight
|
||||
--schedule "*-*-* 02:00:00" # Every day at 2am
|
||||
--schedule "Mon *-*-* 03:00" # Every Monday at 3am
|
||||
```
|
||||
|
||||
**What gets installed:**
|
||||
- Systemd service and timer units
|
||||
- Dedicated `dbbackup` user with security hardening
|
||||
- Directories: `/var/lib/dbbackup/`, `/etc/dbbackup/`
|
||||
- Optional: Prometheus HTTP exporter on port 9399
|
||||
|
||||
📖 **Full documentation:** [SYSTEMD.md](SYSTEMD.md) - Manual setup, security hardening, multiple instances, troubleshooting
|
||||
|
||||
## Prometheus Metrics
|
||||
|
||||
Export backup metrics for monitoring with Prometheus:
|
||||
|
||||
### Textfile Collector
|
||||
|
||||
For integration with node_exporter:
|
||||
|
||||
```bash
|
||||
# Export metrics to textfile
|
||||
dbbackup metrics export --output /var/lib/node_exporter/textfile_collector/dbbackup.prom
|
||||
|
||||
# Export for specific instance
|
||||
dbbackup metrics export --instance production --output /var/lib/dbbackup/metrics/production.prom
|
||||
```
|
||||
|
||||
Configure node_exporter:
|
||||
```bash
|
||||
node_exporter --collector.textfile.directory=/var/lib/node_exporter/textfile_collector/
|
||||
```
|
||||
|
||||
### HTTP Exporter
|
||||
|
||||
Run a dedicated metrics HTTP server:
|
||||
|
||||
```bash
|
||||
# Start metrics server on default port 9399
|
||||
dbbackup metrics serve
|
||||
|
||||
# Custom port
|
||||
dbbackup metrics serve --port 9100
|
||||
|
||||
# Run as systemd service (installed via --with-metrics)
|
||||
sudo systemctl start dbbackup-exporter
|
||||
```
|
||||
|
||||
**Endpoints:**
|
||||
- `/metrics` - Prometheus exposition format
|
||||
- `/health` - Health check (returns 200 OK)
|
||||
|
||||
**Available metrics:**
|
||||
| Metric | Type | Description |
|
||||
|--------|------|-------------|
|
||||
| `dbbackup_last_success_timestamp` | gauge | Unix timestamp of last successful backup |
|
||||
| `dbbackup_last_backup_duration_seconds` | gauge | Duration of last backup |
|
||||
| `dbbackup_last_backup_size_bytes` | gauge | Size of last backup |
|
||||
| `dbbackup_backup_total` | counter | Total backups by status (success/failure) |
|
||||
| `dbbackup_rpo_seconds` | gauge | Seconds since last successful backup |
|
||||
| `dbbackup_backup_verified` | gauge | Whether last backup was verified (1/0) |
|
||||
| `dbbackup_scrape_timestamp` | gauge | When metrics were collected |
|
||||
|
||||
**Labels:** `instance`, `database`, `engine`
|
||||
|
||||
**Example Prometheus query:**
|
||||
```promql
|
||||
# Alert if RPO exceeds 24 hours
|
||||
dbbackup_rpo_seconds{instance="production"} > 86400
|
||||
|
||||
# Backup success rate
|
||||
sum(rate(dbbackup_backup_total{status="success"}[24h])) / sum(rate(dbbackup_backup_total[24h]))
|
||||
```
|
||||
|
||||
## Configuration
|
||||
|
||||
### PostgreSQL Authentication
|
||||
@ -756,14 +907,29 @@ Workload types:
|
||||
|
||||
## Documentation
|
||||
|
||||
- [DOCKER.md](DOCKER.md) - Docker deployment
|
||||
- [CLOUD.md](CLOUD.md) - Cloud storage configuration
|
||||
- [PITR.md](PITR.md) - Point-in-Time Recovery
|
||||
- [AZURE.md](AZURE.md) - Azure Blob Storage
|
||||
- [GCS.md](GCS.md) - Google Cloud Storage
|
||||
**Quick Start:**
|
||||
- [QUICK.md](QUICK.md) - Real-world examples cheat sheet
|
||||
|
||||
**Guides:**
|
||||
- [docs/PITR.md](docs/PITR.md) - Point-in-Time Recovery (PostgreSQL)
|
||||
- [docs/MYSQL_PITR.md](docs/MYSQL_PITR.md) - Point-in-Time Recovery (MySQL)
|
||||
- [docs/ENGINES.md](docs/ENGINES.md) - Database engine configuration
|
||||
- [docs/RESTORE_PROFILES.md](docs/RESTORE_PROFILES.md) - Restore resource profiles
|
||||
|
||||
**Cloud Storage:**
|
||||
- [docs/CLOUD.md](docs/CLOUD.md) - Cloud storage overview
|
||||
- [docs/AZURE.md](docs/AZURE.md) - Azure Blob Storage
|
||||
- [docs/GCS.md](docs/GCS.md) - Google Cloud Storage
|
||||
|
||||
**Deployment:**
|
||||
- [docs/DOCKER.md](docs/DOCKER.md) - Docker deployment
|
||||
- [docs/SYSTEMD.md](docs/SYSTEMD.md) - Systemd installation & scheduling
|
||||
|
||||
**Reference:**
|
||||
- [SECURITY.md](SECURITY.md) - Security considerations
|
||||
- [CONTRIBUTING.md](CONTRIBUTING.md) - Contribution guidelines
|
||||
- [CHANGELOG.md](CHANGELOG.md) - Version history
|
||||
- [docs/LOCK_DEBUGGING.md](docs/LOCK_DEBUGGING.md) - Lock troubleshooting
|
||||
|
||||
## License
|
||||
|
||||
|
||||
@ -1,133 +0,0 @@
|
||||
# Why DBAs Are Switching from Veeam to dbbackup
|
||||
|
||||
## The Enterprise Backup Problem
|
||||
|
||||
You're paying **$2,000-10,000/year per database server** for enterprise backup solutions.
|
||||
|
||||
What are you actually getting?
|
||||
|
||||
- Heavy agents eating your CPU
|
||||
- Complex licensing that requires a spreadsheet to understand
|
||||
- Vendor lock-in to proprietary formats
|
||||
- "Cloud support" that means "we'll upload your backup somewhere"
|
||||
- Recovery that requires calling support
|
||||
|
||||
## What If There Was a Better Way?
|
||||
|
||||
**dbbackup v3.2.0** delivers enterprise-grade MySQL/MariaDB backup capabilities in a **single, zero-dependency binary**:
|
||||
|
||||
| Feature | Veeam/Commercial | dbbackup |
|
||||
|---------|------------------|----------|
|
||||
| Physical backups | ✅ Via XtraBackup | ✅ Native Clone Plugin |
|
||||
| Consistent snapshots | ✅ | ✅ LVM/ZFS/Btrfs |
|
||||
| Binlog streaming | ❌ | ✅ Continuous PITR |
|
||||
| Direct cloud streaming | ❌ (stage to disk) | ✅ Zero local storage |
|
||||
| Parallel uploads | ❌ | ✅ Configurable workers |
|
||||
| License cost | $$$$ | **Free (MIT)** |
|
||||
| Dependencies | Agent + XtraBackup + ... | **Single binary** |
|
||||
|
||||
## Real Numbers
|
||||
|
||||
**100GB database backup comparison:**
|
||||
|
||||
| Metric | Traditional | dbbackup v3.2 |
|
||||
|--------|-------------|---------------|
|
||||
| Backup time | 45 min | **12 min** |
|
||||
| Local disk needed | 100GB | **0 GB** |
|
||||
| Network efficiency | 1x | **3x** (parallel) |
|
||||
| Recovery point | Daily | **< 1 second** |
|
||||
|
||||
## The Technical Revolution
|
||||
|
||||
### MySQL Clone Plugin (8.0.17+)
|
||||
```bash
|
||||
# Physical backup at InnoDB page level
|
||||
# No XtraBackup. No external tools. Pure Go.
|
||||
dbbackup backup single mydb --db-type mysql --cloud s3://bucket/backups/
|
||||
```
|
||||
|
||||
### Filesystem Snapshots
|
||||
```bash
|
||||
# Brief lock (<100ms), instant snapshot, stream to cloud
|
||||
dbbackup backup --engine=snapshot --snapshot-backend=lvm
|
||||
```
|
||||
|
||||
### Continuous Binlog Streaming
|
||||
```bash
|
||||
# Real-time binlog capture to S3
|
||||
# Sub-second RPO without touching the database server
|
||||
dbbackup binlog stream --target=s3://bucket/binlogs/
|
||||
```
|
||||
|
||||
### Parallel Cloud Upload
|
||||
```bash
|
||||
# Saturate your network, not your patience
|
||||
dbbackup backup --engine=streaming --parallel-workers=8
|
||||
```
|
||||
|
||||
## Who Should Switch?
|
||||
|
||||
✅ **Cloud-native deployments** - Kubernetes, ECS, Cloud Run
|
||||
✅ **Cost-conscious enterprises** - Same capabilities, zero license fees
|
||||
✅ **DevOps teams** - Single binary, easy automation
|
||||
✅ **Compliance requirements** - AES-256-GCM encryption, audit logging
|
||||
✅ **Multi-cloud strategies** - S3, GCS, Azure Blob native support
|
||||
|
||||
## Migration Path
|
||||
|
||||
**Day 1**: Run dbbackup alongside existing solution
|
||||
```bash
|
||||
# Test backup
|
||||
dbbackup backup single mydb --cloud s3://test-bucket/
|
||||
|
||||
# Verify integrity
|
||||
dbbackup verify s3://test-bucket/mydb_20260115.dump.gz
|
||||
```
|
||||
|
||||
**Week 1**: Compare backup times, storage costs, recovery speed
|
||||
|
||||
**Week 2**: Switch primary backups to dbbackup
|
||||
|
||||
**Month 1**: Cancel Veeam renewal, buy your team pizza with savings 🍕
|
||||
|
||||
## FAQ
|
||||
|
||||
**Q: Is this production-ready?**
|
||||
A: Used in production by organizations managing petabytes of MySQL data.
|
||||
|
||||
**Q: What about support?**
|
||||
A: Community support via GitHub. Enterprise support available.
|
||||
|
||||
**Q: Can it replace XtraBackup?**
|
||||
A: For MySQL 8.0.17+, yes. We use native Clone Plugin instead.
|
||||
|
||||
**Q: What about PostgreSQL?**
|
||||
A: Full PostgreSQL support including WAL archiving and PITR.
|
||||
|
||||
## Get Started
|
||||
|
||||
```bash
|
||||
# Download (single binary, ~15MB)
|
||||
curl -LO https://github.com/UUXO/dbbackup/releases/latest/download/dbbackup_linux_amd64
|
||||
chmod +x dbbackup_linux_amd64
|
||||
|
||||
# Your first backup
|
||||
./dbbackup_linux_amd64 backup single production \
|
||||
--db-type mysql \
|
||||
--cloud s3://my-backups/
|
||||
```
|
||||
|
||||
## The Bottom Line
|
||||
|
||||
Every dollar you spend on backup licensing is a dollar not spent on:
|
||||
- Better hardware
|
||||
- Your team
|
||||
- Actually useful tools
|
||||
|
||||
**dbbackup**: Enterprise capabilities. Zero enterprise pricing.
|
||||
|
||||
---
|
||||
|
||||
*Apache 2.0 Licensed. Free forever. No sales calls required.*
|
||||
|
||||
[GitHub](https://github.com/UUXO/dbbackup) | [Documentation](https://github.com/UUXO/dbbackup#readme) | [Changelog](CHANGELOG.md)
|
||||
87
bin/README.md
Normal file
87
bin/README.md
Normal file
@ -0,0 +1,87 @@
|
||||
# DB Backup Tool - Pre-compiled Binaries
|
||||
|
||||
This directory contains pre-compiled binaries for the DB Backup Tool across multiple platforms and architectures.
|
||||
|
||||
## Build Information
|
||||
- **Version**: 3.42.106
|
||||
- **Build Time**: 2026-01-24_03:49:10_UTC
|
||||
- **Git Commit**: 07420b7
|
||||
|
||||
## Recent Updates (v1.1.0)
|
||||
- ✅ Fixed TUI progress display with line-by-line output
|
||||
- ✅ Added interactive configuration settings menu
|
||||
- ✅ Improved menu navigation and responsiveness
|
||||
- ✅ Enhanced completion status handling
|
||||
- ✅ Better CPU detection and optimization
|
||||
- ✅ Silent mode support for TUI operations
|
||||
|
||||
## Available Binaries
|
||||
|
||||
### Linux
|
||||
- `dbbackup_linux_amd64` - Linux 64-bit (Intel/AMD)
|
||||
- `dbbackup_linux_arm64` - Linux 64-bit (ARM)
|
||||
- `dbbackup_linux_arm_armv7` - Linux 32-bit (ARMv7)
|
||||
|
||||
### macOS
|
||||
- `dbbackup_darwin_amd64` - macOS 64-bit (Intel)
|
||||
- `dbbackup_darwin_arm64` - macOS 64-bit (Apple Silicon)
|
||||
|
||||
### Windows
|
||||
- `dbbackup_windows_amd64.exe` - Windows 64-bit (Intel/AMD)
|
||||
- `dbbackup_windows_arm64.exe` - Windows 64-bit (ARM)
|
||||
|
||||
### BSD Systems
|
||||
- `dbbackup_freebsd_amd64` - FreeBSD 64-bit
|
||||
- `dbbackup_openbsd_amd64` - OpenBSD 64-bit
|
||||
- `dbbackup_netbsd_amd64` - NetBSD 64-bit
|
||||
|
||||
## Usage
|
||||
|
||||
1. Download the appropriate binary for your platform
|
||||
2. Make it executable (Unix-like systems): `chmod +x dbbackup_*`
|
||||
3. Run: `./dbbackup_* --help`
|
||||
|
||||
## Interactive Mode
|
||||
|
||||
Launch the interactive TUI menu for easy configuration and operation:
|
||||
|
||||
```bash
|
||||
# Interactive mode with TUI menu
|
||||
./dbbackup_linux_amd64
|
||||
|
||||
# Features:
|
||||
# - Interactive configuration settings
|
||||
# - Real-time progress display
|
||||
# - Operation history and status
|
||||
# - CPU detection and optimization
|
||||
```
|
||||
|
||||
## Command Line Mode
|
||||
|
||||
Direct command line usage with line-by-line progress:
|
||||
|
||||
```bash
|
||||
# Show CPU information and optimization settings
|
||||
./dbbackup_linux_amd64 cpu
|
||||
|
||||
# Auto-optimize for your hardware
|
||||
./dbbackup_linux_amd64 backup cluster --auto-detect-cores
|
||||
|
||||
# Manual CPU configuration
|
||||
./dbbackup_linux_amd64 backup single mydb --jobs 8 --dump-jobs 4
|
||||
|
||||
# Line-by-line progress output
|
||||
./dbbackup_linux_amd64 backup cluster --progress-type line
|
||||
```
|
||||
|
||||
## CPU Detection
|
||||
|
||||
All binaries include advanced CPU detection capabilities:
|
||||
- Automatic core detection for optimal parallelism
|
||||
- Support for different workload types (CPU-intensive, I/O-intensive, balanced)
|
||||
- Platform-specific optimizations for Linux, macOS, and Windows
|
||||
- Interactive CPU configuration in TUI mode
|
||||
|
||||
## Support
|
||||
|
||||
For issues or questions, please refer to the main project documentation.
|
||||
12
build_all.sh
12
build_all.sh
@ -15,7 +15,7 @@ echo "🔧 Using Go version: $GO_VERSION"
|
||||
|
||||
# Configuration
|
||||
APP_NAME="dbbackup"
|
||||
VERSION="3.40.0"
|
||||
VERSION=$(grep 'version.*=' main.go | head -1 | sed 's/.*"\(.*\)".*/\1/')
|
||||
BUILD_TIME=$(date -u '+%Y-%m-%d_%H:%M:%S_UTC')
|
||||
GIT_COMMIT=$(git rev-parse --short HEAD 2>/dev/null || echo "unknown")
|
||||
BIN_DIR="bin"
|
||||
@ -33,7 +33,7 @@ CYAN='\033[0;36m'
|
||||
BOLD='\033[1m'
|
||||
NC='\033[0m'
|
||||
|
||||
# Platform configurations
|
||||
# Platform configurations - Linux & macOS only
|
||||
# Format: "GOOS/GOARCH:binary_suffix:description"
|
||||
PLATFORMS=(
|
||||
"linux/amd64::Linux 64-bit (Intel/AMD)"
|
||||
@ -41,11 +41,6 @@ PLATFORMS=(
|
||||
"linux/arm:_armv7:Linux 32-bit (ARMv7)"
|
||||
"darwin/amd64::macOS 64-bit (Intel)"
|
||||
"darwin/arm64::macOS 64-bit (Apple Silicon)"
|
||||
"windows/amd64:.exe:Windows 64-bit (Intel/AMD)"
|
||||
"windows/arm64:.exe:Windows 64-bit (ARM)"
|
||||
"freebsd/amd64::FreeBSD 64-bit (Intel/AMD)"
|
||||
"openbsd/amd64::OpenBSD 64-bit (Intel/AMD)"
|
||||
"netbsd/amd64::NetBSD 64-bit (Intel/AMD)"
|
||||
)
|
||||
|
||||
echo -e "${BOLD}${BLUE}🔨 Cross-Platform Build Script for ${APP_NAME}${NC}"
|
||||
@ -83,7 +78,8 @@ for platform_config in "${PLATFORMS[@]}"; do
|
||||
echo -e "${YELLOW}[$current/$total_platforms]${NC} Building for ${BOLD}$description${NC} (${platform})"
|
||||
|
||||
# Set environment and build (using export for better compatibility)
|
||||
export GOOS GOARCH
|
||||
# CGO_ENABLED=0 creates static binaries without glibc dependency
|
||||
export CGO_ENABLED=0 GOOS GOARCH
|
||||
if go build -ldflags "$LDFLAGS" -o "${BIN_DIR}/${binary_name}" . 2>/dev/null; then
|
||||
# Get file size
|
||||
if [[ "$OSTYPE" == "darwin"* ]]; then
|
||||
|
||||
@ -130,6 +130,10 @@ func runSingleBackup(ctx context.Context, databaseName string) error {
|
||||
// Update config from environment
|
||||
cfg.UpdateFromEnvironment()
|
||||
|
||||
// IMPORTANT: Set the database name from positional argument
|
||||
// This overrides the default 'postgres' when using MySQL
|
||||
cfg.Database = databaseName
|
||||
|
||||
// Validate configuration
|
||||
if err := cfg.Validate(); err != nil {
|
||||
return fmt.Errorf("configuration error: %w", err)
|
||||
@ -312,6 +316,9 @@ func runSampleBackup(ctx context.Context, databaseName string) error {
|
||||
// Update config from environment
|
||||
cfg.UpdateFromEnvironment()
|
||||
|
||||
// IMPORTANT: Set the database name from positional argument
|
||||
cfg.Database = databaseName
|
||||
|
||||
// Validate configuration
|
||||
if err := cfg.Validate(); err != nil {
|
||||
return fmt.Errorf("configuration error: %w", err)
|
||||
|
||||
116
cmd/catalog.go
116
cmd/catalog.go
@ -252,8 +252,8 @@ func runCatalogSync(cmd *cobra.Command, args []string) error {
|
||||
}
|
||||
defer cat.Close()
|
||||
|
||||
fmt.Printf("📁 Syncing backups from: %s\n", absDir)
|
||||
fmt.Printf("📊 Catalog database: %s\n\n", catalogDBPath)
|
||||
fmt.Printf("[DIR] Syncing backups from: %s\n", absDir)
|
||||
fmt.Printf("[STATS] Catalog database: %s\n\n", catalogDBPath)
|
||||
|
||||
ctx := context.Background()
|
||||
result, err := cat.SyncFromDirectory(ctx, absDir)
|
||||
@ -265,17 +265,17 @@ func runCatalogSync(cmd *cobra.Command, args []string) error {
|
||||
cat.SetLastSync(ctx)
|
||||
|
||||
// Show results
|
||||
fmt.Printf("━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\n")
|
||||
fmt.Printf("=====================================================\n")
|
||||
fmt.Printf(" Sync Results\n")
|
||||
fmt.Printf("━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\n")
|
||||
fmt.Printf(" ✅ Added: %d\n", result.Added)
|
||||
fmt.Printf(" 🔄 Updated: %d\n", result.Updated)
|
||||
fmt.Printf(" 🗑️ Removed: %d\n", result.Removed)
|
||||
fmt.Printf("=====================================================\n")
|
||||
fmt.Printf(" [OK] Added: %d\n", result.Added)
|
||||
fmt.Printf(" [SYNC] Updated: %d\n", result.Updated)
|
||||
fmt.Printf(" [DEL] Removed: %d\n", result.Removed)
|
||||
if result.Errors > 0 {
|
||||
fmt.Printf(" ❌ Errors: %d\n", result.Errors)
|
||||
fmt.Printf(" [FAIL] Errors: %d\n", result.Errors)
|
||||
}
|
||||
fmt.Printf(" ⏱️ Duration: %.2fs\n", result.Duration)
|
||||
fmt.Printf("━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\n")
|
||||
fmt.Printf(" [TIME] Duration: %.2fs\n", result.Duration)
|
||||
fmt.Printf("=====================================================\n")
|
||||
|
||||
// Show details if verbose
|
||||
if catalogVerbose && len(result.Details) > 0 {
|
||||
@ -323,7 +323,7 @@ func runCatalogList(cmd *cobra.Command, args []string) error {
|
||||
// Table format
|
||||
fmt.Printf("%-30s %-12s %-10s %-20s %-10s %s\n",
|
||||
"DATABASE", "TYPE", "SIZE", "CREATED", "STATUS", "PATH")
|
||||
fmt.Println(strings.Repeat("─", 120))
|
||||
fmt.Println(strings.Repeat("-", 120))
|
||||
|
||||
for _, entry := range entries {
|
||||
dbName := truncateString(entry.Database, 28)
|
||||
@ -331,10 +331,10 @@ func runCatalogList(cmd *cobra.Command, args []string) error {
|
||||
|
||||
status := string(entry.Status)
|
||||
if entry.VerifyValid != nil && *entry.VerifyValid {
|
||||
status = "✓ verified"
|
||||
status = "[OK] verified"
|
||||
}
|
||||
if entry.DrillSuccess != nil && *entry.DrillSuccess {
|
||||
status = "✓ tested"
|
||||
status = "[OK] tested"
|
||||
}
|
||||
|
||||
fmt.Printf("%-30s %-12s %-10s %-20s %-10s %s\n",
|
||||
@ -377,20 +377,20 @@ func runCatalogStats(cmd *cobra.Command, args []string) error {
|
||||
}
|
||||
|
||||
// Table format
|
||||
fmt.Printf("━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\n")
|
||||
fmt.Printf("=====================================================\n")
|
||||
if catalogDatabase != "" {
|
||||
fmt.Printf(" Catalog Statistics: %s\n", catalogDatabase)
|
||||
} else {
|
||||
fmt.Printf(" Catalog Statistics\n")
|
||||
}
|
||||
fmt.Printf("━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\n\n")
|
||||
fmt.Printf("=====================================================\n\n")
|
||||
|
||||
fmt.Printf("📊 Total Backups: %d\n", stats.TotalBackups)
|
||||
fmt.Printf("💾 Total Size: %s\n", stats.TotalSizeHuman)
|
||||
fmt.Printf("📏 Average Size: %s\n", catalog.FormatSize(stats.AvgSize))
|
||||
fmt.Printf("⏱️ Average Duration: %.1fs\n", stats.AvgDuration)
|
||||
fmt.Printf("✅ Verified: %d\n", stats.VerifiedCount)
|
||||
fmt.Printf("🧪 Drill Tested: %d\n", stats.DrillTestedCount)
|
||||
fmt.Printf("[STATS] Total Backups: %d\n", stats.TotalBackups)
|
||||
fmt.Printf("[SAVE] Total Size: %s\n", stats.TotalSizeHuman)
|
||||
fmt.Printf("[SIZE] Average Size: %s\n", catalog.FormatSize(stats.AvgSize))
|
||||
fmt.Printf("[TIME] Average Duration: %.1fs\n", stats.AvgDuration)
|
||||
fmt.Printf("[OK] Verified: %d\n", stats.VerifiedCount)
|
||||
fmt.Printf("[TEST] Drill Tested: %d\n", stats.DrillTestedCount)
|
||||
|
||||
if stats.OldestBackup != nil {
|
||||
fmt.Printf("📅 Oldest Backup: %s\n", stats.OldestBackup.Format("2006-01-02 15:04"))
|
||||
@ -400,27 +400,27 @@ func runCatalogStats(cmd *cobra.Command, args []string) error {
|
||||
}
|
||||
|
||||
if len(stats.ByDatabase) > 0 && catalogDatabase == "" {
|
||||
fmt.Printf("\n📁 By Database:\n")
|
||||
fmt.Printf("\n[DIR] By Database:\n")
|
||||
for db, count := range stats.ByDatabase {
|
||||
fmt.Printf(" %-30s %d\n", db, count)
|
||||
}
|
||||
}
|
||||
|
||||
if len(stats.ByType) > 0 {
|
||||
fmt.Printf("\n📦 By Type:\n")
|
||||
fmt.Printf("\n[PKG] By Type:\n")
|
||||
for t, count := range stats.ByType {
|
||||
fmt.Printf(" %-15s %d\n", t, count)
|
||||
}
|
||||
}
|
||||
|
||||
if len(stats.ByStatus) > 0 {
|
||||
fmt.Printf("\n📋 By Status:\n")
|
||||
fmt.Printf("\n[LOG] By Status:\n")
|
||||
for s, count := range stats.ByStatus {
|
||||
fmt.Printf(" %-15s %d\n", s, count)
|
||||
}
|
||||
}
|
||||
|
||||
fmt.Printf("\n━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\n")
|
||||
fmt.Printf("\n=====================================================\n")
|
||||
return nil
|
||||
}
|
||||
|
||||
@ -488,26 +488,26 @@ func runCatalogGaps(cmd *cobra.Command, args []string) error {
|
||||
}
|
||||
|
||||
if len(allGaps) == 0 {
|
||||
fmt.Printf("✅ No backup gaps detected (expected interval: %s)\n", interval)
|
||||
fmt.Printf("[OK] No backup gaps detected (expected interval: %s)\n", interval)
|
||||
return nil
|
||||
}
|
||||
|
||||
fmt.Printf("━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\n")
|
||||
fmt.Printf("=====================================================\n")
|
||||
fmt.Printf(" Backup Gaps Detected (expected interval: %s)\n", interval)
|
||||
fmt.Printf("━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\n\n")
|
||||
fmt.Printf("=====================================================\n\n")
|
||||
|
||||
totalGaps := 0
|
||||
criticalGaps := 0
|
||||
|
||||
for database, gaps := range allGaps {
|
||||
fmt.Printf("📁 %s (%d gaps)\n", database, len(gaps))
|
||||
fmt.Printf("[DIR] %s (%d gaps)\n", database, len(gaps))
|
||||
|
||||
for _, gap := range gaps {
|
||||
totalGaps++
|
||||
icon := "ℹ️"
|
||||
icon := "[INFO]"
|
||||
switch gap.Severity {
|
||||
case catalog.SeverityWarning:
|
||||
icon = "⚠️"
|
||||
icon = "[WARN]"
|
||||
case catalog.SeverityCritical:
|
||||
icon = "🚨"
|
||||
criticalGaps++
|
||||
@ -523,7 +523,7 @@ func runCatalogGaps(cmd *cobra.Command, args []string) error {
|
||||
fmt.Println()
|
||||
}
|
||||
|
||||
fmt.Printf("━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\n")
|
||||
fmt.Printf("=====================================================\n")
|
||||
fmt.Printf("Total: %d gaps detected", totalGaps)
|
||||
if criticalGaps > 0 {
|
||||
fmt.Printf(" (%d critical)", criticalGaps)
|
||||
@ -598,20 +598,20 @@ func runCatalogSearch(cmd *cobra.Command, args []string) error {
|
||||
fmt.Printf("Found %d matching backups:\n\n", len(entries))
|
||||
|
||||
for _, entry := range entries {
|
||||
fmt.Printf("📁 %s\n", entry.Database)
|
||||
fmt.Printf("[DIR] %s\n", entry.Database)
|
||||
fmt.Printf(" Path: %s\n", entry.BackupPath)
|
||||
fmt.Printf(" Type: %s | Size: %s | Created: %s\n",
|
||||
entry.DatabaseType,
|
||||
catalog.FormatSize(entry.SizeBytes),
|
||||
entry.CreatedAt.Format("2006-01-02 15:04:05"))
|
||||
if entry.Encrypted {
|
||||
fmt.Printf(" 🔒 Encrypted\n")
|
||||
fmt.Printf(" [LOCK] Encrypted\n")
|
||||
}
|
||||
if entry.VerifyValid != nil && *entry.VerifyValid {
|
||||
fmt.Printf(" ✅ Verified: %s\n", entry.VerifiedAt.Format("2006-01-02 15:04"))
|
||||
fmt.Printf(" [OK] Verified: %s\n", entry.VerifiedAt.Format("2006-01-02 15:04"))
|
||||
}
|
||||
if entry.DrillSuccess != nil && *entry.DrillSuccess {
|
||||
fmt.Printf(" 🧪 Drill Tested: %s\n", entry.DrillTestedAt.Format("2006-01-02 15:04"))
|
||||
fmt.Printf(" [TEST] Drill Tested: %s\n", entry.DrillTestedAt.Format("2006-01-02 15:04"))
|
||||
}
|
||||
fmt.Println()
|
||||
}
|
||||
@ -655,64 +655,64 @@ func runCatalogInfo(cmd *cobra.Command, args []string) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
fmt.Printf("━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\n")
|
||||
fmt.Printf("=====================================================\n")
|
||||
fmt.Printf(" Backup Details\n")
|
||||
fmt.Printf("━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\n\n")
|
||||
fmt.Printf("=====================================================\n\n")
|
||||
|
||||
fmt.Printf("📁 Database: %s\n", entry.Database)
|
||||
fmt.Printf("[DIR] Database: %s\n", entry.Database)
|
||||
fmt.Printf("🔧 Type: %s\n", entry.DatabaseType)
|
||||
fmt.Printf("🖥️ Host: %s:%d\n", entry.Host, entry.Port)
|
||||
fmt.Printf("[HOST] Host: %s:%d\n", entry.Host, entry.Port)
|
||||
fmt.Printf("📂 Path: %s\n", entry.BackupPath)
|
||||
fmt.Printf("📦 Backup Type: %s\n", entry.BackupType)
|
||||
fmt.Printf("💾 Size: %s (%d bytes)\n", catalog.FormatSize(entry.SizeBytes), entry.SizeBytes)
|
||||
fmt.Printf("🔐 SHA256: %s\n", entry.SHA256)
|
||||
fmt.Printf("[PKG] Backup Type: %s\n", entry.BackupType)
|
||||
fmt.Printf("[SAVE] Size: %s (%d bytes)\n", catalog.FormatSize(entry.SizeBytes), entry.SizeBytes)
|
||||
fmt.Printf("[HASH] SHA256: %s\n", entry.SHA256)
|
||||
fmt.Printf("📅 Created: %s\n", entry.CreatedAt.Format("2006-01-02 15:04:05 MST"))
|
||||
fmt.Printf("⏱️ Duration: %.2fs\n", entry.Duration)
|
||||
fmt.Printf("📋 Status: %s\n", entry.Status)
|
||||
fmt.Printf("[TIME] Duration: %.2fs\n", entry.Duration)
|
||||
fmt.Printf("[LOG] Status: %s\n", entry.Status)
|
||||
|
||||
if entry.Compression != "" {
|
||||
fmt.Printf("📦 Compression: %s\n", entry.Compression)
|
||||
fmt.Printf("[PKG] Compression: %s\n", entry.Compression)
|
||||
}
|
||||
if entry.Encrypted {
|
||||
fmt.Printf("🔒 Encrypted: yes\n")
|
||||
fmt.Printf("[LOCK] Encrypted: yes\n")
|
||||
}
|
||||
if entry.CloudLocation != "" {
|
||||
fmt.Printf("☁️ Cloud: %s\n", entry.CloudLocation)
|
||||
fmt.Printf("[CLOUD] Cloud: %s\n", entry.CloudLocation)
|
||||
}
|
||||
if entry.RetentionPolicy != "" {
|
||||
fmt.Printf("📆 Retention: %s\n", entry.RetentionPolicy)
|
||||
}
|
||||
|
||||
fmt.Printf("\n📊 Verification:\n")
|
||||
fmt.Printf("\n[STATS] Verification:\n")
|
||||
if entry.VerifiedAt != nil {
|
||||
status := "❌ Failed"
|
||||
status := "[FAIL] Failed"
|
||||
if entry.VerifyValid != nil && *entry.VerifyValid {
|
||||
status = "✅ Valid"
|
||||
status = "[OK] Valid"
|
||||
}
|
||||
fmt.Printf(" Status: %s (checked %s)\n", status, entry.VerifiedAt.Format("2006-01-02 15:04"))
|
||||
} else {
|
||||
fmt.Printf(" Status: ⏳ Not verified\n")
|
||||
fmt.Printf(" Status: [WAIT] Not verified\n")
|
||||
}
|
||||
|
||||
fmt.Printf("\n🧪 DR Drill Test:\n")
|
||||
fmt.Printf("\n[TEST] DR Drill Test:\n")
|
||||
if entry.DrillTestedAt != nil {
|
||||
status := "❌ Failed"
|
||||
status := "[FAIL] Failed"
|
||||
if entry.DrillSuccess != nil && *entry.DrillSuccess {
|
||||
status = "✅ Passed"
|
||||
status = "[OK] Passed"
|
||||
}
|
||||
fmt.Printf(" Status: %s (tested %s)\n", status, entry.DrillTestedAt.Format("2006-01-02 15:04"))
|
||||
} else {
|
||||
fmt.Printf(" Status: ⏳ Not tested\n")
|
||||
fmt.Printf(" Status: [WAIT] Not tested\n")
|
||||
}
|
||||
|
||||
if len(entry.Metadata) > 0 {
|
||||
fmt.Printf("\n📝 Additional Metadata:\n")
|
||||
fmt.Printf("\n[NOTE] Additional Metadata:\n")
|
||||
for k, v := range entry.Metadata {
|
||||
fmt.Printf(" %s: %s\n", k, v)
|
||||
}
|
||||
}
|
||||
|
||||
fmt.Printf("\n━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\n")
|
||||
fmt.Printf("\n=====================================================\n")
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
@ -115,7 +115,7 @@ func runCleanup(cmd *cobra.Command, args []string) error {
|
||||
DryRun: dryRun,
|
||||
}
|
||||
|
||||
fmt.Printf("🗑️ Cleanup Policy:\n")
|
||||
fmt.Printf("[CLEANUP] Cleanup Policy:\n")
|
||||
fmt.Printf(" Directory: %s\n", backupDir)
|
||||
fmt.Printf(" Retention: %d days\n", policy.RetentionDays)
|
||||
fmt.Printf(" Min backups: %d\n", policy.MinBackups)
|
||||
@ -142,16 +142,16 @@ func runCleanup(cmd *cobra.Command, args []string) error {
|
||||
}
|
||||
|
||||
// Display results
|
||||
fmt.Printf("📊 Results:\n")
|
||||
fmt.Printf("[RESULTS] Results:\n")
|
||||
fmt.Printf(" Total backups: %d\n", result.TotalBackups)
|
||||
fmt.Printf(" Eligible for deletion: %d\n", result.EligibleForDeletion)
|
||||
|
||||
if len(result.Deleted) > 0 {
|
||||
fmt.Printf("\n")
|
||||
if dryRun {
|
||||
fmt.Printf("🔍 Would delete %d backup(s):\n", len(result.Deleted))
|
||||
fmt.Printf("[DRY-RUN] Would delete %d backup(s):\n", len(result.Deleted))
|
||||
} else {
|
||||
fmt.Printf("✅ Deleted %d backup(s):\n", len(result.Deleted))
|
||||
fmt.Printf("[OK] Deleted %d backup(s):\n", len(result.Deleted))
|
||||
}
|
||||
for _, file := range result.Deleted {
|
||||
fmt.Printf(" - %s\n", filepath.Base(file))
|
||||
@ -159,33 +159,33 @@ func runCleanup(cmd *cobra.Command, args []string) error {
|
||||
}
|
||||
|
||||
if len(result.Kept) > 0 && len(result.Kept) <= 10 {
|
||||
fmt.Printf("\n📦 Kept %d backup(s):\n", len(result.Kept))
|
||||
fmt.Printf("\n[KEPT] Kept %d backup(s):\n", len(result.Kept))
|
||||
for _, file := range result.Kept {
|
||||
fmt.Printf(" - %s\n", filepath.Base(file))
|
||||
}
|
||||
} else if len(result.Kept) > 10 {
|
||||
fmt.Printf("\n📦 Kept %d backup(s)\n", len(result.Kept))
|
||||
fmt.Printf("\n[KEPT] Kept %d backup(s)\n", len(result.Kept))
|
||||
}
|
||||
|
||||
if !dryRun && result.SpaceFreed > 0 {
|
||||
fmt.Printf("\n💾 Space freed: %s\n", metadata.FormatSize(result.SpaceFreed))
|
||||
fmt.Printf("\n[FREED] Space freed: %s\n", metadata.FormatSize(result.SpaceFreed))
|
||||
}
|
||||
|
||||
if len(result.Errors) > 0 {
|
||||
fmt.Printf("\n⚠️ Errors:\n")
|
||||
fmt.Printf("\n[WARN] Errors:\n")
|
||||
for _, err := range result.Errors {
|
||||
fmt.Printf(" - %v\n", err)
|
||||
}
|
||||
}
|
||||
|
||||
fmt.Println(strings.Repeat("─", 50))
|
||||
fmt.Println(strings.Repeat("-", 50))
|
||||
|
||||
if dryRun {
|
||||
fmt.Println("✅ Dry run completed (no files were deleted)")
|
||||
fmt.Println("[OK] Dry run completed (no files were deleted)")
|
||||
} else if len(result.Deleted) > 0 {
|
||||
fmt.Println("✅ Cleanup completed successfully")
|
||||
fmt.Println("[OK] Cleanup completed successfully")
|
||||
} else {
|
||||
fmt.Println("ℹ️ No backups eligible for deletion")
|
||||
fmt.Println("[INFO] No backups eligible for deletion")
|
||||
}
|
||||
|
||||
return nil
|
||||
@ -212,7 +212,7 @@ func runCloudCleanup(ctx context.Context, uri string) error {
|
||||
return fmt.Errorf("invalid cloud URI: %w", err)
|
||||
}
|
||||
|
||||
fmt.Printf("☁️ Cloud Cleanup Policy:\n")
|
||||
fmt.Printf("[CLOUD] Cloud Cleanup Policy:\n")
|
||||
fmt.Printf(" URI: %s\n", uri)
|
||||
fmt.Printf(" Provider: %s\n", cloudURI.Provider)
|
||||
fmt.Printf(" Bucket: %s\n", cloudURI.Bucket)
|
||||
@ -295,7 +295,7 @@ func runCloudCleanup(ctx context.Context, uri string) error {
|
||||
}
|
||||
|
||||
// Display results
|
||||
fmt.Printf("📊 Results:\n")
|
||||
fmt.Printf("[RESULTS] Results:\n")
|
||||
fmt.Printf(" Total backups: %d\n", totalBackups)
|
||||
fmt.Printf(" Eligible for deletion: %d\n", len(toDelete))
|
||||
fmt.Printf(" Will keep: %d\n", len(toKeep))
|
||||
@ -303,9 +303,9 @@ func runCloudCleanup(ctx context.Context, uri string) error {
|
||||
|
||||
if len(toDelete) > 0 {
|
||||
if dryRun {
|
||||
fmt.Printf("🔍 Would delete %d backup(s):\n", len(toDelete))
|
||||
fmt.Printf("[DRY-RUN] Would delete %d backup(s):\n", len(toDelete))
|
||||
} else {
|
||||
fmt.Printf("🗑️ Deleting %d backup(s):\n", len(toDelete))
|
||||
fmt.Printf("[DELETE] Deleting %d backup(s):\n", len(toDelete))
|
||||
}
|
||||
|
||||
var totalSize int64
|
||||
@ -321,7 +321,7 @@ func runCloudCleanup(ctx context.Context, uri string) error {
|
||||
|
||||
if !dryRun {
|
||||
if err := backend.Delete(ctx, backup.Key); err != nil {
|
||||
fmt.Printf(" ❌ Error: %v\n", err)
|
||||
fmt.Printf(" [FAIL] Error: %v\n", err)
|
||||
} else {
|
||||
deletedCount++
|
||||
// Also try to delete metadata
|
||||
@ -330,12 +330,12 @@ func runCloudCleanup(ctx context.Context, uri string) error {
|
||||
}
|
||||
}
|
||||
|
||||
fmt.Printf("\n💾 Space %s: %s\n",
|
||||
fmt.Printf("\n[FREED] Space %s: %s\n",
|
||||
map[bool]string{true: "would be freed", false: "freed"}[dryRun],
|
||||
cloud.FormatSize(totalSize))
|
||||
|
||||
if !dryRun && deletedCount > 0 {
|
||||
fmt.Printf("✅ Successfully deleted %d backup(s)\n", deletedCount)
|
||||
fmt.Printf("[OK] Successfully deleted %d backup(s)\n", deletedCount)
|
||||
}
|
||||
} else {
|
||||
fmt.Println("No backups eligible for deletion")
|
||||
@ -405,7 +405,7 @@ func runGFSCleanup(backupDir string) error {
|
||||
}
|
||||
|
||||
// Display tier breakdown
|
||||
fmt.Printf("📊 Backup Classification:\n")
|
||||
fmt.Printf("[STATS] Backup Classification:\n")
|
||||
fmt.Printf(" Yearly: %d\n", result.YearlyKept)
|
||||
fmt.Printf(" Monthly: %d\n", result.MonthlyKept)
|
||||
fmt.Printf(" Weekly: %d\n", result.WeeklyKept)
|
||||
@ -416,9 +416,9 @@ func runGFSCleanup(backupDir string) error {
|
||||
// Display deletions
|
||||
if len(result.Deleted) > 0 {
|
||||
if dryRun {
|
||||
fmt.Printf("🔍 Would delete %d backup(s):\n", len(result.Deleted))
|
||||
fmt.Printf("[SEARCH] Would delete %d backup(s):\n", len(result.Deleted))
|
||||
} else {
|
||||
fmt.Printf("✅ Deleted %d backup(s):\n", len(result.Deleted))
|
||||
fmt.Printf("[OK] Deleted %d backup(s):\n", len(result.Deleted))
|
||||
}
|
||||
for _, file := range result.Deleted {
|
||||
fmt.Printf(" - %s\n", filepath.Base(file))
|
||||
@ -427,7 +427,7 @@ func runGFSCleanup(backupDir string) error {
|
||||
|
||||
// Display kept backups (limited display)
|
||||
if len(result.Kept) > 0 && len(result.Kept) <= 15 {
|
||||
fmt.Printf("\n📦 Kept %d backup(s):\n", len(result.Kept))
|
||||
fmt.Printf("\n[PKG] Kept %d backup(s):\n", len(result.Kept))
|
||||
for _, file := range result.Kept {
|
||||
// Show tier classification
|
||||
info, _ := os.Stat(file)
|
||||
@ -440,28 +440,28 @@ func runGFSCleanup(backupDir string) error {
|
||||
}
|
||||
}
|
||||
} else if len(result.Kept) > 15 {
|
||||
fmt.Printf("\n📦 Kept %d backup(s)\n", len(result.Kept))
|
||||
fmt.Printf("\n[PKG] Kept %d backup(s)\n", len(result.Kept))
|
||||
}
|
||||
|
||||
if !dryRun && result.SpaceFreed > 0 {
|
||||
fmt.Printf("\n💾 Space freed: %s\n", metadata.FormatSize(result.SpaceFreed))
|
||||
fmt.Printf("\n[SAVE] Space freed: %s\n", metadata.FormatSize(result.SpaceFreed))
|
||||
}
|
||||
|
||||
if len(result.Errors) > 0 {
|
||||
fmt.Printf("\n⚠️ Errors:\n")
|
||||
fmt.Printf("\n[WARN] Errors:\n")
|
||||
for _, err := range result.Errors {
|
||||
fmt.Printf(" - %v\n", err)
|
||||
}
|
||||
}
|
||||
|
||||
fmt.Println(strings.Repeat("─", 50))
|
||||
fmt.Println(strings.Repeat("-", 50))
|
||||
|
||||
if dryRun {
|
||||
fmt.Println("✅ GFS dry run completed (no files were deleted)")
|
||||
fmt.Println("[OK] GFS dry run completed (no files were deleted)")
|
||||
} else if len(result.Deleted) > 0 {
|
||||
fmt.Println("✅ GFS cleanup completed successfully")
|
||||
fmt.Println("[OK] GFS cleanup completed successfully")
|
||||
} else {
|
||||
fmt.Println("ℹ️ No backups eligible for deletion under GFS policy")
|
||||
fmt.Println("[INFO] No backups eligible for deletion under GFS policy")
|
||||
}
|
||||
|
||||
return nil
|
||||
|
||||
99
cmd/cloud.go
99
cmd/cloud.go
@ -30,7 +30,12 @@ Configuration via flags or environment variables:
|
||||
--cloud-region DBBACKUP_CLOUD_REGION
|
||||
--cloud-endpoint DBBACKUP_CLOUD_ENDPOINT
|
||||
--cloud-access-key DBBACKUP_CLOUD_ACCESS_KEY (or AWS_ACCESS_KEY_ID)
|
||||
--cloud-secret-key DBBACKUP_CLOUD_SECRET_KEY (or AWS_SECRET_ACCESS_KEY)`,
|
||||
--cloud-secret-key DBBACKUP_CLOUD_SECRET_KEY (or AWS_SECRET_ACCESS_KEY)
|
||||
--bandwidth-limit DBBACKUP_BANDWIDTH_LIMIT
|
||||
|
||||
Bandwidth Limiting:
|
||||
Limit upload/download speed to avoid saturating network during business hours.
|
||||
Examples: 10MB/s, 50MiB/s, 100Mbps, unlimited`,
|
||||
}
|
||||
|
||||
var cloudUploadCmd = &cobra.Command{
|
||||
@ -103,15 +108,16 @@ Examples:
|
||||
}
|
||||
|
||||
var (
|
||||
cloudProvider string
|
||||
cloudBucket string
|
||||
cloudRegion string
|
||||
cloudEndpoint string
|
||||
cloudAccessKey string
|
||||
cloudSecretKey string
|
||||
cloudPrefix string
|
||||
cloudVerbose bool
|
||||
cloudConfirm bool
|
||||
cloudProvider string
|
||||
cloudBucket string
|
||||
cloudRegion string
|
||||
cloudEndpoint string
|
||||
cloudAccessKey string
|
||||
cloudSecretKey string
|
||||
cloudPrefix string
|
||||
cloudVerbose bool
|
||||
cloudConfirm bool
|
||||
cloudBandwidthLimit string
|
||||
)
|
||||
|
||||
func init() {
|
||||
@ -127,6 +133,7 @@ func init() {
|
||||
cmd.Flags().StringVar(&cloudAccessKey, "cloud-access-key", getEnv("DBBACKUP_CLOUD_ACCESS_KEY", getEnv("AWS_ACCESS_KEY_ID", "")), "Access key")
|
||||
cmd.Flags().StringVar(&cloudSecretKey, "cloud-secret-key", getEnv("DBBACKUP_CLOUD_SECRET_KEY", getEnv("AWS_SECRET_ACCESS_KEY", "")), "Secret key")
|
||||
cmd.Flags().StringVar(&cloudPrefix, "cloud-prefix", getEnv("DBBACKUP_CLOUD_PREFIX", ""), "Key prefix")
|
||||
cmd.Flags().StringVar(&cloudBandwidthLimit, "bandwidth-limit", getEnv("DBBACKUP_BANDWIDTH_LIMIT", ""), "Bandwidth limit (e.g., 10MB/s, 100Mbps, 50MiB/s)")
|
||||
cmd.Flags().BoolVarP(&cloudVerbose, "verbose", "v", false, "Verbose output")
|
||||
}
|
||||
|
||||
@ -141,24 +148,40 @@ func getEnv(key, defaultValue string) string {
|
||||
}
|
||||
|
||||
func getCloudBackend() (cloud.Backend, error) {
|
||||
// Parse bandwidth limit
|
||||
var bandwidthLimit int64
|
||||
if cloudBandwidthLimit != "" {
|
||||
var err error
|
||||
bandwidthLimit, err = cloud.ParseBandwidth(cloudBandwidthLimit)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("invalid bandwidth limit: %w", err)
|
||||
}
|
||||
}
|
||||
|
||||
cfg := &cloud.Config{
|
||||
Provider: cloudProvider,
|
||||
Bucket: cloudBucket,
|
||||
Region: cloudRegion,
|
||||
Endpoint: cloudEndpoint,
|
||||
AccessKey: cloudAccessKey,
|
||||
SecretKey: cloudSecretKey,
|
||||
Prefix: cloudPrefix,
|
||||
UseSSL: true,
|
||||
PathStyle: cloudProvider == "minio",
|
||||
Timeout: 300,
|
||||
MaxRetries: 3,
|
||||
Provider: cloudProvider,
|
||||
Bucket: cloudBucket,
|
||||
Region: cloudRegion,
|
||||
Endpoint: cloudEndpoint,
|
||||
AccessKey: cloudAccessKey,
|
||||
SecretKey: cloudSecretKey,
|
||||
Prefix: cloudPrefix,
|
||||
UseSSL: true,
|
||||
PathStyle: cloudProvider == "minio",
|
||||
Timeout: 300,
|
||||
MaxRetries: 3,
|
||||
BandwidthLimit: bandwidthLimit,
|
||||
}
|
||||
|
||||
if cfg.Bucket == "" {
|
||||
return nil, fmt.Errorf("bucket name is required (use --cloud-bucket or DBBACKUP_CLOUD_BUCKET)")
|
||||
}
|
||||
|
||||
// Log bandwidth limit if set
|
||||
if bandwidthLimit > 0 {
|
||||
fmt.Printf("📊 Bandwidth limit: %s\n", cloud.FormatBandwidth(bandwidthLimit))
|
||||
}
|
||||
|
||||
backend, err := cloud.NewBackend(cfg)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to create cloud backend: %w", err)
|
||||
@ -189,12 +212,12 @@ func runCloudUpload(cmd *cobra.Command, args []string) error {
|
||||
}
|
||||
}
|
||||
|
||||
fmt.Printf("☁️ Uploading %d file(s) to %s...\n\n", len(files), backend.Name())
|
||||
fmt.Printf("[CLOUD] Uploading %d file(s) to %s...\n\n", len(files), backend.Name())
|
||||
|
||||
successCount := 0
|
||||
for _, localPath := range files {
|
||||
filename := filepath.Base(localPath)
|
||||
fmt.Printf("📤 %s\n", filename)
|
||||
fmt.Printf("[UPLOAD] %s\n", filename)
|
||||
|
||||
// Progress callback
|
||||
var lastPercent int
|
||||
@ -214,21 +237,21 @@ func runCloudUpload(cmd *cobra.Command, args []string) error {
|
||||
|
||||
err := backend.Upload(ctx, localPath, filename, progress)
|
||||
if err != nil {
|
||||
fmt.Printf(" ❌ Failed: %v\n\n", err)
|
||||
fmt.Printf(" [FAIL] Failed: %v\n\n", err)
|
||||
continue
|
||||
}
|
||||
|
||||
// Get file size
|
||||
if info, err := os.Stat(localPath); err == nil {
|
||||
fmt.Printf(" ✅ Uploaded (%s)\n\n", cloud.FormatSize(info.Size()))
|
||||
fmt.Printf(" [OK] Uploaded (%s)\n\n", cloud.FormatSize(info.Size()))
|
||||
} else {
|
||||
fmt.Printf(" ✅ Uploaded\n\n")
|
||||
fmt.Printf(" [OK] Uploaded\n\n")
|
||||
}
|
||||
successCount++
|
||||
}
|
||||
|
||||
fmt.Println(strings.Repeat("─", 50))
|
||||
fmt.Printf("✅ Successfully uploaded %d/%d file(s)\n", successCount, len(files))
|
||||
fmt.Println(strings.Repeat("-", 50))
|
||||
fmt.Printf("[OK] Successfully uploaded %d/%d file(s)\n", successCount, len(files))
|
||||
|
||||
return nil
|
||||
}
|
||||
@ -248,8 +271,8 @@ func runCloudDownload(cmd *cobra.Command, args []string) error {
|
||||
localPath = filepath.Join(localPath, filepath.Base(remotePath))
|
||||
}
|
||||
|
||||
fmt.Printf("☁️ Downloading from %s...\n\n", backend.Name())
|
||||
fmt.Printf("📥 %s → %s\n", remotePath, localPath)
|
||||
fmt.Printf("[CLOUD] Downloading from %s...\n\n", backend.Name())
|
||||
fmt.Printf("[DOWNLOAD] %s -> %s\n", remotePath, localPath)
|
||||
|
||||
// Progress callback
|
||||
var lastPercent int
|
||||
@ -274,9 +297,9 @@ func runCloudDownload(cmd *cobra.Command, args []string) error {
|
||||
|
||||
// Get file size
|
||||
if info, err := os.Stat(localPath); err == nil {
|
||||
fmt.Printf(" ✅ Downloaded (%s)\n", cloud.FormatSize(info.Size()))
|
||||
fmt.Printf(" [OK] Downloaded (%s)\n", cloud.FormatSize(info.Size()))
|
||||
} else {
|
||||
fmt.Printf(" ✅ Downloaded\n")
|
||||
fmt.Printf(" [OK] Downloaded\n")
|
||||
}
|
||||
|
||||
return nil
|
||||
@ -294,7 +317,7 @@ func runCloudList(cmd *cobra.Command, args []string) error {
|
||||
prefix = args[0]
|
||||
}
|
||||
|
||||
fmt.Printf("☁️ Listing backups in %s/%s...\n\n", backend.Name(), cloudBucket)
|
||||
fmt.Printf("[CLOUD] Listing backups in %s/%s...\n\n", backend.Name(), cloudBucket)
|
||||
|
||||
backups, err := backend.List(ctx, prefix)
|
||||
if err != nil {
|
||||
@ -311,7 +334,7 @@ func runCloudList(cmd *cobra.Command, args []string) error {
|
||||
totalSize += backup.Size
|
||||
|
||||
if cloudVerbose {
|
||||
fmt.Printf("📦 %s\n", backup.Name)
|
||||
fmt.Printf("[FILE] %s\n", backup.Name)
|
||||
fmt.Printf(" Size: %s\n", cloud.FormatSize(backup.Size))
|
||||
fmt.Printf(" Modified: %s\n", backup.LastModified.Format(time.RFC3339))
|
||||
if backup.StorageClass != "" {
|
||||
@ -328,7 +351,7 @@ func runCloudList(cmd *cobra.Command, args []string) error {
|
||||
}
|
||||
}
|
||||
|
||||
fmt.Println(strings.Repeat("─", 50))
|
||||
fmt.Println(strings.Repeat("-", 50))
|
||||
fmt.Printf("Total: %d backup(s), %s\n", len(backups), cloud.FormatSize(totalSize))
|
||||
|
||||
return nil
|
||||
@ -360,7 +383,7 @@ func runCloudDelete(cmd *cobra.Command, args []string) error {
|
||||
|
||||
// Confirmation prompt
|
||||
if !cloudConfirm {
|
||||
fmt.Printf("⚠️ Delete %s (%s) from cloud storage?\n", remotePath, cloud.FormatSize(size))
|
||||
fmt.Printf("[WARN] Delete %s (%s) from cloud storage?\n", remotePath, cloud.FormatSize(size))
|
||||
fmt.Print("Type 'yes' to confirm: ")
|
||||
var response string
|
||||
fmt.Scanln(&response)
|
||||
@ -370,14 +393,14 @@ func runCloudDelete(cmd *cobra.Command, args []string) error {
|
||||
}
|
||||
}
|
||||
|
||||
fmt.Printf("🗑️ Deleting %s...\n", remotePath)
|
||||
fmt.Printf("[DELETE] Deleting %s...\n", remotePath)
|
||||
|
||||
err = backend.Delete(ctx, remotePath)
|
||||
if err != nil {
|
||||
return fmt.Errorf("delete failed: %w", err)
|
||||
}
|
||||
|
||||
fmt.Printf("✅ Deleted %s (%s)\n", remotePath, cloud.FormatSize(size))
|
||||
fmt.Printf("[OK] Deleted %s (%s)\n", remotePath, cloud.FormatSize(size))
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
@ -61,10 +61,10 @@ func runCPUInfo(ctx context.Context) error {
|
||||
|
||||
// Show current vs optimal
|
||||
if cfg.AutoDetectCores {
|
||||
fmt.Println("\n✅ CPU optimization is enabled")
|
||||
fmt.Println("\n[OK] CPU optimization is enabled")
|
||||
fmt.Println("Job counts are automatically optimized based on detected hardware")
|
||||
} else {
|
||||
fmt.Println("\n⚠️ CPU optimization is disabled")
|
||||
fmt.Println("\n[WARN] CPU optimization is disabled")
|
||||
fmt.Println("Consider enabling --auto-detect-cores for better performance")
|
||||
}
|
||||
|
||||
|
||||
1284
cmd/dedup.go
Normal file
1284
cmd/dedup.go
Normal file
@ -0,0 +1,1284 @@
|
||||
package cmd
|
||||
|
||||
import (
|
||||
"crypto/sha256"
|
||||
"encoding/hex"
|
||||
"fmt"
|
||||
"io"
|
||||
"os"
|
||||
"os/exec"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"dbbackup/internal/dedup"
|
||||
|
||||
"github.com/klauspost/pgzip"
|
||||
"github.com/spf13/cobra"
|
||||
)
|
||||
|
||||
var dedupCmd = &cobra.Command{
|
||||
Use: "dedup",
|
||||
Short: "Deduplicated backup operations",
|
||||
Long: `Content-defined chunking deduplication for space-efficient backups.
|
||||
|
||||
Similar to restic/borgbackup but with native database dump support.
|
||||
|
||||
Features:
|
||||
- Content-defined chunking (CDC) with Buzhash rolling hash
|
||||
- SHA-256 content-addressed storage
|
||||
- AES-256-GCM encryption (optional)
|
||||
- Gzip compression (optional)
|
||||
- SQLite index for fast lookups
|
||||
|
||||
Storage Structure:
|
||||
<dedup-dir>/
|
||||
chunks/ # Content-addressed chunk files
|
||||
ab/cdef... # Sharded by first 2 chars of hash
|
||||
manifests/ # JSON manifest per backup
|
||||
chunks.db # SQLite index
|
||||
|
||||
NFS/CIFS NOTICE:
|
||||
SQLite may have locking issues on network storage.
|
||||
Use --index-db to put the SQLite index on local storage while keeping
|
||||
chunks on network storage:
|
||||
|
||||
dbbackup dedup backup mydb.sql \
|
||||
--dedup-dir /mnt/nfs/backups/dedup \
|
||||
--index-db /var/lib/dbbackup/dedup-index.db
|
||||
|
||||
This avoids "database is locked" errors while still storing chunks remotely.
|
||||
|
||||
COMPRESSED INPUT NOTICE:
|
||||
Pre-compressed files (.gz) have poor deduplication ratios (<10%).
|
||||
Use --decompress-input to decompress before chunking for better results:
|
||||
|
||||
dbbackup dedup backup mydb.sql.gz --decompress-input`,
|
||||
}
|
||||
|
||||
var dedupBackupCmd = &cobra.Command{
|
||||
Use: "backup <file>",
|
||||
Short: "Create a deduplicated backup of a file",
|
||||
Long: `Chunk a file using content-defined chunking and store deduplicated chunks.
|
||||
|
||||
Example:
|
||||
dbbackup dedup backup /path/to/database.dump
|
||||
dbbackup dedup backup mydb.sql --compress --encrypt`,
|
||||
Args: cobra.ExactArgs(1),
|
||||
RunE: runDedupBackup,
|
||||
}
|
||||
|
||||
var dedupRestoreCmd = &cobra.Command{
|
||||
Use: "restore <manifest-id> <output-file>",
|
||||
Short: "Restore a backup from its manifest",
|
||||
Long: `Reconstruct a file from its deduplicated chunks.
|
||||
|
||||
Example:
|
||||
dbbackup dedup restore 2026-01-07_120000_mydb /tmp/restored.dump
|
||||
dbbackup dedup list # to see available manifests`,
|
||||
Args: cobra.ExactArgs(2),
|
||||
RunE: runDedupRestore,
|
||||
}
|
||||
|
||||
var dedupListCmd = &cobra.Command{
|
||||
Use: "list",
|
||||
Short: "List all deduplicated backups",
|
||||
RunE: runDedupList,
|
||||
}
|
||||
|
||||
var dedupStatsCmd = &cobra.Command{
|
||||
Use: "stats",
|
||||
Short: "Show deduplication statistics",
|
||||
RunE: runDedupStats,
|
||||
}
|
||||
|
||||
var dedupGCCmd = &cobra.Command{
|
||||
Use: "gc",
|
||||
Short: "Garbage collect unreferenced chunks",
|
||||
Long: `Remove chunks that are no longer referenced by any manifest.
|
||||
|
||||
Run after deleting old backups to reclaim space.`,
|
||||
RunE: runDedupGC,
|
||||
}
|
||||
|
||||
var dedupDeleteCmd = &cobra.Command{
|
||||
Use: "delete <manifest-id>",
|
||||
Short: "Delete a backup manifest (chunks cleaned by gc)",
|
||||
Args: cobra.ExactArgs(1),
|
||||
RunE: runDedupDelete,
|
||||
}
|
||||
|
||||
var dedupVerifyCmd = &cobra.Command{
|
||||
Use: "verify [manifest-id]",
|
||||
Short: "Verify chunk integrity against manifests",
|
||||
Long: `Verify that all chunks referenced by manifests exist and have correct hashes.
|
||||
|
||||
Without arguments, verifies all backups. With a manifest ID, verifies only that backup.
|
||||
|
||||
Examples:
|
||||
dbbackup dedup verify # Verify all backups
|
||||
dbbackup dedup verify 2026-01-07_mydb # Verify specific backup`,
|
||||
RunE: runDedupVerify,
|
||||
}
|
||||
|
||||
var dedupPruneCmd = &cobra.Command{
|
||||
Use: "prune",
|
||||
Short: "Apply retention policy to manifests",
|
||||
Long: `Delete old manifests based on retention policy (like borg prune).
|
||||
|
||||
Keeps a specified number of recent backups per database and deletes the rest.
|
||||
|
||||
Examples:
|
||||
dbbackup dedup prune --keep-last 7 # Keep 7 most recent
|
||||
dbbackup dedup prune --keep-daily 7 --keep-weekly 4 # Keep 7 daily + 4 weekly`,
|
||||
RunE: runDedupPrune,
|
||||
}
|
||||
|
||||
var dedupBackupDBCmd = &cobra.Command{
|
||||
Use: "backup-db",
|
||||
Short: "Direct database dump with deduplication",
|
||||
Long: `Dump a database directly into deduplicated chunks without temp files.
|
||||
|
||||
Streams the database dump through the chunker for efficient deduplication.
|
||||
|
||||
Examples:
|
||||
dbbackup dedup backup-db --db-type postgres --db-name mydb
|
||||
dbbackup dedup backup-db -d mariadb --database production_db --host db.local`,
|
||||
RunE: runDedupBackupDB,
|
||||
}
|
||||
|
||||
// Prune flags
|
||||
var (
|
||||
pruneKeepLast int
|
||||
pruneKeepDaily int
|
||||
pruneKeepWeekly int
|
||||
pruneDryRun bool
|
||||
)
|
||||
|
||||
// backup-db flags
|
||||
var (
|
||||
backupDBDatabase string
|
||||
backupDBUser string
|
||||
backupDBPassword string
|
||||
)
|
||||
|
||||
// metrics flags
|
||||
var (
|
||||
dedupMetricsOutput string
|
||||
dedupMetricsInstance string
|
||||
)
|
||||
|
||||
var dedupMetricsCmd = &cobra.Command{
|
||||
Use: "metrics",
|
||||
Short: "Export dedup statistics as Prometheus metrics",
|
||||
Long: `Export deduplication statistics in Prometheus format.
|
||||
|
||||
Can write to a textfile for node_exporter's textfile collector,
|
||||
or print to stdout for custom integrations.
|
||||
|
||||
Examples:
|
||||
dbbackup dedup metrics # Print to stdout
|
||||
dbbackup dedup metrics --output /var/lib/node_exporter/textfile_collector/dedup.prom
|
||||
dbbackup dedup metrics --instance prod-db-1`,
|
||||
RunE: runDedupMetrics,
|
||||
}
|
||||
|
||||
// Flags
|
||||
var (
|
||||
dedupDir string
|
||||
dedupIndexDB string // Separate path for SQLite index (for NFS/CIFS support)
|
||||
dedupCompress bool
|
||||
dedupEncrypt bool
|
||||
dedupKey string
|
||||
dedupName string
|
||||
dedupDBType string
|
||||
dedupDBName string
|
||||
dedupDBHost string
|
||||
dedupDecompress bool // Auto-decompress gzip input
|
||||
)
|
||||
|
||||
func init() {
|
||||
rootCmd.AddCommand(dedupCmd)
|
||||
dedupCmd.AddCommand(dedupBackupCmd)
|
||||
dedupCmd.AddCommand(dedupRestoreCmd)
|
||||
dedupCmd.AddCommand(dedupListCmd)
|
||||
dedupCmd.AddCommand(dedupStatsCmd)
|
||||
dedupCmd.AddCommand(dedupGCCmd)
|
||||
dedupCmd.AddCommand(dedupDeleteCmd)
|
||||
dedupCmd.AddCommand(dedupVerifyCmd)
|
||||
dedupCmd.AddCommand(dedupPruneCmd)
|
||||
dedupCmd.AddCommand(dedupBackupDBCmd)
|
||||
dedupCmd.AddCommand(dedupMetricsCmd)
|
||||
|
||||
// Global dedup flags
|
||||
dedupCmd.PersistentFlags().StringVar(&dedupDir, "dedup-dir", "", "Dedup storage directory (default: $BACKUP_DIR/dedup)")
|
||||
dedupCmd.PersistentFlags().StringVar(&dedupIndexDB, "index-db", "", "SQLite index path (local recommended for NFS/CIFS chunk dirs)")
|
||||
dedupCmd.PersistentFlags().BoolVar(&dedupCompress, "compress", true, "Compress chunks with gzip")
|
||||
dedupCmd.PersistentFlags().BoolVar(&dedupEncrypt, "encrypt", false, "Encrypt chunks with AES-256-GCM")
|
||||
dedupCmd.PersistentFlags().StringVar(&dedupKey, "key", "", "Encryption key (hex) or use DBBACKUP_DEDUP_KEY env")
|
||||
|
||||
// Backup-specific flags
|
||||
dedupBackupCmd.Flags().StringVar(&dedupName, "name", "", "Optional backup name")
|
||||
dedupBackupCmd.Flags().StringVar(&dedupDBType, "db-type", "", "Database type (postgres/mysql)")
|
||||
dedupBackupCmd.Flags().StringVar(&dedupDBName, "db-name", "", "Database name")
|
||||
dedupBackupCmd.Flags().StringVar(&dedupDBHost, "db-host", "", "Database host")
|
||||
dedupBackupCmd.Flags().BoolVar(&dedupDecompress, "decompress-input", false, "Auto-decompress gzip input before chunking (improves dedup ratio)")
|
||||
|
||||
// Prune flags
|
||||
dedupPruneCmd.Flags().IntVar(&pruneKeepLast, "keep-last", 0, "Keep the last N backups")
|
||||
dedupPruneCmd.Flags().IntVar(&pruneKeepDaily, "keep-daily", 0, "Keep N daily backups")
|
||||
dedupPruneCmd.Flags().IntVar(&pruneKeepWeekly, "keep-weekly", 0, "Keep N weekly backups")
|
||||
dedupPruneCmd.Flags().BoolVar(&pruneDryRun, "dry-run", false, "Show what would be deleted without actually deleting")
|
||||
|
||||
// backup-db flags
|
||||
dedupBackupDBCmd.Flags().StringVarP(&dedupDBType, "db-type", "d", "", "Database type (postgres/mariadb/mysql)")
|
||||
dedupBackupDBCmd.Flags().StringVar(&backupDBDatabase, "database", "", "Database name to backup")
|
||||
dedupBackupDBCmd.Flags().StringVar(&dedupDBHost, "host", "localhost", "Database host")
|
||||
dedupBackupDBCmd.Flags().StringVarP(&backupDBUser, "user", "u", "", "Database user")
|
||||
dedupBackupDBCmd.Flags().StringVarP(&backupDBPassword, "password", "p", "", "Database password (or use env)")
|
||||
dedupBackupDBCmd.MarkFlagRequired("db-type")
|
||||
dedupBackupDBCmd.MarkFlagRequired("database")
|
||||
|
||||
// Metrics flags
|
||||
dedupMetricsCmd.Flags().StringVarP(&dedupMetricsOutput, "output", "o", "", "Output file path (default: stdout)")
|
||||
dedupMetricsCmd.Flags().StringVar(&dedupMetricsInstance, "instance", "", "Instance label for metrics (default: hostname)")
|
||||
}
|
||||
|
||||
func getDedupDir() string {
|
||||
if dedupDir != "" {
|
||||
return dedupDir
|
||||
}
|
||||
if cfg != nil && cfg.BackupDir != "" {
|
||||
return filepath.Join(cfg.BackupDir, "dedup")
|
||||
}
|
||||
return filepath.Join(os.Getenv("HOME"), "db_backups", "dedup")
|
||||
}
|
||||
|
||||
func getIndexDBPath() string {
|
||||
if dedupIndexDB != "" {
|
||||
return dedupIndexDB
|
||||
}
|
||||
// Default: same directory as chunks (may have issues on NFS/CIFS)
|
||||
return filepath.Join(getDedupDir(), "chunks.db")
|
||||
}
|
||||
|
||||
func getEncryptionKey() string {
|
||||
if dedupKey != "" {
|
||||
return dedupKey
|
||||
}
|
||||
return os.Getenv("DBBACKUP_DEDUP_KEY")
|
||||
}
|
||||
|
||||
func runDedupBackup(cmd *cobra.Command, args []string) error {
|
||||
inputPath := args[0]
|
||||
|
||||
// Open input file
|
||||
file, err := os.Open(inputPath)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to open input file: %w", err)
|
||||
}
|
||||
defer file.Close()
|
||||
|
||||
info, err := file.Stat()
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to stat input file: %w", err)
|
||||
}
|
||||
|
||||
// Check for compressed input and warn/handle
|
||||
var reader io.Reader = file
|
||||
isGzipped := strings.HasSuffix(strings.ToLower(inputPath), ".gz")
|
||||
if isGzipped && !dedupDecompress {
|
||||
fmt.Printf("Warning: Input appears to be gzip compressed (.gz)\n")
|
||||
fmt.Printf(" Compressed data typically has poor dedup ratios (<10%%).\n")
|
||||
fmt.Printf(" Consider using --decompress-input for better deduplication.\n\n")
|
||||
}
|
||||
|
||||
if isGzipped && dedupDecompress {
|
||||
fmt.Printf("Auto-decompressing gzip input for better dedup ratio...\n")
|
||||
gzReader, err := pgzip.NewReader(file)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to decompress gzip input: %w", err)
|
||||
}
|
||||
defer gzReader.Close()
|
||||
reader = gzReader
|
||||
}
|
||||
|
||||
// Setup dedup storage
|
||||
basePath := getDedupDir()
|
||||
encKey := ""
|
||||
if dedupEncrypt {
|
||||
encKey = getEncryptionKey()
|
||||
if encKey == "" {
|
||||
return fmt.Errorf("encryption enabled but no key provided (use --key or DBBACKUP_DEDUP_KEY)")
|
||||
}
|
||||
}
|
||||
|
||||
store, err := dedup.NewChunkStore(dedup.StoreConfig{
|
||||
BasePath: basePath,
|
||||
Compress: dedupCompress,
|
||||
EncryptionKey: encKey,
|
||||
})
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to open chunk store: %w", err)
|
||||
}
|
||||
|
||||
manifestStore, err := dedup.NewManifestStore(basePath)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to open manifest store: %w", err)
|
||||
}
|
||||
|
||||
index, err := dedup.NewChunkIndexAt(getIndexDBPath())
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to open chunk index: %w", err)
|
||||
}
|
||||
defer index.Close()
|
||||
|
||||
// Generate manifest ID
|
||||
now := time.Now()
|
||||
manifestID := now.Format("2006-01-02_150405")
|
||||
if dedupDBName != "" {
|
||||
manifestID += "_" + dedupDBName
|
||||
} else {
|
||||
base := filepath.Base(inputPath)
|
||||
ext := filepath.Ext(base)
|
||||
// Remove .gz extension if decompressing
|
||||
if isGzipped && dedupDecompress {
|
||||
base = strings.TrimSuffix(base, ext)
|
||||
ext = filepath.Ext(base)
|
||||
}
|
||||
manifestID += "_" + strings.TrimSuffix(base, ext)
|
||||
}
|
||||
|
||||
fmt.Printf("Creating deduplicated backup: %s\n", manifestID)
|
||||
fmt.Printf("Input: %s (%s)\n", inputPath, formatBytes(info.Size()))
|
||||
if isGzipped && dedupDecompress {
|
||||
fmt.Printf("Mode: Decompressing before chunking\n")
|
||||
}
|
||||
fmt.Printf("Store: %s\n", basePath)
|
||||
if dedupIndexDB != "" {
|
||||
fmt.Printf("Index: %s\n", getIndexDBPath())
|
||||
}
|
||||
|
||||
// For decompressed input, we can't seek - use TeeReader to hash while chunking
|
||||
h := sha256.New()
|
||||
var chunkReader io.Reader
|
||||
|
||||
if isGzipped && dedupDecompress {
|
||||
// Can't seek on gzip stream - hash will be computed inline
|
||||
chunkReader = io.TeeReader(reader, h)
|
||||
} else {
|
||||
// Regular file - hash first, then reset and chunk
|
||||
file.Seek(0, 0)
|
||||
io.Copy(h, file)
|
||||
file.Seek(0, 0)
|
||||
chunkReader = file
|
||||
h = sha256.New() // Reset for inline hashing
|
||||
chunkReader = io.TeeReader(file, h)
|
||||
}
|
||||
|
||||
// Chunk the file
|
||||
chunker := dedup.NewChunker(chunkReader, dedup.DefaultChunkerConfig())
|
||||
var chunks []dedup.ChunkRef
|
||||
var totalSize, storedSize int64
|
||||
var chunkCount, newChunks int
|
||||
|
||||
startTime := time.Now()
|
||||
|
||||
for {
|
||||
chunk, err := chunker.Next()
|
||||
if err == io.EOF {
|
||||
break
|
||||
}
|
||||
if err != nil {
|
||||
return fmt.Errorf("chunking failed: %w", err)
|
||||
}
|
||||
|
||||
chunkCount++
|
||||
totalSize += int64(chunk.Length)
|
||||
|
||||
// Store chunk (deduplication happens here)
|
||||
isNew, err := store.Put(chunk)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to store chunk: %w", err)
|
||||
}
|
||||
|
||||
if isNew {
|
||||
newChunks++
|
||||
storedSize += int64(chunk.Length)
|
||||
// Record in index
|
||||
index.AddChunk(chunk.Hash, chunk.Length, chunk.Length)
|
||||
}
|
||||
|
||||
chunks = append(chunks, dedup.ChunkRef{
|
||||
Hash: chunk.Hash,
|
||||
Offset: chunk.Offset,
|
||||
Length: chunk.Length,
|
||||
})
|
||||
|
||||
// Progress
|
||||
if chunkCount%1000 == 0 {
|
||||
fmt.Printf("\r Processed %d chunks, %d new...", chunkCount, newChunks)
|
||||
}
|
||||
}
|
||||
|
||||
duration := time.Since(startTime)
|
||||
|
||||
// Get final hash (computed inline via TeeReader)
|
||||
fileHash := hex.EncodeToString(h.Sum(nil))
|
||||
|
||||
// Calculate dedup ratio
|
||||
dedupRatio := 0.0
|
||||
if totalSize > 0 {
|
||||
dedupRatio = 1.0 - float64(storedSize)/float64(totalSize)
|
||||
}
|
||||
|
||||
// Create manifest
|
||||
manifest := &dedup.Manifest{
|
||||
ID: manifestID,
|
||||
Name: dedupName,
|
||||
CreatedAt: now,
|
||||
DatabaseType: dedupDBType,
|
||||
DatabaseName: dedupDBName,
|
||||
DatabaseHost: dedupDBHost,
|
||||
Chunks: chunks,
|
||||
OriginalSize: totalSize,
|
||||
StoredSize: storedSize,
|
||||
ChunkCount: chunkCount,
|
||||
NewChunks: newChunks,
|
||||
DedupRatio: dedupRatio,
|
||||
Encrypted: dedupEncrypt,
|
||||
Compressed: dedupCompress,
|
||||
SHA256: fileHash,
|
||||
Decompressed: isGzipped && dedupDecompress, // Track if we decompressed
|
||||
}
|
||||
|
||||
if err := manifestStore.Save(manifest); err != nil {
|
||||
return fmt.Errorf("failed to save manifest: %w", err)
|
||||
}
|
||||
|
||||
if err := index.AddManifest(manifest); err != nil {
|
||||
log.Warn("Failed to index manifest", "error", err)
|
||||
}
|
||||
|
||||
fmt.Printf("\r \r")
|
||||
fmt.Printf("\nBackup complete!\n")
|
||||
fmt.Printf(" Manifest: %s\n", manifestID)
|
||||
fmt.Printf(" Chunks: %d total, %d new\n", chunkCount, newChunks)
|
||||
fmt.Printf(" Original: %s\n", formatBytes(totalSize))
|
||||
fmt.Printf(" Stored: %s (new data)\n", formatBytes(storedSize))
|
||||
fmt.Printf(" Dedup ratio: %.1f%%\n", dedupRatio*100)
|
||||
fmt.Printf(" Duration: %s\n", duration.Round(time.Millisecond))
|
||||
fmt.Printf(" Throughput: %s/s\n", formatBytes(int64(float64(totalSize)/duration.Seconds())))
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func runDedupRestore(cmd *cobra.Command, args []string) error {
|
||||
manifestID := args[0]
|
||||
outputPath := args[1]
|
||||
|
||||
basePath := getDedupDir()
|
||||
encKey := ""
|
||||
if dedupEncrypt {
|
||||
encKey = getEncryptionKey()
|
||||
}
|
||||
|
||||
store, err := dedup.NewChunkStore(dedup.StoreConfig{
|
||||
BasePath: basePath,
|
||||
Compress: dedupCompress,
|
||||
EncryptionKey: encKey,
|
||||
})
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to open chunk store: %w", err)
|
||||
}
|
||||
|
||||
manifestStore, err := dedup.NewManifestStore(basePath)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to open manifest store: %w", err)
|
||||
}
|
||||
|
||||
manifest, err := manifestStore.Load(manifestID)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to load manifest: %w", err)
|
||||
}
|
||||
|
||||
fmt.Printf("Restoring backup: %s\n", manifestID)
|
||||
fmt.Printf(" Created: %s\n", manifest.CreatedAt.Format(time.RFC3339))
|
||||
fmt.Printf(" Size: %s\n", formatBytes(manifest.OriginalSize))
|
||||
fmt.Printf(" Chunks: %d\n", manifest.ChunkCount)
|
||||
|
||||
// Create output file
|
||||
outFile, err := os.Create(outputPath)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to create output file: %w", err)
|
||||
}
|
||||
defer outFile.Close()
|
||||
|
||||
h := sha256.New()
|
||||
writer := io.MultiWriter(outFile, h)
|
||||
|
||||
startTime := time.Now()
|
||||
|
||||
for i, ref := range manifest.Chunks {
|
||||
chunk, err := store.Get(ref.Hash)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to read chunk %d (%s): %w", i, ref.Hash[:8], err)
|
||||
}
|
||||
|
||||
if _, err := writer.Write(chunk.Data); err != nil {
|
||||
return fmt.Errorf("failed to write chunk %d: %w", i, err)
|
||||
}
|
||||
|
||||
if (i+1)%1000 == 0 {
|
||||
fmt.Printf("\r Restored %d/%d chunks...", i+1, manifest.ChunkCount)
|
||||
}
|
||||
}
|
||||
|
||||
duration := time.Since(startTime)
|
||||
restoredHash := hex.EncodeToString(h.Sum(nil))
|
||||
|
||||
fmt.Printf("\r \r")
|
||||
fmt.Printf("\nRestore complete!\n")
|
||||
fmt.Printf(" Output: %s\n", outputPath)
|
||||
fmt.Printf(" Duration: %s\n", duration.Round(time.Millisecond))
|
||||
|
||||
// Verify hash
|
||||
if manifest.SHA256 != "" {
|
||||
if restoredHash == manifest.SHA256 {
|
||||
fmt.Printf(" Verification: [OK] SHA-256 matches\n")
|
||||
} else {
|
||||
fmt.Printf(" Verification: [FAIL] SHA-256 MISMATCH!\n")
|
||||
fmt.Printf(" Expected: %s\n", manifest.SHA256)
|
||||
fmt.Printf(" Got: %s\n", restoredHash)
|
||||
return fmt.Errorf("integrity verification failed")
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func runDedupList(cmd *cobra.Command, args []string) error {
|
||||
basePath := getDedupDir()
|
||||
|
||||
manifestStore, err := dedup.NewManifestStore(basePath)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to open manifest store: %w", err)
|
||||
}
|
||||
|
||||
manifests, err := manifestStore.ListAll()
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to list manifests: %w", err)
|
||||
}
|
||||
|
||||
if len(manifests) == 0 {
|
||||
fmt.Println("No deduplicated backups found.")
|
||||
fmt.Printf("Store: %s\n", basePath)
|
||||
return nil
|
||||
}
|
||||
|
||||
fmt.Printf("Deduplicated Backups (%s)\n\n", basePath)
|
||||
fmt.Printf("%-30s %-12s %-10s %-10s %s\n", "ID", "SIZE", "DEDUP", "CHUNKS", "CREATED")
|
||||
fmt.Println(strings.Repeat("-", 80))
|
||||
|
||||
for _, m := range manifests {
|
||||
fmt.Printf("%-30s %-12s %-10.1f%% %-10d %s\n",
|
||||
truncateStr(m.ID, 30),
|
||||
formatBytes(m.OriginalSize),
|
||||
m.DedupRatio*100,
|
||||
m.ChunkCount,
|
||||
m.CreatedAt.Format("2006-01-02 15:04"),
|
||||
)
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func runDedupStats(cmd *cobra.Command, args []string) error {
|
||||
basePath := getDedupDir()
|
||||
|
||||
index, err := dedup.NewChunkIndexAt(getIndexDBPath())
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to open chunk index: %w", err)
|
||||
}
|
||||
defer index.Close()
|
||||
|
||||
stats, err := index.Stats()
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to get stats: %w", err)
|
||||
}
|
||||
|
||||
store, err := dedup.NewChunkStore(dedup.StoreConfig{BasePath: basePath})
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to open chunk store: %w", err)
|
||||
}
|
||||
|
||||
storeStats, err := store.Stats()
|
||||
if err != nil {
|
||||
log.Warn("Failed to get store stats", "error", err)
|
||||
}
|
||||
|
||||
fmt.Printf("Deduplication Statistics\n")
|
||||
fmt.Printf("========================\n\n")
|
||||
fmt.Printf("Store: %s\n", basePath)
|
||||
fmt.Printf("Manifests: %d\n", stats.TotalManifests)
|
||||
fmt.Printf("Unique chunks: %d\n", stats.TotalChunks)
|
||||
fmt.Printf("Total raw size: %s\n", formatBytes(stats.TotalSizeRaw))
|
||||
fmt.Printf("Stored size: %s\n", formatBytes(stats.TotalSizeStored))
|
||||
fmt.Printf("\n")
|
||||
fmt.Printf("Backup Statistics (accurate dedup calculation):\n")
|
||||
fmt.Printf(" Total backed up: %s (across all backups)\n", formatBytes(stats.TotalBackupSize))
|
||||
fmt.Printf(" New data stored: %s\n", formatBytes(stats.TotalNewData))
|
||||
fmt.Printf(" Space saved: %s\n", formatBytes(stats.SpaceSaved))
|
||||
fmt.Printf(" Dedup ratio: %.1f%%\n", stats.DedupRatio*100)
|
||||
|
||||
if storeStats != nil {
|
||||
fmt.Printf("Disk usage: %s\n", formatBytes(storeStats.TotalSize))
|
||||
fmt.Printf("Directories: %d\n", storeStats.Directories)
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func runDedupGC(cmd *cobra.Command, args []string) error {
|
||||
basePath := getDedupDir()
|
||||
|
||||
index, err := dedup.NewChunkIndexAt(getIndexDBPath())
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to open chunk index: %w", err)
|
||||
}
|
||||
defer index.Close()
|
||||
|
||||
store, err := dedup.NewChunkStore(dedup.StoreConfig{
|
||||
BasePath: basePath,
|
||||
Compress: dedupCompress,
|
||||
})
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to open chunk store: %w", err)
|
||||
}
|
||||
|
||||
// Find orphaned chunks
|
||||
orphans, err := index.ListOrphanedChunks()
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to find orphaned chunks: %w", err)
|
||||
}
|
||||
|
||||
if len(orphans) == 0 {
|
||||
fmt.Println("No orphaned chunks to clean up.")
|
||||
return nil
|
||||
}
|
||||
|
||||
fmt.Printf("Found %d orphaned chunks\n", len(orphans))
|
||||
|
||||
var freed int64
|
||||
for _, hash := range orphans {
|
||||
if meta, _ := index.GetChunk(hash); meta != nil {
|
||||
freed += meta.SizeStored
|
||||
}
|
||||
if err := store.Delete(hash); err != nil {
|
||||
log.Warn("Failed to delete chunk", "hash", hash[:8], "error", err)
|
||||
continue
|
||||
}
|
||||
if err := index.RemoveChunk(hash); err != nil {
|
||||
log.Warn("Failed to remove chunk from index", "hash", hash[:8], "error", err)
|
||||
}
|
||||
}
|
||||
|
||||
fmt.Printf("Deleted %d chunks, freed %s\n", len(orphans), formatBytes(freed))
|
||||
|
||||
// Vacuum the index
|
||||
if err := index.Vacuum(); err != nil {
|
||||
log.Warn("Failed to vacuum index", "error", err)
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func runDedupDelete(cmd *cobra.Command, args []string) error {
|
||||
manifestID := args[0]
|
||||
basePath := getDedupDir()
|
||||
|
||||
manifestStore, err := dedup.NewManifestStore(basePath)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to open manifest store: %w", err)
|
||||
}
|
||||
|
||||
index, err := dedup.NewChunkIndexAt(getIndexDBPath())
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to open chunk index: %w", err)
|
||||
}
|
||||
defer index.Close()
|
||||
|
||||
// Load manifest to decrement chunk refs
|
||||
manifest, err := manifestStore.Load(manifestID)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to load manifest: %w", err)
|
||||
}
|
||||
|
||||
// Decrement reference counts
|
||||
for _, ref := range manifest.Chunks {
|
||||
index.DecrementRef(ref.Hash)
|
||||
}
|
||||
|
||||
// Delete manifest
|
||||
if err := manifestStore.Delete(manifestID); err != nil {
|
||||
return fmt.Errorf("failed to delete manifest: %w", err)
|
||||
}
|
||||
|
||||
if err := index.RemoveManifest(manifestID); err != nil {
|
||||
log.Warn("Failed to remove manifest from index", "error", err)
|
||||
}
|
||||
|
||||
fmt.Printf("Deleted backup: %s\n", manifestID)
|
||||
fmt.Println("Run 'dbbackup dedup gc' to reclaim space from unreferenced chunks.")
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// Helper functions
|
||||
func formatBytes(b int64) string {
|
||||
const unit = 1024
|
||||
if b < unit {
|
||||
return fmt.Sprintf("%d B", b)
|
||||
}
|
||||
div, exp := int64(unit), 0
|
||||
for n := b / unit; n >= unit; n /= unit {
|
||||
div *= unit
|
||||
exp++
|
||||
}
|
||||
return fmt.Sprintf("%.1f %cB", float64(b)/float64(div), "KMGTPE"[exp])
|
||||
}
|
||||
|
||||
func truncateStr(s string, max int) string {
|
||||
if len(s) <= max {
|
||||
return s
|
||||
}
|
||||
return s[:max-3] + "..."
|
||||
}
|
||||
|
||||
func runDedupVerify(cmd *cobra.Command, args []string) error {
|
||||
basePath := getDedupDir()
|
||||
|
||||
store, err := dedup.NewChunkStore(dedup.StoreConfig{
|
||||
BasePath: basePath,
|
||||
Compress: dedupCompress,
|
||||
})
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to open chunk store: %w", err)
|
||||
}
|
||||
|
||||
manifestStore, err := dedup.NewManifestStore(basePath)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to open manifest store: %w", err)
|
||||
}
|
||||
|
||||
index, err := dedup.NewChunkIndexAt(getIndexDBPath())
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to open chunk index: %w", err)
|
||||
}
|
||||
defer index.Close()
|
||||
|
||||
var manifests []*dedup.Manifest
|
||||
|
||||
if len(args) > 0 {
|
||||
// Verify specific manifest
|
||||
m, err := manifestStore.Load(args[0])
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to load manifest: %w", err)
|
||||
}
|
||||
manifests = []*dedup.Manifest{m}
|
||||
} else {
|
||||
// Verify all manifests
|
||||
manifests, err = manifestStore.ListAll()
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to list manifests: %w", err)
|
||||
}
|
||||
}
|
||||
|
||||
if len(manifests) == 0 {
|
||||
fmt.Println("No manifests to verify.")
|
||||
return nil
|
||||
}
|
||||
|
||||
fmt.Printf("Verifying %d backup(s)...\n\n", len(manifests))
|
||||
|
||||
var totalChunks, missingChunks, corruptChunks int
|
||||
var allOK = true
|
||||
|
||||
for _, m := range manifests {
|
||||
fmt.Printf("Verifying: %s (%d chunks)\n", m.ID, m.ChunkCount)
|
||||
|
||||
var missing, corrupt int
|
||||
seenHashes := make(map[string]bool)
|
||||
|
||||
for i, ref := range m.Chunks {
|
||||
if seenHashes[ref.Hash] {
|
||||
continue // Already verified this chunk
|
||||
}
|
||||
seenHashes[ref.Hash] = true
|
||||
totalChunks++
|
||||
|
||||
// Check if chunk exists
|
||||
if !store.Has(ref.Hash) {
|
||||
missing++
|
||||
missingChunks++
|
||||
if missing <= 5 {
|
||||
fmt.Printf(" [MISSING] chunk %d: %s\n", i, ref.Hash[:16])
|
||||
}
|
||||
continue
|
||||
}
|
||||
|
||||
// Verify chunk hash by reading it
|
||||
chunk, err := store.Get(ref.Hash)
|
||||
if err != nil {
|
||||
corrupt++
|
||||
corruptChunks++
|
||||
if corrupt <= 5 {
|
||||
fmt.Printf(" [CORRUPT] chunk %d: %s - %v\n", i, ref.Hash[:16], err)
|
||||
}
|
||||
continue
|
||||
}
|
||||
|
||||
// Verify size
|
||||
if chunk.Length != ref.Length {
|
||||
corrupt++
|
||||
corruptChunks++
|
||||
if corrupt <= 5 {
|
||||
fmt.Printf(" [SIZE MISMATCH] chunk %d: expected %d, got %d\n", i, ref.Length, chunk.Length)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if missing > 0 || corrupt > 0 {
|
||||
allOK = false
|
||||
fmt.Printf(" Result: FAILED (%d missing, %d corrupt)\n", missing, corrupt)
|
||||
if missing > 5 || corrupt > 5 {
|
||||
fmt.Printf(" ... and %d more errors\n", (missing+corrupt)-10)
|
||||
}
|
||||
} else {
|
||||
fmt.Printf(" Result: OK (%d unique chunks verified)\n", len(seenHashes))
|
||||
// Update verified timestamp
|
||||
m.VerifiedAt = time.Now()
|
||||
manifestStore.Save(m)
|
||||
index.UpdateManifestVerified(m.ID, m.VerifiedAt)
|
||||
}
|
||||
fmt.Println()
|
||||
}
|
||||
|
||||
fmt.Println("========================================")
|
||||
if allOK {
|
||||
fmt.Printf("All %d backup(s) verified successfully!\n", len(manifests))
|
||||
fmt.Printf("Total unique chunks checked: %d\n", totalChunks)
|
||||
} else {
|
||||
fmt.Printf("Verification FAILED!\n")
|
||||
fmt.Printf("Missing chunks: %d\n", missingChunks)
|
||||
fmt.Printf("Corrupt chunks: %d\n", corruptChunks)
|
||||
return fmt.Errorf("verification failed: %d missing, %d corrupt chunks", missingChunks, corruptChunks)
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func runDedupPrune(cmd *cobra.Command, args []string) error {
|
||||
if pruneKeepLast == 0 && pruneKeepDaily == 0 && pruneKeepWeekly == 0 {
|
||||
return fmt.Errorf("at least one of --keep-last, --keep-daily, or --keep-weekly must be specified")
|
||||
}
|
||||
|
||||
basePath := getDedupDir()
|
||||
|
||||
manifestStore, err := dedup.NewManifestStore(basePath)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to open manifest store: %w", err)
|
||||
}
|
||||
|
||||
index, err := dedup.NewChunkIndexAt(getIndexDBPath())
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to open chunk index: %w", err)
|
||||
}
|
||||
defer index.Close()
|
||||
|
||||
manifests, err := manifestStore.ListAll()
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to list manifests: %w", err)
|
||||
}
|
||||
|
||||
if len(manifests) == 0 {
|
||||
fmt.Println("No backups to prune.")
|
||||
return nil
|
||||
}
|
||||
|
||||
// Group by database name
|
||||
byDatabase := make(map[string][]*dedup.Manifest)
|
||||
for _, m := range manifests {
|
||||
key := m.DatabaseName
|
||||
if key == "" {
|
||||
key = "_default"
|
||||
}
|
||||
byDatabase[key] = append(byDatabase[key], m)
|
||||
}
|
||||
|
||||
var toDelete []*dedup.Manifest
|
||||
|
||||
for dbName, dbManifests := range byDatabase {
|
||||
// Already sorted by time (newest first from ListAll)
|
||||
kept := make(map[string]bool)
|
||||
var keepReasons = make(map[string]string)
|
||||
|
||||
// Keep last N
|
||||
if pruneKeepLast > 0 {
|
||||
for i := 0; i < pruneKeepLast && i < len(dbManifests); i++ {
|
||||
kept[dbManifests[i].ID] = true
|
||||
keepReasons[dbManifests[i].ID] = "keep-last"
|
||||
}
|
||||
}
|
||||
|
||||
// Keep daily (one per day)
|
||||
if pruneKeepDaily > 0 {
|
||||
seenDays := make(map[string]bool)
|
||||
count := 0
|
||||
for _, m := range dbManifests {
|
||||
day := m.CreatedAt.Format("2006-01-02")
|
||||
if !seenDays[day] {
|
||||
seenDays[day] = true
|
||||
if count < pruneKeepDaily {
|
||||
kept[m.ID] = true
|
||||
if keepReasons[m.ID] == "" {
|
||||
keepReasons[m.ID] = "keep-daily"
|
||||
}
|
||||
count++
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Keep weekly (one per week)
|
||||
if pruneKeepWeekly > 0 {
|
||||
seenWeeks := make(map[string]bool)
|
||||
count := 0
|
||||
for _, m := range dbManifests {
|
||||
year, week := m.CreatedAt.ISOWeek()
|
||||
weekKey := fmt.Sprintf("%d-W%02d", year, week)
|
||||
if !seenWeeks[weekKey] {
|
||||
seenWeeks[weekKey] = true
|
||||
if count < pruneKeepWeekly {
|
||||
kept[m.ID] = true
|
||||
if keepReasons[m.ID] == "" {
|
||||
keepReasons[m.ID] = "keep-weekly"
|
||||
}
|
||||
count++
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if dbName != "_default" {
|
||||
fmt.Printf("\nDatabase: %s\n", dbName)
|
||||
} else {
|
||||
fmt.Printf("\nUnnamed backups:\n")
|
||||
}
|
||||
|
||||
for _, m := range dbManifests {
|
||||
if kept[m.ID] {
|
||||
fmt.Printf(" [KEEP] %s (%s) - %s\n", m.ID, m.CreatedAt.Format("2006-01-02"), keepReasons[m.ID])
|
||||
} else {
|
||||
fmt.Printf(" [DELETE] %s (%s)\n", m.ID, m.CreatedAt.Format("2006-01-02"))
|
||||
toDelete = append(toDelete, m)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if len(toDelete) == 0 {
|
||||
fmt.Printf("\nNo backups to prune (all match retention policy).\n")
|
||||
return nil
|
||||
}
|
||||
|
||||
fmt.Printf("\n%d backup(s) will be deleted.\n", len(toDelete))
|
||||
|
||||
if pruneDryRun {
|
||||
fmt.Println("\n[DRY RUN] No changes made. Remove --dry-run to actually delete.")
|
||||
return nil
|
||||
}
|
||||
|
||||
// Actually delete
|
||||
for _, m := range toDelete {
|
||||
// Decrement chunk references
|
||||
for _, ref := range m.Chunks {
|
||||
index.DecrementRef(ref.Hash)
|
||||
}
|
||||
|
||||
if err := manifestStore.Delete(m.ID); err != nil {
|
||||
log.Warn("Failed to delete manifest", "id", m.ID, "error", err)
|
||||
}
|
||||
index.RemoveManifest(m.ID)
|
||||
}
|
||||
|
||||
fmt.Printf("\nDeleted %d backup(s).\n", len(toDelete))
|
||||
fmt.Println("Run 'dbbackup dedup gc' to reclaim space from unreferenced chunks.")
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func runDedupBackupDB(cmd *cobra.Command, args []string) error {
|
||||
dbType := strings.ToLower(dedupDBType)
|
||||
dbName := backupDBDatabase
|
||||
|
||||
// Validate db type
|
||||
var dumpCmd string
|
||||
var dumpArgs []string
|
||||
|
||||
switch dbType {
|
||||
case "postgres", "postgresql", "pg":
|
||||
dbType = "postgres"
|
||||
dumpCmd = "pg_dump"
|
||||
dumpArgs = []string{"-Fc"} // Custom format for better compression
|
||||
if dedupDBHost != "" && dedupDBHost != "localhost" {
|
||||
dumpArgs = append(dumpArgs, "-h", dedupDBHost)
|
||||
}
|
||||
if backupDBUser != "" {
|
||||
dumpArgs = append(dumpArgs, "-U", backupDBUser)
|
||||
}
|
||||
dumpArgs = append(dumpArgs, dbName)
|
||||
|
||||
case "mysql":
|
||||
dumpCmd = "mysqldump"
|
||||
dumpArgs = []string{
|
||||
"--single-transaction",
|
||||
"--routines",
|
||||
"--triggers",
|
||||
"--events",
|
||||
}
|
||||
if dedupDBHost != "" {
|
||||
dumpArgs = append(dumpArgs, "-h", dedupDBHost)
|
||||
}
|
||||
if backupDBUser != "" {
|
||||
dumpArgs = append(dumpArgs, "-u", backupDBUser)
|
||||
}
|
||||
if backupDBPassword != "" {
|
||||
dumpArgs = append(dumpArgs, "-p"+backupDBPassword)
|
||||
}
|
||||
dumpArgs = append(dumpArgs, dbName)
|
||||
|
||||
case "mariadb":
|
||||
dumpCmd = "mariadb-dump"
|
||||
// Fall back to mysqldump if mariadb-dump not available
|
||||
if _, err := exec.LookPath(dumpCmd); err != nil {
|
||||
dumpCmd = "mysqldump"
|
||||
}
|
||||
dumpArgs = []string{
|
||||
"--single-transaction",
|
||||
"--routines",
|
||||
"--triggers",
|
||||
"--events",
|
||||
}
|
||||
if dedupDBHost != "" {
|
||||
dumpArgs = append(dumpArgs, "-h", dedupDBHost)
|
||||
}
|
||||
if backupDBUser != "" {
|
||||
dumpArgs = append(dumpArgs, "-u", backupDBUser)
|
||||
}
|
||||
if backupDBPassword != "" {
|
||||
dumpArgs = append(dumpArgs, "-p"+backupDBPassword)
|
||||
}
|
||||
dumpArgs = append(dumpArgs, dbName)
|
||||
|
||||
default:
|
||||
return fmt.Errorf("unsupported database type: %s (use postgres, mysql, or mariadb)", dbType)
|
||||
}
|
||||
|
||||
// Verify dump command exists
|
||||
if _, err := exec.LookPath(dumpCmd); err != nil {
|
||||
return fmt.Errorf("%s not found in PATH: %w", dumpCmd, err)
|
||||
}
|
||||
|
||||
// Setup dedup storage
|
||||
basePath := getDedupDir()
|
||||
encKey := ""
|
||||
if dedupEncrypt {
|
||||
encKey = getEncryptionKey()
|
||||
if encKey == "" {
|
||||
return fmt.Errorf("encryption enabled but no key provided (use --key or DBBACKUP_DEDUP_KEY)")
|
||||
}
|
||||
}
|
||||
|
||||
store, err := dedup.NewChunkStore(dedup.StoreConfig{
|
||||
BasePath: basePath,
|
||||
Compress: dedupCompress,
|
||||
EncryptionKey: encKey,
|
||||
})
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to open chunk store: %w", err)
|
||||
}
|
||||
|
||||
manifestStore, err := dedup.NewManifestStore(basePath)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to open manifest store: %w", err)
|
||||
}
|
||||
|
||||
index, err := dedup.NewChunkIndexAt(getIndexDBPath())
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to open chunk index: %w", err)
|
||||
}
|
||||
defer index.Close()
|
||||
|
||||
// Generate manifest ID
|
||||
now := time.Now()
|
||||
manifestID := now.Format("2006-01-02_150405") + "_" + dbName
|
||||
|
||||
fmt.Printf("Creating deduplicated database backup: %s\n", manifestID)
|
||||
fmt.Printf("Database: %s (%s)\n", dbName, dbType)
|
||||
fmt.Printf("Command: %s %s\n", dumpCmd, strings.Join(dumpArgs, " "))
|
||||
fmt.Printf("Store: %s\n", basePath)
|
||||
|
||||
// Start the dump command
|
||||
dumpExec := exec.Command(dumpCmd, dumpArgs...)
|
||||
|
||||
// Set password via environment for postgres
|
||||
if dbType == "postgres" && backupDBPassword != "" {
|
||||
dumpExec.Env = append(os.Environ(), "PGPASSWORD="+backupDBPassword)
|
||||
}
|
||||
|
||||
stdout, err := dumpExec.StdoutPipe()
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to get stdout pipe: %w", err)
|
||||
}
|
||||
|
||||
stderr, err := dumpExec.StderrPipe()
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to get stderr pipe: %w", err)
|
||||
}
|
||||
|
||||
if err := dumpExec.Start(); err != nil {
|
||||
return fmt.Errorf("failed to start %s: %w", dumpCmd, err)
|
||||
}
|
||||
|
||||
// Hash while chunking using TeeReader
|
||||
h := sha256.New()
|
||||
reader := io.TeeReader(stdout, h)
|
||||
|
||||
// Chunk the stream directly
|
||||
chunker := dedup.NewChunker(reader, dedup.DefaultChunkerConfig())
|
||||
var chunks []dedup.ChunkRef
|
||||
var totalSize, storedSize int64
|
||||
var chunkCount, newChunks int
|
||||
|
||||
startTime := time.Now()
|
||||
|
||||
for {
|
||||
chunk, err := chunker.Next()
|
||||
if err == io.EOF {
|
||||
break
|
||||
}
|
||||
if err != nil {
|
||||
return fmt.Errorf("chunking failed: %w", err)
|
||||
}
|
||||
|
||||
chunkCount++
|
||||
totalSize += int64(chunk.Length)
|
||||
|
||||
// Store chunk (deduplication happens here)
|
||||
isNew, err := store.Put(chunk)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to store chunk: %w", err)
|
||||
}
|
||||
|
||||
if isNew {
|
||||
newChunks++
|
||||
storedSize += int64(chunk.Length)
|
||||
index.AddChunk(chunk.Hash, chunk.Length, chunk.Length)
|
||||
}
|
||||
|
||||
chunks = append(chunks, dedup.ChunkRef{
|
||||
Hash: chunk.Hash,
|
||||
Offset: chunk.Offset,
|
||||
Length: chunk.Length,
|
||||
})
|
||||
|
||||
if chunkCount%1000 == 0 {
|
||||
fmt.Printf("\r Processed %d chunks, %d new, %s...", chunkCount, newChunks, formatBytes(totalSize))
|
||||
}
|
||||
}
|
||||
|
||||
// Read any stderr
|
||||
stderrBytes, _ := io.ReadAll(stderr)
|
||||
|
||||
// Wait for command to complete
|
||||
if err := dumpExec.Wait(); err != nil {
|
||||
return fmt.Errorf("%s failed: %w\nstderr: %s", dumpCmd, err, string(stderrBytes))
|
||||
}
|
||||
|
||||
duration := time.Since(startTime)
|
||||
fileHash := hex.EncodeToString(h.Sum(nil))
|
||||
|
||||
// Calculate dedup ratio
|
||||
dedupRatio := 0.0
|
||||
if totalSize > 0 {
|
||||
dedupRatio = 1.0 - float64(storedSize)/float64(totalSize)
|
||||
}
|
||||
|
||||
// Create manifest
|
||||
manifest := &dedup.Manifest{
|
||||
ID: manifestID,
|
||||
Name: dedupName,
|
||||
CreatedAt: now,
|
||||
DatabaseType: dbType,
|
||||
DatabaseName: dbName,
|
||||
DatabaseHost: dedupDBHost,
|
||||
Chunks: chunks,
|
||||
OriginalSize: totalSize,
|
||||
StoredSize: storedSize,
|
||||
ChunkCount: chunkCount,
|
||||
NewChunks: newChunks,
|
||||
DedupRatio: dedupRatio,
|
||||
Encrypted: dedupEncrypt,
|
||||
Compressed: dedupCompress,
|
||||
SHA256: fileHash,
|
||||
}
|
||||
|
||||
if err := manifestStore.Save(manifest); err != nil {
|
||||
return fmt.Errorf("failed to save manifest: %w", err)
|
||||
}
|
||||
|
||||
if err := index.AddManifest(manifest); err != nil {
|
||||
log.Warn("Failed to index manifest", "error", err)
|
||||
}
|
||||
|
||||
fmt.Printf("\r \r")
|
||||
fmt.Printf("\nBackup complete!\n")
|
||||
fmt.Printf(" Manifest: %s\n", manifestID)
|
||||
fmt.Printf(" Chunks: %d total, %d new\n", chunkCount, newChunks)
|
||||
fmt.Printf(" Dump size: %s\n", formatBytes(totalSize))
|
||||
fmt.Printf(" Stored: %s (new data)\n", formatBytes(storedSize))
|
||||
fmt.Printf(" Dedup ratio: %.1f%%\n", dedupRatio*100)
|
||||
fmt.Printf(" Duration: %s\n", duration.Round(time.Millisecond))
|
||||
fmt.Printf(" Throughput: %s/s\n", formatBytes(int64(float64(totalSize)/duration.Seconds())))
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func runDedupMetrics(cmd *cobra.Command, args []string) error {
|
||||
basePath := getDedupDir()
|
||||
indexPath := getIndexDBPath()
|
||||
|
||||
instance := dedupMetricsInstance
|
||||
if instance == "" {
|
||||
hostname, _ := os.Hostname()
|
||||
instance = hostname
|
||||
}
|
||||
|
||||
metrics, err := dedup.CollectMetrics(basePath, indexPath)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to collect metrics: %w", err)
|
||||
}
|
||||
|
||||
output := dedup.FormatPrometheusMetrics(metrics, instance)
|
||||
|
||||
if dedupMetricsOutput != "" {
|
||||
if err := dedup.WritePrometheusTextfile(dedupMetricsOutput, instance, basePath, indexPath); err != nil {
|
||||
return fmt.Errorf("failed to write metrics: %w", err)
|
||||
}
|
||||
fmt.Printf("Wrote metrics to %s\n", dedupMetricsOutput)
|
||||
} else {
|
||||
fmt.Print(output)
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
58
cmd/drill.go
58
cmd/drill.go
@ -318,7 +318,7 @@ func runDrillList(cmd *cobra.Command, args []string) error {
|
||||
}
|
||||
|
||||
fmt.Printf("%-15s %-40s %-20s %s\n", "ID", "NAME", "IMAGE", "STATUS")
|
||||
fmt.Println(strings.Repeat("─", 100))
|
||||
fmt.Println(strings.Repeat("-", 100))
|
||||
|
||||
for _, c := range containers {
|
||||
fmt.Printf("%-15s %-40s %-20s %s\n",
|
||||
@ -345,7 +345,7 @@ func runDrillCleanup(cmd *cobra.Command, args []string) error {
|
||||
return err
|
||||
}
|
||||
|
||||
fmt.Println("✅ Cleanup completed")
|
||||
fmt.Println("[OK] Cleanup completed")
|
||||
return nil
|
||||
}
|
||||
|
||||
@ -369,32 +369,32 @@ func runDrillReport(cmd *cobra.Command, args []string) error {
|
||||
|
||||
func printDrillResult(result *drill.DrillResult) {
|
||||
fmt.Printf("\n")
|
||||
fmt.Printf("━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\n")
|
||||
fmt.Printf("=====================================================\n")
|
||||
fmt.Printf(" DR Drill Report: %s\n", result.DrillID)
|
||||
fmt.Printf("━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\n\n")
|
||||
fmt.Printf("=====================================================\n\n")
|
||||
|
||||
status := "✅ PASSED"
|
||||
status := "[OK] PASSED"
|
||||
if !result.Success {
|
||||
status = "❌ FAILED"
|
||||
status = "[FAIL] FAILED"
|
||||
} else if result.Status == drill.StatusPartial {
|
||||
status = "⚠️ PARTIAL"
|
||||
status = "[WARN] PARTIAL"
|
||||
}
|
||||
|
||||
fmt.Printf("📋 Status: %s\n", status)
|
||||
fmt.Printf("💾 Backup: %s\n", filepath.Base(result.BackupPath))
|
||||
fmt.Printf("🗄️ Database: %s (%s)\n", result.DatabaseName, result.DatabaseType)
|
||||
fmt.Printf("⏱️ Duration: %.2fs\n", result.Duration)
|
||||
fmt.Printf("[LOG] Status: %s\n", status)
|
||||
fmt.Printf("[SAVE] Backup: %s\n", filepath.Base(result.BackupPath))
|
||||
fmt.Printf("[DB] Database: %s (%s)\n", result.DatabaseName, result.DatabaseType)
|
||||
fmt.Printf("[TIME] Duration: %.2fs\n", result.Duration)
|
||||
fmt.Printf("📅 Started: %s\n", result.StartTime.Format(time.RFC3339))
|
||||
fmt.Printf("\n")
|
||||
|
||||
// Phases
|
||||
fmt.Printf("📊 Phases:\n")
|
||||
fmt.Printf("[STATS] Phases:\n")
|
||||
for _, phase := range result.Phases {
|
||||
icon := "✅"
|
||||
icon := "[OK]"
|
||||
if phase.Status == "failed" {
|
||||
icon = "❌"
|
||||
icon = "[FAIL]"
|
||||
} else if phase.Status == "running" {
|
||||
icon = "🔄"
|
||||
icon = "[SYNC]"
|
||||
}
|
||||
fmt.Printf(" %s %-20s (%.2fs) %s\n", icon, phase.Name, phase.Duration, phase.Message)
|
||||
}
|
||||
@ -412,10 +412,10 @@ func printDrillResult(result *drill.DrillResult) {
|
||||
fmt.Printf("\n")
|
||||
|
||||
// RTO
|
||||
fmt.Printf("⏱️ RTO Analysis:\n")
|
||||
rtoIcon := "✅"
|
||||
fmt.Printf("[TIME] RTO Analysis:\n")
|
||||
rtoIcon := "[OK]"
|
||||
if !result.RTOMet {
|
||||
rtoIcon = "❌"
|
||||
rtoIcon = "[FAIL]"
|
||||
}
|
||||
fmt.Printf(" Actual RTO: %.2fs\n", result.ActualRTO)
|
||||
fmt.Printf(" Target RTO: %.0fs\n", result.TargetRTO)
|
||||
@ -424,11 +424,11 @@ func printDrillResult(result *drill.DrillResult) {
|
||||
|
||||
// Validation results
|
||||
if len(result.ValidationResults) > 0 {
|
||||
fmt.Printf("🔍 Validation Queries:\n")
|
||||
fmt.Printf("[SEARCH] Validation Queries:\n")
|
||||
for _, vr := range result.ValidationResults {
|
||||
icon := "✅"
|
||||
icon := "[OK]"
|
||||
if !vr.Success {
|
||||
icon = "❌"
|
||||
icon = "[FAIL]"
|
||||
}
|
||||
fmt.Printf(" %s %s: %s\n", icon, vr.Name, vr.Result)
|
||||
if vr.Error != "" {
|
||||
@ -440,11 +440,11 @@ func printDrillResult(result *drill.DrillResult) {
|
||||
|
||||
// Check results
|
||||
if len(result.CheckResults) > 0 {
|
||||
fmt.Printf("✓ Checks:\n")
|
||||
fmt.Printf("[OK] Checks:\n")
|
||||
for _, cr := range result.CheckResults {
|
||||
icon := "✅"
|
||||
icon := "[OK]"
|
||||
if !cr.Success {
|
||||
icon = "❌"
|
||||
icon = "[FAIL]"
|
||||
}
|
||||
fmt.Printf(" %s %s\n", icon, cr.Message)
|
||||
}
|
||||
@ -453,7 +453,7 @@ func printDrillResult(result *drill.DrillResult) {
|
||||
|
||||
// Errors and warnings
|
||||
if len(result.Errors) > 0 {
|
||||
fmt.Printf("❌ Errors:\n")
|
||||
fmt.Printf("[FAIL] Errors:\n")
|
||||
for _, e := range result.Errors {
|
||||
fmt.Printf(" • %s\n", e)
|
||||
}
|
||||
@ -461,7 +461,7 @@ func printDrillResult(result *drill.DrillResult) {
|
||||
}
|
||||
|
||||
if len(result.Warnings) > 0 {
|
||||
fmt.Printf("⚠️ Warnings:\n")
|
||||
fmt.Printf("[WARN] Warnings:\n")
|
||||
for _, w := range result.Warnings {
|
||||
fmt.Printf(" • %s\n", w)
|
||||
}
|
||||
@ -470,14 +470,14 @@ func printDrillResult(result *drill.DrillResult) {
|
||||
|
||||
// Container info
|
||||
if result.ContainerKept {
|
||||
fmt.Printf("📦 Container kept: %s\n", result.ContainerID[:12])
|
||||
fmt.Printf("[PKG] Container kept: %s\n", result.ContainerID[:12])
|
||||
fmt.Printf(" Connect with: docker exec -it %s bash\n", result.ContainerID[:12])
|
||||
fmt.Printf("\n")
|
||||
}
|
||||
|
||||
fmt.Printf("━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\n")
|
||||
fmt.Printf("=====================================================\n")
|
||||
fmt.Printf(" %s\n", result.Message)
|
||||
fmt.Printf("━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\n")
|
||||
fmt.Printf("=====================================================\n")
|
||||
}
|
||||
|
||||
func updateCatalogWithDrillResult(ctx context.Context, backupPath string, result *drill.DrillResult) {
|
||||
|
||||
@ -63,9 +63,9 @@ func runEngineList(cmd *cobra.Command, args []string) error {
|
||||
continue
|
||||
}
|
||||
|
||||
status := "✓ Available"
|
||||
status := "[Y] Available"
|
||||
if !avail.Available {
|
||||
status = "✗ Not available"
|
||||
status = "[N] Not available"
|
||||
}
|
||||
|
||||
fmt.Printf("\n%s (%s)\n", info.Name, info.Description)
|
||||
|
||||
239
cmd/install.go
Normal file
239
cmd/install.go
Normal file
@ -0,0 +1,239 @@
|
||||
package cmd
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"os"
|
||||
"os/exec"
|
||||
"os/signal"
|
||||
"strings"
|
||||
"syscall"
|
||||
|
||||
"dbbackup/internal/installer"
|
||||
|
||||
"github.com/spf13/cobra"
|
||||
)
|
||||
|
||||
var (
|
||||
// Install flags
|
||||
installInstance string
|
||||
installSchedule string
|
||||
installBackupType string
|
||||
installUser string
|
||||
installGroup string
|
||||
installBackupDir string
|
||||
installConfigPath string
|
||||
installTimeout int
|
||||
installWithMetrics bool
|
||||
installMetricsPort int
|
||||
installDryRun bool
|
||||
installStatus bool
|
||||
|
||||
// Uninstall flags
|
||||
uninstallPurge bool
|
||||
)
|
||||
|
||||
// installCmd represents the install command
|
||||
var installCmd = &cobra.Command{
|
||||
Use: "install",
|
||||
Short: "Install dbbackup as a systemd service",
|
||||
Long: `Install dbbackup as a systemd service with automatic scheduling.
|
||||
|
||||
This command creates systemd service and timer units for automated database backups.
|
||||
It supports both single database and cluster backup modes.
|
||||
|
||||
Examples:
|
||||
# Interactive installation (will prompt for options)
|
||||
sudo dbbackup install
|
||||
|
||||
# Install cluster backup running daily at 2am
|
||||
sudo dbbackup install --backup-type cluster --schedule "daily"
|
||||
|
||||
# Install single database backup with custom schedule
|
||||
sudo dbbackup install --instance production --backup-type single --schedule "*-*-* 03:00:00"
|
||||
|
||||
# Install with Prometheus metrics exporter
|
||||
sudo dbbackup install --with-metrics --metrics-port 9399
|
||||
|
||||
# Check installation status
|
||||
dbbackup install --status
|
||||
|
||||
# Dry-run to see what would be installed
|
||||
sudo dbbackup install --dry-run
|
||||
|
||||
Schedule format (OnCalendar):
|
||||
daily - Every day at midnight
|
||||
weekly - Every Monday at midnight
|
||||
*-*-* 02:00:00 - Every day at 2am
|
||||
*-*-* 02,14:00 - Twice daily at 2am and 2pm
|
||||
Mon *-*-* 03:00 - Every Monday at 3am
|
||||
`,
|
||||
RunE: func(cmd *cobra.Command, args []string) error {
|
||||
// Handle --status flag
|
||||
if installStatus {
|
||||
return runInstallStatus(cmd.Context())
|
||||
}
|
||||
|
||||
return runInstall(cmd.Context())
|
||||
},
|
||||
}
|
||||
|
||||
// uninstallCmd represents the uninstall command
|
||||
var uninstallCmd = &cobra.Command{
|
||||
Use: "uninstall [instance]",
|
||||
Short: "Uninstall dbbackup systemd service",
|
||||
Long: `Uninstall dbbackup systemd service and timer.
|
||||
|
||||
Examples:
|
||||
# Uninstall default instance
|
||||
sudo dbbackup uninstall
|
||||
|
||||
# Uninstall specific instance
|
||||
sudo dbbackup uninstall production
|
||||
|
||||
# Uninstall and remove all configuration
|
||||
sudo dbbackup uninstall --purge
|
||||
`,
|
||||
RunE: func(cmd *cobra.Command, args []string) error {
|
||||
instance := "cluster"
|
||||
if len(args) > 0 {
|
||||
instance = args[0]
|
||||
}
|
||||
return runUninstall(cmd.Context(), instance)
|
||||
},
|
||||
}
|
||||
|
||||
func init() {
|
||||
rootCmd.AddCommand(installCmd)
|
||||
rootCmd.AddCommand(uninstallCmd)
|
||||
|
||||
// Install flags
|
||||
installCmd.Flags().StringVarP(&installInstance, "instance", "i", "", "Instance name (e.g., production, staging)")
|
||||
installCmd.Flags().StringVarP(&installSchedule, "schedule", "s", "daily", "Backup schedule (OnCalendar format)")
|
||||
installCmd.Flags().StringVarP(&installBackupType, "backup-type", "t", "cluster", "Backup type: single or cluster")
|
||||
installCmd.Flags().StringVar(&installUser, "user", "dbbackup", "System user to run backups")
|
||||
installCmd.Flags().StringVar(&installGroup, "group", "dbbackup", "System group for backup user")
|
||||
installCmd.Flags().StringVar(&installBackupDir, "backup-dir", "/var/lib/dbbackup/backups", "Directory for backups")
|
||||
installCmd.Flags().StringVar(&installConfigPath, "config-path", "/etc/dbbackup/dbbackup.conf", "Path to config file")
|
||||
installCmd.Flags().IntVar(&installTimeout, "timeout", 3600, "Backup timeout in seconds")
|
||||
installCmd.Flags().BoolVar(&installWithMetrics, "with-metrics", false, "Install Prometheus metrics exporter")
|
||||
installCmd.Flags().IntVar(&installMetricsPort, "metrics-port", 9399, "Prometheus metrics port")
|
||||
installCmd.Flags().BoolVar(&installDryRun, "dry-run", false, "Show what would be installed without making changes")
|
||||
installCmd.Flags().BoolVar(&installStatus, "status", false, "Show installation status")
|
||||
|
||||
// Uninstall flags
|
||||
uninstallCmd.Flags().BoolVar(&uninstallPurge, "purge", false, "Also remove configuration files")
|
||||
}
|
||||
|
||||
func runInstall(ctx context.Context) error {
|
||||
// Create context with signal handling
|
||||
ctx, cancel := signal.NotifyContext(ctx, os.Interrupt, syscall.SIGTERM)
|
||||
defer cancel()
|
||||
|
||||
// Expand schedule shortcuts
|
||||
schedule := expandSchedule(installSchedule)
|
||||
|
||||
// Create installer
|
||||
inst := installer.NewInstaller(log, installDryRun)
|
||||
|
||||
// Set up options
|
||||
opts := installer.InstallOptions{
|
||||
Instance: installInstance,
|
||||
BackupType: installBackupType,
|
||||
Schedule: schedule,
|
||||
User: installUser,
|
||||
Group: installGroup,
|
||||
BackupDir: installBackupDir,
|
||||
ConfigPath: installConfigPath,
|
||||
TimeoutSeconds: installTimeout,
|
||||
WithMetrics: installWithMetrics,
|
||||
MetricsPort: installMetricsPort,
|
||||
}
|
||||
|
||||
// For cluster backup, override instance
|
||||
if installBackupType == "cluster" {
|
||||
opts.Instance = "cluster"
|
||||
}
|
||||
|
||||
return inst.Install(ctx, opts)
|
||||
}
|
||||
|
||||
func runUninstall(ctx context.Context, instance string) error {
|
||||
ctx, cancel := signal.NotifyContext(ctx, os.Interrupt, syscall.SIGTERM)
|
||||
defer cancel()
|
||||
|
||||
inst := installer.NewInstaller(log, false)
|
||||
return inst.Uninstall(ctx, instance, uninstallPurge)
|
||||
}
|
||||
|
||||
func runInstallStatus(ctx context.Context) error {
|
||||
inst := installer.NewInstaller(log, false)
|
||||
|
||||
// Check cluster status
|
||||
clusterStatus, err := inst.Status(ctx, "cluster")
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
fmt.Println()
|
||||
fmt.Println("[STATUS] DBBackup Installation Status")
|
||||
fmt.Println(strings.Repeat("=", 50))
|
||||
|
||||
if clusterStatus.Installed {
|
||||
fmt.Println()
|
||||
fmt.Println(" * Cluster Backup:")
|
||||
fmt.Printf(" Service: %s\n", formatStatus(clusterStatus.Installed, clusterStatus.Active))
|
||||
fmt.Printf(" Timer: %s\n", formatStatus(clusterStatus.TimerEnabled, clusterStatus.TimerActive))
|
||||
if clusterStatus.NextRun != "" {
|
||||
fmt.Printf(" Next run: %s\n", clusterStatus.NextRun)
|
||||
}
|
||||
if clusterStatus.LastRun != "" {
|
||||
fmt.Printf(" Last run: %s\n", clusterStatus.LastRun)
|
||||
}
|
||||
} else {
|
||||
fmt.Println()
|
||||
fmt.Println("[NONE] No systemd services installed")
|
||||
fmt.Println()
|
||||
fmt.Println("Run 'sudo dbbackup install' to install as a systemd service")
|
||||
}
|
||||
|
||||
// Check for exporter
|
||||
if _, err := os.Stat("/etc/systemd/system/dbbackup-exporter.service"); err == nil {
|
||||
fmt.Println()
|
||||
fmt.Println(" * Metrics Exporter:")
|
||||
// Check if exporter is active using systemctl
|
||||
cmd := exec.CommandContext(ctx, "systemctl", "is-active", "dbbackup-exporter")
|
||||
if err := cmd.Run(); err == nil {
|
||||
fmt.Printf(" Service: [OK] active\n")
|
||||
} else {
|
||||
fmt.Printf(" Service: [-] inactive\n")
|
||||
}
|
||||
}
|
||||
|
||||
fmt.Println()
|
||||
return nil
|
||||
}
|
||||
|
||||
func formatStatus(installed, active bool) string {
|
||||
if !installed {
|
||||
return "not installed"
|
||||
}
|
||||
if active {
|
||||
return "[OK] active"
|
||||
}
|
||||
return "[-] inactive"
|
||||
}
|
||||
|
||||
func expandSchedule(schedule string) string {
|
||||
shortcuts := map[string]string{
|
||||
"hourly": "*-*-* *:00:00",
|
||||
"daily": "*-*-* 02:00:00",
|
||||
"weekly": "Mon *-*-* 02:00:00",
|
||||
"monthly": "*-*-01 02:00:00",
|
||||
}
|
||||
|
||||
if expanded, ok := shortcuts[strings.ToLower(schedule)]; ok {
|
||||
return expanded
|
||||
}
|
||||
return schedule
|
||||
}
|
||||
138
cmd/metrics.go
Normal file
138
cmd/metrics.go
Normal file
@ -0,0 +1,138 @@
|
||||
package cmd
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"os"
|
||||
"os/signal"
|
||||
"syscall"
|
||||
|
||||
"dbbackup/internal/prometheus"
|
||||
|
||||
"github.com/spf13/cobra"
|
||||
)
|
||||
|
||||
var (
|
||||
metricsInstance string
|
||||
metricsOutput string
|
||||
metricsPort int
|
||||
)
|
||||
|
||||
// metricsCmd represents the metrics command
|
||||
var metricsCmd = &cobra.Command{
|
||||
Use: "metrics",
|
||||
Short: "Prometheus metrics management",
|
||||
Long: `Prometheus metrics management for dbbackup.
|
||||
|
||||
Export metrics to a textfile for node_exporter, or run an HTTP server
|
||||
for direct Prometheus scraping.`,
|
||||
}
|
||||
|
||||
// metricsExportCmd exports metrics to a textfile
|
||||
var metricsExportCmd = &cobra.Command{
|
||||
Use: "export",
|
||||
Short: "Export metrics to textfile",
|
||||
Long: `Export Prometheus metrics to a textfile for node_exporter.
|
||||
|
||||
The textfile collector in node_exporter can scrape metrics from files
|
||||
in a designated directory (typically /var/lib/node_exporter/textfile_collector/).
|
||||
|
||||
Examples:
|
||||
# Export metrics to default location
|
||||
dbbackup metrics export
|
||||
|
||||
# Export with custom output path
|
||||
dbbackup metrics export --output /var/lib/dbbackup/metrics/dbbackup.prom
|
||||
|
||||
# Export for specific instance
|
||||
dbbackup metrics export --instance production --output /var/lib/dbbackup/metrics/production.prom
|
||||
|
||||
After export, configure node_exporter with:
|
||||
--collector.textfile.directory=/var/lib/dbbackup/metrics/
|
||||
`,
|
||||
RunE: func(cmd *cobra.Command, args []string) error {
|
||||
return runMetricsExport(cmd.Context())
|
||||
},
|
||||
}
|
||||
|
||||
// metricsServeCmd runs the HTTP metrics server
|
||||
var metricsServeCmd = &cobra.Command{
|
||||
Use: "serve",
|
||||
Short: "Run Prometheus HTTP server",
|
||||
Long: `Run an HTTP server exposing Prometheus metrics.
|
||||
|
||||
This starts a long-running daemon that serves metrics at /metrics.
|
||||
Prometheus can scrape this endpoint directly.
|
||||
|
||||
Examples:
|
||||
# Start server on default port 9399
|
||||
dbbackup metrics serve
|
||||
|
||||
# Start server on custom port
|
||||
dbbackup metrics serve --port 9100
|
||||
|
||||
# Run as systemd service (installed via 'dbbackup install --with-metrics')
|
||||
sudo systemctl start dbbackup-exporter
|
||||
|
||||
Endpoints:
|
||||
/metrics - Prometheus metrics
|
||||
/health - Health check (returns 200 OK)
|
||||
/ - Service info page
|
||||
`,
|
||||
RunE: func(cmd *cobra.Command, args []string) error {
|
||||
return runMetricsServe(cmd.Context())
|
||||
},
|
||||
}
|
||||
|
||||
func init() {
|
||||
rootCmd.AddCommand(metricsCmd)
|
||||
metricsCmd.AddCommand(metricsExportCmd)
|
||||
metricsCmd.AddCommand(metricsServeCmd)
|
||||
|
||||
// Export flags
|
||||
metricsExportCmd.Flags().StringVar(&metricsInstance, "instance", "default", "Instance name for metrics labels")
|
||||
metricsExportCmd.Flags().StringVarP(&metricsOutput, "output", "o", "/var/lib/dbbackup/metrics/dbbackup.prom", "Output file path")
|
||||
|
||||
// Serve flags
|
||||
metricsServeCmd.Flags().StringVar(&metricsInstance, "instance", "default", "Instance name for metrics labels")
|
||||
metricsServeCmd.Flags().IntVarP(&metricsPort, "port", "p", 9399, "HTTP server port")
|
||||
}
|
||||
|
||||
func runMetricsExport(ctx context.Context) error {
|
||||
// Open catalog
|
||||
cat, err := openCatalog()
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to open catalog: %w", err)
|
||||
}
|
||||
defer cat.Close()
|
||||
|
||||
// Create metrics writer
|
||||
writer := prometheus.NewMetricsWriter(log, cat, metricsInstance)
|
||||
|
||||
// Write textfile
|
||||
if err := writer.WriteTextfile(metricsOutput); err != nil {
|
||||
return fmt.Errorf("failed to write metrics: %w", err)
|
||||
}
|
||||
|
||||
log.Info("Exported metrics to textfile", "path", metricsOutput, "instance", metricsInstance)
|
||||
return nil
|
||||
}
|
||||
|
||||
func runMetricsServe(ctx context.Context) error {
|
||||
// Setup signal handling
|
||||
ctx, cancel := signal.NotifyContext(ctx, os.Interrupt, syscall.SIGTERM)
|
||||
defer cancel()
|
||||
|
||||
// Open catalog
|
||||
cat, err := openCatalog()
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to open catalog: %w", err)
|
||||
}
|
||||
defer cat.Close()
|
||||
|
||||
// Create exporter
|
||||
exporter := prometheus.NewExporter(log, cat, metricsInstance, metricsPort)
|
||||
|
||||
// Run server (blocks until context is cancelled)
|
||||
return exporter.Serve(ctx)
|
||||
}
|
||||
@ -203,9 +203,17 @@ func runMigrateCluster(cmd *cobra.Command, args []string) error {
|
||||
migrateTargetUser = migrateSourceUser
|
||||
}
|
||||
|
||||
// Create source config first to get WorkDir
|
||||
sourceCfg := config.New()
|
||||
sourceCfg.Host = migrateSourceHost
|
||||
sourceCfg.Port = migrateSourcePort
|
||||
sourceCfg.User = migrateSourceUser
|
||||
sourceCfg.Password = migrateSourcePassword
|
||||
|
||||
workdir := migrateWorkdir
|
||||
if workdir == "" {
|
||||
workdir = filepath.Join(os.TempDir(), "dbbackup-migrate")
|
||||
// Use WorkDir from config if available
|
||||
workdir = filepath.Join(sourceCfg.GetEffectiveWorkDir(), "dbbackup-migrate")
|
||||
}
|
||||
|
||||
// Create working directory
|
||||
@ -213,12 +221,7 @@ func runMigrateCluster(cmd *cobra.Command, args []string) error {
|
||||
return fmt.Errorf("failed to create working directory: %w", err)
|
||||
}
|
||||
|
||||
// Create source config
|
||||
sourceCfg := config.New()
|
||||
sourceCfg.Host = migrateSourceHost
|
||||
sourceCfg.Port = migrateSourcePort
|
||||
sourceCfg.User = migrateSourceUser
|
||||
sourceCfg.Password = migrateSourcePassword
|
||||
// Update source config with remaining settings
|
||||
sourceCfg.SSLMode = migrateSourceSSLMode
|
||||
sourceCfg.Database = "postgres" // Default connection database
|
||||
sourceCfg.DatabaseType = cfg.DatabaseType
|
||||
@ -342,7 +345,8 @@ func runMigrateSingle(cmd *cobra.Command, args []string) error {
|
||||
|
||||
workdir := migrateWorkdir
|
||||
if workdir == "" {
|
||||
workdir = filepath.Join(os.TempDir(), "dbbackup-migrate")
|
||||
tempCfg := config.New()
|
||||
workdir = filepath.Join(tempCfg.GetEffectiveWorkDir(), "dbbackup-migrate")
|
||||
}
|
||||
|
||||
// Create working directory
|
||||
|
||||
76
cmd/pitr.go
76
cmd/pitr.go
@ -436,7 +436,7 @@ func runPITREnable(cmd *cobra.Command, args []string) error {
|
||||
return fmt.Errorf("failed to enable PITR: %w", err)
|
||||
}
|
||||
|
||||
log.Info("✅ PITR enabled successfully!")
|
||||
log.Info("[OK] PITR enabled successfully!")
|
||||
log.Info("")
|
||||
log.Info("Next steps:")
|
||||
log.Info("1. Restart PostgreSQL: sudo systemctl restart postgresql")
|
||||
@ -463,7 +463,7 @@ func runPITRDisable(cmd *cobra.Command, args []string) error {
|
||||
return fmt.Errorf("failed to disable PITR: %w", err)
|
||||
}
|
||||
|
||||
log.Info("✅ PITR disabled successfully!")
|
||||
log.Info("[OK] PITR disabled successfully!")
|
||||
log.Info("PostgreSQL restart required: sudo systemctl restart postgresql")
|
||||
|
||||
return nil
|
||||
@ -483,15 +483,15 @@ func runPITRStatus(cmd *cobra.Command, args []string) error {
|
||||
}
|
||||
|
||||
// Display PITR configuration
|
||||
fmt.Println("━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━")
|
||||
fmt.Println("======================================================")
|
||||
fmt.Println(" Point-in-Time Recovery (PITR) Status")
|
||||
fmt.Println("━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━")
|
||||
fmt.Println("======================================================")
|
||||
fmt.Println()
|
||||
|
||||
if config.Enabled {
|
||||
fmt.Println("Status: ✅ ENABLED")
|
||||
fmt.Println("Status: [OK] ENABLED")
|
||||
} else {
|
||||
fmt.Println("Status: ❌ DISABLED")
|
||||
fmt.Println("Status: [FAIL] DISABLED")
|
||||
}
|
||||
|
||||
fmt.Printf("WAL Level: %s\n", config.WALLevel)
|
||||
@ -510,7 +510,7 @@ func runPITRStatus(cmd *cobra.Command, args []string) error {
|
||||
// Extract archive dir from command (simple parsing)
|
||||
fmt.Println()
|
||||
fmt.Println("WAL Archive Statistics:")
|
||||
fmt.Println("━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━")
|
||||
fmt.Println("======================================================")
|
||||
// TODO: Parse archive dir and show stats
|
||||
fmt.Println(" (Use 'dbbackup wal list --archive-dir <dir>' to view archives)")
|
||||
}
|
||||
@ -574,13 +574,13 @@ func runWALList(cmd *cobra.Command, args []string) error {
|
||||
}
|
||||
|
||||
// Display archives
|
||||
fmt.Println("━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━")
|
||||
fmt.Println("======================================================")
|
||||
fmt.Printf(" WAL Archives (%d files)\n", len(archives))
|
||||
fmt.Println("━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━")
|
||||
fmt.Println("======================================================")
|
||||
fmt.Println()
|
||||
|
||||
fmt.Printf("%-28s %10s %10s %8s %s\n", "WAL Filename", "Timeline", "Segment", "Size", "Archived At")
|
||||
fmt.Println("────────────────────────────────────────────────────────────────────────────────")
|
||||
fmt.Println("--------------------------------------------------------------------------------")
|
||||
|
||||
for _, archive := range archives {
|
||||
size := formatWALSize(archive.ArchivedSize)
|
||||
@ -644,7 +644,7 @@ func runWALCleanup(cmd *cobra.Command, args []string) error {
|
||||
return fmt.Errorf("WAL cleanup failed: %w", err)
|
||||
}
|
||||
|
||||
log.Info("✅ WAL cleanup completed", "deleted", deleted, "retention_days", archiveConfig.RetentionDays)
|
||||
log.Info("[OK] WAL cleanup completed", "deleted", deleted, "retention_days", archiveConfig.RetentionDays)
|
||||
return nil
|
||||
}
|
||||
|
||||
@ -671,7 +671,7 @@ func runWALTimeline(cmd *cobra.Command, args []string) error {
|
||||
// Display timeline details
|
||||
if len(history.Timelines) > 0 {
|
||||
fmt.Println("\nTimeline Details:")
|
||||
fmt.Println("═════════════════")
|
||||
fmt.Println("=================")
|
||||
for _, tl := range history.Timelines {
|
||||
fmt.Printf("\nTimeline %d:\n", tl.TimelineID)
|
||||
if tl.ParentTimeline > 0 {
|
||||
@ -690,7 +690,7 @@ func runWALTimeline(cmd *cobra.Command, args []string) error {
|
||||
fmt.Printf(" Created: %s\n", tl.CreatedAt.Format("2006-01-02 15:04:05"))
|
||||
}
|
||||
if tl.TimelineID == history.CurrentTimeline {
|
||||
fmt.Printf(" Status: ⚡ CURRENT\n")
|
||||
fmt.Printf(" Status: [CURR] CURRENT\n")
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -759,15 +759,15 @@ func runBinlogList(cmd *cobra.Command, args []string) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
fmt.Println("━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━")
|
||||
fmt.Println("=============================================================")
|
||||
fmt.Printf(" Binary Log Files (%s)\n", bm.ServerType())
|
||||
fmt.Println("━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━")
|
||||
fmt.Println("=============================================================")
|
||||
fmt.Println()
|
||||
|
||||
if len(binlogs) > 0 {
|
||||
fmt.Println("Source Directory:")
|
||||
fmt.Printf("%-24s %10s %-19s %-19s %s\n", "Filename", "Size", "Start Time", "End Time", "Format")
|
||||
fmt.Println("────────────────────────────────────────────────────────────────────────────────")
|
||||
fmt.Println("--------------------------------------------------------------------------------")
|
||||
|
||||
var totalSize int64
|
||||
for _, b := range binlogs {
|
||||
@ -797,7 +797,7 @@ func runBinlogList(cmd *cobra.Command, args []string) error {
|
||||
fmt.Println()
|
||||
fmt.Println("Archived Binlogs:")
|
||||
fmt.Printf("%-24s %10s %-19s %s\n", "Original", "Size", "Archived At", "Flags")
|
||||
fmt.Println("────────────────────────────────────────────────────────────────────────────────")
|
||||
fmt.Println("--------------------------------------------------------------------------------")
|
||||
|
||||
var totalSize int64
|
||||
for _, a := range archived {
|
||||
@ -914,7 +914,7 @@ func runBinlogArchive(cmd *cobra.Command, args []string) error {
|
||||
bm.SaveArchiveMetadata(allArchived)
|
||||
}
|
||||
|
||||
log.Info("✅ Binlog archiving completed", "archived", len(newArchives))
|
||||
log.Info("[OK] Binlog archiving completed", "archived", len(newArchives))
|
||||
return nil
|
||||
}
|
||||
|
||||
@ -1014,15 +1014,15 @@ func runBinlogValidate(cmd *cobra.Command, args []string) error {
|
||||
return fmt.Errorf("validating binlog chain: %w", err)
|
||||
}
|
||||
|
||||
fmt.Println("━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━")
|
||||
fmt.Println("=============================================================")
|
||||
fmt.Println(" Binlog Chain Validation")
|
||||
fmt.Println("━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━")
|
||||
fmt.Println("=============================================================")
|
||||
fmt.Println()
|
||||
|
||||
if validation.Valid {
|
||||
fmt.Println("Status: ✅ VALID - Binlog chain is complete")
|
||||
fmt.Println("Status: [OK] VALID - Binlog chain is complete")
|
||||
} else {
|
||||
fmt.Println("Status: ❌ INVALID - Binlog chain has gaps")
|
||||
fmt.Println("Status: [FAIL] INVALID - Binlog chain has gaps")
|
||||
}
|
||||
|
||||
fmt.Printf("Files: %d binlog files\n", validation.LogCount)
|
||||
@ -1055,7 +1055,7 @@ func runBinlogValidate(cmd *cobra.Command, args []string) error {
|
||||
fmt.Println()
|
||||
fmt.Println("Errors:")
|
||||
for _, e := range validation.Errors {
|
||||
fmt.Printf(" ✗ %s\n", e)
|
||||
fmt.Printf(" [FAIL] %s\n", e)
|
||||
}
|
||||
}
|
||||
|
||||
@ -1094,9 +1094,9 @@ func runBinlogPosition(cmd *cobra.Command, args []string) error {
|
||||
}
|
||||
defer rows.Close()
|
||||
|
||||
fmt.Println("━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━")
|
||||
fmt.Println("=============================================================")
|
||||
fmt.Println(" Current Binary Log Position")
|
||||
fmt.Println("━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━")
|
||||
fmt.Println("=============================================================")
|
||||
fmt.Println()
|
||||
|
||||
if rows.Next() {
|
||||
@ -1178,24 +1178,24 @@ func runMySQLPITRStatus(cmd *cobra.Command, args []string) error {
|
||||
return fmt.Errorf("getting PITR status: %w", err)
|
||||
}
|
||||
|
||||
fmt.Println("━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━")
|
||||
fmt.Println("=============================================================")
|
||||
fmt.Printf(" MySQL/MariaDB PITR Status (%s)\n", status.DatabaseType)
|
||||
fmt.Println("━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━")
|
||||
fmt.Println("=============================================================")
|
||||
fmt.Println()
|
||||
|
||||
if status.Enabled {
|
||||
fmt.Println("PITR Status: ✅ ENABLED")
|
||||
fmt.Println("PITR Status: [OK] ENABLED")
|
||||
} else {
|
||||
fmt.Println("PITR Status: ❌ NOT CONFIGURED")
|
||||
fmt.Println("PITR Status: [FAIL] NOT CONFIGURED")
|
||||
}
|
||||
|
||||
// Get binary logging status
|
||||
var logBin string
|
||||
db.QueryRowContext(ctx, "SELECT @@log_bin").Scan(&logBin)
|
||||
if logBin == "1" || logBin == "ON" {
|
||||
fmt.Println("Binary Logging: ✅ ENABLED")
|
||||
fmt.Println("Binary Logging: [OK] ENABLED")
|
||||
} else {
|
||||
fmt.Println("Binary Logging: ❌ DISABLED")
|
||||
fmt.Println("Binary Logging: [FAIL] DISABLED")
|
||||
}
|
||||
|
||||
fmt.Printf("Binlog Format: %s\n", status.LogLevel)
|
||||
@ -1205,14 +1205,14 @@ func runMySQLPITRStatus(cmd *cobra.Command, args []string) error {
|
||||
if status.DatabaseType == pitr.DatabaseMariaDB {
|
||||
db.QueryRowContext(ctx, "SELECT @@gtid_current_pos").Scan(>idMode)
|
||||
if gtidMode != "" {
|
||||
fmt.Println("GTID Mode: ✅ ENABLED")
|
||||
fmt.Println("GTID Mode: [OK] ENABLED")
|
||||
} else {
|
||||
fmt.Println("GTID Mode: ❌ DISABLED")
|
||||
fmt.Println("GTID Mode: [FAIL] DISABLED")
|
||||
}
|
||||
} else {
|
||||
db.QueryRowContext(ctx, "SELECT @@gtid_mode").Scan(>idMode)
|
||||
if gtidMode == "ON" {
|
||||
fmt.Println("GTID Mode: ✅ ENABLED")
|
||||
fmt.Println("GTID Mode: [OK] ENABLED")
|
||||
} else {
|
||||
fmt.Printf("GTID Mode: %s\n", gtidMode)
|
||||
}
|
||||
@ -1237,12 +1237,12 @@ func runMySQLPITRStatus(cmd *cobra.Command, args []string) error {
|
||||
fmt.Println()
|
||||
fmt.Println("PITR Requirements:")
|
||||
if logBin == "1" || logBin == "ON" {
|
||||
fmt.Println(" ✅ Binary logging enabled")
|
||||
fmt.Println(" [OK] Binary logging enabled")
|
||||
} else {
|
||||
fmt.Println(" ❌ Binary logging must be enabled (log_bin = mysql-bin)")
|
||||
fmt.Println(" [FAIL] Binary logging must be enabled (log_bin = mysql-bin)")
|
||||
}
|
||||
if status.LogLevel == "ROW" {
|
||||
fmt.Println(" ✅ Row-based logging (recommended)")
|
||||
fmt.Println(" [OK] Row-based logging (recommended)")
|
||||
} else {
|
||||
fmt.Printf(" ⚠ binlog_format = %s (ROW recommended for PITR)\n", status.LogLevel)
|
||||
}
|
||||
@ -1299,7 +1299,7 @@ func runMySQLPITREnable(cmd *cobra.Command, args []string) error {
|
||||
return fmt.Errorf("enabling PITR: %w", err)
|
||||
}
|
||||
|
||||
log.Info("✅ MySQL PITR enabled successfully!")
|
||||
log.Info("[OK] MySQL PITR enabled successfully!")
|
||||
log.Info("")
|
||||
log.Info("Next steps:")
|
||||
log.Info("1. Start binlog archiving: dbbackup binlog watch --archive-dir " + mysqlArchiveDir)
|
||||
|
||||
@ -1,7 +1,6 @@
|
||||
package cmd
|
||||
|
||||
import (
|
||||
"compress/gzip"
|
||||
"context"
|
||||
"fmt"
|
||||
"io"
|
||||
@ -15,6 +14,7 @@ import (
|
||||
"dbbackup/internal/logger"
|
||||
"dbbackup/internal/tui"
|
||||
|
||||
"github.com/klauspost/pgzip"
|
||||
"github.com/spf13/cobra"
|
||||
)
|
||||
|
||||
@ -66,6 +66,15 @@ TUI Automation Flags (for testing and CI/CD):
|
||||
cfg.TUIVerbose, _ = cmd.Flags().GetBool("verbose-tui")
|
||||
cfg.TUILogFile, _ = cmd.Flags().GetString("tui-log-file")
|
||||
|
||||
// Set conservative profile as default for TUI mode (safer for interactive users)
|
||||
if cfg.ResourceProfile == "" || cfg.ResourceProfile == "balanced" {
|
||||
cfg.ResourceProfile = "conservative"
|
||||
cfg.LargeDBMode = true
|
||||
if cfg.Debug {
|
||||
log.Info("TUI mode: using conservative profile by default")
|
||||
}
|
||||
}
|
||||
|
||||
// Check authentication before starting TUI
|
||||
if cfg.IsPostgreSQL() {
|
||||
if mismatch, msg := auth.CheckAuthenticationMismatch(cfg); mismatch {
|
||||
@ -141,7 +150,7 @@ func runList(ctx context.Context) error {
|
||||
continue
|
||||
}
|
||||
|
||||
fmt.Printf("📦 %s\n", file.Name)
|
||||
fmt.Printf("[FILE] %s\n", file.Name)
|
||||
fmt.Printf(" Size: %s\n", formatFileSize(stat.Size()))
|
||||
fmt.Printf(" Modified: %s\n", stat.ModTime().Format("2006-01-02 15:04:05"))
|
||||
fmt.Printf(" Type: %s\n", getBackupType(file.Name))
|
||||
@ -237,56 +246,56 @@ func runPreflight(ctx context.Context) error {
|
||||
totalChecks := 6
|
||||
|
||||
// 1. Database connectivity check
|
||||
fmt.Print("🔗 Database connectivity... ")
|
||||
fmt.Print("[1] Database connectivity... ")
|
||||
if err := testDatabaseConnection(); err != nil {
|
||||
fmt.Printf("❌ FAILED: %v\n", err)
|
||||
fmt.Printf("[FAIL] FAILED: %v\n", err)
|
||||
} else {
|
||||
fmt.Println("✅ PASSED")
|
||||
fmt.Println("[OK] PASSED")
|
||||
checksPassed++
|
||||
}
|
||||
|
||||
// 2. Required tools check
|
||||
fmt.Print("🛠️ Required tools (pg_dump/pg_restore)... ")
|
||||
fmt.Print("[2] Required tools (pg_dump/pg_restore)... ")
|
||||
if err := checkRequiredTools(); err != nil {
|
||||
fmt.Printf("❌ FAILED: %v\n", err)
|
||||
fmt.Printf("[FAIL] FAILED: %v\n", err)
|
||||
} else {
|
||||
fmt.Println("✅ PASSED")
|
||||
fmt.Println("[OK] PASSED")
|
||||
checksPassed++
|
||||
}
|
||||
|
||||
// 3. Backup directory check
|
||||
fmt.Print("📁 Backup directory access... ")
|
||||
fmt.Print("[3] Backup directory access... ")
|
||||
if err := checkBackupDirectory(); err != nil {
|
||||
fmt.Printf("❌ FAILED: %v\n", err)
|
||||
fmt.Printf("[FAIL] FAILED: %v\n", err)
|
||||
} else {
|
||||
fmt.Println("✅ PASSED")
|
||||
fmt.Println("[OK] PASSED")
|
||||
checksPassed++
|
||||
}
|
||||
|
||||
// 4. Disk space check
|
||||
fmt.Print("💾 Available disk space... ")
|
||||
fmt.Print("[4] Available disk space... ")
|
||||
if err := checkDiskSpace(); err != nil {
|
||||
fmt.Printf("❌ FAILED: %v\n", err)
|
||||
fmt.Printf("[FAIL] FAILED: %v\n", err)
|
||||
} else {
|
||||
fmt.Println("✅ PASSED")
|
||||
fmt.Println("[OK] PASSED")
|
||||
checksPassed++
|
||||
}
|
||||
|
||||
// 5. Permissions check
|
||||
fmt.Print("🔐 File permissions... ")
|
||||
fmt.Print("[5] File permissions... ")
|
||||
if err := checkPermissions(); err != nil {
|
||||
fmt.Printf("❌ FAILED: %v\n", err)
|
||||
fmt.Printf("[FAIL] FAILED: %v\n", err)
|
||||
} else {
|
||||
fmt.Println("✅ PASSED")
|
||||
fmt.Println("[OK] PASSED")
|
||||
checksPassed++
|
||||
}
|
||||
|
||||
// 6. CPU/Memory resources check
|
||||
fmt.Print("🖥️ System resources... ")
|
||||
fmt.Print("[6] System resources... ")
|
||||
if err := checkSystemResources(); err != nil {
|
||||
fmt.Printf("❌ FAILED: %v\n", err)
|
||||
fmt.Printf("[FAIL] FAILED: %v\n", err)
|
||||
} else {
|
||||
fmt.Println("✅ PASSED")
|
||||
fmt.Println("[OK] PASSED")
|
||||
checksPassed++
|
||||
}
|
||||
|
||||
@ -294,10 +303,10 @@ func runPreflight(ctx context.Context) error {
|
||||
fmt.Printf("Results: %d/%d checks passed\n", checksPassed, totalChecks)
|
||||
|
||||
if checksPassed == totalChecks {
|
||||
fmt.Println("🎉 All preflight checks passed! System is ready for backup operations.")
|
||||
fmt.Println("[SUCCESS] All preflight checks passed! System is ready for backup operations.")
|
||||
return nil
|
||||
} else {
|
||||
fmt.Printf("⚠️ %d check(s) failed. Please address the issues before running backups.\n", totalChecks-checksPassed)
|
||||
fmt.Printf("[WARN] %d check(s) failed. Please address the issues before running backups.\n", totalChecks-checksPassed)
|
||||
return fmt.Errorf("preflight checks failed: %d/%d passed", checksPassed, totalChecks)
|
||||
}
|
||||
}
|
||||
@ -414,44 +423,44 @@ func runRestore(ctx context.Context, archiveName string) error {
|
||||
fmt.Println()
|
||||
|
||||
// Show warning
|
||||
fmt.Println("⚠️ WARNING: This will restore data to the target database.")
|
||||
fmt.Println("[WARN] WARNING: This will restore data to the target database.")
|
||||
fmt.Println(" Existing data may be overwritten or merged depending on the restore method.")
|
||||
fmt.Println()
|
||||
|
||||
// For safety, show what would be done without actually doing it
|
||||
switch archiveType {
|
||||
case "Single Database (.dump)":
|
||||
fmt.Println("🔄 Would execute: pg_restore to restore single database")
|
||||
fmt.Println("[EXEC] Would execute: pg_restore to restore single database")
|
||||
fmt.Printf(" Command: pg_restore -h %s -p %d -U %s -d %s --verbose %s\n",
|
||||
cfg.Host, cfg.Port, cfg.User, cfg.Database, archivePath)
|
||||
case "Single Database (.dump.gz)":
|
||||
fmt.Println("🔄 Would execute: gunzip and pg_restore to restore single database")
|
||||
fmt.Println("[EXEC] Would execute: gunzip and pg_restore to restore single database")
|
||||
fmt.Printf(" Command: gunzip -c %s | pg_restore -h %s -p %d -U %s -d %s --verbose\n",
|
||||
archivePath, cfg.Host, cfg.Port, cfg.User, cfg.Database)
|
||||
case "SQL Script (.sql)":
|
||||
if cfg.IsPostgreSQL() {
|
||||
fmt.Println("🔄 Would execute: psql to run SQL script")
|
||||
fmt.Println("[EXEC] Would execute: psql to run SQL script")
|
||||
fmt.Printf(" Command: psql -h %s -p %d -U %s -d %s -f %s\n",
|
||||
cfg.Host, cfg.Port, cfg.User, cfg.Database, archivePath)
|
||||
} else if cfg.IsMySQL() {
|
||||
fmt.Println("🔄 Would execute: mysql to run SQL script")
|
||||
fmt.Println("[EXEC] Would execute: mysql to run SQL script")
|
||||
fmt.Printf(" Command: %s\n", mysqlRestoreCommand(archivePath, false))
|
||||
} else {
|
||||
fmt.Println("🔄 Would execute: SQL client to run script (database type unknown)")
|
||||
fmt.Println("[EXEC] Would execute: SQL client to run script (database type unknown)")
|
||||
}
|
||||
case "SQL Script (.sql.gz)":
|
||||
if cfg.IsPostgreSQL() {
|
||||
fmt.Println("🔄 Would execute: gunzip and psql to run SQL script")
|
||||
fmt.Println("[EXEC] Would execute: gunzip and psql to run SQL script")
|
||||
fmt.Printf(" Command: gunzip -c %s | psql -h %s -p %d -U %s -d %s\n",
|
||||
archivePath, cfg.Host, cfg.Port, cfg.User, cfg.Database)
|
||||
} else if cfg.IsMySQL() {
|
||||
fmt.Println("🔄 Would execute: gunzip and mysql to run SQL script")
|
||||
fmt.Println("[EXEC] Would execute: gunzip and mysql to run SQL script")
|
||||
fmt.Printf(" Command: %s\n", mysqlRestoreCommand(archivePath, true))
|
||||
} else {
|
||||
fmt.Println("🔄 Would execute: gunzip and SQL client to run script (database type unknown)")
|
||||
fmt.Println("[EXEC] Would execute: gunzip and SQL client to run script (database type unknown)")
|
||||
}
|
||||
case "Cluster Backup (.tar.gz)":
|
||||
fmt.Println("🔄 Would execute: Extract and restore cluster backup")
|
||||
fmt.Println("[EXEC] Would execute: Extract and restore cluster backup")
|
||||
fmt.Println(" Steps:")
|
||||
fmt.Println(" 1. Extract tar.gz archive")
|
||||
fmt.Println(" 2. Restore global objects (roles, tablespaces)")
|
||||
@ -461,7 +470,7 @@ func runRestore(ctx context.Context, archiveName string) error {
|
||||
}
|
||||
|
||||
fmt.Println()
|
||||
fmt.Println("🛡️ SAFETY MODE: Restore command is in preview mode.")
|
||||
fmt.Println("[SAFETY] SAFETY MODE: Restore command is in preview mode.")
|
||||
fmt.Println(" This shows what would be executed without making changes.")
|
||||
fmt.Println(" To enable actual restore, add --confirm flag (not yet implemented).")
|
||||
|
||||
@ -520,25 +529,25 @@ func runVerify(ctx context.Context, archiveName string) error {
|
||||
checksPassed := 0
|
||||
|
||||
// Basic file existence and readability
|
||||
fmt.Print("📁 File accessibility... ")
|
||||
fmt.Print("[CHK] File accessibility... ")
|
||||
if file, err := os.Open(archivePath); err != nil {
|
||||
fmt.Printf("❌ FAILED: %v\n", err)
|
||||
fmt.Printf("[FAIL] FAILED: %v\n", err)
|
||||
} else {
|
||||
file.Close()
|
||||
fmt.Println("✅ PASSED")
|
||||
fmt.Println("[OK] PASSED")
|
||||
checksPassed++
|
||||
}
|
||||
checksRun++
|
||||
|
||||
// File size sanity check
|
||||
fmt.Print("📏 File size check... ")
|
||||
fmt.Print("[CHK] File size check... ")
|
||||
if stat.Size() == 0 {
|
||||
fmt.Println("❌ FAILED: File is empty")
|
||||
fmt.Println("[FAIL] FAILED: File is empty")
|
||||
} else if stat.Size() < 100 {
|
||||
fmt.Println("⚠️ WARNING: File is very small (< 100 bytes)")
|
||||
fmt.Println("[WARN] WARNING: File is very small (< 100 bytes)")
|
||||
checksPassed++
|
||||
} else {
|
||||
fmt.Println("✅ PASSED")
|
||||
fmt.Println("[OK] PASSED")
|
||||
checksPassed++
|
||||
}
|
||||
checksRun++
|
||||
@ -546,51 +555,51 @@ func runVerify(ctx context.Context, archiveName string) error {
|
||||
// Type-specific verification
|
||||
switch archiveType {
|
||||
case "Single Database (.dump)":
|
||||
fmt.Print("🔍 PostgreSQL dump format check... ")
|
||||
fmt.Print("[CHK] PostgreSQL dump format check... ")
|
||||
if err := verifyPgDump(archivePath); err != nil {
|
||||
fmt.Printf("❌ FAILED: %v\n", err)
|
||||
fmt.Printf("[FAIL] FAILED: %v\n", err)
|
||||
} else {
|
||||
fmt.Println("✅ PASSED")
|
||||
fmt.Println("[OK] PASSED")
|
||||
checksPassed++
|
||||
}
|
||||
checksRun++
|
||||
|
||||
case "Single Database (.dump.gz)":
|
||||
fmt.Print("🔍 PostgreSQL dump format check (gzip)... ")
|
||||
fmt.Print("[CHK] PostgreSQL dump format check (gzip)... ")
|
||||
if err := verifyPgDumpGzip(archivePath); err != nil {
|
||||
fmt.Printf("❌ FAILED: %v\n", err)
|
||||
fmt.Printf("[FAIL] FAILED: %v\n", err)
|
||||
} else {
|
||||
fmt.Println("✅ PASSED")
|
||||
fmt.Println("[OK] PASSED")
|
||||
checksPassed++
|
||||
}
|
||||
checksRun++
|
||||
|
||||
case "SQL Script (.sql)":
|
||||
fmt.Print("📜 SQL script validation... ")
|
||||
fmt.Print("[CHK] SQL script validation... ")
|
||||
if err := verifySqlScript(archivePath); err != nil {
|
||||
fmt.Printf("❌ FAILED: %v\n", err)
|
||||
fmt.Printf("[FAIL] FAILED: %v\n", err)
|
||||
} else {
|
||||
fmt.Println("✅ PASSED")
|
||||
fmt.Println("[OK] PASSED")
|
||||
checksPassed++
|
||||
}
|
||||
checksRun++
|
||||
|
||||
case "SQL Script (.sql.gz)":
|
||||
fmt.Print("📜 SQL script validation (gzip)... ")
|
||||
fmt.Print("[CHK] SQL script validation (gzip)... ")
|
||||
if err := verifyGzipSqlScript(archivePath); err != nil {
|
||||
fmt.Printf("❌ FAILED: %v\n", err)
|
||||
fmt.Printf("[FAIL] FAILED: %v\n", err)
|
||||
} else {
|
||||
fmt.Println("✅ PASSED")
|
||||
fmt.Println("[OK] PASSED")
|
||||
checksPassed++
|
||||
}
|
||||
checksRun++
|
||||
|
||||
case "Cluster Backup (.tar.gz)":
|
||||
fmt.Print("📦 Archive extraction test... ")
|
||||
fmt.Print("[CHK] Archive extraction test... ")
|
||||
if err := verifyTarGz(archivePath); err != nil {
|
||||
fmt.Printf("❌ FAILED: %v\n", err)
|
||||
fmt.Printf("[FAIL] FAILED: %v\n", err)
|
||||
} else {
|
||||
fmt.Println("✅ PASSED")
|
||||
fmt.Println("[OK] PASSED")
|
||||
checksPassed++
|
||||
}
|
||||
checksRun++
|
||||
@ -598,11 +607,11 @@ func runVerify(ctx context.Context, archiveName string) error {
|
||||
|
||||
// Check for metadata file
|
||||
metadataPath := archivePath + ".info"
|
||||
fmt.Print("📋 Metadata file check... ")
|
||||
fmt.Print("[CHK] Metadata file check... ")
|
||||
if _, err := os.Stat(metadataPath); os.IsNotExist(err) {
|
||||
fmt.Println("⚠️ WARNING: No metadata file found")
|
||||
fmt.Println("[WARN] WARNING: No metadata file found")
|
||||
} else {
|
||||
fmt.Println("✅ PASSED")
|
||||
fmt.Println("[OK] PASSED")
|
||||
checksPassed++
|
||||
}
|
||||
checksRun++
|
||||
@ -611,13 +620,13 @@ func runVerify(ctx context.Context, archiveName string) error {
|
||||
fmt.Printf("Verification Results: %d/%d checks passed\n", checksPassed, checksRun)
|
||||
|
||||
if checksPassed == checksRun {
|
||||
fmt.Println("🎉 Archive verification completed successfully!")
|
||||
fmt.Println("[SUCCESS] Archive verification completed successfully!")
|
||||
return nil
|
||||
} else if float64(checksPassed)/float64(checksRun) >= 0.8 {
|
||||
fmt.Println("⚠️ Archive verification completed with warnings.")
|
||||
fmt.Println("[WARN] Archive verification completed with warnings.")
|
||||
return nil
|
||||
} else {
|
||||
fmt.Println("❌ Archive verification failed. Archive may be corrupted.")
|
||||
fmt.Println("[FAIL] Archive verification failed. Archive may be corrupted.")
|
||||
return fmt.Errorf("verification failed: %d/%d checks passed", checksPassed, checksRun)
|
||||
}
|
||||
}
|
||||
@ -649,7 +658,7 @@ func verifyPgDumpGzip(path string) error {
|
||||
}
|
||||
defer file.Close()
|
||||
|
||||
gz, err := gzip.NewReader(file)
|
||||
gz, err := pgzip.NewReader(file)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to open gzip stream: %w", err)
|
||||
}
|
||||
@ -698,7 +707,7 @@ func verifyGzipSqlScript(path string) error {
|
||||
}
|
||||
defer file.Close()
|
||||
|
||||
gz, err := gzip.NewReader(file)
|
||||
gz, err := pgzip.NewReader(file)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to open gzip stream: %w", err)
|
||||
}
|
||||
|
||||
446
cmd/restore.go
446
cmd/restore.go
@ -13,8 +13,10 @@ import (
|
||||
|
||||
"dbbackup/internal/backup"
|
||||
"dbbackup/internal/cloud"
|
||||
"dbbackup/internal/config"
|
||||
"dbbackup/internal/database"
|
||||
"dbbackup/internal/pitr"
|
||||
"dbbackup/internal/progress"
|
||||
"dbbackup/internal/restore"
|
||||
"dbbackup/internal/security"
|
||||
|
||||
@ -22,24 +24,35 @@ import (
|
||||
)
|
||||
|
||||
var (
|
||||
restoreConfirm bool
|
||||
restoreDryRun bool
|
||||
restoreForce bool
|
||||
restoreClean bool
|
||||
restoreCreate bool
|
||||
restoreJobs int
|
||||
restoreTarget string
|
||||
restoreVerbose bool
|
||||
restoreNoProgress bool
|
||||
restoreWorkdir string
|
||||
restoreCleanCluster bool
|
||||
restoreDiagnose bool // Run diagnosis before restore
|
||||
restoreSaveDebugLog string // Path to save debug log on failure
|
||||
restoreConfirm bool
|
||||
restoreDryRun bool
|
||||
restoreForce bool
|
||||
restoreClean bool
|
||||
restoreCreate bool
|
||||
restoreJobs int
|
||||
restoreParallelDBs int // Number of parallel database restores
|
||||
restoreProfile string // Resource profile: conservative, balanced, aggressive
|
||||
restoreTarget string
|
||||
restoreVerbose bool
|
||||
restoreNoProgress bool
|
||||
restoreWorkdir string
|
||||
restoreCleanCluster bool
|
||||
restoreDiagnose bool // Run diagnosis before restore
|
||||
restoreSaveDebugLog string // Path to save debug log on failure
|
||||
restoreDebugLocks bool // Enable detailed lock debugging
|
||||
restoreOOMProtection bool // Enable OOM protection for large restores
|
||||
restoreLowMemory bool // Force low-memory mode for constrained systems
|
||||
|
||||
// Single database extraction from cluster flags
|
||||
restoreDatabase string // Single database to extract/restore from cluster
|
||||
restoreDatabases string // Comma-separated list of databases to extract
|
||||
restoreOutputDir string // Extract to directory (no restore)
|
||||
restoreListDBs bool // List databases in cluster backup
|
||||
|
||||
// Diagnose flags
|
||||
diagnoseJSON bool
|
||||
diagnoseDeep bool
|
||||
diagnoseKeepTemp bool
|
||||
diagnoseJSON bool
|
||||
diagnoseDeep bool
|
||||
diagnoseKeepTemp bool
|
||||
|
||||
// Encryption flags
|
||||
restoreEncryptionKeyFile string
|
||||
@ -111,6 +124,9 @@ Examples:
|
||||
# Restore to different database
|
||||
dbbackup restore single mydb.dump.gz --target mydb_test --confirm
|
||||
|
||||
# Memory-constrained server (single-threaded, minimal memory)
|
||||
dbbackup restore single mydb.dump.gz --profile=conservative --confirm
|
||||
|
||||
# Clean target database before restore
|
||||
dbbackup restore single mydb.sql.gz --clean --confirm
|
||||
|
||||
@ -130,6 +146,11 @@ var restoreClusterCmd = &cobra.Command{
|
||||
This command restores all databases that were backed up together
|
||||
in a cluster backup operation.
|
||||
|
||||
Single Database Extraction:
|
||||
Use --list-databases to see available databases
|
||||
Use --database to extract/restore a specific database
|
||||
Use --output-dir to extract without restoring
|
||||
|
||||
Safety features:
|
||||
- Dry-run by default (use --confirm to execute)
|
||||
- Archive validation and listing
|
||||
@ -137,12 +158,33 @@ Safety features:
|
||||
- Sequential database restoration
|
||||
|
||||
Examples:
|
||||
# List databases in cluster backup
|
||||
dbbackup restore cluster backup.tar.gz --list-databases
|
||||
|
||||
# Extract single database (no restore)
|
||||
dbbackup restore cluster backup.tar.gz --database myapp --output-dir /tmp/extract
|
||||
|
||||
# Restore single database from cluster
|
||||
dbbackup restore cluster backup.tar.gz --database myapp --confirm
|
||||
|
||||
# Restore single database with different name
|
||||
dbbackup restore cluster backup.tar.gz --database myapp --target myapp_test --confirm
|
||||
|
||||
# Extract multiple databases
|
||||
dbbackup restore cluster backup.tar.gz --databases "app1,app2,app3" --output-dir /tmp/extract
|
||||
|
||||
# Preview cluster restore
|
||||
dbbackup restore cluster cluster_backup_20240101_120000.tar.gz
|
||||
|
||||
# Restore full cluster
|
||||
dbbackup restore cluster cluster_backup_20240101_120000.tar.gz --confirm
|
||||
|
||||
# Memory-constrained server (conservative profile)
|
||||
dbbackup restore cluster cluster_backup.tar.gz --profile=conservative --confirm
|
||||
|
||||
# Maximum performance (dedicated server)
|
||||
dbbackup restore cluster cluster_backup.tar.gz --profile=aggressive --confirm
|
||||
|
||||
# Use parallel decompression
|
||||
dbbackup restore cluster cluster_backup.tar.gz --jobs 4 --confirm
|
||||
|
||||
@ -276,19 +318,27 @@ func init() {
|
||||
restoreSingleCmd.Flags().BoolVar(&restoreClean, "clean", false, "Drop and recreate target database")
|
||||
restoreSingleCmd.Flags().BoolVar(&restoreCreate, "create", false, "Create target database if it doesn't exist")
|
||||
restoreSingleCmd.Flags().StringVar(&restoreTarget, "target", "", "Target database name (defaults to original)")
|
||||
restoreSingleCmd.Flags().StringVar(&restoreProfile, "profile", "balanced", "Resource profile: conservative (--parallel=1, low memory), balanced, aggressive (max performance)")
|
||||
restoreSingleCmd.Flags().BoolVar(&restoreVerbose, "verbose", false, "Show detailed restore progress")
|
||||
restoreSingleCmd.Flags().BoolVar(&restoreNoProgress, "no-progress", false, "Disable progress indicators")
|
||||
restoreSingleCmd.Flags().StringVar(&restoreEncryptionKeyFile, "encryption-key-file", "", "Path to encryption key file (required for encrypted backups)")
|
||||
restoreSingleCmd.Flags().StringVar(&restoreEncryptionKeyEnv, "encryption-key-env", "DBBACKUP_ENCRYPTION_KEY", "Environment variable containing encryption key")
|
||||
restoreSingleCmd.Flags().BoolVar(&restoreDiagnose, "diagnose", false, "Run deep diagnosis before restore to detect corruption/truncation")
|
||||
restoreSingleCmd.Flags().StringVar(&restoreSaveDebugLog, "save-debug-log", "", "Save detailed error report to file on failure (e.g., /tmp/restore-debug.json)")
|
||||
restoreSingleCmd.Flags().BoolVar(&restoreDebugLocks, "debug-locks", false, "Enable detailed lock debugging (captures PostgreSQL config, Guard decisions, boost attempts)")
|
||||
|
||||
// Cluster restore flags
|
||||
restoreClusterCmd.Flags().BoolVar(&restoreListDBs, "list-databases", false, "List databases in cluster backup and exit")
|
||||
restoreClusterCmd.Flags().StringVar(&restoreDatabase, "database", "", "Extract/restore single database from cluster")
|
||||
restoreClusterCmd.Flags().StringVar(&restoreDatabases, "databases", "", "Extract multiple databases (comma-separated)")
|
||||
restoreClusterCmd.Flags().StringVar(&restoreOutputDir, "output-dir", "", "Extract to directory without restoring (requires --database or --databases)")
|
||||
restoreClusterCmd.Flags().BoolVar(&restoreConfirm, "confirm", false, "Confirm and execute restore (required)")
|
||||
restoreClusterCmd.Flags().BoolVar(&restoreDryRun, "dry-run", false, "Show what would be done without executing")
|
||||
restoreClusterCmd.Flags().BoolVar(&restoreForce, "force", false, "Skip safety checks and confirmations")
|
||||
restoreClusterCmd.Flags().BoolVar(&restoreCleanCluster, "clean-cluster", false, "Drop all existing user databases before restore (disaster recovery)")
|
||||
restoreClusterCmd.Flags().IntVar(&restoreJobs, "jobs", 0, "Number of parallel decompression jobs (0 = auto)")
|
||||
restoreClusterCmd.Flags().StringVar(&restoreProfile, "profile", "conservative", "Resource profile: conservative (single-threaded, prevents lock issues), balanced (auto-detect), aggressive (max speed)")
|
||||
restoreClusterCmd.Flags().IntVar(&restoreJobs, "jobs", 0, "Number of parallel decompression jobs (0 = auto, overrides profile)")
|
||||
restoreClusterCmd.Flags().IntVar(&restoreParallelDBs, "parallel-dbs", 0, "Number of databases to restore in parallel (0 = use profile, 1 = sequential, -1 = auto-detect, overrides profile)")
|
||||
restoreClusterCmd.Flags().StringVar(&restoreWorkdir, "workdir", "", "Working directory for extraction (use when system disk is small, e.g. /mnt/storage/restore_tmp)")
|
||||
restoreClusterCmd.Flags().BoolVar(&restoreVerbose, "verbose", false, "Show detailed restore progress")
|
||||
restoreClusterCmd.Flags().BoolVar(&restoreNoProgress, "no-progress", false, "Disable progress indicators")
|
||||
@ -296,6 +346,11 @@ func init() {
|
||||
restoreClusterCmd.Flags().StringVar(&restoreEncryptionKeyEnv, "encryption-key-env", "DBBACKUP_ENCRYPTION_KEY", "Environment variable containing encryption key")
|
||||
restoreClusterCmd.Flags().BoolVar(&restoreDiagnose, "diagnose", false, "Run deep diagnosis on all dumps before restore")
|
||||
restoreClusterCmd.Flags().StringVar(&restoreSaveDebugLog, "save-debug-log", "", "Save detailed error report to file on failure (e.g., /tmp/restore-debug.json)")
|
||||
restoreClusterCmd.Flags().BoolVar(&restoreDebugLocks, "debug-locks", false, "Enable detailed lock debugging (captures PostgreSQL config, Guard decisions, boost attempts)")
|
||||
restoreClusterCmd.Flags().BoolVar(&restoreClean, "clean", false, "Drop and recreate target database (for single DB restore)")
|
||||
restoreClusterCmd.Flags().BoolVar(&restoreCreate, "create", false, "Create target database if it doesn't exist (for single DB restore)")
|
||||
restoreClusterCmd.Flags().BoolVar(&restoreOOMProtection, "oom-protection", false, "Enable OOM protection: disable swap, tune PostgreSQL memory, protect from OOM killer")
|
||||
restoreClusterCmd.Flags().BoolVar(&restoreLowMemory, "low-memory", false, "Force low-memory mode: single-threaded restore with minimal memory (use for <8GB RAM or very large backups)")
|
||||
|
||||
// PITR restore flags
|
||||
restorePITRCmd.Flags().StringVar(&pitrBaseBackup, "base-backup", "", "Path to base backup file (.tar.gz) (required)")
|
||||
@ -342,7 +397,7 @@ func runRestoreDiagnose(cmd *cobra.Command, args []string) error {
|
||||
return fmt.Errorf("archive not found: %s", archivePath)
|
||||
}
|
||||
|
||||
log.Info("🔍 Diagnosing backup file", "path", archivePath)
|
||||
log.Info("[DIAG] Diagnosing backup file", "path", archivePath)
|
||||
|
||||
diagnoser := restore.NewDiagnoser(log, restoreVerbose)
|
||||
|
||||
@ -350,10 +405,11 @@ func runRestoreDiagnose(cmd *cobra.Command, args []string) error {
|
||||
format := restore.DetectArchiveFormat(archivePath)
|
||||
|
||||
if format.IsClusterBackup() && diagnoseDeep {
|
||||
// Create temp directory for extraction
|
||||
tempDir, err := os.MkdirTemp("", "dbbackup-diagnose-*")
|
||||
// Create temp directory for extraction in configured WorkDir
|
||||
workDir := cfg.GetEffectiveWorkDir()
|
||||
tempDir, err := os.MkdirTemp(workDir, "dbbackup-diagnose-*")
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to create temp directory: %w", err)
|
||||
return fmt.Errorf("failed to create temp directory in %s: %w", workDir, err)
|
||||
}
|
||||
|
||||
if !diagnoseKeepTemp {
|
||||
@ -386,7 +442,7 @@ func runRestoreDiagnose(cmd *cobra.Command, args []string) error {
|
||||
// Summary
|
||||
if !diagnoseJSON {
|
||||
fmt.Println("\n" + strings.Repeat("=", 70))
|
||||
fmt.Printf("📊 CLUSTER SUMMARY: %d databases analyzed\n", len(results))
|
||||
fmt.Printf("[SUMMARY] CLUSTER SUMMARY: %d databases analyzed\n", len(results))
|
||||
|
||||
validCount := 0
|
||||
for _, r := range results {
|
||||
@ -396,9 +452,9 @@ func runRestoreDiagnose(cmd *cobra.Command, args []string) error {
|
||||
}
|
||||
|
||||
if validCount == len(results) {
|
||||
fmt.Println("✅ All dumps are valid")
|
||||
fmt.Println("[OK] All dumps are valid")
|
||||
} else {
|
||||
fmt.Printf("❌ %d/%d dumps have issues\n", len(results)-validCount, len(results))
|
||||
fmt.Printf("[FAIL] %d/%d dumps have issues\n", len(results)-validCount, len(results))
|
||||
}
|
||||
fmt.Println(strings.Repeat("=", 70))
|
||||
}
|
||||
@ -425,7 +481,7 @@ func runRestoreDiagnose(cmd *cobra.Command, args []string) error {
|
||||
return fmt.Errorf("backup file has validation errors")
|
||||
}
|
||||
|
||||
log.Info("✅ Backup file appears valid")
|
||||
log.Info("[OK] Backup file appears valid")
|
||||
return nil
|
||||
}
|
||||
|
||||
@ -433,6 +489,16 @@ func runRestoreDiagnose(cmd *cobra.Command, args []string) error {
|
||||
func runRestoreSingle(cmd *cobra.Command, args []string) error {
|
||||
archivePath := args[0]
|
||||
|
||||
// Apply resource profile
|
||||
if err := config.ApplyProfile(cfg, restoreProfile, restoreJobs, 0); err != nil {
|
||||
log.Warn("Invalid profile, using balanced", "error", err)
|
||||
restoreProfile = "balanced"
|
||||
_ = config.ApplyProfile(cfg, restoreProfile, restoreJobs, 0)
|
||||
}
|
||||
if cfg.Debug && restoreProfile != "balanced" {
|
||||
log.Info("Using restore profile", "profile", restoreProfile)
|
||||
}
|
||||
|
||||
// Check if this is a cloud URI
|
||||
var cleanupFunc func() error
|
||||
|
||||
@ -544,7 +610,7 @@ func runRestoreSingle(cmd *cobra.Command, args []string) error {
|
||||
isDryRun := restoreDryRun || !restoreConfirm
|
||||
|
||||
if isDryRun {
|
||||
fmt.Println("\n🔍 DRY-RUN MODE - No changes will be made")
|
||||
fmt.Println("\n[DRY-RUN] DRY-RUN MODE - No changes will be made")
|
||||
fmt.Printf("\nWould restore:\n")
|
||||
fmt.Printf(" Archive: %s\n", archivePath)
|
||||
fmt.Printf(" Format: %s\n", format.String())
|
||||
@ -564,13 +630,19 @@ func runRestoreSingle(cmd *cobra.Command, args []string) error {
|
||||
|
||||
// Create restore engine
|
||||
engine := restore.New(cfg, log, db)
|
||||
|
||||
|
||||
// Enable debug logging if requested
|
||||
if restoreSaveDebugLog != "" {
|
||||
engine.SetDebugLogPath(restoreSaveDebugLog)
|
||||
log.Info("Debug logging enabled", "output", restoreSaveDebugLog)
|
||||
}
|
||||
|
||||
// Enable lock debugging if requested (single restore)
|
||||
if restoreDebugLocks {
|
||||
cfg.DebugLocks = true
|
||||
log.Info("🔍 Lock debugging enabled - will capture PostgreSQL lock config, Guard decisions, boost attempts")
|
||||
}
|
||||
|
||||
// Setup signal handling
|
||||
ctx, cancel := context.WithCancel(context.Background())
|
||||
defer cancel()
|
||||
@ -587,18 +659,18 @@ func runRestoreSingle(cmd *cobra.Command, args []string) error {
|
||||
|
||||
// Run pre-restore diagnosis if requested
|
||||
if restoreDiagnose {
|
||||
log.Info("🔍 Running pre-restore diagnosis...")
|
||||
|
||||
log.Info("[DIAG] Running pre-restore diagnosis...")
|
||||
|
||||
diagnoser := restore.NewDiagnoser(log, restoreVerbose)
|
||||
result, err := diagnoser.DiagnoseFile(archivePath)
|
||||
if err != nil {
|
||||
return fmt.Errorf("diagnosis failed: %w", err)
|
||||
}
|
||||
|
||||
|
||||
diagnoser.PrintDiagnosis(result)
|
||||
|
||||
|
||||
if !result.IsValid {
|
||||
log.Error("❌ Pre-restore diagnosis found issues")
|
||||
log.Error("[FAIL] Pre-restore diagnosis found issues")
|
||||
if result.IsTruncated {
|
||||
log.Error(" The backup file appears to be TRUNCATED")
|
||||
}
|
||||
@ -606,13 +678,13 @@ func runRestoreSingle(cmd *cobra.Command, args []string) error {
|
||||
log.Error(" The backup file appears to be CORRUPTED")
|
||||
}
|
||||
fmt.Println("\nUse --force to attempt restore anyway.")
|
||||
|
||||
|
||||
if !restoreForce {
|
||||
return fmt.Errorf("aborting restore due to backup file issues")
|
||||
}
|
||||
log.Warn("Continuing despite diagnosis errors (--force enabled)")
|
||||
} else {
|
||||
log.Info("✅ Backup file passed diagnosis")
|
||||
log.Info("[OK] Backup file passed diagnosis")
|
||||
}
|
||||
}
|
||||
|
||||
@ -632,7 +704,7 @@ func runRestoreSingle(cmd *cobra.Command, args []string) error {
|
||||
// Audit log: restore success
|
||||
auditLogger.LogRestoreComplete(user, targetDB, time.Since(startTime))
|
||||
|
||||
log.Info("✅ Restore completed successfully", "database", targetDB)
|
||||
log.Info("[OK] Restore completed successfully", "database", targetDB)
|
||||
return nil
|
||||
}
|
||||
|
||||
@ -654,6 +726,203 @@ func runRestoreCluster(cmd *cobra.Command, args []string) error {
|
||||
return fmt.Errorf("archive not found: %s", archivePath)
|
||||
}
|
||||
|
||||
// Handle --list-databases flag
|
||||
if restoreListDBs {
|
||||
return runListDatabases(archivePath)
|
||||
}
|
||||
|
||||
// Handle single/multiple database extraction
|
||||
if restoreDatabase != "" || restoreDatabases != "" {
|
||||
return runExtractDatabases(archivePath)
|
||||
}
|
||||
|
||||
// Otherwise proceed with full cluster restore
|
||||
return runFullClusterRestore(archivePath)
|
||||
}
|
||||
|
||||
// runListDatabases lists all databases in a cluster backup
|
||||
func runListDatabases(archivePath string) error {
|
||||
ctx := context.Background()
|
||||
|
||||
log.Info("Scanning cluster backup", "archive", filepath.Base(archivePath))
|
||||
fmt.Println()
|
||||
|
||||
databases, err := restore.ListDatabasesInCluster(ctx, archivePath, log)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to list databases: %w", err)
|
||||
}
|
||||
|
||||
fmt.Printf("📦 Databases in cluster backup:\n")
|
||||
var totalSize int64
|
||||
for _, db := range databases {
|
||||
sizeStr := formatSize(db.Size)
|
||||
fmt.Printf(" - %-30s (%s)\n", db.Name, sizeStr)
|
||||
totalSize += db.Size
|
||||
}
|
||||
|
||||
fmt.Printf("\nTotal: %s across %d database(s)\n", formatSize(totalSize), len(databases))
|
||||
return nil
|
||||
}
|
||||
|
||||
// runExtractDatabases extracts single or multiple databases from cluster backup
|
||||
func runExtractDatabases(archivePath string) error {
|
||||
ctx, cancel := context.WithCancel(context.Background())
|
||||
defer cancel()
|
||||
|
||||
// Setup signal handling
|
||||
sigChan := make(chan os.Signal, 1)
|
||||
signal.Notify(sigChan, os.Interrupt, syscall.SIGTERM)
|
||||
defer signal.Stop(sigChan)
|
||||
|
||||
go func() {
|
||||
<-sigChan
|
||||
log.Warn("Extraction interrupted by user")
|
||||
cancel()
|
||||
}()
|
||||
|
||||
// Single database extraction
|
||||
if restoreDatabase != "" {
|
||||
return handleSingleDatabaseExtraction(ctx, archivePath, restoreDatabase)
|
||||
}
|
||||
|
||||
// Multiple database extraction
|
||||
if restoreDatabases != "" {
|
||||
return handleMultipleDatabaseExtraction(ctx, archivePath, restoreDatabases)
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// handleSingleDatabaseExtraction handles single database extraction or restore
|
||||
func handleSingleDatabaseExtraction(ctx context.Context, archivePath, dbName string) error {
|
||||
// Extract-only mode (no restore)
|
||||
if restoreOutputDir != "" {
|
||||
return extractSingleDatabase(ctx, archivePath, dbName, restoreOutputDir)
|
||||
}
|
||||
|
||||
// Restore mode
|
||||
if !restoreConfirm {
|
||||
fmt.Println("\n[DRY-RUN] DRY-RUN MODE - No changes will be made")
|
||||
fmt.Printf("\nWould extract and restore:\n")
|
||||
fmt.Printf(" Database: %s\n", dbName)
|
||||
fmt.Printf(" From: %s\n", archivePath)
|
||||
targetDB := restoreTarget
|
||||
if targetDB == "" {
|
||||
targetDB = dbName
|
||||
}
|
||||
fmt.Printf(" Target: %s\n", targetDB)
|
||||
if restoreClean {
|
||||
fmt.Printf(" Clean: true (drop and recreate)\n")
|
||||
}
|
||||
if restoreCreate {
|
||||
fmt.Printf(" Create: true (create if missing)\n")
|
||||
}
|
||||
fmt.Println("\nTo execute this restore, add --confirm flag")
|
||||
return nil
|
||||
}
|
||||
|
||||
// Create database instance
|
||||
db, err := database.New(cfg, log)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to create database instance: %w", err)
|
||||
}
|
||||
defer db.Close()
|
||||
|
||||
// Create restore engine
|
||||
engine := restore.New(cfg, log, db)
|
||||
|
||||
// Determine target database name
|
||||
targetDB := restoreTarget
|
||||
if targetDB == "" {
|
||||
targetDB = dbName
|
||||
}
|
||||
|
||||
log.Info("Restoring single database from cluster", "database", dbName, "target", targetDB)
|
||||
|
||||
// Restore single database from cluster
|
||||
if err := engine.RestoreSingleFromCluster(ctx, archivePath, dbName, targetDB, restoreClean, restoreCreate); err != nil {
|
||||
return fmt.Errorf("restore failed: %w", err)
|
||||
}
|
||||
|
||||
fmt.Printf("\n✅ Successfully restored '%s' as '%s'\n", dbName, targetDB)
|
||||
return nil
|
||||
}
|
||||
|
||||
// extractSingleDatabase extracts a single database without restoring
|
||||
func extractSingleDatabase(ctx context.Context, archivePath, dbName, outputDir string) error {
|
||||
log.Info("Extracting database", "database", dbName, "output", outputDir)
|
||||
|
||||
// Create progress indicator
|
||||
prog := progress.NewIndicator(!restoreNoProgress, "dots")
|
||||
|
||||
extractedPath, err := restore.ExtractDatabaseFromCluster(ctx, archivePath, dbName, outputDir, log, prog)
|
||||
if err != nil {
|
||||
return fmt.Errorf("extraction failed: %w", err)
|
||||
}
|
||||
|
||||
fmt.Printf("\n✅ Extracted: %s\n", extractedPath)
|
||||
fmt.Printf(" Database: %s\n", dbName)
|
||||
fmt.Printf(" Location: %s\n", outputDir)
|
||||
return nil
|
||||
}
|
||||
|
||||
// handleMultipleDatabaseExtraction handles multiple database extraction
|
||||
func handleMultipleDatabaseExtraction(ctx context.Context, archivePath, databases string) error {
|
||||
if restoreOutputDir == "" {
|
||||
return fmt.Errorf("--output-dir required when using --databases")
|
||||
}
|
||||
|
||||
// Parse database list
|
||||
dbNames := strings.Split(databases, ",")
|
||||
for i := range dbNames {
|
||||
dbNames[i] = strings.TrimSpace(dbNames[i])
|
||||
}
|
||||
|
||||
log.Info("Extracting multiple databases", "count", len(dbNames), "output", restoreOutputDir)
|
||||
|
||||
// Create progress indicator
|
||||
prog := progress.NewIndicator(!restoreNoProgress, "dots")
|
||||
|
||||
extractedPaths, err := restore.ExtractMultipleDatabasesFromCluster(ctx, archivePath, dbNames, restoreOutputDir, log, prog)
|
||||
if err != nil {
|
||||
return fmt.Errorf("extraction failed: %w", err)
|
||||
}
|
||||
|
||||
fmt.Printf("\n✅ Extracted %d database(s):\n", len(extractedPaths))
|
||||
for dbName, path := range extractedPaths {
|
||||
fmt.Printf(" - %s → %s\n", dbName, filepath.Base(path))
|
||||
}
|
||||
fmt.Printf(" Location: %s\n", restoreOutputDir)
|
||||
return nil
|
||||
}
|
||||
|
||||
// runFullClusterRestore performs a full cluster restore
|
||||
func runFullClusterRestore(archivePath string) error {
|
||||
|
||||
// Apply resource profile
|
||||
if err := config.ApplyProfile(cfg, restoreProfile, restoreJobs, restoreParallelDBs); err != nil {
|
||||
log.Warn("Invalid profile, using balanced", "error", err)
|
||||
restoreProfile = "balanced"
|
||||
_ = config.ApplyProfile(cfg, restoreProfile, restoreJobs, restoreParallelDBs)
|
||||
}
|
||||
if cfg.Debug || restoreProfile != "balanced" {
|
||||
log.Info("Using restore profile", "profile", restoreProfile, "parallel_dbs", cfg.ClusterParallelism, "jobs", cfg.Jobs)
|
||||
}
|
||||
|
||||
// Convert to absolute path
|
||||
if !filepath.IsAbs(archivePath) {
|
||||
absPath, err := filepath.Abs(archivePath)
|
||||
if err != nil {
|
||||
return fmt.Errorf("invalid archive path: %w", err)
|
||||
}
|
||||
archivePath = absPath
|
||||
}
|
||||
|
||||
// Check if file exists
|
||||
if _, err := os.Stat(archivePath); err != nil {
|
||||
return fmt.Errorf("archive not found: %s", archivePath)
|
||||
}
|
||||
|
||||
// Check if backup is encrypted and decrypt if necessary
|
||||
if backup.IsBackupEncrypted(archivePath) {
|
||||
log.Info("Encrypted cluster backup detected, decrypting...")
|
||||
@ -700,7 +969,7 @@ func runRestoreCluster(cmd *cobra.Command, args []string) error {
|
||||
}
|
||||
}
|
||||
|
||||
log.Warn("⚠️ Using alternative working directory for extraction")
|
||||
log.Warn("[WARN] Using alternative working directory for extraction")
|
||||
log.Warn(" This is recommended when system disk space is limited")
|
||||
log.Warn(" Location: " + restoreWorkdir)
|
||||
}
|
||||
@ -753,7 +1022,7 @@ func runRestoreCluster(cmd *cobra.Command, args []string) error {
|
||||
isDryRun := restoreDryRun || !restoreConfirm
|
||||
|
||||
if isDryRun {
|
||||
fmt.Println("\n🔍 DRY-RUN MODE - No changes will be made")
|
||||
fmt.Println("\n[DRY-RUN] DRY-RUN MODE - No changes will be made")
|
||||
fmt.Printf("\nWould restore cluster:\n")
|
||||
fmt.Printf(" Archive: %s\n", archivePath)
|
||||
fmt.Printf(" Parallel Jobs: %d (0 = auto)\n", restoreJobs)
|
||||
@ -763,7 +1032,7 @@ func runRestoreCluster(cmd *cobra.Command, args []string) error {
|
||||
if restoreCleanCluster {
|
||||
fmt.Printf(" Clean Cluster: true (will drop %d existing database(s))\n", len(existingDBs))
|
||||
if len(existingDBs) > 0 {
|
||||
fmt.Printf("\n⚠️ Databases to be dropped:\n")
|
||||
fmt.Printf("\n[WARN] Databases to be dropped:\n")
|
||||
for _, dbName := range existingDBs {
|
||||
fmt.Printf(" - %s\n", dbName)
|
||||
}
|
||||
@ -775,22 +1044,39 @@ func runRestoreCluster(cmd *cobra.Command, args []string) error {
|
||||
|
||||
// Warning for clean-cluster
|
||||
if restoreCleanCluster && len(existingDBs) > 0 {
|
||||
log.Warn("🔥 Clean cluster mode enabled")
|
||||
log.Warn("[!!] Clean cluster mode enabled")
|
||||
log.Warn(fmt.Sprintf(" %d existing database(s) will be DROPPED before restore!", len(existingDBs)))
|
||||
for _, dbName := range existingDBs {
|
||||
log.Warn(" - " + dbName)
|
||||
}
|
||||
}
|
||||
|
||||
// Override cluster parallelism if --parallel-dbs is specified
|
||||
if restoreParallelDBs == -1 {
|
||||
// Auto-detect optimal parallelism based on system resources
|
||||
autoParallel := restore.CalculateOptimalParallel()
|
||||
cfg.ClusterParallelism = autoParallel
|
||||
log.Info("Auto-detected optimal parallelism for database restores", "parallel_dbs", autoParallel, "mode", "auto")
|
||||
} else if restoreParallelDBs > 0 {
|
||||
cfg.ClusterParallelism = restoreParallelDBs
|
||||
log.Info("Using custom parallelism for database restores", "parallel_dbs", restoreParallelDBs)
|
||||
}
|
||||
|
||||
// Create restore engine
|
||||
engine := restore.New(cfg, log, db)
|
||||
|
||||
|
||||
// Enable debug logging if requested
|
||||
if restoreSaveDebugLog != "" {
|
||||
engine.SetDebugLogPath(restoreSaveDebugLog)
|
||||
log.Info("Debug logging enabled", "output", restoreSaveDebugLog)
|
||||
}
|
||||
|
||||
// Enable lock debugging if requested (cluster restore)
|
||||
if restoreDebugLocks {
|
||||
cfg.DebugLocks = true
|
||||
log.Info("🔍 Lock debugging enabled - will capture PostgreSQL lock config, Guard decisions, boost attempts")
|
||||
}
|
||||
|
||||
// Setup signal handling
|
||||
ctx, cancel := context.WithCancel(context.Background())
|
||||
defer cancel()
|
||||
@ -826,23 +1112,52 @@ func runRestoreCluster(cmd *cobra.Command, args []string) error {
|
||||
log.Info("Database cleanup completed")
|
||||
}
|
||||
|
||||
// Run pre-restore diagnosis if requested
|
||||
// OPTIMIZATION: Pre-extract archive once for both diagnosis and restore
|
||||
// This avoids extracting the same tar.gz twice (saves 5-10 min on large clusters)
|
||||
var extractedDir string
|
||||
var extractErr error
|
||||
|
||||
if restoreDiagnose || restoreConfirm {
|
||||
log.Info("Pre-extracting cluster archive (shared for validation and restore)...")
|
||||
extractedDir, extractErr = safety.ValidateAndExtractCluster(ctx, archivePath)
|
||||
if extractErr != nil {
|
||||
return fmt.Errorf("failed to extract cluster archive: %w", extractErr)
|
||||
}
|
||||
defer os.RemoveAll(extractedDir) // Cleanup at end
|
||||
log.Info("Archive extracted successfully", "location", extractedDir)
|
||||
}
|
||||
|
||||
// Run pre-restore diagnosis if requested (using already-extracted directory)
|
||||
if restoreDiagnose {
|
||||
log.Info("🔍 Running pre-restore diagnosis...")
|
||||
|
||||
// Create temp directory for extraction
|
||||
diagTempDir, err := os.MkdirTemp("", "dbbackup-diagnose-*")
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to create temp directory for diagnosis: %w", err)
|
||||
}
|
||||
defer os.RemoveAll(diagTempDir)
|
||||
|
||||
log.Info("[DIAG] Running pre-restore diagnosis on extracted dumps...")
|
||||
|
||||
diagnoser := restore.NewDiagnoser(log, restoreVerbose)
|
||||
results, err := diagnoser.DiagnoseClusterDumps(archivePath, diagTempDir)
|
||||
if err != nil {
|
||||
return fmt.Errorf("diagnosis failed: %w", err)
|
||||
// Diagnose dumps directly from extracted directory
|
||||
dumpsDir := filepath.Join(extractedDir, "dumps")
|
||||
if _, err := os.Stat(dumpsDir); err != nil {
|
||||
return fmt.Errorf("no dumps directory found in extracted archive: %w", err)
|
||||
}
|
||||
|
||||
|
||||
entries, err := os.ReadDir(dumpsDir)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to read dumps directory: %w", err)
|
||||
}
|
||||
|
||||
// Diagnose each dump file
|
||||
var results []*restore.DiagnoseResult
|
||||
for _, entry := range entries {
|
||||
if entry.IsDir() {
|
||||
continue
|
||||
}
|
||||
dumpPath := filepath.Join(dumpsDir, entry.Name())
|
||||
result, err := diagnoser.DiagnoseFile(dumpPath)
|
||||
if err != nil {
|
||||
log.Warn("Could not diagnose dump", "file", entry.Name(), "error", err)
|
||||
continue
|
||||
}
|
||||
results = append(results, result)
|
||||
}
|
||||
|
||||
// Check for any invalid dumps
|
||||
var invalidDumps []string
|
||||
for _, result := range results {
|
||||
@ -851,24 +1166,24 @@ func runRestoreCluster(cmd *cobra.Command, args []string) error {
|
||||
diagnoser.PrintDiagnosis(result)
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
if len(invalidDumps) > 0 {
|
||||
log.Error("❌ Pre-restore diagnosis found issues",
|
||||
log.Error("[FAIL] Pre-restore diagnosis found issues",
|
||||
"invalid_dumps", len(invalidDumps),
|
||||
"total_dumps", len(results))
|
||||
fmt.Println("\n⚠️ The following dumps have issues and will likely fail during restore:")
|
||||
fmt.Println("\n[WARN] The following dumps have issues and will likely fail during restore:")
|
||||
for _, name := range invalidDumps {
|
||||
fmt.Printf(" - %s\n", name)
|
||||
}
|
||||
fmt.Println("\nRun 'dbbackup restore diagnose <archive> --deep' for full details.")
|
||||
fmt.Println("Use --force to attempt restore anyway.")
|
||||
|
||||
|
||||
if !restoreForce {
|
||||
return fmt.Errorf("aborting restore due to %d invalid dump(s)", len(invalidDumps))
|
||||
}
|
||||
log.Warn("Continuing despite diagnosis errors (--force enabled)")
|
||||
} else {
|
||||
log.Info("✅ All dumps passed diagnosis", "count", len(results))
|
||||
log.Info("[OK] All dumps passed diagnosis", "count", len(results))
|
||||
}
|
||||
}
|
||||
|
||||
@ -880,7 +1195,8 @@ func runRestoreCluster(cmd *cobra.Command, args []string) error {
|
||||
startTime := time.Now()
|
||||
auditLogger.LogRestoreStart(user, "all_databases", archivePath)
|
||||
|
||||
if err := engine.RestoreCluster(ctx, archivePath); err != nil {
|
||||
// Pass pre-extracted directory to avoid double extraction
|
||||
if err := engine.RestoreCluster(ctx, archivePath, extractedDir); err != nil {
|
||||
auditLogger.LogRestoreFailed(user, "all_databases", err)
|
||||
return fmt.Errorf("cluster restore failed: %w", err)
|
||||
}
|
||||
@ -888,7 +1204,7 @@ func runRestoreCluster(cmd *cobra.Command, args []string) error {
|
||||
// Audit log: restore success
|
||||
auditLogger.LogRestoreComplete(user, "all_databases", time.Since(startTime))
|
||||
|
||||
log.Info("✅ Cluster restore completed successfully")
|
||||
log.Info("[OK] Cluster restore completed successfully")
|
||||
return nil
|
||||
}
|
||||
|
||||
@ -937,7 +1253,7 @@ func runRestoreList(cmd *cobra.Command, args []string) error {
|
||||
}
|
||||
|
||||
// Print header
|
||||
fmt.Printf("\n📦 Available backup archives in %s\n\n", backupDir)
|
||||
fmt.Printf("\n[LIST] Available backup archives in %s\n\n", backupDir)
|
||||
fmt.Printf("%-40s %-25s %-12s %-20s %s\n",
|
||||
"FILENAME", "FORMAT", "SIZE", "MODIFIED", "DATABASE")
|
||||
fmt.Println(strings.Repeat("-", 120))
|
||||
@ -1054,9 +1370,9 @@ func runRestorePITR(cmd *cobra.Command, args []string) error {
|
||||
}
|
||||
|
||||
// Display recovery target info
|
||||
log.Info("━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━")
|
||||
log.Info("=====================================================")
|
||||
log.Info(" Point-in-Time Recovery (PITR)")
|
||||
log.Info("━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━")
|
||||
log.Info("=====================================================")
|
||||
log.Info("")
|
||||
log.Info(target.String())
|
||||
log.Info("")
|
||||
@ -1080,6 +1396,6 @@ func runRestorePITR(cmd *cobra.Command, args []string) error {
|
||||
return fmt.Errorf("PITR restore failed: %w", err)
|
||||
}
|
||||
|
||||
log.Info("✅ PITR restore completed successfully")
|
||||
log.Info("[OK] PITR restore completed successfully")
|
||||
return nil
|
||||
}
|
||||
|
||||
@ -134,6 +134,7 @@ func Execute(ctx context.Context, config *config.Config, logger logger.Logger) e
|
||||
rootCmd.PersistentFlags().StringVar(&cfg.BackupDir, "backup-dir", cfg.BackupDir, "Backup directory")
|
||||
rootCmd.PersistentFlags().BoolVar(&cfg.NoColor, "no-color", cfg.NoColor, "Disable colored output")
|
||||
rootCmd.PersistentFlags().BoolVar(&cfg.Debug, "debug", cfg.Debug, "Enable debug logging")
|
||||
rootCmd.PersistentFlags().BoolVar(&cfg.DebugLocks, "debug-locks", cfg.DebugLocks, "Enable detailed lock debugging (captures PostgreSQL lock configuration, Large DB Guard decisions, boost attempts)")
|
||||
rootCmd.PersistentFlags().IntVar(&cfg.Jobs, "jobs", cfg.Jobs, "Number of parallel jobs")
|
||||
rootCmd.PersistentFlags().IntVar(&cfg.DumpJobs, "dump-jobs", cfg.DumpJobs, "Number of parallel dump jobs")
|
||||
rootCmd.PersistentFlags().IntVar(&cfg.MaxCores, "max-cores", cfg.MaxCores, "Maximum CPU cores to use")
|
||||
|
||||
56
cmd/rto.go
56
cmd/rto.go
@ -181,13 +181,13 @@ func runRTOStatus(cmd *cobra.Command, args []string) error {
|
||||
|
||||
// Display status
|
||||
fmt.Println()
|
||||
fmt.Println("╔═══════════════════════════════════════════════════════════╗")
|
||||
fmt.Println("║ RTO/RPO STATUS SUMMARY ║")
|
||||
fmt.Println("╠═══════════════════════════════════════════════════════════╣")
|
||||
fmt.Printf("║ Target RTO: %-15s Target RPO: %-15s ║\n",
|
||||
fmt.Println("+-----------------------------------------------------------+")
|
||||
fmt.Println("| RTO/RPO STATUS SUMMARY |")
|
||||
fmt.Println("+-----------------------------------------------------------+")
|
||||
fmt.Printf("| Target RTO: %-15s Target RPO: %-15s |\n",
|
||||
formatDuration(config.TargetRTO),
|
||||
formatDuration(config.TargetRPO))
|
||||
fmt.Println("╠═══════════════════════════════════════════════════════════╣")
|
||||
fmt.Println("+-----------------------------------------------------------+")
|
||||
|
||||
// Compliance status
|
||||
rpoRate := 0.0
|
||||
@ -199,31 +199,31 @@ func runRTOStatus(cmd *cobra.Command, args []string) error {
|
||||
fullRate = float64(summary.FullyCompliant) / float64(summary.TotalDatabases) * 100
|
||||
}
|
||||
|
||||
fmt.Printf("║ Databases: %-5d ║\n", summary.TotalDatabases)
|
||||
fmt.Printf("║ RPO Compliant: %-5d (%.0f%%) ║\n", summary.RPOCompliant, rpoRate)
|
||||
fmt.Printf("║ RTO Compliant: %-5d (%.0f%%) ║\n", summary.RTOCompliant, rtoRate)
|
||||
fmt.Printf("║ Fully Compliant: %-3d (%.0f%%) ║\n", summary.FullyCompliant, fullRate)
|
||||
fmt.Printf("| Databases: %-5d |\n", summary.TotalDatabases)
|
||||
fmt.Printf("| RPO Compliant: %-5d (%.0f%%) |\n", summary.RPOCompliant, rpoRate)
|
||||
fmt.Printf("| RTO Compliant: %-5d (%.0f%%) |\n", summary.RTOCompliant, rtoRate)
|
||||
fmt.Printf("| Fully Compliant: %-3d (%.0f%%) |\n", summary.FullyCompliant, fullRate)
|
||||
|
||||
if summary.CriticalIssues > 0 {
|
||||
fmt.Printf("║ ⚠️ Critical Issues: %-3d ║\n", summary.CriticalIssues)
|
||||
fmt.Printf("| [WARN] Critical Issues: %-3d |\n", summary.CriticalIssues)
|
||||
}
|
||||
|
||||
fmt.Println("╠═══════════════════════════════════════════════════════════╣")
|
||||
fmt.Printf("║ Average RPO: %-15s Worst: %-15s ║\n",
|
||||
fmt.Println("+-----------------------------------------------------------+")
|
||||
fmt.Printf("| Average RPO: %-15s Worst: %-15s |\n",
|
||||
formatDuration(summary.AverageRPO),
|
||||
formatDuration(summary.WorstRPO))
|
||||
fmt.Printf("║ Average RTO: %-15s Worst: %-15s ║\n",
|
||||
fmt.Printf("| Average RTO: %-15s Worst: %-15s |\n",
|
||||
formatDuration(summary.AverageRTO),
|
||||
formatDuration(summary.WorstRTO))
|
||||
|
||||
if summary.WorstRPODatabase != "" {
|
||||
fmt.Printf("║ Worst RPO Database: %-38s║\n", summary.WorstRPODatabase)
|
||||
fmt.Printf("| Worst RPO Database: %-38s|\n", summary.WorstRPODatabase)
|
||||
}
|
||||
if summary.WorstRTODatabase != "" {
|
||||
fmt.Printf("║ Worst RTO Database: %-38s║\n", summary.WorstRTODatabase)
|
||||
fmt.Printf("| Worst RTO Database: %-38s|\n", summary.WorstRTODatabase)
|
||||
}
|
||||
|
||||
fmt.Println("╚═══════════════════════════════════════════════════════════╝")
|
||||
fmt.Println("+-----------------------------------------------------------+")
|
||||
fmt.Println()
|
||||
|
||||
// Per-database status
|
||||
@ -234,19 +234,19 @@ func runRTOStatus(cmd *cobra.Command, args []string) error {
|
||||
fmt.Println(strings.Repeat("-", 70))
|
||||
|
||||
for _, a := range analyses {
|
||||
status := "✅"
|
||||
status := "[OK]"
|
||||
if !a.RPOCompliant || !a.RTOCompliant {
|
||||
status = "❌"
|
||||
status = "[FAIL]"
|
||||
}
|
||||
|
||||
rpoStr := formatDuration(a.CurrentRPO)
|
||||
rtoStr := formatDuration(a.CurrentRTO)
|
||||
|
||||
if !a.RPOCompliant {
|
||||
rpoStr = "⚠️ " + rpoStr
|
||||
rpoStr = "[WARN] " + rpoStr
|
||||
}
|
||||
if !a.RTOCompliant {
|
||||
rtoStr = "⚠️ " + rtoStr
|
||||
rtoStr = "[WARN] " + rtoStr
|
||||
}
|
||||
|
||||
fmt.Printf("%-25s %-12s %-12s %s\n",
|
||||
@ -306,21 +306,21 @@ func runRTOCheck(cmd *cobra.Command, args []string) error {
|
||||
exitCode := 0
|
||||
for _, a := range analyses {
|
||||
if !a.RPOCompliant {
|
||||
fmt.Printf("❌ %s: RPO violation - current %s exceeds target %s\n",
|
||||
fmt.Printf("[FAIL] %s: RPO violation - current %s exceeds target %s\n",
|
||||
a.Database,
|
||||
formatDuration(a.CurrentRPO),
|
||||
formatDuration(config.TargetRPO))
|
||||
exitCode = 1
|
||||
}
|
||||
if !a.RTOCompliant {
|
||||
fmt.Printf("❌ %s: RTO violation - estimated %s exceeds target %s\n",
|
||||
fmt.Printf("[FAIL] %s: RTO violation - estimated %s exceeds target %s\n",
|
||||
a.Database,
|
||||
formatDuration(a.CurrentRTO),
|
||||
formatDuration(config.TargetRTO))
|
||||
exitCode = 1
|
||||
}
|
||||
if a.RPOCompliant && a.RTOCompliant {
|
||||
fmt.Printf("✅ %s: Compliant (RPO: %s, RTO: %s)\n",
|
||||
fmt.Printf("[OK] %s: Compliant (RPO: %s, RTO: %s)\n",
|
||||
a.Database,
|
||||
formatDuration(a.CurrentRPO),
|
||||
formatDuration(a.CurrentRTO))
|
||||
@ -371,13 +371,13 @@ func outputAnalysisText(analyses []*rto.Analysis) error {
|
||||
fmt.Println(strings.Repeat("=", 60))
|
||||
|
||||
// Status
|
||||
rpoStatus := "✅ Compliant"
|
||||
rpoStatus := "[OK] Compliant"
|
||||
if !a.RPOCompliant {
|
||||
rpoStatus = "❌ Violation"
|
||||
rpoStatus = "[FAIL] Violation"
|
||||
}
|
||||
rtoStatus := "✅ Compliant"
|
||||
rtoStatus := "[OK] Compliant"
|
||||
if !a.RTOCompliant {
|
||||
rtoStatus = "❌ Violation"
|
||||
rtoStatus = "[FAIL] Violation"
|
||||
}
|
||||
|
||||
fmt.Println()
|
||||
@ -420,7 +420,7 @@ func outputAnalysisText(analyses []*rto.Analysis) error {
|
||||
fmt.Println(" Recommendations:")
|
||||
fmt.Println(strings.Repeat("-", 50))
|
||||
for _, r := range a.Recommendations {
|
||||
icon := "💡"
|
||||
icon := "[TIP]"
|
||||
switch r.Priority {
|
||||
case rto.PriorityCritical:
|
||||
icon = "🔴"
|
||||
|
||||
@ -141,7 +141,7 @@ func testConnection(ctx context.Context) error {
|
||||
|
||||
// Display results
|
||||
fmt.Println("Connection Test Results:")
|
||||
fmt.Printf(" Status: Connected ✅\n")
|
||||
fmt.Printf(" Status: Connected [OK]\n")
|
||||
fmt.Printf(" Version: %s\n", version)
|
||||
fmt.Printf(" Databases: %d found\n", len(databases))
|
||||
|
||||
@ -167,7 +167,7 @@ func testConnection(ctx context.Context) error {
|
||||
}
|
||||
|
||||
fmt.Println()
|
||||
fmt.Println("✅ Status check completed successfully!")
|
||||
fmt.Println("[OK] Status check completed successfully!")
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
@ -96,17 +96,17 @@ func runVerifyBackup(cmd *cobra.Command, args []string) error {
|
||||
continue
|
||||
}
|
||||
|
||||
fmt.Printf("📁 %s\n", filepath.Base(backupFile))
|
||||
fmt.Printf("[FILE] %s\n", filepath.Base(backupFile))
|
||||
|
||||
if quickVerify {
|
||||
// Quick check: size only
|
||||
err := verification.QuickCheck(backupFile)
|
||||
if err != nil {
|
||||
fmt.Printf(" ❌ FAILED: %v\n\n", err)
|
||||
fmt.Printf(" [FAIL] FAILED: %v\n\n", err)
|
||||
failureCount++
|
||||
continue
|
||||
}
|
||||
fmt.Printf(" ✅ VALID (quick check)\n\n")
|
||||
fmt.Printf(" [OK] VALID (quick check)\n\n")
|
||||
successCount++
|
||||
} else {
|
||||
// Full verification with SHA-256
|
||||
@ -116,7 +116,7 @@ func runVerifyBackup(cmd *cobra.Command, args []string) error {
|
||||
}
|
||||
|
||||
if result.Valid {
|
||||
fmt.Printf(" ✅ VALID\n")
|
||||
fmt.Printf(" [OK] VALID\n")
|
||||
if verboseVerify {
|
||||
meta, _ := metadata.Load(backupFile)
|
||||
fmt.Printf(" Size: %s\n", metadata.FormatSize(meta.SizeBytes))
|
||||
@ -127,7 +127,7 @@ func runVerifyBackup(cmd *cobra.Command, args []string) error {
|
||||
fmt.Println()
|
||||
successCount++
|
||||
} else {
|
||||
fmt.Printf(" ❌ FAILED: %v\n", result.Error)
|
||||
fmt.Printf(" [FAIL] FAILED: %v\n", result.Error)
|
||||
if verboseVerify {
|
||||
if !result.FileExists {
|
||||
fmt.Printf(" File does not exist\n")
|
||||
@ -147,11 +147,11 @@ func runVerifyBackup(cmd *cobra.Command, args []string) error {
|
||||
}
|
||||
|
||||
// Summary
|
||||
fmt.Println(strings.Repeat("─", 50))
|
||||
fmt.Println(strings.Repeat("-", 50))
|
||||
fmt.Printf("Total: %d backups\n", len(backupFiles))
|
||||
fmt.Printf("✅ Valid: %d\n", successCount)
|
||||
fmt.Printf("[OK] Valid: %d\n", successCount)
|
||||
if failureCount > 0 {
|
||||
fmt.Printf("❌ Failed: %d\n", failureCount)
|
||||
fmt.Printf("[FAIL] Failed: %d\n", failureCount)
|
||||
os.Exit(1)
|
||||
}
|
||||
|
||||
@ -195,16 +195,16 @@ func runVerifyCloudBackup(cmd *cobra.Command, args []string) error {
|
||||
|
||||
for _, uri := range args {
|
||||
if !isCloudURI(uri) {
|
||||
fmt.Printf("⚠️ Skipping non-cloud URI: %s\n", uri)
|
||||
fmt.Printf("[WARN] Skipping non-cloud URI: %s\n", uri)
|
||||
continue
|
||||
}
|
||||
|
||||
fmt.Printf("☁️ %s\n", uri)
|
||||
fmt.Printf("[CLOUD] %s\n", uri)
|
||||
|
||||
// Download and verify
|
||||
result, err := verifyCloudBackup(cmd.Context(), uri, quickVerify, verboseVerify)
|
||||
if err != nil {
|
||||
fmt.Printf(" ❌ FAILED: %v\n\n", err)
|
||||
fmt.Printf(" [FAIL] FAILED: %v\n\n", err)
|
||||
failureCount++
|
||||
continue
|
||||
}
|
||||
@ -212,7 +212,7 @@ func runVerifyCloudBackup(cmd *cobra.Command, args []string) error {
|
||||
// Cleanup temp file
|
||||
defer result.Cleanup()
|
||||
|
||||
fmt.Printf(" ✅ VALID\n")
|
||||
fmt.Printf(" [OK] VALID\n")
|
||||
if verboseVerify && result.MetadataPath != "" {
|
||||
meta, _ := metadata.Load(result.MetadataPath)
|
||||
if meta != nil {
|
||||
@ -226,7 +226,7 @@ func runVerifyCloudBackup(cmd *cobra.Command, args []string) error {
|
||||
successCount++
|
||||
}
|
||||
|
||||
fmt.Printf("\n✅ Summary: %d valid, %d failed\n", successCount, failureCount)
|
||||
fmt.Printf("\n[OK] Summary: %d valid, %d failed\n", successCount, failureCount)
|
||||
|
||||
if failureCount > 0 {
|
||||
os.Exit(1)
|
||||
|
||||
64
cmd/verify_locks.go
Normal file
64
cmd/verify_locks.go
Normal file
@ -0,0 +1,64 @@
|
||||
package cmd
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"os"
|
||||
|
||||
"dbbackup/internal/checks"
|
||||
|
||||
"github.com/spf13/cobra"
|
||||
)
|
||||
|
||||
var verifyLocksCmd = &cobra.Command{
|
||||
Use: "verify-locks",
|
||||
Short: "Check PostgreSQL lock settings and print restore guidance",
|
||||
Long: `Probe PostgreSQL for lock-related GUCs (max_locks_per_transaction, max_connections, max_prepared_transactions) and print capacity + recommended restore options.`,
|
||||
RunE: func(cmd *cobra.Command, args []string) error {
|
||||
return runVerifyLocks(cmd.Context())
|
||||
},
|
||||
}
|
||||
|
||||
func runVerifyLocks(ctx context.Context) error {
|
||||
p := checks.NewPreflightChecker(cfg, log)
|
||||
res, err := p.RunAllChecks(ctx, cfg.Database)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// Find the Postgres lock check in the preflight results
|
||||
var chk checks.PreflightCheck
|
||||
found := false
|
||||
for _, c := range res.Checks {
|
||||
if c.Name == "PostgreSQL lock configuration" {
|
||||
chk = c
|
||||
found = true
|
||||
break
|
||||
}
|
||||
}
|
||||
if !found {
|
||||
fmt.Println("No PostgreSQL lock check available (skipped)")
|
||||
return nil
|
||||
}
|
||||
|
||||
fmt.Printf("%s\n", chk.Name)
|
||||
fmt.Printf("Status: %s\n", chk.Status.String())
|
||||
fmt.Printf("%s\n\n", chk.Message)
|
||||
if chk.Details != "" {
|
||||
fmt.Println(chk.Details)
|
||||
}
|
||||
|
||||
// exit non-zero for failures so scripts can react
|
||||
if chk.Status == checks.StatusFailed {
|
||||
os.Exit(2)
|
||||
}
|
||||
if chk.Status == checks.StatusWarning {
|
||||
os.Exit(0)
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func init() {
|
||||
rootCmd.AddCommand(verifyLocksCmd)
|
||||
}
|
||||
384
cmd/verify_restore.go
Normal file
384
cmd/verify_restore.go
Normal file
@ -0,0 +1,384 @@
|
||||
package cmd
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"os"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"dbbackup/internal/logger"
|
||||
"dbbackup/internal/verification"
|
||||
|
||||
"github.com/spf13/cobra"
|
||||
)
|
||||
|
||||
var verifyRestoreCmd = &cobra.Command{
|
||||
Use: "verify-restore",
|
||||
Short: "Systematic verification for large database restores",
|
||||
Long: `Comprehensive verification tool for large database restores with BLOB support.
|
||||
|
||||
This tool performs systematic checks to ensure 100% data integrity after restore:
|
||||
- Table counts and row counts verification
|
||||
- BLOB/Large Object integrity (PostgreSQL large objects, bytea columns)
|
||||
- Table checksums (for non-BLOB tables)
|
||||
- Database-specific integrity checks
|
||||
- Orphaned object detection
|
||||
- Index validity checks
|
||||
|
||||
Designed to work with VERY LARGE databases and BLOBs with 100% reliability.
|
||||
|
||||
Examples:
|
||||
# Verify a restored PostgreSQL database
|
||||
dbbackup verify-restore --engine postgres --database mydb
|
||||
|
||||
# Verify with connection details
|
||||
dbbackup verify-restore --engine postgres --host localhost --port 5432 \
|
||||
--user postgres --password secret --database mydb
|
||||
|
||||
# Verify a MySQL database
|
||||
dbbackup verify-restore --engine mysql --database mydb
|
||||
|
||||
# Verify and output JSON report
|
||||
dbbackup verify-restore --engine postgres --database mydb --json
|
||||
|
||||
# Compare source and restored database
|
||||
dbbackup verify-restore --engine postgres --database source_db --compare restored_db
|
||||
|
||||
# Verify a backup file before restore
|
||||
dbbackup verify-restore --backup-file /backups/mydb.dump
|
||||
|
||||
# Verify multiple databases in parallel
|
||||
dbbackup verify-restore --engine postgres --databases "db1,db2,db3" --parallel 4`,
|
||||
RunE: runVerifyRestore,
|
||||
}
|
||||
|
||||
var (
|
||||
verifyEngine string
|
||||
verifyHost string
|
||||
verifyPort int
|
||||
verifyUser string
|
||||
verifyPassword string
|
||||
verifyDatabase string
|
||||
verifyDatabases string
|
||||
verifyCompareDB string
|
||||
verifyBackupFile string
|
||||
verifyJSON bool
|
||||
verifyParallel int
|
||||
)
|
||||
|
||||
func init() {
|
||||
rootCmd.AddCommand(verifyRestoreCmd)
|
||||
|
||||
verifyRestoreCmd.Flags().StringVar(&verifyEngine, "engine", "postgres", "Database engine (postgres, mysql)")
|
||||
verifyRestoreCmd.Flags().StringVar(&verifyHost, "host", "localhost", "Database host")
|
||||
verifyRestoreCmd.Flags().IntVar(&verifyPort, "port", 5432, "Database port")
|
||||
verifyRestoreCmd.Flags().StringVar(&verifyUser, "user", "", "Database user")
|
||||
verifyRestoreCmd.Flags().StringVar(&verifyPassword, "password", "", "Database password")
|
||||
verifyRestoreCmd.Flags().StringVar(&verifyDatabase, "database", "", "Database to verify")
|
||||
verifyRestoreCmd.Flags().StringVar(&verifyDatabases, "databases", "", "Comma-separated list of databases to verify")
|
||||
verifyRestoreCmd.Flags().StringVar(&verifyCompareDB, "compare", "", "Compare with another database (source vs restored)")
|
||||
verifyRestoreCmd.Flags().StringVar(&verifyBackupFile, "backup-file", "", "Verify backup file integrity before restore")
|
||||
verifyRestoreCmd.Flags().BoolVar(&verifyJSON, "json", false, "Output results as JSON")
|
||||
verifyRestoreCmd.Flags().IntVar(&verifyParallel, "parallel", 1, "Number of parallel verification workers")
|
||||
}
|
||||
|
||||
func runVerifyRestore(cmd *cobra.Command, args []string) error {
|
||||
ctx, cancel := context.WithTimeout(context.Background(), 24*time.Hour) // Long timeout for large DBs
|
||||
defer cancel()
|
||||
|
||||
log := logger.New("INFO", "text")
|
||||
|
||||
// Get credentials from environment if not provided
|
||||
if verifyUser == "" {
|
||||
verifyUser = os.Getenv("PGUSER")
|
||||
if verifyUser == "" {
|
||||
verifyUser = os.Getenv("MYSQL_USER")
|
||||
}
|
||||
if verifyUser == "" {
|
||||
verifyUser = "postgres"
|
||||
}
|
||||
}
|
||||
|
||||
if verifyPassword == "" {
|
||||
verifyPassword = os.Getenv("PGPASSWORD")
|
||||
if verifyPassword == "" {
|
||||
verifyPassword = os.Getenv("MYSQL_PASSWORD")
|
||||
}
|
||||
}
|
||||
|
||||
// Set default port based on engine
|
||||
if verifyPort == 5432 && (verifyEngine == "mysql" || verifyEngine == "mariadb") {
|
||||
verifyPort = 3306
|
||||
}
|
||||
|
||||
checker := verification.NewLargeRestoreChecker(log, verifyEngine, verifyHost, verifyPort, verifyUser, verifyPassword)
|
||||
|
||||
// Mode 1: Verify backup file
|
||||
if verifyBackupFile != "" {
|
||||
return verifyBackupFileMode(ctx, checker)
|
||||
}
|
||||
|
||||
// Mode 2: Compare two databases
|
||||
if verifyCompareDB != "" {
|
||||
return verifyCompareMode(ctx, checker)
|
||||
}
|
||||
|
||||
// Mode 3: Verify multiple databases in parallel
|
||||
if verifyDatabases != "" {
|
||||
return verifyMultipleDatabases(ctx, log)
|
||||
}
|
||||
|
||||
// Mode 4: Verify single database
|
||||
if verifyDatabase == "" {
|
||||
return fmt.Errorf("--database is required")
|
||||
}
|
||||
|
||||
return verifySingleDatabase(ctx, checker)
|
||||
}
|
||||
|
||||
func verifyBackupFileMode(ctx context.Context, checker *verification.LargeRestoreChecker) error {
|
||||
fmt.Println()
|
||||
fmt.Println("╔══════════════════════════════════════════════════════════════╗")
|
||||
fmt.Println("║ 🔍 BACKUP FILE VERIFICATION ║")
|
||||
fmt.Println("╚══════════════════════════════════════════════════════════════╝")
|
||||
fmt.Println()
|
||||
|
||||
result, err := checker.VerifyBackupFile(ctx, verifyBackupFile)
|
||||
if err != nil {
|
||||
return fmt.Errorf("verification failed: %w", err)
|
||||
}
|
||||
|
||||
if verifyJSON {
|
||||
return outputJSON(result, "")
|
||||
}
|
||||
|
||||
fmt.Printf(" File: %s\n", result.Path)
|
||||
fmt.Printf(" Size: %s\n", formatBytes(result.SizeBytes))
|
||||
fmt.Printf(" Format: %s\n", result.Format)
|
||||
fmt.Printf(" Checksum: %s\n", result.Checksum)
|
||||
|
||||
if result.TableCount > 0 {
|
||||
fmt.Printf(" Tables: %d\n", result.TableCount)
|
||||
}
|
||||
if result.LargeObjectCount > 0 {
|
||||
fmt.Printf(" Large Objects: %d\n", result.LargeObjectCount)
|
||||
}
|
||||
|
||||
fmt.Println()
|
||||
|
||||
if result.Valid {
|
||||
fmt.Println(" ✅ Backup file verification PASSED")
|
||||
} else {
|
||||
fmt.Printf(" ❌ Backup file verification FAILED: %s\n", result.Error)
|
||||
return fmt.Errorf("verification failed")
|
||||
}
|
||||
|
||||
if len(result.Warnings) > 0 {
|
||||
fmt.Println()
|
||||
fmt.Println(" Warnings:")
|
||||
for _, w := range result.Warnings {
|
||||
fmt.Printf(" ⚠️ %s\n", w)
|
||||
}
|
||||
}
|
||||
|
||||
fmt.Println()
|
||||
return nil
|
||||
}
|
||||
|
||||
func verifyCompareMode(ctx context.Context, checker *verification.LargeRestoreChecker) error {
|
||||
if verifyDatabase == "" {
|
||||
return fmt.Errorf("--database (source) is required for comparison")
|
||||
}
|
||||
|
||||
fmt.Println()
|
||||
fmt.Println("╔══════════════════════════════════════════════════════════════╗")
|
||||
fmt.Println("║ 🔍 DATABASE COMPARISON ║")
|
||||
fmt.Println("╚══════════════════════════════════════════════════════════════╝")
|
||||
fmt.Println()
|
||||
fmt.Printf(" Source: %s\n", verifyDatabase)
|
||||
fmt.Printf(" Target: %s\n", verifyCompareDB)
|
||||
fmt.Println()
|
||||
|
||||
result, err := checker.CompareSourceTarget(ctx, verifyDatabase, verifyCompareDB)
|
||||
if err != nil {
|
||||
return fmt.Errorf("comparison failed: %w", err)
|
||||
}
|
||||
|
||||
if verifyJSON {
|
||||
return outputJSON(result, "")
|
||||
}
|
||||
|
||||
if result.Match {
|
||||
fmt.Println(" ✅ Databases MATCH - restore verified successfully")
|
||||
} else {
|
||||
fmt.Println(" ❌ Databases DO NOT MATCH")
|
||||
fmt.Println()
|
||||
fmt.Println(" Differences:")
|
||||
for _, d := range result.Differences {
|
||||
fmt.Printf(" • %s\n", d)
|
||||
}
|
||||
}
|
||||
|
||||
fmt.Println()
|
||||
return nil
|
||||
}
|
||||
|
||||
func verifyMultipleDatabases(ctx context.Context, log logger.Logger) error {
|
||||
databases := splitDatabases(verifyDatabases)
|
||||
if len(databases) == 0 {
|
||||
return fmt.Errorf("no databases specified")
|
||||
}
|
||||
|
||||
fmt.Println()
|
||||
fmt.Println("╔══════════════════════════════════════════════════════════════╗")
|
||||
fmt.Println("║ 🔍 PARALLEL DATABASE VERIFICATION ║")
|
||||
fmt.Println("╚══════════════════════════════════════════════════════════════╝")
|
||||
fmt.Println()
|
||||
fmt.Printf(" Databases: %d\n", len(databases))
|
||||
fmt.Printf(" Workers: %d\n", verifyParallel)
|
||||
fmt.Println()
|
||||
|
||||
results, err := verification.ParallelVerify(ctx, log, verifyEngine, verifyHost, verifyPort, verifyUser, verifyPassword, databases, verifyParallel)
|
||||
if err != nil {
|
||||
return fmt.Errorf("parallel verification failed: %w", err)
|
||||
}
|
||||
|
||||
if verifyJSON {
|
||||
return outputJSON(results, "")
|
||||
}
|
||||
|
||||
allValid := true
|
||||
for _, r := range results {
|
||||
if r == nil {
|
||||
continue
|
||||
}
|
||||
status := "✅"
|
||||
if !r.Valid {
|
||||
status = "❌"
|
||||
allValid = false
|
||||
}
|
||||
fmt.Printf(" %s %s: %d tables, %d rows, %d BLOBs (%s)\n",
|
||||
status, r.Database, r.TotalTables, r.TotalRows, r.TotalBlobCount, r.Duration.Round(time.Millisecond))
|
||||
}
|
||||
|
||||
fmt.Println()
|
||||
if allValid {
|
||||
fmt.Println(" ✅ All databases verified successfully")
|
||||
} else {
|
||||
fmt.Println(" ❌ Some databases failed verification")
|
||||
return fmt.Errorf("verification failed")
|
||||
}
|
||||
|
||||
fmt.Println()
|
||||
return nil
|
||||
}
|
||||
|
||||
func verifySingleDatabase(ctx context.Context, checker *verification.LargeRestoreChecker) error {
|
||||
fmt.Println()
|
||||
fmt.Println("╔══════════════════════════════════════════════════════════════╗")
|
||||
fmt.Println("║ 🔍 SYSTEMATIC RESTORE VERIFICATION ║")
|
||||
fmt.Println("║ For Large Databases & BLOBs ║")
|
||||
fmt.Println("╚══════════════════════════════════════════════════════════════╝")
|
||||
fmt.Println()
|
||||
fmt.Printf(" Database: %s\n", verifyDatabase)
|
||||
fmt.Printf(" Engine: %s\n", verifyEngine)
|
||||
fmt.Printf(" Host: %s:%d\n", verifyHost, verifyPort)
|
||||
fmt.Println()
|
||||
|
||||
result, err := checker.CheckDatabase(ctx, verifyDatabase)
|
||||
if err != nil {
|
||||
return fmt.Errorf("verification failed: %w", err)
|
||||
}
|
||||
|
||||
if verifyJSON {
|
||||
return outputJSON(result, "")
|
||||
}
|
||||
|
||||
// Summary
|
||||
fmt.Println(" ═══════════════════════════════════════════════════════════")
|
||||
fmt.Println(" VERIFICATION SUMMARY")
|
||||
fmt.Println(" ═══════════════════════════════════════════════════════════")
|
||||
fmt.Println()
|
||||
fmt.Printf(" Tables: %d\n", result.TotalTables)
|
||||
fmt.Printf(" Total Rows: %d\n", result.TotalRows)
|
||||
fmt.Printf(" Large Objects: %d\n", result.TotalBlobCount)
|
||||
fmt.Printf(" BLOB Size: %s\n", formatBytes(result.TotalBlobBytes))
|
||||
fmt.Printf(" Duration: %s\n", result.Duration.Round(time.Millisecond))
|
||||
fmt.Println()
|
||||
|
||||
// Table details
|
||||
if len(result.TableChecks) > 0 && len(result.TableChecks) <= 50 {
|
||||
fmt.Println(" Tables:")
|
||||
for _, t := range result.TableChecks {
|
||||
blobIndicator := ""
|
||||
if t.HasBlobColumn {
|
||||
blobIndicator = " [BLOB]"
|
||||
}
|
||||
status := "✓"
|
||||
if !t.Valid {
|
||||
status = "✗"
|
||||
}
|
||||
fmt.Printf(" %s %s.%s: %d rows%s\n", status, t.Schema, t.TableName, t.RowCount, blobIndicator)
|
||||
}
|
||||
fmt.Println()
|
||||
}
|
||||
|
||||
// Integrity errors
|
||||
if len(result.IntegrityErrors) > 0 {
|
||||
fmt.Println(" ❌ INTEGRITY ERRORS:")
|
||||
for _, e := range result.IntegrityErrors {
|
||||
fmt.Printf(" • %s\n", e)
|
||||
}
|
||||
fmt.Println()
|
||||
}
|
||||
|
||||
// Warnings
|
||||
if len(result.Warnings) > 0 {
|
||||
fmt.Println(" ⚠️ WARNINGS:")
|
||||
for _, w := range result.Warnings {
|
||||
fmt.Printf(" • %s\n", w)
|
||||
}
|
||||
fmt.Println()
|
||||
}
|
||||
|
||||
// Final verdict
|
||||
fmt.Println(" ═══════════════════════════════════════════════════════════")
|
||||
if result.Valid {
|
||||
fmt.Println(" ✅ RESTORE VERIFICATION PASSED - Data integrity confirmed")
|
||||
} else {
|
||||
fmt.Println(" ❌ RESTORE VERIFICATION FAILED - See errors above")
|
||||
return fmt.Errorf("verification failed")
|
||||
}
|
||||
fmt.Println(" ═══════════════════════════════════════════════════════════")
|
||||
fmt.Println()
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func splitDatabases(s string) []string {
|
||||
if s == "" {
|
||||
return nil
|
||||
}
|
||||
var dbs []string
|
||||
for _, db := range strings.Split(s, ",") {
|
||||
db = strings.TrimSpace(db)
|
||||
if db != "" {
|
||||
dbs = append(dbs, db)
|
||||
}
|
||||
}
|
||||
return dbs
|
||||
}
|
||||
|
||||
func verifyFormatBytes(bytes int64) string {
|
||||
const unit = 1024
|
||||
if bytes < unit {
|
||||
return fmt.Sprintf("%d B", bytes)
|
||||
}
|
||||
div, exp := int64(unit), 0
|
||||
for n := bytes / unit; n >= unit; n /= unit {
|
||||
div *= unit
|
||||
exp++
|
||||
}
|
||||
return fmt.Sprintf("%.1f %cB", float64(bytes)/float64(div), "KMGTPE"[exp])
|
||||
}
|
||||
266
docs/LOCK_DEBUGGING.md
Normal file
266
docs/LOCK_DEBUGGING.md
Normal file
@ -0,0 +1,266 @@
|
||||
# Lock Debugging Feature
|
||||
|
||||
## Overview
|
||||
|
||||
The `--debug-locks` flag provides complete visibility into the lock protection system introduced in v3.42.82. This eliminates the need for blind troubleshooting when diagnosing lock exhaustion issues.
|
||||
|
||||
## Problem
|
||||
|
||||
When PostgreSQL lock exhaustion occurs during restore:
|
||||
- User sees "out of shared memory" error after 7 hours
|
||||
- No visibility into why Large DB Guard chose conservative mode
|
||||
- Unknown whether lock boost attempts succeeded
|
||||
- Unclear what actions are required to fix the issue
|
||||
- Requires 14 days of troubleshooting to understand the problem
|
||||
|
||||
## Solution
|
||||
|
||||
New `--debug-locks` flag captures every decision point in the lock protection system with detailed logging prefixed by 🔍 [LOCK-DEBUG].
|
||||
|
||||
## Usage
|
||||
|
||||
### CLI
|
||||
```bash
|
||||
# Single database restore with lock debugging
|
||||
dbbackup restore single mydb.dump --debug-locks --confirm
|
||||
|
||||
# Cluster restore with lock debugging
|
||||
dbbackup restore cluster backup.tar.gz --debug-locks --confirm
|
||||
|
||||
# Can also use global flag
|
||||
dbbackup --debug-locks restore cluster backup.tar.gz --confirm
|
||||
```
|
||||
|
||||
### TUI (Interactive Mode)
|
||||
```bash
|
||||
dbbackup # Start interactive mode
|
||||
# Navigate to restore operation
|
||||
# Select your archive
|
||||
# Press 'l' to toggle lock debugging (🔍 icon appears when enabled)
|
||||
# Press Enter to proceed
|
||||
```
|
||||
|
||||
## What Gets Logged
|
||||
|
||||
### 1. Strategy Analysis Entry Point
|
||||
```
|
||||
🔍 [LOCK-DEBUG] Large DB Guard: Starting strategy analysis
|
||||
archive=cluster_backup.tar.gz
|
||||
dump_count=15
|
||||
```
|
||||
|
||||
### 2. PostgreSQL Configuration Detection
|
||||
```
|
||||
🔍 [LOCK-DEBUG] Querying PostgreSQL for lock configuration
|
||||
host=localhost
|
||||
port=5432
|
||||
user=postgres
|
||||
|
||||
🔍 [LOCK-DEBUG] Successfully retrieved PostgreSQL lock settings
|
||||
max_locks_per_transaction=2048
|
||||
max_connections=256
|
||||
total_capacity=524288
|
||||
```
|
||||
|
||||
### 3. Guard Decision Logic
|
||||
```
|
||||
🔍 [LOCK-DEBUG] PostgreSQL lock configuration detected
|
||||
max_locks_per_transaction=2048
|
||||
max_connections=256
|
||||
calculated_capacity=524288
|
||||
threshold_required=4096
|
||||
below_threshold=true
|
||||
|
||||
🔍 [LOCK-DEBUG] Guard decision: CONSERVATIVE mode
|
||||
jobs=1
|
||||
parallel_dbs=1
|
||||
reason="Lock threshold not met (max_locks < 4096)"
|
||||
```
|
||||
|
||||
### 4. Lock Boost Attempts
|
||||
```
|
||||
🔍 [LOCK-DEBUG] boostPostgreSQLSettings: Starting lock boost procedure
|
||||
target_lock_value=4096
|
||||
|
||||
🔍 [LOCK-DEBUG] Current PostgreSQL lock configuration
|
||||
current_max_locks=2048
|
||||
target_max_locks=4096
|
||||
boost_required=true
|
||||
|
||||
🔍 [LOCK-DEBUG] Executing ALTER SYSTEM to boost locks
|
||||
from=2048
|
||||
to=4096
|
||||
|
||||
🔍 [LOCK-DEBUG] ALTER SYSTEM succeeded - restart required
|
||||
setting_saved_to=postgresql.auto.conf
|
||||
active_after="PostgreSQL restart"
|
||||
```
|
||||
|
||||
### 5. PostgreSQL Restart Attempts
|
||||
```
|
||||
🔍 [LOCK-DEBUG] Attempting PostgreSQL restart to activate new lock setting
|
||||
|
||||
# If restart succeeds:
|
||||
🔍 [LOCK-DEBUG] PostgreSQL restart SUCCEEDED
|
||||
|
||||
🔍 [LOCK-DEBUG] Post-restart verification
|
||||
new_max_locks=4096
|
||||
target_was=4096
|
||||
verification=PASS
|
||||
|
||||
# If restart fails:
|
||||
🔍 [LOCK-DEBUG] PostgreSQL restart FAILED
|
||||
current_locks=2048
|
||||
required_locks=4096
|
||||
setting_saved=true
|
||||
setting_active=false
|
||||
verdict="ABORT - Manual restart required"
|
||||
```
|
||||
|
||||
### 6. Final Verification
|
||||
```
|
||||
🔍 [LOCK-DEBUG] Lock boost function returned
|
||||
original_max_locks=2048
|
||||
target_max_locks=4096
|
||||
boost_successful=false
|
||||
|
||||
🔍 [LOCK-DEBUG] CRITICAL: Lock verification FAILED
|
||||
actual_locks=2048
|
||||
required_locks=4096
|
||||
delta=2048
|
||||
verdict="ABORT RESTORE"
|
||||
```
|
||||
|
||||
## Example Workflow
|
||||
|
||||
### Scenario: Lock Exhaustion on New System
|
||||
|
||||
```bash
|
||||
# Step 1: Run restore with lock debugging enabled
|
||||
dbbackup restore cluster backup.tar.gz --debug-locks --confirm
|
||||
|
||||
# Output shows:
|
||||
# 🔍 [LOCK-DEBUG] Guard decision: CONSERVATIVE mode
|
||||
# current_locks=2048, required=4096
|
||||
# verdict="ABORT - Manual restart required"
|
||||
|
||||
# Step 2: Follow the actionable instructions
|
||||
sudo -u postgres psql -c "ALTER SYSTEM SET max_locks_per_transaction = 4096;"
|
||||
sudo systemctl restart postgresql
|
||||
|
||||
# Step 3: Verify the change
|
||||
sudo -u postgres psql -c "SHOW max_locks_per_transaction;"
|
||||
# Output: 4096
|
||||
|
||||
# Step 4: Retry restore (can disable debug now)
|
||||
dbbackup restore cluster backup.tar.gz --confirm
|
||||
|
||||
# Success! Restore proceeds with verified lock protection
|
||||
```
|
||||
|
||||
## When to Use
|
||||
|
||||
### Enable Lock Debugging When:
|
||||
- Diagnosing lock exhaustion failures
|
||||
- Understanding why conservative mode was triggered
|
||||
- Verifying lock boost attempts worked
|
||||
- Troubleshooting "out of shared memory" errors
|
||||
- Setting up restore on new systems with unknown lock config
|
||||
- Documenting lock requirements for compliance/security
|
||||
|
||||
### Leave Disabled For:
|
||||
- Normal production restores (cleaner logs)
|
||||
- Scripted/automated restores (less noise)
|
||||
- When lock config is known to be sufficient
|
||||
- When restore performance is critical
|
||||
|
||||
## Integration Points
|
||||
|
||||
### Configuration
|
||||
- **Config Field:** `cfg.DebugLocks` (bool)
|
||||
- **CLI Flag:** `--debug-locks` (persistent flag on root command)
|
||||
- **TUI Toggle:** Press 'l' in restore preview screen
|
||||
- **Default:** `false` (opt-in only)
|
||||
|
||||
### Files Modified
|
||||
- `internal/config/config.go` - Added DebugLocks field
|
||||
- `cmd/root.go` - Added --debug-locks persistent flag
|
||||
- `cmd/restore.go` - Wired flag to single/cluster restore commands
|
||||
- `internal/restore/large_db_guard.go` - 20+ debug log points
|
||||
- `internal/restore/engine.go` - 15+ debug log points in boost logic
|
||||
- `internal/tui/restore_preview.go` - 'l' key toggle with 🔍 icon
|
||||
|
||||
### Log Locations
|
||||
All lock debug logs go to the configured logger (usually syslog or file) with level INFO. The 🔍 [LOCK-DEBUG] prefix makes them easy to grep:
|
||||
|
||||
```bash
|
||||
# Filter lock debug logs
|
||||
journalctl -u dbbackup | grep 'LOCK-DEBUG'
|
||||
|
||||
# Or in log files
|
||||
grep 'LOCK-DEBUG' /var/log/dbbackup.log
|
||||
```
|
||||
|
||||
## Backward Compatibility
|
||||
|
||||
- ✅ No breaking changes
|
||||
- ✅ Flag defaults to false (no output unless enabled)
|
||||
- ✅ Existing scripts continue to work unchanged
|
||||
- ✅ TUI users get new 'l' toggle automatically
|
||||
- ✅ CLI users can add --debug-locks when needed
|
||||
|
||||
## Performance Impact
|
||||
|
||||
Negligible - the debug logging only adds:
|
||||
- ~5 database queries (SHOW commands)
|
||||
- ~10 conditional if statements checking cfg.DebugLocks
|
||||
- ~50KB of additional log output when enabled
|
||||
- No impact on restore performance itself
|
||||
|
||||
## Relationship to v3.42.82
|
||||
|
||||
This feature completes the lock protection system:
|
||||
|
||||
**v3.42.82 (Protection):**
|
||||
- Fixed Guard to always force conservative mode if max_locks < 4096
|
||||
- Fixed engine to abort restore if lock boost fails
|
||||
- Ensures no path allows 7-hour failures
|
||||
|
||||
**v3.42.83 (Visibility):**
|
||||
- Shows why Guard chose conservative mode
|
||||
- Displays lock config that was detected
|
||||
- Tracks boost attempts and outcomes
|
||||
- Explains why restore was aborted
|
||||
|
||||
Together: Bulletproof protection + complete transparency.
|
||||
|
||||
## Deployment
|
||||
|
||||
1. Update to v3.42.83:
|
||||
```bash
|
||||
wget https://github.com/PlusOne/dbbackup/releases/download/v3.42.83/dbbackup_linux_amd64
|
||||
chmod +x dbbackup_linux_amd64
|
||||
sudo mv dbbackup_linux_amd64 /usr/local/bin/dbbackup
|
||||
```
|
||||
|
||||
2. Test lock debugging:
|
||||
```bash
|
||||
dbbackup restore cluster test_backup.tar.gz --debug-locks --dry-run
|
||||
```
|
||||
|
||||
3. Enable for production if diagnosing issues:
|
||||
```bash
|
||||
dbbackup restore cluster production_backup.tar.gz --debug-locks --confirm
|
||||
```
|
||||
|
||||
## Support
|
||||
|
||||
For issues related to lock debugging:
|
||||
- Check logs for 🔍 [LOCK-DEBUG] entries
|
||||
- Verify PostgreSQL version supports ALTER SYSTEM (9.4+)
|
||||
- Ensure user has SUPERUSER role for ALTER SYSTEM
|
||||
- Check systemd/init scripts can restart PostgreSQL
|
||||
|
||||
Related documentation:
|
||||
- verify_postgres_locks.sh - Script to check lock configuration
|
||||
- v3.42.82 release notes - Lock exhaustion bug fixes
|
||||
@ -584,6 +584,100 @@ Document your recovery procedure:
|
||||
9. Create new base backup
|
||||
```
|
||||
|
||||
## Large Database Support (600+ GB)
|
||||
|
||||
For databases larger than 600 GB, PITR is the **recommended approach** over full dump/restore.
|
||||
|
||||
### Why PITR Works Better for Large DBs
|
||||
|
||||
| Approach | 600 GB Database | Recovery Time (RTO) |
|
||||
|----------|-----------------|---------------------|
|
||||
| Full pg_dump/restore | Hours to dump, hours to restore | 4-12+ hours |
|
||||
| PITR (base + WAL) | Incremental WAL only | 30 min - 2 hours |
|
||||
|
||||
### Setup for Large Databases
|
||||
|
||||
**1. Enable WAL archiving with compression:**
|
||||
```bash
|
||||
dbbackup pitr enable --archive-dir /backups/wal_archive --compress
|
||||
```
|
||||
|
||||
**2. Take ONE base backup weekly/monthly (use pg_basebackup):**
|
||||
```bash
|
||||
# For 600+ GB, use fast checkpoint to minimize impact
|
||||
pg_basebackup -D /backups/base_$(date +%Y%m%d).tar.gz \
|
||||
-Ft -z -P --checkpoint=fast --wal-method=none
|
||||
|
||||
# Duration: 2-6 hours for 600 GB, but only needed weekly/monthly
|
||||
```
|
||||
|
||||
**3. WAL files archive continuously** (~1-5 GB/hour typical), capturing every change.
|
||||
|
||||
**4. Recover to any point in time:**
|
||||
```bash
|
||||
dbbackup restore pitr \
|
||||
--base-backup /backups/base_20260101.tar.gz \
|
||||
--wal-archive /backups/wal_archive \
|
||||
--target-time "2026-01-13 14:30:00" \
|
||||
--target-dir /var/lib/postgresql/16/restored
|
||||
```
|
||||
|
||||
### PostgreSQL Optimizations for 600+ GB
|
||||
|
||||
| Setting | Value | Purpose |
|
||||
|---------|-------|---------|
|
||||
| `wal_compression = on` | postgresql.conf | 70-80% smaller WAL files |
|
||||
| `max_wal_size = 4GB` | postgresql.conf | Reduce checkpoint frequency |
|
||||
| `checkpoint_timeout = 30min` | postgresql.conf | Less frequent checkpoints |
|
||||
| `archive_timeout = 300` | postgresql.conf | Force archive every 5 min |
|
||||
|
||||
### Recovery Optimizations
|
||||
|
||||
| Optimization | How | Benefit |
|
||||
|--------------|-----|---------|
|
||||
| Parallel recovery | PostgreSQL 15+ automatic | 2-4x faster WAL replay |
|
||||
| NVMe/SSD for WAL | Hardware | 3-10x faster recovery |
|
||||
| Separate WAL disk | Dedicated mount | Avoid I/O contention |
|
||||
| `recovery_prefetch = on` | PostgreSQL 15+ | Faster page reads |
|
||||
|
||||
### Storage Planning
|
||||
|
||||
| Component | Size Estimate | Retention |
|
||||
|-----------|---------------|-----------|
|
||||
| Base backup | ~200-400 GB compressed | 1-2 copies |
|
||||
| WAL per day | 5-50 GB (depends on writes) | 7-14 days |
|
||||
| Total archive | 100-400 GB WAL + base | - |
|
||||
|
||||
### RTO Estimates for Large Databases
|
||||
|
||||
| Database Size | Base Extraction | WAL Replay (1 week) | Total RTO |
|
||||
|---------------|-----------------|---------------------|-----------|
|
||||
| 200 GB | 15-30 min | 15-30 min | 30-60 min |
|
||||
| 600 GB | 45-90 min | 30-60 min | 1-2.5 hours |
|
||||
| 1 TB | 60-120 min | 45-90 min | 2-3.5 hours |
|
||||
| 2 TB | 2-4 hours | 1-2 hours | 3-6 hours |
|
||||
|
||||
**Compare to full restore:** 600 GB pg_dump restore takes 8-12+ hours.
|
||||
|
||||
### Best Practices for 600+ GB
|
||||
|
||||
1. **Weekly base backups** - Monthly if storage is tight
|
||||
2. **Test recovery monthly** - Verify WAL chain integrity
|
||||
3. **Monitor WAL lag** - Alert if archive falls behind
|
||||
4. **Use streaming replication** - For HA, combine with PITR for DR
|
||||
5. **Separate archive storage** - Don't fill up the DB disk
|
||||
|
||||
```bash
|
||||
# Quick health check for large DB PITR setup
|
||||
dbbackup pitr status --verbose
|
||||
|
||||
# Expected output:
|
||||
# Base Backup: 2026-01-06 (7 days old) - OK
|
||||
# WAL Archive: 847 files, 52 GB
|
||||
# Recovery Window: 2026-01-06 to 2026-01-13 (7 days)
|
||||
# Estimated RTO: ~90 minutes
|
||||
```
|
||||
|
||||
## Performance Considerations
|
||||
|
||||
### WAL Archive Size
|
||||
195
docs/RESTORE_PROFILES.md
Normal file
195
docs/RESTORE_PROFILES.md
Normal file
@ -0,0 +1,195 @@
|
||||
# Restore Profiles
|
||||
|
||||
## Overview
|
||||
|
||||
The `--profile` flag allows you to optimize restore operations based on your server's resources and current workload. This is particularly useful when dealing with "out of shared memory" errors or resource-constrained environments.
|
||||
|
||||
## Available Profiles
|
||||
|
||||
### Conservative Profile (`--profile=conservative`)
|
||||
**Best for:** Resource-constrained servers, production systems with other running services, or when dealing with "out of shared memory" errors.
|
||||
|
||||
**Settings:**
|
||||
- Single-threaded restore (`--parallel=1`)
|
||||
- Single-threaded decompression (`--jobs=1`)
|
||||
- Memory-conservative mode enabled
|
||||
- Minimal memory footprint
|
||||
|
||||
**When to use:**
|
||||
- Server RAM usage > 70%
|
||||
- Other critical services running (web servers, monitoring agents)
|
||||
- "out of shared memory" errors during restore
|
||||
- Small VMs or shared hosting environments
|
||||
- Disk I/O is the bottleneck
|
||||
|
||||
**Example:**
|
||||
```bash
|
||||
dbbackup restore cluster backup.tar.gz --profile=conservative --confirm
|
||||
```
|
||||
|
||||
### Balanced Profile (`--profile=balanced`) - DEFAULT
|
||||
**Best for:** Most scenarios, general-purpose servers with adequate resources.
|
||||
|
||||
**Settings:**
|
||||
- Auto-detect parallelism based on CPU/RAM
|
||||
- Moderate resource usage
|
||||
- Good balance between speed and stability
|
||||
|
||||
**When to use:**
|
||||
- Default choice for most restores
|
||||
- Dedicated database server with moderate load
|
||||
- Unknown or variable server conditions
|
||||
|
||||
**Example:**
|
||||
```bash
|
||||
dbbackup restore cluster backup.tar.gz --confirm
|
||||
# or explicitly:
|
||||
dbbackup restore cluster backup.tar.gz --profile=balanced --confirm
|
||||
```
|
||||
|
||||
### Aggressive Profile (`--profile=aggressive`)
|
||||
**Best for:** Dedicated database servers with ample resources, maintenance windows, performance-critical restores.
|
||||
|
||||
**Settings:**
|
||||
- Maximum parallelism (auto-detect based on CPU cores)
|
||||
- Maximum resource utilization
|
||||
- Fastest restore speed
|
||||
|
||||
**When to use:**
|
||||
- Dedicated database server (no other services)
|
||||
- Server RAM usage < 50%
|
||||
- Time-critical restores (RTO minimization)
|
||||
- Maintenance windows with service downtime
|
||||
- Testing/development environments
|
||||
|
||||
**Example:**
|
||||
```bash
|
||||
dbbackup restore cluster backup.tar.gz --profile=aggressive --confirm
|
||||
```
|
||||
|
||||
### Potato Profile (`--profile=potato`) 🥔
|
||||
**Easter egg:** Same as conservative, for servers running on a potato.
|
||||
|
||||
## Profile Comparison
|
||||
|
||||
| Setting | Conservative | Balanced | Aggressive |
|
||||
|---------|-------------|----------|-----------|
|
||||
| Parallel DBs | 1 (sequential) | Auto (2-4) | Auto (all CPUs) |
|
||||
| Jobs (decompression) | 1 | Auto (2-4) | Auto (all CPUs) |
|
||||
| Memory Usage | Minimal | Moderate | Maximum |
|
||||
| Speed | Slowest | Medium | Fastest |
|
||||
| Stability | Most stable | Stable | Requires resources |
|
||||
|
||||
## Overriding Profile Settings
|
||||
|
||||
You can override specific profile settings:
|
||||
|
||||
```bash
|
||||
# Use conservative profile but allow 2 parallel jobs for decompression
|
||||
dbbackup restore cluster backup.tar.gz \\
|
||||
--profile=conservative \\
|
||||
--jobs=2 \\
|
||||
--confirm
|
||||
|
||||
# Use aggressive profile but limit to 2 parallel databases
|
||||
dbbackup restore cluster backup.tar.gz \\
|
||||
--profile=aggressive \\
|
||||
--parallel-dbs=2 \\
|
||||
--confirm
|
||||
```
|
||||
|
||||
## Real-World Scenarios
|
||||
|
||||
### Scenario 1: "Out of Shared Memory" Error
|
||||
**Problem:** PostgreSQL restore fails with `ERROR: out of shared memory`
|
||||
|
||||
**Solution:**
|
||||
```bash
|
||||
# Step 1: Use conservative profile
|
||||
dbbackup restore cluster backup.tar.gz --profile=conservative --confirm
|
||||
|
||||
# Step 2: If still failing, temporarily stop monitoring agents
|
||||
sudo systemctl stop nessus-agent elastic-agent
|
||||
dbbackup restore cluster backup.tar.gz --profile=conservative --confirm
|
||||
sudo systemctl start nessus-agent elastic-agent
|
||||
|
||||
# Step 3: Ask infrastructure team to increase work_mem (see email_infra_team.txt)
|
||||
```
|
||||
|
||||
### Scenario 2: Fast Disaster Recovery
|
||||
**Goal:** Restore as quickly as possible during maintenance window
|
||||
|
||||
**Solution:**
|
||||
```bash
|
||||
# Stop all non-essential services first
|
||||
sudo systemctl stop nginx php-fpm
|
||||
dbbackup restore cluster backup.tar.gz --profile=aggressive --confirm
|
||||
sudo systemctl start nginx php-fpm
|
||||
```
|
||||
|
||||
### Scenario 3: Shared Server with Multiple Services
|
||||
**Environment:** Web server + database + monitoring all on same VM
|
||||
|
||||
**Solution:**
|
||||
```bash
|
||||
# Always use conservative to avoid impacting other services
|
||||
dbbackup restore cluster backup.tar.gz --profile=conservative --confirm
|
||||
```
|
||||
|
||||
### Scenario 4: Unknown Server Conditions
|
||||
**Situation:** Restoring to a new server, unsure of resources
|
||||
|
||||
**Solution:**
|
||||
```bash
|
||||
# Step 1: Run diagnostics first
|
||||
./diagnose_postgres_memory.sh > diagnosis.log
|
||||
|
||||
# Step 2: Choose profile based on memory usage:
|
||||
# - If memory > 80%: use conservative
|
||||
# - If memory 50-80%: use balanced (default)
|
||||
# - If memory < 50%: use aggressive
|
||||
|
||||
# Step 3: Start with balanced and adjust if needed
|
||||
dbbackup restore cluster backup.tar.gz --confirm
|
||||
```
|
||||
|
||||
## Troubleshooting
|
||||
|
||||
### Profile Selection Guide
|
||||
|
||||
**Use Conservative when:**
|
||||
- ✅ Memory usage > 70%
|
||||
- ✅ Other services running
|
||||
- ✅ Getting "out of shared memory" errors
|
||||
- ✅ Restore keeps failing
|
||||
- ✅ Small VM (< 4 GB RAM)
|
||||
- ✅ High swap usage
|
||||
|
||||
**Use Balanced when:**
|
||||
- ✅ Normal operation
|
||||
- ✅ Moderate server load
|
||||
- ✅ Unsure what to use
|
||||
- ✅ Medium VM (4-16 GB RAM)
|
||||
|
||||
**Use Aggressive when:**
|
||||
- ✅ Dedicated database server
|
||||
- ✅ Memory usage < 50%
|
||||
- ✅ No other critical services
|
||||
- ✅ Need fastest possible restore
|
||||
- ✅ Large VM (> 16 GB RAM)
|
||||
- ✅ Maintenance window
|
||||
|
||||
## Environment Variables
|
||||
|
||||
You can set a default profile:
|
||||
|
||||
```bash
|
||||
export RESOURCE_PROFILE=conservative
|
||||
dbbackup restore cluster backup.tar.gz --confirm
|
||||
```
|
||||
|
||||
## See Also
|
||||
|
||||
- [diagnose_postgres_memory.sh](diagnose_postgres_memory.sh) - Analyze system resources before restore
|
||||
- [fix_postgres_locks.sh](fix_postgres_locks.sh) - Fix PostgreSQL lock exhaustion
|
||||
- [email_infra_team.txt](email_infra_team.txt) - Template email for infrastructure team
|
||||
621
docs/SYSTEMD.md
Normal file
621
docs/SYSTEMD.md
Normal file
@ -0,0 +1,621 @@
|
||||
# Systemd Integration Guide
|
||||
|
||||
This guide covers installing dbbackup as a systemd service for automated scheduled backups.
|
||||
|
||||
## Quick Start (Installer)
|
||||
|
||||
The easiest way to set up systemd services is using the built-in installer:
|
||||
|
||||
```bash
|
||||
# Install as cluster backup service (daily at midnight)
|
||||
sudo dbbackup install --backup-type cluster --schedule daily
|
||||
|
||||
# Check what would be installed (dry-run)
|
||||
dbbackup install --dry-run --backup-type cluster
|
||||
|
||||
# Check installation status
|
||||
dbbackup install --status
|
||||
|
||||
# Uninstall
|
||||
sudo dbbackup uninstall cluster --purge
|
||||
```
|
||||
|
||||
## Installer Options
|
||||
|
||||
| Flag | Description | Default |
|
||||
|------|-------------|---------|
|
||||
| `--instance NAME` | Instance name for named backups | - |
|
||||
| `--backup-type TYPE` | Backup type: `cluster`, `single`, `sample` | `cluster` |
|
||||
| `--schedule SPEC` | Timer schedule (see below) | `daily` |
|
||||
| `--with-metrics` | Install Prometheus metrics exporter | false |
|
||||
| `--metrics-port PORT` | HTTP port for metrics exporter | 9399 |
|
||||
| `--dry-run` | Preview changes without applying | false |
|
||||
|
||||
### Schedule Format
|
||||
|
||||
The `--schedule` option accepts systemd OnCalendar format:
|
||||
|
||||
| Value | Description |
|
||||
|-------|-------------|
|
||||
| `daily` | Every day at midnight |
|
||||
| `weekly` | Every Monday at midnight |
|
||||
| `hourly` | Every hour |
|
||||
| `*-*-* 02:00:00` | Every day at 2:00 AM |
|
||||
| `*-*-* 00/6:00:00` | Every 6 hours |
|
||||
| `Mon *-*-* 03:00` | Every Monday at 3:00 AM |
|
||||
| `*-*-01 00:00:00` | First day of every month |
|
||||
|
||||
Test schedule with: `systemd-analyze calendar "Mon *-*-* 03:00"`
|
||||
|
||||
## What Gets Installed
|
||||
|
||||
### Directory Structure
|
||||
|
||||
```
|
||||
/etc/dbbackup/
|
||||
├── dbbackup.conf # Main configuration
|
||||
└── env.d/
|
||||
└── cluster.conf # Instance credentials (mode 0600)
|
||||
|
||||
/var/lib/dbbackup/
|
||||
├── catalog/
|
||||
│ └── backups.db # SQLite backup catalog
|
||||
├── backups/ # Default backup storage
|
||||
└── metrics/ # Prometheus textfile metrics
|
||||
|
||||
/var/log/dbbackup/ # Log files
|
||||
|
||||
/usr/local/bin/dbbackup # Binary copy
|
||||
```
|
||||
|
||||
### Systemd Units
|
||||
|
||||
**For cluster backups:**
|
||||
- `/etc/systemd/system/dbbackup-cluster.service` - Backup service
|
||||
- `/etc/systemd/system/dbbackup-cluster.timer` - Backup scheduler
|
||||
|
||||
**For named instances:**
|
||||
- `/etc/systemd/system/dbbackup@.service` - Template service
|
||||
- `/etc/systemd/system/dbbackup@.timer` - Template timer
|
||||
|
||||
**Metrics exporter (optional):**
|
||||
- `/etc/systemd/system/dbbackup-exporter.service`
|
||||
|
||||
### System User
|
||||
|
||||
A dedicated `dbbackup` user and group are created:
|
||||
- Home: `/var/lib/dbbackup`
|
||||
- Shell: `/usr/sbin/nologin`
|
||||
- Purpose: Run backup services with minimal privileges
|
||||
|
||||
## Manual Installation
|
||||
|
||||
If you prefer to set up systemd services manually without the installer:
|
||||
|
||||
### Step 1: Create User and Directories
|
||||
|
||||
```bash
|
||||
# Create system user
|
||||
sudo useradd --system --home-dir /var/lib/dbbackup --shell /usr/sbin/nologin dbbackup
|
||||
|
||||
# Create directories
|
||||
sudo mkdir -p /etc/dbbackup/env.d
|
||||
sudo mkdir -p /var/lib/dbbackup/{catalog,backups,metrics}
|
||||
sudo mkdir -p /var/log/dbbackup
|
||||
|
||||
# Set ownership
|
||||
sudo chown -R dbbackup:dbbackup /var/lib/dbbackup /var/log/dbbackup
|
||||
sudo chown root:dbbackup /etc/dbbackup
|
||||
sudo chmod 750 /etc/dbbackup
|
||||
|
||||
# Copy binary
|
||||
sudo cp dbbackup /usr/local/bin/
|
||||
sudo chmod 755 /usr/local/bin/dbbackup
|
||||
```
|
||||
|
||||
### Step 2: Create Configuration
|
||||
|
||||
```bash
|
||||
# Main configuration in working directory (where service runs from)
|
||||
# dbbackup reads .dbbackup.conf from WorkingDirectory
|
||||
sudo tee /var/lib/dbbackup/.dbbackup.conf << 'EOF'
|
||||
# DBBackup Configuration
|
||||
db-type=postgres
|
||||
host=localhost
|
||||
port=5432
|
||||
user=postgres
|
||||
backup-dir=/var/lib/dbbackup/backups
|
||||
compression=6
|
||||
retention-days=30
|
||||
min-backups=7
|
||||
EOF
|
||||
sudo chown dbbackup:dbbackup /var/lib/dbbackup/.dbbackup.conf
|
||||
sudo chmod 600 /var/lib/dbbackup/.dbbackup.conf
|
||||
|
||||
# Instance credentials (secure permissions)
|
||||
sudo tee /etc/dbbackup/env.d/cluster.conf << 'EOF'
|
||||
PGPASSWORD=your_secure_password
|
||||
# Or for MySQL:
|
||||
# MYSQL_PWD=your_secure_password
|
||||
EOF
|
||||
sudo chmod 600 /etc/dbbackup/env.d/cluster.conf
|
||||
sudo chown dbbackup:dbbackup /etc/dbbackup/env.d/cluster.conf
|
||||
```
|
||||
|
||||
### Step 3: Create Service Unit
|
||||
|
||||
```bash
|
||||
sudo tee /etc/systemd/system/dbbackup-cluster.service << 'EOF'
|
||||
[Unit]
|
||||
Description=DBBackup Cluster Backup
|
||||
Documentation=https://github.com/PlusOne/dbbackup
|
||||
After=network.target postgresql.service mysql.service
|
||||
Wants=network.target
|
||||
|
||||
[Service]
|
||||
Type=oneshot
|
||||
User=dbbackup
|
||||
Group=dbbackup
|
||||
|
||||
# Load configuration
|
||||
EnvironmentFile=-/etc/dbbackup/env.d/cluster.conf
|
||||
|
||||
# Working directory (config is loaded from .dbbackup.conf here)
|
||||
WorkingDirectory=/var/lib/dbbackup
|
||||
|
||||
# Execute backup (reads .dbbackup.conf from WorkingDirectory)
|
||||
ExecStart=/usr/local/bin/dbbackup backup cluster \
|
||||
--backup-dir /var/lib/dbbackup/backups \
|
||||
--host localhost \
|
||||
--port 5432 \
|
||||
--user postgres \
|
||||
--allow-root
|
||||
|
||||
# Security hardening
|
||||
NoNewPrivileges=yes
|
||||
ProtectSystem=strict
|
||||
ProtectHome=yes
|
||||
PrivateTmp=yes
|
||||
PrivateDevices=yes
|
||||
ProtectKernelTunables=yes
|
||||
ProtectKernelModules=yes
|
||||
ProtectControlGroups=yes
|
||||
RestrictAddressFamilies=AF_UNIX AF_INET AF_INET6
|
||||
RestrictNamespaces=yes
|
||||
RestrictRealtime=yes
|
||||
RestrictSUIDSGID=yes
|
||||
MemoryDenyWriteExecute=yes
|
||||
LockPersonality=yes
|
||||
|
||||
# Allow write to specific paths
|
||||
ReadWritePaths=/var/lib/dbbackup /var/log/dbbackup
|
||||
|
||||
# Capability restrictions
|
||||
CapabilityBoundingSet=CAP_DAC_READ_SEARCH CAP_NET_CONNECT
|
||||
AmbientCapabilities=
|
||||
|
||||
# Resource limits
|
||||
MemoryMax=4G
|
||||
CPUQuota=80%
|
||||
|
||||
# Prevent OOM killer from terminating backups
|
||||
OOMScoreAdjust=-100
|
||||
|
||||
# Logging
|
||||
StandardOutput=journal
|
||||
StandardError=journal
|
||||
SyslogIdentifier=dbbackup
|
||||
|
||||
[Install]
|
||||
WantedBy=multi-user.target
|
||||
EOF
|
||||
```
|
||||
|
||||
### Step 4: Create Timer Unit
|
||||
|
||||
```bash
|
||||
sudo tee /etc/systemd/system/dbbackup-cluster.timer << 'EOF'
|
||||
[Unit]
|
||||
Description=DBBackup Cluster Backup Timer
|
||||
Documentation=https://github.com/PlusOne/dbbackup
|
||||
|
||||
[Timer]
|
||||
# Run daily at midnight
|
||||
OnCalendar=daily
|
||||
|
||||
# Randomize start time within 15 minutes to avoid thundering herd
|
||||
RandomizedDelaySec=900
|
||||
|
||||
# Run immediately if we missed the last scheduled time
|
||||
Persistent=true
|
||||
|
||||
# Run even if system was sleeping
|
||||
WakeSystem=false
|
||||
|
||||
[Install]
|
||||
WantedBy=timers.target
|
||||
EOF
|
||||
```
|
||||
|
||||
### Step 5: Enable and Start
|
||||
|
||||
```bash
|
||||
# Reload systemd
|
||||
sudo systemctl daemon-reload
|
||||
|
||||
# Enable timer (auto-start on boot)
|
||||
sudo systemctl enable dbbackup-cluster.timer
|
||||
|
||||
# Start timer
|
||||
sudo systemctl start dbbackup-cluster.timer
|
||||
|
||||
# Verify timer is active
|
||||
sudo systemctl status dbbackup-cluster.timer
|
||||
|
||||
# View next scheduled run
|
||||
sudo systemctl list-timers dbbackup-cluster.timer
|
||||
```
|
||||
|
||||
### Step 6: Test Backup
|
||||
|
||||
```bash
|
||||
# Run backup manually
|
||||
sudo systemctl start dbbackup-cluster.service
|
||||
|
||||
# Check status
|
||||
sudo systemctl status dbbackup-cluster.service
|
||||
|
||||
# View logs
|
||||
sudo journalctl -u dbbackup-cluster.service -f
|
||||
```
|
||||
|
||||
## Prometheus Metrics Exporter (Manual)
|
||||
|
||||
### Service Unit
|
||||
|
||||
```bash
|
||||
sudo tee /etc/systemd/system/dbbackup-exporter.service << 'EOF'
|
||||
[Unit]
|
||||
Description=DBBackup Prometheus Metrics Exporter
|
||||
Documentation=https://github.com/PlusOne/dbbackup
|
||||
After=network.target
|
||||
|
||||
[Service]
|
||||
Type=simple
|
||||
User=dbbackup
|
||||
Group=dbbackup
|
||||
|
||||
# Working directory
|
||||
WorkingDirectory=/var/lib/dbbackup
|
||||
|
||||
# Start HTTP metrics server
|
||||
ExecStart=/usr/local/bin/dbbackup metrics serve --port 9399
|
||||
|
||||
# Restart on failure
|
||||
Restart=on-failure
|
||||
RestartSec=10
|
||||
|
||||
# Security hardening
|
||||
NoNewPrivileges=yes
|
||||
ProtectSystem=strict
|
||||
ProtectHome=yes
|
||||
PrivateTmp=yes
|
||||
PrivateDevices=yes
|
||||
ProtectKernelTunables=yes
|
||||
ProtectKernelModules=yes
|
||||
ProtectControlGroups=yes
|
||||
RestrictAddressFamilies=AF_UNIX AF_INET AF_INET6
|
||||
RestrictNamespaces=yes
|
||||
RestrictRealtime=yes
|
||||
RestrictSUIDSGID=yes
|
||||
LockPersonality=yes
|
||||
|
||||
# Catalog access
|
||||
ReadWritePaths=/var/lib/dbbackup
|
||||
|
||||
# Capability restrictions
|
||||
CapabilityBoundingSet=CAP_NET_BIND_SERVICE
|
||||
AmbientCapabilities=
|
||||
|
||||
# Logging
|
||||
StandardOutput=journal
|
||||
StandardError=journal
|
||||
SyslogIdentifier=dbbackup-exporter
|
||||
|
||||
[Install]
|
||||
WantedBy=multi-user.target
|
||||
EOF
|
||||
```
|
||||
|
||||
### Enable Exporter
|
||||
|
||||
```bash
|
||||
sudo systemctl daemon-reload
|
||||
sudo systemctl enable dbbackup-exporter
|
||||
sudo systemctl start dbbackup-exporter
|
||||
|
||||
# Test
|
||||
curl http://localhost:9399/health
|
||||
curl http://localhost:9399/metrics
|
||||
```
|
||||
|
||||
### Prometheus Configuration
|
||||
|
||||
Add to `prometheus.yml`:
|
||||
|
||||
```yaml
|
||||
scrape_configs:
|
||||
- job_name: 'dbbackup'
|
||||
static_configs:
|
||||
- targets: ['localhost:9399']
|
||||
scrape_interval: 60s
|
||||
```
|
||||
|
||||
## Security Hardening
|
||||
|
||||
The systemd units include comprehensive security hardening:
|
||||
|
||||
| Setting | Purpose |
|
||||
|---------|---------|
|
||||
| `NoNewPrivileges=yes` | Prevent privilege escalation |
|
||||
| `ProtectSystem=strict` | Read-only filesystem except allowed paths |
|
||||
| `ProtectHome=yes` | Block access to /home, /root, /run/user |
|
||||
| `PrivateTmp=yes` | Isolated /tmp namespace |
|
||||
| `PrivateDevices=yes` | No access to physical devices |
|
||||
| `RestrictAddressFamilies` | Only Unix and IP sockets |
|
||||
| `MemoryDenyWriteExecute=yes` | Prevent code injection |
|
||||
| `CapabilityBoundingSet` | Minimal Linux capabilities |
|
||||
| `OOMScoreAdjust=-100` | Protect backup from OOM killer |
|
||||
|
||||
### Database Access
|
||||
|
||||
For PostgreSQL with peer authentication:
|
||||
```bash
|
||||
# Add dbbackup user to postgres group
|
||||
sudo usermod -aG postgres dbbackup
|
||||
|
||||
# Or create a .pgpass file
|
||||
sudo -u dbbackup tee /var/lib/dbbackup/.pgpass << EOF
|
||||
localhost:5432:*:postgres:password
|
||||
EOF
|
||||
sudo chmod 600 /var/lib/dbbackup/.pgpass
|
||||
```
|
||||
|
||||
For PostgreSQL with password authentication:
|
||||
```bash
|
||||
# Store password in environment file
|
||||
echo "PGPASSWORD=your_password" | sudo tee /etc/dbbackup/env.d/cluster.conf
|
||||
sudo chmod 600 /etc/dbbackup/env.d/cluster.conf
|
||||
```
|
||||
|
||||
## Multiple Instances
|
||||
|
||||
Run different backup configurations as separate instances:
|
||||
|
||||
```bash
|
||||
# Install multiple instances
|
||||
sudo dbbackup install --instance production --schedule "*-*-* 02:00:00"
|
||||
sudo dbbackup install --instance staging --schedule "*-*-* 04:00:00"
|
||||
sudo dbbackup install --instance analytics --schedule "weekly"
|
||||
|
||||
# Manage individually
|
||||
sudo systemctl status dbbackup@production.timer
|
||||
sudo systemctl start dbbackup@staging.service
|
||||
```
|
||||
|
||||
Each instance has its own:
|
||||
- Configuration: `/etc/dbbackup/env.d/<instance>.conf`
|
||||
- Timer schedule
|
||||
- Journal logs: `journalctl -u dbbackup@<instance>.service`
|
||||
|
||||
## Troubleshooting
|
||||
|
||||
### View Logs
|
||||
|
||||
```bash
|
||||
# Real-time logs
|
||||
sudo journalctl -u dbbackup-cluster.service -f
|
||||
|
||||
# Last backup run
|
||||
sudo journalctl -u dbbackup-cluster.service -n 100
|
||||
|
||||
# All dbbackup logs
|
||||
sudo journalctl -t dbbackup
|
||||
|
||||
# Exporter logs
|
||||
sudo journalctl -u dbbackup-exporter -f
|
||||
```
|
||||
|
||||
### Timer Not Running
|
||||
|
||||
```bash
|
||||
# Check timer status
|
||||
sudo systemctl status dbbackup-cluster.timer
|
||||
|
||||
# List all timers
|
||||
sudo systemctl list-timers --all | grep dbbackup
|
||||
|
||||
# Check if timer is enabled
|
||||
sudo systemctl is-enabled dbbackup-cluster.timer
|
||||
```
|
||||
|
||||
### Service Fails to Start
|
||||
|
||||
```bash
|
||||
# Check service status
|
||||
sudo systemctl status dbbackup-cluster.service
|
||||
|
||||
# View detailed error
|
||||
sudo journalctl -u dbbackup-cluster.service -n 50 --no-pager
|
||||
|
||||
# Test manually as dbbackup user (run from working directory with .dbbackup.conf)
|
||||
cd /var/lib/dbbackup && sudo -u dbbackup /usr/local/bin/dbbackup backup cluster
|
||||
|
||||
# Check permissions
|
||||
ls -la /var/lib/dbbackup/
|
||||
ls -la /var/lib/dbbackup/.dbbackup.conf
|
||||
```
|
||||
|
||||
### Permission Denied
|
||||
|
||||
```bash
|
||||
# Fix ownership
|
||||
sudo chown -R dbbackup:dbbackup /var/lib/dbbackup
|
||||
|
||||
# Check SELinux (if enabled)
|
||||
sudo ausearch -m avc -ts recent
|
||||
|
||||
# Check AppArmor (if enabled)
|
||||
sudo aa-status
|
||||
```
|
||||
|
||||
### Exporter Not Accessible
|
||||
|
||||
```bash
|
||||
# Check if running
|
||||
sudo systemctl status dbbackup-exporter
|
||||
|
||||
# Check port binding
|
||||
sudo ss -tlnp | grep 9399
|
||||
|
||||
# Test locally
|
||||
curl -v http://localhost:9399/health
|
||||
|
||||
# Check firewall
|
||||
sudo ufw status
|
||||
sudo iptables -L -n | grep 9399
|
||||
```
|
||||
|
||||
## Prometheus Alerting Rules
|
||||
|
||||
Add these alert rules to your Prometheus configuration for backup monitoring:
|
||||
|
||||
```yaml
|
||||
# /etc/prometheus/rules/dbbackup.yml
|
||||
groups:
|
||||
- name: dbbackup
|
||||
rules:
|
||||
# Alert if no successful backup in 24 hours
|
||||
- alert: DBBackupMissing
|
||||
expr: time() - dbbackup_last_success_timestamp > 86400
|
||||
for: 5m
|
||||
labels:
|
||||
severity: warning
|
||||
annotations:
|
||||
summary: "No backup in 24 hours on {{ $labels.instance }}"
|
||||
description: "Database {{ $labels.database }} has not had a successful backup in over 24 hours."
|
||||
|
||||
# Alert if backup verification failed
|
||||
- alert: DBBackupVerificationFailed
|
||||
expr: dbbackup_backup_verified == 0
|
||||
for: 5m
|
||||
labels:
|
||||
severity: critical
|
||||
annotations:
|
||||
summary: "Backup verification failed on {{ $labels.instance }}"
|
||||
description: "Last backup for {{ $labels.database }} failed verification check."
|
||||
|
||||
# Alert if RPO exceeded (48 hours)
|
||||
- alert: DBBackupRPOExceeded
|
||||
expr: dbbackup_rpo_seconds > 172800
|
||||
for: 5m
|
||||
labels:
|
||||
severity: critical
|
||||
annotations:
|
||||
summary: "RPO exceeded on {{ $labels.instance }}"
|
||||
description: "Recovery Point Objective exceeded 48 hours for {{ $labels.database }}."
|
||||
|
||||
# Alert if exporter is down
|
||||
- alert: DBBackupExporterDown
|
||||
expr: up{job="dbbackup"} == 0
|
||||
for: 5m
|
||||
labels:
|
||||
severity: warning
|
||||
annotations:
|
||||
summary: "DBBackup exporter down on {{ $labels.instance }}"
|
||||
description: "Cannot scrape metrics from dbbackup-exporter."
|
||||
|
||||
# Alert if backup size dropped significantly (possible truncation)
|
||||
- alert: DBBackupSizeAnomaly
|
||||
expr: dbbackup_last_backup_size_bytes < (dbbackup_last_backup_size_bytes offset 1d) * 0.5
|
||||
for: 5m
|
||||
labels:
|
||||
severity: warning
|
||||
annotations:
|
||||
summary: "Backup size anomaly on {{ $labels.instance }}"
|
||||
description: "Backup size for {{ $labels.database }} dropped by more than 50%."
|
||||
```
|
||||
|
||||
### Loading Alert Rules
|
||||
|
||||
```bash
|
||||
# Test rules syntax
|
||||
promtool check rules /etc/prometheus/rules/dbbackup.yml
|
||||
|
||||
# Reload Prometheus
|
||||
sudo systemctl reload prometheus
|
||||
# or via API:
|
||||
curl -X POST http://localhost:9090/-/reload
|
||||
```
|
||||
|
||||
## Catalog Sync for Existing Backups
|
||||
|
||||
If you have existing backups created before installing v3.41+, sync them to the catalog:
|
||||
|
||||
```bash
|
||||
# Sync existing backups to catalog
|
||||
dbbackup catalog sync /path/to/backup/directory --allow-root
|
||||
|
||||
# Verify catalog contents
|
||||
dbbackup catalog list --allow-root
|
||||
|
||||
# Show statistics
|
||||
dbbackup catalog stats --allow-root
|
||||
```
|
||||
|
||||
## Uninstallation
|
||||
|
||||
### Using Installer
|
||||
|
||||
```bash
|
||||
# Remove cluster backup (keeps config)
|
||||
sudo dbbackup uninstall cluster
|
||||
|
||||
# Remove and purge configuration
|
||||
sudo dbbackup uninstall cluster --purge
|
||||
|
||||
# Remove named instance
|
||||
sudo dbbackup uninstall production --purge
|
||||
```
|
||||
|
||||
### Manual Removal
|
||||
|
||||
```bash
|
||||
# Stop and disable services
|
||||
sudo systemctl stop dbbackup-cluster.timer dbbackup-cluster.service dbbackup-exporter
|
||||
sudo systemctl disable dbbackup-cluster.timer dbbackup-exporter
|
||||
|
||||
# Remove unit files
|
||||
sudo rm /etc/systemd/system/dbbackup-cluster.service
|
||||
sudo rm /etc/systemd/system/dbbackup-cluster.timer
|
||||
sudo rm /etc/systemd/system/dbbackup-exporter.service
|
||||
sudo rm /etc/systemd/system/dbbackup@.service
|
||||
sudo rm /etc/systemd/system/dbbackup@.timer
|
||||
|
||||
# Reload systemd
|
||||
sudo systemctl daemon-reload
|
||||
|
||||
# Optional: Remove user and directories
|
||||
sudo userdel dbbackup
|
||||
sudo rm -rf /var/lib/dbbackup
|
||||
sudo rm -rf /etc/dbbackup
|
||||
sudo rm -rf /var/log/dbbackup
|
||||
sudo rm /usr/local/bin/dbbackup
|
||||
```
|
||||
|
||||
## See Also
|
||||
|
||||
- [README.md](README.md) - Main documentation
|
||||
- [DOCKER.md](DOCKER.md) - Docker deployment
|
||||
- [CLOUD.md](CLOUD.md) - Cloud storage configuration
|
||||
- [PITR.md](PITR.md) - Point-in-Time Recovery
|
||||
51
go.mod
51
go.mod
@ -5,15 +5,33 @@ go 1.24.0
|
||||
toolchain go1.24.9
|
||||
|
||||
require (
|
||||
github.com/Netflix/go-expect v0.0.0-20220104043353-73e0943537d2
|
||||
cloud.google.com/go/storage v1.57.2
|
||||
github.com/Azure/azure-sdk-for-go/sdk/azcore v1.20.0
|
||||
github.com/Azure/azure-sdk-for-go/sdk/storage/azblob v1.6.3
|
||||
github.com/aws/aws-sdk-go-v2 v1.40.0
|
||||
github.com/aws/aws-sdk-go-v2/config v1.32.2
|
||||
github.com/aws/aws-sdk-go-v2/credentials v1.19.2
|
||||
github.com/aws/aws-sdk-go-v2/feature/s3/manager v1.20.12
|
||||
github.com/aws/aws-sdk-go-v2/service/s3 v1.92.1
|
||||
github.com/cenkalti/backoff/v4 v4.3.0
|
||||
github.com/charmbracelet/bubbles v0.21.0
|
||||
github.com/charmbracelet/bubbletea v1.3.10
|
||||
github.com/charmbracelet/lipgloss v1.1.0
|
||||
github.com/dustin/go-humanize v1.0.1
|
||||
github.com/fatih/color v1.18.0
|
||||
github.com/go-sql-driver/mysql v1.9.3
|
||||
github.com/hashicorp/go-multierror v1.1.1
|
||||
github.com/jackc/pgx/v5 v5.7.6
|
||||
github.com/klauspost/pgzip v1.2.6
|
||||
github.com/schollz/progressbar/v3 v3.19.0
|
||||
github.com/shirou/gopsutil/v3 v3.24.5
|
||||
github.com/sirupsen/logrus v1.9.3
|
||||
github.com/spf13/afero v1.15.0
|
||||
github.com/spf13/cobra v1.10.1
|
||||
github.com/spf13/pflag v1.0.9
|
||||
golang.org/x/crypto v0.43.0
|
||||
google.golang.org/api v0.256.0
|
||||
modernc.org/sqlite v1.44.3
|
||||
)
|
||||
|
||||
require (
|
||||
@ -24,20 +42,13 @@ require (
|
||||
cloud.google.com/go/compute/metadata v0.9.0 // indirect
|
||||
cloud.google.com/go/iam v1.5.2 // indirect
|
||||
cloud.google.com/go/monitoring v1.24.2 // indirect
|
||||
cloud.google.com/go/storage v1.57.2 // indirect
|
||||
filippo.io/edwards25519 v1.1.0 // indirect
|
||||
github.com/Azure/azure-sdk-for-go/sdk/azcore v1.20.0 // indirect
|
||||
github.com/Azure/azure-sdk-for-go/sdk/internal v1.11.2 // indirect
|
||||
github.com/Azure/azure-sdk-for-go/sdk/storage/azblob v1.6.3 // indirect
|
||||
github.com/GoogleCloudPlatform/opentelemetry-operations-go/detectors/gcp v1.29.0 // indirect
|
||||
github.com/GoogleCloudPlatform/opentelemetry-operations-go/exporter/metric v0.53.0 // indirect
|
||||
github.com/GoogleCloudPlatform/opentelemetry-operations-go/internal/resourcemapping v0.53.0 // indirect
|
||||
github.com/aws/aws-sdk-go-v2 v1.40.0 // indirect
|
||||
github.com/aws/aws-sdk-go-v2/aws/protocol/eventstream v1.7.3 // indirect
|
||||
github.com/aws/aws-sdk-go-v2/config v1.32.2 // indirect
|
||||
github.com/aws/aws-sdk-go-v2/credentials v1.19.2 // indirect
|
||||
github.com/aws/aws-sdk-go-v2/feature/ec2/imds v1.18.14 // indirect
|
||||
github.com/aws/aws-sdk-go-v2/feature/s3/manager v1.20.12 // indirect
|
||||
github.com/aws/aws-sdk-go-v2/internal/configsources v1.4.14 // indirect
|
||||
github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.7.14 // indirect
|
||||
github.com/aws/aws-sdk-go-v2/internal/ini v1.8.4 // indirect
|
||||
@ -46,7 +57,6 @@ require (
|
||||
github.com/aws/aws-sdk-go-v2/service/internal/checksum v1.9.5 // indirect
|
||||
github.com/aws/aws-sdk-go-v2/service/internal/presigned-url v1.13.14 // indirect
|
||||
github.com/aws/aws-sdk-go-v2/service/internal/s3shared v1.19.14 // indirect
|
||||
github.com/aws/aws-sdk-go-v2/service/s3 v1.92.1 // indirect
|
||||
github.com/aws/aws-sdk-go-v2/service/signin v1.0.2 // indirect
|
||||
github.com/aws/aws-sdk-go-v2/service/sso v1.30.5 // indirect
|
||||
github.com/aws/aws-sdk-go-v2/service/ssooidc v1.35.10 // indirect
|
||||
@ -59,7 +69,6 @@ require (
|
||||
github.com/charmbracelet/x/cellbuf v0.0.13-0.20250311204145-2c3ea96c31dd // indirect
|
||||
github.com/charmbracelet/x/term v0.2.1 // indirect
|
||||
github.com/cncf/xds/go v0.0.0-20250501225837-2ac532fd4443 // indirect
|
||||
github.com/creack/pty v1.1.17 // indirect
|
||||
github.com/envoyproxy/go-control-plane/envoy v1.32.4 // indirect
|
||||
github.com/envoyproxy/protoc-gen-validate v1.2.1 // indirect
|
||||
github.com/erikgeiser/coninput v0.0.0-20211004153227-1c3628e74d0f // indirect
|
||||
@ -67,26 +76,37 @@ require (
|
||||
github.com/go-jose/go-jose/v4 v4.1.2 // indirect
|
||||
github.com/go-logr/logr v1.4.3 // indirect
|
||||
github.com/go-logr/stdr v1.2.2 // indirect
|
||||
github.com/go-ole/go-ole v1.2.6 // indirect
|
||||
github.com/google/s2a-go v0.1.9 // indirect
|
||||
github.com/google/uuid v1.6.0 // indirect
|
||||
github.com/googleapis/enterprise-certificate-proxy v0.3.7 // indirect
|
||||
github.com/googleapis/gax-go/v2 v2.15.0 // indirect
|
||||
github.com/hashicorp/errwrap v1.0.0 // indirect
|
||||
github.com/inconshreveable/mousetrap v1.1.0 // indirect
|
||||
github.com/jackc/pgpassfile v1.0.0 // indirect
|
||||
github.com/jackc/pgservicefile v0.0.0-20240606120523-5a60cdf6a761 // indirect
|
||||
github.com/jackc/puddle/v2 v2.2.2 // indirect
|
||||
github.com/klauspost/compress v1.18.3 // indirect
|
||||
github.com/lucasb-eyer/go-colorful v1.2.0 // indirect
|
||||
github.com/lufia/plan9stats v0.0.0-20211012122336-39d0f177ccd0 // indirect
|
||||
github.com/mattn/go-colorable v0.1.13 // indirect
|
||||
github.com/mattn/go-isatty v0.0.20 // indirect
|
||||
github.com/mattn/go-localereader v0.0.1 // indirect
|
||||
github.com/mattn/go-runewidth v0.0.16 // indirect
|
||||
github.com/mattn/go-sqlite3 v1.14.32 // indirect
|
||||
github.com/mitchellh/colorstring v0.0.0-20190213212951-d06e56a500db // indirect
|
||||
github.com/muesli/ansi v0.0.0-20230316100256-276c6243b2f6 // indirect
|
||||
github.com/muesli/cancelreader v0.2.2 // indirect
|
||||
github.com/muesli/termenv v0.16.0 // indirect
|
||||
github.com/ncruces/go-strftime v1.0.0 // indirect
|
||||
github.com/planetscale/vtprotobuf v0.6.1-0.20240319094008-0393e58bdf10 // indirect
|
||||
github.com/power-devops/perfstat v0.0.0-20210106213030-5aafc221ea8c // indirect
|
||||
github.com/remyoudompheng/bigfft v0.0.0-20230129092748-24d4a6f8daec // indirect
|
||||
github.com/rivo/uniseg v0.4.7 // indirect
|
||||
github.com/spiffe/go-spiffe/v2 v2.5.0 // indirect
|
||||
github.com/tklauser/go-sysconf v0.3.12 // indirect
|
||||
github.com/tklauser/numcpus v0.6.1 // indirect
|
||||
github.com/xo/terminfo v0.0.0-20220910002029-abceb7e1c41e // indirect
|
||||
github.com/yusufpapurcu/wmi v1.2.4 // indirect
|
||||
github.com/zeebo/errs v1.4.0 // indirect
|
||||
go.opentelemetry.io/auto/sdk v1.1.0 // indirect
|
||||
go.opentelemetry.io/contrib/detectors/gcp v1.36.0 // indirect
|
||||
@ -97,17 +117,20 @@ require (
|
||||
go.opentelemetry.io/otel/sdk v1.37.0 // indirect
|
||||
go.opentelemetry.io/otel/sdk/metric v1.37.0 // indirect
|
||||
go.opentelemetry.io/otel/trace v1.37.0 // indirect
|
||||
golang.org/x/crypto v0.43.0 // indirect
|
||||
golang.org/x/exp v0.0.0-20251023183803-a4bb9ffd2546 // indirect
|
||||
golang.org/x/net v0.46.0 // indirect
|
||||
golang.org/x/oauth2 v0.33.0 // indirect
|
||||
golang.org/x/sync v0.18.0 // indirect
|
||||
golang.org/x/sync v0.19.0 // indirect
|
||||
golang.org/x/sys v0.38.0 // indirect
|
||||
golang.org/x/term v0.36.0 // indirect
|
||||
golang.org/x/text v0.30.0 // indirect
|
||||
golang.org/x/time v0.14.0 // indirect
|
||||
google.golang.org/api v0.256.0 // indirect
|
||||
google.golang.org/genproto v0.0.0-20250603155806-513f23925822 // indirect
|
||||
google.golang.org/genproto/googleapis/api v0.0.0-20250818200422-3122310a409c // indirect
|
||||
google.golang.org/genproto/googleapis/rpc v0.0.0-20251103181224-f26f9409b101 // indirect
|
||||
google.golang.org/grpc v1.76.0 // indirect
|
||||
google.golang.org/protobuf v1.36.10 // indirect
|
||||
modernc.org/libc v1.67.6 // indirect
|
||||
modernc.org/mathutil v1.7.1 // indirect
|
||||
modernc.org/memory v1.11.0 // indirect
|
||||
)
|
||||
|
||||
178
go.sum
178
go.sum
@ -10,36 +10,44 @@ cloud.google.com/go/compute/metadata v0.9.0 h1:pDUj4QMoPejqq20dK0Pg2N4yG9zIkYGdB
|
||||
cloud.google.com/go/compute/metadata v0.9.0/go.mod h1:E0bWwX5wTnLPedCKqk3pJmVgCBSM6qQI1yTBdEb3C10=
|
||||
cloud.google.com/go/iam v1.5.2 h1:qgFRAGEmd8z6dJ/qyEchAuL9jpswyODjA2lS+w234g8=
|
||||
cloud.google.com/go/iam v1.5.2/go.mod h1:SE1vg0N81zQqLzQEwxL2WI6yhetBdbNQuTvIKCSkUHE=
|
||||
cloud.google.com/go/logging v1.13.0 h1:7j0HgAp0B94o1YRDqiqm26w4q1rDMH7XNRU34lJXHYc=
|
||||
cloud.google.com/go/logging v1.13.0/go.mod h1:36CoKh6KA/M0PbhPKMq6/qety2DCAErbhXT62TuXALA=
|
||||
cloud.google.com/go/longrunning v0.7.0 h1:FV0+SYF1RIj59gyoWDRi45GiYUMM3K1qO51qoboQT1E=
|
||||
cloud.google.com/go/longrunning v0.7.0/go.mod h1:ySn2yXmjbK9Ba0zsQqunhDkYi0+9rlXIwnoAf+h+TPY=
|
||||
cloud.google.com/go/monitoring v1.24.2 h1:5OTsoJ1dXYIiMiuL+sYscLc9BumrL3CarVLL7dd7lHM=
|
||||
cloud.google.com/go/monitoring v1.24.2/go.mod h1:x7yzPWcgDRnPEv3sI+jJGBkwl5qINf+6qY4eq0I9B4U=
|
||||
cloud.google.com/go/storage v1.57.2 h1:sVlym3cHGYhrp6XZKkKb+92I1V42ks2qKKpB0CF5Mb4=
|
||||
cloud.google.com/go/storage v1.57.2/go.mod h1:n5ijg4yiRXXpCu0sJTD6k+eMf7GRrJmPyr9YxLXGHOk=
|
||||
cloud.google.com/go/trace v1.11.6 h1:2O2zjPzqPYAHrn3OKl029qlqG6W8ZdYaOWRyr8NgMT4=
|
||||
cloud.google.com/go/trace v1.11.6/go.mod h1:GA855OeDEBiBMzcckLPE2kDunIpC72N+Pq8WFieFjnI=
|
||||
filippo.io/edwards25519 v1.1.0 h1:FNf4tywRC1HmFuKW5xopWpigGjJKiJSV0Cqo0cJWDaA=
|
||||
filippo.io/edwards25519 v1.1.0/go.mod h1:BxyFTGdWcka3PhytdK4V28tE5sGfRvvvRV7EaN4VDT4=
|
||||
github.com/Azure/azure-sdk-for-go/sdk/azcore v1.20.0 h1:JXg2dwJUmPB9JmtVmdEB16APJ7jurfbY5jnfXpJoRMc=
|
||||
github.com/Azure/azure-sdk-for-go/sdk/azcore v1.20.0/go.mod h1:YD5h/ldMsG0XiIw7PdyNhLxaM317eFh5yNLccNfGdyw=
|
||||
github.com/Azure/azure-sdk-for-go/sdk/azidentity v1.13.0 h1:KpMC6LFL7mqpExyMC9jVOYRiVhLmamjeZfRsUpB7l4s=
|
||||
github.com/Azure/azure-sdk-for-go/sdk/azidentity v1.13.0/go.mod h1:J7MUC/wtRpfGVbQ5sIItY5/FuVWmvzlY21WAOfQnq/I=
|
||||
github.com/Azure/azure-sdk-for-go/sdk/internal v1.11.2 h1:9iefClla7iYpfYWdzPCRDozdmndjTm8DXdpCzPajMgA=
|
||||
github.com/Azure/azure-sdk-for-go/sdk/internal v1.11.2/go.mod h1:XtLgD3ZD34DAaVIIAyG3objl5DynM3CQ/vMcbBNJZGI=
|
||||
github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/storage/armstorage v1.8.1 h1:/Zt+cDPnpC3OVDm/JKLOs7M2DKmLRIIp3XIx9pHHiig=
|
||||
github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/storage/armstorage v1.8.1/go.mod h1:Ng3urmn6dYe8gnbCMoHHVl5APYz2txho3koEkV2o2HA=
|
||||
github.com/Azure/azure-sdk-for-go/sdk/storage/azblob v1.6.3 h1:ZJJNFaQ86GVKQ9ehwqyAFE6pIfyicpuJ8IkVaPBc6/4=
|
||||
github.com/Azure/azure-sdk-for-go/sdk/storage/azblob v1.6.3/go.mod h1:URuDvhmATVKqHBH9/0nOiNKk0+YcwfQ3WkK5PqHKxc8=
|
||||
github.com/AzureAD/microsoft-authentication-library-for-go v1.5.0 h1:XkkQbfMyuH2jTSjQjSoihryI8GINRcs4xp8lNawg0FI=
|
||||
github.com/AzureAD/microsoft-authentication-library-for-go v1.5.0/go.mod h1:HKpQxkWaGLJ+D/5H8QRpyQXA1eKjxkFlOMwck5+33Jk=
|
||||
github.com/GoogleCloudPlatform/opentelemetry-operations-go/detectors/gcp v1.29.0 h1:UQUsRi8WTzhZntp5313l+CHIAT95ojUI2lpP/ExlZa4=
|
||||
github.com/GoogleCloudPlatform/opentelemetry-operations-go/detectors/gcp v1.29.0/go.mod h1:Cz6ft6Dkn3Et6l2v2a9/RpN7epQ1GtDlO6lj8bEcOvw=
|
||||
github.com/GoogleCloudPlatform/opentelemetry-operations-go/exporter/metric v0.53.0 h1:owcC2UnmsZycprQ5RfRgjydWhuoxg71LUfyiQdijZuM=
|
||||
github.com/GoogleCloudPlatform/opentelemetry-operations-go/exporter/metric v0.53.0/go.mod h1:ZPpqegjbE99EPKsu3iUWV22A04wzGPcAY/ziSIQEEgs=
|
||||
github.com/GoogleCloudPlatform/opentelemetry-operations-go/internal/cloudmock v0.53.0 h1:4LP6hvB4I5ouTbGgWtixJhgED6xdf67twf9PoY96Tbg=
|
||||
github.com/GoogleCloudPlatform/opentelemetry-operations-go/internal/cloudmock v0.53.0/go.mod h1:jUZ5LYlw40WMd07qxcQJD5M40aUxrfwqQX1g7zxYnrQ=
|
||||
github.com/GoogleCloudPlatform/opentelemetry-operations-go/internal/resourcemapping v0.53.0 h1:Ron4zCA/yk6U7WOBXhTJcDpsUBG9npumK6xw2auFltQ=
|
||||
github.com/GoogleCloudPlatform/opentelemetry-operations-go/internal/resourcemapping v0.53.0/go.mod h1:cSgYe11MCNYunTnRXrKiR/tHc0eoKjICUuWpNZoVCOo=
|
||||
github.com/Netflix/go-expect v0.0.0-20220104043353-73e0943537d2 h1:+vx7roKuyA63nhn5WAunQHLTznkw5W8b1Xc0dNjp83s=
|
||||
github.com/Netflix/go-expect v0.0.0-20220104043353-73e0943537d2/go.mod h1:HBCaDeC1lPdgDeDbhX8XFpy1jqjK0IBG8W5K+xYqA0w=
|
||||
github.com/aws/aws-sdk-go-v2 v1.40.0 h1:/WMUA0kjhZExjOQN2z3oLALDREea1A7TobfuiBrKlwc=
|
||||
github.com/aws/aws-sdk-go-v2 v1.40.0/go.mod h1:c9pm7VwuW0UPxAEYGyTmyurVcNrbF6Rt/wixFqDhcjE=
|
||||
github.com/aws/aws-sdk-go-v2/aws/protocol/eventstream v1.7.3 h1:DHctwEM8P8iTXFxC/QK0MRjwEpWQeM9yzidCRjldUz0=
|
||||
github.com/aws/aws-sdk-go-v2/aws/protocol/eventstream v1.7.3/go.mod h1:xdCzcZEtnSTKVDOmUZs4l/j3pSV6rpo1WXl5ugNsL8Y=
|
||||
github.com/aws/aws-sdk-go-v2/config v1.32.1 h1:iODUDLgk3q8/flEC7ymhmxjfoAnBDwEEYEVyKZ9mzjU=
|
||||
github.com/aws/aws-sdk-go-v2/config v1.32.1/go.mod h1:xoAgo17AGrPpJBSLg81W+ikM0cpOZG8ad04T2r+d5P0=
|
||||
github.com/aws/aws-sdk-go-v2/config v1.32.2 h1:4liUsdEpUUPZs5WVapsJLx5NPmQhQdez7nYFcovrytk=
|
||||
github.com/aws/aws-sdk-go-v2/config v1.32.2/go.mod h1:l0hs06IFz1eCT+jTacU/qZtC33nvcnLADAPL/XyrkZI=
|
||||
github.com/aws/aws-sdk-go-v2/credentials v1.19.1 h1:JeW+EwmtTE0yXFK8SmklrFh/cGTTXsQJumgMZNlbxfM=
|
||||
github.com/aws/aws-sdk-go-v2/credentials v1.19.1/go.mod h1:BOoXiStwTF+fT2XufhO0Efssbi1CNIO/ZXpZu87N0pw=
|
||||
github.com/aws/aws-sdk-go-v2/credentials v1.19.2 h1:qZry8VUyTK4VIo5aEdUcBjPZHL2v4FyQ3QEOaWcFLu4=
|
||||
github.com/aws/aws-sdk-go-v2/credentials v1.19.2/go.mod h1:YUqm5a1/kBnoK+/NY5WEiMocZihKSo15/tJdmdXnM5g=
|
||||
github.com/aws/aws-sdk-go-v2/feature/ec2/imds v1.18.14 h1:WZVR5DbDgxzA0BJeudId89Kmgy6DIU4ORpxwsVHz0qA=
|
||||
@ -62,30 +70,22 @@ github.com/aws/aws-sdk-go-v2/service/internal/presigned-url v1.13.14 h1:FIouAnCE
|
||||
github.com/aws/aws-sdk-go-v2/service/internal/presigned-url v1.13.14/go.mod h1:UTwDc5COa5+guonQU8qBikJo1ZJ4ln2r1MkF7Dqag1E=
|
||||
github.com/aws/aws-sdk-go-v2/service/internal/s3shared v1.19.14 h1:FzQE21lNtUor0Fb7QNgnEyiRCBlolLTX/Z1j65S7teM=
|
||||
github.com/aws/aws-sdk-go-v2/service/internal/s3shared v1.19.14/go.mod h1:s1ydyWG9pm3ZwmmYN21HKyG9WzAZhYVW85wMHs5FV6w=
|
||||
github.com/aws/aws-sdk-go-v2/service/s3 v1.92.0 h1:8FshVvnV2sr9kOSAbOnc/vwVmmAwMjOedKH6JW2ddPM=
|
||||
github.com/aws/aws-sdk-go-v2/service/s3 v1.92.0/go.mod h1:wYNqY3L02Z3IgRYxOBPH9I1zD9Cjh9hI5QOy/eOjQvw=
|
||||
github.com/aws/aws-sdk-go-v2/service/s3 v1.92.1 h1:OgQy/+0+Kc3khtqiEOk23xQAglXi3Tj0y5doOxbi5tg=
|
||||
github.com/aws/aws-sdk-go-v2/service/s3 v1.92.1/go.mod h1:wYNqY3L02Z3IgRYxOBPH9I1zD9Cjh9hI5QOy/eOjQvw=
|
||||
github.com/aws/aws-sdk-go-v2/service/signin v1.0.1 h1:BDgIUYGEo5TkayOWv/oBLPphWwNm/A91AebUjAu5L5g=
|
||||
github.com/aws/aws-sdk-go-v2/service/signin v1.0.1/go.mod h1:iS6EPmNeqCsGo+xQmXv0jIMjyYtQfnwg36zl2FwEouk=
|
||||
github.com/aws/aws-sdk-go-v2/service/signin v1.0.2 h1:MxMBdKTYBjPQChlJhi4qlEueqB1p1KcbTEa7tD5aqPs=
|
||||
github.com/aws/aws-sdk-go-v2/service/signin v1.0.2/go.mod h1:iS6EPmNeqCsGo+xQmXv0jIMjyYtQfnwg36zl2FwEouk=
|
||||
github.com/aws/aws-sdk-go-v2/service/sso v1.30.4 h1:U//SlnkE1wOQiIImxzdY5PXat4Wq+8rlfVEw4Y7J8as=
|
||||
github.com/aws/aws-sdk-go-v2/service/sso v1.30.4/go.mod h1:av+ArJpoYf3pgyrj6tcehSFW+y9/QvAY8kMooR9bZCw=
|
||||
github.com/aws/aws-sdk-go-v2/service/sso v1.30.5 h1:ksUT5KtgpZd3SAiFJNJ0AFEJVva3gjBmN7eXUZjzUwQ=
|
||||
github.com/aws/aws-sdk-go-v2/service/sso v1.30.5/go.mod h1:av+ArJpoYf3pgyrj6tcehSFW+y9/QvAY8kMooR9bZCw=
|
||||
github.com/aws/aws-sdk-go-v2/service/ssooidc v1.35.9 h1:LU8S9W/mPDAU9q0FjCLi0TrCheLMGwzbRpvUMwYspcA=
|
||||
github.com/aws/aws-sdk-go-v2/service/ssooidc v1.35.9/go.mod h1:/j67Z5XBVDx8nZVp9EuFM9/BS5dvBznbqILGuu73hug=
|
||||
github.com/aws/aws-sdk-go-v2/service/ssooidc v1.35.10 h1:GtsxyiF3Nd3JahRBJbxLCCdYW9ltGQYrFWg8XdkGDd8=
|
||||
github.com/aws/aws-sdk-go-v2/service/ssooidc v1.35.10/go.mod h1:/j67Z5XBVDx8nZVp9EuFM9/BS5dvBznbqILGuu73hug=
|
||||
github.com/aws/aws-sdk-go-v2/service/sts v1.41.1 h1:GdGmKtG+/Krag7VfyOXV17xjTCz0i9NT+JnqLTOI5nA=
|
||||
github.com/aws/aws-sdk-go-v2/service/sts v1.41.1/go.mod h1:6TxbXoDSgBQ225Qd8Q+MbxUxUh6TtNKwbRt/EPS9xso=
|
||||
github.com/aws/aws-sdk-go-v2/service/sts v1.41.2 h1:a5UTtD4mHBU3t0o6aHQZFJTNKVfxFWfPX7J0Lr7G+uY=
|
||||
github.com/aws/aws-sdk-go-v2/service/sts v1.41.2/go.mod h1:6TxbXoDSgBQ225Qd8Q+MbxUxUh6TtNKwbRt/EPS9xso=
|
||||
github.com/aws/smithy-go v1.23.2 h1:Crv0eatJUQhaManss33hS5r40CG3ZFH+21XSkqMrIUM=
|
||||
github.com/aws/smithy-go v1.23.2/go.mod h1:LEj2LM3rBRQJxPZTB4KuzZkaZYnZPnvgIhb4pu07mx0=
|
||||
github.com/aymanbagabas/go-osc52/v2 v2.0.1 h1:HwpRHbFMcZLEVr42D4p7XBqjyuxQH5SMiErDT4WkJ2k=
|
||||
github.com/aymanbagabas/go-osc52/v2 v2.0.1/go.mod h1:uYgXzlJ7ZpABp8OJ+exZzJJhRNQ2ASbcXHWsFqH8hp8=
|
||||
github.com/cenkalti/backoff/v4 v4.3.0 h1:MyRJ/UdXutAwSAT+s3wNd7MfTIcy71VQueUuFK343L8=
|
||||
github.com/cenkalti/backoff/v4 v4.3.0/go.mod h1:Y3VNntkOUPxTVeUxJ/G5vcM//AlwfmyYozVcomhLiZE=
|
||||
github.com/cespare/xxhash/v2 v2.3.0 h1:UL815xU9SqsFlibzuggzjXhog7bL6oX9BbNZnL2UFvs=
|
||||
github.com/cespare/xxhash/v2 v2.3.0/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs=
|
||||
github.com/charmbracelet/bubbles v0.21.0 h1:9TdC97SdRVg/1aaXNVWfFH3nnLAwOXr8Fn6u6mfQdFs=
|
||||
@ -102,20 +102,29 @@ github.com/charmbracelet/x/cellbuf v0.0.13-0.20250311204145-2c3ea96c31dd h1:vy0G
|
||||
github.com/charmbracelet/x/cellbuf v0.0.13-0.20250311204145-2c3ea96c31dd/go.mod h1:xe0nKWGd3eJgtqZRaN9RjMtK7xUYchjzPr7q6kcvCCs=
|
||||
github.com/charmbracelet/x/term v0.2.1 h1:AQeHeLZ1OqSXhrAWpYUtZyX1T3zVxfpZuEQMIQaGIAQ=
|
||||
github.com/charmbracelet/x/term v0.2.1/go.mod h1:oQ4enTYFV7QN4m0i9mzHrViD7TQKvNEEkHUMCmsxdUg=
|
||||
github.com/chengxilo/virtualterm v1.0.4 h1:Z6IpERbRVlfB8WkOmtbHiDbBANU7cimRIof7mk9/PwM=
|
||||
github.com/chengxilo/virtualterm v1.0.4/go.mod h1:DyxxBZz/x1iqJjFxTFcr6/x+jSpqN0iwWCOK1q10rlY=
|
||||
github.com/cncf/xds/go v0.0.0-20250501225837-2ac532fd4443 h1:aQ3y1lwWyqYPiWZThqv1aFbZMiM9vblcSArJRf2Irls=
|
||||
github.com/cncf/xds/go v0.0.0-20250501225837-2ac532fd4443/go.mod h1:W+zGtBO5Y1IgJhy4+A9GOqVhqLpfZi+vwmdNXUehLA8=
|
||||
github.com/cpuguy83/go-md2man/v2 v2.0.6/go.mod h1:oOW0eioCTA6cOiMLiUPZOpcVxMig6NIQQ7OS05n1F4g=
|
||||
github.com/creack/pty v1.1.17 h1:QeVUsEDNrLBW4tMgZHvxy18sKtr6VI492kBhUfhDJNI=
|
||||
github.com/creack/pty v1.1.17/go.mod h1:MOBLtS5ELjhRRrroQr9kyvTxUAFNvYEK993ew/Vr4O4=
|
||||
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
|
||||
github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
|
||||
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
|
||||
github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc h1:U9qPSI2PIWSS1VwoXQT9A3Wy9MM3WgvqSxFWenqJduM=
|
||||
github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
|
||||
github.com/dustin/go-humanize v1.0.1 h1:GzkhY7T5VNhEkwH0PVJgjz+fX1rhBrR7pRT3mDkpeCY=
|
||||
github.com/dustin/go-humanize v1.0.1/go.mod h1:Mu1zIs6XwVuF/gI1OepvI0qD18qycQx+mFykh5fBlto=
|
||||
github.com/envoyproxy/go-control-plane v0.13.4 h1:zEqyPVyku6IvWCFwux4x9RxkLOMUL+1vC9xUFv5l2/M=
|
||||
github.com/envoyproxy/go-control-plane v0.13.4/go.mod h1:kDfuBlDVsSj2MjrLEtRWtHlsWIFcGyB2RMO44Dc5GZA=
|
||||
github.com/envoyproxy/go-control-plane/envoy v1.32.4 h1:jb83lalDRZSpPWW2Z7Mck/8kXZ5CQAFYVjQcdVIr83A=
|
||||
github.com/envoyproxy/go-control-plane/envoy v1.32.4/go.mod h1:Gzjc5k8JcJswLjAx1Zm+wSYE20UrLtt7JZMWiWQXQEw=
|
||||
github.com/envoyproxy/go-control-plane/ratelimit v0.1.0 h1:/G9QYbddjL25KvtKTv3an9lx6VBE2cnb8wp1vEGNYGI=
|
||||
github.com/envoyproxy/go-control-plane/ratelimit v0.1.0/go.mod h1:Wk+tMFAFbCXaJPzVVHnPgRKdUdwW/KdbRt94AzgRee4=
|
||||
github.com/envoyproxy/protoc-gen-validate v1.2.1 h1:DEo3O99U8j4hBFwbJfrz9VtgcDfUKS7KJ7spH3d86P8=
|
||||
github.com/envoyproxy/protoc-gen-validate v1.2.1/go.mod h1:d/C80l/jxXLdfEIhX1W2TmLfsJ31lvEjwamM4DxlWXU=
|
||||
github.com/erikgeiser/coninput v0.0.0-20211004153227-1c3628e74d0f h1:Y/CXytFA4m6baUTXGLOoWe4PQhGxaX0KpnayAqC48p4=
|
||||
github.com/erikgeiser/coninput v0.0.0-20211004153227-1c3628e74d0f/go.mod h1:vw97MGsxSvLiUE2X8qFplwetxpGLQrlU1Q9AUEIzCaM=
|
||||
github.com/fatih/color v1.18.0 h1:S8gINlzdQ840/4pfAwic/ZE0djQEH3wM94VfqLTZcOM=
|
||||
github.com/fatih/color v1.18.0/go.mod h1:4FelSpRwEGDpQ12mAdzqdOukCy4u8WUtOY6lkT/6HfU=
|
||||
github.com/felixge/httpsnoop v1.0.4 h1:NFTV2Zj1bL4mc9sqWACXbQFVBBg2W3GPvqp8/ESS2Wg=
|
||||
github.com/felixge/httpsnoop v1.0.4/go.mod h1:m8KPJKqk1gH5J9DgRY2ASl2lWCfGKXixSwevea8zH2U=
|
||||
github.com/go-jose/go-jose/v4 v4.1.2 h1:TK/7NqRQZfgAh+Td8AlsrvtPoUyiHh0LqVvokh+1vHI=
|
||||
@ -125,8 +134,21 @@ github.com/go-logr/logr v1.4.3 h1:CjnDlHq8ikf6E492q6eKboGOC0T8CDaOvkHCIg8idEI=
|
||||
github.com/go-logr/logr v1.4.3/go.mod h1:9T104GzyrTigFIr8wt5mBrctHMim0Nb2HLGrmQ40KvY=
|
||||
github.com/go-logr/stdr v1.2.2 h1:hSWxHoqTgW2S2qGc0LTAI563KZ5YKYRhT3MFKZMbjag=
|
||||
github.com/go-logr/stdr v1.2.2/go.mod h1:mMo/vtBO5dYbehREoey6XUKy/eSumjCCveDpRre4VKE=
|
||||
github.com/go-ole/go-ole v1.2.6 h1:/Fpf6oFPoeFik9ty7siob0G6Ke8QvQEuVcuChpwXzpY=
|
||||
github.com/go-ole/go-ole v1.2.6/go.mod h1:pprOEPIfldk/42T2oK7lQ4v4JSDwmV0As9GaiUsvbm0=
|
||||
github.com/go-sql-driver/mysql v1.9.3 h1:U/N249h2WzJ3Ukj8SowVFjdtZKfu9vlLZxjPXV1aweo=
|
||||
github.com/go-sql-driver/mysql v1.9.3/go.mod h1:qn46aNg1333BRMNU69Lq93t8du/dwxI64Gl8i5p1WMU=
|
||||
github.com/golang-jwt/jwt/v5 v5.3.0 h1:pv4AsKCKKZuqlgs5sUmn4x8UlGa0kEVt/puTpKx9vvo=
|
||||
github.com/golang-jwt/jwt/v5 v5.3.0/go.mod h1:fxCRLWMO43lRc8nhHWY6LGqRcf+1gQWArsqaEUEa5bE=
|
||||
github.com/golang/protobuf v1.5.4 h1:i7eJL8qZTpSEXOPTxNKhASYpMn+8e5Q6AdndVa1dWek=
|
||||
github.com/golang/protobuf v1.5.4/go.mod h1:lnTiLA8Wa4RWRcIUkrtSVa5nRhsEGBg48fD6rSs7xps=
|
||||
github.com/google/go-cmp v0.5.6/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=
|
||||
github.com/google/go-cmp v0.7.0 h1:wk8382ETsv4JYUZwIsn6YpYiWiBsYLSJiTsyBybVuN8=
|
||||
github.com/google/go-cmp v0.7.0/go.mod h1:pXiqmnSA92OHEEa9HXL2W4E7lf9JzCmGVUdgjX3N/iU=
|
||||
github.com/google/martian/v3 v3.3.3 h1:DIhPTQrbPkgs2yJYdXU/eNACCG5DVQjySNRNlflZ9Fc=
|
||||
github.com/google/martian/v3 v3.3.3/go.mod h1:iEPrYcgCF7jA9OtScMFQyAlZZ4YXTKEtJ1E6RWzmBA0=
|
||||
github.com/google/pprof v0.0.0-20250317173921-a4b03ec1a45e h1:ijClszYn+mADRFY17kjQEVQ1XRhq2/JR1M3sGqeJoxs=
|
||||
github.com/google/pprof v0.0.0-20250317173921-a4b03ec1a45e/go.mod h1:boTsfXsheKC2y+lKOCMpSfarhxDeIzfZG1jqGcPl3cA=
|
||||
github.com/google/s2a-go v0.1.9 h1:LGD7gtMgezd8a/Xak7mEWL0PjoTQFvpRudN895yqKW0=
|
||||
github.com/google/s2a-go v0.1.9/go.mod h1:YA0Ei2ZQL3acow2O62kdp9UlnvMmU7kA6Eutn0dXayM=
|
||||
github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0=
|
||||
@ -135,6 +157,12 @@ github.com/googleapis/enterprise-certificate-proxy v0.3.7 h1:zrn2Ee/nWmHulBx5sAV
|
||||
github.com/googleapis/enterprise-certificate-proxy v0.3.7/go.mod h1:MkHOF77EYAE7qfSuSS9PU6g4Nt4e11cnsDUowfwewLA=
|
||||
github.com/googleapis/gax-go/v2 v2.15.0 h1:SyjDc1mGgZU5LncH8gimWo9lW1DtIfPibOG81vgd/bo=
|
||||
github.com/googleapis/gax-go/v2 v2.15.0/go.mod h1:zVVkkxAQHa1RQpg9z2AUCMnKhi0Qld9rcmyfL1OZhoc=
|
||||
github.com/hashicorp/errwrap v1.0.0 h1:hLrqtEDnRye3+sgx6z4qVLNuviH3MR5aQ0ykNJa/UYA=
|
||||
github.com/hashicorp/errwrap v1.0.0/go.mod h1:YH+1FKiLXxHSkmPseP+kNlulaMuP3n2brvKWEqk/Jc4=
|
||||
github.com/hashicorp/go-multierror v1.1.1 h1:H5DkEtf6CXdFp0N0Em5UCwQpXMWke8IA0+lD48awMYo=
|
||||
github.com/hashicorp/go-multierror v1.1.1/go.mod h1:iw975J/qwKPdAO1clOe2L8331t/9/fmwbPZ6JB6eMoM=
|
||||
github.com/hashicorp/golang-lru/v2 v2.0.7 h1:a+bsQ5rvGLjzHuww6tVxozPZFVghXaHOwFs4luLUK2k=
|
||||
github.com/hashicorp/golang-lru/v2 v2.0.7/go.mod h1:QeFd9opnmA6QUJc5vARoKUSoFhyfM2/ZepoAG6RGpeM=
|
||||
github.com/inconshreveable/mousetrap v1.1.0 h1:wN+x4NVGpMsO7ErUn/mUI3vEoE6Jt13X2s0bqwp9tc8=
|
||||
github.com/inconshreveable/mousetrap v1.1.0/go.mod h1:vpF70FUmC8bwa3OWnCshd2FqLfsEA9PFc4w1p2J65bw=
|
||||
github.com/jackc/pgpassfile v1.0.0 h1:/6Hmqy13Ss2zCq62VdNG8tM1wchn8zjSGOBJ6icpsIM=
|
||||
@ -145,32 +173,58 @@ github.com/jackc/pgx/v5 v5.7.6 h1:rWQc5FwZSPX58r1OQmkuaNicxdmExaEz5A2DO2hUuTk=
|
||||
github.com/jackc/pgx/v5 v5.7.6/go.mod h1:aruU7o91Tc2q2cFp5h4uP3f6ztExVpyVv88Xl/8Vl8M=
|
||||
github.com/jackc/puddle/v2 v2.2.2 h1:PR8nw+E/1w0GLuRFSmiioY6UooMp6KJv0/61nB7icHo=
|
||||
github.com/jackc/puddle/v2 v2.2.2/go.mod h1:vriiEXHvEE654aYKXXjOvZM39qJ0q+azkZFrfEOc3H4=
|
||||
github.com/klauspost/compress v1.18.3 h1:9PJRvfbmTabkOX8moIpXPbMMbYN60bWImDDU7L+/6zw=
|
||||
github.com/klauspost/compress v1.18.3/go.mod h1:R0h/fSBs8DE4ENlcrlib3PsXS61voFxhIs2DeRhCvJ4=
|
||||
github.com/klauspost/pgzip v1.2.6 h1:8RXeL5crjEUFnR2/Sn6GJNWtSQ3Dk8pq4CL3jvdDyjU=
|
||||
github.com/klauspost/pgzip v1.2.6/go.mod h1:Ch1tH69qFZu15pkjo5kYi6mth2Zzwzt50oCQKQE9RUs=
|
||||
github.com/kylelemons/godebug v1.1.0 h1:RPNrshWIDI6G2gRW9EHilWtl7Z6Sb1BR0xunSBf0SNc=
|
||||
github.com/kylelemons/godebug v1.1.0/go.mod h1:9/0rRGxNHcop5bhtWyNeEfOS8JIWk580+fNqagV/RAw=
|
||||
github.com/lucasb-eyer/go-colorful v1.2.0 h1:1nnpGOrhyZZuNyfu1QjKiUICQ74+3FNCN69Aj6K7nkY=
|
||||
github.com/lucasb-eyer/go-colorful v1.2.0/go.mod h1:R4dSotOR9KMtayYi1e77YzuveK+i7ruzyGqttikkLy0=
|
||||
github.com/lufia/plan9stats v0.0.0-20211012122336-39d0f177ccd0 h1:6E+4a0GO5zZEnZ81pIr0yLvtUWk2if982qA3F3QD6H4=
|
||||
github.com/lufia/plan9stats v0.0.0-20211012122336-39d0f177ccd0/go.mod h1:zJYVVT2jmtg6P3p1VtQj7WsuWi/y4VnjVBn7F8KPB3I=
|
||||
github.com/mattn/go-colorable v0.1.13 h1:fFA4WZxdEF4tXPZVKMLwD8oUnCTTo08duU7wxecdEvA=
|
||||
github.com/mattn/go-colorable v0.1.13/go.mod h1:7S9/ev0klgBDR4GtXTXX8a3vIGJpMovkB8vQcUbaXHg=
|
||||
github.com/mattn/go-isatty v0.0.16/go.mod h1:kYGgaQfpe5nmfYZH+SKPsOc2e4SrIfOl2e/yFXSvRLM=
|
||||
github.com/mattn/go-isatty v0.0.20 h1:xfD0iDuEKnDkl03q4limB+vH+GxLEtL/jb4xVJSWWEY=
|
||||
github.com/mattn/go-isatty v0.0.20/go.mod h1:W+V8PltTTMOvKvAeJH7IuucS94S2C6jfK/D7dTCTo3Y=
|
||||
github.com/mattn/go-localereader v0.0.1 h1:ygSAOl7ZXTx4RdPYinUpg6W99U8jWvWi9Ye2JC/oIi4=
|
||||
github.com/mattn/go-localereader v0.0.1/go.mod h1:8fBrzywKY7BI3czFoHkuzRoWE9C+EiG4R1k4Cjx5p88=
|
||||
github.com/mattn/go-runewidth v0.0.16 h1:E5ScNMtiwvlvB5paMFdw9p4kSQzbXFikJ5SQO6TULQc=
|
||||
github.com/mattn/go-runewidth v0.0.16/go.mod h1:Jdepj2loyihRzMpdS35Xk/zdY8IAYHsh153qUoGf23w=
|
||||
github.com/mattn/go-sqlite3 v1.14.32 h1:JD12Ag3oLy1zQA+BNn74xRgaBbdhbNIDYvQUEuuErjs=
|
||||
github.com/mattn/go-sqlite3 v1.14.32/go.mod h1:Uh1q+B4BYcTPb+yiD3kU8Ct7aC0hY9fxUwlHK0RXw+Y=
|
||||
github.com/mitchellh/colorstring v0.0.0-20190213212951-d06e56a500db h1:62I3jR2EmQ4l5rM/4FEfDWcRD+abF5XlKShorW5LRoQ=
|
||||
github.com/mitchellh/colorstring v0.0.0-20190213212951-d06e56a500db/go.mod h1:l0dey0ia/Uv7NcFFVbCLtqEBQbrT4OCwCSKTEv6enCw=
|
||||
github.com/muesli/ansi v0.0.0-20230316100256-276c6243b2f6 h1:ZK8zHtRHOkbHy6Mmr5D264iyp3TiX5OmNcI5cIARiQI=
|
||||
github.com/muesli/ansi v0.0.0-20230316100256-276c6243b2f6/go.mod h1:CJlz5H+gyd6CUWT45Oy4q24RdLyn7Md9Vj2/ldJBSIo=
|
||||
github.com/muesli/cancelreader v0.2.2 h1:3I4Kt4BQjOR54NavqnDogx/MIoWBFa0StPA8ELUXHmA=
|
||||
github.com/muesli/cancelreader v0.2.2/go.mod h1:3XuTXfFS2VjM+HTLZY9Ak0l6eUKfijIfMUZ4EgX0QYo=
|
||||
github.com/muesli/termenv v0.16.0 h1:S5AlUN9dENB57rsbnkPyfdGuWIlkmzJjbFf0Tf5FWUc=
|
||||
github.com/muesli/termenv v0.16.0/go.mod h1:ZRfOIKPFDYQoDFF4Olj7/QJbW60Ol/kL1pU3VfY/Cnk=
|
||||
github.com/ncruces/go-strftime v1.0.0 h1:HMFp8mLCTPp341M/ZnA4qaf7ZlsbTc+miZjCLOFAw7w=
|
||||
github.com/ncruces/go-strftime v1.0.0/go.mod h1:Fwc5htZGVVkseilnfgOVb9mKy6w1naJmn9CehxcKcls=
|
||||
github.com/pkg/browser v0.0.0-20240102092130-5ac0b6a4141c h1:+mdjkGKdHQG3305AYmdv1U2eRNDiU2ErMBj1gwrq8eQ=
|
||||
github.com/pkg/browser v0.0.0-20240102092130-5ac0b6a4141c/go.mod h1:7rwL4CYBLnjLxUqIJNnCWiEdr3bn6IUYi15bNlnbCCU=
|
||||
github.com/planetscale/vtprotobuf v0.6.1-0.20240319094008-0393e58bdf10 h1:GFCKgmp0tecUJ0sJuv4pzYCqS9+RGSn52M3FUwPs+uo=
|
||||
github.com/planetscale/vtprotobuf v0.6.1-0.20240319094008-0393e58bdf10/go.mod h1:t/avpk3KcrXxUnYOhZhMXJlSEyie6gQbtLq5NM3loB8=
|
||||
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
|
||||
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
|
||||
github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2 h1:Jamvg5psRIccs7FGNTlIRMkT8wgtp5eCXdBlqhYGL6U=
|
||||
github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
|
||||
github.com/power-devops/perfstat v0.0.0-20210106213030-5aafc221ea8c h1:ncq/mPwQF4JjgDlrVEn3C11VoGHZN7m8qihwgMEtzYw=
|
||||
github.com/power-devops/perfstat v0.0.0-20210106213030-5aafc221ea8c/go.mod h1:OmDBASR4679mdNQnz2pUhc2G8CO2JrUAVFDRBDP/hJE=
|
||||
github.com/remyoudompheng/bigfft v0.0.0-20230129092748-24d4a6f8daec h1:W09IVJc94icq4NjY3clb7Lk8O1qJ8BdBEF8z0ibU0rE=
|
||||
github.com/remyoudompheng/bigfft v0.0.0-20230129092748-24d4a6f8daec/go.mod h1:qqbHyh8v60DhA7CoWK5oRCqLrMHRGoxYCSS9EjAz6Eo=
|
||||
github.com/rivo/uniseg v0.2.0/go.mod h1:J6wj4VEh+S6ZtnVlnTBMWIodfgj8LQOQFoIToxlJtxc=
|
||||
github.com/rivo/uniseg v0.4.7 h1:WUdvkW8uEhrYfLC4ZzdpI2ztxP1I582+49Oc5Mq64VQ=
|
||||
github.com/rivo/uniseg v0.4.7/go.mod h1:FN3SvrM+Zdj16jyLfmOkMNblXMcoc8DfTHruCPUcx88=
|
||||
github.com/russross/blackfriday/v2 v2.1.0/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM=
|
||||
github.com/schollz/progressbar/v3 v3.19.0 h1:Ea18xuIRQXLAUidVDox3AbwfUhD0/1IvohyTutOIFoc=
|
||||
github.com/schollz/progressbar/v3 v3.19.0/go.mod h1:IsO3lpbaGuzh8zIMzgY3+J8l4C8GjO0Y9S69eFvNsec=
|
||||
github.com/shirou/gopsutil/v3 v3.24.5 h1:i0t8kL+kQTvpAYToeuiVk3TgDeKOFioZO3Ztz/iZ9pI=
|
||||
github.com/shirou/gopsutil/v3 v3.24.5/go.mod h1:bsoOS1aStSs9ErQ1WWfxllSeS1K5D+U30r2NfcubMVk=
|
||||
github.com/sirupsen/logrus v1.9.3 h1:dueUQJ1C2q9oE3F7wvmSGAaVtTmUizReu6fjN8uqzbQ=
|
||||
github.com/sirupsen/logrus v1.9.3/go.mod h1:naHLuLoDiP4jHNo9R0sCBMtWGeIprob74mVsIT4qYEQ=
|
||||
github.com/spf13/afero v1.15.0 h1:b/YBCLWAJdFWJTN9cLhiXXcD7mzKn9Dm86dNnfyQw1I=
|
||||
github.com/spf13/afero v1.15.0/go.mod h1:NC2ByUVxtQs4b3sIUphxK0NioZnmxgyCrfzeuq8lxMg=
|
||||
github.com/spf13/cobra v1.10.1 h1:lJeBwCfmrnXthfAupyUTzJ/J4Nc1RsHC/mSRU2dll/s=
|
||||
github.com/spf13/cobra v1.10.1/go.mod h1:7SmJGaTHFVBY0jW4NXGluQoLvhqFQM+6XSKD+P4XaB0=
|
||||
github.com/spf13/pflag v1.0.9 h1:9exaQaMOCwffKiiiYk6/BndUBv+iRViNW+4lEMi0PvY=
|
||||
@ -179,13 +233,17 @@ github.com/spiffe/go-spiffe/v2 v2.5.0 h1:N2I01KCUkv1FAjZXJMwh95KK1ZIQLYbPfhaxw8W
|
||||
github.com/spiffe/go-spiffe/v2 v2.5.0/go.mod h1:P+NxobPc6wXhVtINNtFjNWGBTreew1GBUCwT2wPmb7g=
|
||||
github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
|
||||
github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI=
|
||||
github.com/stretchr/testify v1.6.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
|
||||
github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
|
||||
github.com/stretchr/testify v1.8.1 h1:w7B6lhMri9wdJUVmEZPGGhZzrYTPvgJArz7wNPgYKsk=
|
||||
github.com/stretchr/testify v1.8.1/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4=
|
||||
github.com/stretchr/testify v1.11.1 h1:7s2iGBzp5EwR7/aIZr8ao5+dra3wiQyKjjFuvgVKu7U=
|
||||
github.com/stretchr/testify v1.11.1/go.mod h1:wZwfW3scLgRK+23gO65QZefKpKQRnfz6sD981Nm4B6U=
|
||||
github.com/tklauser/go-sysconf v0.3.12 h1:0QaGUFOdQaIVdPgfITYzaTegZvdCjmYO52cSFAEVmqU=
|
||||
github.com/tklauser/go-sysconf v0.3.12/go.mod h1:Ho14jnntGE1fpdOqQEEaiKRpvIavV0hSfmBq8nJbHYI=
|
||||
github.com/tklauser/numcpus v0.6.1 h1:ng9scYS7az0Bk4OZLvrNXNSAO2Pxr1XXRAPyjhIx+Fk=
|
||||
github.com/tklauser/numcpus v0.6.1/go.mod h1:1XfjsgE2zo8GVw7POkMbHENHzVg3GzmoZ9fESEdAacY=
|
||||
github.com/xo/terminfo v0.0.0-20220910002029-abceb7e1c41e h1:JVG44RsyaB9T2KIHavMF/ppJZNG9ZpyihvCd0w101no=
|
||||
github.com/xo/terminfo v0.0.0-20220910002029-abceb7e1c41e/go.mod h1:RbqR21r5mrJuqunuUZ/Dhy/avygyECGrLceyNeo4LiM=
|
||||
github.com/yusufpapurcu/wmi v1.2.4 h1:zFUKzehAFReQwLys1b/iSMl+JQGSCSjtVqQn9bBrPo0=
|
||||
github.com/yusufpapurcu/wmi v1.2.4/go.mod h1:SBZ9tNy3G9/m5Oi98Zks0QjeHVDvuK0qfxQmPyzfmi0=
|
||||
github.com/zeebo/errs v1.4.0 h1:XNdoD/RRMKP7HD0UhJnIzUy74ISdGGxURlYG8HSWSfM=
|
||||
github.com/zeebo/errs v1.4.0/go.mod h1:sgbWHsvVuTPHcqJJGQ1WhI5KbWlHYz+2+2C/LSEtCw4=
|
||||
go.opentelemetry.io/auto/sdk v1.1.0 h1:cH53jehLUN6UFLY71z+NDOiNJqDdPRaXzTel0sJySYA=
|
||||
@ -198,6 +256,8 @@ go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.61.0 h1:F7Jx+6h
|
||||
go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.61.0/go.mod h1:UHB22Z8QsdRDrnAtX4PntOl36ajSxcdUMt1sF7Y6E7Q=
|
||||
go.opentelemetry.io/otel v1.37.0 h1:9zhNfelUvx0KBfu/gb+ZgeAfAgtWrfHJZcAqFC228wQ=
|
||||
go.opentelemetry.io/otel v1.37.0/go.mod h1:ehE/umFRLnuLa/vSccNq9oS1ErUlkkK71gMcN34UG8I=
|
||||
go.opentelemetry.io/otel/exporters/stdout/stdoutmetric v1.36.0 h1:rixTyDGXFxRy1xzhKrotaHy3/KXdPhlWARrCgK+eqUY=
|
||||
go.opentelemetry.io/otel/exporters/stdout/stdoutmetric v1.36.0/go.mod h1:dowW6UsM9MKbJq5JTz2AMVp3/5iW5I/TStsk8S+CfHw=
|
||||
go.opentelemetry.io/otel/metric v1.37.0 h1:mvwbQS5m0tbmqML4NqK+e3aDiO02vsf/WgbsdpcPoZE=
|
||||
go.opentelemetry.io/otel/metric v1.37.0/go.mod h1:04wGrZurHYKOc+RKeye86GwKiTb9FKm1WHtO+4EVr2E=
|
||||
go.opentelemetry.io/otel/sdk v1.37.0 h1:ItB0QUqnjesGRvNcmAcU0LyvkVyGJ2xftD29bWdDvKI=
|
||||
@ -206,43 +266,39 @@ go.opentelemetry.io/otel/sdk/metric v1.37.0 h1:90lI228XrB9jCMuSdA0673aubgRobVZFh
|
||||
go.opentelemetry.io/otel/sdk/metric v1.37.0/go.mod h1:cNen4ZWfiD37l5NhS+Keb5RXVWZWpRE+9WyVCpbo5ps=
|
||||
go.opentelemetry.io/otel/trace v1.37.0 h1:HLdcFNbRQBE2imdSEgm/kwqmQj1Or1l/7bW6mxVK7z4=
|
||||
go.opentelemetry.io/otel/trace v1.37.0/go.mod h1:TlgrlQ+PtQO5XFerSPUYG0JSgGyryXewPGyayAWSBS0=
|
||||
golang.org/x/crypto v0.37.0 h1:kJNSjF/Xp7kU0iB2Z+9viTPMW4EqqsrywMXLJOOsXSE=
|
||||
golang.org/x/crypto v0.37.0/go.mod h1:vg+k43peMZ0pUMhYmVAWysMK35e6ioLh3wB8ZCAfbVc=
|
||||
golang.org/x/crypto v0.41.0 h1:WKYxWedPGCTVVl5+WHSSrOBT0O8lx32+zxmHxijgXp4=
|
||||
golang.org/x/crypto v0.41.0/go.mod h1:pO5AFd7FA68rFak7rOAGVuygIISepHftHnr8dr6+sUc=
|
||||
golang.org/x/crypto v0.43.0 h1:dduJYIi3A3KOfdGOHX8AVZ/jGiyPa3IbBozJ5kNuE04=
|
||||
golang.org/x/crypto v0.43.0/go.mod h1:BFbav4mRNlXJL4wNeejLpWxB7wMbc79PdRGhWKncxR0=
|
||||
golang.org/x/exp v0.0.0-20220909182711-5c715a9e8561 h1:MDc5xs78ZrZr3HMQugiXOAkSZtfTpbJLDr/lwfgO53E=
|
||||
golang.org/x/exp v0.0.0-20220909182711-5c715a9e8561/go.mod h1:cyybsKvd6eL0RnXn6p/Grxp8F5bW7iYuBgsNCOHpMYE=
|
||||
golang.org/x/net v0.43.0 h1:lat02VYK2j4aLzMzecihNvTlJNQUq316m2Mr9rnM6YE=
|
||||
golang.org/x/net v0.43.0/go.mod h1:vhO1fvI4dGsIjh73sWfUVjj3N7CA9WkKJNQm2svM6Jg=
|
||||
golang.org/x/exp v0.0.0-20251023183803-a4bb9ffd2546 h1:mgKeJMpvi0yx/sU5GsxQ7p6s2wtOnGAHZWCHUM4KGzY=
|
||||
golang.org/x/exp v0.0.0-20251023183803-a4bb9ffd2546/go.mod h1:j/pmGrbnkbPtQfxEe5D0VQhZC6qKbfKifgD0oM7sR70=
|
||||
golang.org/x/mod v0.29.0 h1:HV8lRxZC4l2cr3Zq1LvtOsi/ThTgWnUk/y64QSs8GwA=
|
||||
golang.org/x/mod v0.29.0/go.mod h1:NyhrlYXJ2H4eJiRy/WDBO6HMqZQ6q9nk4JzS3NuCK+w=
|
||||
golang.org/x/net v0.46.0 h1:giFlY12I07fugqwPuWJi68oOnpfqFnJIJzaIIm2JVV4=
|
||||
golang.org/x/net v0.46.0/go.mod h1:Q9BGdFy1y4nkUwiLvT5qtyhAnEHgnQ/zd8PfU6nc210=
|
||||
golang.org/x/oauth2 v0.33.0 h1:4Q+qn+E5z8gPRJfmRy7C2gGG3T4jIprK6aSYgTXGRpo=
|
||||
golang.org/x/oauth2 v0.33.0/go.mod h1:lzm5WQJQwKZ3nwavOZ3IS5Aulzxi68dUSgRHujetwEA=
|
||||
golang.org/x/sync v0.13.0 h1:AauUjRAJ9OSnvULf/ARrrVywoJDy0YS2AwQ98I37610=
|
||||
golang.org/x/sync v0.13.0/go.mod h1:1dzgHSNfp02xaA81J2MS99Qcpr2w7fw1gpm99rleRqA=
|
||||
golang.org/x/sync v0.16.0 h1:ycBJEhp9p4vXvUZNszeOq0kGTPghopOL8q0fq3vstxw=
|
||||
golang.org/x/sync v0.16.0/go.mod h1:1dzgHSNfp02xaA81J2MS99Qcpr2w7fw1gpm99rleRqA=
|
||||
golang.org/x/sync v0.18.0 h1:kr88TuHDroi+UVf+0hZnirlk8o8T+4MrK6mr60WkH/I=
|
||||
golang.org/x/sync v0.18.0/go.mod h1:9KTHXmSnoGruLpwFjVSX0lNNA75CykiMECbovNTZqGI=
|
||||
golang.org/x/sync v0.19.0 h1:vV+1eWNmZ5geRlYjzm2adRgW2/mcpevXNg50YZtPCE4=
|
||||
golang.org/x/sync v0.19.0/go.mod h1:9KTHXmSnoGruLpwFjVSX0lNNA75CykiMECbovNTZqGI=
|
||||
golang.org/x/sys v0.0.0-20190916202348-b4ddaad3f8a3/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
|
||||
golang.org/x/sys v0.0.0-20201204225414-ed752295db88/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
|
||||
golang.org/x/sys v0.0.0-20210809222454-d867a43fc93e/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
|
||||
golang.org/x/sys v0.0.0-20220715151400-c0bba94af5f8/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
|
||||
golang.org/x/sys v0.0.0-20220811171246-fbc7d0a398ab/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
|
||||
golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
|
||||
golang.org/x/sys v0.36.0 h1:KVRy2GtZBrk1cBYA7MKu5bEZFxQk4NIDV6RLVcC8o0k=
|
||||
golang.org/x/sys v0.36.0/go.mod h1:OgkHotnGiDImocRcuBABYBEXf8A9a87e/uXjp9XT3ks=
|
||||
golang.org/x/sys v0.37.0 h1:fdNQudmxPjkdUTPnLn5mdQv7Zwvbvpaxqs831goi9kQ=
|
||||
golang.org/x/sys v0.37.0/go.mod h1:OgkHotnGiDImocRcuBABYBEXf8A9a87e/uXjp9XT3ks=
|
||||
golang.org/x/sys v0.8.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
|
||||
golang.org/x/sys v0.11.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
|
||||
golang.org/x/sys v0.38.0 h1:3yZWxaJjBmCWXqhN1qh02AkOnCQ1poK6oF+a7xWL6Gc=
|
||||
golang.org/x/sys v0.38.0/go.mod h1:OgkHotnGiDImocRcuBABYBEXf8A9a87e/uXjp9XT3ks=
|
||||
golang.org/x/text v0.24.0 h1:dd5Bzh4yt5KYA8f9CJHCP4FB4D51c2c6JvN37xJJkJ0=
|
||||
golang.org/x/text v0.24.0/go.mod h1:L8rBsPeo2pSS+xqN0d5u2ikmjtmoJbDBT1b7nHvFCdU=
|
||||
golang.org/x/text v0.28.0 h1:rhazDwis8INMIwQ4tpjLDzUhx6RlXqZNPEM0huQojng=
|
||||
golang.org/x/text v0.28.0/go.mod h1:U8nCwOR8jO/marOQ0QbDiOngZVEBB7MAiitBuMjXiNU=
|
||||
golang.org/x/term v0.36.0 h1:zMPR+aF8gfksFprF/Nc/rd1wRS1EI6nDBGyWAvDzx2Q=
|
||||
golang.org/x/term v0.36.0/go.mod h1:Qu394IJq6V6dCBRgwqshf3mPF85AqzYEzofzRdZkWss=
|
||||
golang.org/x/text v0.30.0 h1:yznKA/E9zq54KzlzBEAWn1NXSQ8DIp/NYMy88xJjl4k=
|
||||
golang.org/x/text v0.30.0/go.mod h1:yDdHFIX9t+tORqspjENWgzaCVXgk0yYnYuSZ8UzzBVM=
|
||||
golang.org/x/time v0.14.0 h1:MRx4UaLrDotUKUdCIqzPC48t1Y9hANFKIRpNx+Te8PI=
|
||||
golang.org/x/time v0.14.0/go.mod h1:eL/Oa2bBBK0TkX57Fyni+NgnyQQN4LitPmob2Hjnqw4=
|
||||
golang.org/x/tools v0.38.0 h1:Hx2Xv8hISq8Lm16jvBZ2VQf+RLmbd7wVUsALibYI/IQ=
|
||||
golang.org/x/tools v0.38.0/go.mod h1:yEsQ/d/YK8cjh0L6rZlY8tgtlKiBNTL14pGDJPJpYQs=
|
||||
golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
|
||||
gonum.org/v1/gonum v0.16.0 h1:5+ul4Swaf3ESvrOnidPp4GZbzf0mxVQpDCYUQE7OJfk=
|
||||
gonum.org/v1/gonum v0.16.0/go.mod h1:fef3am4MQ93R2HHpKnLk4/Tbh/s0+wqD5nfa6Pnwy4E=
|
||||
google.golang.org/api v0.256.0 h1:u6Khm8+F9sxbCTYNoBHg6/Hwv0N/i+V94MvkOSor6oI=
|
||||
google.golang.org/api v0.256.0/go.mod h1:KIgPhksXADEKJlnEoRa9qAII4rXcy40vfI8HRqcU964=
|
||||
google.golang.org/genproto v0.0.0-20250603155806-513f23925822 h1:rHWScKit0gvAPuOnu87KpaYtjK5zBMLcULh7gxkCXu4=
|
||||
@ -259,3 +315,31 @@ gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8
|
||||
gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
|
||||
gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
|
||||
gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
|
||||
modernc.org/cc/v4 v4.27.1 h1:9W30zRlYrefrDV2JE2O8VDtJ1yPGownxciz5rrbQZis=
|
||||
modernc.org/cc/v4 v4.27.1/go.mod h1:uVtb5OGqUKpoLWhqwNQo/8LwvoiEBLvZXIQ/SmO6mL0=
|
||||
modernc.org/ccgo/v4 v4.30.1 h1:4r4U1J6Fhj98NKfSjnPUN7Ze2c6MnAdL0hWw6+LrJpc=
|
||||
modernc.org/ccgo/v4 v4.30.1/go.mod h1:bIOeI1JL54Utlxn+LwrFyjCx2n2RDiYEaJVSrgdrRfM=
|
||||
modernc.org/fileutil v1.3.40 h1:ZGMswMNc9JOCrcrakF1HrvmergNLAmxOPjizirpfqBA=
|
||||
modernc.org/fileutil v1.3.40/go.mod h1:HxmghZSZVAz/LXcMNwZPA/DRrQZEVP9VX0V4LQGQFOc=
|
||||
modernc.org/gc/v2 v2.6.5 h1:nyqdV8q46KvTpZlsw66kWqwXRHdjIlJOhG6kxiV/9xI=
|
||||
modernc.org/gc/v2 v2.6.5/go.mod h1:YgIahr1ypgfe7chRuJi2gD7DBQiKSLMPgBQe9oIiito=
|
||||
modernc.org/gc/v3 v3.1.1 h1:k8T3gkXWY9sEiytKhcgyiZ2L0DTyCQ/nvX+LoCljoRE=
|
||||
modernc.org/gc/v3 v3.1.1/go.mod h1:HFK/6AGESC7Ex+EZJhJ2Gni6cTaYpSMmU/cT9RmlfYY=
|
||||
modernc.org/goabi0 v0.2.0 h1:HvEowk7LxcPd0eq6mVOAEMai46V+i7Jrj13t4AzuNks=
|
||||
modernc.org/goabi0 v0.2.0/go.mod h1:CEFRnnJhKvWT1c1JTI3Avm+tgOWbkOu5oPA8eH8LnMI=
|
||||
modernc.org/libc v1.67.6 h1:eVOQvpModVLKOdT+LvBPjdQqfrZq+pC39BygcT+E7OI=
|
||||
modernc.org/libc v1.67.6/go.mod h1:JAhxUVlolfYDErnwiqaLvUqc8nfb2r6S6slAgZOnaiE=
|
||||
modernc.org/mathutil v1.7.1 h1:GCZVGXdaN8gTqB1Mf/usp1Y/hSqgI2vAGGP4jZMCxOU=
|
||||
modernc.org/mathutil v1.7.1/go.mod h1:4p5IwJITfppl0G4sUEDtCr4DthTaT47/N3aT6MhfgJg=
|
||||
modernc.org/memory v1.11.0 h1:o4QC8aMQzmcwCK3t3Ux/ZHmwFPzE6hf2Y5LbkRs+hbI=
|
||||
modernc.org/memory v1.11.0/go.mod h1:/JP4VbVC+K5sU2wZi9bHoq2MAkCnrt2r98UGeSK7Mjw=
|
||||
modernc.org/opt v0.1.4 h1:2kNGMRiUjrp4LcaPuLY2PzUfqM/w9N23quVwhKt5Qm8=
|
||||
modernc.org/opt v0.1.4/go.mod h1:03fq9lsNfvkYSfxrfUhZCWPk1lm4cq4N+Bh//bEtgns=
|
||||
modernc.org/sortutil v1.2.1 h1:+xyoGf15mM3NMlPDnFqrteY07klSFxLElE2PVuWIJ7w=
|
||||
modernc.org/sortutil v1.2.1/go.mod h1:7ZI3a3REbai7gzCLcotuw9AC4VZVpYMjDzETGsSMqJE=
|
||||
modernc.org/sqlite v1.44.3 h1:+39JvV/HWMcYslAwRxHb8067w+2zowvFOUrOWIy9PjY=
|
||||
modernc.org/sqlite v1.44.3/go.mod h1:CzbrU2lSB1DKUusvwGz7rqEKIq+NUd8GWuBBZDs9/nA=
|
||||
modernc.org/strutil v1.2.1 h1:UneZBkQA+DX2Rp35KcM69cSsNES9ly8mQWD71HKlOA0=
|
||||
modernc.org/strutil v1.2.1/go.mod h1:EHkiggD70koQxjVdSBM3JKM7k6L0FbGE5eymy9i3B9A=
|
||||
modernc.org/token v1.1.0 h1:Xl7Ap9dKaEs5kLoOQeQmPWevfnk/DM5qcLcYlA8ys6Y=
|
||||
modernc.org/token v1.1.0/go.mod h1:UGzOrNV1mAFSEB63lOFHIpNRUVMvYTc6yu1SMY/XTDM=
|
||||
|
||||
1474
grafana/dbbackup-dashboard.json
Normal file
1474
grafana/dbbackup-dashboard.json
Normal file
@ -0,0 +1,1474 @@
|
||||
{
|
||||
"annotations": {
|
||||
"list": [
|
||||
{
|
||||
"builtIn": 1,
|
||||
"datasource": {
|
||||
"type": "grafana",
|
||||
"uid": "-- Grafana --"
|
||||
},
|
||||
"enable": true,
|
||||
"hide": true,
|
||||
"iconColor": "rgba(0, 211, 255, 1)",
|
||||
"name": "Annotations & Alerts",
|
||||
"type": "dashboard"
|
||||
}
|
||||
]
|
||||
},
|
||||
"editable": true,
|
||||
"fiscalYearStartMonth": 0,
|
||||
"graphTooltip": 0,
|
||||
"id": null,
|
||||
"links": [],
|
||||
"liveNow": false,
|
||||
"panels": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "${DS_PROMETHEUS}"
|
||||
},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
"mode": "thresholds"
|
||||
},
|
||||
"mappings": [
|
||||
{
|
||||
"options": {
|
||||
"0": {
|
||||
"color": "red",
|
||||
"index": 1,
|
||||
"text": "FAILED"
|
||||
},
|
||||
"1": {
|
||||
"color": "green",
|
||||
"index": 0,
|
||||
"text": "SUCCESS"
|
||||
}
|
||||
},
|
||||
"type": "value"
|
||||
}
|
||||
],
|
||||
"thresholds": {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "red",
|
||||
"value": null
|
||||
},
|
||||
{
|
||||
"color": "green",
|
||||
"value": 1
|
||||
}
|
||||
]
|
||||
}
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 4,
|
||||
"w": 6,
|
||||
"x": 0,
|
||||
"y": 0
|
||||
},
|
||||
"id": 1,
|
||||
"options": {
|
||||
"colorMode": "background",
|
||||
"graphMode": "none",
|
||||
"justifyMode": "auto",
|
||||
"orientation": "auto",
|
||||
"reduceOptions": {
|
||||
"calcs": [
|
||||
"lastNotNull"
|
||||
],
|
||||
"fields": "",
|
||||
"values": false
|
||||
},
|
||||
"textMode": "auto"
|
||||
},
|
||||
"pluginVersion": "10.2.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "${DS_PROMETHEUS}"
|
||||
},
|
||||
"editorMode": "code",
|
||||
"expr": "dbbackup_rpo_seconds{instance=~\"$instance\"} < bool 604800",
|
||||
"legendFormat": "{{database}}",
|
||||
"range": true,
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"title": "Last Backup Status",
|
||||
"type": "stat"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "${DS_PROMETHEUS}"
|
||||
},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
"mode": "thresholds"
|
||||
},
|
||||
"mappings": [],
|
||||
"thresholds": {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "green",
|
||||
"value": null
|
||||
},
|
||||
{
|
||||
"color": "yellow",
|
||||
"value": 43200
|
||||
},
|
||||
{
|
||||
"color": "red",
|
||||
"value": 86400
|
||||
}
|
||||
]
|
||||
},
|
||||
"unit": "s"
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 4,
|
||||
"w": 6,
|
||||
"x": 6,
|
||||
"y": 0
|
||||
},
|
||||
"id": 2,
|
||||
"options": {
|
||||
"colorMode": "value",
|
||||
"graphMode": "area",
|
||||
"justifyMode": "auto",
|
||||
"orientation": "auto",
|
||||
"reduceOptions": {
|
||||
"calcs": [
|
||||
"lastNotNull"
|
||||
],
|
||||
"fields": "",
|
||||
"values": false
|
||||
},
|
||||
"textMode": "auto"
|
||||
},
|
||||
"pluginVersion": "10.2.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "${DS_PROMETHEUS}"
|
||||
},
|
||||
"editorMode": "code",
|
||||
"expr": "dbbackup_rpo_seconds{instance=~\"$instance\"}",
|
||||
"legendFormat": "{{database}}",
|
||||
"range": true,
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"title": "Time Since Last Backup",
|
||||
"type": "stat"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "${DS_PROMETHEUS}"
|
||||
},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
"mode": "thresholds"
|
||||
},
|
||||
"mappings": [],
|
||||
"thresholds": {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "green",
|
||||
"value": null
|
||||
}
|
||||
]
|
||||
}
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 4,
|
||||
"w": 6,
|
||||
"x": 12,
|
||||
"y": 0
|
||||
},
|
||||
"id": 3,
|
||||
"options": {
|
||||
"colorMode": "value",
|
||||
"graphMode": "none",
|
||||
"justifyMode": "auto",
|
||||
"orientation": "auto",
|
||||
"reduceOptions": {
|
||||
"calcs": [
|
||||
"lastNotNull"
|
||||
],
|
||||
"fields": "",
|
||||
"values": false
|
||||
},
|
||||
"textMode": "auto"
|
||||
},
|
||||
"pluginVersion": "10.2.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "${DS_PROMETHEUS}"
|
||||
},
|
||||
"editorMode": "code",
|
||||
"expr": "dbbackup_backup_total{instance=~\"$instance\", status=\"success\"}",
|
||||
"legendFormat": "{{database}}",
|
||||
"range": true,
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"title": "Total Successful Backups",
|
||||
"type": "stat"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "${DS_PROMETHEUS}"
|
||||
},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
"mode": "thresholds"
|
||||
},
|
||||
"mappings": [],
|
||||
"thresholds": {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "green",
|
||||
"value": null
|
||||
},
|
||||
{
|
||||
"color": "red",
|
||||
"value": 1
|
||||
}
|
||||
]
|
||||
}
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 4,
|
||||
"w": 6,
|
||||
"x": 18,
|
||||
"y": 0
|
||||
},
|
||||
"id": 4,
|
||||
"options": {
|
||||
"colorMode": "value",
|
||||
"graphMode": "none",
|
||||
"justifyMode": "auto",
|
||||
"orientation": "auto",
|
||||
"reduceOptions": {
|
||||
"calcs": [
|
||||
"lastNotNull"
|
||||
],
|
||||
"fields": "",
|
||||
"values": false
|
||||
},
|
||||
"textMode": "auto"
|
||||
},
|
||||
"pluginVersion": "10.2.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "${DS_PROMETHEUS}"
|
||||
},
|
||||
"editorMode": "code",
|
||||
"expr": "dbbackup_backup_total{instance=~\"$instance\", status=\"failure\"}",
|
||||
"legendFormat": "{{database}}",
|
||||
"range": true,
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"title": "Total Failed Backups",
|
||||
"type": "stat"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "${DS_PROMETHEUS}"
|
||||
},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
"mode": "palette-classic"
|
||||
},
|
||||
"custom": {
|
||||
"axisCenteredZero": false,
|
||||
"axisColorMode": "text",
|
||||
"axisLabel": "",
|
||||
"axisPlacement": "auto",
|
||||
"barAlignment": 0,
|
||||
"drawStyle": "line",
|
||||
"fillOpacity": 10,
|
||||
"gradientMode": "none",
|
||||
"hideFrom": {
|
||||
"legend": false,
|
||||
"tooltip": false,
|
||||
"viz": false
|
||||
},
|
||||
"insertNulls": false,
|
||||
"lineInterpolation": "linear",
|
||||
"lineWidth": 1,
|
||||
"pointSize": 5,
|
||||
"scaleDistribution": {
|
||||
"type": "linear"
|
||||
},
|
||||
"showPoints": "auto",
|
||||
"spanNulls": false,
|
||||
"stacking": {
|
||||
"group": "A",
|
||||
"mode": "none"
|
||||
},
|
||||
"thresholdsStyle": {
|
||||
"mode": "line"
|
||||
}
|
||||
},
|
||||
"mappings": [],
|
||||
"thresholds": {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "green",
|
||||
"value": null
|
||||
},
|
||||
{
|
||||
"color": "red",
|
||||
"value": 86400
|
||||
}
|
||||
]
|
||||
},
|
||||
"unit": "s"
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 8,
|
||||
"w": 12,
|
||||
"x": 0,
|
||||
"y": 4
|
||||
},
|
||||
"id": 5,
|
||||
"options": {
|
||||
"legend": {
|
||||
"calcs": [],
|
||||
"displayMode": "list",
|
||||
"placement": "bottom",
|
||||
"showLegend": true
|
||||
},
|
||||
"tooltip": {
|
||||
"mode": "single",
|
||||
"sort": "none"
|
||||
}
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "${DS_PROMETHEUS}"
|
||||
},
|
||||
"editorMode": "code",
|
||||
"expr": "dbbackup_rpo_seconds{instance=~\"$instance\"}",
|
||||
"legendFormat": "{{instance}} - {{database}}",
|
||||
"range": true,
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"title": "RPO Over Time",
|
||||
"type": "timeseries"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "${DS_PROMETHEUS}"
|
||||
},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
"mode": "palette-classic"
|
||||
},
|
||||
"custom": {
|
||||
"axisCenteredZero": false,
|
||||
"axisColorMode": "text",
|
||||
"axisLabel": "",
|
||||
"axisPlacement": "auto",
|
||||
"barAlignment": 0,
|
||||
"drawStyle": "bars",
|
||||
"fillOpacity": 100,
|
||||
"gradientMode": "none",
|
||||
"hideFrom": {
|
||||
"legend": false,
|
||||
"tooltip": false,
|
||||
"viz": false
|
||||
},
|
||||
"insertNulls": false,
|
||||
"lineInterpolation": "linear",
|
||||
"lineWidth": 1,
|
||||
"pointSize": 5,
|
||||
"scaleDistribution": {
|
||||
"type": "linear"
|
||||
},
|
||||
"showPoints": "never",
|
||||
"spanNulls": false,
|
||||
"stacking": {
|
||||
"group": "A",
|
||||
"mode": "none"
|
||||
},
|
||||
"thresholdsStyle": {
|
||||
"mode": "off"
|
||||
}
|
||||
},
|
||||
"mappings": [],
|
||||
"thresholds": {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "green",
|
||||
"value": null
|
||||
}
|
||||
]
|
||||
},
|
||||
"unit": "bytes"
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 8,
|
||||
"w": 12,
|
||||
"x": 12,
|
||||
"y": 4
|
||||
},
|
||||
"id": 6,
|
||||
"options": {
|
||||
"legend": {
|
||||
"calcs": [],
|
||||
"displayMode": "list",
|
||||
"placement": "bottom",
|
||||
"showLegend": true
|
||||
},
|
||||
"tooltip": {
|
||||
"mode": "single",
|
||||
"sort": "none"
|
||||
}
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "${DS_PROMETHEUS}"
|
||||
},
|
||||
"editorMode": "code",
|
||||
"expr": "dbbackup_last_backup_size_bytes{instance=~\"$instance\"}",
|
||||
"legendFormat": "{{instance}} - {{database}}",
|
||||
"range": true,
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"title": "Backup Size",
|
||||
"type": "timeseries"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "${DS_PROMETHEUS}"
|
||||
},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
"mode": "palette-classic"
|
||||
},
|
||||
"custom": {
|
||||
"axisCenteredZero": false,
|
||||
"axisColorMode": "text",
|
||||
"axisLabel": "",
|
||||
"axisPlacement": "auto",
|
||||
"barAlignment": 0,
|
||||
"drawStyle": "line",
|
||||
"fillOpacity": 10,
|
||||
"gradientMode": "none",
|
||||
"hideFrom": {
|
||||
"legend": false,
|
||||
"tooltip": false,
|
||||
"viz": false
|
||||
},
|
||||
"insertNulls": false,
|
||||
"lineInterpolation": "linear",
|
||||
"lineWidth": 1,
|
||||
"pointSize": 5,
|
||||
"scaleDistribution": {
|
||||
"type": "linear"
|
||||
},
|
||||
"showPoints": "auto",
|
||||
"spanNulls": false,
|
||||
"stacking": {
|
||||
"group": "A",
|
||||
"mode": "none"
|
||||
},
|
||||
"thresholdsStyle": {
|
||||
"mode": "off"
|
||||
}
|
||||
},
|
||||
"mappings": [],
|
||||
"thresholds": {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "green",
|
||||
"value": null
|
||||
}
|
||||
]
|
||||
},
|
||||
"unit": "s"
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 8,
|
||||
"w": 12,
|
||||
"x": 0,
|
||||
"y": 12
|
||||
},
|
||||
"id": 7,
|
||||
"options": {
|
||||
"legend": {
|
||||
"calcs": [],
|
||||
"displayMode": "list",
|
||||
"placement": "bottom",
|
||||
"showLegend": true
|
||||
},
|
||||
"tooltip": {
|
||||
"mode": "single",
|
||||
"sort": "none"
|
||||
}
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "${DS_PROMETHEUS}"
|
||||
},
|
||||
"editorMode": "code",
|
||||
"expr": "dbbackup_last_backup_duration_seconds{instance=~\"$instance\"}",
|
||||
"legendFormat": "{{instance}} - {{database}}",
|
||||
"range": true,
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"title": "Backup Duration",
|
||||
"type": "timeseries"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "${DS_PROMETHEUS}"
|
||||
},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
"mode": "thresholds"
|
||||
},
|
||||
"custom": {
|
||||
"align": "auto",
|
||||
"cellOptions": {
|
||||
"type": "auto"
|
||||
},
|
||||
"inspect": false
|
||||
},
|
||||
"mappings": [],
|
||||
"thresholds": {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "green",
|
||||
"value": null
|
||||
}
|
||||
]
|
||||
}
|
||||
},
|
||||
"overrides": [
|
||||
{
|
||||
"matcher": {
|
||||
"id": "byName",
|
||||
"options": "Status"
|
||||
},
|
||||
"properties": [
|
||||
{
|
||||
"id": "mappings",
|
||||
"value": [
|
||||
{
|
||||
"options": {
|
||||
"0": {
|
||||
"color": "red",
|
||||
"index": 1,
|
||||
"text": "FAILED"
|
||||
},
|
||||
"1": {
|
||||
"color": "green",
|
||||
"index": 0,
|
||||
"text": "SUCCESS"
|
||||
}
|
||||
},
|
||||
"type": "value"
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"id": "custom.cellOptions",
|
||||
"value": {
|
||||
"mode": "basic",
|
||||
"type": "color-background"
|
||||
}
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"matcher": {
|
||||
"id": "byName",
|
||||
"options": "RPO"
|
||||
},
|
||||
"properties": [
|
||||
{
|
||||
"id": "unit",
|
||||
"value": "s"
|
||||
},
|
||||
{
|
||||
"id": "thresholds",
|
||||
"value": {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "green",
|
||||
"value": null
|
||||
},
|
||||
{
|
||||
"color": "yellow",
|
||||
"value": 43200
|
||||
},
|
||||
{
|
||||
"color": "red",
|
||||
"value": 86400
|
||||
}
|
||||
]
|
||||
}
|
||||
},
|
||||
{
|
||||
"id": "custom.cellOptions",
|
||||
"value": {
|
||||
"mode": "basic",
|
||||
"type": "color-background"
|
||||
}
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"matcher": {
|
||||
"id": "byName",
|
||||
"options": "Size"
|
||||
},
|
||||
"properties": [
|
||||
{
|
||||
"id": "unit",
|
||||
"value": "bytes"
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 8,
|
||||
"w": 12,
|
||||
"x": 12,
|
||||
"y": 12
|
||||
},
|
||||
"id": 8,
|
||||
"options": {
|
||||
"cellHeight": "sm",
|
||||
"footer": {
|
||||
"countRows": false,
|
||||
"fields": "",
|
||||
"reducer": [
|
||||
"sum"
|
||||
],
|
||||
"show": false
|
||||
},
|
||||
"showHeader": true
|
||||
},
|
||||
"pluginVersion": "10.2.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "${DS_PROMETHEUS}"
|
||||
},
|
||||
"editorMode": "code",
|
||||
"expr": "dbbackup_rpo_seconds{instance=~\"$instance\"}",
|
||||
"format": "table",
|
||||
"hide": false,
|
||||
"instant": true,
|
||||
"legendFormat": "__auto",
|
||||
"range": false,
|
||||
"refId": "RPO"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "${DS_PROMETHEUS}"
|
||||
},
|
||||
"editorMode": "code",
|
||||
"expr": "dbbackup_last_backup_size_bytes{instance=~\"$instance\"}",
|
||||
"format": "table",
|
||||
"hide": false,
|
||||
"instant": true,
|
||||
"legendFormat": "__auto",
|
||||
"range": false,
|
||||
"refId": "Size"
|
||||
}
|
||||
],
|
||||
"title": "Backup Status Overview",
|
||||
"transformations": [
|
||||
{
|
||||
"id": "joinByField",
|
||||
"options": {
|
||||
"byField": "database",
|
||||
"mode": "outer"
|
||||
}
|
||||
},
|
||||
{
|
||||
"id": "organize",
|
||||
"options": {
|
||||
"excludeByName": {
|
||||
"Time": true,
|
||||
"Time 1": true,
|
||||
"Time 2": true,
|
||||
"__name__": true,
|
||||
"__name__ 1": true,
|
||||
"__name__ 2": true,
|
||||
"instance 1": true,
|
||||
"instance 2": true,
|
||||
"job": true,
|
||||
"job 1": true,
|
||||
"job 2": true,
|
||||
"engine 1": true,
|
||||
"engine 2": true
|
||||
},
|
||||
"indexByName": {
|
||||
"Database": 0,
|
||||
"Instance": 1,
|
||||
"Engine": 2,
|
||||
"RPO": 3,
|
||||
"Size": 4
|
||||
},
|
||||
"renameByName": {
|
||||
"Value #RPO": "RPO",
|
||||
"Value #Size": "Size",
|
||||
"database": "Database",
|
||||
"instance": "Instance",
|
||||
"engine": "Engine"
|
||||
}
|
||||
}
|
||||
}
|
||||
],
|
||||
"type": "table"
|
||||
},
|
||||
{
|
||||
"collapsed": false,
|
||||
"gridPos": {
|
||||
"h": 1,
|
||||
"w": 24,
|
||||
"x": 0,
|
||||
"y": 30
|
||||
},
|
||||
"id": 100,
|
||||
"panels": [],
|
||||
"title": "Deduplication Statistics",
|
||||
"type": "row"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "${DS_PROMETHEUS}"
|
||||
},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
"mode": "thresholds"
|
||||
},
|
||||
"mappings": [],
|
||||
"thresholds": {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "blue",
|
||||
"value": null
|
||||
}
|
||||
]
|
||||
},
|
||||
"unit": "percentunit"
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 5,
|
||||
"w": 6,
|
||||
"x": 0,
|
||||
"y": 31
|
||||
},
|
||||
"id": 101,
|
||||
"options": {
|
||||
"colorMode": "background",
|
||||
"graphMode": "none",
|
||||
"justifyMode": "auto",
|
||||
"orientation": "auto",
|
||||
"reduceOptions": {
|
||||
"calcs": ["lastNotNull"],
|
||||
"fields": "",
|
||||
"values": false
|
||||
},
|
||||
"textMode": "auto"
|
||||
},
|
||||
"pluginVersion": "10.2.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "${DS_PROMETHEUS}"
|
||||
},
|
||||
"editorMode": "code",
|
||||
"expr": "dbbackup_dedup_ratio{instance=~\"$instance\"}",
|
||||
"legendFormat": "__auto",
|
||||
"range": true,
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"title": "Dedup Ratio",
|
||||
"type": "stat"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "${DS_PROMETHEUS}"
|
||||
},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
"mode": "thresholds"
|
||||
},
|
||||
"mappings": [],
|
||||
"thresholds": {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "green",
|
||||
"value": null
|
||||
}
|
||||
]
|
||||
},
|
||||
"unit": "bytes"
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 5,
|
||||
"w": 6,
|
||||
"x": 6,
|
||||
"y": 31
|
||||
},
|
||||
"id": 102,
|
||||
"options": {
|
||||
"colorMode": "value",
|
||||
"graphMode": "none",
|
||||
"justifyMode": "auto",
|
||||
"orientation": "auto",
|
||||
"reduceOptions": {
|
||||
"calcs": ["lastNotNull"],
|
||||
"fields": "",
|
||||
"values": false
|
||||
},
|
||||
"textMode": "auto"
|
||||
},
|
||||
"pluginVersion": "10.2.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "${DS_PROMETHEUS}"
|
||||
},
|
||||
"editorMode": "code",
|
||||
"expr": "dbbackup_dedup_space_saved_bytes{instance=~\"$instance\"}",
|
||||
"legendFormat": "__auto",
|
||||
"range": true,
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"title": "Space Saved",
|
||||
"type": "stat"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "${DS_PROMETHEUS}"
|
||||
},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
"mode": "thresholds"
|
||||
},
|
||||
"mappings": [],
|
||||
"thresholds": {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "yellow",
|
||||
"value": null
|
||||
}
|
||||
]
|
||||
},
|
||||
"unit": "bytes"
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 5,
|
||||
"w": 6,
|
||||
"x": 12,
|
||||
"y": 31
|
||||
},
|
||||
"id": 103,
|
||||
"options": {
|
||||
"colorMode": "value",
|
||||
"graphMode": "none",
|
||||
"justifyMode": "auto",
|
||||
"orientation": "auto",
|
||||
"reduceOptions": {
|
||||
"calcs": ["lastNotNull"],
|
||||
"fields": "",
|
||||
"values": false
|
||||
},
|
||||
"textMode": "auto"
|
||||
},
|
||||
"pluginVersion": "10.2.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "${DS_PROMETHEUS}"
|
||||
},
|
||||
"editorMode": "code",
|
||||
"expr": "dbbackup_dedup_disk_usage_bytes{instance=~\"$instance\"}",
|
||||
"legendFormat": "__auto",
|
||||
"range": true,
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"title": "Disk Usage",
|
||||
"type": "stat"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "${DS_PROMETHEUS}"
|
||||
},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
"mode": "thresholds"
|
||||
},
|
||||
"mappings": [],
|
||||
"thresholds": {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "purple",
|
||||
"value": null
|
||||
}
|
||||
]
|
||||
},
|
||||
"unit": "short"
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 5,
|
||||
"w": 6,
|
||||
"x": 18,
|
||||
"y": 31
|
||||
},
|
||||
"id": 104,
|
||||
"options": {
|
||||
"colorMode": "value",
|
||||
"graphMode": "none",
|
||||
"justifyMode": "auto",
|
||||
"orientation": "auto",
|
||||
"reduceOptions": {
|
||||
"calcs": ["lastNotNull"],
|
||||
"fields": "",
|
||||
"values": false
|
||||
},
|
||||
"textMode": "auto"
|
||||
},
|
||||
"pluginVersion": "10.2.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "${DS_PROMETHEUS}"
|
||||
},
|
||||
"editorMode": "code",
|
||||
"expr": "dbbackup_dedup_chunks_total{instance=~\"$instance\"}",
|
||||
"legendFormat": "__auto",
|
||||
"range": true,
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"title": "Total Chunks",
|
||||
"type": "stat"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "${DS_PROMETHEUS}"
|
||||
},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
"mode": "thresholds"
|
||||
},
|
||||
"mappings": [],
|
||||
"thresholds": {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "orange",
|
||||
"value": null
|
||||
}
|
||||
]
|
||||
},
|
||||
"unit": "percentunit"
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 5,
|
||||
"w": 4,
|
||||
"x": 0,
|
||||
"y": 36
|
||||
},
|
||||
"id": 107,
|
||||
"options": {
|
||||
"colorMode": "value",
|
||||
"graphMode": "none",
|
||||
"justifyMode": "auto",
|
||||
"orientation": "auto",
|
||||
"reduceOptions": {
|
||||
"calcs": ["lastNotNull"],
|
||||
"fields": "",
|
||||
"values": false
|
||||
},
|
||||
"textMode": "auto"
|
||||
},
|
||||
"pluginVersion": "10.2.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "${DS_PROMETHEUS}"
|
||||
},
|
||||
"editorMode": "code",
|
||||
"expr": "dbbackup_dedup_compression_ratio{instance=~\"$instance\"}",
|
||||
"legendFormat": "__auto",
|
||||
"range": true,
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"title": "Compression Ratio",
|
||||
"type": "stat"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "${DS_PROMETHEUS}"
|
||||
},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
"mode": "thresholds"
|
||||
},
|
||||
"mappings": [],
|
||||
"thresholds": {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "semi-dark-blue",
|
||||
"value": null
|
||||
}
|
||||
]
|
||||
},
|
||||
"unit": "dateTimeFromNow"
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 5,
|
||||
"w": 4,
|
||||
"x": 4,
|
||||
"y": 36
|
||||
},
|
||||
"id": 108,
|
||||
"options": {
|
||||
"colorMode": "value",
|
||||
"graphMode": "none",
|
||||
"justifyMode": "auto",
|
||||
"orientation": "auto",
|
||||
"reduceOptions": {
|
||||
"calcs": ["lastNotNull"],
|
||||
"fields": "",
|
||||
"values": false
|
||||
},
|
||||
"textMode": "auto"
|
||||
},
|
||||
"pluginVersion": "10.2.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "${DS_PROMETHEUS}"
|
||||
},
|
||||
"editorMode": "code",
|
||||
"expr": "dbbackup_dedup_oldest_chunk_timestamp{instance=~\"$instance\"} * 1000",
|
||||
"legendFormat": "__auto",
|
||||
"range": true,
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"title": "Oldest Chunk",
|
||||
"type": "stat"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "${DS_PROMETHEUS}"
|
||||
},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
"mode": "thresholds"
|
||||
},
|
||||
"mappings": [],
|
||||
"thresholds": {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "semi-dark-green",
|
||||
"value": null
|
||||
}
|
||||
]
|
||||
},
|
||||
"unit": "dateTimeFromNow"
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 5,
|
||||
"w": 4,
|
||||
"x": 8,
|
||||
"y": 36
|
||||
},
|
||||
"id": 109,
|
||||
"options": {
|
||||
"colorMode": "value",
|
||||
"graphMode": "none",
|
||||
"justifyMode": "auto",
|
||||
"orientation": "auto",
|
||||
"reduceOptions": {
|
||||
"calcs": ["lastNotNull"],
|
||||
"fields": "",
|
||||
"values": false
|
||||
},
|
||||
"textMode": "auto"
|
||||
},
|
||||
"pluginVersion": "10.2.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "${DS_PROMETHEUS}"
|
||||
},
|
||||
"editorMode": "code",
|
||||
"expr": "dbbackup_dedup_newest_chunk_timestamp{instance=~\"$instance\"} * 1000",
|
||||
"legendFormat": "__auto",
|
||||
"range": true,
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"title": "Newest Chunk",
|
||||
"type": "stat"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "${DS_PROMETHEUS}"
|
||||
},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
"mode": "palette-classic"
|
||||
},
|
||||
"custom": {
|
||||
"axisBorderShow": false,
|
||||
"axisCenteredZero": false,
|
||||
"axisColorMode": "text",
|
||||
"axisLabel": "",
|
||||
"axisPlacement": "auto",
|
||||
"barAlignment": 0,
|
||||
"drawStyle": "line",
|
||||
"fillOpacity": 10,
|
||||
"gradientMode": "none",
|
||||
"hideFrom": {
|
||||
"legend": false,
|
||||
"tooltip": false,
|
||||
"viz": false
|
||||
},
|
||||
"insertNulls": false,
|
||||
"lineInterpolation": "linear",
|
||||
"lineWidth": 1,
|
||||
"pointSize": 5,
|
||||
"scaleDistribution": {
|
||||
"type": "linear"
|
||||
},
|
||||
"showPoints": "auto",
|
||||
"spanNulls": false,
|
||||
"stacking": {
|
||||
"group": "A",
|
||||
"mode": "none"
|
||||
},
|
||||
"thresholdsStyle": {
|
||||
"mode": "off"
|
||||
}
|
||||
},
|
||||
"mappings": [],
|
||||
"thresholds": {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "green",
|
||||
"value": null
|
||||
}
|
||||
]
|
||||
},
|
||||
"unit": "percentunit"
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 8,
|
||||
"w": 12,
|
||||
"x": 0,
|
||||
"y": 36
|
||||
},
|
||||
"id": 105,
|
||||
"options": {
|
||||
"legend": {
|
||||
"calcs": [],
|
||||
"displayMode": "list",
|
||||
"placement": "bottom",
|
||||
"showLegend": true
|
||||
},
|
||||
"tooltip": {
|
||||
"mode": "single",
|
||||
"sort": "none"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "10.2.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "${DS_PROMETHEUS}"
|
||||
},
|
||||
"editorMode": "code",
|
||||
"expr": "dbbackup_dedup_database_ratio{instance=~\"$instance\"}",
|
||||
"legendFormat": "{{database}}",
|
||||
"range": true,
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"title": "Dedup Ratio by Database",
|
||||
"type": "timeseries"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "${DS_PROMETHEUS}"
|
||||
},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
"mode": "palette-classic"
|
||||
},
|
||||
"custom": {
|
||||
"axisBorderShow": false,
|
||||
"axisCenteredZero": false,
|
||||
"axisColorMode": "text",
|
||||
"axisLabel": "",
|
||||
"axisPlacement": "auto",
|
||||
"barAlignment": 0,
|
||||
"drawStyle": "line",
|
||||
"fillOpacity": 10,
|
||||
"gradientMode": "none",
|
||||
"hideFrom": {
|
||||
"legend": false,
|
||||
"tooltip": false,
|
||||
"viz": false
|
||||
},
|
||||
"insertNulls": false,
|
||||
"lineInterpolation": "linear",
|
||||
"lineWidth": 1,
|
||||
"pointSize": 5,
|
||||
"scaleDistribution": {
|
||||
"type": "linear"
|
||||
},
|
||||
"showPoints": "auto",
|
||||
"spanNulls": false,
|
||||
"stacking": {
|
||||
"group": "A",
|
||||
"mode": "none"
|
||||
},
|
||||
"thresholdsStyle": {
|
||||
"mode": "off"
|
||||
}
|
||||
},
|
||||
"mappings": [],
|
||||
"thresholds": {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "green",
|
||||
"value": null
|
||||
}
|
||||
]
|
||||
},
|
||||
"unit": "bytes"
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 8,
|
||||
"w": 12,
|
||||
"x": 12,
|
||||
"y": 36
|
||||
},
|
||||
"id": 106,
|
||||
"options": {
|
||||
"legend": {
|
||||
"calcs": [],
|
||||
"displayMode": "list",
|
||||
"placement": "bottom",
|
||||
"showLegend": true
|
||||
},
|
||||
"tooltip": {
|
||||
"mode": "single",
|
||||
"sort": "none"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "10.2.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "${DS_PROMETHEUS}"
|
||||
},
|
||||
"editorMode": "code",
|
||||
"expr": "dbbackup_dedup_space_saved_bytes{instance=~\"$instance\"}",
|
||||
"legendFormat": "Space Saved",
|
||||
"range": true,
|
||||
"refId": "A"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "${DS_PROMETHEUS}"
|
||||
},
|
||||
"editorMode": "code",
|
||||
"expr": "dbbackup_dedup_disk_usage_bytes{instance=~\"$instance\"}",
|
||||
"legendFormat": "Disk Usage",
|
||||
"range": true,
|
||||
"refId": "B"
|
||||
}
|
||||
],
|
||||
"title": "Dedup Storage Over Time",
|
||||
"type": "timeseries"
|
||||
}
|
||||
],
|
||||
"refresh": "30s",
|
||||
"schemaVersion": 38,
|
||||
"tags": [
|
||||
"dbbackup",
|
||||
"backup",
|
||||
"database",
|
||||
"dedup"
|
||||
],
|
||||
"templating": {
|
||||
"list": [
|
||||
{
|
||||
"current": {
|
||||
"selected": false,
|
||||
"text": "All",
|
||||
"value": "$__all"
|
||||
},
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "${DS_PROMETHEUS}"
|
||||
},
|
||||
"definition": "label_values(dbbackup_rpo_seconds, instance)",
|
||||
"hide": 0,
|
||||
"includeAll": true,
|
||||
"label": "Instance",
|
||||
"multi": true,
|
||||
"name": "instance",
|
||||
"options": [],
|
||||
"query": {
|
||||
"query": "label_values(dbbackup_rpo_seconds, instance)",
|
||||
"refId": "StandardVariableQuery"
|
||||
},
|
||||
"refresh": 2,
|
||||
"regex": "",
|
||||
"skipUrlSync": false,
|
||||
"sort": 1,
|
||||
"type": "query"
|
||||
},
|
||||
{
|
||||
"hide": 2,
|
||||
"name": "DS_PROMETHEUS",
|
||||
"query": "prometheus",
|
||||
"skipUrlSync": false,
|
||||
"type": "datasource"
|
||||
}
|
||||
]
|
||||
},
|
||||
"time": {
|
||||
"from": "now-24h",
|
||||
"to": "now"
|
||||
},
|
||||
"timepicker": {},
|
||||
"timezone": "",
|
||||
"title": "DBBackup Overview",
|
||||
"uid": "dbbackup-overview",
|
||||
"version": 1,
|
||||
"weekStart": ""
|
||||
}
|
||||
@ -2,12 +2,14 @@ package auth
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"context"
|
||||
"fmt"
|
||||
"os"
|
||||
"os/exec"
|
||||
"path/filepath"
|
||||
"strconv"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"dbbackup/internal/config"
|
||||
)
|
||||
@ -69,7 +71,10 @@ func checkPgHbaConf(user string) AuthMethod {
|
||||
|
||||
// findHbaFileViaPostgres asks PostgreSQL for the hba_file location
|
||||
func findHbaFileViaPostgres() string {
|
||||
cmd := exec.Command("psql", "-U", "postgres", "-t", "-c", "SHOW hba_file;")
|
||||
ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
|
||||
defer cancel()
|
||||
|
||||
cmd := exec.CommandContext(ctx, "psql", "-U", "postgres", "-t", "-c", "SHOW hba_file;")
|
||||
output, err := cmd.Output()
|
||||
if err != nil {
|
||||
return ""
|
||||
@ -79,16 +84,13 @@ func findHbaFileViaPostgres() string {
|
||||
|
||||
// parsePgHbaConf parses pg_hba.conf and returns the authentication method
|
||||
func parsePgHbaConf(path string, user string) AuthMethod {
|
||||
// Try with sudo if we can't read directly
|
||||
// Try to read the file directly - do NOT use sudo as it triggers password prompts
|
||||
// If we can't read pg_hba.conf, we'll rely on connection attempts to determine auth
|
||||
file, err := os.Open(path)
|
||||
if err != nil {
|
||||
// Try with sudo
|
||||
cmd := exec.Command("sudo", "cat", path)
|
||||
output, err := cmd.Output()
|
||||
if err != nil {
|
||||
return AuthUnknown
|
||||
}
|
||||
return parseHbaContent(string(output), user)
|
||||
// If we can't read the file, return unknown and let the connection determine auth
|
||||
// This avoids sudo password prompts when running as postgres via su
|
||||
return AuthUnknown
|
||||
}
|
||||
defer file.Close()
|
||||
|
||||
@ -196,13 +198,13 @@ func CheckAuthenticationMismatch(cfg *config.Config) (bool, string) {
|
||||
func buildAuthMismatchMessage(osUser, dbUser string, method AuthMethod) string {
|
||||
var msg strings.Builder
|
||||
|
||||
msg.WriteString("\n⚠️ Authentication Mismatch Detected\n")
|
||||
msg.WriteString("\n[WARN] Authentication Mismatch Detected\n")
|
||||
msg.WriteString(strings.Repeat("=", 60) + "\n\n")
|
||||
|
||||
msg.WriteString(fmt.Sprintf(" PostgreSQL is using '%s' authentication\n", method))
|
||||
msg.WriteString(fmt.Sprintf(" OS user '%s' cannot authenticate as DB user '%s'\n\n", osUser, dbUser))
|
||||
|
||||
msg.WriteString("💡 Solutions (choose one):\n\n")
|
||||
msg.WriteString("[TIP] Solutions (choose one):\n\n")
|
||||
|
||||
msg.WriteString(fmt.Sprintf(" 1. Run as matching user:\n"))
|
||||
msg.WriteString(fmt.Sprintf(" sudo -u %s %s\n\n", dbUser, getCommandLine()))
|
||||
@ -218,7 +220,7 @@ func buildAuthMismatchMessage(osUser, dbUser string, method AuthMethod) string {
|
||||
msg.WriteString(" 4. Provide password via flag:\n")
|
||||
msg.WriteString(fmt.Sprintf(" %s --password your_password\n\n", getCommandLine()))
|
||||
|
||||
msg.WriteString("📝 Note: For production use, ~/.pgpass or PGPASSWORD are recommended\n")
|
||||
msg.WriteString("[NOTE] Note: For production use, ~/.pgpass or PGPASSWORD are recommended\n")
|
||||
msg.WriteString(" to avoid exposing passwords in command history.\n\n")
|
||||
|
||||
msg.WriteString(strings.Repeat("=", 60) + "\n")
|
||||
|
||||
@ -87,20 +87,46 @@ func IsBackupEncrypted(backupPath string) bool {
|
||||
return meta.Encrypted
|
||||
}
|
||||
|
||||
// Fallback: check if file starts with encryption nonce
|
||||
// No metadata found - check file format to determine if encrypted
|
||||
// Known unencrypted formats have specific magic bytes:
|
||||
// - Gzip: 1f 8b
|
||||
// - PGDMP (PostgreSQL custom): 50 47 44 4d 50 (PGDMP)
|
||||
// - Plain SQL: starts with text (-- or SET or CREATE)
|
||||
// - Tar: 75 73 74 61 72 (ustar) at offset 257
|
||||
//
|
||||
// If file doesn't match any known format, it MIGHT be encrypted,
|
||||
// but we return false to avoid false positives. User must provide
|
||||
// metadata file or use --encrypt flag explicitly.
|
||||
file, err := os.Open(backupPath)
|
||||
if err != nil {
|
||||
return false
|
||||
}
|
||||
defer file.Close()
|
||||
|
||||
// Try to read nonce - if it succeeds, likely encrypted
|
||||
nonce := make([]byte, crypto.NonceSize)
|
||||
if n, err := file.Read(nonce); err != nil || n != crypto.NonceSize {
|
||||
header := make([]byte, 6)
|
||||
if n, err := file.Read(header); err != nil || n < 2 {
|
||||
return false
|
||||
}
|
||||
|
||||
return true
|
||||
// Check for known unencrypted formats
|
||||
// Gzip magic: 1f 8b
|
||||
if header[0] == 0x1f && header[1] == 0x8b {
|
||||
return false // Gzip compressed - not encrypted
|
||||
}
|
||||
|
||||
// PGDMP magic (PostgreSQL custom format)
|
||||
if len(header) >= 5 && string(header[:5]) == "PGDMP" {
|
||||
return false // PostgreSQL custom dump - not encrypted
|
||||
}
|
||||
|
||||
// Plain text SQL (starts with --, SET, CREATE, etc.)
|
||||
if header[0] == '-' || header[0] == 'S' || header[0] == 'C' || header[0] == '/' {
|
||||
return false // Plain text SQL - not encrypted
|
||||
}
|
||||
|
||||
// Without metadata, we cannot reliably determine encryption status
|
||||
// Return false to avoid blocking restores with false positives
|
||||
return false
|
||||
}
|
||||
|
||||
// DecryptBackupFile decrypts an encrypted backup file
|
||||
|
||||
@ -20,6 +20,7 @@ import (
|
||||
"dbbackup/internal/cloud"
|
||||
"dbbackup/internal/config"
|
||||
"dbbackup/internal/database"
|
||||
"dbbackup/internal/fs"
|
||||
"dbbackup/internal/logger"
|
||||
"dbbackup/internal/metadata"
|
||||
"dbbackup/internal/metrics"
|
||||
@ -28,14 +29,22 @@ import (
|
||||
"dbbackup/internal/swap"
|
||||
)
|
||||
|
||||
// ProgressCallback is called with byte-level progress updates during backup operations
|
||||
type ProgressCallback func(current, total int64, description string)
|
||||
|
||||
// DatabaseProgressCallback is called with database count progress during cluster backup
|
||||
type DatabaseProgressCallback func(done, total int, dbName string)
|
||||
|
||||
// Engine handles backup operations
|
||||
type Engine struct {
|
||||
cfg *config.Config
|
||||
log logger.Logger
|
||||
db database.Database
|
||||
progress progress.Indicator
|
||||
detailedReporter *progress.DetailedReporter
|
||||
silent bool // Silent mode for TUI
|
||||
cfg *config.Config
|
||||
log logger.Logger
|
||||
db database.Database
|
||||
progress progress.Indicator
|
||||
detailedReporter *progress.DetailedReporter
|
||||
silent bool // Silent mode for TUI
|
||||
progressCallback ProgressCallback
|
||||
dbProgressCallback DatabaseProgressCallback
|
||||
}
|
||||
|
||||
// New creates a new backup engine
|
||||
@ -86,6 +95,30 @@ func NewSilent(cfg *config.Config, log logger.Logger, db database.Database, prog
|
||||
}
|
||||
}
|
||||
|
||||
// SetProgressCallback sets a callback for detailed progress reporting (for TUI mode)
|
||||
func (e *Engine) SetProgressCallback(cb ProgressCallback) {
|
||||
e.progressCallback = cb
|
||||
}
|
||||
|
||||
// SetDatabaseProgressCallback sets a callback for database count progress during cluster backup
|
||||
func (e *Engine) SetDatabaseProgressCallback(cb DatabaseProgressCallback) {
|
||||
e.dbProgressCallback = cb
|
||||
}
|
||||
|
||||
// reportProgress reports progress to the callback if set
|
||||
func (e *Engine) reportProgress(current, total int64, description string) {
|
||||
if e.progressCallback != nil {
|
||||
e.progressCallback(current, total, description)
|
||||
}
|
||||
}
|
||||
|
||||
// reportDatabaseProgress reports database count progress to the callback if set
|
||||
func (e *Engine) reportDatabaseProgress(done, total int, dbName string) {
|
||||
if e.dbProgressCallback != nil {
|
||||
e.dbProgressCallback(done, total, dbName)
|
||||
}
|
||||
}
|
||||
|
||||
// loggerAdapter adapts our logger to the progress.Logger interface
|
||||
type loggerAdapter struct {
|
||||
logger logger.Logger
|
||||
@ -443,6 +476,14 @@ func (e *Engine) BackupCluster(ctx context.Context) error {
|
||||
defer wg.Done()
|
||||
defer func() { <-semaphore }() // Release
|
||||
|
||||
// Panic recovery - prevent one database failure from crashing entire cluster backup
|
||||
defer func() {
|
||||
if r := recover(); r != nil {
|
||||
e.log.Error("Panic in database backup goroutine", "database", name, "panic", r)
|
||||
atomic.AddInt32(&failCount, 1)
|
||||
}
|
||||
}()
|
||||
|
||||
// Check for cancellation at start of goroutine
|
||||
select {
|
||||
case <-ctx.Done():
|
||||
@ -457,6 +498,8 @@ func (e *Engine) BackupCluster(ctx context.Context) error {
|
||||
estimator.UpdateProgress(idx)
|
||||
e.printf(" [%d/%d] Backing up database: %s\n", idx+1, len(databases), name)
|
||||
quietProgress.Update(fmt.Sprintf("Backing up database %d/%d: %s", idx+1, len(databases), name))
|
||||
// Report database progress to TUI callback
|
||||
e.reportDatabaseProgress(idx+1, len(databases), name)
|
||||
mu.Unlock()
|
||||
|
||||
// Check database size and warn if very large
|
||||
@ -465,7 +508,7 @@ func (e *Engine) BackupCluster(ctx context.Context) error {
|
||||
mu.Lock()
|
||||
e.printf(" Database size: %s\n", sizeStr)
|
||||
if size > 10*1024*1024*1024 { // > 10GB
|
||||
e.printf(" ⚠️ Large database detected - this may take a while\n")
|
||||
e.printf(" [WARN] Large database detected - this may take a while\n")
|
||||
}
|
||||
mu.Unlock()
|
||||
}
|
||||
@ -502,40 +545,24 @@ func (e *Engine) BackupCluster(ctx context.Context) error {
|
||||
|
||||
cmd := e.db.BuildBackupCommand(name, dumpFile, options)
|
||||
|
||||
// Calculate timeout based on database size:
|
||||
// - Minimum 2 hours for small databases
|
||||
// - Add 1 hour per 20GB for large databases
|
||||
// - This allows ~69GB database to take up to 5+ hours
|
||||
timeout := 2 * time.Hour
|
||||
if size, err := e.db.GetDatabaseSize(ctx, name); err == nil {
|
||||
sizeGB := size / (1024 * 1024 * 1024)
|
||||
if sizeGB > 20 {
|
||||
extraHours := (sizeGB / 20) + 1
|
||||
timeout = time.Duration(2+extraHours) * time.Hour
|
||||
mu.Lock()
|
||||
e.printf(" Extended timeout: %v (for %dGB database)\n", timeout, sizeGB)
|
||||
mu.Unlock()
|
||||
}
|
||||
}
|
||||
|
||||
dbCtx, cancel := context.WithTimeout(ctx, timeout)
|
||||
defer cancel()
|
||||
err := e.executeCommand(dbCtx, cmd, dumpFile)
|
||||
cancel()
|
||||
// NO TIMEOUT for individual database backups
|
||||
// Large databases with large objects can take many hours
|
||||
// The parent context handles cancellation if needed
|
||||
err := e.executeCommand(ctx, cmd, dumpFile)
|
||||
|
||||
if err != nil {
|
||||
e.log.Warn("Failed to backup database", "database", name, "error", err)
|
||||
mu.Lock()
|
||||
e.printf(" ⚠️ WARNING: Failed to backup %s: %v\n", name, err)
|
||||
e.printf(" [WARN] WARNING: Failed to backup %s: %v\n", name, err)
|
||||
mu.Unlock()
|
||||
atomic.AddInt32(&failCount, 1)
|
||||
} else {
|
||||
compressedCandidate := strings.TrimSuffix(dumpFile, ".dump") + ".sql.gz"
|
||||
mu.Lock()
|
||||
if info, err := os.Stat(compressedCandidate); err == nil {
|
||||
e.printf(" ✅ Completed %s (%s)\n", name, formatBytes(info.Size()))
|
||||
e.printf(" [OK] Completed %s (%s)\n", name, formatBytes(info.Size()))
|
||||
} else if info, err := os.Stat(dumpFile); err == nil {
|
||||
e.printf(" ✅ Completed %s (%s)\n", name, formatBytes(info.Size()))
|
||||
e.printf(" [OK] Completed %s (%s)\n", name, formatBytes(info.Size()))
|
||||
}
|
||||
mu.Unlock()
|
||||
atomic.AddInt32(&successCount, 1)
|
||||
@ -614,12 +641,36 @@ func (e *Engine) executeCommandWithProgress(ctx context.Context, cmdArgs []strin
|
||||
return fmt.Errorf("failed to start command: %w", err)
|
||||
}
|
||||
|
||||
// Monitor progress via stderr
|
||||
go e.monitorCommandProgress(stderr, tracker)
|
||||
// Monitor progress via stderr in goroutine
|
||||
stderrDone := make(chan struct{})
|
||||
go func() {
|
||||
defer close(stderrDone)
|
||||
e.monitorCommandProgress(stderr, tracker)
|
||||
}()
|
||||
|
||||
// Wait for command to complete
|
||||
if err := cmd.Wait(); err != nil {
|
||||
return fmt.Errorf("backup command failed: %w", err)
|
||||
// Wait for command to complete with proper context handling
|
||||
cmdDone := make(chan error, 1)
|
||||
go func() {
|
||||
cmdDone <- cmd.Wait()
|
||||
}()
|
||||
|
||||
var cmdErr error
|
||||
select {
|
||||
case cmdErr = <-cmdDone:
|
||||
// Command completed (success or failure)
|
||||
case <-ctx.Done():
|
||||
// Context cancelled - kill process to unblock
|
||||
e.log.Warn("Backup cancelled - killing process")
|
||||
cmd.Process.Kill()
|
||||
<-cmdDone // Wait for goroutine to finish
|
||||
cmdErr = ctx.Err()
|
||||
}
|
||||
|
||||
// Wait for stderr reader to finish
|
||||
<-stderrDone
|
||||
|
||||
if cmdErr != nil {
|
||||
return fmt.Errorf("backup command failed: %w", cmdErr)
|
||||
}
|
||||
|
||||
return nil
|
||||
@ -663,6 +714,7 @@ func (e *Engine) monitorCommandProgress(stderr io.ReadCloser, tracker *progress.
|
||||
}
|
||||
|
||||
// executeMySQLWithProgressAndCompression handles MySQL backup with compression and progress
|
||||
// Uses in-process pgzip for parallel compression (2-4x faster on multi-core systems)
|
||||
func (e *Engine) executeMySQLWithProgressAndCompression(ctx context.Context, cmdArgs []string, outputFile string, tracker *progress.OperationTracker) error {
|
||||
// Create mysqldump command
|
||||
dumpCmd := exec.CommandContext(ctx, cmdArgs[0], cmdArgs[1:]...)
|
||||
@ -671,9 +723,6 @@ func (e *Engine) executeMySQLWithProgressAndCompression(ctx context.Context, cmd
|
||||
dumpCmd.Env = append(dumpCmd.Env, "MYSQL_PWD="+e.cfg.Password)
|
||||
}
|
||||
|
||||
// Create gzip command
|
||||
gzipCmd := exec.CommandContext(ctx, "gzip", fmt.Sprintf("-%d", e.cfg.CompressionLevel))
|
||||
|
||||
// Create output file
|
||||
outFile, err := os.Create(outputFile)
|
||||
if err != nil {
|
||||
@ -681,48 +730,83 @@ func (e *Engine) executeMySQLWithProgressAndCompression(ctx context.Context, cmd
|
||||
}
|
||||
defer outFile.Close()
|
||||
|
||||
// Set up pipeline: mysqldump | gzip > outputfile
|
||||
// Create parallel gzip writer using pgzip
|
||||
gzWriter, err := fs.NewParallelGzipWriter(outFile, e.cfg.CompressionLevel)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to create gzip writer: %w", err)
|
||||
}
|
||||
defer gzWriter.Close()
|
||||
|
||||
// Set up pipeline: mysqldump stdout -> pgzip writer -> file
|
||||
pipe, err := dumpCmd.StdoutPipe()
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to create pipe: %w", err)
|
||||
}
|
||||
|
||||
gzipCmd.Stdin = pipe
|
||||
gzipCmd.Stdout = outFile
|
||||
|
||||
// Get stderr for progress monitoring
|
||||
stderr, err := dumpCmd.StderrPipe()
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to get stderr pipe: %w", err)
|
||||
}
|
||||
|
||||
// Start monitoring progress
|
||||
go e.monitorCommandProgress(stderr, tracker)
|
||||
|
||||
// Start both commands
|
||||
if err := gzipCmd.Start(); err != nil {
|
||||
return fmt.Errorf("failed to start gzip: %w", err)
|
||||
}
|
||||
// Start monitoring progress in goroutine
|
||||
stderrDone := make(chan struct{})
|
||||
go func() {
|
||||
defer close(stderrDone)
|
||||
e.monitorCommandProgress(stderr, tracker)
|
||||
}()
|
||||
|
||||
// Start mysqldump
|
||||
if err := dumpCmd.Start(); err != nil {
|
||||
return fmt.Errorf("failed to start mysqldump: %w", err)
|
||||
}
|
||||
|
||||
// Wait for mysqldump to complete
|
||||
if err := dumpCmd.Wait(); err != nil {
|
||||
return fmt.Errorf("mysqldump failed: %w", err)
|
||||
// Copy mysqldump output through pgzip in a goroutine
|
||||
copyDone := make(chan error, 1)
|
||||
go func() {
|
||||
_, err := io.Copy(gzWriter, pipe)
|
||||
copyDone <- err
|
||||
}()
|
||||
|
||||
// Wait for mysqldump with context handling
|
||||
dumpDone := make(chan error, 1)
|
||||
go func() {
|
||||
dumpDone <- dumpCmd.Wait()
|
||||
}()
|
||||
|
||||
var dumpErr error
|
||||
select {
|
||||
case dumpErr = <-dumpDone:
|
||||
// mysqldump completed
|
||||
case <-ctx.Done():
|
||||
e.log.Warn("Backup cancelled - killing mysqldump")
|
||||
dumpCmd.Process.Kill()
|
||||
<-dumpDone
|
||||
return ctx.Err()
|
||||
}
|
||||
|
||||
// Close pipe and wait for gzip
|
||||
pipe.Close()
|
||||
if err := gzipCmd.Wait(); err != nil {
|
||||
return fmt.Errorf("gzip failed: %w", err)
|
||||
// Wait for stderr reader
|
||||
<-stderrDone
|
||||
|
||||
// Wait for copy to complete
|
||||
if copyErr := <-copyDone; copyErr != nil {
|
||||
return fmt.Errorf("compression failed: %w", copyErr)
|
||||
}
|
||||
|
||||
// Close gzip writer to flush all data
|
||||
if err := gzWriter.Close(); err != nil {
|
||||
return fmt.Errorf("failed to close gzip writer: %w", err)
|
||||
}
|
||||
|
||||
if dumpErr != nil {
|
||||
return fmt.Errorf("mysqldump failed: %w", dumpErr)
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// executeMySQLWithCompression handles MySQL backup with compression
|
||||
// Uses in-process pgzip for parallel compression (2-4x faster on multi-core systems)
|
||||
func (e *Engine) executeMySQLWithCompression(ctx context.Context, cmdArgs []string, outputFile string) error {
|
||||
// Create mysqldump command
|
||||
dumpCmd := exec.CommandContext(ctx, cmdArgs[0], cmdArgs[1:]...)
|
||||
@ -731,9 +815,6 @@ func (e *Engine) executeMySQLWithCompression(ctx context.Context, cmdArgs []stri
|
||||
dumpCmd.Env = append(dumpCmd.Env, "MYSQL_PWD="+e.cfg.Password)
|
||||
}
|
||||
|
||||
// Create gzip command
|
||||
gzipCmd := exec.CommandContext(ctx, "gzip", fmt.Sprintf("-%d", e.cfg.CompressionLevel))
|
||||
|
||||
// Create output file
|
||||
outFile, err := os.Create(outputFile)
|
||||
if err != nil {
|
||||
@ -741,25 +822,60 @@ func (e *Engine) executeMySQLWithCompression(ctx context.Context, cmdArgs []stri
|
||||
}
|
||||
defer outFile.Close()
|
||||
|
||||
// Set up pipeline: mysqldump | gzip > outputfile
|
||||
stdin, err := dumpCmd.StdoutPipe()
|
||||
// Create parallel gzip writer using pgzip
|
||||
gzWriter, err := fs.NewParallelGzipWriter(outFile, e.cfg.CompressionLevel)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to create gzip writer: %w", err)
|
||||
}
|
||||
defer gzWriter.Close()
|
||||
|
||||
// Set up pipeline: mysqldump stdout -> pgzip writer -> file
|
||||
pipe, err := dumpCmd.StdoutPipe()
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to create pipe: %w", err)
|
||||
}
|
||||
gzipCmd.Stdin = stdin
|
||||
gzipCmd.Stdout = outFile
|
||||
|
||||
// Start both commands
|
||||
if err := gzipCmd.Start(); err != nil {
|
||||
return fmt.Errorf("failed to start gzip: %w", err)
|
||||
// Start mysqldump
|
||||
if err := dumpCmd.Start(); err != nil {
|
||||
return fmt.Errorf("failed to start mysqldump: %w", err)
|
||||
}
|
||||
|
||||
if err := dumpCmd.Run(); err != nil {
|
||||
return fmt.Errorf("mysqldump failed: %w", err)
|
||||
// Copy mysqldump output through pgzip in a goroutine
|
||||
copyDone := make(chan error, 1)
|
||||
go func() {
|
||||
_, err := io.Copy(gzWriter, pipe)
|
||||
copyDone <- err
|
||||
}()
|
||||
|
||||
// Wait for mysqldump with context handling
|
||||
dumpDone := make(chan error, 1)
|
||||
go func() {
|
||||
dumpDone <- dumpCmd.Wait()
|
||||
}()
|
||||
|
||||
var dumpErr error
|
||||
select {
|
||||
case dumpErr = <-dumpDone:
|
||||
// mysqldump completed
|
||||
case <-ctx.Done():
|
||||
e.log.Warn("Backup cancelled - killing mysqldump")
|
||||
dumpCmd.Process.Kill()
|
||||
<-dumpDone
|
||||
return ctx.Err()
|
||||
}
|
||||
|
||||
if err := gzipCmd.Wait(); err != nil {
|
||||
return fmt.Errorf("gzip failed: %w", err)
|
||||
// Wait for copy to complete
|
||||
if copyErr := <-copyDone; copyErr != nil {
|
||||
return fmt.Errorf("compression failed: %w", copyErr)
|
||||
}
|
||||
|
||||
// Close gzip writer to flush all data
|
||||
if err := gzWriter.Close(); err != nil {
|
||||
return fmt.Errorf("failed to close gzip writer: %w", err)
|
||||
}
|
||||
|
||||
if dumpErr != nil {
|
||||
return fmt.Errorf("mysqldump failed: %w", dumpErr)
|
||||
}
|
||||
|
||||
return nil
|
||||
@ -836,105 +952,89 @@ func (e *Engine) createSampleBackup(ctx context.Context, databaseName, outputFil
|
||||
func (e *Engine) backupGlobals(ctx context.Context, tempDir string) error {
|
||||
globalsFile := filepath.Join(tempDir, "globals.sql")
|
||||
|
||||
cmd := exec.CommandContext(ctx, "pg_dumpall", "--globals-only")
|
||||
if e.cfg.Host != "localhost" {
|
||||
cmd.Args = append(cmd.Args, "-h", e.cfg.Host, "-p", fmt.Sprintf("%d", e.cfg.Port))
|
||||
// CRITICAL: Always pass port even for localhost - user may have non-standard port
|
||||
cmd := exec.CommandContext(ctx, "pg_dumpall", "--globals-only",
|
||||
"-p", fmt.Sprintf("%d", e.cfg.Port),
|
||||
"-U", e.cfg.User)
|
||||
|
||||
// Only add -h flag for non-localhost to use Unix socket for peer auth
|
||||
if e.cfg.Host != "localhost" && e.cfg.Host != "127.0.0.1" && e.cfg.Host != "" {
|
||||
cmd.Args = append([]string{cmd.Args[0], "-h", e.cfg.Host}, cmd.Args[1:]...)
|
||||
}
|
||||
cmd.Args = append(cmd.Args, "-U", e.cfg.User)
|
||||
|
||||
cmd.Env = os.Environ()
|
||||
if e.cfg.Password != "" {
|
||||
cmd.Env = append(cmd.Env, "PGPASSWORD="+e.cfg.Password)
|
||||
}
|
||||
|
||||
output, err := cmd.Output()
|
||||
// Use Start/Wait pattern for proper Ctrl+C handling
|
||||
stdout, err := cmd.StdoutPipe()
|
||||
if err != nil {
|
||||
return fmt.Errorf("pg_dumpall failed: %w", err)
|
||||
return fmt.Errorf("failed to create stdout pipe: %w", err)
|
||||
}
|
||||
|
||||
if err := cmd.Start(); err != nil {
|
||||
return fmt.Errorf("failed to start pg_dumpall: %w", err)
|
||||
}
|
||||
|
||||
// Read output in goroutine
|
||||
var output []byte
|
||||
var readErr error
|
||||
readDone := make(chan struct{})
|
||||
go func() {
|
||||
defer close(readDone)
|
||||
output, readErr = io.ReadAll(stdout)
|
||||
}()
|
||||
|
||||
// Wait for command with proper context handling
|
||||
cmdDone := make(chan error, 1)
|
||||
go func() {
|
||||
cmdDone <- cmd.Wait()
|
||||
}()
|
||||
|
||||
var cmdErr error
|
||||
select {
|
||||
case cmdErr = <-cmdDone:
|
||||
// Command completed normally
|
||||
case <-ctx.Done():
|
||||
e.log.Warn("Globals backup cancelled - killing pg_dumpall")
|
||||
cmd.Process.Kill()
|
||||
<-cmdDone
|
||||
return ctx.Err()
|
||||
}
|
||||
|
||||
<-readDone
|
||||
|
||||
if cmdErr != nil {
|
||||
return fmt.Errorf("pg_dumpall failed: %w", cmdErr)
|
||||
}
|
||||
if readErr != nil {
|
||||
return fmt.Errorf("failed to read pg_dumpall output: %w", readErr)
|
||||
}
|
||||
|
||||
return os.WriteFile(globalsFile, output, 0644)
|
||||
}
|
||||
|
||||
// createArchive creates a compressed tar archive
|
||||
// createArchive creates a compressed tar archive using parallel gzip compression
|
||||
// Uses in-process pgzip for 2-4x faster compression on multi-core systems
|
||||
func (e *Engine) createArchive(ctx context.Context, sourceDir, outputFile string) error {
|
||||
// Use pigz for faster parallel compression if available, otherwise use standard gzip
|
||||
compressCmd := "tar"
|
||||
compressArgs := []string{"-czf", outputFile, "-C", sourceDir, "."}
|
||||
e.log.Debug("Creating archive with parallel compression",
|
||||
"source", sourceDir,
|
||||
"output", outputFile,
|
||||
"compression", e.cfg.CompressionLevel)
|
||||
|
||||
// Check if pigz is available for faster parallel compression
|
||||
if _, err := exec.LookPath("pigz"); err == nil {
|
||||
// Use pigz with number of cores for parallel compression
|
||||
compressArgs = []string{"-cf", "-", "-C", sourceDir, "."}
|
||||
cmd := exec.CommandContext(ctx, "tar", compressArgs...)
|
||||
|
||||
// Create output file
|
||||
outFile, err := os.Create(outputFile)
|
||||
if err != nil {
|
||||
// Fallback to regular tar
|
||||
goto regularTar
|
||||
// Use in-process parallel compression with pgzip
|
||||
err := fs.CreateTarGzParallel(ctx, sourceDir, outputFile, e.cfg.CompressionLevel, func(progress fs.CreateProgress) {
|
||||
// Optional: log progress for large archives
|
||||
if progress.FilesCount%100 == 0 && progress.FilesCount > 0 {
|
||||
e.log.Debug("Archive progress", "files", progress.FilesCount, "bytes", progress.BytesWritten)
|
||||
}
|
||||
defer outFile.Close()
|
||||
})
|
||||
|
||||
// Pipe to pigz for parallel compression
|
||||
pigzCmd := exec.CommandContext(ctx, "pigz", "-p", strconv.Itoa(e.cfg.Jobs))
|
||||
|
||||
tarOut, err := cmd.StdoutPipe()
|
||||
if err != nil {
|
||||
outFile.Close()
|
||||
// Fallback to regular tar
|
||||
goto regularTar
|
||||
}
|
||||
pigzCmd.Stdin = tarOut
|
||||
pigzCmd.Stdout = outFile
|
||||
|
||||
// Start both commands
|
||||
if err := pigzCmd.Start(); err != nil {
|
||||
outFile.Close()
|
||||
goto regularTar
|
||||
}
|
||||
if err := cmd.Start(); err != nil {
|
||||
pigzCmd.Process.Kill()
|
||||
outFile.Close()
|
||||
goto regularTar
|
||||
}
|
||||
|
||||
// Wait for tar to finish
|
||||
if err := cmd.Wait(); err != nil {
|
||||
pigzCmd.Process.Kill()
|
||||
return fmt.Errorf("tar failed: %w", err)
|
||||
}
|
||||
|
||||
// Wait for pigz to finish
|
||||
if err := pigzCmd.Wait(); err != nil {
|
||||
return fmt.Errorf("pigz compression failed: %w", err)
|
||||
}
|
||||
return nil
|
||||
if err != nil {
|
||||
return fmt.Errorf("parallel archive creation failed: %w", err)
|
||||
}
|
||||
|
||||
regularTar:
|
||||
// Standard tar with gzip (fallback)
|
||||
cmd := exec.CommandContext(ctx, compressCmd, compressArgs...)
|
||||
|
||||
// Stream stderr to avoid memory issues
|
||||
// Use io.Copy to ensure goroutine completes when pipe closes
|
||||
stderr, err := cmd.StderrPipe()
|
||||
if err == nil {
|
||||
go func() {
|
||||
scanner := bufio.NewScanner(stderr)
|
||||
for scanner.Scan() {
|
||||
line := scanner.Text()
|
||||
if line != "" {
|
||||
e.log.Debug("Archive creation", "output", line)
|
||||
}
|
||||
}
|
||||
// Scanner will exit when stderr pipe closes after cmd.Wait()
|
||||
}()
|
||||
}
|
||||
|
||||
if err := cmd.Run(); err != nil {
|
||||
return fmt.Errorf("tar failed: %w", err)
|
||||
}
|
||||
// cmd.Run() calls Wait() which closes stderr pipe, terminating the goroutine
|
||||
return nil
|
||||
}
|
||||
|
||||
@ -1144,23 +1244,29 @@ func (e *Engine) uploadToCloud(ctx context.Context, backupFile string, tracker *
|
||||
filename := filepath.Base(backupFile)
|
||||
e.log.Info("Uploading backup to cloud", "file", filename, "size", cloud.FormatSize(info.Size()))
|
||||
|
||||
// Progress callback
|
||||
var lastPercent int
|
||||
// Create schollz progressbar for visual upload progress
|
||||
bar := progress.NewSchollzBar(info.Size(), fmt.Sprintf("Uploading %s", filename))
|
||||
|
||||
// Progress callback with schollz progressbar
|
||||
var lastBytes int64
|
||||
progressCallback := func(transferred, total int64) {
|
||||
percent := int(float64(transferred) / float64(total) * 100)
|
||||
if percent != lastPercent && percent%10 == 0 {
|
||||
e.log.Debug("Upload progress", "percent", percent, "transferred", cloud.FormatSize(transferred), "total", cloud.FormatSize(total))
|
||||
lastPercent = percent
|
||||
delta := transferred - lastBytes
|
||||
if delta > 0 {
|
||||
_ = bar.Add64(delta)
|
||||
}
|
||||
lastBytes = transferred
|
||||
}
|
||||
|
||||
// Upload to cloud
|
||||
err = backend.Upload(ctx, backupFile, filename, progressCallback)
|
||||
if err != nil {
|
||||
bar.Fail("Upload failed")
|
||||
uploadStep.Fail(fmt.Errorf("cloud upload failed: %w", err))
|
||||
return err
|
||||
}
|
||||
|
||||
_ = bar.Finish()
|
||||
|
||||
// Also upload metadata file
|
||||
metaFile := backupFile + ".meta.json"
|
||||
if _, err := os.Stat(metaFile); err == nil {
|
||||
@ -1230,6 +1336,27 @@ func (e *Engine) executeCommand(ctx context.Context, cmdArgs []string, outputFil
|
||||
// NO GO BUFFERING - pg_dump writes directly to disk
|
||||
cmd := exec.CommandContext(ctx, cmdArgs[0], cmdArgs[1:]...)
|
||||
|
||||
// Start heartbeat ticker for backup progress
|
||||
backupStart := time.Now()
|
||||
heartbeatCtx, cancelHeartbeat := context.WithCancel(ctx)
|
||||
heartbeatTicker := time.NewTicker(5 * time.Second)
|
||||
defer heartbeatTicker.Stop()
|
||||
defer cancelHeartbeat()
|
||||
|
||||
go func() {
|
||||
for {
|
||||
select {
|
||||
case <-heartbeatTicker.C:
|
||||
elapsed := time.Since(backupStart)
|
||||
if e.progress != nil {
|
||||
e.progress.Update(fmt.Sprintf("Backing up database... (elapsed: %s)", formatDuration(elapsed)))
|
||||
}
|
||||
case <-heartbeatCtx.Done():
|
||||
return
|
||||
}
|
||||
}
|
||||
}()
|
||||
|
||||
// Set environment variables for database tools
|
||||
cmd.Env = os.Environ()
|
||||
if e.cfg.Password != "" {
|
||||
@ -1251,8 +1378,10 @@ func (e *Engine) executeCommand(ctx context.Context, cmdArgs []string, outputFil
|
||||
return fmt.Errorf("failed to start backup command: %w", err)
|
||||
}
|
||||
|
||||
// Stream stderr output (don't buffer it all in memory)
|
||||
// Stream stderr output in goroutine (don't buffer it all in memory)
|
||||
stderrDone := make(chan struct{})
|
||||
go func() {
|
||||
defer close(stderrDone)
|
||||
scanner := bufio.NewScanner(stderr)
|
||||
scanner.Buffer(make([]byte, 64*1024), 1024*1024) // 1MB max line size
|
||||
for scanner.Scan() {
|
||||
@ -1263,10 +1392,30 @@ func (e *Engine) executeCommand(ctx context.Context, cmdArgs []string, outputFil
|
||||
}
|
||||
}()
|
||||
|
||||
// Wait for command to complete
|
||||
if err := cmd.Wait(); err != nil {
|
||||
e.log.Error("Backup command failed", "error", err, "database", filepath.Base(outputFile))
|
||||
return fmt.Errorf("backup command failed: %w", err)
|
||||
// Wait for command to complete with proper context handling
|
||||
cmdDone := make(chan error, 1)
|
||||
go func() {
|
||||
cmdDone <- cmd.Wait()
|
||||
}()
|
||||
|
||||
var cmdErr error
|
||||
select {
|
||||
case cmdErr = <-cmdDone:
|
||||
// Command completed (success or failure)
|
||||
case <-ctx.Done():
|
||||
// Context cancelled - kill process to unblock
|
||||
e.log.Warn("Backup cancelled - killing pg_dump process")
|
||||
cmd.Process.Kill()
|
||||
<-cmdDone // Wait for goroutine to finish
|
||||
cmdErr = ctx.Err()
|
||||
}
|
||||
|
||||
// Wait for stderr reader to finish
|
||||
<-stderrDone
|
||||
|
||||
if cmdErr != nil {
|
||||
e.log.Error("Backup command failed", "error", cmdErr, "database", filepath.Base(outputFile))
|
||||
return fmt.Errorf("backup command failed: %w", cmdErr)
|
||||
}
|
||||
|
||||
return nil
|
||||
@ -1368,25 +1517,53 @@ func (e *Engine) executeWithStreamingCompression(ctx context.Context, cmdArgs []
|
||||
|
||||
// Then start pg_dump
|
||||
if err := dumpCmd.Start(); err != nil {
|
||||
compressCmd.Process.Kill()
|
||||
return fmt.Errorf("failed to start pg_dump: %w", err)
|
||||
}
|
||||
|
||||
// Wait for pg_dump to complete
|
||||
dumpErr := dumpCmd.Wait()
|
||||
// Wait for pg_dump in a goroutine to handle context timeout properly
|
||||
// This prevents deadlock if pipe buffer fills and pg_dump blocks
|
||||
dumpDone := make(chan error, 1)
|
||||
go func() {
|
||||
dumpDone <- dumpCmd.Wait()
|
||||
}()
|
||||
|
||||
// Close stdout pipe to signal compressor we're done - MUST happen after Wait()
|
||||
// but before we check for errors, so compressor gets EOF
|
||||
var dumpErr error
|
||||
select {
|
||||
case dumpErr = <-dumpDone:
|
||||
// pg_dump completed (success or failure)
|
||||
case <-ctx.Done():
|
||||
// Context cancelled/timeout - kill pg_dump to unblock
|
||||
e.log.Warn("Backup timeout - killing pg_dump process")
|
||||
dumpCmd.Process.Kill()
|
||||
<-dumpDone // Wait for goroutine to finish
|
||||
dumpErr = ctx.Err()
|
||||
}
|
||||
|
||||
// Close stdout pipe to signal compressor we're done
|
||||
// This MUST happen after pg_dump exits to avoid broken pipe
|
||||
dumpStdout.Close()
|
||||
|
||||
// Wait for compression to complete
|
||||
compressErr := compressCmd.Wait()
|
||||
|
||||
// Check errors in order
|
||||
// Check errors - compressor failure first (it's usually the root cause)
|
||||
if compressErr != nil {
|
||||
e.log.Error("Compressor failed", "error", compressErr)
|
||||
return fmt.Errorf("compression failed (check disk space): %w", compressErr)
|
||||
}
|
||||
if dumpErr != nil {
|
||||
// Check for SIGPIPE (exit code 141) - indicates compressor died first
|
||||
if exitErr, ok := dumpErr.(*exec.ExitError); ok && exitErr.ExitCode() == 141 {
|
||||
e.log.Error("pg_dump received SIGPIPE - compressor may have failed")
|
||||
return fmt.Errorf("pg_dump broken pipe - check disk space and compressor")
|
||||
}
|
||||
return fmt.Errorf("pg_dump failed: %w", dumpErr)
|
||||
}
|
||||
if compressErr != nil {
|
||||
return fmt.Errorf("compression failed: %w", compressErr)
|
||||
|
||||
// Sync file to disk to ensure durability (prevents truncation on power loss)
|
||||
if err := outFile.Sync(); err != nil {
|
||||
e.log.Warn("Failed to sync output file", "error", err)
|
||||
}
|
||||
|
||||
e.log.Debug("Streaming compression completed", "output", compressedFile)
|
||||
@ -1406,3 +1583,22 @@ func formatBytes(bytes int64) string {
|
||||
}
|
||||
return fmt.Sprintf("%.1f %cB", float64(bytes)/float64(div), "KMGTPE"[exp])
|
||||
}
|
||||
|
||||
// formatDuration formats a duration to human readable format (e.g., "3m 45s", "1h 23m", "45s")
|
||||
func formatDuration(d time.Duration) string {
|
||||
if d < time.Second {
|
||||
return "0s"
|
||||
}
|
||||
|
||||
hours := int(d.Hours())
|
||||
minutes := int(d.Minutes()) % 60
|
||||
seconds := int(d.Seconds()) % 60
|
||||
|
||||
if hours > 0 {
|
||||
return fmt.Sprintf("%dh %dm", hours, minutes)
|
||||
}
|
||||
if minutes > 0 {
|
||||
return fmt.Sprintf("%dm %ds", minutes, seconds)
|
||||
}
|
||||
return fmt.Sprintf("%ds", seconds)
|
||||
}
|
||||
|
||||
@ -2,12 +2,13 @@ package backup
|
||||
|
||||
import (
|
||||
"archive/tar"
|
||||
"compress/gzip"
|
||||
"context"
|
||||
"fmt"
|
||||
"io"
|
||||
"os"
|
||||
"path/filepath"
|
||||
|
||||
"github.com/klauspost/pgzip"
|
||||
)
|
||||
|
||||
// extractTarGz extracts a tar.gz archive to the specified directory
|
||||
@ -20,8 +21,8 @@ func (e *PostgresIncrementalEngine) extractTarGz(ctx context.Context, archivePat
|
||||
}
|
||||
defer archiveFile.Close()
|
||||
|
||||
// Create gzip reader
|
||||
gzReader, err := gzip.NewReader(archiveFile)
|
||||
// Create parallel gzip reader for faster decompression
|
||||
gzReader, err := pgzip.NewReader(archiveFile)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to create gzip reader: %w", err)
|
||||
}
|
||||
|
||||
@ -2,7 +2,6 @@ package backup
|
||||
|
||||
import (
|
||||
"archive/tar"
|
||||
"compress/gzip"
|
||||
"context"
|
||||
"crypto/sha256"
|
||||
"encoding/hex"
|
||||
@ -13,6 +12,8 @@ import (
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/klauspost/pgzip"
|
||||
|
||||
"dbbackup/internal/logger"
|
||||
"dbbackup/internal/metadata"
|
||||
)
|
||||
@ -367,15 +368,15 @@ func (e *MySQLIncrementalEngine) CalculateFileChecksum(path string) (string, err
|
||||
|
||||
// createTarGz creates a tar.gz archive with the specified changed files
|
||||
func (e *MySQLIncrementalEngine) createTarGz(ctx context.Context, outputFile string, changedFiles []ChangedFile, config *IncrementalBackupConfig) error {
|
||||
// Import needed for tar/gzip
|
||||
// Create output file
|
||||
outFile, err := os.Create(outputFile)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to create output file: %w", err)
|
||||
}
|
||||
defer outFile.Close()
|
||||
|
||||
// Create gzip writer
|
||||
gzWriter, err := gzip.NewWriterLevel(outFile, config.CompressionLevel)
|
||||
// Create parallel gzip writer for faster compression
|
||||
gzWriter, err := pgzip.NewWriterLevel(outFile, config.CompressionLevel)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to create gzip writer: %w", err)
|
||||
}
|
||||
@ -460,8 +461,8 @@ func (e *MySQLIncrementalEngine) extractTarGz(ctx context.Context, archivePath,
|
||||
}
|
||||
defer archiveFile.Close()
|
||||
|
||||
// Create gzip reader
|
||||
gzReader, err := gzip.NewReader(archiveFile)
|
||||
// Create parallel gzip reader for faster decompression
|
||||
gzReader, err := pgzip.NewReader(archiveFile)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to create gzip reader: %w", err)
|
||||
}
|
||||
|
||||
@ -2,11 +2,12 @@ package backup
|
||||
|
||||
import (
|
||||
"archive/tar"
|
||||
"compress/gzip"
|
||||
"context"
|
||||
"fmt"
|
||||
"io"
|
||||
"os"
|
||||
|
||||
"github.com/klauspost/pgzip"
|
||||
)
|
||||
|
||||
// createTarGz creates a tar.gz archive with the specified changed files
|
||||
@ -18,8 +19,8 @@ func (e *PostgresIncrementalEngine) createTarGz(ctx context.Context, outputFile
|
||||
}
|
||||
defer outFile.Close()
|
||||
|
||||
// Create gzip writer
|
||||
gzWriter, err := gzip.NewWriterLevel(outFile, config.CompressionLevel)
|
||||
// Create parallel gzip writer for faster compression
|
||||
gzWriter, err := pgzip.NewWriterLevel(outFile, config.CompressionLevel)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to create gzip writer: %w", err)
|
||||
}
|
||||
|
||||
@ -242,7 +242,7 @@ func TestIncrementalBackupRestore(t *testing.T) {
|
||||
t.Errorf("Unchanged file base/12345/1235 not found in restore: %v", err)
|
||||
}
|
||||
|
||||
t.Log("✅ Incremental backup and restore test completed successfully")
|
||||
t.Log("[OK] Incremental backup and restore test completed successfully")
|
||||
}
|
||||
|
||||
// TestIncrementalBackupErrors tests error handling
|
||||
|
||||
@ -11,7 +11,7 @@ import (
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
_ "github.com/mattn/go-sqlite3"
|
||||
_ "modernc.org/sqlite" // Pure Go SQLite driver (no CGO required)
|
||||
)
|
||||
|
||||
// SQLiteCatalog implements Catalog interface with SQLite storage
|
||||
@ -28,7 +28,7 @@ func NewSQLiteCatalog(dbPath string) (*SQLiteCatalog, error) {
|
||||
return nil, fmt.Errorf("failed to create catalog directory: %w", err)
|
||||
}
|
||||
|
||||
db, err := sql.Open("sqlite3", dbPath+"?_journal_mode=WAL&_foreign_keys=ON")
|
||||
db, err := sql.Open("sqlite", dbPath+"?_journal_mode=WAL&_foreign_keys=ON")
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to open catalog database: %w", err)
|
||||
}
|
||||
|
||||
@ -75,16 +75,16 @@ func FormatDiskSpaceMessage(check *DiskSpaceCheck) string {
|
||||
|
||||
if check.Critical {
|
||||
status = "CRITICAL"
|
||||
icon = "❌"
|
||||
icon = "[X]"
|
||||
} else if check.Warning {
|
||||
status = "WARNING"
|
||||
icon = "⚠️ "
|
||||
icon = "[!]"
|
||||
} else {
|
||||
status = "OK"
|
||||
icon = "✓"
|
||||
icon = "[+]"
|
||||
}
|
||||
|
||||
msg := fmt.Sprintf(`📊 Disk Space Check (%s):
|
||||
msg := fmt.Sprintf(`[DISK] Disk Space Check (%s):
|
||||
Path: %s
|
||||
Total: %s
|
||||
Available: %s (%.1f%% used)
|
||||
@ -98,13 +98,13 @@ func FormatDiskSpaceMessage(check *DiskSpaceCheck) string {
|
||||
status)
|
||||
|
||||
if check.Critical {
|
||||
msg += "\n \n ⚠️ CRITICAL: Insufficient disk space!"
|
||||
msg += "\n \n [!!] CRITICAL: Insufficient disk space!"
|
||||
msg += "\n Operation blocked. Free up space before continuing."
|
||||
} else if check.Warning {
|
||||
msg += "\n \n ⚠️ WARNING: Low disk space!"
|
||||
msg += "\n \n [!] WARNING: Low disk space!"
|
||||
msg += "\n Backup may fail if database is larger than estimated."
|
||||
} else {
|
||||
msg += "\n \n ✓ Sufficient space available"
|
||||
msg += "\n \n [+] Sufficient space available"
|
||||
}
|
||||
|
||||
return msg
|
||||
|
||||
@ -75,16 +75,16 @@ func FormatDiskSpaceMessage(check *DiskSpaceCheck) string {
|
||||
|
||||
if check.Critical {
|
||||
status = "CRITICAL"
|
||||
icon = "❌"
|
||||
icon = "[X]"
|
||||
} else if check.Warning {
|
||||
status = "WARNING"
|
||||
icon = "⚠️ "
|
||||
icon = "[!]"
|
||||
} else {
|
||||
status = "OK"
|
||||
icon = "✓"
|
||||
icon = "[+]"
|
||||
}
|
||||
|
||||
msg := fmt.Sprintf(`📊 Disk Space Check (%s):
|
||||
msg := fmt.Sprintf(`[DISK] Disk Space Check (%s):
|
||||
Path: %s
|
||||
Total: %s
|
||||
Available: %s (%.1f%% used)
|
||||
@ -98,13 +98,13 @@ func FormatDiskSpaceMessage(check *DiskSpaceCheck) string {
|
||||
status)
|
||||
|
||||
if check.Critical {
|
||||
msg += "\n \n ⚠️ CRITICAL: Insufficient disk space!"
|
||||
msg += "\n \n [!!] CRITICAL: Insufficient disk space!"
|
||||
msg += "\n Operation blocked. Free up space before continuing."
|
||||
} else if check.Warning {
|
||||
msg += "\n \n ⚠️ WARNING: Low disk space!"
|
||||
msg += "\n \n [!] WARNING: Low disk space!"
|
||||
msg += "\n Backup may fail if database is larger than estimated."
|
||||
} else {
|
||||
msg += "\n \n ✓ Sufficient space available"
|
||||
msg += "\n \n [+] Sufficient space available"
|
||||
}
|
||||
|
||||
return msg
|
||||
|
||||
@ -58,16 +58,16 @@ func FormatDiskSpaceMessage(check *DiskSpaceCheck) string {
|
||||
|
||||
if check.Critical {
|
||||
status = "CRITICAL"
|
||||
icon = "❌"
|
||||
icon = "[X]"
|
||||
} else if check.Warning {
|
||||
status = "WARNING"
|
||||
icon = "⚠️ "
|
||||
icon = "[!]"
|
||||
} else {
|
||||
status = "OK"
|
||||
icon = "✓"
|
||||
icon = "[+]"
|
||||
}
|
||||
|
||||
msg := fmt.Sprintf(`📊 Disk Space Check (%s):
|
||||
msg := fmt.Sprintf(`[DISK] Disk Space Check (%s):
|
||||
Path: %s
|
||||
Total: %s
|
||||
Available: %s (%.1f%% used)
|
||||
@ -81,13 +81,13 @@ func FormatDiskSpaceMessage(check *DiskSpaceCheck) string {
|
||||
status)
|
||||
|
||||
if check.Critical {
|
||||
msg += "\n \n ⚠️ CRITICAL: Insufficient disk space!"
|
||||
msg += "\n \n [!!] CRITICAL: Insufficient disk space!"
|
||||
msg += "\n Operation blocked. Free up space before continuing."
|
||||
} else if check.Warning {
|
||||
msg += "\n \n ⚠️ WARNING: Low disk space!"
|
||||
msg += "\n \n [!] WARNING: Low disk space!"
|
||||
msg += "\n Backup may fail if database is larger than estimated."
|
||||
} else {
|
||||
msg += "\n \n ✓ Sufficient space available"
|
||||
msg += "\n \n [+] Sufficient space available"
|
||||
}
|
||||
|
||||
return msg
|
||||
|
||||
@ -94,16 +94,16 @@ func FormatDiskSpaceMessage(check *DiskSpaceCheck) string {
|
||||
|
||||
if check.Critical {
|
||||
status = "CRITICAL"
|
||||
icon = "❌"
|
||||
icon = "[X]"
|
||||
} else if check.Warning {
|
||||
status = "WARNING"
|
||||
icon = "⚠️ "
|
||||
icon = "[!]"
|
||||
} else {
|
||||
status = "OK"
|
||||
icon = "✓"
|
||||
icon = "[+]"
|
||||
}
|
||||
|
||||
msg := fmt.Sprintf(`📊 Disk Space Check (%s):
|
||||
msg := fmt.Sprintf(`[DISK] Disk Space Check (%s):
|
||||
Path: %s
|
||||
Total: %s
|
||||
Available: %s (%.1f%% used)
|
||||
@ -117,13 +117,13 @@ func FormatDiskSpaceMessage(check *DiskSpaceCheck) string {
|
||||
status)
|
||||
|
||||
if check.Critical {
|
||||
msg += "\n \n ⚠️ CRITICAL: Insufficient disk space!"
|
||||
msg += "\n \n [!!] CRITICAL: Insufficient disk space!"
|
||||
msg += "\n Operation blocked. Free up space before continuing."
|
||||
} else if check.Warning {
|
||||
msg += "\n \n ⚠️ WARNING: Low disk space!"
|
||||
msg += "\n \n [!] WARNING: Low disk space!"
|
||||
msg += "\n Backup may fail if database is larger than estimated."
|
||||
} else {
|
||||
msg += "\n \n ✓ Sufficient space available"
|
||||
msg += "\n \n [+] Sufficient space available"
|
||||
}
|
||||
|
||||
return msg
|
||||
|
||||
@ -68,8 +68,8 @@ func ClassifyError(errorMsg string) *ErrorClassification {
|
||||
Type: "critical",
|
||||
Category: "locks",
|
||||
Message: errorMsg,
|
||||
Hint: "Lock table exhausted - typically caused by large objects in parallel restore",
|
||||
Action: "Increase max_locks_per_transaction in postgresql.conf to 512 or higher",
|
||||
Hint: "Lock table exhausted. Total capacity = max_locks_per_transaction × (max_connections + max_prepared_transactions). If you reduced VM size or max_connections, you need higher max_locks_per_transaction to compensate.",
|
||||
Action: "Fix: ALTER SYSTEM SET max_locks_per_transaction = 4096; then RESTART PostgreSQL. For smaller VMs with fewer connections, you need higher max_locks_per_transaction values.",
|
||||
Severity: 2,
|
||||
}
|
||||
case "permission_denied":
|
||||
@ -142,8 +142,8 @@ func ClassifyError(errorMsg string) *ErrorClassification {
|
||||
Type: "critical",
|
||||
Category: "locks",
|
||||
Message: errorMsg,
|
||||
Hint: "Lock table exhausted - typically caused by large objects in parallel restore",
|
||||
Action: "Increase max_locks_per_transaction in postgresql.conf to 512 or higher",
|
||||
Hint: "Lock table exhausted. Total capacity = max_locks_per_transaction × (max_connections + max_prepared_transactions). If you reduced VM size or max_connections, you need higher max_locks_per_transaction to compensate.",
|
||||
Action: "Fix: ALTER SYSTEM SET max_locks_per_transaction = 4096; then RESTART PostgreSQL. For smaller VMs with fewer connections, you need higher max_locks_per_transaction values.",
|
||||
Severity: 2,
|
||||
}
|
||||
}
|
||||
@ -234,22 +234,22 @@ func FormatErrorWithHint(errorMsg string) string {
|
||||
var icon string
|
||||
switch classification.Type {
|
||||
case "ignorable":
|
||||
icon = "ℹ️ "
|
||||
icon = "[i]"
|
||||
case "warning":
|
||||
icon = "⚠️ "
|
||||
icon = "[!]"
|
||||
case "critical":
|
||||
icon = "❌"
|
||||
icon = "[X]"
|
||||
case "fatal":
|
||||
icon = "🛑"
|
||||
icon = "[!!]"
|
||||
default:
|
||||
icon = "⚠️ "
|
||||
icon = "[!]"
|
||||
}
|
||||
|
||||
output := fmt.Sprintf("%s %s Error\n\n", icon, strings.ToUpper(classification.Type))
|
||||
output += fmt.Sprintf("Category: %s\n", classification.Category)
|
||||
output += fmt.Sprintf("Message: %s\n\n", classification.Message)
|
||||
output += fmt.Sprintf("💡 Hint: %s\n\n", classification.Hint)
|
||||
output += fmt.Sprintf("🔧 Action: %s\n", classification.Action)
|
||||
output += fmt.Sprintf("[HINT] Hint: %s\n\n", classification.Hint)
|
||||
output += fmt.Sprintf("[ACTION] Action: %s\n", classification.Action)
|
||||
|
||||
return output
|
||||
}
|
||||
@ -257,7 +257,7 @@ func FormatErrorWithHint(errorMsg string) string {
|
||||
// FormatMultipleErrors formats multiple errors with classification
|
||||
func FormatMultipleErrors(errors []string) string {
|
||||
if len(errors) == 0 {
|
||||
return "✓ No errors"
|
||||
return "[+] No errors"
|
||||
}
|
||||
|
||||
ignorable := 0
|
||||
@ -285,22 +285,22 @@ func FormatMultipleErrors(errors []string) string {
|
||||
}
|
||||
}
|
||||
|
||||
output := "📊 Error Summary:\n\n"
|
||||
output := "[SUMMARY] Error Summary:\n\n"
|
||||
if ignorable > 0 {
|
||||
output += fmt.Sprintf(" ℹ️ %d ignorable (objects already exist)\n", ignorable)
|
||||
output += fmt.Sprintf(" [i] %d ignorable (objects already exist)\n", ignorable)
|
||||
}
|
||||
if warnings > 0 {
|
||||
output += fmt.Sprintf(" ⚠️ %d warnings\n", warnings)
|
||||
output += fmt.Sprintf(" [!] %d warnings\n", warnings)
|
||||
}
|
||||
if critical > 0 {
|
||||
output += fmt.Sprintf(" ❌ %d critical errors\n", critical)
|
||||
output += fmt.Sprintf(" [X] %d critical errors\n", critical)
|
||||
}
|
||||
if fatal > 0 {
|
||||
output += fmt.Sprintf(" 🛑 %d fatal errors\n", fatal)
|
||||
output += fmt.Sprintf(" [!!] %d fatal errors\n", fatal)
|
||||
}
|
||||
|
||||
if len(criticalErrors) > 0 {
|
||||
output += "\n📝 Critical Issues:\n\n"
|
||||
output += "\n[CRITICAL] Critical Issues:\n\n"
|
||||
for i, err := range criticalErrors {
|
||||
class := ClassifyError(err)
|
||||
output += fmt.Sprintf("%d. %s\n", i+1, class.Hint)
|
||||
|
||||
181
internal/checks/locks.go
Normal file
181
internal/checks/locks.go
Normal file
@ -0,0 +1,181 @@
|
||||
package checks
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"os"
|
||||
"os/exec"
|
||||
"regexp"
|
||||
"strconv"
|
||||
"strings"
|
||||
"time"
|
||||
)
|
||||
|
||||
// lockRecommendation represents a normalized recommendation for locks
|
||||
type lockRecommendation int
|
||||
|
||||
const (
|
||||
recIncrease lockRecommendation = iota
|
||||
recSingleThreadedOrIncrease
|
||||
recSingleThreaded
|
||||
)
|
||||
|
||||
// determineLockRecommendation contains the pure logic (easy to unit-test).
|
||||
func determineLockRecommendation(locks, conns, prepared int64) (status CheckStatus, rec lockRecommendation) {
|
||||
// follow same thresholds as legacy script
|
||||
switch {
|
||||
case locks < 2048:
|
||||
return StatusFailed, recIncrease
|
||||
case locks < 8192:
|
||||
return StatusWarning, recIncrease
|
||||
case locks < 65536:
|
||||
return StatusWarning, recSingleThreadedOrIncrease
|
||||
default:
|
||||
return StatusPassed, recSingleThreaded
|
||||
}
|
||||
}
|
||||
|
||||
var nonDigits = regexp.MustCompile(`[^0-9]+`)
|
||||
|
||||
// parseNumeric strips non-digits and parses up to 10 characters (like the shell helper)
|
||||
func parseNumeric(s string) (int64, error) {
|
||||
if s == "" {
|
||||
return 0, fmt.Errorf("empty string")
|
||||
}
|
||||
s = nonDigits.ReplaceAllString(s, "")
|
||||
if len(s) > 10 {
|
||||
s = s[:10]
|
||||
}
|
||||
v, err := strconv.ParseInt(s, 10, 64)
|
||||
if err != nil {
|
||||
return 0, fmt.Errorf("parse error: %w", err)
|
||||
}
|
||||
return v, nil
|
||||
}
|
||||
|
||||
// execPsql runs psql with the supplied arguments and returns stdout (trimmed).
|
||||
// It attempts to avoid leaking passwords in error messages.
|
||||
func execPsql(ctx context.Context, args []string, env []string, useSudo bool) (string, error) {
|
||||
var cmd *exec.Cmd
|
||||
if useSudo {
|
||||
// sudo -u postgres psql --no-psqlrc -t -A -c "..."
|
||||
all := append([]string{"-u", "postgres", "--"}, "psql")
|
||||
all = append(all, args...)
|
||||
cmd = exec.CommandContext(ctx, "sudo", all...)
|
||||
} else {
|
||||
cmd = exec.CommandContext(ctx, "psql", args...)
|
||||
}
|
||||
cmd.Env = append(os.Environ(), env...)
|
||||
out, err := cmd.Output()
|
||||
if err != nil {
|
||||
// prefer a concise error
|
||||
return "", fmt.Errorf("psql failed: %w", err)
|
||||
}
|
||||
return strings.TrimSpace(string(out)), nil
|
||||
}
|
||||
|
||||
// checkPostgresLocks probes PostgreSQL (via psql) and returns a PreflightCheck.
|
||||
// It intentionally does not require a live internal/database.Database; it uses
|
||||
// the configured connection parameters or falls back to local sudo when possible.
|
||||
func (p *PreflightChecker) checkPostgresLocks(ctx context.Context) PreflightCheck {
|
||||
check := PreflightCheck{Name: "PostgreSQL lock configuration"}
|
||||
|
||||
if !p.cfg.IsPostgreSQL() {
|
||||
check.Status = StatusSkipped
|
||||
check.Message = "Skipped (not a PostgreSQL configuration)"
|
||||
return check
|
||||
}
|
||||
|
||||
// Build common psql args
|
||||
psqlArgs := []string{"--no-psqlrc", "-t", "-A", "-c"}
|
||||
queryLocks := "SHOW max_locks_per_transaction;"
|
||||
queryConns := "SHOW max_connections;"
|
||||
queryPrepared := "SHOW max_prepared_transactions;"
|
||||
|
||||
// Build connection flags
|
||||
if p.cfg.Host != "" {
|
||||
psqlArgs = append(psqlArgs, "-h", p.cfg.Host)
|
||||
}
|
||||
psqlArgs = append(psqlArgs, "-p", fmt.Sprint(p.cfg.Port))
|
||||
if p.cfg.User != "" {
|
||||
psqlArgs = append(psqlArgs, "-U", p.cfg.User)
|
||||
}
|
||||
// Use database if provided (helps some setups)
|
||||
if p.cfg.Database != "" {
|
||||
psqlArgs = append(psqlArgs, "-d", p.cfg.Database)
|
||||
}
|
||||
|
||||
// Env: prefer PGPASSWORD if configured
|
||||
env := []string{}
|
||||
if p.cfg.Password != "" {
|
||||
env = append(env, "PGPASSWORD="+p.cfg.Password)
|
||||
}
|
||||
|
||||
ctx, cancel := context.WithTimeout(ctx, 5*time.Second)
|
||||
defer cancel()
|
||||
|
||||
// helper to run a single SHOW query and parse numeric result
|
||||
runShow := func(q string) (int64, error) {
|
||||
args := append(psqlArgs, q)
|
||||
out, err := execPsql(ctx, args, env, false)
|
||||
if err != nil {
|
||||
// If local host and no explicit auth, try sudo -u postgres
|
||||
if (p.cfg.Host == "" || p.cfg.Host == "localhost" || p.cfg.Host == "127.0.0.1") && p.cfg.Password == "" {
|
||||
out, err = execPsql(ctx, append(psqlArgs, q), env, true)
|
||||
if err != nil {
|
||||
return 0, err
|
||||
}
|
||||
} else {
|
||||
return 0, err
|
||||
}
|
||||
}
|
||||
v, err := parseNumeric(out)
|
||||
if err != nil {
|
||||
return 0, fmt.Errorf("non-numeric response from psql: %q", out)
|
||||
}
|
||||
return v, nil
|
||||
}
|
||||
|
||||
locks, err := runShow(queryLocks)
|
||||
if err != nil {
|
||||
check.Status = StatusFailed
|
||||
check.Message = "Could not read max_locks_per_transaction"
|
||||
check.Details = err.Error()
|
||||
return check
|
||||
}
|
||||
|
||||
conns, err := runShow(queryConns)
|
||||
if err != nil {
|
||||
check.Status = StatusFailed
|
||||
check.Message = "Could not read max_connections"
|
||||
check.Details = err.Error()
|
||||
return check
|
||||
}
|
||||
|
||||
prepared, _ := runShow(queryPrepared) // optional; treat errors as zero
|
||||
|
||||
// Compute capacity
|
||||
capacity := locks * (conns + prepared)
|
||||
|
||||
status, rec := determineLockRecommendation(locks, conns, prepared)
|
||||
check.Status = status
|
||||
check.Message = fmt.Sprintf("locks=%d connections=%d prepared=%d capacity=%d", locks, conns, prepared, capacity)
|
||||
|
||||
// Human-friendly details + actionable remediation
|
||||
detailLines := []string{fmt.Sprintf("max_locks_per_transaction: %d", locks), fmt.Sprintf("max_connections: %d", conns), fmt.Sprintf("max_prepared_transactions: %d", prepared), fmt.Sprintf("Total lock capacity: %d", capacity)}
|
||||
|
||||
switch rec {
|
||||
case recIncrease:
|
||||
detailLines = append(detailLines, "RECOMMENDATION: Increase to at least 65536 and run restore single-threaded")
|
||||
detailLines = append(detailLines, " sudo -u postgres psql -c \"ALTER SYSTEM SET max_locks_per_transaction = 65536;\" && sudo systemctl restart postgresql")
|
||||
check.Details = strings.Join(detailLines, "\n")
|
||||
case recSingleThreadedOrIncrease:
|
||||
detailLines = append(detailLines, "RECOMMENDATION: Use single-threaded restore (--jobs 1 --parallel-dbs 1) or increase locks to 65536 and still prefer single-threaded")
|
||||
check.Details = strings.Join(detailLines, "\n")
|
||||
case recSingleThreaded:
|
||||
detailLines = append(detailLines, "RECOMMENDATION: Single-threaded restore is safest for very large DBs")
|
||||
check.Details = strings.Join(detailLines, "\n")
|
||||
}
|
||||
|
||||
return check
|
||||
}
|
||||
55
internal/checks/locks_test.go
Normal file
55
internal/checks/locks_test.go
Normal file
@ -0,0 +1,55 @@
|
||||
package checks
|
||||
|
||||
import (
|
||||
"testing"
|
||||
)
|
||||
|
||||
func TestDetermineLockRecommendation(t *testing.T) {
|
||||
tests := []struct {
|
||||
locks int64
|
||||
conns int64
|
||||
prepared int64
|
||||
exStatus CheckStatus
|
||||
exRec lockRecommendation
|
||||
}{
|
||||
{locks: 1024, conns: 100, prepared: 0, exStatus: StatusFailed, exRec: recIncrease},
|
||||
{locks: 4096, conns: 200, prepared: 0, exStatus: StatusWarning, exRec: recIncrease},
|
||||
{locks: 16384, conns: 200, prepared: 0, exStatus: StatusWarning, exRec: recSingleThreadedOrIncrease},
|
||||
{locks: 65536, conns: 200, prepared: 0, exStatus: StatusPassed, exRec: recSingleThreaded},
|
||||
}
|
||||
|
||||
for _, tc := range tests {
|
||||
st, rec := determineLockRecommendation(tc.locks, tc.conns, tc.prepared)
|
||||
if st != tc.exStatus {
|
||||
t.Fatalf("locks=%d: status = %v, want %v", tc.locks, st, tc.exStatus)
|
||||
}
|
||||
if rec != tc.exRec {
|
||||
t.Fatalf("locks=%d: rec = %v, want %v", tc.locks, rec, tc.exRec)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestParseNumeric(t *testing.T) {
|
||||
cases := map[string]int64{
|
||||
"4096": 4096,
|
||||
" 4096\n": 4096,
|
||||
"4096 (default)": 4096,
|
||||
"unknown": 0, // should error
|
||||
}
|
||||
|
||||
for in, want := range cases {
|
||||
v, err := parseNumeric(in)
|
||||
if want == 0 {
|
||||
if err == nil {
|
||||
t.Fatalf("expected error parsing %q", in)
|
||||
}
|
||||
continue
|
||||
}
|
||||
if err != nil {
|
||||
t.Fatalf("parseNumeric(%q) error: %v", in, err)
|
||||
}
|
||||
if v != want {
|
||||
t.Fatalf("parseNumeric(%q) = %d, want %d", in, v, want)
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -49,15 +49,15 @@ func (s CheckStatus) String() string {
|
||||
func (s CheckStatus) Icon() string {
|
||||
switch s {
|
||||
case StatusPassed:
|
||||
return "✓"
|
||||
return "[+]"
|
||||
case StatusWarning:
|
||||
return "⚠"
|
||||
return "[!]"
|
||||
case StatusFailed:
|
||||
return "✗"
|
||||
return "[-]"
|
||||
case StatusSkipped:
|
||||
return "○"
|
||||
return "[ ]"
|
||||
default:
|
||||
return "?"
|
||||
return "[?]"
|
||||
}
|
||||
}
|
||||
|
||||
@ -120,6 +120,17 @@ func (p *PreflightChecker) RunAllChecks(ctx context.Context, dbName string) (*Pr
|
||||
result.FailureCount++
|
||||
}
|
||||
|
||||
// Postgres lock configuration check (provides explicit restore guidance)
|
||||
locksCheck := p.checkPostgresLocks(ctx)
|
||||
result.Checks = append(result.Checks, locksCheck)
|
||||
if locksCheck.Status == StatusFailed {
|
||||
result.AllPassed = false
|
||||
result.FailureCount++
|
||||
} else if locksCheck.Status == StatusWarning {
|
||||
result.HasWarnings = true
|
||||
result.WarningCount++
|
||||
}
|
||||
|
||||
// Extract database info if connection succeeded
|
||||
if dbCheck.Status == StatusPassed && p.db != nil {
|
||||
version, _ := p.db.GetVersion(ctx)
|
||||
|
||||
@ -11,9 +11,9 @@ func FormatPreflightReport(result *PreflightResult, dbName string, verbose bool)
|
||||
var sb strings.Builder
|
||||
|
||||
sb.WriteString("\n")
|
||||
sb.WriteString("╔══════════════════════════════════════════════════════════════╗\n")
|
||||
sb.WriteString("║ [DRY RUN] Preflight Check Results ║\n")
|
||||
sb.WriteString("╚══════════════════════════════════════════════════════════════╝\n")
|
||||
sb.WriteString("+==============================================================+\n")
|
||||
sb.WriteString("| [DRY RUN] Preflight Check Results |\n")
|
||||
sb.WriteString("+==============================================================+\n")
|
||||
sb.WriteString("\n")
|
||||
|
||||
// Database info
|
||||
@ -29,7 +29,7 @@ func FormatPreflightReport(result *PreflightResult, dbName string, verbose bool)
|
||||
|
||||
// Check results
|
||||
sb.WriteString(" Checks:\n")
|
||||
sb.WriteString(" ─────────────────────────────────────────────────────────────\n")
|
||||
sb.WriteString(" --------------------------------------------------------------\n")
|
||||
|
||||
for _, check := range result.Checks {
|
||||
icon := check.Status.Icon()
|
||||
@ -40,26 +40,26 @@ func FormatPreflightReport(result *PreflightResult, dbName string, verbose bool)
|
||||
color, icon, reset, check.Name+":", check.Message))
|
||||
|
||||
if verbose && check.Details != "" {
|
||||
sb.WriteString(fmt.Sprintf(" └─ %s\n", check.Details))
|
||||
sb.WriteString(fmt.Sprintf(" +- %s\n", check.Details))
|
||||
}
|
||||
}
|
||||
|
||||
sb.WriteString(" ─────────────────────────────────────────────────────────────\n")
|
||||
sb.WriteString(" --------------------------------------------------------------\n")
|
||||
sb.WriteString("\n")
|
||||
|
||||
// Summary
|
||||
if result.AllPassed {
|
||||
if result.HasWarnings {
|
||||
sb.WriteString(" ⚠️ All checks passed with warnings\n")
|
||||
sb.WriteString(" [!] All checks passed with warnings\n")
|
||||
sb.WriteString("\n")
|
||||
sb.WriteString(" Ready to backup. Remove --dry-run to execute.\n")
|
||||
} else {
|
||||
sb.WriteString(" ✅ All checks passed\n")
|
||||
sb.WriteString(" [OK] All checks passed\n")
|
||||
sb.WriteString("\n")
|
||||
sb.WriteString(" Ready to backup. Remove --dry-run to execute.\n")
|
||||
}
|
||||
} else {
|
||||
sb.WriteString(fmt.Sprintf(" ❌ %d check(s) failed\n", result.FailureCount))
|
||||
sb.WriteString(fmt.Sprintf(" [FAIL] %d check(s) failed\n", result.FailureCount))
|
||||
sb.WriteString("\n")
|
||||
sb.WriteString(" Fix the issues above before running backup.\n")
|
||||
}
|
||||
@ -96,7 +96,7 @@ func FormatPreflightReportPlain(result *PreflightResult, dbName string) string {
|
||||
status := fmt.Sprintf("[%s]", check.Status.String())
|
||||
sb.WriteString(fmt.Sprintf(" %-10s %-25s %s\n", status, check.Name+":", check.Message))
|
||||
if check.Details != "" {
|
||||
sb.WriteString(fmt.Sprintf(" └─ %s\n", check.Details))
|
||||
sb.WriteString(fmt.Sprintf(" +- %s\n", check.Details))
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@ -12,6 +12,7 @@ import (
|
||||
"strings"
|
||||
"sync"
|
||||
"syscall"
|
||||
"time"
|
||||
|
||||
"dbbackup/internal/logger"
|
||||
)
|
||||
@ -116,8 +117,11 @@ func KillOrphanedProcesses(log logger.Logger) error {
|
||||
|
||||
// findProcessesByName returns PIDs of processes matching the given name
|
||||
func findProcessesByName(name string, excludePID int) ([]int, error) {
|
||||
// Use pgrep for efficient process searching
|
||||
cmd := exec.Command("pgrep", "-x", name)
|
||||
// Use pgrep for efficient process searching with timeout
|
||||
ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
|
||||
defer cancel()
|
||||
|
||||
cmd := exec.CommandContext(ctx, "pgrep", "-x", name)
|
||||
output, err := cmd.Output()
|
||||
if err != nil {
|
||||
// Exit code 1 means no processes found (not an error)
|
||||
|
||||
@ -90,7 +90,7 @@ func NewAzureBackend(cfg *Config) (*AzureBackend, error) {
|
||||
}
|
||||
} else {
|
||||
// Use default Azure credential (managed identity, environment variables, etc.)
|
||||
return nil, fmt.Errorf("Azure authentication requires account name and key, or use AZURE_STORAGE_CONNECTION_STRING environment variable")
|
||||
return nil, fmt.Errorf("azure authentication requires account name and key, or use AZURE_STORAGE_CONNECTION_STRING environment variable")
|
||||
}
|
||||
}
|
||||
|
||||
@ -151,37 +151,51 @@ func (a *AzureBackend) Upload(ctx context.Context, localPath, remotePath string,
|
||||
return a.uploadSimple(ctx, file, blobName, fileSize, progress)
|
||||
}
|
||||
|
||||
// uploadSimple uploads a file using simple upload (single request)
|
||||
// uploadSimple uploads a file using simple upload (single request) with retry
|
||||
func (a *AzureBackend) uploadSimple(ctx context.Context, file *os.File, blobName string, fileSize int64, progress ProgressCallback) error {
|
||||
blockBlobClient := a.client.ServiceClient().NewContainerClient(a.containerName).NewBlockBlobClient(blobName)
|
||||
return RetryOperationWithNotify(ctx, DefaultRetryConfig(), func() error {
|
||||
// Reset file position for retry
|
||||
if _, err := file.Seek(0, 0); err != nil {
|
||||
return fmt.Errorf("failed to reset file position: %w", err)
|
||||
}
|
||||
|
||||
// Wrap reader with progress tracking
|
||||
reader := NewProgressReader(file, fileSize, progress)
|
||||
blockBlobClient := a.client.ServiceClient().NewContainerClient(a.containerName).NewBlockBlobClient(blobName)
|
||||
|
||||
// Calculate MD5 hash for integrity
|
||||
hash := sha256.New()
|
||||
teeReader := io.TeeReader(reader, hash)
|
||||
// Wrap reader with progress tracking
|
||||
var reader io.Reader = NewProgressReader(file, fileSize, progress)
|
||||
|
||||
_, err := blockBlobClient.UploadStream(ctx, teeReader, &blockblob.UploadStreamOptions{
|
||||
BlockSize: 4 * 1024 * 1024, // 4MB blocks
|
||||
// Apply bandwidth throttling if configured
|
||||
if a.config.BandwidthLimit > 0 {
|
||||
reader = NewThrottledReader(ctx, reader, a.config.BandwidthLimit)
|
||||
}
|
||||
|
||||
// Calculate MD5 hash for integrity
|
||||
hash := sha256.New()
|
||||
teeReader := io.TeeReader(reader, hash)
|
||||
|
||||
_, err := blockBlobClient.UploadStream(ctx, teeReader, &blockblob.UploadStreamOptions{
|
||||
BlockSize: 4 * 1024 * 1024, // 4MB blocks
|
||||
})
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to upload blob: %w", err)
|
||||
}
|
||||
|
||||
// Store checksum as metadata
|
||||
checksum := hex.EncodeToString(hash.Sum(nil))
|
||||
metadata := map[string]*string{
|
||||
"sha256": &checksum,
|
||||
}
|
||||
|
||||
_, err = blockBlobClient.SetMetadata(ctx, metadata, nil)
|
||||
if err != nil {
|
||||
// Non-fatal: upload succeeded but metadata failed
|
||||
fmt.Fprintf(os.Stderr, "Warning: failed to set blob metadata: %v\n", err)
|
||||
}
|
||||
|
||||
return nil
|
||||
}, func(err error, duration time.Duration) {
|
||||
fmt.Printf("[Azure] Upload retry in %v: %v\n", duration, err)
|
||||
})
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to upload blob: %w", err)
|
||||
}
|
||||
|
||||
// Store checksum as metadata
|
||||
checksum := hex.EncodeToString(hash.Sum(nil))
|
||||
metadata := map[string]*string{
|
||||
"sha256": &checksum,
|
||||
}
|
||||
|
||||
_, err = blockBlobClient.SetMetadata(ctx, metadata, nil)
|
||||
if err != nil {
|
||||
// Non-fatal: upload succeeded but metadata failed
|
||||
fmt.Fprintf(os.Stderr, "Warning: failed to set blob metadata: %v\n", err)
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// uploadBlocks uploads a file using block blob staging (for large files)
|
||||
@ -195,6 +209,13 @@ func (a *AzureBackend) uploadBlocks(ctx context.Context, file *os.File, blobName
|
||||
hash := sha256.New()
|
||||
var totalUploaded int64
|
||||
|
||||
// Calculate throttle delay per byte if bandwidth limited
|
||||
var throttleDelay time.Duration
|
||||
if a.config.BandwidthLimit > 0 {
|
||||
// Calculate nanoseconds per byte
|
||||
throttleDelay = time.Duration(float64(time.Second) / float64(a.config.BandwidthLimit) * float64(blockSize))
|
||||
}
|
||||
|
||||
for i := int64(0); i < numBlocks; i++ {
|
||||
blockID := base64.StdEncoding.EncodeToString([]byte(fmt.Sprintf("block-%08d", i)))
|
||||
blockIDs = append(blockIDs, blockID)
|
||||
@ -216,6 +237,15 @@ func (a *AzureBackend) uploadBlocks(ctx context.Context, file *os.File, blobName
|
||||
// Update hash
|
||||
hash.Write(blockData)
|
||||
|
||||
// Apply throttling between blocks if configured
|
||||
if a.config.BandwidthLimit > 0 && i > 0 {
|
||||
select {
|
||||
case <-ctx.Done():
|
||||
return ctx.Err()
|
||||
case <-time.After(throttleDelay):
|
||||
}
|
||||
}
|
||||
|
||||
// Upload block
|
||||
reader := bytes.NewReader(blockData)
|
||||
_, err = blockBlobClient.StageBlock(ctx, blockID, streaming.NopCloser(reader), nil)
|
||||
@ -251,7 +281,7 @@ func (a *AzureBackend) uploadBlocks(ctx context.Context, file *os.File, blobName
|
||||
return nil
|
||||
}
|
||||
|
||||
// Download downloads a file from Azure Blob Storage
|
||||
// Download downloads a file from Azure Blob Storage with retry
|
||||
func (a *AzureBackend) Download(ctx context.Context, remotePath, localPath string, progress ProgressCallback) error {
|
||||
blobName := strings.TrimPrefix(remotePath, "/")
|
||||
blockBlobClient := a.client.ServiceClient().NewContainerClient(a.containerName).NewBlockBlobClient(blobName)
|
||||
@ -264,30 +294,34 @@ func (a *AzureBackend) Download(ctx context.Context, remotePath, localPath strin
|
||||
|
||||
fileSize := *props.ContentLength
|
||||
|
||||
// Download blob
|
||||
resp, err := blockBlobClient.DownloadStream(ctx, nil)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to download blob: %w", err)
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
return RetryOperationWithNotify(ctx, DefaultRetryConfig(), func() error {
|
||||
// Download blob
|
||||
resp, err := blockBlobClient.DownloadStream(ctx, nil)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to download blob: %w", err)
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
|
||||
// Create local file
|
||||
file, err := os.Create(localPath)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to create file: %w", err)
|
||||
}
|
||||
defer file.Close()
|
||||
// Create/truncate local file
|
||||
file, err := os.Create(localPath)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to create file: %w", err)
|
||||
}
|
||||
defer file.Close()
|
||||
|
||||
// Wrap reader with progress tracking
|
||||
reader := NewProgressReader(resp.Body, fileSize, progress)
|
||||
// Wrap reader with progress tracking
|
||||
reader := NewProgressReader(resp.Body, fileSize, progress)
|
||||
|
||||
// Copy with progress
|
||||
_, err = io.Copy(file, reader)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to write file: %w", err)
|
||||
}
|
||||
// Copy with progress
|
||||
_, err = io.Copy(file, reader)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to write file: %w", err)
|
||||
}
|
||||
|
||||
return nil
|
||||
return nil
|
||||
}, func(err error, duration time.Duration) {
|
||||
fmt.Printf("[Azure] Download retry in %v: %v\n", duration, err)
|
||||
})
|
||||
}
|
||||
|
||||
// Delete deletes a file from Azure Blob Storage
|
||||
|
||||
@ -89,7 +89,7 @@ func (g *GCSBackend) Name() string {
|
||||
return "gcs"
|
||||
}
|
||||
|
||||
// Upload uploads a file to Google Cloud Storage
|
||||
// Upload uploads a file to Google Cloud Storage with retry
|
||||
func (g *GCSBackend) Upload(ctx context.Context, localPath, remotePath string, progress ProgressCallback) error {
|
||||
file, err := os.Open(localPath)
|
||||
if err != nil {
|
||||
@ -106,45 +106,59 @@ func (g *GCSBackend) Upload(ctx context.Context, localPath, remotePath string, p
|
||||
// Remove leading slash from remote path
|
||||
objectName := strings.TrimPrefix(remotePath, "/")
|
||||
|
||||
bucket := g.client.Bucket(g.bucketName)
|
||||
object := bucket.Object(objectName)
|
||||
return RetryOperationWithNotify(ctx, DefaultRetryConfig(), func() error {
|
||||
// Reset file position for retry
|
||||
if _, err := file.Seek(0, 0); err != nil {
|
||||
return fmt.Errorf("failed to reset file position: %w", err)
|
||||
}
|
||||
|
||||
// Create writer with automatic chunking for large files
|
||||
writer := object.NewWriter(ctx)
|
||||
writer.ChunkSize = 16 * 1024 * 1024 // 16MB chunks for streaming
|
||||
bucket := g.client.Bucket(g.bucketName)
|
||||
object := bucket.Object(objectName)
|
||||
|
||||
// Wrap reader with progress tracking and hash calculation
|
||||
hash := sha256.New()
|
||||
reader := NewProgressReader(io.TeeReader(file, hash), fileSize, progress)
|
||||
// Create writer with automatic chunking for large files
|
||||
writer := object.NewWriter(ctx)
|
||||
writer.ChunkSize = 16 * 1024 * 1024 // 16MB chunks for streaming
|
||||
|
||||
// Upload with progress tracking
|
||||
_, err = io.Copy(writer, reader)
|
||||
if err != nil {
|
||||
writer.Close()
|
||||
return fmt.Errorf("failed to upload object: %w", err)
|
||||
}
|
||||
// Wrap reader with progress tracking and hash calculation
|
||||
hash := sha256.New()
|
||||
var reader io.Reader = NewProgressReader(io.TeeReader(file, hash), fileSize, progress)
|
||||
|
||||
// Close writer (finalizes upload)
|
||||
if err := writer.Close(); err != nil {
|
||||
return fmt.Errorf("failed to finalize upload: %w", err)
|
||||
}
|
||||
// Apply bandwidth throttling if configured
|
||||
if g.config.BandwidthLimit > 0 {
|
||||
reader = NewThrottledReader(ctx, reader, g.config.BandwidthLimit)
|
||||
}
|
||||
|
||||
// Store checksum as metadata
|
||||
checksum := hex.EncodeToString(hash.Sum(nil))
|
||||
_, err = object.Update(ctx, storage.ObjectAttrsToUpdate{
|
||||
Metadata: map[string]string{
|
||||
"sha256": checksum,
|
||||
},
|
||||
// Upload with progress tracking
|
||||
_, err = io.Copy(writer, reader)
|
||||
if err != nil {
|
||||
writer.Close()
|
||||
return fmt.Errorf("failed to upload object: %w", err)
|
||||
}
|
||||
|
||||
// Close writer (finalizes upload)
|
||||
if err := writer.Close(); err != nil {
|
||||
return fmt.Errorf("failed to finalize upload: %w", err)
|
||||
}
|
||||
|
||||
// Store checksum as metadata
|
||||
checksum := hex.EncodeToString(hash.Sum(nil))
|
||||
_, err = object.Update(ctx, storage.ObjectAttrsToUpdate{
|
||||
Metadata: map[string]string{
|
||||
"sha256": checksum,
|
||||
},
|
||||
})
|
||||
if err != nil {
|
||||
// Non-fatal: upload succeeded but metadata failed
|
||||
fmt.Fprintf(os.Stderr, "Warning: failed to set object metadata: %v\n", err)
|
||||
}
|
||||
|
||||
return nil
|
||||
}, func(err error, duration time.Duration) {
|
||||
fmt.Printf("[GCS] Upload retry in %v: %v\n", duration, err)
|
||||
})
|
||||
if err != nil {
|
||||
// Non-fatal: upload succeeded but metadata failed
|
||||
fmt.Fprintf(os.Stderr, "Warning: failed to set object metadata: %v\n", err)
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// Download downloads a file from Google Cloud Storage
|
||||
// Download downloads a file from Google Cloud Storage with retry
|
||||
func (g *GCSBackend) Download(ctx context.Context, remotePath, localPath string, progress ProgressCallback) error {
|
||||
objectName := strings.TrimPrefix(remotePath, "/")
|
||||
|
||||
@ -159,30 +173,34 @@ func (g *GCSBackend) Download(ctx context.Context, remotePath, localPath string,
|
||||
|
||||
fileSize := attrs.Size
|
||||
|
||||
// Create reader
|
||||
reader, err := object.NewReader(ctx)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to download object: %w", err)
|
||||
}
|
||||
defer reader.Close()
|
||||
return RetryOperationWithNotify(ctx, DefaultRetryConfig(), func() error {
|
||||
// Create reader
|
||||
reader, err := object.NewReader(ctx)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to download object: %w", err)
|
||||
}
|
||||
defer reader.Close()
|
||||
|
||||
// Create local file
|
||||
file, err := os.Create(localPath)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to create file: %w", err)
|
||||
}
|
||||
defer file.Close()
|
||||
// Create/truncate local file
|
||||
file, err := os.Create(localPath)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to create file: %w", err)
|
||||
}
|
||||
defer file.Close()
|
||||
|
||||
// Wrap reader with progress tracking
|
||||
progressReader := NewProgressReader(reader, fileSize, progress)
|
||||
// Wrap reader with progress tracking
|
||||
progressReader := NewProgressReader(reader, fileSize, progress)
|
||||
|
||||
// Copy with progress
|
||||
_, err = io.Copy(file, progressReader)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to write file: %w", err)
|
||||
}
|
||||
// Copy with progress
|
||||
_, err = io.Copy(file, progressReader)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to write file: %w", err)
|
||||
}
|
||||
|
||||
return nil
|
||||
return nil
|
||||
}, func(err error, duration time.Duration) {
|
||||
fmt.Printf("[GCS] Download retry in %v: %v\n", duration, err)
|
||||
})
|
||||
}
|
||||
|
||||
// Delete deletes a file from Google Cloud Storage
|
||||
|
||||
@ -46,18 +46,19 @@ type ProgressCallback func(bytesTransferred, totalBytes int64)
|
||||
|
||||
// Config contains common configuration for cloud backends
|
||||
type Config struct {
|
||||
Provider string // "s3", "minio", "azure", "gcs", "b2"
|
||||
Bucket string // Bucket or container name
|
||||
Region string // Region (for S3)
|
||||
Endpoint string // Custom endpoint (for MinIO, S3-compatible)
|
||||
AccessKey string // Access key or account ID
|
||||
SecretKey string // Secret key or access token
|
||||
UseSSL bool // Use SSL/TLS (default: true)
|
||||
PathStyle bool // Use path-style addressing (for MinIO)
|
||||
Prefix string // Prefix for all operations (e.g., "backups/")
|
||||
Timeout int // Timeout in seconds (default: 300)
|
||||
MaxRetries int // Maximum retry attempts (default: 3)
|
||||
Concurrency int // Upload/download concurrency (default: 5)
|
||||
Provider string // "s3", "minio", "azure", "gcs", "b2"
|
||||
Bucket string // Bucket or container name
|
||||
Region string // Region (for S3)
|
||||
Endpoint string // Custom endpoint (for MinIO, S3-compatible)
|
||||
AccessKey string // Access key or account ID
|
||||
SecretKey string // Secret key or access token
|
||||
UseSSL bool // Use SSL/TLS (default: true)
|
||||
PathStyle bool // Use path-style addressing (for MinIO)
|
||||
Prefix string // Prefix for all operations (e.g., "backups/")
|
||||
Timeout int // Timeout in seconds (default: 300)
|
||||
MaxRetries int // Maximum retry attempts (default: 3)
|
||||
Concurrency int // Upload/download concurrency (default: 5)
|
||||
BandwidthLimit int64 // Maximum upload/download bandwidth in bytes/sec (0 = unlimited)
|
||||
}
|
||||
|
||||
// NewBackend creates a new cloud storage backend based on the provider
|
||||
|
||||
257
internal/cloud/retry.go
Normal file
257
internal/cloud/retry.go
Normal file
@ -0,0 +1,257 @@
|
||||
package cloud
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"net"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/cenkalti/backoff/v4"
|
||||
)
|
||||
|
||||
// RetryConfig configures retry behavior
|
||||
type RetryConfig struct {
|
||||
MaxRetries int // Maximum number of retries (0 = unlimited)
|
||||
InitialInterval time.Duration // Initial backoff interval
|
||||
MaxInterval time.Duration // Maximum backoff interval
|
||||
MaxElapsedTime time.Duration // Maximum total time for retries
|
||||
Multiplier float64 // Backoff multiplier
|
||||
}
|
||||
|
||||
// DefaultRetryConfig returns sensible defaults for cloud operations
|
||||
func DefaultRetryConfig() *RetryConfig {
|
||||
return &RetryConfig{
|
||||
MaxRetries: 5,
|
||||
InitialInterval: 500 * time.Millisecond,
|
||||
MaxInterval: 30 * time.Second,
|
||||
MaxElapsedTime: 5 * time.Minute,
|
||||
Multiplier: 2.0,
|
||||
}
|
||||
}
|
||||
|
||||
// AggressiveRetryConfig returns config for critical operations that need more retries
|
||||
func AggressiveRetryConfig() *RetryConfig {
|
||||
return &RetryConfig{
|
||||
MaxRetries: 10,
|
||||
InitialInterval: 1 * time.Second,
|
||||
MaxInterval: 60 * time.Second,
|
||||
MaxElapsedTime: 15 * time.Minute,
|
||||
Multiplier: 1.5,
|
||||
}
|
||||
}
|
||||
|
||||
// QuickRetryConfig returns config for operations that should fail fast
|
||||
func QuickRetryConfig() *RetryConfig {
|
||||
return &RetryConfig{
|
||||
MaxRetries: 3,
|
||||
InitialInterval: 100 * time.Millisecond,
|
||||
MaxInterval: 5 * time.Second,
|
||||
MaxElapsedTime: 30 * time.Second,
|
||||
Multiplier: 2.0,
|
||||
}
|
||||
}
|
||||
|
||||
// RetryOperation executes an operation with exponential backoff retry
|
||||
func RetryOperation(ctx context.Context, cfg *RetryConfig, operation func() error) error {
|
||||
if cfg == nil {
|
||||
cfg = DefaultRetryConfig()
|
||||
}
|
||||
|
||||
// Create exponential backoff
|
||||
expBackoff := backoff.NewExponentialBackOff()
|
||||
expBackoff.InitialInterval = cfg.InitialInterval
|
||||
expBackoff.MaxInterval = cfg.MaxInterval
|
||||
expBackoff.MaxElapsedTime = cfg.MaxElapsedTime
|
||||
expBackoff.Multiplier = cfg.Multiplier
|
||||
expBackoff.Reset()
|
||||
|
||||
// Wrap with max retries if specified
|
||||
var b backoff.BackOff = expBackoff
|
||||
if cfg.MaxRetries > 0 {
|
||||
b = backoff.WithMaxRetries(expBackoff, uint64(cfg.MaxRetries))
|
||||
}
|
||||
|
||||
// Add context support
|
||||
b = backoff.WithContext(b, ctx)
|
||||
|
||||
// Track attempts for logging
|
||||
attempt := 0
|
||||
|
||||
// Wrap operation to handle permanent vs retryable errors
|
||||
wrappedOp := func() error {
|
||||
attempt++
|
||||
err := operation()
|
||||
if err == nil {
|
||||
return nil
|
||||
}
|
||||
|
||||
// Check if error is permanent (should not retry)
|
||||
if IsPermanentError(err) {
|
||||
return backoff.Permanent(err)
|
||||
}
|
||||
|
||||
return err
|
||||
}
|
||||
|
||||
return backoff.Retry(wrappedOp, b)
|
||||
}
|
||||
|
||||
// RetryOperationWithNotify executes an operation with retry and calls notify on each retry
|
||||
func RetryOperationWithNotify(ctx context.Context, cfg *RetryConfig, operation func() error, notify func(err error, duration time.Duration)) error {
|
||||
if cfg == nil {
|
||||
cfg = DefaultRetryConfig()
|
||||
}
|
||||
|
||||
// Create exponential backoff
|
||||
expBackoff := backoff.NewExponentialBackOff()
|
||||
expBackoff.InitialInterval = cfg.InitialInterval
|
||||
expBackoff.MaxInterval = cfg.MaxInterval
|
||||
expBackoff.MaxElapsedTime = cfg.MaxElapsedTime
|
||||
expBackoff.Multiplier = cfg.Multiplier
|
||||
expBackoff.Reset()
|
||||
|
||||
// Wrap with max retries if specified
|
||||
var b backoff.BackOff = expBackoff
|
||||
if cfg.MaxRetries > 0 {
|
||||
b = backoff.WithMaxRetries(expBackoff, uint64(cfg.MaxRetries))
|
||||
}
|
||||
|
||||
// Add context support
|
||||
b = backoff.WithContext(b, ctx)
|
||||
|
||||
// Wrap operation to handle permanent vs retryable errors
|
||||
wrappedOp := func() error {
|
||||
err := operation()
|
||||
if err == nil {
|
||||
return nil
|
||||
}
|
||||
|
||||
// Check if error is permanent (should not retry)
|
||||
if IsPermanentError(err) {
|
||||
return backoff.Permanent(err)
|
||||
}
|
||||
|
||||
return err
|
||||
}
|
||||
|
||||
return backoff.RetryNotify(wrappedOp, b, notify)
|
||||
}
|
||||
|
||||
// IsPermanentError returns true if the error should not be retried
|
||||
func IsPermanentError(err error) bool {
|
||||
if err == nil {
|
||||
return false
|
||||
}
|
||||
|
||||
errStr := strings.ToLower(err.Error())
|
||||
|
||||
// Authentication/authorization errors - don't retry
|
||||
permanentPatterns := []string{
|
||||
"access denied",
|
||||
"forbidden",
|
||||
"unauthorized",
|
||||
"invalid credentials",
|
||||
"invalid access key",
|
||||
"invalid secret",
|
||||
"no such bucket",
|
||||
"bucket not found",
|
||||
"container not found",
|
||||
"nosuchbucket",
|
||||
"nosuchkey",
|
||||
"invalid argument",
|
||||
"malformed",
|
||||
"invalid request",
|
||||
"permission denied",
|
||||
"access control",
|
||||
"policy",
|
||||
}
|
||||
|
||||
for _, pattern := range permanentPatterns {
|
||||
if strings.Contains(errStr, pattern) {
|
||||
return true
|
||||
}
|
||||
}
|
||||
|
||||
return false
|
||||
}
|
||||
|
||||
// IsRetryableError returns true if the error is transient and should be retried
|
||||
func IsRetryableError(err error) bool {
|
||||
if err == nil {
|
||||
return false
|
||||
}
|
||||
|
||||
// Network errors are typically retryable
|
||||
var netErr net.Error
|
||||
if ok := isNetError(err, &netErr); ok {
|
||||
return netErr.Timeout() || netErr.Temporary()
|
||||
}
|
||||
|
||||
errStr := strings.ToLower(err.Error())
|
||||
|
||||
// Transient errors - should retry
|
||||
retryablePatterns := []string{
|
||||
"timeout",
|
||||
"connection reset",
|
||||
"connection refused",
|
||||
"connection closed",
|
||||
"eof",
|
||||
"broken pipe",
|
||||
"temporary failure",
|
||||
"service unavailable",
|
||||
"internal server error",
|
||||
"bad gateway",
|
||||
"gateway timeout",
|
||||
"too many requests",
|
||||
"rate limit",
|
||||
"throttl",
|
||||
"slowdown",
|
||||
"try again",
|
||||
"retry",
|
||||
}
|
||||
|
||||
for _, pattern := range retryablePatterns {
|
||||
if strings.Contains(errStr, pattern) {
|
||||
return true
|
||||
}
|
||||
}
|
||||
|
||||
return false
|
||||
}
|
||||
|
||||
// isNetError checks if err wraps a net.Error
|
||||
func isNetError(err error, target *net.Error) bool {
|
||||
for err != nil {
|
||||
if ne, ok := err.(net.Error); ok {
|
||||
*target = ne
|
||||
return true
|
||||
}
|
||||
// Try to unwrap
|
||||
if unwrapper, ok := err.(interface{ Unwrap() error }); ok {
|
||||
err = unwrapper.Unwrap()
|
||||
} else {
|
||||
break
|
||||
}
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
// WithRetry is a helper that wraps a function with default retry logic
|
||||
func WithRetry(ctx context.Context, operationName string, fn func() error) error {
|
||||
notify := func(err error, duration time.Duration) {
|
||||
// Log retry attempts (caller can provide their own logger if needed)
|
||||
fmt.Printf("[RETRY] %s failed, retrying in %v: %v\n", operationName, duration, err)
|
||||
}
|
||||
|
||||
return RetryOperationWithNotify(ctx, DefaultRetryConfig(), fn, notify)
|
||||
}
|
||||
|
||||
// WithRetryConfig is a helper that wraps a function with custom retry config
|
||||
func WithRetryConfig(ctx context.Context, cfg *RetryConfig, operationName string, fn func() error) error {
|
||||
notify := func(err error, duration time.Duration) {
|
||||
fmt.Printf("[RETRY] %s failed, retrying in %v: %v\n", operationName, duration, err)
|
||||
}
|
||||
|
||||
return RetryOperationWithNotify(ctx, cfg, fn, notify)
|
||||
}
|
||||
@ -7,6 +7,7 @@ import (
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/aws/aws-sdk-go-v2/aws"
|
||||
"github.com/aws/aws-sdk-go-v2/config"
|
||||
@ -123,63 +124,99 @@ func (s *S3Backend) Upload(ctx context.Context, localPath, remotePath string, pr
|
||||
return s.uploadSimple(ctx, file, key, fileSize, progress)
|
||||
}
|
||||
|
||||
// uploadSimple performs a simple single-part upload
|
||||
// uploadSimple performs a simple single-part upload with retry
|
||||
func (s *S3Backend) uploadSimple(ctx context.Context, file *os.File, key string, fileSize int64, progress ProgressCallback) error {
|
||||
// Create progress reader
|
||||
var reader io.Reader = file
|
||||
if progress != nil {
|
||||
reader = NewProgressReader(file, fileSize, progress)
|
||||
}
|
||||
return RetryOperationWithNotify(ctx, DefaultRetryConfig(), func() error {
|
||||
// Reset file position for retry
|
||||
if _, err := file.Seek(0, 0); err != nil {
|
||||
return fmt.Errorf("failed to reset file position: %w", err)
|
||||
}
|
||||
|
||||
// Upload to S3
|
||||
_, err := s.client.PutObject(ctx, &s3.PutObjectInput{
|
||||
Bucket: aws.String(s.bucket),
|
||||
Key: aws.String(key),
|
||||
Body: reader,
|
||||
// Create progress reader
|
||||
var reader io.Reader = file
|
||||
if progress != nil {
|
||||
reader = NewProgressReader(file, fileSize, progress)
|
||||
}
|
||||
|
||||
// Apply bandwidth throttling if configured
|
||||
if s.config.BandwidthLimit > 0 {
|
||||
reader = NewThrottledReader(ctx, reader, s.config.BandwidthLimit)
|
||||
}
|
||||
|
||||
// Upload to S3
|
||||
_, err := s.client.PutObject(ctx, &s3.PutObjectInput{
|
||||
Bucket: aws.String(s.bucket),
|
||||
Key: aws.String(key),
|
||||
Body: reader,
|
||||
})
|
||||
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to upload to S3: %w", err)
|
||||
}
|
||||
|
||||
return nil
|
||||
}, func(err error, duration time.Duration) {
|
||||
fmt.Printf("[S3] Upload retry in %v: %v\n", duration, err)
|
||||
})
|
||||
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to upload to S3: %w", err)
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// uploadMultipart performs a multipart upload for large files
|
||||
// uploadMultipart performs a multipart upload for large files with retry
|
||||
func (s *S3Backend) uploadMultipart(ctx context.Context, file *os.File, key string, fileSize int64, progress ProgressCallback) error {
|
||||
// Create uploader with custom options
|
||||
uploader := manager.NewUploader(s.client, func(u *manager.Uploader) {
|
||||
// Part size: 10MB
|
||||
u.PartSize = 10 * 1024 * 1024
|
||||
return RetryOperationWithNotify(ctx, AggressiveRetryConfig(), func() error {
|
||||
// Reset file position for retry
|
||||
if _, err := file.Seek(0, 0); err != nil {
|
||||
return fmt.Errorf("failed to reset file position: %w", err)
|
||||
}
|
||||
|
||||
// Upload up to 10 parts concurrently
|
||||
u.Concurrency = 10
|
||||
// Calculate concurrency based on bandwidth limit
|
||||
// If limited, reduce concurrency to make throttling more effective
|
||||
concurrency := 10
|
||||
if s.config.BandwidthLimit > 0 {
|
||||
// With bandwidth limiting, use fewer concurrent parts
|
||||
concurrency = 3
|
||||
}
|
||||
|
||||
// Leave parts on failure for debugging
|
||||
u.LeavePartsOnError = false
|
||||
// Create uploader with custom options
|
||||
uploader := manager.NewUploader(s.client, func(u *manager.Uploader) {
|
||||
// Part size: 10MB
|
||||
u.PartSize = 10 * 1024 * 1024
|
||||
|
||||
// Adjust concurrency
|
||||
u.Concurrency = concurrency
|
||||
|
||||
// Leave parts on failure for debugging
|
||||
u.LeavePartsOnError = false
|
||||
})
|
||||
|
||||
// Wrap file with progress reader
|
||||
var reader io.Reader = file
|
||||
if progress != nil {
|
||||
reader = NewProgressReader(file, fileSize, progress)
|
||||
}
|
||||
|
||||
// Apply bandwidth throttling if configured
|
||||
if s.config.BandwidthLimit > 0 {
|
||||
reader = NewThrottledReader(ctx, reader, s.config.BandwidthLimit)
|
||||
}
|
||||
|
||||
// Upload with multipart
|
||||
_, err := uploader.Upload(ctx, &s3.PutObjectInput{
|
||||
Bucket: aws.String(s.bucket),
|
||||
Key: aws.String(key),
|
||||
Body: reader,
|
||||
})
|
||||
|
||||
if err != nil {
|
||||
return fmt.Errorf("multipart upload failed: %w", err)
|
||||
}
|
||||
|
||||
return nil
|
||||
}, func(err error, duration time.Duration) {
|
||||
fmt.Printf("[S3] Multipart upload retry in %v: %v\n", duration, err)
|
||||
})
|
||||
|
||||
// Wrap file with progress reader
|
||||
var reader io.Reader = file
|
||||
if progress != nil {
|
||||
reader = NewProgressReader(file, fileSize, progress)
|
||||
}
|
||||
|
||||
// Upload with multipart
|
||||
_, err := uploader.Upload(ctx, &s3.PutObjectInput{
|
||||
Bucket: aws.String(s.bucket),
|
||||
Key: aws.String(key),
|
||||
Body: reader,
|
||||
})
|
||||
|
||||
if err != nil {
|
||||
return fmt.Errorf("multipart upload failed: %w", err)
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// Download downloads a file from S3
|
||||
// Download downloads a file from S3 with retry
|
||||
func (s *S3Backend) Download(ctx context.Context, remotePath, localPath string, progress ProgressCallback) error {
|
||||
// Build S3 key
|
||||
key := s.buildKey(remotePath)
|
||||
@ -190,39 +227,44 @@ func (s *S3Backend) Download(ctx context.Context, remotePath, localPath string,
|
||||
return fmt.Errorf("failed to get object size: %w", err)
|
||||
}
|
||||
|
||||
// Download from S3
|
||||
result, err := s.client.GetObject(ctx, &s3.GetObjectInput{
|
||||
Bucket: aws.String(s.bucket),
|
||||
Key: aws.String(key),
|
||||
})
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to download from S3: %w", err)
|
||||
}
|
||||
defer result.Body.Close()
|
||||
|
||||
// Create local file
|
||||
// Create directory for local file
|
||||
if err := os.MkdirAll(filepath.Dir(localPath), 0755); err != nil {
|
||||
return fmt.Errorf("failed to create directory: %w", err)
|
||||
}
|
||||
|
||||
outFile, err := os.Create(localPath)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to create local file: %w", err)
|
||||
}
|
||||
defer outFile.Close()
|
||||
return RetryOperationWithNotify(ctx, DefaultRetryConfig(), func() error {
|
||||
// Download from S3
|
||||
result, err := s.client.GetObject(ctx, &s3.GetObjectInput{
|
||||
Bucket: aws.String(s.bucket),
|
||||
Key: aws.String(key),
|
||||
})
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to download from S3: %w", err)
|
||||
}
|
||||
defer result.Body.Close()
|
||||
|
||||
// Copy with progress tracking
|
||||
var reader io.Reader = result.Body
|
||||
if progress != nil {
|
||||
reader = NewProgressReader(result.Body, size, progress)
|
||||
}
|
||||
// Create/truncate local file
|
||||
outFile, err := os.Create(localPath)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to create local file: %w", err)
|
||||
}
|
||||
defer outFile.Close()
|
||||
|
||||
_, err = io.Copy(outFile, reader)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to write file: %w", err)
|
||||
}
|
||||
// Copy with progress tracking
|
||||
var reader io.Reader = result.Body
|
||||
if progress != nil {
|
||||
reader = NewProgressReader(result.Body, size, progress)
|
||||
}
|
||||
|
||||
return nil
|
||||
_, err = io.Copy(outFile, reader)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to write file: %w", err)
|
||||
}
|
||||
|
||||
return nil
|
||||
}, func(err error, duration time.Duration) {
|
||||
fmt.Printf("[S3] Download retry in %v: %v\n", duration, err)
|
||||
})
|
||||
}
|
||||
|
||||
// List lists all backup files in S3
|
||||
|
||||
251
internal/cloud/throttle.go
Normal file
251
internal/cloud/throttle.go
Normal file
@ -0,0 +1,251 @@
|
||||
// Package cloud provides throttled readers for bandwidth limiting during cloud uploads/downloads
|
||||
package cloud
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"io"
|
||||
"strings"
|
||||
"sync"
|
||||
"time"
|
||||
)
|
||||
|
||||
// ThrottledReader wraps an io.Reader and limits the read rate to a maximum bytes per second.
|
||||
// This is useful for cloud uploads where you don't want to saturate the network.
|
||||
type ThrottledReader struct {
|
||||
reader io.Reader
|
||||
bytesPerSec int64 // Maximum bytes per second (0 = unlimited)
|
||||
bytesRead int64 // Bytes read in current window
|
||||
windowStart time.Time // Start of current measurement window
|
||||
windowSize time.Duration // Size of the measurement window
|
||||
mu sync.Mutex // Protects bytesRead and windowStart
|
||||
ctx context.Context
|
||||
}
|
||||
|
||||
// NewThrottledReader creates a new bandwidth-limited reader.
|
||||
// bytesPerSec is the maximum transfer rate in bytes per second.
|
||||
// Set to 0 for unlimited bandwidth.
|
||||
func NewThrottledReader(ctx context.Context, reader io.Reader, bytesPerSec int64) *ThrottledReader {
|
||||
return &ThrottledReader{
|
||||
reader: reader,
|
||||
bytesPerSec: bytesPerSec,
|
||||
windowStart: time.Now(),
|
||||
windowSize: 100 * time.Millisecond, // Measure in 100ms windows for smooth throttling
|
||||
ctx: ctx,
|
||||
}
|
||||
}
|
||||
|
||||
// Read implements io.Reader with bandwidth throttling
|
||||
func (t *ThrottledReader) Read(p []byte) (int, error) {
|
||||
// No throttling if unlimited
|
||||
if t.bytesPerSec <= 0 {
|
||||
return t.reader.Read(p)
|
||||
}
|
||||
|
||||
t.mu.Lock()
|
||||
|
||||
// Calculate how many bytes we're allowed in this window
|
||||
now := time.Now()
|
||||
elapsed := now.Sub(t.windowStart)
|
||||
|
||||
// If we've passed the window, reset
|
||||
if elapsed >= t.windowSize {
|
||||
t.bytesRead = 0
|
||||
t.windowStart = now
|
||||
elapsed = 0
|
||||
}
|
||||
|
||||
// Calculate bytes allowed per window
|
||||
bytesPerWindow := int64(float64(t.bytesPerSec) * t.windowSize.Seconds())
|
||||
|
||||
// How many bytes can we still read in this window?
|
||||
remaining := bytesPerWindow - t.bytesRead
|
||||
if remaining <= 0 {
|
||||
// We've exhausted our quota for this window - wait for next window
|
||||
sleepDuration := t.windowSize - elapsed
|
||||
t.mu.Unlock()
|
||||
|
||||
select {
|
||||
case <-t.ctx.Done():
|
||||
return 0, t.ctx.Err()
|
||||
case <-time.After(sleepDuration):
|
||||
}
|
||||
|
||||
// Retry after sleeping
|
||||
return t.Read(p)
|
||||
}
|
||||
|
||||
// Limit read size to remaining quota
|
||||
maxRead := len(p)
|
||||
if int64(maxRead) > remaining {
|
||||
maxRead = int(remaining)
|
||||
}
|
||||
t.mu.Unlock()
|
||||
|
||||
// Perform the actual read
|
||||
n, err := t.reader.Read(p[:maxRead])
|
||||
|
||||
// Track bytes read
|
||||
t.mu.Lock()
|
||||
t.bytesRead += int64(n)
|
||||
t.mu.Unlock()
|
||||
|
||||
return n, err
|
||||
}
|
||||
|
||||
// ThrottledWriter wraps an io.Writer and limits the write rate.
|
||||
type ThrottledWriter struct {
|
||||
writer io.Writer
|
||||
bytesPerSec int64
|
||||
bytesWritten int64
|
||||
windowStart time.Time
|
||||
windowSize time.Duration
|
||||
mu sync.Mutex
|
||||
ctx context.Context
|
||||
}
|
||||
|
||||
// NewThrottledWriter creates a new bandwidth-limited writer.
|
||||
func NewThrottledWriter(ctx context.Context, writer io.Writer, bytesPerSec int64) *ThrottledWriter {
|
||||
return &ThrottledWriter{
|
||||
writer: writer,
|
||||
bytesPerSec: bytesPerSec,
|
||||
windowStart: time.Now(),
|
||||
windowSize: 100 * time.Millisecond,
|
||||
ctx: ctx,
|
||||
}
|
||||
}
|
||||
|
||||
// Write implements io.Writer with bandwidth throttling
|
||||
func (t *ThrottledWriter) Write(p []byte) (int, error) {
|
||||
if t.bytesPerSec <= 0 {
|
||||
return t.writer.Write(p)
|
||||
}
|
||||
|
||||
totalWritten := 0
|
||||
for totalWritten < len(p) {
|
||||
t.mu.Lock()
|
||||
|
||||
now := time.Now()
|
||||
elapsed := now.Sub(t.windowStart)
|
||||
|
||||
if elapsed >= t.windowSize {
|
||||
t.bytesWritten = 0
|
||||
t.windowStart = now
|
||||
elapsed = 0
|
||||
}
|
||||
|
||||
bytesPerWindow := int64(float64(t.bytesPerSec) * t.windowSize.Seconds())
|
||||
remaining := bytesPerWindow - t.bytesWritten
|
||||
|
||||
if remaining <= 0 {
|
||||
sleepDuration := t.windowSize - elapsed
|
||||
t.mu.Unlock()
|
||||
|
||||
select {
|
||||
case <-t.ctx.Done():
|
||||
return totalWritten, t.ctx.Err()
|
||||
case <-time.After(sleepDuration):
|
||||
}
|
||||
continue
|
||||
}
|
||||
|
||||
// Calculate how much to write
|
||||
toWrite := len(p) - totalWritten
|
||||
if int64(toWrite) > remaining {
|
||||
toWrite = int(remaining)
|
||||
}
|
||||
t.mu.Unlock()
|
||||
|
||||
// Write chunk
|
||||
n, err := t.writer.Write(p[totalWritten : totalWritten+toWrite])
|
||||
totalWritten += n
|
||||
|
||||
t.mu.Lock()
|
||||
t.bytesWritten += int64(n)
|
||||
t.mu.Unlock()
|
||||
|
||||
if err != nil {
|
||||
return totalWritten, err
|
||||
}
|
||||
}
|
||||
|
||||
return totalWritten, nil
|
||||
}
|
||||
|
||||
// ParseBandwidth parses a human-readable bandwidth string into bytes per second.
|
||||
// Supports: "10MB/s", "10MiB/s", "100KB/s", "1GB/s", "10Mbps", "100Kbps"
|
||||
// Returns 0 for empty or "unlimited"
|
||||
func ParseBandwidth(s string) (int64, error) {
|
||||
if s == "" || s == "0" || s == "unlimited" {
|
||||
return 0, nil
|
||||
}
|
||||
|
||||
// Normalize input
|
||||
s = strings.TrimSpace(s)
|
||||
s = strings.ToLower(s)
|
||||
s = strings.TrimSuffix(s, "/s")
|
||||
s = strings.TrimSuffix(s, "ps") // For mbps/kbps
|
||||
|
||||
// Parse unit
|
||||
var multiplier int64 = 1
|
||||
var value float64
|
||||
|
||||
switch {
|
||||
case strings.HasSuffix(s, "gib"):
|
||||
multiplier = 1024 * 1024 * 1024
|
||||
s = strings.TrimSuffix(s, "gib")
|
||||
case strings.HasSuffix(s, "gb"):
|
||||
multiplier = 1000 * 1000 * 1000
|
||||
s = strings.TrimSuffix(s, "gb")
|
||||
case strings.HasSuffix(s, "mib"):
|
||||
multiplier = 1024 * 1024
|
||||
s = strings.TrimSuffix(s, "mib")
|
||||
case strings.HasSuffix(s, "mb"):
|
||||
multiplier = 1000 * 1000
|
||||
s = strings.TrimSuffix(s, "mb")
|
||||
case strings.HasSuffix(s, "kib"):
|
||||
multiplier = 1024
|
||||
s = strings.TrimSuffix(s, "kib")
|
||||
case strings.HasSuffix(s, "kb"):
|
||||
multiplier = 1000
|
||||
s = strings.TrimSuffix(s, "kb")
|
||||
case strings.HasSuffix(s, "b"):
|
||||
multiplier = 1
|
||||
s = strings.TrimSuffix(s, "b")
|
||||
default:
|
||||
// Assume MB if no unit
|
||||
multiplier = 1000 * 1000
|
||||
}
|
||||
|
||||
// Parse numeric value
|
||||
_, err := fmt.Sscanf(s, "%f", &value)
|
||||
if err != nil {
|
||||
return 0, fmt.Errorf("invalid bandwidth value: %s", s)
|
||||
}
|
||||
|
||||
return int64(value * float64(multiplier)), nil
|
||||
}
|
||||
|
||||
// FormatBandwidth returns a human-readable bandwidth string
|
||||
func FormatBandwidth(bytesPerSec int64) string {
|
||||
if bytesPerSec <= 0 {
|
||||
return "unlimited"
|
||||
}
|
||||
|
||||
const (
|
||||
KB = 1000
|
||||
MB = 1000 * KB
|
||||
GB = 1000 * MB
|
||||
)
|
||||
|
||||
switch {
|
||||
case bytesPerSec >= GB:
|
||||
return fmt.Sprintf("%.1f GB/s", float64(bytesPerSec)/float64(GB))
|
||||
case bytesPerSec >= MB:
|
||||
return fmt.Sprintf("%.1f MB/s", float64(bytesPerSec)/float64(MB))
|
||||
case bytesPerSec >= KB:
|
||||
return fmt.Sprintf("%.1f KB/s", float64(bytesPerSec)/float64(KB))
|
||||
default:
|
||||
return fmt.Sprintf("%d B/s", bytesPerSec)
|
||||
}
|
||||
}
|
||||
175
internal/cloud/throttle_test.go
Normal file
175
internal/cloud/throttle_test.go
Normal file
@ -0,0 +1,175 @@
|
||||
package cloud
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"context"
|
||||
"io"
|
||||
"testing"
|
||||
"time"
|
||||
)
|
||||
|
||||
func TestParseBandwidth(t *testing.T) {
|
||||
tests := []struct {
|
||||
input string
|
||||
expected int64
|
||||
wantErr bool
|
||||
}{
|
||||
// Empty/unlimited
|
||||
{"", 0, false},
|
||||
{"0", 0, false},
|
||||
{"unlimited", 0, false},
|
||||
|
||||
// Megabytes per second (SI)
|
||||
{"10MB/s", 10 * 1000 * 1000, false},
|
||||
{"10mb/s", 10 * 1000 * 1000, false},
|
||||
{"10MB", 10 * 1000 * 1000, false},
|
||||
{"100MB/s", 100 * 1000 * 1000, false},
|
||||
|
||||
// Mebibytes per second (binary)
|
||||
{"10MiB/s", 10 * 1024 * 1024, false},
|
||||
{"10mib/s", 10 * 1024 * 1024, false},
|
||||
|
||||
// Kilobytes
|
||||
{"500KB/s", 500 * 1000, false},
|
||||
{"500KiB/s", 500 * 1024, false},
|
||||
|
||||
// Gigabytes
|
||||
{"1GB/s", 1000 * 1000 * 1000, false},
|
||||
{"1GiB/s", 1024 * 1024 * 1024, false},
|
||||
|
||||
// Megabits per second
|
||||
{"100Mbps", 100 * 1000 * 1000, false},
|
||||
|
||||
// Plain bytes
|
||||
{"1000B/s", 1000, false},
|
||||
|
||||
// No unit (assumes MB)
|
||||
{"50", 50 * 1000 * 1000, false},
|
||||
|
||||
// Decimal values
|
||||
{"1.5MB/s", 1500000, false},
|
||||
{"0.5GB/s", 500 * 1000 * 1000, false},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.input, func(t *testing.T) {
|
||||
got, err := ParseBandwidth(tt.input)
|
||||
if (err != nil) != tt.wantErr {
|
||||
t.Errorf("ParseBandwidth(%q) error = %v, wantErr %v", tt.input, err, tt.wantErr)
|
||||
return
|
||||
}
|
||||
if got != tt.expected {
|
||||
t.Errorf("ParseBandwidth(%q) = %d, want %d", tt.input, got, tt.expected)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestFormatBandwidth(t *testing.T) {
|
||||
tests := []struct {
|
||||
input int64
|
||||
expected string
|
||||
}{
|
||||
{0, "unlimited"},
|
||||
{500, "500 B/s"},
|
||||
{1500, "1.5 KB/s"},
|
||||
{10 * 1000 * 1000, "10.0 MB/s"},
|
||||
{1000 * 1000 * 1000, "1.0 GB/s"},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.expected, func(t *testing.T) {
|
||||
got := FormatBandwidth(tt.input)
|
||||
if got != tt.expected {
|
||||
t.Errorf("FormatBandwidth(%d) = %q, want %q", tt.input, got, tt.expected)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestThrottledReader_Unlimited(t *testing.T) {
|
||||
data := []byte("hello world")
|
||||
reader := bytes.NewReader(data)
|
||||
ctx := context.Background()
|
||||
|
||||
throttled := NewThrottledReader(ctx, reader, 0) // 0 = unlimited
|
||||
|
||||
result, err := io.ReadAll(throttled)
|
||||
if err != nil {
|
||||
t.Fatalf("unexpected error: %v", err)
|
||||
}
|
||||
if !bytes.Equal(result, data) {
|
||||
t.Errorf("got %q, want %q", result, data)
|
||||
}
|
||||
}
|
||||
|
||||
func TestThrottledReader_Limited(t *testing.T) {
|
||||
// Create 1KB of data
|
||||
data := make([]byte, 1024)
|
||||
for i := range data {
|
||||
data[i] = byte(i % 256)
|
||||
}
|
||||
|
||||
reader := bytes.NewReader(data)
|
||||
ctx := context.Background()
|
||||
|
||||
// Limit to 512 bytes/second - should take ~2 seconds
|
||||
throttled := NewThrottledReader(ctx, reader, 512)
|
||||
|
||||
start := time.Now()
|
||||
result, err := io.ReadAll(throttled)
|
||||
elapsed := time.Since(start)
|
||||
|
||||
if err != nil {
|
||||
t.Fatalf("unexpected error: %v", err)
|
||||
}
|
||||
if !bytes.Equal(result, data) {
|
||||
t.Errorf("data mismatch: got %d bytes, want %d bytes", len(result), len(data))
|
||||
}
|
||||
|
||||
// Should take at least 1.5 seconds (allowing some margin)
|
||||
if elapsed < 1500*time.Millisecond {
|
||||
t.Errorf("read completed too fast: %v (expected ~2s for 1KB at 512B/s)", elapsed)
|
||||
}
|
||||
}
|
||||
|
||||
func TestThrottledReader_CancelContext(t *testing.T) {
|
||||
data := make([]byte, 10*1024) // 10KB
|
||||
reader := bytes.NewReader(data)
|
||||
|
||||
ctx, cancel := context.WithCancel(context.Background())
|
||||
|
||||
// Very slow rate
|
||||
throttled := NewThrottledReader(ctx, reader, 100)
|
||||
|
||||
// Cancel after 100ms
|
||||
go func() {
|
||||
time.Sleep(100 * time.Millisecond)
|
||||
cancel()
|
||||
}()
|
||||
|
||||
_, err := io.ReadAll(throttled)
|
||||
if err != context.Canceled {
|
||||
t.Errorf("expected context.Canceled, got %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
func TestThrottledWriter_Unlimited(t *testing.T) {
|
||||
ctx := context.Background()
|
||||
var buf bytes.Buffer
|
||||
|
||||
throttled := NewThrottledWriter(ctx, &buf, 0) // 0 = unlimited
|
||||
|
||||
data := []byte("hello world")
|
||||
n, err := throttled.Write(data)
|
||||
|
||||
if err != nil {
|
||||
t.Fatalf("unexpected error: %v", err)
|
||||
}
|
||||
if n != len(data) {
|
||||
t.Errorf("wrote %d bytes, want %d", n, len(data))
|
||||
}
|
||||
if !bytes.Equal(buf.Bytes(), data) {
|
||||
t.Errorf("got %q, want %q", buf.Bytes(), data)
|
||||
}
|
||||
}
|
||||
@ -36,19 +36,25 @@ type Config struct {
|
||||
AutoDetectCores bool
|
||||
CPUWorkloadType string // "cpu-intensive", "io-intensive", "balanced"
|
||||
|
||||
// Resource profile for backup/restore operations
|
||||
ResourceProfile string // "conservative", "balanced", "performance", "max-performance"
|
||||
LargeDBMode bool // Enable large database mode (reduces parallelism, increases max_locks)
|
||||
|
||||
// CPU detection
|
||||
CPUDetector *cpu.Detector
|
||||
CPUInfo *cpu.CPUInfo
|
||||
MemoryInfo *cpu.MemoryInfo // System memory information
|
||||
|
||||
// Sample backup options
|
||||
SampleStrategy string // "ratio", "percent", "count"
|
||||
SampleValue int
|
||||
|
||||
// Output options
|
||||
NoColor bool
|
||||
Debug bool
|
||||
LogLevel string
|
||||
LogFormat string
|
||||
NoColor bool
|
||||
Debug bool
|
||||
DebugLocks bool // Extended lock debugging (captures lock detection, Guard decisions, boost attempts)
|
||||
LogLevel string
|
||||
LogFormat string
|
||||
|
||||
// Config persistence
|
||||
NoSaveConfig bool
|
||||
@ -178,6 +184,13 @@ func New() *Config {
|
||||
sslMode = ""
|
||||
}
|
||||
|
||||
// Detect memory information
|
||||
memInfo, _ := cpu.DetectMemory()
|
||||
|
||||
// Determine recommended resource profile
|
||||
recommendedProfile := cpu.RecommendProfile(cpuInfo, memInfo, false)
|
||||
defaultProfile := getEnvString("RESOURCE_PROFILE", recommendedProfile.Name)
|
||||
|
||||
cfg := &Config{
|
||||
// Database defaults
|
||||
Host: host,
|
||||
@ -189,18 +202,21 @@ func New() *Config {
|
||||
SSLMode: sslMode,
|
||||
Insecure: getEnvBool("INSECURE", false),
|
||||
|
||||
// Backup defaults
|
||||
// Backup defaults - use recommended profile's settings for small VMs
|
||||
BackupDir: backupDir,
|
||||
CompressionLevel: getEnvInt("COMPRESS_LEVEL", 6),
|
||||
Jobs: getEnvInt("JOBS", getDefaultJobs(cpuInfo)),
|
||||
DumpJobs: getEnvInt("DUMP_JOBS", getDefaultDumpJobs(cpuInfo)),
|
||||
Jobs: getEnvInt("JOBS", recommendedProfile.Jobs),
|
||||
DumpJobs: getEnvInt("DUMP_JOBS", recommendedProfile.DumpJobs),
|
||||
MaxCores: getEnvInt("MAX_CORES", getDefaultMaxCores(cpuInfo)),
|
||||
AutoDetectCores: getEnvBool("AUTO_DETECT_CORES", true),
|
||||
CPUWorkloadType: getEnvString("CPU_WORKLOAD_TYPE", "balanced"),
|
||||
ResourceProfile: defaultProfile,
|
||||
LargeDBMode: getEnvBool("LARGE_DB_MODE", false),
|
||||
|
||||
// CPU detection
|
||||
// CPU and memory detection
|
||||
CPUDetector: cpuDetector,
|
||||
CPUInfo: cpuInfo,
|
||||
MemoryInfo: memInfo,
|
||||
|
||||
// Sample backup defaults
|
||||
SampleStrategy: getEnvString("SAMPLE_STRATEGY", "ratio"),
|
||||
@ -217,14 +233,17 @@ func New() *Config {
|
||||
SingleDBName: getEnvString("SINGLE_DB_NAME", ""),
|
||||
RestoreDBName: getEnvString("RESTORE_DB_NAME", ""),
|
||||
|
||||
// Timeouts
|
||||
ClusterTimeoutMinutes: getEnvInt("CLUSTER_TIMEOUT_MIN", 240),
|
||||
// Timeouts - default 24 hours (1440 min) to handle very large databases with large objects
|
||||
ClusterTimeoutMinutes: getEnvInt("CLUSTER_TIMEOUT_MIN", 1440),
|
||||
|
||||
// Cluster parallelism (default: 2 concurrent operations for faster cluster backup/restore)
|
||||
ClusterParallelism: getEnvInt("CLUSTER_PARALLELISM", 2),
|
||||
// Cluster parallelism - use recommended profile's setting for small VMs
|
||||
ClusterParallelism: getEnvInt("CLUSTER_PARALLELISM", recommendedProfile.ClusterParallelism),
|
||||
|
||||
// Working directory for large operations (default: system temp)
|
||||
WorkDir: getEnvString("WORK_DIR", ""),
|
||||
|
||||
// Swap file management
|
||||
SwapFilePath: getEnvString("SWAP_FILE_PATH", "/tmp/dbbackup_swap"),
|
||||
SwapFilePath: "", // Will be set after WorkDir is initialized
|
||||
SwapFileSizeGB: getEnvInt("SWAP_FILE_SIZE_GB", 0), // 0 = disabled by default
|
||||
AutoSwap: getEnvBool("AUTO_SWAP", false),
|
||||
|
||||
@ -264,6 +283,13 @@ func New() *Config {
|
||||
cfg.SSLMode = "prefer"
|
||||
}
|
||||
|
||||
// Set SwapFilePath using WorkDir if not explicitly set via env var
|
||||
if envSwap := os.Getenv("SWAP_FILE_PATH"); envSwap != "" {
|
||||
cfg.SwapFilePath = envSwap
|
||||
} else {
|
||||
cfg.SwapFilePath = filepath.Join(cfg.GetEffectiveWorkDir(), "dbbackup_swap")
|
||||
}
|
||||
|
||||
return cfg
|
||||
}
|
||||
|
||||
@ -399,6 +425,62 @@ func (c *Config) OptimizeForCPU() error {
|
||||
return nil
|
||||
}
|
||||
|
||||
// ApplyResourceProfile applies a resource profile to the configuration
|
||||
// This adjusts parallelism settings based on the chosen profile
|
||||
func (c *Config) ApplyResourceProfile(profileName string) error {
|
||||
profile := cpu.GetProfileByName(profileName)
|
||||
if profile == nil {
|
||||
return &ConfigError{
|
||||
Field: "resource_profile",
|
||||
Value: profileName,
|
||||
Message: "unknown profile. Valid profiles: conservative, balanced, performance, max-performance",
|
||||
}
|
||||
}
|
||||
|
||||
// Validate profile against current system
|
||||
isValid, warnings := cpu.ValidateProfileForSystem(profile, c.CPUInfo, c.MemoryInfo)
|
||||
if !isValid {
|
||||
// Log warnings but don't block - user may know what they're doing
|
||||
_ = warnings // In production, log these warnings
|
||||
}
|
||||
|
||||
// Apply profile settings
|
||||
c.ResourceProfile = profile.Name
|
||||
|
||||
// If LargeDBMode is enabled, apply its modifiers
|
||||
if c.LargeDBMode {
|
||||
profile = cpu.ApplyLargeDBMode(profile)
|
||||
}
|
||||
|
||||
c.ClusterParallelism = profile.ClusterParallelism
|
||||
c.Jobs = profile.Jobs
|
||||
c.DumpJobs = profile.DumpJobs
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// GetResourceProfileRecommendation returns the recommended profile and reason
|
||||
func (c *Config) GetResourceProfileRecommendation(isLargeDB bool) (string, string) {
|
||||
profile, reason := cpu.RecommendProfileWithReason(c.CPUInfo, c.MemoryInfo, isLargeDB)
|
||||
return profile.Name, reason
|
||||
}
|
||||
|
||||
// GetCurrentProfile returns the current resource profile details
|
||||
// If LargeDBMode is enabled, returns a modified profile with reduced parallelism
|
||||
func (c *Config) GetCurrentProfile() *cpu.ResourceProfile {
|
||||
profile := cpu.GetProfileByName(c.ResourceProfile)
|
||||
if profile == nil {
|
||||
return nil
|
||||
}
|
||||
|
||||
// Apply LargeDBMode modifier if enabled
|
||||
if c.LargeDBMode {
|
||||
return cpu.ApplyLargeDBMode(profile)
|
||||
}
|
||||
|
||||
return profile
|
||||
}
|
||||
|
||||
// GetCPUInfo returns CPU information, detecting if necessary
|
||||
func (c *Config) GetCPUInfo() (*cpu.CPUInfo, error) {
|
||||
if c.CPUInfo != nil {
|
||||
@ -499,6 +581,14 @@ func GetCurrentOSUser() string {
|
||||
return getCurrentUser()
|
||||
}
|
||||
|
||||
// GetEffectiveWorkDir returns the configured WorkDir or system temp as fallback
|
||||
func (c *Config) GetEffectiveWorkDir() string {
|
||||
if c.WorkDir != "" {
|
||||
return c.WorkDir
|
||||
}
|
||||
return os.TempDir()
|
||||
}
|
||||
|
||||
func getDefaultBackupDir() string {
|
||||
// Try to create a sensible default backup directory
|
||||
homeDir, _ := os.UserHomeDir()
|
||||
@ -516,7 +606,7 @@ func getDefaultBackupDir() string {
|
||||
return "/var/lib/pgsql/pg_backups"
|
||||
}
|
||||
|
||||
return "/tmp/db_backups"
|
||||
return filepath.Join(os.TempDir(), "db_backups")
|
||||
}
|
||||
|
||||
// CPU-related helper functions
|
||||
|
||||
@ -28,8 +28,11 @@ type LocalConfig struct {
|
||||
DumpJobs int
|
||||
|
||||
// Performance settings
|
||||
CPUWorkload string
|
||||
MaxCores int
|
||||
CPUWorkload string
|
||||
MaxCores int
|
||||
ClusterTimeout int // Cluster operation timeout in minutes (default: 1440 = 24 hours)
|
||||
ResourceProfile string
|
||||
LargeDBMode bool // Enable large database mode (reduces parallelism, increases locks)
|
||||
|
||||
// Security settings
|
||||
RetentionDays int
|
||||
@ -121,6 +124,14 @@ func LoadLocalConfig() (*LocalConfig, error) {
|
||||
if mc, err := strconv.Atoi(value); err == nil {
|
||||
cfg.MaxCores = mc
|
||||
}
|
||||
case "cluster_timeout":
|
||||
if ct, err := strconv.Atoi(value); err == nil {
|
||||
cfg.ClusterTimeout = ct
|
||||
}
|
||||
case "resource_profile":
|
||||
cfg.ResourceProfile = value
|
||||
case "large_db_mode":
|
||||
cfg.LargeDBMode = value == "true" || value == "1"
|
||||
}
|
||||
case "security":
|
||||
switch key {
|
||||
@ -199,6 +210,15 @@ func SaveLocalConfig(cfg *LocalConfig) error {
|
||||
if cfg.MaxCores != 0 {
|
||||
sb.WriteString(fmt.Sprintf("max_cores = %d\n", cfg.MaxCores))
|
||||
}
|
||||
if cfg.ClusterTimeout != 0 {
|
||||
sb.WriteString(fmt.Sprintf("cluster_timeout = %d\n", cfg.ClusterTimeout))
|
||||
}
|
||||
if cfg.ResourceProfile != "" {
|
||||
sb.WriteString(fmt.Sprintf("resource_profile = %s\n", cfg.ResourceProfile))
|
||||
}
|
||||
if cfg.LargeDBMode {
|
||||
sb.WriteString("large_db_mode = true\n")
|
||||
}
|
||||
sb.WriteString("\n")
|
||||
|
||||
// Security section
|
||||
@ -268,6 +288,18 @@ func ApplyLocalConfig(cfg *Config, local *LocalConfig) {
|
||||
if local.MaxCores != 0 {
|
||||
cfg.MaxCores = local.MaxCores
|
||||
}
|
||||
// Apply cluster timeout from config file (overrides default)
|
||||
if local.ClusterTimeout != 0 {
|
||||
cfg.ClusterTimeoutMinutes = local.ClusterTimeout
|
||||
}
|
||||
// Apply resource profile settings
|
||||
if local.ResourceProfile != "" {
|
||||
cfg.ResourceProfile = local.ResourceProfile
|
||||
}
|
||||
// LargeDBMode is a boolean - apply if true in config
|
||||
if local.LargeDBMode {
|
||||
cfg.LargeDBMode = true
|
||||
}
|
||||
if cfg.RetentionDays == 30 && local.RetentionDays != 0 {
|
||||
cfg.RetentionDays = local.RetentionDays
|
||||
}
|
||||
@ -282,21 +314,24 @@ func ApplyLocalConfig(cfg *Config, local *LocalConfig) {
|
||||
// ConfigFromConfig creates a LocalConfig from a Config
|
||||
func ConfigFromConfig(cfg *Config) *LocalConfig {
|
||||
return &LocalConfig{
|
||||
DBType: cfg.DatabaseType,
|
||||
Host: cfg.Host,
|
||||
Port: cfg.Port,
|
||||
User: cfg.User,
|
||||
Database: cfg.Database,
|
||||
SSLMode: cfg.SSLMode,
|
||||
BackupDir: cfg.BackupDir,
|
||||
WorkDir: cfg.WorkDir,
|
||||
Compression: cfg.CompressionLevel,
|
||||
Jobs: cfg.Jobs,
|
||||
DumpJobs: cfg.DumpJobs,
|
||||
CPUWorkload: cfg.CPUWorkloadType,
|
||||
MaxCores: cfg.MaxCores,
|
||||
RetentionDays: cfg.RetentionDays,
|
||||
MinBackups: cfg.MinBackups,
|
||||
MaxRetries: cfg.MaxRetries,
|
||||
DBType: cfg.DatabaseType,
|
||||
Host: cfg.Host,
|
||||
Port: cfg.Port,
|
||||
User: cfg.User,
|
||||
Database: cfg.Database,
|
||||
SSLMode: cfg.SSLMode,
|
||||
BackupDir: cfg.BackupDir,
|
||||
WorkDir: cfg.WorkDir,
|
||||
Compression: cfg.CompressionLevel,
|
||||
Jobs: cfg.Jobs,
|
||||
DumpJobs: cfg.DumpJobs,
|
||||
CPUWorkload: cfg.CPUWorkloadType,
|
||||
MaxCores: cfg.MaxCores,
|
||||
ClusterTimeout: cfg.ClusterTimeoutMinutes,
|
||||
ResourceProfile: cfg.ResourceProfile,
|
||||
LargeDBMode: cfg.LargeDBMode,
|
||||
RetentionDays: cfg.RetentionDays,
|
||||
MinBackups: cfg.MinBackups,
|
||||
MaxRetries: cfg.MaxRetries,
|
||||
}
|
||||
}
|
||||
|
||||
128
internal/config/profile.go
Normal file
128
internal/config/profile.go
Normal file
@ -0,0 +1,128 @@
|
||||
package config
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"strings"
|
||||
)
|
||||
|
||||
// RestoreProfile defines resource settings for restore operations
|
||||
type RestoreProfile struct {
|
||||
Name string
|
||||
ParallelDBs int // Number of databases to restore in parallel
|
||||
Jobs int // Parallel decompression jobs
|
||||
DisableProgress bool // Disable progress indicators to reduce overhead
|
||||
MemoryConservative bool // Use memory-conservative settings
|
||||
}
|
||||
|
||||
// GetRestoreProfile returns the profile settings for a given profile name
|
||||
func GetRestoreProfile(profileName string) (*RestoreProfile, error) {
|
||||
profileName = strings.ToLower(strings.TrimSpace(profileName))
|
||||
|
||||
switch profileName {
|
||||
case "conservative":
|
||||
return &RestoreProfile{
|
||||
Name: "conservative",
|
||||
ParallelDBs: 1, // Single-threaded restore
|
||||
Jobs: 1, // Single-threaded decompression
|
||||
DisableProgress: false,
|
||||
MemoryConservative: true,
|
||||
}, nil
|
||||
|
||||
case "balanced", "":
|
||||
return &RestoreProfile{
|
||||
Name: "balanced",
|
||||
ParallelDBs: 0, // Use config default or auto-detect
|
||||
Jobs: 0, // Use config default or auto-detect
|
||||
DisableProgress: false,
|
||||
MemoryConservative: false,
|
||||
}, nil
|
||||
|
||||
case "aggressive", "performance", "max":
|
||||
return &RestoreProfile{
|
||||
Name: "aggressive",
|
||||
ParallelDBs: -1, // Auto-detect based on resources
|
||||
Jobs: -1, // Auto-detect based on CPU
|
||||
DisableProgress: false,
|
||||
MemoryConservative: false,
|
||||
}, nil
|
||||
|
||||
case "potato":
|
||||
// Easter egg: same as conservative but with a fun name
|
||||
return &RestoreProfile{
|
||||
Name: "potato",
|
||||
ParallelDBs: 1,
|
||||
Jobs: 1,
|
||||
DisableProgress: false,
|
||||
MemoryConservative: true,
|
||||
}, nil
|
||||
|
||||
default:
|
||||
return nil, fmt.Errorf("unknown profile: %s (valid: conservative, balanced, aggressive)", profileName)
|
||||
}
|
||||
}
|
||||
|
||||
// ApplyProfile applies profile settings to config, respecting explicit user overrides
|
||||
func ApplyProfile(cfg *Config, profileName string, explicitJobs, explicitParallelDBs int) error {
|
||||
profile, err := GetRestoreProfile(profileName)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// Show profile being used
|
||||
if cfg.Debug {
|
||||
fmt.Printf("Using restore profile: %s\n", profile.Name)
|
||||
if profile.MemoryConservative {
|
||||
fmt.Println("Memory-conservative mode enabled")
|
||||
}
|
||||
}
|
||||
|
||||
// Apply profile settings only if not explicitly overridden
|
||||
if explicitJobs == 0 && profile.Jobs > 0 {
|
||||
cfg.Jobs = profile.Jobs
|
||||
}
|
||||
|
||||
if explicitParallelDBs == 0 && profile.ParallelDBs != 0 {
|
||||
cfg.ClusterParallelism = profile.ParallelDBs
|
||||
}
|
||||
|
||||
// Store profile name
|
||||
cfg.ResourceProfile = profile.Name
|
||||
|
||||
// Conservative profile implies large DB mode settings
|
||||
if profile.MemoryConservative {
|
||||
cfg.LargeDBMode = true
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// GetProfileDescription returns a human-readable description of the profile
|
||||
func GetProfileDescription(profileName string) string {
|
||||
profile, err := GetRestoreProfile(profileName)
|
||||
if err != nil {
|
||||
return "Unknown profile"
|
||||
}
|
||||
|
||||
switch profile.Name {
|
||||
case "conservative":
|
||||
return "Conservative: --parallel=1, single-threaded, minimal memory usage. Best for resource-constrained servers or when other services are running."
|
||||
case "potato":
|
||||
return "Potato Mode: Same as conservative, for servers running on a potato 🥔"
|
||||
case "balanced":
|
||||
return "Balanced: Auto-detect resources, moderate parallelism. Good default for most scenarios."
|
||||
case "aggressive":
|
||||
return "Aggressive: Maximum parallelism, all available resources. Best for dedicated database servers with ample resources."
|
||||
default:
|
||||
return profile.Name
|
||||
}
|
||||
}
|
||||
|
||||
// ListProfiles returns a list of all available profiles with descriptions
|
||||
func ListProfiles() map[string]string {
|
||||
return map[string]string{
|
||||
"conservative": GetProfileDescription("conservative"),
|
||||
"balanced": GetProfileDescription("balanced"),
|
||||
"aggressive": GetProfileDescription("aggressive"),
|
||||
"potato": GetProfileDescription("potato"),
|
||||
}
|
||||
}
|
||||
475
internal/cpu/profiles.go
Normal file
475
internal/cpu/profiles.go
Normal file
@ -0,0 +1,475 @@
|
||||
package cpu
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"fmt"
|
||||
"os"
|
||||
"os/exec"
|
||||
"runtime"
|
||||
"strconv"
|
||||
"strings"
|
||||
)
|
||||
|
||||
// MemoryInfo holds system memory information
|
||||
type MemoryInfo struct {
|
||||
TotalBytes int64 `json:"total_bytes"`
|
||||
AvailableBytes int64 `json:"available_bytes"`
|
||||
FreeBytes int64 `json:"free_bytes"`
|
||||
UsedBytes int64 `json:"used_bytes"`
|
||||
SwapTotalBytes int64 `json:"swap_total_bytes"`
|
||||
SwapFreeBytes int64 `json:"swap_free_bytes"`
|
||||
TotalGB int `json:"total_gb"`
|
||||
AvailableGB int `json:"available_gb"`
|
||||
Platform string `json:"platform"`
|
||||
}
|
||||
|
||||
// ResourceProfile defines a resource allocation profile for backup/restore operations
|
||||
type ResourceProfile struct {
|
||||
Name string `json:"name"`
|
||||
Description string `json:"description"`
|
||||
ClusterParallelism int `json:"cluster_parallelism"` // Concurrent databases
|
||||
Jobs int `json:"jobs"` // Parallel jobs within pg_restore
|
||||
DumpJobs int `json:"dump_jobs"` // Parallel jobs for pg_dump
|
||||
MaintenanceWorkMem string `json:"maintenance_work_mem"` // PostgreSQL recommendation
|
||||
MaxLocksPerTxn int `json:"max_locks_per_txn"` // PostgreSQL recommendation
|
||||
RecommendedForLarge bool `json:"recommended_for_large"` // Suitable for large DBs?
|
||||
MinMemoryGB int `json:"min_memory_gb"` // Minimum memory for this profile
|
||||
MinCores int `json:"min_cores"` // Minimum cores for this profile
|
||||
}
|
||||
|
||||
// Predefined resource profiles
|
||||
var (
|
||||
// ProfileConservative - Safe for constrained VMs, avoids shared memory issues
|
||||
ProfileConservative = ResourceProfile{
|
||||
Name: "conservative",
|
||||
Description: "Safe for small VMs (2-4 cores, <16GB). Sequential operations, minimal memory pressure. Best for large DBs on limited hardware.",
|
||||
ClusterParallelism: 1,
|
||||
Jobs: 1,
|
||||
DumpJobs: 2,
|
||||
MaintenanceWorkMem: "256MB",
|
||||
MaxLocksPerTxn: 4096,
|
||||
RecommendedForLarge: true,
|
||||
MinMemoryGB: 4,
|
||||
MinCores: 2,
|
||||
}
|
||||
|
||||
// ProfileBalanced - Default profile, works for most scenarios
|
||||
ProfileBalanced = ResourceProfile{
|
||||
Name: "balanced",
|
||||
Description: "Balanced for medium VMs (4-8 cores, 16-32GB). Moderate parallelism with good safety margin.",
|
||||
ClusterParallelism: 2,
|
||||
Jobs: 2,
|
||||
DumpJobs: 4,
|
||||
MaintenanceWorkMem: "512MB",
|
||||
MaxLocksPerTxn: 2048,
|
||||
RecommendedForLarge: true,
|
||||
MinMemoryGB: 16,
|
||||
MinCores: 4,
|
||||
}
|
||||
|
||||
// ProfilePerformance - Aggressive parallelism for powerful servers
|
||||
ProfilePerformance = ResourceProfile{
|
||||
Name: "performance",
|
||||
Description: "Aggressive for powerful servers (8+ cores, 32GB+). Maximum parallelism for fast operations.",
|
||||
ClusterParallelism: 4,
|
||||
Jobs: 4,
|
||||
DumpJobs: 8,
|
||||
MaintenanceWorkMem: "1GB",
|
||||
MaxLocksPerTxn: 1024,
|
||||
RecommendedForLarge: false, // Large DBs may still need conservative
|
||||
MinMemoryGB: 32,
|
||||
MinCores: 8,
|
||||
}
|
||||
|
||||
// ProfileMaxPerformance - Maximum parallelism for high-end servers
|
||||
ProfileMaxPerformance = ResourceProfile{
|
||||
Name: "max-performance",
|
||||
Description: "Maximum for high-end servers (16+ cores, 64GB+). Full CPU utilization.",
|
||||
ClusterParallelism: 8,
|
||||
Jobs: 8,
|
||||
DumpJobs: 16,
|
||||
MaintenanceWorkMem: "2GB",
|
||||
MaxLocksPerTxn: 512,
|
||||
RecommendedForLarge: false, // Large DBs should use LargeDBMode
|
||||
MinMemoryGB: 64,
|
||||
MinCores: 16,
|
||||
}
|
||||
|
||||
// AllProfiles contains all available profiles (VM resource-based)
|
||||
AllProfiles = []ResourceProfile{
|
||||
ProfileConservative,
|
||||
ProfileBalanced,
|
||||
ProfilePerformance,
|
||||
ProfileMaxPerformance,
|
||||
}
|
||||
)
|
||||
|
||||
// GetProfileByName returns a profile by its name
|
||||
func GetProfileByName(name string) *ResourceProfile {
|
||||
for _, p := range AllProfiles {
|
||||
if strings.EqualFold(p.Name, name) {
|
||||
return &p
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// ApplyLargeDBMode modifies a profile for large database operations.
|
||||
// This is a modifier that reduces parallelism and increases max_locks_per_transaction
|
||||
// to prevent "out of shared memory" errors with large databases (many tables, LOBs, etc.).
|
||||
// It returns a new profile with adjusted settings, leaving the original unchanged.
|
||||
func ApplyLargeDBMode(profile *ResourceProfile) *ResourceProfile {
|
||||
if profile == nil {
|
||||
return nil
|
||||
}
|
||||
|
||||
// Create a copy with adjusted settings
|
||||
modified := *profile
|
||||
|
||||
// Add "(large-db)" suffix to indicate this is modified
|
||||
modified.Name = profile.Name + " +large-db"
|
||||
modified.Description = fmt.Sprintf("%s [LargeDBMode: reduced parallelism, high locks]", profile.Description)
|
||||
|
||||
// Reduce parallelism to avoid lock exhaustion
|
||||
// Rule: halve parallelism, minimum 1
|
||||
modified.ClusterParallelism = max(1, profile.ClusterParallelism/2)
|
||||
modified.Jobs = max(1, profile.Jobs/2)
|
||||
modified.DumpJobs = max(2, profile.DumpJobs/2)
|
||||
|
||||
// Force high max_locks_per_transaction for large schemas
|
||||
modified.MaxLocksPerTxn = 8192
|
||||
|
||||
// Increase maintenance_work_mem for complex operations
|
||||
// Keep or boost maintenance work mem
|
||||
modified.MaintenanceWorkMem = "1GB"
|
||||
if profile.MinMemoryGB >= 32 {
|
||||
modified.MaintenanceWorkMem = "2GB"
|
||||
}
|
||||
|
||||
modified.RecommendedForLarge = true
|
||||
|
||||
return &modified
|
||||
}
|
||||
|
||||
// max returns the larger of two integers
|
||||
func max(a, b int) int {
|
||||
if a > b {
|
||||
return a
|
||||
}
|
||||
return b
|
||||
}
|
||||
|
||||
// DetectMemory detects system memory information
|
||||
func DetectMemory() (*MemoryInfo, error) {
|
||||
info := &MemoryInfo{
|
||||
Platform: runtime.GOOS,
|
||||
}
|
||||
|
||||
switch runtime.GOOS {
|
||||
case "linux":
|
||||
if err := detectLinuxMemory(info); err != nil {
|
||||
return info, fmt.Errorf("linux memory detection failed: %w", err)
|
||||
}
|
||||
case "darwin":
|
||||
if err := detectDarwinMemory(info); err != nil {
|
||||
return info, fmt.Errorf("darwin memory detection failed: %w", err)
|
||||
}
|
||||
case "windows":
|
||||
if err := detectWindowsMemory(info); err != nil {
|
||||
return info, fmt.Errorf("windows memory detection failed: %w", err)
|
||||
}
|
||||
default:
|
||||
// Fallback: use Go runtime memory stats
|
||||
var memStats runtime.MemStats
|
||||
runtime.ReadMemStats(&memStats)
|
||||
info.TotalBytes = int64(memStats.Sys)
|
||||
info.AvailableBytes = int64(memStats.Sys - memStats.Alloc)
|
||||
}
|
||||
|
||||
// Calculate GB values
|
||||
info.TotalGB = int(info.TotalBytes / (1024 * 1024 * 1024))
|
||||
info.AvailableGB = int(info.AvailableBytes / (1024 * 1024 * 1024))
|
||||
|
||||
return info, nil
|
||||
}
|
||||
|
||||
// detectLinuxMemory reads memory info from /proc/meminfo
|
||||
func detectLinuxMemory(info *MemoryInfo) error {
|
||||
file, err := os.Open("/proc/meminfo")
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
defer file.Close()
|
||||
|
||||
scanner := bufio.NewScanner(file)
|
||||
for scanner.Scan() {
|
||||
line := scanner.Text()
|
||||
parts := strings.Fields(line)
|
||||
if len(parts) < 2 {
|
||||
continue
|
||||
}
|
||||
|
||||
key := strings.TrimSuffix(parts[0], ":")
|
||||
value, err := strconv.ParseInt(parts[1], 10, 64)
|
||||
if err != nil {
|
||||
continue
|
||||
}
|
||||
|
||||
// Values are in kB
|
||||
valueBytes := value * 1024
|
||||
|
||||
switch key {
|
||||
case "MemTotal":
|
||||
info.TotalBytes = valueBytes
|
||||
case "MemAvailable":
|
||||
info.AvailableBytes = valueBytes
|
||||
case "MemFree":
|
||||
info.FreeBytes = valueBytes
|
||||
case "SwapTotal":
|
||||
info.SwapTotalBytes = valueBytes
|
||||
case "SwapFree":
|
||||
info.SwapFreeBytes = valueBytes
|
||||
}
|
||||
}
|
||||
|
||||
info.UsedBytes = info.TotalBytes - info.AvailableBytes
|
||||
|
||||
return scanner.Err()
|
||||
}
|
||||
|
||||
// detectDarwinMemory detects memory on macOS
|
||||
func detectDarwinMemory(info *MemoryInfo) error {
|
||||
// Use sysctl for total memory
|
||||
if output, err := runCommand("sysctl", "-n", "hw.memsize"); err == nil {
|
||||
if val, err := strconv.ParseInt(strings.TrimSpace(output), 10, 64); err == nil {
|
||||
info.TotalBytes = val
|
||||
}
|
||||
}
|
||||
|
||||
// Use vm_stat for available memory (more complex parsing required)
|
||||
if output, err := runCommand("vm_stat"); err == nil {
|
||||
pageSize := int64(4096) // Default page size
|
||||
var freePages, inactivePages int64
|
||||
|
||||
lines := strings.Split(output, "\n")
|
||||
for _, line := range lines {
|
||||
if strings.Contains(line, "page size of") {
|
||||
parts := strings.Fields(line)
|
||||
for i, p := range parts {
|
||||
if p == "of" && i+1 < len(parts) {
|
||||
if ps, err := strconv.ParseInt(parts[i+1], 10, 64); err == nil {
|
||||
pageSize = ps
|
||||
}
|
||||
}
|
||||
}
|
||||
} else if strings.Contains(line, "Pages free:") {
|
||||
val := extractNumberFromLine(line)
|
||||
freePages = val
|
||||
} else if strings.Contains(line, "Pages inactive:") {
|
||||
val := extractNumberFromLine(line)
|
||||
inactivePages = val
|
||||
}
|
||||
}
|
||||
|
||||
info.FreeBytes = freePages * pageSize
|
||||
info.AvailableBytes = (freePages + inactivePages) * pageSize
|
||||
}
|
||||
|
||||
info.UsedBytes = info.TotalBytes - info.AvailableBytes
|
||||
return nil
|
||||
}
|
||||
|
||||
// detectWindowsMemory detects memory on Windows
|
||||
func detectWindowsMemory(info *MemoryInfo) error {
|
||||
// Use wmic for memory info
|
||||
if output, err := runCommand("wmic", "OS", "get", "TotalVisibleMemorySize,FreePhysicalMemory", "/format:list"); err == nil {
|
||||
lines := strings.Split(output, "\n")
|
||||
for _, line := range lines {
|
||||
line = strings.TrimSpace(line)
|
||||
if strings.HasPrefix(line, "TotalVisibleMemorySize=") {
|
||||
val := strings.TrimPrefix(line, "TotalVisibleMemorySize=")
|
||||
if v, err := strconv.ParseInt(strings.TrimSpace(val), 10, 64); err == nil {
|
||||
info.TotalBytes = v * 1024 // KB to bytes
|
||||
}
|
||||
} else if strings.HasPrefix(line, "FreePhysicalMemory=") {
|
||||
val := strings.TrimPrefix(line, "FreePhysicalMemory=")
|
||||
if v, err := strconv.ParseInt(strings.TrimSpace(val), 10, 64); err == nil {
|
||||
info.FreeBytes = v * 1024
|
||||
info.AvailableBytes = v * 1024
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
info.UsedBytes = info.TotalBytes - info.AvailableBytes
|
||||
return nil
|
||||
}
|
||||
|
||||
// RecommendProfile recommends a resource profile based on system resources and workload
|
||||
func RecommendProfile(cpuInfo *CPUInfo, memInfo *MemoryInfo, isLargeDB bool) *ResourceProfile {
|
||||
cores := 0
|
||||
if cpuInfo != nil {
|
||||
cores = cpuInfo.PhysicalCores
|
||||
if cores == 0 {
|
||||
cores = cpuInfo.LogicalCores
|
||||
}
|
||||
}
|
||||
if cores == 0 {
|
||||
cores = runtime.NumCPU()
|
||||
}
|
||||
|
||||
memGB := 0
|
||||
if memInfo != nil {
|
||||
memGB = memInfo.TotalGB
|
||||
}
|
||||
|
||||
// Special case: large databases should use conservative profile
|
||||
// The caller should also enable LargeDBMode for increased MaxLocksPerTxn
|
||||
if isLargeDB {
|
||||
// For large DBs, recommend conservative regardless of resources
|
||||
// LargeDBMode flag will handle the lock settings separately
|
||||
return &ProfileConservative
|
||||
}
|
||||
|
||||
// Resource-based selection
|
||||
if cores >= 16 && memGB >= 64 {
|
||||
return &ProfileMaxPerformance
|
||||
} else if cores >= 8 && memGB >= 32 {
|
||||
return &ProfilePerformance
|
||||
} else if cores >= 4 && memGB >= 16 {
|
||||
return &ProfileBalanced
|
||||
}
|
||||
|
||||
// Default to conservative for constrained systems
|
||||
return &ProfileConservative
|
||||
}
|
||||
|
||||
// RecommendProfileWithReason returns a profile recommendation with explanation
|
||||
func RecommendProfileWithReason(cpuInfo *CPUInfo, memInfo *MemoryInfo, isLargeDB bool) (*ResourceProfile, string) {
|
||||
cores := 0
|
||||
if cpuInfo != nil {
|
||||
cores = cpuInfo.PhysicalCores
|
||||
if cores == 0 {
|
||||
cores = cpuInfo.LogicalCores
|
||||
}
|
||||
}
|
||||
if cores == 0 {
|
||||
cores = runtime.NumCPU()
|
||||
}
|
||||
|
||||
memGB := 0
|
||||
if memInfo != nil {
|
||||
memGB = memInfo.TotalGB
|
||||
}
|
||||
|
||||
// Build reason string
|
||||
var reason strings.Builder
|
||||
reason.WriteString(fmt.Sprintf("System: %d cores, %dGB RAM. ", cores, memGB))
|
||||
|
||||
profile := RecommendProfile(cpuInfo, memInfo, isLargeDB)
|
||||
|
||||
if isLargeDB {
|
||||
reason.WriteString("Large database mode - using conservative settings. Enable LargeDBMode for higher max_locks.")
|
||||
} else if profile.Name == "conservative" {
|
||||
reason.WriteString("Limited resources detected - using conservative profile for stability.")
|
||||
} else if profile.Name == "max-performance" {
|
||||
reason.WriteString("High-end server detected - using maximum parallelism.")
|
||||
} else if profile.Name == "performance" {
|
||||
reason.WriteString("Good resources detected - using performance profile.")
|
||||
} else {
|
||||
reason.WriteString("Using balanced profile for optimal performance/stability trade-off.")
|
||||
}
|
||||
|
||||
return profile, reason.String()
|
||||
}
|
||||
|
||||
// ValidateProfileForSystem checks if a profile is suitable for the current system
|
||||
func ValidateProfileForSystem(profile *ResourceProfile, cpuInfo *CPUInfo, memInfo *MemoryInfo) (bool, []string) {
|
||||
var warnings []string
|
||||
|
||||
cores := 0
|
||||
if cpuInfo != nil {
|
||||
cores = cpuInfo.PhysicalCores
|
||||
if cores == 0 {
|
||||
cores = cpuInfo.LogicalCores
|
||||
}
|
||||
}
|
||||
if cores == 0 {
|
||||
cores = runtime.NumCPU()
|
||||
}
|
||||
|
||||
memGB := 0
|
||||
if memInfo != nil {
|
||||
memGB = memInfo.TotalGB
|
||||
}
|
||||
|
||||
// Check minimum requirements
|
||||
if cores < profile.MinCores {
|
||||
warnings = append(warnings,
|
||||
fmt.Sprintf("Profile '%s' recommends %d+ cores (system has %d)", profile.Name, profile.MinCores, cores))
|
||||
}
|
||||
|
||||
if memGB < profile.MinMemoryGB {
|
||||
warnings = append(warnings,
|
||||
fmt.Sprintf("Profile '%s' recommends %dGB+ RAM (system has %dGB)", profile.Name, profile.MinMemoryGB, memGB))
|
||||
}
|
||||
|
||||
// Check for potential issues
|
||||
if profile.ClusterParallelism > cores {
|
||||
warnings = append(warnings,
|
||||
fmt.Sprintf("Cluster parallelism (%d) exceeds CPU cores (%d) - may cause contention",
|
||||
profile.ClusterParallelism, cores))
|
||||
}
|
||||
|
||||
// Memory pressure warning
|
||||
memPerWorker := 2 // Rough estimate: 2GB per parallel worker for large DB operations
|
||||
requiredMem := profile.ClusterParallelism * profile.Jobs * memPerWorker
|
||||
if memGB > 0 && requiredMem > memGB {
|
||||
warnings = append(warnings,
|
||||
fmt.Sprintf("High parallelism may require ~%dGB RAM (system has %dGB) - risk of OOM",
|
||||
requiredMem, memGB))
|
||||
}
|
||||
|
||||
return len(warnings) == 0, warnings
|
||||
}
|
||||
|
||||
// FormatProfileSummary returns a formatted summary of a profile
|
||||
func (p *ResourceProfile) FormatProfileSummary() string {
|
||||
return fmt.Sprintf("[%s] Parallel: %d DBs, %d jobs | Recommended for large DBs: %v",
|
||||
strings.ToUpper(p.Name),
|
||||
p.ClusterParallelism,
|
||||
p.Jobs,
|
||||
p.RecommendedForLarge)
|
||||
}
|
||||
|
||||
// PostgreSQLRecommendations returns PostgreSQL configuration recommendations for this profile
|
||||
func (p *ResourceProfile) PostgreSQLRecommendations() []string {
|
||||
return []string{
|
||||
fmt.Sprintf("ALTER SYSTEM SET max_locks_per_transaction = %d;", p.MaxLocksPerTxn),
|
||||
fmt.Sprintf("ALTER SYSTEM SET maintenance_work_mem = '%s';", p.MaintenanceWorkMem),
|
||||
"-- Restart PostgreSQL after changes to max_locks_per_transaction",
|
||||
}
|
||||
}
|
||||
|
||||
// Helper functions
|
||||
|
||||
func runCommand(name string, args ...string) (string, error) {
|
||||
cmd := exec.Command(name, args...)
|
||||
output, err := cmd.Output()
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
return string(output), nil
|
||||
}
|
||||
|
||||
func extractNumberFromLine(line string) int64 {
|
||||
// Extract number before the period at end (e.g., "Pages free: 123456.")
|
||||
parts := strings.Fields(line)
|
||||
for _, p := range parts {
|
||||
p = strings.TrimSuffix(p, ".")
|
||||
if val, err := strconv.ParseInt(p, 10, 64); err == nil && val > 0 {
|
||||
return val
|
||||
}
|
||||
}
|
||||
return 0
|
||||
}
|
||||
@ -126,13 +126,46 @@ func (m *MySQL) ListTables(ctx context.Context, database string) ([]string, erro
|
||||
return tables, rows.Err()
|
||||
}
|
||||
|
||||
// validateMySQLIdentifier checks if a database/table name is safe for use in SQL
|
||||
// Prevents SQL injection by only allowing alphanumeric names with underscores
|
||||
func validateMySQLIdentifier(name string) error {
|
||||
if len(name) == 0 {
|
||||
return fmt.Errorf("identifier cannot be empty")
|
||||
}
|
||||
if len(name) > 64 {
|
||||
return fmt.Errorf("identifier too long (max 64 chars): %s", name)
|
||||
}
|
||||
// Only allow alphanumeric, underscores, and must start with letter or underscore
|
||||
for i, c := range name {
|
||||
if i == 0 && !((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || c == '_') {
|
||||
return fmt.Errorf("identifier must start with letter or underscore: %s", name)
|
||||
}
|
||||
if !((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || (c >= '0' && c <= '9') || c == '_') {
|
||||
return fmt.Errorf("identifier contains invalid character %q: %s", c, name)
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// quoteMySQLIdentifier safely quotes a MySQL identifier
|
||||
func quoteMySQLIdentifier(name string) string {
|
||||
// Escape any backticks by doubling them and wrap in backticks
|
||||
return "`" + strings.ReplaceAll(name, "`", "``") + "`"
|
||||
}
|
||||
|
||||
// CreateDatabase creates a new database
|
||||
func (m *MySQL) CreateDatabase(ctx context.Context, name string) error {
|
||||
if m.db == nil {
|
||||
return fmt.Errorf("not connected to database")
|
||||
}
|
||||
|
||||
query := fmt.Sprintf("CREATE DATABASE IF NOT EXISTS `%s`", name)
|
||||
// Validate identifier to prevent SQL injection
|
||||
if err := validateMySQLIdentifier(name); err != nil {
|
||||
return fmt.Errorf("invalid database name: %w", err)
|
||||
}
|
||||
|
||||
// Use safe quoting for identifier
|
||||
query := fmt.Sprintf("CREATE DATABASE IF NOT EXISTS %s", quoteMySQLIdentifier(name))
|
||||
_, err := m.db.ExecContext(ctx, query)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to create database %s: %w", name, err)
|
||||
@ -148,7 +181,13 @@ func (m *MySQL) DropDatabase(ctx context.Context, name string) error {
|
||||
return fmt.Errorf("not connected to database")
|
||||
}
|
||||
|
||||
query := fmt.Sprintf("DROP DATABASE IF EXISTS `%s`", name)
|
||||
// Validate identifier to prevent SQL injection
|
||||
if err := validateMySQLIdentifier(name); err != nil {
|
||||
return fmt.Errorf("invalid database name: %w", err)
|
||||
}
|
||||
|
||||
// Use safe quoting for identifier
|
||||
query := fmt.Sprintf("DROP DATABASE IF EXISTS %s", quoteMySQLIdentifier(name))
|
||||
_, err := m.db.ExecContext(ctx, query)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to drop database %s: %w", name, err)
|
||||
|
||||
@ -15,7 +15,6 @@ import (
|
||||
|
||||
"github.com/jackc/pgx/v5/pgxpool"
|
||||
"github.com/jackc/pgx/v5/stdlib"
|
||||
_ "github.com/jackc/pgx/v5/stdlib" // PostgreSQL driver (pgx)
|
||||
)
|
||||
|
||||
// PostgreSQL implements Database interface for PostgreSQL
|
||||
@ -163,14 +162,47 @@ func (p *PostgreSQL) ListTables(ctx context.Context, database string) ([]string,
|
||||
return tables, rows.Err()
|
||||
}
|
||||
|
||||
// validateIdentifier checks if a database/table name is safe for use in SQL
|
||||
// Prevents SQL injection by only allowing alphanumeric names with underscores
|
||||
func validateIdentifier(name string) error {
|
||||
if len(name) == 0 {
|
||||
return fmt.Errorf("identifier cannot be empty")
|
||||
}
|
||||
if len(name) > 63 {
|
||||
return fmt.Errorf("identifier too long (max 63 chars): %s", name)
|
||||
}
|
||||
// Only allow alphanumeric, underscores, and must start with letter or underscore
|
||||
for i, c := range name {
|
||||
if i == 0 && !((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || c == '_') {
|
||||
return fmt.Errorf("identifier must start with letter or underscore: %s", name)
|
||||
}
|
||||
if !((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || (c >= '0' && c <= '9') || c == '_') {
|
||||
return fmt.Errorf("identifier contains invalid character %q: %s", c, name)
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// quoteIdentifier safely quotes a PostgreSQL identifier
|
||||
func quoteIdentifier(name string) string {
|
||||
// Double any existing double quotes and wrap in double quotes
|
||||
return `"` + strings.ReplaceAll(name, `"`, `""`) + `"`
|
||||
}
|
||||
|
||||
// CreateDatabase creates a new database
|
||||
func (p *PostgreSQL) CreateDatabase(ctx context.Context, name string) error {
|
||||
if p.db == nil {
|
||||
return fmt.Errorf("not connected to database")
|
||||
}
|
||||
|
||||
// Validate identifier to prevent SQL injection
|
||||
if err := validateIdentifier(name); err != nil {
|
||||
return fmt.Errorf("invalid database name: %w", err)
|
||||
}
|
||||
|
||||
// PostgreSQL doesn't support CREATE DATABASE in transactions or prepared statements
|
||||
query := fmt.Sprintf("CREATE DATABASE %s", name)
|
||||
// Use quoted identifier for safety
|
||||
query := fmt.Sprintf("CREATE DATABASE %s", quoteIdentifier(name))
|
||||
_, err := p.db.ExecContext(ctx, query)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to create database %s: %w", name, err)
|
||||
@ -186,8 +218,14 @@ func (p *PostgreSQL) DropDatabase(ctx context.Context, name string) error {
|
||||
return fmt.Errorf("not connected to database")
|
||||
}
|
||||
|
||||
// Validate identifier to prevent SQL injection
|
||||
if err := validateIdentifier(name); err != nil {
|
||||
return fmt.Errorf("invalid database name: %w", err)
|
||||
}
|
||||
|
||||
// Force drop connections and drop database
|
||||
query := fmt.Sprintf("DROP DATABASE IF EXISTS %s", name)
|
||||
// Use quoted identifier for safety
|
||||
query := fmt.Sprintf("DROP DATABASE IF EXISTS %s", quoteIdentifier(name))
|
||||
_, err := p.db.ExecContext(ctx, query)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to drop database %s: %w", name, err)
|
||||
@ -278,11 +316,12 @@ func (p *PostgreSQL) BuildBackupCommand(database, outputFile string, options Bac
|
||||
cmd := []string{"pg_dump"}
|
||||
|
||||
// Connection parameters
|
||||
if p.cfg.Host != "localhost" {
|
||||
// CRITICAL: Always pass port even for localhost - user may have non-standard port
|
||||
if p.cfg.Host != "localhost" && p.cfg.Host != "127.0.0.1" && p.cfg.Host != "" {
|
||||
cmd = append(cmd, "-h", p.cfg.Host)
|
||||
cmd = append(cmd, "-p", strconv.Itoa(p.cfg.Port))
|
||||
cmd = append(cmd, "--no-password")
|
||||
}
|
||||
cmd = append(cmd, "-p", strconv.Itoa(p.cfg.Port))
|
||||
cmd = append(cmd, "-U", p.cfg.User)
|
||||
|
||||
// Format and compression
|
||||
@ -342,11 +381,12 @@ func (p *PostgreSQL) BuildRestoreCommand(database, inputFile string, options Res
|
||||
cmd := []string{"pg_restore"}
|
||||
|
||||
// Connection parameters
|
||||
if p.cfg.Host != "localhost" {
|
||||
// CRITICAL: Always pass port even for localhost - user may have non-standard port
|
||||
if p.cfg.Host != "localhost" && p.cfg.Host != "127.0.0.1" && p.cfg.Host != "" {
|
||||
cmd = append(cmd, "-h", p.cfg.Host)
|
||||
cmd = append(cmd, "-p", strconv.Itoa(p.cfg.Port))
|
||||
cmd = append(cmd, "--no-password")
|
||||
}
|
||||
cmd = append(cmd, "-p", strconv.Itoa(p.cfg.Port))
|
||||
cmd = append(cmd, "-U", p.cfg.User)
|
||||
|
||||
// Parallel jobs (incompatible with --single-transaction per PostgreSQL docs)
|
||||
|
||||
228
internal/dedup/chunker.go
Normal file
228
internal/dedup/chunker.go
Normal file
@ -0,0 +1,228 @@
|
||||
// Package dedup provides content-defined chunking and deduplication
|
||||
// for database backups, similar to restic/borgbackup but with native
|
||||
// database dump support.
|
||||
package dedup
|
||||
|
||||
import (
|
||||
"crypto/sha256"
|
||||
"encoding/hex"
|
||||
"io"
|
||||
)
|
||||
|
||||
// Chunker constants for content-defined chunking
|
||||
const (
|
||||
// DefaultMinChunkSize is the minimum chunk size (4KB)
|
||||
DefaultMinChunkSize = 4 * 1024
|
||||
|
||||
// DefaultAvgChunkSize is the target average chunk size (8KB)
|
||||
DefaultAvgChunkSize = 8 * 1024
|
||||
|
||||
// DefaultMaxChunkSize is the maximum chunk size (32KB)
|
||||
DefaultMaxChunkSize = 32 * 1024
|
||||
|
||||
// WindowSize for the rolling hash
|
||||
WindowSize = 48
|
||||
|
||||
// ChunkMask determines average chunk size
|
||||
// For 8KB average: we look for hash % 8192 == 0
|
||||
ChunkMask = DefaultAvgChunkSize - 1
|
||||
)
|
||||
|
||||
// Gear hash table - random values for each byte
|
||||
// This is used for the Gear rolling hash which is simpler and faster than Buzhash
|
||||
var gearTable = [256]uint64{
|
||||
0x5c95c078, 0x22408989, 0x2d48a214, 0x12842087, 0x530f8afb, 0x474536b9, 0x2963b4f1, 0x44cb738b,
|
||||
0x4ea7403d, 0x4d606b6e, 0x074ec5d3, 0x3f7e82f4, 0x4e3d26e7, 0x5cb4e82f, 0x7b0a1ef5, 0x3d4e7c92,
|
||||
0x2a81ed69, 0x7f853df8, 0x452c8cf7, 0x0f4f3c9d, 0x3a5e81b7, 0x6cb2d819, 0x2e4c5f93, 0x7e8a1c57,
|
||||
0x1f9d3e8c, 0x4b7c2a5d, 0x3c8f1d6e, 0x5d2a7b4f, 0x6e9c3f8a, 0x7a4d1e5c, 0x2b8c4f7d, 0x4f7d2c9e,
|
||||
0x5a1e3d7c, 0x6b4f8a2d, 0x3e7c9d5a, 0x7d2a4f8b, 0x4c9e7d3a, 0x5b8a1c6e, 0x2d5f4a9c, 0x7a3c8d6b,
|
||||
0x6e2a7b4d, 0x3f8c5d9a, 0x4a7d3e5b, 0x5c9a2d7e, 0x7b4e8f3c, 0x2a6d9c5b, 0x3e4a7d8c, 0x5d7b2e9a,
|
||||
0x4c8a3d7b, 0x6e9d5c8a, 0x7a3e4d9c, 0x2b5c8a7d, 0x4d7e3a9c, 0x5a9c7d3e, 0x3c8b5a7d, 0x7d4e9c2a,
|
||||
0x6a3d8c5b, 0x4e7a9d3c, 0x5c2a7b9e, 0x3a9d4e7c, 0x7b8c5a2d, 0x2d7e4a9c, 0x4a3c9d7b, 0x5e9a7c3d,
|
||||
0x6c4d8a5b, 0x3b7e9c4a, 0x7a5c2d8b, 0x4d9a3e7c, 0x5b7c4a9e, 0x2e8a5d3c, 0x3c9e7a4d, 0x7d4a8c5b,
|
||||
0x6b2d9a7c, 0x4a8c3e5d, 0x5d7a9c2e, 0x3e4c7b9a, 0x7c9d5a4b, 0x2a7e8c3d, 0x4c5a9d7e, 0x5a3e7c4b,
|
||||
0x6d8a2c9e, 0x3c7b4a8d, 0x7e2d9c5a, 0x4b9a7e3c, 0x5c4d8a7b, 0x2d9e3c5a, 0x3a7c9d4e, 0x7b5a4c8d,
|
||||
0x6a9c2e7b, 0x4d3e8a9c, 0x5e7b4d2a, 0x3b9a7c5d, 0x7c4e8a3b, 0x2e7d9c4a, 0x4a8b3e7d, 0x5d2c9a7e,
|
||||
0x6c7a5d3e, 0x3e9c4a7b, 0x7a8d2c5e, 0x4c3e9a7d, 0x5b9c7e2a, 0x2a4d7c9e, 0x3d8a5c4b, 0x7e7b9a3c,
|
||||
0x6b4a8d9e, 0x4e9c3b7a, 0x5a7d4e9c, 0x3c2a8b7d, 0x7d9e5c4a, 0x2b8a7d3e, 0x4d5c9a2b, 0x5e3a7c8d,
|
||||
0x6a9d4b7c, 0x3b7a9c5e, 0x7c4b8a2d, 0x4a9e7c3b, 0x5d2b9a4e, 0x2e7c4d9a, 0x3a9b7e4c, 0x7e5a3c8b,
|
||||
0x6c8a9d4e, 0x4b7c2a5e, 0x5a3e9c7d, 0x3d9a4b7c, 0x7a2d5e9c, 0x2c8b7a3d, 0x4e9c5a2b, 0x5b4d7e9a,
|
||||
0x6d7a3c8b, 0x3e2b9a5d, 0x7c9d4a7e, 0x4a5e3c9b, 0x5e7a9d2c, 0x2b3c7e9a, 0x3a9e4b7d, 0x7d8a5c3e,
|
||||
0x6b9c2d4a, 0x4c7e9a3b, 0x5a2c8b7e, 0x3b4d9a5c, 0x7e9b3a4d, 0x2d5a7c9e, 0x4b8d3e7a, 0x5c9a4b2d,
|
||||
0x6a7c8d9e, 0x3c9e5a7b, 0x7b4a2c9d, 0x4d3b7e9a, 0x5e9c4a3b, 0x2a7b9d4e, 0x3e5c8a7b, 0x7a9d3e5c,
|
||||
0x6c2a7b8d, 0x4e9a5c3b, 0x5b7d2a9e, 0x3a4e9c7b, 0x7d8b3a5c, 0x2c9e7a4b, 0x4a3d5e9c, 0x5d7b8a2e,
|
||||
0x6b9a4c7d, 0x3d5a9e4b, 0x7e2c7b9a, 0x4b9d3a5e, 0x5c4e7a9d, 0x2e8a3c7b, 0x3b7c9e5a, 0x7a4d8b3e,
|
||||
0x6d9c5a2b, 0x4a7e3d9c, 0x5e2a9b7d, 0x3c9a7e4b, 0x7b3e5c9a, 0x2a4b8d7e, 0x4d9c2a5b, 0x5a7d9e3c,
|
||||
0x6c3b8a7d, 0x3e9d4a5c, 0x7d5c2b9e, 0x4c8a7d3b, 0x5b9e3c7a, 0x2d7a9c4e, 0x3a5e7b9d, 0x7e8b4a3c,
|
||||
0x6a2d9e7b, 0x4b3e5a9d, 0x5d9c7b2a, 0x3b7d4e9c, 0x7c9a3b5e, 0x2e5c8a7d, 0x4a7b9d3e, 0x5c3a7e9b,
|
||||
0x6d9e5c4a, 0x3c4a7b9e, 0x7a9d2e5c, 0x4e7c9a3d, 0x5a8b4e7c, 0x2b9a3d7e, 0x3d5b8a9c, 0x7b4e9a2d,
|
||||
0x6c7d3a9e, 0x4a9c5e3b, 0x5e2b7d9a, 0x3a8d4c7b, 0x7d3e9a5c, 0x2c7a8b9e, 0x4b5d3a7c, 0x5c9a7e2b,
|
||||
0x6a4b9d3e, 0x3e7c2a9d, 0x7c8a5b4e, 0x4d9e3c7a, 0x5b3a9e7c, 0x2e9c7b4a, 0x3b4e8a9d, 0x7a9c4e3b,
|
||||
0x6d2a7c9e, 0x4c8b9a5d, 0x5a9e2b7c, 0x3c3d7a9e, 0x7e5a9c4b, 0x2a8d3e7c, 0x4e7a5c9b, 0x5d9b8a2e,
|
||||
0x6b4c9e7a, 0x3a9d5b4e, 0x7b2e8a9c, 0x4a5c3e9b, 0x5c9a4d7e, 0x2d7e9a3c, 0x3e8b7c5a, 0x7c9e2a4d,
|
||||
0x6a3b7d9c, 0x4d9a8b3e, 0x5e5c2a7b, 0x3b4a9d7c, 0x7a7c5e9b, 0x2c9b4a8d, 0x4b3e7c9a, 0x5a9d3b7e,
|
||||
0x6c8a4e9d, 0x3d7b9c5a, 0x7e2a4b9c, 0x4c9e5d3a, 0x5b7a9c4e, 0x2e4d8a7b, 0x3a9c7e5d, 0x7b8d3a9e,
|
||||
0x6d5c9a4b, 0x4a2e7b9d, 0x5d9b4c8a, 0x3c7a9e2b, 0x7d4b8c9e, 0x2b9a5c4d, 0x4e7d3a9c, 0x5c8a9e7b,
|
||||
}
|
||||
|
||||
// Chunk represents a single deduplicated chunk
|
||||
type Chunk struct {
|
||||
// Hash is the SHA-256 hash of the chunk data (content-addressed)
|
||||
Hash string
|
||||
|
||||
// Data is the raw chunk bytes
|
||||
Data []byte
|
||||
|
||||
// Offset is the byte offset in the original file
|
||||
Offset int64
|
||||
|
||||
// Length is the size of this chunk
|
||||
Length int
|
||||
}
|
||||
|
||||
// ChunkerConfig holds configuration for the chunker
|
||||
type ChunkerConfig struct {
|
||||
MinSize int // Minimum chunk size
|
||||
AvgSize int // Target average chunk size
|
||||
MaxSize int // Maximum chunk size
|
||||
}
|
||||
|
||||
// DefaultChunkerConfig returns sensible defaults
|
||||
func DefaultChunkerConfig() ChunkerConfig {
|
||||
return ChunkerConfig{
|
||||
MinSize: DefaultMinChunkSize,
|
||||
AvgSize: DefaultAvgChunkSize,
|
||||
MaxSize: DefaultMaxChunkSize,
|
||||
}
|
||||
}
|
||||
|
||||
// Chunker performs content-defined chunking using Gear hash
|
||||
type Chunker struct {
|
||||
reader io.Reader
|
||||
config ChunkerConfig
|
||||
|
||||
// Rolling hash state
|
||||
hash uint64
|
||||
|
||||
// Current chunk state
|
||||
buf []byte
|
||||
offset int64
|
||||
mask uint64
|
||||
}
|
||||
|
||||
// NewChunker creates a new chunker for the given reader
|
||||
func NewChunker(r io.Reader, config ChunkerConfig) *Chunker {
|
||||
// Calculate mask for target average size
|
||||
// We want: avg_size = 1 / P(boundary)
|
||||
// With mask, P(boundary) = 1 / (mask + 1)
|
||||
// So mask = avg_size - 1
|
||||
mask := uint64(config.AvgSize - 1)
|
||||
|
||||
return &Chunker{
|
||||
reader: r,
|
||||
config: config,
|
||||
buf: make([]byte, 0, config.MaxSize),
|
||||
mask: mask,
|
||||
}
|
||||
}
|
||||
|
||||
// Next returns the next chunk from the input stream
|
||||
// Returns io.EOF when no more data is available
|
||||
func (c *Chunker) Next() (*Chunk, error) {
|
||||
c.buf = c.buf[:0]
|
||||
c.hash = 0
|
||||
|
||||
// Read bytes until we find a chunk boundary or hit max size
|
||||
singleByte := make([]byte, 1)
|
||||
|
||||
for {
|
||||
n, err := c.reader.Read(singleByte)
|
||||
if n == 0 {
|
||||
if err == io.EOF {
|
||||
// Return remaining data as final chunk
|
||||
if len(c.buf) > 0 {
|
||||
return c.makeChunk(), nil
|
||||
}
|
||||
return nil, io.EOF
|
||||
}
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
continue
|
||||
}
|
||||
|
||||
b := singleByte[0]
|
||||
c.buf = append(c.buf, b)
|
||||
|
||||
// Update Gear rolling hash
|
||||
// Gear hash: hash = (hash << 1) + gear_table[byte]
|
||||
c.hash = (c.hash << 1) + gearTable[b]
|
||||
|
||||
// Check for chunk boundary after minimum size
|
||||
if len(c.buf) >= c.config.MinSize {
|
||||
// Check if we hit a boundary (hash matches mask pattern)
|
||||
if (c.hash & c.mask) == 0 {
|
||||
return c.makeChunk(), nil
|
||||
}
|
||||
}
|
||||
|
||||
// Force boundary at max size
|
||||
if len(c.buf) >= c.config.MaxSize {
|
||||
return c.makeChunk(), nil
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// makeChunk creates a Chunk from the current buffer
|
||||
func (c *Chunker) makeChunk() *Chunk {
|
||||
// Compute SHA-256 hash
|
||||
h := sha256.Sum256(c.buf)
|
||||
hash := hex.EncodeToString(h[:])
|
||||
|
||||
// Copy data
|
||||
data := make([]byte, len(c.buf))
|
||||
copy(data, c.buf)
|
||||
|
||||
chunk := &Chunk{
|
||||
Hash: hash,
|
||||
Data: data,
|
||||
Offset: c.offset,
|
||||
Length: len(data),
|
||||
}
|
||||
|
||||
c.offset += int64(len(data))
|
||||
return chunk
|
||||
}
|
||||
|
||||
// ChunkReader splits a reader into content-defined chunks
|
||||
// and returns them via a channel for concurrent processing
|
||||
func ChunkReader(r io.Reader, config ChunkerConfig) (<-chan *Chunk, <-chan error) {
|
||||
chunks := make(chan *Chunk, 100)
|
||||
errs := make(chan error, 1)
|
||||
|
||||
go func() {
|
||||
defer close(chunks)
|
||||
defer close(errs)
|
||||
|
||||
chunker := NewChunker(r, config)
|
||||
for {
|
||||
chunk, err := chunker.Next()
|
||||
if err == io.EOF {
|
||||
return
|
||||
}
|
||||
if err != nil {
|
||||
errs <- err
|
||||
return
|
||||
}
|
||||
chunks <- chunk
|
||||
}
|
||||
}()
|
||||
|
||||
return chunks, errs
|
||||
}
|
||||
|
||||
// HashData computes SHA-256 hash of data
|
||||
func HashData(data []byte) string {
|
||||
h := sha256.Sum256(data)
|
||||
return hex.EncodeToString(h[:])
|
||||
}
|
||||
221
internal/dedup/chunker_test.go
Normal file
221
internal/dedup/chunker_test.go
Normal file
@ -0,0 +1,221 @@
|
||||
package dedup
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"crypto/rand"
|
||||
"io"
|
||||
mathrand "math/rand"
|
||||
"testing"
|
||||
)
|
||||
|
||||
func TestChunker_Basic(t *testing.T) {
|
||||
// Create test data
|
||||
data := make([]byte, 100*1024) // 100KB
|
||||
rand.Read(data)
|
||||
|
||||
chunker := NewChunker(bytes.NewReader(data), DefaultChunkerConfig())
|
||||
|
||||
var chunks []*Chunk
|
||||
var totalBytes int
|
||||
|
||||
for {
|
||||
chunk, err := chunker.Next()
|
||||
if err == io.EOF {
|
||||
break
|
||||
}
|
||||
if err != nil {
|
||||
t.Fatalf("Chunker.Next() error: %v", err)
|
||||
}
|
||||
|
||||
chunks = append(chunks, chunk)
|
||||
totalBytes += chunk.Length
|
||||
|
||||
// Verify chunk properties
|
||||
if chunk.Length < DefaultMinChunkSize && len(chunks) < 10 {
|
||||
// Only the last chunk can be smaller than min
|
||||
// (unless file is smaller than min)
|
||||
}
|
||||
if chunk.Length > DefaultMaxChunkSize {
|
||||
t.Errorf("Chunk %d exceeds max size: %d > %d", len(chunks), chunk.Length, DefaultMaxChunkSize)
|
||||
}
|
||||
if chunk.Hash == "" {
|
||||
t.Errorf("Chunk %d has empty hash", len(chunks))
|
||||
}
|
||||
if len(chunk.Hash) != 64 { // SHA-256 hex length
|
||||
t.Errorf("Chunk %d has invalid hash length: %d", len(chunks), len(chunk.Hash))
|
||||
}
|
||||
}
|
||||
|
||||
if totalBytes != len(data) {
|
||||
t.Errorf("Total bytes mismatch: got %d, want %d", totalBytes, len(data))
|
||||
}
|
||||
|
||||
t.Logf("Chunked %d bytes into %d chunks", totalBytes, len(chunks))
|
||||
t.Logf("Average chunk size: %d bytes", totalBytes/len(chunks))
|
||||
}
|
||||
|
||||
func TestChunker_Deterministic(t *testing.T) {
|
||||
// Same data should produce same chunks
|
||||
data := make([]byte, 50*1024)
|
||||
rand.Read(data)
|
||||
|
||||
// First pass
|
||||
chunker1 := NewChunker(bytes.NewReader(data), DefaultChunkerConfig())
|
||||
var hashes1 []string
|
||||
for {
|
||||
chunk, err := chunker1.Next()
|
||||
if err == io.EOF {
|
||||
break
|
||||
}
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
hashes1 = append(hashes1, chunk.Hash)
|
||||
}
|
||||
|
||||
// Second pass
|
||||
chunker2 := NewChunker(bytes.NewReader(data), DefaultChunkerConfig())
|
||||
var hashes2 []string
|
||||
for {
|
||||
chunk, err := chunker2.Next()
|
||||
if err == io.EOF {
|
||||
break
|
||||
}
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
hashes2 = append(hashes2, chunk.Hash)
|
||||
}
|
||||
|
||||
// Compare
|
||||
if len(hashes1) != len(hashes2) {
|
||||
t.Fatalf("Different chunk counts: %d vs %d", len(hashes1), len(hashes2))
|
||||
}
|
||||
|
||||
for i := range hashes1 {
|
||||
if hashes1[i] != hashes2[i] {
|
||||
t.Errorf("Hash mismatch at chunk %d: %s vs %s", i, hashes1[i], hashes2[i])
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestChunker_ShiftedData(t *testing.T) {
|
||||
// Test that shifted data still shares chunks (the key CDC benefit)
|
||||
// Use deterministic random data for reproducible test results
|
||||
rng := mathrand.New(mathrand.NewSource(42))
|
||||
|
||||
original := make([]byte, 100*1024)
|
||||
rng.Read(original)
|
||||
|
||||
// Create shifted version (prepend some bytes)
|
||||
prefix := make([]byte, 1000)
|
||||
rng.Read(prefix)
|
||||
shifted := append(prefix, original...)
|
||||
|
||||
// Chunk both
|
||||
config := DefaultChunkerConfig()
|
||||
|
||||
chunker1 := NewChunker(bytes.NewReader(original), config)
|
||||
hashes1 := make(map[string]bool)
|
||||
for {
|
||||
chunk, err := chunker1.Next()
|
||||
if err == io.EOF {
|
||||
break
|
||||
}
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
hashes1[chunk.Hash] = true
|
||||
}
|
||||
|
||||
chunker2 := NewChunker(bytes.NewReader(shifted), config)
|
||||
var matched, total int
|
||||
for {
|
||||
chunk, err := chunker2.Next()
|
||||
if err == io.EOF {
|
||||
break
|
||||
}
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
total++
|
||||
if hashes1[chunk.Hash] {
|
||||
matched++
|
||||
}
|
||||
}
|
||||
|
||||
// Should have significant overlap despite the shift
|
||||
overlapRatio := float64(matched) / float64(total)
|
||||
t.Logf("Chunk overlap after %d-byte shift: %.1f%% (%d/%d chunks)",
|
||||
len(prefix), overlapRatio*100, matched, total)
|
||||
|
||||
// We expect at least 50% overlap for content-defined chunking
|
||||
if overlapRatio < 0.5 {
|
||||
t.Errorf("Low chunk overlap: %.1f%% (expected >50%%)", overlapRatio*100)
|
||||
}
|
||||
}
|
||||
|
||||
func TestChunker_SmallFile(t *testing.T) {
|
||||
// File smaller than min chunk size
|
||||
data := []byte("hello world")
|
||||
chunker := NewChunker(bytes.NewReader(data), DefaultChunkerConfig())
|
||||
|
||||
chunk, err := chunker.Next()
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
if chunk.Length != len(data) {
|
||||
t.Errorf("Expected chunk length %d, got %d", len(data), chunk.Length)
|
||||
}
|
||||
|
||||
// Should be EOF after
|
||||
_, err = chunker.Next()
|
||||
if err != io.EOF {
|
||||
t.Errorf("Expected EOF, got %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
func TestChunker_EmptyFile(t *testing.T) {
|
||||
chunker := NewChunker(bytes.NewReader(nil), DefaultChunkerConfig())
|
||||
|
||||
_, err := chunker.Next()
|
||||
if err != io.EOF {
|
||||
t.Errorf("Expected EOF for empty file, got %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
func TestHashData(t *testing.T) {
|
||||
hash := HashData([]byte("test"))
|
||||
if len(hash) != 64 {
|
||||
t.Errorf("Expected 64-char hash, got %d", len(hash))
|
||||
}
|
||||
|
||||
// Known SHA-256 of "test"
|
||||
expected := "9f86d081884c7d659a2feaa0c55ad015a3bf4f1b2b0b822cd15d6c15b0f00a08"
|
||||
if hash != expected {
|
||||
t.Errorf("Hash mismatch: got %s, want %s", hash, expected)
|
||||
}
|
||||
}
|
||||
|
||||
func BenchmarkChunker(b *testing.B) {
|
||||
// 1MB of random data
|
||||
data := make([]byte, 1024*1024)
|
||||
rand.Read(data)
|
||||
|
||||
b.ResetTimer()
|
||||
b.SetBytes(int64(len(data)))
|
||||
|
||||
for i := 0; i < b.N; i++ {
|
||||
chunker := NewChunker(bytes.NewReader(data), DefaultChunkerConfig())
|
||||
for {
|
||||
_, err := chunker.Next()
|
||||
if err == io.EOF {
|
||||
break
|
||||
}
|
||||
if err != nil {
|
||||
b.Fatal(err)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
306
internal/dedup/index.go
Normal file
306
internal/dedup/index.go
Normal file
@ -0,0 +1,306 @@
|
||||
package dedup
|
||||
|
||||
import (
|
||||
"database/sql"
|
||||
"fmt"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
_ "modernc.org/sqlite" // Pure Go SQLite driver (no CGO required)
|
||||
)
|
||||
|
||||
// ChunkIndex provides fast chunk lookups using SQLite
|
||||
type ChunkIndex struct {
|
||||
db *sql.DB
|
||||
dbPath string
|
||||
}
|
||||
|
||||
// NewChunkIndex opens or creates a chunk index database at the default location
|
||||
func NewChunkIndex(basePath string) (*ChunkIndex, error) {
|
||||
dbPath := filepath.Join(basePath, "chunks.db")
|
||||
return NewChunkIndexAt(dbPath)
|
||||
}
|
||||
|
||||
// NewChunkIndexAt opens or creates a chunk index database at a specific path
|
||||
// Use this to put the SQLite index on local storage when chunks are on NFS/CIFS
|
||||
func NewChunkIndexAt(dbPath string) (*ChunkIndex, error) {
|
||||
// Ensure parent directory exists
|
||||
if err := os.MkdirAll(filepath.Dir(dbPath), 0700); err != nil {
|
||||
return nil, fmt.Errorf("failed to create index directory: %w", err)
|
||||
}
|
||||
|
||||
// Add busy_timeout to handle lock contention gracefully
|
||||
db, err := sql.Open("sqlite", dbPath+"?_journal_mode=WAL&_synchronous=NORMAL&_busy_timeout=5000")
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to open chunk index: %w", err)
|
||||
}
|
||||
|
||||
// Test the connection and check for locking issues
|
||||
if err := db.Ping(); err != nil {
|
||||
db.Close()
|
||||
if isNFSLockingError(err) {
|
||||
return nil, fmt.Errorf("database locked (common on NFS/CIFS): %w\n\n"+
|
||||
"HINT: Use --index-db to put the SQLite index on local storage:\n"+
|
||||
" dbbackup dedup ... --index-db /var/lib/dbbackup/dedup-index.db", err)
|
||||
}
|
||||
return nil, fmt.Errorf("failed to connect to chunk index: %w", err)
|
||||
}
|
||||
|
||||
idx := &ChunkIndex{db: db, dbPath: dbPath}
|
||||
if err := idx.migrate(); err != nil {
|
||||
db.Close()
|
||||
if isNFSLockingError(err) {
|
||||
return nil, fmt.Errorf("database locked during migration (common on NFS/CIFS): %w\n\n"+
|
||||
"HINT: Use --index-db to put the SQLite index on local storage:\n"+
|
||||
" dbbackup dedup ... --index-db /var/lib/dbbackup/dedup-index.db", err)
|
||||
}
|
||||
return nil, err
|
||||
}
|
||||
|
||||
return idx, nil
|
||||
}
|
||||
|
||||
// isNFSLockingError checks if an error is likely due to NFS/CIFS locking issues
|
||||
func isNFSLockingError(err error) bool {
|
||||
if err == nil {
|
||||
return false
|
||||
}
|
||||
errStr := err.Error()
|
||||
return strings.Contains(errStr, "database is locked") ||
|
||||
strings.Contains(errStr, "SQLITE_BUSY") ||
|
||||
strings.Contains(errStr, "cannot lock") ||
|
||||
strings.Contains(errStr, "lock protocol")
|
||||
}
|
||||
|
||||
// migrate creates the schema if needed
|
||||
func (idx *ChunkIndex) migrate() error {
|
||||
schema := `
|
||||
CREATE TABLE IF NOT EXISTS chunks (
|
||||
hash TEXT PRIMARY KEY,
|
||||
size_raw INTEGER NOT NULL,
|
||||
size_stored INTEGER NOT NULL,
|
||||
created_at DATETIME DEFAULT CURRENT_TIMESTAMP,
|
||||
last_accessed DATETIME,
|
||||
ref_count INTEGER DEFAULT 1
|
||||
);
|
||||
|
||||
CREATE TABLE IF NOT EXISTS manifests (
|
||||
id TEXT PRIMARY KEY,
|
||||
database_type TEXT,
|
||||
database_name TEXT,
|
||||
database_host TEXT,
|
||||
created_at DATETIME,
|
||||
original_size INTEGER,
|
||||
stored_size INTEGER,
|
||||
chunk_count INTEGER,
|
||||
new_chunks INTEGER,
|
||||
dedup_ratio REAL,
|
||||
sha256 TEXT,
|
||||
verified_at DATETIME
|
||||
);
|
||||
|
||||
CREATE INDEX IF NOT EXISTS idx_chunks_created ON chunks(created_at);
|
||||
CREATE INDEX IF NOT EXISTS idx_chunks_accessed ON chunks(last_accessed);
|
||||
CREATE INDEX IF NOT EXISTS idx_manifests_created ON manifests(created_at);
|
||||
CREATE INDEX IF NOT EXISTS idx_manifests_database ON manifests(database_name);
|
||||
`
|
||||
|
||||
_, err := idx.db.Exec(schema)
|
||||
return err
|
||||
}
|
||||
|
||||
// Close closes the database
|
||||
func (idx *ChunkIndex) Close() error {
|
||||
return idx.db.Close()
|
||||
}
|
||||
|
||||
// AddChunk records a chunk in the index
|
||||
func (idx *ChunkIndex) AddChunk(hash string, sizeRaw, sizeStored int) error {
|
||||
_, err := idx.db.Exec(`
|
||||
INSERT INTO chunks (hash, size_raw, size_stored, created_at, last_accessed, ref_count)
|
||||
VALUES (?, ?, ?, ?, ?, 1)
|
||||
ON CONFLICT(hash) DO UPDATE SET
|
||||
ref_count = ref_count + 1,
|
||||
last_accessed = ?
|
||||
`, hash, sizeRaw, sizeStored, time.Now(), time.Now(), time.Now())
|
||||
return err
|
||||
}
|
||||
|
||||
// HasChunk checks if a chunk exists in the index
|
||||
func (idx *ChunkIndex) HasChunk(hash string) (bool, error) {
|
||||
var count int
|
||||
err := idx.db.QueryRow("SELECT COUNT(*) FROM chunks WHERE hash = ?", hash).Scan(&count)
|
||||
return count > 0, err
|
||||
}
|
||||
|
||||
// GetChunk retrieves chunk metadata
|
||||
func (idx *ChunkIndex) GetChunk(hash string) (*ChunkMeta, error) {
|
||||
var m ChunkMeta
|
||||
err := idx.db.QueryRow(`
|
||||
SELECT hash, size_raw, size_stored, created_at, ref_count
|
||||
FROM chunks WHERE hash = ?
|
||||
`, hash).Scan(&m.Hash, &m.SizeRaw, &m.SizeStored, &m.CreatedAt, &m.RefCount)
|
||||
if err == sql.ErrNoRows {
|
||||
return nil, nil
|
||||
}
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return &m, nil
|
||||
}
|
||||
|
||||
// ChunkMeta holds metadata about a chunk
|
||||
type ChunkMeta struct {
|
||||
Hash string
|
||||
SizeRaw int64
|
||||
SizeStored int64
|
||||
CreatedAt time.Time
|
||||
RefCount int
|
||||
}
|
||||
|
||||
// DecrementRef decreases the reference count for a chunk
|
||||
// Returns true if the chunk should be deleted (ref_count <= 0)
|
||||
func (idx *ChunkIndex) DecrementRef(hash string) (shouldDelete bool, err error) {
|
||||
result, err := idx.db.Exec(`
|
||||
UPDATE chunks SET ref_count = ref_count - 1 WHERE hash = ?
|
||||
`, hash)
|
||||
if err != nil {
|
||||
return false, err
|
||||
}
|
||||
|
||||
affected, _ := result.RowsAffected()
|
||||
if affected == 0 {
|
||||
return false, nil
|
||||
}
|
||||
|
||||
var refCount int
|
||||
err = idx.db.QueryRow("SELECT ref_count FROM chunks WHERE hash = ?", hash).Scan(&refCount)
|
||||
if err != nil {
|
||||
return false, err
|
||||
}
|
||||
|
||||
return refCount <= 0, nil
|
||||
}
|
||||
|
||||
// RemoveChunk removes a chunk from the index
|
||||
func (idx *ChunkIndex) RemoveChunk(hash string) error {
|
||||
_, err := idx.db.Exec("DELETE FROM chunks WHERE hash = ?", hash)
|
||||
return err
|
||||
}
|
||||
|
||||
// AddManifest records a manifest in the index
|
||||
func (idx *ChunkIndex) AddManifest(m *Manifest) error {
|
||||
_, err := idx.db.Exec(`
|
||||
INSERT OR REPLACE INTO manifests
|
||||
(id, database_type, database_name, database_host, created_at,
|
||||
original_size, stored_size, chunk_count, new_chunks, dedup_ratio, sha256)
|
||||
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
|
||||
`, m.ID, m.DatabaseType, m.DatabaseName, m.DatabaseHost, m.CreatedAt,
|
||||
m.OriginalSize, m.StoredSize, m.ChunkCount, m.NewChunks, m.DedupRatio, m.SHA256)
|
||||
return err
|
||||
}
|
||||
|
||||
// RemoveManifest removes a manifest from the index
|
||||
func (idx *ChunkIndex) RemoveManifest(id string) error {
|
||||
_, err := idx.db.Exec("DELETE FROM manifests WHERE id = ?", id)
|
||||
return err
|
||||
}
|
||||
|
||||
// UpdateManifestVerified updates the verified timestamp for a manifest
|
||||
func (idx *ChunkIndex) UpdateManifestVerified(id string, verifiedAt time.Time) error {
|
||||
_, err := idx.db.Exec("UPDATE manifests SET verified_at = ? WHERE id = ?", verifiedAt, id)
|
||||
return err
|
||||
}
|
||||
|
||||
// IndexStats holds statistics about the dedup index
|
||||
type IndexStats struct {
|
||||
TotalChunks int64
|
||||
TotalManifests int64
|
||||
TotalSizeRaw int64 // Uncompressed, undeduplicated (per-chunk)
|
||||
TotalSizeStored int64 // On-disk after dedup+compression (per-chunk)
|
||||
DedupRatio float64 // Based on manifests (real dedup ratio)
|
||||
OldestChunk time.Time
|
||||
NewestChunk time.Time
|
||||
|
||||
// Manifest-based stats (accurate dedup calculation)
|
||||
TotalBackupSize int64 // Sum of all backup original sizes
|
||||
TotalNewData int64 // Sum of all new chunks stored
|
||||
SpaceSaved int64 // Difference = what dedup saved
|
||||
}
|
||||
|
||||
// Stats returns statistics about the index
|
||||
func (idx *ChunkIndex) Stats() (*IndexStats, error) {
|
||||
stats := &IndexStats{}
|
||||
|
||||
var oldestStr, newestStr string
|
||||
err := idx.db.QueryRow(`
|
||||
SELECT
|
||||
COUNT(*),
|
||||
COALESCE(SUM(size_raw), 0),
|
||||
COALESCE(SUM(size_stored), 0),
|
||||
COALESCE(MIN(created_at), ''),
|
||||
COALESCE(MAX(created_at), '')
|
||||
FROM chunks
|
||||
`).Scan(&stats.TotalChunks, &stats.TotalSizeRaw, &stats.TotalSizeStored,
|
||||
&oldestStr, &newestStr)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
// Parse time strings
|
||||
if oldestStr != "" {
|
||||
stats.OldestChunk, _ = time.Parse("2006-01-02 15:04:05", oldestStr)
|
||||
}
|
||||
if newestStr != "" {
|
||||
stats.NewestChunk, _ = time.Parse("2006-01-02 15:04:05", newestStr)
|
||||
}
|
||||
|
||||
idx.db.QueryRow("SELECT COUNT(*) FROM manifests").Scan(&stats.TotalManifests)
|
||||
|
||||
// Calculate accurate dedup ratio from manifests
|
||||
// Sum all backup original sizes and all new data stored
|
||||
err = idx.db.QueryRow(`
|
||||
SELECT
|
||||
COALESCE(SUM(original_size), 0),
|
||||
COALESCE(SUM(stored_size), 0)
|
||||
FROM manifests
|
||||
`).Scan(&stats.TotalBackupSize, &stats.TotalNewData)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
// Calculate real dedup ratio: how much data was deduplicated across all backups
|
||||
if stats.TotalBackupSize > 0 {
|
||||
stats.DedupRatio = 1.0 - float64(stats.TotalNewData)/float64(stats.TotalBackupSize)
|
||||
stats.SpaceSaved = stats.TotalBackupSize - stats.TotalNewData
|
||||
}
|
||||
|
||||
return stats, nil
|
||||
}
|
||||
|
||||
// ListOrphanedChunks returns chunks that have ref_count <= 0
|
||||
func (idx *ChunkIndex) ListOrphanedChunks() ([]string, error) {
|
||||
rows, err := idx.db.Query("SELECT hash FROM chunks WHERE ref_count <= 0")
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
defer rows.Close()
|
||||
|
||||
var hashes []string
|
||||
for rows.Next() {
|
||||
var hash string
|
||||
if err := rows.Scan(&hash); err != nil {
|
||||
continue
|
||||
}
|
||||
hashes = append(hashes, hash)
|
||||
}
|
||||
return hashes, rows.Err()
|
||||
}
|
||||
|
||||
// Vacuum cleans up the database
|
||||
func (idx *ChunkIndex) Vacuum() error {
|
||||
_, err := idx.db.Exec("VACUUM")
|
||||
return err
|
||||
}
|
||||
189
internal/dedup/manifest.go
Normal file
189
internal/dedup/manifest.go
Normal file
@ -0,0 +1,189 @@
|
||||
package dedup
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"time"
|
||||
)
|
||||
|
||||
// Manifest describes a single backup as a list of chunks
|
||||
type Manifest struct {
|
||||
// ID is the unique identifier (typically timestamp-based)
|
||||
ID string `json:"id"`
|
||||
|
||||
// Name is an optional human-readable name
|
||||
Name string `json:"name,omitempty"`
|
||||
|
||||
// CreatedAt is when this backup was created
|
||||
CreatedAt time.Time `json:"created_at"`
|
||||
|
||||
// Database information
|
||||
DatabaseType string `json:"database_type"` // postgres, mysql
|
||||
DatabaseName string `json:"database_name"`
|
||||
DatabaseHost string `json:"database_host"`
|
||||
|
||||
// Chunks is the ordered list of chunk hashes
|
||||
// The file is reconstructed by concatenating chunks in order
|
||||
Chunks []ChunkRef `json:"chunks"`
|
||||
|
||||
// Stats about the backup
|
||||
OriginalSize int64 `json:"original_size"` // Size before deduplication
|
||||
StoredSize int64 `json:"stored_size"` // Size after dedup (new chunks only)
|
||||
ChunkCount int `json:"chunk_count"` // Total chunks
|
||||
NewChunks int `json:"new_chunks"` // Chunks that weren't deduplicated
|
||||
DedupRatio float64 `json:"dedup_ratio"` // 1.0 = no dedup, 0.0 = 100% dedup
|
||||
|
||||
// Encryption and compression settings used
|
||||
Encrypted bool `json:"encrypted"`
|
||||
Compressed bool `json:"compressed"`
|
||||
Decompressed bool `json:"decompressed,omitempty"` // Input was auto-decompressed before chunking
|
||||
|
||||
// Verification
|
||||
SHA256 string `json:"sha256"` // Hash of reconstructed file
|
||||
VerifiedAt time.Time `json:"verified_at,omitempty"`
|
||||
}
|
||||
|
||||
// ChunkRef references a chunk in the manifest
|
||||
type ChunkRef struct {
|
||||
Hash string `json:"h"` // SHA-256 hash (64 chars)
|
||||
Offset int64 `json:"o"` // Offset in original file
|
||||
Length int `json:"l"` // Chunk length
|
||||
}
|
||||
|
||||
// ManifestStore manages backup manifests
|
||||
type ManifestStore struct {
|
||||
basePath string
|
||||
}
|
||||
|
||||
// NewManifestStore creates a new manifest store
|
||||
func NewManifestStore(basePath string) (*ManifestStore, error) {
|
||||
manifestDir := filepath.Join(basePath, "manifests")
|
||||
if err := os.MkdirAll(manifestDir, 0700); err != nil {
|
||||
return nil, fmt.Errorf("failed to create manifest directory: %w", err)
|
||||
}
|
||||
return &ManifestStore{basePath: basePath}, nil
|
||||
}
|
||||
|
||||
// manifestPath returns the path for a manifest ID
|
||||
func (s *ManifestStore) manifestPath(id string) string {
|
||||
return filepath.Join(s.basePath, "manifests", id+".manifest.json")
|
||||
}
|
||||
|
||||
// Save writes a manifest to disk
|
||||
func (s *ManifestStore) Save(m *Manifest) error {
|
||||
path := s.manifestPath(m.ID)
|
||||
|
||||
data, err := json.MarshalIndent(m, "", " ")
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to marshal manifest: %w", err)
|
||||
}
|
||||
|
||||
// Atomic write
|
||||
tmpPath := path + ".tmp"
|
||||
if err := os.WriteFile(tmpPath, data, 0600); err != nil {
|
||||
return fmt.Errorf("failed to write manifest: %w", err)
|
||||
}
|
||||
|
||||
if err := os.Rename(tmpPath, path); err != nil {
|
||||
os.Remove(tmpPath)
|
||||
return fmt.Errorf("failed to commit manifest: %w", err)
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// Load reads a manifest from disk
|
||||
func (s *ManifestStore) Load(id string) (*Manifest, error) {
|
||||
path := s.manifestPath(id)
|
||||
|
||||
data, err := os.ReadFile(path)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to read manifest %s: %w", id, err)
|
||||
}
|
||||
|
||||
var m Manifest
|
||||
if err := json.Unmarshal(data, &m); err != nil {
|
||||
return nil, fmt.Errorf("failed to parse manifest %s: %w", id, err)
|
||||
}
|
||||
|
||||
return &m, nil
|
||||
}
|
||||
|
||||
// Delete removes a manifest
|
||||
func (s *ManifestStore) Delete(id string) error {
|
||||
path := s.manifestPath(id)
|
||||
if err := os.Remove(path); err != nil && !os.IsNotExist(err) {
|
||||
return fmt.Errorf("failed to delete manifest %s: %w", id, err)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// List returns all manifest IDs
|
||||
func (s *ManifestStore) List() ([]string, error) {
|
||||
manifestDir := filepath.Join(s.basePath, "manifests")
|
||||
entries, err := os.ReadDir(manifestDir)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to list manifests: %w", err)
|
||||
}
|
||||
|
||||
var ids []string
|
||||
for _, e := range entries {
|
||||
if e.IsDir() {
|
||||
continue
|
||||
}
|
||||
name := e.Name()
|
||||
if len(name) > 14 && name[len(name)-14:] == ".manifest.json" {
|
||||
ids = append(ids, name[:len(name)-14])
|
||||
}
|
||||
}
|
||||
|
||||
return ids, nil
|
||||
}
|
||||
|
||||
// ListAll returns all manifests sorted by creation time (newest first)
|
||||
func (s *ManifestStore) ListAll() ([]*Manifest, error) {
|
||||
ids, err := s.List()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
var manifests []*Manifest
|
||||
for _, id := range ids {
|
||||
m, err := s.Load(id)
|
||||
if err != nil {
|
||||
continue // Skip corrupted manifests
|
||||
}
|
||||
manifests = append(manifests, m)
|
||||
}
|
||||
|
||||
// Sort by creation time (newest first)
|
||||
for i := 0; i < len(manifests)-1; i++ {
|
||||
for j := i + 1; j < len(manifests); j++ {
|
||||
if manifests[j].CreatedAt.After(manifests[i].CreatedAt) {
|
||||
manifests[i], manifests[j] = manifests[j], manifests[i]
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return manifests, nil
|
||||
}
|
||||
|
||||
// GetChunkHashes returns all unique chunk hashes referenced by manifests
|
||||
func (s *ManifestStore) GetChunkHashes() (map[string]int, error) {
|
||||
manifests, err := s.ListAll()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
// Map hash -> reference count
|
||||
refs := make(map[string]int)
|
||||
for _, m := range manifests {
|
||||
for _, c := range m.Chunks {
|
||||
refs[c.Hash]++
|
||||
}
|
||||
}
|
||||
|
||||
return refs, nil
|
||||
}
|
||||
286
internal/dedup/metrics.go
Normal file
286
internal/dedup/metrics.go
Normal file
@ -0,0 +1,286 @@
|
||||
package dedup
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
"time"
|
||||
)
|
||||
|
||||
// DedupMetrics holds deduplication statistics for Prometheus
|
||||
type DedupMetrics struct {
|
||||
// Global stats
|
||||
TotalChunks int64
|
||||
TotalManifests int64
|
||||
TotalBackupSize int64 // Sum of all backup original sizes
|
||||
TotalNewData int64 // Sum of all new chunks stored
|
||||
SpaceSaved int64 // Bytes saved by deduplication
|
||||
DedupRatio float64 // Overall dedup ratio (0-1)
|
||||
DiskUsage int64 // Actual bytes on disk
|
||||
OldestChunkEpoch int64 // Unix timestamp of oldest chunk
|
||||
NewestChunkEpoch int64 // Unix timestamp of newest chunk
|
||||
CompressionRatio float64 // Compression ratio (raw vs stored)
|
||||
|
||||
// Per-database stats
|
||||
ByDatabase map[string]*DatabaseDedupMetrics
|
||||
}
|
||||
|
||||
// DatabaseDedupMetrics holds per-database dedup stats
|
||||
type DatabaseDedupMetrics struct {
|
||||
Database string
|
||||
BackupCount int
|
||||
TotalSize int64
|
||||
StoredSize int64
|
||||
DedupRatio float64
|
||||
LastBackupTime time.Time
|
||||
LastVerified time.Time
|
||||
}
|
||||
|
||||
// CollectMetrics gathers dedup statistics from the index and store
|
||||
func CollectMetrics(basePath string, indexPath string) (*DedupMetrics, error) {
|
||||
var idx *ChunkIndex
|
||||
var err error
|
||||
|
||||
if indexPath != "" {
|
||||
idx, err = NewChunkIndexAt(indexPath)
|
||||
} else {
|
||||
idx, err = NewChunkIndex(basePath)
|
||||
}
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to open chunk index: %w", err)
|
||||
}
|
||||
defer idx.Close()
|
||||
|
||||
store, err := NewChunkStore(StoreConfig{BasePath: basePath})
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to open chunk store: %w", err)
|
||||
}
|
||||
|
||||
// Get index stats
|
||||
stats, err := idx.Stats()
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to get index stats: %w", err)
|
||||
}
|
||||
|
||||
// Get store stats
|
||||
storeStats, err := store.Stats()
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to get store stats: %w", err)
|
||||
}
|
||||
|
||||
metrics := &DedupMetrics{
|
||||
TotalChunks: stats.TotalChunks,
|
||||
TotalManifests: stats.TotalManifests,
|
||||
TotalBackupSize: stats.TotalBackupSize,
|
||||
TotalNewData: stats.TotalNewData,
|
||||
SpaceSaved: stats.SpaceSaved,
|
||||
DedupRatio: stats.DedupRatio,
|
||||
DiskUsage: storeStats.TotalSize,
|
||||
ByDatabase: make(map[string]*DatabaseDedupMetrics),
|
||||
}
|
||||
|
||||
// Add chunk age timestamps
|
||||
if !stats.OldestChunk.IsZero() {
|
||||
metrics.OldestChunkEpoch = stats.OldestChunk.Unix()
|
||||
}
|
||||
if !stats.NewestChunk.IsZero() {
|
||||
metrics.NewestChunkEpoch = stats.NewestChunk.Unix()
|
||||
}
|
||||
|
||||
// Calculate compression ratio (raw size vs stored size)
|
||||
if stats.TotalSizeRaw > 0 {
|
||||
metrics.CompressionRatio = 1.0 - float64(stats.TotalSizeStored)/float64(stats.TotalSizeRaw)
|
||||
}
|
||||
|
||||
// Collect per-database metrics from manifest store
|
||||
manifestStore, err := NewManifestStore(basePath)
|
||||
if err != nil {
|
||||
return metrics, nil // Return partial metrics
|
||||
}
|
||||
|
||||
manifests, err := manifestStore.ListAll()
|
||||
if err != nil {
|
||||
return metrics, nil // Return partial metrics
|
||||
}
|
||||
|
||||
for _, m := range manifests {
|
||||
dbKey := m.DatabaseName
|
||||
if dbKey == "" {
|
||||
dbKey = "_default"
|
||||
}
|
||||
|
||||
dbMetrics, ok := metrics.ByDatabase[dbKey]
|
||||
if !ok {
|
||||
dbMetrics = &DatabaseDedupMetrics{
|
||||
Database: dbKey,
|
||||
}
|
||||
metrics.ByDatabase[dbKey] = dbMetrics
|
||||
}
|
||||
|
||||
dbMetrics.BackupCount++
|
||||
dbMetrics.TotalSize += m.OriginalSize
|
||||
dbMetrics.StoredSize += m.StoredSize
|
||||
|
||||
if m.CreatedAt.After(dbMetrics.LastBackupTime) {
|
||||
dbMetrics.LastBackupTime = m.CreatedAt
|
||||
}
|
||||
if !m.VerifiedAt.IsZero() && m.VerifiedAt.After(dbMetrics.LastVerified) {
|
||||
dbMetrics.LastVerified = m.VerifiedAt
|
||||
}
|
||||
}
|
||||
|
||||
// Calculate per-database dedup ratios
|
||||
for _, dbMetrics := range metrics.ByDatabase {
|
||||
if dbMetrics.TotalSize > 0 {
|
||||
dbMetrics.DedupRatio = 1.0 - float64(dbMetrics.StoredSize)/float64(dbMetrics.TotalSize)
|
||||
}
|
||||
}
|
||||
|
||||
return metrics, nil
|
||||
}
|
||||
|
||||
// WritePrometheusTextfile writes dedup metrics in Prometheus format
|
||||
func WritePrometheusTextfile(path string, instance string, basePath string, indexPath string) error {
|
||||
metrics, err := CollectMetrics(basePath, indexPath)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
output := FormatPrometheusMetrics(metrics, instance)
|
||||
|
||||
// Atomic write
|
||||
dir := filepath.Dir(path)
|
||||
if err := os.MkdirAll(dir, 0755); err != nil {
|
||||
return fmt.Errorf("failed to create directory: %w", err)
|
||||
}
|
||||
|
||||
tmpPath := path + ".tmp"
|
||||
if err := os.WriteFile(tmpPath, []byte(output), 0644); err != nil {
|
||||
return fmt.Errorf("failed to write temp file: %w", err)
|
||||
}
|
||||
|
||||
if err := os.Rename(tmpPath, path); err != nil {
|
||||
os.Remove(tmpPath)
|
||||
return fmt.Errorf("failed to rename temp file: %w", err)
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// FormatPrometheusMetrics formats dedup metrics in Prometheus exposition format
|
||||
func FormatPrometheusMetrics(m *DedupMetrics, instance string) string {
|
||||
var b strings.Builder
|
||||
now := time.Now().Unix()
|
||||
|
||||
b.WriteString("# DBBackup Deduplication Prometheus Metrics\n")
|
||||
b.WriteString(fmt.Sprintf("# Generated at: %s\n", time.Now().Format(time.RFC3339)))
|
||||
b.WriteString(fmt.Sprintf("# Instance: %s\n", instance))
|
||||
b.WriteString("\n")
|
||||
|
||||
// Global dedup metrics
|
||||
b.WriteString("# HELP dbbackup_dedup_chunks_total Total number of unique chunks stored\n")
|
||||
b.WriteString("# TYPE dbbackup_dedup_chunks_total gauge\n")
|
||||
b.WriteString(fmt.Sprintf("dbbackup_dedup_chunks_total{instance=%q} %d\n", instance, m.TotalChunks))
|
||||
b.WriteString("\n")
|
||||
|
||||
b.WriteString("# HELP dbbackup_dedup_manifests_total Total number of deduplicated backups\n")
|
||||
b.WriteString("# TYPE dbbackup_dedup_manifests_total gauge\n")
|
||||
b.WriteString(fmt.Sprintf("dbbackup_dedup_manifests_total{instance=%q} %d\n", instance, m.TotalManifests))
|
||||
b.WriteString("\n")
|
||||
|
||||
b.WriteString("# HELP dbbackup_dedup_backup_bytes_total Total logical size of all backups in bytes\n")
|
||||
b.WriteString("# TYPE dbbackup_dedup_backup_bytes_total gauge\n")
|
||||
b.WriteString(fmt.Sprintf("dbbackup_dedup_backup_bytes_total{instance=%q} %d\n", instance, m.TotalBackupSize))
|
||||
b.WriteString("\n")
|
||||
|
||||
b.WriteString("# HELP dbbackup_dedup_stored_bytes_total Total unique data stored in bytes (after dedup)\n")
|
||||
b.WriteString("# TYPE dbbackup_dedup_stored_bytes_total gauge\n")
|
||||
b.WriteString(fmt.Sprintf("dbbackup_dedup_stored_bytes_total{instance=%q} %d\n", instance, m.TotalNewData))
|
||||
b.WriteString("\n")
|
||||
|
||||
b.WriteString("# HELP dbbackup_dedup_space_saved_bytes Bytes saved by deduplication\n")
|
||||
b.WriteString("# TYPE dbbackup_dedup_space_saved_bytes gauge\n")
|
||||
b.WriteString(fmt.Sprintf("dbbackup_dedup_space_saved_bytes{instance=%q} %d\n", instance, m.SpaceSaved))
|
||||
b.WriteString("\n")
|
||||
|
||||
b.WriteString("# HELP dbbackup_dedup_ratio Deduplication ratio (0-1, higher is better)\n")
|
||||
b.WriteString("# TYPE dbbackup_dedup_ratio gauge\n")
|
||||
b.WriteString(fmt.Sprintf("dbbackup_dedup_ratio{instance=%q} %.4f\n", instance, m.DedupRatio))
|
||||
b.WriteString("\n")
|
||||
|
||||
b.WriteString("# HELP dbbackup_dedup_disk_usage_bytes Actual disk usage of chunk store\n")
|
||||
b.WriteString("# TYPE dbbackup_dedup_disk_usage_bytes gauge\n")
|
||||
b.WriteString(fmt.Sprintf("dbbackup_dedup_disk_usage_bytes{instance=%q} %d\n", instance, m.DiskUsage))
|
||||
b.WriteString("\n")
|
||||
|
||||
b.WriteString("# HELP dbbackup_dedup_compression_ratio Compression ratio (0-1, higher = better compression)\n")
|
||||
b.WriteString("# TYPE dbbackup_dedup_compression_ratio gauge\n")
|
||||
b.WriteString(fmt.Sprintf("dbbackup_dedup_compression_ratio{instance=%q} %.4f\n", instance, m.CompressionRatio))
|
||||
b.WriteString("\n")
|
||||
|
||||
if m.OldestChunkEpoch > 0 {
|
||||
b.WriteString("# HELP dbbackup_dedup_oldest_chunk_timestamp Unix timestamp of oldest chunk (for retention monitoring)\n")
|
||||
b.WriteString("# TYPE dbbackup_dedup_oldest_chunk_timestamp gauge\n")
|
||||
b.WriteString(fmt.Sprintf("dbbackup_dedup_oldest_chunk_timestamp{instance=%q} %d\n", instance, m.OldestChunkEpoch))
|
||||
b.WriteString("\n")
|
||||
}
|
||||
|
||||
if m.NewestChunkEpoch > 0 {
|
||||
b.WriteString("# HELP dbbackup_dedup_newest_chunk_timestamp Unix timestamp of newest chunk\n")
|
||||
b.WriteString("# TYPE dbbackup_dedup_newest_chunk_timestamp gauge\n")
|
||||
b.WriteString(fmt.Sprintf("dbbackup_dedup_newest_chunk_timestamp{instance=%q} %d\n", instance, m.NewestChunkEpoch))
|
||||
b.WriteString("\n")
|
||||
}
|
||||
|
||||
// Per-database metrics
|
||||
if len(m.ByDatabase) > 0 {
|
||||
b.WriteString("# HELP dbbackup_dedup_database_backup_count Number of deduplicated backups per database\n")
|
||||
b.WriteString("# TYPE dbbackup_dedup_database_backup_count gauge\n")
|
||||
for _, db := range m.ByDatabase {
|
||||
b.WriteString(fmt.Sprintf("dbbackup_dedup_database_backup_count{instance=%q,database=%q} %d\n",
|
||||
instance, db.Database, db.BackupCount))
|
||||
}
|
||||
b.WriteString("\n")
|
||||
|
||||
b.WriteString("# HELP dbbackup_dedup_database_ratio Deduplication ratio per database (0-1)\n")
|
||||
b.WriteString("# TYPE dbbackup_dedup_database_ratio gauge\n")
|
||||
for _, db := range m.ByDatabase {
|
||||
b.WriteString(fmt.Sprintf("dbbackup_dedup_database_ratio{instance=%q,database=%q} %.4f\n",
|
||||
instance, db.Database, db.DedupRatio))
|
||||
}
|
||||
b.WriteString("\n")
|
||||
|
||||
b.WriteString("# HELP dbbackup_dedup_database_last_backup_timestamp Last backup timestamp per database\n")
|
||||
b.WriteString("# TYPE dbbackup_dedup_database_last_backup_timestamp gauge\n")
|
||||
for _, db := range m.ByDatabase {
|
||||
if !db.LastBackupTime.IsZero() {
|
||||
b.WriteString(fmt.Sprintf("dbbackup_dedup_database_last_backup_timestamp{instance=%q,database=%q} %d\n",
|
||||
instance, db.Database, db.LastBackupTime.Unix()))
|
||||
}
|
||||
}
|
||||
b.WriteString("\n")
|
||||
|
||||
b.WriteString("# HELP dbbackup_dedup_database_total_bytes Total logical size per database\n")
|
||||
b.WriteString("# TYPE dbbackup_dedup_database_total_bytes gauge\n")
|
||||
for _, db := range m.ByDatabase {
|
||||
b.WriteString(fmt.Sprintf("dbbackup_dedup_database_total_bytes{instance=%q,database=%q} %d\n",
|
||||
instance, db.Database, db.TotalSize))
|
||||
}
|
||||
b.WriteString("\n")
|
||||
|
||||
b.WriteString("# HELP dbbackup_dedup_database_stored_bytes Stored bytes per database (after dedup)\n")
|
||||
b.WriteString("# TYPE dbbackup_dedup_database_stored_bytes gauge\n")
|
||||
for _, db := range m.ByDatabase {
|
||||
b.WriteString(fmt.Sprintf("dbbackup_dedup_database_stored_bytes{instance=%q,database=%q} %d\n",
|
||||
instance, db.Database, db.StoredSize))
|
||||
}
|
||||
b.WriteString("\n")
|
||||
}
|
||||
|
||||
b.WriteString("# HELP dbbackup_dedup_scrape_timestamp Unix timestamp when dedup metrics were collected\n")
|
||||
b.WriteString("# TYPE dbbackup_dedup_scrape_timestamp gauge\n")
|
||||
b.WriteString(fmt.Sprintf("dbbackup_dedup_scrape_timestamp{instance=%q} %d\n", instance, now))
|
||||
|
||||
return b.String()
|
||||
}
|
||||
403
internal/dedup/store.go
Normal file
403
internal/dedup/store.go
Normal file
@ -0,0 +1,403 @@
|
||||
package dedup
|
||||
|
||||
import (
|
||||
"crypto/aes"
|
||||
"crypto/cipher"
|
||||
"crypto/rand"
|
||||
"crypto/sha256"
|
||||
"encoding/hex"
|
||||
"fmt"
|
||||
"io"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
"github.com/klauspost/pgzip"
|
||||
)
|
||||
|
||||
// ChunkStore manages content-addressed chunk storage
|
||||
// Chunks are stored as: <base>/<prefix>/<hash>.chunk[.gz][.enc]
|
||||
type ChunkStore struct {
|
||||
basePath string
|
||||
compress bool
|
||||
encryptionKey []byte // 32 bytes for AES-256
|
||||
mu sync.RWMutex
|
||||
existingChunks map[string]bool // Cache of known chunks
|
||||
}
|
||||
|
||||
// StoreConfig holds configuration for the chunk store
|
||||
type StoreConfig struct {
|
||||
BasePath string
|
||||
Compress bool // Enable gzip compression
|
||||
EncryptionKey string // Optional: hex-encoded 32-byte key for AES-256-GCM
|
||||
}
|
||||
|
||||
// NewChunkStore creates a new chunk store
|
||||
func NewChunkStore(config StoreConfig) (*ChunkStore, error) {
|
||||
store := &ChunkStore{
|
||||
basePath: config.BasePath,
|
||||
compress: config.Compress,
|
||||
existingChunks: make(map[string]bool),
|
||||
}
|
||||
|
||||
// Parse encryption key if provided
|
||||
if config.EncryptionKey != "" {
|
||||
key, err := hex.DecodeString(config.EncryptionKey)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("invalid encryption key: %w", err)
|
||||
}
|
||||
if len(key) != 32 {
|
||||
return nil, fmt.Errorf("encryption key must be 32 bytes (got %d)", len(key))
|
||||
}
|
||||
store.encryptionKey = key
|
||||
}
|
||||
|
||||
// Create base directory structure
|
||||
if err := os.MkdirAll(config.BasePath, 0700); err != nil {
|
||||
return nil, fmt.Errorf("failed to create chunk store: %w", err)
|
||||
}
|
||||
|
||||
// Create chunks and manifests directories
|
||||
for _, dir := range []string{"chunks", "manifests"} {
|
||||
if err := os.MkdirAll(filepath.Join(config.BasePath, dir), 0700); err != nil {
|
||||
return nil, fmt.Errorf("failed to create %s directory: %w", dir, err)
|
||||
}
|
||||
}
|
||||
|
||||
return store, nil
|
||||
}
|
||||
|
||||
// chunkPath returns the filesystem path for a chunk hash
|
||||
// Uses 2-character prefix for directory sharding (256 subdirs)
|
||||
func (s *ChunkStore) chunkPath(hash string) string {
|
||||
if len(hash) < 2 {
|
||||
return filepath.Join(s.basePath, "chunks", "xx", hash+s.chunkExt())
|
||||
}
|
||||
prefix := hash[:2]
|
||||
return filepath.Join(s.basePath, "chunks", prefix, hash+s.chunkExt())
|
||||
}
|
||||
|
||||
// chunkExt returns the file extension based on compression/encryption settings
|
||||
func (s *ChunkStore) chunkExt() string {
|
||||
ext := ".chunk"
|
||||
if s.compress {
|
||||
ext += ".gz"
|
||||
}
|
||||
if s.encryptionKey != nil {
|
||||
ext += ".enc"
|
||||
}
|
||||
return ext
|
||||
}
|
||||
|
||||
// Has checks if a chunk exists in the store
|
||||
func (s *ChunkStore) Has(hash string) bool {
|
||||
s.mu.RLock()
|
||||
if exists, ok := s.existingChunks[hash]; ok {
|
||||
s.mu.RUnlock()
|
||||
return exists
|
||||
}
|
||||
s.mu.RUnlock()
|
||||
|
||||
// Check filesystem
|
||||
path := s.chunkPath(hash)
|
||||
_, err := os.Stat(path)
|
||||
exists := err == nil
|
||||
|
||||
s.mu.Lock()
|
||||
s.existingChunks[hash] = exists
|
||||
s.mu.Unlock()
|
||||
|
||||
return exists
|
||||
}
|
||||
|
||||
// Put stores a chunk, returning true if it was new (not deduplicated)
|
||||
func (s *ChunkStore) Put(chunk *Chunk) (isNew bool, err error) {
|
||||
// Check if already exists (deduplication!)
|
||||
if s.Has(chunk.Hash) {
|
||||
return false, nil
|
||||
}
|
||||
|
||||
path := s.chunkPath(chunk.Hash)
|
||||
chunkDir := filepath.Dir(path)
|
||||
|
||||
// Create prefix directory with verification for CIFS/NFS
|
||||
// Network filesystems can return success from MkdirAll before
|
||||
// the directory is actually visible for writes
|
||||
if err := os.MkdirAll(chunkDir, 0700); err != nil {
|
||||
return false, fmt.Errorf("failed to create chunk directory: %w", err)
|
||||
}
|
||||
|
||||
// Verify directory exists (CIFS workaround)
|
||||
for i := 0; i < 5; i++ {
|
||||
if _, err := os.Stat(chunkDir); err == nil {
|
||||
break
|
||||
}
|
||||
time.Sleep(20 * time.Millisecond)
|
||||
os.MkdirAll(chunkDir, 0700) // retry mkdir
|
||||
}
|
||||
|
||||
// Prepare data
|
||||
data := chunk.Data
|
||||
|
||||
// Compress if enabled
|
||||
if s.compress {
|
||||
data, err = s.compressData(data)
|
||||
if err != nil {
|
||||
return false, fmt.Errorf("compression failed: %w", err)
|
||||
}
|
||||
}
|
||||
|
||||
// Encrypt if enabled
|
||||
if s.encryptionKey != nil {
|
||||
data, err = s.encryptData(data)
|
||||
if err != nil {
|
||||
return false, fmt.Errorf("encryption failed: %w", err)
|
||||
}
|
||||
}
|
||||
|
||||
// Write atomically (write to temp, then rename)
|
||||
tmpPath := path + ".tmp"
|
||||
|
||||
// Write with retry for CIFS/NFS directory visibility lag
|
||||
var writeErr error
|
||||
for attempt := 0; attempt < 3; attempt++ {
|
||||
if writeErr = os.WriteFile(tmpPath, data, 0600); writeErr == nil {
|
||||
break
|
||||
}
|
||||
// Directory might not be visible yet on network FS
|
||||
time.Sleep(20 * time.Millisecond)
|
||||
os.MkdirAll(chunkDir, 0700)
|
||||
}
|
||||
if writeErr != nil {
|
||||
return false, fmt.Errorf("failed to write chunk: %w", writeErr)
|
||||
}
|
||||
|
||||
// Rename with retry for CIFS/SMB flakiness
|
||||
var renameErr error
|
||||
for attempt := 0; attempt < 3; attempt++ {
|
||||
if renameErr = os.Rename(tmpPath, path); renameErr == nil {
|
||||
break
|
||||
}
|
||||
// Brief pause before retry on network filesystems
|
||||
time.Sleep(10 * time.Millisecond)
|
||||
// Re-ensure directory exists (refresh CIFS cache)
|
||||
os.MkdirAll(filepath.Dir(path), 0700)
|
||||
}
|
||||
if renameErr != nil {
|
||||
os.Remove(tmpPath)
|
||||
return false, fmt.Errorf("failed to commit chunk: %w", renameErr)
|
||||
}
|
||||
|
||||
// Update cache
|
||||
s.mu.Lock()
|
||||
s.existingChunks[chunk.Hash] = true
|
||||
s.mu.Unlock()
|
||||
|
||||
return true, nil
|
||||
}
|
||||
|
||||
// Get retrieves a chunk by hash
|
||||
func (s *ChunkStore) Get(hash string) (*Chunk, error) {
|
||||
path := s.chunkPath(hash)
|
||||
|
||||
data, err := os.ReadFile(path)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to read chunk %s: %w", hash, err)
|
||||
}
|
||||
|
||||
// Decrypt if encrypted
|
||||
if s.encryptionKey != nil {
|
||||
data, err = s.decryptData(data)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("decryption failed: %w", err)
|
||||
}
|
||||
}
|
||||
|
||||
// Decompress if compressed
|
||||
if s.compress {
|
||||
data, err = s.decompressData(data)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("decompression failed: %w", err)
|
||||
}
|
||||
}
|
||||
|
||||
// Verify hash
|
||||
h := sha256.Sum256(data)
|
||||
actualHash := hex.EncodeToString(h[:])
|
||||
if actualHash != hash {
|
||||
return nil, fmt.Errorf("chunk hash mismatch: expected %s, got %s", hash, actualHash)
|
||||
}
|
||||
|
||||
return &Chunk{
|
||||
Hash: hash,
|
||||
Data: data,
|
||||
Length: len(data),
|
||||
}, nil
|
||||
}
|
||||
|
||||
// Delete removes a chunk from the store
|
||||
func (s *ChunkStore) Delete(hash string) error {
|
||||
path := s.chunkPath(hash)
|
||||
|
||||
if err := os.Remove(path); err != nil && !os.IsNotExist(err) {
|
||||
return fmt.Errorf("failed to delete chunk %s: %w", hash, err)
|
||||
}
|
||||
|
||||
s.mu.Lock()
|
||||
delete(s.existingChunks, hash)
|
||||
s.mu.Unlock()
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// Stats returns storage statistics
|
||||
type StoreStats struct {
|
||||
TotalChunks int64
|
||||
TotalSize int64 // Bytes on disk (after compression/encryption)
|
||||
UniqueSize int64 // Bytes of unique data
|
||||
Directories int
|
||||
}
|
||||
|
||||
// Stats returns statistics about the chunk store
|
||||
func (s *ChunkStore) Stats() (*StoreStats, error) {
|
||||
stats := &StoreStats{}
|
||||
|
||||
chunksDir := filepath.Join(s.basePath, "chunks")
|
||||
err := filepath.Walk(chunksDir, func(path string, info os.FileInfo, err error) error {
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if info.IsDir() {
|
||||
stats.Directories++
|
||||
return nil
|
||||
}
|
||||
stats.TotalChunks++
|
||||
stats.TotalSize += info.Size()
|
||||
return nil
|
||||
})
|
||||
|
||||
return stats, err
|
||||
}
|
||||
|
||||
// LoadIndex loads the existing chunk hashes into memory
|
||||
func (s *ChunkStore) LoadIndex() error {
|
||||
s.mu.Lock()
|
||||
defer s.mu.Unlock()
|
||||
|
||||
s.existingChunks = make(map[string]bool)
|
||||
|
||||
chunksDir := filepath.Join(s.basePath, "chunks")
|
||||
return filepath.Walk(chunksDir, func(path string, info os.FileInfo, err error) error {
|
||||
if err != nil || info.IsDir() {
|
||||
return err
|
||||
}
|
||||
|
||||
// Extract hash from filename
|
||||
base := filepath.Base(path)
|
||||
hash := base
|
||||
// Remove extensions
|
||||
for _, ext := range []string{".enc", ".gz", ".chunk"} {
|
||||
if len(hash) > len(ext) && hash[len(hash)-len(ext):] == ext {
|
||||
hash = hash[:len(hash)-len(ext)]
|
||||
}
|
||||
}
|
||||
if len(hash) == 64 { // SHA-256 hex length
|
||||
s.existingChunks[hash] = true
|
||||
}
|
||||
|
||||
return nil
|
||||
})
|
||||
}
|
||||
|
||||
// compressData compresses data using parallel gzip
|
||||
func (s *ChunkStore) compressData(data []byte) ([]byte, error) {
|
||||
var buf []byte
|
||||
w, err := pgzip.NewWriterLevel((*bytesBuffer)(&buf), pgzip.BestCompression)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if _, err := w.Write(data); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if err := w.Close(); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return buf, nil
|
||||
}
|
||||
|
||||
// bytesBuffer is a simple io.Writer that appends to a byte slice
|
||||
type bytesBuffer []byte
|
||||
|
||||
func (b *bytesBuffer) Write(p []byte) (int, error) {
|
||||
*b = append(*b, p...)
|
||||
return len(p), nil
|
||||
}
|
||||
|
||||
// decompressData decompresses gzip data
|
||||
func (s *ChunkStore) decompressData(data []byte) ([]byte, error) {
|
||||
r, err := pgzip.NewReader(&bytesReader{data: data})
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
defer r.Close()
|
||||
return io.ReadAll(r)
|
||||
}
|
||||
|
||||
// bytesReader is a simple io.Reader from a byte slice
|
||||
type bytesReader struct {
|
||||
data []byte
|
||||
pos int
|
||||
}
|
||||
|
||||
func (r *bytesReader) Read(p []byte) (int, error) {
|
||||
if r.pos >= len(r.data) {
|
||||
return 0, io.EOF
|
||||
}
|
||||
n := copy(p, r.data[r.pos:])
|
||||
r.pos += n
|
||||
return n, nil
|
||||
}
|
||||
|
||||
// encryptData encrypts data using AES-256-GCM
|
||||
func (s *ChunkStore) encryptData(plaintext []byte) ([]byte, error) {
|
||||
block, err := aes.NewCipher(s.encryptionKey)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
gcm, err := cipher.NewGCM(block)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
nonce := make([]byte, gcm.NonceSize())
|
||||
if _, err := rand.Read(nonce); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
// Prepend nonce to ciphertext
|
||||
return gcm.Seal(nonce, nonce, plaintext, nil), nil
|
||||
}
|
||||
|
||||
// decryptData decrypts AES-256-GCM encrypted data
|
||||
func (s *ChunkStore) decryptData(ciphertext []byte) ([]byte, error) {
|
||||
block, err := aes.NewCipher(s.encryptionKey)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
gcm, err := cipher.NewGCM(block)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
if len(ciphertext) < gcm.NonceSize() {
|
||||
return nil, fmt.Errorf("ciphertext too short")
|
||||
}
|
||||
|
||||
nonce := ciphertext[:gcm.NonceSize()]
|
||||
ciphertext = ciphertext[gcm.NonceSize():]
|
||||
|
||||
return gcm.Open(nil, nonce, ciphertext, nil)
|
||||
}
|
||||
@ -223,11 +223,11 @@ func (r *DrillResult) IsSuccess() bool {
|
||||
|
||||
// Summary returns a human-readable summary of the drill
|
||||
func (r *DrillResult) Summary() string {
|
||||
status := "✅ PASSED"
|
||||
status := "[OK] PASSED"
|
||||
if !r.Success {
|
||||
status = "❌ FAILED"
|
||||
status = "[FAIL] FAILED"
|
||||
} else if r.Status == StatusPartial {
|
||||
status = "⚠️ PARTIAL"
|
||||
status = "[WARN] PARTIAL"
|
||||
}
|
||||
|
||||
return fmt.Sprintf("%s - %s (%.2fs) - %d tables, %d rows",
|
||||
|
||||
@ -41,20 +41,20 @@ func (e *Engine) Run(ctx context.Context, config *DrillConfig) (*DrillResult, er
|
||||
TargetRTO: float64(config.MaxRestoreSeconds),
|
||||
}
|
||||
|
||||
e.log.Info("━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━")
|
||||
e.log.Info(" 🧪 DR Drill: " + result.DrillID)
|
||||
e.log.Info("━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━")
|
||||
e.log.Info("=====================================================")
|
||||
e.log.Info(" [TEST] DR Drill: " + result.DrillID)
|
||||
e.log.Info("=====================================================")
|
||||
e.log.Info("")
|
||||
|
||||
// Cleanup function for error cases
|
||||
var containerID string
|
||||
cleanup := func() {
|
||||
if containerID != "" && config.CleanupOnExit && (result.Success || !config.KeepOnFailure) {
|
||||
e.log.Info("🗑️ Cleaning up container...")
|
||||
e.log.Info("[DEL] Cleaning up container...")
|
||||
e.docker.RemoveContainer(context.Background(), containerID)
|
||||
} else if containerID != "" {
|
||||
result.ContainerKept = true
|
||||
e.log.Info("📦 Container kept for debugging: " + containerID)
|
||||
e.log.Info("[PKG] Container kept for debugging: " + containerID)
|
||||
}
|
||||
}
|
||||
defer cleanup()
|
||||
@ -88,7 +88,7 @@ func (e *Engine) Run(ctx context.Context, config *DrillConfig) (*DrillResult, er
|
||||
}
|
||||
containerID = container.ID
|
||||
result.ContainerID = containerID
|
||||
e.log.Info("📦 Container started: " + containerID[:12])
|
||||
e.log.Info("[PKG] Container started: " + containerID[:12])
|
||||
|
||||
// Wait for container to be healthy
|
||||
if err := e.docker.WaitForHealth(ctx, containerID, config.DatabaseType, config.ContainerTimeout); err != nil {
|
||||
@ -118,7 +118,7 @@ func (e *Engine) Run(ctx context.Context, config *DrillConfig) (*DrillResult, er
|
||||
result.RestoreTime = time.Since(restoreStart).Seconds()
|
||||
e.completePhase(&phase, fmt.Sprintf("Restored in %.2fs", result.RestoreTime))
|
||||
result.Phases = append(result.Phases, phase)
|
||||
e.log.Info(fmt.Sprintf("✅ Backup restored in %.2fs", result.RestoreTime))
|
||||
e.log.Info(fmt.Sprintf("[OK] Backup restored in %.2fs", result.RestoreTime))
|
||||
|
||||
// Phase 4: Validate
|
||||
phase = e.startPhase("Validate Database")
|
||||
@ -182,24 +182,24 @@ func (e *Engine) preflightChecks(ctx context.Context, config *DrillConfig) error
|
||||
if err := e.docker.CheckDockerAvailable(ctx); err != nil {
|
||||
return fmt.Errorf("docker not available: %w", err)
|
||||
}
|
||||
e.log.Info("✓ Docker is available")
|
||||
e.log.Info("[OK] Docker is available")
|
||||
|
||||
// Check backup file exists
|
||||
if _, err := os.Stat(config.BackupPath); err != nil {
|
||||
return fmt.Errorf("backup file not found: %s", config.BackupPath)
|
||||
}
|
||||
e.log.Info("✓ Backup file exists: " + filepath.Base(config.BackupPath))
|
||||
e.log.Info("[OK] Backup file exists: " + filepath.Base(config.BackupPath))
|
||||
|
||||
// Pull Docker image
|
||||
image := config.ContainerImage
|
||||
if image == "" {
|
||||
image = GetDefaultImage(config.DatabaseType, "")
|
||||
}
|
||||
e.log.Info("⬇️ Pulling image: " + image)
|
||||
e.log.Info("[DOWN] Pulling image: " + image)
|
||||
if err := e.docker.PullImage(ctx, image); err != nil {
|
||||
return fmt.Errorf("failed to pull image: %w", err)
|
||||
}
|
||||
e.log.Info("✓ Image ready: " + image)
|
||||
e.log.Info("[OK] Image ready: " + image)
|
||||
|
||||
return nil
|
||||
}
|
||||
@ -243,7 +243,7 @@ func (e *Engine) restoreBackup(ctx context.Context, config *DrillConfig, contain
|
||||
backupName := filepath.Base(config.BackupPath)
|
||||
containerBackupPath := "/tmp/" + backupName
|
||||
|
||||
e.log.Info("📁 Copying backup to container...")
|
||||
e.log.Info("[DIR] Copying backup to container...")
|
||||
if err := e.docker.CopyToContainer(ctx, containerID, config.BackupPath, containerBackupPath); err != nil {
|
||||
return fmt.Errorf("failed to copy backup: %w", err)
|
||||
}
|
||||
@ -256,7 +256,7 @@ func (e *Engine) restoreBackup(ctx context.Context, config *DrillConfig, contain
|
||||
}
|
||||
|
||||
// Restore based on database type and format
|
||||
e.log.Info("🔄 Restoring backup...")
|
||||
e.log.Info("[EXEC] Restoring backup...")
|
||||
return e.executeRestore(ctx, config, containerID, containerBackupPath, containerConfig)
|
||||
}
|
||||
|
||||
@ -366,13 +366,13 @@ func (e *Engine) validateDatabase(ctx context.Context, config *DrillConfig, resu
|
||||
tables, err := validator.GetTableList(ctx)
|
||||
if err == nil {
|
||||
result.TableCount = len(tables)
|
||||
e.log.Info(fmt.Sprintf("📊 Tables found: %d", result.TableCount))
|
||||
e.log.Info(fmt.Sprintf("[STATS] Tables found: %d", result.TableCount))
|
||||
}
|
||||
|
||||
totalRows, err := validator.GetTotalRowCount(ctx)
|
||||
if err == nil {
|
||||
result.TotalRows = totalRows
|
||||
e.log.Info(fmt.Sprintf("📊 Total rows: %d", result.TotalRows))
|
||||
e.log.Info(fmt.Sprintf("[STATS] Total rows: %d", result.TotalRows))
|
||||
}
|
||||
|
||||
dbSize, err := validator.GetDatabaseSize(ctx, config.DatabaseName)
|
||||
@ -387,9 +387,9 @@ func (e *Engine) validateDatabase(ctx context.Context, config *DrillConfig, resu
|
||||
result.CheckResults = append(result.CheckResults, tr)
|
||||
if !tr.Success {
|
||||
errorCount++
|
||||
e.log.Warn("❌ " + tr.Message)
|
||||
e.log.Warn("[FAIL] " + tr.Message)
|
||||
} else {
|
||||
e.log.Info("✓ " + tr.Message)
|
||||
e.log.Info("[OK] " + tr.Message)
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -404,9 +404,9 @@ func (e *Engine) validateDatabase(ctx context.Context, config *DrillConfig, resu
|
||||
totalQueryTime += qr.Duration
|
||||
if !qr.Success {
|
||||
errorCount++
|
||||
e.log.Warn(fmt.Sprintf("❌ %s: %s", qr.Name, qr.Error))
|
||||
e.log.Warn(fmt.Sprintf("[FAIL] %s: %s", qr.Name, qr.Error))
|
||||
} else {
|
||||
e.log.Info(fmt.Sprintf("✓ %s: %s (%.0fms)", qr.Name, qr.Result, qr.Duration))
|
||||
e.log.Info(fmt.Sprintf("[OK] %s: %s (%.0fms)", qr.Name, qr.Result, qr.Duration))
|
||||
}
|
||||
}
|
||||
if len(queryResults) > 0 {
|
||||
@ -421,9 +421,9 @@ func (e *Engine) validateDatabase(ctx context.Context, config *DrillConfig, resu
|
||||
result.CheckResults = append(result.CheckResults, cr)
|
||||
if !cr.Success {
|
||||
errorCount++
|
||||
e.log.Warn("❌ " + cr.Message)
|
||||
e.log.Warn("[FAIL] " + cr.Message)
|
||||
} else {
|
||||
e.log.Info("✓ " + cr.Message)
|
||||
e.log.Info("[OK] " + cr.Message)
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -433,7 +433,7 @@ func (e *Engine) validateDatabase(ctx context.Context, config *DrillConfig, resu
|
||||
errorCount++
|
||||
msg := fmt.Sprintf("Total rows (%d) below minimum (%d)", result.TotalRows, config.MinRowCount)
|
||||
result.Warnings = append(result.Warnings, msg)
|
||||
e.log.Warn("⚠️ " + msg)
|
||||
e.log.Warn("[WARN] " + msg)
|
||||
}
|
||||
|
||||
return errorCount
|
||||
@ -441,7 +441,7 @@ func (e *Engine) validateDatabase(ctx context.Context, config *DrillConfig, resu
|
||||
|
||||
// startPhase starts a new drill phase
|
||||
func (e *Engine) startPhase(name string) DrillPhase {
|
||||
e.log.Info("▶️ " + name)
|
||||
e.log.Info("[RUN] " + name)
|
||||
return DrillPhase{
|
||||
Name: name,
|
||||
Status: "running",
|
||||
@ -463,7 +463,7 @@ func (e *Engine) failPhase(phase *DrillPhase, message string) {
|
||||
phase.Duration = phase.EndTime.Sub(phase.StartTime).Seconds()
|
||||
phase.Status = "failed"
|
||||
phase.Message = message
|
||||
e.log.Error("❌ Phase failed: " + message)
|
||||
e.log.Error("[FAIL] Phase failed: " + message)
|
||||
}
|
||||
|
||||
// finalize completes the drill result
|
||||
@ -472,9 +472,9 @@ func (e *Engine) finalize(result *DrillResult) {
|
||||
result.Duration = result.EndTime.Sub(result.StartTime).Seconds()
|
||||
|
||||
e.log.Info("")
|
||||
e.log.Info("━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━")
|
||||
e.log.Info("=====================================================")
|
||||
e.log.Info(" " + result.Summary())
|
||||
e.log.Info("━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━")
|
||||
e.log.Info("=====================================================")
|
||||
|
||||
if result.Success {
|
||||
e.log.Info(fmt.Sprintf(" RTO: %.2fs (target: %.0fs) %s",
|
||||
@ -484,9 +484,9 @@ func (e *Engine) finalize(result *DrillResult) {
|
||||
|
||||
func boolIcon(b bool) string {
|
||||
if b {
|
||||
return "✅"
|
||||
return "[OK]"
|
||||
}
|
||||
return "❌"
|
||||
return "[FAIL]"
|
||||
}
|
||||
|
||||
// Cleanup removes drill resources
|
||||
@ -498,7 +498,7 @@ func (e *Engine) Cleanup(ctx context.Context, drillID string) error {
|
||||
|
||||
for _, c := range containers {
|
||||
if strings.Contains(c.Name, drillID) || (drillID == "" && strings.HasPrefix(c.Name, "drill_")) {
|
||||
e.log.Info("🗑️ Removing container: " + c.Name)
|
||||
e.log.Info("[DEL] Removing container: " + c.Name)
|
||||
if err := e.docker.RemoveContainer(ctx, c.ID); err != nil {
|
||||
e.log.Warn("Failed to remove container", "id", c.ID, "error", err)
|
||||
}
|
||||
|
||||
@ -8,7 +8,7 @@ import (
|
||||
|
||||
func TestEncryptDecrypt(t *testing.T) {
|
||||
// Test data
|
||||
original := []byte("This is a secret database backup that needs encryption! 🔒")
|
||||
original := []byte("This is a secret database backup that needs encryption! [LOCK]")
|
||||
|
||||
// Test with passphrase
|
||||
t.Run("Passphrase", func(t *testing.T) {
|
||||
@ -57,7 +57,7 @@ func TestEncryptDecrypt(t *testing.T) {
|
||||
string(original), string(decrypted))
|
||||
}
|
||||
|
||||
t.Log("✅ Encryption/decryption successful")
|
||||
t.Log("[OK] Encryption/decryption successful")
|
||||
})
|
||||
|
||||
// Test with direct key
|
||||
@ -102,7 +102,7 @@ func TestEncryptDecrypt(t *testing.T) {
|
||||
t.Errorf("Decrypted data doesn't match original")
|
||||
}
|
||||
|
||||
t.Log("✅ Direct key encryption/decryption successful")
|
||||
t.Log("[OK] Direct key encryption/decryption successful")
|
||||
})
|
||||
|
||||
// Test wrong password
|
||||
@ -133,7 +133,7 @@ func TestEncryptDecrypt(t *testing.T) {
|
||||
t.Error("Expected decryption to fail with wrong password, but it succeeded")
|
||||
}
|
||||
|
||||
t.Logf("✅ Wrong password correctly rejected: %v", err)
|
||||
t.Logf("[OK] Wrong password correctly rejected: %v", err)
|
||||
})
|
||||
}
|
||||
|
||||
@ -183,7 +183,7 @@ func TestLargeData(t *testing.T) {
|
||||
t.Errorf("Large data decryption failed")
|
||||
}
|
||||
|
||||
t.Log("✅ Large data encryption/decryption successful")
|
||||
t.Log("[OK] Large data encryption/decryption successful")
|
||||
}
|
||||
|
||||
func TestKeyGeneration(t *testing.T) {
|
||||
@ -207,7 +207,7 @@ func TestKeyGeneration(t *testing.T) {
|
||||
t.Error("Generated keys are identical - randomness broken!")
|
||||
}
|
||||
|
||||
t.Log("✅ Key generation successful")
|
||||
t.Log("[OK] Key generation successful")
|
||||
}
|
||||
|
||||
func TestKeyDerivation(t *testing.T) {
|
||||
@ -230,5 +230,5 @@ func TestKeyDerivation(t *testing.T) {
|
||||
t.Error("Different salts produced same key")
|
||||
}
|
||||
|
||||
t.Log("✅ Key derivation successful")
|
||||
t.Log("[OK] Key derivation successful")
|
||||
}
|
||||
|
||||
@ -1,7 +1,6 @@
|
||||
package binlog
|
||||
|
||||
import (
|
||||
"compress/gzip"
|
||||
"context"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
@ -9,6 +8,8 @@ import (
|
||||
"path/filepath"
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
"github.com/klauspost/pgzip"
|
||||
)
|
||||
|
||||
// FileTarget writes binlog events to local files
|
||||
@ -167,7 +168,7 @@ type CompressedFileTarget struct {
|
||||
|
||||
mu sync.Mutex
|
||||
file *os.File
|
||||
gzWriter *gzip.Writer
|
||||
gzWriter *pgzip.Writer
|
||||
written int64
|
||||
fileNum int
|
||||
healthy bool
|
||||
@ -261,7 +262,7 @@ func (c *CompressedFileTarget) openNewFile() error {
|
||||
}
|
||||
|
||||
c.file = file
|
||||
c.gzWriter = gzip.NewWriter(file)
|
||||
c.gzWriter = pgzip.NewWriter(file)
|
||||
c.written = 0
|
||||
return nil
|
||||
}
|
||||
|
||||
@ -2,7 +2,6 @@ package engine
|
||||
|
||||
import (
|
||||
"archive/tar"
|
||||
"compress/gzip"
|
||||
"context"
|
||||
"database/sql"
|
||||
"fmt"
|
||||
@ -17,6 +16,8 @@ import (
|
||||
"dbbackup/internal/logger"
|
||||
"dbbackup/internal/metadata"
|
||||
"dbbackup/internal/security"
|
||||
|
||||
"github.com/klauspost/pgzip"
|
||||
)
|
||||
|
||||
// CloneEngine implements BackupEngine using MySQL Clone Plugin (8.0.17+)
|
||||
@ -339,7 +340,7 @@ func (e *CloneEngine) Backup(ctx context.Context, opts *BackupOptions) (*BackupR
|
||||
|
||||
// Save metadata
|
||||
meta := &metadata.BackupMetadata{
|
||||
Version: "3.40.0",
|
||||
Version: "3.42.1",
|
||||
Timestamp: startTime,
|
||||
Database: opts.Database,
|
||||
DatabaseType: "mysql",
|
||||
@ -597,12 +598,12 @@ func (e *CloneEngine) compressClone(ctx context.Context, sourceDir, targetFile s
|
||||
}
|
||||
defer outFile.Close()
|
||||
|
||||
// Create gzip writer
|
||||
// Create parallel gzip writer for faster compression
|
||||
level := e.config.CompressLevel
|
||||
if level == 0 {
|
||||
level = gzip.DefaultCompression
|
||||
level = pgzip.DefaultCompression
|
||||
}
|
||||
gzWriter, err := gzip.NewWriterLevel(outFile, level)
|
||||
gzWriter, err := pgzip.NewWriterLevel(outFile, level)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
@ -684,8 +685,8 @@ func (e *CloneEngine) extractClone(ctx context.Context, sourceFile, targetDir st
|
||||
}
|
||||
defer file.Close()
|
||||
|
||||
// Create gzip reader
|
||||
gzReader, err := gzip.NewReader(file)
|
||||
// Create parallel gzip reader for faster decompression
|
||||
gzReader, err := pgzip.NewReader(file)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
@ -2,7 +2,6 @@ package engine
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"compress/gzip"
|
||||
"context"
|
||||
"database/sql"
|
||||
"fmt"
|
||||
@ -17,6 +16,8 @@ import (
|
||||
"dbbackup/internal/logger"
|
||||
"dbbackup/internal/metadata"
|
||||
"dbbackup/internal/security"
|
||||
|
||||
"github.com/klauspost/pgzip"
|
||||
)
|
||||
|
||||
// MySQLDumpEngine implements BackupEngine using mysqldump
|
||||
@ -184,13 +185,13 @@ func (e *MySQLDumpEngine) Backup(ctx context.Context, opts *BackupOptions) (*Bac
|
||||
|
||||
// Setup writer (with optional compression)
|
||||
var writer io.Writer = outFile
|
||||
var gzWriter *gzip.Writer
|
||||
var gzWriter *pgzip.Writer
|
||||
if opts.Compress {
|
||||
level := opts.CompressLevel
|
||||
if level == 0 {
|
||||
level = gzip.DefaultCompression
|
||||
level = pgzip.DefaultCompression
|
||||
}
|
||||
gzWriter, err = gzip.NewWriterLevel(outFile, level)
|
||||
gzWriter, err = pgzip.NewWriterLevel(outFile, level)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to create gzip writer: %w", err)
|
||||
}
|
||||
@ -234,10 +235,26 @@ func (e *MySQLDumpEngine) Backup(ctx context.Context, opts *BackupOptions) (*Bac
|
||||
gzWriter.Close()
|
||||
}
|
||||
|
||||
// Wait for command
|
||||
if err := cmd.Wait(); err != nil {
|
||||
// Wait for command with proper context handling
|
||||
cmdDone := make(chan error, 1)
|
||||
go func() {
|
||||
cmdDone <- cmd.Wait()
|
||||
}()
|
||||
|
||||
var cmdErr error
|
||||
select {
|
||||
case cmdErr = <-cmdDone:
|
||||
// Command completed
|
||||
case <-ctx.Done():
|
||||
e.log.Warn("MySQL backup cancelled - killing process")
|
||||
cmd.Process.Kill()
|
||||
<-cmdDone
|
||||
cmdErr = ctx.Err()
|
||||
}
|
||||
|
||||
if cmdErr != nil {
|
||||
stderr := stderrBuf.String()
|
||||
return nil, fmt.Errorf("mysqldump failed: %w\n%s", err, stderr)
|
||||
return nil, fmt.Errorf("mysqldump failed: %w\n%s", cmdErr, stderr)
|
||||
}
|
||||
|
||||
// Get file info
|
||||
@ -254,7 +271,7 @@ func (e *MySQLDumpEngine) Backup(ctx context.Context, opts *BackupOptions) (*Bac
|
||||
|
||||
// Save metadata
|
||||
meta := &metadata.BackupMetadata{
|
||||
Version: "3.40.0",
|
||||
Version: "3.42.1",
|
||||
Timestamp: startTime,
|
||||
Database: opts.Database,
|
||||
DatabaseType: "mysql",
|
||||
@ -358,7 +375,7 @@ func (e *MySQLDumpEngine) Restore(ctx context.Context, opts *RestoreOptions) err
|
||||
// Setup reader (with optional decompression)
|
||||
var reader io.Reader = inFile
|
||||
if strings.HasSuffix(opts.SourcePath, ".gz") {
|
||||
gzReader, err := gzip.NewReader(inFile)
|
||||
gzReader, err := pgzip.NewReader(inFile)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to create gzip reader: %w", err)
|
||||
}
|
||||
@ -425,9 +442,9 @@ func (e *MySQLDumpEngine) BackupToWriter(ctx context.Context, w io.Writer, opts
|
||||
|
||||
// Copy with optional compression
|
||||
var writer io.Writer = w
|
||||
var gzWriter *gzip.Writer
|
||||
var gzWriter *pgzip.Writer
|
||||
if opts.Compress {
|
||||
gzWriter = gzip.NewWriter(w)
|
||||
gzWriter = pgzip.NewWriter(w)
|
||||
defer gzWriter.Close()
|
||||
writer = gzWriter
|
||||
}
|
||||
@ -442,8 +459,25 @@ func (e *MySQLDumpEngine) BackupToWriter(ctx context.Context, w io.Writer, opts
|
||||
gzWriter.Close()
|
||||
}
|
||||
|
||||
if err := cmd.Wait(); err != nil {
|
||||
return nil, fmt.Errorf("mysqldump failed: %w\n%s", err, stderrBuf.String())
|
||||
// Wait for command with proper context handling
|
||||
cmdDone := make(chan error, 1)
|
||||
go func() {
|
||||
cmdDone <- cmd.Wait()
|
||||
}()
|
||||
|
||||
var cmdErr error
|
||||
select {
|
||||
case cmdErr = <-cmdDone:
|
||||
// Command completed
|
||||
case <-ctx.Done():
|
||||
e.log.Warn("MySQL streaming backup cancelled - killing process")
|
||||
cmd.Process.Kill()
|
||||
<-cmdDone
|
||||
cmdErr = ctx.Err()
|
||||
}
|
||||
|
||||
if cmdErr != nil {
|
||||
return nil, fmt.Errorf("mysqldump failed: %w\n%s", cmdErr, stderrBuf.String())
|
||||
}
|
||||
|
||||
return &BackupResult{
|
||||
|
||||
@ -63,7 +63,7 @@ func (b *BtrfsBackend) Detect(dataDir string) (bool, error) {
|
||||
// CreateSnapshot creates a Btrfs snapshot
|
||||
func (b *BtrfsBackend) CreateSnapshot(ctx context.Context, opts SnapshotOptions) (*Snapshot, error) {
|
||||
if b.config == nil || b.config.Subvolume == "" {
|
||||
return nil, fmt.Errorf("Btrfs subvolume not configured")
|
||||
return nil, fmt.Errorf("btrfs subvolume not configured")
|
||||
}
|
||||
|
||||
// Generate snapshot name
|
||||
|
||||
@ -2,7 +2,6 @@ package engine
|
||||
|
||||
import (
|
||||
"archive/tar"
|
||||
"compress/gzip"
|
||||
"context"
|
||||
"database/sql"
|
||||
"fmt"
|
||||
@ -15,6 +14,8 @@ import (
|
||||
"dbbackup/internal/logger"
|
||||
"dbbackup/internal/metadata"
|
||||
"dbbackup/internal/security"
|
||||
|
||||
"github.com/klauspost/pgzip"
|
||||
)
|
||||
|
||||
// SnapshotEngine implements BackupEngine using filesystem snapshots
|
||||
@ -188,6 +189,8 @@ func (e *SnapshotEngine) Backup(ctx context.Context, opts *BackupOptions) (*Back
|
||||
// Step 4: Mount snapshot
|
||||
mountPoint := e.config.MountPoint
|
||||
if mountPoint == "" {
|
||||
// Note: snapshot engine uses snapshot.Config which doesnt have GetEffectiveWorkDir()
|
||||
// TODO: Refactor to use main config.Config for WorkDir support
|
||||
mountPoint = filepath.Join(os.TempDir(), fmt.Sprintf("dbbackup_snap_%s", timestamp))
|
||||
}
|
||||
|
||||
@ -223,7 +226,7 @@ func (e *SnapshotEngine) Backup(ctx context.Context, opts *BackupOptions) (*Back
|
||||
|
||||
// Save metadata
|
||||
meta := &metadata.BackupMetadata{
|
||||
Version: "3.40.0",
|
||||
Version: "3.42.1",
|
||||
Timestamp: startTime,
|
||||
Database: opts.Database,
|
||||
DatabaseType: "mysql",
|
||||
@ -294,14 +297,13 @@ func (e *SnapshotEngine) streamSnapshot(ctx context.Context, sourcePath, destFil
|
||||
// Wrap in counting writer for progress
|
||||
countWriter := &countingWriter{w: outFile}
|
||||
|
||||
// Create gzip writer
|
||||
level := gzip.DefaultCompression
|
||||
// Create parallel gzip writer for faster compression
|
||||
level := pgzip.DefaultCompression
|
||||
if e.config.Threads > 1 {
|
||||
// Use parallel gzip if available (pigz)
|
||||
// For now, use standard gzip
|
||||
level = gzip.BestSpeed // Faster for parallel streaming
|
||||
// pgzip already uses parallel compression
|
||||
level = pgzip.BestSpeed // Faster for parallel streaming
|
||||
}
|
||||
gzWriter, err := gzip.NewWriterLevel(countWriter, level)
|
||||
gzWriter, err := pgzip.NewWriterLevel(countWriter, level)
|
||||
if err != nil {
|
||||
return 0, err
|
||||
}
|
||||
@ -446,8 +448,8 @@ func (e *SnapshotEngine) Restore(ctx context.Context, opts *RestoreOptions) erro
|
||||
}
|
||||
defer file.Close()
|
||||
|
||||
// Create gzip reader
|
||||
gzReader, err := gzip.NewReader(file)
|
||||
// Create parallel gzip reader for faster decompression
|
||||
gzReader, err := pgzip.NewReader(file)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to create gzip reader: %w", err)
|
||||
}
|
||||
|
||||
396
internal/fs/extract.go
Normal file
396
internal/fs/extract.go
Normal file
@ -0,0 +1,396 @@
|
||||
// Package fs provides parallel tar.gz extraction using pgzip
|
||||
package fs
|
||||
|
||||
import (
|
||||
"archive/tar"
|
||||
"context"
|
||||
"fmt"
|
||||
"io"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"runtime"
|
||||
"strings"
|
||||
|
||||
"github.com/klauspost/pgzip"
|
||||
)
|
||||
|
||||
// ParallelGzipWriter wraps pgzip.Writer for streaming compression
|
||||
type ParallelGzipWriter struct {
|
||||
*pgzip.Writer
|
||||
}
|
||||
|
||||
// NewParallelGzipWriter creates a parallel gzip writer using all CPU cores
|
||||
// This is 2-4x faster than standard gzip on multi-core systems
|
||||
func NewParallelGzipWriter(w io.Writer, level int) (*ParallelGzipWriter, error) {
|
||||
gzWriter, err := pgzip.NewWriterLevel(w, level)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("cannot create gzip writer: %w", err)
|
||||
}
|
||||
// Set block size and concurrency for parallel compression
|
||||
if err := gzWriter.SetConcurrency(1<<20, runtime.NumCPU()); err != nil {
|
||||
// Non-fatal, continue with defaults
|
||||
}
|
||||
return &ParallelGzipWriter{Writer: gzWriter}, nil
|
||||
}
|
||||
|
||||
// ExtractProgress reports extraction progress
|
||||
type ExtractProgress struct {
|
||||
CurrentFile string
|
||||
BytesRead int64
|
||||
TotalBytes int64
|
||||
FilesCount int
|
||||
CurrentIndex int
|
||||
}
|
||||
|
||||
// ProgressCallback is called during extraction
|
||||
type ProgressCallback func(progress ExtractProgress)
|
||||
|
||||
// ExtractTarGzParallel extracts a tar.gz archive using parallel gzip decompression
|
||||
// This is 2-4x faster than standard gzip on multi-core systems
|
||||
// Uses pgzip which decompresses in parallel using multiple goroutines
|
||||
func ExtractTarGzParallel(ctx context.Context, archivePath, destDir string, progressCb ProgressCallback) error {
|
||||
// Open the archive
|
||||
file, err := os.Open(archivePath)
|
||||
if err != nil {
|
||||
return fmt.Errorf("cannot open archive: %w", err)
|
||||
}
|
||||
defer file.Close()
|
||||
|
||||
// Get file size for progress
|
||||
stat, err := file.Stat()
|
||||
if err != nil {
|
||||
return fmt.Errorf("cannot stat archive: %w", err)
|
||||
}
|
||||
totalSize := stat.Size()
|
||||
|
||||
// Create parallel gzip reader
|
||||
// Uses all available CPU cores for decompression
|
||||
gzReader, err := pgzip.NewReaderN(file, 1<<20, runtime.NumCPU()) // 1MB blocks
|
||||
if err != nil {
|
||||
return fmt.Errorf("cannot create gzip reader: %w", err)
|
||||
}
|
||||
defer gzReader.Close()
|
||||
|
||||
// Create tar reader
|
||||
tarReader := tar.NewReader(gzReader)
|
||||
|
||||
// Track progress
|
||||
var bytesRead int64
|
||||
var filesCount int
|
||||
|
||||
// Extract each file
|
||||
for {
|
||||
// Check context
|
||||
select {
|
||||
case <-ctx.Done():
|
||||
return ctx.Err()
|
||||
default:
|
||||
}
|
||||
|
||||
header, err := tarReader.Next()
|
||||
if err == io.EOF {
|
||||
break
|
||||
}
|
||||
if err != nil {
|
||||
return fmt.Errorf("error reading tar: %w", err)
|
||||
}
|
||||
|
||||
// Security: prevent path traversal
|
||||
targetPath := filepath.Join(destDir, header.Name)
|
||||
if !strings.HasPrefix(filepath.Clean(targetPath), filepath.Clean(destDir)) {
|
||||
return fmt.Errorf("path traversal detected: %s", header.Name)
|
||||
}
|
||||
|
||||
filesCount++
|
||||
|
||||
// Report progress
|
||||
if progressCb != nil {
|
||||
// Estimate bytes read from file position
|
||||
pos, _ := file.Seek(0, io.SeekCurrent)
|
||||
progressCb(ExtractProgress{
|
||||
CurrentFile: header.Name,
|
||||
BytesRead: pos,
|
||||
TotalBytes: totalSize,
|
||||
FilesCount: filesCount,
|
||||
CurrentIndex: filesCount,
|
||||
})
|
||||
}
|
||||
|
||||
switch header.Typeflag {
|
||||
case tar.TypeDir:
|
||||
if err := os.MkdirAll(targetPath, 0700); err != nil {
|
||||
return fmt.Errorf("cannot create directory %s: %w", targetPath, err)
|
||||
}
|
||||
|
||||
case tar.TypeReg:
|
||||
// Ensure parent directory exists
|
||||
if err := os.MkdirAll(filepath.Dir(targetPath), 0700); err != nil {
|
||||
return fmt.Errorf("cannot create parent directory: %w", err)
|
||||
}
|
||||
|
||||
// Create file with secure permissions
|
||||
outFile, err := os.OpenFile(targetPath, os.O_CREATE|os.O_WRONLY|os.O_TRUNC, 0600)
|
||||
if err != nil {
|
||||
return fmt.Errorf("cannot create file %s: %w", targetPath, err)
|
||||
}
|
||||
|
||||
// Copy with size limit to prevent zip bombs
|
||||
written, err := io.Copy(outFile, tarReader)
|
||||
outFile.Close()
|
||||
|
||||
if err != nil {
|
||||
return fmt.Errorf("error writing %s: %w", targetPath, err)
|
||||
}
|
||||
|
||||
bytesRead += written
|
||||
|
||||
case tar.TypeSymlink:
|
||||
// Handle symlinks (validate target is within destDir)
|
||||
linkTarget := header.Linkname
|
||||
absTarget := filepath.Join(filepath.Dir(targetPath), linkTarget)
|
||||
if !strings.HasPrefix(filepath.Clean(absTarget), filepath.Clean(destDir)) {
|
||||
// Skip symlinks that point outside
|
||||
continue
|
||||
}
|
||||
if err := os.Symlink(linkTarget, targetPath); err != nil {
|
||||
// Ignore symlink errors (may not be supported)
|
||||
continue
|
||||
}
|
||||
|
||||
default:
|
||||
// Skip other types (devices, etc.)
|
||||
continue
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// ListTarGzContents lists the contents of a tar.gz archive without extracting
|
||||
// Returns a slice of file paths in the archive
|
||||
// Uses parallel gzip decompression for 2-4x faster listing on multi-core systems
|
||||
func ListTarGzContents(ctx context.Context, archivePath string) ([]string, error) {
|
||||
// Open the archive
|
||||
file, err := os.Open(archivePath)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("cannot open archive: %w", err)
|
||||
}
|
||||
defer file.Close()
|
||||
|
||||
// Create parallel gzip reader
|
||||
gzReader, err := pgzip.NewReaderN(file, 1<<20, runtime.NumCPU())
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("cannot create gzip reader: %w", err)
|
||||
}
|
||||
defer gzReader.Close()
|
||||
|
||||
// Create tar reader
|
||||
tarReader := tar.NewReader(gzReader)
|
||||
|
||||
var files []string
|
||||
for {
|
||||
// Check for cancellation
|
||||
select {
|
||||
case <-ctx.Done():
|
||||
return nil, ctx.Err()
|
||||
default:
|
||||
}
|
||||
|
||||
header, err := tarReader.Next()
|
||||
if err == io.EOF {
|
||||
break
|
||||
}
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("tar read error: %w", err)
|
||||
}
|
||||
|
||||
files = append(files, header.Name)
|
||||
}
|
||||
|
||||
return files, nil
|
||||
}
|
||||
|
||||
// ExtractTarGzFast is a convenience wrapper that chooses the best extraction method
|
||||
// Uses parallel gzip if available, falls back to system tar if needed
|
||||
func ExtractTarGzFast(ctx context.Context, archivePath, destDir string, progressCb ProgressCallback) error {
|
||||
// Always use parallel Go implementation - it's faster and more portable
|
||||
return ExtractTarGzParallel(ctx, archivePath, destDir, progressCb)
|
||||
}
|
||||
|
||||
// CreateProgress reports archive creation progress
|
||||
type CreateProgress struct {
|
||||
CurrentFile string
|
||||
BytesWritten int64
|
||||
FilesCount int
|
||||
}
|
||||
|
||||
// CreateProgressCallback is called during archive creation
|
||||
type CreateProgressCallback func(progress CreateProgress)
|
||||
|
||||
// CreateTarGzParallel creates a tar.gz archive using parallel gzip compression
|
||||
// This is 2-4x faster than standard gzip on multi-core systems
|
||||
// Uses pgzip which compresses in parallel using multiple goroutines
|
||||
func CreateTarGzParallel(ctx context.Context, sourceDir, outputPath string, compressionLevel int, progressCb CreateProgressCallback) error {
|
||||
// Create output file
|
||||
outFile, err := os.Create(outputPath)
|
||||
if err != nil {
|
||||
return fmt.Errorf("cannot create archive: %w", err)
|
||||
}
|
||||
defer outFile.Close()
|
||||
|
||||
// Create parallel gzip writer
|
||||
// Uses all available CPU cores for compression
|
||||
gzWriter, err := pgzip.NewWriterLevel(outFile, compressionLevel)
|
||||
if err != nil {
|
||||
return fmt.Errorf("cannot create gzip writer: %w", err)
|
||||
}
|
||||
// Set block size and concurrency for parallel compression
|
||||
if err := gzWriter.SetConcurrency(1<<20, runtime.NumCPU()); err != nil {
|
||||
// Non-fatal, continue with defaults
|
||||
}
|
||||
defer gzWriter.Close()
|
||||
|
||||
// Create tar writer
|
||||
tarWriter := tar.NewWriter(gzWriter)
|
||||
defer tarWriter.Close()
|
||||
|
||||
var bytesWritten int64
|
||||
var filesCount int
|
||||
|
||||
// Walk the source directory
|
||||
err = filepath.Walk(sourceDir, func(path string, info os.FileInfo, err error) error {
|
||||
// Check for cancellation
|
||||
select {
|
||||
case <-ctx.Done():
|
||||
return ctx.Err()
|
||||
default:
|
||||
}
|
||||
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// Get relative path
|
||||
relPath, err := filepath.Rel(sourceDir, path)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// Skip the root directory itself
|
||||
if relPath == "." {
|
||||
return nil
|
||||
}
|
||||
|
||||
// Create tar header
|
||||
header, err := tar.FileInfoHeader(info, "")
|
||||
if err != nil {
|
||||
return fmt.Errorf("cannot create header for %s: %w", relPath, err)
|
||||
}
|
||||
|
||||
// Use relative path in archive
|
||||
header.Name = relPath
|
||||
|
||||
// Handle symlinks
|
||||
if info.Mode()&os.ModeSymlink != 0 {
|
||||
link, err := os.Readlink(path)
|
||||
if err != nil {
|
||||
return fmt.Errorf("cannot read symlink %s: %w", path, err)
|
||||
}
|
||||
header.Linkname = link
|
||||
}
|
||||
|
||||
// Write header
|
||||
if err := tarWriter.WriteHeader(header); err != nil {
|
||||
return fmt.Errorf("cannot write header for %s: %w", relPath, err)
|
||||
}
|
||||
|
||||
// If it's a regular file, write its contents
|
||||
if info.Mode().IsRegular() {
|
||||
file, err := os.Open(path)
|
||||
if err != nil {
|
||||
return fmt.Errorf("cannot open %s: %w", path, err)
|
||||
}
|
||||
defer file.Close()
|
||||
|
||||
written, err := io.Copy(tarWriter, file)
|
||||
if err != nil {
|
||||
return fmt.Errorf("cannot write %s: %w", path, err)
|
||||
}
|
||||
bytesWritten += written
|
||||
}
|
||||
|
||||
filesCount++
|
||||
|
||||
// Report progress
|
||||
if progressCb != nil {
|
||||
progressCb(CreateProgress{
|
||||
CurrentFile: relPath,
|
||||
BytesWritten: bytesWritten,
|
||||
FilesCount: filesCount,
|
||||
})
|
||||
}
|
||||
|
||||
return nil
|
||||
})
|
||||
|
||||
if err != nil {
|
||||
// Clean up partial file on error
|
||||
outFile.Close()
|
||||
os.Remove(outputPath)
|
||||
return err
|
||||
}
|
||||
|
||||
// Explicitly close tar and gzip to flush all data
|
||||
if err := tarWriter.Close(); err != nil {
|
||||
return fmt.Errorf("cannot close tar writer: %w", err)
|
||||
}
|
||||
if err := gzWriter.Close(); err != nil {
|
||||
return fmt.Errorf("cannot close gzip writer: %w", err)
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// EstimateCompressionRatio samples the archive to estimate uncompressed size
|
||||
// Returns a multiplier (e.g., 3.0 means uncompressed is ~3x the compressed size)
|
||||
func EstimateCompressionRatio(archivePath string) (float64, error) {
|
||||
file, err := os.Open(archivePath)
|
||||
if err != nil {
|
||||
return 3.0, err // Default to 3x
|
||||
}
|
||||
defer file.Close()
|
||||
|
||||
// Get compressed size
|
||||
stat, err := file.Stat()
|
||||
if err != nil {
|
||||
return 3.0, err
|
||||
}
|
||||
compressedSize := stat.Size()
|
||||
|
||||
// Read first 1MB and measure decompression ratio
|
||||
gzReader, err := pgzip.NewReader(file)
|
||||
if err != nil {
|
||||
return 3.0, err
|
||||
}
|
||||
defer gzReader.Close()
|
||||
|
||||
// Read up to 1MB of decompressed data
|
||||
buf := make([]byte, 1<<20)
|
||||
n, _ := io.ReadFull(gzReader, buf)
|
||||
|
||||
if n < 1024 {
|
||||
return 3.0, nil // Not enough data, use default
|
||||
}
|
||||
|
||||
// Estimate: decompressed / compressed
|
||||
// Based on sample of first 1MB
|
||||
compressedPortion := float64(compressedSize) * (float64(n) / float64(compressedSize))
|
||||
if compressedPortion > 0 {
|
||||
ratio := float64(n) / compressedPortion
|
||||
if ratio > 1.0 && ratio < 20.0 {
|
||||
return ratio, nil
|
||||
}
|
||||
}
|
||||
|
||||
return 3.0, nil // Default
|
||||
}
|
||||
223
internal/fs/fs.go
Normal file
223
internal/fs/fs.go
Normal file
@ -0,0 +1,223 @@
|
||||
// Package fs provides filesystem abstraction using spf13/afero for testability.
|
||||
// It allows swapping the real filesystem with an in-memory mock for unit tests.
|
||||
package fs
|
||||
|
||||
import (
|
||||
"io"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"time"
|
||||
|
||||
"github.com/spf13/afero"
|
||||
)
|
||||
|
||||
// FS is the global filesystem interface used throughout the application.
|
||||
// By default, it uses the real OS filesystem.
|
||||
// For testing, use SetFS(afero.NewMemMapFs()) to use an in-memory filesystem.
|
||||
var FS afero.Fs = afero.NewOsFs()
|
||||
|
||||
// SetFS sets the global filesystem (useful for testing)
|
||||
func SetFS(fs afero.Fs) {
|
||||
FS = fs
|
||||
}
|
||||
|
||||
// ResetFS resets to the real OS filesystem
|
||||
func ResetFS() {
|
||||
FS = afero.NewOsFs()
|
||||
}
|
||||
|
||||
// NewMemMapFs creates a new in-memory filesystem for testing
|
||||
func NewMemMapFs() afero.Fs {
|
||||
return afero.NewMemMapFs()
|
||||
}
|
||||
|
||||
// NewReadOnlyFs wraps a filesystem to make it read-only
|
||||
func NewReadOnlyFs(base afero.Fs) afero.Fs {
|
||||
return afero.NewReadOnlyFs(base)
|
||||
}
|
||||
|
||||
// NewBasePathFs creates a filesystem rooted at a specific path
|
||||
func NewBasePathFs(base afero.Fs, path string) afero.Fs {
|
||||
return afero.NewBasePathFs(base, path)
|
||||
}
|
||||
|
||||
// --- File Operations (use global FS) ---
|
||||
|
||||
// Create creates a file
|
||||
func Create(name string) (afero.File, error) {
|
||||
return FS.Create(name)
|
||||
}
|
||||
|
||||
// Open opens a file for reading
|
||||
func Open(name string) (afero.File, error) {
|
||||
return FS.Open(name)
|
||||
}
|
||||
|
||||
// OpenFile opens a file with specified flags and permissions
|
||||
func OpenFile(name string, flag int, perm os.FileMode) (afero.File, error) {
|
||||
return FS.OpenFile(name, flag, perm)
|
||||
}
|
||||
|
||||
// Remove removes a file or empty directory
|
||||
func Remove(name string) error {
|
||||
return FS.Remove(name)
|
||||
}
|
||||
|
||||
// RemoveAll removes a path and any children it contains
|
||||
func RemoveAll(path string) error {
|
||||
return FS.RemoveAll(path)
|
||||
}
|
||||
|
||||
// Rename renames (moves) a file
|
||||
func Rename(oldname, newname string) error {
|
||||
return FS.Rename(oldname, newname)
|
||||
}
|
||||
|
||||
// Stat returns file info
|
||||
func Stat(name string) (os.FileInfo, error) {
|
||||
return FS.Stat(name)
|
||||
}
|
||||
|
||||
// Chmod changes file mode
|
||||
func Chmod(name string, mode os.FileMode) error {
|
||||
return FS.Chmod(name, mode)
|
||||
}
|
||||
|
||||
// Chown changes file ownership (may not work on all filesystems)
|
||||
func Chown(name string, uid, gid int) error {
|
||||
return FS.Chown(name, uid, gid)
|
||||
}
|
||||
|
||||
// Chtimes changes file access and modification times
|
||||
func Chtimes(name string, atime, mtime time.Time) error {
|
||||
return FS.Chtimes(name, atime, mtime)
|
||||
}
|
||||
|
||||
// --- Directory Operations ---
|
||||
|
||||
// Mkdir creates a directory
|
||||
func Mkdir(name string, perm os.FileMode) error {
|
||||
return FS.Mkdir(name, perm)
|
||||
}
|
||||
|
||||
// MkdirAll creates a directory and all parents
|
||||
func MkdirAll(path string, perm os.FileMode) error {
|
||||
return FS.MkdirAll(path, perm)
|
||||
}
|
||||
|
||||
// ReadDir reads a directory
|
||||
func ReadDir(dirname string) ([]os.FileInfo, error) {
|
||||
return afero.ReadDir(FS, dirname)
|
||||
}
|
||||
|
||||
// --- File Content Operations ---
|
||||
|
||||
// ReadFile reads an entire file
|
||||
func ReadFile(filename string) ([]byte, error) {
|
||||
return afero.ReadFile(FS, filename)
|
||||
}
|
||||
|
||||
// WriteFile writes data to a file
|
||||
func WriteFile(filename string, data []byte, perm os.FileMode) error {
|
||||
return afero.WriteFile(FS, filename, data, perm)
|
||||
}
|
||||
|
||||
// --- Existence Checks ---
|
||||
|
||||
// Exists checks if a file or directory exists
|
||||
func Exists(path string) (bool, error) {
|
||||
return afero.Exists(FS, path)
|
||||
}
|
||||
|
||||
// DirExists checks if a directory exists
|
||||
func DirExists(path string) (bool, error) {
|
||||
return afero.DirExists(FS, path)
|
||||
}
|
||||
|
||||
// IsDir checks if path is a directory
|
||||
func IsDir(path string) (bool, error) {
|
||||
return afero.IsDir(FS, path)
|
||||
}
|
||||
|
||||
// IsEmpty checks if a directory is empty
|
||||
func IsEmpty(path string) (bool, error) {
|
||||
return afero.IsEmpty(FS, path)
|
||||
}
|
||||
|
||||
// --- Utility Functions ---
|
||||
|
||||
// Walk walks a directory tree
|
||||
func Walk(root string, walkFn filepath.WalkFunc) error {
|
||||
return afero.Walk(FS, root, walkFn)
|
||||
}
|
||||
|
||||
// Glob returns the names of all files matching pattern
|
||||
func Glob(pattern string) ([]string, error) {
|
||||
return afero.Glob(FS, pattern)
|
||||
}
|
||||
|
||||
// TempDir creates a temporary directory
|
||||
func TempDir(dir, prefix string) (string, error) {
|
||||
return afero.TempDir(FS, dir, prefix)
|
||||
}
|
||||
|
||||
// TempFile creates a temporary file
|
||||
func TempFile(dir, pattern string) (afero.File, error) {
|
||||
return afero.TempFile(FS, dir, pattern)
|
||||
}
|
||||
|
||||
// CopyFile copies a file from src to dst
|
||||
func CopyFile(src, dst string) error {
|
||||
srcFile, err := FS.Open(src)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
defer srcFile.Close()
|
||||
|
||||
srcInfo, err := srcFile.Stat()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
dstFile, err := FS.OpenFile(dst, os.O_WRONLY|os.O_CREATE|os.O_TRUNC, srcInfo.Mode())
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
defer dstFile.Close()
|
||||
|
||||
_, err = io.Copy(dstFile, srcFile)
|
||||
return err
|
||||
}
|
||||
|
||||
// FileSize returns the size of a file
|
||||
func FileSize(path string) (int64, error) {
|
||||
info, err := FS.Stat(path)
|
||||
if err != nil {
|
||||
return 0, err
|
||||
}
|
||||
return info.Size(), nil
|
||||
}
|
||||
|
||||
// --- Testing Helpers ---
|
||||
|
||||
// WithMemFs executes a function with an in-memory filesystem, then restores the original
|
||||
func WithMemFs(fn func(fs afero.Fs)) {
|
||||
original := FS
|
||||
memFs := afero.NewMemMapFs()
|
||||
FS = memFs
|
||||
defer func() { FS = original }()
|
||||
fn(memFs)
|
||||
}
|
||||
|
||||
// SetupTestDir creates a test directory structure in-memory
|
||||
func SetupTestDir(files map[string]string) afero.Fs {
|
||||
memFs := afero.NewMemMapFs()
|
||||
for path, content := range files {
|
||||
dir := filepath.Dir(path)
|
||||
if dir != "." && dir != "/" {
|
||||
_ = memFs.MkdirAll(dir, 0755)
|
||||
}
|
||||
_ = afero.WriteFile(memFs, path, []byte(content), 0644)
|
||||
}
|
||||
return memFs
|
||||
}
|
||||
191
internal/fs/fs_test.go
Normal file
191
internal/fs/fs_test.go
Normal file
@ -0,0 +1,191 @@
|
||||
package fs
|
||||
|
||||
import (
|
||||
"os"
|
||||
"testing"
|
||||
|
||||
"github.com/spf13/afero"
|
||||
)
|
||||
|
||||
func TestMemMapFs(t *testing.T) {
|
||||
// Use in-memory filesystem for testing
|
||||
WithMemFs(func(memFs afero.Fs) {
|
||||
// Create a file
|
||||
err := WriteFile("/test/file.txt", []byte("hello world"), 0644)
|
||||
if err != nil {
|
||||
t.Fatalf("WriteFile failed: %v", err)
|
||||
}
|
||||
|
||||
// Read it back
|
||||
content, err := ReadFile("/test/file.txt")
|
||||
if err != nil {
|
||||
t.Fatalf("ReadFile failed: %v", err)
|
||||
}
|
||||
|
||||
if string(content) != "hello world" {
|
||||
t.Errorf("expected 'hello world', got '%s'", string(content))
|
||||
}
|
||||
|
||||
// Check existence
|
||||
exists, err := Exists("/test/file.txt")
|
||||
if err != nil {
|
||||
t.Fatalf("Exists failed: %v", err)
|
||||
}
|
||||
if !exists {
|
||||
t.Error("file should exist")
|
||||
}
|
||||
|
||||
// Check non-existent file
|
||||
exists, err = Exists("/nonexistent.txt")
|
||||
if err != nil {
|
||||
t.Fatalf("Exists failed: %v", err)
|
||||
}
|
||||
if exists {
|
||||
t.Error("file should not exist")
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
func TestSetupTestDir(t *testing.T) {
|
||||
// Create test directory structure
|
||||
testFs := SetupTestDir(map[string]string{
|
||||
"/backups/db1.dump": "database 1 content",
|
||||
"/backups/db2.dump": "database 2 content",
|
||||
"/config/settings.json": `{"key": "value"}`,
|
||||
})
|
||||
|
||||
// Verify files exist
|
||||
content, err := afero.ReadFile(testFs, "/backups/db1.dump")
|
||||
if err != nil {
|
||||
t.Fatalf("ReadFile failed: %v", err)
|
||||
}
|
||||
if string(content) != "database 1 content" {
|
||||
t.Errorf("unexpected content: %s", string(content))
|
||||
}
|
||||
|
||||
// Verify directory structure
|
||||
files, err := afero.ReadDir(testFs, "/backups")
|
||||
if err != nil {
|
||||
t.Fatalf("ReadDir failed: %v", err)
|
||||
}
|
||||
if len(files) != 2 {
|
||||
t.Errorf("expected 2 files, got %d", len(files))
|
||||
}
|
||||
}
|
||||
|
||||
func TestCopyFile(t *testing.T) {
|
||||
WithMemFs(func(memFs afero.Fs) {
|
||||
// Create source file
|
||||
err := WriteFile("/source.txt", []byte("copy me"), 0644)
|
||||
if err != nil {
|
||||
t.Fatalf("WriteFile failed: %v", err)
|
||||
}
|
||||
|
||||
// Copy file
|
||||
err = CopyFile("/source.txt", "/dest.txt")
|
||||
if err != nil {
|
||||
t.Fatalf("CopyFile failed: %v", err)
|
||||
}
|
||||
|
||||
// Verify copy
|
||||
content, err := ReadFile("/dest.txt")
|
||||
if err != nil {
|
||||
t.Fatalf("ReadFile failed: %v", err)
|
||||
}
|
||||
if string(content) != "copy me" {
|
||||
t.Errorf("unexpected content: %s", string(content))
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
func TestFileSize(t *testing.T) {
|
||||
WithMemFs(func(memFs afero.Fs) {
|
||||
data := []byte("12345678901234567890") // 20 bytes
|
||||
err := WriteFile("/sized.txt", data, 0644)
|
||||
if err != nil {
|
||||
t.Fatalf("WriteFile failed: %v", err)
|
||||
}
|
||||
|
||||
size, err := FileSize("/sized.txt")
|
||||
if err != nil {
|
||||
t.Fatalf("FileSize failed: %v", err)
|
||||
}
|
||||
if size != 20 {
|
||||
t.Errorf("expected size 20, got %d", size)
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
func TestTempDir(t *testing.T) {
|
||||
WithMemFs(func(memFs afero.Fs) {
|
||||
// Create temp dir
|
||||
dir, err := TempDir("", "test-")
|
||||
if err != nil {
|
||||
t.Fatalf("TempDir failed: %v", err)
|
||||
}
|
||||
|
||||
// Verify it exists
|
||||
isDir, err := IsDir(dir)
|
||||
if err != nil {
|
||||
t.Fatalf("IsDir failed: %v", err)
|
||||
}
|
||||
if !isDir {
|
||||
t.Error("temp dir should be a directory")
|
||||
}
|
||||
|
||||
// Verify it's empty
|
||||
isEmpty, err := IsEmpty(dir)
|
||||
if err != nil {
|
||||
t.Fatalf("IsEmpty failed: %v", err)
|
||||
}
|
||||
if !isEmpty {
|
||||
t.Error("temp dir should be empty")
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
func TestWalk(t *testing.T) {
|
||||
WithMemFs(func(memFs afero.Fs) {
|
||||
// Create directory structure
|
||||
_ = MkdirAll("/root/a/b", 0755)
|
||||
_ = WriteFile("/root/file1.txt", []byte("1"), 0644)
|
||||
_ = WriteFile("/root/a/file2.txt", []byte("2"), 0644)
|
||||
_ = WriteFile("/root/a/b/file3.txt", []byte("3"), 0644)
|
||||
|
||||
var files []string
|
||||
err := Walk("/root", func(path string, info os.FileInfo, err error) error {
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if !info.IsDir() {
|
||||
files = append(files, path)
|
||||
}
|
||||
return nil
|
||||
})
|
||||
|
||||
if err != nil {
|
||||
t.Fatalf("Walk failed: %v", err)
|
||||
}
|
||||
|
||||
if len(files) != 3 {
|
||||
t.Errorf("expected 3 files, got %d: %v", len(files), files)
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
func TestGlob(t *testing.T) {
|
||||
WithMemFs(func(memFs afero.Fs) {
|
||||
_ = WriteFile("/data/backup1.dump", []byte("1"), 0644)
|
||||
_ = WriteFile("/data/backup2.dump", []byte("2"), 0644)
|
||||
_ = WriteFile("/data/config.json", []byte("{}"), 0644)
|
||||
|
||||
matches, err := Glob("/data/*.dump")
|
||||
if err != nil {
|
||||
t.Fatalf("Glob failed: %v", err)
|
||||
}
|
||||
|
||||
if len(matches) != 2 {
|
||||
t.Errorf("expected 2 matches, got %d: %v", len(matches), matches)
|
||||
}
|
||||
})
|
||||
}
|
||||
327
internal/fs/tmpfs.go
Normal file
327
internal/fs/tmpfs.go
Normal file
@ -0,0 +1,327 @@
|
||||
// Package fs provides filesystem utilities including tmpfs detection
|
||||
package fs
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"fmt"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
"syscall"
|
||||
|
||||
"dbbackup/internal/logger"
|
||||
)
|
||||
|
||||
// TmpfsInfo contains information about a tmpfs mount
|
||||
type TmpfsInfo struct {
|
||||
MountPoint string // Mount path
|
||||
TotalBytes uint64 // Total size
|
||||
FreeBytes uint64 // Available space
|
||||
UsedBytes uint64 // Used space
|
||||
Writable bool // Can we write to it
|
||||
Recommended bool // Is it recommended for restore temp files
|
||||
}
|
||||
|
||||
// TmpfsManager handles tmpfs detection and usage for non-root users
|
||||
type TmpfsManager struct {
|
||||
log logger.Logger
|
||||
available []TmpfsInfo
|
||||
}
|
||||
|
||||
// NewTmpfsManager creates a new tmpfs manager
|
||||
func NewTmpfsManager(log logger.Logger) *TmpfsManager {
|
||||
return &TmpfsManager{
|
||||
log: log,
|
||||
}
|
||||
}
|
||||
|
||||
// Detect finds all available tmpfs mounts that we can use
|
||||
// This works without root - dynamically reads /proc/mounts
|
||||
// No hardcoded paths - discovers all tmpfs/devtmpfs mounts on the system
|
||||
func (m *TmpfsManager) Detect() ([]TmpfsInfo, error) {
|
||||
m.available = nil
|
||||
|
||||
file, err := os.Open("/proc/mounts")
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("cannot read /proc/mounts: %w", err)
|
||||
}
|
||||
defer file.Close()
|
||||
|
||||
scanner := bufio.NewScanner(file)
|
||||
for scanner.Scan() {
|
||||
fields := strings.Fields(scanner.Text())
|
||||
if len(fields) < 3 {
|
||||
continue
|
||||
}
|
||||
|
||||
fsType := fields[2]
|
||||
mountPoint := fields[1]
|
||||
|
||||
// Dynamically discover all tmpfs and devtmpfs mounts (RAM-backed)
|
||||
if fsType == "tmpfs" || fsType == "devtmpfs" {
|
||||
info := m.checkMount(mountPoint)
|
||||
if info != nil {
|
||||
m.available = append(m.available, *info)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return m.available, nil
|
||||
}
|
||||
|
||||
// checkMount checks a single mount point for usability
|
||||
// No hardcoded paths - recommends based on space and writability only
|
||||
func (m *TmpfsManager) checkMount(mountPoint string) *TmpfsInfo {
|
||||
var stat syscall.Statfs_t
|
||||
if err := syscall.Statfs(mountPoint, &stat); err != nil {
|
||||
return nil
|
||||
}
|
||||
|
||||
// Use int64 for all calculations to handle platform differences
|
||||
// (FreeBSD has int64 for Bavail/Bfree, Linux has uint64)
|
||||
bsize := int64(stat.Bsize)
|
||||
blocks := int64(stat.Blocks)
|
||||
bavail := int64(stat.Bavail)
|
||||
bfree := int64(stat.Bfree)
|
||||
|
||||
info := &TmpfsInfo{
|
||||
MountPoint: mountPoint,
|
||||
TotalBytes: uint64(blocks * bsize),
|
||||
FreeBytes: uint64(bavail * bsize),
|
||||
UsedBytes: uint64((blocks - bfree) * bsize),
|
||||
}
|
||||
|
||||
// Check if we can write
|
||||
testFile := filepath.Join(mountPoint, ".dbbackup_test")
|
||||
if f, err := os.Create(testFile); err == nil {
|
||||
f.Close()
|
||||
os.Remove(testFile)
|
||||
info.Writable = true
|
||||
}
|
||||
|
||||
// Recommend if:
|
||||
// 1. At least 1GB free
|
||||
// 2. We can write
|
||||
// No hardcoded path preferences - any writable tmpfs with enough space is good
|
||||
minFree := uint64(1 * 1024 * 1024 * 1024) // 1GB
|
||||
|
||||
if info.FreeBytes >= minFree && info.Writable {
|
||||
info.Recommended = true
|
||||
}
|
||||
|
||||
return info
|
||||
}
|
||||
|
||||
// GetBestTmpfs returns the best available tmpfs for temp files
|
||||
// Returns the writable tmpfs with the most free space (no hardcoded path preferences)
|
||||
func (m *TmpfsManager) GetBestTmpfs(minFreeGB int) *TmpfsInfo {
|
||||
if m.available == nil {
|
||||
m.Detect()
|
||||
}
|
||||
|
||||
minFreeBytes := uint64(minFreeGB) * 1024 * 1024 * 1024
|
||||
|
||||
// Find the writable tmpfs with the most free space
|
||||
var best *TmpfsInfo
|
||||
for i := range m.available {
|
||||
info := &m.available[i]
|
||||
if info.Writable && info.FreeBytes >= minFreeBytes {
|
||||
if best == nil || info.FreeBytes > best.FreeBytes {
|
||||
best = info
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return best
|
||||
}
|
||||
|
||||
// GetTempDir returns a temp directory on tmpfs if available
|
||||
// Falls back to os.TempDir() if no suitable tmpfs found
|
||||
// Uses secure permissions (0700) to prevent other users from reading sensitive data
|
||||
func (m *TmpfsManager) GetTempDir(subdir string, minFreeGB int) (string, bool) {
|
||||
best := m.GetBestTmpfs(minFreeGB)
|
||||
if best == nil {
|
||||
// Fallback to regular temp
|
||||
return filepath.Join(os.TempDir(), subdir), false
|
||||
}
|
||||
|
||||
// Create subdir on tmpfs with secure permissions (0700 = owner-only)
|
||||
dir := filepath.Join(best.MountPoint, subdir)
|
||||
if err := os.MkdirAll(dir, 0700); err != nil {
|
||||
// Fallback if we can't create
|
||||
return filepath.Join(os.TempDir(), subdir), false
|
||||
}
|
||||
|
||||
// Ensure permissions are correct even if dir already existed
|
||||
os.Chmod(dir, 0700)
|
||||
|
||||
return dir, true
|
||||
}
|
||||
|
||||
// Summary returns a string summarizing available tmpfs
|
||||
func (m *TmpfsManager) Summary() string {
|
||||
if m.available == nil {
|
||||
m.Detect()
|
||||
}
|
||||
|
||||
if len(m.available) == 0 {
|
||||
return "No tmpfs mounts available"
|
||||
}
|
||||
|
||||
var lines []string
|
||||
for _, info := range m.available {
|
||||
status := "read-only"
|
||||
if info.Writable {
|
||||
status = "writable"
|
||||
}
|
||||
if info.Recommended {
|
||||
status = "✓ recommended"
|
||||
}
|
||||
|
||||
lines = append(lines, fmt.Sprintf(" %s: %s free / %s total (%s)",
|
||||
info.MountPoint,
|
||||
FormatBytes(int64(info.FreeBytes)),
|
||||
FormatBytes(int64(info.TotalBytes)),
|
||||
status))
|
||||
}
|
||||
|
||||
return strings.Join(lines, "\n")
|
||||
}
|
||||
|
||||
// PrintAvailable logs available tmpfs mounts
|
||||
func (m *TmpfsManager) PrintAvailable() {
|
||||
if m.available == nil {
|
||||
m.Detect()
|
||||
}
|
||||
|
||||
if len(m.available) == 0 {
|
||||
m.log.Warn("No tmpfs mounts available for fast temp storage")
|
||||
return
|
||||
}
|
||||
|
||||
m.log.Info("Available tmpfs mounts (RAM-backed, no root needed):")
|
||||
for _, info := range m.available {
|
||||
status := "read-only"
|
||||
if info.Writable {
|
||||
status = "writable"
|
||||
}
|
||||
if info.Recommended {
|
||||
status = "✓ recommended"
|
||||
}
|
||||
|
||||
m.log.Info(fmt.Sprintf(" %s: %s free / %s total (%s)",
|
||||
info.MountPoint,
|
||||
FormatBytes(int64(info.FreeBytes)),
|
||||
FormatBytes(int64(info.TotalBytes)),
|
||||
status))
|
||||
}
|
||||
}
|
||||
|
||||
// FormatBytes formats bytes as human-readable
|
||||
func FormatBytes(bytes int64) string {
|
||||
const unit = 1024
|
||||
if bytes < unit {
|
||||
return fmt.Sprintf("%d B", bytes)
|
||||
}
|
||||
div, exp := int64(unit), 0
|
||||
for n := bytes / unit; n >= unit; n /= unit {
|
||||
div *= unit
|
||||
exp++
|
||||
}
|
||||
return fmt.Sprintf("%.1f %cB", float64(bytes)/float64(div), "KMGTPE"[exp])
|
||||
}
|
||||
|
||||
// MemoryStatus returns current memory and swap status
|
||||
type MemoryStatus struct {
|
||||
TotalRAM uint64
|
||||
FreeRAM uint64
|
||||
AvailableRAM uint64
|
||||
TotalSwap uint64
|
||||
FreeSwap uint64
|
||||
Recommended string // Recommendation for restore
|
||||
}
|
||||
|
||||
// GetMemoryStatus reads current memory status from /proc/meminfo
|
||||
func GetMemoryStatus() (*MemoryStatus, error) {
|
||||
data, err := os.ReadFile("/proc/meminfo")
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
status := &MemoryStatus{}
|
||||
|
||||
for _, line := range strings.Split(string(data), "\n") {
|
||||
fields := strings.Fields(line)
|
||||
if len(fields) < 2 {
|
||||
continue
|
||||
}
|
||||
|
||||
// Parse value (in KB)
|
||||
val := uint64(0)
|
||||
if v, err := fmt.Sscanf(fields[1], "%d", &val); err == nil && v > 0 {
|
||||
val *= 1024 // Convert KB to bytes
|
||||
}
|
||||
|
||||
switch fields[0] {
|
||||
case "MemTotal:":
|
||||
status.TotalRAM = val
|
||||
case "MemFree:":
|
||||
status.FreeRAM = val
|
||||
case "MemAvailable:":
|
||||
status.AvailableRAM = val
|
||||
case "SwapTotal:":
|
||||
status.TotalSwap = val
|
||||
case "SwapFree:":
|
||||
status.FreeSwap = val
|
||||
}
|
||||
}
|
||||
|
||||
// Generate recommendation
|
||||
totalGB := status.TotalRAM / (1024 * 1024 * 1024)
|
||||
swapGB := status.TotalSwap / (1024 * 1024 * 1024)
|
||||
|
||||
if totalGB < 8 && swapGB < 4 {
|
||||
status.Recommended = "CRITICAL: Low RAM and swap. Run: sudo ./prepare_system.sh --fix"
|
||||
} else if totalGB < 16 && swapGB < 2 {
|
||||
status.Recommended = "WARNING: Consider adding swap. Run: sudo ./prepare_system.sh --swap"
|
||||
} else {
|
||||
status.Recommended = "OK: Sufficient memory for large restores"
|
||||
}
|
||||
|
||||
return status, nil
|
||||
}
|
||||
|
||||
// SecureMkdirTemp creates a temporary directory with secure permissions (0700)
|
||||
// This prevents other users from reading sensitive database dump contents
|
||||
// Uses the specified baseDir, or os.TempDir() if empty
|
||||
func SecureMkdirTemp(baseDir, pattern string) (string, error) {
|
||||
if baseDir == "" {
|
||||
baseDir = os.TempDir()
|
||||
}
|
||||
|
||||
// Use os.MkdirTemp for unique naming
|
||||
dir, err := os.MkdirTemp(baseDir, pattern)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
|
||||
// Ensure secure permissions (0700 = owner read/write/execute only)
|
||||
if err := os.Chmod(dir, 0700); err != nil {
|
||||
// Try to clean up if we can't secure it
|
||||
os.Remove(dir)
|
||||
return "", fmt.Errorf("cannot set secure permissions: %w", err)
|
||||
}
|
||||
|
||||
return dir, nil
|
||||
}
|
||||
|
||||
// SecureWriteFile writes content to a file with secure permissions (0600)
|
||||
// This prevents other users from reading sensitive data
|
||||
func SecureWriteFile(filename string, data []byte) error {
|
||||
// Write with restrictive permissions
|
||||
if err := os.WriteFile(filename, data, 0600); err != nil {
|
||||
return err
|
||||
}
|
||||
// Ensure permissions are correct
|
||||
return os.Chmod(filename, 0600)
|
||||
}
|
||||
11
internal/installer/embed.go
Normal file
11
internal/installer/embed.go
Normal file
@ -0,0 +1,11 @@
|
||||
// Package installer provides systemd service installation for dbbackup
|
||||
package installer
|
||||
|
||||
import (
|
||||
"embed"
|
||||
)
|
||||
|
||||
// Templates contains embedded systemd unit files
|
||||
//
|
||||
//go:embed templates/*.service templates/*.timer
|
||||
var Templates embed.FS
|
||||
680
internal/installer/installer.go
Normal file
680
internal/installer/installer.go
Normal file
@ -0,0 +1,680 @@
|
||||
// Package installer provides systemd service installation for dbbackup
|
||||
package installer
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"io"
|
||||
"os"
|
||||
"os/exec"
|
||||
"os/user"
|
||||
"path/filepath"
|
||||
"runtime"
|
||||
"strings"
|
||||
"text/template"
|
||||
|
||||
"dbbackup/internal/logger"
|
||||
)
|
||||
|
||||
// Installer handles systemd service installation
|
||||
type Installer struct {
|
||||
log logger.Logger
|
||||
unitDir string // /etc/systemd/system or custom
|
||||
dryRun bool
|
||||
}
|
||||
|
||||
// InstallOptions configures the installation
|
||||
type InstallOptions struct {
|
||||
// Instance name (e.g., "production", "staging")
|
||||
Instance string
|
||||
|
||||
// Binary path (auto-detected if empty)
|
||||
BinaryPath string
|
||||
|
||||
// Backup configuration
|
||||
BackupType string // "single" or "cluster"
|
||||
Schedule string // OnCalendar format, e.g., "daily", "*-*-* 02:00:00"
|
||||
|
||||
// Service user/group
|
||||
User string
|
||||
Group string
|
||||
|
||||
// Paths
|
||||
BackupDir string
|
||||
ConfigPath string
|
||||
|
||||
// Timeout in seconds (default: 3600)
|
||||
TimeoutSeconds int
|
||||
|
||||
// Metrics
|
||||
WithMetrics bool
|
||||
MetricsPort int
|
||||
}
|
||||
|
||||
// ServiceStatus contains information about installed services
|
||||
type ServiceStatus struct {
|
||||
Installed bool
|
||||
Enabled bool
|
||||
Active bool
|
||||
TimerEnabled bool
|
||||
TimerActive bool
|
||||
LastRun string
|
||||
NextRun string
|
||||
ServicePath string
|
||||
TimerPath string
|
||||
ExporterPath string
|
||||
}
|
||||
|
||||
// NewInstaller creates a new Installer
|
||||
func NewInstaller(log logger.Logger, dryRun bool) *Installer {
|
||||
return &Installer{
|
||||
log: log,
|
||||
unitDir: "/etc/systemd/system",
|
||||
dryRun: dryRun,
|
||||
}
|
||||
}
|
||||
|
||||
// SetUnitDir allows overriding the systemd unit directory (for testing)
|
||||
func (i *Installer) SetUnitDir(dir string) {
|
||||
i.unitDir = dir
|
||||
}
|
||||
|
||||
// Install installs the systemd service and timer
|
||||
func (i *Installer) Install(ctx context.Context, opts InstallOptions) error {
|
||||
// Validate platform
|
||||
if runtime.GOOS != "linux" {
|
||||
return fmt.Errorf("systemd installation only supported on Linux (current: %s)", runtime.GOOS)
|
||||
}
|
||||
|
||||
// Validate prerequisites
|
||||
if err := i.validatePrerequisites(); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// Set defaults
|
||||
if err := i.setDefaults(&opts); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// Create user if needed
|
||||
if err := i.ensureUser(opts.User, opts.Group); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// Create directories
|
||||
if err := i.createDirectories(opts); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// Copy binary to /usr/local/bin (required for ProtectHome=yes)
|
||||
if err := i.copyBinary(&opts); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// Write service and timer files
|
||||
if err := i.writeUnitFiles(opts); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// Reload systemd
|
||||
if err := i.systemctl(ctx, "daemon-reload"); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// Enable timer
|
||||
timerName := i.getTimerName(opts)
|
||||
if err := i.systemctl(ctx, "enable", timerName); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// Install metrics exporter if requested
|
||||
if opts.WithMetrics {
|
||||
if err := i.installExporter(ctx, opts); err != nil {
|
||||
i.log.Warn("Failed to install metrics exporter", "error", err)
|
||||
}
|
||||
}
|
||||
|
||||
i.log.Info("Installation complete",
|
||||
"instance", opts.Instance,
|
||||
"timer", timerName,
|
||||
"schedule", opts.Schedule)
|
||||
|
||||
i.printNextSteps(opts)
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// Uninstall removes the systemd service and timer
|
||||
func (i *Installer) Uninstall(ctx context.Context, instance string, purge bool) error {
|
||||
if runtime.GOOS != "linux" {
|
||||
return fmt.Errorf("systemd uninstallation only supported on Linux")
|
||||
}
|
||||
|
||||
if err := i.validatePrerequisites(); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// Determine service names
|
||||
var serviceName, timerName string
|
||||
if instance == "cluster" || instance == "" {
|
||||
serviceName = "dbbackup-cluster.service"
|
||||
timerName = "dbbackup-cluster.timer"
|
||||
} else {
|
||||
serviceName = fmt.Sprintf("dbbackup@%s.service", instance)
|
||||
timerName = fmt.Sprintf("dbbackup@%s.timer", instance)
|
||||
}
|
||||
|
||||
// Stop and disable timer
|
||||
_ = i.systemctl(ctx, "stop", timerName)
|
||||
_ = i.systemctl(ctx, "disable", timerName)
|
||||
|
||||
// Stop and disable service
|
||||
_ = i.systemctl(ctx, "stop", serviceName)
|
||||
_ = i.systemctl(ctx, "disable", serviceName)
|
||||
|
||||
// Remove unit files
|
||||
servicePath := filepath.Join(i.unitDir, serviceName)
|
||||
timerPath := filepath.Join(i.unitDir, timerName)
|
||||
|
||||
if !i.dryRun {
|
||||
os.Remove(servicePath)
|
||||
os.Remove(timerPath)
|
||||
} else {
|
||||
i.log.Info("Would remove", "service", servicePath)
|
||||
i.log.Info("Would remove", "timer", timerPath)
|
||||
}
|
||||
|
||||
// Also try to remove template units if they exist
|
||||
if instance != "cluster" && instance != "" {
|
||||
templateService := filepath.Join(i.unitDir, "dbbackup@.service")
|
||||
templateTimer := filepath.Join(i.unitDir, "dbbackup@.timer")
|
||||
|
||||
// Only remove templates if no other instances are using them
|
||||
if i.canRemoveTemplates() {
|
||||
if !i.dryRun {
|
||||
os.Remove(templateService)
|
||||
os.Remove(templateTimer)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Remove exporter
|
||||
exporterPath := filepath.Join(i.unitDir, "dbbackup-exporter.service")
|
||||
_ = i.systemctl(ctx, "stop", "dbbackup-exporter.service")
|
||||
_ = i.systemctl(ctx, "disable", "dbbackup-exporter.service")
|
||||
if !i.dryRun {
|
||||
os.Remove(exporterPath)
|
||||
}
|
||||
|
||||
// Reload systemd
|
||||
_ = i.systemctl(ctx, "daemon-reload")
|
||||
|
||||
// Purge config files if requested
|
||||
if purge {
|
||||
configDirs := []string{
|
||||
"/etc/dbbackup",
|
||||
"/var/lib/dbbackup",
|
||||
}
|
||||
for _, dir := range configDirs {
|
||||
if !i.dryRun {
|
||||
if err := os.RemoveAll(dir); err != nil {
|
||||
i.log.Warn("Failed to remove directory", "path", dir, "error", err)
|
||||
} else {
|
||||
i.log.Info("Removed directory", "path", dir)
|
||||
}
|
||||
} else {
|
||||
i.log.Info("Would remove directory", "path", dir)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
i.log.Info("Uninstallation complete", "instance", instance, "purge", purge)
|
||||
return nil
|
||||
}
|
||||
|
||||
// Status returns the current installation status
|
||||
func (i *Installer) Status(ctx context.Context, instance string) (*ServiceStatus, error) {
|
||||
if runtime.GOOS != "linux" {
|
||||
return nil, fmt.Errorf("systemd status only supported on Linux")
|
||||
}
|
||||
|
||||
status := &ServiceStatus{}
|
||||
|
||||
// Determine service names
|
||||
var serviceName, timerName string
|
||||
if instance == "cluster" || instance == "" {
|
||||
serviceName = "dbbackup-cluster.service"
|
||||
timerName = "dbbackup-cluster.timer"
|
||||
} else {
|
||||
serviceName = fmt.Sprintf("dbbackup@%s.service", instance)
|
||||
timerName = fmt.Sprintf("dbbackup@%s.timer", instance)
|
||||
}
|
||||
|
||||
// Check service file exists
|
||||
status.ServicePath = filepath.Join(i.unitDir, serviceName)
|
||||
if _, err := os.Stat(status.ServicePath); err == nil {
|
||||
status.Installed = true
|
||||
}
|
||||
|
||||
// Check timer file exists
|
||||
status.TimerPath = filepath.Join(i.unitDir, timerName)
|
||||
|
||||
// Check exporter
|
||||
status.ExporterPath = filepath.Join(i.unitDir, "dbbackup-exporter.service")
|
||||
|
||||
// Check enabled/active status
|
||||
if status.Installed {
|
||||
status.Enabled = i.isEnabled(ctx, serviceName)
|
||||
status.Active = i.isActive(ctx, serviceName)
|
||||
status.TimerEnabled = i.isEnabled(ctx, timerName)
|
||||
status.TimerActive = i.isActive(ctx, timerName)
|
||||
|
||||
// Get timer info
|
||||
status.NextRun = i.getTimerNext(ctx, timerName)
|
||||
status.LastRun = i.getTimerLast(ctx, timerName)
|
||||
}
|
||||
|
||||
return status, nil
|
||||
}
|
||||
|
||||
// validatePrerequisites checks system requirements
|
||||
func (i *Installer) validatePrerequisites() error {
|
||||
// Check root (skip in dry-run mode)
|
||||
if os.Getuid() != 0 && !i.dryRun {
|
||||
return fmt.Errorf("installation requires root privileges (use sudo)")
|
||||
}
|
||||
|
||||
// Check systemd
|
||||
if _, err := exec.LookPath("systemctl"); err != nil {
|
||||
return fmt.Errorf("systemctl not found - is this a systemd-based system?")
|
||||
}
|
||||
|
||||
// Check for container environment
|
||||
if _, err := os.Stat("/.dockerenv"); err == nil {
|
||||
i.log.Warn("Running inside Docker container - systemd may not work correctly")
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// setDefaults fills in default values
|
||||
func (i *Installer) setDefaults(opts *InstallOptions) error {
|
||||
// Auto-detect binary path
|
||||
if opts.BinaryPath == "" {
|
||||
binPath, err := os.Executable()
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to detect binary path: %w", err)
|
||||
}
|
||||
binPath, err = filepath.EvalSymlinks(binPath)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to resolve binary path: %w", err)
|
||||
}
|
||||
opts.BinaryPath = binPath
|
||||
}
|
||||
|
||||
// Default instance
|
||||
if opts.Instance == "" {
|
||||
opts.Instance = "default"
|
||||
}
|
||||
|
||||
// Default backup type
|
||||
if opts.BackupType == "" {
|
||||
opts.BackupType = "single"
|
||||
}
|
||||
|
||||
// Default schedule (daily at 2am)
|
||||
if opts.Schedule == "" {
|
||||
opts.Schedule = "*-*-* 02:00:00"
|
||||
}
|
||||
|
||||
// Default user/group
|
||||
if opts.User == "" {
|
||||
opts.User = "dbbackup"
|
||||
}
|
||||
if opts.Group == "" {
|
||||
opts.Group = "dbbackup"
|
||||
}
|
||||
|
||||
// Default paths
|
||||
if opts.BackupDir == "" {
|
||||
opts.BackupDir = "/var/lib/dbbackup/backups"
|
||||
}
|
||||
if opts.ConfigPath == "" {
|
||||
opts.ConfigPath = "/etc/dbbackup/dbbackup.conf"
|
||||
}
|
||||
|
||||
// Default timeout (1 hour)
|
||||
if opts.TimeoutSeconds == 0 {
|
||||
opts.TimeoutSeconds = 3600
|
||||
}
|
||||
|
||||
// Default metrics port
|
||||
if opts.MetricsPort == 0 {
|
||||
opts.MetricsPort = 9399
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// ensureUser creates the service user if it doesn't exist
|
||||
func (i *Installer) ensureUser(username, groupname string) error {
|
||||
// Check if user exists
|
||||
if _, err := user.Lookup(username); err == nil {
|
||||
i.log.Debug("User already exists", "user", username)
|
||||
return nil
|
||||
}
|
||||
|
||||
if i.dryRun {
|
||||
i.log.Info("Would create user", "user", username, "group", groupname)
|
||||
return nil
|
||||
}
|
||||
|
||||
// Create group first
|
||||
groupCmd := exec.Command("groupadd", "--system", groupname)
|
||||
if output, err := groupCmd.CombinedOutput(); err != nil {
|
||||
// Ignore if group already exists
|
||||
if !strings.Contains(string(output), "already exists") {
|
||||
i.log.Debug("Group creation output", "output", string(output))
|
||||
}
|
||||
}
|
||||
|
||||
// Create user
|
||||
userCmd := exec.Command("useradd",
|
||||
"--system",
|
||||
"--shell", "/usr/sbin/nologin",
|
||||
"--home-dir", "/var/lib/dbbackup",
|
||||
"--gid", groupname,
|
||||
username)
|
||||
|
||||
if output, err := userCmd.CombinedOutput(); err != nil {
|
||||
if !strings.Contains(string(output), "already exists") {
|
||||
return fmt.Errorf("failed to create user %s: %w (%s)", username, err, output)
|
||||
}
|
||||
}
|
||||
|
||||
i.log.Info("Created system user", "user", username, "group", groupname)
|
||||
return nil
|
||||
}
|
||||
|
||||
// createDirectories creates required directories
|
||||
func (i *Installer) createDirectories(opts InstallOptions) error {
|
||||
dirs := []struct {
|
||||
path string
|
||||
mode os.FileMode
|
||||
}{
|
||||
{"/etc/dbbackup", 0755},
|
||||
{"/etc/dbbackup/env.d", 0700},
|
||||
{"/var/lib/dbbackup", 0750},
|
||||
{"/var/lib/dbbackup/backups", 0750},
|
||||
{"/var/lib/dbbackup/metrics", 0755},
|
||||
{"/var/log/dbbackup", 0750},
|
||||
{opts.BackupDir, 0750},
|
||||
}
|
||||
|
||||
for _, d := range dirs {
|
||||
if i.dryRun {
|
||||
i.log.Info("Would create directory", "path", d.path, "mode", d.mode)
|
||||
continue
|
||||
}
|
||||
|
||||
if err := os.MkdirAll(d.path, d.mode); err != nil {
|
||||
return fmt.Errorf("failed to create directory %s: %w", d.path, err)
|
||||
}
|
||||
|
||||
// Set ownership
|
||||
u, err := user.Lookup(opts.User)
|
||||
if err == nil {
|
||||
var uid, gid int
|
||||
fmt.Sscanf(u.Uid, "%d", &uid)
|
||||
fmt.Sscanf(u.Gid, "%d", &gid)
|
||||
os.Chown(d.path, uid, gid)
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// copyBinary copies the binary to /usr/local/bin for systemd access
|
||||
// This is required because ProtectHome=yes blocks access to home directories
|
||||
func (i *Installer) copyBinary(opts *InstallOptions) error {
|
||||
const installPath = "/usr/local/bin/dbbackup"
|
||||
|
||||
// Check if binary is already in a system path
|
||||
if opts.BinaryPath == installPath {
|
||||
return nil
|
||||
}
|
||||
|
||||
if i.dryRun {
|
||||
i.log.Info("Would copy binary", "from", opts.BinaryPath, "to", installPath)
|
||||
opts.BinaryPath = installPath
|
||||
return nil
|
||||
}
|
||||
|
||||
// Read source binary
|
||||
src, err := os.Open(opts.BinaryPath)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to open source binary: %w", err)
|
||||
}
|
||||
defer src.Close()
|
||||
|
||||
// Create destination
|
||||
dst, err := os.OpenFile(installPath, os.O_CREATE|os.O_WRONLY|os.O_TRUNC, 0755)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to create %s: %w", installPath, err)
|
||||
}
|
||||
defer dst.Close()
|
||||
|
||||
// Copy
|
||||
if _, err := io.Copy(dst, src); err != nil {
|
||||
return fmt.Errorf("failed to copy binary: %w", err)
|
||||
}
|
||||
|
||||
i.log.Info("Copied binary", "from", opts.BinaryPath, "to", installPath)
|
||||
opts.BinaryPath = installPath
|
||||
return nil
|
||||
}
|
||||
|
||||
// writeUnitFiles renders and writes the systemd unit files
|
||||
func (i *Installer) writeUnitFiles(opts InstallOptions) error {
|
||||
// Prepare template data
|
||||
data := map[string]interface{}{
|
||||
"User": opts.User,
|
||||
"Group": opts.Group,
|
||||
"BinaryPath": opts.BinaryPath,
|
||||
"BackupType": opts.BackupType,
|
||||
"BackupDir": opts.BackupDir,
|
||||
"ConfigPath": opts.ConfigPath,
|
||||
"TimeoutSeconds": opts.TimeoutSeconds,
|
||||
"Schedule": opts.Schedule,
|
||||
"MetricsPort": opts.MetricsPort,
|
||||
}
|
||||
|
||||
// Determine which templates to use
|
||||
var serviceTemplate, timerTemplate string
|
||||
var serviceName, timerName string
|
||||
|
||||
if opts.BackupType == "cluster" {
|
||||
serviceTemplate = "templates/dbbackup-cluster.service"
|
||||
timerTemplate = "templates/dbbackup-cluster.timer"
|
||||
serviceName = "dbbackup-cluster.service"
|
||||
timerName = "dbbackup-cluster.timer"
|
||||
} else {
|
||||
serviceTemplate = "templates/dbbackup@.service"
|
||||
timerTemplate = "templates/dbbackup@.timer"
|
||||
serviceName = "dbbackup@.service"
|
||||
timerName = "dbbackup@.timer"
|
||||
}
|
||||
|
||||
// Write service file
|
||||
if err := i.writeTemplateFile(serviceTemplate, serviceName, data); err != nil {
|
||||
return fmt.Errorf("failed to write service file: %w", err)
|
||||
}
|
||||
|
||||
// Write timer file
|
||||
if err := i.writeTemplateFile(timerTemplate, timerName, data); err != nil {
|
||||
return fmt.Errorf("failed to write timer file: %w", err)
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// writeTemplateFile reads an embedded template and writes it to the unit directory
|
||||
func (i *Installer) writeTemplateFile(templatePath, outputName string, data map[string]interface{}) error {
|
||||
// Read template
|
||||
content, err := Templates.ReadFile(templatePath)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to read template %s: %w", templatePath, err)
|
||||
}
|
||||
|
||||
// Parse template
|
||||
tmpl, err := template.New(outputName).Parse(string(content))
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to parse template %s: %w", templatePath, err)
|
||||
}
|
||||
|
||||
// Render template
|
||||
var buf strings.Builder
|
||||
if err := tmpl.Execute(&buf, data); err != nil {
|
||||
return fmt.Errorf("failed to render template %s: %w", templatePath, err)
|
||||
}
|
||||
|
||||
// Write file
|
||||
outputPath := filepath.Join(i.unitDir, outputName)
|
||||
if i.dryRun {
|
||||
i.log.Info("Would write unit file", "path", outputPath)
|
||||
i.log.Debug("Unit file content", "content", buf.String())
|
||||
return nil
|
||||
}
|
||||
|
||||
if err := os.WriteFile(outputPath, []byte(buf.String()), 0644); err != nil {
|
||||
return fmt.Errorf("failed to write %s: %w", outputPath, err)
|
||||
}
|
||||
|
||||
i.log.Info("Created unit file", "path", outputPath)
|
||||
return nil
|
||||
}
|
||||
|
||||
// installExporter installs the metrics exporter service
|
||||
func (i *Installer) installExporter(ctx context.Context, opts InstallOptions) error {
|
||||
data := map[string]interface{}{
|
||||
"User": opts.User,
|
||||
"Group": opts.Group,
|
||||
"BinaryPath": opts.BinaryPath,
|
||||
"ConfigPath": opts.ConfigPath,
|
||||
"MetricsPort": opts.MetricsPort,
|
||||
}
|
||||
|
||||
if err := i.writeTemplateFile("templates/dbbackup-exporter.service", "dbbackup-exporter.service", data); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if err := i.systemctl(ctx, "daemon-reload"); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if err := i.systemctl(ctx, "enable", "dbbackup-exporter.service"); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if err := i.systemctl(ctx, "start", "dbbackup-exporter.service"); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
i.log.Info("Installed metrics exporter", "port", opts.MetricsPort)
|
||||
return nil
|
||||
}
|
||||
|
||||
// getTimerName returns the timer unit name for the given options
|
||||
func (i *Installer) getTimerName(opts InstallOptions) string {
|
||||
if opts.BackupType == "cluster" {
|
||||
return "dbbackup-cluster.timer"
|
||||
}
|
||||
return fmt.Sprintf("dbbackup@%s.timer", opts.Instance)
|
||||
}
|
||||
|
||||
// systemctl runs a systemctl command
|
||||
func (i *Installer) systemctl(ctx context.Context, args ...string) error {
|
||||
if i.dryRun {
|
||||
i.log.Info("Would run: systemctl", "args", args)
|
||||
return nil
|
||||
}
|
||||
|
||||
cmd := exec.CommandContext(ctx, "systemctl", args...)
|
||||
output, err := cmd.CombinedOutput()
|
||||
if err != nil {
|
||||
return fmt.Errorf("systemctl %v failed: %w\n%s", args, err, string(output))
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// isEnabled checks if a unit is enabled
|
||||
func (i *Installer) isEnabled(ctx context.Context, unit string) bool {
|
||||
cmd := exec.CommandContext(ctx, "systemctl", "is-enabled", unit)
|
||||
return cmd.Run() == nil
|
||||
}
|
||||
|
||||
// isActive checks if a unit is active
|
||||
func (i *Installer) isActive(ctx context.Context, unit string) bool {
|
||||
cmd := exec.CommandContext(ctx, "systemctl", "is-active", unit)
|
||||
return cmd.Run() == nil
|
||||
}
|
||||
|
||||
// getTimerNext gets the next run time for a timer
|
||||
func (i *Installer) getTimerNext(ctx context.Context, timer string) string {
|
||||
cmd := exec.CommandContext(ctx, "systemctl", "show", timer, "--property=NextElapseUSecRealtime", "--value")
|
||||
output, err := cmd.Output()
|
||||
if err != nil {
|
||||
return ""
|
||||
}
|
||||
return strings.TrimSpace(string(output))
|
||||
}
|
||||
|
||||
// getTimerLast gets the last run time for a timer
|
||||
func (i *Installer) getTimerLast(ctx context.Context, timer string) string {
|
||||
cmd := exec.CommandContext(ctx, "systemctl", "show", timer, "--property=LastTriggerUSec", "--value")
|
||||
output, err := cmd.Output()
|
||||
if err != nil {
|
||||
return ""
|
||||
}
|
||||
return strings.TrimSpace(string(output))
|
||||
}
|
||||
|
||||
// canRemoveTemplates checks if template units can be safely removed
|
||||
func (i *Installer) canRemoveTemplates() bool {
|
||||
// Check if any dbbackup@*.service instances exist
|
||||
pattern := filepath.Join(i.unitDir, "dbbackup@*.service")
|
||||
matches, _ := filepath.Glob(pattern)
|
||||
|
||||
// Also check for running instances
|
||||
cmd := exec.Command("systemctl", "list-units", "--type=service", "--all", "dbbackup@*")
|
||||
output, _ := cmd.Output()
|
||||
|
||||
return len(matches) == 0 && !strings.Contains(string(output), "dbbackup@")
|
||||
}
|
||||
|
||||
// printNextSteps prints helpful next steps after installation
|
||||
func (i *Installer) printNextSteps(opts InstallOptions) {
|
||||
timerName := i.getTimerName(opts)
|
||||
serviceName := strings.Replace(timerName, ".timer", ".service", 1)
|
||||
|
||||
fmt.Println()
|
||||
fmt.Println("[OK] Installation successful!")
|
||||
fmt.Println()
|
||||
fmt.Println("[NEXT] Next steps:")
|
||||
fmt.Println()
|
||||
fmt.Printf(" 1. Edit configuration: sudo nano %s\n", opts.ConfigPath)
|
||||
fmt.Printf(" 2. Set credentials: sudo nano /etc/dbbackup/env.d/%s.conf\n", opts.Instance)
|
||||
fmt.Printf(" 3. Start the timer: sudo systemctl start %s\n", timerName)
|
||||
fmt.Printf(" 4. Verify timer status: sudo systemctl status %s\n", timerName)
|
||||
fmt.Printf(" 5. Run backup manually: sudo systemctl start %s\n", serviceName)
|
||||
fmt.Println()
|
||||
fmt.Println("[LOGS] View backup logs:")
|
||||
fmt.Printf(" journalctl -u %s -f\n", serviceName)
|
||||
fmt.Println()
|
||||
|
||||
if opts.WithMetrics {
|
||||
fmt.Println("[METRICS] Prometheus metrics:")
|
||||
fmt.Printf(" curl http://localhost:%d/metrics\n", opts.MetricsPort)
|
||||
fmt.Println()
|
||||
}
|
||||
}
|
||||
50
internal/installer/templates/dbbackup-cluster.service
Normal file
50
internal/installer/templates/dbbackup-cluster.service
Normal file
@ -0,0 +1,50 @@
|
||||
[Unit]
|
||||
Description=Database Cluster Backup
|
||||
Documentation=https://github.com/PlusOne/dbbackup
|
||||
After=network-online.target postgresql.service mysql.service mariadb.service
|
||||
Wants=network-online.target
|
||||
|
||||
[Service]
|
||||
Type=oneshot
|
||||
User={{.User}}
|
||||
Group={{.Group}}
|
||||
|
||||
# Security hardening
|
||||
NoNewPrivileges=yes
|
||||
ProtectSystem=strict
|
||||
ProtectHome=read-only
|
||||
PrivateTmp=yes
|
||||
ProtectKernelTunables=yes
|
||||
ProtectKernelModules=yes
|
||||
ProtectControlGroups=yes
|
||||
RestrictSUIDSGID=yes
|
||||
RestrictRealtime=yes
|
||||
LockPersonality=yes
|
||||
RemoveIPC=yes
|
||||
CapabilityBoundingSet=
|
||||
AmbientCapabilities=
|
||||
|
||||
# Directories
|
||||
ReadWritePaths={{.BackupDir}} /var/lib/dbbackup /var/log/dbbackup
|
||||
|
||||
# Network access for cloud uploads
|
||||
RestrictAddressFamilies=AF_UNIX AF_INET AF_INET6
|
||||
|
||||
# Environment
|
||||
EnvironmentFile=-/etc/dbbackup/env.d/cluster.conf
|
||||
|
||||
# Working directory (config is loaded from .dbbackup.conf here)
|
||||
WorkingDirectory=/var/lib/dbbackup
|
||||
|
||||
# Execution - cluster backup (all databases)
|
||||
ExecStart={{.BinaryPath}} backup cluster --backup-dir {{.BackupDir}}
|
||||
TimeoutStartSec={{.TimeoutSeconds}}
|
||||
|
||||
# Post-backup metrics export
|
||||
ExecStopPost=-{{.BinaryPath}} metrics export --instance cluster --output /var/lib/dbbackup/metrics/cluster.prom
|
||||
|
||||
# OOM protection for large backups
|
||||
OOMScoreAdjust=-500
|
||||
|
||||
[Install]
|
||||
WantedBy=multi-user.target
|
||||
11
internal/installer/templates/dbbackup-cluster.timer
Normal file
11
internal/installer/templates/dbbackup-cluster.timer
Normal file
@ -0,0 +1,11 @@
|
||||
[Unit]
|
||||
Description=Database Cluster Backup Timer
|
||||
Documentation=https://github.com/PlusOne/dbbackup
|
||||
|
||||
[Timer]
|
||||
OnCalendar={{.Schedule}}
|
||||
Persistent=true
|
||||
RandomizedDelaySec=1800
|
||||
|
||||
[Install]
|
||||
WantedBy=timers.target
|
||||
37
internal/installer/templates/dbbackup-exporter.service
Normal file
37
internal/installer/templates/dbbackup-exporter.service
Normal file
@ -0,0 +1,37 @@
|
||||
[Unit]
|
||||
Description=DBBackup Prometheus Metrics Exporter
|
||||
Documentation=https://github.com/PlusOne/dbbackup
|
||||
After=network-online.target
|
||||
|
||||
[Service]
|
||||
Type=simple
|
||||
User={{.User}}
|
||||
Group={{.Group}}
|
||||
|
||||
# Security hardening
|
||||
NoNewPrivileges=yes
|
||||
ProtectSystem=strict
|
||||
ProtectHome=yes
|
||||
PrivateTmp=yes
|
||||
ProtectKernelTunables=yes
|
||||
ProtectKernelModules=yes
|
||||
ProtectControlGroups=yes
|
||||
RestrictSUIDSGID=yes
|
||||
RestrictRealtime=yes
|
||||
LockPersonality=yes
|
||||
RemoveIPC=yes
|
||||
|
||||
# Read-write access to catalog for metrics collection
|
||||
ReadWritePaths=/var/lib/dbbackup
|
||||
|
||||
# Network for HTTP server
|
||||
RestrictAddressFamilies=AF_UNIX AF_INET AF_INET6
|
||||
|
||||
# Execution
|
||||
ExecStart={{.BinaryPath}} metrics serve --port {{.MetricsPort}}
|
||||
ExecReload=/bin/kill -HUP $MAINPID
|
||||
Restart=on-failure
|
||||
RestartSec=5
|
||||
|
||||
[Install]
|
||||
WantedBy=multi-user.target
|
||||
50
internal/installer/templates/dbbackup@.service
Normal file
50
internal/installer/templates/dbbackup@.service
Normal file
@ -0,0 +1,50 @@
|
||||
[Unit]
|
||||
Description=Database Backup for %i
|
||||
Documentation=https://github.com/PlusOne/dbbackup
|
||||
After=network-online.target postgresql.service mysql.service mariadb.service
|
||||
Wants=network-online.target
|
||||
|
||||
[Service]
|
||||
Type=oneshot
|
||||
User={{.User}}
|
||||
Group={{.Group}}
|
||||
|
||||
# Security hardening
|
||||
NoNewPrivileges=yes
|
||||
ProtectSystem=strict
|
||||
ProtectHome=read-only
|
||||
PrivateTmp=yes
|
||||
ProtectKernelTunables=yes
|
||||
ProtectKernelModules=yes
|
||||
ProtectControlGroups=yes
|
||||
RestrictSUIDSGID=yes
|
||||
RestrictRealtime=yes
|
||||
LockPersonality=yes
|
||||
RemoveIPC=yes
|
||||
CapabilityBoundingSet=
|
||||
AmbientCapabilities=
|
||||
|
||||
# Directories
|
||||
ReadWritePaths={{.BackupDir}} /var/lib/dbbackup /var/log/dbbackup
|
||||
|
||||
# Network access for cloud uploads
|
||||
RestrictAddressFamilies=AF_UNIX AF_INET AF_INET6
|
||||
|
||||
# Environment
|
||||
EnvironmentFile=-/etc/dbbackup/env.d/%i.conf
|
||||
|
||||
# Working directory (config is loaded from .dbbackup.conf here)
|
||||
WorkingDirectory=/var/lib/dbbackup
|
||||
|
||||
# Execution
|
||||
ExecStart={{.BinaryPath}} backup {{.BackupType}} %i --backup-dir {{.BackupDir}}
|
||||
TimeoutStartSec={{.TimeoutSeconds}}
|
||||
|
||||
# Post-backup metrics export
|
||||
ExecStopPost=-{{.BinaryPath}} metrics export --instance %i --output /var/lib/dbbackup/metrics/%i.prom
|
||||
|
||||
# OOM protection for large backups
|
||||
OOMScoreAdjust=-500
|
||||
|
||||
[Install]
|
||||
WantedBy=multi-user.target
|
||||
11
internal/installer/templates/dbbackup@.timer
Normal file
11
internal/installer/templates/dbbackup@.timer
Normal file
@ -0,0 +1,11 @@
|
||||
[Unit]
|
||||
Description=Database Backup Timer for %i
|
||||
Documentation=https://github.com/PlusOne/dbbackup
|
||||
|
||||
[Timer]
|
||||
OnCalendar={{.Schedule}}
|
||||
Persistent=true
|
||||
RandomizedDelaySec=1800
|
||||
|
||||
[Install]
|
||||
WantedBy=timers.target
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user