Compare commits
26 Commits
| SHA1 |
|---|
| 354c083e38 |
| a211befea8 |
| d6fbc77c21 |
| e449e2f448 |
| dceab64b67 |
| a101fb81ab |
| 555177f5a7 |
| 0d416ecb55 |
| 1fe16ef89b |
| 4507ec682f |
| 084b8bd279 |
| 0d85caea53 |
| 3624ff54ff |
| 696273816e |
| 2b7cfa4b67 |
| 714ff3a41d |
| b095e2fab5 |
| e6c0ca0667 |
| 79dc604eb6 |
| de88e38f93 |
| 97c52ab9e5 |
| 3c9e5f04ca |
| 86a28b6ec5 |
| 63b35414d2 |
| db46770e7f |
| 51764a677a |
@@ -49,13 +49,14 @@ jobs:
         env:
           POSTGRES_PASSWORD: postgres
           POSTGRES_DB: testdb
-        ports: ['5432:5432']
+        # Use container networking instead of host port binding
+        # This avoids "port already in use" errors on shared runners
       mysql:
         image: mysql:8
         env:
           MYSQL_ROOT_PASSWORD: mysql
           MYSQL_DATABASE: testdb
-        ports: ['3306:3306']
+        # Use container networking instead of host port binding
     steps:
       - name: Checkout code
         env:
@@ -80,7 +81,7 @@ jobs:
           done
 
       - name: Build dbbackup
-        run: go build -o dbbackup .
+        run: go build -trimpath -o dbbackup .
 
       - name: Test PostgreSQL backup/restore
         env:
@@ -239,7 +240,7 @@ jobs:
           echo "Focus: PostgreSQL native engine validation only"
 
       - name: Build dbbackup for native testing
-        run: go build -o dbbackup-native .
+        run: go build -trimpath -o dbbackup-native .
 
       - name: Test PostgreSQL Native Engine
         env:
@@ -383,7 +384,7 @@ jobs:
       - name: Build for current platform
         run: |
           echo "Building dbbackup for testing..."
-          go build -ldflags="-s -w" -o dbbackup .
+          go build -trimpath -ldflags="-s -w" -o dbbackup .
           echo "Build successful!"
           ls -lh dbbackup
           ./dbbackup version || echo "Binary created successfully"
@@ -419,7 +420,7 @@ jobs:
 
           # Test Linux amd64 build (with CGO for SQLite)
           echo "Testing linux/amd64 build (CGO enabled)..."
-          if CGO_ENABLED=1 GOOS=linux GOARCH=amd64 go build -ldflags="-s -w" -o release/dbbackup-linux-amd64 .; then
+          if CGO_ENABLED=1 GOOS=linux GOARCH=amd64 go build -trimpath -ldflags="-s -w" -o release/dbbackup-linux-amd64 .; then
             echo "✅ linux/amd64 build successful"
             ls -lh release/dbbackup-linux-amd64
           else
@@ -428,7 +429,7 @@ jobs:
 
           # Test Darwin amd64 (no CGO - cross-compile limitation)
           echo "Testing darwin/amd64 build (CGO disabled)..."
-          if CGO_ENABLED=0 GOOS=darwin GOARCH=amd64 go build -ldflags="-s -w" -o release/dbbackup-darwin-amd64 .; then
+          if CGO_ENABLED=0 GOOS=darwin GOARCH=amd64 go build -trimpath -ldflags="-s -w" -o release/dbbackup-darwin-amd64 .; then
             echo "✅ darwin/amd64 build successful"
             ls -lh release/dbbackup-darwin-amd64
           else
@@ -508,23 +509,23 @@ jobs:
 
           # Linux amd64 (with CGO for SQLite)
           echo "Building linux/amd64 (CGO enabled)..."
-          CGO_ENABLED=1 GOOS=linux GOARCH=amd64 go build -ldflags="-s -w" -o release/dbbackup-linux-amd64 .
+          CGO_ENABLED=1 GOOS=linux GOARCH=amd64 go build -trimpath -ldflags="-s -w" -o release/dbbackup-linux-amd64 .
 
           # Linux arm64 (with CGO for SQLite)
           echo "Building linux/arm64 (CGO enabled)..."
-          CC=aarch64-linux-gnu-gcc CGO_ENABLED=1 GOOS=linux GOARCH=arm64 go build -ldflags="-s -w" -o release/dbbackup-linux-arm64 .
+          CC=aarch64-linux-gnu-gcc CGO_ENABLED=1 GOOS=linux GOARCH=arm64 go build -trimpath -ldflags="-s -w" -o release/dbbackup-linux-arm64 .
 
           # Darwin amd64 (no CGO - cross-compile limitation)
           echo "Building darwin/amd64 (CGO disabled)..."
-          CGO_ENABLED=0 GOOS=darwin GOARCH=amd64 go build -ldflags="-s -w" -o release/dbbackup-darwin-amd64 .
+          CGO_ENABLED=0 GOOS=darwin GOARCH=amd64 go build -trimpath -ldflags="-s -w" -o release/dbbackup-darwin-amd64 .
 
           # Darwin arm64 (no CGO - cross-compile limitation)
           echo "Building darwin/arm64 (CGO disabled)..."
-          CGO_ENABLED=0 GOOS=darwin GOARCH=arm64 go build -ldflags="-s -w" -o release/dbbackup-darwin-arm64 .
+          CGO_ENABLED=0 GOOS=darwin GOARCH=arm64 go build -trimpath -ldflags="-s -w" -o release/dbbackup-darwin-arm64 .
 
           # FreeBSD amd64 (no CGO - cross-compile limitation)
           echo "Building freebsd/amd64 (CGO disabled)..."
-          CGO_ENABLED=0 GOOS=freebsd GOARCH=amd64 go build -ldflags="-s -w" -o release/dbbackup-freebsd-amd64 .
+          CGO_ENABLED=0 GOOS=freebsd GOARCH=amd64 go build -trimpath -ldflags="-s -w" -o release/dbbackup-freebsd-amd64 .
 
           echo "All builds complete:"
           ls -lh release/
.gitignore (vendored): 1 line changed
@@ -18,6 +18,7 @@ bin/
 
 # Ignore local configuration (may contain IPs/credentials)
 .dbbackup.conf
+.gh_token
 
 # Ignore session/development notes
 TODO_SESSION.md
CHANGELOG.md: 85 lines changed
@@ -5,6 +5,91 @@ All notable changes to dbbackup will be documented in this file.
 The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
 and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
 
+## [5.8.26] - 2026-02-05
+
+### Improved
+- **Size-Weighted ETA for Cluster Backups**: ETAs now based on database sizes, not count
+  - Query database sizes upfront before starting cluster backup
+  - Progress bar shows bytes completed vs total bytes (e.g., `0B/500.0GB`)
+  - ETA calculated using size-weighted formula: `elapsed * (remaining_bytes / done_bytes)`
+  - Much more accurate for clusters with mixed database sizes (e.g., 8MB postgres + 500GB fakedb)
+  - Falls back to count-based ETA with `~` prefix if sizes unavailable
+
+## [5.8.25] - 2026-02-05
+
+### Fixed
+- **Backup Database Elapsed Time Display**: Fixed bug where per-database elapsed time and ETA showed `0.0s` during cluster backups
+  - Root cause: elapsed time was only updated when `hasUpdate` flag was true, not on every tick
+  - Fix: Store `phase2StartTime` in model and recalculate elapsed time on every UI tick
+  - Now shows accurate real-time elapsed and ETA for database backup phase
+
+## [5.8.24] - 2026-02-05
+
+### Added
+- **Skip Preflight Checks Option**: New TUI setting to disable pre-restore safety checks
+  - Accessible via Settings menu → "Skip Preflight Checks"
+  - Shows warning when enabled: "⚠️ SKIPPED (dangerous)"
+  - Displays prominent warning banner on restore preview screen
+  - Useful for enterprise scenarios where checks are too slow on large databases
+  - Config field: `SkipPreflightChecks` (default: false)
+  - Setting is persisted to config file with warning comment
+  - Added nil-pointer safety checks throughout
+
+## [5.8.23] - 2026-02-05
+
+### Added
+- **Cancellation Tests**: Added Go unit tests for context cancellation verification
+  - `TestParseStatementsContextCancellation` - verifies statement parsing can be cancelled
+  - `TestParseStatementsWithCopyDataCancellation` - verifies COPY data parsing can be cancelled
+  - Tests confirm cancellation responds within 10ms on large (1M+ line) files
+
+## [5.8.15] - 2026-02-05
+
+### Fixed
+- **TUI Cluster Restore Hang**: Fixed hang during large SQL file restore (pg_dumpall format)
+  - Added context cancellation support to `parseStatementsWithContext()` with checks every 10000 lines
+  - Added context cancellation checks in schema statement execution loop
+  - Now uses context-aware parsing in `RestoreFile()` for proper Ctrl+C handling
+  - This complements the v5.8.14 panic recovery fix by preventing hangs (not just panics)
+
+## [5.8.14] - 2026-02-05
+
+### Fixed
+- **TUI Cluster Restore Panic**: Fixed BubbleTea WaitGroup deadlock during cluster restore
+  - Panic recovery in `tea.Cmd` functions now uses named return values to properly return messages
+  - Previously, panic recovery returned nil which caused `execBatchMsg` WaitGroup to hang forever
+  - Affected files: `restore_exec.go` and `backup_exec.go`
+
+## [5.8.12] - 2026-02-04
+
+### Fixed
+- **Config Loading**: Fixed config not loading for users without standard home directories
+  - Now searches: current dir → home dir → /etc/dbbackup.conf → /etc/dbbackup/dbbackup.conf
+  - Works for postgres user with home at /var/lib/postgresql
+  - Added `ConfigSearchPaths()` and `LoadLocalConfigWithPath()` functions
+  - Log now shows which config path was actually loaded
+
+## [5.8.11] - 2026-02-04
+
+### Fixed
+- **TUI Deadlock**: Fixed goroutine leaks in pgxpool connection handling
+  - Removed redundant goroutines waiting on ctx.Done() in postgresql.go and parallel_restore.go
+  - These were causing WaitGroup deadlocks when BubbleTea tried to shutdown
+
+### Added
+- **systemd-run Resource Isolation**: New `internal/cleanup/cgroups.go` for long-running jobs
+  - `RunWithResourceLimits()` wraps commands in systemd-run scopes
+  - Configurable: MemoryHigh, MemoryMax, CPUQuota, IOWeight, Nice, Slice
+  - Automatic cleanup on context cancellation
+- **Restore Dry-Run Checks**: New `internal/restore/dryrun.go` with 10 pre-restore validations
+  - Archive access, format, connectivity, permissions, target conflicts
+  - Disk space, work directory, required tools, lock settings, memory estimation
+  - Returns pass/warning/fail status with detailed messages
+- **Audit Log Signing**: Enhanced `internal/security/audit.go` with Ed25519 cryptographic signing
+  - `SignedAuditEntry` with sequence numbers, hash chains, and signatures
+  - `GenerateSigningKeys()`, `SavePrivateKey()`, `LoadPublicKey()`
+  - `EnableSigning()`, `ExportSignedLog()`, `VerifyAuditLog()` for tamper detection
+
 ## [5.7.10] - 2026-02-03
 
 ### Fixed
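As a side note on the 5.8.26 entry: the size-weighted formula is easy to sanity-check with concrete numbers. A minimal runnable sketch (the sizes and timings are hypothetical, not taken from this repository):

```go
package main

import (
	"fmt"
	"time"
)

// etaSizeWeighted applies the formula from the changelog entry:
// eta = elapsed * (remaining_bytes / done_bytes)
func etaSizeWeighted(elapsed time.Duration, doneBytes, totalBytes int64) time.Duration {
	if doneBytes <= 0 {
		return 0 // no throughput signal yet; dbbackup falls back to a count-based ETA
	}
	remaining := totalBytes - doneBytes
	return time.Duration(float64(elapsed) * float64(remaining) / float64(doneBytes))
}

func main() {
	// Hypothetical mixed cluster: an 8 MB database finished in 2s; 500 GB remain.
	eta := etaSizeWeighted(2*time.Second, 8<<20, 500<<30)
	fmt.Println(eta) // roughly 35.5 hours, instead of "1 of 2 done, ~2s left"
}
```

This is why a count-based ETA is wildly optimistic for clusters where one database dominates the total size.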
@@ -19,7 +19,7 @@ COPY . .
 
 # Build binary with cross-compilation support
 RUN CGO_ENABLED=0 GOOS=${TARGETOS} GOARCH=${TARGETARCH} \
-    go build -a -installsuffix cgo -ldflags="-w -s" -o dbbackup .
+    go build -trimpath -a -installsuffix cgo -ldflags="-w -s" -o dbbackup .
 
 # Final stage - minimal runtime image
 # Using pinned version 3.19 which has better QEMU compatibility
Makefile: 2 lines changed
@@ -15,7 +15,7 @@ all: lint test build
 ## build: Build the binary with optimizations
 build:
 	@echo "🔨 Building dbbackup $(VERSION)..."
-	CGO_ENABLED=0 go build -ldflags="$(LDFLAGS)" -o bin/dbbackup .
+	CGO_ENABLED=0 go build -trimpath -ldflags="$(LDFLAGS)" -o bin/dbbackup .
 	@echo "✅ Built bin/dbbackup"
 
 ## build-debug: Build with debug symbols (for debugging)
@@ -80,7 +80,7 @@ for platform_config in "${PLATFORMS[@]}"; do
     # Set environment and build (using export for better compatibility)
     # CGO_ENABLED=0 creates static binaries without glibc dependency
     export CGO_ENABLED=0 GOOS GOARCH
-    if go build -ldflags "$LDFLAGS" -o "${BIN_DIR}/${binary_name}" . 2>/dev/null; then
+    if go build -trimpath -ldflags "$LDFLAGS" -o "${BIN_DIR}/${binary_name}" . 2>/dev/null; then
         # Get file size
         if [[ "$OSTYPE" == "darwin"* ]]; then
             size=$(stat -f%z "${BIN_DIR}/${binary_name}" 2>/dev/null || echo "0")
@@ -11,6 +11,7 @@ import (
 
 	"dbbackup/internal/database"
 	"dbbackup/internal/engine/native"
+	"dbbackup/internal/metadata"
 	"dbbackup/internal/notify"
 
 	"github.com/klauspost/pgzip"
@@ -163,6 +164,54 @@ func runNativeBackup(ctx context.Context, db database.Database, databaseName, ba
 		"duration", backupDuration,
 		"engine", result.EngineUsed)
 
+	// Get actual file size from disk
+	fileInfo, err := os.Stat(outputFile)
+	var actualSize int64
+	if err == nil {
+		actualSize = fileInfo.Size()
+	} else {
+		actualSize = result.BytesProcessed
+	}
+
+	// Calculate SHA256 checksum
+	sha256sum, err := metadata.CalculateSHA256(outputFile)
+	if err != nil {
+		log.Warn("Failed to calculate SHA256", "error", err)
+		sha256sum = ""
+	}
+
+	// Create and save metadata file
+	meta := &metadata.BackupMetadata{
+		Version:      "1.0",
+		Timestamp:    backupStartTime,
+		Database:     databaseName,
+		DatabaseType: dbType,
+		Host:         cfg.Host,
+		Port:         cfg.Port,
+		User:         cfg.User,
+		BackupFile:   filepath.Base(outputFile),
+		SizeBytes:    actualSize,
+		SHA256:       sha256sum,
+		Compression:  "gzip",
+		BackupType:   backupType,
+		Duration:     backupDuration.Seconds(),
+		ExtraInfo: map[string]string{
+			"engine":            result.EngineUsed,
+			"objects_processed": fmt.Sprintf("%d", result.ObjectsProcessed),
+		},
+	}
+
+	if cfg.CompressionLevel == 0 {
+		meta.Compression = "none"
+	}
+
+	metaPath := outputFile + ".meta.json"
+	if err := metadata.Save(metaPath, meta); err != nil {
+		log.Warn("Failed to save metadata", "error", err)
+	} else {
+		log.Debug("Metadata saved", "path", metaPath)
+	}
+
 	// Audit log: backup completed
 	auditLogger.LogBackupComplete(user, databaseName, cfg.BackupDir, result.BytesProcessed)
 
cmd/root.go: 20 lines changed
@@ -15,11 +15,11 @@ import (
 )
 
 var (
 	cfg                *config.Config
 	log                logger.Logger
 	auditLogger        *security.AuditLogger
 	rateLimiter        *security.RateLimiter
 	notifyManager      *notify.Manager
 	deprecatedPassword string
 )
 
@@ -61,22 +61,24 @@ For help with specific commands, use: dbbackup [command] --help`,
 
 		// Load local config if not disabled
 		if !cfg.NoLoadConfig {
-			// Use custom config path if specified, otherwise default to current directory
+			// Use custom config path if specified, otherwise search standard locations
 			var localCfg *config.LocalConfig
+			var configPath string
 			var err error
 			if cfg.ConfigPath != "" {
 				localCfg, err = config.LoadLocalConfigFromPath(cfg.ConfigPath)
+				configPath = cfg.ConfigPath
 				if err != nil {
 					log.Warn("Failed to load config from specified path", "path", cfg.ConfigPath, "error", err)
 				} else if localCfg != nil {
 					log.Info("Loaded configuration", "path", cfg.ConfigPath)
 				}
 			} else {
-				localCfg, err = config.LoadLocalConfig()
+				localCfg, configPath, err = config.LoadLocalConfigWithPath()
 				if err != nil {
-					log.Warn("Failed to load local config", "error", err)
+					log.Warn("Failed to load config", "error", err)
 				} else if localCfg != nil {
-					log.Info("Loaded configuration from .dbbackup.conf")
+					log.Info("Loaded configuration", "path", configPath)
 				}
 			}
 		}
 
@@ -39,7 +39,8 @@ import (
 type ProgressCallback func(current, total int64, description string)
 
 // DatabaseProgressCallback is called with database count progress during cluster backup
-type DatabaseProgressCallback func(done, total int, dbName string)
+// bytesDone and bytesTotal enable size-weighted ETA calculations
+type DatabaseProgressCallback func(done, total int, dbName string, bytesDone, bytesTotal int64)
 
 // Engine handles backup operations
 type Engine struct {
@@ -112,7 +113,8 @@ func (e *Engine) SetDatabaseProgressCallback(cb DatabaseProgressCallback) {
 }
 
 // reportDatabaseProgress reports database count progress to the callback if set
-func (e *Engine) reportDatabaseProgress(done, total int, dbName string) {
+// bytesDone/bytesTotal enable size-weighted ETA calculations
+func (e *Engine) reportDatabaseProgress(done, total int, dbName string, bytesDone, bytesTotal int64) {
 	// CRITICAL: Add panic recovery to prevent crashes during TUI shutdown
 	defer func() {
 		if r := recover(); r != nil {
@@ -121,7 +123,7 @@ func (e *Engine) reportDatabaseProgress(done, total int, dbName string) {
 	}()
 
 	if e.dbProgressCallback != nil {
-		e.dbProgressCallback(done, total, dbName)
+		e.dbProgressCallback(done, total, dbName, bytesDone, bytesTotal)
 	}
 }
 
@@ -461,6 +463,18 @@ func (e *Engine) BackupCluster(ctx context.Context) error {
 		return fmt.Errorf("failed to list databases: %w", err)
 	}
 
+	// Query database sizes upfront for accurate ETA calculation
+	e.printf("  Querying database sizes for ETA estimation...\n")
+	dbSizes := make(map[string]int64)
+	var totalBytes int64
+	for _, dbName := range databases {
+		if size, err := e.db.GetDatabaseSize(ctx, dbName); err == nil {
+			dbSizes[dbName] = size
+			totalBytes += size
+		}
+	}
+	var completedBytes int64 // Track bytes completed (atomic access)
+
 	// Create ETA estimator for database backups
 	estimator := progress.NewETAEstimator("Backing up cluster", len(databases))
 	quietProgress.SetEstimator(estimator)
@@ -520,25 +534,26 @@ func (e *Engine) BackupCluster(ctx context.Context) error {
 			default:
 			}
 
+			// Get this database's size for progress tracking
+			thisDbSize := dbSizes[name]
+
 			// Update estimator progress (thread-safe)
 			mu.Lock()
 			estimator.UpdateProgress(idx)
 			e.printf("  [%d/%d] Backing up database: %s\n", idx+1, len(databases), name)
 			quietProgress.Update(fmt.Sprintf("Backing up database %d/%d: %s", idx+1, len(databases), name))
-			// Report database progress to TUI callback
-			e.reportDatabaseProgress(idx+1, len(databases), name)
+			// Report database progress to TUI callback with size-weighted info
+			e.reportDatabaseProgress(idx+1, len(databases), name, completedBytes, totalBytes)
 			mu.Unlock()
 
-			// Check database size and warn if very large
-			if size, err := e.db.GetDatabaseSize(ctx, name); err == nil {
-				sizeStr := formatBytes(size)
-				mu.Lock()
-				e.printf("    Database size: %s\n", sizeStr)
-				if size > 10*1024*1024*1024 { // > 10GB
-					e.printf("    [WARN] Large database detected - this may take a while\n")
-				}
-				mu.Unlock()
-			}
+			// Use cached size, warn if very large
+			sizeStr := formatBytes(thisDbSize)
+			mu.Lock()
+			e.printf("    Database size: %s\n", sizeStr)
+			if thisDbSize > 10*1024*1024*1024 { // > 10GB
+				e.printf("    [WARN] Large database detected - this may take a while\n")
+			}
+			mu.Unlock()
 
 			dumpFile := filepath.Join(tempDir, "dumps", name+".dump")
 
@@ -635,6 +650,8 @@ func (e *Engine) BackupCluster(ctx context.Context) error {
 			}
 		} else {
 			// Native backup succeeded!
+			// Update completed bytes for size-weighted ETA
+			atomic.AddInt64(&completedBytes, thisDbSize)
 			if info, statErr := os.Stat(sqlFile); statErr == nil {
 				mu.Lock()
 				e.printf("  [OK] Completed %s (%s) [native]\n", name, formatBytes(info.Size()))
@@ -687,6 +704,8 @@ func (e *Engine) BackupCluster(ctx context.Context) error {
 				mu.Unlock()
 				atomic.AddInt32(&failCount, 1)
 			} else {
+				// Update completed bytes for size-weighted ETA
+				atomic.AddInt64(&completedBytes, thisDbSize)
 				compressedCandidate := strings.TrimSuffix(dumpFile, ".dump") + ".sql.gz"
 				mu.Lock()
 				if info, err := os.Stat(compressedCandidate); err == nil {
internal/cleanup/cgroups.go (new file): 236 lines
@@ -0,0 +1,236 @@
+package cleanup
+
+import (
+	"context"
+	"fmt"
+	"os"
+	"os/exec"
+	"runtime"
+	"strings"
+
+	"dbbackup/internal/logger"
+)
+
+// ResourceLimits defines resource constraints for long-running operations
+type ResourceLimits struct {
+	// MemoryHigh is the high memory limit (e.g., "4G", "2048M")
+	// When exceeded, kernel will throttle and reclaim memory aggressively
+	MemoryHigh string
+
+	// MemoryMax is the hard memory limit (e.g., "6G")
+	// Process is killed if exceeded
+	MemoryMax string
+
+	// CPUQuota limits CPU usage (e.g., "70%" for 70% of one CPU)
+	CPUQuota string
+
+	// IOWeight sets I/O priority (1-10000, default 100)
+	IOWeight int
+
+	// Nice sets process priority (-20 to 19)
+	Nice int
+
+	// Slice is the systemd slice to run under (e.g., "dbbackup.slice")
+	Slice string
+}
+
+// DefaultResourceLimits returns sensible defaults for backup/restore operations
+func DefaultResourceLimits() *ResourceLimits {
+	return &ResourceLimits{
+		MemoryHigh: "4G",
+		MemoryMax:  "6G",
+		CPUQuota:   "80%",
+		IOWeight:   100, // Default priority
+		Nice:       10,  // Slightly lower priority than interactive processes
+		Slice:      "dbbackup.slice",
+	}
+}
+
+// SystemdRunAvailable checks if systemd-run is available on this system
+func SystemdRunAvailable() bool {
+	if runtime.GOOS != "linux" {
+		return false
+	}
+	_, err := exec.LookPath("systemd-run")
+	return err == nil
+}
+
+// RunWithResourceLimits executes a command with resource limits via systemd-run
+// Falls back to direct execution if systemd-run is not available
+func RunWithResourceLimits(ctx context.Context, log logger.Logger, limits *ResourceLimits, name string, args ...string) error {
+	if limits == nil {
+		limits = DefaultResourceLimits()
+	}
+
+	// If systemd-run not available, fall back to direct execution
+	if !SystemdRunAvailable() {
+		log.Debug("systemd-run not available, running without resource limits")
+		cmd := exec.CommandContext(ctx, name, args...)
+		cmd.Stdout = os.Stdout
+		cmd.Stderr = os.Stderr
+		return cmd.Run()
+	}
+
+	// Build systemd-run command
+	systemdArgs := buildSystemdArgs(limits, name, args)
+
+	log.Info("Running with systemd resource limits",
+		"command", name,
+		"memory_high", limits.MemoryHigh,
+		"cpu_quota", limits.CPUQuota)
+
+	cmd := exec.CommandContext(ctx, "systemd-run", systemdArgs...)
+	cmd.Stdout = os.Stdout
+	cmd.Stderr = os.Stderr
+
+	return cmd.Run()
+}
+
+// RunWithResourceLimitsOutput executes with limits and returns combined output
+func RunWithResourceLimitsOutput(ctx context.Context, log logger.Logger, limits *ResourceLimits, name string, args ...string) ([]byte, error) {
+	if limits == nil {
+		limits = DefaultResourceLimits()
+	}
+
+	// If systemd-run not available, fall back to direct execution
+	if !SystemdRunAvailable() {
+		log.Debug("systemd-run not available, running without resource limits")
+		cmd := exec.CommandContext(ctx, name, args...)
+		return cmd.CombinedOutput()
+	}
+
+	// Build systemd-run command
+	systemdArgs := buildSystemdArgs(limits, name, args)
+
+	log.Debug("Running with systemd resource limits",
+		"command", name,
+		"memory_high", limits.MemoryHigh)
+
+	cmd := exec.CommandContext(ctx, "systemd-run", systemdArgs...)
+	return cmd.CombinedOutput()
+}
+
+// buildSystemdArgs constructs the systemd-run argument list
+func buildSystemdArgs(limits *ResourceLimits, name string, args []string) []string {
+	systemdArgs := []string{
+		"--scope",   // Run as transient scope (not service)
+		"--user",    // Run in user session (no root required)
+		"--quiet",   // Reduce systemd noise
+		"--collect", // Automatically clean up after exit
+	}
+
+	// Add description for easier identification
+	systemdArgs = append(systemdArgs, fmt.Sprintf("--description=dbbackup: %s", name))
+
+	// Add resource properties
+	if limits.MemoryHigh != "" {
+		systemdArgs = append(systemdArgs, fmt.Sprintf("--property=MemoryHigh=%s", limits.MemoryHigh))
+	}
+
+	if limits.MemoryMax != "" {
+		systemdArgs = append(systemdArgs, fmt.Sprintf("--property=MemoryMax=%s", limits.MemoryMax))
+	}
+
+	if limits.CPUQuota != "" {
+		systemdArgs = append(systemdArgs, fmt.Sprintf("--property=CPUQuota=%s", limits.CPUQuota))
+	}
+
+	if limits.IOWeight > 0 {
+		systemdArgs = append(systemdArgs, fmt.Sprintf("--property=IOWeight=%d", limits.IOWeight))
+	}
+
+	if limits.Nice != 0 {
+		systemdArgs = append(systemdArgs, fmt.Sprintf("--property=Nice=%d", limits.Nice))
+	}
+
+	if limits.Slice != "" {
+		systemdArgs = append(systemdArgs, fmt.Sprintf("--slice=%s", limits.Slice))
+	}
+
+	// Add separator and command
+	systemdArgs = append(systemdArgs, "--")
+	systemdArgs = append(systemdArgs, name)
+	systemdArgs = append(systemdArgs, args...)
+
+	return systemdArgs
+}
+
+// WrapCommand creates an exec.Cmd that runs with resource limits
+// This allows the caller to customize stdin/stdout/stderr before running
+func WrapCommand(ctx context.Context, log logger.Logger, limits *ResourceLimits, name string, args ...string) *exec.Cmd {
+	if limits == nil {
+		limits = DefaultResourceLimits()
+	}
+
+	// If systemd-run not available, return direct command
+	if !SystemdRunAvailable() {
+		log.Debug("systemd-run not available, returning unwrapped command")
+		return exec.CommandContext(ctx, name, args...)
+	}
+
+	// Build systemd-run command
+	systemdArgs := buildSystemdArgs(limits, name, args)
+
+	log.Debug("Wrapping command with systemd resource limits",
+		"command", name,
+		"memory_high", limits.MemoryHigh)
+
+	return exec.CommandContext(ctx, "systemd-run", systemdArgs...)
+}
+
+// ResourceLimitsFromConfig creates resource limits from size estimates
+// Useful for dynamically setting limits based on backup/restore size
+func ResourceLimitsFromConfig(estimatedSizeBytes int64, isRestore bool) *ResourceLimits {
+	limits := DefaultResourceLimits()
+
+	// Estimate memory needs based on data size
+	// Restore needs more memory than backup
+	var memoryMultiplier float64 = 0.1 // 10% of data size for backup
+	if isRestore {
+		memoryMultiplier = 0.2 // 20% of data size for restore
+	}
+
+	estimatedMemMB := int64(float64(estimatedSizeBytes/1024/1024) * memoryMultiplier)
+
+	// Clamp to reasonable values
+	if estimatedMemMB < 512 {
+		estimatedMemMB = 512 // Minimum 512MB
+	}
+	if estimatedMemMB > 16384 {
+		estimatedMemMB = 16384 // Maximum 16GB
+	}
+
+	limits.MemoryHigh = fmt.Sprintf("%dM", estimatedMemMB)
+	limits.MemoryMax = fmt.Sprintf("%dM", estimatedMemMB*2) // 2x high limit
+
+	return limits
+}
+
+// GetActiveResourceUsage returns current resource usage if running in systemd scope
+func GetActiveResourceUsage() (string, error) {
+	if !SystemdRunAvailable() {
+		return "", fmt.Errorf("systemd not available")
+	}
+
+	// Check if we're running in a scope
+	cmd := exec.Command("systemctl", "--user", "status", "--no-pager")
+	output, err := cmd.Output()
+	if err != nil {
+		return "", fmt.Errorf("failed to get systemd status: %w", err)
+	}
+
+	// Extract dbbackup-related scopes
+	lines := strings.Split(string(output), "\n")
+	var dbbackupLines []string
+	for _, line := range lines {
+		if strings.Contains(line, "dbbackup") {
+			dbbackupLines = append(dbbackupLines, strings.TrimSpace(line))
+		}
+	}
+
+	if len(dbbackupLines) == 0 {
+		return "No active dbbackup scopes", nil
+	}
+
+	return strings.Join(dbbackupLines, "\n"), nil
+}
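For orientation, calling into this new API might look like the sketch below. The function names come from this file; the size estimate and the pg_dump invocation are illustrative assumptions:

```go
package main

import (
	"context"

	"dbbackup/internal/cleanup"
	"dbbackup/internal/logger"
)

// runLimitedDump wraps a long-running dump in a transient systemd scope when
// systemd-run is available, and falls back to direct execution otherwise.
func runLimitedDump(ctx context.Context, log logger.Logger) error {
	// Derive limits from a ~50 GB backup estimate (hypothetical figure).
	limits := cleanup.ResourceLimitsFromConfig(50<<30, false /* isRestore */)
	return cleanup.RunWithResourceLimits(ctx, log, limits,
		"pg_dump", "-Fc", "-f", "/backups/app.dump", "appdb")
}
```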
@@ -131,6 +131,9 @@ type Config struct {
 	TUIVerbose bool   // Verbose TUI logging
 	TUILogFile string // TUI event log file path
 
+	// Safety options
+	SkipPreflightChecks bool // Skip pre-restore safety checks (archive integrity, disk space, etc.)
+
 	// Cloud storage options (v2.0)
 	CloudEnabled  bool   // Enable cloud storage integration
 	CloudProvider string // "s3", "minio", "b2", "azure", "gcs"
@@ -35,15 +35,62 @@ type LocalConfig struct {
 	ResourceProfile string
 	LargeDBMode     bool // Enable large database mode (reduces parallelism, increases locks)
 
+	// Safety settings
+	SkipPreflightChecks bool // Skip pre-restore safety checks (dangerous)
+
 	// Security settings
 	RetentionDays int
 	MinBackups    int
 	MaxRetries    int
 }
 
-// LoadLocalConfig loads configuration from .dbbackup.conf in current directory
+// ConfigSearchPaths returns all paths where config files are searched, in order of priority
+func ConfigSearchPaths() []string {
+	paths := []string{
+		filepath.Join(".", ConfigFileName), // Current directory (highest priority)
+	}
+
+	// User's home directory
+	if home, err := os.UserHomeDir(); err == nil && home != "" {
+		paths = append(paths, filepath.Join(home, ConfigFileName))
+	}
+
+	// System-wide config locations
+	paths = append(paths,
+		"/etc/dbbackup.conf",
+		"/etc/dbbackup/dbbackup.conf",
+	)
+
+	return paths
+}
+
+// LoadLocalConfig loads configuration from .dbbackup.conf
+// Search order: 1) current directory, 2) user's home directory, 3) /etc/dbbackup.conf, 4) /etc/dbbackup/dbbackup.conf
 func LoadLocalConfig() (*LocalConfig, error) {
-	return LoadLocalConfigFromPath(filepath.Join(".", ConfigFileName))
+	for _, path := range ConfigSearchPaths() {
+		cfg, err := LoadLocalConfigFromPath(path)
+		if err != nil {
+			return nil, err
+		}
+		if cfg != nil {
+			return cfg, nil
+		}
+	}
+	return nil, nil
+}
+
+// LoadLocalConfigWithPath loads configuration and returns the path it was loaded from
+func LoadLocalConfigWithPath() (*LocalConfig, string, error) {
+	for _, path := range ConfigSearchPaths() {
+		cfg, err := LoadLocalConfigFromPath(path)
+		if err != nil {
+			return nil, "", err
+		}
+		if cfg != nil {
+			return cfg, path, nil
+		}
+	}
+	return nil, "", nil
 }
 
 // LoadLocalConfigFromPath loads configuration from a specific path
@@ -152,6 +199,11 @@ func LoadLocalConfigFromPath(configPath string) (*LocalConfig, error) {
 				cfg.MaxRetries = mr
 			}
 		}
+	case "safety":
+		switch key {
+		case "skip_preflight_checks":
+			cfg.SkipPreflightChecks = value == "true" || value == "1"
+		}
 	}
 }
 
@@ -208,6 +260,14 @@ func SaveLocalConfigToPath(cfg *LocalConfig, configPath string) error {
 	sb.WriteString(fmt.Sprintf("retention_days = %d\n", cfg.RetentionDays))
 	sb.WriteString(fmt.Sprintf("min_backups = %d\n", cfg.MinBackups))
 	sb.WriteString(fmt.Sprintf("max_retries = %d\n", cfg.MaxRetries))
+	sb.WriteString("\n")
+
+	// Safety section - only write if non-default (dangerous setting)
+	if cfg.SkipPreflightChecks {
+		sb.WriteString("[safety]\n")
+		sb.WriteString("# WARNING: Skipping preflight checks can lead to failed restores!\n")
+		sb.WriteString(fmt.Sprintf("skip_preflight_checks = %t\n", cfg.SkipPreflightChecks))
+	}
 
 	// Use 0644 permissions for readability
 	if err := os.WriteFile(configPath, []byte(sb.String()), 0644); err != nil {
@@ -284,29 +344,36 @@ func ApplyLocalConfig(cfg *Config, local *LocalConfig) {
 	if local.MaxRetries != 0 {
 		cfg.MaxRetries = local.MaxRetries
 	}
+
+	// Safety settings - apply even if false (explicit setting)
+	// This is a dangerous setting, so we always respect what's in the config
+	if local.SkipPreflightChecks {
+		cfg.SkipPreflightChecks = true
+	}
 }
 
 // ConfigFromConfig creates a LocalConfig from a Config
 func ConfigFromConfig(cfg *Config) *LocalConfig {
 	return &LocalConfig{
 		DBType:          cfg.DatabaseType,
 		Host:            cfg.Host,
 		Port:            cfg.Port,
 		User:            cfg.User,
 		Database:        cfg.Database,
 		SSLMode:         cfg.SSLMode,
 		BackupDir:       cfg.BackupDir,
 		WorkDir:         cfg.WorkDir,
 		Compression:     cfg.CompressionLevel,
 		Jobs:            cfg.Jobs,
 		DumpJobs:        cfg.DumpJobs,
 		CPUWorkload:     cfg.CPUWorkloadType,
 		MaxCores:        cfg.MaxCores,
 		ClusterTimeout:  cfg.ClusterTimeoutMinutes,
 		ResourceProfile: cfg.ResourceProfile,
 		LargeDBMode:     cfg.LargeDBMode,
+		SkipPreflightChecks: cfg.SkipPreflightChecks,
 		RetentionDays:   cfg.RetentionDays,
 		MinBackups:      cfg.MinBackups,
 		MaxRetries:      cfg.MaxRetries,
 	}
 }
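Tying the config pieces above together: saving a LocalConfig with the flag set emits the commented [safety] section from SaveLocalConfigToPath, and loading walks ConfigSearchPaths() in priority order. A sketch with a hypothetical file path:

```go
package main

import (
	"fmt"

	"dbbackup/internal/config"
)

func main() {
	// Persisting the dangerous flag produces a [safety] section plus a
	// warning comment in the written file (see SaveLocalConfigToPath above).
	cfg := &config.LocalConfig{SkipPreflightChecks: true}
	if err := config.SaveLocalConfigToPath(cfg, "/tmp/dbbackup.conf"); err != nil { // hypothetical path
		panic(err)
	}

	// Search order: current dir, then $HOME, then the two /etc locations.
	for _, p := range config.ConfigSearchPaths() {
		fmt.Println(p)
	}
}
```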
@@ -74,7 +74,7 @@ func (p *PostgreSQL) Connect(ctx context.Context) error {
 	config.MinConns = 2        // Keep minimum connections ready
 	config.MaxConnLifetime = 0 // No limit on connection lifetime
 	config.MaxConnIdleTime = 0 // No idle timeout
-	config.HealthCheckPeriod = 1 * time.Minute // Health check every minute
+	config.HealthCheckPeriod = 5 * time.Second // Faster health check for quicker shutdown on Ctrl+C
 
 	// Optimize for large query results (BLOB data)
 	config.ConnConfig.RuntimeParams["work_mem"] = "64MB"
@@ -97,6 +97,14 @@ func (p *PostgreSQL) Connect(ctx context.Context) error {
 
 	p.pool = pool
 	p.db = db
+
+	// NOTE: We intentionally do NOT start a goroutine to close the pool on context cancellation.
+	// The pool is closed via defer dbClient.Close() in the caller, which is the correct pattern.
+	// Starting a goroutine here causes goroutine leaks and potential double-close issues when:
+	// 1. The caller's defer runs first (normal case)
+	// 2. Then context is cancelled and the goroutine tries to close an already-closed pool
+	// This was causing deadlocks in the TUI when tea.Batch was waiting for commands to complete.
+
 	p.log.Info("Connected to PostgreSQL successfully", "driver", "pgx", "max_conns", config.MaxConns)
 	return nil
 }
@@ -28,6 +28,9 @@ type ParallelRestoreEngine struct {
 
 	// Configuration
 	parallelWorkers int
+
+	// Internal cancel channel to stop the pool cleanup goroutine
+	closeCh chan struct{}
 }
 
 // ParallelRestoreOptions configures parallel restore behavior
@@ -71,7 +74,14 @@ const (
 )
 
 // NewParallelRestoreEngine creates a new parallel restore engine
+// NOTE: Pass a cancellable context to ensure the pool is properly closed on Ctrl+C
 func NewParallelRestoreEngine(config *PostgreSQLNativeConfig, log logger.Logger, workers int) (*ParallelRestoreEngine, error) {
+	return NewParallelRestoreEngineWithContext(context.Background(), config, log, workers)
+}
+
+// NewParallelRestoreEngineWithContext creates a new parallel restore engine with context support
+// This ensures the connection pool is properly closed when the context is cancelled
+func NewParallelRestoreEngineWithContext(ctx context.Context, config *PostgreSQLNativeConfig, log logger.Logger, workers int) (*ParallelRestoreEngine, error) {
 	if workers < 1 {
 		workers = 4 // Default to 4 parallel workers
 	}
@@ -94,17 +104,43 @@ func NewParallelRestoreEngine(config *PostgreSQLNativeConfig, log logger.Logger,
 	poolConfig.MaxConns = int32(workers + 2)
 	poolConfig.MinConns = int32(workers)
 
-	pool, err := pgxpool.NewWithConfig(context.Background(), poolConfig)
+	// CRITICAL: Reduce health check period to allow faster shutdown
+	// Default is 1 minute which causes hangs on Ctrl+C
+	poolConfig.HealthCheckPeriod = 5 * time.Second
+
+	// CRITICAL: Set connection-level timeouts to ensure queries can be cancelled
+	// This prevents infinite hangs on slow/stuck operations
+	poolConfig.ConnConfig.RuntimeParams = map[string]string{
+		"statement_timeout":                   "3600000", // 1 hour max per statement (in ms)
+		"lock_timeout":                        "300000",  // 5 min max wait for locks (in ms)
+		"idle_in_transaction_session_timeout": "600000",  // 10 min idle timeout (in ms)
+	}
+
+	// Use the provided context so pool health checks stop when context is cancelled
+	pool, err := pgxpool.NewWithConfig(ctx, poolConfig)
 	if err != nil {
 		return nil, fmt.Errorf("failed to create connection pool: %w", err)
 	}
 
-	return &ParallelRestoreEngine{
+	closeCh := make(chan struct{})
+
+	engine := &ParallelRestoreEngine{
 		config:          config,
 		pool:            pool,
 		log:             log,
 		parallelWorkers: workers,
-	}, nil
+		closeCh:         closeCh,
+	}
+
+	// NOTE: We intentionally do NOT start a goroutine to close the pool on context cancellation.
+	// The pool is closed via defer parallelEngine.Close() in the caller (restore/engine.go).
+	// The Close() method properly signals closeCh and closes the pool.
+	// Starting a goroutine here can cause:
+	// 1. Race conditions with explicit Close() calls
+	// 2. Goroutine leaks if neither ctx nor Close() fires
+	// 3. Deadlocks with BubbleTea's event loop
+
+	return engine, nil
 }
 
 // RestoreFile restores from a SQL file with parallel execution
@@ -146,7 +182,7 @@ func (e *ParallelRestoreEngine) RestoreFile(ctx context.Context, filePath string
 		options.ProgressCallback("parsing", 0, 0, "")
 	}
 
-	statements, err := e.parseStatements(reader)
+	statements, err := e.parseStatementsWithContext(ctx, reader)
 	if err != nil {
 		return result, fmt.Errorf("failed to parse SQL: %w", err)
 	}
@@ -177,6 +213,13 @@ func (e *ParallelRestoreEngine) RestoreFile(ctx context.Context, filePath string
 
 	schemaStmts := 0
 	for _, stmt := range statements {
+		// Check for context cancellation periodically
+		select {
+		case <-ctx.Done():
+			return result, ctx.Err()
+		default:
+		}
+
 		if stmt.Type == StmtSchema || stmt.Type == StmtOther {
 			if err := e.executeStatement(ctx, stmt.SQL); err != nil {
 				if options.ContinueOnError {
@@ -215,17 +258,39 @@ func (e *ParallelRestoreEngine) RestoreFile(ctx context.Context, filePath string
 	semaphore := make(chan struct{}, options.Workers)
 	var completedCopies int64
 	var totalRows int64
+	var cancelled int32 // Atomic flag to signal cancellation
+
+copyLoop:
 	for _, stmt := range copyStmts {
+		// Check for context cancellation before starting new work
+		if ctx.Err() != nil {
+			break
+		}
+
 		wg.Add(1)
-		semaphore <- struct{}{} // Acquire worker slot
+		select {
+		case semaphore <- struct{}{}: // Acquire worker slot
+		case <-ctx.Done():
+			wg.Done()
+			atomic.StoreInt32(&cancelled, 1)
+			break copyLoop // CRITICAL: Use labeled break to exit the for loop, not just the select
+		}
+
 		go func(s *SQLStatement) {
 			defer wg.Done()
 			defer func() { <-semaphore }() // Release worker slot
+
+			// Check cancellation before executing
+			if ctx.Err() != nil || atomic.LoadInt32(&cancelled) == 1 {
+				return
+			}
+
 			rows, err := e.executeCopy(ctx, s)
 			if err != nil {
+				if ctx.Err() != nil {
+					// Context cancelled, don't log as error
+					return
+				}
 				if options.ContinueOnError {
 					e.log.Warn("COPY failed", "table", s.TableName, "error", err)
 				} else {
@@ -243,6 +308,12 @@ func (e *ParallelRestoreEngine) RestoreFile(ctx context.Context, filePath string
 	}
 
 	wg.Wait()
+
+	// Check if cancelled
+	if ctx.Err() != nil {
+		return result, ctx.Err()
+	}
+
 	result.TablesRestored = completedCopies
 	result.RowsRestored = totalRows
 
@@ -264,15 +335,36 @@ func (e *ParallelRestoreEngine) RestoreFile(ctx context.Context, filePath string
 
 	// Execute post-data in parallel
 	var completedPostData int64
+	cancelled = 0 // Reset for phase 4
+postDataLoop:
 	for _, sql := range postDataStmts {
+		// Check for context cancellation before starting new work
+		if ctx.Err() != nil {
+			break
+		}
+
 		wg.Add(1)
-		semaphore <- struct{}{}
+		select {
+		case semaphore <- struct{}{}:
+		case <-ctx.Done():
+			wg.Done()
+			atomic.StoreInt32(&cancelled, 1)
+			break postDataLoop // CRITICAL: Use labeled break to exit the for loop, not just the select
+		}
+
 		go func(stmt string) {
 			defer wg.Done()
 			defer func() { <-semaphore }()
+
+			// Check cancellation before executing
+			if ctx.Err() != nil || atomic.LoadInt32(&cancelled) == 1 {
+				return
+			}
+
 			if err := e.executeStatement(ctx, stmt); err != nil {
+				if ctx.Err() != nil {
+					return // Context cancelled
+				}
 				if options.ContinueOnError {
 					e.log.Warn("Post-data statement failed", "error", err)
 				}
@@ -289,6 +381,11 @@ func (e *ParallelRestoreEngine) RestoreFile(ctx context.Context, filePath string
 
 	wg.Wait()
+
+	// Check if cancelled
+	if ctx.Err() != nil {
+		return result, ctx.Err()
+	}
+
 	result.Duration = time.Since(startTime)
 	e.log.Info("Parallel restore completed",
 		"duration", result.Duration,
@@ -301,6 +398,11 @@ func (e *ParallelRestoreEngine) RestoreFile(ctx context.Context, filePath string
 
 // parseStatements reads and classifies all SQL statements
 func (e *ParallelRestoreEngine) parseStatements(reader io.Reader) ([]SQLStatement, error) {
+	return e.parseStatementsWithContext(context.Background(), reader)
+}
+
+// parseStatementsWithContext reads and classifies all SQL statements with context support
+func (e *ParallelRestoreEngine) parseStatementsWithContext(ctx context.Context, reader io.Reader) ([]SQLStatement, error) {
 	scanner := bufio.NewScanner(reader)
 	scanner.Buffer(make([]byte, 1024*1024), 64*1024*1024) // 64MB max for large statements
 
@@ -308,8 +410,19 @@ func (e *ParallelRestoreEngine) parseStatements(reader io.Reader) ([]SQLStatemen
 	var stmtBuffer bytes.Buffer
 	var inCopyMode bool
 	var currentCopyStmt *SQLStatement
+	lineCount := 0
 
 	for scanner.Scan() {
+		// Check for context cancellation every 10000 lines
+		lineCount++
+		if lineCount%10000 == 0 {
+			select {
+			case <-ctx.Done():
+				return statements, ctx.Err()
+			default:
+			}
+		}
+
 		line := scanner.Text()
 
 		// Handle COPY data mode
@@ -327,6 +440,15 @@ func (e *ParallelRestoreEngine) parseStatements(reader io.Reader) ([]SQLStatemen
			currentCopyStmt.CopyData.WriteString(line)
			currentCopyStmt.CopyData.WriteByte('\n')
		}
+		// Check for context cancellation during COPY data parsing (large tables)
+		// Check every 10000 lines to avoid overhead
+		if lineCount%10000 == 0 {
+			select {
+			case <-ctx.Done():
+				return statements, ctx.Err()
+			default:
+			}
+		}
		continue
	}

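The two cancellation checks in this function share one shape: poll ctx.Done() only every 10,000 lines, so the hot scan loop stays cheap while Ctrl+C still lands within a bounded number of lines. A standalone sketch of that shape (illustrative names):

    package main

    import (
        "bufio"
        "context"
        "fmt"
        "strings"
        "time"
    )

    func scanWithCancel(ctx context.Context, r *strings.Reader) (int, error) {
        scanner := bufio.NewScanner(r)
        lines := 0
        for scanner.Scan() {
            lines++
            if lines%10000 == 0 { // amortized: one channel poll per 10k lines
                select {
                case <-ctx.Done():
                    return lines, ctx.Err()
                default:
                }
            }
        }
        return lines, scanner.Err()
    }

    func main() {
        ctx, cancel := context.WithTimeout(context.Background(), time.Millisecond)
        defer cancel()
        n, err := scanWithCancel(ctx, strings.NewReader(strings.Repeat("x\n", 1_000_000)))
        fmt.Println(n, err)
    }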
@@ -450,8 +572,13 @@ func (e *ParallelRestoreEngine) executeCopy(ctx context.Context, stmt *SQLStatem
	return tag.RowsAffected(), nil
}

-// Close closes the connection pool
+// Close closes the connection pool and stops the cleanup goroutine
func (e *ParallelRestoreEngine) Close() error {
+	// Signal the cleanup goroutine to exit
+	if e.closeCh != nil {
+		close(e.closeCh)
+	}
+	// Close the pool
	if e.pool != nil {
		e.pool.Close()
	}
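One caveat the hunk does not address: close(e.closeCh) panics if Close is ever called twice. If double-Close is possible, guarding with sync.Once is the usual hardening. A hypothetical sketch, not code from this repository:

    package main

    import "sync"

    type engine struct {
        closeCh   chan struct{}
        closeOnce sync.Once
    }

    func (e *engine) Close() error {
        e.closeOnce.Do(func() {
            close(e.closeCh) // runs at most once, so no double-close panic
        })
        return nil
    }

    func main() {
        e := &engine{closeCh: make(chan struct{})}
        _ = e.Close()
        _ = e.Close() // second call is a safe no-op
    }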
internal/engine/native/parallel_restore_cancel_test.go (new file, 121 lines)
@@ -0,0 +1,121 @@
package native

import (
	"bytes"
	"context"
	"strings"
	"testing"
	"time"

	"dbbackup/internal/logger"
)

// mockLogger for tests
type mockLogger struct{}

func (m *mockLogger) Debug(msg string, args ...any)                          {}
func (m *mockLogger) Info(msg string, keysAndValues ...interface{})          {}
func (m *mockLogger) Warn(msg string, keysAndValues ...interface{})          {}
func (m *mockLogger) Error(msg string, keysAndValues ...interface{})         {}
func (m *mockLogger) Time(msg string, args ...any)                           {}
func (m *mockLogger) WithField(key string, value interface{}) logger.Logger  { return m }
func (m *mockLogger) WithFields(fields map[string]interface{}) logger.Logger { return m }
func (m *mockLogger) StartOperation(name string) logger.OperationLogger      { return &mockOpLogger{} }

type mockOpLogger struct{}

func (m *mockOpLogger) Update(msg string, args ...any)   {}
func (m *mockOpLogger) Complete(msg string, args ...any) {}
func (m *mockOpLogger) Fail(msg string, args ...any)     {}

// createTestEngine creates an engine without database connection for parsing tests
func createTestEngine() *ParallelRestoreEngine {
	return &ParallelRestoreEngine{
		config:          &PostgreSQLNativeConfig{},
		log:             &mockLogger{},
		parallelWorkers: 4,
		closeCh:         make(chan struct{}),
	}
}

// TestParseStatementsContextCancellation verifies that parsing can be cancelled
// This was a critical fix - parsing large SQL files would hang on Ctrl+C
func TestParseStatementsContextCancellation(t *testing.T) {
	engine := createTestEngine()

	// Create a large SQL content that would take a while to parse
	var buf bytes.Buffer
	buf.WriteString("-- Test dump\n")
	buf.WriteString("SET statement_timeout = 0;\n")

	// Add 1,000,000 lines to simulate a large dump
	for i := 0; i < 1000000; i++ {
		buf.WriteString("SELECT ")
		buf.WriteString(string(rune('0' + (i % 10))))
		buf.WriteString("; -- line padding to make file larger\n")
	}

	// Create a context that cancels after 10ms
	ctx, cancel := context.WithTimeout(context.Background(), 10*time.Millisecond)
	defer cancel()

	reader := strings.NewReader(buf.String())

	start := time.Now()
	_, err := engine.parseStatementsWithContext(ctx, reader)
	elapsed := time.Since(start)

	// Should return quickly with context error, not hang
	if elapsed > 500*time.Millisecond {
		t.Errorf("Parsing took too long after cancellation: %v (expected < 500ms)", elapsed)
	}

	if err == nil {
		t.Log("Parsing completed before timeout (system is very fast)")
	} else if err == context.DeadlineExceeded || err == context.Canceled {
		t.Logf("✓ Context cancellation worked correctly (elapsed: %v)", elapsed)
	} else {
		t.Logf("Got error: %v (elapsed: %v)", err, elapsed)
	}
}

// TestParseStatementsWithCopyDataCancellation tests cancellation during COPY data parsing
// This is where large restores spend most of their time
func TestParseStatementsWithCopyDataCancellation(t *testing.T) {
	engine := createTestEngine()

	// Create SQL with COPY statement and lots of data
	var buf bytes.Buffer
	buf.WriteString("CREATE TABLE test (id int, data text);\n")
	buf.WriteString("COPY test (id, data) FROM stdin;\n")

	// Add 500,000 rows of COPY data
	for i := 0; i < 500000; i++ {
		buf.WriteString("1\tsome test data for row number padding to make larger\n")
	}
	buf.WriteString("\\.\n")
	buf.WriteString("SELECT 1;\n")

	// Create a context that cancels after 10ms
	ctx, cancel := context.WithTimeout(context.Background(), 10*time.Millisecond)
	defer cancel()

	reader := strings.NewReader(buf.String())

	start := time.Now()
	_, err := engine.parseStatementsWithContext(ctx, reader)
	elapsed := time.Since(start)

	// Should return quickly with context error, not hang
	if elapsed > 500*time.Millisecond {
		t.Errorf("COPY parsing took too long after cancellation: %v (expected < 500ms)", elapsed)
	}

	if err == nil {
		t.Log("Parsing completed before timeout (system is very fast)")
	} else if err == context.DeadlineExceeded || err == context.Canceled {
		t.Logf("✓ Context cancellation during COPY worked correctly (elapsed: %v)", elapsed)
	} else {
		t.Logf("Got error: %v (elapsed: %v)", err, elapsed)
	}
}
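Assuming a standard Go module layout, the new cancellation tests can be run in isolation with something like:

    go test ./internal/engine/native/ -run 'TestParseStatements' -v

Note that both tests tolerate fast machines: if parsing finishes before the 10 ms deadline fires, they log a message and pass instead of failing.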
internal/restore/dryrun.go (new file, 666 lines)
@@ -0,0 +1,666 @@
package restore

import (
	"context"
	"fmt"
	"os"
	"os/exec"
	"path/filepath"
	"strings"
	"syscall"
	"time"

	"dbbackup/internal/cleanup"
	"dbbackup/internal/config"
	"dbbackup/internal/logger"
)

// DryRunCheck represents a single dry-run check result
type DryRunCheck struct {
	Name     string
	Status   DryRunStatus
	Message  string
	Details  string
	Critical bool // If true, restore will definitely fail
}

// DryRunStatus represents the status of a dry-run check
type DryRunStatus int

const (
	DryRunPassed DryRunStatus = iota
	DryRunWarning
	DryRunFailed
	DryRunSkipped
)

func (s DryRunStatus) String() string {
	switch s {
	case DryRunPassed:
		return "PASS"
	case DryRunWarning:
		return "WARN"
	case DryRunFailed:
		return "FAIL"
	case DryRunSkipped:
		return "SKIP"
	default:
		return "UNKNOWN"
	}
}

func (s DryRunStatus) Icon() string {
	switch s {
	case DryRunPassed:
		return "[+]"
	case DryRunWarning:
		return "[!]"
	case DryRunFailed:
		return "[-]"
	case DryRunSkipped:
		return "[ ]"
	default:
		return "[?]"
	}
}

// DryRunResult contains all dry-run check results
type DryRunResult struct {
	Checks          []DryRunCheck
	CanProceed      bool
	HasWarnings     bool
	CriticalCount   int
	WarningCount    int
	EstimatedTime   time.Duration
	RequiredDiskMB  int64
	AvailableDiskMB int64
}

// RestoreDryRun performs comprehensive pre-restore validation
type RestoreDryRun struct {
	cfg     *config.Config
	log     logger.Logger
	safety  *Safety
	archive string
	target  string
}

// NewRestoreDryRun creates a new restore dry-run validator
func NewRestoreDryRun(cfg *config.Config, log logger.Logger, archivePath, targetDB string) *RestoreDryRun {
	return &RestoreDryRun{
		cfg:     cfg,
		log:     log,
		safety:  NewSafety(cfg, log),
		archive: archivePath,
		target:  targetDB,
	}
}

// Run executes all dry-run checks
func (r *RestoreDryRun) Run(ctx context.Context) (*DryRunResult, error) {
	result := &DryRunResult{
		Checks:     make([]DryRunCheck, 0, 10),
		CanProceed: true,
	}

	r.log.Info("Running restore dry-run checks",
		"archive", r.archive,
		"target", r.target)

	// 1. Archive existence and accessibility
	result.Checks = append(result.Checks, r.checkArchiveAccess())

	// 2. Archive format validation
	result.Checks = append(result.Checks, r.checkArchiveFormat())

	// 3. Database connectivity
	result.Checks = append(result.Checks, r.checkDatabaseConnectivity(ctx))

	// 4. User permissions (CREATE DATABASE, DROP, etc.)
	result.Checks = append(result.Checks, r.checkUserPermissions(ctx))

	// 5. Target database conflicts
	result.Checks = append(result.Checks, r.checkTargetConflicts(ctx))

	// 6. Disk space requirements
	diskCheck, requiredMB, availableMB := r.checkDiskSpace()
	result.Checks = append(result.Checks, diskCheck)
	result.RequiredDiskMB = requiredMB
	result.AvailableDiskMB = availableMB

	// 7. Work directory permissions
	result.Checks = append(result.Checks, r.checkWorkDirectory())

	// 8. Required tools availability
	result.Checks = append(result.Checks, r.checkRequiredTools())

	// 9. PostgreSQL lock settings (for parallel restore)
	result.Checks = append(result.Checks, r.checkLockSettings(ctx))

	// 10. Memory availability
	result.Checks = append(result.Checks, r.checkMemoryAvailability())

	// Calculate summary
	for _, check := range result.Checks {
		switch check.Status {
		case DryRunFailed:
			if check.Critical {
				result.CriticalCount++
				result.CanProceed = false
			} else {
				result.WarningCount++
				result.HasWarnings = true
			}
		case DryRunWarning:
			result.WarningCount++
			result.HasWarnings = true
		}
	}

	// Estimate restore time based on archive size
	result.EstimatedTime = r.estimateRestoreTime()

	return result, nil
}
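A plausible call site for the validator (a sketch under assumptions: cfg, log, and the CLI wiring are not part of this diff):

    dry := restore.NewRestoreDryRun(cfg, log, "/backups/app.dump.gz", "appdb")
    result, err := dry.Run(ctx)
    if err != nil {
        return err
    }
    restore.PrintDryRunResult(result)
    if !result.CanProceed {
        return fmt.Errorf("dry-run blocked restore: %d critical issue(s)", result.CriticalCount)
    }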

// checkArchiveAccess verifies the archive file is accessible
func (r *RestoreDryRun) checkArchiveAccess() DryRunCheck {
	check := DryRunCheck{
		Name:     "Archive Access",
		Critical: true,
	}

	info, err := os.Stat(r.archive)
	if err != nil {
		if os.IsNotExist(err) {
			check.Status = DryRunFailed
			check.Message = "Archive file not found"
			check.Details = r.archive
		} else if os.IsPermission(err) {
			check.Status = DryRunFailed
			check.Message = "Permission denied reading archive"
			check.Details = err.Error()
		} else {
			check.Status = DryRunFailed
			check.Message = "Cannot access archive"
			check.Details = err.Error()
		}
		return check
	}

	if info.Size() == 0 {
		check.Status = DryRunFailed
		check.Message = "Archive file is empty"
		return check
	}

	check.Status = DryRunPassed
	check.Message = fmt.Sprintf("Archive accessible (%s)", formatBytesSize(info.Size()))
	return check
}

// checkArchiveFormat validates the archive format
func (r *RestoreDryRun) checkArchiveFormat() DryRunCheck {
	check := DryRunCheck{
		Name:     "Archive Format",
		Critical: true,
	}

	err := r.safety.ValidateArchive(r.archive)
	if err != nil {
		check.Status = DryRunFailed
		check.Message = "Invalid archive format"
		check.Details = err.Error()
		return check
	}

	format := DetectArchiveFormat(r.archive)
	check.Status = DryRunPassed
	check.Message = fmt.Sprintf("Valid %s format", format.String())
	return check
}

// checkDatabaseConnectivity tests database connection
func (r *RestoreDryRun) checkDatabaseConnectivity(ctx context.Context) DryRunCheck {
	check := DryRunCheck{
		Name:     "Database Connectivity",
		Critical: true,
	}

	// Try to list databases as a connectivity check
	_, err := r.safety.ListUserDatabases(ctx)
	if err != nil {
		check.Status = DryRunFailed
		check.Message = "Cannot connect to database server"
		check.Details = err.Error()
		return check
	}

	check.Status = DryRunPassed
	check.Message = fmt.Sprintf("Connected to %s:%d", r.cfg.Host, r.cfg.Port)
	return check
}

// checkUserPermissions verifies required database permissions
func (r *RestoreDryRun) checkUserPermissions(ctx context.Context) DryRunCheck {
	check := DryRunCheck{
		Name:     "User Permissions",
		Critical: true,
	}

	if r.cfg.DatabaseType != "postgres" {
		check.Status = DryRunSkipped
		check.Message = "Permission check only implemented for PostgreSQL"
		return check
	}

	// Check if user has CREATEDB privilege
	query := `SELECT rolcreatedb, rolsuper FROM pg_roles WHERE rolname = current_user`

	args := []string{
		"-h", r.cfg.Host,
		"-p", fmt.Sprintf("%d", r.cfg.Port),
		"-U", r.cfg.User,
		"-d", "postgres",
		"-tA",
		"-c", query,
	}

	cmd := cleanup.SafeCommand(ctx, "psql", args...)
	if r.cfg.Password != "" {
		cmd.Env = append(os.Environ(), fmt.Sprintf("PGPASSWORD=%s", r.cfg.Password))
	}

	output, err := cmd.Output()
	if err != nil {
		check.Status = DryRunWarning
		check.Message = "Could not verify permissions"
		check.Details = err.Error()
		return check
	}

	result := strings.TrimSpace(string(output))
	parts := strings.Split(result, "|")

	if len(parts) >= 2 {
		canCreate := parts[0] == "t"
		isSuper := parts[1] == "t"

		if isSuper {
			check.Status = DryRunPassed
			check.Message = "User is superuser (full permissions)"
			return check
		}

		if canCreate {
			check.Status = DryRunPassed
			check.Message = "User has CREATEDB privilege"
			return check
		}
	}

	check.Status = DryRunFailed
	check.Message = "User lacks CREATEDB privilege"
	check.Details = "Required for creating target database. Run: ALTER USER " + r.cfg.User + " CREATEDB;"
	return check
}

// checkTargetConflicts checks if target database already exists
func (r *RestoreDryRun) checkTargetConflicts(ctx context.Context) DryRunCheck {
	check := DryRunCheck{
		Name:     "Target Database",
		Critical: false, // Not critical - can be overwritten with --clean
	}

	if r.target == "" {
		check.Status = DryRunSkipped
		check.Message = "Cluster restore - checking multiple databases"
		return check
	}

	databases, err := r.safety.ListUserDatabases(ctx)
	if err != nil {
		check.Status = DryRunWarning
		check.Message = "Could not check existing databases"
		check.Details = err.Error()
		return check
	}

	for _, db := range databases {
		if db == r.target {
			check.Status = DryRunWarning
			check.Message = fmt.Sprintf("Database '%s' already exists", r.target)
			check.Details = "Use --clean to drop and recreate, or choose different target"
			return check
		}
	}

	check.Status = DryRunPassed
	check.Message = fmt.Sprintf("Target '%s' is available", r.target)
	return check
}

// checkDiskSpace verifies sufficient disk space
func (r *RestoreDryRun) checkDiskSpace() (DryRunCheck, int64, int64) {
	check := DryRunCheck{
		Name:     "Disk Space",
		Critical: true,
	}

	// Get archive size
	info, err := os.Stat(r.archive)
	if err != nil {
		check.Status = DryRunSkipped
		check.Message = "Cannot determine archive size"
		return check, 0, 0
	}

	// Estimate uncompressed size (assume 3x compression ratio)
	archiveSizeMB := info.Size() / 1024 / 1024
	estimatedUncompressedMB := archiveSizeMB * 3

	// Need space for: work dir extraction + restored database
	// Work dir: full uncompressed size
	// Database: roughly same as uncompressed SQL
	requiredMB := estimatedUncompressedMB * 2

	// Check available disk space in work directory
	workDir := r.cfg.GetEffectiveWorkDir()
	if workDir == "" {
		workDir = r.cfg.BackupDir
	}

	var stat syscall.Statfs_t
	if err := syscall.Statfs(workDir, &stat); err != nil {
		check.Status = DryRunWarning
		check.Message = "Cannot check disk space"
		check.Details = err.Error()
		return check, requiredMB, 0
	}

	availableMB := int64(stat.Bavail*uint64(stat.Bsize)) / 1024 / 1024

	if availableMB < requiredMB {
		check.Status = DryRunFailed
		check.Message = fmt.Sprintf("Insufficient disk space: need %d MB, have %d MB", requiredMB, availableMB)
		check.Details = fmt.Sprintf("Work directory: %s", workDir)
		return check, requiredMB, availableMB
	}

	// Warn if less than 20% buffer
	if availableMB < requiredMB*12/10 {
		check.Status = DryRunWarning
		check.Message = fmt.Sprintf("Low disk space margin: need %d MB, have %d MB", requiredMB, availableMB)
		return check, requiredMB, availableMB
	}

	check.Status = DryRunPassed
	check.Message = fmt.Sprintf("Sufficient space: need ~%d MB, have %d MB", requiredMB, availableMB)
	return check, requiredMB, availableMB
}
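Worked example of the sizing heuristic above: a 500 MB archive is assumed to expand 3x to roughly 1500 MB of SQL, and doubling that for work-dir extraction plus database writes puts requiredMB at about 3000 MB. The hard failure fires below 3000 MB free, and the low-margin warning fires below 3600 MB (requiredMB*12/10, i.e. a 20% buffer).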

// checkWorkDirectory verifies work directory is writable
func (r *RestoreDryRun) checkWorkDirectory() DryRunCheck {
	check := DryRunCheck{
		Name:     "Work Directory",
		Critical: true,
	}

	workDir := r.cfg.GetEffectiveWorkDir()
	if workDir == "" {
		workDir = r.cfg.BackupDir
	}

	// Check if directory exists
	info, err := os.Stat(workDir)
	if err != nil {
		if os.IsNotExist(err) {
			check.Status = DryRunFailed
			check.Message = "Work directory does not exist"
			check.Details = workDir
		} else {
			check.Status = DryRunFailed
			check.Message = "Cannot access work directory"
			check.Details = err.Error()
		}
		return check
	}

	if !info.IsDir() {
		check.Status = DryRunFailed
		check.Message = "Work path is not a directory"
		check.Details = workDir
		return check
	}

	// Try to create a test file
	testFile := filepath.Join(workDir, ".dbbackup-dryrun-test")
	f, err := os.Create(testFile)
	if err != nil {
		check.Status = DryRunFailed
		check.Message = "Work directory is not writable"
		check.Details = err.Error()
		return check
	}
	f.Close()
	os.Remove(testFile)

	check.Status = DryRunPassed
	check.Message = fmt.Sprintf("Work directory writable: %s", workDir)
	return check
}

// checkRequiredTools verifies required CLI tools are available
func (r *RestoreDryRun) checkRequiredTools() DryRunCheck {
	check := DryRunCheck{
		Name:     "Required Tools",
		Critical: true,
	}

	var required []string
	switch r.cfg.DatabaseType {
	case "postgres":
		required = []string{"pg_restore", "psql", "createdb"}
	case "mysql", "mariadb":
		required = []string{"mysql", "mysqldump"}
	default:
		check.Status = DryRunSkipped
		check.Message = "Unknown database type"
		return check
	}

	missing := []string{}
	for _, tool := range required {
		if _, err := LookPath(tool); err != nil {
			missing = append(missing, tool)
		}
	}

	if len(missing) > 0 {
		check.Status = DryRunFailed
		check.Message = fmt.Sprintf("Missing tools: %s", strings.Join(missing, ", "))
		check.Details = "Install the database client tools package"
		return check
	}

	check.Status = DryRunPassed
	check.Message = fmt.Sprintf("All tools available: %s", strings.Join(required, ", "))
	return check
}

// checkLockSettings checks PostgreSQL lock settings for parallel restore
func (r *RestoreDryRun) checkLockSettings(ctx context.Context) DryRunCheck {
	check := DryRunCheck{
		Name:     "Lock Settings",
		Critical: false,
	}

	if r.cfg.DatabaseType != "postgres" {
		check.Status = DryRunSkipped
		check.Message = "Lock check only for PostgreSQL"
		return check
	}

	// Check max_locks_per_transaction
	query := `SHOW max_locks_per_transaction`
	args := []string{
		"-h", r.cfg.Host,
		"-p", fmt.Sprintf("%d", r.cfg.Port),
		"-U", r.cfg.User,
		"-d", "postgres",
		"-tA",
		"-c", query,
	}

	cmd := cleanup.SafeCommand(ctx, "psql", args...)
	if r.cfg.Password != "" {
		cmd.Env = append(os.Environ(), fmt.Sprintf("PGPASSWORD=%s", r.cfg.Password))
	}

	output, err := cmd.Output()
	if err != nil {
		check.Status = DryRunWarning
		check.Message = "Could not check lock settings"
		return check
	}

	locks := strings.TrimSpace(string(output))
	if locks == "" {
		check.Status = DryRunWarning
		check.Message = "Could not determine max_locks_per_transaction"
		return check
	}

	// Default is 64, recommend at least 128 for parallel restores
	var lockCount int
	fmt.Sscanf(locks, "%d", &lockCount)

	if lockCount < 128 {
		check.Status = DryRunWarning
		check.Message = fmt.Sprintf("max_locks_per_transaction=%d (recommend 128+ for parallel)", lockCount)
		check.Details = "Set: ALTER SYSTEM SET max_locks_per_transaction = 128; then restart PostgreSQL"
		return check
	}

	check.Status = DryRunPassed
	check.Message = fmt.Sprintf("max_locks_per_transaction=%d (sufficient)", lockCount)
	return check
}

// checkMemoryAvailability checks if enough memory is available
func (r *RestoreDryRun) checkMemoryAvailability() DryRunCheck {
	check := DryRunCheck{
		Name:     "Memory Availability",
		Critical: false,
	}

	// Read /proc/meminfo on Linux
	data, err := os.ReadFile("/proc/meminfo")
	if err != nil {
		check.Status = DryRunSkipped
		check.Message = "Cannot check memory (non-Linux?)"
		return check
	}

	var availableKB int64
	for _, line := range strings.Split(string(data), "\n") {
		if strings.HasPrefix(line, "MemAvailable:") {
			fmt.Sscanf(line, "MemAvailable: %d kB", &availableKB)
			break
		}
	}

	availableMB := availableKB / 1024

	// Recommend at least 1GB for restore operations
	if availableMB < 1024 {
		check.Status = DryRunWarning
		check.Message = fmt.Sprintf("Low available memory: %d MB", availableMB)
		check.Details = "Restore may be slow or fail. Consider closing other applications."
		return check
	}

	check.Status = DryRunPassed
	check.Message = fmt.Sprintf("Available memory: %d MB", availableMB)
	return check
}

// estimateRestoreTime estimates restore duration based on archive size
func (r *RestoreDryRun) estimateRestoreTime() time.Duration {
	info, err := os.Stat(r.archive)
	if err != nil {
		return 0
	}

	// Rough estimate: 100 MB/minute for restore operations
	// This accounts for decompression, SQL parsing, and database writes
	sizeMB := info.Size() / 1024 / 1024
	minutes := sizeMB / 100
	if minutes < 1 {
		minutes = 1
	}

	return time.Duration(minutes) * time.Minute
}
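Worked example: a 5 GB archive is 5120 MB, so at the assumed 100 MB/minute the estimate comes out to 51 minutes; anything under 100 MB is clamped up to the 1-minute floor.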

// formatBytesSize formats bytes to human-readable string
func formatBytesSize(bytes int64) string {
	const (
		KB = 1024
		MB = KB * 1024
		GB = MB * 1024
	)

	switch {
	case bytes >= GB:
		return fmt.Sprintf("%.1f GB", float64(bytes)/GB)
	case bytes >= MB:
		return fmt.Sprintf("%.1f MB", float64(bytes)/MB)
	case bytes >= KB:
		return fmt.Sprintf("%.1f KB", float64(bytes)/KB)
	default:
		return fmt.Sprintf("%d B", bytes)
	}
}

// LookPath is a wrapper around exec.LookPath for testing
var LookPath = func(file string) (string, error) {
	return exec.LookPath(file)
}

// PrintDryRunResult prints a formatted dry-run result
func PrintDryRunResult(result *DryRunResult) {
	fmt.Println("\n" + strings.Repeat("=", 60))
	fmt.Println("RESTORE DRY-RUN RESULTS")
	fmt.Println(strings.Repeat("=", 60))

	for _, check := range result.Checks {
		fmt.Printf("%s %-20s %s\n", check.Status.Icon(), check.Name+":", check.Message)
		if check.Details != "" {
			fmt.Printf("   └─ %s\n", check.Details)
		}
	}

	fmt.Println(strings.Repeat("-", 60))

	if result.EstimatedTime > 0 {
		fmt.Printf("Estimated restore time: %s\n", result.EstimatedTime)
	}

	if result.RequiredDiskMB > 0 {
		fmt.Printf("Disk space: %d MB required, %d MB available\n",
			result.RequiredDiskMB, result.AvailableDiskMB)
	}

	fmt.Println()
	if result.CanProceed {
		if result.HasWarnings {
			fmt.Println("⚠️  DRY-RUN: PASSED with warnings - restore can proceed")
		} else {
			fmt.Println("✅ DRY-RUN: PASSED - restore can proceed")
		}
	} else {
		fmt.Printf("❌ DRY-RUN: FAILED - %d critical issue(s) must be resolved\n", result.CriticalCount)
	}
	fmt.Println()
}
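For orientation, the printed report might look roughly like this (illustrative values, not captured from a real run):

    ============================================================
    RESTORE DRY-RUN RESULTS
    ============================================================
    [+] Archive Access:      Archive accessible (1.2 GB)
    [!] Target Database:     Database 'appdb' already exists
       └─ Use --clean to drop and recreate, or choose different target
    ------------------------------------------------------------
    Estimated restore time: 36m0s
    Disk space: 7372 MB required, 51200 MB available

    ⚠️  DRY-RUN: PASSED with warnings - restore can proceed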
@@ -635,7 +635,8 @@ func (e *Engine) restoreWithNativeEngine(ctx context.Context, archivePath, targe
		"database", targetDB,
		"archive", archivePath)

-	parallelEngine, err := native.NewParallelRestoreEngine(nativeCfg, e.log, parallelWorkers)
+	// Pass context to ensure pool is properly closed on Ctrl+C cancellation
+	parallelEngine, err := native.NewParallelRestoreEngineWithContext(ctx, nativeCfg, e.log, parallelWorkers)
	if err != nil {
		e.log.Warn("Failed to create parallel restore engine, falling back to sequential", "error", err)
		// Fall back to sequential restore
@@ -1342,9 +1343,14 @@ func (e *Engine) RestoreCluster(ctx context.Context, archivePath string, preExtr
	}

	format := DetectArchiveFormat(archivePath)
-	if format != FormatClusterTarGz {
+	if !format.CanBeClusterRestore() {
		operation.Fail("Invalid cluster archive format")
-		return fmt.Errorf("not a cluster archive: %s (detected format: %s)", archivePath, format)
+		return fmt.Errorf("not a valid cluster restore format: %s (detected format: %s). Supported: .tar.gz, .sql, .sql.gz", archivePath, format)
+	}
+
+	// For SQL-based cluster restores, use a different restore path
+	if format == FormatPostgreSQLSQL || format == FormatPostgreSQLSQLGz {
+		return e.restoreClusterFromSQL(ctx, archivePath, operation)
	}

	// Check if we have a pre-extracted directory (optimization to avoid double extraction)
@@ -2177,6 +2183,45 @@ func (e *Engine) RestoreCluster(ctx context.Context, archivePath string, preExtr
	return nil
}

+// restoreClusterFromSQL restores a pg_dumpall SQL file using the native engine
+// This handles .sql and .sql.gz files containing full cluster dumps
+func (e *Engine) restoreClusterFromSQL(ctx context.Context, archivePath string, operation logger.OperationLogger) error {
+	e.log.Info("Restoring cluster from SQL file (pg_dumpall format)",
+		"file", filepath.Base(archivePath),
+		"native_engine", true)
+
+	clusterStartTime := time.Now()
+
+	// Determine if compressed
+	compressed := strings.HasSuffix(strings.ToLower(archivePath), ".gz")
+
+	// Use native engine to restore directly to postgres database (globals + all databases)
+	e.log.Info("Restoring SQL dump using native engine...",
+		"compressed", compressed,
+		"size", FormatBytes(getFileSize(archivePath)))
+
+	e.progress.Start("Restoring cluster from SQL dump...")
+
+	// For pg_dumpall, we restore to the 'postgres' database which then creates other databases
+	targetDB := "postgres"
+
+	err := e.restoreWithNativeEngine(ctx, archivePath, targetDB, compressed)
+	if err != nil {
+		operation.Fail(fmt.Sprintf("SQL cluster restore failed: %v", err))
+		e.recordClusterRestoreMetrics(clusterStartTime, archivePath, 0, 0, false, err.Error())
+		return fmt.Errorf("SQL cluster restore failed: %w", err)
+	}
+
+	duration := time.Since(clusterStartTime)
+	e.progress.Complete(fmt.Sprintf("Cluster restored successfully from SQL in %s", duration.Round(time.Second)))
+	operation.Complete("SQL cluster restore completed")
+
+	// Record metrics
+	e.recordClusterRestoreMetrics(clusterStartTime, archivePath, 1, 1, true, "")
+
+	return nil
+}
+
// recordClusterRestoreMetrics records metrics for cluster restore operations
func (e *Engine) recordClusterRestoreMetrics(startTime time.Time, archivePath string, totalDBs, successCount int, success bool, errorMsg string) {
	duration := time.Since(startTime)
@@ -2480,7 +2525,14 @@ func (e *Engine) restoreGlobals(ctx context.Context, globalsFile string) error {
		cmdErr = ctx.Err()
	}

-	<-stderrDone
+	// Wait for stderr reader with timeout to prevent indefinite hang
+	// if the process doesn't fully terminate
+	select {
+	case <-stderrDone:
+		// Normal completion
+	case <-time.After(5 * time.Second):
+		e.log.Warn("Stderr reader timeout - forcefully continuing")
+	}

	// Only fail on actual command errors or FATAL PostgreSQL errors
	// Regular ERROR messages (like "role already exists") are expected
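The timeout above trades a possible goroutine leak for liveness: if the stderr reader never finishes, its goroutine stays blocked, but the restore no longer hangs. The bounded-wait shape in isolation (illustrative, self-contained):

    package main

    import (
        "fmt"
        "time"
    )

    func main() {
        done := make(chan struct{})
        go func() {
            time.Sleep(10 * time.Second) // stand-in for draining a pipe that may never hit EOF
            close(done)
        }()

        select {
        case <-done:
            fmt.Println("reader finished")
        case <-time.After(5 * time.Second):
            fmt.Println("reader timeout - continuing; the reader goroutine is abandoned")
        }
    }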
@@ -2924,6 +2976,15 @@ func (e *Engine) isIgnorableError(errorMsg string) bool {
	return false
}

+// getFileSize returns the size of a file, or 0 if it can't be read
+func getFileSize(path string) int64 {
+	info, err := os.Stat(path)
+	if err != nil {
+		return 0
+	}
+	return info.Size()
+}
+
// FormatBytes formats bytes to human readable format
func FormatBytes(bytes int64) string {
	const unit = 1024
@@ -168,11 +168,19 @@ func (f ArchiveFormat) IsCompressed() bool {
		f == FormatClusterTarGz
}

-// IsClusterBackup returns true if the archive is a cluster backup
+// IsClusterBackup returns true if the archive is a cluster backup (.tar.gz format created by dbbackup)
func (f ArchiveFormat) IsClusterBackup() bool {
	return f == FormatClusterTarGz
}

+// CanBeClusterRestore returns true if the format can be used for cluster restore
+// This includes .tar.gz (dbbackup format) and .sql/.sql.gz (pg_dumpall format for native engine)
+func (f ArchiveFormat) CanBeClusterRestore() bool {
+	return f == FormatClusterTarGz ||
+		f == FormatPostgreSQLSQL ||
+		f == FormatPostgreSQLSQLGz
+}
+
// IsPostgreSQL returns true if the archive is PostgreSQL format
func (f ArchiveFormat) IsPostgreSQL() bool {
	return f == FormatPostgreSQLDump ||
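Call sites then reduce to a single predicate (sketch; the format constants are the ones defined in this file, the path is a placeholder):

    format := DetectArchiveFormat("/backups/cluster_20250101.sql.gz")
    if format.CanBeClusterRestore() {
        // .tar.gz takes the archive path; .sql/.sql.gz take the native-engine path
    }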
@@ -1,7 +1,15 @@
package security

import (
+	"crypto/ed25519"
+	"crypto/rand"
+	"crypto/sha256"
+	"encoding/base64"
+	"encoding/hex"
+	"encoding/json"
+	"fmt"
	"os"
+	"sync"
	"time"

	"dbbackup/internal/logger"
@@ -21,13 +29,36 @@ type AuditEvent struct {
type AuditLogger struct {
	log     logger.Logger
	enabled bool

+	// For signed audit log support
+	mu         sync.Mutex
+	entries    []SignedAuditEntry
+	privateKey ed25519.PrivateKey
+	publicKey  ed25519.PublicKey
+	prevHash   string // Hash of previous entry for chaining
+}
+
+// SignedAuditEntry represents an audit entry with cryptographic signature
+type SignedAuditEntry struct {
+	Sequence  int64  `json:"seq"`
+	Timestamp string `json:"ts"`
+	User      string `json:"user"`
+	Action    string `json:"action"`
+	Resource  string `json:"resource"`
+	Result    string `json:"result"`
+	Details   string `json:"details,omitempty"`
+	PrevHash  string `json:"prev_hash"` // Hash chain for tamper detection
+	Hash      string `json:"hash"`      // SHA-256 of this entry (without signature)
+	Signature string `json:"sig"`       // Ed25519 signature of Hash
}

// NewAuditLogger creates a new audit logger
func NewAuditLogger(log logger.Logger, enabled bool) *AuditLogger {
	return &AuditLogger{
		log:     log,
		enabled: enabled,
+		entries:  make([]SignedAuditEntry, 0),
+		prevHash: "genesis", // Initial hash for first entry
	}
}
@@ -232,3 +263,337 @@ func GetCurrentUser() string {
	}
	return "unknown"
}
+
+// =============================================================================
+// Audit Log Signing and Verification
+// =============================================================================
+
+// GenerateSigningKeys generates a new Ed25519 key pair for audit log signing
+func GenerateSigningKeys() (privateKey ed25519.PrivateKey, publicKey ed25519.PublicKey, err error) {
+	publicKey, privateKey, err = ed25519.GenerateKey(rand.Reader)
+	return
+}
+
+// SavePrivateKey saves the private key to a file (PEM-like format)
+func SavePrivateKey(path string, key ed25519.PrivateKey) error {
+	encoded := base64.StdEncoding.EncodeToString(key)
+	content := fmt.Sprintf("-----BEGIN DBBACKUP AUDIT PRIVATE KEY-----\n%s\n-----END DBBACKUP AUDIT PRIVATE KEY-----\n", encoded)
+	return os.WriteFile(path, []byte(content), 0600) // Restrictive permissions
+}
+
+// SavePublicKey saves the public key to a file (PEM-like format)
+func SavePublicKey(path string, key ed25519.PublicKey) error {
+	encoded := base64.StdEncoding.EncodeToString(key)
+	content := fmt.Sprintf("-----BEGIN DBBACKUP AUDIT PUBLIC KEY-----\n%s\n-----END DBBACKUP AUDIT PUBLIC KEY-----\n", encoded)
+	return os.WriteFile(path, []byte(content), 0644)
+}
+
+// LoadPrivateKey loads a private key from file
+func LoadPrivateKey(path string) (ed25519.PrivateKey, error) {
+	data, err := os.ReadFile(path)
+	if err != nil {
+		return nil, fmt.Errorf("failed to read private key: %w", err)
+	}
+
+	// Extract base64 content between PEM markers
+	content := extractPEMContent(string(data))
+	if content == "" {
+		return nil, fmt.Errorf("invalid private key format")
+	}
+
+	decoded, err := base64.StdEncoding.DecodeString(content)
+	if err != nil {
+		return nil, fmt.Errorf("failed to decode private key: %w", err)
+	}
+
+	if len(decoded) != ed25519.PrivateKeySize {
+		return nil, fmt.Errorf("invalid private key size")
+	}
+
+	return ed25519.PrivateKey(decoded), nil
+}
+
+// LoadPublicKey loads a public key from file
+func LoadPublicKey(path string) (ed25519.PublicKey, error) {
+	data, err := os.ReadFile(path)
+	if err != nil {
+		return nil, fmt.Errorf("failed to read public key: %w", err)
+	}
+
+	content := extractPEMContent(string(data))
+	if content == "" {
+		return nil, fmt.Errorf("invalid public key format")
+	}
+
+	decoded, err := base64.StdEncoding.DecodeString(content)
+	if err != nil {
+		return nil, fmt.Errorf("failed to decode public key: %w", err)
+	}
+
+	if len(decoded) != ed25519.PublicKeySize {
+		return nil, fmt.Errorf("invalid public key size")
+	}
+
+	return ed25519.PublicKey(decoded), nil
+}
+
+// extractPEMContent extracts base64 content from PEM-like format
+func extractPEMContent(data string) string {
+	// Simple extraction - find content between markers
+	start := 0
+	for i := 0; i < len(data); i++ {
+		if data[i] == '\n' && i > 0 && data[i-1] == '-' {
+			start = i + 1
+			break
+		}
+	}
+
+	end := len(data)
+	for i := len(data) - 1; i > start; i-- {
+		if data[i] == '\n' && i+1 < len(data) && data[i+1] == '-' {
+			end = i
+			break
+		}
+	}
+
+	if start >= end {
+		return ""
+	}
+
+	// Remove whitespace
+	result := ""
+	for _, c := range data[start:end] {
+		if c != '\n' && c != '\r' && c != ' ' {
+			result += string(c)
+		}
+	}
+	return result
+}
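A design note on the hand-rolled parsing above: the standard library's encoding/pem package handles the same marker-delimited, base64-encoded layout, including the decode step. A sketch of the equivalent, assuming the same header text:

    package main

    import (
        "encoding/pem"
        "fmt"
    )

    func main() {
        data := []byte("-----BEGIN DBBACKUP AUDIT PUBLIC KEY-----\nQUJD\n-----END DBBACKUP AUDIT PUBLIC KEY-----\n")
        block, _ := pem.Decode(data)
        if block == nil {
            fmt.Println("invalid key format")
            return
        }
        fmt.Printf("%d key bytes\n", len(block.Bytes)) // pem.Decode base64-decodes for us
    }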
+
+// EnableSigning enables cryptographic signing for audit entries
+func (a *AuditLogger) EnableSigning(privateKey ed25519.PrivateKey) {
+	a.mu.Lock()
+	defer a.mu.Unlock()
+	a.privateKey = privateKey
+	a.publicKey = privateKey.Public().(ed25519.PublicKey)
+}
+
+// AddSignedEntry adds a signed entry to the audit log
+func (a *AuditLogger) AddSignedEntry(event AuditEvent) error {
+	if !a.enabled {
+		return nil
+	}
+
+	a.mu.Lock()
+	defer a.mu.Unlock()
+
+	// Serialize details
+	detailsJSON := ""
+	if len(event.Details) > 0 {
+		if data, err := json.Marshal(event.Details); err == nil {
+			detailsJSON = string(data)
+		}
+	}
+
+	entry := SignedAuditEntry{
+		Sequence:  int64(len(a.entries) + 1),
+		Timestamp: event.Timestamp.Format(time.RFC3339Nano),
+		User:      event.User,
+		Action:    event.Action,
+		Resource:  event.Resource,
+		Result:    event.Result,
+		Details:   detailsJSON,
+		PrevHash:  a.prevHash,
+	}
+
+	// Calculate hash of entry (without signature)
+	entry.Hash = a.calculateEntryHash(entry)
+
+	// Sign if private key is available
+	if a.privateKey != nil {
+		hashBytes, _ := hex.DecodeString(entry.Hash)
+		signature := ed25519.Sign(a.privateKey, hashBytes)
+		entry.Signature = base64.StdEncoding.EncodeToString(signature)
+	}
+
+	// Update chain
+	a.prevHash = entry.Hash
+	a.entries = append(a.entries, entry)
+
+	// Also log to standard logger
+	a.logEvent(event)
+
+	return nil
+}
+
+// calculateEntryHash computes SHA-256 hash of an entry (without signature field)
+func (a *AuditLogger) calculateEntryHash(entry SignedAuditEntry) string {
+	// Create canonical representation for hashing
+	data := fmt.Sprintf("%d|%s|%s|%s|%s|%s|%s|%s",
+		entry.Sequence,
+		entry.Timestamp,
+		entry.User,
+		entry.Action,
+		entry.Resource,
+		entry.Result,
+		entry.Details,
+		entry.PrevHash,
+	)
+
+	hash := sha256.Sum256([]byte(data))
+	return hex.EncodeToString(hash[:])
+}
+
+// ExportSignedLog exports the signed audit log to a file
+func (a *AuditLogger) ExportSignedLog(path string) error {
+	a.mu.Lock()
+	defer a.mu.Unlock()
+
+	data, err := json.MarshalIndent(a.entries, "", "  ")
+	if err != nil {
+		return fmt.Errorf("failed to marshal audit log: %w", err)
+	}
+
+	return os.WriteFile(path, data, 0644)
+}
+
+// VerifyAuditLog verifies the integrity of an exported audit log
+func VerifyAuditLog(logPath string, publicKeyPath string) (*AuditVerificationResult, error) {
+	// Load public key
+	publicKey, err := LoadPublicKey(publicKeyPath)
+	if err != nil {
+		return nil, fmt.Errorf("failed to load public key: %w", err)
+	}
+
+	// Load audit log
+	data, err := os.ReadFile(logPath)
+	if err != nil {
+		return nil, fmt.Errorf("failed to read audit log: %w", err)
+	}
+
+	var entries []SignedAuditEntry
+	if err := json.Unmarshal(data, &entries); err != nil {
+		return nil, fmt.Errorf("failed to parse audit log: %w", err)
+	}
+
+	result := &AuditVerificationResult{
+		TotalEntries: len(entries),
+		ValidEntries: 0,
+		Errors:       make([]string, 0),
+	}
+
+	prevHash := "genesis"
+
+	for i, entry := range entries {
+		// Verify hash chain (both slices are length-bounded so short values like "genesis" cannot panic)
+		if entry.PrevHash != prevHash {
+			result.Errors = append(result.Errors,
+				fmt.Sprintf("Entry %d: hash chain broken (expected %s, got %s)",
+					i+1, prevHash[:min(16, len(prevHash))]+"...", entry.PrevHash[:min(16, len(entry.PrevHash))]+"..."))
+		}
+
+		// Recalculate hash
+		expectedHash := calculateVerifyHash(entry)
+		if entry.Hash != expectedHash {
+			result.Errors = append(result.Errors,
+				fmt.Sprintf("Entry %d: hash mismatch (entry may be tampered)", i+1))
+		}
+
+		// Verify signature
+		if entry.Signature != "" {
+			hashBytes, _ := hex.DecodeString(entry.Hash)
+			sigBytes, err := base64.StdEncoding.DecodeString(entry.Signature)
+			if err != nil {
+				result.Errors = append(result.Errors,
+					fmt.Sprintf("Entry %d: invalid signature encoding", i+1))
+			} else if !ed25519.Verify(publicKey, hashBytes, sigBytes) {
+				result.Errors = append(result.Errors,
+					fmt.Sprintf("Entry %d: signature verification failed", i+1))
+			} else {
+				result.ValidEntries++
+			}
+		} else {
+			result.Errors = append(result.Errors,
+				fmt.Sprintf("Entry %d: missing signature", i+1))
+		}
+
+		prevHash = entry.Hash
+	}
+
+	result.ChainValid = len(result.Errors) == 0 ||
+		!containsChainError(result.Errors)
+	result.AllSignaturesValid = result.ValidEntries == result.TotalEntries
+
+	return result, nil
+}
+
+// AuditVerificationResult contains the result of audit log verification
+type AuditVerificationResult struct {
+	TotalEntries       int
+	ValidEntries       int
+	ChainValid         bool
+	AllSignaturesValid bool
+	Errors             []string
+}
+
+// IsValid returns true if the audit log is completely valid
+func (r *AuditVerificationResult) IsValid() bool {
+	return r.ChainValid && r.AllSignaturesValid && len(r.Errors) == 0
+}
+
+// String returns a human-readable summary
+func (r *AuditVerificationResult) String() string {
+	if r.IsValid() {
+		return fmt.Sprintf("✅ Audit log verified: %d entries, chain intact, all signatures valid",
+			r.TotalEntries)
+	}
+
+	return fmt.Sprintf("❌ Audit log verification failed: %d/%d valid entries, %d errors",
+		r.ValidEntries, r.TotalEntries, len(r.Errors))
+}
+
+// calculateVerifyHash recalculates hash for verification
+func calculateVerifyHash(entry SignedAuditEntry) string {
+	data := fmt.Sprintf("%d|%s|%s|%s|%s|%s|%s|%s",
+		entry.Sequence,
+		entry.Timestamp,
+		entry.User,
+		entry.Action,
+		entry.Resource,
+		entry.Result,
+		entry.Details,
+		entry.PrevHash,
+	)
+
+	hash := sha256.Sum256([]byte(data))
+	return hex.EncodeToString(hash[:])
+}
+
+// containsChainError checks if errors include hash chain issues
+// (all such messages start with "Entry", so matching the substrings suffices)
+func containsChainError(errors []string) bool {
+	for _, err := range errors {
+		if contains(err, "hash chain") || contains(err, "hash mismatch") {
+			return true
+		}
+	}
+	return false
+}
+
+// contains is a simple string contains helper
+func contains(s, substr string) bool {
+	for i := 0; i <= len(s)-len(substr); i++ {
+		if s[i:i+len(substr)] == substr {
+			return true
+		}
+	}
+	return false
+}
+
+// min returns the minimum of two ints
+func min(a, b int) int {
+	if a < b {
+		return a
+	}
+	return b
+}
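Taken together, the intended lifecycle appears to be generate, enable, append, export, verify. A sketch of that round trip (paths, the log value, and the event field values are placeholders):

    priv, pub, _ := security.GenerateSigningKeys()
    _ = security.SavePrivateKey("/etc/dbbackup/audit.key", priv)
    _ = security.SavePublicKey("/etc/dbbackup/audit.pub", pub)

    audit := security.NewAuditLogger(log, true)
    audit.EnableSigning(priv)
    _ = audit.AddSignedEntry(security.AuditEvent{
        Timestamp: time.Now(),
        User:      "ops",
        Action:    "backup",
        Resource:  "appdb",
        Result:    "success",
    })
    _ = audit.ExportSignedLog("/var/log/dbbackup/audit.json")

    result, _ := security.VerifyAuditLog("/var/log/dbbackup/audit.json", "/etc/dbbackup/audit.pub")
    fmt.Println(result.String())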
@@ -168,6 +168,10 @@ func (m ArchiveBrowserModel) Update(msg tea.Msg) (tea.Model, tea.Cmd) {
		}
		return m, nil

+	case tea.InterruptMsg:
+		// Handle Ctrl+C signal (SIGINT) - Bubbletea v1.3+ sends this instead of KeyMsg for ctrl+c
+		return m.parent, nil
+
	case tea.KeyMsg:
		switch msg.String() {
		case "ctrl+c", "q", "esc":
@@ -205,13 +209,21 @@ func (m ArchiveBrowserModel) Update(msg tea.Msg) (tea.Model, tea.Cmd) {
		return diagnoseView, diagnoseView.Init()
	}

-	// For restore-cluster mode: MUST be a .tar.gz cluster archive
-	// Single .sql/.dump files are NOT valid cluster backups
-	if m.mode == "restore-cluster" && !selected.Format.IsClusterBackup() {
-		m.message = errorStyle.Render(fmt.Sprintf("⚠️ Not a cluster backup: %s is a single database backup (%s). Use 'Restore Single' mode instead, or select a .tar.gz cluster archive.", selected.Name, selected.Format.String()))
+	// For restore-cluster mode: check if format can be used for cluster restore
+	// - .tar.gz: dbbackup cluster format (works with pg_restore)
+	// - .sql/.sql.gz: pg_dumpall format (works with native engine or psql)
+	if m.mode == "restore-cluster" && !selected.Format.CanBeClusterRestore() {
+		m.message = errorStyle.Render(fmt.Sprintf("⚠️ %s cannot be used for cluster restore.\n\n   Supported formats: .tar.gz (dbbackup), .sql, .sql.gz (pg_dumpall)",
+			selected.Name))
		return m, nil
	}

+	// For SQL-based cluster restore, enable native engine automatically
+	if m.mode == "restore-cluster" && !selected.Format.IsClusterBackup() {
+		// This is a .sql or .sql.gz file - use native engine
+		m.config.UseNativeEngine = true
+	}
+
	// For single restore mode with cluster backup selected - offer to select individual database
	if m.mode == "restore-single" && selected.Format.IsClusterBackup() {
		clusterSelector := NewClusterDatabaseSelector(m.config, m.logger, m, m.ctx, selected, "single", false)
@@ -54,13 +54,16 @@ type BackupExecutionModel struct {
	spinnerFrame int

	// Database count progress (for cluster backup)
	dbTotal        int
	dbDone         int
	dbName         string        // Current database being backed up
	overallPhase   int           // 1=globals, 2=databases, 3=compressing
	phaseDesc      string        // Description of current phase
	dbPhaseElapsed time.Duration // Elapsed time since database backup phase started
	dbAvgPerDB     time.Duration // Average time per database backup
+	phase2StartTime time.Time   // When phase 2 started (for realtime elapsed calculation)
+	bytesDone       int64       // Size-weighted progress: bytes completed
+	bytesTotal      int64       // Size-weighted progress: total bytes
}

// sharedBackupProgressState holds progress state that can be safely accessed from callbacks
@@ -75,6 +78,8 @@ type sharedBackupProgressState struct {
	phase2StartTime time.Time     // When phase 2 started (for realtime ETA calculation)
	dbPhaseElapsed  time.Duration // Elapsed time since database backup phase started
	dbAvgPerDB      time.Duration // Average time per database backup
+	bytesDone  int64 // Size-weighted progress: bytes completed
+	bytesTotal int64 // Size-weighted progress: total bytes
}

// Package-level shared progress state for backup operations
@ -95,7 +100,7 @@ func clearCurrentBackupProgress() {
    currentBackupProgressState = nil
}

func getCurrentBackupProgress() (dbTotal, dbDone int, dbName string, overallPhase int, phaseDesc string, hasUpdate bool, dbPhaseElapsed, dbAvgPerDB time.Duration, phase2StartTime time.Time) {
func getCurrentBackupProgress() (dbTotal, dbDone int, dbName string, overallPhase int, phaseDesc string, hasUpdate bool, dbPhaseElapsed, dbAvgPerDB time.Duration, phase2StartTime time.Time, bytesDone, bytesTotal int64) {
    // CRITICAL: Add panic recovery
    defer func() {
        if r := recover(); r != nil {
@ -108,12 +113,12 @@ func getCurrentBackupProgress(dbTotal, dbDone int, dbName string, overallPhas
    defer currentBackupProgressMu.Unlock()

    if currentBackupProgressState == nil {
        return 0, 0, "", 0, "", false, 0, 0, time.Time{}
        return 0, 0, "", 0, "", false, 0, 0, time.Time{}, 0, 0
    }

    // Double-check state isn't nil after lock
    if currentBackupProgressState == nil {
        return 0, 0, "", 0, "", false, 0, 0, time.Time{}
        return 0, 0, "", 0, "", false, 0, 0, time.Time{}, 0, 0
    }

    currentBackupProgressState.mu.Lock()
@ -123,16 +128,19 @@ func getCurrentBackupProgress(dbTotal, dbDone int, dbName string, overallPhas
    currentBackupProgressState.hasUpdate = false

    // Calculate realtime phase elapsed if we have a phase 2 start time
    dbPhaseElapsed = currentBackupProgressState.dbPhaseElapsed
    // Always recalculate from phase2StartTime for accurate real-time display
    if !currentBackupProgressState.phase2StartTime.IsZero() {
        dbPhaseElapsed = time.Since(currentBackupProgressState.phase2StartTime)
    } else {
        dbPhaseElapsed = currentBackupProgressState.dbPhaseElapsed
    }

    return currentBackupProgressState.dbTotal, currentBackupProgressState.dbDone,
        currentBackupProgressState.dbName, currentBackupProgressState.overallPhase,
        currentBackupProgressState.phaseDesc, hasUpdate,
        dbPhaseElapsed, currentBackupProgressState.dbAvgPerDB,
        currentBackupProgressState.phase2StartTime
        currentBackupProgressState.phase2StartTime,
        currentBackupProgressState.bytesDone, currentBackupProgressState.bytesTotal
}

func NewBackupExecution(cfg *config.Config, log logger.Logger, parent tea.Model, ctx context.Context, backupType, dbName string, ratio int) BackupExecutionModel {
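Reviewer note: deriving the elapsed time from `phase2StartTime` on every poll (instead of trusting the last pushed snapshot) keeps the on-screen timer moving even when no progress callback has fired. A self-contained sketch of that idea, with hypothetical names:

```go
package main

import (
	"fmt"
	"time"
)

// phaseClock tracks when a phase began; Elapsed is derived on demand,
// so the reading is always current even if no progress event arrived.
type phaseClock struct {
	start    time.Time     // zero until the phase actually starts
	snapshot time.Duration // last value pushed by a callback (fallback)
}

func (c *phaseClock) Elapsed() time.Duration {
	if !c.start.IsZero() {
		return time.Since(c.start) // live value, recomputed on every call
	}
	return c.snapshot // phase not started yet: fall back to the snapshot
}

func main() {
	c := phaseClock{start: time.Now().Add(-90 * time.Second)}
	fmt.Println(c.Elapsed().Round(time.Second)) // ~1m30s, no callback needed
}
```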
@ -181,11 +189,22 @@ type backupCompleteMsg struct {
}

func executeBackupWithTUIProgress(parentCtx context.Context, cfg *config.Config, log logger.Logger, backupType, dbName string, ratio int) tea.Cmd {
    return func() tea.Msg {
    return func() (returnMsg tea.Msg) {
        // CRITICAL: Add panic recovery to prevent TUI crashes on context cancellation
        start := time.Now()

        // CRITICAL: Add panic recovery that RETURNS a proper message to BubbleTea.
        // Without this, if a panic occurs the command function returns nil,
        // causing BubbleTea's execBatchMsg WaitGroup to hang forever waiting
        // for a message that never comes.
        defer func() {
            if r := recover(); r != nil {
                log.Error("Backup execution panic recovered", "panic", r, "database", dbName)
                // CRITICAL: Set the named return value so BubbleTea receives a message
                returnMsg = backupCompleteMsg{
                    result:  "",
                    err:     fmt.Errorf("backup panic: %v", r),
                    elapsed: time.Since(start),
                }
            }
        }()
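Reviewer note: this named-return-plus-recover shape is the reusable fix here — a command must always deliver a message, and a plain `return` inside a `defer` cannot do that. A stripped-down sketch of just the mechanism (the message type is hypothetical):

```go
package main

import (
	"errors"
	"fmt"
)

type doneMsg struct{ err error }

// runCmd always yields a doneMsg, even if work() panics: the deferred
// recover writes to the named return value msg before the function exits.
func runCmd(work func()) (msg interface{}) {
	defer func() {
		if r := recover(); r != nil {
			msg = doneMsg{err: fmt.Errorf("panic: %v", r)}
		}
	}()
	work()
	return doneMsg{err: nil}
}

func main() {
	m := runCmd(func() { panic(errors.New("boom")) })
	fmt.Printf("%+v\n", m) // caller still receives a message, never nil
}
```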
@ -201,8 +220,6 @@ func executeBackupWithTUIProgress(parentCtx context.Context, cfg *config.Config,
        }
    }

    start := time.Now()

    // Setup shared progress state for TUI polling
    progressState := &sharedBackupProgressState{}
    setCurrentBackupProgress(progressState)
@ -227,8 +244,8 @@ func executeBackupWithTUIProgress(parentCtx context.Context, cfg *config.Config,
    // Pass nil as indicator - TUI itself handles all display, no stdout printing
    engine := backup.NewSilent(cfg, log, dbClient, nil)

    // Set database progress callback for cluster backups
    // Set database progress callback for cluster backups (with size-weighted progress)
    engine.SetDatabaseProgressCallback(func(done, total int, currentDB string) {
    engine.SetDatabaseProgressCallback(func(done, total int, currentDB string, bytesDone, bytesTotal int64) {
        // CRITICAL: Panic recovery to prevent nil pointer crashes
        defer func() {
            if r := recover(); r != nil {
@ -245,13 +262,18 @@ func executeBackupWithTUIProgress(parentCtx context.Context, cfg *config.Config,
        progressState.dbDone = done
        progressState.dbTotal = total
        progressState.dbName = currentDB
        progressState.bytesDone = bytesDone
        progressState.bytesTotal = bytesTotal
        progressState.overallPhase = backupPhaseDatabases
        progressState.phaseDesc = fmt.Sprintf("Phase 2/3: Backing up Databases (%d/%d)", done, total)
        progressState.hasUpdate = true
        // Set phase 2 start time on first callback (for realtime ETA calculation)
        if progressState.phase2StartTime.IsZero() {
            progressState.phase2StartTime = time.Now()
            log.Info("Phase 2 started", "time", progressState.phase2StartTime)
        }
        // Calculate elapsed time immediately
        progressState.dbPhaseElapsed = time.Since(progressState.phase2StartTime)
        progressState.mu.Unlock()
    })
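Reviewer note: the callback writes into state shared with the TUI's polling tick, so every field update happens under the state's mutex. A self-contained sketch of that producer/consumer shape (all names hypothetical):

```go
package main

import (
	"fmt"
	"sync"
	"time"
)

// progress is written by the backup callback and polled by the TUI tick.
type progress struct {
	mu        sync.Mutex
	done      int
	total     int
	bytesDone int64
	bytesTot  int64
}

func (p *progress) update(done, total int, bDone, bTot int64) {
	p.mu.Lock()
	defer p.mu.Unlock()
	p.done, p.total, p.bytesDone, p.bytesTot = done, total, bDone, bTot
}

func (p *progress) snapshot() (int, int, int64, int64) {
	p.mu.Lock()
	defer p.mu.Unlock()
	return p.done, p.total, p.bytesDone, p.bytesTot
}

func main() {
	p := &progress{}
	go func() { // stands in for the engine's progress callback
		for i := 1; i <= 3; i++ {
			p.update(i, 3, int64(i)*1_000_000, 3_000_000)
			time.Sleep(10 * time.Millisecond)
		}
	}()
	time.Sleep(50 * time.Millisecond) // stands in for a TUI poll tick
	d, t, bd, bt := p.snapshot()
	fmt.Printf("%d/%d databases, %d/%d bytes\n", d, t, bd, bt)
}
```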
@ -310,7 +332,7 @@ func (m BackupExecutionModel) Update(msg tea.Msg) (tea.Model, tea.Cmd) {
        var overallPhase int
        var phaseDesc string
        var hasUpdate bool
        var dbPhaseElapsed, dbAvgPerDB time.Duration
        var dbAvgPerDB time.Duration

        func() {
            defer func() {
@ -318,7 +340,17 @@ func (m BackupExecutionModel) Update(msg tea.Msg) (tea.Model, tea.Cmd) {
                    m.logger.Warn("Backup progress polling panic recovered", "panic", r)
                }
            }()
            dbTotal, dbDone, dbName, overallPhase, phaseDesc, hasUpdate, dbPhaseElapsed, dbAvgPerDB, _ = getCurrentBackupProgress()
            var phase2Start time.Time
            var phaseElapsed time.Duration
            var bytesDone, bytesTotal int64
            dbTotal, dbDone, dbName, overallPhase, phaseDesc, hasUpdate, phaseElapsed, dbAvgPerDB, phase2Start, bytesDone, bytesTotal = getCurrentBackupProgress()
            _ = phaseElapsed // We recalculate this below from phase2StartTime
            if !phase2Start.IsZero() && m.phase2StartTime.IsZero() {
                m.phase2StartTime = phase2Start
            }
            // Always update size info for accurate ETA
            m.bytesDone = bytesDone
            m.bytesTotal = bytesTotal
        }()

        if hasUpdate {
@ -327,10 +359,14 @@ func (m BackupExecutionModel) Update(msg tea.Msg) (tea.Model, tea.Cmd) {
            m.dbName = dbName
            m.overallPhase = overallPhase
            m.phaseDesc = phaseDesc
            m.dbPhaseElapsed = dbPhaseElapsed
            m.dbAvgPerDB = dbAvgPerDB
        }

        // Always recalculate elapsed time from phase2StartTime for accurate real-time display
        if !m.phase2StartTime.IsZero() {
            m.dbPhaseElapsed = time.Since(m.phase2StartTime)
        }

        // Update status based on progress and elapsed time
        elapsedSec := int(time.Since(m.startTime).Seconds())
@ -426,14 +462,19 @@ func (m BackupExecutionModel) Update(msg tea.Msg) (tea.Model, tea.Cmd) {
    return m, nil
}

// renderBackupDatabaseProgressBarWithTiming renders database backup progress with ETA
// renderBackupDatabaseProgressBarWithTiming renders database backup progress with size-weighted ETA
func renderBackupDatabaseProgressBarWithTiming(done, total int, dbPhaseElapsed, dbAvgPerDB time.Duration) string {
func renderBackupDatabaseProgressBarWithTiming(done, total int, dbPhaseElapsed time.Duration, bytesDone, bytesTotal int64) string {
    if total == 0 {
        return ""
    }

    // Calculate progress percentage
    // Use size-weighted progress if available, otherwise fall back to count-based
    percent := float64(done) / float64(total)
    var percent float64
    if bytesTotal > 0 {
        percent = float64(bytesDone) / float64(bytesTotal)
    } else {
        percent = float64(done) / float64(total)
    }
    if percent > 1.0 {
        percent = 1.0
    }
@ -446,19 +487,31 @@ func renderBackupDatabaseProgressBarWithTiming(done, total int, dbPhaseElapsed,
    }
    bar := strings.Repeat("█", filled) + strings.Repeat("░", barWidth-filled)

    // Calculate ETA similar to restore
    // Calculate size-weighted ETA (much more accurate for mixed database sizes)
    var etaStr string
    if done > 0 && done < total {
    if bytesDone > 0 && bytesDone < bytesTotal && bytesTotal > 0 {
        // Size-weighted: ETA = elapsed * (remaining_bytes / done_bytes)
        remainingBytes := bytesTotal - bytesDone
        eta := time.Duration(float64(dbPhaseElapsed) * float64(remainingBytes) / float64(bytesDone))
        etaStr = fmt.Sprintf(" | ETA: %s", formatDuration(eta))
    } else if done > 0 && done < total && bytesTotal == 0 {
        // Fallback to count-based if no size info
        avgPerDB := dbPhaseElapsed / time.Duration(done)
        remaining := total - done
        eta := avgPerDB * time.Duration(remaining)
        etaStr = fmt.Sprintf(" | ETA: %s", formatDuration(eta))
        etaStr = fmt.Sprintf(" | ETA: ~%s", formatDuration(eta))
    } else if done == total {
        etaStr = " | Complete"
    }

    return fmt.Sprintf("  Databases: [%s] %d/%d | Elapsed: %s%s\n",
        bar, done, total, formatDuration(dbPhaseElapsed), etaStr)
    // Show size progress if available
    var sizeInfo string
    if bytesTotal > 0 {
        sizeInfo = fmt.Sprintf(" (%s/%s)", FormatBytes(bytesDone), FormatBytes(bytesTotal))
    }

    return fmt.Sprintf("  Databases: [%s] %d/%d%s | Elapsed: %s%s\n",
        bar, done, total, sizeInfo, formatDuration(dbPhaseElapsed), etaStr)
}

func (m BackupExecutionModel) View() string {
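Reviewer note: the size-weighted ETA is plain proportional scaling — if 30 of 90 GiB took 2 minutes, the remaining 60 GiB should take roughly 4 more. A minimal sketch of just that calculation, under the same constant-throughput assumption the change relies on:

```go
package main

import (
	"fmt"
	"time"
)

// etaBySize scales elapsed time by remaining/done bytes. It assumes roughly
// constant throughput, which holds up better than per-database averaging
// when database sizes vary widely.
func etaBySize(elapsed time.Duration, bytesDone, bytesTotal int64) (time.Duration, bool) {
	if bytesDone <= 0 || bytesTotal <= bytesDone {
		return 0, false // nothing done yet, or already finished
	}
	remaining := bytesTotal - bytesDone
	return time.Duration(float64(elapsed) * float64(remaining) / float64(bytesDone)), true
}

func main() {
	eta, ok := etaBySize(2*time.Minute, 30<<30, 90<<30) // 30 GiB of 90 GiB in 2m
	fmt.Println(eta, ok)                                // 4m0s true
}
```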
@ -547,8 +600,8 @@ func (m BackupExecutionModel) View() string {
        }
        s.WriteString("\n")

        // Database progress bar with timing
        // Database progress bar with size-weighted timing
        s.WriteString(renderBackupDatabaseProgressBarWithTiming(m.dbDone, m.dbTotal, m.dbPhaseElapsed, m.dbAvgPerDB))
        s.WriteString(renderBackupDatabaseProgressBarWithTiming(m.dbDone, m.dbTotal, m.dbPhaseElapsed, m.bytesDone, m.bytesTotal))
        s.WriteString("\n")
    } else {
        // Intermediate phase (globals)
@ -97,13 +97,17 @@ func (m ClusterDatabaseSelectorModel) Update(msg tea.Msg) (tea.Model, tea.Cmd) {
        }
        return m, nil

    case tea.InterruptMsg:
        // Handle Ctrl+C signal (SIGINT) - Bubbletea v1.3+ sends this instead of KeyMsg for ctrl+c
        return m.parent, nil

    case tea.KeyMsg:
        if m.loading {
            return m, nil
        }

        switch msg.String() {
        case "q", "esc":
        case "ctrl+c", "q", "esc":
            // Return to parent
            return m.parent, nil
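Reviewer note: the same two-path Ctrl+C handling recurs in several models below, so it is worth seeing in isolation. A minimal compiling sketch — it assumes Bubble Tea v1.3+ (where SIGINT arrives as `tea.InterruptMsg`), and the model fields are hypothetical:

```go
package main

import tea "github.com/charmbracelet/bubbletea"

// someModel is a hypothetical sub-model with a parent to fall back to.
type someModel struct {
	parent tea.Model
}

func (m someModel) Init() tea.Cmd { return nil }

func (m someModel) View() string { return "press q or ctrl+c to go back" }

// Update handles both delivery paths for Ctrl+C: Bubble Tea v1.3+ sends
// tea.InterruptMsg for SIGINT, while older versions send a "ctrl+c" KeyMsg.
func (m someModel) Update(msg tea.Msg) (tea.Model, tea.Cmd) {
	switch msg := msg.(type) {
	case tea.InterruptMsg:
		return m.parent, nil
	case tea.KeyMsg:
		switch msg.String() {
		case "ctrl+c", "q", "esc":
			return m.parent, nil
		}
	}
	return m, nil
}

func main() {}
```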
@ -70,9 +70,18 @@ func (m ConfirmationModel) Update(msg tea.Msg) (tea.Model, tea.Cmd) {
        if m.onConfirm != nil {
            return m.onConfirm()
        }
        executor := NewBackupExecution(m.config, m.logger, m.parent, m.ctx, "cluster", "", 0)
        // Default fallback (should not be reached if onConfirm is always provided)
        ctx := m.ctx
        if ctx == nil {
            ctx = context.Background()
        }
        executor := NewBackupExecution(m.config, m.logger, m.parent, ctx, "cluster", "", 0)
        return executor, executor.Init()

    case tea.InterruptMsg:
        // Handle Ctrl+C signal (SIGINT) - Bubbletea v1.3+ sends this instead of KeyMsg for ctrl+c
        return m.parent, nil

    case tea.KeyMsg:
        // Auto-forward ESC/quit in auto-confirm mode
        if m.config.TUIAutoConfirm {
@ -98,8 +107,12 @@ func (m ConfirmationModel) Update(msg tea.Msg) (tea.Model, tea.Cmd) {
            if m.onConfirm != nil {
                return m.onConfirm()
            }
            // Default: execute cluster backup for backward compatibility
            // Default fallback (should not be reached if onConfirm is always provided)
            executor := NewBackupExecution(m.config, m.logger, m.parent, m.ctx, "cluster", "", 0)
            ctx := m.ctx
            if ctx == nil {
                ctx = context.Background()
            }
            executor := NewBackupExecution(m.config, m.logger, m, ctx, "cluster", "", 0)
            return executor, executor.Init()
        }
        return m.parent, nil
@ -126,6 +126,10 @@ func (m DatabaseSelectorModel) Update(msg tea.Msg) (tea.Model, tea.Cmd) {
        }
        return m, nil

    case tea.InterruptMsg:
        // Handle Ctrl+C signal (SIGINT) - Bubbletea v1.3+ sends this instead of KeyMsg for ctrl+c
        return m.parent, nil

    case tea.KeyMsg:
        // Auto-forward ESC/quit in auto-confirm mode
        if m.config.TUIAutoConfirm {
@ -303,10 +303,10 @@ func (m *MenuModel) Update(msg tea.Msg) (tea.Model, tea.Cmd) {
        return m.handleSchedule()
    case 9: // View Backup Chain
        return m.handleChain()
    case 10: // System Resource Profile
    case 10: // Separator
        return m.handleProfile()
    case 11: // Separator
        // Do nothing
    case 11: // System Resource Profile
        return m.handleProfile()
    case 12: // Tools
        return m.handleTools()
    case 13: // View Active Operations
@ -181,9 +181,17 @@ func (m *ProfileModel) Update(msg tea.Msg) (tea.Model, tea.Cmd) {
        }
        return m, nil

    case tea.InterruptMsg:
        // Handle Ctrl+C signal (SIGINT) - Bubbletea v1.3+ sends this instead of KeyMsg for ctrl+c
        m.quitting = true
        if m.parent != nil {
            return m.parent, nil
        }
        return m, tea.Quit

    case tea.KeyMsg:
        switch msg.String() {
        case "q", "esc":
        case "ctrl+c", "q", "esc":
            m.quitting = true
            if m.parent != nil {
                return m.parent, nil
@ -245,9 +245,11 @@ func getCurrentRestoreProgress() (bytesTotal, bytesDone int64, description strin
    speed = calculateRollingSpeed(currentRestoreProgressState.speedSamples)

    // Calculate realtime phase elapsed if we have a phase 3 start time
    dbPhaseElapsed = currentRestoreProgressState.dbPhaseElapsed
    // Always recalculate from phase3StartTime for accurate real-time display
    if !currentRestoreProgressState.phase3StartTime.IsZero() {
        dbPhaseElapsed = time.Since(currentRestoreProgressState.phase3StartTime)
    } else {
        dbPhaseElapsed = currentRestoreProgressState.dbPhaseElapsed
    }

    return currentRestoreProgressState.bytesTotal, currentRestoreProgressState.bytesDone,
@ -308,13 +310,53 @@ func calculateRollingSpeed(samples []restoreSpeedSample) float64 {
}

func executeRestoreWithTUIProgress(parentCtx context.Context, cfg *config.Config, log logger.Logger, archive ArchiveInfo, targetDB string, cleanFirst, createIfMissing bool, restoreType string, cleanClusterFirst bool, existingDBs []string, saveDebugLog bool) tea.Cmd {
    return func() tea.Msg {
    return func() (returnMsg tea.Msg) {
        // CRITICAL: Add panic recovery to prevent TUI crashes on context cancellation
        start := time.Now()

        // TUI Debug Log: Always write to file when debug is enabled (even on success/hang)
        var tuiDebugFile *os.File
        if saveDebugLog {
            workDir := cfg.GetEffectiveWorkDir()
            tuiLogPath := filepath.Join(workDir, fmt.Sprintf("dbbackup-tui-debug-%s.log", time.Now().Format("20060102-150405")))
            var err error
            tuiDebugFile, err = os.Create(tuiLogPath)
            if err == nil {
                defer tuiDebugFile.Close()
                fmt.Fprintf(tuiDebugFile, "=== TUI Restore Debug Log ===\n")
                fmt.Fprintf(tuiDebugFile, "Started: %s\n", time.Now().Format(time.RFC3339))
                fmt.Fprintf(tuiDebugFile, "Archive: %s\n", archive.Path)
                fmt.Fprintf(tuiDebugFile, "RestoreType: %s\n", restoreType)
                fmt.Fprintf(tuiDebugFile, "TargetDB: %s\n", targetDB)
                fmt.Fprintf(tuiDebugFile, "CleanCluster: %v\n", cleanClusterFirst)
                fmt.Fprintf(tuiDebugFile, "ExistingDBs: %v\n\n", existingDBs)
                log.Info("TUI debug log enabled", "path", tuiLogPath)
            }
        }
        tuiLog := func(msg string, args ...interface{}) {
            if tuiDebugFile != nil {
                fmt.Fprintf(tuiDebugFile, "[%s] %s", time.Now().Format("15:04:05.000"), fmt.Sprintf(msg, args...))
                fmt.Fprintln(tuiDebugFile)
                tuiDebugFile.Sync() // Flush immediately so we capture hangs
            }
        }

        tuiLog("Starting restore execution")

        // CRITICAL: Add panic recovery that RETURNS a proper message to BubbleTea.
        // Without this, if a panic occurs the command function returns nil,
        // causing BubbleTea's execBatchMsg WaitGroup to hang forever waiting
        // for a message that never comes. This was the root cause of the
        // TUI cluster restore hang/panic issue.
        defer func() {
            if r := recover(); r != nil {
                log.Error("Restore execution panic recovered", "panic", r, "database", targetDB)
                // Return error message instead of crashing
                // CRITICAL: Set the named return value so BubbleTea receives a message
                // Note: We can't return from defer, so this just logs
                // This prevents the WaitGroup deadlock in execBatchMsg
                returnMsg = restoreCompleteMsg{
                    result:  "",
                    err:     fmt.Errorf("restore panic: %v", r),
                    elapsed: time.Since(start),
                }
            }
        }()
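Reviewer note: since the whole point of this log is to diagnose hangs, each line is flushed with `Sync()` the instant it is written — buffered output would be lost at exactly the moment it matters. A small self-contained sketch of the same closure pattern:

```go
package main

import (
	"fmt"
	"os"
	"time"
)

func main() {
	f, err := os.Create("/tmp/debug-example.log")
	if err != nil {
		panic(err)
	}
	defer f.Close()

	// logf writes one timestamped line and flushes immediately, so the
	// file reflects progress even if the process hangs or is killed.
	logf := func(format string, args ...interface{}) {
		fmt.Fprintf(f, "[%s] %s\n", time.Now().Format("15:04:05.000"), fmt.Sprintf(format, args...))
		f.Sync()
	}

	logf("step %d: starting", 1)
	logf("step %d: still alive", 2)
}
```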
@ -322,8 +364,11 @@ func executeRestoreWithTUIProgress(parentCtx context.Context, cfg *config.Config
        // DO NOT create a new context here as it breaks Ctrl+C cancellation
        ctx := parentCtx

        tuiLog("Checking context state")

        // Check if context is already cancelled
        if ctx.Err() != nil {
            tuiLog("Context already cancelled: %v", ctx.Err())
            return restoreCompleteMsg{
                result: "",
                err:    fmt.Errorf("operation cancelled: %w", ctx.Err()),
@ -331,11 +376,12 @@ func executeRestoreWithTUIProgress(parentCtx context.Context, cfg *config.Config
            }
        }

        start := time.Now()
        tuiLog("Creating database client")

        // Create database instance
        dbClient, err := database.New(cfg, log)
        if err != nil {
            tuiLog("Database client creation failed: %v", err)
            return restoreCompleteMsg{
                result: "",
                err:    fmt.Errorf("failed to create database client: %w", err),
@ -344,8 +390,11 @@ func executeRestoreWithTUIProgress(parentCtx context.Context, cfg *config.Config
        }
        defer dbClient.Close()

        tuiLog("Database client created successfully")

        // STEP 1: Clean cluster if requested (drop all existing user databases)
        if restoreType == "restore-cluster" && cleanClusterFirst {
            tuiLog("STEP 1: Cleaning cluster (dropping existing DBs)")
            // Re-detect databases at execution time to get current state
            // The preview list may be stale or detection may have failed earlier
            safety := restore.NewSafety(cfg, log)
@ -365,8 +414,9 @@ func executeRestoreWithTUIProgress(parentCtx context.Context, cfg *config.Config
            // This matches how cluster restore works - uses CLI tools, not database connections
            droppedCount := 0
            for _, dbName := range existingDBs {
                // Create timeout context for each database drop (5 minutes per DB - large DBs take time)
                // Create timeout context for each database drop (60 seconds per DB)
                // Reduced from 5 minutes for better TUI responsiveness
                dropCtx, dropCancel := context.WithTimeout(ctx, 5*time.Minute)
                dropCtx, dropCancel := context.WithTimeout(ctx, 60*time.Second)
                if err := dropDatabaseCLI(dropCtx, cfg, dbName); err != nil {
                    log.Warn("Failed to drop database", "name", dbName, "error", err)
                    // Continue with other databases
@ -480,6 +530,8 @@ func executeRestoreWithTUIProgress(parentCtx context.Context, cfg *config.Config
            if progressState.phase3StartTime.IsZero() {
                progressState.phase3StartTime = time.Now()
            }
            // Calculate elapsed time immediately for accurate display
            progressState.dbPhaseElapsed = time.Since(progressState.phase3StartTime)
            // Clear byte progress when switching to db progress
            progressState.bytesTotal = 0
            progressState.bytesDone = 0
@ -521,6 +573,10 @@ func executeRestoreWithTUIProgress(parentCtx context.Context, cfg *config.Config
            if progressState.phase3StartTime.IsZero() {
                progressState.phase3StartTime = time.Now()
            }
            // Recalculate elapsed for accuracy if phaseElapsed not provided
            if phaseElapsed == 0 && !progressState.phase3StartTime.IsZero() {
                progressState.dbPhaseElapsed = time.Since(progressState.phase3StartTime)
            }
            // Clear byte progress when switching to db progress
            progressState.bytesTotal = 0
            progressState.bytesDone = 0
@ -561,6 +617,8 @@ func executeRestoreWithTUIProgress(parentCtx context.Context, cfg *config.Config
            if progressState.phase3StartTime.IsZero() {
                progressState.phase3StartTime = time.Now()
            }
            // Calculate elapsed time immediately for accurate display
            progressState.dbPhaseElapsed = time.Since(progressState.phase3StartTime)

            // Update unified progress tracker
            if progressState.unifiedProgress != nil {
|
|||||||
log.Info("Debug logging enabled", "path", debugLogPath)
|
log.Info("Debug logging enabled", "path", debugLogPath)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
tuiLog("STEP 3: Executing restore (type=%s)", restoreType)
|
||||||
|
|
||||||
// STEP 3: Execute restore based on type
|
// STEP 3: Execute restore based on type
|
||||||
var restoreErr error
|
var restoreErr error
|
||||||
if restoreType == "restore-cluster" {
|
if restoreType == "restore-cluster" {
|
||||||
// Use pre-extracted directory if available (optimization)
|
// Use pre-extracted directory if available (optimization)
|
||||||
if archive.ExtractedDir != "" {
|
if archive.ExtractedDir != "" {
|
||||||
|
tuiLog("Using pre-extracted cluster directory: %s", archive.ExtractedDir)
|
||||||
log.Info("Using pre-extracted cluster directory", "path", archive.ExtractedDir)
|
log.Info("Using pre-extracted cluster directory", "path", archive.ExtractedDir)
|
||||||
defer os.RemoveAll(archive.ExtractedDir) // Cleanup after restore completes
|
defer os.RemoveAll(archive.ExtractedDir) // Cleanup after restore completes
|
||||||
restoreErr = engine.RestoreCluster(ctx, archive.Path, archive.ExtractedDir)
|
restoreErr = engine.RestoreCluster(ctx, archive.Path, archive.ExtractedDir)
|
||||||
} else {
|
} else {
|
||||||
|
tuiLog("Calling engine.RestoreCluster for: %s", archive.Path)
|
||||||
restoreErr = engine.RestoreCluster(ctx, archive.Path)
|
restoreErr = engine.RestoreCluster(ctx, archive.Path)
|
||||||
}
|
}
|
||||||
|
tuiLog("RestoreCluster returned: err=%v", restoreErr)
|
||||||
} else if restoreType == "restore-cluster-single" {
|
} else if restoreType == "restore-cluster-single" {
|
||||||
|
tuiLog("Calling RestoreSingleFromCluster: %s -> %s", archive.Path, targetDB)
|
||||||
// Restore single database from cluster backup
|
// Restore single database from cluster backup
|
||||||
// Also cleanup pre-extracted dir if present
|
// Also cleanup pre-extracted dir if present
|
||||||
if archive.ExtractedDir != "" {
|
if archive.ExtractedDir != "" {
|
||||||
defer os.RemoveAll(archive.ExtractedDir)
|
defer os.RemoveAll(archive.ExtractedDir)
|
||||||
}
|
}
|
||||||
restoreErr = engine.RestoreSingleFromCluster(ctx, archive.Path, targetDB, targetDB, cleanFirst, createIfMissing)
|
restoreErr = engine.RestoreSingleFromCluster(ctx, archive.Path, targetDB, targetDB, cleanFirst, createIfMissing)
|
||||||
|
tuiLog("RestoreSingleFromCluster returned: err=%v", restoreErr)
|
||||||
} else {
|
} else {
|
||||||
|
tuiLog("Calling RestoreSingle: %s -> %s", archive.Path, targetDB)
|
||||||
restoreErr = engine.RestoreSingle(ctx, archive.Path, targetDB, cleanFirst, createIfMissing)
|
restoreErr = engine.RestoreSingle(ctx, archive.Path, targetDB, cleanFirst, createIfMissing)
|
||||||
|
tuiLog("RestoreSingle returned: err=%v", restoreErr)
|
||||||
}
|
}
|
||||||
|
|
||||||
if restoreErr != nil {
|
if restoreErr != nil {
|
||||||
|
tuiLog("Restore failed: %v", restoreErr)
|
||||||
return restoreCompleteMsg{
|
return restoreCompleteMsg{
|
||||||
result: "",
|
result: "",
|
||||||
err: restoreErr,
|
err: restoreErr,
|
||||||
@ -624,6 +692,8 @@ func executeRestoreWithTUIProgress(parentCtx context.Context, cfg *config.Config
|
|||||||
result = fmt.Sprintf("Successfully restored cluster from %s (cleaned %d existing database(s) first)", archive.Name, len(existingDBs))
|
result = fmt.Sprintf("Successfully restored cluster from %s (cleaned %d existing database(s) first)", archive.Name, len(existingDBs))
|
||||||
}
|
}
|
||||||
|
|
||||||
|
tuiLog("Restore completed successfully: %s", result)
|
||||||
|
|
||||||
return restoreCompleteMsg{
|
return restoreCompleteMsg{
|
||||||
result: result,
|
result: result,
|
||||||
err: nil,
|
err: nil,
|
||||||
|
|||||||
@ -99,6 +99,22 @@ type safetyCheckCompleteMsg struct {

func runSafetyChecks(cfg *config.Config, log logger.Logger, archive ArchiveInfo, targetDB string) tea.Cmd {
    return func() tea.Msg {
        // Check if preflight checks should be skipped
        if cfg != nil && cfg.SkipPreflightChecks {
            // Return all checks as "skipped" with warning
            checks := []SafetyCheck{
                {Name: "Archive integrity", Status: "warning", Message: "⚠️ SKIPPED - preflight checks disabled", Critical: true},
                {Name: "Dump validity", Status: "warning", Message: "⚠️ SKIPPED - preflight checks disabled", Critical: true},
                {Name: "Disk space", Status: "warning", Message: "⚠️ SKIPPED - preflight checks disabled", Critical: true},
                {Name: "Required tools", Status: "warning", Message: "⚠️ SKIPPED - preflight checks disabled", Critical: true},
                {Name: "Target database", Status: "warning", Message: "⚠️ SKIPPED - preflight checks disabled", Critical: false},
            }
            return safetyCheckCompleteMsg{
                checks:     checks,
                canProceed: true, // Allow proceeding but with warnings
            }
        }

        // Dynamic timeout based on archive size for large database support
        // Base: 10 minutes + 1 minute per 5 GB, max 120 minutes
        timeoutMinutes := 10
|
|||||||
}
|
}
|
||||||
return m, nil
|
return m, nil
|
||||||
|
|
||||||
|
case tea.InterruptMsg:
|
||||||
|
// Handle Ctrl+C signal (SIGINT) - Bubbletea v1.3+ sends this instead of KeyMsg for ctrl+c
|
||||||
|
return m.parent, nil
|
||||||
|
|
||||||
case tea.KeyMsg:
|
case tea.KeyMsg:
|
||||||
switch msg.String() {
|
switch msg.String() {
|
||||||
case "ctrl+c", "q", "esc":
|
case "ctrl+c", "q", "esc":
|
||||||
@ -526,6 +546,14 @@ func (m RestorePreviewModel) View() string {
|
|||||||
s.WriteString(archiveHeaderStyle.Render("[SAFETY] Checks"))
|
s.WriteString(archiveHeaderStyle.Render("[SAFETY] Checks"))
|
||||||
s.WriteString("\n")
|
s.WriteString("\n")
|
||||||
|
|
||||||
|
// Show warning banner if preflight checks are skipped
|
||||||
|
if m.config != nil && m.config.SkipPreflightChecks {
|
||||||
|
s.WriteString(CheckWarningStyle.Render(" ⚠️ PREFLIGHT CHECKS DISABLED ⚠️"))
|
||||||
|
s.WriteString("\n")
|
||||||
|
s.WriteString(CheckWarningStyle.Render(" Restore may fail unexpectedly. Re-enable in Settings."))
|
||||||
|
s.WriteString("\n\n")
|
||||||
|
}
|
||||||
|
|
||||||
if m.checking {
|
if m.checking {
|
||||||
s.WriteString(infoStyle.Render(" Running safety checks..."))
|
s.WriteString(infoStyle.Render(" Running safety checks..."))
|
||||||
s.WriteString("\n")
|
s.WriteString("\n")
|
||||||
|
|||||||
@ -165,6 +165,22 @@ func NewSettingsModel(cfg *config.Config, log logger.Logger, parent tea.Model) S
    Type:        "selector",
    Description: "Enable for databases with many tables/LOBs. Reduces parallelism, increases max_locks_per_transaction.",
},
{
    Key:         "skip_preflight_checks",
    DisplayName: "Skip Preflight Checks",
    Value: func(c *config.Config) string {
        if c.SkipPreflightChecks {
            return "⚠️ SKIPPED (dangerous)"
        }
        return "Enabled (safe)"
    },
    Update: func(c *config.Config, v string) error {
        c.SkipPreflightChecks = !c.SkipPreflightChecks
        return nil
    },
    Type:        "selector",
    Description: "⚠️ WARNING: Skipping checks may result in failed restores or data loss. Only use if checks are too slow.",
},
{
    Key:         "cluster_parallelism",
    DisplayName: "Cluster Parallelism",
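Reviewer note: each settings row pairs a renderer (`Value`) with a mutator (`Update`) over the same config, so a boolean toggle needs no extra plumbing — `Update` just flips the flag and `Value` re-renders it. A minimal sketch of that pattern with a hypothetical config and entry type:

```go
package main

import "fmt"

type Config struct {
	SkipPreflightChecks bool
}

// SettingEntry pairs a renderer with a mutator over the same Config.
type SettingEntry struct {
	Key    string
	Value  func(*Config) string
	Update func(*Config, string) error
}

func main() {
	entry := SettingEntry{
		Key: "skip_preflight_checks",
		Value: func(c *Config) string {
			if c.SkipPreflightChecks {
				return "SKIPPED (dangerous)"
			}
			return "Enabled (safe)"
		},
		// For a toggle, the incoming value is ignored: each Update flips the flag.
		Update: func(c *Config, _ string) error {
			c.SkipPreflightChecks = !c.SkipPreflightChecks
			return nil
		},
	}

	cfg := &Config{}
	fmt.Println(entry.Value(cfg)) // Enabled (safe)
	entry.Update(cfg, "")
	fmt.Println(entry.Value(cfg)) // SKIPPED (dangerous)
}
```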
2 main.go
@ -16,7 +16,7 @@ import (

// Build information (set by ldflags)
var (
    version   = "5.8.4"
    version   = "5.8.24"
    buildTime = "unknown"
    gitCommit = "unknown"
)
233 release.sh Executable file
@ -0,0 +1,233 @@
#!/bin/bash
# Release script for dbbackup
# Builds binaries and creates/updates GitHub release
#
# Usage:
#   ./release.sh              # Build and release current version
#   ./release.sh --bump       # Bump patch version, build, and release
#   ./release.sh --update     # Update existing release with new binaries
#   ./release.sh --dry-run    # Show what would happen without doing it

set -e

# Colors
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[0;33m'
BLUE='\033[0;34m'
BOLD='\033[1m'
NC='\033[0m'

# Configuration
TOKEN_FILE=".gh_token"
MAIN_FILE="main.go"

# Parse arguments
BUMP_VERSION=false
UPDATE_ONLY=false
DRY_RUN=false
RELEASE_MSG=""

while [[ $# -gt 0 ]]; do
    case $1 in
        --bump)
            BUMP_VERSION=true
            shift
            ;;
        --update)
            UPDATE_ONLY=true
            shift
            ;;
        --dry-run)
            DRY_RUN=true
            shift
            ;;
        -m|--message)
            RELEASE_MSG="$2"
            shift 2
            ;;
        --help|-h)
            echo "Usage: $0 [OPTIONS]"
            echo ""
            echo "Options:"
            echo "  --bump        Bump patch version before release"
            echo "  --update      Update existing release (don't create new)"
            echo "  --dry-run     Show what would happen without doing it"
            echo "  -m, --message Release message/comment (required for new releases)"
            echo "  --help        Show this help"
            echo ""
            echo "Examples:"
            echo "  $0 -m \"Fix TUI crash on cluster restore\""
            echo "  $0 --bump -m \"Add new backup compression option\""
            echo "  $0 --update   # Just update binaries, no message needed"
            echo ""
            echo "Token file: .gh_token (gitignored)"
            exit 0
            ;;
        *)
            echo -e "${RED}Unknown option: $1${NC}"
            echo "Use --help for usage"
            exit 1
            ;;
    esac
done

# Check for GitHub token
if [ ! -f "$TOKEN_FILE" ]; then
    echo -e "${RED}❌ Token file not found: $TOKEN_FILE${NC}"
    echo ""
    echo "Create it with:"
    echo "  echo 'your_github_token' > $TOKEN_FILE"
    echo ""
    echo "The file is gitignored for security."
    exit 1
fi

GH_TOKEN=$(cat "$TOKEN_FILE" | tr -d '[:space:]')
if [ -z "$GH_TOKEN" ]; then
    echo -e "${RED}❌ Token file is empty${NC}"
    exit 1
fi

export GH_TOKEN

# Get current version
CURRENT_VERSION=$(grep 'version.*=' "$MAIN_FILE" | head -1 | sed 's/.*"\(.*\)".*/\1/')
echo -e "${BLUE}📦 Current version: ${YELLOW}${CURRENT_VERSION}${NC}"

# Bump version if requested
if [ "$BUMP_VERSION" = true ]; then
    # Parse version (X.Y.Z)
    MAJOR=$(echo "$CURRENT_VERSION" | cut -d. -f1)
    MINOR=$(echo "$CURRENT_VERSION" | cut -d. -f2)
    PATCH=$(echo "$CURRENT_VERSION" | cut -d. -f3)

    NEW_PATCH=$((PATCH + 1))
    NEW_VERSION="${MAJOR}.${MINOR}.${NEW_PATCH}"

    echo -e "${GREEN}📈 Bumping version: ${YELLOW}${CURRENT_VERSION}${NC} → ${GREEN}${NEW_VERSION}${NC}"

    if [ "$DRY_RUN" = false ]; then
        sed -i "s/version.*=.*\"${CURRENT_VERSION}\"/version = \"${NEW_VERSION}\"/" "$MAIN_FILE"
        CURRENT_VERSION="$NEW_VERSION"
    fi
fi

TAG="v${CURRENT_VERSION}"
echo -e "${BLUE}🏷️  Release tag: ${YELLOW}${TAG}${NC}"

# Require message for new releases (not updates)
if [ -z "$RELEASE_MSG" ] && [ "$UPDATE_ONLY" = false ] && [ "$DRY_RUN" = false ]; then
    echo -e "${RED}❌ Release message required. Use -m \"Your message\"${NC}"
    echo ""
    echo "Example:"
    echo "  $0 -m \"Fix TUI crash on cluster restore\""
    exit 1
fi

if [ "$DRY_RUN" = true ]; then
    echo -e "${YELLOW}🔍 DRY RUN - No changes will be made${NC}"
    echo ""
    echo "Would execute:"
    echo "  1. Build binaries with build_all.sh"
    echo "  2. Commit and push changes"
    echo "  3. Create/update release ${TAG}"
    exit 0
fi

# Build binaries
echo ""
echo -e "${BOLD}${BLUE}🔨 Building binaries...${NC}"
bash build_all.sh

# Check if there are changes to commit
if [ -n "$(git status --porcelain)" ]; then
    echo ""
    echo -e "${BLUE}📝 Committing changes...${NC}"
    git add -A

    # Generate commit message using the release message
    if [ -n "$RELEASE_MSG" ]; then
        COMMIT_MSG="${TAG}: ${RELEASE_MSG}"
    elif [ "$BUMP_VERSION" = true ]; then
        COMMIT_MSG="${TAG}: Version bump"
    else
        COMMIT_MSG="${TAG}: Release build"
    fi

    git commit -m "$COMMIT_MSG"
fi

# Push changes
echo -e "${BLUE}⬆️  Pushing to origin...${NC}"
git push origin main

# Handle tag
TAG_EXISTS=$(git tag -l "$TAG")
if [ -z "$TAG_EXISTS" ]; then
    echo -e "${BLUE}🏷️  Creating tag ${TAG}...${NC}"
    git tag "$TAG"
    git push origin "$TAG"
else
    echo -e "${YELLOW}⚠️  Tag ${TAG} already exists${NC}"
fi

# Check if release exists
echo ""
echo -e "${BLUE}🚀 Preparing release...${NC}"

RELEASE_EXISTS=$(gh release view "$TAG" 2>/dev/null && echo "yes" || echo "no")

if [ "$RELEASE_EXISTS" = "yes" ] || [ "$UPDATE_ONLY" = true ]; then
    echo -e "${YELLOW}📦 Updating existing release ${TAG}...${NC}"

    # Delete existing assets and upload new ones
    for binary in bin/dbbackup_*; do
        if [ -f "$binary" ]; then
            ASSET_NAME=$(basename "$binary")
            echo "  Uploading $ASSET_NAME..."
            gh release upload "$TAG" "$binary" --clobber
        fi
    done
else
    echo -e "${GREEN}📦 Creating new release ${TAG}...${NC}"

    # Generate release notes with the provided message
    NOTES="## ${TAG}: ${RELEASE_MSG}

### Downloads
| Platform | Architecture | Binary |
|----------|--------------|--------|
| Linux    | x86_64 (Intel/AMD) | \`dbbackup_linux_amd64\` |
| Linux    | ARM64 | \`dbbackup_linux_arm64\` |
| Linux    | ARMv7 | \`dbbackup_linux_arm_armv7\` |
| macOS    | Intel | \`dbbackup_darwin_amd64\` |
| macOS    | Apple Silicon (M1/M2) | \`dbbackup_darwin_arm64\` |

### Installation
\`\`\`bash
# Linux x86_64
curl -LO https://github.com/PlusOne/dbbackup/releases/download/${TAG}/dbbackup_linux_amd64
chmod +x dbbackup_linux_amd64
sudo mv dbbackup_linux_amd64 /usr/local/bin/dbbackup

# macOS Apple Silicon
curl -LO https://github.com/PlusOne/dbbackup/releases/download/${TAG}/dbbackup_darwin_arm64
chmod +x dbbackup_darwin_arm64
sudo mv dbbackup_darwin_arm64 /usr/local/bin/dbbackup
\`\`\`
"

    gh release create "$TAG" \
        --title "${TAG}: ${RELEASE_MSG}" \
        --notes "$NOTES" \
        bin/dbbackup_linux_amd64 \
        bin/dbbackup_linux_arm64 \
        bin/dbbackup_linux_arm_armv7 \
        bin/dbbackup_darwin_amd64 \
        bin/dbbackup_darwin_arm64
fi

echo ""
echo -e "${GREEN}${BOLD}✅ Release complete!${NC}"
echo -e "   ${BLUE}https://github.com/PlusOne/dbbackup/releases/tag/${TAG}${NC}"
222 scripts/dbtest.sh Normal file
@ -0,0 +1,222 @@
#!/bin/bash
# Enterprise Database Test Utility
set -e

DB_NAME="${DB_NAME:-testdb_500gb}"
TARGET_GB="${TARGET_GB:-500}"
BLOB_KB="${BLOB_KB:-100}"
BATCH_ROWS="${BATCH_ROWS:-10000}"

show_help() {
    cat << 'HELP'
╔═══════════════════════════════════════════════════════════════╗
║              ENTERPRISE DATABASE TEST UTILITY                 ║
╚═══════════════════════════════════════════════════════════════╝

Usage: ./dbtest.sh <command> [options]

Commands:
  status        Show current database status
  generate      Generate test database (interactive)
  generate-bg   Generate in background (tmux)
  stop          Stop running generation
  drop          Drop test database
  drop-all      Drop ALL non-system databases
  backup        Run dbbackup to SMB
  estimate      Estimate generation time
  log           Show generation log
  attach        Attach to tmux session

Environment variables:
  DB_NAME=testdb_500gb    Database name
  TARGET_GB=500           Target size in GB
  BLOB_KB=100             Blob size in KB
  BATCH_ROWS=10000        Rows per batch

Examples:
  ./dbtest.sh generate                    # Interactive generation
  TARGET_GB=100 ./dbtest.sh generate-bg   # 100GB in background
  DB_NAME=mytest ./dbtest.sh drop         # Drop specific database
  ./dbtest.sh drop-all                    # Clean slate
HELP
}

cmd_status() {
    echo "╔═══════════════════════════════════════════════════════════════╗"
    echo "║  DATABASE STATUS - $(date '+%Y-%m-%d %H:%M:%S')  ║"
    echo "╚═══════════════════════════════════════════════════════════════╝"
    echo ""

    echo "┌─ GENERATION ──────────────────────────────────────────────────┐"
    if tmux has-session -t dbgen 2>/dev/null; then
        echo "│ Status: ⏳ RUNNING (attach: ./dbtest.sh attach)"
        echo "│ Log: $(tail -1 /root/generate_500gb.log 2>/dev/null | cut -c1-55)"
    else
        echo "│ Status: ⏹ Not running"
    fi
    echo "└───────────────────────────────────────────────────────────────┘"
    echo ""

    echo "┌─ POSTGRESQL DATABASES ─────────────────────────────────────────┐"
    sudo -u postgres psql -t -c "SELECT datname || ': ' || pg_size_pretty(pg_database_size(datname)) FROM pg_database WHERE datname NOT LIKE 'template%' ORDER BY pg_database_size(datname) DESC" 2>/dev/null | sed 's/^/│ /'
    echo "└───────────────────────────────────────────────────────────────┘"
    echo ""

    echo "┌─ STORAGE ──────────────────────────────────────────────────────┐"
    echo -n "│ Fast 1TB:  "; df -h /mnt/HC_Volume_104577460 2>/dev/null | awk 'NR==2{print $3"/"$2" ("$5")"}' || echo "N/A"
    echo -n "│ SMB 10TB:  "; df -h /mnt/smb-devdb 2>/dev/null | awk 'NR==2{print $3"/"$2" ("$5")"}' || echo "N/A"
    echo -n "│ Local:     "; df -h / | awk 'NR==2{print $3"/"$2" ("$5")"}'
    echo "└───────────────────────────────────────────────────────────────┘"
}

cmd_stop() {
    echo "Stopping generation..."
    tmux kill-session -t dbgen 2>/dev/null && echo "Stopped." || echo "Not running."
}

cmd_drop() {
    echo "Dropping database: $DB_NAME"
    sudo -u postgres psql -c "SELECT pg_terminate_backend(pid) FROM pg_stat_activity WHERE datname='$DB_NAME' AND pid <> pg_backend_pid();" 2>/dev/null || true
    sudo -u postgres dropdb --if-exists "$DB_NAME" && echo "Dropped: $DB_NAME" || echo "Not found."
}

cmd_drop_all() {
    echo "WARNING: This will drop ALL non-system databases!"
    read -p "Type 'YES' to confirm: " confirm
    [ "$confirm" != "YES" ] && echo "Cancelled." && exit 0

    for db in $(sudo -u postgres psql -t -c "SELECT datname FROM pg_database WHERE datname NOT IN ('postgres','template0','template1')"); do
        db=$(echo $db | tr -d ' ')
        [ -n "$db" ] && echo "Dropping: $db" && sudo -u postgres dropdb --if-exists "$db"
    done
    echo "Done."
}

cmd_log() {
    tail -50 /root/generate_500gb.log 2>/dev/null || echo "No log file."
}

cmd_attach() {
    tmux has-session -t dbgen 2>/dev/null && tmux attach -t dbgen || echo "Not running."
}

cmd_backup() {
    mkdir -p /mnt/smb-devdb/cluster-500gb
    dbbackup backup cluster --backup-dir /mnt/smb-devdb/cluster-500gb
}

cmd_estimate() {
    echo "Target: ${TARGET_GB}GB with ${BLOB_KB}KB blobs"
    mins=$((TARGET_GB / 2))
    echo "Estimated: ~${mins} minutes (~$((mins/60)) hours)"
}

cmd_generate() {
    echo "=== Interactive Database Generator ==="
    read -p "Database name [$DB_NAME]: " i; DB_NAME="${i:-$DB_NAME}"
    read -p "Target size GB [$TARGET_GB]: " i; TARGET_GB="${i:-$TARGET_GB}"
    read -p "Blob size KB [$BLOB_KB]: " i; BLOB_KB="${i:-$BLOB_KB}"
    read -p "Rows per batch [$BATCH_ROWS]: " i; BATCH_ROWS="${i:-$BATCH_ROWS}"

    echo "Config: $DB_NAME, ${TARGET_GB}GB, ${BLOB_KB}KB blobs"
    read -p "Start? [y/N]: " c
    [[ "$c" != "y" && "$c" != "Y" ]] && echo "Cancelled." && exit 0

    do_generate
}

cmd_generate_bg() {
    echo "Starting: $DB_NAME, ${TARGET_GB}GB, ${BLOB_KB}KB blobs"
    tmux kill-session -t dbgen 2>/dev/null || true

    tmux new-session -d -s dbgen "DB_NAME=$DB_NAME TARGET_GB=$TARGET_GB BLOB_KB=$BLOB_KB BATCH_ROWS=$BATCH_ROWS /root/dbtest.sh _run 2>&1 | tee /root/generate_500gb.log"
    echo "Started in tmux. Use: ./dbtest.sh log | attach | stop"
}

do_generate() {
    BLOB_BYTES=$((BLOB_KB * 1024))
    echo "=== ${TARGET_GB}GB Generator ==="
    echo "Started: $(date)"

    sudo -u postgres dropdb --if-exists "$DB_NAME"
    sudo -u postgres createdb "$DB_NAME"
    sudo -u postgres psql -d "$DB_NAME" -c "CREATE EXTENSION IF NOT EXISTS pgcrypto;"

    sudo -u postgres psql -d "$DB_NAME" << 'EOSQL'
CREATE OR REPLACE FUNCTION large_random_bytes(size_bytes INT) RETURNS BYTEA AS $$
DECLARE r BYTEA := E'\x'; c INT := 1024; m INT := size_bytes;
BEGIN
    WHILE m > 0 LOOP
        IF m >= c THEN r := r || gen_random_bytes(c); m := m - c;
        ELSE r := r || gen_random_bytes(m); m := 0; END IF;
    END LOOP;
    RETURN r;
END; $$ LANGUAGE plpgsql;

CREATE TABLE enterprise_documents (
    id BIGSERIAL PRIMARY KEY, uuid UUID DEFAULT gen_random_uuid(),
    created_at TIMESTAMPTZ DEFAULT now(), document_type VARCHAR(50),
    document_name VARCHAR(255), file_size BIGINT, content BYTEA
);
ALTER TABLE enterprise_documents ALTER COLUMN content SET STORAGE EXTERNAL;
CREATE INDEX idx_doc_created ON enterprise_documents(created_at);

CREATE TABLE enterprise_transactions (
    id BIGSERIAL PRIMARY KEY, created_at TIMESTAMPTZ DEFAULT now(),
    customer_id BIGINT, amount DECIMAL(15,2), status VARCHAR(20)
);
EOSQL

    echo "Tables created"
    batch=0
    start=$(date +%s)

    while true; do
        sz=$(sudo -u postgres psql -t -A -c "SELECT pg_database_size('$DB_NAME')/1024/1024/1024")
        [ "$sz" -ge "$TARGET_GB" ] && echo "=== Target reached: ${sz}GB ===" && break

        batch=$((batch + 1))
        pct=$((sz * 100 / TARGET_GB))
        el=$(($(date +%s) - start))
        if [ $sz -gt 0 ] && [ $el -gt 0 ]; then
            eta="$(((TARGET_GB-sz)*el/sz/60))min"
        else
            eta="..."
        fi

        echo "Batch $batch: ${sz}GB/${TARGET_GB}GB (${pct}%) ETA:$eta"

        sudo -u postgres psql -q -d "$DB_NAME" -c "
            INSERT INTO enterprise_documents (document_type, document_name, file_size, content)
            SELECT (ARRAY['PDF','DOCX','IMG','VID'])[floor(random()*4+1)],
                   'Doc_'||i||'_'||substr(md5(random()::TEXT),1,8), $BLOB_BYTES,
                   large_random_bytes($BLOB_BYTES)
            FROM generate_series(1, $BATCH_ROWS) i;"

        sudo -u postgres psql -q -d "$DB_NAME" -c "
            INSERT INTO enterprise_transactions (customer_id, amount, status)
            SELECT (random()*1000000)::BIGINT, (random()*10000)::DECIMAL(15,2),
                   (ARRAY['ok','pending','failed'])[floor(random()*3+1)]
            FROM generate_series(1, 20000);"
    done

    sudo -u postgres psql -d "$DB_NAME" -c "ANALYZE;"
    sudo -u postgres psql -d "$DB_NAME" -c "SELECT pg_size_pretty(pg_database_size('$DB_NAME')) as size, (SELECT count(*) FROM enterprise_documents) as docs;"
    echo "Completed: $(date)"
}

case "${1:-help}" in
    status)      cmd_status ;;
    generate)    cmd_generate ;;
    generate-bg) cmd_generate_bg ;;
    stop)        cmd_stop ;;
    drop)        cmd_drop ;;
    drop-all)    cmd_drop_all ;;
    backup)      cmd_backup ;;
    estimate)    cmd_estimate ;;
    log)         cmd_log ;;
    attach)      cmd_attach ;;
    _run)        do_generate ;;
    help|--help|-h) show_help ;;
    *)           echo "Unknown: $1"; show_help ;;
esac