Compare commits
27 Commits
| SHA1 |
|---|
| 354c083e38 |
| a211befea8 |
| d6fbc77c21 |
| e449e2f448 |
| dceab64b67 |
| a101fb81ab |
| 555177f5a7 |
| 0d416ecb55 |
| 1fe16ef89b |
| 4507ec682f |
| 084b8bd279 |
| 0d85caea53 |
| 3624ff54ff |
| 696273816e |
| 2b7cfa4b67 |
| 714ff3a41d |
| b095e2fab5 |
| e6c0ca0667 |
| 79dc604eb6 |
| de88e38f93 |
| 97c52ab9e5 |
| 3c9e5f04ca |
| 86a28b6ec5 |
| 63b35414d2 |
| db46770e7f |
| 51764a677a |
| bdbbb59e51 |
@@ -49,13 +49,14 @@ jobs:
env:
POSTGRES_PASSWORD: postgres
POSTGRES_DB: testdb
ports: ['5432:5432']
# Use container networking instead of host port binding
# This avoids "port already in use" errors on shared runners
mysql:
image: mysql:8
env:
MYSQL_ROOT_PASSWORD: mysql
MYSQL_DATABASE: testdb
ports: ['3306:3306']
# Use container networking instead of host port binding
steps:
- name: Checkout code
env:
@@ -80,7 +81,7 @@ jobs:
done

- name: Build dbbackup
run: go build -o dbbackup .
run: go build -trimpath -o dbbackup .

- name: Test PostgreSQL backup/restore
env:
@@ -239,7 +240,7 @@ jobs:
echo "Focus: PostgreSQL native engine validation only"

- name: Build dbbackup for native testing
run: go build -o dbbackup-native .
run: go build -trimpath -o dbbackup-native .

- name: Test PostgreSQL Native Engine
env:
@@ -383,7 +384,7 @@ jobs:
- name: Build for current platform
run: |
echo "Building dbbackup for testing..."
go build -ldflags="-s -w" -o dbbackup .
go build -trimpath -ldflags="-s -w" -o dbbackup .
echo "Build successful!"
ls -lh dbbackup
./dbbackup version || echo "Binary created successfully"
@@ -419,7 +420,7 @@ jobs:

# Test Linux amd64 build (with CGO for SQLite)
echo "Testing linux/amd64 build (CGO enabled)..."
if CGO_ENABLED=1 GOOS=linux GOARCH=amd64 go build -ldflags="-s -w" -o release/dbbackup-linux-amd64 .; then
if CGO_ENABLED=1 GOOS=linux GOARCH=amd64 go build -trimpath -ldflags="-s -w" -o release/dbbackup-linux-amd64 .; then
echo "✅ linux/amd64 build successful"
ls -lh release/dbbackup-linux-amd64
else
@@ -428,7 +429,7 @@ jobs:

# Test Darwin amd64 (no CGO - cross-compile limitation)
echo "Testing darwin/amd64 build (CGO disabled)..."
if CGO_ENABLED=0 GOOS=darwin GOARCH=amd64 go build -ldflags="-s -w" -o release/dbbackup-darwin-amd64 .; then
if CGO_ENABLED=0 GOOS=darwin GOARCH=amd64 go build -trimpath -ldflags="-s -w" -o release/dbbackup-darwin-amd64 .; then
echo "✅ darwin/amd64 build successful"
ls -lh release/dbbackup-darwin-amd64
else
@@ -508,23 +509,23 @@ jobs:

# Linux amd64 (with CGO for SQLite)
echo "Building linux/amd64 (CGO enabled)..."
CGO_ENABLED=1 GOOS=linux GOARCH=amd64 go build -ldflags="-s -w" -o release/dbbackup-linux-amd64 .
CGO_ENABLED=1 GOOS=linux GOARCH=amd64 go build -trimpath -ldflags="-s -w" -o release/dbbackup-linux-amd64 .

# Linux arm64 (with CGO for SQLite)
echo "Building linux/arm64 (CGO enabled)..."
CC=aarch64-linux-gnu-gcc CGO_ENABLED=1 GOOS=linux GOARCH=arm64 go build -ldflags="-s -w" -o release/dbbackup-linux-arm64 .
CC=aarch64-linux-gnu-gcc CGO_ENABLED=1 GOOS=linux GOARCH=arm64 go build -trimpath -ldflags="-s -w" -o release/dbbackup-linux-arm64 .

# Darwin amd64 (no CGO - cross-compile limitation)
echo "Building darwin/amd64 (CGO disabled)..."
CGO_ENABLED=0 GOOS=darwin GOARCH=amd64 go build -ldflags="-s -w" -o release/dbbackup-darwin-amd64 .
CGO_ENABLED=0 GOOS=darwin GOARCH=amd64 go build -trimpath -ldflags="-s -w" -o release/dbbackup-darwin-amd64 .

# Darwin arm64 (no CGO - cross-compile limitation)
echo "Building darwin/arm64 (CGO disabled)..."
CGO_ENABLED=0 GOOS=darwin GOARCH=arm64 go build -ldflags="-s -w" -o release/dbbackup-darwin-arm64 .
CGO_ENABLED=0 GOOS=darwin GOARCH=arm64 go build -trimpath -ldflags="-s -w" -o release/dbbackup-darwin-arm64 .

# FreeBSD amd64 (no CGO - cross-compile limitation)
echo "Building freebsd/amd64 (CGO disabled)..."
CGO_ENABLED=0 GOOS=freebsd GOARCH=amd64 go build -ldflags="-s -w" -o release/dbbackup-freebsd-amd64 .
CGO_ENABLED=0 GOOS=freebsd GOARCH=amd64 go build -trimpath -ldflags="-s -w" -o release/dbbackup-freebsd-amd64 .

echo "All builds complete:"
ls -lh release/
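The recurring change in this workflow is the addition of Go's `-trimpath` flag to every `go build` invocation. `-trimpath` strips local filesystem prefixes (module directory, GOPATH, GOROOT) from the compiled binary, which keeps builds reproducible across runners and avoids leaking builder paths. A quick local sanity check, not part of the CI workflow itself:

```sh
# Build with and without -trimpath and compare embedded paths (illustrative).
go build -o dbbackup-plain .
go build -trimpath -o dbbackup-trimmed .
strings dbbackup-plain   | grep -c "$HOME" || true   # typically non-zero
strings dbbackup-trimmed | grep -c "$HOME" || true   # expected to be 0
```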
.gitignore (vendored): 1 change

@@ -18,6 +18,7 @@ bin/

# Ignore local configuration (may contain IPs/credentials)
.dbbackup.conf
.gh_token

# Ignore session/development notes
TODO_SESSION.md
CHANGELOG.md: 85 changes

@@ -5,6 +5,91 @@ All notable changes to dbbackup will be documented in this file.
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).

## [5.8.26] - 2026-02-05

### Improved
- **Size-Weighted ETA for Cluster Backups**: ETAs now based on database sizes, not count
- Query database sizes upfront before starting cluster backup
- Progress bar shows bytes completed vs total bytes (e.g., `0B/500.0GB`)
- ETA calculated using size-weighted formula: `elapsed * (remaining_bytes / done_bytes)` (see the sketch after this entry)
- Much more accurate for clusters with mixed database sizes (e.g., 8MB postgres + 500GB fakedb)
- Falls back to count-based ETA with `~` prefix if sizes unavailable
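A minimal Go sketch of the size-weighted ETA formula described in this entry. Function and variable names are illustrative, not the actual dbbackup internals:

```go
package main

import (
	"fmt"
	"time"
)

// etaSizeWeighted estimates remaining time as elapsed * (remaining_bytes / done_bytes).
// When sizes are unknown it falls back to a count-based estimate (second return is false).
func etaSizeWeighted(elapsed time.Duration, bytesDone, bytesTotal int64, done, total int) (time.Duration, bool) {
	if bytesTotal > 0 && bytesDone > 0 {
		remaining := float64(bytesTotal - bytesDone)
		return time.Duration(float64(elapsed) * remaining / float64(bytesDone)), true
	}
	if done > 0 {
		return time.Duration(float64(elapsed) * float64(total-done) / float64(done)), false
	}
	return 0, false
}

func main() {
	// An 8MB database finished after 2 minutes, 500GB still to go: the size-weighted
	// ETA is huge, whereas a count-based ETA would claim only "2 minutes left".
	eta, sizeBased := etaSizeWeighted(2*time.Minute, 8<<20, 500<<30, 1, 2)
	fmt.Println(eta.Round(time.Hour), sizeBased)
}
```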
## [5.8.25] - 2026-02-05

### Fixed
- **Backup Database Elapsed Time Display**: Fixed bug where per-database elapsed time and ETA showed `0.0s` during cluster backups
- Root cause: elapsed time was only updated when `hasUpdate` flag was true, not on every tick
- Fix: Store `phase2StartTime` in model and recalculate elapsed time on every UI tick
- Now shows accurate real-time elapsed and ETA for database backup phase

## [5.8.24] - 2026-02-05

### Added
- **Skip Preflight Checks Option**: New TUI setting to disable pre-restore safety checks
- Accessible via Settings menu → "Skip Preflight Checks"
- Shows warning when enabled: "⚠️ SKIPPED (dangerous)"
- Displays prominent warning banner on restore preview screen
- Useful for enterprise scenarios where checks are too slow on large databases
- Config field: `SkipPreflightChecks` (default: false)
- Setting is persisted to config file with warning comment
- Added nil-pointer safety checks throughout

## [5.8.23] - 2026-02-05

### Added
- **Cancellation Tests**: Added Go unit tests for context cancellation verification
- `TestParseStatementsContextCancellation` - verifies statement parsing can be cancelled
- `TestParseStatementsWithCopyDataCancellation` - verifies COPY data parsing can be cancelled
- Tests confirm cancellation responds within 10ms on large (1M+ line) files

## [5.8.15] - 2026-02-05

### Fixed
- **TUI Cluster Restore Hang**: Fixed hang during large SQL file restore (pg_dumpall format)
- Added context cancellation support to `parseStatementsWithContext()` with checks every 10000 lines
- Added context cancellation checks in schema statement execution loop
- Now uses context-aware parsing in `RestoreFile()` for proper Ctrl+C handling
- This complements the v5.8.14 panic recovery fix by preventing hangs (not just panics)

## [5.8.14] - 2026-02-05

### Fixed
- **TUI Cluster Restore Panic**: Fixed BubbleTea WaitGroup deadlock during cluster restore
- Panic recovery in `tea.Cmd` functions now uses named return values to properly return messages
- Previously, panic recovery returned nil which caused `execBatchMsg` WaitGroup to hang forever
- Affected files: `restore_exec.go` and `backup_exec.go`

## [5.8.12] - 2026-02-04

### Fixed
- **Config Loading**: Fixed config not loading for users without standard home directories
- Now searches: current dir → home dir → /etc/dbbackup.conf → /etc/dbbackup/dbbackup.conf
- Works for postgres user with home at /var/lib/postgresql
- Added `ConfigSearchPaths()` and `LoadLocalConfigWithPath()` functions
- Log now shows which config path was actually loaded

## [5.8.11] - 2026-02-04

### Fixed
- **TUI Deadlock**: Fixed goroutine leaks in pgxpool connection handling
- Removed redundant goroutines waiting on ctx.Done() in postgresql.go and parallel_restore.go
- These were causing WaitGroup deadlocks when BubbleTea tried to shutdown

### Added
- **systemd-run Resource Isolation**: New `internal/cleanup/cgroups.go` for long-running jobs
- `RunWithResourceLimits()` wraps commands in systemd-run scopes
- Configurable: MemoryHigh, MemoryMax, CPUQuota, IOWeight, Nice, Slice
- Automatic cleanup on context cancellation
- **Restore Dry-Run Checks**: New `internal/restore/dryrun.go` with 10 pre-restore validations
- Archive access, format, connectivity, permissions, target conflicts
- Disk space, work directory, required tools, lock settings, memory estimation
- Returns pass/warning/fail status with detailed messages
- **Audit Log Signing**: Enhanced `internal/security/audit.go` with Ed25519 cryptographic signing
- `SignedAuditEntry` with sequence numbers, hash chains, and signatures
- `GenerateSigningKeys()`, `SavePrivateKey()`, `LoadPublicKey()`
- `EnableSigning()`, `ExportSignedLog()`, `VerifyAuditLog()` for tamper detection

## [5.7.10] - 2026-02-03

### Fixed
@@ -19,7 +19,7 @@ COPY . .

# Build binary with cross-compilation support
RUN CGO_ENABLED=0 GOOS=${TARGETOS} GOARCH=${TARGETARCH} \
go build -a -installsuffix cgo -ldflags="-w -s" -o dbbackup .
go build -trimpath -a -installsuffix cgo -ldflags="-w -s" -o dbbackup .

# Final stage - minimal runtime image
# Using pinned version 3.19 which has better QEMU compatibility
Makefile: 2 changes

@@ -15,7 +15,7 @@ all: lint test build
## build: Build the binary with optimizations
build:
@echo "🔨 Building dbbackup $(VERSION)..."
CGO_ENABLED=0 go build -ldflags="$(LDFLAGS)" -o bin/dbbackup .
CGO_ENABLED=0 go build -trimpath -ldflags="$(LDFLAGS)" -o bin/dbbackup .
@echo "✅ Built bin/dbbackup"

## build-debug: Build with debug symbols (for debugging)
@@ -80,7 +80,7 @@ for platform_config in "${PLATFORMS[@]}"; do
# Set environment and build (using export for better compatibility)
# CGO_ENABLED=0 creates static binaries without glibc dependency
export CGO_ENABLED=0 GOOS GOARCH
if go build -ldflags "$LDFLAGS" -o "${BIN_DIR}/${binary_name}" . 2>/dev/null; then
if go build -trimpath -ldflags "$LDFLAGS" -o "${BIN_DIR}/${binary_name}" . 2>/dev/null; then
# Get file size
if [[ "$OSTYPE" == "darwin"* ]]; then
size=$(stat -f%z "${BIN_DIR}/${binary_name}" 2>/dev/null || echo "0")
@@ -11,6 +11,7 @@ import (

"dbbackup/internal/database"
"dbbackup/internal/engine/native"
"dbbackup/internal/metadata"
"dbbackup/internal/notify"

"github.com/klauspost/pgzip"
@@ -163,6 +164,54 @@ func runNativeBackup(ctx context.Context, db database.Database, databaseName, ba
"duration", backupDuration,
"engine", result.EngineUsed)

// Get actual file size from disk
fileInfo, err := os.Stat(outputFile)
var actualSize int64
if err == nil {
actualSize = fileInfo.Size()
} else {
actualSize = result.BytesProcessed
}

// Calculate SHA256 checksum
sha256sum, err := metadata.CalculateSHA256(outputFile)
if err != nil {
log.Warn("Failed to calculate SHA256", "error", err)
sha256sum = ""
}

// Create and save metadata file
meta := &metadata.BackupMetadata{
Version: "1.0",
Timestamp: backupStartTime,
Database: databaseName,
DatabaseType: dbType,
Host: cfg.Host,
Port: cfg.Port,
User: cfg.User,
BackupFile: filepath.Base(outputFile),
SizeBytes: actualSize,
SHA256: sha256sum,
Compression: "gzip",
BackupType: backupType,
Duration: backupDuration.Seconds(),
ExtraInfo: map[string]string{
"engine": result.EngineUsed,
"objects_processed": fmt.Sprintf("%d", result.ObjectsProcessed),
},
}

if cfg.CompressionLevel == 0 {
meta.Compression = "none"
}

metaPath := outputFile + ".meta.json"
if err := metadata.Save(metaPath, meta); err != nil {
log.Warn("Failed to save metadata", "error", err)
} else {
log.Debug("Metadata saved", "path", metaPath)
}

// Audit log: backup completed
auditLogger.LogBackupComplete(user, databaseName, cfg.BackupDir, result.BytesProcessed)
cmd/root.go: 33 changes

@@ -15,11 +15,12 @@ import (
)

var (
cfg *config.Config
log logger.Logger
auditLogger *security.AuditLogger
rateLimiter *security.RateLimiter
notifyManager *notify.Manager
cfg *config.Config
log logger.Logger
auditLogger *security.AuditLogger
rateLimiter *security.RateLimiter
notifyManager *notify.Manager
deprecatedPassword string
)

// rootCmd represents the base command when called without any subcommands
@@ -47,6 +48,11 @@ For help with specific commands, use: dbbackup [command] --help`,
return nil
}

// Check for deprecated password flag
if deprecatedPassword != "" {
return fmt.Errorf("--password flag is not supported for security reasons. Use environment variables instead:\n - MySQL/MariaDB: export MYSQL_PWD='your_password'\n - PostgreSQL: export PGPASSWORD='your_password' or use .pgpass file")
}

// Store which flags were explicitly set by user
flagsSet := make(map[string]bool)
cmd.Flags().Visit(func(f *pflag.Flag) {
@@ -55,22 +61,24 @@ For help with specific commands, use: dbbackup [command] --help`,

// Load local config if not disabled
if !cfg.NoLoadConfig {
// Use custom config path if specified, otherwise default to current directory
// Use custom config path if specified, otherwise search standard locations
var localCfg *config.LocalConfig
var configPath string
var err error
if cfg.ConfigPath != "" {
localCfg, err = config.LoadLocalConfigFromPath(cfg.ConfigPath)
configPath = cfg.ConfigPath
if err != nil {
log.Warn("Failed to load config from specified path", "path", cfg.ConfigPath, "error", err)
} else if localCfg != nil {
log.Info("Loaded configuration", "path", cfg.ConfigPath)
}
} else {
localCfg, err = config.LoadLocalConfig()
localCfg, configPath, err = config.LoadLocalConfigWithPath()
if err != nil {
log.Warn("Failed to load local config", "error", err)
log.Warn("Failed to load config", "error", err)
} else if localCfg != nil {
log.Info("Loaded configuration from .dbbackup.conf")
log.Info("Loaded configuration", "path", configPath)
}
}

@@ -171,15 +179,8 @@ func Execute(ctx context.Context, config *config.Config, logger logger.Logger) e
rootCmd.PersistentFlags().StringVar(&cfg.Database, "database", cfg.Database, "Database name")
// SECURITY: Password flag removed - use PGPASSWORD/MYSQL_PWD environment variable or .pgpass file
// Provide helpful error message for users expecting --password flag
var deprecatedPassword string
rootCmd.PersistentFlags().StringVar(&deprecatedPassword, "password", "", "DEPRECATED: Use MYSQL_PWD or PGPASSWORD environment variable instead")
rootCmd.PersistentFlags().MarkHidden("password")
rootCmd.PersistentPreRunE = func(cmd *cobra.Command, args []string) error {
if deprecatedPassword != "" {
return fmt.Errorf("--password flag is not supported for security reasons. Use environment variables instead:\n - MySQL/MariaDB: export MYSQL_PWD='your_password'\n - PostgreSQL: export PGPASSWORD='your_password' or use .pgpass file")
}
return nil
}
rootCmd.PersistentFlags().StringVarP(&cfg.DatabaseType, "db-type", "d", cfg.DatabaseType, "Database type (postgres|mysql|mariadb)")
rootCmd.PersistentFlags().StringVar(&cfg.BackupDir, "backup-dir", cfg.BackupDir, "Backup directory")
rootCmd.PersistentFlags().BoolVar(&cfg.NoColor, "no-color", cfg.NoColor, "Disable colored output")
@@ -39,7 +39,8 @@ import (
type ProgressCallback func(current, total int64, description string)

// DatabaseProgressCallback is called with database count progress during cluster backup
type DatabaseProgressCallback func(done, total int, dbName string)
// bytesDone and bytesTotal enable size-weighted ETA calculations
type DatabaseProgressCallback func(done, total int, dbName string, bytesDone, bytesTotal int64)

// Engine handles backup operations
type Engine struct {
@@ -112,7 +113,8 @@ func (e *Engine) SetDatabaseProgressCallback(cb DatabaseProgressCallback) {
}

// reportDatabaseProgress reports database count progress to the callback if set
func (e *Engine) reportDatabaseProgress(done, total int, dbName string) {
// bytesDone/bytesTotal enable size-weighted ETA calculations
func (e *Engine) reportDatabaseProgress(done, total int, dbName string, bytesDone, bytesTotal int64) {
// CRITICAL: Add panic recovery to prevent crashes during TUI shutdown
defer func() {
if r := recover(); r != nil {
@@ -121,7 +123,7 @@ func (e *Engine) reportDatabaseProgress(done, total int, dbName string) {
}()

if e.dbProgressCallback != nil {
e.dbProgressCallback(done, total, dbName)
e.dbProgressCallback(done, total, dbName, bytesDone, bytesTotal)
}
}

@@ -461,6 +463,18 @@ func (e *Engine) BackupCluster(ctx context.Context) error {
return fmt.Errorf("failed to list databases: %w", err)
}

// Query database sizes upfront for accurate ETA calculation
e.printf(" Querying database sizes for ETA estimation...\n")
dbSizes := make(map[string]int64)
var totalBytes int64
for _, dbName := range databases {
if size, err := e.db.GetDatabaseSize(ctx, dbName); err == nil {
dbSizes[dbName] = size
totalBytes += size
}
}
var completedBytes int64 // Track bytes completed (atomic access)

// Create ETA estimator for database backups
estimator := progress.NewETAEstimator("Backing up cluster", len(databases))
quietProgress.SetEstimator(estimator)
@@ -520,25 +534,26 @@ func (e *Engine) BackupCluster(ctx context.Context) error {
default:
}

// Get this database's size for progress tracking
thisDbSize := dbSizes[name]

// Update estimator progress (thread-safe)
mu.Lock()
estimator.UpdateProgress(idx)
e.printf(" [%d/%d] Backing up database: %s\n", idx+1, len(databases), name)
quietProgress.Update(fmt.Sprintf("Backing up database %d/%d: %s", idx+1, len(databases), name))
// Report database progress to TUI callback
e.reportDatabaseProgress(idx+1, len(databases), name)
// Report database progress to TUI callback with size-weighted info
e.reportDatabaseProgress(idx+1, len(databases), name, completedBytes, totalBytes)
mu.Unlock()

// Check database size and warn if very large
if size, err := e.db.GetDatabaseSize(ctx, name); err == nil {
sizeStr := formatBytes(size)
mu.Lock()
e.printf(" Database size: %s\n", sizeStr)
if size > 10*1024*1024*1024 { // > 10GB
e.printf(" [WARN] Large database detected - this may take a while\n")
}
mu.Unlock()
// Use cached size, warn if very large
sizeStr := formatBytes(thisDbSize)
mu.Lock()
e.printf(" Database size: %s\n", sizeStr)
if thisDbSize > 10*1024*1024*1024 { // > 10GB
e.printf(" [WARN] Large database detected - this may take a while\n")
}
mu.Unlock()

dumpFile := filepath.Join(tempDir, "dumps", name+".dump")

@@ -635,6 +650,8 @@ func (e *Engine) BackupCluster(ctx context.Context) error {
}
} else {
// Native backup succeeded!
// Update completed bytes for size-weighted ETA
atomic.AddInt64(&completedBytes, thisDbSize)
if info, statErr := os.Stat(sqlFile); statErr == nil {
mu.Lock()
e.printf(" [OK] Completed %s (%s) [native]\n", name, formatBytes(info.Size()))
@@ -687,6 +704,8 @@ func (e *Engine) BackupCluster(ctx context.Context) error {

mu.Unlock()
atomic.AddInt32(&failCount, 1)
} else {
// Update completed bytes for size-weighted ETA
atomic.AddInt64(&completedBytes, thisDbSize)
compressedCandidate := strings.TrimSuffix(dumpFile, ".dump") + ".sql.gz"
mu.Lock()
if info, err := os.Stat(compressedCandidate); err == nil {
internal/cleanup/cgroups.go (new file): 236 lines

@@ -0,0 +1,236 @@
package cleanup

import (
"context"
"fmt"
"os"
"os/exec"
"runtime"
"strings"

"dbbackup/internal/logger"
)

// ResourceLimits defines resource constraints for long-running operations
type ResourceLimits struct {
// MemoryHigh is the high memory limit (e.g., "4G", "2048M")
// When exceeded, kernel will throttle and reclaim memory aggressively
MemoryHigh string

// MemoryMax is the hard memory limit (e.g., "6G")
// Process is killed if exceeded
MemoryMax string

// CPUQuota limits CPU usage (e.g., "70%" for 70% of one CPU)
CPUQuota string

// IOWeight sets I/O priority (1-10000, default 100)
IOWeight int

// Nice sets process priority (-20 to 19)
Nice int

// Slice is the systemd slice to run under (e.g., "dbbackup.slice")
Slice string
}

// DefaultResourceLimits returns sensible defaults for backup/restore operations
func DefaultResourceLimits() *ResourceLimits {
return &ResourceLimits{
MemoryHigh: "4G",
MemoryMax: "6G",
CPUQuota: "80%",
IOWeight: 100, // Default priority
Nice: 10, // Slightly lower priority than interactive processes
Slice: "dbbackup.slice",
}
}

// SystemdRunAvailable checks if systemd-run is available on this system
func SystemdRunAvailable() bool {
if runtime.GOOS != "linux" {
return false
}
_, err := exec.LookPath("systemd-run")
return err == nil
}

// RunWithResourceLimits executes a command with resource limits via systemd-run
// Falls back to direct execution if systemd-run is not available
func RunWithResourceLimits(ctx context.Context, log logger.Logger, limits *ResourceLimits, name string, args ...string) error {
if limits == nil {
limits = DefaultResourceLimits()
}

// If systemd-run not available, fall back to direct execution
if !SystemdRunAvailable() {
log.Debug("systemd-run not available, running without resource limits")
cmd := exec.CommandContext(ctx, name, args...)
cmd.Stdout = os.Stdout
cmd.Stderr = os.Stderr
return cmd.Run()
}

// Build systemd-run command
systemdArgs := buildSystemdArgs(limits, name, args)

log.Info("Running with systemd resource limits",
"command", name,
"memory_high", limits.MemoryHigh,
"cpu_quota", limits.CPUQuota)

cmd := exec.CommandContext(ctx, "systemd-run", systemdArgs...)
cmd.Stdout = os.Stdout
cmd.Stderr = os.Stderr

return cmd.Run()
}

// RunWithResourceLimitsOutput executes with limits and returns combined output
func RunWithResourceLimitsOutput(ctx context.Context, log logger.Logger, limits *ResourceLimits, name string, args ...string) ([]byte, error) {
if limits == nil {
limits = DefaultResourceLimits()
}

// If systemd-run not available, fall back to direct execution
if !SystemdRunAvailable() {
log.Debug("systemd-run not available, running without resource limits")
cmd := exec.CommandContext(ctx, name, args...)
return cmd.CombinedOutput()
}

// Build systemd-run command
systemdArgs := buildSystemdArgs(limits, name, args)

log.Debug("Running with systemd resource limits",
"command", name,
"memory_high", limits.MemoryHigh)

cmd := exec.CommandContext(ctx, "systemd-run", systemdArgs...)
return cmd.CombinedOutput()
}

// buildSystemdArgs constructs the systemd-run argument list
func buildSystemdArgs(limits *ResourceLimits, name string, args []string) []string {
systemdArgs := []string{
"--scope", // Run as transient scope (not service)
"--user", // Run in user session (no root required)
"--quiet", // Reduce systemd noise
"--collect", // Automatically clean up after exit
}

// Add description for easier identification
systemdArgs = append(systemdArgs, fmt.Sprintf("--description=dbbackup: %s", name))

// Add resource properties
if limits.MemoryHigh != "" {
systemdArgs = append(systemdArgs, fmt.Sprintf("--property=MemoryHigh=%s", limits.MemoryHigh))
}

if limits.MemoryMax != "" {
systemdArgs = append(systemdArgs, fmt.Sprintf("--property=MemoryMax=%s", limits.MemoryMax))
}

if limits.CPUQuota != "" {
systemdArgs = append(systemdArgs, fmt.Sprintf("--property=CPUQuota=%s", limits.CPUQuota))
}

if limits.IOWeight > 0 {
systemdArgs = append(systemdArgs, fmt.Sprintf("--property=IOWeight=%d", limits.IOWeight))
}

if limits.Nice != 0 {
systemdArgs = append(systemdArgs, fmt.Sprintf("--property=Nice=%d", limits.Nice))
}

if limits.Slice != "" {
systemdArgs = append(systemdArgs, fmt.Sprintf("--slice=%s", limits.Slice))
}

// Add separator and command
systemdArgs = append(systemdArgs, "--")
systemdArgs = append(systemdArgs, name)
systemdArgs = append(systemdArgs, args...)

return systemdArgs
}

// WrapCommand creates an exec.Cmd that runs with resource limits
// This allows the caller to customize stdin/stdout/stderr before running
func WrapCommand(ctx context.Context, log logger.Logger, limits *ResourceLimits, name string, args ...string) *exec.Cmd {
if limits == nil {
limits = DefaultResourceLimits()
}

// If systemd-run not available, return direct command
if !SystemdRunAvailable() {
log.Debug("systemd-run not available, returning unwrapped command")
return exec.CommandContext(ctx, name, args...)
}

// Build systemd-run command
systemdArgs := buildSystemdArgs(limits, name, args)

log.Debug("Wrapping command with systemd resource limits",
"command", name,
"memory_high", limits.MemoryHigh)

return exec.CommandContext(ctx, "systemd-run", systemdArgs...)
}

// ResourceLimitsFromConfig creates resource limits from size estimates
// Useful for dynamically setting limits based on backup/restore size
func ResourceLimitsFromConfig(estimatedSizeBytes int64, isRestore bool) *ResourceLimits {
limits := DefaultResourceLimits()

// Estimate memory needs based on data size
// Restore needs more memory than backup
var memoryMultiplier float64 = 0.1 // 10% of data size for backup
if isRestore {
memoryMultiplier = 0.2 // 20% of data size for restore
}

estimatedMemMB := int64(float64(estimatedSizeBytes/1024/1024) * memoryMultiplier)

// Clamp to reasonable values
if estimatedMemMB < 512 {
estimatedMemMB = 512 // Minimum 512MB
}
if estimatedMemMB > 16384 {
estimatedMemMB = 16384 // Maximum 16GB
}

limits.MemoryHigh = fmt.Sprintf("%dM", estimatedMemMB)
limits.MemoryMax = fmt.Sprintf("%dM", estimatedMemMB*2) // 2x high limit

return limits
}

// GetActiveResourceUsage returns current resource usage if running in systemd scope
func GetActiveResourceUsage() (string, error) {
if !SystemdRunAvailable() {
return "", fmt.Errorf("systemd not available")
}

// Check if we're running in a scope
cmd := exec.Command("systemctl", "--user", "status", "--no-pager")
output, err := cmd.Output()
if err != nil {
return "", fmt.Errorf("failed to get systemd status: %w", err)
}

// Extract dbbackup-related scopes
lines := strings.Split(string(output), "\n")
var dbbackupLines []string
for _, line := range lines {
if strings.Contains(line, "dbbackup") {
dbbackupLines = append(dbbackupLines, strings.TrimSpace(line))
}
}

if len(dbbackupLines) == 0 {
return "No active dbbackup scopes", nil
}

return strings.Join(dbbackupLines, "\n"), nil
}
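A brief caller sketch for the helpers above. The wrapped pg_dump command, the 20 GiB size estimate, and the surrounding `ctx`/`log` variables are illustrative assumptions, not code from this changeset:

```go
// Derive limits from an estimated 20 GiB backup: 10% of the size, clamped,
// gives MemoryHigh="2048M" and MemoryMax="4096M" per ResourceLimitsFromConfig.
limits := cleanup.ResourceLimitsFromConfig(20<<30, false)

// Run the dump inside a transient systemd-run --user scope; if systemd-run
// is missing (or on non-Linux), the helper silently falls back to plain exec.
err := cleanup.RunWithResourceLimits(ctx, log, limits,
	"pg_dump", "-Fc", "-d", "testdb", "-f", "/var/backups/testdb.dump")
if err != nil {
	log.Error("backup under resource limits failed", "error", err)
}
```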
@@ -131,6 +131,9 @@ type Config struct {
TUIVerbose bool // Verbose TUI logging
TUILogFile string // TUI event log file path

// Safety options
SkipPreflightChecks bool // Skip pre-restore safety checks (archive integrity, disk space, etc.)

// Cloud storage options (v2.0)
CloudEnabled bool // Enable cloud storage integration
CloudProvider string // "s3", "minio", "b2", "azure", "gcs"
@@ -35,15 +35,62 @@ type LocalConfig struct {
ResourceProfile string
LargeDBMode bool // Enable large database mode (reduces parallelism, increases locks)

// Safety settings
SkipPreflightChecks bool // Skip pre-restore safety checks (dangerous)

// Security settings
RetentionDays int
MinBackups int
MaxRetries int
}

// LoadLocalConfig loads configuration from .dbbackup.conf in current directory
// ConfigSearchPaths returns all paths where config files are searched, in order of priority
func ConfigSearchPaths() []string {
paths := []string{
filepath.Join(".", ConfigFileName), // Current directory (highest priority)
}

// User's home directory
if home, err := os.UserHomeDir(); err == nil && home != "" {
paths = append(paths, filepath.Join(home, ConfigFileName))
}

// System-wide config locations
paths = append(paths,
"/etc/dbbackup.conf",
"/etc/dbbackup/dbbackup.conf",
)

return paths
}

// LoadLocalConfig loads configuration from .dbbackup.conf
// Search order: 1) current directory, 2) user's home directory, 3) /etc/dbbackup.conf, 4) /etc/dbbackup/dbbackup.conf
func LoadLocalConfig() (*LocalConfig, error) {
return LoadLocalConfigFromPath(filepath.Join(".", ConfigFileName))
for _, path := range ConfigSearchPaths() {
cfg, err := LoadLocalConfigFromPath(path)
if err != nil {
return nil, err
}
if cfg != nil {
return cfg, nil
}
}
return nil, nil
}

// LoadLocalConfigWithPath loads configuration and returns the path it was loaded from
func LoadLocalConfigWithPath() (*LocalConfig, string, error) {
for _, path := range ConfigSearchPaths() {
cfg, err := LoadLocalConfigFromPath(path)
if err != nil {
return nil, "", err
}
if cfg != nil {
return cfg, path, nil
}
}
return nil, "", nil
}

// LoadLocalConfigFromPath loads configuration from a specific path
@@ -152,6 +199,11 @@ func LoadLocalConfigFromPath(configPath string) (*LocalConfig, error) {
cfg.MaxRetries = mr
}
}
case "safety":
switch key {
case "skip_preflight_checks":
cfg.SkipPreflightChecks = value == "true" || value == "1"
}
}
}

@@ -208,6 +260,14 @@ func SaveLocalConfigToPath(cfg *LocalConfig, configPath string) error {
sb.WriteString(fmt.Sprintf("retention_days = %d\n", cfg.RetentionDays))
sb.WriteString(fmt.Sprintf("min_backups = %d\n", cfg.MinBackups))
sb.WriteString(fmt.Sprintf("max_retries = %d\n", cfg.MaxRetries))
sb.WriteString("\n")

// Safety section - only write if non-default (dangerous setting)
if cfg.SkipPreflightChecks {
sb.WriteString("[safety]\n")
sb.WriteString("# WARNING: Skipping preflight checks can lead to failed restores!\n")
sb.WriteString(fmt.Sprintf("skip_preflight_checks = %t\n", cfg.SkipPreflightChecks))
}

// Use 0644 permissions for readability
if err := os.WriteFile(configPath, []byte(sb.String()), 0644); err != nil {
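When `SkipPreflightChecks` is enabled, the save path above emits a `[safety]` section into `.dbbackup.conf`, which the parser hunk earlier in this file reads back. The generated section looks like this:

```
[safety]
# WARNING: Skipping preflight checks can lead to failed restores!
skip_preflight_checks = true
```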
@@ -284,29 +344,36 @@ func ApplyLocalConfig(cfg *Config, local *LocalConfig) {
if local.MaxRetries != 0 {
cfg.MaxRetries = local.MaxRetries
}

// Safety settings - apply even if false (explicit setting)
// This is a dangerous setting, so we always respect what's in the config
if local.SkipPreflightChecks {
cfg.SkipPreflightChecks = true
}
}

// ConfigFromConfig creates a LocalConfig from a Config
func ConfigFromConfig(cfg *Config) *LocalConfig {
return &LocalConfig{
DBType: cfg.DatabaseType,
Host: cfg.Host,
Port: cfg.Port,
User: cfg.User,
Database: cfg.Database,
SSLMode: cfg.SSLMode,
BackupDir: cfg.BackupDir,
WorkDir: cfg.WorkDir,
Compression: cfg.CompressionLevel,
Jobs: cfg.Jobs,
DumpJobs: cfg.DumpJobs,
CPUWorkload: cfg.CPUWorkloadType,
MaxCores: cfg.MaxCores,
ClusterTimeout: cfg.ClusterTimeoutMinutes,
ResourceProfile: cfg.ResourceProfile,
LargeDBMode: cfg.LargeDBMode,
RetentionDays: cfg.RetentionDays,
MinBackups: cfg.MinBackups,
MaxRetries: cfg.MaxRetries,
DBType: cfg.DatabaseType,
Host: cfg.Host,
Port: cfg.Port,
User: cfg.User,
Database: cfg.Database,
SSLMode: cfg.SSLMode,
BackupDir: cfg.BackupDir,
WorkDir: cfg.WorkDir,
Compression: cfg.CompressionLevel,
Jobs: cfg.Jobs,
DumpJobs: cfg.DumpJobs,
CPUWorkload: cfg.CPUWorkloadType,
MaxCores: cfg.MaxCores,
ClusterTimeout: cfg.ClusterTimeoutMinutes,
ResourceProfile: cfg.ResourceProfile,
LargeDBMode: cfg.LargeDBMode,
SkipPreflightChecks: cfg.SkipPreflightChecks,
RetentionDays: cfg.RetentionDays,
MinBackups: cfg.MinBackups,
MaxRetries: cfg.MaxRetries,
}
}
@@ -74,7 +74,7 @@ func (p *PostgreSQL) Connect(ctx context.Context) error {
config.MinConns = 2 // Keep minimum connections ready
config.MaxConnLifetime = 0 // No limit on connection lifetime
config.MaxConnIdleTime = 0 // No idle timeout
config.HealthCheckPeriod = 1 * time.Minute // Health check every minute
config.HealthCheckPeriod = 5 * time.Second // Faster health check for quicker shutdown on Ctrl+C

// Optimize for large query results (BLOB data)
config.ConnConfig.RuntimeParams["work_mem"] = "64MB"
@@ -97,6 +97,14 @@ func (p *PostgreSQL) Connect(ctx context.Context) error {

p.pool = pool
p.db = db

// NOTE: We intentionally do NOT start a goroutine to close the pool on context cancellation.
// The pool is closed via defer dbClient.Close() in the caller, which is the correct pattern.
// Starting a goroutine here causes goroutine leaks and potential double-close issues when:
// 1. The caller's defer runs first (normal case)
// 2. Then context is cancelled and the goroutine tries to close an already-closed pool
// This was causing deadlocks in the TUI when tea.Batch was waiting for commands to complete.

p.log.Info("Connected to PostgreSQL successfully", "driver", "pgx", "max_conns", config.MaxConns)
return nil
}
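The caller-owned lifetime that the NOTE refers to, sketched with illustrative names (the constructor and the calling function are not part of this diff):

```go
// The caller owns the pool: open it, use it, and release it with a single
// deferred Close(), rather than tying Close() to ctx.Done() in a goroutine.
dbClient, err := database.New(cfg, log) // hypothetical constructor
if err != nil {
	return err
}
if err := dbClient.Connect(ctx); err != nil {
	return err
}
defer dbClient.Close() // one deterministic close; no leaked goroutine, no double close
```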
@@ -28,6 +28,9 @@ type ParallelRestoreEngine struct {

// Configuration
parallelWorkers int

// Internal cancel channel to stop the pool cleanup goroutine
closeCh chan struct{}
}

// ParallelRestoreOptions configures parallel restore behavior
@@ -71,7 +74,14 @@ const (
)

// NewParallelRestoreEngine creates a new parallel restore engine
// NOTE: Pass a cancellable context to ensure the pool is properly closed on Ctrl+C
func NewParallelRestoreEngine(config *PostgreSQLNativeConfig, log logger.Logger, workers int) (*ParallelRestoreEngine, error) {
return NewParallelRestoreEngineWithContext(context.Background(), config, log, workers)
}

// NewParallelRestoreEngineWithContext creates a new parallel restore engine with context support
// This ensures the connection pool is properly closed when the context is cancelled
func NewParallelRestoreEngineWithContext(ctx context.Context, config *PostgreSQLNativeConfig, log logger.Logger, workers int) (*ParallelRestoreEngine, error) {
if workers < 1 {
workers = 4 // Default to 4 parallel workers
}
@@ -94,17 +104,43 @@ func NewParallelRestoreEngine(config *PostgreSQLNativeConfig, log logger.Logger,
poolConfig.MaxConns = int32(workers + 2)
poolConfig.MinConns = int32(workers)

pool, err := pgxpool.NewWithConfig(context.Background(), poolConfig)
// CRITICAL: Reduce health check period to allow faster shutdown
// Default is 1 minute which causes hangs on Ctrl+C
poolConfig.HealthCheckPeriod = 5 * time.Second

// CRITICAL: Set connection-level timeouts to ensure queries can be cancelled
// This prevents infinite hangs on slow/stuck operations
poolConfig.ConnConfig.RuntimeParams = map[string]string{
"statement_timeout": "3600000", // 1 hour max per statement (in ms)
"lock_timeout": "300000", // 5 min max wait for locks (in ms)
"idle_in_transaction_session_timeout": "600000", // 10 min idle timeout (in ms)
}

// Use the provided context so pool health checks stop when context is cancelled
pool, err := pgxpool.NewWithConfig(ctx, poolConfig)
if err != nil {
return nil, fmt.Errorf("failed to create connection pool: %w", err)
}

return &ParallelRestoreEngine{
closeCh := make(chan struct{})

engine := &ParallelRestoreEngine{
config: config,
pool: pool,
log: log,
parallelWorkers: workers,
}, nil
closeCh: closeCh,
}

// NOTE: We intentionally do NOT start a goroutine to close the pool on context cancellation.
// The pool is closed via defer parallelEngine.Close() in the caller (restore/engine.go).
// The Close() method properly signals closeCh and closes the pool.
// Starting a goroutine here can cause:
// 1. Race conditions with explicit Close() calls
// 2. Goroutine leaks if neither ctx nor Close() fires
// 3. Deadlocks with BubbleTea's event loop

return engine, nil
}

// RestoreFile restores from a SQL file with parallel execution
@@ -146,7 +182,7 @@ func (e *ParallelRestoreEngine) RestoreFile(ctx context.Context, filePath string
options.ProgressCallback("parsing", 0, 0, "")
}

statements, err := e.parseStatements(reader)
statements, err := e.parseStatementsWithContext(ctx, reader)
if err != nil {
return result, fmt.Errorf("failed to parse SQL: %w", err)
}
@@ -177,6 +213,13 @@ func (e *ParallelRestoreEngine) RestoreFile(ctx context.Context, filePath string

schemaStmts := 0
for _, stmt := range statements {
// Check for context cancellation periodically
select {
case <-ctx.Done():
return result, ctx.Err()
default:
}

if stmt.Type == StmtSchema || stmt.Type == StmtOther {
if err := e.executeStatement(ctx, stmt.SQL); err != nil {
if options.ContinueOnError {
@@ -215,17 +258,39 @@ func (e *ParallelRestoreEngine) RestoreFile(ctx context.Context, filePath string
semaphore := make(chan struct{}, options.Workers)
var completedCopies int64
var totalRows int64
var cancelled int32 // Atomic flag to signal cancellation

copyLoop:
for _, stmt := range copyStmts {
// Check for context cancellation before starting new work
if ctx.Err() != nil {
break
}

wg.Add(1)
semaphore <- struct{}{} // Acquire worker slot
select {
case semaphore <- struct{}{}: // Acquire worker slot
case <-ctx.Done():
wg.Done()
atomic.StoreInt32(&cancelled, 1)
break copyLoop // CRITICAL: Use labeled break to exit the for loop, not just the select
}

go func(s *SQLStatement) {
defer wg.Done()
defer func() { <-semaphore }() // Release worker slot

// Check cancellation before executing
if ctx.Err() != nil || atomic.LoadInt32(&cancelled) == 1 {
return
}

rows, err := e.executeCopy(ctx, s)
if err != nil {
if ctx.Err() != nil {
// Context cancelled, don't log as error
return
}
if options.ContinueOnError {
e.log.Warn("COPY failed", "table", s.TableName, "error", err)
} else {
@@ -243,6 +308,12 @@ func (e *ParallelRestoreEngine) RestoreFile(ctx context.Context, filePath string
}

wg.Wait()

// Check if cancelled
if ctx.Err() != nil {
return result, ctx.Err()
}

result.TablesRestored = completedCopies
result.RowsRestored = totalRows

@@ -264,15 +335,36 @@ func (e *ParallelRestoreEngine) RestoreFile(ctx context.Context, filePath string

// Execute post-data in parallel
var completedPostData int64
cancelled = 0 // Reset for phase 4
postDataLoop:
for _, sql := range postDataStmts {
// Check for context cancellation before starting new work
if ctx.Err() != nil {
break
}

wg.Add(1)
semaphore <- struct{}{}
select {
case semaphore <- struct{}{}:
case <-ctx.Done():
wg.Done()
atomic.StoreInt32(&cancelled, 1)
break postDataLoop // CRITICAL: Use labeled break to exit the for loop, not just the select
}

go func(stmt string) {
defer wg.Done()
defer func() { <-semaphore }()

// Check cancellation before executing
if ctx.Err() != nil || atomic.LoadInt32(&cancelled) == 1 {
return
}

if err := e.executeStatement(ctx, stmt); err != nil {
if ctx.Err() != nil {
return // Context cancelled
}
if options.ContinueOnError {
e.log.Warn("Post-data statement failed", "error", err)
}
@@ -289,6 +381,11 @@ func (e *ParallelRestoreEngine) RestoreFile(ctx context.Context, filePath string

wg.Wait()

// Check if cancelled
if ctx.Err() != nil {
return result, ctx.Err()
}

result.Duration = time.Since(startTime)
e.log.Info("Parallel restore completed",
"duration", result.Duration,
@@ -301,6 +398,11 @@ func (e *ParallelRestoreEngine) RestoreFile(ctx context.Context, filePath string

// parseStatements reads and classifies all SQL statements
func (e *ParallelRestoreEngine) parseStatements(reader io.Reader) ([]SQLStatement, error) {
return e.parseStatementsWithContext(context.Background(), reader)
}

// parseStatementsWithContext reads and classifies all SQL statements with context support
func (e *ParallelRestoreEngine) parseStatementsWithContext(ctx context.Context, reader io.Reader) ([]SQLStatement, error) {
scanner := bufio.NewScanner(reader)
scanner.Buffer(make([]byte, 1024*1024), 64*1024*1024) // 64MB max for large statements

@@ -308,8 +410,19 @@ func (e *ParallelRestoreEngine) parseStatements(reader io.Reader) ([]SQLStatemen
var stmtBuffer bytes.Buffer
var inCopyMode bool
var currentCopyStmt *SQLStatement
lineCount := 0

for scanner.Scan() {
// Check for context cancellation every 10000 lines
lineCount++
if lineCount%10000 == 0 {
select {
case <-ctx.Done():
return statements, ctx.Err()
default:
}
}

line := scanner.Text()

// Handle COPY data mode
@@ -327,6 +440,15 @@ func (e *ParallelRestoreEngine) parseStatements(reader io.Reader) ([]SQLStatemen
currentCopyStmt.CopyData.WriteString(line)
currentCopyStmt.CopyData.WriteByte('\n')
}
// Check for context cancellation during COPY data parsing (large tables)
// Check every 10000 lines to avoid overhead
if lineCount%10000 == 0 {
select {
case <-ctx.Done():
return statements, ctx.Err()
default:
}
}
continue
}

@@ -450,8 +572,13 @@ func (e *ParallelRestoreEngine) executeCopy(ctx context.Context, stmt *SQLStatem
return tag.RowsAffected(), nil
}

// Close closes the connection pool
// Close closes the connection pool and stops the cleanup goroutine
func (e *ParallelRestoreEngine) Close() error {
// Signal the cleanup goroutine to exit
if e.closeCh != nil {
close(e.closeCh)
}
// Close the pool
if e.pool != nil {
e.pool.Close()
}
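A minimal caller sketch for the context-aware constructor and the Close() contract described in the NOTE above. The `pgConfig`, `options`, and `log` values and the archive path are illustrative assumptions, not code from this changeset:

```go
ctx, cancel := context.WithCancel(context.Background()) // the TUI cancels this on Ctrl+C
defer cancel()

engine, err := native.NewParallelRestoreEngineWithContext(ctx, pgConfig, log, 4)
if err != nil {
	return err
}
defer engine.Close() // signals closeCh and closes the pool exactly once

result, err := engine.RestoreFile(ctx, "/backups/cluster.sql", options)
if err != nil {
	return err // a context error here means the user cancelled; parsing and COPY phases stop quickly
}
log.Info("restore finished", "tables", result.TablesRestored, "rows", result.RowsRestored)
```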
internal/engine/native/parallel_restore_cancel_test.go (new file): 121 lines

@@ -0,0 +1,121 @@
package native

import (
"bytes"
"context"
"strings"
"testing"
"time"

"dbbackup/internal/logger"
)

// mockLogger for tests
type mockLogger struct{}

func (m *mockLogger) Debug(msg string, args ...any) {}
func (m *mockLogger) Info(msg string, keysAndValues ...interface{}) {}
func (m *mockLogger) Warn(msg string, keysAndValues ...interface{}) {}
func (m *mockLogger) Error(msg string, keysAndValues ...interface{}) {}
func (m *mockLogger) Time(msg string, args ...any) {}
func (m *mockLogger) WithField(key string, value interface{}) logger.Logger { return m }
func (m *mockLogger) WithFields(fields map[string]interface{}) logger.Logger { return m }
func (m *mockLogger) StartOperation(name string) logger.OperationLogger { return &mockOpLogger{} }

type mockOpLogger struct{}

func (m *mockOpLogger) Update(msg string, args ...any) {}
func (m *mockOpLogger) Complete(msg string, args ...any) {}
func (m *mockOpLogger) Fail(msg string, args ...any) {}

// createTestEngine creates an engine without database connection for parsing tests
func createTestEngine() *ParallelRestoreEngine {
return &ParallelRestoreEngine{
config: &PostgreSQLNativeConfig{},
log: &mockLogger{},
parallelWorkers: 4,
closeCh: make(chan struct{}),
}
}

// TestParseStatementsContextCancellation verifies that parsing can be cancelled
// This was a critical fix - parsing large SQL files would hang on Ctrl+C
func TestParseStatementsContextCancellation(t *testing.T) {
engine := createTestEngine()

// Create a large SQL content that would take a while to parse
var buf bytes.Buffer
buf.WriteString("-- Test dump\n")
buf.WriteString("SET statement_timeout = 0;\n")

// Add 1,000,000 lines to simulate a large dump
for i := 0; i < 1000000; i++ {
buf.WriteString("SELECT ")
buf.WriteString(string(rune('0' + (i % 10))))
buf.WriteString("; -- line padding to make file larger\n")
}

// Create a context that cancels after 10ms
ctx, cancel := context.WithTimeout(context.Background(), 10*time.Millisecond)
defer cancel()

reader := strings.NewReader(buf.String())

start := time.Now()
_, err := engine.parseStatementsWithContext(ctx, reader)
elapsed := time.Since(start)

// Should return quickly with context error, not hang
if elapsed > 500*time.Millisecond {
t.Errorf("Parsing took too long after cancellation: %v (expected < 500ms)", elapsed)
}

if err == nil {
t.Log("Parsing completed before timeout (system is very fast)")
} else if err == context.DeadlineExceeded || err == context.Canceled {
t.Logf("✓ Context cancellation worked correctly (elapsed: %v)", elapsed)
} else {
t.Logf("Got error: %v (elapsed: %v)", err, elapsed)
}
}

// TestParseStatementsWithCopyDataCancellation tests cancellation during COPY data parsing
// This is where large restores spend most of their time
func TestParseStatementsWithCopyDataCancellation(t *testing.T) {
engine := createTestEngine()

// Create SQL with COPY statement and lots of data
var buf bytes.Buffer
buf.WriteString("CREATE TABLE test (id int, data text);\n")
buf.WriteString("COPY test (id, data) FROM stdin;\n")

// Add 500,000 rows of COPY data
for i := 0; i < 500000; i++ {
buf.WriteString("1\tsome test data for row number padding to make larger\n")
}
buf.WriteString("\\.\n")
buf.WriteString("SELECT 1;\n")

// Create a context that cancels after 10ms
ctx, cancel := context.WithTimeout(context.Background(), 10*time.Millisecond)
defer cancel()

reader := strings.NewReader(buf.String())

start := time.Now()
_, err := engine.parseStatementsWithContext(ctx, reader)
elapsed := time.Since(start)

// Should return quickly with context error, not hang
if elapsed > 500*time.Millisecond {
t.Errorf("COPY parsing took too long after cancellation: %v (expected < 500ms)", elapsed)
}

if err == nil {
t.Log("Parsing completed before timeout (system is very fast)")
} else if err == context.DeadlineExceeded || err == context.Canceled {
t.Logf("✓ Context cancellation during COPY worked correctly (elapsed: %v)", elapsed)
} else {
t.Logf("Got error: %v (elapsed: %v)", err, elapsed)
}
}
internal/restore/dryrun.go (new file): 666 lines

@@ -0,0 +1,666 @@
package restore

import (
"context"
"fmt"
"os"
"os/exec"
"path/filepath"
"strings"
"syscall"
"time"

"dbbackup/internal/cleanup"
"dbbackup/internal/config"
"dbbackup/internal/logger"
)

// DryRunCheck represents a single dry-run check result
type DryRunCheck struct {
Name string
Status DryRunStatus
Message string
Details string
Critical bool // If true, restore will definitely fail
}

// DryRunStatus represents the status of a dry-run check
type DryRunStatus int

const (
DryRunPassed DryRunStatus = iota
DryRunWarning
DryRunFailed
DryRunSkipped
)

func (s DryRunStatus) String() string {
switch s {
case DryRunPassed:
return "PASS"
case DryRunWarning:
return "WARN"
case DryRunFailed:
return "FAIL"
case DryRunSkipped:
return "SKIP"
default:
return "UNKNOWN"
}
}

func (s DryRunStatus) Icon() string {
switch s {
case DryRunPassed:
return "[+]"
case DryRunWarning:
return "[!]"
case DryRunFailed:
return "[-]"
case DryRunSkipped:
return "[ ]"
default:
return "[?]"
}
}

// DryRunResult contains all dry-run check results
type DryRunResult struct {
Checks []DryRunCheck
CanProceed bool
HasWarnings bool
CriticalCount int
WarningCount int
EstimatedTime time.Duration
RequiredDiskMB int64
AvailableDiskMB int64
}

// RestoreDryRun performs comprehensive pre-restore validation
type RestoreDryRun struct {
cfg *config.Config
log logger.Logger
safety *Safety
archive string
target string
}

// NewRestoreDryRun creates a new restore dry-run validator
func NewRestoreDryRun(cfg *config.Config, log logger.Logger, archivePath, targetDB string) *RestoreDryRun {
return &RestoreDryRun{
cfg: cfg,
log: log,
safety: NewSafety(cfg, log),
archive: archivePath,
target: targetDB,
}
}

// Run executes all dry-run checks
func (r *RestoreDryRun) Run(ctx context.Context) (*DryRunResult, error) {
result := &DryRunResult{
Checks: make([]DryRunCheck, 0, 10),
CanProceed: true,
}

r.log.Info("Running restore dry-run checks",
"archive", r.archive,
"target", r.target)

// 1. Archive existence and accessibility
result.Checks = append(result.Checks, r.checkArchiveAccess())

// 2. Archive format validation
result.Checks = append(result.Checks, r.checkArchiveFormat())

// 3. Database connectivity
result.Checks = append(result.Checks, r.checkDatabaseConnectivity(ctx))

// 4. User permissions (CREATE DATABASE, DROP, etc.)
result.Checks = append(result.Checks, r.checkUserPermissions(ctx))

// 5. Target database conflicts
result.Checks = append(result.Checks, r.checkTargetConflicts(ctx))

// 6. Disk space requirements
diskCheck, requiredMB, availableMB := r.checkDiskSpace()
result.Checks = append(result.Checks, diskCheck)
result.RequiredDiskMB = requiredMB
result.AvailableDiskMB = availableMB

// 7. Work directory permissions
result.Checks = append(result.Checks, r.checkWorkDirectory())

// 8. Required tools availability
result.Checks = append(result.Checks, r.checkRequiredTools())

// 9. PostgreSQL lock settings (for parallel restore)
result.Checks = append(result.Checks, r.checkLockSettings(ctx))

// 10. Memory availability
result.Checks = append(result.Checks, r.checkMemoryAvailability())

// Calculate summary
for _, check := range result.Checks {
switch check.Status {
case DryRunFailed:
if check.Critical {
result.CriticalCount++
result.CanProceed = false
} else {
result.WarningCount++
result.HasWarnings = true
}
case DryRunWarning:
result.WarningCount++
result.HasWarnings = true
}
}

// Estimate restore time based on archive size
result.EstimatedTime = r.estimateRestoreTime()

return result, nil
}

// checkArchiveAccess verifies the archive file is accessible
func (r *RestoreDryRun) checkArchiveAccess() DryRunCheck {
check := DryRunCheck{
Name: "Archive Access",
Critical: true,
}

info, err := os.Stat(r.archive)
if err != nil {
if os.IsNotExist(err) {
check.Status = DryRunFailed
check.Message = "Archive file not found"
check.Details = r.archive
} else if os.IsPermission(err) {
check.Status = DryRunFailed
check.Message = "Permission denied reading archive"
check.Details = err.Error()
} else {
check.Status = DryRunFailed
check.Message = "Cannot access archive"
check.Details = err.Error()
}
return check
}

if info.Size() == 0 {
check.Status = DryRunFailed
check.Message = "Archive file is empty"
return check
}

check.Status = DryRunPassed
check.Message = fmt.Sprintf("Archive accessible (%s)", formatBytesSize(info.Size()))
return check
}

// checkArchiveFormat validates the archive format
func (r *RestoreDryRun) checkArchiveFormat() DryRunCheck {
check := DryRunCheck{
Name: "Archive Format",
Critical: true,
}

err := r.safety.ValidateArchive(r.archive)
if err != nil {
check.Status = DryRunFailed
check.Message = "Invalid archive format"
check.Details = err.Error()
return check
}

format := DetectArchiveFormat(r.archive)
check.Status = DryRunPassed
check.Message = fmt.Sprintf("Valid %s format", format.String())
return check
}

// checkDatabaseConnectivity tests database connection
func (r *RestoreDryRun) checkDatabaseConnectivity(ctx context.Context) DryRunCheck {
check := DryRunCheck{
Name: "Database Connectivity",
Critical: true,
}

// Try to list databases as a connectivity check
_, err := r.safety.ListUserDatabases(ctx)
if err != nil {
check.Status = DryRunFailed
check.Message = "Cannot connect to database server"
check.Details = err.Error()
return check
}

check.Status = DryRunPassed
check.Message = fmt.Sprintf("Connected to %s:%d", r.cfg.Host, r.cfg.Port)
return check
}

// checkUserPermissions verifies required database permissions
func (r *RestoreDryRun) checkUserPermissions(ctx context.Context) DryRunCheck {
check := DryRunCheck{
Name: "User Permissions",
Critical: true,
}

if r.cfg.DatabaseType != "postgres" {
check.Status = DryRunSkipped
||||
check.Message = "Permission check only implemented for PostgreSQL"
|
||||
return check
|
||||
}
|
||||
|
||||
// Check if user has CREATEDB privilege
|
||||
query := `SELECT rolcreatedb, rolsuper FROM pg_roles WHERE rolname = current_user`
|
||||
|
||||
args := []string{
|
||||
"-h", r.cfg.Host,
|
||||
"-p", fmt.Sprintf("%d", r.cfg.Port),
|
||||
"-U", r.cfg.User,
|
||||
"-d", "postgres",
|
||||
"-tA",
|
||||
"-c", query,
|
||||
}
|
||||
|
||||
cmd := cleanup.SafeCommand(ctx, "psql", args...)
|
||||
if r.cfg.Password != "" {
|
||||
cmd.Env = append(os.Environ(), fmt.Sprintf("PGPASSWORD=%s", r.cfg.Password))
|
||||
}
|
||||
|
||||
output, err := cmd.Output()
|
||||
if err != nil {
|
||||
check.Status = DryRunWarning
|
||||
check.Message = "Could not verify permissions"
|
||||
check.Details = err.Error()
|
||||
return check
|
||||
}
|
||||
|
||||
result := strings.TrimSpace(string(output))
|
||||
parts := strings.Split(result, "|")
|
||||
|
||||
if len(parts) >= 2 {
|
||||
canCreate := parts[0] == "t"
|
||||
isSuper := parts[1] == "t"
|
||||
|
||||
if isSuper {
|
||||
check.Status = DryRunPassed
|
||||
check.Message = "User is superuser (full permissions)"
|
||||
return check
|
||||
}
|
||||
|
||||
if canCreate {
|
||||
check.Status = DryRunPassed
|
||||
check.Message = "User has CREATEDB privilege"
|
||||
return check
|
||||
}
|
||||
}
|
||||
|
||||
check.Status = DryRunFailed
|
||||
check.Message = "User lacks CREATEDB privilege"
|
||||
check.Details = "Required for creating target database. Run: ALTER USER " + r.cfg.User + " CREATEDB;"
|
||||
return check
|
||||
}
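// Note on the query above (illustrative, not part of the diff): with -tA, psql
// prints the row unaligned and tuple-only, so a role with CREATEDB but without
// SUPERUSER comes back as "t|f". Splitting on "|" gives parts[0] == "t"
// (CREATEDB) and parts[1] == "f" (superuser), which the branches above map to a
// pass; "f|f" falls through to the CREATEDB failure at the end.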
|
||||
|
||||
// checkTargetConflicts checks if target database already exists
|
||||
func (r *RestoreDryRun) checkTargetConflicts(ctx context.Context) DryRunCheck {
|
||||
check := DryRunCheck{
|
||||
Name: "Target Database",
|
||||
Critical: false, // Not critical - can be overwritten with --clean
|
||||
}
|
||||
|
||||
if r.target == "" {
|
||||
check.Status = DryRunSkipped
|
||||
check.Message = "Cluster restore - checking multiple databases"
|
||||
return check
|
||||
}
|
||||
|
||||
databases, err := r.safety.ListUserDatabases(ctx)
|
||||
if err != nil {
|
||||
check.Status = DryRunWarning
|
||||
check.Message = "Could not check existing databases"
|
||||
check.Details = err.Error()
|
||||
return check
|
||||
}
|
||||
|
||||
for _, db := range databases {
|
||||
if db == r.target {
|
||||
check.Status = DryRunWarning
|
||||
check.Message = fmt.Sprintf("Database '%s' already exists", r.target)
|
||||
check.Details = "Use --clean to drop and recreate, or choose different target"
|
||||
return check
|
||||
}
|
||||
}
|
||||
|
||||
check.Status = DryRunPassed
|
||||
check.Message = fmt.Sprintf("Target '%s' is available", r.target)
|
||||
return check
|
||||
}
|
||||
|
||||
// checkDiskSpace verifies sufficient disk space
|
||||
func (r *RestoreDryRun) checkDiskSpace() (DryRunCheck, int64, int64) {
|
||||
check := DryRunCheck{
|
||||
Name: "Disk Space",
|
||||
Critical: true,
|
||||
}
|
||||
|
||||
// Get archive size
|
||||
info, err := os.Stat(r.archive)
|
||||
if err != nil {
|
||||
check.Status = DryRunSkipped
|
||||
check.Message = "Cannot determine archive size"
|
||||
return check, 0, 0
|
||||
}
|
||||
|
||||
// Estimate uncompressed size (assume 3x compression ratio)
|
||||
archiveSizeMB := info.Size() / 1024 / 1024
|
||||
estimatedUncompressedMB := archiveSizeMB * 3
|
||||
|
||||
// Need space for: work dir extraction + restored database
|
||||
// Work dir: full uncompressed size
|
||||
// Database: roughly same as uncompressed SQL
|
||||
requiredMB := estimatedUncompressedMB * 2
|
||||
|
||||
// Check available disk space in work directory
|
||||
workDir := r.cfg.GetEffectiveWorkDir()
|
||||
if workDir == "" {
|
||||
workDir = r.cfg.BackupDir
|
||||
}
|
||||
|
||||
var stat syscall.Statfs_t
|
||||
if err := syscall.Statfs(workDir, &stat); err != nil {
|
||||
check.Status = DryRunWarning
|
||||
check.Message = "Cannot check disk space"
|
||||
check.Details = err.Error()
|
||||
return check, requiredMB, 0
|
||||
}
|
||||
|
||||
availableMB := int64(stat.Bavail*uint64(stat.Bsize)) / 1024 / 1024
|
||||
|
||||
if availableMB < requiredMB {
|
||||
check.Status = DryRunFailed
|
||||
check.Message = fmt.Sprintf("Insufficient disk space: need %d MB, have %d MB", requiredMB, availableMB)
|
||||
check.Details = fmt.Sprintf("Work directory: %s", workDir)
|
||||
return check, requiredMB, availableMB
|
||||
}
|
||||
|
||||
// Warn if less than 20% buffer
|
||||
if availableMB < requiredMB*12/10 {
|
||||
check.Status = DryRunWarning
|
||||
check.Message = fmt.Sprintf("Low disk space margin: need %d MB, have %d MB", requiredMB, availableMB)
|
||||
return check, requiredMB, availableMB
|
||||
}
|
||||
|
||||
check.Status = DryRunPassed
|
||||
check.Message = fmt.Sprintf("Sufficient space: need ~%d MB, have %d MB", requiredMB, availableMB)
|
||||
return check, requiredMB, availableMB
|
||||
}
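// Worked example for the sizing heuristic above (illustrative numbers only): a
// 500 MB archive is assumed to expand to ~1500 MB (3x compression ratio) and to
// need roughly the same again for the restored database, so requiredMB comes out
// at ~3000 MB. With 3300 MB free the hard requirement passes, but the 20% buffer
// threshold (3000 * 12 / 10 = 3600) does not, so the check reports a warning
// rather than a pass.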
|
||||
|
||||
// checkWorkDirectory verifies work directory is writable
|
||||
func (r *RestoreDryRun) checkWorkDirectory() DryRunCheck {
|
||||
check := DryRunCheck{
|
||||
Name: "Work Directory",
|
||||
Critical: true,
|
||||
}
|
||||
|
||||
workDir := r.cfg.GetEffectiveWorkDir()
|
||||
if workDir == "" {
|
||||
workDir = r.cfg.BackupDir
|
||||
}
|
||||
|
||||
// Check if directory exists
|
||||
info, err := os.Stat(workDir)
|
||||
if err != nil {
|
||||
if os.IsNotExist(err) {
|
||||
check.Status = DryRunFailed
|
||||
check.Message = "Work directory does not exist"
|
||||
check.Details = workDir
|
||||
} else {
|
||||
check.Status = DryRunFailed
|
||||
check.Message = "Cannot access work directory"
|
||||
check.Details = err.Error()
|
||||
}
|
||||
return check
|
||||
}
|
||||
|
||||
if !info.IsDir() {
|
||||
check.Status = DryRunFailed
|
||||
check.Message = "Work path is not a directory"
|
||||
check.Details = workDir
|
||||
return check
|
||||
}
|
||||
|
||||
// Try to create a test file
|
||||
testFile := filepath.Join(workDir, ".dbbackup-dryrun-test")
|
||||
f, err := os.Create(testFile)
|
||||
if err != nil {
|
||||
check.Status = DryRunFailed
|
||||
check.Message = "Work directory is not writable"
|
||||
check.Details = err.Error()
|
||||
return check
|
||||
}
|
||||
f.Close()
|
||||
os.Remove(testFile)
|
||||
|
||||
check.Status = DryRunPassed
|
||||
check.Message = fmt.Sprintf("Work directory writable: %s", workDir)
|
||||
return check
|
||||
}
|
||||
|
||||
// checkRequiredTools verifies required CLI tools are available
|
||||
func (r *RestoreDryRun) checkRequiredTools() DryRunCheck {
|
||||
check := DryRunCheck{
|
||||
Name: "Required Tools",
|
||||
Critical: true,
|
||||
}
|
||||
|
||||
var required []string
|
||||
switch r.cfg.DatabaseType {
|
||||
case "postgres":
|
||||
required = []string{"pg_restore", "psql", "createdb"}
|
||||
case "mysql", "mariadb":
|
||||
required = []string{"mysql", "mysqldump"}
|
||||
default:
|
||||
check.Status = DryRunSkipped
|
||||
check.Message = "Unknown database type"
|
||||
return check
|
||||
}
|
||||
|
||||
missing := []string{}
|
||||
for _, tool := range required {
|
||||
if _, err := LookPath(tool); err != nil {
|
||||
missing = append(missing, tool)
|
||||
}
|
||||
}
|
||||
|
||||
if len(missing) > 0 {
|
||||
check.Status = DryRunFailed
|
||||
check.Message = fmt.Sprintf("Missing tools: %s", strings.Join(missing, ", "))
|
||||
check.Details = "Install the database client tools package"
|
||||
return check
|
||||
}
|
||||
|
||||
check.Status = DryRunPassed
|
||||
check.Message = fmt.Sprintf("All tools available: %s", strings.Join(required, ", "))
|
||||
return check
|
||||
}
|
||||
|
||||
// checkLockSettings checks PostgreSQL lock settings for parallel restore
|
||||
func (r *RestoreDryRun) checkLockSettings(ctx context.Context) DryRunCheck {
|
||||
check := DryRunCheck{
|
||||
Name: "Lock Settings",
|
||||
Critical: false,
|
||||
}
|
||||
|
||||
if r.cfg.DatabaseType != "postgres" {
|
||||
check.Status = DryRunSkipped
|
||||
check.Message = "Lock check only for PostgreSQL"
|
||||
return check
|
||||
}
|
||||
|
||||
// Check max_locks_per_transaction
|
||||
query := `SHOW max_locks_per_transaction`
|
||||
args := []string{
|
||||
"-h", r.cfg.Host,
|
||||
"-p", fmt.Sprintf("%d", r.cfg.Port),
|
||||
"-U", r.cfg.User,
|
||||
"-d", "postgres",
|
||||
"-tA",
|
||||
"-c", query,
|
||||
}
|
||||
|
||||
cmd := cleanup.SafeCommand(ctx, "psql", args...)
|
||||
if r.cfg.Password != "" {
|
||||
cmd.Env = append(os.Environ(), fmt.Sprintf("PGPASSWORD=%s", r.cfg.Password))
|
||||
}
|
||||
|
||||
output, err := cmd.Output()
|
||||
if err != nil {
|
||||
check.Status = DryRunWarning
|
||||
check.Message = "Could not check lock settings"
|
||||
return check
|
||||
}
|
||||
|
||||
locks := strings.TrimSpace(string(output))
|
||||
if locks == "" {
|
||||
check.Status = DryRunWarning
|
||||
check.Message = "Could not determine max_locks_per_transaction"
|
||||
return check
|
||||
}
|
||||
|
||||
// Default is 64, recommend at least 128 for parallel restores
|
||||
var lockCount int
|
||||
fmt.Sscanf(locks, "%d", &lockCount)
|
||||
|
||||
if lockCount < 128 {
|
||||
check.Status = DryRunWarning
|
||||
check.Message = fmt.Sprintf("max_locks_per_transaction=%d (recommend 128+ for parallel)", lockCount)
|
||||
check.Details = "Set: ALTER SYSTEM SET max_locks_per_transaction = 128; then restart PostgreSQL"
|
||||
return check
|
||||
}
|
||||
|
||||
check.Status = DryRunPassed
|
||||
check.Message = fmt.Sprintf("max_locks_per_transaction=%d (sufficient)", lockCount)
|
||||
return check
|
||||
}
|
||||
|
||||
// checkMemoryAvailability checks if enough memory is available
|
||||
func (r *RestoreDryRun) checkMemoryAvailability() DryRunCheck {
|
||||
check := DryRunCheck{
|
||||
Name: "Memory Availability",
|
||||
Critical: false,
|
||||
}
|
||||
|
||||
// Read /proc/meminfo on Linux
|
||||
data, err := os.ReadFile("/proc/meminfo")
|
||||
if err != nil {
|
||||
check.Status = DryRunSkipped
|
||||
check.Message = "Cannot check memory (non-Linux?)"
|
||||
return check
|
||||
}
|
||||
|
||||
var availableKB int64
|
||||
for _, line := range strings.Split(string(data), "\n") {
|
||||
if strings.HasPrefix(line, "MemAvailable:") {
|
||||
fmt.Sscanf(line, "MemAvailable: %d kB", &availableKB)
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
availableMB := availableKB / 1024
|
||||
|
||||
// Recommend at least 1GB for restore operations
|
||||
if availableMB < 1024 {
|
||||
check.Status = DryRunWarning
|
||||
check.Message = fmt.Sprintf("Low available memory: %d MB", availableMB)
|
||||
check.Details = "Restore may be slow or fail. Consider closing other applications."
|
||||
return check
|
||||
}
|
||||
|
||||
check.Status = DryRunPassed
|
||||
check.Message = fmt.Sprintf("Available memory: %d MB", availableMB)
|
||||
return check
|
||||
}
|
||||
|
||||
// estimateRestoreTime estimates restore duration based on archive size
|
||||
func (r *RestoreDryRun) estimateRestoreTime() time.Duration {
|
||||
info, err := os.Stat(r.archive)
|
||||
if err != nil {
|
||||
return 0
|
||||
}
|
||||
|
||||
// Rough estimate: 100 MB/minute for restore operations
|
||||
// This accounts for decompression, SQL parsing, and database writes
|
||||
sizeMB := info.Size() / 1024 / 1024
|
||||
minutes := sizeMB / 100
|
||||
if minutes < 1 {
|
||||
minutes = 1
|
||||
}
|
||||
|
||||
return time.Duration(minutes) * time.Minute
|
||||
}
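// Worked example (illustrative): a 2 GiB archive is 2048 MB, so the estimate is
// 2048 / 100 ≈ 20 minutes; anything under 100 MB rounds up to the 1-minute floor.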
|
||||
|
||||
// formatBytesSize formats bytes to human-readable string
|
||||
func formatBytesSize(bytes int64) string {
|
||||
const (
|
||||
KB = 1024
|
||||
MB = KB * 1024
|
||||
GB = MB * 1024
|
||||
)
|
||||
|
||||
switch {
|
||||
case bytes >= GB:
|
||||
return fmt.Sprintf("%.1f GB", float64(bytes)/GB)
|
||||
case bytes >= MB:
|
||||
return fmt.Sprintf("%.1f MB", float64(bytes)/MB)
|
||||
case bytes >= KB:
|
||||
return fmt.Sprintf("%.1f KB", float64(bytes)/KB)
|
||||
default:
|
||||
return fmt.Sprintf("%d B", bytes)
|
||||
}
|
||||
}
|
||||
|
||||
// LookPath is a wrapper around exec.LookPath for testing
|
||||
var LookPath = func(file string) (string, error) {
|
||||
return exec.LookPath(file)
|
||||
}
|
||||
|
||||
// PrintDryRunResult prints a formatted dry-run result
|
||||
func PrintDryRunResult(result *DryRunResult) {
|
||||
fmt.Println("\n" + strings.Repeat("=", 60))
|
||||
fmt.Println("RESTORE DRY-RUN RESULTS")
|
||||
fmt.Println(strings.Repeat("=", 60))
|
||||
|
||||
for _, check := range result.Checks {
|
||||
fmt.Printf("%s %-20s %s\n", check.Status.Icon(), check.Name+":", check.Message)
|
||||
if check.Details != "" {
|
||||
fmt.Printf(" └─ %s\n", check.Details)
|
||||
}
|
||||
}
|
||||
|
||||
fmt.Println(strings.Repeat("-", 60))
|
||||
|
||||
if result.EstimatedTime > 0 {
|
||||
fmt.Printf("Estimated restore time: %s\n", result.EstimatedTime)
|
||||
}
|
||||
|
||||
if result.RequiredDiskMB > 0 {
|
||||
fmt.Printf("Disk space: %d MB required, %d MB available\n",
|
||||
result.RequiredDiskMB, result.AvailableDiskMB)
|
||||
}
|
||||
|
||||
fmt.Println()
|
||||
if result.CanProceed {
|
||||
if result.HasWarnings {
|
||||
fmt.Println("⚠️ DRY-RUN: PASSED with warnings - restore can proceed")
|
||||
} else {
|
||||
fmt.Println("✅ DRY-RUN: PASSED - restore can proceed")
|
||||
}
|
||||
} else {
|
||||
fmt.Printf("❌ DRY-RUN: FAILED - %d critical issue(s) must be resolved\n", result.CriticalCount)
|
||||
}
|
||||
fmt.Println()
|
||||
}
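// Illustrative usage sketch (not part of the diff; ctx, cfg and log are assumed
// to be an existing context.Context, *config.Config and logger.Logger, and the
// archive path is a placeholder): run the checks, print the report, and refuse
// to continue when a critical check failed.
//
//	dryRun := NewRestoreDryRun(cfg, log, "/backups/app_2024.dump.gz", "app_restore")
//	result, err := dryRun.Run(ctx)
//	if err != nil {
//		return err
//	}
//	PrintDryRunResult(result)
//	if !result.CanProceed {
//		return fmt.Errorf("restore dry-run failed: %d critical issue(s)", result.CriticalCount)
//	}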
|
||||
@ -635,7 +635,8 @@ func (e *Engine) restoreWithNativeEngine(ctx context.Context, archivePath, targe
|
||||
"database", targetDB,
|
||||
"archive", archivePath)
|
||||
|
||||
parallelEngine, err := native.NewParallelRestoreEngine(nativeCfg, e.log, parallelWorkers)
|
||||
// Pass context to ensure pool is properly closed on Ctrl+C cancellation
|
||||
parallelEngine, err := native.NewParallelRestoreEngineWithContext(ctx, nativeCfg, e.log, parallelWorkers)
|
||||
if err != nil {
|
||||
e.log.Warn("Failed to create parallel restore engine, falling back to sequential", "error", err)
|
||||
// Fall back to sequential restore
|
||||
@ -1342,9 +1343,14 @@ func (e *Engine) RestoreCluster(ctx context.Context, archivePath string, preExtr
|
||||
}
|
||||
|
||||
format := DetectArchiveFormat(archivePath)
|
||||
if format != FormatClusterTarGz {
|
||||
if !format.CanBeClusterRestore() {
|
||||
operation.Fail("Invalid cluster archive format")
|
||||
return fmt.Errorf("not a cluster archive: %s (detected format: %s)", archivePath, format)
|
||||
return fmt.Errorf("not a valid cluster restore format: %s (detected format: %s). Supported: .tar.gz, .sql, .sql.gz", archivePath, format)
|
||||
}
|
||||
|
||||
// For SQL-based cluster restores, use a different restore path
|
||||
if format == FormatPostgreSQLSQL || format == FormatPostgreSQLSQLGz {
|
||||
return e.restoreClusterFromSQL(ctx, archivePath, operation)
|
||||
}
|
||||
|
||||
// Check if we have a pre-extracted directory (optimization to avoid double extraction)
|
||||
@ -2177,6 +2183,45 @@ func (e *Engine) RestoreCluster(ctx context.Context, archivePath string, preExtr
|
||||
return nil
|
||||
}
|
||||
|
||||
// restoreClusterFromSQL restores a pg_dumpall SQL file using the native engine
|
||||
// This handles .sql and .sql.gz files containing full cluster dumps
|
||||
func (e *Engine) restoreClusterFromSQL(ctx context.Context, archivePath string, operation logger.OperationLogger) error {
|
||||
e.log.Info("Restoring cluster from SQL file (pg_dumpall format)",
|
||||
"file", filepath.Base(archivePath),
|
||||
"native_engine", true)
|
||||
|
||||
clusterStartTime := time.Now()
|
||||
|
||||
// Determine if compressed
|
||||
compressed := strings.HasSuffix(strings.ToLower(archivePath), ".gz")
|
||||
|
||||
// Use native engine to restore directly to postgres database (globals + all databases)
|
||||
e.log.Info("Restoring SQL dump using native engine...",
|
||||
"compressed", compressed,
|
||||
"size", FormatBytes(getFileSize(archivePath)))
|
||||
|
||||
e.progress.Start("Restoring cluster from SQL dump...")
|
||||
|
||||
// For pg_dumpall, we restore to the 'postgres' database which then creates other databases
|
||||
targetDB := "postgres"
|
||||
|
||||
err := e.restoreWithNativeEngine(ctx, archivePath, targetDB, compressed)
|
||||
if err != nil {
|
||||
operation.Fail(fmt.Sprintf("SQL cluster restore failed: %v", err))
|
||||
e.recordClusterRestoreMetrics(clusterStartTime, archivePath, 0, 0, false, err.Error())
|
||||
return fmt.Errorf("SQL cluster restore failed: %w", err)
|
||||
}
|
||||
|
||||
duration := time.Since(clusterStartTime)
|
||||
e.progress.Complete(fmt.Sprintf("Cluster restored successfully from SQL in %s", duration.Round(time.Second)))
|
||||
operation.Complete("SQL cluster restore completed")
|
||||
|
||||
// Record metrics
|
||||
e.recordClusterRestoreMetrics(clusterStartTime, archivePath, 1, 1, true, "")
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// recordClusterRestoreMetrics records metrics for cluster restore operations
|
||||
func (e *Engine) recordClusterRestoreMetrics(startTime time.Time, archivePath string, totalDBs, successCount int, success bool, errorMsg string) {
|
||||
duration := time.Since(startTime)
|
||||
@ -2480,7 +2525,14 @@ func (e *Engine) restoreGlobals(ctx context.Context, globalsFile string) error {
|
||||
cmdErr = ctx.Err()
|
||||
}
|
||||
|
||||
<-stderrDone
|
||||
// Wait for stderr reader with timeout to prevent indefinite hang
|
||||
// if the process doesn't fully terminate
|
||||
select {
|
||||
case <-stderrDone:
|
||||
// Normal completion
|
||||
case <-time.After(5 * time.Second):
|
||||
e.log.Warn("Stderr reader timeout - forcefully continuing")
|
||||
}
|
||||
|
||||
// Only fail on actual command errors or FATAL PostgreSQL errors
|
||||
// Regular ERROR messages (like "role already exists") are expected
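// Minimal sketch of the stderr handling this hunk changes (stderrPipe and
// lastStderr are assumed names, not from the diff): the pipe is drained in a
// goroutine that closes stderrDone when it finishes, and the select above gives
// up after five seconds so a wedged child process cannot hang the restore.
//
//	stderrDone := make(chan struct{})
//	go func() {
//		defer close(stderrDone)
//		scanner := bufio.NewScanner(stderrPipe)
//		for scanner.Scan() {
//			lastStderr = scanner.Text() // keep the last line for error reporting
//		}
//	}()
//	// ... after the command finishes or the context is cancelled:
//	select {
//	case <-stderrDone:
//	case <-time.After(5 * time.Second):
//		e.log.Warn("Stderr reader timeout - forcefully continuing")
//	}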
|
||||
@ -2924,6 +2976,15 @@ func (e *Engine) isIgnorableError(errorMsg string) bool {
|
||||
return false
|
||||
}
|
||||
|
||||
// getFileSize returns the size of a file, or 0 if it can't be read
|
||||
func getFileSize(path string) int64 {
|
||||
info, err := os.Stat(path)
|
||||
if err != nil {
|
||||
return 0
|
||||
}
|
||||
return info.Size()
|
||||
}
|
||||
|
||||
// FormatBytes formats bytes to human readable format
|
||||
func FormatBytes(bytes int64) string {
|
||||
const unit = 1024
|
||||
|
||||
@ -168,11 +168,19 @@ func (f ArchiveFormat) IsCompressed() bool {
|
||||
f == FormatClusterTarGz
|
||||
}
|
||||
|
||||
// IsClusterBackup returns true if the archive is a cluster backup
|
||||
// IsClusterBackup returns true if the archive is a cluster backup (.tar.gz format created by dbbackup)
|
||||
func (f ArchiveFormat) IsClusterBackup() bool {
|
||||
return f == FormatClusterTarGz
|
||||
}
|
||||
|
||||
// CanBeClusterRestore returns true if the format can be used for cluster restore
|
||||
// This includes .tar.gz (dbbackup format) and .sql/.sql.gz (pg_dumpall format for native engine)
|
||||
func (f ArchiveFormat) CanBeClusterRestore() bool {
|
||||
return f == FormatClusterTarGz ||
|
||||
f == FormatPostgreSQLSQL ||
|
||||
f == FormatPostgreSQLSQLGz
|
||||
}
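// Illustrative usage (mirrors the engine change elsewhere in this set; names are
// taken from this diff, archivePath is an assumed input): cluster restore now
// accepts pg_dumpall SQL dumps as well as dbbackup .tar.gz archives.
//
//	format := DetectArchiveFormat(archivePath)
//	if !format.CanBeClusterRestore() {
//		return fmt.Errorf("not a valid cluster restore format: %s", format)
//	}
//	if format == FormatPostgreSQLSQL || format == FormatPostgreSQLSQLGz {
//		// pg_dumpall dump - routed through the SQL-based native restore path
//	}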
|
||||
|
||||
// IsPostgreSQL returns true if the archive is PostgreSQL format
|
||||
func (f ArchiveFormat) IsPostgreSQL() bool {
|
||||
return f == FormatPostgreSQLDump ||
|
||||
|
||||
@ -1,7 +1,15 @@
|
||||
package security
|
||||
|
||||
import (
|
||||
"crypto/ed25519"
|
||||
"crypto/rand"
|
||||
"crypto/sha256"
|
||||
"encoding/base64"
|
||||
"encoding/hex"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"os"
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
"dbbackup/internal/logger"
|
||||
@ -21,13 +29,36 @@ type AuditEvent struct {
|
||||
type AuditLogger struct {
|
||||
log logger.Logger
|
||||
enabled bool
|
||||
|
||||
// For signed audit log support
|
||||
mu sync.Mutex
|
||||
entries []SignedAuditEntry
|
||||
privateKey ed25519.PrivateKey
|
||||
publicKey ed25519.PublicKey
|
||||
prevHash string // Hash of previous entry for chaining
|
||||
}
|
||||
|
||||
// SignedAuditEntry represents an audit entry with cryptographic signature
type SignedAuditEntry struct {
	Sequence  int64  `json:"seq"`
	Timestamp string `json:"ts"`
	User      string `json:"user"`
	Action    string `json:"action"`
	Resource  string `json:"resource"`
	Result    string `json:"result"`
	Details   string `json:"details,omitempty"`
	PrevHash  string `json:"prev_hash"` // Hash chain for tamper detection
	Hash      string `json:"hash"`      // SHA-256 of this entry (without signature)
	Signature string `json:"sig"`       // Ed25519 signature of Hash
}
|
||||
|
||||
// NewAuditLogger creates a new audit logger
|
||||
func NewAuditLogger(log logger.Logger, enabled bool) *AuditLogger {
|
||||
return &AuditLogger{
|
||||
log: log,
|
||||
enabled: enabled,
|
||||
log: log,
|
||||
enabled: enabled,
|
||||
entries: make([]SignedAuditEntry, 0),
|
||||
prevHash: "genesis", // Initial hash for first entry
|
||||
}
|
||||
}
|
||||
|
||||
@ -232,3 +263,337 @@ func GetCurrentUser() string {
|
||||
}
|
||||
return "unknown"
|
||||
}
|
||||
|
||||
// =============================================================================
|
||||
// Audit Log Signing and Verification
|
||||
// =============================================================================
|
||||
|
||||
// GenerateSigningKeys generates a new Ed25519 key pair for audit log signing
|
||||
func GenerateSigningKeys() (privateKey ed25519.PrivateKey, publicKey ed25519.PublicKey, err error) {
|
||||
publicKey, privateKey, err = ed25519.GenerateKey(rand.Reader)
|
||||
return
|
||||
}
|
||||
|
||||
// SavePrivateKey saves the private key to a file (PEM-like format)
|
||||
func SavePrivateKey(path string, key ed25519.PrivateKey) error {
|
||||
encoded := base64.StdEncoding.EncodeToString(key)
|
||||
content := fmt.Sprintf("-----BEGIN DBBACKUP AUDIT PRIVATE KEY-----\n%s\n-----END DBBACKUP AUDIT PRIVATE KEY-----\n", encoded)
|
||||
return os.WriteFile(path, []byte(content), 0600) // Restrictive permissions
|
||||
}
|
||||
|
||||
// SavePublicKey saves the public key to a file (PEM-like format)
|
||||
func SavePublicKey(path string, key ed25519.PublicKey) error {
|
||||
encoded := base64.StdEncoding.EncodeToString(key)
|
||||
content := fmt.Sprintf("-----BEGIN DBBACKUP AUDIT PUBLIC KEY-----\n%s\n-----END DBBACKUP AUDIT PUBLIC KEY-----\n", encoded)
|
||||
return os.WriteFile(path, []byte(content), 0644)
|
||||
}
|
||||
|
||||
// LoadPrivateKey loads a private key from file
|
||||
func LoadPrivateKey(path string) (ed25519.PrivateKey, error) {
|
||||
data, err := os.ReadFile(path)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to read private key: %w", err)
|
||||
}
|
||||
|
||||
// Extract base64 content between PEM markers
|
||||
content := extractPEMContent(string(data))
|
||||
if content == "" {
|
||||
return nil, fmt.Errorf("invalid private key format")
|
||||
}
|
||||
|
||||
decoded, err := base64.StdEncoding.DecodeString(content)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to decode private key: %w", err)
|
||||
}
|
||||
|
||||
if len(decoded) != ed25519.PrivateKeySize {
|
||||
return nil, fmt.Errorf("invalid private key size")
|
||||
}
|
||||
|
||||
return ed25519.PrivateKey(decoded), nil
|
||||
}
|
||||
|
||||
// LoadPublicKey loads a public key from file
|
||||
func LoadPublicKey(path string) (ed25519.PublicKey, error) {
|
||||
data, err := os.ReadFile(path)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to read public key: %w", err)
|
||||
}
|
||||
|
||||
content := extractPEMContent(string(data))
|
||||
if content == "" {
|
||||
return nil, fmt.Errorf("invalid public key format")
|
||||
}
|
||||
|
||||
decoded, err := base64.StdEncoding.DecodeString(content)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to decode public key: %w", err)
|
||||
}
|
||||
|
||||
if len(decoded) != ed25519.PublicKeySize {
|
||||
return nil, fmt.Errorf("invalid public key size")
|
||||
}
|
||||
|
||||
return ed25519.PublicKey(decoded), nil
|
||||
}
|
||||
|
||||
// extractPEMContent extracts base64 content from PEM-like format
|
||||
func extractPEMContent(data string) string {
|
||||
// Simple extraction - find content between markers
|
||||
start := 0
|
||||
for i := 0; i < len(data); i++ {
|
||||
if data[i] == '\n' && i > 0 && data[i-1] == '-' {
|
||||
start = i + 1
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
end := len(data)
|
||||
for i := len(data) - 1; i > start; i-- {
|
||||
if data[i] == '\n' && i+1 < len(data) && data[i+1] == '-' {
|
||||
end = i
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
if start >= end {
|
||||
return ""
|
||||
}
|
||||
|
||||
// Remove whitespace
|
||||
result := ""
|
||||
for _, c := range data[start:end] {
|
||||
if c != '\n' && c != '\r' && c != ' ' {
|
||||
result += string(c)
|
||||
}
|
||||
}
|
||||
return result
|
||||
}
|
||||
|
||||
// EnableSigning enables cryptographic signing for audit entries
|
||||
func (a *AuditLogger) EnableSigning(privateKey ed25519.PrivateKey) {
|
||||
a.mu.Lock()
|
||||
defer a.mu.Unlock()
|
||||
a.privateKey = privateKey
|
||||
a.publicKey = privateKey.Public().(ed25519.PublicKey)
|
||||
}
|
||||
|
||||
// AddSignedEntry adds a signed entry to the audit log
|
||||
func (a *AuditLogger) AddSignedEntry(event AuditEvent) error {
|
||||
if !a.enabled {
|
||||
return nil
|
||||
}
|
||||
|
||||
a.mu.Lock()
|
||||
defer a.mu.Unlock()
|
||||
|
||||
// Serialize details
|
||||
detailsJSON := ""
|
||||
if len(event.Details) > 0 {
|
||||
if data, err := json.Marshal(event.Details); err == nil {
|
||||
detailsJSON = string(data)
|
||||
}
|
||||
}
|
||||
|
||||
entry := SignedAuditEntry{
|
||||
Sequence: int64(len(a.entries) + 1),
|
||||
Timestamp: event.Timestamp.Format(time.RFC3339Nano),
|
||||
User: event.User,
|
||||
Action: event.Action,
|
||||
Resource: event.Resource,
|
||||
Result: event.Result,
|
||||
Details: detailsJSON,
|
||||
PrevHash: a.prevHash,
|
||||
}
|
||||
|
||||
// Calculate hash of entry (without signature)
|
||||
entry.Hash = a.calculateEntryHash(entry)
|
||||
|
||||
// Sign if private key is available
|
||||
if a.privateKey != nil {
|
||||
hashBytes, _ := hex.DecodeString(entry.Hash)
|
||||
signature := ed25519.Sign(a.privateKey, hashBytes)
|
||||
entry.Signature = base64.StdEncoding.EncodeToString(signature)
|
||||
}
|
||||
|
||||
// Update chain
|
||||
a.prevHash = entry.Hash
|
||||
a.entries = append(a.entries, entry)
|
||||
|
||||
// Also log to standard logger
|
||||
a.logEvent(event)
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// calculateEntryHash computes SHA-256 hash of an entry (without signature field)
|
||||
func (a *AuditLogger) calculateEntryHash(entry SignedAuditEntry) string {
|
||||
// Create canonical representation for hashing
|
||||
data := fmt.Sprintf("%d|%s|%s|%s|%s|%s|%s|%s",
|
||||
entry.Sequence,
|
||||
entry.Timestamp,
|
||||
entry.User,
|
||||
entry.Action,
|
||||
entry.Resource,
|
||||
entry.Result,
|
||||
entry.Details,
|
||||
entry.PrevHash,
|
||||
)
|
||||
|
||||
hash := sha256.Sum256([]byte(data))
|
||||
return hex.EncodeToString(hash[:])
|
||||
}
|
||||
|
||||
// ExportSignedLog exports the signed audit log to a file
|
||||
func (a *AuditLogger) ExportSignedLog(path string) error {
|
||||
a.mu.Lock()
|
||||
defer a.mu.Unlock()
|
||||
|
||||
data, err := json.MarshalIndent(a.entries, "", " ")
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to marshal audit log: %w", err)
|
||||
}
|
||||
|
||||
return os.WriteFile(path, data, 0644)
|
||||
}
|
||||
|
||||
// VerifyAuditLog verifies the integrity of an exported audit log
|
||||
func VerifyAuditLog(logPath string, publicKeyPath string) (*AuditVerificationResult, error) {
|
||||
// Load public key
|
||||
publicKey, err := LoadPublicKey(publicKeyPath)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to load public key: %w", err)
|
||||
}
|
||||
|
||||
// Load audit log
|
||||
data, err := os.ReadFile(logPath)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to read audit log: %w", err)
|
||||
}
|
||||
|
||||
var entries []SignedAuditEntry
|
||||
if err := json.Unmarshal(data, &entries); err != nil {
|
||||
return nil, fmt.Errorf("failed to parse audit log: %w", err)
|
||||
}
|
||||
|
||||
result := &AuditVerificationResult{
|
||||
TotalEntries: len(entries),
|
||||
ValidEntries: 0,
|
||||
Errors: make([]string, 0),
|
||||
}
|
||||
|
||||
prevHash := "genesis"
|
||||
|
||||
for i, entry := range entries {
|
||||
// Verify hash chain
|
||||
if entry.PrevHash != prevHash {
|
||||
result.Errors = append(result.Errors,
|
||||
fmt.Sprintf("Entry %d: hash chain broken (expected %s, got %s)",
|
||||
i+1, prevHash[:16]+"...", entry.PrevHash[:min(16, len(entry.PrevHash))]+"..."))
|
||||
}
|
||||
|
||||
// Recalculate hash
|
||||
expectedHash := calculateVerifyHash(entry)
|
||||
if entry.Hash != expectedHash {
|
||||
result.Errors = append(result.Errors,
|
||||
fmt.Sprintf("Entry %d: hash mismatch (entry may be tampered)", i+1))
|
||||
}
|
||||
|
||||
// Verify signature
|
||||
if entry.Signature != "" {
|
||||
hashBytes, _ := hex.DecodeString(entry.Hash)
|
||||
sigBytes, err := base64.StdEncoding.DecodeString(entry.Signature)
|
||||
if err != nil {
|
||||
result.Errors = append(result.Errors,
|
||||
fmt.Sprintf("Entry %d: invalid signature encoding", i+1))
|
||||
} else if !ed25519.Verify(publicKey, hashBytes, sigBytes) {
|
||||
result.Errors = append(result.Errors,
|
||||
fmt.Sprintf("Entry %d: signature verification failed", i+1))
|
||||
} else {
|
||||
result.ValidEntries++
|
||||
}
|
||||
} else {
|
||||
result.Errors = append(result.Errors,
|
||||
fmt.Sprintf("Entry %d: missing signature", i+1))
|
||||
}
|
||||
|
||||
prevHash = entry.Hash
|
||||
}
|
||||
|
||||
result.ChainValid = len(result.Errors) == 0 ||
|
||||
!containsChainError(result.Errors)
|
||||
result.AllSignaturesValid = result.ValidEntries == result.TotalEntries
|
||||
|
||||
return result, nil
|
||||
}
|
||||
|
||||
// AuditVerificationResult contains the result of audit log verification
|
||||
type AuditVerificationResult struct {
|
||||
TotalEntries int
|
||||
ValidEntries int
|
||||
ChainValid bool
|
||||
AllSignaturesValid bool
|
||||
Errors []string
|
||||
}
|
||||
|
||||
// IsValid returns true if the audit log is completely valid
|
||||
func (r *AuditVerificationResult) IsValid() bool {
|
||||
return r.ChainValid && r.AllSignaturesValid && len(r.Errors) == 0
|
||||
}
|
||||
|
||||
// String returns a human-readable summary
|
||||
func (r *AuditVerificationResult) String() string {
|
||||
if r.IsValid() {
|
||||
return fmt.Sprintf("✅ Audit log verified: %d entries, chain intact, all signatures valid",
|
||||
r.TotalEntries)
|
||||
}
|
||||
|
||||
return fmt.Sprintf("❌ Audit log verification failed: %d/%d valid entries, %d errors",
|
||||
r.ValidEntries, r.TotalEntries, len(r.Errors))
|
||||
}
|
||||
|
||||
// calculateVerifyHash recalculates hash for verification
|
||||
func calculateVerifyHash(entry SignedAuditEntry) string {
|
||||
data := fmt.Sprintf("%d|%s|%s|%s|%s|%s|%s|%s",
|
||||
entry.Sequence,
|
||||
entry.Timestamp,
|
||||
entry.User,
|
||||
entry.Action,
|
||||
entry.Resource,
|
||||
entry.Result,
|
||||
entry.Details,
|
||||
entry.PrevHash,
|
||||
)
|
||||
|
||||
hash := sha256.Sum256([]byte(data))
|
||||
return hex.EncodeToString(hash[:])
|
||||
}
|
||||
|
||||
// containsChainError checks if errors include hash chain issues
|
||||
func containsChainError(errors []string) bool {
|
||||
for _, err := range errors {
|
||||
		if len(err) > 0 && (err[:min(5, len(err))] == "Entry" &&
			(contains(err, "hash chain") || contains(err, "hash mismatch"))) {
|
||||
return true
|
||||
}
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
// contains is a simple string contains helper
|
||||
func contains(s, substr string) bool {
|
||||
for i := 0; i <= len(s)-len(substr); i++ {
|
||||
if s[i:i+len(substr)] == substr {
|
||||
return true
|
||||
}
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
// min returns the minimum of two ints
|
||||
func min(a, b int) int {
|
||||
if a < b {
|
||||
return a
|
||||
}
|
||||
return b
|
||||
}
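// Illustrative end-to-end sketch (file paths and the log value are assumptions,
// not part of the diff): generate a signing key pair, record signed entries,
// export the log, and verify it later with only the public key.
//
//	priv, pub, err := GenerateSigningKeys()
//	if err != nil {
//		return err
//	}
//	_ = SavePrivateKey("/etc/dbbackup/audit.key", priv) // written 0600
//	_ = SavePublicKey("/etc/dbbackup/audit.pub", pub)   // written 0644
//
//	audit := NewAuditLogger(log, true)
//	audit.EnableSigning(priv)
//	_ = audit.AddSignedEntry(AuditEvent{
//		Timestamp: time.Now(),
//		User:      GetCurrentUser(),
//		Action:    "restore",
//		Resource:  "app_db",
//		Result:    "success",
//	})
//	_ = audit.ExportSignedLog("/var/log/dbbackup/audit.json")
//
//	report, err := VerifyAuditLog("/var/log/dbbackup/audit.json", "/etc/dbbackup/audit.pub")
//	if err == nil {
//		fmt.Println(report.String())
//	}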
|
||||
|
||||
@ -168,6 +168,10 @@ func (m ArchiveBrowserModel) Update(msg tea.Msg) (tea.Model, tea.Cmd) {
|
||||
}
|
||||
return m, nil
|
||||
|
||||
case tea.InterruptMsg:
|
||||
// Handle Ctrl+C signal (SIGINT) - Bubbletea v1.3+ sends this instead of KeyMsg for ctrl+c
|
||||
return m.parent, nil
|
||||
|
||||
case tea.KeyMsg:
|
||||
switch msg.String() {
|
||||
case "ctrl+c", "q", "esc":
|
||||
@ -205,13 +209,21 @@ func (m ArchiveBrowserModel) Update(msg tea.Msg) (tea.Model, tea.Cmd) {
|
||||
return diagnoseView, diagnoseView.Init()
|
||||
}
|
||||
|
||||
// For restore-cluster mode: MUST be a .tar.gz cluster archive
|
||||
// Single .sql/.dump files are NOT valid cluster backups
|
||||
if m.mode == "restore-cluster" && !selected.Format.IsClusterBackup() {
|
||||
m.message = errorStyle.Render(fmt.Sprintf("⚠️ Not a cluster backup: %s is a single database backup (%s). Use 'Restore Single' mode instead, or select a .tar.gz cluster archive.", selected.Name, selected.Format.String()))
|
||||
// For restore-cluster mode: check if format can be used for cluster restore
|
||||
// - .tar.gz: dbbackup cluster format (works with pg_restore)
|
||||
// - .sql/.sql.gz: pg_dumpall format (works with native engine or psql)
|
||||
if m.mode == "restore-cluster" && !selected.Format.CanBeClusterRestore() {
|
||||
m.message = errorStyle.Render(fmt.Sprintf("⚠️ %s cannot be used for cluster restore.\n\n Supported formats: .tar.gz (dbbackup), .sql, .sql.gz (pg_dumpall)",
|
||||
selected.Name))
|
||||
return m, nil
|
||||
}
|
||||
|
||||
// For SQL-based cluster restore, enable native engine automatically
|
||||
if m.mode == "restore-cluster" && !selected.Format.IsClusterBackup() {
|
||||
// This is a .sql or .sql.gz file - use native engine
|
||||
m.config.UseNativeEngine = true
|
||||
}
|
||||
|
||||
// For single restore mode with cluster backup selected - offer to select individual database
|
||||
if m.mode == "restore-single" && selected.Format.IsClusterBackup() {
|
||||
clusterSelector := NewClusterDatabaseSelector(m.config, m.logger, m, m.ctx, selected, "single", false)
|
||||
|
||||
@ -54,13 +54,16 @@ type BackupExecutionModel struct {
|
||||
spinnerFrame int
|
||||
|
||||
// Database count progress (for cluster backup)
|
||||
dbTotal int
|
||||
dbDone int
|
||||
dbName string // Current database being backed up
|
||||
overallPhase int // 1=globals, 2=databases, 3=compressing
|
||||
phaseDesc string // Description of current phase
|
||||
dbPhaseElapsed time.Duration // Elapsed time since database backup phase started
|
||||
dbAvgPerDB time.Duration // Average time per database backup
|
||||
dbTotal int
|
||||
dbDone int
|
||||
dbName string // Current database being backed up
|
||||
overallPhase int // 1=globals, 2=databases, 3=compressing
|
||||
phaseDesc string // Description of current phase
|
||||
dbPhaseElapsed time.Duration // Elapsed time since database backup phase started
|
||||
dbAvgPerDB time.Duration // Average time per database backup
|
||||
phase2StartTime time.Time // When phase 2 started (for realtime elapsed calculation)
|
||||
bytesDone int64 // Size-weighted progress: bytes completed
|
||||
bytesTotal int64 // Size-weighted progress: total bytes
|
||||
}
|
||||
|
||||
// sharedBackupProgressState holds progress state that can be safely accessed from callbacks
|
||||
@ -75,6 +78,8 @@ type sharedBackupProgressState struct {
|
||||
phase2StartTime time.Time // When phase 2 started (for realtime ETA calculation)
|
||||
dbPhaseElapsed time.Duration // Elapsed time since database backup phase started
|
||||
dbAvgPerDB time.Duration // Average time per database backup
|
||||
bytesDone int64 // Size-weighted progress: bytes completed
|
||||
bytesTotal int64 // Size-weighted progress: total bytes
|
||||
}
|
||||
|
||||
// Package-level shared progress state for backup operations
|
||||
@ -95,7 +100,7 @@ func clearCurrentBackupProgress() {
|
||||
currentBackupProgressState = nil
|
||||
}
|
||||
|
||||
func getCurrentBackupProgress() (dbTotal, dbDone int, dbName string, overallPhase int, phaseDesc string, hasUpdate bool, dbPhaseElapsed, dbAvgPerDB time.Duration, phase2StartTime time.Time) {
|
||||
func getCurrentBackupProgress() (dbTotal, dbDone int, dbName string, overallPhase int, phaseDesc string, hasUpdate bool, dbPhaseElapsed, dbAvgPerDB time.Duration, phase2StartTime time.Time, bytesDone, bytesTotal int64) {
|
||||
// CRITICAL: Add panic recovery
|
||||
defer func() {
|
||||
if r := recover(); r != nil {
|
||||
@ -108,12 +113,12 @@ func getCurrentBackupProgress() (dbTotal, dbDone int, dbName string, overallPhas
|
||||
defer currentBackupProgressMu.Unlock()
|
||||
|
||||
if currentBackupProgressState == nil {
|
||||
return 0, 0, "", 0, "", false, 0, 0, time.Time{}
|
||||
return 0, 0, "", 0, "", false, 0, 0, time.Time{}, 0, 0
|
||||
}
|
||||
|
||||
// Double-check state isn't nil after lock
|
||||
if currentBackupProgressState == nil {
|
||||
return 0, 0, "", 0, "", false, 0, 0, time.Time{}
|
||||
return 0, 0, "", 0, "", false, 0, 0, time.Time{}, 0, 0
|
||||
}
|
||||
|
||||
currentBackupProgressState.mu.Lock()
|
||||
@ -123,16 +128,19 @@ func getCurrentBackupProgress() (dbTotal, dbDone int, dbName string, overallPhas
|
||||
currentBackupProgressState.hasUpdate = false
|
||||
|
||||
// Calculate realtime phase elapsed if we have a phase 2 start time
|
||||
dbPhaseElapsed = currentBackupProgressState.dbPhaseElapsed
|
||||
// Always recalculate from phase2StartTime for accurate real-time display
|
||||
if !currentBackupProgressState.phase2StartTime.IsZero() {
|
||||
dbPhaseElapsed = time.Since(currentBackupProgressState.phase2StartTime)
|
||||
} else {
|
||||
dbPhaseElapsed = currentBackupProgressState.dbPhaseElapsed
|
||||
}
|
||||
|
||||
return currentBackupProgressState.dbTotal, currentBackupProgressState.dbDone,
|
||||
currentBackupProgressState.dbName, currentBackupProgressState.overallPhase,
|
||||
currentBackupProgressState.phaseDesc, hasUpdate,
|
||||
dbPhaseElapsed, currentBackupProgressState.dbAvgPerDB,
|
||||
currentBackupProgressState.phase2StartTime
|
||||
currentBackupProgressState.phase2StartTime,
|
||||
currentBackupProgressState.bytesDone, currentBackupProgressState.bytesTotal
|
||||
}
|
||||
|
||||
func NewBackupExecution(cfg *config.Config, log logger.Logger, parent tea.Model, ctx context.Context, backupType, dbName string, ratio int) BackupExecutionModel {
|
||||
@ -181,11 +189,22 @@ type backupCompleteMsg struct {
|
||||
}
|
||||
|
||||
func executeBackupWithTUIProgress(parentCtx context.Context, cfg *config.Config, log logger.Logger, backupType, dbName string, ratio int) tea.Cmd {
|
||||
return func() tea.Msg {
|
||||
// CRITICAL: Add panic recovery to prevent TUI crashes on context cancellation
|
||||
return func() (returnMsg tea.Msg) {
|
||||
start := time.Now()
|
||||
|
||||
// CRITICAL: Add panic recovery that RETURNS a proper message to BubbleTea.
|
||||
// Without this, if a panic occurs the command function returns nil,
|
||||
// causing BubbleTea's execBatchMsg WaitGroup to hang forever waiting
|
||||
// for a message that never comes.
|
||||
defer func() {
|
||||
if r := recover(); r != nil {
|
||||
log.Error("Backup execution panic recovered", "panic", r, "database", dbName)
|
||||
// CRITICAL: Set the named return value so BubbleTea receives a message
|
||||
returnMsg = backupCompleteMsg{
|
||||
result: "",
|
||||
err: fmt.Errorf("backup panic: %v", r),
|
||||
elapsed: time.Since(start),
|
||||
}
|
||||
}
|
||||
}()
|
||||
|
||||
@ -201,8 +220,6 @@ func executeBackupWithTUIProgress(parentCtx context.Context, cfg *config.Config,
|
||||
}
|
||||
}
|
||||
|
||||
start := time.Now()
|
||||
|
||||
// Setup shared progress state for TUI polling
|
||||
progressState := &sharedBackupProgressState{}
|
||||
setCurrentBackupProgress(progressState)
|
||||
@ -227,8 +244,8 @@ func executeBackupWithTUIProgress(parentCtx context.Context, cfg *config.Config,
|
||||
// Pass nil as indicator - TUI itself handles all display, no stdout printing
|
||||
engine := backup.NewSilent(cfg, log, dbClient, nil)
|
||||
|
||||
// Set database progress callback for cluster backups
|
||||
engine.SetDatabaseProgressCallback(func(done, total int, currentDB string) {
|
||||
// Set database progress callback for cluster backups (with size-weighted progress)
|
||||
engine.SetDatabaseProgressCallback(func(done, total int, currentDB string, bytesDone, bytesTotal int64) {
|
||||
// CRITICAL: Panic recovery to prevent nil pointer crashes
|
||||
defer func() {
|
||||
if r := recover(); r != nil {
|
||||
@ -245,13 +262,18 @@ func executeBackupWithTUIProgress(parentCtx context.Context, cfg *config.Config,
|
||||
progressState.dbDone = done
|
||||
progressState.dbTotal = total
|
||||
progressState.dbName = currentDB
|
||||
progressState.bytesDone = bytesDone
|
||||
progressState.bytesTotal = bytesTotal
|
||||
progressState.overallPhase = backupPhaseDatabases
|
||||
progressState.phaseDesc = fmt.Sprintf("Phase 2/3: Backing up Databases (%d/%d)", done, total)
|
||||
progressState.hasUpdate = true
|
||||
// Set phase 2 start time on first callback (for realtime ETA calculation)
|
||||
if progressState.phase2StartTime.IsZero() {
|
||||
progressState.phase2StartTime = time.Now()
|
||||
log.Info("Phase 2 started", "time", progressState.phase2StartTime)
|
||||
}
|
||||
// Calculate elapsed time immediately
|
||||
progressState.dbPhaseElapsed = time.Since(progressState.phase2StartTime)
|
||||
progressState.mu.Unlock()
|
||||
})
|
||||
|
||||
@ -310,7 +332,7 @@ func (m BackupExecutionModel) Update(msg tea.Msg) (tea.Model, tea.Cmd) {
|
||||
var overallPhase int
|
||||
var phaseDesc string
|
||||
var hasUpdate bool
|
||||
var dbPhaseElapsed, dbAvgPerDB time.Duration
|
||||
var dbAvgPerDB time.Duration
|
||||
|
||||
func() {
|
||||
defer func() {
|
||||
@ -318,7 +340,17 @@ func (m BackupExecutionModel) Update(msg tea.Msg) (tea.Model, tea.Cmd) {
|
||||
m.logger.Warn("Backup progress polling panic recovered", "panic", r)
|
||||
}
|
||||
}()
|
||||
dbTotal, dbDone, dbName, overallPhase, phaseDesc, hasUpdate, dbPhaseElapsed, dbAvgPerDB, _ = getCurrentBackupProgress()
|
||||
var phase2Start time.Time
|
||||
var phaseElapsed time.Duration
|
||||
var bytesDone, bytesTotal int64
|
||||
dbTotal, dbDone, dbName, overallPhase, phaseDesc, hasUpdate, phaseElapsed, dbAvgPerDB, phase2Start, bytesDone, bytesTotal = getCurrentBackupProgress()
|
||||
_ = phaseElapsed // We recalculate this below from phase2StartTime
|
||||
if !phase2Start.IsZero() && m.phase2StartTime.IsZero() {
|
||||
m.phase2StartTime = phase2Start
|
||||
}
|
||||
// Always update size info for accurate ETA
|
||||
m.bytesDone = bytesDone
|
||||
m.bytesTotal = bytesTotal
|
||||
}()
|
||||
|
||||
if hasUpdate {
|
||||
@ -327,10 +359,14 @@ func (m BackupExecutionModel) Update(msg tea.Msg) (tea.Model, tea.Cmd) {
|
||||
m.dbName = dbName
|
||||
m.overallPhase = overallPhase
|
||||
m.phaseDesc = phaseDesc
|
||||
m.dbPhaseElapsed = dbPhaseElapsed
|
||||
m.dbAvgPerDB = dbAvgPerDB
|
||||
}
|
||||
|
||||
// Always recalculate elapsed time from phase2StartTime for accurate real-time display
|
||||
if !m.phase2StartTime.IsZero() {
|
||||
m.dbPhaseElapsed = time.Since(m.phase2StartTime)
|
||||
}
|
||||
|
||||
// Update status based on progress and elapsed time
|
||||
elapsedSec := int(time.Since(m.startTime).Seconds())
|
||||
|
||||
@ -426,14 +462,19 @@ func (m BackupExecutionModel) Update(msg tea.Msg) (tea.Model, tea.Cmd) {
|
||||
return m, nil
|
||||
}
|
||||
|
||||
// renderBackupDatabaseProgressBarWithTiming renders database backup progress with ETA
|
||||
func renderBackupDatabaseProgressBarWithTiming(done, total int, dbPhaseElapsed, dbAvgPerDB time.Duration) string {
|
||||
// renderBackupDatabaseProgressBarWithTiming renders database backup progress with size-weighted ETA
|
||||
func renderBackupDatabaseProgressBarWithTiming(done, total int, dbPhaseElapsed time.Duration, bytesDone, bytesTotal int64) string {
|
||||
if total == 0 {
|
||||
return ""
|
||||
}
|
||||
|
||||
// Calculate progress percentage
|
||||
percent := float64(done) / float64(total)
|
||||
// Use size-weighted progress if available, otherwise fall back to count-based
|
||||
var percent float64
|
||||
if bytesTotal > 0 {
|
||||
percent = float64(bytesDone) / float64(bytesTotal)
|
||||
} else {
|
||||
percent = float64(done) / float64(total)
|
||||
}
|
||||
if percent > 1.0 {
|
||||
percent = 1.0
|
||||
}
|
||||
@ -446,19 +487,31 @@ func renderBackupDatabaseProgressBarWithTiming(done, total int, dbPhaseElapsed,
|
||||
}
|
||||
bar := strings.Repeat("█", filled) + strings.Repeat("░", barWidth-filled)
|
||||
|
||||
// Calculate ETA similar to restore
|
||||
// Calculate size-weighted ETA (much more accurate for mixed database sizes)
|
||||
var etaStr string
|
||||
if done > 0 && done < total {
|
||||
if bytesDone > 0 && bytesDone < bytesTotal && bytesTotal > 0 {
|
||||
// Size-weighted: ETA = elapsed * (remaining_bytes / done_bytes)
|
||||
remainingBytes := bytesTotal - bytesDone
|
||||
eta := time.Duration(float64(dbPhaseElapsed) * float64(remainingBytes) / float64(bytesDone))
|
||||
etaStr = fmt.Sprintf(" | ETA: %s", formatDuration(eta))
|
||||
} else if done > 0 && done < total && bytesTotal == 0 {
|
||||
// Fallback to count-based if no size info
|
||||
avgPerDB := dbPhaseElapsed / time.Duration(done)
|
||||
remaining := total - done
|
||||
eta := avgPerDB * time.Duration(remaining)
|
||||
etaStr = fmt.Sprintf(" | ETA: %s", formatDuration(eta))
|
||||
etaStr = fmt.Sprintf(" | ETA: ~%s", formatDuration(eta))
|
||||
} else if done == total {
|
||||
etaStr = " | Complete"
|
||||
}
|
||||
|
||||
return fmt.Sprintf(" Databases: [%s] %d/%d | Elapsed: %s%s\n",
|
||||
bar, done, total, formatDuration(dbPhaseElapsed), etaStr)
|
||||
// Show size progress if available
|
||||
var sizeInfo string
|
||||
if bytesTotal > 0 {
|
||||
sizeInfo = fmt.Sprintf(" (%s/%s)", FormatBytes(bytesDone), FormatBytes(bytesTotal))
|
||||
}
|
||||
|
||||
return fmt.Sprintf(" Databases: [%s] %d/%d%s | Elapsed: %s%s\n",
|
||||
bar, done, total, sizeInfo, formatDuration(dbPhaseElapsed), etaStr)
|
||||
}
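// Worked example for the size-weighted estimate above (illustrative numbers):
// with 1 GiB of 4 GiB backed up after 2 minutes, remaining/done = 3, so the ETA
// is 2m * 3 = 6m. A count-based estimate with 1 of 4 databases done would give
// the same 6m only if every database were the same size, which is why byte
// totals are preferred whenever bytesTotal is known.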
|
||||
|
||||
func (m BackupExecutionModel) View() string {
|
||||
@ -547,8 +600,8 @@ func (m BackupExecutionModel) View() string {
|
||||
}
|
||||
s.WriteString("\n")
|
||||
|
||||
// Database progress bar with timing
|
||||
s.WriteString(renderBackupDatabaseProgressBarWithTiming(m.dbDone, m.dbTotal, m.dbPhaseElapsed, m.dbAvgPerDB))
|
||||
// Database progress bar with size-weighted timing
|
||||
s.WriteString(renderBackupDatabaseProgressBarWithTiming(m.dbDone, m.dbTotal, m.dbPhaseElapsed, m.bytesDone, m.bytesTotal))
|
||||
s.WriteString("\n")
|
||||
} else {
|
||||
// Intermediate phase (globals)
|
||||
|
||||
@ -97,13 +97,17 @@ func (m ClusterDatabaseSelectorModel) Update(msg tea.Msg) (tea.Model, tea.Cmd) {
|
||||
}
|
||||
return m, nil
|
||||
|
||||
case tea.InterruptMsg:
|
||||
// Handle Ctrl+C signal (SIGINT) - Bubbletea v1.3+ sends this instead of KeyMsg for ctrl+c
|
||||
return m.parent, nil
|
||||
|
||||
case tea.KeyMsg:
|
||||
if m.loading {
|
||||
return m, nil
|
||||
}
|
||||
|
||||
switch msg.String() {
|
||||
case "q", "esc":
|
||||
case "ctrl+c", "q", "esc":
|
||||
// Return to parent
|
||||
return m.parent, nil
|
||||
|
||||
|
||||
@ -70,9 +70,18 @@ func (m ConfirmationModel) Update(msg tea.Msg) (tea.Model, tea.Cmd) {
|
||||
if m.onConfirm != nil {
|
||||
return m.onConfirm()
|
||||
}
|
||||
executor := NewBackupExecution(m.config, m.logger, m.parent, m.ctx, "cluster", "", 0)
|
||||
// Default fallback (should not be reached if onConfirm is always provided)
|
||||
ctx := m.ctx
|
||||
if ctx == nil {
|
||||
ctx = context.Background()
|
||||
}
|
||||
executor := NewBackupExecution(m.config, m.logger, m.parent, ctx, "cluster", "", 0)
|
||||
return executor, executor.Init()
|
||||
|
||||
case tea.InterruptMsg:
|
||||
// Handle Ctrl+C signal (SIGINT) - Bubbletea v1.3+ sends this instead of KeyMsg for ctrl+c
|
||||
return m.parent, nil
|
||||
|
||||
case tea.KeyMsg:
|
||||
// Auto-forward ESC/quit in auto-confirm mode
|
||||
if m.config.TUIAutoConfirm {
|
||||
@ -98,8 +107,12 @@ func (m ConfirmationModel) Update(msg tea.Msg) (tea.Model, tea.Cmd) {
|
||||
if m.onConfirm != nil {
|
||||
return m.onConfirm()
|
||||
}
|
||||
// Default: execute cluster backup for backward compatibility
|
||||
executor := NewBackupExecution(m.config, m.logger, m.parent, m.ctx, "cluster", "", 0)
|
||||
// Default fallback (should not be reached if onConfirm is always provided)
|
||||
ctx := m.ctx
|
||||
if ctx == nil {
|
||||
ctx = context.Background()
|
||||
}
|
||||
executor := NewBackupExecution(m.config, m.logger, m, ctx, "cluster", "", 0)
|
||||
return executor, executor.Init()
|
||||
}
|
||||
return m.parent, nil
|
||||
|
||||
@ -126,6 +126,10 @@ func (m DatabaseSelectorModel) Update(msg tea.Msg) (tea.Model, tea.Cmd) {
|
||||
}
|
||||
return m, nil
|
||||
|
||||
case tea.InterruptMsg:
|
||||
// Handle Ctrl+C signal (SIGINT) - Bubbletea v1.3+ sends this instead of KeyMsg for ctrl+c
|
||||
return m.parent, nil
|
||||
|
||||
case tea.KeyMsg:
|
||||
// Auto-forward ESC/quit in auto-confirm mode
|
||||
if m.config.TUIAutoConfirm {
|
||||
|
||||
@ -303,10 +303,10 @@ func (m *MenuModel) Update(msg tea.Msg) (tea.Model, tea.Cmd) {
|
||||
return m.handleSchedule()
|
||||
case 9: // View Backup Chain
|
||||
return m.handleChain()
|
||||
case 10: // System Resource Profile
|
||||
return m.handleProfile()
|
||||
case 11: // Separator
|
||||
case 10: // Separator
|
||||
// Do nothing
|
||||
case 11: // System Resource Profile
|
||||
return m.handleProfile()
|
||||
case 12: // Tools
|
||||
return m.handleTools()
|
||||
case 13: // View Active Operations
|
||||
|
||||
@ -181,9 +181,17 @@ func (m *ProfileModel) Update(msg tea.Msg) (tea.Model, tea.Cmd) {
|
||||
}
|
||||
return m, nil
|
||||
|
||||
case tea.InterruptMsg:
|
||||
// Handle Ctrl+C signal (SIGINT) - Bubbletea v1.3+ sends this instead of KeyMsg for ctrl+c
|
||||
m.quitting = true
|
||||
if m.parent != nil {
|
||||
return m.parent, nil
|
||||
}
|
||||
return m, tea.Quit
|
||||
|
||||
case tea.KeyMsg:
|
||||
switch msg.String() {
|
||||
case "q", "esc":
|
||||
case "ctrl+c", "q", "esc":
|
||||
m.quitting = true
|
||||
if m.parent != nil {
|
||||
return m.parent, nil
|
||||
|
||||
@ -245,9 +245,11 @@ func getCurrentRestoreProgress() (bytesTotal, bytesDone int64, description strin
|
||||
speed = calculateRollingSpeed(currentRestoreProgressState.speedSamples)
|
||||
|
||||
// Calculate realtime phase elapsed if we have a phase 3 start time
|
||||
dbPhaseElapsed = currentRestoreProgressState.dbPhaseElapsed
|
||||
// Always recalculate from phase3StartTime for accurate real-time display
|
||||
if !currentRestoreProgressState.phase3StartTime.IsZero() {
|
||||
dbPhaseElapsed = time.Since(currentRestoreProgressState.phase3StartTime)
|
||||
} else {
|
||||
dbPhaseElapsed = currentRestoreProgressState.dbPhaseElapsed
|
||||
}
|
||||
|
||||
return currentRestoreProgressState.bytesTotal, currentRestoreProgressState.bytesDone,
|
||||
@ -308,13 +310,53 @@ func calculateRollingSpeed(samples []restoreSpeedSample) float64 {
|
||||
}
func executeRestoreWithTUIProgress(parentCtx context.Context, cfg *config.Config, log logger.Logger, archive ArchiveInfo, targetDB string, cleanFirst, createIfMissing bool, restoreType string, cleanClusterFirst bool, existingDBs []string, saveDebugLog bool) tea.Cmd {
return func() tea.Msg {
// CRITICAL: Add panic recovery to prevent TUI crashes on context cancellation
return func() (returnMsg tea.Msg) {
start := time.Now()

// TUI Debug Log: Always write to file when debug is enabled (even on success/hang)
var tuiDebugFile *os.File
if saveDebugLog {
workDir := cfg.GetEffectiveWorkDir()
tuiLogPath := filepath.Join(workDir, fmt.Sprintf("dbbackup-tui-debug-%s.log", time.Now().Format("20060102-150405")))
var err error
tuiDebugFile, err = os.Create(tuiLogPath)
if err == nil {
defer tuiDebugFile.Close()
fmt.Fprintf(tuiDebugFile, "=== TUI Restore Debug Log ===\n")
fmt.Fprintf(tuiDebugFile, "Started: %s\n", time.Now().Format(time.RFC3339))
fmt.Fprintf(tuiDebugFile, "Archive: %s\n", archive.Path)
fmt.Fprintf(tuiDebugFile, "RestoreType: %s\n", restoreType)
fmt.Fprintf(tuiDebugFile, "TargetDB: %s\n", targetDB)
fmt.Fprintf(tuiDebugFile, "CleanCluster: %v\n", cleanClusterFirst)
fmt.Fprintf(tuiDebugFile, "ExistingDBs: %v\n\n", existingDBs)
log.Info("TUI debug log enabled", "path", tuiLogPath)
}
}
tuiLog := func(msg string, args ...interface{}) {
if tuiDebugFile != nil {
fmt.Fprintf(tuiDebugFile, "[%s] %s", time.Now().Format("15:04:05.000"), fmt.Sprintf(msg, args...))
fmt.Fprintln(tuiDebugFile)
tuiDebugFile.Sync() // Flush immediately so we capture hangs
}
}

tuiLog("Starting restore execution")

// CRITICAL: Add panic recovery that RETURNS a proper message to BubbleTea.
// Without this, if a panic occurs the command function returns nil,
// causing BubbleTea's execBatchMsg WaitGroup to hang forever waiting
// for a message that never comes. This was the root cause of the
// TUI cluster restore hang/panic issue.
defer func() {
if r := recover(); r != nil {
log.Error("Restore execution panic recovered", "panic", r, "database", targetDB)
// Return error message instead of crashing
// Note: We can't return from defer, so this just logs
// CRITICAL: Set the named return value so BubbleTea receives a message
// This prevents the WaitGroup deadlock in execBatchMsg
returnMsg = restoreCompleteMsg{
result: "",
err: fmt.Errorf("restore panic: %v", r),
elapsed: time.Since(start),
}
}
}()
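The comments above describe the actual fix: the closure now has a named return value (returnMsg), so the deferred recover can still hand Bubble Tea a restoreCompleteMsg instead of returning nil and deadlocking the batch WaitGroup. A stripped-down sketch of the same pattern, with an illustrative doneMsg type standing in for the project's restoreCompleteMsg:

```go
package main

import (
	"fmt"

	tea "github.com/charmbracelet/bubbletea"
)

// doneMsg stands in for the project's restoreCompleteMsg.
type doneMsg struct{ err error }

func riskyCmd() tea.Cmd {
	// Named return value: the deferred recover can set it, so the command
	// always delivers a message even when the body panics.
	return func() (msg tea.Msg) {
		defer func() {
			if r := recover(); r != nil {
				msg = doneMsg{err: fmt.Errorf("recovered panic: %v", r)}
			}
		}()
		panic("simulated failure") // without the defer, this command would return nil
	}
}

func main() {
	fmt.Printf("%+v\n", riskyCmd()())
}
```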
@ -322,8 +364,11 @@ func executeRestoreWithTUIProgress(parentCtx context.Context, cfg *config.Config
// DO NOT create a new context here as it breaks Ctrl+C cancellation
ctx := parentCtx

tuiLog("Checking context state")

// Check if context is already cancelled
if ctx.Err() != nil {
tuiLog("Context already cancelled: %v", ctx.Err())
return restoreCompleteMsg{
result: "",
err: fmt.Errorf("operation cancelled: %w", ctx.Err()),

@ -331,11 +376,12 @@ func executeRestoreWithTUIProgress(parentCtx context.Context, cfg *config.Config
}
}

start := time.Now()
tuiLog("Creating database client")

// Create database instance
dbClient, err := database.New(cfg, log)
if err != nil {
tuiLog("Database client creation failed: %v", err)
return restoreCompleteMsg{
result: "",
err: fmt.Errorf("failed to create database client: %w", err),

@ -344,8 +390,11 @@ func executeRestoreWithTUIProgress(parentCtx context.Context, cfg *config.Config
}
defer dbClient.Close()

tuiLog("Database client created successfully")

// STEP 1: Clean cluster if requested (drop all existing user databases)
if restoreType == "restore-cluster" && cleanClusterFirst {
tuiLog("STEP 1: Cleaning cluster (dropping existing DBs)")
// Re-detect databases at execution time to get current state
// The preview list may be stale or detection may have failed earlier
safety := restore.NewSafety(cfg, log)

@ -365,8 +414,9 @@ func executeRestoreWithTUIProgress(parentCtx context.Context, cfg *config.Config
// This matches how cluster restore works - uses CLI tools, not database connections
droppedCount := 0
for _, dbName := range existingDBs {
// Create timeout context for each database drop (5 minutes per DB - large DBs take time)
dropCtx, dropCancel := context.WithTimeout(ctx, 5*time.Minute)
// Create timeout context for each database drop (60 seconds per DB)
// Reduced from 5 minutes for better TUI responsiveness
dropCtx, dropCancel := context.WithTimeout(ctx, 60*time.Second)
if err := dropDatabaseCLI(dropCtx, cfg, dbName); err != nil {
log.Warn("Failed to drop database", "name", dbName, "error", err)
// Continue with other databases
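The replacement lines give each drop its own 60-second context instead of 5 minutes. A self-contained sketch of that loop shape, where dropDatabase is only a stand-in for the project's dropDatabaseCLI:

```go
package main

import (
	"context"
	"fmt"
	"time"
)

// dropDatabase is a stand-in for the project's dropDatabaseCLI.
func dropDatabase(ctx context.Context, name string) error {
	select {
	case <-time.After(100 * time.Millisecond): // pretend the DROP DATABASE ran
		return nil
	case <-ctx.Done():
		return ctx.Err()
	}
}

func main() {
	ctx := context.Background()
	for _, db := range []string{"app", "reports"} {
		// Per-database timeout: one slow drop cannot stall the whole loop or the TUI.
		dropCtx, cancel := context.WithTimeout(ctx, 60*time.Second)
		err := dropDatabase(dropCtx, db)
		cancel() // release the timer before the next iteration
		if err != nil {
			fmt.Println("failed to drop", db+":", err)
			continue // keep going; other databases may still drop cleanly
		}
		fmt.Println("dropped", db)
	}
}
```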
@ -480,6 +530,8 @@ func executeRestoreWithTUIProgress(parentCtx context.Context, cfg *config.Config
if progressState.phase3StartTime.IsZero() {
progressState.phase3StartTime = time.Now()
}
// Calculate elapsed time immediately for accurate display
progressState.dbPhaseElapsed = time.Since(progressState.phase3StartTime)
// Clear byte progress when switching to db progress
progressState.bytesTotal = 0
progressState.bytesDone = 0

@ -521,6 +573,10 @@ func executeRestoreWithTUIProgress(parentCtx context.Context, cfg *config.Config
if progressState.phase3StartTime.IsZero() {
progressState.phase3StartTime = time.Now()
}
// Recalculate elapsed for accuracy if phaseElapsed not provided
if phaseElapsed == 0 && !progressState.phase3StartTime.IsZero() {
progressState.dbPhaseElapsed = time.Since(progressState.phase3StartTime)
}
// Clear byte progress when switching to db progress
progressState.bytesTotal = 0
progressState.bytesDone = 0

@ -561,6 +617,8 @@ func executeRestoreWithTUIProgress(parentCtx context.Context, cfg *config.Config
if progressState.phase3StartTime.IsZero() {
progressState.phase3StartTime = time.Now()
}
// Calculate elapsed time immediately for accurate display
progressState.dbPhaseElapsed = time.Since(progressState.phase3StartTime)

// Update unified progress tracker
if progressState.unifiedProgress != nil {

@ -585,29 +643,39 @@ func executeRestoreWithTUIProgress(parentCtx context.Context, cfg *config.Config
log.Info("Debug logging enabled", "path", debugLogPath)
}

tuiLog("STEP 3: Executing restore (type=%s)", restoreType)

// STEP 3: Execute restore based on type
var restoreErr error
if restoreType == "restore-cluster" {
// Use pre-extracted directory if available (optimization)
if archive.ExtractedDir != "" {
tuiLog("Using pre-extracted cluster directory: %s", archive.ExtractedDir)
log.Info("Using pre-extracted cluster directory", "path", archive.ExtractedDir)
defer os.RemoveAll(archive.ExtractedDir) // Cleanup after restore completes
restoreErr = engine.RestoreCluster(ctx, archive.Path, archive.ExtractedDir)
} else {
tuiLog("Calling engine.RestoreCluster for: %s", archive.Path)
restoreErr = engine.RestoreCluster(ctx, archive.Path)
}
tuiLog("RestoreCluster returned: err=%v", restoreErr)
} else if restoreType == "restore-cluster-single" {
tuiLog("Calling RestoreSingleFromCluster: %s -> %s", archive.Path, targetDB)
// Restore single database from cluster backup
// Also cleanup pre-extracted dir if present
if archive.ExtractedDir != "" {
defer os.RemoveAll(archive.ExtractedDir)
}
restoreErr = engine.RestoreSingleFromCluster(ctx, archive.Path, targetDB, targetDB, cleanFirst, createIfMissing)
tuiLog("RestoreSingleFromCluster returned: err=%v", restoreErr)
} else {
tuiLog("Calling RestoreSingle: %s -> %s", archive.Path, targetDB)
restoreErr = engine.RestoreSingle(ctx, archive.Path, targetDB, cleanFirst, createIfMissing)
tuiLog("RestoreSingle returned: err=%v", restoreErr)
}

if restoreErr != nil {
tuiLog("Restore failed: %v", restoreErr)
return restoreCompleteMsg{
result: "",
err: restoreErr,

@ -624,6 +692,8 @@ func executeRestoreWithTUIProgress(parentCtx context.Context, cfg *config.Config
result = fmt.Sprintf("Successfully restored cluster from %s (cleaned %d existing database(s) first)", archive.Name, len(existingDBs))
}

tuiLog("Restore completed successfully: %s", result)

return restoreCompleteMsg{
result: result,
err: nil,

@ -99,6 +99,22 @@ type safetyCheckCompleteMsg struct {

func runSafetyChecks(cfg *config.Config, log logger.Logger, archive ArchiveInfo, targetDB string) tea.Cmd {
return func() tea.Msg {
// Check if preflight checks should be skipped
if cfg != nil && cfg.SkipPreflightChecks {
// Return all checks as "skipped" with warning
checks := []SafetyCheck{
{Name: "Archive integrity", Status: "warning", Message: "⚠️ SKIPPED - preflight checks disabled", Critical: true},
{Name: "Dump validity", Status: "warning", Message: "⚠️ SKIPPED - preflight checks disabled", Critical: true},
{Name: "Disk space", Status: "warning", Message: "⚠️ SKIPPED - preflight checks disabled", Critical: true},
{Name: "Required tools", Status: "warning", Message: "⚠️ SKIPPED - preflight checks disabled", Critical: true},
{Name: "Target database", Status: "warning", Message: "⚠️ SKIPPED - preflight checks disabled", Critical: false},
}
return safetyCheckCompleteMsg{
checks: checks,
canProceed: true, // Allow proceeding but with warnings
}
}

// Dynamic timeout based on archive size for large database support
// Base: 10 minutes + 1 minute per 5 GB, max 120 minutes
timeoutMinutes := 10
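The timeout comment states the sizing rule in prose. A small sketch of that arithmetic; the exact rounding and the field carrying the archive size are not shown in this hunk, so treat it as an approximation:

```go
package main

import (
	"fmt"
	"time"
)

// safetyCheckTimeout approximates the rule from the comment above:
// 10 minutes base + 1 minute per 5 GiB of archive, capped at 120 minutes.
func safetyCheckTimeout(archiveBytes int64) time.Duration {
	const gib = int64(1) << 30
	minutes := 10 + int(archiveBytes/(5*gib)) // integer division: per full 5 GiB
	if minutes > 120 {
		minutes = 120
	}
	return time.Duration(minutes) * time.Minute
}

func main() {
	for _, gb := range []int64{1, 50, 500, 1000} {
		fmt.Printf("%4d GB archive -> %v timeout\n", gb, safetyCheckTimeout(gb*(1<<30)))
	}
}
```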
@ -272,6 +288,10 @@ func (m RestorePreviewModel) Update(msg tea.Msg) (tea.Model, tea.Cmd) {
}
return m, nil

case tea.InterruptMsg:
// Handle Ctrl+C signal (SIGINT) - Bubbletea v1.3+ sends this instead of KeyMsg for ctrl+c
return m.parent, nil

case tea.KeyMsg:
switch msg.String() {
case "ctrl+c", "q", "esc":

@ -526,6 +546,14 @@ func (m RestorePreviewModel) View() string {
s.WriteString(archiveHeaderStyle.Render("[SAFETY] Checks"))
s.WriteString("\n")

// Show warning banner if preflight checks are skipped
if m.config != nil && m.config.SkipPreflightChecks {
s.WriteString(CheckWarningStyle.Render(" ⚠️ PREFLIGHT CHECKS DISABLED ⚠️"))
s.WriteString("\n")
s.WriteString(CheckWarningStyle.Render(" Restore may fail unexpectedly. Re-enable in Settings."))
s.WriteString("\n\n")
}

if m.checking {
s.WriteString(infoStyle.Render(" Running safety checks..."))
s.WriteString("\n")

@ -165,6 +165,22 @@ func NewSettingsModel(cfg *config.Config, log logger.Logger, parent tea.Model) S
Type: "selector",
Description: "Enable for databases with many tables/LOBs. Reduces parallelism, increases max_locks_per_transaction.",
},
{
Key: "skip_preflight_checks",
DisplayName: "Skip Preflight Checks",
Value: func(c *config.Config) string {
if c.SkipPreflightChecks {
return "⚠️ SKIPPED (dangerous)"
}
return "Enabled (safe)"
},
Update: func(c *config.Config, v string) error {
c.SkipPreflightChecks = !c.SkipPreflightChecks
return nil
},
Type: "selector",
Description: "⚠️ WARNING: Skipping checks may result in failed restores or data loss. Only use if checks are too slow.",
},
{
Key: "cluster_parallelism",
DisplayName: "Cluster Parallelism",
2
main.go

@ -16,7 +16,7 @@ import (

// Build information (set by ldflags)
var (
version = "5.8.3"
version = "5.8.24"
buildTime = "unknown"
gitCommit = "unknown"
)
233
release.sh
Executable file

@ -0,0 +1,233 @@
#!/bin/bash
# Release script for dbbackup
# Builds binaries and creates/updates GitHub release
#
# Usage:
# ./release.sh # Build and release current version
# ./release.sh --bump # Bump patch version, build, and release
# ./release.sh --update # Update existing release with new binaries
# ./release.sh --dry-run # Show what would happen without doing it

set -e

# Colors
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[0;33m'
BLUE='\033[0;34m'
BOLD='\033[1m'
NC='\033[0m'

# Configuration
TOKEN_FILE=".gh_token"
MAIN_FILE="main.go"

# Parse arguments
BUMP_VERSION=false
UPDATE_ONLY=false
DRY_RUN=false
RELEASE_MSG=""

while [[ $# -gt 0 ]]; do
case $1 in
--bump)
BUMP_VERSION=true
shift
;;
--update)
UPDATE_ONLY=true
shift
;;
--dry-run)
DRY_RUN=true
shift
;;
-m|--message)
RELEASE_MSG="$2"
shift 2
;;
--help|-h)
echo "Usage: $0 [OPTIONS]"
echo ""
echo "Options:"
echo " --bump Bump patch version before release"
echo " --update Update existing release (don't create new)"
echo " --dry-run Show what would happen without doing it"
echo " -m, --message Release message/comment (required for new releases)"
echo " --help Show this help"
echo ""
echo "Examples:"
echo " $0 -m \"Fix TUI crash on cluster restore\""
echo " $0 --bump -m \"Add new backup compression option\""
echo " $0 --update # Just update binaries, no message needed"
echo ""
echo "Token file: .gh_token (gitignored)"
exit 0
;;
*)
echo -e "${RED}Unknown option: $1${NC}"
echo "Use --help for usage"
exit 1
;;
esac
done

# Check for GitHub token
if [ ! -f "$TOKEN_FILE" ]; then
echo -e "${RED}❌ Token file not found: $TOKEN_FILE${NC}"
echo ""
echo "Create it with:"
echo " echo 'your_github_token' > $TOKEN_FILE"
echo ""
echo "The file is gitignored for security."
exit 1
fi

GH_TOKEN=$(cat "$TOKEN_FILE" | tr -d '[:space:]')
if [ -z "$GH_TOKEN" ]; then
echo -e "${RED}❌ Token file is empty${NC}"
exit 1
fi

export GH_TOKEN

# Get current version
CURRENT_VERSION=$(grep 'version.*=' "$MAIN_FILE" | head -1 | sed 's/.*"\(.*\)".*/\1/')
echo -e "${BLUE}📦 Current version: ${YELLOW}${CURRENT_VERSION}${NC}"

# Bump version if requested
if [ "$BUMP_VERSION" = true ]; then
# Parse version (X.Y.Z)
MAJOR=$(echo "$CURRENT_VERSION" | cut -d. -f1)
MINOR=$(echo "$CURRENT_VERSION" | cut -d. -f2)
PATCH=$(echo "$CURRENT_VERSION" | cut -d. -f3)

NEW_PATCH=$((PATCH + 1))
NEW_VERSION="${MAJOR}.${MINOR}.${NEW_PATCH}"

echo -e "${GREEN}📈 Bumping version: ${YELLOW}${CURRENT_VERSION}${NC} → ${GREEN}${NEW_VERSION}${NC}"

if [ "$DRY_RUN" = false ]; then
sed -i "s/version.*=.*\"${CURRENT_VERSION}\"/version = \"${NEW_VERSION}\"/" "$MAIN_FILE"
CURRENT_VERSION="$NEW_VERSION"
fi
fi

TAG="v${CURRENT_VERSION}"
echo -e "${BLUE}🏷️ Release tag: ${YELLOW}${TAG}${NC}"

# Require message for new releases (not updates)
if [ -z "$RELEASE_MSG" ] && [ "$UPDATE_ONLY" = false ] && [ "$DRY_RUN" = false ]; then
echo -e "${RED}❌ Release message required. Use -m \"Your message\"${NC}"
echo ""
echo "Example:"
echo " $0 -m \"Fix TUI crash on cluster restore\""
exit 1
fi

if [ "$DRY_RUN" = true ]; then
echo -e "${YELLOW}🔍 DRY RUN - No changes will be made${NC}"
echo ""
echo "Would execute:"
echo " 1. Build binaries with build_all.sh"
echo " 2. Commit and push changes"
echo " 3. Create/update release ${TAG}"
exit 0
fi

# Build binaries
echo ""
echo -e "${BOLD}${BLUE}🔨 Building binaries...${NC}"
bash build_all.sh

# Check if there are changes to commit
if [ -n "$(git status --porcelain)" ]; then
echo ""
echo -e "${BLUE}📝 Committing changes...${NC}"
git add -A

# Generate commit message using the release message
if [ -n "$RELEASE_MSG" ]; then
COMMIT_MSG="${TAG}: ${RELEASE_MSG}"
elif [ "$BUMP_VERSION" = true ]; then
COMMIT_MSG="${TAG}: Version bump"
else
COMMIT_MSG="${TAG}: Release build"
fi

git commit -m "$COMMIT_MSG"
fi

# Push changes
echo -e "${BLUE}⬆️ Pushing to origin...${NC}"
git push origin main

# Handle tag
TAG_EXISTS=$(git tag -l "$TAG")
if [ -z "$TAG_EXISTS" ]; then
echo -e "${BLUE}🏷️ Creating tag ${TAG}...${NC}"
git tag "$TAG"
git push origin "$TAG"
else
echo -e "${YELLOW}⚠️ Tag ${TAG} already exists${NC}"
fi

# Check if release exists
echo ""
echo -e "${BLUE}🚀 Preparing release...${NC}"
# Discard the release output so the flag is exactly "yes" or "no"
RELEASE_EXISTS=$(gh release view "$TAG" >/dev/null 2>&1 && echo "yes" || echo "no")

if [ "$RELEASE_EXISTS" = "yes" ] || [ "$UPDATE_ONLY" = true ]; then
echo -e "${YELLOW}📦 Updating existing release ${TAG}...${NC}"

# Delete existing assets and upload new ones
for binary in bin/dbbackup_*; do
if [ -f "$binary" ]; then
ASSET_NAME=$(basename "$binary")
echo " Uploading $ASSET_NAME..."
gh release upload "$TAG" "$binary" --clobber
fi
done
else
echo -e "${GREEN}📦 Creating new release ${TAG}...${NC}"

# Generate release notes with the provided message
NOTES="## ${TAG}: ${RELEASE_MSG}

### Downloads
| Platform | Architecture | Binary |
|----------|--------------|--------|
| Linux | x86_64 (Intel/AMD) | \`dbbackup_linux_amd64\` |
| Linux | ARM64 | \`dbbackup_linux_arm64\` |
| Linux | ARMv7 | \`dbbackup_linux_arm_armv7\` |
| macOS | Intel | \`dbbackup_darwin_amd64\` |
| macOS | Apple Silicon (M1/M2) | \`dbbackup_darwin_arm64\` |

### Installation
\`\`\`bash
# Linux x86_64
curl -LO https://github.com/PlusOne/dbbackup/releases/download/${TAG}/dbbackup_linux_amd64
chmod +x dbbackup_linux_amd64
sudo mv dbbackup_linux_amd64 /usr/local/bin/dbbackup

# macOS Apple Silicon
curl -LO https://github.com/PlusOne/dbbackup/releases/download/${TAG}/dbbackup_darwin_arm64
chmod +x dbbackup_darwin_arm64
sudo mv dbbackup_darwin_arm64 /usr/local/bin/dbbackup
\`\`\`
"

gh release create "$TAG" \
--title "${TAG}: ${RELEASE_MSG}" \
--notes "$NOTES" \
bin/dbbackup_linux_amd64 \
bin/dbbackup_linux_arm64 \
bin/dbbackup_linux_arm_armv7 \
bin/dbbackup_darwin_amd64 \
bin/dbbackup_darwin_arm64
fi

echo ""
echo -e "${GREEN}${BOLD}✅ Release complete!${NC}"
echo -e " ${BLUE}https://github.com/PlusOne/dbbackup/releases/tag/${TAG}${NC}"
222
scripts/dbtest.sh
Normal file

@ -0,0 +1,222 @@
#!/bin/bash
# Enterprise Database Test Utility
set -e

DB_NAME="${DB_NAME:-testdb_500gb}"
TARGET_GB="${TARGET_GB:-500}"
BLOB_KB="${BLOB_KB:-100}"
BATCH_ROWS="${BATCH_ROWS:-10000}"

show_help() {
cat << 'HELP'
╔═══════════════════════════════════════════════════════════════╗
║ ENTERPRISE DATABASE TEST UTILITY ║
╚═══════════════════════════════════════════════════════════════╝

Usage: ./dbtest.sh <command> [options]

Commands:
status Show current database status
generate Generate test database (interactive)
generate-bg Generate in background (tmux)
stop Stop running generation
drop Drop test database
drop-all Drop ALL non-system databases
backup Run dbbackup to SMB
estimate Estimate generation time
log Show generation log
attach Attach to tmux session

Environment variables:
DB_NAME=testdb_500gb Database name
TARGET_GB=500 Target size in GB
BLOB_KB=100 Blob size in KB
BATCH_ROWS=10000 Rows per batch

Examples:
./dbtest.sh generate # Interactive generation
TARGET_GB=100 ./dbtest.sh generate-bg # 100GB in background
DB_NAME=mytest ./dbtest.sh drop # Drop specific database
./dbtest.sh drop-all # Clean slate
HELP
}

cmd_status() {
echo "╔═══════════════════════════════════════════════════════════════╗"
echo "║ DATABASE STATUS - $(date '+%Y-%m-%d %H:%M:%S') ║"
echo "╚═══════════════════════════════════════════════════════════════╝"
echo ""

echo "┌─ GENERATION ──────────────────────────────────────────────────┐"
if tmux has-session -t dbgen 2>/dev/null; then
echo "│ Status: ⏳ RUNNING (attach: ./dbtest.sh attach)"
echo "│ Log: $(tail -1 /root/generate_500gb.log 2>/dev/null | cut -c1-55)"
else
echo "│ Status: ⏹ Not running"
fi
echo "└───────────────────────────────────────────────────────────────┘"
echo ""

echo "┌─ POSTGRESQL DATABASES ─────────────────────────────────────────┐"
sudo -u postgres psql -t -c "SELECT datname || ': ' || pg_size_pretty(pg_database_size(datname)) FROM pg_database WHERE datname NOT LIKE 'template%' ORDER BY pg_database_size(datname) DESC" 2>/dev/null | sed 's/^/│ /'
echo "└───────────────────────────────────────────────────────────────┘"
echo ""

echo "┌─ STORAGE ──────────────────────────────────────────────────────┐"
echo -n "│ Fast 1TB: "; df -h /mnt/HC_Volume_104577460 2>/dev/null | awk 'NR==2{print $3"/"$2" ("$5")"}' || echo "N/A"
echo -n "│ SMB 10TB: "; df -h /mnt/smb-devdb 2>/dev/null | awk 'NR==2{print $3"/"$2" ("$5")"}' || echo "N/A"
echo -n "│ Local: "; df -h / | awk 'NR==2{print $3"/"$2" ("$5")"}'
echo "└───────────────────────────────────────────────────────────────┘"
}

cmd_stop() {
echo "Stopping generation..."
tmux kill-session -t dbgen 2>/dev/null && echo "Stopped." || echo "Not running."
}

cmd_drop() {
echo "Dropping database: $DB_NAME"
sudo -u postgres psql -c "SELECT pg_terminate_backend(pid) FROM pg_stat_activity WHERE datname='$DB_NAME' AND pid <> pg_backend_pid();" 2>/dev/null || true
sudo -u postgres dropdb --if-exists "$DB_NAME" && echo "Dropped: $DB_NAME" || echo "Not found."
}

cmd_drop_all() {
echo "WARNING: This will drop ALL non-system databases!"
read -p "Type 'YES' to confirm: " confirm
[ "$confirm" != "YES" ] && echo "Cancelled." && exit 0

for db in $(sudo -u postgres psql -t -c "SELECT datname FROM pg_database WHERE datname NOT IN ('postgres','template0','template1')"); do
db=$(echo $db | tr -d ' ')
[ -n "$db" ] && echo "Dropping: $db" && sudo -u postgres dropdb --if-exists "$db"
done
echo "Done."
}

cmd_log() {
tail -50 /root/generate_500gb.log 2>/dev/null || echo "No log file."
}

cmd_attach() {
tmux has-session -t dbgen 2>/dev/null && tmux attach -t dbgen || echo "Not running."
}

cmd_backup() {
mkdir -p /mnt/smb-devdb/cluster-500gb
dbbackup backup cluster --backup-dir /mnt/smb-devdb/cluster-500gb
}

cmd_estimate() {
echo "Target: ${TARGET_GB}GB with ${BLOB_KB}KB blobs"
mins=$((TARGET_GB / 2))
echo "Estimated: ~${mins} minutes (~$((mins/60)) hours)"
}

cmd_generate() {
echo "=== Interactive Database Generator ==="
read -p "Database name [$DB_NAME]: " i; DB_NAME="${i:-$DB_NAME}"
read -p "Target size GB [$TARGET_GB]: " i; TARGET_GB="${i:-$TARGET_GB}"
read -p "Blob size KB [$BLOB_KB]: " i; BLOB_KB="${i:-$BLOB_KB}"
read -p "Rows per batch [$BATCH_ROWS]: " i; BATCH_ROWS="${i:-$BATCH_ROWS}"

echo "Config: $DB_NAME, ${TARGET_GB}GB, ${BLOB_KB}KB blobs"
read -p "Start? [y/N]: " c
[[ "$c" != "y" && "$c" != "Y" ]] && echo "Cancelled." && exit 0

do_generate
}

cmd_generate_bg() {
echo "Starting: $DB_NAME, ${TARGET_GB}GB, ${BLOB_KB}KB blobs"
tmux kill-session -t dbgen 2>/dev/null || true

tmux new-session -d -s dbgen "DB_NAME=$DB_NAME TARGET_GB=$TARGET_GB BLOB_KB=$BLOB_KB BATCH_ROWS=$BATCH_ROWS /root/dbtest.sh _run 2>&1 | tee /root/generate_500gb.log"
echo "Started in tmux. Use: ./dbtest.sh log | attach | stop"
}

do_generate() {
BLOB_BYTES=$((BLOB_KB * 1024))
echo "=== ${TARGET_GB}GB Generator ==="
echo "Started: $(date)"

sudo -u postgres dropdb --if-exists "$DB_NAME"
sudo -u postgres createdb "$DB_NAME"
sudo -u postgres psql -d "$DB_NAME" -c "CREATE EXTENSION IF NOT EXISTS pgcrypto;"

sudo -u postgres psql -d "$DB_NAME" << 'EOSQL'
CREATE OR REPLACE FUNCTION large_random_bytes(size_bytes INT) RETURNS BYTEA AS $$
DECLARE r BYTEA := E'\x'; c INT := 1024; m INT := size_bytes;
BEGIN
WHILE m > 0 LOOP
IF m >= c THEN r := r || gen_random_bytes(c); m := m - c;
ELSE r := r || gen_random_bytes(m); m := 0; END IF;
END LOOP;
RETURN r;
END; $$ LANGUAGE plpgsql;

CREATE TABLE enterprise_documents (
id BIGSERIAL PRIMARY KEY, uuid UUID DEFAULT gen_random_uuid(),
created_at TIMESTAMPTZ DEFAULT now(), document_type VARCHAR(50),
document_name VARCHAR(255), file_size BIGINT, content BYTEA
);
ALTER TABLE enterprise_documents ALTER COLUMN content SET STORAGE EXTERNAL;
CREATE INDEX idx_doc_created ON enterprise_documents(created_at);

CREATE TABLE enterprise_transactions (
id BIGSERIAL PRIMARY KEY, created_at TIMESTAMPTZ DEFAULT now(),
customer_id BIGINT, amount DECIMAL(15,2), status VARCHAR(20)
);
EOSQL

echo "Tables created"
batch=0
start=$(date +%s)

while true; do
sz=$(sudo -u postgres psql -t -A -c "SELECT pg_database_size('$DB_NAME')/1024/1024/1024")
[ "$sz" -ge "$TARGET_GB" ] && echo "=== Target reached: ${sz}GB ===" && break

batch=$((batch + 1))
pct=$((sz * 100 / TARGET_GB))
el=$(($(date +%s) - start))
if [ $sz -gt 0 ] && [ $el -gt 0 ]; then
eta="$(((TARGET_GB-sz)*el/sz/60))min"
else
eta="..."
fi

echo "Batch $batch: ${sz}GB/${TARGET_GB}GB (${pct}%) ETA:$eta"

sudo -u postgres psql -q -d "$DB_NAME" -c "
INSERT INTO enterprise_documents (document_type, document_name, file_size, content)
SELECT (ARRAY['PDF','DOCX','IMG','VID'])[floor(random()*4+1)],
'Doc_'||i||'_'||substr(md5(random()::TEXT),1,8), $BLOB_BYTES,
large_random_bytes($BLOB_BYTES)
FROM generate_series(1, $BATCH_ROWS) i;"

sudo -u postgres psql -q -d "$DB_NAME" -c "
INSERT INTO enterprise_transactions (customer_id, amount, status)
SELECT (random()*1000000)::BIGINT, (random()*10000)::DECIMAL(15,2),
(ARRAY['ok','pending','failed'])[floor(random()*3+1)]
FROM generate_series(1, 20000);"
done

sudo -u postgres psql -d "$DB_NAME" -c "ANALYZE;"
sudo -u postgres psql -d "$DB_NAME" -c "SELECT pg_size_pretty(pg_database_size('$DB_NAME')) as size, (SELECT count(*) FROM enterprise_documents) as docs;"
echo "Completed: $(date)"
}

case "${1:-help}" in
status) cmd_status ;;
generate) cmd_generate ;;
generate-bg) cmd_generate_bg ;;
stop) cmd_stop ;;
drop) cmd_drop ;;
drop-all) cmd_drop_all ;;
backup) cmd_backup ;;
estimate) cmd_estimate ;;
log) cmd_log ;;
attach) cmd_attach ;;
_run) do_generate ;;
help|--help|-h) show_help ;;
*) echo "Unknown: $1"; show_help ;;
esac