Compare commits

..

2 Commits

Author SHA1 Message Date
4938dc1918 fix: max_locks_per_transaction requires PostgreSQL restart
All checks were successful
CI/CD / Test (push) Successful in 1m19s
CI/CD / Lint (push) Successful in 1m31s
CI/CD / Build & Release (push) Successful in 3m34s
- Fixed critical bug where ALTER SYSTEM + pg_reload_conf() was used
  but max_locks_per_transaction requires a full PostgreSQL restart
- Added automatic restart attempt (systemctl, service, pg_ctl)
- Added loud warnings if restart fails with manual fix instructions
- Updated preflight checks to warn about low max_locks_per_transaction
- This was causing 'out of shared memory' errors on BLOB-heavy restores
2026-01-15 18:50:10 +01:00
09a917766f TUI: Add database progress tracking to cluster backup
All checks were successful
CI/CD / Test (push) Successful in 1m27s
CI/CD / Lint (push) Successful in 1m33s
CI/CD / Build & Release (push) Successful in 3m28s
- Add ProgressCallback and DatabaseProgressCallback to backup engine
- Add SetProgressCallback() and SetDatabaseProgressCallback() methods
- Wire database progress callback in backup_exec.go
- Show progress bar (Database: [████░░░] X/Y) during cluster backups
- Hide spinner when progress bar is displayed
- Use shared progress state pattern for thread-safe TUI updates
2026-01-15 15:32:26 +01:00
5 changed files with 271 additions and 24 deletions

View File

@@ -4,8 +4,8 @@ This directory contains pre-compiled binaries for the DB Backup Tool across mult
## Build Information
- **Version**: 3.42.34
- **Build Time**: 2026-01-14_16:19:00_UTC
- **Git Commit**: 7711a20
- **Build Time**: 2026-01-15_14:33:12_UTC
- **Git Commit**: 09a9177
## Recent Updates (v1.1.0)
- ✅ Fixed TUI progress display with line-by-line output

View File

@@ -28,14 +28,22 @@ import (
"dbbackup/internal/swap"
)
// ProgressCallback is called with byte-level progress updates during backup operations
type ProgressCallback func(current, total int64, description string)
// DatabaseProgressCallback is called with database count progress during cluster backup
type DatabaseProgressCallback func(done, total int, dbName string)
// Engine handles backup operations
type Engine struct {
cfg *config.Config
log logger.Logger
db database.Database
progress progress.Indicator
detailedReporter *progress.DetailedReporter
silent bool // Silent mode for TUI
cfg *config.Config
log logger.Logger
db database.Database
progress progress.Indicator
detailedReporter *progress.DetailedReporter
silent bool // Silent mode for TUI
progressCallback ProgressCallback
dbProgressCallback DatabaseProgressCallback
}
// New creates a new backup engine
@@ -86,6 +94,30 @@ func NewSilent(cfg *config.Config, log logger.Logger, db database.Database, prog
}
}
// SetProgressCallback sets a callback for detailed progress reporting (for TUI mode)
func (e *Engine) SetProgressCallback(cb ProgressCallback) {
e.progressCallback = cb
}
// SetDatabaseProgressCallback sets a callback for database count progress during cluster backup
func (e *Engine) SetDatabaseProgressCallback(cb DatabaseProgressCallback) {
e.dbProgressCallback = cb
}
// reportProgress reports progress to the callback if set
func (e *Engine) reportProgress(current, total int64, description string) {
if e.progressCallback != nil {
e.progressCallback(current, total, description)
}
}
// reportDatabaseProgress reports database count progress to the callback if set
func (e *Engine) reportDatabaseProgress(done, total int, dbName string) {
if e.dbProgressCallback != nil {
e.dbProgressCallback(done, total, dbName)
}
}
// loggerAdapter adapts our logger to the progress.Logger interface
type loggerAdapter struct {
logger logger.Logger
@@ -465,6 +497,8 @@ func (e *Engine) BackupCluster(ctx context.Context) error {
estimator.UpdateProgress(idx)
e.printf(" [%d/%d] Backing up database: %s\n", idx+1, len(databases), name)
quietProgress.Update(fmt.Sprintf("Backing up database %d/%d: %s", idx+1, len(databases), name))
// Report database progress to TUI callback
e.reportDatabaseProgress(idx+1, len(databases), name)
mu.Unlock()
// Check database size and warn if very large

View File

@@ -1937,6 +1937,8 @@ type OriginalSettings struct {
}
// boostPostgreSQLSettings boosts multiple PostgreSQL settings for large restores
// NOTE: max_locks_per_transaction requires a PostgreSQL RESTART to take effect!
// maintenance_work_mem can be changed with pg_reload_conf().
func (e *Engine) boostPostgreSQLSettings(ctx context.Context, lockBoostValue int) (*OriginalSettings, error) {
connStr := e.buildConnString()
db, err := sql.Open("pgx", connStr)
@@ -1956,30 +1958,113 @@ func (e *Engine) boostPostgreSQLSettings(ctx context.Context, lockBoostValue int
// Get current maintenance_work_mem
db.QueryRowContext(ctx, "SHOW maintenance_work_mem").Scan(&original.MaintenanceWorkMem)
// Boost max_locks_per_transaction (if not already high enough)
// CRITICAL: max_locks_per_transaction requires a PostgreSQL RESTART!
// pg_reload_conf() is NOT sufficient for this parameter.
needsRestart := false
if original.MaxLocks < lockBoostValue {
_, err = db.ExecContext(ctx, fmt.Sprintf("ALTER SYSTEM SET max_locks_per_transaction = %d", lockBoostValue))
if err != nil {
e.log.Warn("Could not boost max_locks_per_transaction", "error", err)
e.log.Warn("Could not set max_locks_per_transaction", "error", err)
} else {
needsRestart = true
e.log.Warn("max_locks_per_transaction requires PostgreSQL restart to take effect",
"current", original.MaxLocks,
"target", lockBoostValue)
}
}
// Boost maintenance_work_mem to 2GB for faster index creation
// (this one CAN be applied via pg_reload_conf)
_, err = db.ExecContext(ctx, "ALTER SYSTEM SET maintenance_work_mem = '2GB'")
if err != nil {
e.log.Warn("Could not boost maintenance_work_mem", "error", err)
}
// Reload config to apply changes (no restart needed for these settings)
// Reload config to apply maintenance_work_mem
_, err = db.ExecContext(ctx, "SELECT pg_reload_conf()")
if err != nil {
return original, fmt.Errorf("failed to reload config: %w", err)
}
// If max_locks_per_transaction needs a restart, try to do it
if needsRestart {
if restarted := e.tryRestartPostgreSQL(ctx); restarted {
e.log.Info("PostgreSQL restarted successfully - max_locks_per_transaction now active")
// Wait for PostgreSQL to be ready
time.Sleep(3 * time.Second)
} else {
// Cannot restart - warn user loudly
e.log.Error("=" + strings.Repeat("=", 70))
e.log.Error("WARNING: max_locks_per_transaction change requires PostgreSQL restart!")
e.log.Error("Current value: " + strconv.Itoa(original.MaxLocks) + ", needed: " + strconv.Itoa(lockBoostValue))
e.log.Error("Restore may fail with 'out of shared memory' error on BLOB-heavy databases.")
e.log.Error("")
e.log.Error("To fix manually:")
e.log.Error(" 1. sudo systemctl restart postgresql")
e.log.Error(" 2. Or: sudo -u postgres pg_ctl restart -D $PGDATA")
e.log.Error(" 3. Then re-run the restore")
e.log.Error("=" + strings.Repeat("=", 70))
// Continue anyway - might work for small restores
}
}
return original, nil
}
// tryRestartPostgreSQL attempts to restart PostgreSQL using various methods
// Returns true if restart was successful
func (e *Engine) tryRestartPostgreSQL(ctx context.Context) bool {
e.progress.Update("Attempting PostgreSQL restart for lock settings...")
// Method 1: systemctl (most common on modern Linux)
cmd := exec.CommandContext(ctx, "sudo", "systemctl", "restart", "postgresql")
if err := cmd.Run(); err == nil {
return true
}
// Method 2: systemctl with version suffix (e.g., postgresql-15)
for _, ver := range []string{"17", "16", "15", "14", "13", "12"} {
cmd = exec.CommandContext(ctx, "sudo", "systemctl", "restart", "postgresql-"+ver)
if err := cmd.Run(); err == nil {
return true
}
}
// Method 3: service command (older systems)
cmd = exec.CommandContext(ctx, "sudo", "service", "postgresql", "restart")
if err := cmd.Run(); err == nil {
return true
}
// Method 4: pg_ctl as postgres user
cmd = exec.CommandContext(ctx, "sudo", "-u", "postgres", "pg_ctl", "restart", "-D", "/var/lib/postgresql/data", "-m", "fast")
if err := cmd.Run(); err == nil {
return true
}
// Method 5: Try common PGDATA paths
pgdataPaths := []string{
"/var/lib/pgsql/data",
"/var/lib/pgsql/17/data",
"/var/lib/pgsql/16/data",
"/var/lib/pgsql/15/data",
"/var/lib/postgresql/17/main",
"/var/lib/postgresql/16/main",
"/var/lib/postgresql/15/main",
}
for _, pgdata := range pgdataPaths {
cmd = exec.CommandContext(ctx, "sudo", "-u", "postgres", "pg_ctl", "restart", "-D", pgdata, "-m", "fast")
if err := cmd.Run(); err == nil {
return true
}
}
return false
}
// resetPostgreSQLSettings restores original PostgreSQL settings
// NOTE: max_locks_per_transaction changes are written but require restart to take effect.
// We don't restart here since we're done with the restore.
func (e *Engine) resetPostgreSQLSettings(ctx context.Context, original *OriginalSettings) error {
connStr := e.buildConnString()
db, err := sql.Open("pgx", connStr)
@@ -1988,25 +2073,28 @@ func (e *Engine) resetPostgreSQLSettings(ctx context.Context, original *Original
}
defer db.Close()
// Reset max_locks_per_transaction
// Reset max_locks_per_transaction (will take effect on next restart)
if original.MaxLocks == 64 { // Default
db.ExecContext(ctx, "ALTER SYSTEM RESET max_locks_per_transaction")
} else if original.MaxLocks > 0 {
db.ExecContext(ctx, fmt.Sprintf("ALTER SYSTEM SET max_locks_per_transaction = %d", original.MaxLocks))
}
// Reset maintenance_work_mem
// Reset maintenance_work_mem (takes effect immediately with reload)
if original.MaintenanceWorkMem == "64MB" { // Default
db.ExecContext(ctx, "ALTER SYSTEM RESET maintenance_work_mem")
} else if original.MaintenanceWorkMem != "" {
db.ExecContext(ctx, fmt.Sprintf("ALTER SYSTEM SET maintenance_work_mem = '%s'", original.MaintenanceWorkMem))
}
// Reload config
// Reload config (only maintenance_work_mem will take effect immediately)
_, err = db.ExecContext(ctx, "SELECT pg_reload_conf()")
if err != nil {
return fmt.Errorf("failed to reload config: %w", err)
}
e.log.Info("PostgreSQL settings reset queued",
"note", "max_locks_per_transaction will revert on next PostgreSQL restart")
return nil
}

View File

@@ -201,10 +201,19 @@ func (e *Engine) checkPostgreSQL(ctx context.Context, result *PreflightResult) {
result.PostgreSQL.IsSuperuser = isSuperuser
}
// Add info/warnings
// CRITICAL: max_locks_per_transaction requires PostgreSQL RESTART to change!
// Warn users loudly about this - it's the #1 cause of "out of shared memory" errors
if result.PostgreSQL.MaxLocksPerTransaction < 256 {
e.log.Info("PostgreSQL max_locks_per_transaction is low - will auto-boost",
"current", result.PostgreSQL.MaxLocksPerTransaction)
e.log.Warn("PostgreSQL max_locks_per_transaction is LOW",
"current", result.PostgreSQL.MaxLocksPerTransaction,
"recommended", "256+",
"note", "REQUIRES PostgreSQL restart to change!")
result.Warnings = append(result.Warnings,
fmt.Sprintf("max_locks_per_transaction=%d is low (recommend 256+). "+
"This setting requires PostgreSQL RESTART to change. "+
"BLOB-heavy databases may fail with 'out of shared memory' error.",
result.PostgreSQL.MaxLocksPerTransaction))
}
// Parse shared_buffers and warn if very low

130
internal/tui/backup_exec.go Executable file → Normal file
View File

@@ -4,6 +4,7 @@ import (
"context"
"fmt"
"strings"
"sync"
"time"
tea "github.com/charmbracelet/bubbletea"
@@ -33,6 +34,56 @@ type BackupExecutionModel struct {
startTime time.Time
details []string
spinnerFrame int
// Database count progress (for cluster backup)
dbTotal int
dbDone int
dbName string // Current database being backed up
}
// sharedBackupProgressState holds progress state that can be safely accessed from callbacks
type sharedBackupProgressState struct {
mu sync.Mutex
dbTotal int
dbDone int
dbName string
hasUpdate bool
}
// Package-level shared progress state for backup operations
var (
currentBackupProgressMu sync.Mutex
currentBackupProgressState *sharedBackupProgressState
)
func setCurrentBackupProgress(state *sharedBackupProgressState) {
currentBackupProgressMu.Lock()
defer currentBackupProgressMu.Unlock()
currentBackupProgressState = state
}
func clearCurrentBackupProgress() {
currentBackupProgressMu.Lock()
defer currentBackupProgressMu.Unlock()
currentBackupProgressState = nil
}
func getCurrentBackupProgress() (dbTotal, dbDone int, dbName string, hasUpdate bool) {
currentBackupProgressMu.Lock()
defer currentBackupProgressMu.Unlock()
if currentBackupProgressState == nil {
return 0, 0, "", false
}
currentBackupProgressState.mu.Lock()
defer currentBackupProgressState.mu.Unlock()
hasUpdate = currentBackupProgressState.hasUpdate
currentBackupProgressState.hasUpdate = false
return currentBackupProgressState.dbTotal, currentBackupProgressState.dbDone,
currentBackupProgressState.dbName, hasUpdate
}
func NewBackupExecution(cfg *config.Config, log logger.Logger, parent tea.Model, ctx context.Context, backupType, dbName string, ratio int) BackupExecutionModel {
@@ -55,7 +106,6 @@ func NewBackupExecution(cfg *config.Config, log logger.Logger, parent tea.Model,
}
func (m BackupExecutionModel) Init() tea.Cmd {
// TUI handles all display through View() - no progress callbacks needed
return tea.Batch(
executeBackupWithTUIProgress(m.ctx, m.config, m.logger, m.backupType, m.databaseName, m.ratio),
backupTickCmd(),
@@ -91,6 +141,11 @@ func executeBackupWithTUIProgress(parentCtx context.Context, cfg *config.Config,
start := time.Now()
// Setup shared progress state for TUI polling
progressState := &sharedBackupProgressState{}
setCurrentBackupProgress(progressState)
defer clearCurrentBackupProgress()
dbClient, err := database.New(cfg, log)
if err != nil {
return backupCompleteMsg{
@@ -110,6 +165,16 @@ func executeBackupWithTUIProgress(parentCtx context.Context, cfg *config.Config,
// Pass nil as indicator - TUI itself handles all display, no stdout printing
engine := backup.NewSilent(cfg, log, dbClient, nil)
// Set database progress callback for cluster backups
engine.SetDatabaseProgressCallback(func(done, total int, currentDB string) {
progressState.mu.Lock()
progressState.dbDone = done
progressState.dbTotal = total
progressState.dbName = currentDB
progressState.hasUpdate = true
progressState.mu.Unlock()
})
var backupErr error
switch backupType {
case "single":
@@ -157,10 +222,21 @@ func (m BackupExecutionModel) Update(msg tea.Msg) (tea.Model, tea.Cmd) {
// Increment spinner frame for smooth animation
m.spinnerFrame = (m.spinnerFrame + 1) % len(spinnerFrames)
// Update status based on elapsed time to show progress
// Poll for database progress updates from callbacks
dbTotal, dbDone, dbName, hasUpdate := getCurrentBackupProgress()
if hasUpdate {
m.dbTotal = dbTotal
m.dbDone = dbDone
m.dbName = dbName
}
// Update status based on progress and elapsed time
elapsedSec := int(time.Since(m.startTime).Seconds())
if elapsedSec < 2 {
if m.dbTotal > 0 && m.dbDone > 0 {
// We have real progress from cluster backup
m.status = fmt.Sprintf("Backing up database: %s", m.dbName)
} else if elapsedSec < 2 {
m.status = "Initializing backup..."
} else if elapsedSec < 5 {
if m.backupType == "cluster" {
@@ -234,6 +310,34 @@ func (m BackupExecutionModel) Update(msg tea.Msg) (tea.Model, tea.Cmd) {
return m, nil
}
// renderDatabaseProgressBar renders a progress bar for database count progress
func renderBackupDatabaseProgressBar(done, total int, dbName string, width int) string {
if total == 0 {
return ""
}
// Calculate progress percentage
percent := float64(done) / float64(total)
if percent > 1.0 {
percent = 1.0
}
// Calculate filled width
barWidth := width - 20 // Leave room for label and percentage
if barWidth < 10 {
barWidth = 10
}
filled := int(float64(barWidth) * percent)
if filled > barWidth {
filled = barWidth
}
// Build progress bar
bar := strings.Repeat("█", filled) + strings.Repeat("░", barWidth-filled)
return fmt.Sprintf(" Database: [%s] %d/%d", bar, done, total)
}
func (m BackupExecutionModel) View() string {
var s strings.Builder
s.Grow(512) // Pre-allocate estimated capacity for better performance
@@ -255,12 +359,24 @@ func (m BackupExecutionModel) View() string {
s.WriteString(fmt.Sprintf(" %-10s %s\n", "Duration:", time.Since(m.startTime).Round(time.Second)))
s.WriteString("\n")
// Status with spinner
// Status display
if !m.done {
if m.cancelling {
s.WriteString(fmt.Sprintf(" %s %s\n", spinnerFrames[m.spinnerFrame], m.status))
// Show database progress bar if we have progress data (cluster backup)
if m.dbTotal > 0 && m.dbDone > 0 {
// Show progress bar instead of spinner when we have real progress
progressBar := renderBackupDatabaseProgressBar(m.dbDone, m.dbTotal, m.dbName, 50)
s.WriteString(progressBar + "\n")
s.WriteString(fmt.Sprintf(" %s\n", m.status))
} else {
s.WriteString(fmt.Sprintf(" %s %s\n", spinnerFrames[m.spinnerFrame], m.status))
// Show spinner during initial phases
if m.cancelling {
s.WriteString(fmt.Sprintf(" %s %s\n", spinnerFrames[m.spinnerFrame], m.status))
} else {
s.WriteString(fmt.Sprintf(" %s %s\n", spinnerFrames[m.spinnerFrame], m.status))
}
}
if !m.cancelling {
s.WriteString("\n [KEY] Press Ctrl+C or ESC to cancel\n")
}
} else {