Compare commits

...

2 Commits

Author SHA1 Message Date
dd1db844ce fix: improve lock capacity calculation for smaller VMs
All checks were successful
CI/CD / Test (push) Successful in 1m16s
CI/CD / Lint (push) Successful in 1m25s
CI/CD / Build & Release (push) Successful in 3m13s
- Fix boostLockCapacity: max_locks_per_transaction requires RESTART, not reload
- Calculate total lock capacity: max_locks × (max_connections + max_prepared_txns)
- Add TotalLockCapacity to preflight checks with warning if < 200,000
- Update error hints to explain capacity formula and recommend 4096+ for small VMs
- Show max_connections and total capacity in preflight summary

Fixes OOM 'out of shared memory' errors on VMs with reduced resources
2026-01-17 07:48:17 +01:00
4ea3ec2cf8 fix(preflight): improve BLOB count detection and block restore when max_locks_per_transaction is critically low
All checks were successful
CI/CD / Test (push) Successful in 1m15s
CI/CD / Lint (push) Successful in 1m26s
CI/CD / Build & Release (push) Successful in 3m14s
- Add higher lock boost tiers for extreme BLOB counts (100K+, 200K+)
- Calculate actual lock requirement: (totalBLOBs / max_connections) * 1.5
- Block restore with CRITICAL error when BLOB count exceeds 2x safe limit
- Improve preflight summary with PASSED/FAILED status display
- Add clearer fix instructions for max_locks_per_transaction
2026-01-17 07:25:45 +01:00
4 changed files with 195 additions and 32 deletions

View File

@@ -4,8 +4,8 @@ This directory contains pre-compiled binaries for the DB Backup Tool across mult
## Build Information ## Build Information
- **Version**: 3.42.50 - **Version**: 3.42.50
- **Build Time**: 2026-01-16_17:31:38_UTC - **Build Time**: 2026-01-17_06:25:57_UTC
- **Git Commit**: 698b8a7 - **Git Commit**: 4ea3ec2
## Recent Updates (v1.1.0) ## Recent Updates (v1.1.0)
- ✅ Fixed TUI progress display with line-by-line output - ✅ Fixed TUI progress display with line-by-line output

View File

@@ -68,8 +68,8 @@ func ClassifyError(errorMsg string) *ErrorClassification {
Type: "critical", Type: "critical",
Category: "locks", Category: "locks",
Message: errorMsg, Message: errorMsg,
Hint: "Lock table exhausted - typically caused by large objects (BLOBs) during restore", Hint: "Lock table exhausted. Total capacity = max_locks_per_transaction × (max_connections + max_prepared_transactions). If you reduced VM size or max_connections, you need higher max_locks_per_transaction to compensate.",
Action: "Option 1: Increase max_locks_per_transaction to 1024+ in postgresql.conf (requires restart). Option 2: Update dbbackup and retry - phased restore now auto-enabled for BLOB databases", Action: "Fix: ALTER SYSTEM SET max_locks_per_transaction = 4096; then RESTART PostgreSQL. For smaller VMs with fewer connections, you need higher max_locks_per_transaction values.",
Severity: 2, Severity: 2,
} }
case "permission_denied": case "permission_denied":
@@ -142,8 +142,8 @@ func ClassifyError(errorMsg string) *ErrorClassification {
Type: "critical", Type: "critical",
Category: "locks", Category: "locks",
Message: errorMsg, Message: errorMsg,
Hint: "Lock table exhausted - typically caused by large objects (BLOBs) during restore", Hint: "Lock table exhausted. Total capacity = max_locks_per_transaction × (max_connections + max_prepared_transactions). If you reduced VM size or max_connections, you need higher max_locks_per_transaction to compensate.",
Action: "Option 1: Increase max_locks_per_transaction to 1024+ in postgresql.conf (requires restart). Option 2: Update dbbackup and retry - phased restore now auto-enabled for BLOB databases", Action: "Fix: ALTER SYSTEM SET max_locks_per_transaction = 4096; then RESTART PostgreSQL. For smaller VMs with fewer connections, you need higher max_locks_per_transaction values.",
Severity: 2, Severity: 2,
} }
} }

View File

@@ -2125,9 +2125,10 @@ func (e *Engine) quickValidateSQLDump(archivePath string, compressed bool) error
return nil return nil
} }
// boostLockCapacity temporarily increases max_locks_per_transaction to prevent OOM // boostLockCapacity checks and reports on max_locks_per_transaction capacity.
// during large restores with many BLOBs. Returns the original value for later reset. // IMPORTANT: max_locks_per_transaction requires a PostgreSQL RESTART to change!
// Uses ALTER SYSTEM + pg_reload_conf() so no restart is needed. // This function now calculates total lock capacity based on max_connections and
// warns the user if capacity is insufficient for the restore.
func (e *Engine) boostLockCapacity(ctx context.Context) (int, error) { func (e *Engine) boostLockCapacity(ctx context.Context) (int, error) {
// Connect to PostgreSQL to run system commands // Connect to PostgreSQL to run system commands
connStr := fmt.Sprintf("host=%s port=%d user=%s password=%s dbname=postgres sslmode=disable", connStr := fmt.Sprintf("host=%s port=%d user=%s password=%s dbname=postgres sslmode=disable",
@@ -2145,7 +2146,7 @@ func (e *Engine) boostLockCapacity(ctx context.Context) (int, error) {
} }
defer db.Close() defer db.Close()
// Get current value // Get current max_locks_per_transaction
var currentValue int var currentValue int
err = db.QueryRowContext(ctx, "SHOW max_locks_per_transaction").Scan(&currentValue) err = db.QueryRowContext(ctx, "SHOW max_locks_per_transaction").Scan(&currentValue)
if err != nil { if err != nil {
@@ -2158,22 +2159,56 @@ func (e *Engine) boostLockCapacity(ctx context.Context) (int, error) {
fmt.Sscanf(currentValueStr, "%d", &currentValue) fmt.Sscanf(currentValueStr, "%d", &currentValue)
} }
// Skip if already high enough // Get max_connections to calculate total lock capacity
if currentValue >= 2048 { var maxConns int
e.log.Info("max_locks_per_transaction already sufficient", "value", currentValue) if err := db.QueryRowContext(ctx, "SHOW max_connections").Scan(&maxConns); err != nil {
return currentValue, nil maxConns = 100 // default
} }
// Boost to 2048 (enough for most BLOB-heavy databases) // Get max_prepared_transactions
_, err = db.ExecContext(ctx, "ALTER SYSTEM SET max_locks_per_transaction = 2048") var maxPreparedTxns int
if err != nil { if err := db.QueryRowContext(ctx, "SHOW max_prepared_transactions").Scan(&maxPreparedTxns); err != nil {
return currentValue, fmt.Errorf("failed to set max_locks_per_transaction: %w", err) maxPreparedTxns = 0
} }
// Reload config without restart // Calculate total lock table capacity:
_, err = db.ExecContext(ctx, "SELECT pg_reload_conf()") // Total locks = max_locks_per_transaction × (max_connections + max_prepared_transactions)
if err != nil { totalLockCapacity := currentValue * (maxConns + maxPreparedTxns)
return currentValue, fmt.Errorf("failed to reload config: %w", err)
e.log.Info("PostgreSQL lock table capacity",
"max_locks_per_transaction", currentValue,
"max_connections", maxConns,
"max_prepared_transactions", maxPreparedTxns,
"total_lock_capacity", totalLockCapacity)
// Minimum recommended total capacity for BLOB-heavy restores: 200,000 locks
minRecommendedCapacity := 200000
if totalLockCapacity < minRecommendedCapacity {
recommendedMaxLocks := minRecommendedCapacity / (maxConns + maxPreparedTxns)
if recommendedMaxLocks < 4096 {
recommendedMaxLocks = 4096
}
e.log.Warn("Lock table capacity may be insufficient for BLOB-heavy restores",
"current_total_capacity", totalLockCapacity,
"recommended_capacity", minRecommendedCapacity,
"current_max_locks", currentValue,
"recommended_max_locks", recommendedMaxLocks,
"note", "max_locks_per_transaction requires PostgreSQL RESTART to change")
// Write suggested fix to ALTER SYSTEM but warn about restart
_, err = db.ExecContext(ctx, fmt.Sprintf("ALTER SYSTEM SET max_locks_per_transaction = %d", recommendedMaxLocks))
if err != nil {
e.log.Warn("Could not set recommended max_locks_per_transaction (needs superuser)", "error", err)
} else {
e.log.Warn("Wrote recommended max_locks_per_transaction to postgresql.auto.conf",
"value", recommendedMaxLocks,
"action", "RESTART PostgreSQL to apply: sudo systemctl restart postgresql")
}
} else {
e.log.Info("Lock table capacity is sufficient",
"total_capacity", totalLockCapacity,
"max_locks_per_transaction", currentValue)
} }
return currentValue, nil return currentValue, nil

View File

@@ -48,12 +48,14 @@ type LinuxChecks struct {
// PostgreSQLChecks contains PostgreSQL configuration checks // PostgreSQLChecks contains PostgreSQL configuration checks
type PostgreSQLChecks struct { type PostgreSQLChecks struct {
MaxLocksPerTransaction int // Current setting MaxLocksPerTransaction int // Current setting
MaintenanceWorkMem string // Current setting MaxPreparedTransactions int // Current setting (affects lock capacity)
SharedBuffers string // Current setting (info only) TotalLockCapacity int // Calculated: max_locks × (max_connections + max_prepared)
MaxConnections int // Current setting MaintenanceWorkMem string // Current setting
Version string // PostgreSQL version SharedBuffers string // Current setting (info only)
IsSuperuser bool // Can we modify settings? MaxConnections int // Current setting
Version string // PostgreSQL version
IsSuperuser bool // Can we modify settings?
} }
// ArchiveChecks contains analysis of the backup archive // ArchiveChecks contains analysis of the backup archive
@@ -201,6 +203,29 @@ func (e *Engine) checkPostgreSQL(ctx context.Context, result *PreflightResult) {
result.PostgreSQL.IsSuperuser = isSuperuser result.PostgreSQL.IsSuperuser = isSuperuser
} }
// Check max_prepared_transactions for lock capacity calculation
var maxPreparedTxns string
if err := db.QueryRowContext(ctx, "SHOW max_prepared_transactions").Scan(&maxPreparedTxns); err == nil {
result.PostgreSQL.MaxPreparedTransactions, _ = strconv.Atoi(maxPreparedTxns)
}
// CRITICAL: Calculate TOTAL lock table capacity
// Formula: max_locks_per_transaction × (max_connections + max_prepared_transactions)
// This is THE key capacity metric for BLOB-heavy restores
maxConns := result.PostgreSQL.MaxConnections
if maxConns == 0 {
maxConns = 100 // default
}
maxPrepared := result.PostgreSQL.MaxPreparedTransactions
totalLockCapacity := result.PostgreSQL.MaxLocksPerTransaction * (maxConns + maxPrepared)
result.PostgreSQL.TotalLockCapacity = totalLockCapacity
e.log.Info("PostgreSQL lock table capacity",
"max_locks_per_transaction", result.PostgreSQL.MaxLocksPerTransaction,
"max_connections", maxConns,
"max_prepared_transactions", maxPrepared,
"total_lock_capacity", totalLockCapacity)
// CRITICAL: max_locks_per_transaction requires PostgreSQL RESTART to change! // CRITICAL: max_locks_per_transaction requires PostgreSQL RESTART to change!
// Warn users loudly about this - it's the #1 cause of "out of shared memory" errors // Warn users loudly about this - it's the #1 cause of "out of shared memory" errors
if result.PostgreSQL.MaxLocksPerTransaction < 256 { if result.PostgreSQL.MaxLocksPerTransaction < 256 {
@@ -212,10 +237,38 @@ func (e *Engine) checkPostgreSQL(ctx context.Context, result *PreflightResult) {
result.Warnings = append(result.Warnings, result.Warnings = append(result.Warnings,
fmt.Sprintf("max_locks_per_transaction=%d is low (recommend 256+). "+ fmt.Sprintf("max_locks_per_transaction=%d is low (recommend 256+). "+
"This setting requires PostgreSQL RESTART to change. "+ "This setting requires PostgreSQL RESTART to change. "+
"BLOB-heavy databases may fail with 'out of shared memory' error.", "BLOB-heavy databases may fail with 'out of shared memory' error. "+
"Fix: Edit postgresql.conf, set max_locks_per_transaction=2048, then restart PostgreSQL.",
result.PostgreSQL.MaxLocksPerTransaction)) result.PostgreSQL.MaxLocksPerTransaction))
} }
// NEW: Check total lock capacity is sufficient for typical BLOB operations
// Minimum recommended: 200,000 for moderate BLOB databases
minRecommendedCapacity := 200000
if totalLockCapacity < minRecommendedCapacity {
recommendedMaxLocks := minRecommendedCapacity / (maxConns + maxPrepared)
if recommendedMaxLocks < 4096 {
recommendedMaxLocks = 4096
}
e.log.Warn("Total lock table capacity is LOW for BLOB-heavy restores",
"current_capacity", totalLockCapacity,
"recommended", minRecommendedCapacity,
"current_max_locks", result.PostgreSQL.MaxLocksPerTransaction,
"current_max_connections", maxConns,
"recommended_max_locks", recommendedMaxLocks,
"note", "VMs with fewer connections need higher max_locks_per_transaction")
result.Warnings = append(result.Warnings,
fmt.Sprintf("Total lock capacity=%d is low (recommend %d+). "+
"Capacity = max_locks_per_transaction(%d) × max_connections(%d). "+
"If you reduced VM size/connections, increase max_locks_per_transaction to %d. "+
"Fix: ALTER SYSTEM SET max_locks_per_transaction = %d; then RESTART PostgreSQL.",
totalLockCapacity, minRecommendedCapacity,
result.PostgreSQL.MaxLocksPerTransaction, maxConns,
recommendedMaxLocks, recommendedMaxLocks))
}
// Parse shared_buffers and warn if very low // Parse shared_buffers and warn if very low
sharedBuffersMB := parseMemoryToMB(result.PostgreSQL.SharedBuffers) sharedBuffersMB := parseMemoryToMB(result.PostgreSQL.SharedBuffers)
if sharedBuffersMB > 0 && sharedBuffersMB < 256 { if sharedBuffersMB > 0 && sharedBuffersMB < 256 {
@@ -324,14 +377,57 @@ func (e *Engine) calculateRecommendations(result *PreflightResult) {
if result.Archive.TotalBlobCount > 50000 { if result.Archive.TotalBlobCount > 50000 {
lockBoost = 16384 lockBoost = 16384
} }
if result.Archive.TotalBlobCount > 100000 {
lockBoost = 32768
}
if result.Archive.TotalBlobCount > 200000 {
lockBoost = 65536
}
// Cap at reasonable maximum // For extreme cases, calculate actual requirement
if lockBoost > 16384 { // Rule of thumb: ~1 lock per BLOB, divided by max_connections (default 100)
lockBoost = 16384 // Add 50% safety margin
maxConns := result.PostgreSQL.MaxConnections
if maxConns == 0 {
maxConns = 100 // default
}
calculatedLocks := (result.Archive.TotalBlobCount / maxConns) * 3 / 2 // 1.5x safety margin
if calculatedLocks > lockBoost {
lockBoost = calculatedLocks
} }
result.Archive.RecommendedLockBoost = lockBoost result.Archive.RecommendedLockBoost = lockBoost
// CRITICAL: Check if current max_locks_per_transaction is dangerously low for this BLOB count
currentLocks := result.PostgreSQL.MaxLocksPerTransaction
if currentLocks > 0 && result.Archive.TotalBlobCount > 0 {
// Estimate max BLOBs we can handle: locks * max_connections
maxSafeBLOBs := currentLocks * maxConns
if result.Archive.TotalBlobCount > maxSafeBLOBs {
severity := "WARNING"
if result.Archive.TotalBlobCount > maxSafeBLOBs*2 {
severity = "CRITICAL"
result.CanProceed = false
}
e.log.Error(fmt.Sprintf("%s: max_locks_per_transaction too low for BLOB count", severity),
"current_max_locks", currentLocks,
"total_blobs", result.Archive.TotalBlobCount,
"max_safe_blobs", maxSafeBLOBs,
"recommended_max_locks", lockBoost)
result.Errors = append(result.Errors,
fmt.Sprintf("%s: Archive contains %s BLOBs but max_locks_per_transaction=%d can only safely handle ~%s. "+
"Increase max_locks_per_transaction to %d in postgresql.conf and RESTART PostgreSQL.",
severity,
humanize.Comma(int64(result.Archive.TotalBlobCount)),
currentLocks,
humanize.Comma(int64(maxSafeBLOBs)),
lockBoost))
}
}
// Log recommendation // Log recommendation
e.log.Info("Calculated recommended lock boost", e.log.Info("Calculated recommended lock boost",
"total_blobs", result.Archive.TotalBlobCount, "total_blobs", result.Archive.TotalBlobCount,
@@ -365,6 +461,13 @@ func (e *Engine) printPreflightSummary(result *PreflightResult) {
humanize.Comma(int64(result.PostgreSQL.MaxLocksPerTransaction)), humanize.Comma(int64(result.PostgreSQL.MaxLocksPerTransaction)),
humanize.Comma(int64(result.Archive.RecommendedLockBoost))), humanize.Comma(int64(result.Archive.RecommendedLockBoost))),
true) true)
printCheck("max_connections", humanize.Comma(int64(result.PostgreSQL.MaxConnections)), true)
// Show total lock capacity with warning if low
totalCapacityOK := result.PostgreSQL.TotalLockCapacity >= 200000
printCheck("Total Lock Capacity",
fmt.Sprintf("%s (max_locks × max_conns)",
humanize.Comma(int64(result.PostgreSQL.TotalLockCapacity))),
totalCapacityOK)
printCheck("maintenance_work_mem", fmt.Sprintf("%s → 2GB (auto-boost)", printCheck("maintenance_work_mem", fmt.Sprintf("%s → 2GB (auto-boost)",
result.PostgreSQL.MaintenanceWorkMem), true) result.PostgreSQL.MaintenanceWorkMem), true)
printInfo("shared_buffers", result.PostgreSQL.SharedBuffers) printInfo("shared_buffers", result.PostgreSQL.SharedBuffers)
@@ -386,6 +489,14 @@ func (e *Engine) printPreflightSummary(result *PreflightResult) {
} }
} }
// Errors (blocking issues)
if len(result.Errors) > 0 {
fmt.Println("\n ✗ ERRORS (must fix before proceeding):")
for _, e := range result.Errors {
fmt.Printf(" • %s\n", e)
}
}
// Warnings // Warnings
if len(result.Warnings) > 0 { if len(result.Warnings) > 0 {
fmt.Println("\n ⚠ Warnings:") fmt.Println("\n ⚠ Warnings:")
@@ -394,6 +505,23 @@ func (e *Engine) printPreflightSummary(result *PreflightResult) {
} }
} }
// Final status
fmt.Println()
if !result.CanProceed {
fmt.Println(" ┌─────────────────────────────────────────────────────────┐")
fmt.Println(" │ ✗ PREFLIGHT FAILED - Cannot proceed with restore │")
fmt.Println(" │ Fix the errors above and try again. │")
fmt.Println(" └─────────────────────────────────────────────────────────┘")
} else if len(result.Warnings) > 0 {
fmt.Println(" ┌─────────────────────────────────────────────────────────┐")
fmt.Println(" │ ⚠ PREFLIGHT PASSED WITH WARNINGS - Proceed with care │")
fmt.Println(" └─────────────────────────────────────────────────────────┘")
} else {
fmt.Println(" ┌─────────────────────────────────────────────────────────┐")
fmt.Println(" │ ✓ PREFLIGHT PASSED - Ready to restore │")
fmt.Println(" └─────────────────────────────────────────────────────────┘")
}
fmt.Println(strings.Repeat("─", 60)) fmt.Println(strings.Repeat("─", 60))
fmt.Println() fmt.Println()
} }