Add Phase 2 TUI improvements: disk space checks and error hints

- Created internal/checks package for disk space and error classification
- CheckDiskSpace(): Real-time disk usage detection (80% warning, 95% critical)
- CheckDiskSpaceForRestore(): 4x archive size requirement calculation
- ClassifyError(): Smart error classification (ignorable/warning/critical/fatal)
- FormatErrorWithHint(): User-friendly error messages with actionable solutions
- Integrated disk checks into backup/restore workflows with pre-flight validation
- Error hints for: lock exhaustion, disk full, syntax errors, permissions, connections
- Blocks operations at 95% disk usage, warns at 80%
This commit is contained in:
2025-11-18 13:24:07 +00:00
parent 3a2ff21e6f
commit fd5fae4dfa
4 changed files with 449 additions and 4 deletions

View File

@@ -16,6 +16,7 @@ import (
"sync/atomic"
"time"
"dbbackup/internal/checks"
"dbbackup/internal/config"
"dbbackup/internal/database"
"dbbackup/internal/logger"
@@ -303,6 +304,27 @@ func (e *Engine) BackupCluster(ctx context.Context) error {
return fmt.Errorf("failed to create backup directory: %w", err)
}
// Check disk space before starting backup
e.log.Info("Checking disk space availability")
spaceCheck := checks.CheckDiskSpace(e.cfg.BackupDir)
if !e.silent {
// Show disk space status in CLI mode
fmt.Println("\n" + checks.FormatDiskSpaceMessage(spaceCheck))
}
if spaceCheck.Critical {
operation.Fail("Insufficient disk space")
quietProgress.Fail("Insufficient disk space - free up space and try again")
return fmt.Errorf("insufficient disk space: %.1f%% used, operation blocked", spaceCheck.UsedPercent)
}
if spaceCheck.Warning {
e.log.Warn("Low disk space - backup may fail if database is large",
"available_gb", float64(spaceCheck.AvailableBytes)/(1024*1024*1024),
"used_percent", spaceCheck.UsedPercent)
}
// Generate timestamp and filename
timestamp := time.Now().Format("20060102_150405")
outputFile := filepath.Join(e.cfg.BackupDir, fmt.Sprintf("cluster_%s.tar.gz", timestamp))

View File

@@ -0,0 +1,161 @@
package checks
import (
"fmt"
"path/filepath"
"syscall"
)
// DiskSpaceCheck is a snapshot of filesystem usage for a path together with
// the status flags derived from it.
type DiskSpaceCheck struct {
	// Path is the location that was inspected.
	Path string

	// TotalBytes, AvailableBytes and UsedBytes are the byte counts computed
	// for the filesystem that contains Path.
	TotalBytes, AvailableBytes, UsedBytes uint64

	// UsedPercent is UsedBytes expressed as a percentage of TotalBytes.
	UsedPercent float64

	// Sufficient, Warning and Critical are the status flags. CheckDiskSpace
	// sets Critical at 95% usage or more, Warning at 80% or more, and
	// Sufficient when neither applies.
	Sufficient, Warning, Critical bool
}
// CheckDiskSpace reports usage of the filesystem that contains path.
//
// The returned check is never nil. Exactly one of Sufficient, Warning and
// Critical is set: Critical at 95% usage or more, Warning at 80% or more,
// Sufficient otherwise. When the filesystem cannot be queried, or reports a
// total size of zero, usage is unknown and the check fails closed: Critical
// is set and all sizes are left at zero.
func CheckDiskSpace(path string) *DiskSpaceCheck {
	// Prefer an absolute path for reporting; fall back to the input as given.
	absPath, err := filepath.Abs(path)
	if err != nil {
		absPath = path
	}

	var stat syscall.Statfs_t
	if err := syscall.Statfs(absPath, &stat); err != nil {
		return &DiskSpaceCheck{Path: absPath, Critical: true}
	}

	// The integer types of the Statfs_t fields differ between Unix
	// platforms, so every field is converted explicitly.
	blockSize := uint64(stat.Bsize)
	totalBytes := uint64(stat.Blocks) * blockSize
	availableBytes := uint64(stat.Bavail) * blockSize

	// Pseudo filesystems report zero blocks. Dividing by zero would yield a
	// NaN percentage that slips past both thresholds and reads as "OK".
	if totalBytes == 0 {
		return &DiskSpaceCheck{Path: absPath, Critical: true}
	}

	// Guard the unsigned subtraction against wrapping around.
	var usedBytes uint64
	if totalBytes > availableBytes {
		usedBytes = totalBytes - availableBytes
	}
	usedPercent := float64(usedBytes) / float64(totalBytes) * 100

	check := &DiskSpaceCheck{
		Path:           absPath,
		TotalBytes:     totalBytes,
		AvailableBytes: availableBytes,
		UsedBytes:      usedBytes,
		UsedPercent:    usedPercent,
	}

	// Status thresholds; the three flags are mutually exclusive.
	check.Critical = usedPercent >= 95
	check.Warning = usedPercent >= 80 && !check.Critical
	check.Sufficient = !check.Critical && !check.Warning

	return check
}
// CheckDiskSpaceForRestore checks whether the filesystem containing path has
// room to restore an archive of archiveSize bytes.
//
// It starts from the result of CheckDiskSpace and tightens the status using
// the archive size: less than 4x the archive size available is Critical, and
// less than 8x is a Warning. A status that is already Critical is kept, and
// Warning is never set together with Critical. A negative archiveSize is
// treated as zero, and the size arithmetic saturates instead of wrapping.
func CheckDiskSpaceForRestore(path string, archiveSize int64) *DiskSpaceCheck {
	const (
		maxUint64         = ^uint64(0)
		requiredFactor    = 4 // account for decompression
		comfortableFactor = 2 // head-room on top of the required space
	)

	check := CheckDiskSpace(path)

	// Converting a negative int64 directly would wrap to a huge requirement.
	var size uint64
	if archiveSize > 0 {
		size = uint64(archiveSize)
	}

	// Saturating multiplications: an overflow must not wrap to a small value
	// and make an impossible restore look fine.
	requiredBytes := maxUint64
	if size <= maxUint64/requiredFactor {
		requiredBytes = size * requiredFactor
	}
	comfortableBytes := maxUint64
	if requiredBytes <= maxUint64/comfortableFactor {
		comfortableBytes = requiredBytes * comfortableFactor
	}

	// Override status based on required space.
	switch {
	case check.AvailableBytes < requiredBytes:
		check.Critical = true
		check.Sufficient = false
		check.Warning = false
	case check.AvailableBytes < comfortableBytes && !check.Critical:
		check.Warning = true
		check.Sufficient = false
	}

	return check
}
// FormatDiskSpaceMessage renders check as a multi-line, human-readable report:
// a header with path, total and available space, a status line, and a closing
// note that depends on the status. Critical takes precedence over Warning.
func FormatDiskSpaceMessage(check *DiskSpaceCheck) string {
	var label, marker, footer string

	// One decision picks the label, the icon and the closing note together.
	switch {
	case check.Critical:
		label, marker = "CRITICAL", "❌"
		footer = "\n \n ⚠️ CRITICAL: Insufficient disk space!" +
			"\n Operation blocked. Free up space before continuing."
	case check.Warning:
		label, marker = "WARNING", "⚠️ "
		footer = "\n \n ⚠️ WARNING: Low disk space!" +
			"\n Backup may fail if database is larger than estimated."
	default:
		label, marker = "OK", "✓"
		footer = "\n \n ✓ Sufficient space available"
	}

	const layout = "📊 Disk Space Check (%s):\n" +
		"Path: %s\n" +
		"Total: %s\n" +
		"Available: %s (%.1f%% used)\n" +
		"%s Status: %s"

	total := formatBytes(check.TotalBytes)
	available := formatBytes(check.AvailableBytes)
	report := fmt.Sprintf(layout, label, check.Path, total, available, check.UsedPercent, marker, label)

	return report + footer
}
// EstimateBackupSize predicts the size of a compressed backup for a database
// of databaseSize bytes.
//
// The assumed compression ratio depends on compressionLevel:
//
//	0          1.0   (no compression)
//	up to 3    0.5   (fast)
//	up to 6    0.35  (balanced)
//	above 6    0.25  (best)
//
// The compressed estimate is then padded by 10% for metadata, indexes, etc.
func EstimateBackupSize(databaseSize uint64, compressionLevel int) uint64 {
	// Start from the strongest compression and relax it for lower levels.
	ratio := 0.25
	switch {
	case compressionLevel == 0:
		ratio = 1.0
	case compressionLevel <= 3:
		ratio = 0.5
	case compressionLevel <= 6:
		ratio = 0.35
	}

	compressed := uint64(float64(databaseSize) * ratio)

	const overhead = 1.1 // 10% buffer
	return uint64(float64(compressed) * overhead)
}
// formatBytes renders a byte count with binary (1024-based) units, e.g.
// "512 B", "1.5 KiB" or "2.0 GiB". Values below 1024 are printed as whole
// bytes; larger values get one decimal place.
func formatBytes(bytes uint64) string {
	const (
		unit     = 1024
		prefixes = "KMGTPE"
	)

	if bytes < unit {
		return fmt.Sprintf("%d B", bytes)
	}

	// Find the largest power of 1024 that does not exceed the value.
	divisor := uint64(unit)
	idx := 0
	remaining := bytes / unit
	for remaining >= unit {
		remaining /= unit
		divisor *= unit
		idx++
	}

	scaled := float64(bytes) / float64(divisor)
	return fmt.Sprintf("%.1f %ciB", scaled, prefixes[idx])
}

View File

@@ -0,0 +1,221 @@
package checks
import (
"fmt"
"strings"
)
// ErrorClassification describes how serious an error is, which area it
// belongs to, and what the user can do about it.
type ErrorClassification struct {
	// Type is the severity label: "ignorable", "warning", "critical" or
	// "fatal" (ClassifyError uses "error" for unclassified messages).
	// Category is the problem area, e.g. "disk_space", "locks", "corruption",
	// "permissions", "network" or "syntax".
	Type, Category string

	// Message is the error text that was classified, Hint explains the
	// problem, and Action is a suggested command or action.
	Message, Hint, Action string

	// Severity is the numeric level: 0=info, 1=warning, 2=error, 3=fatal.
	Severity int
}
// ClassifyError inspects an error message and returns a classification with
// an explanatory hint and a suggested action.
//
// Matching is case-insensitive and substring based. Rules are tried in a
// fixed order and the first match wins: duplicate objects, disk space, lock
// exhaustion, syntax errors, permissions, connection problems and version
// mismatches. After those, a "total errors: N" marker with N above 100000 is
// classified as a fatally corrupted dump. Anything else is returned as an
// unclassified error of Type "error". The returned Message is always the
// unmodified errorMsg.
func ClassifyError(errorMsg string) *ErrorClassification {
	lowerMsg := strings.ToLower(errorMsg)

	// Ordered rule table. A rule matches when any of its needles occurs in
	// the lower-cased message.
	rules := []struct {
		needles []string
		class   ErrorClassification
	}{
		{
			// Ignorable errors (objects already exist)
			needles: []string{"already exists"},
			class: ErrorClassification{
				Type:     "ignorable",
				Category: "duplicate",
				Hint:     "Object already exists in target database - this is normal during restore",
				Action:   "No action needed - restore will continue",
				Severity: 0,
			},
		},
		{
			// Disk space errors
			needles: []string{"no space left", "disk full"},
			class: ErrorClassification{
				Type:     "critical",
				Category: "disk_space",
				Hint:     "Insufficient disk space to complete operation",
				Action:   "Free up disk space: rm old_backups/* or increase storage",
				Severity: 3,
			},
		},
		{
			// Lock exhaustion errors
			needles: []string{"max_locks_per_transaction", "out of shared memory", "could not open large object"},
			class: ErrorClassification{
				Type:     "critical",
				Category: "locks",
				Hint:     "Lock table exhausted - typically caused by large objects in parallel restore",
				Action:   "Increase max_locks_per_transaction in postgresql.conf to 512 or higher",
				Severity: 2,
			},
		},
		{
			// Syntax errors (corrupted dump)
			needles: []string{"syntax error"},
			class: ErrorClassification{
				Type:     "critical",
				Category: "corruption",
				Hint:     "Syntax error in dump file - backup may be corrupted or incomplete",
				Action:   "Re-create backup with: dbbackup backup single <database>",
				Severity: 3,
			},
		},
		{
			// Permission errors
			needles: []string{"permission denied", "must be owner"},
			class: ErrorClassification{
				Type:     "critical",
				Category: "permissions",
				Hint:     "Insufficient permissions to perform operation",
				Action:   "Run as superuser or use --no-owner flag for restore",
				Severity: 2,
			},
		},
		{
			// Connection errors
			needles: []string{"connection refused", "could not connect", "no pg_hba.conf entry"},
			class: ErrorClassification{
				Type:     "critical",
				Category: "network",
				Hint:     "Cannot connect to database server",
				Action:   "Check database is running and pg_hba.conf allows connection",
				Severity: 2,
			},
		},
		{
			// Version compatibility warnings
			needles: []string{"version mismatch", "incompatible"},
			class: ErrorClassification{
				Type:     "warning",
				Category: "version",
				Hint:     "PostgreSQL version mismatch between backup and restore target",
				Action:   "Review release notes for compatibility: https://www.postgresql.org/docs/",
				Severity: 1,
			},
		},
	}

	for i := range rules {
		for _, needle := range rules[i].needles {
			if strings.Contains(lowerMsg, needle) {
				class := rules[i].class // copy, so the table entry stays untouched
				class.Message = errorMsg
				return &class
			}
		}
	}

	// Excessive errors (corrupted dump). The marker is searched in the
	// lower-cased message so this rule is case-insensitive like the others.
	const (
		totalMarker         = "total errors:"
		excessiveErrorCount = 100000
	)
	if idx := strings.Index(lowerMsg, totalMarker); idx >= 0 {
		var count int
		// %d skips leading spaces and stops at the first non-digit.
		if _, err := fmt.Sscanf(lowerMsg[idx+len(totalMarker):], "%d", &count); err == nil && count > excessiveErrorCount {
			return &ErrorClassification{
				Type:     "fatal",
				Category: "corruption",
				Message:  errorMsg,
				Hint:     fmt.Sprintf("Excessive errors (%d) indicate severely corrupted dump file", count),
				Action:   "Re-create backup from source database",
				Severity: 3,
			}
		}
	}

	// Default: unclassified error
	return &ErrorClassification{
		Type:     "error",
		Category: "unknown",
		Message:  errorMsg,
		Hint:     "An error occurred during operation",
		Action:   "Check logs for details or contact support",
		Severity: 2,
	}
}
// FormatErrorWithHint classifies errorMsg with ClassifyError and renders the
// result as a multi-line report: a headline with an icon and the upper-cased
// type, followed by the category, the message, a hint and a suggested action.
func FormatErrorWithHint(errorMsg string) string {
	info := ClassifyError(errorMsg)

	// Warnings and unrecognized types share the warning sign.
	icon := "⚠️ "
	switch info.Type {
	case "ignorable":
		icon = " "
	case "critical":
		icon = "❌"
	case "fatal":
		icon = "🛑"
	}

	return fmt.Sprintf(
		"%s %s Error\n\n"+
			"Category: %s\n"+
			"Message: %s\n\n"+
			"💡 Hint: %s\n\n"+
			"🔧 Action: %s\n",
		icon, strings.ToUpper(info.Type),
		info.Category,
		info.Message,
		info.Hint,
		info.Action,
	)
}
// FormatMultipleErrors formats multiple errors with classification
func FormatMultipleErrors(errors []string) string {
if len(errors) == 0 {
return "✓ No errors"
}
ignorable := 0
warnings := 0
critical := 0
fatal := 0
var criticalErrors []string
for _, err := range errors {
class := ClassifyError(err)
switch class.Type {
case "ignorable":
ignorable++
case "warning":
warnings++
case "critical":
critical++
if len(criticalErrors) < 3 { // Keep first 3 critical errors
criticalErrors = append(criticalErrors, err)
}
case "fatal":
fatal++
criticalErrors = append(criticalErrors, err)
}
}
output := "📊 Error Summary:\n\n"
if ignorable > 0 {
output += fmt.Sprintf(" %d ignorable (objects already exist)\n", ignorable)
}
if warnings > 0 {
output += fmt.Sprintf(" ⚠️ %d warnings\n", warnings)
}
if critical > 0 {
output += fmt.Sprintf(" ❌ %d critical errors\n", critical)
}
if fatal > 0 {
output += fmt.Sprintf(" 🛑 %d fatal errors\n", fatal)
}
if len(criticalErrors) > 0 {
output += "\n📝 Critical Issues:\n\n"
for i, err := range criticalErrors {
class := ClassifyError(err)
output += fmt.Sprintf("%d. %s\n", i+1, class.Hint)
output += fmt.Sprintf(" Action: %s\n\n", class.Action)
}
}
return output
}

View File

@@ -11,6 +11,7 @@ import (
"sync/atomic"
"time"
"dbbackup/internal/checks"
"dbbackup/internal/config"
"dbbackup/internal/database"
"dbbackup/internal/logger"
@@ -341,10 +342,21 @@ func (e *Engine) executeRestoreCommand(ctx context.Context, cmdArgs []string) er
return nil // Success despite ignorable errors
}
e.log.Error("Restore command failed", "error", err, "last_stderr", lastError, "error_count", errorCount)
// Classify error and provide helpful hints
if lastError != "" {
return fmt.Errorf("restore failed: %w (last error: %s, total errors: %d)", err, lastError, errorCount)
classification := checks.ClassifyError(lastError)
e.log.Error("Restore command failed",
"error", err,
"last_stderr", lastError,
"error_count", errorCount,
"error_type", classification.Type,
"hint", classification.Hint,
"action", classification.Action)
return fmt.Errorf("restore failed: %w (last error: %s, total errors: %d) - %s",
err, lastError, errorCount, classification.Hint)
}
e.log.Error("Restore command failed", "error", err, "last_stderr", lastError, "error_count", errorCount)
return fmt.Errorf("restore failed: %w", err)
}
@@ -412,10 +424,21 @@ func (e *Engine) executeRestoreWithDecompression(ctx context.Context, archivePat
return nil // Success despite ignorable errors
}
e.log.Error("Restore with decompression failed", "error", err, "last_stderr", lastError, "error_count", errorCount)
// Classify error and provide helpful hints
if lastError != "" {
return fmt.Errorf("restore failed: %w (last error: %s, total errors: %d)", err, lastError, errorCount)
classification := checks.ClassifyError(lastError)
e.log.Error("Restore with decompression failed",
"error", err,
"last_stderr", lastError,
"error_count", errorCount,
"error_type", classification.Type,
"hint", classification.Hint,
"action", classification.Action)
return fmt.Errorf("restore failed: %w (last error: %s, total errors: %d) - %s",
err, lastError, errorCount, classification.Hint)
}
e.log.Error("Restore with decompression failed", "error", err, "last_stderr", lastError, "error_count", errorCount)
return fmt.Errorf("restore failed: %w", err)
}
@@ -475,6 +498,24 @@ func (e *Engine) RestoreCluster(ctx context.Context, archivePath string) error {
return fmt.Errorf("not a cluster archive: %s (detected format: %s)", archivePath, format)
}
// Check disk space before starting restore
e.log.Info("Checking disk space for restore")
archiveInfo, err := os.Stat(archivePath)
if err == nil {
spaceCheck := checks.CheckDiskSpaceForRestore(e.cfg.BackupDir, archiveInfo.Size())
if spaceCheck.Critical {
operation.Fail("Insufficient disk space")
return fmt.Errorf("insufficient disk space for restore: %.1f%% used - need at least 4x archive size", spaceCheck.UsedPercent)
}
if spaceCheck.Warning {
e.log.Warn("Low disk space - restore may fail",
"available_gb", float64(spaceCheck.AvailableBytes)/(1024*1024*1024),
"used_percent", spaceCheck.UsedPercent)
}
}
if e.dryRun {
e.log.Info("DRY RUN: Would restore cluster", "archive", archivePath)
return e.previewClusterRestore(archivePath)