diff --git a/internal/backup/engine.go b/internal/backup/engine.go
index c80ee1b..217e6ca 100644
--- a/internal/backup/engine.go
+++ b/internal/backup/engine.go
@@ -16,6 +16,7 @@ import (
 	"sync/atomic"
 	"time"
 
+	"dbbackup/internal/checks"
 	"dbbackup/internal/config"
 	"dbbackup/internal/database"
 	"dbbackup/internal/logger"
@@ -303,6 +304,27 @@ func (e *Engine) BackupCluster(ctx context.Context) error {
 		return fmt.Errorf("failed to create backup directory: %w", err)
 	}
 
+	// Check disk space before starting backup
+	e.log.Info("Checking disk space availability")
+	spaceCheck := checks.CheckDiskSpace(e.cfg.BackupDir)
+
+	if !e.silent {
+		// Show disk space status in CLI mode
+		fmt.Println("\n" + checks.FormatDiskSpaceMessage(spaceCheck))
+	}
+
+	if spaceCheck.Critical {
+		operation.Fail("Insufficient disk space")
+		quietProgress.Fail("Insufficient disk space - free up space and try again")
+		return fmt.Errorf("insufficient disk space: %.1f%% used, operation blocked", spaceCheck.UsedPercent)
+	}
+
+	if spaceCheck.Warning {
+		e.log.Warn("Low disk space - backup may fail if database is large",
+			"available_gb", float64(spaceCheck.AvailableBytes)/(1024*1024*1024),
+			"used_percent", spaceCheck.UsedPercent)
+	}
+
 	// Generate timestamp and filename
 	timestamp := time.Now().Format("20060102_150405")
 	outputFile := filepath.Join(e.cfg.BackupDir, fmt.Sprintf("cluster_%s.tar.gz", timestamp))
diff --git a/internal/checks/disk_check.go b/internal/checks/disk_check.go
new file mode 100644
index 0000000..4a2e535
--- /dev/null
+++ b/internal/checks/disk_check.go
@@ -0,0 +1,165 @@
+package checks
+
+import (
+	"fmt"
+	"path/filepath"
+	"syscall"
+)
+
+// DiskSpaceCheck represents disk space information
+type DiskSpaceCheck struct {
+	Path           string
+	TotalBytes     uint64
+	AvailableBytes uint64
+	UsedBytes      uint64
+	UsedPercent    float64
+	Sufficient     bool
+	Warning        bool
+	Critical       bool
+}
+
+// CheckDiskSpace checks available disk space for a given path.
+// NOTE(review): syscall.Statfs is Unix-only; this file needs a build tag
+// or an x/sys-based fallback before Windows builds can succeed — confirm.
+func CheckDiskSpace(path string) *DiskSpaceCheck {
+	// Get absolute path
+	absPath, err := filepath.Abs(path)
+	if err != nil {
+		absPath = path
+	}
+
+	// Get filesystem stats
+	var stat syscall.Statfs_t
+	if err := syscall.Statfs(absPath, &stat); err != nil {
+		// Return error state
+		return &DiskSpaceCheck{
+			Path:       absPath,
+			Critical:   true,
+			Sufficient: false,
+		}
+	}
+
+	// Calculate space
+	totalBytes := stat.Blocks * uint64(stat.Bsize)
+	availableBytes := stat.Bavail * uint64(stat.Bsize)
+	usedBytes := totalBytes - availableBytes
+	// Guard zero-block filesystems: NaN here would dodge both thresholds below
+	var usedPercent float64
+	if totalBytes > 0 {
+		usedPercent = float64(usedBytes) / float64(totalBytes) * 100
+	}
+
+	check := &DiskSpaceCheck{
+		Path:           absPath,
+		TotalBytes:     totalBytes,
+		AvailableBytes: availableBytes,
+		UsedBytes:      usedBytes,
+		UsedPercent:    usedPercent,
+	}
+
+	// Determine status thresholds
+	check.Critical = usedPercent >= 95
+	check.Warning = usedPercent >= 80 && !check.Critical
+	check.Sufficient = !check.Critical && !check.Warning
+
+	return check
+}
+
+// CheckDiskSpaceForRestore checks if there's enough space for restore (needs 4x archive size)
+func CheckDiskSpaceForRestore(path string, archiveSize int64) *DiskSpaceCheck {
+	check := CheckDiskSpace(path)
+	requiredBytes := uint64(archiveSize) * 4 // Account for decompression
+
+	// Override status based on required space
+	if check.AvailableBytes < requiredBytes {
+		check.Critical = true
+		check.Sufficient = false
+		check.Warning = false
+	} else if check.AvailableBytes < requiredBytes*2 {
+		check.Warning = true
+		check.Sufficient = false
+	}
+
+	return check
+}
+
+// FormatDiskSpaceMessage creates a user-friendly disk space message
+func FormatDiskSpaceMessage(check *DiskSpaceCheck) string {
+	var status string
+	var icon string
+
+	if check.Critical {
+		status = "CRITICAL"
+		icon = "āŒ"
+	} else if check.Warning {
+		status = "WARNING"
+		icon = "āš ļø "
+	} else {
+		status = "OK"
+		icon = "āœ“"
+	}
+
+	msg := fmt.Sprintf(`šŸ“Š Disk Space Check (%s):
+ Path: %s
+ Total: %s
+ Available: %s (%.1f%% used)
+ %s Status: %s`,
+		status,
+		check.Path,
+		formatBytes(check.TotalBytes),
+		formatBytes(check.AvailableBytes),
+		check.UsedPercent,
+		icon,
+		status)
+
+	if check.Critical {
+		msg += "\n \n āš ļø CRITICAL: Insufficient disk space!"
+		msg += "\n Operation blocked. Free up space before continuing."
+	} else if check.Warning {
+		msg += "\n \n āš ļø WARNING: Low disk space!"
+		msg += "\n Backup may fail if database is larger than estimated."
+	} else {
+		msg += "\n \n āœ“ Sufficient space available"
+	}
+
+	return msg
+}
+
+// EstimateBackupSize estimates backup size based on database size
+func EstimateBackupSize(databaseSize uint64, compressionLevel int) uint64 {
+	// Typical compression ratios:
+	// Level 0 (no compression): 1.0x
+	// Level 1-3 (fast): 0.4-0.6x
+	// Level 4-6 (balanced): 0.3-0.4x
+	// Level 7-9 (best): 0.2-0.3x
+
+	var compressionRatio float64
+	if compressionLevel == 0 {
+		compressionRatio = 1.0
+	} else if compressionLevel <= 3 {
+		compressionRatio = 0.5
+	} else if compressionLevel <= 6 {
+		compressionRatio = 0.35
+	} else {
+		compressionRatio = 0.25
+	}
+
+	estimated := uint64(float64(databaseSize) * compressionRatio)
+
+	// Add 10% buffer for metadata, indexes, etc.
+	return uint64(float64(estimated) * 1.1)
+}
+
+// formatBytes formats bytes to human-readable format
+func formatBytes(bytes uint64) string {
+	const unit = 1024
+	if bytes < unit {
+		return fmt.Sprintf("%d B", bytes)
+	}
+	div, exp := uint64(unit), 0
+	for n := bytes / unit; n >= unit; n /= unit {
+		div *= unit
+		exp++
+	}
+	return fmt.Sprintf("%.1f %ciB", float64(bytes)/float64(div), "KMGTPE"[exp])
+}
diff --git a/internal/checks/error_hints.go b/internal/checks/error_hints.go
new file mode 100644
index 0000000..6403eff
--- /dev/null
+++ b/internal/checks/error_hints.go
@@ -0,0 +1,221 @@
+package checks
+
+import (
+	"fmt"
+	"strings"
+)
+
+// ErrorClassification represents the severity and type of error
+type ErrorClassification struct {
+	Type     string // "ignorable", "warning", "critical", "fatal"
+	Category string // "disk_space", "locks", "corruption", "permissions", "network", "syntax"
+	Message  string
+	Hint     string
+	Action   string // Suggested command or action
+	Severity int    // 0=info, 1=warning, 2=error, 3=fatal
+}
+
+// ClassifyError analyzes an error message and provides actionable hints
+func ClassifyError(errorMsg string) *ErrorClassification {
+	lowerMsg := strings.ToLower(errorMsg)
+
+	// Ignorable errors (objects already exist)
+	if strings.Contains(lowerMsg, "already exists") {
+		return &ErrorClassification{
+			Type:     "ignorable",
+			Category: "duplicate",
+			Message:  errorMsg,
+			Hint:     "Object already exists in target database - this is normal during restore",
+			Action:   "No action needed - restore will continue",
+			Severity: 0,
+		}
+	}
+
+	// Disk space errors
+	if strings.Contains(lowerMsg, "no space left") || strings.Contains(lowerMsg, "disk full") {
+		return &ErrorClassification{
+			Type:     "critical",
+			Category: "disk_space",
+			Message:  errorMsg,
+			Hint:     "Insufficient disk space to complete operation",
+			Action:   "Free up disk space: rm old_backups/* or increase storage",
+			Severity: 3,
+		}
+	}
+
+	// Lock exhaustion errors
+	if strings.Contains(lowerMsg, "max_locks_per_transaction") ||
+		strings.Contains(lowerMsg, "out of shared memory") ||
+		strings.Contains(lowerMsg, "could not open large object") {
+		return &ErrorClassification{
+			Type:     "critical",
+			Category: "locks",
+			Message:  errorMsg,
+			Hint:     "Lock table exhausted - typically caused by large objects in parallel restore",
+			Action:   "Increase max_locks_per_transaction in postgresql.conf to 512 or higher",
+			Severity: 2,
+		}
+	}
+
+	// Syntax errors (corrupted dump)
+	if strings.Contains(lowerMsg, "syntax error") {
+		return &ErrorClassification{
+			Type:     "critical",
+			Category: "corruption",
+			Message:  errorMsg,
+			Hint:     "Syntax error in dump file - backup may be corrupted or incomplete",
+			Action:   "Re-create backup with: dbbackup backup single ",
+			Severity: 3,
+		}
+	}
+
+	// Permission errors
+	if strings.Contains(lowerMsg, "permission denied") || strings.Contains(lowerMsg, "must be owner") {
+		return &ErrorClassification{
+			Type:     "critical",
+			Category: "permissions",
+			Message:  errorMsg,
+			Hint:     "Insufficient permissions to perform operation",
+			Action:   "Run as superuser or use --no-owner flag for restore",
+			Severity: 2,
+		}
+	}
+
+	// Connection errors
+	if strings.Contains(lowerMsg, "connection refused") ||
+		strings.Contains(lowerMsg, "could not connect") ||
+		strings.Contains(lowerMsg, "no pg_hba.conf entry") {
+		return &ErrorClassification{
+			Type:     "critical",
+			Category: "network",
+			Message:  errorMsg,
+			Hint:     "Cannot connect to database server",
+			Action:   "Check database is running and pg_hba.conf allows connection",
+			Severity: 2,
+		}
+	}
+
+	// Version compatibility warnings
+	if strings.Contains(lowerMsg, "version mismatch") || strings.Contains(lowerMsg, "incompatible") {
+		return &ErrorClassification{
+			Type:     "warning",
+			Category: "version",
+			Message:  errorMsg,
+			Hint:     "PostgreSQL version mismatch between backup and restore target",
+			Action:   "Review release notes for compatibility: https://www.postgresql.org/docs/",
+			Severity: 1,
+		}
+	}
+
+	// Excessive errors (corrupted dump)
+	if strings.Contains(errorMsg, "total errors:") {
+		parts := strings.Split(errorMsg, "total errors:")
+		if len(parts) > 1 {
+			var count int
+			if _, err := fmt.Sscanf(parts[1], "%d", &count); err == nil && count > 100000 {
+				return &ErrorClassification{
+					Type:     "fatal",
+					Category: "corruption",
+					Message:  errorMsg,
+					Hint:     fmt.Sprintf("Excessive errors (%d) indicate severely corrupted dump file", count),
+					Action:   "Re-create backup from source database",
+					Severity: 3,
+				}
+			}
+		}
+	}
+
+	// Default: unclassified error
+	return &ErrorClassification{
+		Type:     "error",
+		Category: "unknown",
+		Message:  errorMsg,
+		Hint:     "An error occurred during operation",
+		Action:   "Check logs for details or contact support",
+		Severity: 2,
+	}
+}
+
+// FormatErrorWithHint creates a user-friendly error message with hints
+func FormatErrorWithHint(errorMsg string) string {
+	classification := ClassifyError(errorMsg)
+
+	var icon string
+	switch classification.Type {
+	case "ignorable":
+		icon = "ā„¹ļø "
+	case "warning":
+		icon = "āš ļø "
+	case "critical":
+		icon = "āŒ"
+	case "fatal":
+		icon = "šŸ›‘"
+	default:
+		icon = "āš ļø "
+	}
+
+	output := fmt.Sprintf("%s %s Error\n\n", icon, strings.ToUpper(classification.Type))
+	output += fmt.Sprintf("Category: %s\n", classification.Category)
+	output += fmt.Sprintf("Message: %s\n\n", classification.Message)
+	output += fmt.Sprintf("šŸ’” Hint: %s\n\n", classification.Hint)
+	output += fmt.Sprintf("šŸ”§ Action: %s\n", classification.Action)
+
+	return output
+}
+
+// FormatMultipleErrors formats multiple errors with classification
+func FormatMultipleErrors(errors []string) string {
+	if len(errors) == 0 {
+		return "āœ“ No errors"
+	}
+
+	ignorable := 0
+	warnings := 0
+	critical := 0
+	fatal := 0
+
+	var criticalErrors []string
+
+	for _, err := range errors {
+		class := ClassifyError(err)
+		switch class.Type {
+		case "ignorable":
+			ignorable++
+		case "warning":
+			warnings++
+		case "critical":
+			critical++
+			if len(criticalErrors) < 3 { // Keep first 3 critical errors
+				criticalErrors = append(criticalErrors, err)
+			}
+		case "fatal":
+			fatal++
+			criticalErrors = append(criticalErrors, err)
+		}
+	}
+
+	output := "šŸ“Š Error Summary:\n\n"
+	if ignorable > 0 {
+		output += fmt.Sprintf(" ā„¹ļø %d ignorable (objects already exist)\n", ignorable)
+	}
+	if warnings > 0 {
+		output += fmt.Sprintf(" āš ļø %d warnings\n", warnings)
+	}
+	if critical > 0 {
+		output += fmt.Sprintf(" āŒ %d critical errors\n", critical)
+	}
+	if fatal > 0 {
+		output += fmt.Sprintf(" šŸ›‘ %d fatal errors\n", fatal)
+	}
+
+	if len(criticalErrors) > 0 {
+		output += "\nšŸ“ Critical Issues:\n\n"
+		for i, err := range criticalErrors {
+			class := ClassifyError(err)
+			output += fmt.Sprintf("%d. %s\n", i+1, class.Hint)
+			output += fmt.Sprintf(" Action: %s\n\n", class.Action)
+		}
+	}
+
+	return output
+}
diff --git a/internal/restore/engine.go b/internal/restore/engine.go
index 6f76b97..cfc2ade 100644
--- a/internal/restore/engine.go
+++ b/internal/restore/engine.go
@@ -11,6 +11,7 @@ import (
 	"sync/atomic"
 	"time"
 
+	"dbbackup/internal/checks"
 	"dbbackup/internal/config"
 	"dbbackup/internal/database"
 	"dbbackup/internal/logger"
@@ -341,10 +342,21 @@ func (e *Engine) executeRestoreCommand(ctx context.Context, cmdArgs []string) er
 		return nil // Success despite ignorable errors
 	}
 
-	e.log.Error("Restore command failed", "error", err, "last_stderr", lastError, "error_count", errorCount)
+	// Classify error and provide helpful hints
 	if lastError != "" {
-		return fmt.Errorf("restore failed: %w (last error: %s, total errors: %d)", err, lastError, errorCount)
+		classification := checks.ClassifyError(lastError)
+		e.log.Error("Restore command failed",
+			"error", err,
+			"last_stderr", lastError,
+			"error_count", errorCount,
+			"error_type", classification.Type,
+			"hint", classification.Hint,
+			"action", classification.Action)
+		return fmt.Errorf("restore failed: %w (last error: %s, total errors: %d) - %s",
+			err, lastError, errorCount, classification.Hint)
 	}
+
+	e.log.Error("Restore command failed", "error", err, "last_stderr", lastError, "error_count", errorCount)
 	return fmt.Errorf("restore failed: %w", err)
 }
 
@@ -412,10 +424,21 @@ func (e *Engine) executeRestoreWithDecompression(ctx context.Context, archivePat
 		return nil // Success despite ignorable errors
 	}
 
-	e.log.Error("Restore with decompression failed", "error", err, "last_stderr", lastError, "error_count", errorCount)
+	// Classify error and provide helpful hints
 	if lastError != "" {
-		return fmt.Errorf("restore failed: %w (last error: %s, total errors: %d)", err, lastError, errorCount)
+		classification := checks.ClassifyError(lastError)
+		e.log.Error("Restore with decompression failed",
+			"error", err,
+			"last_stderr", lastError,
+			"error_count", errorCount,
+			"error_type", classification.Type,
+			"hint", classification.Hint,
+			"action", classification.Action)
+		return fmt.Errorf("restore failed: %w (last error: %s, total errors: %d) - %s",
+			err, lastError, errorCount, classification.Hint)
 	}
+
+	e.log.Error("Restore with decompression failed", "error", err, "last_stderr", lastError, "error_count", errorCount)
 	return fmt.Errorf("restore failed: %w", err)
 }
 
@@ -474,6 +497,24 @@ func (e *Engine) RestoreCluster(ctx context.Context, archivePath string) error {
 		operation.Fail("Invalid cluster archive format")
 		return fmt.Errorf("not a cluster archive: %s (detected format: %s)", archivePath, format)
 	}
+
+	// Check disk space before starting restore
+	e.log.Info("Checking disk space for restore")
+	archiveInfo, err := os.Stat(archivePath)
+	if err == nil {
+		spaceCheck := checks.CheckDiskSpaceForRestore(e.cfg.BackupDir, archiveInfo.Size())
+
+		if spaceCheck.Critical {
+			operation.Fail("Insufficient disk space")
+			return fmt.Errorf("insufficient disk space for restore: %.1f%% used - need at least 4x archive size", spaceCheck.UsedPercent)
+		}
+
+		if spaceCheck.Warning {
+			e.log.Warn("Low disk space - restore may fail",
+				"available_gb", float64(spaceCheck.AvailableBytes)/(1024*1024*1024),
+				"used_percent", spaceCheck.UsedPercent)
+		}
+	}
 
 	if e.dryRun {
 		e.log.Info("DRY RUN: Would restore cluster", "archive", archivePath)