v3.40.0: Restore diagnostics and error reporting

Features:
- restore diagnose command for backup file analysis
- Deep COPY block verification for truncated dump detection
- PGDMP signature and gzip integrity validation
- Detailed error reports with --save-debug-log flag
- Ring buffer stderr capture (prevents OOM on 2M+ errors)
- Error classification with actionable recommendations

TUI Enhancements:
- Automatic dump validity safety check before restore
- Press 'd' in archive browser to diagnose backups
- Press 'd' in restore preview for debug log toggle
- Debug logs saved to /tmp on failure when enabled

Documentation:
- Updated README with diagnose command and examples
- Updated CHANGELOG with full feature list
- Updated restore preview screenshots
This commit is contained in:
2026-01-05 15:17:54 +01:00
parent e7f0a9f5eb
commit 4c171c0e44
16 changed files with 2271 additions and 26 deletions

View File

@@ -0,0 +1,569 @@
package restore
import (
"bufio"
"compress/gzip"
"encoding/json"
"fmt"
"io"
"os"
"os/exec"
"path/filepath"
"runtime"
"strings"
"time"
"dbbackup/internal/config"
"dbbackup/internal/logger"
)
// RestoreErrorReport contains comprehensive information about a restore failure
type RestoreErrorReport struct {
// Metadata
Timestamp time.Time `json:"timestamp"`
Version string `json:"version"`
GoVersion string `json:"go_version"`
OS string `json:"os"`
Arch string `json:"arch"`
// Archive info
ArchivePath string `json:"archive_path"`
ArchiveSize int64 `json:"archive_size"`
ArchiveFormat string `json:"archive_format"`
// Database info
TargetDB string `json:"target_db"`
DatabaseType string `json:"database_type"`
// Error details
ExitCode int `json:"exit_code"`
ErrorMessage string `json:"error_message"`
ErrorType string `json:"error_type"`
ErrorHint string `json:"error_hint"`
TotalErrors int `json:"total_errors"`
// Captured output
LastStderr []string `json:"last_stderr"`
FirstErrors []string `json:"first_errors"`
// Context around failure
FailureContext *FailureContext `json:"failure_context,omitempty"`
// Diagnosis results
DiagnosisResult *DiagnoseResult `json:"diagnosis_result,omitempty"`
// Environment (sanitized)
PostgresVersion string `json:"postgres_version,omitempty"`
PgRestoreVersion string `json:"pg_restore_version,omitempty"`
PsqlVersion string `json:"psql_version,omitempty"`
// Recommendations
Recommendations []string `json:"recommendations"`
}
// FailureContext captures context around where the failure occurred
type FailureContext struct {
// For SQL/COPY errors
FailedLine int `json:"failed_line,omitempty"`
FailedStatement string `json:"failed_statement,omitempty"`
SurroundingLines []string `json:"surrounding_lines,omitempty"`
// For COPY block errors
InCopyBlock bool `json:"in_copy_block,omitempty"`
CopyTableName string `json:"copy_table_name,omitempty"`
CopyStartLine int `json:"copy_start_line,omitempty"`
SampleCopyData []string `json:"sample_copy_data,omitempty"`
// File position info
BytePosition int64 `json:"byte_position,omitempty"`
PercentComplete float64 `json:"percent_complete,omitempty"`
}
// ErrorCollector captures detailed error information during restore
type ErrorCollector struct {
log logger.Logger
cfg *config.Config
archivePath string
targetDB string
format ArchiveFormat
// Captured data
stderrLines []string
firstErrors []string
lastErrors []string
totalErrors int
exitCode int
// Limits
maxStderrLines int
maxErrorCapture int
// State
startTime time.Time
enabled bool
}
// NewErrorCollector creates a new error collector
func NewErrorCollector(cfg *config.Config, log logger.Logger, archivePath, targetDB string, format ArchiveFormat, enabled bool) *ErrorCollector {
return &ErrorCollector{
log: log,
cfg: cfg,
archivePath: archivePath,
targetDB: targetDB,
format: format,
stderrLines: make([]string, 0, 100),
firstErrors: make([]string, 0, 10),
lastErrors: make([]string, 0, 10),
maxStderrLines: 100,
maxErrorCapture: 10,
startTime: time.Now(),
enabled: enabled,
}
}
// CaptureStderr processes and captures stderr output
func (ec *ErrorCollector) CaptureStderr(chunk string) {
if !ec.enabled {
return
}
lines := strings.Split(chunk, "\n")
for _, line := range lines {
line = strings.TrimSpace(line)
if line == "" {
continue
}
// Store last N lines of stderr
if len(ec.stderrLines) >= ec.maxStderrLines {
// Shift array, drop oldest
ec.stderrLines = ec.stderrLines[1:]
}
ec.stderrLines = append(ec.stderrLines, line)
// Check if this is an error line
if isErrorLine(line) {
ec.totalErrors++
// Capture first N errors
if len(ec.firstErrors) < ec.maxErrorCapture {
ec.firstErrors = append(ec.firstErrors, line)
}
// Keep last N errors (ring buffer style)
if len(ec.lastErrors) >= ec.maxErrorCapture {
ec.lastErrors = ec.lastErrors[1:]
}
ec.lastErrors = append(ec.lastErrors, line)
}
}
}
// SetExitCode records the exit code
func (ec *ErrorCollector) SetExitCode(code int) {
ec.exitCode = code
}
// GenerateReport creates a comprehensive error report
func (ec *ErrorCollector) GenerateReport(errMessage string, errType string, errHint string) *RestoreErrorReport {
report := &RestoreErrorReport{
Timestamp: time.Now(),
Version: "1.0.0", // TODO: inject actual version
GoVersion: runtime.Version(),
OS: runtime.GOOS,
Arch: runtime.GOARCH,
ArchivePath: ec.archivePath,
ArchiveFormat: ec.format.String(),
TargetDB: ec.targetDB,
DatabaseType: getDatabaseType(ec.format),
ExitCode: ec.exitCode,
ErrorMessage: errMessage,
ErrorType: errType,
ErrorHint: errHint,
TotalErrors: ec.totalErrors,
LastStderr: ec.stderrLines,
FirstErrors: ec.firstErrors,
}
// Get archive size
if stat, err := os.Stat(ec.archivePath); err == nil {
report.ArchiveSize = stat.Size()
}
// Get tool versions
report.PostgresVersion = getCommandVersion("postgres", "--version")
report.PgRestoreVersion = getCommandVersion("pg_restore", "--version")
report.PsqlVersion = getCommandVersion("psql", "--version")
// Analyze failure context
report.FailureContext = ec.analyzeFailureContext()
// Run diagnosis if not already done
diagnoser := NewDiagnoser(ec.log, false)
if diagResult, err := diagnoser.DiagnoseFile(ec.archivePath); err == nil {
report.DiagnosisResult = diagResult
}
// Generate recommendations
report.Recommendations = ec.generateRecommendations(report)
return report
}
// analyzeFailureContext extracts context around the failure
func (ec *ErrorCollector) analyzeFailureContext() *FailureContext {
ctx := &FailureContext{}
// Look for line number in errors
for _, errLine := range ec.lastErrors {
if lineNum := extractLineNumber(errLine); lineNum > 0 {
ctx.FailedLine = lineNum
break
}
}
// Look for COPY-related errors
for _, errLine := range ec.lastErrors {
if strings.Contains(errLine, "COPY") || strings.Contains(errLine, "syntax error") {
ctx.InCopyBlock = true
// Try to extract table name
if tableName := extractTableName(errLine); tableName != "" {
ctx.CopyTableName = tableName
}
break
}
}
// If we have a line number, try to get surrounding context from the dump
if ctx.FailedLine > 0 && ec.archivePath != "" {
ctx.SurroundingLines = ec.getSurroundingLines(ctx.FailedLine, 5)
}
return ctx
}
// getSurroundingLines reads lines around a specific line number from the dump
func (ec *ErrorCollector) getSurroundingLines(lineNum int, context int) []string {
var reader io.Reader
var lines []string
file, err := os.Open(ec.archivePath)
if err != nil {
return nil
}
defer file.Close()
// Handle compressed files
if strings.HasSuffix(ec.archivePath, ".gz") {
gz, err := gzip.NewReader(file)
if err != nil {
return nil
}
defer gz.Close()
reader = gz
} else {
reader = file
}
scanner := bufio.NewScanner(reader)
buf := make([]byte, 0, 1024*1024)
scanner.Buffer(buf, 10*1024*1024)
currentLine := 0
startLine := lineNum - context
endLine := lineNum + context
if startLine < 1 {
startLine = 1
}
for scanner.Scan() {
currentLine++
if currentLine >= startLine && currentLine <= endLine {
prefix := " "
if currentLine == lineNum {
prefix = "> "
}
lines = append(lines, fmt.Sprintf("%s%d: %s", prefix, currentLine, truncateString(scanner.Text(), 100)))
}
if currentLine > endLine {
break
}
}
return lines
}
// generateRecommendations provides actionable recommendations based on the error
func (ec *ErrorCollector) generateRecommendations(report *RestoreErrorReport) []string {
var recs []string
// Check diagnosis results
if report.DiagnosisResult != nil {
if report.DiagnosisResult.IsTruncated {
recs = append(recs,
"CRITICAL: Backup file is truncated/incomplete",
"Action: Re-run the backup for the affected database",
"Check: Verify disk space was available during backup",
"Check: Verify network was stable during backup transfer",
)
}
if report.DiagnosisResult.IsCorrupted {
recs = append(recs,
"CRITICAL: Backup file appears corrupted",
"Action: Restore from a previous backup",
"Action: Verify backup file checksum if available",
)
}
if report.DiagnosisResult.Details != nil && report.DiagnosisResult.Details.UnterminatedCopy {
recs = append(recs,
fmt.Sprintf("ISSUE: COPY block for table '%s' was not terminated",
report.DiagnosisResult.Details.LastCopyTable),
"Cause: Backup was interrupted during data export",
"Action: Re-run backup ensuring it completes fully",
)
}
}
// Check error patterns
if report.TotalErrors > 1000000 {
recs = append(recs,
"ISSUE: Millions of errors indicate structural problem, not individual data issues",
"Cause: Likely wrong restore method or truncated dump",
"Check: Verify dump format matches restore command",
)
}
// Check for common error types
errLower := strings.ToLower(report.ErrorMessage)
if strings.Contains(errLower, "syntax error") {
recs = append(recs,
"ISSUE: SQL syntax errors during restore",
"Cause: COPY data being interpreted as SQL commands",
"Check: Run 'dbbackup restore diagnose <archive>' for detailed analysis",
)
}
if strings.Contains(errLower, "permission denied") {
recs = append(recs,
"ISSUE: Permission denied",
"Action: Check database user has sufficient privileges",
"Action: For ownership preservation, use a superuser account",
)
}
if strings.Contains(errLower, "does not exist") {
recs = append(recs,
"ISSUE: Missing object reference",
"Action: Ensure globals.sql was restored first (for roles/tablespaces)",
"Action: Check if target database was created",
)
}
if len(recs) == 0 {
recs = append(recs,
"Run 'dbbackup restore diagnose <archive>' for detailed analysis",
"Check the stderr output above for specific error messages",
"Review the PostgreSQL/MySQL logs on the target server",
)
}
return recs
}
// SaveReport saves the error report to a file
func (ec *ErrorCollector) SaveReport(report *RestoreErrorReport, outputPath string) error {
// Create directory if needed
dir := filepath.Dir(outputPath)
if err := os.MkdirAll(dir, 0755); err != nil {
return fmt.Errorf("failed to create directory: %w", err)
}
// Marshal to JSON with indentation
data, err := json.MarshalIndent(report, "", " ")
if err != nil {
return fmt.Errorf("failed to marshal report: %w", err)
}
// Write file
if err := os.WriteFile(outputPath, data, 0644); err != nil {
return fmt.Errorf("failed to write report: %w", err)
}
return nil
}
// PrintReport prints a human-readable summary of the error report
func (ec *ErrorCollector) PrintReport(report *RestoreErrorReport) {
fmt.Println()
fmt.Println(strings.Repeat("═", 70))
fmt.Println(" 🔴 RESTORE ERROR REPORT")
fmt.Println(strings.Repeat("═", 70))
fmt.Printf("\n📅 Timestamp: %s\n", report.Timestamp.Format("2006-01-02 15:04:05"))
fmt.Printf("📦 Archive: %s\n", filepath.Base(report.ArchivePath))
fmt.Printf("📊 Format: %s\n", report.ArchiveFormat)
fmt.Printf("🎯 Target DB: %s\n", report.TargetDB)
fmt.Printf("⚠️ Exit Code: %d\n", report.ExitCode)
fmt.Printf("❌ Total Errors: %d\n", report.TotalErrors)
fmt.Println("\n" + strings.Repeat("─", 70))
fmt.Println("ERROR DETAILS:")
fmt.Println(strings.Repeat("─", 70))
fmt.Printf("\nType: %s\n", report.ErrorType)
fmt.Printf("Message: %s\n", report.ErrorMessage)
if report.ErrorHint != "" {
fmt.Printf("Hint: %s\n", report.ErrorHint)
}
// Show failure context
if report.FailureContext != nil && report.FailureContext.FailedLine > 0 {
fmt.Println("\n" + strings.Repeat("─", 70))
fmt.Println("FAILURE CONTEXT:")
fmt.Println(strings.Repeat("─", 70))
fmt.Printf("\nFailed at line: %d\n", report.FailureContext.FailedLine)
if report.FailureContext.InCopyBlock {
fmt.Printf("Inside COPY block for table: %s\n", report.FailureContext.CopyTableName)
}
if len(report.FailureContext.SurroundingLines) > 0 {
fmt.Println("\nSurrounding lines:")
for _, line := range report.FailureContext.SurroundingLines {
fmt.Println(line)
}
}
}
// Show first few errors
if len(report.FirstErrors) > 0 {
fmt.Println("\n" + strings.Repeat("─", 70))
fmt.Println("FIRST ERRORS:")
fmt.Println(strings.Repeat("─", 70))
for i, err := range report.FirstErrors {
if i >= 5 {
fmt.Printf("... and %d more\n", len(report.FirstErrors)-5)
break
}
fmt.Printf(" %d. %s\n", i+1, truncateString(err, 100))
}
}
// Show diagnosis summary
if report.DiagnosisResult != nil && !report.DiagnosisResult.IsValid {
fmt.Println("\n" + strings.Repeat("─", 70))
fmt.Println("DIAGNOSIS:")
fmt.Println(strings.Repeat("─", 70))
if report.DiagnosisResult.IsTruncated {
fmt.Println(" ❌ File is TRUNCATED")
}
if report.DiagnosisResult.IsCorrupted {
fmt.Println(" ❌ File is CORRUPTED")
}
for i, err := range report.DiagnosisResult.Errors {
if i >= 3 {
break
}
fmt.Printf(" • %s\n", err)
}
}
// Show recommendations
fmt.Println("\n" + strings.Repeat("─", 70))
fmt.Println("💡 RECOMMENDATIONS:")
fmt.Println(strings.Repeat("─", 70))
for _, rec := range report.Recommendations {
fmt.Printf(" • %s\n", rec)
}
// Show tool versions
fmt.Println("\n" + strings.Repeat("─", 70))
fmt.Println("ENVIRONMENT:")
fmt.Println(strings.Repeat("─", 70))
fmt.Printf(" OS: %s/%s\n", report.OS, report.Arch)
fmt.Printf(" Go: %s\n", report.GoVersion)
if report.PgRestoreVersion != "" {
fmt.Printf(" pg_restore: %s\n", report.PgRestoreVersion)
}
if report.PsqlVersion != "" {
fmt.Printf(" psql: %s\n", report.PsqlVersion)
}
fmt.Println(strings.Repeat("═", 70))
}
// Helper functions
func isErrorLine(line string) bool {
return strings.Contains(line, "ERROR:") ||
strings.Contains(line, "FATAL:") ||
strings.Contains(line, "error:") ||
strings.Contains(line, "PANIC:")
}
func extractLineNumber(errLine string) int {
// Look for patterns like "LINE 1:" or "line 123"
patterns := []string{"LINE ", "line "}
for _, pattern := range patterns {
if idx := strings.Index(errLine, pattern); idx >= 0 {
numStart := idx + len(pattern)
numEnd := numStart
for numEnd < len(errLine) && errLine[numEnd] >= '0' && errLine[numEnd] <= '9' {
numEnd++
}
if numEnd > numStart {
var num int
fmt.Sscanf(errLine[numStart:numEnd], "%d", &num)
return num
}
}
}
return 0
}
func extractTableName(errLine string) string {
// Look for patterns like 'COPY "tablename"' or 'table "tablename"'
patterns := []string{"COPY ", "table "}
for _, pattern := range patterns {
if idx := strings.Index(errLine, pattern); idx >= 0 {
start := idx + len(pattern)
// Skip optional quote
if start < len(errLine) && errLine[start] == '"' {
start++
}
end := start
for end < len(errLine) && errLine[end] != '"' && errLine[end] != ' ' && errLine[end] != '(' {
end++
}
if end > start {
return errLine[start:end]
}
}
}
return ""
}
func getDatabaseType(format ArchiveFormat) string {
if format.IsMySQL() {
return "mysql"
}
return "postgresql"
}
func getCommandVersion(cmd string, arg string) string {
output, err := exec.Command(cmd, arg).CombinedOutput()
if err != nil {
return ""
}
// Return first line only
lines := strings.Split(string(output), "\n")
if len(lines) > 0 {
return strings.TrimSpace(lines[0])
}
return ""
}