v3.40.0: Restore diagnostics and error reporting

Features:
- restore diagnose command for backup file analysis
- Deep COPY block verification for truncated dump detection
- PGDMP signature and gzip integrity validation
- Detailed error reports with --save-debug-log flag
- Ring buffer stderr capture (prevents OOM on 2M+ errors)
- Error classification with actionable recommendations

TUI Enhancements:
- Automatic dump validity safety check before restore
- Press 'd' in archive browser to diagnose backups
- Press 'd' in restore preview for debug log toggle
- Debug logs saved to /tmp on failure when enabled

Documentation:
- Updated README with diagnose command and examples
- Updated CHANGELOG with full feature list
- Updated restore preview screenshots
This commit is contained in:
2026-01-05 15:17:54 +01:00
parent e7f0a9f5eb
commit 4c171c0e44
16 changed files with 2271 additions and 26 deletions

View File

@@ -0,0 +1,726 @@
package restore
import (
"bufio"
"bytes"
"compress/gzip"
"encoding/json"
"fmt"
"io"
"os"
"os/exec"
"path/filepath"
"regexp"
"strings"
"dbbackup/internal/logger"
)
// DiagnoseResult contains the results of a dump file diagnosis.
// It is created by Diagnoser.DiagnoseFile, which starts with IsValid set to
// true and clears it as soon as any check fails.
type DiagnoseResult struct {
// FilePath is the path given to DiagnoseFile; FileName is its base name.
FilePath string `json:"file_path"`
FileName string `json:"file_name"`
// FileSize is the on-disk size in bytes as reported by os.Stat.
FileSize int64 `json:"file_size"`
// Format is the value returned by DetectArchiveFormat; DetectedFormat is
// its String() form.
Format ArchiveFormat `json:"format"`
DetectedFormat string `json:"detected_format"`
// IsValid is false once any format-specific check has failed.
IsValid bool `json:"is_valid"`
// IsTruncated marks files that appear incomplete (empty file, gzip stream
// error, unterminated COPY block, pg_restore reporting unexpected EOF).
IsTruncated bool `json:"is_truncated"`
// IsCorrupted marks files whose structure is damaged (bad gzip header,
// missing PGDMP signature, invalid tar archive).
IsCorrupted bool `json:"is_corrupted"`
// Errors and Warnings are human-readable messages gathered during analysis.
Errors []string `json:"errors,omitempty"`
Warnings []string `json:"warnings,omitempty"`
// Details holds the format-specific findings.
Details *DiagnoseDetails `json:"details,omitempty"`
}
// DiagnoseDetails contains detailed analysis of the dump file.
// Which fields are populated depends on the detected format; all fields use
// omitempty, so unset values are absent from the JSON output.
type DiagnoseDetails struct {
// Header info
HasPGDMPSignature bool `json:"has_pgdmp_signature,omitempty"`
HasSQLHeader bool `json:"has_sql_header,omitempty"`
// FirstBytes is either the literal "PGDMP..." or a %q-quoted prefix of the
// file; LastBytes is the trimmed tail (up to 500 bytes) of an uncompressed
// SQL script.
FirstBytes string `json:"first_bytes,omitempty"`
LastBytes string `json:"last_bytes,omitempty"`
// COPY block analysis (for SQL dumps)
CopyBlockCount int `json:"copy_block_count,omitempty"`
UnterminatedCopy bool `json:"unterminated_copy,omitempty"`
// LastCopyTable and LastCopyLineNumber identify the COPY block that was
// still open when the file ended; SampleCopyData holds up to three of its
// data lines.
LastCopyTable string `json:"last_copy_table,omitempty"`
LastCopyLineNumber int `json:"last_copy_line_number,omitempty"`
SampleCopyData []string `json:"sample_copy_data,omitempty"`
// Structure analysis
HasCreateStatements bool `json:"has_create_statements,omitempty"`
HasInsertStatements bool `json:"has_insert_statements,omitempty"`
HasCopyStatements bool `json:"has_copy_statements,omitempty"`
HasTransactionBlock bool `json:"has_transaction_block,omitempty"`
ProperlyTerminated bool `json:"properly_terminated,omitempty"`
// pg_restore analysis (for custom format)
PgRestoreListable bool `json:"pg_restore_listable,omitempty"`
PgRestoreError string `json:"pg_restore_error,omitempty"`
// TableCount and TableList are filled from the pg_restore TOC for custom
// format dumps; for cluster archives they hold the dump files found in the
// tar listing instead.
TableCount int `json:"table_count,omitempty"`
TableList []string `json:"table_list,omitempty"`
// Compression analysis
GzipValid bool `json:"gzip_valid,omitempty"`
GzipError string `json:"gzip_error,omitempty"`
// ExpandedSize is the number of decompressed bytes read; CompressionRatio
// is ExpandedSize divided by the compressed file size.
ExpandedSize int64 `json:"expanded_size,omitempty"`
CompressionRatio float64 `json:"compression_ratio,omitempty"`
}
// Diagnoser performs deep analysis of backup files.
// Construct one with NewDiagnoser.
type Diagnoser struct {
// log receives progress and informational messages.
log logger.Logger
// verbose enables extra logging (scan progress, cluster archive contents).
verbose bool
}
// NewDiagnoser returns a Diagnoser that logs through log. When verbose is
// true the diagnoser emits additional progress output while analyzing files.
func NewDiagnoser(log logger.Logger, verbose bool) *Diagnoser {
	diag := new(Diagnoser)
	diag.log = log
	diag.verbose = verbose
	return diag
}
// DiagnoseFile runs every applicable check against the backup at filePath.
//
// Problems with the file itself (missing, empty, corrupted) are reported
// inside the returned DiagnoseResult rather than through the error value,
// which is currently always nil.
func (d *Diagnoser) DiagnoseFile(filePath string) (*DiagnoseResult, error) {
	// Start optimistic: the report stays valid until a check disproves it.
	report := &DiagnoseResult{
		FilePath: filePath,
		FileName: filepath.Base(filePath),
		Details:  &DiagnoseDetails{},
		IsValid:  true,
	}

	info, statErr := os.Stat(filePath)
	if statErr != nil {
		report.IsValid = false
		report.Errors = append(report.Errors, fmt.Sprintf("Cannot access file: %v", statErr))
		return report, nil
	}

	report.FileSize = info.Size()
	if report.FileSize == 0 {
		report.IsValid = false
		report.IsTruncated = true
		report.Errors = append(report.Errors, "File is empty (0 bytes)")
		return report, nil
	}

	// Pick the analyzer that matches the detected archive format.
	format := DetectArchiveFormat(filePath)
	report.Format = format
	report.DetectedFormat = format.String()

	switch format {
	case FormatPostgreSQLDump:
		d.diagnosePgDump(filePath, report)
	case FormatPostgreSQLDumpGz:
		d.diagnosePgDumpGz(filePath, report)
	case FormatPostgreSQLSQL, FormatPostgreSQLSQLGz:
		d.diagnoseSQLScript(filePath, format == FormatPostgreSQLSQLGz, report)
	case FormatClusterTarGz:
		d.diagnoseClusterArchive(filePath, report)
	default:
		report.Warnings = append(report.Warnings, "Unknown format - limited diagnosis available")
		d.diagnoseUnknown(filePath, report)
	}

	return report, nil
}
// diagnosePgDump checks an uncompressed PostgreSQL custom-format dump: it
// looks for the PGDMP magic at the start of the file and, when present, asks
// pg_restore to list the archive contents. Findings are written into result.
func (d *Diagnoser) diagnosePgDump(filePath string, result *DiagnoseResult) {
	const magic = "PGDMP"

	f, openErr := os.Open(filePath)
	if openErr != nil {
		result.IsValid = false
		result.Errors = append(result.Errors, fmt.Sprintf("Cannot open file: %v", openErr))
		return
	}
	defer f.Close()

	// Only the first 512 bytes are needed to inspect the header.
	head := make([]byte, 512)
	got, readErr := f.Read(head)
	if readErr != nil && readErr != io.EOF {
		result.IsValid = false
		result.Errors = append(result.Errors, fmt.Sprintf("Cannot read header: %v", readErr))
		return
	}

	signed := got >= len(magic) && string(head[:len(magic)]) == magic
	result.Details.HasPGDMPSignature = signed
	if !signed {
		result.IsValid = false
		result.IsCorrupted = true
		result.Details.FirstBytes = fmt.Sprintf("%q", head[:minInt(got, 20)])
		result.Errors = append(result.Errors,
			"Missing PGDMP signature - file is NOT PostgreSQL custom format",
			"This file may be SQL format incorrectly named as .dump",
			"Try: file "+filePath+" to check actual file type")
		return
	}
	result.Details.FirstBytes = "PGDMP..."

	// The signature alone does not prove integrity; let pg_restore read the TOC.
	d.verifyWithPgRestore(filePath, result)
}
// diagnosePgDumpGz analyzes a gzip-compressed PostgreSQL custom format dump.
//
// It validates the gzip header, checks the decompressed content for the PGDMP
// signature, and then reads the stream to the end to detect truncation and to
// compute the expanded size and compression ratio. Findings are written into
// result.
func (d *Diagnoser) diagnosePgDumpGz(filePath string, result *DiagnoseResult) {
	file, err := os.Open(filePath)
	if err != nil {
		result.IsValid = false
		result.Errors = append(result.Errors, fmt.Sprintf("Cannot open file: %v", err))
		return
	}
	defer file.Close()

	// Verify gzip integrity
	gz, err := gzip.NewReader(file)
	if err != nil {
		result.IsValid = false
		result.IsCorrupted = true
		result.Details.GzipValid = false
		result.Details.GzipError = err.Error()
		result.Errors = append(result.Errors,
			fmt.Sprintf("Invalid gzip format: %v", err),
			"The file may be truncated or corrupted during transfer")
		return
	}
	defer gz.Close()
	result.Details.GzipValid = true

	// Read and check header
	header := make([]byte, 512)
	n, err := gz.Read(header)
	if err != nil && err != io.EOF {
		result.IsValid = false
		result.Details.GzipValid = false
		result.Details.GzipError = err.Error()
		result.Errors = append(result.Errors, fmt.Sprintf("Cannot read decompressed header: %v", err))
		return
	}

	// Check PGDMP signature
	if n < 5 || string(header[:5]) != "PGDMP" {
		result.Details.HasPGDMPSignature = false
		result.Details.FirstBytes = fmt.Sprintf("%q", header[:minInt(n, 20)])

		// Check if it's actually SQL content
		content := string(header[:n])
		if strings.Contains(content, "PostgreSQL") || strings.Contains(content, "pg_dump") ||
			strings.Contains(content, "SET ") || strings.Contains(content, "CREATE ") {
			result.Details.HasSQLHeader = true
			result.Warnings = append(result.Warnings,
				"File contains SQL text but has .dump extension",
				"This appears to be SQL format, not custom format",
				"Restore should use psql, not pg_restore")
		} else {
			result.IsValid = false
			result.IsCorrupted = true
			result.Errors = append(result.Errors,
				"Missing PGDMP signature in decompressed content",
				"File is neither custom format nor valid SQL")
		}
		return
	}
	result.Details.HasPGDMPSignature = true
	result.Details.FirstBytes = "PGDMP..."

	// Verify full gzip stream integrity by draining the rest of the stream.
	// Continuing on the same reader avoids re-seeking and re-opening the gzip
	// stream, whose errors were previously ignored and could leave a nil reader.
	rest, err := io.Copy(io.Discard, gz)
	totalRead := int64(n) + rest
	result.Details.ExpandedSize = totalRead
	if err != nil {
		result.IsValid = false
		result.IsTruncated = true
		// The header parsed, but the stream itself is broken.
		result.Details.GzipValid = false
		result.Details.GzipError = err.Error()
		result.Errors = append(result.Errors,
			fmt.Sprintf("Gzip stream truncated after %d bytes: %v", totalRead, err),
			"The backup file appears to be incomplete",
			"Check if backup process completed successfully")
		return
	}

	if result.FileSize > 0 {
		result.Details.CompressionRatio = float64(totalRead) / float64(result.FileSize)
	}
}
// diagnoseSQLScript analyzes a plain-text SQL dump, optionally gzip-compressed.
//
// It scans the file line by line, records which kinds of statements occur,
// and tracks COPY ... FROM stdin blocks so that a dump cut off in the middle
// of table data is reported as truncated. Findings are written into result.
func (d *Diagnoser) diagnoseSQLScript(filePath string, compressed bool, result *DiagnoseResult) {
	file, err := os.Open(filePath)
	if err != nil {
		result.IsValid = false
		result.Errors = append(result.Errors, fmt.Sprintf("Cannot open file: %v", err))
		return
	}
	defer file.Close()

	var reader io.Reader = file
	if compressed {
		gz, err := gzip.NewReader(file)
		if err != nil {
			result.IsValid = false
			result.IsCorrupted = true
			result.Details.GzipValid = false
			result.Details.GzipError = err.Error()
			result.Errors = append(result.Errors, fmt.Sprintf("Invalid gzip format: %v", err))
			return
		}
		defer gz.Close()
		result.Details.GzipValid = true
		reader = gz
	}

	// Analyze SQL content.
	// Increase buffer size for large lines (COPY data can have long lines).
	scanner := bufio.NewScanner(reader)
	scanner.Buffer(make([]byte, 0, 1024*1024), 10*1024*1024)

	var (
		lineNumber      int
		inCopyBlock     bool
		lastCopyTable   string
		copyStartLine   int
		copyDataSamples []string
	)
	copyBlockPattern := regexp.MustCompile(`^COPY\s+("?[\w\."]+)"?\s+\(`)

	for scanner.Scan() {
		lineNumber++
		line := scanner.Text()

		// Check first few lines for header
		if lineNumber <= 10 {
			if strings.Contains(line, "PostgreSQL") || strings.Contains(line, "pg_dump") {
				result.Details.HasSQLHeader = true
			}
		}

		// Track structure
		upperLine := strings.ToUpper(strings.TrimSpace(line))
		if strings.HasPrefix(upperLine, "CREATE ") {
			result.Details.HasCreateStatements = true
		}
		if strings.HasPrefix(upperLine, "INSERT ") {
			result.Details.HasInsertStatements = true
		}
		if strings.HasPrefix(upperLine, "BEGIN") {
			result.Details.HasTransactionBlock = true
		}

		// Track COPY blocks
		if matches := copyBlockPattern.FindStringSubmatch(line); matches != nil {
			if inCopyBlock {
				// Previous COPY block wasn't terminated! Record which one so
				// the report can name it even if later blocks are fine.
				result.Details.UnterminatedCopy = true
				result.Details.LastCopyTable = lastCopyTable
				result.Details.LastCopyLineNumber = copyStartLine
				result.IsTruncated = true
				result.IsValid = false
				result.Errors = append(result.Errors,
					fmt.Sprintf("COPY block for '%s' starting at line %d was never terminated",
						lastCopyTable, copyStartLine))
			}
			inCopyBlock = true
			result.Details.HasCopyStatements = true
			result.Details.CopyBlockCount++
			lastCopyTable = matches[1]
			copyStartLine = lineNumber
			copyDataSamples = nil
		} else if strings.HasPrefix(line, `\.`) {
			// End-of-data marker closes the current COPY block.
			inCopyBlock = false
		} else if inCopyBlock {
			// We're in COPY data
			if len(copyDataSamples) < 3 {
				copyDataSamples = append(copyDataSamples, truncateString(line, 100))
			}
		}

		// Periodic progress output for very large files.
		if d.verbose && lineNumber%100000 == 0 {
			d.log.Debug("Scanning SQL file", "lines_processed", lineNumber)
		}
	}

	scanErr := scanner.Err()
	// A single line longer than the scanner limit stops the scan but says
	// nothing about the file's integrity, so it must not be reported as
	// truncation (and the COPY state at that point is not conclusive).
	scanAborted := scanErr == bufio.ErrTooLong
	switch {
	case scanErr == nil:
	case scanAborted:
		result.Warnings = append(result.Warnings,
			fmt.Sprintf("Line %d exceeds the 10 MB scan limit - analysis stopped early", lineNumber+1),
			"COPY block termination could not be verified beyond this point")
	default:
		result.IsValid = false
		result.IsTruncated = true
		result.Errors = append(result.Errors,
			fmt.Sprintf("Error reading file at line %d: %v", lineNumber, scanErr),
			"File may be truncated or contain invalid data")
	}

	// Check if we ended while still in a COPY block
	switch {
	case scanAborted:
		// Analysis incomplete: draw no conclusion about termination.
	case inCopyBlock:
		result.Details.UnterminatedCopy = true
		result.Details.LastCopyTable = lastCopyTable
		result.Details.LastCopyLineNumber = copyStartLine
		result.Details.SampleCopyData = copyDataSamples
		result.IsTruncated = true
		result.IsValid = false
		result.Errors = append(result.Errors,
			fmt.Sprintf("File ends inside COPY block for table '%s' (started at line %d)",
				lastCopyTable, copyStartLine),
			"The backup was truncated during data export",
			"This explains the 'syntax error' during restore - COPY data is being interpreted as SQL")
		if len(copyDataSamples) > 0 {
			result.Errors = append(result.Errors,
				fmt.Sprintf("Sample orphaned data: %s", copyDataSamples[0]))
		}
	case scanErr == nil && !result.Details.UnterminatedCopy:
		// Only claim proper termination when the whole file was read and no
		// earlier COPY block was found open.
		result.Details.ProperlyTerminated = true
	}

	// Read last bytes for additional context (best effort).
	if !compressed {
		tailSize := min(500, result.FileSize)
		if _, err := file.Seek(-tailSize, io.SeekEnd); err == nil {
			tail := make([]byte, tailSize)
			// A short read still yields usable context, so the error is ignored.
			n, _ := io.ReadFull(file, tail)
			result.Details.LastBytes = strings.TrimSpace(string(tail[:n]))
		}
	}
}
// diagnoseClusterArchive inspects a cluster tar.gz archive by listing it with
// the external tar command. It records the dump files found in the listing and
// warns when dumps, globals.sql, or manifest/metadata files are missing.
func (d *Diagnoser) diagnoseClusterArchive(filePath string, result *DiagnoseResult) {
	// A successful listing doubles as the tar.gz integrity check.
	listing, err := exec.Command("tar", "-tzf", filePath).Output()
	if err != nil {
		result.IsValid = false
		result.IsCorrupted = true
		result.Errors = append(result.Errors,
			fmt.Sprintf("Tar archive is invalid or corrupted: %v", err),
			"Run: tar -tzf "+filePath+" 2>&1 | tail -20")
		return
	}

	var (
		dumps       []string
		sawGlobals  bool
		sawMetadata bool
	)
	for _, entry := range strings.Split(strings.TrimSpace(string(listing)), "\n") {
		switch {
		case strings.HasSuffix(entry, ".dump"), strings.HasSuffix(entry, ".sql.gz"):
			dumps = append(dumps, entry)
		}
		sawGlobals = sawGlobals || strings.Contains(entry, "globals.sql")
		sawMetadata = sawMetadata ||
			strings.Contains(entry, "manifest.json") ||
			strings.Contains(entry, "metadata.json")
	}

	result.Details.TableCount = len(dumps)
	result.Details.TableList = dumps

	if len(dumps) == 0 {
		result.Warnings = append(result.Warnings, "No database dump files found in archive")
	}
	if !sawGlobals {
		result.Warnings = append(result.Warnings, "No globals.sql found - roles/tablespaces won't be restored")
	}
	if !sawMetadata {
		result.Warnings = append(result.Warnings, "No manifest/metadata found - limited validation possible")
	}

	// In verbose mode, log each dump contained in the archive.
	if !d.verbose || len(dumps) == 0 {
		return
	}
	d.log.Info("Cluster archive contains databases", "count", len(dumps))
	for _, name := range dumps {
		d.log.Info(" - " + name)
	}
}
// diagnoseUnknown sniffs the first bytes of a file whose format could not be
// detected and, when the content looks familiar, adds a warning suggesting
// the correct extension. Failure to open or read the file is silently ignored.
func (d *Diagnoser) diagnoseUnknown(filePath string, result *DiagnoseResult) {
	f, err := os.Open(filePath)
	if err != nil {
		return
	}
	defer f.Close()

	head := make([]byte, 512)
	got, _ := f.Read(head) // best effort: an unreadable file simply yields no hint
	result.Details.FirstBytes = fmt.Sprintf("%q", head[:minInt(got, 50)])

	// Try to identify by content
	text := string(head[:got])
	var hint string
	switch {
	case strings.Contains(text, "PGDMP"):
		hint = "File appears to be PostgreSQL custom format - rename to .dump"
	case strings.Contains(text, "PostgreSQL"), strings.Contains(text, "pg_dump"):
		hint = "File appears to be PostgreSQL SQL - rename to .sql"
	case bytes.HasPrefix(head, []byte{0x1f, 0x8b}):
		hint = "File appears to be gzip compressed - add .gz extension"
	default:
		return
	}
	result.Warnings = append(result.Warnings, hint)
}
// verifyWithPgRestore uses pg_restore --list to verify dump integrity.
//
// On failure the combined output of pg_restore is classified as truncation,
// corruption, or a generic warning. On success the table of contents is
// scanned for TABLE DATA entries to fill TableCount and (up to 20 names)
// TableList.
func (d *Diagnoser) verifyWithPgRestore(filePath string, result *DiagnoseResult) {
	cmd := exec.Command("pg_restore", "--list", filePath)
	output, err := cmd.CombinedOutput()
	if err != nil {
		errStr := string(output)
		// When pg_restore could not even be started (e.g. not installed) there
		// is no output; fall back to the Go error so the message is not blank.
		if strings.TrimSpace(errStr) == "" {
			errStr = err.Error()
		}
		result.Details.PgRestoreListable = false
		result.Details.PgRestoreError = errStr

		// Check for specific errors
		switch {
		case strings.Contains(errStr, "unexpected end of file"),
			strings.Contains(errStr, "invalid large-object TOC entry"):
			result.IsTruncated = true
			result.IsValid = false
			result.Errors = append(result.Errors,
				"pg_restore reports truncated or incomplete dump file",
				fmt.Sprintf("Error: %s", truncateString(errStr, 200)))
		case strings.Contains(errStr, "not a valid archive"):
			result.IsCorrupted = true
			result.IsValid = false
			result.Errors = append(result.Errors,
				"pg_restore reports file is not a valid archive",
				"File may be corrupted or wrong format")
		default:
			result.Warnings = append(result.Warnings,
				fmt.Sprintf("pg_restore --list warning: %s", truncateString(errStr, 200)))
		}
		return
	}

	result.Details.PgRestoreListable = true

	// Count tables in the TOC. A TABLE DATA entry has the form
	//   <id>; <tableoid> <oid> TABLE DATA <schema> <name> <owner>
	// so the table name sits between the schema and the trailing owner field.
	const marker = " TABLE DATA "
	tableCount := 0
	var tables []string
	for _, line := range strings.Split(string(output), "\n") {
		idx := strings.Index(line, marker)
		if idx < 0 {
			continue
		}
		tableCount++
		if len(tables) >= 20 {
			continue
		}
		fields := strings.Fields(line[idx+len(marker):])
		switch {
		case len(fields) >= 3:
			// Names containing spaces span several fields; the last is the owner.
			name := strings.Join(fields[1:len(fields)-1], " ")
			tables = append(tables, fields[0]+"."+name)
		case len(fields) == 2:
			tables = append(tables, fields[0]+"."+fields[1])
		}
	}
	result.Details.TableCount = tableCount
	result.Details.TableList = tables
}
// DiagnoseClusterDumps unpacks archivePath into tempDir with the external tar
// command and runs DiagnoseFile on every .dump, .sql.gz, and .sql file found
// in the "dumps" subdirectory (or in tempDir itself when that subdirectory
// cannot be read). It returns one result per diagnosed file.
func (d *Diagnoser) DiagnoseClusterDumps(archivePath, tempDir string) ([]*DiagnoseResult, error) {
	if err := exec.Command("tar", "-xzf", archivePath, "-C", tempDir).Run(); err != nil {
		return nil, fmt.Errorf("failed to extract archive: %w", err)
	}

	// Prefer the dumps/ subdirectory; fall back to the extraction root.
	searchDir := filepath.Join(tempDir, "dumps")
	entries, err := os.ReadDir(searchDir)
	if err != nil {
		searchDir = tempDir
		if entries, err = os.ReadDir(searchDir); err != nil {
			return nil, fmt.Errorf("cannot read extracted files: %w", err)
		}
	}

	looksLikeDump := func(name string) bool {
		for _, ext := range []string{".dump", ".sql.gz", ".sql"} {
			if strings.HasSuffix(name, ext) {
				return true
			}
		}
		return false
	}

	var results []*DiagnoseResult
	for _, entry := range entries {
		fileName := entry.Name()
		if entry.IsDir() || !looksLikeDump(fileName) {
			continue
		}

		d.log.Info("Diagnosing dump file", "file", fileName)
		diagnosis, diagErr := d.DiagnoseFile(filepath.Join(searchDir, fileName))
		if diagErr != nil {
			d.log.Warn("Failed to diagnose file", "file", fileName, "error", diagErr)
			continue
		}
		results = append(results, diagnosis)
	}
	return results, nil
}
// PrintDiagnosis outputs a human-readable diagnosis report to stdout.
//
// The report lists basic file information, the validity status, the detailed
// findings, collected errors and warnings, and - for invalid files that are
// truncated or corrupted - a numbered list of recommendations.
func (d *Diagnoser) PrintDiagnosis(result *DiagnoseResult) {
	separator := strings.Repeat("=", 70)

	fmt.Println("\n" + separator)
	fmt.Printf("📋 DIAGNOSIS: %s\n", result.FileName)
	fmt.Println(separator)

	// Basic info
	fmt.Printf("\nFile: %s\n", result.FilePath)
	fmt.Printf("Size: %s\n", formatBytes(result.FileSize))
	fmt.Printf("Format: %s\n", result.DetectedFormat)

	// Status
	if result.IsValid {
		fmt.Println("\n✅ STATUS: VALID")
	} else {
		fmt.Println("\n❌ STATUS: INVALID")
	}
	if result.IsTruncated {
		fmt.Println("⚠️ TRUNCATED: Yes - file appears incomplete")
	}
	if result.IsCorrupted {
		fmt.Println("⚠️ CORRUPTED: Yes - file structure is damaged")
	}

	// Details
	if result.Details != nil {
		fmt.Println("\n📊 DETAILS:")
		if result.Details.HasPGDMPSignature {
			fmt.Println(" ✓ Has PGDMP signature (PostgreSQL custom format)")
		}
		if result.Details.HasSQLHeader {
			fmt.Println(" ✓ Has PostgreSQL SQL header")
		}
		if result.Details.GzipValid {
			fmt.Println(" ✓ Gzip compression valid")
		}
		if result.Details.PgRestoreListable {
			fmt.Printf(" ✓ pg_restore can list contents (%d tables)\n", result.Details.TableCount)
		}
		if result.Details.CopyBlockCount > 0 {
			fmt.Printf(" • Contains %d COPY blocks\n", result.Details.CopyBlockCount)
		}
		if result.Details.UnterminatedCopy {
			fmt.Printf(" ✗ Unterminated COPY block: %s (line %d)\n",
				result.Details.LastCopyTable, result.Details.LastCopyLineNumber)
		}
		if result.Details.ProperlyTerminated {
			fmt.Println(" ✓ All COPY blocks properly terminated")
		}
		if result.Details.ExpandedSize > 0 {
			fmt.Printf(" • Expanded size: %s (ratio: %.1fx)\n",
				formatBytes(result.Details.ExpandedSize), result.Details.CompressionRatio)
		}
	}

	// Errors
	if len(result.Errors) > 0 {
		fmt.Println("\n❌ ERRORS:")
		for _, e := range result.Errors {
			fmt.Printf(" • %s\n", e)
		}
	}

	// Warnings
	if len(result.Warnings) > 0 {
		fmt.Println("\n⚠ WARNINGS:")
		for _, w := range result.Warnings {
			fmt.Printf(" • %s\n", w)
		}
	}

	// Recommendations: gather first so the heading is only printed when there
	// is something to list, and so numbering stays continuous when a file is
	// both truncated and corrupted.
	if !result.IsValid {
		var recs []string
		if result.IsTruncated {
			recs = append(recs,
				"Re-run the backup process for this database",
				"Check disk space on backup server during backup",
				"Verify network stability if backup was remote",
				"Check backup logs for errors during the backup")
		}
		if result.IsCorrupted {
			recs = append(recs,
				"Verify backup file was transferred completely",
				"Check if backup file was modified after creation",
				"Try restoring from a previous backup")
		}
		if len(recs) > 0 {
			fmt.Println("\n💡 RECOMMENDATIONS:")
			for i, rec := range recs {
				fmt.Printf(" %d. %s\n", i+1, rec)
			}
		}
	}

	fmt.Println(separator)
}
// PrintDiagnosisJSON writes result to stdout as indented JSON. It returns the
// marshalling error, if any, in which case nothing is printed.
func (d *Diagnoser) PrintDiagnosisJSON(result *DiagnoseResult) error {
	encoded, err := json.MarshalIndent(result, "", " ")
	if err == nil {
		fmt.Println(string(encoded))
	}
	return err
}
// Helper functions

// truncateString shortens s to at most maxLen bytes. Strings that already fit
// are returned unchanged. Longer strings are cut and suffixed with "..." so
// the result is maxLen bytes or fewer; when maxLen is too small to hold the
// ellipsis (3 or less) the string is simply cut, and a non-positive maxLen
// yields "". Cuts never split a multi-byte UTF-8 character.
func truncateString(s string, maxLen int) string {
	if len(s) <= maxLen {
		return s
	}
	if maxLen <= 0 {
		return ""
	}

	suffix := "..."
	cut := maxLen - len(suffix)
	if maxLen <= len(suffix) {
		// No room for the ellipsis; previously this produced a negative
		// slice bound and panicked.
		suffix = ""
		cut = maxLen
	}

	// Back up to the start of a rune: bytes of the form 10xxxxxx are UTF-8
	// continuation bytes and must not begin the discarded part.
	for cut > 0 && s[cut]&0xC0 == 0x80 {
		cut--
	}
	return s[:cut] + suffix
}
// formatBytes renders a byte count using binary (1024-based) units with one
// decimal place, e.g. "1.5 KB". Values below 1024 are printed as whole bytes.
func formatBytes(bytes int64) string {
	const unit = 1024
	const suffixes = "KMGTPE"

	if bytes < unit {
		return fmt.Sprintf("%d B", bytes)
	}

	// Grow the divisor until the scaled value drops below one unit.
	divisor := int64(unit)
	idx := 0
	for bytes/divisor >= unit {
		divisor *= unit
		idx++
	}

	scaled := float64(bytes) / float64(divisor)
	return fmt.Sprintf("%.1f %cB", scaled, suffixes[idx])
}
// min returns the smaller of two int64 values.
func min(a, b int64) int64 {
	if b <= a {
		return b
	}
	return a
}
// minInt returns the smaller of two int values.
func minInt(a, b int) int {
	if b <= a {
		return b
	}
	return a
}

View File

@@ -27,6 +27,8 @@ type Engine struct {
progress progress.Indicator
detailedReporter *progress.DetailedReporter
dryRun bool
debugLogPath string // Path to save debug log on error
errorCollector *ErrorCollector // Collects detailed error info
}
// New creates a new restore engine
@@ -77,6 +79,11 @@ func NewWithProgress(cfg *config.Config, log logger.Logger, db database.Database
}
}
// SetDebugLogPath enables saving detailed error reports on failure.
// The path is stored on the engine; a non-empty value makes the restore
// command runner create an ErrorCollector and save its report to this path
// when the command fails.
func (e *Engine) SetDebugLogPath(path string) {
e.debugLogPath = path
}
// loggerAdapter adapts our logger to the progress.Logger interface
type loggerAdapter struct {
logger logger.Logger
@@ -306,6 +313,11 @@ func (e *Engine) restoreMySQLSQL(ctx context.Context, archivePath, targetDB stri
// executeRestoreCommand executes a restore command.
// It delegates to executeRestoreCommandWithContext with an empty archive
// path, an empty target database, and FormatUnknown.
func (e *Engine) executeRestoreCommand(ctx context.Context, cmdArgs []string) error {
return e.executeRestoreCommandWithContext(ctx, cmdArgs, "", "", FormatUnknown)
}
// executeRestoreCommandWithContext executes a restore command with error collection context
func (e *Engine) executeRestoreCommandWithContext(ctx context.Context, cmdArgs []string, archivePath, targetDB string, format ArchiveFormat) error {
e.log.Info("Executing restore command", "command", strings.Join(cmdArgs, " "))
cmd := exec.CommandContext(ctx, cmdArgs[0], cmdArgs[1:]...)
@@ -316,6 +328,12 @@ func (e *Engine) executeRestoreCommand(ctx context.Context, cmdArgs []string) er
fmt.Sprintf("MYSQL_PWD=%s", e.cfg.Password),
)
// Create error collector if debug log path is set
var collector *ErrorCollector
if e.debugLogPath != "" {
collector = NewErrorCollector(e.cfg, e.log, archivePath, targetDB, format, true)
}
// Stream stderr to avoid memory issues with large output
// Don't use CombinedOutput() as it loads everything into memory
stderr, err := cmd.StderrPipe()
@@ -336,6 +354,12 @@ func (e *Engine) executeRestoreCommand(ctx context.Context, cmdArgs []string) er
n, err := stderr.Read(buf)
if n > 0 {
chunk := string(buf[:n])
// Feed to error collector if enabled
if collector != nil {
collector.CaptureStderr(chunk)
}
// Only capture REAL errors, not verbose output
if strings.Contains(chunk, "ERROR:") || strings.Contains(chunk, "FATAL:") || strings.Contains(chunk, "error:") {
lastError = strings.TrimSpace(chunk)
@@ -352,6 +376,12 @@ func (e *Engine) executeRestoreCommand(ctx context.Context, cmdArgs []string) er
}
if err := cmd.Wait(); err != nil {
// Get exit code
exitCode := 1
if exitErr, ok := err.(*exec.ExitError); ok {
exitCode = exitErr.ExitCode()
}
// PostgreSQL pg_restore returns exit code 1 even for ignorable errors
// Check if errors are ignorable (already exists, duplicate, etc.)
if lastError != "" && e.isIgnorableError(lastError) {
@@ -360,8 +390,12 @@ func (e *Engine) executeRestoreCommand(ctx context.Context, cmdArgs []string) er
}
// Classify error and provide helpful hints
var classification *checks.ErrorClassification
var errType, errHint string
if lastError != "" {
classification := checks.ClassifyError(lastError)
classification = checks.ClassifyError(lastError)
errType = classification.Type
errHint = classification.Hint
e.log.Error("Restore command failed",
"error", err,
"last_stderr", lastError,
@@ -369,11 +403,37 @@ func (e *Engine) executeRestoreCommand(ctx context.Context, cmdArgs []string) er
"error_type", classification.Type,
"hint", classification.Hint,
"action", classification.Action)
return fmt.Errorf("restore failed: %w (last error: %s, total errors: %d) - %s",
err, lastError, errorCount, classification.Hint)
} else {
e.log.Error("Restore command failed", "error", err, "error_count", errorCount)
}
e.log.Error("Restore command failed", "error", err, "last_stderr", lastError, "error_count", errorCount)
// Generate and save error report if collector is enabled
if collector != nil {
collector.SetExitCode(exitCode)
report := collector.GenerateReport(
lastError,
errType,
errHint,
)
// Print report to console
collector.PrintReport(report)
// Save to file
if e.debugLogPath != "" {
if saveErr := collector.SaveReport(report, e.debugLogPath); saveErr != nil {
e.log.Warn("Failed to save debug log", "error", saveErr)
} else {
e.log.Info("Debug log saved", "path", e.debugLogPath)
fmt.Printf("\n📋 Detailed error report saved to: %s\n", e.debugLogPath)
}
}
}
if lastError != "" {
return fmt.Errorf("restore failed: %w (last error: %s, total errors: %d) - %s",
err, lastError, errorCount, errHint)
}
return fmt.Errorf("restore failed: %w", err)
}

View File

@@ -0,0 +1,569 @@
package restore
import (
"bufio"
"compress/gzip"
"encoding/json"
"fmt"
"io"
"os"
"os/exec"
"path/filepath"
"runtime"
"strings"
"time"
"dbbackup/internal/config"
"dbbackup/internal/logger"
)
// RestoreErrorReport contains comprehensive information about a restore failure.
// It is assembled by ErrorCollector.GenerateReport and is JSON-serializable.
type RestoreErrorReport struct {
// Metadata
Timestamp time.Time `json:"timestamp"`
Version string `json:"version"`
GoVersion string `json:"go_version"`
OS string `json:"os"`
Arch string `json:"arch"`
// Archive info
ArchivePath string `json:"archive_path"`
ArchiveSize int64 `json:"archive_size"`
ArchiveFormat string `json:"archive_format"`
// Database info
TargetDB string `json:"target_db"`
DatabaseType string `json:"database_type"`
// Error details
ExitCode int `json:"exit_code"`
ErrorMessage string `json:"error_message"`
ErrorType string `json:"error_type"`
ErrorHint string `json:"error_hint"`
TotalErrors int `json:"total_errors"`
// Captured output: LastStderr is the collector's bounded tail of stderr
// lines, FirstErrors the first error lines it saw.
LastStderr []string `json:"last_stderr"`
FirstErrors []string `json:"first_errors"`
// Context around failure
FailureContext *FailureContext `json:"failure_context,omitempty"`
// Diagnosis results
DiagnosisResult *DiagnoseResult `json:"diagnosis_result,omitempty"`
// Environment (sanitized): output of each tool's --version invocation.
PostgresVersion string `json:"postgres_version,omitempty"`
PgRestoreVersion string `json:"pg_restore_version,omitempty"`
PsqlVersion string `json:"psql_version,omitempty"`
// Recommendations
Recommendations []string `json:"recommendations"`
}
// FailureContext captures context around where the failure occurred.
// It is derived from the most recent captured error lines; every field is
// optional and omitted from JSON when unset.
type FailureContext struct {
// For SQL/COPY errors
FailedLine int `json:"failed_line,omitempty"`
FailedStatement string `json:"failed_statement,omitempty"`
// SurroundingLines holds numbered lines of the dump around FailedLine,
// with the failing line prefixed by ">".
SurroundingLines []string `json:"surrounding_lines,omitempty"`
// For COPY block errors
InCopyBlock bool `json:"in_copy_block,omitempty"`
CopyTableName string `json:"copy_table_name,omitempty"`
CopyStartLine int `json:"copy_start_line,omitempty"`
SampleCopyData []string `json:"sample_copy_data,omitempty"`
// File position info
BytePosition int64 `json:"byte_position,omitempty"`
PercentComplete float64 `json:"percent_complete,omitempty"`
}
// ErrorCollector captures detailed error information during restore.
// Stderr output is fed in through CaptureStderr and later turned into a
// RestoreErrorReport by GenerateReport.
type ErrorCollector struct {
log logger.Logger
cfg *config.Config
archivePath string
targetDB string
format ArchiveFormat
// Captured data
// stderrLines keeps at most maxStderrLines of the most recent non-empty lines.
stderrLines []string
// firstErrors keeps the first maxErrorCapture error lines; lastErrors keeps
// the most recent maxErrorCapture error lines.
firstErrors []string
lastErrors []string
// totalErrors counts every error line seen, including those not retained.
totalErrors int
exitCode int
// Limits
maxStderrLines int
maxErrorCapture int
// State
startTime time.Time
// enabled gates CaptureStderr; when false, input is discarded.
enabled bool
}
// NewErrorCollector builds an ErrorCollector for one restore run. The
// collector retains up to 100 recent stderr lines and up to 10 first/last
// error lines; enabled controls whether CaptureStderr records anything.
func NewErrorCollector(cfg *config.Config, log logger.Logger, archivePath, targetDB string, format ArchiveFormat, enabled bool) *ErrorCollector {
	const (
		stderrLimit = 100
		errorLimit  = 10
	)

	ec := new(ErrorCollector)
	ec.log = log
	ec.cfg = cfg
	ec.archivePath = archivePath
	ec.targetDB = targetDB
	ec.format = format

	// Pre-size the capture buffers to their limits.
	ec.stderrLines = make([]string, 0, stderrLimit)
	ec.firstErrors = make([]string, 0, errorLimit)
	ec.lastErrors = make([]string, 0, errorLimit)
	ec.maxStderrLines = stderrLimit
	ec.maxErrorCapture = errorLimit

	ec.startTime = time.Now()
	ec.enabled = enabled
	return ec
}
// CaptureStderr records one chunk of stderr output. The chunk is split into
// lines; blank lines are dropped, every remaining line is added to the bounded
// tail of recent output, and lines recognised by isErrorLine are counted and
// kept in the first-errors and last-errors buffers. Does nothing when the
// collector is disabled.
func (ec *ErrorCollector) CaptureStderr(chunk string) {
	if !ec.enabled {
		return
	}

	// keepLast appends entry, dropping the oldest element once limit is reached.
	keepLast := func(buf []string, limit int, entry string) []string {
		if len(buf) >= limit {
			buf = buf[1:]
		}
		return append(buf, entry)
	}

	for _, raw := range strings.Split(chunk, "\n") {
		line := strings.TrimSpace(raw)
		if line == "" {
			continue
		}

		ec.stderrLines = keepLast(ec.stderrLines, ec.maxStderrLines, line)

		if !isErrorLine(line) {
			continue
		}
		ec.totalErrors++
		if len(ec.firstErrors) < ec.maxErrorCapture {
			ec.firstErrors = append(ec.firstErrors, line)
		}
		ec.lastErrors = keepLast(ec.lastErrors, ec.maxErrorCapture, line)
	}
}
// SetExitCode records the exit code of the failed restore command so that
// GenerateReport can include it in the report.
func (ec *ErrorCollector) SetExitCode(code int) {
ec.exitCode = code
}
// GenerateReport assembles a RestoreErrorReport from everything the collector
// has captured, plus the supplied error message, classification type and hint.
// It also stats the archive, queries tool versions, analyzes the failure
// context, runs a (non-verbose) diagnosis of the archive, and derives
// recommendations.
func (ec *ErrorCollector) GenerateReport(errMessage string, errType string, errHint string) *RestoreErrorReport {
	report := new(RestoreErrorReport)

	// Metadata
	report.Timestamp = time.Now()
	report.Version = "1.0.0" // TODO: inject actual version
	report.GoVersion = runtime.Version()
	report.OS = runtime.GOOS
	report.Arch = runtime.GOARCH

	// Archive and database
	report.ArchivePath = ec.archivePath
	report.ArchiveFormat = ec.format.String()
	report.TargetDB = ec.targetDB
	report.DatabaseType = getDatabaseType(ec.format)

	// Error details and captured output
	report.ExitCode = ec.exitCode
	report.ErrorMessage = errMessage
	report.ErrorType = errType
	report.ErrorHint = errHint
	report.TotalErrors = ec.totalErrors
	report.LastStderr = ec.stderrLines
	report.FirstErrors = ec.firstErrors

	// Archive size is best effort; it stays zero when the file cannot be stat'ed.
	if info, statErr := os.Stat(ec.archivePath); statErr == nil {
		report.ArchiveSize = info.Size()
	}

	// Tool versions
	report.PostgresVersion = getCommandVersion("postgres", "--version")
	report.PgRestoreVersion = getCommandVersion("pg_restore", "--version")
	report.PsqlVersion = getCommandVersion("psql", "--version")

	report.FailureContext = ec.analyzeFailureContext()

	// Diagnose the archive itself so the report can explain structural damage.
	diagnosis, diagErr := NewDiagnoser(ec.log, false).DiagnoseFile(ec.archivePath)
	if diagErr == nil {
		report.DiagnosisResult = diagnosis
	}

	report.Recommendations = ec.generateRecommendations(report)
	return report
}
// analyzeFailureContext derives a FailureContext from the most recent error
// lines: the first line number found, whether any line mentions COPY or a
// syntax error (with the table name when extractable), and, if a line number
// is known, the surrounding lines of the archive.
func (ec *ErrorCollector) analyzeFailureContext() *FailureContext {
	failure := new(FailureContext)

	// The first error line carrying a line number wins.
	for _, msg := range ec.lastErrors {
		n := extractLineNumber(msg)
		if n <= 0 {
			continue
		}
		failure.FailedLine = n
		break
	}

	// The first COPY-related (or syntax error) line decides the COPY fields.
	for _, msg := range ec.lastErrors {
		if !strings.Contains(msg, "COPY") && !strings.Contains(msg, "syntax error") {
			continue
		}
		failure.InCopyBlock = true
		if table := extractTableName(msg); table != "" {
			failure.CopyTableName = table
		}
		break
	}

	if failure.FailedLine > 0 && ec.archivePath != "" {
		failure.SurroundingLines = ec.getSurroundingLines(failure.FailedLine, 5)
	}
	return failure
}
// getSurroundingLines returns the lines of the archive from lineNum-context
// to lineNum+context (clamped at line 1), each formatted as
// "<prefix><number>: <text>" with the text truncated to 100 bytes and the
// target line marked with a ">" prefix. Archives whose path ends in ".gz" are
// read through gzip. It returns nil when the file cannot be opened.
func (ec *ErrorCollector) getSurroundingLines(lineNum int, context int) []string {
	f, err := os.Open(ec.archivePath)
	if err != nil {
		return nil
	}
	defer f.Close()

	// Handle compressed files
	var src io.Reader = f
	if strings.HasSuffix(ec.archivePath, ".gz") {
		zr, err := gzip.NewReader(f)
		if err != nil {
			return nil
		}
		defer zr.Close()
		src = zr
	}

	first, last := lineNum-context, lineNum+context
	if first < 1 {
		first = 1
	}

	sc := bufio.NewScanner(src)
	sc.Buffer(make([]byte, 0, 1024*1024), 10*1024*1024)

	var excerpt []string
	for current := 1; sc.Scan(); current++ {
		if current > last {
			break
		}
		if current < first {
			continue
		}
		marker := " "
		if current == lineNum {
			marker = "> "
		}
		excerpt = append(excerpt,
			fmt.Sprintf("%s%d: %s", marker, current, truncateString(sc.Text(), 100)))
	}
	return excerpt
}
// generateRecommendations turns the findings in report into a list of
// human-readable recommendations. It draws, in order, on the file diagnosis
// (truncation, corruption, unterminated COPY block), on the total error
// count, and on keywords found in the lower-cased error message. If none of
// those produce anything, a generic set of next steps is returned instead.
func (ec *ErrorCollector) generateRecommendations(report *RestoreErrorReport) []string {
	var recs []string

	// Findings from the dump-file diagnosis, when one is attached.
	if diag := report.DiagnosisResult; diag != nil {
		if diag.IsTruncated {
			recs = append(recs,
				"CRITICAL: Backup file is truncated/incomplete",
				"Action: Re-run the backup for the affected database",
				"Check: Verify disk space was available during backup",
				"Check: Verify network was stable during backup transfer",
			)
		}
		if diag.IsCorrupted {
			recs = append(recs,
				"CRITICAL: Backup file appears corrupted",
				"Action: Restore from a previous backup",
				"Action: Verify backup file checksum if available",
			)
		}
		if details := diag.Details; details != nil && details.UnterminatedCopy {
			recs = append(recs,
				fmt.Sprintf("ISSUE: COPY block for table '%s' was not terminated",
					details.LastCopyTable),
				"Cause: Backup was interrupted during data export",
				"Action: Re-run backup ensuring it completes fully",
			)
		}
	}

	// More than a million errors is treated as a structural problem.
	if report.TotalErrors > 1000000 {
		recs = append(recs,
			"ISSUE: Millions of errors indicate structural problem, not individual data issues",
			"Cause: Likely wrong restore method or truncated dump",
			"Check: Verify dump format matches restore command",
		)
	}

	// Keyword-driven advice, matched case-insensitively against the message.
	// Every matching entry contributes, in the order listed here.
	keywordAdvice := []struct {
		keyword string
		advice  []string
	}{
		{"syntax error", []string{
			"ISSUE: SQL syntax errors during restore",
			"Cause: COPY data being interpreted as SQL commands",
			"Check: Run 'dbbackup restore diagnose <archive>' for detailed analysis",
		}},
		{"permission denied", []string{
			"ISSUE: Permission denied",
			"Action: Check database user has sufficient privileges",
			"Action: For ownership preservation, use a superuser account",
		}},
		{"does not exist", []string{
			"ISSUE: Missing object reference",
			"Action: Ensure globals.sql was restored first (for roles/tablespaces)",
			"Action: Check if target database was created",
		}},
	}
	message := strings.ToLower(report.ErrorMessage)
	for _, entry := range keywordAdvice {
		if strings.Contains(message, entry.keyword) {
			recs = append(recs, entry.advice...)
		}
	}

	if len(recs) > 0 {
		return recs
	}

	// Nothing specific matched: fall back to generic next steps.
	return append(recs,
		"Run 'dbbackup restore diagnose <archive>' for detailed analysis",
		"Check the stderr output above for specific error messages",
		"Review the PostgreSQL/MySQL logs on the target server",
	)
}
// SaveReport writes report to outputPath as indented JSON, creating the
// parent directory first if it does not exist.
//
// Reports may carry captured restore stderr and excerpts of the dump itself,
// which can include row data, so a newly created report file is readable and
// writable by its owner only (0600). As with any os.WriteFile call, a file
// that already exists is truncated and keeps its current permissions.
//
// It returns a wrapped error if the directory cannot be created, the report
// cannot be marshalled, or the file cannot be written.
func (ec *ErrorCollector) SaveReport(report *RestoreErrorReport, outputPath string) error {
	// Create directory if needed
	dir := filepath.Dir(outputPath)
	if err := os.MkdirAll(dir, 0755); err != nil {
		return fmt.Errorf("failed to create directory: %w", err)
	}

	// Marshal to JSON with indentation
	data, err := json.MarshalIndent(report, "", " ")
	if err != nil {
		return fmt.Errorf("failed to marshal report: %w", err)
	}

	// Owner-only permissions: the payload may contain database contents.
	if err := os.WriteFile(outputPath, data, 0600); err != nil {
		return fmt.Errorf("failed to write report: %w", err)
	}
	return nil
}
// PrintReport writes a human-readable summary of report to standard output:
// a header with archive and exit details, the classified error, the failure
// context (only when a failing line number is known), up to five of the first
// errors, up to three diagnosis errors (only when the diagnosis marks the
// file invalid), the recommendations, and the tool environment.
func (ec *ErrorCollector) PrintReport(report *RestoreErrorReport) {
	heavyRule := strings.Repeat("═", 70)
	lightRule := strings.Repeat("─", 70)

	// section prints a blank line, then title framed by two light rules.
	section := func(title string) {
		fmt.Println("\n" + lightRule)
		fmt.Println(title)
		fmt.Println(lightRule)
	}

	// Banner and summary.
	fmt.Println()
	fmt.Println(heavyRule)
	fmt.Println(" 🔴 RESTORE ERROR REPORT")
	fmt.Println(heavyRule)
	fmt.Printf("\n📅 Timestamp: %s\n", report.Timestamp.Format("2006-01-02 15:04:05"))
	fmt.Printf("📦 Archive: %s\n", filepath.Base(report.ArchivePath))
	fmt.Printf("📊 Format: %s\n", report.ArchiveFormat)
	fmt.Printf("🎯 Target DB: %s\n", report.TargetDB)
	fmt.Printf("⚠️ Exit Code: %d\n", report.ExitCode)
	fmt.Printf("❌ Total Errors: %d\n", report.TotalErrors)

	// Classified error.
	section("ERROR DETAILS:")
	fmt.Printf("\nType: %s\n", report.ErrorType)
	fmt.Printf("Message: %s\n", report.ErrorMessage)
	if report.ErrorHint != "" {
		fmt.Printf("Hint: %s\n", report.ErrorHint)
	}

	// Failure context is shown only when a failing line was identified.
	if fc := report.FailureContext; fc != nil && fc.FailedLine > 0 {
		section("FAILURE CONTEXT:")
		fmt.Printf("\nFailed at line: %d\n", fc.FailedLine)
		if fc.InCopyBlock {
			fmt.Printf("Inside COPY block for table: %s\n", fc.CopyTableName)
		}
		if len(fc.SurroundingLines) > 0 {
			fmt.Println("\nSurrounding lines:")
			for _, line := range fc.SurroundingLines {
				fmt.Println(line)
			}
		}
	}

	// First errors: list at most five, then summarize the remainder.
	if total := len(report.FirstErrors); total > 0 {
		section("FIRST ERRORS:")
		shown := total
		if shown > 5 {
			shown = 5
		}
		for i := 0; i < shown; i++ {
			fmt.Printf(" %d. %s\n", i+1, truncateString(report.FirstErrors[i], 100))
		}
		if total > shown {
			fmt.Printf("... and %d more\n", total-5)
		}
	}

	// Diagnosis summary, only for files the diagnosis considers invalid.
	if diag := report.DiagnosisResult; diag != nil && !diag.IsValid {
		section("DIAGNOSIS:")
		if diag.IsTruncated {
			fmt.Println(" ❌ File is TRUNCATED")
		}
		if diag.IsCorrupted {
			fmt.Println(" ❌ File is CORRUPTED")
		}
		limit := len(diag.Errors)
		if limit > 3 {
			limit = 3
		}
		for _, diagErr := range diag.Errors[:limit] {
			fmt.Printf(" • %s\n", diagErr)
		}
	}

	// Recommendations.
	section("💡 RECOMMENDATIONS:")
	for _, rec := range report.Recommendations {
		fmt.Printf(" • %s\n", rec)
	}

	// Environment; tool versions are printed only when they were detected.
	section("ENVIRONMENT:")
	fmt.Printf(" OS: %s/%s\n", report.OS, report.Arch)
	fmt.Printf(" Go: %s\n", report.GoVersion)
	if report.PgRestoreVersion != "" {
		fmt.Printf(" pg_restore: %s\n", report.PgRestoreVersion)
	}
	if report.PsqlVersion != "" {
		fmt.Printf(" psql: %s\n", report.PsqlVersion)
	}
	fmt.Println(heavyRule)
}
// Helper functions
// isErrorLine reports whether line contains one of the severity markers
// "ERROR:", "FATAL:", "error:" or "PANIC:". Matching is case-sensitive and
// the marker may appear anywhere in the line.
func isErrorLine(line string) bool {
	switch {
	case strings.Contains(line, "ERROR:"),
		strings.Contains(line, "FATAL:"),
		strings.Contains(line, "error:"),
		strings.Contains(line, "PANIC:"):
		return true
	}
	return false
}
// extractLineNumber returns the first positive line number that follows a
// "LINE " or "line " marker in errLine (for example "LINE 1:" or "line 123"),
// or 0 when no such number is present.
//
// All "LINE " occurrences are tried before any "line " occurrence. Every
// occurrence of a marker is examined, so a marker that is not followed by
// digits (such as the "line " inside "pipeline failed") does not hide a real
// "line N" later in the same message. Digit runs that do not fit in an int
// are skipped.
func extractLineNumber(errLine string) int {
	for _, marker := range []string{"LINE ", "line "} {
		rest := errLine
		for {
			idx := strings.Index(rest, marker)
			if idx < 0 {
				break
			}
			// Continue the search after this marker on the next iteration.
			rest = rest[idx+len(marker):]

			end := 0
			for end < len(rest) && rest[end] >= '0' && rest[end] <= '9' {
				end++
			}
			if end == 0 {
				continue
			}

			var num int
			// Sscanf fails when the digit run overflows int; treat that
			// occurrence as unusable rather than returning a bogus value.
			if _, err := fmt.Sscanf(rest[:end], "%d", &num); err == nil && num > 0 {
				return num
			}
		}
	}
	return 0
}
// extractTableName pulls a table name out of an error line. It recognises,
// in this order, text following "COPY failed for table ", "COPY " and
// "table ", with an optional opening double quote, and returns "" when none
// of the markers is followed by a name.
//
// The name ends at a double quote, space, opening parenthesis, comma, colon
// or semicolon. This keeps trailing punctuation out of the result for
// messages such as `COPY users, line 5, column id: "x"`, and the most
// specific marker is tried first so that `COPY failed for table "users"`
// yields "users" rather than the word "failed".
func extractTableName(errLine string) string {
	markers := []string{"COPY failed for table ", "COPY ", "table "}
	for _, marker := range markers {
		idx := strings.Index(errLine, marker)
		if idx < 0 {
			continue
		}

		// Skip a single optional opening quote.
		rest := strings.TrimPrefix(errLine[idx+len(marker):], `"`)

		end := strings.IndexAny(rest, "\" (,:;")
		if end < 0 {
			end = len(rest)
		}
		if end > 0 {
			return rest[:end]
		}
	}
	return ""
}
// getDatabaseType names the database family for an archive format: "mysql"
// when format.IsMySQL() reports true, "postgresql" for everything else.
func getDatabaseType(format ArchiveFormat) string {
	dbType := "postgresql"
	if format.IsMySQL() {
		dbType = "mysql"
	}
	return dbType
}
// getCommandVersion runs cmd with the single argument arg and returns the
// first line of its combined stdout/stderr output, with surrounding
// whitespace removed. It returns "" when the command cannot be started or
// exits with an error.
func getCommandVersion(cmd string, arg string) string {
	raw, err := exec.Command(cmd, arg).CombinedOutput()
	if err != nil {
		return ""
	}

	// Keep only the text before the first newline, if there is one.
	text := string(raw)
	if nl := strings.IndexByte(text, '\n'); nl >= 0 {
		text = text[:nl]
	}
	return strings.TrimSpace(text)
}