v3.40.0: Restore diagnostics and error reporting
Features:
- Restore `diagnose` command for backup file analysis
- Deep COPY block verification for truncated dump detection
- PGDMP signature and gzip integrity validation
- Detailed error reports with the --save-debug-log flag
- Ring-buffer stderr capture (prevents OOM on 2M+ errors)
- Error classification with actionable recommendations

TUI Enhancements:
- Automatic dump-validity safety check before restore
- Press 'd' in the archive browser to diagnose backups
- Press 'd' in the restore preview to toggle the debug log
- Debug logs saved to /tmp on failure when enabled

Documentation:
- Updated README with the diagnose command and examples
- Updated CHANGELOG with the full feature list
- Updated restore preview screenshots
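Example invocation (the diagnose subcommand is the one named in the recommendations emitted by the new code; exact flag placement may vary):

    dbbackup restore diagnose /backups/mydb.dump

With --save-debug-log enabled, a failed restore additionally writes a detailed JSON error report (saved under /tmp when triggered from the TUI).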
internal/restore/diagnose.go (new file, 726 lines)
@@ -0,0 +1,726 @@
package restore

import (
	"bufio"
	"bytes"
	"compress/gzip"
	"encoding/json"
	"fmt"
	"io"
	"os"
	"os/exec"
	"path/filepath"
	"regexp"
	"strings"

	"dbbackup/internal/logger"
)

// DiagnoseResult contains the results of a dump file diagnosis
type DiagnoseResult struct {
	FilePath       string           `json:"file_path"`
	FileName       string           `json:"file_name"`
	FileSize       int64            `json:"file_size"`
	Format         ArchiveFormat    `json:"format"`
	DetectedFormat string           `json:"detected_format"`
	IsValid        bool             `json:"is_valid"`
	IsTruncated    bool             `json:"is_truncated"`
	IsCorrupted    bool             `json:"is_corrupted"`
	Errors         []string         `json:"errors,omitempty"`
	Warnings       []string         `json:"warnings,omitempty"`
	Details        *DiagnoseDetails `json:"details,omitempty"`
}

// DiagnoseDetails contains detailed analysis of the dump file
type DiagnoseDetails struct {
	// Header info
	HasPGDMPSignature bool   `json:"has_pgdmp_signature,omitempty"`
	HasSQLHeader      bool   `json:"has_sql_header,omitempty"`
	FirstBytes        string `json:"first_bytes,omitempty"`
	LastBytes         string `json:"last_bytes,omitempty"`

	// COPY block analysis (for SQL dumps)
	CopyBlockCount     int      `json:"copy_block_count,omitempty"`
	UnterminatedCopy   bool     `json:"unterminated_copy,omitempty"`
	LastCopyTable      string   `json:"last_copy_table,omitempty"`
	LastCopyLineNumber int      `json:"last_copy_line_number,omitempty"`
	SampleCopyData     []string `json:"sample_copy_data,omitempty"`

	// Structure analysis
	HasCreateStatements bool `json:"has_create_statements,omitempty"`
	HasInsertStatements bool `json:"has_insert_statements,omitempty"`
	HasCopyStatements   bool `json:"has_copy_statements,omitempty"`
	HasTransactionBlock bool `json:"has_transaction_block,omitempty"`
	ProperlyTerminated  bool `json:"properly_terminated,omitempty"`

	// pg_restore analysis (for custom format)
	PgRestoreListable bool     `json:"pg_restore_listable,omitempty"`
	PgRestoreError    string   `json:"pg_restore_error,omitempty"`
	TableCount        int      `json:"table_count,omitempty"`
	TableList         []string `json:"table_list,omitempty"`

	// Compression analysis
	GzipValid        bool    `json:"gzip_valid,omitempty"`
	GzipError        string  `json:"gzip_error,omitempty"`
	ExpandedSize     int64   `json:"expanded_size,omitempty"`
	CompressionRatio float64 `json:"compression_ratio,omitempty"`
}

// Diagnoser performs deep analysis of backup files
type Diagnoser struct {
	log     logger.Logger
	verbose bool
}

// NewDiagnoser creates a new diagnoser
func NewDiagnoser(log logger.Logger, verbose bool) *Diagnoser {
	return &Diagnoser{
		log:     log,
		verbose: verbose,
	}
}
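// --- Illustrative usage sketch (not part of this file) ---
// A minimal example of driving the diagnoser programmatically. NewDiagnoser,
// DiagnoseFile, PrintDiagnosis and PrintDiagnosisJSON are all defined in this
// file; logger.New() is an assumed constructor for dbbackup/internal/logger.
//
//	log := logger.New()                        // assumed logger constructor
//	d := NewDiagnoser(log, true)               // verbose diagnosis
//	result, err := d.DiagnoseFile("/backups/mydb.dump")
//	if err == nil {
//		d.PrintDiagnosis(result)           // human-readable report
//		_ = d.PrintDiagnosisJSON(result)   // or machine-readable JSON
//	}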
// DiagnoseFile performs comprehensive diagnosis of a backup file
func (d *Diagnoser) DiagnoseFile(filePath string) (*DiagnoseResult, error) {
	result := &DiagnoseResult{
		FilePath: filePath,
		FileName: filepath.Base(filePath),
		Details:  &DiagnoseDetails{},
		IsValid:  true, // Assume valid until proven otherwise
	}

	// Check the file exists and get its size
	stat, err := os.Stat(filePath)
	if err != nil {
		result.IsValid = false
		result.Errors = append(result.Errors, fmt.Sprintf("Cannot access file: %v", err))
		return result, nil
	}
	result.FileSize = stat.Size()

	if result.FileSize == 0 {
		result.IsValid = false
		result.IsTruncated = true
		result.Errors = append(result.Errors, "File is empty (0 bytes)")
		return result, nil
	}

	// Detect format
	result.Format = DetectArchiveFormat(filePath)
	result.DetectedFormat = result.Format.String()

	// Analyze based on format
	switch result.Format {
	case FormatPostgreSQLDump:
		d.diagnosePgDump(filePath, result)
	case FormatPostgreSQLDumpGz:
		d.diagnosePgDumpGz(filePath, result)
	case FormatPostgreSQLSQL:
		d.diagnoseSQLScript(filePath, false, result)
	case FormatPostgreSQLSQLGz:
		d.diagnoseSQLScript(filePath, true, result)
	case FormatClusterTarGz:
		d.diagnoseClusterArchive(filePath, result)
	default:
		result.Warnings = append(result.Warnings, "Unknown format - limited diagnosis available")
		d.diagnoseUnknown(filePath, result)
	}

	return result, nil
}

// diagnosePgDump analyzes a PostgreSQL custom format dump
func (d *Diagnoser) diagnosePgDump(filePath string, result *DiagnoseResult) {
	file, err := os.Open(filePath)
	if err != nil {
		result.IsValid = false
		result.Errors = append(result.Errors, fmt.Sprintf("Cannot open file: %v", err))
		return
	}
	defer file.Close()

	// Read the first 512 bytes
	header := make([]byte, 512)
	n, err := file.Read(header)
	if err != nil && err != io.EOF {
		result.IsValid = false
		result.Errors = append(result.Errors, fmt.Sprintf("Cannot read header: %v", err))
		return
	}

	// Check the PGDMP signature
	if n >= 5 && string(header[:5]) == "PGDMP" {
		result.Details.HasPGDMPSignature = true
		result.Details.FirstBytes = "PGDMP..."
	} else {
		result.IsValid = false
		result.IsCorrupted = true
		result.Details.HasPGDMPSignature = false
		result.Details.FirstBytes = fmt.Sprintf("%q", header[:minInt(n, 20)])
		result.Errors = append(result.Errors,
			"Missing PGDMP signature - file is NOT PostgreSQL custom format",
			"This file may be SQL format incorrectly named as .dump",
			"Try: file "+filePath+" to check the actual file type")
		return
	}

	// Try pg_restore --list to verify dump integrity
	d.verifyWithPgRestore(filePath, result)
}

// diagnosePgDumpGz analyzes a compressed PostgreSQL custom format dump
func (d *Diagnoser) diagnosePgDumpGz(filePath string, result *DiagnoseResult) {
	file, err := os.Open(filePath)
	if err != nil {
		result.IsValid = false
		result.Errors = append(result.Errors, fmt.Sprintf("Cannot open file: %v", err))
		return
	}
	defer file.Close()

	// Verify gzip integrity
	gz, err := gzip.NewReader(file)
	if err != nil {
		result.IsValid = false
		result.IsCorrupted = true
		result.Details.GzipValid = false
		result.Details.GzipError = err.Error()
		result.Errors = append(result.Errors,
			fmt.Sprintf("Invalid gzip format: %v", err),
			"The file may have been truncated or corrupted during transfer")
		return
	}
	result.Details.GzipValid = true

	// Read and check the header
	header := make([]byte, 512)
	n, err := gz.Read(header)
	if err != nil && err != io.EOF {
		result.IsValid = false
		result.Errors = append(result.Errors, fmt.Sprintf("Cannot read decompressed header: %v", err))
		gz.Close()
		return
	}
	gz.Close()

	// Check the PGDMP signature
	if n >= 5 && string(header[:5]) == "PGDMP" {
		result.Details.HasPGDMPSignature = true
		result.Details.FirstBytes = "PGDMP..."
	} else {
		result.Details.HasPGDMPSignature = false
		result.Details.FirstBytes = fmt.Sprintf("%q", header[:minInt(n, 20)])

		// Check if it's actually SQL content
		content := string(header[:n])
		if strings.Contains(content, "PostgreSQL") || strings.Contains(content, "pg_dump") ||
			strings.Contains(content, "SET ") || strings.Contains(content, "CREATE ") {
			result.Details.HasSQLHeader = true
			result.Warnings = append(result.Warnings,
				"File contains SQL text but has a .dump extension",
				"This appears to be SQL format, not custom format",
				"Restore should use psql, not pg_restore")
		} else {
			result.IsValid = false
			result.IsCorrupted = true
			result.Errors = append(result.Errors,
				"Missing PGDMP signature in decompressed content",
				"File is neither custom format nor valid SQL")
		}
		return
	}

	// Verify full gzip stream integrity by reading to the end
	file.Seek(0, io.SeekStart)
	gz, err = gzip.NewReader(file)
	if err != nil {
		// Unlikely: the header was already decompressed once above
		result.Warnings = append(result.Warnings, fmt.Sprintf("Cannot re-read gzip stream: %v", err))
		return
	}

	var totalRead int64
	buf := make([]byte, 32*1024)
	for {
		n, err := gz.Read(buf)
		totalRead += int64(n)
		if err == io.EOF {
			break
		}
		if err != nil {
			result.IsValid = false
			result.IsTruncated = true
			result.Details.ExpandedSize = totalRead
			result.Errors = append(result.Errors,
				fmt.Sprintf("Gzip stream truncated after %d bytes: %v", totalRead, err),
				"The backup file appears to be incomplete",
				"Check whether the backup process completed successfully")
			gz.Close()
			return
		}
	}
	gz.Close()

	result.Details.ExpandedSize = totalRead
	if result.FileSize > 0 {
		result.Details.CompressionRatio = float64(totalRead) / float64(result.FileSize)
	}
}
// diagnoseSQLScript analyzes SQL script format
func (d *Diagnoser) diagnoseSQLScript(filePath string, compressed bool, result *DiagnoseResult) {
	var reader io.Reader
	var file *os.File
	var gz *gzip.Reader
	var err error

	file, err = os.Open(filePath)
	if err != nil {
		result.IsValid = false
		result.Errors = append(result.Errors, fmt.Sprintf("Cannot open file: %v", err))
		return
	}
	defer file.Close()

	if compressed {
		gz, err = gzip.NewReader(file)
		if err != nil {
			result.IsValid = false
			result.IsCorrupted = true
			result.Details.GzipValid = false
			result.Details.GzipError = err.Error()
			result.Errors = append(result.Errors, fmt.Sprintf("Invalid gzip format: %v", err))
			return
		}
		result.Details.GzipValid = true
		reader = gz
		defer gz.Close()
	} else {
		reader = file
	}

	// Analyze the SQL content
	scanner := bufio.NewScanner(reader)
	// Increase the buffer size for large lines (COPY data can have very long lines)
	buf := make([]byte, 0, 1024*1024)
	scanner.Buffer(buf, 10*1024*1024)

	var lineNumber int
	var inCopyBlock bool
	var lastCopyTable string
	var copyStartLine int
	var copyDataSamples []string

	copyBlockPattern := regexp.MustCompile(`^COPY\s+("?[\w\."]+)"?\s+\(`)
	copyEndPattern := regexp.MustCompile(`^\\\.`)

	for scanner.Scan() {
		lineNumber++
		line := scanner.Text()

		// Check the first few lines for a header
		if lineNumber <= 10 {
			if strings.Contains(line, "PostgreSQL") || strings.Contains(line, "pg_dump") {
				result.Details.HasSQLHeader = true
			}
		}

		// Track structure
		upperLine := strings.ToUpper(strings.TrimSpace(line))
		if strings.HasPrefix(upperLine, "CREATE ") {
			result.Details.HasCreateStatements = true
		}
		if strings.HasPrefix(upperLine, "INSERT ") {
			result.Details.HasInsertStatements = true
		}
		if strings.HasPrefix(upperLine, "BEGIN") {
			result.Details.HasTransactionBlock = true
		}

		// Track COPY blocks
		if copyBlockPattern.MatchString(line) {
			if inCopyBlock {
				// The previous COPY block was never terminated!
				result.Details.UnterminatedCopy = true
				result.IsTruncated = true
				result.IsValid = false
				result.Errors = append(result.Errors,
					fmt.Sprintf("COPY block for '%s' starting at line %d was never terminated",
						lastCopyTable, copyStartLine))
			}

			inCopyBlock = true
			result.Details.HasCopyStatements = true
			result.Details.CopyBlockCount++

			matches := copyBlockPattern.FindStringSubmatch(line)
			if len(matches) > 1 {
				lastCopyTable = matches[1]
			}
			copyStartLine = lineNumber
			copyDataSamples = nil

		} else if copyEndPattern.MatchString(line) {
			inCopyBlock = false

		} else if inCopyBlock {
			// We're inside COPY data
			if len(copyDataSamples) < 3 {
				copyDataSamples = append(copyDataSamples, truncateString(line, 100))
			}
		}

		// Log scan progress periodically in verbose mode
		if lineNumber > 0 && (lineNumber%100000 == 0) && d.verbose {
			d.log.Debug("Scanning SQL file", "lines_processed", lineNumber)
		}
	}

	if err := scanner.Err(); err != nil {
		result.IsValid = false
		result.IsTruncated = true
		result.Errors = append(result.Errors,
			fmt.Sprintf("Error reading file at line %d: %v", lineNumber, err),
			"File may be truncated or contain invalid data")
	}

	// Check if we ended while still inside a COPY block
	if inCopyBlock {
		result.Details.UnterminatedCopy = true
		result.Details.LastCopyTable = lastCopyTable
		result.Details.LastCopyLineNumber = copyStartLine
		result.Details.SampleCopyData = copyDataSamples
		result.IsTruncated = true
		result.IsValid = false
		result.Errors = append(result.Errors,
			fmt.Sprintf("File ends inside COPY block for table '%s' (started at line %d)",
				lastCopyTable, copyStartLine),
			"The backup was truncated during data export",
			"This explains the 'syntax error' during restore - COPY data is being interpreted as SQL")

		if len(copyDataSamples) > 0 {
			result.Errors = append(result.Errors,
				fmt.Sprintf("Sample orphaned data: %s", copyDataSamples[0]))
		}
	} else {
		result.Details.ProperlyTerminated = true
	}

	// Read the last bytes for additional context
	if !compressed {
		file.Seek(-min(500, result.FileSize), io.SeekEnd)
		lastBytes := make([]byte, 500)
		n, _ := file.Read(lastBytes)
		result.Details.LastBytes = strings.TrimSpace(string(lastBytes[:n]))
	}
}
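// --- Illustrative sketch (not part of this file) ---
// The truncation check above reduces to one invariant: a dump that ends while
// still inside COPY data (no closing "\." terminator) was cut off mid-export.
// The same idea over an in-memory dump:
//
//	dump := "COPY public.t (id) FROM stdin;\n1\n2\n" // no "\." terminator
//	inCopy := false
//	for _, line := range strings.Split(dump, "\n") {
//		switch {
//		case strings.HasPrefix(line, "COPY "):
//			inCopy = true
//		case line == `\.`:
//			inCopy = false
//		}
//	}
//	fmt.Println("truncated:", inCopy) // prints: truncated: true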
// diagnoseClusterArchive analyzes a cluster tar.gz archive
func (d *Diagnoser) diagnoseClusterArchive(filePath string, result *DiagnoseResult) {
	// First verify tar.gz integrity
	cmd := exec.Command("tar", "-tzf", filePath)
	output, err := cmd.Output()
	if err != nil {
		result.IsValid = false
		result.IsCorrupted = true
		result.Errors = append(result.Errors,
			fmt.Sprintf("Tar archive is invalid or corrupted: %v", err),
			"Run: tar -tzf "+filePath+" 2>&1 | tail -20")
		return
	}

	// Parse the tar listing
	files := strings.Split(strings.TrimSpace(string(output)), "\n")
	var dumpFiles []string
	hasGlobals := false
	hasMetadata := false

	for _, f := range files {
		if strings.HasSuffix(f, ".dump") || strings.HasSuffix(f, ".sql.gz") {
			dumpFiles = append(dumpFiles, f)
		}
		if strings.Contains(f, "globals.sql") {
			hasGlobals = true
		}
		if strings.Contains(f, "manifest.json") || strings.Contains(f, "metadata.json") {
			hasMetadata = true
		}
	}

	result.Details.TableCount = len(dumpFiles)
	result.Details.TableList = dumpFiles

	if len(dumpFiles) == 0 {
		result.Warnings = append(result.Warnings, "No database dump files found in archive")
	}

	if !hasGlobals {
		result.Warnings = append(result.Warnings, "No globals.sql found - roles/tablespaces won't be restored")
	}

	if !hasMetadata {
		result.Warnings = append(result.Warnings, "No manifest/metadata found - limited validation possible")
	}

	// In verbose mode, list the individual dumps inside the archive
	if d.verbose && len(dumpFiles) > 0 {
		d.log.Info("Cluster archive contains databases", "count", len(dumpFiles))
		for _, df := range dumpFiles {
			d.log.Info(" - " + df)
		}
	}
}

// diagnoseUnknown handles unknown format files
func (d *Diagnoser) diagnoseUnknown(filePath string, result *DiagnoseResult) {
	file, err := os.Open(filePath)
	if err != nil {
		return
	}
	defer file.Close()

	header := make([]byte, 512)
	n, _ := file.Read(header)
	result.Details.FirstBytes = fmt.Sprintf("%q", header[:minInt(n, 50)])

	// Try to identify the file by its content
	content := string(header[:n])
	if strings.Contains(content, "PGDMP") {
		result.Warnings = append(result.Warnings, "File appears to be PostgreSQL custom format - rename to .dump")
	} else if strings.Contains(content, "PostgreSQL") || strings.Contains(content, "pg_dump") {
		result.Warnings = append(result.Warnings, "File appears to be PostgreSQL SQL - rename to .sql")
	} else if bytes.HasPrefix(header, []byte{0x1f, 0x8b}) {
		result.Warnings = append(result.Warnings, "File appears to be gzip compressed - add a .gz extension")
	}
}

// verifyWithPgRestore uses pg_restore --list to verify dump integrity
func (d *Diagnoser) verifyWithPgRestore(filePath string, result *DiagnoseResult) {
	cmd := exec.Command("pg_restore", "--list", filePath)
	output, err := cmd.CombinedOutput()

	if err != nil {
		result.Details.PgRestoreListable = false
		result.Details.PgRestoreError = string(output)

		// Check for specific errors
		errStr := string(output)
		if strings.Contains(errStr, "unexpected end of file") ||
			strings.Contains(errStr, "invalid large-object TOC entry") {
			result.IsTruncated = true
			result.IsValid = false
			result.Errors = append(result.Errors,
				"pg_restore reports a truncated or incomplete dump file",
				fmt.Sprintf("Error: %s", truncateString(errStr, 200)))
		} else if strings.Contains(errStr, "not a valid archive") {
			result.IsCorrupted = true
			result.IsValid = false
			result.Errors = append(result.Errors,
				"pg_restore reports the file is not a valid archive",
				"File may be corrupted or in the wrong format")
		} else {
			result.Warnings = append(result.Warnings,
				fmt.Sprintf("pg_restore --list warning: %s", truncateString(errStr, 200)))
		}
		return
	}

	result.Details.PgRestoreListable = true

	// Count tables in the TOC
	lines := strings.Split(string(output), "\n")
	tableCount := 0
	var tables []string
	for _, line := range lines {
		if strings.Contains(line, " TABLE DATA ") {
			tableCount++
			if len(tables) < 20 {
				parts := strings.Fields(line)
				// TOC lines look like "id; catOID oid TABLE DATA schema name owner",
				// so the table name is the second-to-last field (the last is the owner)
				if len(parts) > 3 {
					tables = append(tables, parts[len(parts)-2])
				}
			}
		}
	}
	result.Details.TableCount = tableCount
	result.Details.TableList = tables
}
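// --- Illustrative sketch (not part of this file) ---
// pg_restore --list emits one TOC line per object, roughly of the form:
//
//	4283; 0 16420 TABLE DATA public users dbadmin
//
// strings.Fields turns that into ["4283;" "0" "16420" "TABLE" "DATA"
// "public" "users" "dbadmin"], so the table name is the second-to-last
// field and the owner is the last - which is why the loop above indexes
// parts[len(parts)-2].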
// DiagnoseClusterDumps extracts and diagnoses all dumps in a cluster archive
func (d *Diagnoser) DiagnoseClusterDumps(archivePath, tempDir string) ([]*DiagnoseResult, error) {
	// Extract to a temp directory
	cmd := exec.Command("tar", "-xzf", archivePath, "-C", tempDir)
	if err := cmd.Run(); err != nil {
		return nil, fmt.Errorf("failed to extract archive: %w", err)
	}

	// Find dump files
	dumpsDir := filepath.Join(tempDir, "dumps")
	entries, err := os.ReadDir(dumpsDir)
	if err != nil {
		// Try without the dumps subdirectory
		entries, err = os.ReadDir(tempDir)
		if err != nil {
			return nil, fmt.Errorf("cannot read extracted files: %w", err)
		}
		dumpsDir = tempDir
	}

	var results []*DiagnoseResult
	for _, entry := range entries {
		if entry.IsDir() {
			continue
		}

		name := entry.Name()
		if !strings.HasSuffix(name, ".dump") && !strings.HasSuffix(name, ".sql.gz") &&
			!strings.HasSuffix(name, ".sql") {
			continue
		}

		dumpPath := filepath.Join(dumpsDir, name)
		d.log.Info("Diagnosing dump file", "file", name)

		result, err := d.DiagnoseFile(dumpPath)
		if err != nil {
			d.log.Warn("Failed to diagnose file", "file", name, "error", err)
			continue
		}
		results = append(results, result)
	}

	return results, nil
}

// PrintDiagnosis outputs a human-readable diagnosis report
func (d *Diagnoser) PrintDiagnosis(result *DiagnoseResult) {
	fmt.Println("\n" + strings.Repeat("=", 70))
	fmt.Printf("📋 DIAGNOSIS: %s\n", result.FileName)
	fmt.Println(strings.Repeat("=", 70))

	// Basic info
	fmt.Printf("\nFile:   %s\n", result.FilePath)
	fmt.Printf("Size:   %s\n", formatBytes(result.FileSize))
	fmt.Printf("Format: %s\n", result.DetectedFormat)

	// Status
	if result.IsValid {
		fmt.Println("\n✅ STATUS: VALID")
	} else {
		fmt.Println("\n❌ STATUS: INVALID")
	}

	if result.IsTruncated {
		fmt.Println("⚠️  TRUNCATED: Yes - file appears incomplete")
	}
	if result.IsCorrupted {
		fmt.Println("⚠️  CORRUPTED: Yes - file structure is damaged")
	}

	// Details
	if result.Details != nil {
		fmt.Println("\n📊 DETAILS:")

		if result.Details.HasPGDMPSignature {
			fmt.Println("  ✓ Has PGDMP signature (PostgreSQL custom format)")
		}
		if result.Details.HasSQLHeader {
			fmt.Println("  ✓ Has PostgreSQL SQL header")
		}
		if result.Details.GzipValid {
			fmt.Println("  ✓ Gzip compression valid")
		}
		if result.Details.PgRestoreListable {
			fmt.Printf("  ✓ pg_restore can list contents (%d tables)\n", result.Details.TableCount)
		}
		if result.Details.CopyBlockCount > 0 {
			fmt.Printf("  • Contains %d COPY blocks\n", result.Details.CopyBlockCount)
		}
		if result.Details.UnterminatedCopy {
			fmt.Printf("  ✗ Unterminated COPY block: %s (line %d)\n",
				result.Details.LastCopyTable, result.Details.LastCopyLineNumber)
		}
		if result.Details.ProperlyTerminated {
			fmt.Println("  ✓ All COPY blocks properly terminated")
		}
		if result.Details.ExpandedSize > 0 {
			fmt.Printf("  • Expanded size: %s (ratio: %.1fx)\n",
				formatBytes(result.Details.ExpandedSize), result.Details.CompressionRatio)
		}
	}

	// Errors
	if len(result.Errors) > 0 {
		fmt.Println("\n❌ ERRORS:")
		for _, e := range result.Errors {
			fmt.Printf("  • %s\n", e)
		}
	}

	// Warnings
	if len(result.Warnings) > 0 {
		fmt.Println("\n⚠️  WARNINGS:")
		for _, w := range result.Warnings {
			fmt.Printf("  • %s\n", w)
		}
	}

	// Recommendations
	if !result.IsValid {
		fmt.Println("\n💡 RECOMMENDATIONS:")
		if result.IsTruncated {
			fmt.Println("  1. Re-run the backup process for this database")
			fmt.Println("  2. Check disk space on the backup server during backup")
			fmt.Println("  3. Verify network stability if the backup was remote")
			fmt.Println("  4. Check the backup logs for errors during the backup")
		}
		if result.IsCorrupted {
			fmt.Println("  1. Verify the backup file was transferred completely")
			fmt.Println("  2. Check if the backup file was modified after creation")
			fmt.Println("  3. Try restoring from a previous backup")
		}
	}

	fmt.Println(strings.Repeat("=", 70))
}

// PrintDiagnosisJSON outputs the diagnosis as JSON
func (d *Diagnoser) PrintDiagnosisJSON(result *DiagnoseResult) error {
	output, err := json.MarshalIndent(result, "", "  ")
	if err != nil {
		return err
	}
	fmt.Println(string(output))
	return nil
}

// Helper functions

func truncateString(s string, maxLen int) string {
	if len(s) <= maxLen {
		return s
	}
	return s[:maxLen-3] + "..."
}

func formatBytes(bytes int64) string {
	const unit = 1024
	if bytes < unit {
		return fmt.Sprintf("%d B", bytes)
	}
	div, exp := int64(unit), 0
	for n := bytes / unit; n >= unit; n /= unit {
		div *= unit
		exp++
	}
	return fmt.Sprintf("%.1f %cB", float64(bytes)/float64(div), "KMGTPE"[exp])
}

func min(a, b int64) int64 {
	if a < b {
		return a
	}
	return b
}

func minInt(a, b int) int {
	if a < b {
		return a
	}
	return b
}
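// --- Illustrative sketch (not part of this file) ---
// formatBytes walks the 1024-based unit ladder; a few worked examples:
//
//	formatBytes(512)     // "512 B"
//	formatBytes(1536)    // "1.5 KB"  (1536/1024 = 1.5)
//	formatBytes(3 << 30) // "3.0 GB"  (3 GiB)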
@@ -27,6 +27,8 @@ type Engine struct {
	progress         progress.Indicator
	detailedReporter *progress.DetailedReporter
	dryRun           bool
	debugLogPath     string          // Path to save debug log on error
	errorCollector   *ErrorCollector // Collects detailed error info
}

// New creates a new restore engine

@@ -77,6 +79,11 @@ func NewWithProgress(cfg *config.Config, log logger.Logger, db database.Database
	}
}

// SetDebugLogPath enables saving detailed error reports on failure
func (e *Engine) SetDebugLogPath(path string) {
	e.debugLogPath = path
}

// loggerAdapter adapts our logger to the progress.Logger interface
type loggerAdapter struct {
	logger logger.Logger

@@ -306,6 +313,11 @@ func (e *Engine) restoreMySQLSQL(ctx context.Context, archivePath, targetDB stri

// executeRestoreCommand executes a restore command
func (e *Engine) executeRestoreCommand(ctx context.Context, cmdArgs []string) error {
	return e.executeRestoreCommandWithContext(ctx, cmdArgs, "", "", FormatUnknown)
}

// executeRestoreCommandWithContext executes a restore command with error collection context
func (e *Engine) executeRestoreCommandWithContext(ctx context.Context, cmdArgs []string, archivePath, targetDB string, format ArchiveFormat) error {
	e.log.Info("Executing restore command", "command", strings.Join(cmdArgs, " "))

	cmd := exec.CommandContext(ctx, cmdArgs[0], cmdArgs[1:]...)

@@ -316,6 +328,12 @@ func (e *Engine) executeRestoreCommand(ctx context.Context, cmdArgs []string) er
		fmt.Sprintf("MYSQL_PWD=%s", e.cfg.Password),
	)

	// Create an error collector if a debug log path is set
	var collector *ErrorCollector
	if e.debugLogPath != "" {
		collector = NewErrorCollector(e.cfg, e.log, archivePath, targetDB, format, true)
	}

	// Stream stderr to avoid memory issues with large output.
	// Don't use CombinedOutput() as it loads everything into memory.
	stderr, err := cmd.StderrPipe()

@@ -336,6 +354,12 @@ func (e *Engine) executeRestoreCommand(ctx context.Context, cmdArgs []string) er
		n, err := stderr.Read(buf)
		if n > 0 {
			chunk := string(buf[:n])

			// Feed the error collector if enabled
			if collector != nil {
				collector.CaptureStderr(chunk)
			}

			// Only capture REAL errors, not verbose output
			if strings.Contains(chunk, "ERROR:") || strings.Contains(chunk, "FATAL:") || strings.Contains(chunk, "error:") {
				lastError = strings.TrimSpace(chunk)

@@ -352,6 +376,12 @@ func (e *Engine) executeRestoreCommand(ctx context.Context, cmdArgs []string) er
	}

	if err := cmd.Wait(); err != nil {
		// Get the exit code
		exitCode := 1
		if exitErr, ok := err.(*exec.ExitError); ok {
			exitCode = exitErr.ExitCode()
		}

		// PostgreSQL pg_restore returns exit code 1 even for ignorable errors.
		// Check if the errors are ignorable (already exists, duplicate, etc.)
		if lastError != "" && e.isIgnorableError(lastError) {

@@ -360,8 +390,12 @@ func (e *Engine) executeRestoreCommand(ctx context.Context, cmdArgs []string) er
		}

		// Classify the error and provide helpful hints
		var classification *checks.ErrorClassification
		var errType, errHint string
		if lastError != "" {
			classification = checks.ClassifyError(lastError)
			errType = classification.Type
			errHint = classification.Hint
			e.log.Error("Restore command failed",
				"error", err,
				"last_stderr", lastError,

@@ -369,11 +403,37 @@ func (e *Engine) executeRestoreCommand(ctx context.Context, cmdArgs []string) er
				"error_type", classification.Type,
				"hint", classification.Hint,
				"action", classification.Action)
		} else {
			e.log.Error("Restore command failed", "error", err, "error_count", errorCount)
		}

		// Generate and save an error report if the collector is enabled
		if collector != nil {
			collector.SetExitCode(exitCode)
			report := collector.GenerateReport(lastError, errType, errHint)

			// Print the report to the console
			collector.PrintReport(report)

			// Save it to a file
			if e.debugLogPath != "" {
				if saveErr := collector.SaveReport(report, e.debugLogPath); saveErr != nil {
					e.log.Warn("Failed to save debug log", "error", saveErr)
				} else {
					e.log.Info("Debug log saved", "path", e.debugLogPath)
					fmt.Printf("\n📋 Detailed error report saved to: %s\n", e.debugLogPath)
				}
			}
		}

		if lastError != "" {
			return fmt.Errorf("restore failed: %w (last error: %s, total errors: %d) - %s",
				err, lastError, errorCount, errHint)
		}
		return fmt.Errorf("restore failed: %w", err)
	}
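// --- Illustrative sketch (not part of this diff) ---
// How a caller might wire the --save-debug-log flag into the engine.
// SetDebugLogPath is the method added above; the constructor arguments and
// flag variable here are assumptions for illustration:
//
//	eng := restore.NewWithProgress(cfg, log, db /* ... */) // assumed args
//	if debugLogPath != "" {                                // from --save-debug-log
//		eng.SetDebugLogPath(debugLogPath)              // e.g. "/tmp/restore-debug.json"
//	}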
internal/restore/error_report.go (new file, 569 lines)
@@ -0,0 +1,569 @@
package restore

import (
	"bufio"
	"compress/gzip"
	"encoding/json"
	"fmt"
	"io"
	"os"
	"os/exec"
	"path/filepath"
	"runtime"
	"strings"
	"time"

	"dbbackup/internal/config"
	"dbbackup/internal/logger"
)

// RestoreErrorReport contains comprehensive information about a restore failure
type RestoreErrorReport struct {
	// Metadata
	Timestamp time.Time `json:"timestamp"`
	Version   string    `json:"version"`
	GoVersion string    `json:"go_version"`
	OS        string    `json:"os"`
	Arch      string    `json:"arch"`

	// Archive info
	ArchivePath   string `json:"archive_path"`
	ArchiveSize   int64  `json:"archive_size"`
	ArchiveFormat string `json:"archive_format"`

	// Database info
	TargetDB     string `json:"target_db"`
	DatabaseType string `json:"database_type"`

	// Error details
	ExitCode     int    `json:"exit_code"`
	ErrorMessage string `json:"error_message"`
	ErrorType    string `json:"error_type"`
	ErrorHint    string `json:"error_hint"`
	TotalErrors  int    `json:"total_errors"`

	// Captured output
	LastStderr  []string `json:"last_stderr"`
	FirstErrors []string `json:"first_errors"`

	// Context around the failure
	FailureContext *FailureContext `json:"failure_context,omitempty"`

	// Diagnosis results
	DiagnosisResult *DiagnoseResult `json:"diagnosis_result,omitempty"`

	// Environment (sanitized)
	PostgresVersion  string `json:"postgres_version,omitempty"`
	PgRestoreVersion string `json:"pg_restore_version,omitempty"`
	PsqlVersion      string `json:"psql_version,omitempty"`

	// Recommendations
	Recommendations []string `json:"recommendations"`
}

// FailureContext captures context around where the failure occurred
type FailureContext struct {
	// For SQL/COPY errors
	FailedLine       int      `json:"failed_line,omitempty"`
	FailedStatement  string   `json:"failed_statement,omitempty"`
	SurroundingLines []string `json:"surrounding_lines,omitempty"`

	// For COPY block errors
	InCopyBlock    bool     `json:"in_copy_block,omitempty"`
	CopyTableName  string   `json:"copy_table_name,omitempty"`
	CopyStartLine  int      `json:"copy_start_line,omitempty"`
	SampleCopyData []string `json:"sample_copy_data,omitempty"`

	// File position info
	BytePosition    int64   `json:"byte_position,omitempty"`
	PercentComplete float64 `json:"percent_complete,omitempty"`
}

// ErrorCollector captures detailed error information during restore
type ErrorCollector struct {
	log         logger.Logger
	cfg         *config.Config
	archivePath string
	targetDB    string
	format      ArchiveFormat

	// Captured data
	stderrLines []string
	firstErrors []string
	lastErrors  []string
	totalErrors int
	exitCode    int

	// Limits
	maxStderrLines  int
	maxErrorCapture int

	// State
	startTime time.Time
	enabled   bool
}

// NewErrorCollector creates a new error collector
func NewErrorCollector(cfg *config.Config, log logger.Logger, archivePath, targetDB string, format ArchiveFormat, enabled bool) *ErrorCollector {
	return &ErrorCollector{
		log:             log,
		cfg:             cfg,
		archivePath:     archivePath,
		targetDB:        targetDB,
		format:          format,
		stderrLines:     make([]string, 0, 100),
		firstErrors:     make([]string, 0, 10),
		lastErrors:      make([]string, 0, 10),
		maxStderrLines:  100,
		maxErrorCapture: 10,
		startTime:       time.Now(),
		enabled:         enabled,
	}
}

// CaptureStderr processes and captures stderr output
func (ec *ErrorCollector) CaptureStderr(chunk string) {
	if !ec.enabled {
		return
	}

	lines := strings.Split(chunk, "\n")
	for _, line := range lines {
		line = strings.TrimSpace(line)
		if line == "" {
			continue
		}

		// Keep only the last N lines of stderr
		if len(ec.stderrLines) >= ec.maxStderrLines {
			// Shift the slice, dropping the oldest line
			ec.stderrLines = ec.stderrLines[1:]
		}
		ec.stderrLines = append(ec.stderrLines, line)

		// Check if this is an error line
		if isErrorLine(line) {
			ec.totalErrors++

			// Capture the first N errors
			if len(ec.firstErrors) < ec.maxErrorCapture {
				ec.firstErrors = append(ec.firstErrors, line)
			}

			// Keep the last N errors (ring-buffer style)
			if len(ec.lastErrors) >= ec.maxErrorCapture {
				ec.lastErrors = ec.lastErrors[1:]
			}
			ec.lastErrors = append(ec.lastErrors, line)
		}
	}
}
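// --- Illustrative sketch (not part of this file) ---
// The capture above is deliberately bounded: however many millions of error
// lines a broken restore emits, memory holds at most maxStderrLines stderr
// lines plus maxErrorCapture first/last errors. The core ring-buffer move,
// condensed (stderrCh is an assumed chan string feeding lines):
//
//	const max = 100
//	var ring []string
//	for line := range stderrCh {      // unbounded input stream
//		if len(ring) >= max {
//			ring = ring[1:]       // drop the oldest line
//		}
//		ring = append(ring, line)     // keep the newest
//	}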
// SetExitCode records the exit code
func (ec *ErrorCollector) SetExitCode(code int) {
	ec.exitCode = code
}

// GenerateReport creates a comprehensive error report
func (ec *ErrorCollector) GenerateReport(errMessage string, errType string, errHint string) *RestoreErrorReport {
	report := &RestoreErrorReport{
		Timestamp:     time.Now(),
		Version:       "1.0.0", // TODO: inject actual version
		GoVersion:     runtime.Version(),
		OS:            runtime.GOOS,
		Arch:          runtime.GOARCH,
		ArchivePath:   ec.archivePath,
		ArchiveFormat: ec.format.String(),
		TargetDB:      ec.targetDB,
		DatabaseType:  getDatabaseType(ec.format),
		ExitCode:      ec.exitCode,
		ErrorMessage:  errMessage,
		ErrorType:     errType,
		ErrorHint:     errHint,
		TotalErrors:   ec.totalErrors,
		LastStderr:    ec.stderrLines,
		FirstErrors:   ec.firstErrors,
	}

	// Get the archive size
	if stat, err := os.Stat(ec.archivePath); err == nil {
		report.ArchiveSize = stat.Size()
	}

	// Get tool versions
	report.PostgresVersion = getCommandVersion("postgres", "--version")
	report.PgRestoreVersion = getCommandVersion("pg_restore", "--version")
	report.PsqlVersion = getCommandVersion("psql", "--version")

	// Analyze the failure context
	report.FailureContext = ec.analyzeFailureContext()

	// Run a diagnosis if not already done
	diagnoser := NewDiagnoser(ec.log, false)
	if diagResult, err := diagnoser.DiagnoseFile(ec.archivePath); err == nil {
		report.DiagnosisResult = diagResult
	}

	// Generate recommendations
	report.Recommendations = ec.generateRecommendations(report)

	return report
}

// analyzeFailureContext extracts context around the failure
func (ec *ErrorCollector) analyzeFailureContext() *FailureContext {
	ctx := &FailureContext{}

	// Look for a line number in the errors
	for _, errLine := range ec.lastErrors {
		if lineNum := extractLineNumber(errLine); lineNum > 0 {
			ctx.FailedLine = lineNum
			break
		}
	}

	// Look for COPY-related errors
	for _, errLine := range ec.lastErrors {
		if strings.Contains(errLine, "COPY") || strings.Contains(errLine, "syntax error") {
			ctx.InCopyBlock = true
			// Try to extract the table name
			if tableName := extractTableName(errLine); tableName != "" {
				ctx.CopyTableName = tableName
			}
			break
		}
	}

	// If we have a line number, try to get surrounding context from the dump
	if ctx.FailedLine > 0 && ec.archivePath != "" {
		ctx.SurroundingLines = ec.getSurroundingLines(ctx.FailedLine, 5)
	}

	return ctx
}

// getSurroundingLines reads lines around a specific line number from the dump
func (ec *ErrorCollector) getSurroundingLines(lineNum int, context int) []string {
	var reader io.Reader
	var lines []string

	file, err := os.Open(ec.archivePath)
	if err != nil {
		return nil
	}
	defer file.Close()

	// Handle compressed files
	if strings.HasSuffix(ec.archivePath, ".gz") {
		gz, err := gzip.NewReader(file)
		if err != nil {
			return nil
		}
		defer gz.Close()
		reader = gz
	} else {
		reader = file
	}

	scanner := bufio.NewScanner(reader)
	buf := make([]byte, 0, 1024*1024)
	scanner.Buffer(buf, 10*1024*1024)

	currentLine := 0
	startLine := lineNum - context
	endLine := lineNum + context

	if startLine < 1 {
		startLine = 1
	}

	for scanner.Scan() {
		currentLine++
		if currentLine >= startLine && currentLine <= endLine {
			prefix := "  "
			if currentLine == lineNum {
				prefix = "> "
			}
			lines = append(lines, fmt.Sprintf("%s%d: %s", prefix, currentLine, truncateString(scanner.Text(), 100)))
		}
		if currentLine > endLine {
			break
		}
	}

	return lines
}

// generateRecommendations provides actionable recommendations based on the error
func (ec *ErrorCollector) generateRecommendations(report *RestoreErrorReport) []string {
	var recs []string

	// Check the diagnosis results
	if report.DiagnosisResult != nil {
		if report.DiagnosisResult.IsTruncated {
			recs = append(recs,
				"CRITICAL: Backup file is truncated/incomplete",
				"Action: Re-run the backup for the affected database",
				"Check: Verify disk space was available during backup",
				"Check: Verify the network was stable during backup transfer",
			)
		}
		if report.DiagnosisResult.IsCorrupted {
			recs = append(recs,
				"CRITICAL: Backup file appears corrupted",
				"Action: Restore from a previous backup",
				"Action: Verify the backup file checksum if available",
			)
		}
		if report.DiagnosisResult.Details != nil && report.DiagnosisResult.Details.UnterminatedCopy {
			recs = append(recs,
				fmt.Sprintf("ISSUE: COPY block for table '%s' was not terminated",
					report.DiagnosisResult.Details.LastCopyTable),
				"Cause: Backup was interrupted during data export",
				"Action: Re-run the backup, ensuring it completes fully",
			)
		}
	}

	// Check error patterns
	if report.TotalErrors > 1000000 {
		recs = append(recs,
			"ISSUE: Millions of errors indicate a structural problem, not individual data issues",
			"Cause: Likely a wrong restore method or a truncated dump",
			"Check: Verify the dump format matches the restore command",
		)
	}

	// Check for common error types
	errLower := strings.ToLower(report.ErrorMessage)
	if strings.Contains(errLower, "syntax error") {
		recs = append(recs,
			"ISSUE: SQL syntax errors during restore",
			"Cause: COPY data being interpreted as SQL commands",
			"Check: Run 'dbbackup restore diagnose <archive>' for detailed analysis",
		)
	}

	if strings.Contains(errLower, "permission denied") {
		recs = append(recs,
			"ISSUE: Permission denied",
			"Action: Check that the database user has sufficient privileges",
			"Action: For ownership preservation, use a superuser account",
		)
	}

	if strings.Contains(errLower, "does not exist") {
		recs = append(recs,
			"ISSUE: Missing object reference",
			"Action: Ensure globals.sql was restored first (for roles/tablespaces)",
			"Action: Check that the target database was created",
		)
	}

	if len(recs) == 0 {
		recs = append(recs,
			"Run 'dbbackup restore diagnose <archive>' for detailed analysis",
			"Check the stderr output above for specific error messages",
			"Review the PostgreSQL/MySQL logs on the target server",
		)
	}

	return recs
}

// SaveReport saves the error report to a file
func (ec *ErrorCollector) SaveReport(report *RestoreErrorReport, outputPath string) error {
	// Create the directory if needed
	dir := filepath.Dir(outputPath)
	if err := os.MkdirAll(dir, 0755); err != nil {
		return fmt.Errorf("failed to create directory: %w", err)
	}

	// Marshal to JSON with indentation
	data, err := json.MarshalIndent(report, "", "  ")
	if err != nil {
		return fmt.Errorf("failed to marshal report: %w", err)
	}

	// Write the file
	if err := os.WriteFile(outputPath, data, 0644); err != nil {
		return fmt.Errorf("failed to write report: %w", err)
	}

	return nil
}

// PrintReport prints a human-readable summary of the error report
func (ec *ErrorCollector) PrintReport(report *RestoreErrorReport) {
	fmt.Println()
	fmt.Println(strings.Repeat("═", 70))
	fmt.Println("  🔴 RESTORE ERROR REPORT")
	fmt.Println(strings.Repeat("═", 70))

	fmt.Printf("\n📅 Timestamp:    %s\n", report.Timestamp.Format("2006-01-02 15:04:05"))
	fmt.Printf("📦 Archive:      %s\n", filepath.Base(report.ArchivePath))
	fmt.Printf("📊 Format:       %s\n", report.ArchiveFormat)
	fmt.Printf("🎯 Target DB:    %s\n", report.TargetDB)
	fmt.Printf("⚠️  Exit Code:    %d\n", report.ExitCode)
	fmt.Printf("❌ Total Errors: %d\n", report.TotalErrors)

	fmt.Println("\n" + strings.Repeat("─", 70))
	fmt.Println("ERROR DETAILS:")
	fmt.Println(strings.Repeat("─", 70))

	fmt.Printf("\nType:    %s\n", report.ErrorType)
	fmt.Printf("Message: %s\n", report.ErrorMessage)
	if report.ErrorHint != "" {
		fmt.Printf("Hint:    %s\n", report.ErrorHint)
	}

	// Show the failure context
	if report.FailureContext != nil && report.FailureContext.FailedLine > 0 {
		fmt.Println("\n" + strings.Repeat("─", 70))
		fmt.Println("FAILURE CONTEXT:")
		fmt.Println(strings.Repeat("─", 70))

		fmt.Printf("\nFailed at line: %d\n", report.FailureContext.FailedLine)
		if report.FailureContext.InCopyBlock {
			fmt.Printf("Inside COPY block for table: %s\n", report.FailureContext.CopyTableName)
		}

		if len(report.FailureContext.SurroundingLines) > 0 {
			fmt.Println("\nSurrounding lines:")
			for _, line := range report.FailureContext.SurroundingLines {
				fmt.Println(line)
			}
		}
	}

	// Show the first few errors
	if len(report.FirstErrors) > 0 {
		fmt.Println("\n" + strings.Repeat("─", 70))
		fmt.Println("FIRST ERRORS:")
		fmt.Println(strings.Repeat("─", 70))

		for i, err := range report.FirstErrors {
			if i >= 5 {
				fmt.Printf("... and %d more\n", len(report.FirstErrors)-5)
				break
			}
			fmt.Printf("  %d. %s\n", i+1, truncateString(err, 100))
		}
	}

	// Show a diagnosis summary
	if report.DiagnosisResult != nil && !report.DiagnosisResult.IsValid {
		fmt.Println("\n" + strings.Repeat("─", 70))
		fmt.Println("DIAGNOSIS:")
		fmt.Println(strings.Repeat("─", 70))

		if report.DiagnosisResult.IsTruncated {
			fmt.Println("  ❌ File is TRUNCATED")
		}
		if report.DiagnosisResult.IsCorrupted {
			fmt.Println("  ❌ File is CORRUPTED")
		}
		for i, err := range report.DiagnosisResult.Errors {
			if i >= 3 {
				break
			}
			fmt.Printf("  • %s\n", err)
		}
	}

	// Show recommendations
	fmt.Println("\n" + strings.Repeat("─", 70))
	fmt.Println("💡 RECOMMENDATIONS:")
	fmt.Println(strings.Repeat("─", 70))

	for _, rec := range report.Recommendations {
		fmt.Printf("  • %s\n", rec)
	}

	// Show tool versions
	fmt.Println("\n" + strings.Repeat("─", 70))
	fmt.Println("ENVIRONMENT:")
	fmt.Println(strings.Repeat("─", 70))

	fmt.Printf("  OS: %s/%s\n", report.OS, report.Arch)
	fmt.Printf("  Go: %s\n", report.GoVersion)
	if report.PgRestoreVersion != "" {
		fmt.Printf("  pg_restore: %s\n", report.PgRestoreVersion)
	}
	if report.PsqlVersion != "" {
		fmt.Printf("  psql: %s\n", report.PsqlVersion)
	}

	fmt.Println(strings.Repeat("═", 70))
}

// Helper functions

func isErrorLine(line string) bool {
	return strings.Contains(line, "ERROR:") ||
		strings.Contains(line, "FATAL:") ||
		strings.Contains(line, "error:") ||
		strings.Contains(line, "PANIC:")
}

func extractLineNumber(errLine string) int {
	// Look for patterns like "LINE 1:" or "line 123"
	patterns := []string{"LINE ", "line "}
	for _, pattern := range patterns {
		if idx := strings.Index(errLine, pattern); idx >= 0 {
			numStart := idx + len(pattern)
			numEnd := numStart
			for numEnd < len(errLine) && errLine[numEnd] >= '0' && errLine[numEnd] <= '9' {
				numEnd++
			}
			if numEnd > numStart {
				var num int
				fmt.Sscanf(errLine[numStart:numEnd], "%d", &num)
				return num
			}
		}
	}
	return 0
}
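// --- Illustrative sketch (not part of this file) ---
// extractLineNumber finds the first "LINE "/"line " marker and parses the
// digits that follow, e.g.:
//
//	extractLineNumber(`ERROR:  syntax error at or near "x" LINE 42: ...`) // 42
//	extractLineNumber("no marker present")                                // 0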
func extractTableName(errLine string) string {
	// Look for patterns like 'COPY "tablename"' or 'table "tablename"'
	patterns := []string{"COPY ", "table "}
	for _, pattern := range patterns {
		if idx := strings.Index(errLine, pattern); idx >= 0 {
			start := idx + len(pattern)
			// Skip an optional quote
			if start < len(errLine) && errLine[start] == '"' {
				start++
			}
			end := start
			for end < len(errLine) && errLine[end] != '"' && errLine[end] != ' ' && errLine[end] != '(' {
				end++
			}
			if end > start {
				return errLine[start:end]
			}
		}
	}
	return ""
}

func getDatabaseType(format ArchiveFormat) string {
	if format.IsMySQL() {
		return "mysql"
	}
	return "postgresql"
}

func getCommandVersion(cmd string, arg string) string {
	output, err := exec.Command(cmd, arg).CombinedOutput()
	if err != nil {
		return ""
	}
	// Return the first line only
	lines := strings.Split(string(output), "\n")
	if len(lines) > 0 {
		return strings.TrimSpace(lines[0])
	}
	return ""
}