From b9b44dd9899bdb69d8fbfdddda0e3c71efab3ce9 Mon Sep 17 00:00:00 2001 From: Renz Date: Thu, 13 Nov 2025 12:47:40 +0000 Subject: [PATCH] restore: enhance error capture with detailed stderr logging and verbose pg_restore - Capture all ERROR/FATAL/error: messages from pg_restore/psql stderr - Include full error details in failure messages for better diagnostics - Add --verbose flag to pg_restore for comprehensive error reporting - Improve thread-safe logging in parallel cluster restore - Help diagnose cluster restore failures with actual PostgreSQL error messages --- internal/database/postgresql.go | 3 +++ internal/restore/engine.go | 47 ++++++++++++++++++++++++++------- 2 files changed, 40 insertions(+), 10 deletions(-) diff --git a/internal/database/postgresql.go b/internal/database/postgresql.go index 994cdfd..b32acfe 100644 --- a/internal/database/postgresql.go +++ b/internal/database/postgresql.go @@ -371,6 +371,9 @@ func (p *PostgreSQL) BuildRestoreCommand(database, inputFile string, options Res cmd = append(cmd, "--single-transaction") } + // Add verbose flag for better error reporting + cmd = append(cmd, "--verbose") + // Database and input cmd = append(cmd, "--dbname="+database) cmd = append(cmd, inputFile) diff --git a/internal/restore/engine.go b/internal/restore/engine.go index 1295301..5445c31 100644 --- a/internal/restore/engine.go +++ b/internal/restore/engine.go @@ -287,13 +287,15 @@ func (e *Engine) executeRestoreCommand(ctx context.Context, cmdArgs []string) er // Read stderr in chunks to log errors without loading all into memory buf := make([]byte, 4096) var lastError string + var allErrors []string for { n, err := stderr.Read(buf) if n > 0 { chunk := string(buf[:n]) - // Only log errors/warnings, not all output - if strings.Contains(chunk, "ERROR") || strings.Contains(chunk, "FATAL") { + // Capture all errors/warnings for better diagnostics + if strings.Contains(chunk, "ERROR") || strings.Contains(chunk, "FATAL") || strings.Contains(chunk, "error:") { lastError = chunk + allErrors = append(allErrors, strings.TrimSpace(chunk)) e.log.Warn("Restore stderr", "output", chunk) } } @@ -303,7 +305,15 @@ func (e *Engine) executeRestoreCommand(ctx context.Context, cmdArgs []string) er } if err := cmd.Wait(); err != nil { - e.log.Error("Restore command failed", "error", err, "last_error", lastError) + // Include all captured errors in the return message for better diagnostics + errorDetails := lastError + if len(allErrors) > 0 { + errorDetails = strings.Join(allErrors, " | ") + } + e.log.Error("Restore command failed", "error", err, "stderr", errorDetails) + if errorDetails != "" { + return fmt.Errorf("restore failed: %w (stderr: %s)", err, errorDetails) + } return fmt.Errorf("restore failed: %w", err) } @@ -342,13 +352,15 @@ func (e *Engine) executeRestoreWithDecompression(ctx context.Context, archivePat // Read stderr in chunks to log errors without loading all into memory buf := make([]byte, 4096) var lastError string + var allErrors []string for { n, err := stderr.Read(buf) if n > 0 { chunk := string(buf[:n]) - // Only log errors/warnings, not all output - if strings.Contains(chunk, "ERROR") || strings.Contains(chunk, "FATAL") { + // Capture all errors/warnings for better diagnostics + if strings.Contains(chunk, "ERROR") || strings.Contains(chunk, "FATAL") || strings.Contains(chunk, "error:") { lastError = chunk + allErrors = append(allErrors, strings.TrimSpace(chunk)) e.log.Warn("Restore stderr", "output", chunk) } } @@ -358,7 +370,15 @@ func (e *Engine) executeRestoreWithDecompression(ctx context.Context, archivePat } if err := cmd.Wait(); err != nil { - e.log.Error("Restore with decompression failed", "error", err, "last_error", lastError) + // Include all captured errors in the return message for better diagnostics + errorDetails := lastError + if len(allErrors) > 0 { + errorDetails = strings.Join(allErrors, " | ") + } + e.log.Error("Restore with decompression failed", "error", err, "stderr", errorDetails) + if errorDetails != "" { + return fmt.Errorf("restore failed: %w (stderr: %s)", err, errorDetails) + } return fmt.Errorf("restore failed: %w", err) } @@ -572,17 +592,24 @@ func (e *Engine) RestoreCluster(ctx context.Context, archivePath string) error { var restoreErr error if isCompressedSQL { - e.log.Info("Detected compressed SQL format, using psql + gunzip", "file", dumpFile) + mu.Lock() + e.log.Info("Detected compressed SQL format, using psql + gunzip", "file", dumpFile, "database", dbName) + mu.Unlock() restoreErr = e.restorePostgreSQLSQL(ctx, dumpFile, dbName, true) } else { - e.log.Info("Detected custom dump format, using pg_restore", "file", dumpFile) + mu.Lock() + e.log.Info("Detected custom dump format, using pg_restore", "file", dumpFile, "database", dbName) + mu.Unlock() restoreErr = e.restorePostgreSQLDumpWithOwnership(ctx, dumpFile, dbName, false, preserveOwnership) } if restoreErr != nil { - e.log.Error("Failed to restore database", "name", dbName, "error", restoreErr) + mu.Lock() + e.log.Error("Failed to restore database", "name", dbName, "file", dumpFile, "error", restoreErr) + mu.Unlock() failedDBsMu.Lock() - failedDBs = append(failedDBs, fmt.Sprintf("%s: %v", dbName, restoreErr)) + // Include more context in the error message + failedDBs = append(failedDBs, fmt.Sprintf("%s: restore failed: %v", dbName, restoreErr)) failedDBsMu.Unlock() atomic.AddInt32(&failCount, 1) return