restore: enhance error capture with detailed stderr logging and verbose pg_restore
- Capture all ERROR/FATAL/error: messages from pg_restore/psql stderr
- Include full error details in failure messages for better diagnostics
- Add --verbose flag to pg_restore for comprehensive error reporting
- Improve thread-safe logging in parallel cluster restore
- Help diagnose cluster restore failures with actual PostgreSQL error messages
This commit is contained in:
@@ -371,6 +371,9 @@ func (p *PostgreSQL) BuildRestoreCommand(database, inputFile string, options Res
|
|||||||
cmd = append(cmd, "--single-transaction")
|
cmd = append(cmd, "--single-transaction")
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Add verbose flag for better error reporting
|
||||||
|
cmd = append(cmd, "--verbose")
|
||||||
|
|
||||||
// Database and input
|
// Database and input
|
||||||
cmd = append(cmd, "--dbname="+database)
|
cmd = append(cmd, "--dbname="+database)
|
||||||
cmd = append(cmd, inputFile)
|
cmd = append(cmd, inputFile)
|
||||||
|
|||||||
@@ -287,13 +287,15 @@ func (e *Engine) executeRestoreCommand(ctx context.Context, cmdArgs []string) er
|
|||||||
// Read stderr in chunks to log errors without loading all into memory
|
// Read stderr in chunks to log errors without loading all into memory
|
||||||
buf := make([]byte, 4096)
|
buf := make([]byte, 4096)
|
||||||
var lastError string
|
var lastError string
|
||||||
|
var allErrors []string
|
||||||
for {
|
for {
|
||||||
n, err := stderr.Read(buf)
|
n, err := stderr.Read(buf)
|
||||||
if n > 0 {
|
if n > 0 {
|
||||||
chunk := string(buf[:n])
|
chunk := string(buf[:n])
|
||||||
// Only log errors/warnings, not all output
|
// Capture all errors/warnings for better diagnostics
|
||||||
if strings.Contains(chunk, "ERROR") || strings.Contains(chunk, "FATAL") {
|
if strings.Contains(chunk, "ERROR") || strings.Contains(chunk, "FATAL") || strings.Contains(chunk, "error:") {
|
||||||
lastError = chunk
|
lastError = chunk
|
||||||
|
allErrors = append(allErrors, strings.TrimSpace(chunk))
|
||||||
e.log.Warn("Restore stderr", "output", chunk)
|
e.log.Warn("Restore stderr", "output", chunk)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -303,7 +305,15 @@ func (e *Engine) executeRestoreCommand(ctx context.Context, cmdArgs []string) er
|
|||||||
}
|
}
|
||||||
|
|
||||||
if err := cmd.Wait(); err != nil {
|
if err := cmd.Wait(); err != nil {
|
||||||
e.log.Error("Restore command failed", "error", err, "last_error", lastError)
|
// Include all captured errors in the return message for better diagnostics
|
||||||
|
errorDetails := lastError
|
||||||
|
if len(allErrors) > 0 {
|
||||||
|
errorDetails = strings.Join(allErrors, " | ")
|
||||||
|
}
|
||||||
|
e.log.Error("Restore command failed", "error", err, "stderr", errorDetails)
|
||||||
|
if errorDetails != "" {
|
||||||
|
return fmt.Errorf("restore failed: %w (stderr: %s)", err, errorDetails)
|
||||||
|
}
|
||||||
return fmt.Errorf("restore failed: %w", err)
|
return fmt.Errorf("restore failed: %w", err)
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -342,13 +352,15 @@ func (e *Engine) executeRestoreWithDecompression(ctx context.Context, archivePat
|
|||||||
// Read stderr in chunks to log errors without loading all into memory
|
// Read stderr in chunks to log errors without loading all into memory
|
||||||
buf := make([]byte, 4096)
|
buf := make([]byte, 4096)
|
||||||
var lastError string
|
var lastError string
|
||||||
|
var allErrors []string
|
||||||
for {
|
for {
|
||||||
n, err := stderr.Read(buf)
|
n, err := stderr.Read(buf)
|
||||||
if n > 0 {
|
if n > 0 {
|
||||||
chunk := string(buf[:n])
|
chunk := string(buf[:n])
|
||||||
// Only log errors/warnings, not all output
|
// Capture all errors/warnings for better diagnostics
|
||||||
if strings.Contains(chunk, "ERROR") || strings.Contains(chunk, "FATAL") {
|
if strings.Contains(chunk, "ERROR") || strings.Contains(chunk, "FATAL") || strings.Contains(chunk, "error:") {
|
||||||
lastError = chunk
|
lastError = chunk
|
||||||
|
allErrors = append(allErrors, strings.TrimSpace(chunk))
|
||||||
e.log.Warn("Restore stderr", "output", chunk)
|
e.log.Warn("Restore stderr", "output", chunk)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -358,7 +370,15 @@ func (e *Engine) executeRestoreWithDecompression(ctx context.Context, archivePat
|
|||||||
}
|
}
|
||||||
|
|
||||||
if err := cmd.Wait(); err != nil {
|
if err := cmd.Wait(); err != nil {
|
||||||
e.log.Error("Restore with decompression failed", "error", err, "last_error", lastError)
|
// Include all captured errors in the return message for better diagnostics
|
||||||
|
errorDetails := lastError
|
||||||
|
if len(allErrors) > 0 {
|
||||||
|
errorDetails = strings.Join(allErrors, " | ")
|
||||||
|
}
|
||||||
|
e.log.Error("Restore with decompression failed", "error", err, "stderr", errorDetails)
|
||||||
|
if errorDetails != "" {
|
||||||
|
return fmt.Errorf("restore failed: %w (stderr: %s)", err, errorDetails)
|
||||||
|
}
|
||||||
return fmt.Errorf("restore failed: %w", err)
|
return fmt.Errorf("restore failed: %w", err)
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -572,17 +592,24 @@ func (e *Engine) RestoreCluster(ctx context.Context, archivePath string) error {
|
|||||||
|
|
||||||
var restoreErr error
|
var restoreErr error
|
||||||
if isCompressedSQL {
|
if isCompressedSQL {
|
||||||
e.log.Info("Detected compressed SQL format, using psql + gunzip", "file", dumpFile)
|
mu.Lock()
|
||||||
|
e.log.Info("Detected compressed SQL format, using psql + gunzip", "file", dumpFile, "database", dbName)
|
||||||
|
mu.Unlock()
|
||||||
restoreErr = e.restorePostgreSQLSQL(ctx, dumpFile, dbName, true)
|
restoreErr = e.restorePostgreSQLSQL(ctx, dumpFile, dbName, true)
|
||||||
} else {
|
} else {
|
||||||
e.log.Info("Detected custom dump format, using pg_restore", "file", dumpFile)
|
mu.Lock()
|
||||||
|
e.log.Info("Detected custom dump format, using pg_restore", "file", dumpFile, "database", dbName)
|
||||||
|
mu.Unlock()
|
||||||
restoreErr = e.restorePostgreSQLDumpWithOwnership(ctx, dumpFile, dbName, false, preserveOwnership)
|
restoreErr = e.restorePostgreSQLDumpWithOwnership(ctx, dumpFile, dbName, false, preserveOwnership)
|
||||||
}
|
}
|
||||||
|
|
||||||
if restoreErr != nil {
|
if restoreErr != nil {
|
||||||
e.log.Error("Failed to restore database", "name", dbName, "error", restoreErr)
|
mu.Lock()
|
||||||
|
e.log.Error("Failed to restore database", "name", dbName, "file", dumpFile, "error", restoreErr)
|
||||||
|
mu.Unlock()
|
||||||
failedDBsMu.Lock()
|
failedDBsMu.Lock()
|
||||||
failedDBs = append(failedDBs, fmt.Sprintf("%s: %v", dbName, restoreErr))
|
// Include more context in the error message
|
||||||
|
failedDBs = append(failedDBs, fmt.Sprintf("%s: restore failed: %v", dbName, restoreErr))
|
||||||
failedDBsMu.Unlock()
|
failedDBsMu.Unlock()
|
||||||
atomic.AddInt32(&failCount, 1)
|
atomic.AddInt32(&failCount, 1)
|
||||||
return
|
return
|
||||||
|
|||||||
Reference in New Issue
Block a user