v3.42.30: Add go-multierror for better error aggregation
All checks were successful
CI/CD / Test (push) Successful in 1m20s
CI/CD / Lint (push) Successful in 1m25s
CI/CD / Build & Release (push) Successful in 3m14s

- Use hashicorp/go-multierror for cluster restore error collection
- Shows ALL failed databases with full error context (not just count)
- Bullet-pointed output for readability
- Thread-safe error aggregation with dedicated mutex
- Error wrapping with %w for proper error chain preservation
This commit is contained in:
2026-01-14 15:59:12 +01:00
parent c0d92b3a81
commit 7b4ab76313
5 changed files with 48 additions and 11 deletions

View File

@@ -20,6 +20,7 @@ import (
"dbbackup/internal/progress"
"dbbackup/internal/security"
"github.com/hashicorp/go-multierror"
_ "github.com/jackc/pgx/v5/stdlib" // PostgreSQL driver
)
@@ -961,7 +962,8 @@ func (e *Engine) RestoreCluster(ctx context.Context, archivePath string) error {
}()
}
var failedDBs []string
var restoreErrors *multierror.Error
var restoreErrorsMu sync.Mutex
totalDBs := 0
// Count total databases
@@ -995,7 +997,6 @@ func (e *Engine) RestoreCluster(ctx context.Context, archivePath string) error {
}
var successCount, failCount int32
var failedDBsMu sync.Mutex
var mu sync.Mutex // Protect shared resources (progress, logger)
// Create semaphore to limit concurrency
@@ -1050,9 +1051,9 @@ func (e *Engine) RestoreCluster(ctx context.Context, archivePath string) error {
// STEP 2: Create fresh database
if err := e.ensureDatabaseExists(ctx, dbName); err != nil {
e.log.Error("Failed to create database", "name", dbName, "error", err)
failedDBsMu.Lock()
failedDBs = append(failedDBs, fmt.Sprintf("%s: failed to create database: %v", dbName, err))
failedDBsMu.Unlock()
restoreErrorsMu.Lock()
restoreErrors = multierror.Append(restoreErrors, fmt.Errorf("%s: failed to create database: %w", dbName, err))
restoreErrorsMu.Unlock()
atomic.AddInt32(&failCount, 1)
return
}
@@ -1095,10 +1096,10 @@ func (e *Engine) RestoreCluster(ctx context.Context, archivePath string) error {
mu.Unlock()
}
failedDBsMu.Lock()
restoreErrorsMu.Lock()
// Include more context in the error message
failedDBs = append(failedDBs, fmt.Sprintf("%s: restore failed: %v", dbName, restoreErr))
failedDBsMu.Unlock()
restoreErrors = multierror.Append(restoreErrors, fmt.Errorf("%s: restore failed: %w", dbName, restoreErr))
restoreErrorsMu.Unlock()
atomic.AddInt32(&failCount, 1)
return
}
@@ -1116,7 +1117,17 @@ func (e *Engine) RestoreCluster(ctx context.Context, archivePath string) error {
failCountFinal := int(atomic.LoadInt32(&failCount))
if failCountFinal > 0 {
failedList := strings.Join(failedDBs, "\n ")
// Format multi-error with detailed output
restoreErrors.ErrorFormat = func(errs []error) string {
if len(errs) == 1 {
return errs[0].Error()
}
points := make([]string, len(errs))
for i, err := range errs {
points[i] = fmt.Sprintf(" • %s", err.Error())
}
return fmt.Sprintf("%d database(s) failed:\n%s", len(errs), strings.Join(points, "\n"))
}
// Log summary
e.log.Info("Cluster restore completed with failures",
@@ -1127,7 +1138,7 @@ func (e *Engine) RestoreCluster(ctx context.Context, archivePath string) error {
e.progress.Fail(fmt.Sprintf("Cluster restore: %d succeeded, %d failed out of %d total", successCountFinal, failCountFinal, totalDBs))
operation.Complete(fmt.Sprintf("Partial restore: %d/%d databases succeeded", successCountFinal, totalDBs))
return fmt.Errorf("cluster restore completed with %d failures:\n %s", failCountFinal, failedList)
return fmt.Errorf("cluster restore completed with %d failures:\n%s", failCountFinal, restoreErrors.Error())
}
e.progress.Complete(fmt.Sprintf("Cluster restored successfully: %d databases", successCountFinal))