From c71889be472098d843fe018be17659a556dc937a Mon Sep 17 00:00:00 2001
From: Alexander Renz
Date: Wed, 14 Jan 2026 08:15:53 +0100
Subject: [PATCH] fix: phased restore for BLOB databases to prevent lock exhaustion OOM

- Auto-detect large objects in pg_restore dumps
- Split restore into pre-data, data, post-data phases
- Each phase commits and releases locks before the next begins
- Prevents 'out of shared memory' / max_locks_per_transaction errors
- Updated error hints with better guidance for lock exhaustion
---
 bin/README.md                  |   4 +-
 internal/checks/error_hints.go |   8 +--
 internal/restore/engine.go     | 120 ++++++++++++++++++++++++++++++++-
 3 files changed, 125 insertions(+), 7 deletions(-)

diff --git a/bin/README.md b/bin/README.md
index b82aa9e..8a6bb86 100644
--- a/bin/README.md
+++ b/bin/README.md
@@ -4,8 +4,8 @@ This directory contains pre-compiled binaries for the DB Backup Tool across mult
 ## Build Information
 
 - **Version**: 3.42.10
-- **Build Time**: 2026-01-13_07:23:20_UTC
-- **Git Commit**: f153e61
+- **Build Time**: 2026-01-13_13:40:58_UTC
+- **Git Commit**: 222bdbe
 
 ## Recent Updates (v1.1.0)
 - ✅ Fixed TUI progress display with line-by-line output
diff --git a/internal/checks/error_hints.go b/internal/checks/error_hints.go
index a5f9561..67d0c97 100755
--- a/internal/checks/error_hints.go
+++ b/internal/checks/error_hints.go
@@ -68,8 +68,8 @@ func ClassifyError(errorMsg string) *ErrorClassification {
 			Type:     "critical",
 			Category: "locks",
 			Message:  errorMsg,
-			Hint:     "Lock table exhausted - typically caused by large objects in parallel restore",
-			Action:   "Increase max_locks_per_transaction in postgresql.conf to 512 or higher",
+			Hint:     "Lock table exhausted - typically caused by large objects (BLOBs) during restore",
+			Action:   "Option 1: Increase max_locks_per_transaction to 1024+ in postgresql.conf (requires restart). Option 2: Update dbbackup and retry - phased restore is now auto-enabled for BLOB databases",
 			Severity: 2,
 		}
 	case "permission_denied":
@@ -142,8 +142,8 @@ func ClassifyError(errorMsg string) *ErrorClassification {
 			Type:     "critical",
 			Category: "locks",
 			Message:  errorMsg,
-			Hint:     "Lock table exhausted - typically caused by large objects in parallel restore",
-			Action:   "Increase max_locks_per_transaction in postgresql.conf to 512 or higher",
+			Hint:     "Lock table exhausted - typically caused by large objects (BLOBs) during restore",
+			Action:   "Option 1: Increase max_locks_per_transaction to 1024+ in postgresql.conf (requires restart). Option 2: Update dbbackup and retry - phased restore is now auto-enabled for BLOB databases",
 			Severity: 2,
 		}
 	}
diff --git a/internal/restore/engine.go b/internal/restore/engine.go
index e838022..3b8b814 100755
--- a/internal/restore/engine.go
+++ b/internal/restore/engine.go
@@ -223,7 +223,18 @@ func (e *Engine) restorePostgreSQLDump(ctx context.Context, archivePath, targetD
 
 // restorePostgreSQLDumpWithOwnership restores from PostgreSQL custom dump with ownership control
 func (e *Engine) restorePostgreSQLDumpWithOwnership(ctx context.Context, archivePath, targetDB string, compressed bool, preserveOwnership bool) error {
-	// Build restore command with ownership control
+	// Check if the dump contains large objects (BLOBs) - if so, use phased restore
+	// to prevent lock table exhaustion (max_locks_per_transaction OOM)
+	hasLargeObjects := e.checkDumpHasLargeObjects(archivePath)
+
+	if hasLargeObjects {
+		e.log.Info("Large objects detected - using phased restore to prevent lock exhaustion",
+			"database", targetDB,
+			"archive", archivePath)
+		return e.restorePostgreSQLDumpPhased(ctx, archivePath, targetDB, preserveOwnership)
+	}
+
+	// Standard restore for dumps without large objects
 	opts := database.RestoreOptions{
 		Parallel: 1,
 		Clean:    false, // We already dropped the database
@@ -249,6 +260,113 @@ func (e *Engine) restorePostgreSQLDumpWithOwnership(ctx context.Context, archive
 	return e.executeRestoreCommand(ctx, cmd)
 }
 
+// restorePostgreSQLDumpPhased performs a multi-phase restore to prevent lock table exhaustion.
+// Phase 1: pre-data (schema, types, functions)
+// Phase 2: data (table data and large objects)
+// Phase 3: post-data (indexes, constraints, triggers)
+//
+// Each phase runs as a separate pg_restore invocation, so locks are committed
+// and released between phases instead of accumulating in a single run.
+func (e *Engine) restorePostgreSQLDumpPhased(ctx context.Context, archivePath, targetDB string, preserveOwnership bool) error {
+	e.log.Info("Starting phased restore for database with large objects",
+		"database", targetDB,
+		"archive", archivePath)
+
+	// Phase definitions, mapped to pg_restore's --section flag
+	phases := []struct {
+		name    string
+		section string
+		desc    string
+	}{
+		{"pre-data", "pre-data", "Schema, types, functions"},
+		{"data", "data", "Table data"},
+		{"post-data", "post-data", "Indexes, constraints, triggers"},
+	}
+
+	for i, phase := range phases {
+		e.log.Info(fmt.Sprintf("Phase %d/%d: Restoring %s", i+1, len(phases), phase.name),
+			"database", targetDB,
+			"section", phase.section,
+			"description", phase.desc)
+
+		if err := e.restoreSection(ctx, archivePath, targetDB, phase.section, preserveOwnership); err != nil {
+			// Check if it's an ignorable error
+			if e.isIgnorableError(err.Error()) {
+				e.log.Warn(fmt.Sprintf("Phase %d completed with ignorable errors", i+1),
+					"section", phase.section,
+					"error", err)
+				continue
+			}
+			return fmt.Errorf("phase %d (%s) failed: %w", i+1, phase.name, err)
+		}
+
+		e.log.Info(fmt.Sprintf("Phase %d/%d completed successfully", i+1, len(phases)),
+			"section", phase.section)
+	}
+
+	e.log.Info("Phased restore completed successfully", "database", targetDB)
+	return nil
+}
+
+// restoreSection restores a single section of a PostgreSQL custom dump
+func (e *Engine) restoreSection(ctx context.Context, archivePath, targetDB, section string, preserveOwnership bool) error {
+	// Build pg_restore command with the --section flag
+	args := []string{"pg_restore"}
+
+	// Connection parameters
+	if e.cfg.Host != "localhost" {
+		args = append(args, "-h", e.cfg.Host)
+		args = append(args, "-p", fmt.Sprintf("%d", e.cfg.Port))
+		args = append(args, "--no-password")
+	}
+	args = append(args, "-U", e.cfg.User)
+
+	// Restore only the requested section
+	args = append(args, "--section="+section)
+
+	// Ownership options
+	if !preserveOwnership {
+		args = append(args, "--no-owner", "--no-privileges")
+	}
+
+	// Skip data for failed tables (prevents cascading errors)
+	args = append(args, "--no-data-for-failed-tables")
+
+	// Database and input file
+	args = append(args, "--dbname="+targetDB)
+	args = append(args, archivePath)
+
+	return e.executeRestoreCommand(ctx, args)
+}
+
+// checkDumpHasLargeObjects reports whether a PostgreSQL custom dump contains large objects (BLOBs)
+func (e *Engine) checkDumpHasLargeObjects(archivePath string) bool {
+	// Use pg_restore -l to list the dump's table of contents without restoring
+	ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second)
+	defer cancel()
+
+	cmd := exec.CommandContext(ctx, "pg_restore", "-l", archivePath)
+	output, err := cmd.Output()
+	if err != nil {
+		// If listing fails, assume no large objects (safer to use the standard restore)
+		e.log.Debug("Could not list dump contents, assuming no large objects", "error", err)
+		return false
+	}
+
+	outputStr := string(output)
+
+	// Check for BLOB/LARGE OBJECT indicators in the table of contents
+	// (the "BLOB" substring also covers pg_restore's "BLOBS" entries)
+	if strings.Contains(outputStr, "BLOB") ||
+		strings.Contains(outputStr, "LARGE OBJECT") ||
+		strings.Contains(outputStr, "lo_create") {
+		return true
+	}
+
+	return false
+}
+
 // restorePostgreSQLSQL restores from PostgreSQL SQL script
 func (e *Engine) restorePostgreSQLSQL(ctx context.Context, archivePath, targetDB string, compressed bool) error {
 	// Pre-validate SQL dump to detect truncation BEFORE attempting restore
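
For reference, the technique the patch implements can be reproduced outside
dbbackup: run pg_restore once per section so each phase's locks are committed
and released before the next phase starts. The sketch below is illustrative Go,
not dbbackup code; the archive path "dump.pgdump" and database "targetdb" are
placeholders, while the -l listing and --section flag are standard pg_restore
options.

// phased_restore_sketch.go - minimal standalone illustration of phased restore.
// Hypothetical paths and database name; pg_restore must be on PATH and the
// target database must already exist.
package main

import (
	"log"
	"os/exec"
	"strings"
)

func main() {
	archive := "dump.pgdump" // placeholder archive path
	target := "targetdb"     // placeholder database name

	// pg_restore -l prints the dump's table of contents without restoring
	// anything; BLOB entries indicate large objects.
	toc, err := exec.Command("pg_restore", "-l", archive).Output()
	if err != nil {
		log.Fatalf("could not list dump contents: %v", err)
	}
	if !strings.Contains(string(toc), "BLOB") {
		log.Println("no large objects found; a single-pass restore would also work")
	}

	// One pg_restore process per section: locks taken while restoring
	// pre-data are released before data starts, and so on, rather than
	// accumulating against max_locks_per_transaction in one run.
	for _, section := range []string{"pre-data", "data", "post-data"} {
		out, err := exec.Command("pg_restore",
			"--section="+section,
			"--no-owner", "--no-privileges",
			"--dbname="+target,
			archive).CombinedOutput()
		if err != nil {
			log.Fatalf("section %s failed: %v\n%s", section, err, out)
		}
		log.Printf("section %s restored", section)
	}
}

The engine version above additionally tolerates ignorable errors per phase and
passes --no-data-for-failed-tables; CombinedOutput here is just a compact
stand-in for the streaming done by executeRestoreCommand.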