feat(restore): add --parallel-dbs=-1 auto-detection based on CPU/RAM

- Add CalculateOptimalParallel() function to preflight.go
- Calculate optimal workers: min(available RAM / 3 GB, CPU cores), capped at 16
- Reduce parallelism by 50% if memory usage is above 80%
- Add -1 flag value for auto-detection mode
- Show CPU cores and recommended parallelism in the preflight summary
fix(grafana): update dashboard queries and thresholds
2026-01-17 13:41:28 +01:00 · 2026-01-17 13:24:54 +01:00 · 2026-01-17 11:44:05 +01:00
4 changed files with 148 additions and 43 deletions
--- a/bin/README.md
+++ b/bin/README.md
@@ -4,8 +4,8 @@ This directory contains pre-compiled binaries for the DB Backup Tool across mult
 ## Build Information
 - **Version**: 3.42.50
- **Build Time**: 2026-01-17_06:25:57_UTC
+- **Build Time**: 2026-01-17_12:25:20_UTC
- **Git Commit**: 4ea3ec2
+- **Git Commit**: c5be9bc
 ## Recent Updates (v1.1.0)
 - ✅ Fixed TUI progress display with line-by-line output
--- a/cmd/restore.go
+++ b/cmd/restore.go
@@ -290,7 +290,7 @@ func init() {
 	restoreClusterCmd.Flags().BoolVar(&restoreForce, "force", false, "Skip safety checks and confirmations")
 	restoreClusterCmd.Flags().BoolVar(&restoreCleanCluster, "clean-cluster", false, "Drop all existing user databases before restore (disaster recovery)")
 	restoreClusterCmd.Flags().IntVar(&restoreJobs, "jobs", 0, "Number of parallel decompression jobs (0 = auto)")
-	restoreClusterCmd.Flags().IntVar(&restoreParallelDBs, "parallel-dbs", 0, "Number of databases to restore in parallel (0 = use config default, 1 = sequential)")
+	restoreClusterCmd.Flags().IntVar(&restoreParallelDBs, "parallel-dbs", 0, "Number of databases to restore in parallel (0 = use config default, 1 = sequential, -1 = auto-detect based on CPU/RAM)")
 	restoreClusterCmd.Flags().StringVar(&restoreWorkdir, "workdir", "", "Working directory for extraction (use when system disk is small, e.g. /mnt/storage/restore_tmp)")
 	restoreClusterCmd.Flags().BoolVar(&restoreVerbose, "verbose", false, "Show detailed restore progress")
 	restoreClusterCmd.Flags().BoolVar(&restoreNoProgress, "no-progress", false, "Disable progress indicators")
@@ -786,7 +786,12 @@ func runRestoreCluster(cmd *cobra.Command, args []string) error {
 	}
 	// Override cluster parallelism if --parallel-dbs is specified
-	if restoreParallelDBs > 0 {
+	if restoreParallelDBs == -1 {
 		// Auto-detect optimal parallelism based on system resources
 		autoParallel := restore.CalculateOptimalParallel()
 		cfg.ClusterParallelism = autoParallel
 		log.Info("Auto-detected optimal parallelism for database restores", "parallel_dbs", autoParallel, "mode", "auto")
 	} else if restoreParallelDBs > 0 {
 		cfg.ClusterParallelism = restoreParallelDBs
 		log.Info("Using custom parallelism for database restores", "parallel_dbs", restoreParallelDBs)
 	}
--- a/grafana/dbbackup-dashboard.json
+++ b/grafana/dbbackup-dashboard.json
@@ -94,7 +94,7 @@
            "uid": "${DS_PROMETHEUS}"
          },
          "editorMode": "code",
-          "expr": "dbbackup_rpo_seconds{instance=~\"$instance\"} < 86400",
+          "expr": "dbbackup_rpo_seconds{instance=~\"$instance\"} < bool 604800",
          "legendFormat": "{{database}}",
          "range": true,
          "refId": "A"
@@ -711,19 +711,6 @@
      },
      "pluginVersion": "10.2.0",
      "targets": [
        {
          "datasource": {
            "type": "prometheus",
            "uid": "${DS_PROMETHEUS}"
          },
          "editorMode": "code",
          "expr": "dbbackup_rpo_seconds{instance=~\"$instance\"} < 86400",
          "format": "table",
          "instant": true,
          "legendFormat": "__auto",
          "range": false,
          "refId": "Status"
        },
        {
          "datasource": {
            "type": "prometheus",
@@ -769,26 +756,30 @@
              "Time": true,
              "Time 1": true,
              "Time 2": true,
              "Time 3": true,
              "__name__": true,
              "__name__ 1": true,
              "__name__ 2": true,
              "__name__ 3": true,
              "instance 1": true,
              "instance 2": true,
              "instance 3": true,
              "job": true,
              "job 1": true,
              "job 2": true,
-              "job 3": true
+              "engine 1": true,
              "engine 2": true
            },
            "indexByName": {
              "Database": 0,
              "Instance": 1,
              "Engine": 2,
              "RPO": 3,
              "Size": 4
            },
            "indexByName": {},
            "renameByName": {
              "Value #RPO": "RPO",
              "Value #Size": "Size",
              "Value #Status": "Status",
              "database": "Database",
-              "instance": "Instance"
+              "instance": "Instance",
              "engine": "Engine"
            }
          }
        }
@@ -1275,7 +1266,7 @@
          "query": "label_values(dbbackup_rpo_seconds, instance)",
          "refId": "StandardVariableQuery"
        },
-        "refresh": 1,
+        "refresh": 2,
        "regex": "",
        "skipUrlSync": false,
        "sort": 1,
--- a/internal/restore/preflight.go
+++ b/internal/restore/preflight.go
@@ -16,6 +16,57 @@ import (
 	"github.com/shirou/gopsutil/v3/mem"
 )
 // CalculateOptimalParallel returns the recommended number of parallel restore
 // workers for the current host, based on CPU cores and available RAM.
 //
 // The result is min(available RAM / 3 GB, CPU cores), clamped to [1, 16], and
 // then halved when memory usage is above 80%. If memory statistics cannot be
 // read, it falls back to half of the CPU cores. The return value is always at
 // least 1; it is never 0.
 //
 // This is a standalone function that can be called from anywhere.
 func CalculateOptimalParallel() int {
 	// Each pg_restore worker needs approximately 2-4GB of RAM;
 	// budget a conservative 3GB per worker to avoid OOM.
 	const memPerWorkerGB = 3.0
 	// Upper bound on workers: more than this gives diminishing returns.
 	const maxWorkers = 16
 	// Memory usage (percent) above which the recommendation is halved.
 	const highMemUsedPercent = 80
 	cpuCores := runtime.NumCPU()
 	vmem, err := mem.VirtualMemory()
 	if err != nil {
 		// Fallback: use half of CPU cores if memory detection fails
 		if cpuCores > 1 {
 			return cpuCores / 2
 		}
 		return 1
 	}
 	memAvailableGB := float64(vmem.Available) / (1024 * 1024 * 1024)
 	// Take the tighter of the memory-derived and CPU-derived limits.
 	recommended := int(memAvailableGB / memPerWorkerGB)
 	if cpuCores < recommended {
 		recommended = cpuCores
 	}
 	// Apply sensible bounds
 	if recommended < 1 {
 		recommended = 1
 	}
 	if recommended > maxWorkers {
 		recommended = maxWorkers
 	}
 	// Under high memory pressure, halve the parallelism. Because this only
 	// runs when recommended >= 2, the halved value can never drop below 1.
 	if vmem.UsedPercent > highMemUsedPercent && recommended > 1 {
 		recommended /= 2
 	}
 	return recommended
 }
 // PreflightResult contains all preflight check results
 type PreflightResult struct {
 	// Linux system checks
@@ -40,6 +91,8 @@ type LinuxChecks struct {
 	MemTotal            uint64  // Total RAM in bytes
 	MemAvailable        uint64  // Available RAM in bytes
 	MemUsedPercent      float64 // Memory usage percentage
 	CPUCores            int     // Number of CPU cores
 	RecommendedParallel int     // Auto-calculated optimal parallel count
 	ShmMaxOK            bool    // Is shmmax sufficient?
 	ShmAllOK            bool    // Is shmall sufficient?
 	MemAvailableOK      bool    // Is available RAM sufficient?
@@ -100,6 +153,7 @@ func (e *Engine) RunPreflightChecks(ctx context.Context, dumpsDir string, entrie
 // checkSystemResources uses gopsutil for cross-platform system checks
 func (e *Engine) checkSystemResources(result *PreflightResult) {
 	result.Linux.IsLinux = runtime.GOOS == "linux"
 	result.Linux.CPUCores = runtime.NumCPU()
 	// Get memory info (works on Linux, macOS, Windows, BSD)
 	if vmem, err := mem.VirtualMemory(); err == nil {
@@ -118,6 +172,9 @@ func (e *Engine) checkSystemResources(result *PreflightResult) {
 		e.log.Warn("Could not detect system memory", "error", err)
 	}
 	// Calculate recommended parallel based on resources
 	result.Linux.RecommendedParallel = e.calculateRecommendedParallel(result)
 	// Linux-specific kernel checks (shmmax, shmall)
 	if result.Linux.IsLinux {
 		e.checkLinuxKernel(result)
@@ -434,6 +491,56 @@ func (e *Engine) calculateRecommendations(result *PreflightResult) {
 		"recommended_locks", lockBoost)
 }
 // calculateRecommendedParallel determines the recommended number of parallel
 // pg_restore workers from the resource figures already stored in result.
 //
 // It reads result.Linux.CPUCores (falling back to runtime.NumCPU() when that
 // is 0), result.Linux.MemAvailable and result.Linux.MemUsedPercent.
 //
 // When MemAvailable is non-zero the result is min(available RAM / 3 GB,
 // CPU cores), clamped to [1, 16], and halved when memory usage is above 80%.
 // When MemAvailable is 0 the memory figure is treated as unknown and the
 // result is half of the CPU cores (minimum 1), matching the fallback used by
 // CalculateOptimalParallel. The chosen value and its inputs are logged.
 func (e *Engine) calculateRecommendedParallel(result *PreflightResult) int {
 	cpuCores := result.Linux.CPUCores
 	if cpuCores == 0 {
 		cpuCores = runtime.NumCPU()
 	}
 	memAvailableGB := float64(result.Linux.MemAvailable) / (1024 * 1024 * 1024)
 	// Each pg_restore worker needs approximately 2-4GB of RAM
 	// Use conservative 3GB per worker to avoid OOM
 	const memPerWorkerGB = 3.0
 	// Calculate limits
 	maxByMem := int(memAvailableGB / memPerWorkerGB)
 	maxByCPU := cpuCores
 	var recommended int
 	if result.Linux.MemAvailable == 0 {
 		// NOTE(review): MemAvailable presumably stays 0 when memory detection
 		// failed (printPreflightSummary also special-cases 0) — confirm.
 		// Without a memory figure, do not collapse to a single worker; use
 		// the same CPU-only fallback as CalculateOptimalParallel.
 		recommended = 1
 		if cpuCores > 1 {
 			recommended = cpuCores / 2
 		}
 	} else {
 		// Use the minimum of memory and CPU limits
 		recommended = maxByMem
 		if maxByCPU < recommended {
 			recommended = maxByCPU
 		}
 		// Apply sensible bounds
 		if recommended < 1 {
 			recommended = 1
 		}
 		if recommended > 16 {
 			recommended = 16 // Cap at 16 to avoid diminishing returns
 		}
 		// If memory pressure is high (>80%), reduce parallelism. This only
 		// runs when recommended >= 2, so the halved value stays >= 1.
 		if result.Linux.MemUsedPercent > 80 && recommended > 1 {
 			recommended /= 2
 		}
 	}
 	e.log.Info("Calculated recommended parallel",
 		"cpu_cores", cpuCores,
 		"mem_available_gb", fmt.Sprintf("%.1f", memAvailableGB),
 		"max_by_mem", maxByMem,
 		"max_by_cpu", maxByCPU,
 		"recommended", recommended)
 	return recommended
 }
 // printPreflightSummary prints a nice summary of all checks
 func (e *Engine) printPreflightSummary(result *PreflightResult) {
 	fmt.Println()
@@ -446,6 +553,8 @@ func (e *Engine) printPreflightSummary(result *PreflightResult) {
 	printCheck("Total RAM", humanize.Bytes(result.Linux.MemTotal), true)
 	printCheck("Available RAM", humanize.Bytes(result.Linux.MemAvailable), result.Linux.MemAvailableOK || result.Linux.MemAvailable == 0)
 	printCheck("Memory Usage", fmt.Sprintf("%.1f%%", result.Linux.MemUsedPercent), result.Linux.MemUsedPercent < 85)
 	printCheck("CPU Cores", fmt.Sprintf("%d", result.Linux.CPUCores), true)
 	printCheck("Recommended Parallel", fmt.Sprintf("%d (auto-calculated)", result.Linux.RecommendedParallel), true)
 	// Linux-specific kernel checks
 	if result.Linux.IsLinux && result.Linux.ShmMax > 0 {
Author	SHA1	Message	Date
Alexander Renz	62d58c77af	feat(restore): add --parallel-dbs=-1 auto-detection based on CPU/RAM All checks were successful CI/CD / Test (push) Successful in 1m16s Details CI/CD / Lint (push) Successful in 1m25s Details CI/CD / Build & Release (push) Successful in 3m14s Details - Add CalculateOptimalParallel() function to preflight.go - Calculates optimal workers: min(RAM/3GB, CPU cores), capped at 16 - Reduces parallelism by 50% if memory pressure >80% - Add -1 flag value for auto-detection mode - Preflight summary now shows CPU cores and recommended parallel	2026-01-17 13:41:28 +01:00
Alexander Renz	c5be9bcd2b	fix(grafana): update dashboard queries and thresholds All checks were successful CI/CD / Test (push) Successful in 1m15s Details CI/CD / Lint (push) Successful in 1m26s Details CI/CD / Build & Release (push) Successful in 3m13s Details - Fix Last Backup Status panel to use bool modifier for proper 1/0 values - Change RPO threshold from 24h to 7 days (604800s) for status check - Clean up table transformations to exclude duplicate fields - Update variable refresh to trigger on time range change	2026-01-17 13:24:54 +01:00
Alexander Renz	b120f1507e	style: format struct field alignment All checks were successful CI/CD / Test (push) Successful in 1m18s Details CI/CD / Lint (push) Successful in 1m26s Details CI/CD / Build & Release (push) Has been skipped Details	2026-01-17 11:44:05 +01:00