Compare commits

..

3 Commits

Author SHA1 Message Date
62d58c77af feat(restore): add --parallel-dbs=-1 auto-detection based on CPU/RAM
All checks were successful
CI/CD / Test (push) Successful in 1m16s
CI/CD / Lint (push) Successful in 1m25s
CI/CD / Build & Release (push) Successful in 3m14s
- Add CalculateOptimalParallel() function to preflight.go
- Calculates optimal workers: min(RAM/3GB, CPU cores), capped at 16
- Reduces parallelism by 50% if memory pressure >80%
- Add -1 flag value for auto-detection mode
- Preflight summary now shows CPU cores and recommended parallel
2026-01-17 13:41:28 +01:00
c5be9bcd2b fix(grafana): update dashboard queries and thresholds
All checks were successful
CI/CD / Test (push) Successful in 1m15s
CI/CD / Lint (push) Successful in 1m26s
CI/CD / Build & Release (push) Successful in 3m13s
- Fix Last Backup Status panel to use bool modifier for proper 1/0 values
- Change RPO threshold from 24h to 7 days (604800s) for status check
- Clean up table transformations to exclude duplicate fields
- Update variable refresh to trigger on time range change
2026-01-17 13:24:54 +01:00
b120f1507e style: format struct field alignment
All checks were successful
CI/CD / Test (push) Successful in 1m18s
CI/CD / Lint (push) Successful in 1m26s
CI/CD / Build & Release (push) Has been skipped
2026-01-17 11:44:05 +01:00
4 changed files with 148 additions and 43 deletions

View File

@@ -4,8 +4,8 @@ This directory contains pre-compiled binaries for the DB Backup Tool across mult
## Build Information ## Build Information
- **Version**: 3.42.50 - **Version**: 3.42.50
- **Build Time**: 2026-01-17_06:25:57_UTC - **Build Time**: 2026-01-17_12:25:20_UTC
- **Git Commit**: 4ea3ec2 - **Git Commit**: c5be9bc
## Recent Updates (v1.1.0) ## Recent Updates (v1.1.0)
- ✅ Fixed TUI progress display with line-by-line output - ✅ Fixed TUI progress display with line-by-line output

View File

@@ -290,7 +290,7 @@ func init() {
restoreClusterCmd.Flags().BoolVar(&restoreForce, "force", false, "Skip safety checks and confirmations") restoreClusterCmd.Flags().BoolVar(&restoreForce, "force", false, "Skip safety checks and confirmations")
restoreClusterCmd.Flags().BoolVar(&restoreCleanCluster, "clean-cluster", false, "Drop all existing user databases before restore (disaster recovery)") restoreClusterCmd.Flags().BoolVar(&restoreCleanCluster, "clean-cluster", false, "Drop all existing user databases before restore (disaster recovery)")
restoreClusterCmd.Flags().IntVar(&restoreJobs, "jobs", 0, "Number of parallel decompression jobs (0 = auto)") restoreClusterCmd.Flags().IntVar(&restoreJobs, "jobs", 0, "Number of parallel decompression jobs (0 = auto)")
restoreClusterCmd.Flags().IntVar(&restoreParallelDBs, "parallel-dbs", 0, "Number of databases to restore in parallel (0 = use config default, 1 = sequential)") restoreClusterCmd.Flags().IntVar(&restoreParallelDBs, "parallel-dbs", 0, "Number of databases to restore in parallel (0 = use config default, 1 = sequential, -1 = auto-detect based on CPU/RAM)")
restoreClusterCmd.Flags().StringVar(&restoreWorkdir, "workdir", "", "Working directory for extraction (use when system disk is small, e.g. /mnt/storage/restore_tmp)") restoreClusterCmd.Flags().StringVar(&restoreWorkdir, "workdir", "", "Working directory for extraction (use when system disk is small, e.g. /mnt/storage/restore_tmp)")
restoreClusterCmd.Flags().BoolVar(&restoreVerbose, "verbose", false, "Show detailed restore progress") restoreClusterCmd.Flags().BoolVar(&restoreVerbose, "verbose", false, "Show detailed restore progress")
restoreClusterCmd.Flags().BoolVar(&restoreNoProgress, "no-progress", false, "Disable progress indicators") restoreClusterCmd.Flags().BoolVar(&restoreNoProgress, "no-progress", false, "Disable progress indicators")
@@ -786,7 +786,12 @@ func runRestoreCluster(cmd *cobra.Command, args []string) error {
} }
// Override cluster parallelism if --parallel-dbs is specified // Override cluster parallelism if --parallel-dbs is specified
if restoreParallelDBs > 0 { if restoreParallelDBs == -1 {
// Auto-detect optimal parallelism based on system resources
autoParallel := restore.CalculateOptimalParallel()
cfg.ClusterParallelism = autoParallel
log.Info("Auto-detected optimal parallelism for database restores", "parallel_dbs", autoParallel, "mode", "auto")
} else if restoreParallelDBs > 0 {
cfg.ClusterParallelism = restoreParallelDBs cfg.ClusterParallelism = restoreParallelDBs
log.Info("Using custom parallelism for database restores", "parallel_dbs", restoreParallelDBs) log.Info("Using custom parallelism for database restores", "parallel_dbs", restoreParallelDBs)
} }

View File

@@ -94,7 +94,7 @@
"uid": "${DS_PROMETHEUS}" "uid": "${DS_PROMETHEUS}"
}, },
"editorMode": "code", "editorMode": "code",
"expr": "dbbackup_rpo_seconds{instance=~\"$instance\"} < 86400", "expr": "dbbackup_rpo_seconds{instance=~\"$instance\"} < bool 604800",
"legendFormat": "{{database}}", "legendFormat": "{{database}}",
"range": true, "range": true,
"refId": "A" "refId": "A"
@@ -711,19 +711,6 @@
}, },
"pluginVersion": "10.2.0", "pluginVersion": "10.2.0",
"targets": [ "targets": [
{
"datasource": {
"type": "prometheus",
"uid": "${DS_PROMETHEUS}"
},
"editorMode": "code",
"expr": "dbbackup_rpo_seconds{instance=~\"$instance\"} < 86400",
"format": "table",
"instant": true,
"legendFormat": "__auto",
"range": false,
"refId": "Status"
},
{ {
"datasource": { "datasource": {
"type": "prometheus", "type": "prometheus",
@@ -769,26 +756,30 @@
"Time": true, "Time": true,
"Time 1": true, "Time 1": true,
"Time 2": true, "Time 2": true,
"Time 3": true,
"__name__": true, "__name__": true,
"__name__ 1": true, "__name__ 1": true,
"__name__ 2": true, "__name__ 2": true,
"__name__ 3": true,
"instance 1": true, "instance 1": true,
"instance 2": true, "instance 2": true,
"instance 3": true,
"job": true, "job": true,
"job 1": true, "job 1": true,
"job 2": true, "job 2": true,
"job 3": true "engine 1": true,
"engine 2": true
},
"indexByName": {
"Database": 0,
"Instance": 1,
"Engine": 2,
"RPO": 3,
"Size": 4
}, },
"indexByName": {},
"renameByName": { "renameByName": {
"Value #RPO": "RPO", "Value #RPO": "RPO",
"Value #Size": "Size", "Value #Size": "Size",
"Value #Status": "Status",
"database": "Database", "database": "Database",
"instance": "Instance" "instance": "Instance",
"engine": "Engine"
} }
} }
} }
@@ -1275,7 +1266,7 @@
"query": "label_values(dbbackup_rpo_seconds, instance)", "query": "label_values(dbbackup_rpo_seconds, instance)",
"refId": "StandardVariableQuery" "refId": "StandardVariableQuery"
}, },
"refresh": 1, "refresh": 2,
"regex": "", "regex": "",
"skipUrlSync": false, "skipUrlSync": false,
"sort": 1, "sort": 1,

View File

@@ -16,6 +16,57 @@ import (
"github.com/shirou/gopsutil/v3/mem" "github.com/shirou/gopsutil/v3/mem"
) )
// CalculateOptimalParallel returns the recommended number of parallel restore
// workers for this machine, derived from the CPU core count and the RAM that
// is currently available.
//
// The recommendation is min(available RAM / 3GB, CPU cores), clamped to the
// range [1, 16], and then halved when overall memory usage is above 80%.
// If memory statistics cannot be read, it falls back to half of the CPU cores,
// clamped to the same [1, 16] range. The result is always at least 1.
//
// This is a standalone function that can be called from anywhere.
func CalculateOptimalParallel() int {
	// Each pg_restore worker needs approximately 2-4GB of RAM;
	// budget a conservative 3GB per worker to avoid OOM.
	const memPerWorkerGB = 3.0
	// Upper bound on workers; beyond this the returns diminish.
	const maxWorkers = 16

	cpuCores := runtime.NumCPU()

	vmem, err := mem.VirtualMemory()
	if err != nil {
		// Memory detection failed: use half of the CPU cores, but keep the
		// same bounds as the normal path so many-core hosts stay capped.
		fallback := cpuCores / 2
		if fallback < 1 {
			fallback = 1
		}
		if fallback > maxWorkers {
			fallback = maxWorkers
		}
		return fallback
	}

	memAvailableGB := float64(vmem.Available) / (1024 * 1024 * 1024)
	maxByMem := int(memAvailableGB / memPerWorkerGB)

	// Take the tighter of the memory and CPU limits.
	recommended := maxByMem
	if cpuCores < recommended {
		recommended = cpuCores
	}

	// Apply sensible bounds.
	if recommended < 1 {
		recommended = 1
	}
	if recommended > maxWorkers {
		recommended = maxWorkers
	}

	// Under high memory pressure (>80% used), halve the parallelism.
	// recommended is at least 2 in this branch, so the result stays >= 1.
	if vmem.UsedPercent > 80 && recommended > 1 {
		recommended /= 2
	}

	return recommended
}
// PreflightResult contains all preflight check results // PreflightResult contains all preflight check results
type PreflightResult struct { type PreflightResult struct {
// Linux system checks // Linux system checks
@@ -40,6 +91,8 @@ type LinuxChecks struct {
MemTotal uint64 // Total RAM in bytes MemTotal uint64 // Total RAM in bytes
MemAvailable uint64 // Available RAM in bytes MemAvailable uint64 // Available RAM in bytes
MemUsedPercent float64 // Memory usage percentage MemUsedPercent float64 // Memory usage percentage
CPUCores int // Number of CPU cores
RecommendedParallel int // Auto-calculated optimal parallel count
ShmMaxOK bool // Is shmmax sufficient? ShmMaxOK bool // Is shmmax sufficient?
ShmAllOK bool // Is shmall sufficient? ShmAllOK bool // Is shmall sufficient?
MemAvailableOK bool // Is available RAM sufficient? MemAvailableOK bool // Is available RAM sufficient?
@@ -100,6 +153,7 @@ func (e *Engine) RunPreflightChecks(ctx context.Context, dumpsDir string, entrie
// checkSystemResources uses gopsutil for cross-platform system checks // checkSystemResources uses gopsutil for cross-platform system checks
func (e *Engine) checkSystemResources(result *PreflightResult) { func (e *Engine) checkSystemResources(result *PreflightResult) {
result.Linux.IsLinux = runtime.GOOS == "linux" result.Linux.IsLinux = runtime.GOOS == "linux"
result.Linux.CPUCores = runtime.NumCPU()
// Get memory info (works on Linux, macOS, Windows, BSD) // Get memory info (works on Linux, macOS, Windows, BSD)
if vmem, err := mem.VirtualMemory(); err == nil { if vmem, err := mem.VirtualMemory(); err == nil {
@@ -118,6 +172,9 @@ func (e *Engine) checkSystemResources(result *PreflightResult) {
e.log.Warn("Could not detect system memory", "error", err) e.log.Warn("Could not detect system memory", "error", err)
} }
// Calculate recommended parallel based on resources
result.Linux.RecommendedParallel = e.calculateRecommendedParallel(result)
// Linux-specific kernel checks (shmmax, shmall) // Linux-specific kernel checks (shmmax, shmall)
if result.Linux.IsLinux { if result.Linux.IsLinux {
e.checkLinuxKernel(result) e.checkLinuxKernel(result)
@@ -434,6 +491,56 @@ func (e *Engine) calculateRecommendations(result *PreflightResult) {
"recommended_locks", lockBoost) "recommended_locks", lockBoost)
} }
// calculateRecommendedParallel determines the recommended number of parallel
// pg_restore workers from the resources already recorded in result.
//
// It reads result.Linux.CPUCores (falling back to runtime.NumCPU when that is
// zero), result.Linux.MemAvailable and result.Linux.MemUsedPercent. The
// recommendation is min(available RAM / 3GB, CPU cores), clamped to [1, 16],
// and halved when memory usage is above 80%. When MemAvailable is zero the
// memory figure is treated as unknown and half of the CPU cores is used
// instead, so an undetected memory size does not force sequential restores.
// The chosen value is logged and returned; it is always at least 1.
func (e *Engine) calculateRecommendedParallel(result *PreflightResult) int {
	// Each pg_restore worker needs approximately 2-4GB of RAM;
	// budget a conservative 3GB per worker to avoid OOM.
	const memPerWorkerGB = 3.0
	// Upper bound on workers; beyond this the returns diminish.
	const maxWorkers = 16

	cpuCores := result.Linux.CPUCores
	if cpuCores == 0 {
		cpuCores = runtime.NumCPU()
	}

	memAvailableGB := float64(result.Linux.MemAvailable) / (1024 * 1024 * 1024)
	maxByMem := int(memAvailableGB / memPerWorkerGB)
	maxByCPU := cpuCores

	var recommended int
	if result.Linux.MemAvailable == 0 {
		// NOTE(review): assumes MemAvailable stays 0 only when memory
		// detection failed — confirm against checkSystemResources.
		recommended = cpuCores / 2
	} else {
		// Take the tighter of the memory and CPU limits.
		recommended = maxByMem
		if maxByCPU < recommended {
			recommended = maxByCPU
		}
	}

	// Apply sensible bounds.
	if recommended < 1 {
		recommended = 1
	}
	if recommended > maxWorkers {
		recommended = maxWorkers
	}

	// Under high memory pressure (>80% used), halve the parallelism.
	// recommended is at least 2 in this branch, so the result stays >= 1.
	if result.Linux.MemUsedPercent > 80 && recommended > 1 {
		recommended /= 2
	}

	e.log.Info("Calculated recommended parallel",
		"cpu_cores", cpuCores,
		"mem_available_gb", fmt.Sprintf("%.1f", memAvailableGB),
		"max_by_mem", maxByMem,
		"max_by_cpu", maxByCPU,
		"recommended", recommended)

	return recommended
}
// printPreflightSummary prints a nice summary of all checks // printPreflightSummary prints a nice summary of all checks
func (e *Engine) printPreflightSummary(result *PreflightResult) { func (e *Engine) printPreflightSummary(result *PreflightResult) {
fmt.Println() fmt.Println()
@@ -446,6 +553,8 @@ func (e *Engine) printPreflightSummary(result *PreflightResult) {
printCheck("Total RAM", humanize.Bytes(result.Linux.MemTotal), true) printCheck("Total RAM", humanize.Bytes(result.Linux.MemTotal), true)
printCheck("Available RAM", humanize.Bytes(result.Linux.MemAvailable), result.Linux.MemAvailableOK || result.Linux.MemAvailable == 0) printCheck("Available RAM", humanize.Bytes(result.Linux.MemAvailable), result.Linux.MemAvailableOK || result.Linux.MemAvailable == 0)
printCheck("Memory Usage", fmt.Sprintf("%.1f%%", result.Linux.MemUsedPercent), result.Linux.MemUsedPercent < 85) printCheck("Memory Usage", fmt.Sprintf("%.1f%%", result.Linux.MemUsedPercent), result.Linux.MemUsedPercent < 85)
printCheck("CPU Cores", fmt.Sprintf("%d", result.Linux.CPUCores), true)
printCheck("Recommended Parallel", fmt.Sprintf("%d (auto-calculated)", result.Linux.RecommendedParallel), true)
// Linux-specific kernel checks // Linux-specific kernel checks
if result.Linux.IsLinux && result.Linux.ShmMax > 0 { if result.Linux.IsLinux && result.Linux.ShmMax > 0 {