Compare commits
3 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| 62d58c77af | |||
| c5be9bcd2b | |||
| b120f1507e |
@@ -4,8 +4,8 @@ This directory contains pre-compiled binaries for the DB Backup Tool across mult
|
|||||||
|
|
||||||
## Build Information
|
## Build Information
|
||||||
- **Version**: 3.42.50
|
- **Version**: 3.42.50
|
||||||
- **Build Time**: 2026-01-17_06:25:57_UTC
|
- **Build Time**: 2026-01-17_12:25:20_UTC
|
||||||
- **Git Commit**: 4ea3ec2
|
- **Git Commit**: c5be9bc
|
||||||
|
|
||||||
## Recent Updates (v1.1.0)
|
## Recent Updates (v1.1.0)
|
||||||
- ✅ Fixed TUI progress display with line-by-line output
|
- ✅ Fixed TUI progress display with line-by-line output
|
||||||
|
|||||||
@@ -290,7 +290,7 @@ func init() {
|
|||||||
restoreClusterCmd.Flags().BoolVar(&restoreForce, "force", false, "Skip safety checks and confirmations")
|
restoreClusterCmd.Flags().BoolVar(&restoreForce, "force", false, "Skip safety checks and confirmations")
|
||||||
restoreClusterCmd.Flags().BoolVar(&restoreCleanCluster, "clean-cluster", false, "Drop all existing user databases before restore (disaster recovery)")
|
restoreClusterCmd.Flags().BoolVar(&restoreCleanCluster, "clean-cluster", false, "Drop all existing user databases before restore (disaster recovery)")
|
||||||
restoreClusterCmd.Flags().IntVar(&restoreJobs, "jobs", 0, "Number of parallel decompression jobs (0 = auto)")
|
restoreClusterCmd.Flags().IntVar(&restoreJobs, "jobs", 0, "Number of parallel decompression jobs (0 = auto)")
|
||||||
restoreClusterCmd.Flags().IntVar(&restoreParallelDBs, "parallel-dbs", 0, "Number of databases to restore in parallel (0 = use config default, 1 = sequential)")
|
restoreClusterCmd.Flags().IntVar(&restoreParallelDBs, "parallel-dbs", 0, "Number of databases to restore in parallel (0 = use config default, 1 = sequential, -1 = auto-detect based on CPU/RAM)")
|
||||||
restoreClusterCmd.Flags().StringVar(&restoreWorkdir, "workdir", "", "Working directory for extraction (use when system disk is small, e.g. /mnt/storage/restore_tmp)")
|
restoreClusterCmd.Flags().StringVar(&restoreWorkdir, "workdir", "", "Working directory for extraction (use when system disk is small, e.g. /mnt/storage/restore_tmp)")
|
||||||
restoreClusterCmd.Flags().BoolVar(&restoreVerbose, "verbose", false, "Show detailed restore progress")
|
restoreClusterCmd.Flags().BoolVar(&restoreVerbose, "verbose", false, "Show detailed restore progress")
|
||||||
restoreClusterCmd.Flags().BoolVar(&restoreNoProgress, "no-progress", false, "Disable progress indicators")
|
restoreClusterCmd.Flags().BoolVar(&restoreNoProgress, "no-progress", false, "Disable progress indicators")
|
||||||
@@ -786,7 +786,12 @@ func runRestoreCluster(cmd *cobra.Command, args []string) error {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Override cluster parallelism if --parallel-dbs is specified
|
// Override cluster parallelism if --parallel-dbs is specified
|
||||||
if restoreParallelDBs > 0 {
|
if restoreParallelDBs == -1 {
|
||||||
|
// Auto-detect optimal parallelism based on system resources
|
||||||
|
autoParallel := restore.CalculateOptimalParallel()
|
||||||
|
cfg.ClusterParallelism = autoParallel
|
||||||
|
log.Info("Auto-detected optimal parallelism for database restores", "parallel_dbs", autoParallel, "mode", "auto")
|
||||||
|
} else if restoreParallelDBs > 0 {
|
||||||
cfg.ClusterParallelism = restoreParallelDBs
|
cfg.ClusterParallelism = restoreParallelDBs
|
||||||
log.Info("Using custom parallelism for database restores", "parallel_dbs", restoreParallelDBs)
|
log.Info("Using custom parallelism for database restores", "parallel_dbs", restoreParallelDBs)
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -94,7 +94,7 @@
|
|||||||
"uid": "${DS_PROMETHEUS}"
|
"uid": "${DS_PROMETHEUS}"
|
||||||
},
|
},
|
||||||
"editorMode": "code",
|
"editorMode": "code",
|
||||||
"expr": "dbbackup_rpo_seconds{instance=~\"$instance\"} < 86400",
|
"expr": "dbbackup_rpo_seconds{instance=~\"$instance\"} < bool 604800",
|
||||||
"legendFormat": "{{database}}",
|
"legendFormat": "{{database}}",
|
||||||
"range": true,
|
"range": true,
|
||||||
"refId": "A"
|
"refId": "A"
|
||||||
@@ -711,19 +711,6 @@
|
|||||||
},
|
},
|
||||||
"pluginVersion": "10.2.0",
|
"pluginVersion": "10.2.0",
|
||||||
"targets": [
|
"targets": [
|
||||||
{
|
|
||||||
"datasource": {
|
|
||||||
"type": "prometheus",
|
|
||||||
"uid": "${DS_PROMETHEUS}"
|
|
||||||
},
|
|
||||||
"editorMode": "code",
|
|
||||||
"expr": "dbbackup_rpo_seconds{instance=~\"$instance\"} < 86400",
|
|
||||||
"format": "table",
|
|
||||||
"instant": true,
|
|
||||||
"legendFormat": "__auto",
|
|
||||||
"range": false,
|
|
||||||
"refId": "Status"
|
|
||||||
},
|
|
||||||
{
|
{
|
||||||
"datasource": {
|
"datasource": {
|
||||||
"type": "prometheus",
|
"type": "prometheus",
|
||||||
@@ -769,26 +756,30 @@
|
|||||||
"Time": true,
|
"Time": true,
|
||||||
"Time 1": true,
|
"Time 1": true,
|
||||||
"Time 2": true,
|
"Time 2": true,
|
||||||
"Time 3": true,
|
|
||||||
"__name__": true,
|
"__name__": true,
|
||||||
"__name__ 1": true,
|
"__name__ 1": true,
|
||||||
"__name__ 2": true,
|
"__name__ 2": true,
|
||||||
"__name__ 3": true,
|
|
||||||
"instance 1": true,
|
"instance 1": true,
|
||||||
"instance 2": true,
|
"instance 2": true,
|
||||||
"instance 3": true,
|
|
||||||
"job": true,
|
"job": true,
|
||||||
"job 1": true,
|
"job 1": true,
|
||||||
"job 2": true,
|
"job 2": true,
|
||||||
"job 3": true
|
"engine 1": true,
|
||||||
|
"engine 2": true
|
||||||
|
},
|
||||||
|
"indexByName": {
|
||||||
|
"Database": 0,
|
||||||
|
"Instance": 1,
|
||||||
|
"Engine": 2,
|
||||||
|
"RPO": 3,
|
||||||
|
"Size": 4
|
||||||
},
|
},
|
||||||
"indexByName": {},
|
|
||||||
"renameByName": {
|
"renameByName": {
|
||||||
"Value #RPO": "RPO",
|
"Value #RPO": "RPO",
|
||||||
"Value #Size": "Size",
|
"Value #Size": "Size",
|
||||||
"Value #Status": "Status",
|
|
||||||
"database": "Database",
|
"database": "Database",
|
||||||
"instance": "Instance"
|
"instance": "Instance",
|
||||||
|
"engine": "Engine"
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -1275,7 +1266,7 @@
|
|||||||
"query": "label_values(dbbackup_rpo_seconds, instance)",
|
"query": "label_values(dbbackup_rpo_seconds, instance)",
|
||||||
"refId": "StandardVariableQuery"
|
"refId": "StandardVariableQuery"
|
||||||
},
|
},
|
||||||
"refresh": 1,
|
"refresh": 2,
|
||||||
"regex": "",
|
"regex": "",
|
||||||
"skipUrlSync": false,
|
"skipUrlSync": false,
|
||||||
"sort": 1,
|
"sort": 1,
|
||||||
|
|||||||
@@ -16,6 +16,57 @@ import (
|
|||||||
"github.com/shirou/gopsutil/v3/mem"
|
"github.com/shirou/gopsutil/v3/mem"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
// CalculateOptimalParallel returns the recommended number of parallel workers
|
||||||
|
// based on available system resources (CPU cores and RAM).
|
||||||
|
// This is a standalone function that can be called from anywhere.
|
||||||
|
// Returns 0 if resources cannot be detected.
|
||||||
|
func CalculateOptimalParallel() int {
|
||||||
|
cpuCores := runtime.NumCPU()
|
||||||
|
|
||||||
|
vmem, err := mem.VirtualMemory()
|
||||||
|
if err != nil {
|
||||||
|
// Fallback: use half of CPU cores if memory detection fails
|
||||||
|
if cpuCores > 1 {
|
||||||
|
return cpuCores / 2
|
||||||
|
}
|
||||||
|
return 1
|
||||||
|
}
|
||||||
|
|
||||||
|
memAvailableGB := float64(vmem.Available) / (1024 * 1024 * 1024)
|
||||||
|
|
||||||
|
// Each pg_restore worker needs approximately 2-4GB of RAM
|
||||||
|
// Use conservative 3GB per worker to avoid OOM
|
||||||
|
const memPerWorkerGB = 3.0
|
||||||
|
|
||||||
|
// Calculate limits
|
||||||
|
maxByMem := int(memAvailableGB / memPerWorkerGB)
|
||||||
|
maxByCPU := cpuCores
|
||||||
|
|
||||||
|
// Use the minimum of memory and CPU limits
|
||||||
|
recommended := maxByMem
|
||||||
|
if maxByCPU < recommended {
|
||||||
|
recommended = maxByCPU
|
||||||
|
}
|
||||||
|
|
||||||
|
// Apply sensible bounds
|
||||||
|
if recommended < 1 {
|
||||||
|
recommended = 1
|
||||||
|
}
|
||||||
|
if recommended > 16 {
|
||||||
|
recommended = 16 // Cap at 16 to avoid diminishing returns
|
||||||
|
}
|
||||||
|
|
||||||
|
// If memory pressure is high (>80%), reduce parallelism
|
||||||
|
if vmem.UsedPercent > 80 && recommended > 1 {
|
||||||
|
recommended = recommended / 2
|
||||||
|
if recommended < 1 {
|
||||||
|
recommended = 1
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return recommended
|
||||||
|
}
|
||||||
|
|
||||||
// PreflightResult contains all preflight check results
|
// PreflightResult contains all preflight check results
|
||||||
type PreflightResult struct {
|
type PreflightResult struct {
|
||||||
// Linux system checks
|
// Linux system checks
|
||||||
@@ -40,6 +91,8 @@ type LinuxChecks struct {
|
|||||||
MemTotal uint64 // Total RAM in bytes
|
MemTotal uint64 // Total RAM in bytes
|
||||||
MemAvailable uint64 // Available RAM in bytes
|
MemAvailable uint64 // Available RAM in bytes
|
||||||
MemUsedPercent float64 // Memory usage percentage
|
MemUsedPercent float64 // Memory usage percentage
|
||||||
|
CPUCores int // Number of CPU cores
|
||||||
|
RecommendedParallel int // Auto-calculated optimal parallel count
|
||||||
ShmMaxOK bool // Is shmmax sufficient?
|
ShmMaxOK bool // Is shmmax sufficient?
|
||||||
ShmAllOK bool // Is shmall sufficient?
|
ShmAllOK bool // Is shmall sufficient?
|
||||||
MemAvailableOK bool // Is available RAM sufficient?
|
MemAvailableOK bool // Is available RAM sufficient?
|
||||||
@@ -100,6 +153,7 @@ func (e *Engine) RunPreflightChecks(ctx context.Context, dumpsDir string, entrie
|
|||||||
// checkSystemResources uses gopsutil for cross-platform system checks
|
// checkSystemResources uses gopsutil for cross-platform system checks
|
||||||
func (e *Engine) checkSystemResources(result *PreflightResult) {
|
func (e *Engine) checkSystemResources(result *PreflightResult) {
|
||||||
result.Linux.IsLinux = runtime.GOOS == "linux"
|
result.Linux.IsLinux = runtime.GOOS == "linux"
|
||||||
|
result.Linux.CPUCores = runtime.NumCPU()
|
||||||
|
|
||||||
// Get memory info (works on Linux, macOS, Windows, BSD)
|
// Get memory info (works on Linux, macOS, Windows, BSD)
|
||||||
if vmem, err := mem.VirtualMemory(); err == nil {
|
if vmem, err := mem.VirtualMemory(); err == nil {
|
||||||
@@ -118,6 +172,9 @@ func (e *Engine) checkSystemResources(result *PreflightResult) {
|
|||||||
e.log.Warn("Could not detect system memory", "error", err)
|
e.log.Warn("Could not detect system memory", "error", err)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Calculate recommended parallel based on resources
|
||||||
|
result.Linux.RecommendedParallel = e.calculateRecommendedParallel(result)
|
||||||
|
|
||||||
// Linux-specific kernel checks (shmmax, shmall)
|
// Linux-specific kernel checks (shmmax, shmall)
|
||||||
if result.Linux.IsLinux {
|
if result.Linux.IsLinux {
|
||||||
e.checkLinuxKernel(result)
|
e.checkLinuxKernel(result)
|
||||||
@@ -434,6 +491,56 @@ func (e *Engine) calculateRecommendations(result *PreflightResult) {
|
|||||||
"recommended_locks", lockBoost)
|
"recommended_locks", lockBoost)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// calculateRecommendedParallel determines optimal parallelism based on system resources
|
||||||
|
// Returns the recommended number of parallel workers for pg_restore
|
||||||
|
func (e *Engine) calculateRecommendedParallel(result *PreflightResult) int {
|
||||||
|
cpuCores := result.Linux.CPUCores
|
||||||
|
if cpuCores == 0 {
|
||||||
|
cpuCores = runtime.NumCPU()
|
||||||
|
}
|
||||||
|
|
||||||
|
memAvailableGB := float64(result.Linux.MemAvailable) / (1024 * 1024 * 1024)
|
||||||
|
|
||||||
|
// Each pg_restore worker needs approximately 2-4GB of RAM
|
||||||
|
// Use conservative 3GB per worker to avoid OOM
|
||||||
|
const memPerWorkerGB = 3.0
|
||||||
|
|
||||||
|
// Calculate limits
|
||||||
|
maxByMem := int(memAvailableGB / memPerWorkerGB)
|
||||||
|
maxByCPU := cpuCores
|
||||||
|
|
||||||
|
// Use the minimum of memory and CPU limits
|
||||||
|
recommended := maxByMem
|
||||||
|
if maxByCPU < recommended {
|
||||||
|
recommended = maxByCPU
|
||||||
|
}
|
||||||
|
|
||||||
|
// Apply sensible bounds
|
||||||
|
if recommended < 1 {
|
||||||
|
recommended = 1
|
||||||
|
}
|
||||||
|
if recommended > 16 {
|
||||||
|
recommended = 16 // Cap at 16 to avoid diminishing returns
|
||||||
|
}
|
||||||
|
|
||||||
|
// If memory pressure is high (>80%), reduce parallelism
|
||||||
|
if result.Linux.MemUsedPercent > 80 && recommended > 1 {
|
||||||
|
recommended = recommended / 2
|
||||||
|
if recommended < 1 {
|
||||||
|
recommended = 1
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
e.log.Info("Calculated recommended parallel",
|
||||||
|
"cpu_cores", cpuCores,
|
||||||
|
"mem_available_gb", fmt.Sprintf("%.1f", memAvailableGB),
|
||||||
|
"max_by_mem", maxByMem,
|
||||||
|
"max_by_cpu", maxByCPU,
|
||||||
|
"recommended", recommended)
|
||||||
|
|
||||||
|
return recommended
|
||||||
|
}
|
||||||
|
|
||||||
// printPreflightSummary prints a nice summary of all checks
|
// printPreflightSummary prints a nice summary of all checks
|
||||||
func (e *Engine) printPreflightSummary(result *PreflightResult) {
|
func (e *Engine) printPreflightSummary(result *PreflightResult) {
|
||||||
fmt.Println()
|
fmt.Println()
|
||||||
@@ -446,6 +553,8 @@ func (e *Engine) printPreflightSummary(result *PreflightResult) {
|
|||||||
printCheck("Total RAM", humanize.Bytes(result.Linux.MemTotal), true)
|
printCheck("Total RAM", humanize.Bytes(result.Linux.MemTotal), true)
|
||||||
printCheck("Available RAM", humanize.Bytes(result.Linux.MemAvailable), result.Linux.MemAvailableOK || result.Linux.MemAvailable == 0)
|
printCheck("Available RAM", humanize.Bytes(result.Linux.MemAvailable), result.Linux.MemAvailableOK || result.Linux.MemAvailable == 0)
|
||||||
printCheck("Memory Usage", fmt.Sprintf("%.1f%%", result.Linux.MemUsedPercent), result.Linux.MemUsedPercent < 85)
|
printCheck("Memory Usage", fmt.Sprintf("%.1f%%", result.Linux.MemUsedPercent), result.Linux.MemUsedPercent < 85)
|
||||||
|
printCheck("CPU Cores", fmt.Sprintf("%d", result.Linux.CPUCores), true)
|
||||||
|
printCheck("Recommended Parallel", fmt.Sprintf("%d (auto-calculated)", result.Linux.RecommendedParallel), true)
|
||||||
|
|
||||||
// Linux-specific kernel checks
|
// Linux-specific kernel checks
|
||||||
if result.Linux.IsLinux && result.Linux.ShmMax > 0 {
|
if result.Linux.IsLinux && result.Linux.ShmMax > 0 {
|
||||||
|
|||||||
Reference in New Issue
Block a user