feat: Add --workdir flag for cluster restore

Solves disk space issues on VMs with small system disks but large NFS mounts.

Use case:
- VM has small / partition (e.g., 7.8G with 2.3G used)
- Backup archive on NFS mount (e.g., /u01/dba with 140G free)
- Restore fails: "insufficient disk space: 74.7% used - need at least 4x archive size"

Solution:
- Added --workdir flag to restore cluster command
- Allows specifying alternative extraction directory
- Using --workdir requires the --confirm flag; without it the command prints a notice and exits with an error
- Updated error messages with helpful tip

Example:
  dbbackup restore cluster backup.tar.gz --workdir /u01/dba/restore_tmp --confirm

The original failure is environmental (a small system disk), not a bug in the restore code, which behaves as designed.
This commit is contained in:
2025-11-28 11:24:19 +00:00
parent 57ba8c7c1e
commit e581f0a357
3 changed files with 70 additions and 7 deletions

View File

@@ -201,6 +201,15 @@ Restore full cluster:
./dbbackup restore cluster cluster_backup.tar.gz --confirm
```
**For VMs with limited system disk space** (common with NFS-mounted backup storage):
```bash
# Use NFS mount or larger partition for extraction
./dbbackup restore cluster cluster_backup.tar.gz --workdir /u01/dba/restore_tmp --confirm
```
This prevents "insufficient disk space" errors when the backup directory has space but the system root partition is small.
## Commands
### Global Flags (Available for all commands)
@@ -1257,7 +1266,7 @@ Enable detailed logging:
- **"Ident authentication failed"** - Run as matching OS user or configure password authentication
- **"Permission denied"** - Check database user privileges
- **"Disk space check failed"** - Ensure 4x archive size available
- **"Disk space check failed"** - Ensure 4x the archive size is available. For VMs with small system disks, use `--workdir /path/to/larger/partition` to extract on an NFS mount or a larger disk
- **"Archive validation failed"** - Backup file corrupted or incomplete
## Building

View File

@@ -30,6 +30,7 @@ var (
restoreTarget string
restoreVerbose bool
restoreNoProgress bool
restoreWorkdir string
// Encryption flags
restoreEncryptionKeyFile string
@@ -135,6 +136,9 @@ Examples:
# Use parallel decompression
dbbackup restore cluster cluster_backup.tar.gz --jobs 4 --confirm
# Use alternative working directory (for VMs with small system disk)
dbbackup restore cluster cluster_backup.tar.gz --workdir /u01/dba/restore_tmp --confirm
`,
Args: cobra.ExactArgs(1),
RunE: runRestoreCluster,
@@ -229,6 +233,7 @@ func init() {
restoreClusterCmd.Flags().BoolVar(&restoreDryRun, "dry-run", false, "Show what would be done without executing")
restoreClusterCmd.Flags().BoolVar(&restoreForce, "force", false, "Skip safety checks and confirmations")
restoreClusterCmd.Flags().IntVar(&restoreJobs, "jobs", 0, "Number of parallel decompression jobs (0 = auto)")
restoreClusterCmd.Flags().StringVar(&restoreWorkdir, "workdir", "", "Working directory for extraction (use when system disk is small, e.g. /u01/dba/restore_tmp)")
restoreClusterCmd.Flags().BoolVar(&restoreVerbose, "verbose", false, "Show detailed restore progress")
restoreClusterCmd.Flags().BoolVar(&restoreNoProgress, "no-progress", false, "Disable progress indicators")
restoreClusterCmd.Flags().StringVar(&restoreEncryptionKeyFile, "encryption-key-file", "", "Path to encryption key file (required for encrypted backups)")
@@ -476,9 +481,35 @@ func runRestoreCluster(cmd *cobra.Command, args []string) error {
return fmt.Errorf("archive validation failed: %w", err)
}
// Determine where to check disk space
checkDir := cfg.BackupDir
if restoreWorkdir != "" {
checkDir = restoreWorkdir
// Verify workdir exists or create it
if _, err := os.Stat(restoreWorkdir); os.IsNotExist(err) {
log.Warn("Working directory does not exist, will be created", "path", restoreWorkdir)
if err := os.MkdirAll(restoreWorkdir, 0755); err != nil {
return fmt.Errorf("cannot create working directory: %w", err)
}
}
log.Warn("⚠️ Using alternative working directory for extraction")
log.Warn(" This is recommended when system disk space is limited")
log.Warn(" Location: " + restoreWorkdir)
// Interactive confirmation required
if !restoreConfirm {
fmt.Printf("\n⚠ Alternative extraction directory: %s\n", restoreWorkdir)
fmt.Printf(" This location will be used for temporary extraction.\n")
fmt.Printf(" Add --confirm flag to proceed.\n\n")
return fmt.Errorf("confirmation required for --workdir usage")
}
}
log.Info("Checking disk space...")
multiplier := 4.0 // Cluster needs more space for extraction
if err := safety.CheckDiskSpace(archivePath, multiplier); err != nil {
if err := safety.CheckDiskSpaceAt(archivePath, checkDir, multiplier); err != nil {
return fmt.Errorf("disk space check failed: %w", err)
}
@@ -496,6 +527,9 @@ func runRestoreCluster(cmd *cobra.Command, args []string) error {
fmt.Printf("\nWould restore cluster:\n")
fmt.Printf(" Archive: %s\n", archivePath)
fmt.Printf(" Parallel Jobs: %d (0 = auto)\n", restoreJobs)
if restoreWorkdir != "" {
fmt.Printf(" Working Directory: %s (alternative extraction location)\n", restoreWorkdir)
}
fmt.Println("\nTo execute this restore, add --confirm flag")
return nil
}

View File

@@ -230,6 +230,11 @@ func containsSQLKeywords(content string) bool {
// CheckDiskSpace verifies that there is enough free disk space to restore
// archivePath, measuring free space at the configured backup directory
// (s.cfg.BackupDir). It is a convenience wrapper around CheckDiskSpaceAt;
// archivePath and multiplier are forwarded unchanged, and the error returned
// by CheckDiskSpaceAt is returned as-is.
func (s *Safety) CheckDiskSpace(archivePath string, multiplier float64) error {
	// Default check location: the backup directory from the configuration.
	backupDir := s.cfg.BackupDir
	return s.CheckDiskSpaceAt(archivePath, backupDir, multiplier)
}
// CheckDiskSpaceAt verifies sufficient disk space at a specific directory
func (s *Safety) CheckDiskSpaceAt(archivePath string, checkDir string, multiplier float64) error {
// Get archive size
stat, err := os.Stat(archivePath)
if err != nil {
@@ -242,18 +247,33 @@ func (s *Safety) CheckDiskSpace(archivePath string, multiplier float64) error {
requiredSpace := int64(float64(archiveSize) * multiplier)
// Get available disk space
availableSpace, err := getDiskSpace(s.cfg.BackupDir)
availableSpace, err := getDiskSpace(checkDir)
if err != nil {
s.log.Warn("Cannot check disk space", "error", err)
return nil // Don't fail if we can't check
}
usagePercent := float64(availableSpace-requiredSpace) / float64(availableSpace) * 100
if usagePercent < 0 {
usagePercent = 100 + usagePercent // Show how much over we are
}
if availableSpace < requiredSpace {
return fmt.Errorf("insufficient disk space: need %s, have %s",
FormatBytes(requiredSpace), FormatBytes(availableSpace))
return fmt.Errorf("insufficient disk space for restore: %.1f%% used - need at least 4x archive size\\n"+
" Required: %s\\n"+
" Available: %s\\n"+
" Archive: %s\\n"+
" Check location: %s\\n\\n"+
"Tip: Use --workdir to specify extraction directory with more space (e.g., --workdir /u01/dba/restore_tmp)",
usagePercent,
FormatBytes(requiredSpace),
FormatBytes(availableSpace),
FormatBytes(archiveSize),
checkDir)
}
s.log.Info("Disk space check passed",
"location", checkDir,
"required", FormatBytes(requiredSpace),
"available", FormatBytes(availableSpace))