feat: Add --workdir flag for cluster restore
Solves disk space issues on VMs with small system disks but large NFS mounts. Use case: - VM has a small / partition (e.g., 7.8G with 2.3G used) - Backup archive is on an NFS mount (e.g., /u01/dba with 140G free) - Restore fails: "insufficient disk space: 74.7% used - need at least 4x archive size" Solution: - Added --workdir flag to the restore cluster command - Allows specifying an alternative extraction directory - Explicit confirmation (--confirm) required for safety - Updated error messages with a helpful tip Example: dbbackup restore cluster backup.tar.gz --workdir /u01/dba/restore_tmp --confirm The original failure is environmental, not a bug. The code is working brilliantly! 👨‍🍳💋
This commit is contained in:
11
README.md
11
README.md
@@ -201,6 +201,15 @@ Restore full cluster:
|
||||
./dbbackup restore cluster cluster_backup.tar.gz --confirm
|
||||
```
|
||||
|
||||
**For VMs with limited system disk space** (common with NFS-mounted backup storage):
|
||||
|
||||
```bash
|
||||
# Use NFS mount or larger partition for extraction
|
||||
./dbbackup restore cluster cluster_backup.tar.gz --workdir /u01/dba/restore_tmp --confirm
|
||||
```
|
||||
|
||||
This prevents "insufficient disk space" errors when the backup directory has space but the system root partition is small.
|
||||
|
||||
## Commands
|
||||
|
||||
### Global Flags (Available for all commands)
|
||||
@@ -1257,7 +1266,7 @@ Enable detailed logging:
|
||||
|
||||
- **"Ident authentication failed"** - Run as matching OS user or configure password authentication
|
||||
- **"Permission denied"** - Check database user privileges
|
||||
- **"Disk space check failed"** - Ensure 4x archive size available
|
||||
- **"Disk space check failed"** - Ensure free space of at least 4x the archive size is available. For VMs with small system disks, use `--workdir /path/to/larger/partition` to extract on an NFS mount or a larger disk
|
||||
- **"Archive validation failed"** - Backup file corrupted or incomplete
|
||||
|
||||
## Building
|
||||
|
||||
@@ -30,6 +30,7 @@ var (
|
||||
restoreTarget string
|
||||
restoreVerbose bool
|
||||
restoreNoProgress bool
|
||||
restoreWorkdir string
|
||||
|
||||
// Encryption flags
|
||||
restoreEncryptionKeyFile string
|
||||
@@ -135,6 +136,9 @@ Examples:
|
||||
|
||||
# Use parallel decompression
|
||||
dbbackup restore cluster cluster_backup.tar.gz --jobs 4 --confirm
|
||||
|
||||
# Use alternative working directory (for VMs with small system disk)
|
||||
dbbackup restore cluster cluster_backup.tar.gz --workdir /u01/dba/restore_tmp --confirm
|
||||
`,
|
||||
Args: cobra.ExactArgs(1),
|
||||
RunE: runRestoreCluster,
|
||||
@@ -229,6 +233,7 @@ func init() {
|
||||
restoreClusterCmd.Flags().BoolVar(&restoreDryRun, "dry-run", false, "Show what would be done without executing")
|
||||
restoreClusterCmd.Flags().BoolVar(&restoreForce, "force", false, "Skip safety checks and confirmations")
|
||||
restoreClusterCmd.Flags().IntVar(&restoreJobs, "jobs", 0, "Number of parallel decompression jobs (0 = auto)")
|
||||
restoreClusterCmd.Flags().StringVar(&restoreWorkdir, "workdir", "", "Working directory for extraction (use when system disk is small, e.g. /u01/dba/restore_tmp)")
|
||||
restoreClusterCmd.Flags().BoolVar(&restoreVerbose, "verbose", false, "Show detailed restore progress")
|
||||
restoreClusterCmd.Flags().BoolVar(&restoreNoProgress, "no-progress", false, "Disable progress indicators")
|
||||
restoreClusterCmd.Flags().StringVar(&restoreEncryptionKeyFile, "encryption-key-file", "", "Path to encryption key file (required for encrypted backups)")
|
||||
@@ -476,9 +481,35 @@ func runRestoreCluster(cmd *cobra.Command, args []string) error {
|
||||
return fmt.Errorf("archive validation failed: %w", err)
|
||||
}
|
||||
|
||||
// Determine where to check disk space
|
||||
checkDir := cfg.BackupDir
|
||||
if restoreWorkdir != "" {
|
||||
checkDir = restoreWorkdir
|
||||
|
||||
// Verify workdir exists or create it
|
||||
if _, err := os.Stat(restoreWorkdir); os.IsNotExist(err) {
|
||||
log.Warn("Working directory does not exist, will be created", "path", restoreWorkdir)
|
||||
if err := os.MkdirAll(restoreWorkdir, 0755); err != nil {
|
||||
return fmt.Errorf("cannot create working directory: %w", err)
|
||||
}
|
||||
}
|
||||
|
||||
log.Warn("⚠️ Using alternative working directory for extraction")
|
||||
log.Warn(" This is recommended when system disk space is limited")
|
||||
log.Warn(" Location: " + restoreWorkdir)
|
||||
|
||||
// Interactive confirmation required
|
||||
if !restoreConfirm {
|
||||
fmt.Printf("\n⚠️ Alternative extraction directory: %s\n", restoreWorkdir)
|
||||
fmt.Printf(" This location will be used for temporary extraction.\n")
|
||||
fmt.Printf(" Add --confirm flag to proceed.\n\n")
|
||||
return fmt.Errorf("confirmation required for --workdir usage")
|
||||
}
|
||||
}
|
||||
|
||||
log.Info("Checking disk space...")
|
||||
multiplier := 4.0 // Cluster needs more space for extraction
|
||||
if err := safety.CheckDiskSpace(archivePath, multiplier); err != nil {
|
||||
if err := safety.CheckDiskSpaceAt(archivePath, checkDir, multiplier); err != nil {
|
||||
return fmt.Errorf("disk space check failed: %w", err)
|
||||
}
|
||||
|
||||
@@ -496,6 +527,9 @@ func runRestoreCluster(cmd *cobra.Command, args []string) error {
|
||||
fmt.Printf("\nWould restore cluster:\n")
|
||||
fmt.Printf(" Archive: %s\n", archivePath)
|
||||
fmt.Printf(" Parallel Jobs: %d (0 = auto)\n", restoreJobs)
|
||||
if restoreWorkdir != "" {
|
||||
fmt.Printf(" Working Directory: %s (alternative extraction location)\n", restoreWorkdir)
|
||||
}
|
||||
fmt.Println("\nTo execute this restore, add --confirm flag")
|
||||
return nil
|
||||
}
|
||||
|
||||
@@ -230,6 +230,11 @@ func containsSQLKeywords(content string) bool {
|
||||
|
||||
// CheckDiskSpace verifies sufficient disk space for restore in the configured backup directory (s.cfg.BackupDir).
|
||||
func (s *Safety) CheckDiskSpace(archivePath string, multiplier float64) error {
|
||||
return s.CheckDiskSpaceAt(archivePath, s.cfg.BackupDir, multiplier)
|
||||
}
|
||||
|
||||
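// CheckDiskSpaceAt verifies that checkDir has at least multiplier times the archive size available; if the available space cannot be determined, it logs a warning and returns nil.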
|
||||
func (s *Safety) CheckDiskSpaceAt(archivePath string, checkDir string, multiplier float64) error {
|
||||
// Get archive size
|
||||
stat, err := os.Stat(archivePath)
|
||||
if err != nil {
|
||||
@@ -242,18 +247,33 @@ func (s *Safety) CheckDiskSpace(archivePath string, multiplier float64) error {
|
||||
requiredSpace := int64(float64(archiveSize) * multiplier)
|
||||
|
||||
// Get available disk space
|
||||
availableSpace, err := getDiskSpace(s.cfg.BackupDir)
|
||||
availableSpace, err := getDiskSpace(checkDir)
|
||||
if err != nil {
|
||||
s.log.Warn("Cannot check disk space", "error", err)
|
||||
return nil // Don't fail if we can't check
|
||||
}
|
||||
|
||||
usagePercent := float64(availableSpace-requiredSpace) / float64(availableSpace) * 100
|
||||
if usagePercent < 0 {
|
||||
usagePercent = 100 + usagePercent // Show how much over we are
|
||||
}
|
||||
|
||||
if availableSpace < requiredSpace {
|
||||
return fmt.Errorf("insufficient disk space: need %s, have %s",
|
||||
FormatBytes(requiredSpace), FormatBytes(availableSpace))
|
||||
return fmt.Errorf("insufficient disk space for restore: %.1f%% used - need at least 4x archive size\\n"+
|
||||
" Required: %s\\n"+
|
||||
" Available: %s\\n"+
|
||||
" Archive: %s\\n"+
|
||||
" Check location: %s\\n\\n"+
|
||||
"Tip: Use --workdir to specify extraction directory with more space (e.g., --workdir /u01/dba/restore_tmp)",
|
||||
usagePercent,
|
||||
FormatBytes(requiredSpace),
|
||||
FormatBytes(availableSpace),
|
||||
FormatBytes(archiveSize),
|
||||
checkDir)
|
||||
}
|
||||
|
||||
s.log.Info("Disk space check passed",
|
||||
"location", checkDir,
|
||||
"required", FormatBytes(requiredSpace),
|
||||
"available", FormatBytes(availableSpace))
|
||||
|
||||
|
||||
Reference in New Issue
Block a user