feat(restore): add weighted progress, pre-extraction disk check, parallel-dbs flag

Three high-value improvements for cluster restore:

1. Weighted progress by database size
   - Progress now shows percentage by data volume, not just count:
     Phase 3/3: Databases (2/7) - 45.2% by size
   - Gives a more accurate ETA for clusters with varied DB sizes
     (see the first sketch after this list)

2. Pre-extraction disk space check
   - Verifies the workdir has at least 3x the archive size free before extracting
   - Prevents partial-extraction failures when the disk fills mid-way
   - Clear error message reporting required vs. available GB
     (a hedged sketch of the check follows this list)

3. --parallel-dbs flag for concurrent restores
   - Usage: dbbackup restore cluster archive.tar.gz --parallel-dbs=4
   - Overrides the CLUSTER_PARALLELISM config setting
   - Set to 1 for sequential restore (safest for large objects);
     see the concurrency sketch below
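
For illustration, the weighting is simply completed bytes over total bytes rather
than finished databases over database count. A standalone sketch with made-up
sizes, not code from this commit:

    // Standalone sketch: why size-weighted progress beats count-based progress.
    package main

    import "fmt"

    func main() {
        // Hypothetical cluster of three databases (sizes in bytes).
        sizes := []int64{10 << 30, 5 << 30, 1 << 30} // 10 GiB, 5 GiB, 1 GiB
        var total int64
        for _, s := range sizes {
            total += s
        }
        done := sizes[0] // suppose the 10 GiB database finishes first
        fmt.Printf("by count: %.1f%%\n", 100.0/3.0)                        // 33.3%
        fmt.Printf("by size:  %.1f%%\n", float64(done*100)/float64(total)) // 62.5%
    }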
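
The disk-check code is not in the hunks shown below. A minimal sketch of the 3x
heuristic, assuming Linux and golang.org/x/sys/unix; checkDiskSpace is a
hypothetical name, not necessarily what the commit uses:

    // Hedged sketch of a pre-extraction free-space check (Linux assumed).
    package main

    import (
        "fmt"
        "os"

        "golang.org/x/sys/unix"
    )

    func checkDiskSpace(workdir string, archiveSize int64) error {
        var st unix.Statfs_t
        if err := unix.Statfs(workdir, &st); err != nil {
            return err
        }
        available := int64(st.Bavail) * st.Bsize // bytes free to unprivileged users
        required := archiveSize * 3              // the commit's heuristic: 3x archive size
        if available < required {
            return fmt.Errorf("insufficient disk space in %s: need %.1f GB, have %.1f GB",
                workdir, float64(required)/1e9, float64(available)/1e9)
        }
        return nil
    }

    func main() {
        if err := checkDiskSpace(os.TempDir(), 2<<30); err != nil {
            fmt.Fprintln(os.Stderr, err)
        }
    }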
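
And a sketch of how --parallel-dbs could bound concurrency using
golang.org/x/sync/errgroup; restoreOne is a hypothetical stand-in for the
per-database restore step, not the engine's actual API:

    // Hedged sketch: bound concurrent per-database restores with errgroup.
    package main

    import (
        "context"
        "fmt"

        "golang.org/x/sync/errgroup"
    )

    func restoreDatabases(ctx context.Context, dbs []string, parallelDBs int) error {
        g, ctx := errgroup.WithContext(ctx)
        g.SetLimit(parallelDBs) // --parallel-dbs=1 degenerates to a sequential restore
        for _, db := range dbs {
            db := db // capture loop variable (pre-Go 1.22)
            g.Go(func() error {
                return restoreOne(ctx, db)
            })
        }
        return g.Wait()
    }

    // restoreOne stands in for the real per-database restore (hypothetical).
    func restoreOne(ctx context.Context, db string) error {
        fmt.Println("restoring", db)
        return nil
    }

    func main() {
        _ = restoreDatabases(context.Background(), []string{"app", "auth", "metrics"}, 4)
    }

With SetLimit(1) the group runs one job at a time, which is why 1 is the safe
setting when large objects make concurrent restores risky.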
Date:   2026-01-16 18:31:12 +01:00
Parent: dd7c4da0eb
Commit: 698b8a761c
5 changed files with 111 additions and 14 deletions

@@ -159,6 +159,10 @@ type sharedProgressState struct {
     overallPhase   int
     extractionDone bool
+    // Weighted progress by database sizes (bytes)
+    dbBytesTotal int64 // Total bytes across all databases
+    dbBytesDone  int64 // Bytes completed (sum of finished DB sizes)
     // Rolling window for speed calculation
     speedSamples []restoreSpeedSample
 }
@@ -186,12 +190,12 @@ func clearCurrentRestoreProgress() {
     currentRestoreProgressState = nil
 }
 
-func getCurrentRestoreProgress() (bytesTotal, bytesDone int64, description string, hasUpdate bool, dbTotal, dbDone int, speed float64, dbPhaseElapsed, dbAvgPerDB time.Duration, currentDB string, overallPhase int, extractionDone bool) {
+func getCurrentRestoreProgress() (bytesTotal, bytesDone int64, description string, hasUpdate bool, dbTotal, dbDone int, speed float64, dbPhaseElapsed, dbAvgPerDB time.Duration, currentDB string, overallPhase int, extractionDone bool, dbBytesTotal, dbBytesDone int64) {
     currentRestoreProgressMu.Lock()
     defer currentRestoreProgressMu.Unlock()
     if currentRestoreProgressState == nil {
-        return 0, 0, "", false, 0, 0, 0, 0, 0, "", 0, false
+        return 0, 0, "", false, 0, 0, 0, 0, 0, "", 0, false, 0, 0
     }
     currentRestoreProgressState.mu.Lock()
@@ -205,7 +209,8 @@ func getCurrentRestoreProgress() (bytesTotal, bytesDone int64, description strin
         currentRestoreProgressState.dbTotal, currentRestoreProgressState.dbDone, speed,
         currentRestoreProgressState.dbPhaseElapsed, currentRestoreProgressState.dbAvgPerDB,
         currentRestoreProgressState.currentDB, currentRestoreProgressState.overallPhase,
-        currentRestoreProgressState.extractionDone
+        currentRestoreProgressState.extractionDone,
+        currentRestoreProgressState.dbBytesTotal, currentRestoreProgressState.dbBytesDone
 }
 
 // calculateRollingSpeed calculates speed from recent samples (last 5 seconds)
@@ -359,6 +364,20 @@ func executeRestoreWithTUIProgress(parentCtx context.Context, cfg *config.Config
         progressState.bytesDone = 0
     })
 
+    // Set up weighted (bytes-based) progress callback for accurate cluster restore progress
+    engine.SetDatabaseProgressByBytesCallback(func(bytesDone, bytesTotal int64, dbName string, dbDone, dbTotal int) {
+        progressState.mu.Lock()
+        defer progressState.mu.Unlock()
+        progressState.dbBytesDone = bytesDone
+        progressState.dbBytesTotal = bytesTotal
+        progressState.dbDone = dbDone
+        progressState.dbTotal = dbTotal
+        progressState.currentDB = dbName
+        progressState.overallPhase = 3
+        progressState.extractionDone = true
+        progressState.hasUpdate = true
+    })
+
     // Store progress state in a package-level variable for the ticker to access
     // This is a workaround because tea messages can't be sent from callbacks
     setCurrentRestoreProgress(progressState)
@@ -412,7 +431,7 @@ func (m RestoreExecutionModel) Update(msg tea.Msg) (tea.Model, tea.Cmd) {
         m.elapsed = time.Since(m.startTime)
 
         // Poll shared progress state for real-time updates
-        bytesTotal, bytesDone, description, hasUpdate, dbTotal, dbDone, speed, dbPhaseElapsed, dbAvgPerDB, currentDB, overallPhase, extractionDone := getCurrentRestoreProgress()
+        bytesTotal, bytesDone, description, hasUpdate, dbTotal, dbDone, speed, dbPhaseElapsed, dbAvgPerDB, currentDB, overallPhase, extractionDone, dbBytesTotal, dbBytesDone := getCurrentRestoreProgress()
         if hasUpdate && bytesTotal > 0 && !extractionDone {
             // Phase 1: Extraction
             m.bytesTotal = bytesTotal
@@ -443,8 +462,16 @@ func (m RestoreExecutionModel) Update(msg tea.Msg) (tea.Model, tea.Cmd) {
             } else {
                 m.status = "Finalizing..."
             }
-            m.phase = fmt.Sprintf("Phase 3/3: Databases (%d/%d)", dbDone, dbTotal)
-            m.progress = int((dbDone * 100) / dbTotal)
+            // Use weighted progress by bytes if available, otherwise use count
+            if dbBytesTotal > 0 {
+                weightedPercent := int((dbBytesDone * 100) / dbBytesTotal)
+                m.phase = fmt.Sprintf("Phase 3/3: Databases (%d/%d) - %.1f%% by size", dbDone, dbTotal, float64(dbBytesDone*100)/float64(dbBytesTotal))
+                m.progress = weightedPercent
+            } else {
+                m.phase = fmt.Sprintf("Phase 3/3: Databases (%d/%d)", dbDone, dbTotal)
+                m.progress = int((dbDone * 100) / dbTotal)
+            }
         } else if hasUpdate && extractionDone && dbTotal == 0 {
             // Phase 2: Globals restore (brief phase between extraction and databases)
             m.overallPhase = 2