v3.42.9: Fix all timeout bugs and deadlocks
CRITICAL FIXES: - Encryption detection false positive (IsBackupEncrypted returned true for ALL files) - 12 cmd.Wait() deadlocks fixed with channel-based context handling - TUI timeout bugs: 60s->10min for safety checks, 15s->60s for DB listing - diagnose.go timeouts: 60s->5min for tar/pg_restore operations - Panic recovery added to parallel backup/restore goroutines - Variable shadowing fix in restore/engine.go These bugs caused pg_dump backups to fail through TUI for months.
This commit is contained in:
@@ -83,10 +83,10 @@ type backupCompleteMsg struct {
|
||||
|
||||
func executeBackupWithTUIProgress(parentCtx context.Context, cfg *config.Config, log logger.Logger, backupType, dbName string, ratio int) tea.Cmd {
|
||||
return func() tea.Msg {
|
||||
// Use configurable cluster timeout (minutes) from config; default set in config.New()
|
||||
// Use parent context to inherit cancellation from TUI
|
||||
clusterTimeout := time.Duration(cfg.ClusterTimeoutMinutes) * time.Minute
|
||||
ctx, cancel := context.WithTimeout(parentCtx, clusterTimeout)
|
||||
// NO TIMEOUT for backup operations - a backup takes as long as it takes
|
||||
// Large databases can take many hours
|
||||
// Only manual cancellation (Ctrl+C) should stop the backup
|
||||
ctx, cancel := context.WithCancel(parentCtx)
|
||||
defer cancel()
|
||||
|
||||
start := time.Now()
|
||||
|
||||
@@ -53,7 +53,8 @@ type databaseListMsg struct {
|
||||
|
||||
func fetchDatabases(cfg *config.Config, log logger.Logger) tea.Cmd {
|
||||
return func() tea.Msg {
|
||||
ctx, cancel := context.WithTimeout(context.Background(), 15*time.Second)
|
||||
// 60 seconds for database listing - busy servers may be slow
|
||||
ctx, cancel := context.WithTimeout(context.Background(), 60*time.Second)
|
||||
defer cancel()
|
||||
|
||||
dbClient, err := database.New(cfg, log)
|
||||
|
||||
@@ -111,10 +111,10 @@ type restoreCompleteMsg struct {
|
||||
|
||||
func executeRestoreWithTUIProgress(parentCtx context.Context, cfg *config.Config, log logger.Logger, archive ArchiveInfo, targetDB string, cleanFirst, createIfMissing bool, restoreType string, cleanClusterFirst bool, existingDBs []string, saveDebugLog bool) tea.Cmd {
|
||||
return func() tea.Msg {
|
||||
// Use configurable cluster timeout (minutes) from config; default set in config.New()
|
||||
// Use parent context to inherit cancellation from TUI
|
||||
restoreTimeout := time.Duration(cfg.ClusterTimeoutMinutes) * time.Minute
|
||||
ctx, cancel := context.WithTimeout(parentCtx, restoreTimeout)
|
||||
// NO TIMEOUT for restore operations - a restore takes as long as it takes
|
||||
// Large databases with large objects can take many hours
|
||||
// Only manual cancellation (Ctrl+C) should stop the restore
|
||||
ctx, cancel := context.WithCancel(parentCtx)
|
||||
defer cancel()
|
||||
|
||||
start := time.Now()
|
||||
@@ -138,8 +138,8 @@ func executeRestoreWithTUIProgress(parentCtx context.Context, cfg *config.Config
|
||||
// This matches how cluster restore works - uses CLI tools, not database connections
|
||||
droppedCount := 0
|
||||
for _, dbName := range existingDBs {
|
||||
// Create timeout context for each database drop (30 seconds per DB)
|
||||
dropCtx, dropCancel := context.WithTimeout(ctx, 30*time.Second)
|
||||
// Create timeout context for each database drop (5 minutes per DB - large DBs take time)
|
||||
dropCtx, dropCancel := context.WithTimeout(ctx, 5*time.Minute)
|
||||
if err := dropDatabaseCLI(dropCtx, cfg, dbName); err != nil {
|
||||
log.Warn("Failed to drop database", "name", dbName, "error", err)
|
||||
// Continue with other databases
|
||||
|
||||
@@ -106,7 +106,8 @@ type safetyCheckCompleteMsg struct {
|
||||
|
||||
func runSafetyChecks(cfg *config.Config, log logger.Logger, archive ArchiveInfo, targetDB string) tea.Cmd {
|
||||
return func() tea.Msg {
|
||||
ctx, cancel := context.WithTimeout(context.Background(), 60*time.Second)
|
||||
// 10 minutes for safety checks - large archives can take a long time to diagnose
|
||||
ctx, cancel := context.WithTimeout(context.Background(), 10*time.Minute)
|
||||
defer cancel()
|
||||
|
||||
safety := restore.NewSafety(cfg, log)
|
||||
@@ -444,7 +445,7 @@ func (m RestorePreviewModel) View() string {
|
||||
// Advanced Options
|
||||
s.WriteString(archiveHeaderStyle.Render("⚙️ Advanced Options"))
|
||||
s.WriteString("\n")
|
||||
|
||||
|
||||
// Work directory option
|
||||
workDirIcon := "✗"
|
||||
workDirStyle := infoStyle
|
||||
@@ -460,7 +461,7 @@ func (m RestorePreviewModel) View() string {
|
||||
s.WriteString(infoStyle.Render(" ⚠️ Large archives need more space than /tmp may have"))
|
||||
s.WriteString("\n")
|
||||
}
|
||||
|
||||
|
||||
// Debug log option
|
||||
debugIcon := "✗"
|
||||
debugStyle := infoStyle
|
||||
|
||||
@@ -70,7 +70,8 @@ type statusMsg struct {
|
||||
|
||||
func fetchStatus(cfg *config.Config, log logger.Logger) tea.Cmd {
|
||||
return func() tea.Msg {
|
||||
ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
|
||||
// 30 seconds for status check - slow networks or SSL negotiation
|
||||
ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second)
|
||||
defer cancel()
|
||||
|
||||
dbClient, err := database.New(cfg, log)
|
||||
|
||||
Reference in New Issue
Block a user