Files
dbbackup/internal/rto/calculator.go
Alexander Renz f69bfe7071 feat: Add enterprise DBA features for production reliability
New features implemented:

1. Backup Catalog (internal/catalog/)
   - SQLite-based backup tracking
   - Gap detection and RPO monitoring
   - Search and statistics
   - Filesystem sync

2. DR Drill Testing (internal/drill/)
   - Automated restore testing in Docker containers
   - Database validation with custom queries
   - Catalog integration for drill-tested status

3. Smart Notifications (internal/notify/)
   - Event batching with configurable intervals
   - Time-based escalation policies
   - HTML/text/Slack templates

4. Compliance Reports (internal/report/)
   - SOC2, GDPR, HIPAA, PCI-DSS, ISO27001 frameworks
   - Evidence collection from catalog
   - JSON, Markdown, HTML output formats

5. RTO/RPO Calculator (internal/rto/)
   - Recovery objective analysis
   - RTO breakdown by phase
   - Recommendations for improvement

6. Replica-Aware Backup (internal/replica/)
   - Topology detection for PostgreSQL/MySQL
   - Automatic replica selection
   - Configurable selection strategies

7. Parallel Table Backup (internal/parallel/)
   - Concurrent table dumps
   - Worker pool with progress tracking
   - Large table optimization

8. MySQL/MariaDB PITR (internal/pitr/)
   - Binary log parsing and replay
   - Point-in-time recovery support
   - Transaction filtering

CLI commands added: catalog, drill, report, rto

All changes support the goal: reliable 3 AM database recovery.
2025-12-13 20:28:55 +01:00


// Package rto provides RTO/RPO calculation and analysis
package rto

import (
	"context"
	"fmt"
	"sort"
	"time"

	"dbbackup/internal/catalog"
)
// Calculator calculates RTO and RPO metrics
type Calculator struct {
	catalog catalog.Catalog
	config  Config
}

// Config configures RTO/RPO calculations
type Config struct {
	TargetRTO time.Duration `json:"target_rto"` // Target Recovery Time Objective
	TargetRPO time.Duration `json:"target_rpo"` // Target Recovery Point Objective

	// Assumptions for calculation
	NetworkSpeedMbps       float64 `json:"network_speed_mbps"`      // Network speed for cloud restores
	DiskReadSpeedMBps      float64 `json:"disk_read_speed_mbps"`    // Disk read speed
	DiskWriteSpeedMBps     float64 `json:"disk_write_speed_mbps"`   // Disk write speed
	CloudDownloadSpeedMbps float64 `json:"cloud_download_speed_mbps"`

	// Time estimates for various operations
	StartupTimeMinutes    int `json:"startup_time_minutes"`    // DB startup time
	ValidationTimeMinutes int `json:"validation_time_minutes"` // Post-restore validation
	SwitchoverTimeMinutes int `json:"switchover_time_minutes"` // Application switchover time
}
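
// Note on units: *Mbps fields are megabits per second (converted below at
// 125,000 bytes/s per Mbps), while *MBps fields are megabytes per second
// (1,000,000 bytes/s). The JSON tags spell both as "mbps", so take care
// when loading these values from config files.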
// DefaultConfig returns sensible defaults
func DefaultConfig() Config {
	return Config{
		TargetRTO:              4 * time.Hour,
		TargetRPO:              1 * time.Hour,
		NetworkSpeedMbps:       100,
		DiskReadSpeedMBps:      100,
		DiskWriteSpeedMBps:     50,
		CloudDownloadSpeedMbps: 100,
		StartupTimeMinutes:     2,
		ValidationTimeMinutes:  5,
		SwitchoverTimeMinutes:  5,
	}
}
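
// A minimal sketch of tightening the defaults for a hypothetical tier-1
// database (the 15-minute RPO and gigabit link are illustrative values,
// not project recommendations):
//
//	cfg := DefaultConfig()
//	cfg.TargetRPO = 15 * time.Minute
//	cfg.CloudDownloadSpeedMbps = 1000 // gigabit link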
// Analysis contains RTO/RPO analysis results
type Analysis struct {
	Database  string    `json:"database"`
	Timestamp time.Time `json:"timestamp"`

	// Current state
	CurrentRPO time.Duration `json:"current_rpo"`
	CurrentRTO time.Duration `json:"current_rto"`

	// Target state
	TargetRPO time.Duration `json:"target_rpo"`
	TargetRTO time.Duration `json:"target_rto"`

	// Compliance
	RPOCompliant bool `json:"rpo_compliant"`
	RTOCompliant bool `json:"rto_compliant"`

	// Details
	LastBackup     *time.Time    `json:"last_backup,omitempty"`
	NextScheduled  *time.Time    `json:"next_scheduled,omitempty"`
	BackupInterval time.Duration `json:"backup_interval"`

	// RTO breakdown
	RTOBreakdown RTOBreakdown `json:"rto_breakdown"`

	// Recommendations
	Recommendations []Recommendation `json:"recommendations,omitempty"`

	// Historical
	History []HistoricalPoint `json:"history,omitempty"`
}

// RTOBreakdown shows components of RTO calculation
type RTOBreakdown struct {
	DetectionTime  time.Duration `json:"detection_time"`
	DecisionTime   time.Duration `json:"decision_time"`
	DownloadTime   time.Duration `json:"download_time"`
	RestoreTime    time.Duration `json:"restore_time"`
	StartupTime    time.Duration `json:"startup_time"`
	ValidationTime time.Duration `json:"validation_time"`
	SwitchoverTime time.Duration `json:"switchover_time"`
	TotalTime      time.Duration `json:"total_time"`
}

// Recommendation suggests improvements
type Recommendation struct {
	Type        RecommendationType `json:"type"`
	Priority    Priority           `json:"priority"`
	Title       string             `json:"title"`
	Description string             `json:"description"`
	Impact      string             `json:"impact"`
	Effort      Effort             `json:"effort"`
}

// RecommendationType categorizes recommendations
type RecommendationType string

const (
	RecommendBackupFrequency   RecommendationType = "backup_frequency"
	RecommendIncrementalBackup RecommendationType = "incremental_backup"
	RecommendCompression       RecommendationType = "compression"
	RecommendLocalCache        RecommendationType = "local_cache"
	RecommendParallelRestore   RecommendationType = "parallel_restore"
	RecommendWALArchiving      RecommendationType = "wal_archiving"
	RecommendReplication       RecommendationType = "replication"
)

// Priority levels
type Priority string

const (
	PriorityCritical Priority = "critical"
	PriorityHigh     Priority = "high"
	PriorityMedium   Priority = "medium"
	PriorityLow      Priority = "low"
)

// Effort levels
type Effort string

const (
	EffortLow    Effort = "low"
	EffortMedium Effort = "medium"
	EffortHigh   Effort = "high"
)

// HistoricalPoint tracks RTO/RPO over time
type HistoricalPoint struct {
	Timestamp time.Time     `json:"timestamp"`
	RPO       time.Duration `json:"rpo"`
	RTO       time.Duration `json:"rto"`
}
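
// Note: encoding/json marshals time.Duration fields as integer nanoseconds,
// so consumers of these JSON payloads should divide by 1e9 to get seconds.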
// NewCalculator creates a new RTO/RPO calculator
func NewCalculator(cat catalog.Catalog, config Config) *Calculator {
	return &Calculator{
		catalog: cat,
		config:  config,
	}
}

// Analyze performs RTO/RPO analysis for a database
func (c *Calculator) Analyze(ctx context.Context, database string) (*Analysis, error) {
	analysis := &Analysis{
		Database:  database,
		Timestamp: time.Now(),
		TargetRPO: c.config.TargetRPO,
		TargetRTO: c.config.TargetRTO,
	}

	// Get recent backups (assumed newest-first; entries[0] is the latest)
	entries, err := c.catalog.List(ctx, database, 100)
	if err != nil {
		return nil, fmt.Errorf("failed to list backups: %w", err)
	}

	if len(entries) == 0 {
		// No backups - worst case scenario
		analysis.CurrentRPO = 0 // undefined
		analysis.CurrentRTO = 0 // undefined
		analysis.Recommendations = append(analysis.Recommendations, Recommendation{
			Type:        RecommendBackupFrequency,
			Priority:    PriorityCritical,
			Title:       "No Backups Found",
			Description: "No backups exist for this database",
			Impact:      "Cannot recover in case of failure",
			Effort:      EffortLow,
		})
		return analysis, nil
	}

	// Calculate current RPO (time since last backup)
	lastBackup := entries[0].CreatedAt
	analysis.LastBackup = &lastBackup
	analysis.CurrentRPO = time.Since(lastBackup)
	analysis.RPOCompliant = analysis.CurrentRPO <= c.config.TargetRPO

	// Calculate backup interval
	if len(entries) >= 2 {
		analysis.BackupInterval = calculateAverageInterval(entries)
	}

	// Calculate RTO
	latestEntry := entries[0]
	analysis.RTOBreakdown = c.calculateRTOBreakdown(latestEntry)
	analysis.CurrentRTO = analysis.RTOBreakdown.TotalTime
	analysis.RTOCompliant = analysis.CurrentRTO <= c.config.TargetRTO

	// Generate recommendations
	analysis.Recommendations = c.generateRecommendations(analysis, entries)

	// Calculate history
	analysis.History = c.calculateHistory(entries)

	return analysis, nil
}
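
// A usage sketch, assuming cat is an already-open catalog.Catalog and ctx a
// context.Context; the database name "orders" is illustrative:
//
//	calc := NewCalculator(cat, DefaultConfig())
//	analysis, err := calc.Analyze(ctx, "orders")
//	if err != nil {
//		// handle error
//	}
//	fmt.Printf("RPO %s (compliant: %v), RTO %s (compliant: %v)\n",
//		analysis.CurrentRPO, analysis.RPOCompliant,
//		analysis.CurrentRTO, analysis.RTOCompliant)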
// AnalyzeAll analyzes all databases
func (c *Calculator) AnalyzeAll(ctx context.Context) ([]*Analysis, error) {
	databases, err := c.catalog.ListDatabases(ctx)
	if err != nil {
		return nil, fmt.Errorf("failed to list databases: %w", err)
	}

	var analyses []*Analysis
	for _, db := range databases {
		analysis, err := c.Analyze(ctx, db)
		if err != nil {
			continue // Skip errors for individual databases
		}
		analyses = append(analyses, analysis)
	}

	return analyses, nil
}
// calculateRTOBreakdown calculates RTO components
func (c *Calculator) calculateRTOBreakdown(entry *catalog.Entry) RTOBreakdown {
	breakdown := RTOBreakdown{
		// Detection time - assume monitoring catches issues quickly
		DetectionTime: 5 * time.Minute,
		// Decision time - human decision making
		DecisionTime: 10 * time.Minute,
		// Startup time
		StartupTime: time.Duration(c.config.StartupTimeMinutes) * time.Minute,
		// Validation time
		ValidationTime: time.Duration(c.config.ValidationTimeMinutes) * time.Minute,
		// Switchover time
		SwitchoverTime: time.Duration(c.config.SwitchoverTimeMinutes) * time.Minute,
	}

	// Calculate download time (if cloud backup)
	if entry.CloudLocation != "" {
		bytesPerSecond := c.config.CloudDownloadSpeedMbps * 125000 // Mbps to bytes/sec
		downloadSeconds := float64(entry.SizeBytes) / bytesPerSecond
		breakdown.DownloadTime = time.Duration(downloadSeconds * float64(time.Second))
	}

	// Calculate restore time, estimated from disk write speed
	bytesPerSecond := c.config.DiskWriteSpeedMBps * 1000000 // MB/s to bytes/sec
	restoreSeconds := float64(entry.SizeBytes) / bytesPerSecond

	// Add overhead for decompression if compressed
	if entry.Compression != "" && entry.Compression != "none" {
		restoreSeconds *= 1.3 // 30% overhead for decompression
	}

	// Add overhead for decryption if encrypted
	if entry.Encrypted {
		restoreSeconds *= 1.1 // 10% overhead for decryption
	}
	breakdown.RestoreTime = time.Duration(restoreSeconds * float64(time.Second))

	// Calculate total
	breakdown.TotalTime = breakdown.DetectionTime +
		breakdown.DecisionTime +
		breakdown.DownloadTime +
		breakdown.RestoreTime +
		breakdown.StartupTime +
		breakdown.ValidationTime +
		breakdown.SwitchoverTime

	return breakdown
}
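
// Worked example with DefaultConfig for a hypothetical 100 GB compressed
// cloud backup: download = 100e9 B / (100 Mbps * 125,000 B/s) = 8,000 s
// (~2h13m); restore = 100e9 B / (50 MB/s * 1e6 B/s) = 2,000 s, * 1.3 for
// decompression = 2,600 s (~43m); plus 5+10+2+5+5 = 27m of fixed phases.
// Total RTO is roughly 3h24m, just under the default 4-hour target.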
// calculateAverageInterval calculates average time between backups.
// Entries are expected newest-first, so each adjacent difference is
// non-negative.
func calculateAverageInterval(entries []*catalog.Entry) time.Duration {
	if len(entries) < 2 {
		return 0
	}

	var totalInterval time.Duration
	for i := 0; i < len(entries)-1; i++ {
		interval := entries[i].CreatedAt.Sub(entries[i+1].CreatedAt)
		totalInterval += interval
	}

	return totalInterval / time.Duration(len(entries)-1)
}
// generateRecommendations creates recommendations based on analysis
func (c *Calculator) generateRecommendations(analysis *Analysis, entries []*catalog.Entry) []Recommendation {
	var recommendations []Recommendation

	// RPO violations
	if !analysis.RPOCompliant {
		gap := analysis.CurrentRPO - c.config.TargetRPO
		recommendations = append(recommendations, Recommendation{
			Type:     RecommendBackupFrequency,
			Priority: PriorityCritical,
			Title:    "RPO Target Not Met",
			Description: fmt.Sprintf("Current RPO (%s) exceeds target (%s) by %s",
				formatDuration(analysis.CurrentRPO),
				formatDuration(c.config.TargetRPO),
				formatDuration(gap)),
			Impact: "Potential data loss exceeds acceptable threshold",
			Effort: EffortLow,
		})
	}

	// RTO violations
	if !analysis.RTOCompliant {
		recommendations = append(recommendations, Recommendation{
			Type:     RecommendParallelRestore,
			Priority: PriorityHigh,
			Title:    "RTO Target Not Met",
			Description: fmt.Sprintf("Estimated recovery time (%s) exceeds target (%s)",
				formatDuration(analysis.CurrentRTO),
				formatDuration(c.config.TargetRTO)),
			Impact: "Recovery may take longer than acceptable",
			Effort: EffortMedium,
		})
	}

	// Large download time
	if analysis.RTOBreakdown.DownloadTime > 30*time.Minute {
		recommendations = append(recommendations, Recommendation{
			Type:     RecommendLocalCache,
			Priority: PriorityMedium,
			Title:    "Consider Local Backup Cache",
			Description: fmt.Sprintf("Cloud download takes %s, local cache would reduce this",
				formatDuration(analysis.RTOBreakdown.DownloadTime)),
			Impact: "Faster recovery from local storage",
			Effort: EffortMedium,
		})
	}

	// No incremental backups
	hasIncremental := false
	for _, e := range entries {
		if e.BackupType == "incremental" {
			hasIncremental = true
			break
		}
	}
	if !hasIncremental && analysis.BackupInterval > 6*time.Hour {
		recommendations = append(recommendations, Recommendation{
			Type:        RecommendIncrementalBackup,
			Priority:    PriorityMedium,
			Title:       "Enable Incremental Backups",
			Description: "Incremental backups can reduce backup time and storage",
			Impact:      "Better RPO with less resource usage",
			Effort:      EffortLow,
		})
	}

	// WAL archiving for PostgreSQL
	if len(entries) > 0 && entries[0].DatabaseType == "postgresql" {
		recommendations = append(recommendations, Recommendation{
			Type:        RecommendWALArchiving,
			Priority:    PriorityMedium,
			Title:       "Consider WAL Archiving",
			Description: "Enable WAL archiving for point-in-time recovery",
			Impact:      "Achieve near-zero RPO with PITR",
			Effort:      EffortMedium,
		})
	}

	return recommendations
}
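
// The 30-minute download and 6-hour interval thresholds above are fixed
// heuristics in this version; making them configurable via Config would be
// a natural extension (an observation, not an implemented feature).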
// calculateHistory generates historical RTO/RPO points
func (c *Calculator) calculateHistory(entries []*catalog.Entry) []HistoricalPoint {
	var history []HistoricalPoint

	// Sort entries by date (oldest first)
	sorted := make([]*catalog.Entry, len(entries))
	copy(sorted, entries)
	sort.Slice(sorted, func(i, j int) bool {
		return sorted[i].CreatedAt.Before(sorted[j].CreatedAt)
	})

	for i, entry := range sorted {
		point := HistoricalPoint{
			Timestamp: entry.CreatedAt,
			RTO:       c.calculateRTOBreakdown(entry).TotalTime,
		}
		// Calculate RPO at that point (time until next backup)
		if i < len(sorted)-1 {
			point.RPO = sorted[i+1].CreatedAt.Sub(entry.CreatedAt)
		} else {
			point.RPO = time.Since(entry.CreatedAt)
		}
		history = append(history, point)
	}

	return history
}
// Summary provides aggregate RTO/RPO status
type Summary struct {
	TotalDatabases   int           `json:"total_databases"`
	RPOCompliant     int           `json:"rpo_compliant"`
	RTOCompliant     int           `json:"rto_compliant"`
	FullyCompliant   int           `json:"fully_compliant"`
	CriticalIssues   int           `json:"critical_issues"`
	WorstRPO         time.Duration `json:"worst_rpo"`
	WorstRTO         time.Duration `json:"worst_rto"`
	WorstRPODatabase string        `json:"worst_rpo_database"`
	WorstRTODatabase string        `json:"worst_rto_database"`
	AverageRPO       time.Duration `json:"average_rpo"`
	AverageRTO       time.Duration `json:"average_rto"`
}

// Summarize creates a summary from analyses
func Summarize(analyses []*Analysis) *Summary {
	summary := &Summary{}
	var totalRPO, totalRTO time.Duration

	for _, a := range analyses {
		summary.TotalDatabases++
		if a.RPOCompliant {
			summary.RPOCompliant++
		}
		if a.RTOCompliant {
			summary.RTOCompliant++
		}
		if a.RPOCompliant && a.RTOCompliant {
			summary.FullyCompliant++
		}
		for _, r := range a.Recommendations {
			if r.Priority == PriorityCritical {
				summary.CriticalIssues++
				break
			}
		}
		if a.CurrentRPO > summary.WorstRPO {
			summary.WorstRPO = a.CurrentRPO
			summary.WorstRPODatabase = a.Database
		}
		if a.CurrentRTO > summary.WorstRTO {
			summary.WorstRTO = a.CurrentRTO
			summary.WorstRTODatabase = a.Database
		}
		totalRPO += a.CurrentRPO
		totalRTO += a.CurrentRTO
	}

	if len(analyses) > 0 {
		summary.AverageRPO = totalRPO / time.Duration(len(analyses))
		summary.AverageRTO = totalRTO / time.Duration(len(analyses))
	}

	return summary
}
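
// A fleet-wide sketch combining AnalyzeAll and Summarize (error handling
// elided; calc and ctx as in the earlier example):
//
//	analyses, err := calc.AnalyzeAll(ctx)
//	if err != nil {
//		// handle error
//	}
//	s := Summarize(analyses)
//	fmt.Printf("%d/%d databases fully compliant, worst RPO %s (%s)\n",
//		s.FullyCompliant, s.TotalDatabases, s.WorstRPO, s.WorstRPODatabase)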
// formatDuration renders a duration as seconds, minutes, or hours and
// minutes for human-readable output
func formatDuration(d time.Duration) string {
	if d < time.Minute {
		return fmt.Sprintf("%.0fs", d.Seconds())
	}
	if d < time.Hour {
		return fmt.Sprintf("%.0fm", d.Minutes())
	}
	hours := int(d.Hours())
	mins := int(d.Minutes()) - hours*60
	return fmt.Sprintf("%dh %dm", hours, mins)
}
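
// For reference: formatDuration(45*time.Second) == "45s",
// formatDuration(30*time.Minute) == "30m", and
// formatDuration(200*time.Minute) == "3h 20m".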