feat: Add enterprise DBA features for production reliability
New features implemented:

1. Backup Catalog (internal/catalog/)
   - SQLite-based backup tracking
   - Gap detection and RPO monitoring
   - Search and statistics
   - Filesystem sync

2. DR Drill Testing (internal/drill/)
   - Automated restore testing in Docker containers
   - Database validation with custom queries
   - Catalog integration for drill-tested status

3. Smart Notifications (internal/notify/)
   - Event batching with configurable intervals
   - Time-based escalation policies
   - HTML/text/Slack templates

4. Compliance Reports (internal/report/)
   - SOC2, GDPR, HIPAA, PCI-DSS, ISO27001 frameworks
   - Evidence collection from catalog
   - JSON, Markdown, HTML output formats

5. RTO/RPO Calculator (internal/rto/)
   - Recovery objective analysis
   - RTO breakdown by phase
   - Recommendations for improvement

6. Replica-Aware Backup (internal/replica/)
   - Topology detection for PostgreSQL/MySQL
   - Automatic replica selection
   - Configurable selection strategies

7. Parallel Table Backup (internal/parallel/)
   - Concurrent table dumps
   - Worker pool with progress tracking
   - Large table optimization

8. MySQL/MariaDB PITR (internal/pitr/)
   - Binary log parsing and replay
   - Point-in-time recovery support
   - Transaction filtering

CLI commands added: catalog, drill, report, rto

All changes support the goal: reliable 3 AM database recovery.
This commit is contained in:
@@ -11,41 +11,66 @@ import (
|
||||
// EventType is the string identifier carried in an Event's Type field.
type EventType string

// Known event types. Each constant is declared exactly once; the string
// values are what gets serialized, so they must not change.
//
// NOTE(review): EventVerifyCompleted/EventVerifyFailed ("verify_*") and
// EventVerificationPassed/EventVerificationFailed ("verification_*") coexist
// with distinct wire values — confirm which pair emitters are expected to use.
const (
	EventBackupStarted      EventType = "backup_started"
	EventBackupCompleted    EventType = "backup_completed"
	EventBackupFailed       EventType = "backup_failed"
	EventRestoreStarted     EventType = "restore_started"
	EventRestoreCompleted   EventType = "restore_completed"
	EventRestoreFailed      EventType = "restore_failed"
	EventCleanupCompleted   EventType = "cleanup_completed"
	EventVerifyCompleted    EventType = "verify_completed"
	EventVerifyFailed       EventType = "verify_failed"
	EventPITRRecovery       EventType = "pitr_recovery"
	EventVerificationPassed EventType = "verification_passed"
	EventVerificationFailed EventType = "verification_failed"
	EventDRDrillPassed      EventType = "dr_drill_passed"
	EventDRDrillFailed      EventType = "dr_drill_failed"
	EventGapDetected        EventType = "gap_detected"
	EventRPOViolation       EventType = "rpo_violation"
)
|
||||
|
||||
// Severity represents the severity level of a notification.
type Severity string

// Severity levels. Each level is declared exactly once; the string values
// are what gets serialized, so they must not change.
const (
	SeverityInfo     Severity = "info"
	SeveritySuccess  Severity = "success"
	SeverityWarning  Severity = "warning"
	SeverityError    Severity = "error"
	SeverityCritical Severity = "critical"
)
|
||||
|
||||
// severityOrder returns numeric order for severity comparison
|
||||
func severityOrder(s Severity) int {
|
||||
switch s {
|
||||
case SeverityInfo:
|
||||
return 0
|
||||
case SeveritySuccess:
|
||||
return 1
|
||||
case SeverityWarning:
|
||||
return 2
|
||||
case SeverityError:
|
||||
return 3
|
||||
case SeverityCritical:
|
||||
return 4
|
||||
default:
|
||||
return 0
|
||||
}
|
||||
}
|
||||
|
||||
// Event represents a notification event
|
||||
type Event struct {
|
||||
Type EventType `json:"type"`
|
||||
Severity Severity `json:"severity"`
|
||||
Timestamp time.Time `json:"timestamp"`
|
||||
Database string `json:"database,omitempty"`
|
||||
Message string `json:"message"`
|
||||
Details map[string]string `json:"details,omitempty"`
|
||||
Error string `json:"error,omitempty"`
|
||||
Duration time.Duration `json:"duration,omitempty"`
|
||||
BackupFile string `json:"backup_file,omitempty"`
|
||||
BackupSize int64 `json:"backup_size,omitempty"`
|
||||
Hostname string `json:"hostname,omitempty"`
|
||||
Type EventType `json:"type"`
|
||||
Severity Severity `json:"severity"`
|
||||
Timestamp time.Time `json:"timestamp"`
|
||||
Database string `json:"database,omitempty"`
|
||||
Message string `json:"message"`
|
||||
Details map[string]string `json:"details,omitempty"`
|
||||
Error string `json:"error,omitempty"`
|
||||
Duration time.Duration `json:"duration,omitempty"`
|
||||
BackupFile string `json:"backup_file,omitempty"`
|
||||
BackupSize int64 `json:"backup_size,omitempty"`
|
||||
Hostname string `json:"hostname,omitempty"`
|
||||
}
|
||||
|
||||
// NewEvent creates a new notification event
|
||||
@@ -132,27 +157,27 @@ type Config struct {
|
||||
WebhookSecret string // For signing payloads
|
||||
|
||||
// General settings
|
||||
OnSuccess bool // Send notifications on successful operations
|
||||
OnFailure bool // Send notifications on failed operations
|
||||
OnWarning bool // Send notifications on warnings
|
||||
MinSeverity Severity
|
||||
Retries int // Number of retry attempts
|
||||
RetryDelay time.Duration // Delay between retries
|
||||
OnSuccess bool // Send notifications on successful operations
|
||||
OnFailure bool // Send notifications on failed operations
|
||||
OnWarning bool // Send notifications on warnings
|
||||
MinSeverity Severity
|
||||
Retries int // Number of retry attempts
|
||||
RetryDelay time.Duration // Delay between retries
|
||||
}
|
||||
|
||||
// DefaultConfig returns a configuration with sensible defaults
|
||||
func DefaultConfig() Config {
|
||||
return Config{
|
||||
SMTPPort: 587,
|
||||
SMTPTLS: false,
|
||||
SMTPStartTLS: true,
|
||||
SMTPPort: 587,
|
||||
SMTPTLS: false,
|
||||
SMTPStartTLS: true,
|
||||
WebhookMethod: "POST",
|
||||
OnSuccess: true,
|
||||
OnFailure: true,
|
||||
OnWarning: true,
|
||||
MinSeverity: SeverityInfo,
|
||||
Retries: 3,
|
||||
RetryDelay: 5 * time.Second,
|
||||
OnSuccess: true,
|
||||
OnFailure: true,
|
||||
OnWarning: true,
|
||||
MinSeverity: SeverityInfo,
|
||||
Retries: 3,
|
||||
RetryDelay: 5 * time.Second,
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user