New features implemented:

1. Backup Catalog (internal/catalog/)
   - SQLite-based backup tracking
   - Gap detection and RPO monitoring
   - Search and statistics
   - Filesystem sync
2. DR Drill Testing (internal/drill/)
   - Automated restore testing in Docker containers
   - Database validation with custom queries
   - Catalog integration for drill-tested status
3. Smart Notifications (internal/notify/)
   - Event batching with configurable intervals
   - Time-based escalation policies
   - HTML/text/Slack templates
4. Compliance Reports (internal/report/)
   - SOC2, GDPR, HIPAA, PCI-DSS, ISO27001 frameworks
   - Evidence collection from catalog
   - JSON, Markdown, HTML output formats
5. RTO/RPO Calculator (internal/rto/)
   - Recovery objective analysis
   - RTO breakdown by phase
   - Recommendations for improvement
6. Replica-Aware Backup (internal/replica/)
   - Topology detection for PostgreSQL/MySQL
   - Automatic replica selection
   - Configurable selection strategies
7. Parallel Table Backup (internal/parallel/)
   - Concurrent table dumps
   - Worker pool with progress tracking
   - Large table optimization
8. MySQL/MariaDB PITR (internal/pitr/)
   - Binary log parsing and replay
   - Point-in-time recovery support
   - Transaction filtering

CLI commands added: catalog, drill, report, rto.

All changes support the goal: reliable 3 AM database recovery.

// Package catalog provides backup catalog management with SQLite storage
package catalog

import (
	"context"
	"fmt"
	"time"
)

// Entry represents a single backup in the catalog
type Entry struct {
	ID              int64             `json:"id"`
	Database        string            `json:"database"`
	DatabaseType    string            `json:"database_type"` // postgresql, mysql, mariadb
	Host            string            `json:"host"`
	Port            int               `json:"port"`
	BackupPath      string            `json:"backup_path"`
	BackupType      string            `json:"backup_type"` // full, incremental
	SizeBytes       int64             `json:"size_bytes"`
	SHA256          string            `json:"sha256"`
	Compression     string            `json:"compression"`
	Encrypted       bool              `json:"encrypted"`
	CreatedAt       time.Time         `json:"created_at"`
	Duration        float64           `json:"duration_seconds"`
	Status          BackupStatus      `json:"status"`
	VerifiedAt      *time.Time        `json:"verified_at,omitempty"`
	VerifyValid     *bool             `json:"verify_valid,omitempty"`
	DrillTestedAt   *time.Time        `json:"drill_tested_at,omitempty"`
	DrillSuccess    *bool             `json:"drill_success,omitempty"`
	CloudLocation   string            `json:"cloud_location,omitempty"`
	RetentionPolicy string            `json:"retention_policy,omitempty"` // daily, weekly, monthly, yearly
	Tags            map[string]string `json:"tags,omitempty"`
	Metadata        map[string]string `json:"metadata,omitempty"`
}
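
// exampleEntry sketches a populated Entry as it might look after a
// successful nightly dump. It is illustrative only: the host, path,
// and sizes are hypothetical values, not output from a real backup run.
func exampleEntry() *Entry {
	return &Entry{
		Database:        "orders",
		DatabaseType:    "postgresql",
		Host:            "db-primary.internal", // hypothetical host
		Port:            5432,
		BackupPath:      "/var/backups/orders/orders-20240115.dump.zst",
		BackupType:      "full",
		SizeBytes:       4 << 30, // 4 GiB
		Compression:     "zstd",
		Encrypted:       true,
		CreatedAt:       time.Now(),
		Duration:        312.4, // seconds
		Status:          StatusCompleted,
		RetentionPolicy: "daily",
		Tags:            map[string]string{"env": "prod"},
	}
}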

// BackupStatus represents the state of a backup
type BackupStatus string

const (
	StatusCompleted BackupStatus = "completed"
	StatusFailed    BackupStatus = "failed"
	StatusVerified  BackupStatus = "verified"
	StatusCorrupted BackupStatus = "corrupted"
	StatusDeleted   BackupStatus = "deleted"
	StatusArchived  BackupStatus = "archived"
)
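
// isRestorable is a hypothetical helper (not part of the package API)
// showing how status might gate a 3 AM restore decision, assuming only
// completed or verified backups should be offered for restore.
func isRestorable(s BackupStatus) bool {
	switch s {
	case StatusCompleted, StatusVerified:
		return true
	default:
		return false
	}
}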

// Gap represents a detected backup gap
type Gap struct {
	Database    string        `json:"database"`
	GapStart    time.Time     `json:"gap_start"`
	GapEnd      time.Time     `json:"gap_end"`
	Duration    time.Duration `json:"duration"`
	ExpectedAt  time.Time     `json:"expected_at"`
	Description string        `json:"description"`
	Severity    GapSeverity   `json:"severity"`
}

// GapSeverity indicates how serious a backup gap is
type GapSeverity string

const (
	SeverityInfo     GapSeverity = "info"     // Gap within tolerance
	SeverityWarning  GapSeverity = "warning"  // Gap exceeds expected interval
	SeverityCritical GapSeverity = "critical" // Gap exceeds RPO
)
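
// classifyGap is a minimal sketch of severity classification, following
// the semantics in the constant comments above. The function is
// hypothetical; the real detector may classify differently.
func classifyGap(gap time.Duration, cfg *GapDetectionConfig) GapSeverity {
	switch {
	case gap > cfg.RPOThreshold:
		return SeverityCritical // gap exceeds RPO
	case gap > cfg.ExpectedInterval+cfg.Tolerance:
		return SeverityWarning // gap exceeds expected interval plus tolerance
	default:
		return SeverityInfo // within tolerance
	}
}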

// Stats contains backup statistics
type Stats struct {
	TotalBackups     int64            `json:"total_backups"`
	TotalSize        int64            `json:"total_size_bytes"`
	TotalSizeHuman   string           `json:"total_size_human"`
	OldestBackup     *time.Time       `json:"oldest_backup,omitempty"`
	NewestBackup     *time.Time       `json:"newest_backup,omitempty"`
	ByDatabase       map[string]int64 `json:"by_database"`
	ByType           map[string]int64 `json:"by_type"`
	ByStatus         map[string]int64 `json:"by_status"`
	VerifiedCount    int64            `json:"verified_count"`
	DrillTestedCount int64            `json:"drill_tested_count"`
	AvgDuration      float64          `json:"avg_duration_seconds"`
	AvgSize          int64            `json:"avg_size_bytes"`
	GapsDetected     int              `json:"gaps_detected"`
}

// SearchQuery represents search criteria for catalog entries
type SearchQuery struct {
	Database     string     // Filter by database name (supports wildcards)
	DatabaseType string     // Filter by database type
	Host         string     // Filter by host
	Status       string     // Filter by status
	StartDate    *time.Time // Backups after this date
	EndDate      *time.Time // Backups before this date
	MinSize      int64      // Minimum size in bytes
	MaxSize      int64      // Maximum size in bytes
	BackupType   string     // full, incremental
	Encrypted    *bool      // Filter by encryption status
	Verified     *bool      // Filter by verification status
	DrillTested  *bool      // Filter by drill test status
	Limit        int        // Max results (0 = no limit)
	Offset       int        // Offset for pagination
	OrderBy      string     // Field to order by
	OrderDesc    bool       // Order descending
}
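
// exampleQuery builds an illustrative SearchQuery: unverified full backups
// from the last seven days, newest first. The "created_at" order key and
// the limit are assumptions for illustration, not confirmed field names.
func exampleQuery() *SearchQuery {
	weekAgo := time.Now().Add(-7 * 24 * time.Hour)
	verified := false
	return &SearchQuery{
		BackupType: "full",
		Verified:   &verified,
		StartDate:  &weekAgo,
		OrderBy:    "created_at", // assumed order key
		OrderDesc:  true,
		Limit:      50,
	}
}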

// GapDetectionConfig configures gap detection
type GapDetectionConfig struct {
	ExpectedInterval time.Duration // Expected backup interval (e.g., 24h)
	Tolerance        time.Duration // Allowed variance (e.g., 1h)
	RPOThreshold     time.Duration // Critical threshold (RPO)
	StartDate        *time.Time    // Start of analysis window
	EndDate          *time.Time    // End of analysis window
}
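
// exampleGapConfig sketches a nightly-backup gap policy using the example
// values from the field comments above (24h interval, 1h tolerance). The
// 48h RPO threshold is an illustrative assumption.
var exampleGapConfig = GapDetectionConfig{
	ExpectedInterval: 24 * time.Hour,
	Tolerance:        time.Hour,
	RPOThreshold:     48 * time.Hour, // hypothetical RPO
}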

// Catalog defines the interface for backup catalog operations
type Catalog interface {
	// Entry management
	Add(ctx context.Context, entry *Entry) error
	Update(ctx context.Context, entry *Entry) error
	Delete(ctx context.Context, id int64) error
	Get(ctx context.Context, id int64) (*Entry, error)
	GetByPath(ctx context.Context, path string) (*Entry, error)

	// Search and listing
	Search(ctx context.Context, query *SearchQuery) ([]*Entry, error)
	List(ctx context.Context, database string, limit int) ([]*Entry, error)
	ListDatabases(ctx context.Context) ([]string, error)
	Count(ctx context.Context, query *SearchQuery) (int64, error)

	// Statistics
	Stats(ctx context.Context) (*Stats, error)
	StatsByDatabase(ctx context.Context, database string) (*Stats, error)

	// Gap detection
	DetectGaps(ctx context.Context, database string, config *GapDetectionConfig) ([]*Gap, error)
	DetectAllGaps(ctx context.Context, config *GapDetectionConfig) (map[string][]*Gap, error)

	// Verification tracking
	MarkVerified(ctx context.Context, id int64, valid bool) error
	MarkDrillTested(ctx context.Context, id int64, success bool) error

	// Sync with filesystem
	SyncFromDirectory(ctx context.Context, dir string) (*SyncResult, error)
	SyncFromCloud(ctx context.Context, provider, bucket, prefix string) (*SyncResult, error)

	// Maintenance
	Prune(ctx context.Context, before time.Time) (int, error)
	Vacuum(ctx context.Context) error
	Close() error
}
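
// checkRecovery sketches the intended 3 AM flow against any Catalog
// implementation: fetch the latest backup for a database, confirm it was
// verified, and check for critical gaps. The function is hypothetical,
// elides the concrete constructor (not defined in this file), and assumes
// List returns entries newest first.
func checkRecovery(ctx context.Context, c Catalog, db string) error {
	entries, err := c.List(ctx, db, 1)
	if err != nil {
		return err
	}
	if len(entries) == 0 {
		return fmt.Errorf("no backups cataloged for %s", db)
	}
	latest := entries[0] // assumes newest-first ordering
	if latest.VerifyValid == nil || !*latest.VerifyValid {
		return fmt.Errorf("latest backup of %s is not verified", db)
	}
	gaps, err := c.DetectGaps(ctx, db, &exampleGapConfig)
	if err != nil {
		return err
	}
	for _, g := range gaps {
		if g.Severity == SeverityCritical {
			return fmt.Errorf("critical backup gap for %s: %s", db, g.Description)
		}
	}
	return nil
}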

// SyncResult contains results from a catalog sync operation
type SyncResult struct {
	Added    int      `json:"added"`
	Updated  int      `json:"updated"`
	Removed  int      `json:"removed"`
	Errors   int      `json:"errors"`
	Duration float64  `json:"duration_seconds"`
	Details  []string `json:"details,omitempty"`
}

// FormatSize formats bytes as human-readable string
func FormatSize(bytes int64) string {
	const unit = 1024
	if bytes < unit {
		return fmt.Sprintf("%d B", bytes)
	}
	div, exp := int64(unit), 0
	for n := bytes / unit; n >= unit; n /= unit {
		div *= unit
		exp++
	}
	return fmt.Sprintf("%.1f %cB", float64(bytes)/float64(div), "KMGTPE"[exp])
}

// FormatDuration formats duration as human-readable string
func FormatDuration(d time.Duration) string {
	if d < time.Minute {
		return fmt.Sprintf("%.0fs", d.Seconds())
	}
	if d < time.Hour {
		mins := int(d.Minutes())
		secs := int(d.Seconds()) - mins*60
		return fmt.Sprintf("%dm %ds", mins, secs)
	}
	hours := int(d.Hours())
	mins := int(d.Minutes()) - hours*60
	return fmt.Sprintf("%dh %dm", hours, mins)
}
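
// exampleFormatting shows outputs of the two helpers above; the function
// is hypothetical and exists only to document the expected results.
func exampleFormatting() []string {
	return []string{
		FormatSize(512),      // "512 B"
		FormatSize(1536),     // "1.5 KB"
		FormatSize(3 << 30),  // "3.0 GB"
		FormatDuration(45 * time.Second),            // "45s"
		FormatDuration(150 * time.Second),           // "2m 30s"
		FormatDuration(3*time.Hour + 5*time.Minute), // "3h 5m"
	}
}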