feat: v2.0 Sprint 1 - Backup Verification & Retention Policy

- Add SHA-256 checksum generation for all backups
- Implement verify-backup command for integrity validation
- Add JSON metadata format (.meta.json) with full backup info
- Create retention policy engine with smart cleanup
- Add cleanup command with dry-run and pattern matching
- Integrate metadata generation into backup flow
- Maintain backward compatibility with legacy .info files

New commands:
- dbbackup verify-backup [files] - Verify backup integrity
- dbbackup cleanup [dir] - Clean old backups with retention policy

New packages:
- internal/metadata - Backup metadata management
- internal/verification - Checksum validation
- internal/retention - Retention policy engine
This commit is contained in:
2025-11-25 19:18:07 +00:00
parent 884c8292d6
commit ba5ae8ecb1
8 changed files with 1583 additions and 17 deletions

View File

@@ -21,6 +21,7 @@ import (
"dbbackup/internal/database"
"dbbackup/internal/security"
"dbbackup/internal/logger"
"dbbackup/internal/metadata"
"dbbackup/internal/metrics"
"dbbackup/internal/progress"
"dbbackup/internal/swap"
@@ -541,9 +542,9 @@ func (e *Engine) BackupCluster(ctx context.Context) error {
operation.Complete(fmt.Sprintf("Cluster backup created: %s (%s)", outputFile, size))
}
// Create metadata file
if err := e.createMetadata(outputFile, "cluster", "cluster", ""); err != nil {
e.log.Warn("Failed to create metadata file", "error", err)
// Create cluster metadata file
if err := e.createClusterMetadata(outputFile, databases, successCountFinal, failCountFinal); err != nil {
e.log.Warn("Failed to create cluster metadata file", "error", err)
}
return nil
@@ -910,9 +911,70 @@ regularTar:
// createMetadata creates a metadata file for the backup
func (e *Engine) createMetadata(backupFile, database, backupType, strategy string) error {
metaFile := backupFile + ".info"
startTime := time.Now()
content := fmt.Sprintf(`{
// Get backup file information
info, err := os.Stat(backupFile)
if err != nil {
return fmt.Errorf("failed to stat backup file: %w", err)
}
// Calculate SHA-256 checksum
sha256, err := metadata.CalculateSHA256(backupFile)
if err != nil {
return fmt.Errorf("failed to calculate checksum: %w", err)
}
// Get database version
ctx := context.Background()
dbVersion, _ := e.db.GetVersion(ctx)
if dbVersion == "" {
dbVersion = "unknown"
}
// Determine compression format
compressionFormat := "none"
if e.cfg.CompressionLevel > 0 {
if e.cfg.Jobs > 1 {
compressionFormat = fmt.Sprintf("pigz-%d", e.cfg.CompressionLevel)
} else {
compressionFormat = fmt.Sprintf("gzip-%d", e.cfg.CompressionLevel)
}
}
// Create backup metadata
meta := &metadata.BackupMetadata{
Version: "2.0",
Timestamp: startTime,
Database: database,
DatabaseType: e.cfg.DatabaseType,
DatabaseVersion: dbVersion,
Host: e.cfg.Host,
Port: e.cfg.Port,
User: e.cfg.User,
BackupFile: backupFile,
SizeBytes: info.Size(),
SHA256: sha256,
Compression: compressionFormat,
BackupType: backupType,
Duration: time.Since(startTime).Seconds(),
ExtraInfo: make(map[string]string),
}
// Add strategy for sample backups
if strategy != "" {
meta.ExtraInfo["sample_strategy"] = strategy
meta.ExtraInfo["sample_value"] = fmt.Sprintf("%d", e.cfg.SampleValue)
}
// Save metadata
if err := meta.Save(); err != nil {
return fmt.Errorf("failed to save metadata: %w", err)
}
// Also save legacy .info file for backward compatibility
legacyMetaFile := backupFile + ".info"
legacyContent := fmt.Sprintf(`{
"type": "%s",
"database": "%s",
"timestamp": "%s",
@@ -920,24 +982,102 @@ func (e *Engine) createMetadata(backupFile, database, backupType, strategy strin
"port": %d,
"user": "%s",
"db_type": "%s",
"compression": %d`,
backupType, database, time.Now().Format("20060102_150405"),
e.cfg.Host, e.cfg.Port, e.cfg.User, e.cfg.DatabaseType, e.cfg.CompressionLevel)
"compression": %d,
"size_bytes": %d
}`, backupType, database, startTime.Format("20060102_150405"),
e.cfg.Host, e.cfg.Port, e.cfg.User, e.cfg.DatabaseType,
e.cfg.CompressionLevel, info.Size())
if strategy != "" {
content += fmt.Sprintf(`,
"sample_strategy": "%s",
"sample_value": %d`, e.cfg.SampleStrategy, e.cfg.SampleValue)
if err := os.WriteFile(legacyMetaFile, []byte(legacyContent), 0644); err != nil {
e.log.Warn("Failed to save legacy metadata file", "error", err)
}
if info, err := os.Stat(backupFile); err == nil {
content += fmt.Sprintf(`,
"size_bytes": %d`, info.Size())
return nil
}
// createClusterMetadata creates metadata for cluster backups
func (e *Engine) createClusterMetadata(backupFile string, databases []string, successCount, failCount int) error {
startTime := time.Now()
// Get backup file information
info, err := os.Stat(backupFile)
if err != nil {
return fmt.Errorf("failed to stat backup file: %w", err)
}
content += "\n}"
// Calculate SHA-256 checksum for archive
sha256, err := metadata.CalculateSHA256(backupFile)
if err != nil {
return fmt.Errorf("failed to calculate checksum: %w", err)
}
return os.WriteFile(metaFile, []byte(content), 0644)
// Get database version
ctx := context.Background()
dbVersion, _ := e.db.GetVersion(ctx)
if dbVersion == "" {
dbVersion = "unknown"
}
// Create cluster metadata
clusterMeta := &metadata.ClusterMetadata{
Version: "2.0",
Timestamp: startTime,
ClusterName: fmt.Sprintf("%s:%d", e.cfg.Host, e.cfg.Port),
DatabaseType: e.cfg.DatabaseType,
Host: e.cfg.Host,
Port: e.cfg.Port,
Databases: make([]metadata.BackupMetadata, 0),
TotalSize: info.Size(),
Duration: time.Since(startTime).Seconds(),
ExtraInfo: map[string]string{
"database_count": fmt.Sprintf("%d", len(databases)),
"success_count": fmt.Sprintf("%d", successCount),
"failure_count": fmt.Sprintf("%d", failCount),
"archive_sha256": sha256,
"database_version": dbVersion,
},
}
// Add database names to metadata
for _, dbName := range databases {
dbMeta := metadata.BackupMetadata{
Database: dbName,
DatabaseType: e.cfg.DatabaseType,
DatabaseVersion: dbVersion,
Timestamp: startTime,
}
clusterMeta.Databases = append(clusterMeta.Databases, dbMeta)
}
// Save cluster metadata
if err := clusterMeta.Save(backupFile); err != nil {
return fmt.Errorf("failed to save cluster metadata: %w", err)
}
// Also save legacy .info file for backward compatibility
legacyMetaFile := backupFile + ".info"
legacyContent := fmt.Sprintf(`{
"type": "cluster",
"database": "cluster",
"timestamp": "%s",
"host": "%s",
"port": %d,
"user": "%s",
"db_type": "%s",
"compression": %d,
"size_bytes": %d,
"database_count": %d,
"success_count": %d,
"failure_count": %d
}`, startTime.Format("20060102_150405"),
e.cfg.Host, e.cfg.Port, e.cfg.User, e.cfg.DatabaseType,
e.cfg.CompressionLevel, info.Size(), len(databases), successCount, failCount)
if err := os.WriteFile(legacyMetaFile, []byte(legacyContent), 0644); err != nil {
e.log.Warn("Failed to save legacy cluster metadata file", "error", err)
}
return nil
}
// executeCommand executes a backup command (optimized for huge databases)

View File

@@ -0,0 +1,167 @@
package metadata
import (
"crypto/sha256"
"encoding/hex"
"encoding/json"
"fmt"
"io"
"os"
"path/filepath"
"time"
)
// BackupMetadata describes a single backup file. It is the document that is
// serialized to the "<backup>.meta.json" sidecar file.
type BackupMetadata struct {
	// Version is the metadata format version.
	Version string `json:"version"`
	// Timestamp is the time recorded for the backup.
	Timestamp time.Time `json:"timestamp"`
	// Database is the name of the database that was backed up.
	Database string `json:"database"`
	// DatabaseType is the engine family: postgresql, mysql, mariadb.
	DatabaseType string `json:"database_type"`
	// DatabaseVersion is the server version string, e.g. "PostgreSQL 15.3".
	DatabaseVersion string `json:"database_version"`

	// Host, Port and User identify the connection used for the backup.
	Host string `json:"host"`
	Port int    `json:"port"`
	User string `json:"user"`

	// BackupFile is the path of the backup file this metadata belongs to.
	BackupFile string `json:"backup_file"`
	// SizeBytes is the size of the backup file in bytes.
	SizeBytes int64 `json:"size_bytes"`
	// SHA256 is the hex-encoded SHA-256 checksum of the backup file.
	SHA256 string `json:"sha256"`
	// Compression names the compression that was applied: none, gzip, pigz.
	Compression string `json:"compression"`
	// BackupType is the kind of backup: full, incremental (for v2.0).
	BackupType string `json:"backup_type"`
	// BaseBackup is omitted from the JSON output when empty.
	BaseBackup string `json:"base_backup,omitempty"`
	// Duration is a duration expressed in seconds.
	Duration float64 `json:"duration_seconds"`
	// ExtraInfo holds optional free-form key/value pairs; omitted when empty.
	ExtraInfo map[string]string `json:"extra_info,omitempty"`
}
// ClusterMetadata describes a cluster backup, i.e. one archive covering
// several databases. It is serialized to a ".meta.json" sidecar file.
type ClusterMetadata struct {
	// Version is the metadata format version.
	Version string `json:"version"`
	// Timestamp is the time recorded for the cluster backup.
	Timestamp time.Time `json:"timestamp"`
	// ClusterName is a label identifying the cluster.
	ClusterName string `json:"cluster_name"`
	// DatabaseType is the engine family of the cluster.
	DatabaseType string `json:"database_type"`

	// Host and Port identify the server that was backed up.
	Host string `json:"host"`
	Port int    `json:"port"`

	// Databases lists per-database metadata entries.
	Databases []BackupMetadata `json:"databases"`
	// TotalSize is a size in bytes, serialized as "total_size_bytes".
	TotalSize int64 `json:"total_size_bytes"`
	// Duration is a duration expressed in seconds.
	Duration float64 `json:"duration_seconds"`
	// ExtraInfo holds optional free-form key/value pairs; omitted when empty.
	ExtraInfo map[string]string `json:"extra_info,omitempty"`
}
// CalculateSHA256 streams the file at filePath through a SHA-256 hash and
// returns the digest as a lowercase hexadecimal string. The file is read
// incrementally, so it is never loaded into memory as a whole.
func CalculateSHA256(filePath string) (string, error) {
	file, openErr := os.Open(filePath)
	if openErr != nil {
		return "", fmt.Errorf("failed to open file: %w", openErr)
	}
	defer file.Close()

	digest := sha256.New()
	_, copyErr := io.Copy(digest, file)
	if copyErr != nil {
		return "", fmt.Errorf("failed to calculate checksum: %w", copyErr)
	}

	sum := digest.Sum(nil)
	return hex.EncodeToString(sum), nil
}
// Save serializes the metadata as indented JSON and writes it next to the
// backup, at the path m.BackupFile + ".meta.json". An existing file at that
// path is overwritten.
func (m *BackupMetadata) Save() error {
	payload, marshalErr := json.MarshalIndent(m, "", " ")
	if marshalErr != nil {
		return fmt.Errorf("failed to marshal metadata: %w", marshalErr)
	}

	target := m.BackupFile + ".meta.json"
	writeErr := os.WriteFile(target, payload, 0644)
	if writeErr != nil {
		return fmt.Errorf("failed to write metadata file: %w", writeErr)
	}
	return nil
}
// Load reads and decodes the metadata sidecar of backupFile, which is
// expected at backupFile + ".meta.json". It returns an error when the
// sidecar cannot be read or does not contain valid JSON.
func Load(backupFile string) (*BackupMetadata, error) {
	raw, readErr := os.ReadFile(backupFile + ".meta.json")
	if readErr != nil {
		return nil, fmt.Errorf("failed to read metadata file: %w", readErr)
	}

	decoded := new(BackupMetadata)
	if parseErr := json.Unmarshal(raw, decoded); parseErr != nil {
		return nil, fmt.Errorf("failed to parse metadata: %w", parseErr)
	}
	return decoded, nil
}
// Save serializes the cluster metadata as indented JSON and writes it to
// targetFile + ".meta.json", overwriting any existing file at that path.
func (m *ClusterMetadata) Save(targetFile string) error {
	payload, marshalErr := json.MarshalIndent(m, "", " ")
	if marshalErr != nil {
		return fmt.Errorf("failed to marshal cluster metadata: %w", marshalErr)
	}

	target := targetFile + ".meta.json"
	writeErr := os.WriteFile(target, payload, 0644)
	if writeErr != nil {
		return fmt.Errorf("failed to write cluster metadata file: %w", writeErr)
	}
	return nil
}
// LoadCluster reads and decodes the cluster metadata stored at
// targetFile + ".meta.json". It returns an error when the file cannot be
// read or does not contain valid JSON.
func LoadCluster(targetFile string) (*ClusterMetadata, error) {
	raw, readErr := os.ReadFile(targetFile + ".meta.json")
	if readErr != nil {
		return nil, fmt.Errorf("failed to read cluster metadata file: %w", readErr)
	}

	decoded := new(ClusterMetadata)
	if parseErr := json.Unmarshal(raw, decoded); parseErr != nil {
		return nil, fmt.Errorf("failed to parse cluster metadata: %w", parseErr)
	}
	return decoded, nil
}
// ListBackups scans dir for "*.meta.json" sidecar files and returns the
// metadata of every backup that could be loaded.
//
// Sidecars that cannot be read or parsed are skipped. Sidecars that decode
// without a backup_file value are skipped as well: ClusterMetadata documents
// share the ".meta.json" suffix but have no backup_file field, so they would
// otherwise be returned as entries with an empty BackupFile, and callers
// that act on that path (such as retention cleanup) would operate on "".
//
// The returned slice is nil when no usable metadata is found. An error is
// returned only when the glob pattern itself is rejected.
func ListBackups(dir string) ([]*BackupMetadata, error) {
	const metaSuffix = ".meta.json"

	matches, err := filepath.Glob(filepath.Join(dir, "*"+metaSuffix))
	if err != nil {
		return nil, fmt.Errorf("failed to scan directory: %w", err)
	}

	var backups []*BackupMetadata
	for _, metaFile := range matches {
		// Every match ends in metaSuffix, so slicing it off yields the path
		// of the backup file the sidecar belongs to.
		backupFile := metaFile[:len(metaFile)-len(metaSuffix)]

		meta, err := Load(backupFile)
		if err != nil {
			// Skip invalid metadata files.
			continue
		}
		if meta.BackupFile == "" {
			// Not a single-backup metadata document (or an incomplete one).
			continue
		}
		backups = append(backups, meta)
	}
	return backups, nil
}
// FormatSize renders a byte count for humans using binary (1024-based)
// units. Values below 1024 are printed as whole bytes ("512 B"); larger
// values are printed with one decimal and a KiB..EiB suffix ("1.5 KiB").
func FormatSize(bytes int64) string {
	const unit = 1024
	const prefixes = "KMGTPE"

	if bytes < unit {
		return fmt.Sprintf("%d B", bytes)
	}

	// Find the largest power of 1024 that leaves a quotient below 1024.
	divisor := int64(unit)
	prefixIdx := 0
	remaining := bytes / unit
	for remaining >= unit {
		divisor *= unit
		prefixIdx++
		remaining /= unit
	}

	scaled := float64(bytes) / float64(divisor)
	return fmt.Sprintf("%.1f %ciB", scaled, prefixes[prefixIdx])
}

View File

@@ -0,0 +1,224 @@
package retention
import (
"fmt"
"os"
"path/filepath"
"sort"
"time"
"dbbackup/internal/metadata"
)
// Policy holds the settings that drive a retention cleanup.
type Policy struct {
	// RetentionDays is the age in days beyond which a backup becomes
	// eligible for deletion.
	RetentionDays int
	// MinBackups is the number of most recent backups that are always
	// kept, regardless of their age.
	MinBackups int
	// DryRun, when set, makes cleanup report what it would delete without
	// removing anything.
	DryRun bool
}
// CleanupResult summarizes the outcome of a retention cleanup run.
type CleanupResult struct {
	// TotalBackups is the number of backups that were considered.
	TotalBackups int
	// EligibleForDeletion counts the backups older than the retention
	// cutoff that were not protected by the minimum-backups rule.
	EligibleForDeletion int
	// Deleted lists the backup files that were removed, or that would be
	// removed in a dry run.
	Deleted []string
	// Kept lists the backup files that were retained.
	Kept []string
	// SpaceFreed is the sum of the recorded sizes of the removed backups.
	SpaceFreed int64
	// Errors collects the failures hit while deleting backups.
	Errors []error
}
// ApplyPolicy applies the retention policy to every backup that has
// metadata in backupDir.
//
// Backups are processed from oldest to newest. The newest policy.MinBackups
// backups are always kept. Any other backup whose timestamp lies before
// "now minus policy.RetentionDays days" is deleted, or only reported in
// Deleted when policy.DryRun is set. Deletion failures are collected in the
// result's Errors and do not abort the run. SpaceFreed is accumulated only
// for backups that were actually deleted.
func ApplyPolicy(backupDir string, policy Policy) (*CleanupResult, error) {
	backups, listErr := metadata.ListBackups(backupDir)
	if listErr != nil {
		return nil, fmt.Errorf("failed to list backups: %w", listErr)
	}

	result := &CleanupResult{
		TotalBackups: len(backups),
		Deleted:      make([]string, 0),
		Kept:         make([]string, 0),
		Errors:       make([]error, 0),
	}

	// Oldest first, so the tail of the slice holds the most recent backups.
	sort.Slice(backups, func(a, b int) bool {
		return backups[a].Timestamp.Before(backups[b].Timestamp)
	})

	cutoff := time.Now().AddDate(0, 0, -policy.RetentionDays)

	for idx, entry := range backups {
		remaining := len(backups) - idx

		switch {
		case remaining <= policy.MinBackups:
			// Protected by the minimum-backups rule.
			result.Kept = append(result.Kept, entry.BackupFile)

		case !entry.Timestamp.Before(cutoff):
			// Still inside the retention window.
			result.Kept = append(result.Kept, entry.BackupFile)

		default:
			result.EligibleForDeletion++
			if policy.DryRun {
				result.Deleted = append(result.Deleted, entry.BackupFile)
				continue
			}
			if delErr := deleteBackup(entry.BackupFile); delErr != nil {
				result.Errors = append(result.Errors,
					fmt.Errorf("failed to delete %s: %w", entry.BackupFile, delErr))
				continue
			}
			result.Deleted = append(result.Deleted, entry.BackupFile)
			result.SpaceFreed += entry.SizeBytes
		}
	}

	return result, nil
}
// deleteBackup removes a backup file together with its sidecar files
// (".meta.json", plus the legacy ".sha256" and ".info").
//
// An empty backupFile is rejected: with an empty base path the sidecar names
// would resolve to ".meta.json", ".sha256" and ".info" in the current
// working directory, and unrelated files there would be removed.
//
// A missing backup or metadata file is not an error. Any other failure to
// remove one of those two is returned. Removal of the legacy sidecars is
// best effort and never fails the call.
func deleteBackup(backupFile string) error {
	if backupFile == "" {
		return fmt.Errorf("failed to delete backup file: empty backup file path")
	}

	// Delete main backup file.
	if err := os.Remove(backupFile); err != nil && !os.IsNotExist(err) {
		return fmt.Errorf("failed to delete backup file: %w", err)
	}

	// Delete metadata file.
	if err := os.Remove(backupFile + ".meta.json"); err != nil && !os.IsNotExist(err) {
		return fmt.Errorf("failed to delete metadata file: %w", err)
	}

	// Legacy sidecars are optional; removing them is deliberately best
	// effort, so the error is discarded explicitly.
	for _, suffix := range []string{".sha256", ".info"} {
		_ = os.Remove(backupFile + suffix)
	}

	return nil
}
// GetOldestBackups returns up to count backups from backupDir, ordered from
// oldest to newest by their metadata timestamp.
//
// count is clamped to the range [0, number of backups found], so a negative
// count yields an empty result instead of panicking in the slice expression.
// Errors from listing the directory are returned unchanged.
func GetOldestBackups(backupDir string, count int) ([]*metadata.BackupMetadata, error) {
	backups, err := metadata.ListBackups(backupDir)
	if err != nil {
		return nil, err
	}

	// Sort by timestamp (oldest first).
	sort.Slice(backups, func(i, j int) bool {
		return backups[i].Timestamp.Before(backups[j].Timestamp)
	})

	if count < 0 {
		count = 0
	}
	if count > len(backups) {
		count = len(backups)
	}
	return backups[:count], nil
}
// GetNewestBackups returns up to count backups from backupDir, ordered from
// newest to oldest by their metadata timestamp.
//
// count is clamped to the range [0, number of backups found], so a negative
// count yields an empty result instead of panicking in the slice expression.
// Errors from listing the directory are returned unchanged.
func GetNewestBackups(backupDir string, count int) ([]*metadata.BackupMetadata, error) {
	backups, err := metadata.ListBackups(backupDir)
	if err != nil {
		return nil, err
	}

	// Sort by timestamp (newest first).
	sort.Slice(backups, func(i, j int) bool {
		return backups[i].Timestamp.After(backups[j].Timestamp)
	})

	if count < 0 {
		count = 0
	}
	if count > len(backups) {
		count = len(backups)
	}
	return backups[:count], nil
}
// CleanupByPattern applies the retention policy to the backups in backupDir
// whose file name matches the glob pattern.
//
// Only matches with a ".dump" or ".sql" extension that also have loadable
// metadata are considered; everything else is ignored silently. The
// surviving set is then treated like in ApplyPolicy: the newest
// policy.MinBackups entries are kept, older-than-cutoff entries are deleted
// (or just reported when policy.DryRun is set), and deletion errors are
// collected in the result.
//
// NOTE(review): the extension check uses the last extension only, so names
// such as "x.sql.gz" are not matched — confirm this is intended.
func CleanupByPattern(backupDir, pattern string, policy Policy) (*CleanupResult, error) {
	result := &CleanupResult{
		Deleted: make([]string, 0),
		Kept:    make([]string, 0),
		Errors:  make([]error, 0),
	}

	matches, globErr := filepath.Glob(filepath.Join(backupDir, pattern))
	if globErr != nil {
		return nil, fmt.Errorf("failed to match pattern: %w", globErr)
	}

	// Keep only backup files that carry readable metadata.
	var backups []*metadata.BackupMetadata
	for _, candidate := range matches {
		switch filepath.Ext(candidate) {
		case ".dump", ".sql":
			// Accepted backup extension.
		default:
			continue
		}
		meta, loadErr := metadata.Load(candidate)
		if loadErr != nil {
			// No usable metadata for this file.
			continue
		}
		backups = append(backups, meta)
	}
	result.TotalBackups = len(backups)

	// Oldest first, so the tail of the slice holds the most recent backups.
	sort.Slice(backups, func(a, b int) bool {
		return backups[a].Timestamp.Before(backups[b].Timestamp)
	})

	cutoff := time.Now().AddDate(0, 0, -policy.RetentionDays)

	for idx, entry := range backups {
		remaining := len(backups) - idx

		switch {
		case remaining <= policy.MinBackups:
			result.Kept = append(result.Kept, entry.BackupFile)

		case !entry.Timestamp.Before(cutoff):
			result.Kept = append(result.Kept, entry.BackupFile)

		default:
			result.EligibleForDeletion++
			if policy.DryRun {
				result.Deleted = append(result.Deleted, entry.BackupFile)
				continue
			}
			if delErr := deleteBackup(entry.BackupFile); delErr != nil {
				result.Errors = append(result.Errors, delErr)
				continue
			}
			result.Deleted = append(result.Deleted, entry.BackupFile)
			result.SpaceFreed += entry.SizeBytes
		}
	}

	return result, nil
}

View File

@@ -0,0 +1,114 @@
package verification
import (
"fmt"
"os"
"dbbackup/internal/metadata"
)
// Result reports what a verification run found for one backup file.
type Result struct {
	// Valid is true only when every check passed, including the checksum.
	Valid bool
	// BackupFile is the path that was verified.
	BackupFile string

	// ExpectedSHA256 is the checksum recorded in the metadata.
	ExpectedSHA256 string
	// CalculatedSHA256 is the checksum computed from the file on disk.
	CalculatedSHA256 string

	// SizeMatch is true when the file size equals the recorded size.
	SizeMatch bool
	// FileExists is true when the backup file could be stat'ed.
	FileExists bool
	// MetadataExists is true when the metadata could be loaded.
	MetadataExists bool

	// Error describes the first check that failed; nil when Valid is true.
	Error error
}
// Verify runs the integrity checks for backupFile in order: the file must
// exist, its metadata must load, its size must equal the recorded size, and
// its SHA-256 checksum must equal the recorded checksum.
//
// The checks stop at the first failure. The failure is reported through the
// Error field of the returned Result; the error return value is always nil.
// Valid is set only when all checks pass.
func Verify(backupFile string) (*Result, error) {
	res := &Result{BackupFile: backupFile}

	// 1. The backup file itself.
	stat, statErr := os.Stat(backupFile)
	if statErr != nil {
		res.Error = fmt.Errorf("backup file does not exist: %w", statErr)
		return res, nil
	}
	res.FileExists = true

	// 2. Its metadata sidecar.
	meta, loadErr := metadata.Load(backupFile)
	if loadErr != nil {
		res.Error = fmt.Errorf("failed to load metadata: %w", loadErr)
		return res, nil
	}
	res.MetadataExists = true
	res.ExpectedSHA256 = meta.SHA256

	// 3. Size comparison; cheap, so it runs before hashing the file.
	if actualSize := stat.Size(); actualSize != meta.SizeBytes {
		res.Error = fmt.Errorf("size mismatch: expected %d bytes, got %d bytes",
			meta.SizeBytes, actualSize)
		return res, nil
	}
	res.SizeMatch = true

	// 4. Full checksum comparison.
	computed, sumErr := metadata.CalculateSHA256(backupFile)
	if sumErr != nil {
		res.Error = fmt.Errorf("failed to calculate checksum: %w", sumErr)
		return res, nil
	}
	res.CalculatedSHA256 = computed

	if computed == meta.SHA256 {
		res.Valid = true
		return res, nil
	}

	res.Error = fmt.Errorf("checksum mismatch: expected %s, got %s",
		meta.SHA256, computed)
	return res, nil
}
// VerifyMultiple runs Verify on each path in backupFiles and returns the
// results in input order. If Verify returns an error for a file, processing
// stops and that error is returned, wrapped with the file name. The result
// slice is nil when backupFiles is empty.
func VerifyMultiple(backupFiles []string) ([]*Result, error) {
	var collected []*Result
	for idx := range backupFiles {
		path := backupFiles[idx]

		res, verifyErr := Verify(path)
		if verifyErr != nil {
			return nil, fmt.Errorf("verification error for %s: %w", path, verifyErr)
		}
		collected = append(collected, res)
	}
	return collected, nil
}
// QuickCheck is the inexpensive counterpart of a full verification: it
// confirms that the backup file exists, that its metadata loads, and that
// the file size equals the recorded size. No checksum is calculated.
// It returns nil when all three checks pass.
func QuickCheck(backupFile string) error {
	stat, statErr := os.Stat(backupFile)
	if statErr != nil {
		return fmt.Errorf("backup file does not exist: %w", statErr)
	}

	meta, loadErr := metadata.Load(backupFile)
	if loadErr != nil {
		return fmt.Errorf("metadata missing or invalid: %w", loadErr)
	}

	if actualSize := stat.Size(); actualSize != meta.SizeBytes {
		return fmt.Errorf("size mismatch: expected %d bytes, got %d bytes",
			meta.SizeBytes, actualSize)
	}
	return nil
}