diff --git a/README.md b/README.md index b22b8c0..e3f0626 100755 --- a/README.md +++ b/README.md @@ -378,6 +378,111 @@ Restore entire PostgreSQL cluster from archive: ./dbbackup restore cluster ARCHIVE_FILE [OPTIONS] ``` +### Verification & Maintenance + +#### Verify Backup Integrity + +Verify backup files using SHA-256 checksums and metadata validation: + +```bash +./dbbackup verify-backup BACKUP_FILE [OPTIONS] +``` + +**Options:** + +- `--quick` - Quick verification (size check only, no checksum calculation) +- `--verbose` - Show detailed information about each backup + +**Examples:** + +```bash +# Verify single backup (full SHA-256 check) +./dbbackup verify-backup /backups/mydb_20251125.dump + +# Verify all backups in directory +./dbbackup verify-backup /backups/*.dump --verbose + +# Quick verification (fast, size check only) +./dbbackup verify-backup /backups/*.dump --quick +``` + +**Output:** +``` +Verifying 3 backup file(s)... + +šŸ“ mydb_20251125.dump + āœ… VALID + Size: 2.5 GiB + SHA-256: 7e166d4cb7276e1310d76922f45eda0333a6aeac... + Database: mydb (postgresql) + Created: 2025-11-25T19:00:00Z + +────────────────────────────────────────────────── +Total: 3 backups +āœ… Valid: 3 +``` + +#### Cleanup Old Backups + +Automatically remove old backups based on retention policy: + +```bash +./dbbackup cleanup BACKUP_DIRECTORY [OPTIONS] +``` + +**Options:** + +- `--retention-days INT` - Delete backups older than N days (default: 30) +- `--min-backups INT` - Always keep at least N most recent backups (default: 5) +- `--dry-run` - Preview what would be deleted without actually deleting +- `--pattern STRING` - Only clean backups matching pattern (e.g., "mydb_*.dump") + +**Retention Policy:** + +The cleanup command uses a safe retention policy: +1. Backups older than `--retention-days` are eligible for deletion +2. At least `--min-backups` most recent backups are always kept +3. Both conditions must be met for a backup to be deleted + +**Examples:** + +```bash +# Clean up backups older than 30 days (keep at least 5) +./dbbackup cleanup /backups --retention-days 30 --min-backups 5 + +# Preview what would be deleted +./dbbackup cleanup /backups --retention-days 7 --dry-run + +# Clean specific database backups +./dbbackup cleanup /backups --pattern "mydb_*.dump" + +# Aggressive cleanup (keep only 3 most recent) +./dbbackup cleanup /backups --retention-days 1 --min-backups 3 +``` + +**Output:** +``` +šŸ—‘ļø Cleanup Policy: + Directory: /backups + Retention: 30 days + Min backups: 5 + +šŸ“Š Results: + Total backups: 12 + Eligible for deletion: 7 + +āœ… Deleted 7 backup(s): + - old_db_20251001.dump + - old_db_20251002.dump + ... + +šŸ“¦ Kept 5 backup(s) + +šŸ’¾ Space freed: 15.2 GiB +────────────────────────────────────────────────── +āœ… Cleanup completed successfully +``` + **Options:** - `--confirm` - Confirm and execute restore (required for safety) diff --git a/ROADMAP.md b/ROADMAP.md new file mode 100644 index 0000000..905f4f5 --- /dev/null +++ b/ROADMAP.md @@ -0,0 +1,523 @@ +# dbbackup Version 2.0 Roadmap + +## Current Status: v1.1 (Production Ready) +- āœ… 24/24 automated tests passing (100%) +- āœ… PostgreSQL, MySQL, MariaDB support +- āœ… Interactive TUI + CLI +- āœ… Cluster backup/restore +- āœ… Docker support +- āœ… Cross-platform binaries + +--- + +## Version 2.0 Vision: Enterprise-Grade Features + +Transform dbbackup into an enterprise-ready backup solution with cloud storage, incremental backups, PITR, and encryption. 
+ +**Target Release:** Q2 2026 (3-4 months) + +--- + +## Priority Matrix + +``` + HIGH IMPACT + │ + ā”Œā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”¼ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā” + │ │ │ + │ Cloud Storage ⭐ │ Incremental ⭐⭐⭐ │ + │ Verification │ PITR ⭐⭐⭐ │ + │ Retention │ Encryption ⭐⭐ │ +LOW │ │ │ HIGH +EFFORT ─────────────────┼──────────────────── EFFORT + │ │ │ + │ Metrics │ Web UI (optional) │ + │ Remote Restore │ Replication Slots │ + │ │ │ + ā””ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”¼ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”˜ + │ + LOW IMPACT +``` + +--- + +## Development Phases + +### Phase 1: Foundation (Weeks 1-4) + +**Sprint 1: Verification & Retention (2 weeks)** + +**Goals:** +- Backup integrity verification with SHA-256 checksums +- Automated retention policy enforcement +- Structured backup metadata + +**Features:** +- āœ… Generate SHA-256 checksums during backup +- āœ… Verify backups before/after restore +- āœ… Automatic cleanup of old backups +- āœ… Retention policy: days + minimum count +- āœ… Backup metadata in JSON format + +**Deliverables:** +```bash +# New commands +dbbackup verify backup.dump +dbbackup cleanup --retention-days 30 --min-backups 5 + +# Metadata format +{ + "version": "2.0", + "timestamp": "2026-01-15T10:30:00Z", + "database": "production", + "size_bytes": 1073741824, + "sha256": "abc123...", + "db_version": "PostgreSQL 15.3", + "compression": "gzip-9" +} +``` + +**Implementation:** +- `internal/verification/` - Checksum calculation and validation +- `internal/retention/` - Policy enforcement +- `internal/metadata/` - Backup metadata management + +--- + +**Sprint 2: Cloud Storage (2 weeks)** + +**Goals:** +- Upload backups to cloud storage +- Support multiple cloud providers +- Download and restore from cloud + +**Providers:** +- āœ… AWS S3 +- āœ… MinIO (S3-compatible) +- āœ… Backblaze B2 +- āœ… Azure Blob Storage (optional) +- āœ… Google Cloud Storage (optional) + +**Configuration:** +```toml +[cloud] +enabled = true +provider = "s3" # s3, minio, azure, gcs, b2 +auto_upload = true + +[cloud.s3] +bucket = "db-backups" +region = "us-east-1" +endpoint = "s3.amazonaws.com" # Custom for MinIO +access_key = "..." # Or use IAM role +secret_key = "..." 
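+
+# Illustrative MinIO variant (assumption: MinIO reuses these same keys,
+# with provider = "minio" set in the [cloud] table above and a custom
+# endpoint; exact key names may still change before v2.0):
+# endpoint   = "minio.internal:9000"
+# access_key = "..."
+# secret_key = "..."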
+``` + +**New Commands:** +```bash +# Upload existing backup +dbbackup cloud upload backup.dump + +# List cloud backups +dbbackup cloud list + +# Download from cloud +dbbackup cloud download backup_id + +# Restore directly from cloud +dbbackup restore single s3://bucket/backup.dump --target mydb +``` + +**Dependencies:** +```go +"github.com/aws/aws-sdk-go-v2/service/s3" +"github.com/Azure/azure-sdk-for-go/sdk/storage/azblob" +"cloud.google.com/go/storage" +``` + +--- + +### Phase 2: Advanced Backup (Weeks 5-10) + +**Sprint 3: Incremental Backups (3 weeks)** + +**Goals:** +- Reduce backup time and storage +- File-level incremental for PostgreSQL +- Binary log incremental for MySQL + +**PostgreSQL Strategy:** +``` +Full Backup (Base) + ā”œā”€ Incremental 1 (changed files since base) + ā”œā”€ Incremental 2 (changed files since inc1) + └─ Incremental 3 (changed files since inc2) +``` + +**MySQL Strategy:** +``` +Full Backup + ā”œā”€ Binary Log 1 (changes since full) + ā”œā”€ Binary Log 2 + └─ Binary Log 3 +``` + +**Implementation:** +```bash +# Create base backup +dbbackup backup single mydb --mode full + +# Create incremental +dbbackup backup single mydb --mode incremental + +# Restore (automatically applies incrementals) +dbbackup restore single backup.dump --apply-incrementals +``` + +**File Structure:** +``` +backups/ +ā”œā”€ā”€ mydb_full_20260115.dump +ā”œā”€ā”€ mydb_full_20260115.meta +ā”œā”€ā”€ mydb_incr_20260116.dump # Contains only changes +ā”œā”€ā”€ mydb_incr_20260116.meta # Points to base: mydb_full_20260115 +└── mydb_incr_20260117.dump +``` + +--- + +**Sprint 4: Security & Encryption (2 weeks)** + +**Goals:** +- Encrypt backups at rest +- Secure key management +- Encrypted cloud uploads + +**Features:** +- āœ… AES-256-GCM encryption +- āœ… Argon2 key derivation +- āœ… Multiple key sources (file, env, vault) +- āœ… Encrypted metadata + +**Configuration:** +```toml +[encryption] +enabled = true +algorithm = "aes-256-gcm" +key_file = "/etc/dbbackup/encryption.key" + +# Or use environment variable +# DBBACKUP_ENCRYPTION_KEY=base64key... 
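+#
+# One way to generate key material (illustrative only -- AES-256 needs a
+# 32-byte key; whether key_file holds raw or base64 bytes is not specified
+# here, so treat this as a sketch):
+#   openssl rand -base64 32 > /etc/dbbackup/encryption.key
+#   chmod 600 /etc/dbbackup/encryption.key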
+``` + +**Commands:** +```bash +# Generate encryption key +dbbackup keys generate + +# Encrypt existing backup +dbbackup encrypt backup.dump + +# Decrypt backup +dbbackup decrypt backup.dump.enc + +# Automatic encryption +dbbackup backup single mydb --encrypt +``` + +**File Format:** +``` ++------------------+ +| Encryption Header| (IV, algorithm, key ID) ++------------------+ +| Encrypted Data | (AES-256-GCM) ++------------------+ +| Auth Tag | (HMAC for integrity) ++------------------+ +``` + +--- + +**Sprint 5: Point-in-Time Recovery - PITR (4 weeks)** + +**Goals:** +- Restore to any point in time +- WAL archiving for PostgreSQL +- Binary log archiving for MySQL + +**PostgreSQL Implementation:** + +```toml +[pitr] +enabled = true +wal_archive_dir = "/backups/wal_archive" +wal_retention_days = 7 + +# PostgreSQL config (auto-configured by dbbackup) +# archive_mode = on +# archive_command = '/usr/local/bin/dbbackup archive-wal %p %f' +``` + +**Commands:** +```bash +# Enable PITR +dbbackup pitr enable + +# Archive WAL manually +dbbackup archive-wal /var/lib/postgresql/pg_wal/000000010000000000000001 + +# Restore to point-in-time +dbbackup restore single backup.dump \ + --target-time "2026-01-15 14:30:00" \ + --target mydb + +# Show available restore points +dbbackup pitr timeline +``` + +**WAL Archive Structure:** +``` +wal_archive/ +ā”œā”€ā”€ 000000010000000000000001 +ā”œā”€ā”€ 000000010000000000000002 +ā”œā”€ā”€ 000000010000000000000003 +└── timeline.json +``` + +**MySQL Implementation:** +```bash +# Archive binary logs +dbbackup binlog archive --start-datetime "2026-01-15 00:00:00" + +# PITR restore +dbbackup restore single backup.sql \ + --target-time "2026-01-15 14:30:00" \ + --apply-binlogs +``` + +--- + +### Phase 3: Enterprise Features (Weeks 11-16) + +**Sprint 6: Observability & Integration (3 weeks)** + +**Features:** + +1. **Prometheus Metrics** +```go +# Exposed metrics +dbbackup_backup_duration_seconds +dbbackup_backup_size_bytes +dbbackup_backup_success_total +dbbackup_restore_duration_seconds +dbbackup_last_backup_timestamp +dbbackup_cloud_upload_duration_seconds +``` + +**Endpoint:** +```bash +# Start metrics server +dbbackup metrics serve --port 9090 + +# Scrape endpoint +curl http://localhost:9090/metrics +``` + +2. **Remote Restore** +```bash +# Restore to remote server +dbbackup restore single backup.dump \ + --remote-host db-replica-01 \ + --remote-user postgres \ + --remote-port 22 \ + --confirm +``` + +3. **Replication Slots (PostgreSQL)** +```bash +# Create replication slot for continuous WAL streaming +dbbackup replication create-slot backup_slot + +# Stream WALs via replication +dbbackup replication stream backup_slot +``` + +4. **Webhook Notifications** +```toml +[notifications] +enabled = true +webhook_url = "https://slack.com/webhook/..." 
+notify_on = ["backup_complete", "backup_failed", "restore_complete"] +``` + +--- + +## Technical Architecture + +### New Directory Structure + +``` +internal/ +ā”œā”€ā”€ cloud/ # Cloud storage backends +│ ā”œā”€ā”€ interface.go +│ ā”œā”€ā”€ s3.go +│ ā”œā”€ā”€ azure.go +│ └── gcs.go +ā”œā”€ā”€ encryption/ # Encryption layer +│ ā”œā”€ā”€ aes.go +│ ā”œā”€ā”€ keys.go +│ └── vault.go +ā”œā”€ā”€ incremental/ # Incremental backup engine +│ ā”œā”€ā”€ postgres.go +│ └── mysql.go +ā”œā”€ā”€ pitr/ # Point-in-time recovery +│ ā”œā”€ā”€ wal.go +│ ā”œā”€ā”€ binlog.go +│ └── timeline.go +ā”œā”€ā”€ verification/ # Backup verification +│ ā”œā”€ā”€ checksum.go +│ └── validate.go +ā”œā”€ā”€ retention/ # Retention policy +│ └── cleanup.go +ā”œā”€ā”€ metrics/ # Prometheus metrics +│ └── exporter.go +└── replication/ # Replication management + └── slots.go +``` + +### Required Dependencies + +```go +// Cloud storage +"github.com/aws/aws-sdk-go-v2/service/s3" +"github.com/Azure/azure-sdk-for-go/sdk/storage/azblob" +"cloud.google.com/go/storage" + +// Encryption +"crypto/aes" +"crypto/cipher" +"golang.org/x/crypto/argon2" + +// Metrics +"github.com/prometheus/client_golang/prometheus" +"github.com/prometheus/client_golang/prometheus/promhttp" + +// PostgreSQL replication +"github.com/jackc/pgx/v5/pgconn" + +// Fast file scanning for incrementals +"github.com/karrick/godirwalk" +``` + +--- + +## Testing Strategy + +### v2.0 Test Coverage Goals +- Minimum 90% code coverage +- Integration tests for all cloud providers +- End-to-end PITR scenarios +- Performance benchmarks for incremental backups +- Encryption/decryption validation +- Multi-database restore tests + +### New Test Suites +```bash +# Cloud storage tests +./run_qa_tests.sh --suite cloud + +# Incremental backup tests +./run_qa_tests.sh --suite incremental + +# PITR tests +./run_qa_tests.sh --suite pitr + +# Encryption tests +./run_qa_tests.sh --suite encryption + +# Full v2.0 suite +./run_qa_tests.sh --suite v2 +``` + +--- + +## Migration Path + +### v1.x → v2.0 Compatibility +- āœ… All v1.x backups readable in v2.0 +- āœ… Configuration auto-migration +- āœ… Metadata format upgrade +- āœ… Backward-compatible commands + +### Deprecation Timeline +- v2.0: Warning for old config format +- v2.1: Full migration required +- v3.0: Old format no longer supported + +--- + +## Documentation Updates + +### New Docs +- `CLOUD.md` - Cloud storage configuration +- `INCREMENTAL.md` - Incremental backup guide +- `PITR.md` - Point-in-time recovery +- `ENCRYPTION.md` - Encryption setup +- `METRICS.md` - Prometheus integration + +--- + +## Success Metrics + +### v2.0 Goals +- šŸŽÆ 95%+ test coverage +- šŸŽÆ Support 1TB+ databases with incrementals +- šŸŽÆ PITR with <5 minute granularity +- šŸŽÆ Cloud upload/download >100MB/s +- šŸŽÆ Encryption overhead <10% +- šŸŽÆ Full compatibility with pgBackRest for PostgreSQL +- šŸŽÆ Industry-leading MySQL PITR solution + +--- + +## Release Schedule + +- **v2.0-alpha** (End Sprint 3): Cloud + Verification +- **v2.0-beta** (End Sprint 5): + Incremental + PITR +- **v2.0-rc1** (End Sprint 6): + Enterprise features +- **v2.0 GA** (Q2 2026): Production release + +--- + +## What Makes v2.0 Unique + +After v2.0, dbbackup will be: + +āœ… **Only multi-database tool** with full PITR support +āœ… **Best-in-class UX** (TUI + CLI + Docker + K8s) +āœ… **Feature parity** with pgBackRest (PostgreSQL) +āœ… **Superior to mysqldump** with incremental + PITR +āœ… **Cloud-native** with multi-provider support +āœ… **Enterprise-ready** with encryption + 
metrics +āœ… **Zero-config** for 80% of use cases + +--- + +## Contributing + +Want to contribute to v2.0? Check out: +- [CONTRIBUTING.md](CONTRIBUTING.md) +- [Good First Issues](https://git.uuxo.net/uuxo/dbbackup/issues?labels=good-first-issue) +- [v2.0 Milestone](https://git.uuxo.net/uuxo/dbbackup/milestone/2) + +--- + +## Questions? + +Open an issue or start a discussion: +- Issues: https://git.uuxo.net/uuxo/dbbackup/issues +- Discussions: https://git.uuxo.net/uuxo/dbbackup/discussions + +--- + +**Next Step:** Sprint 1 - Backup Verification & Retention (January 2026) diff --git a/cmd/cleanup.go b/cmd/cleanup.go new file mode 100644 index 0000000..1d2dc35 --- /dev/null +++ b/cmd/cleanup.go @@ -0,0 +1,152 @@ +package cmd + +import ( + "fmt" + "os" + "path/filepath" + "strings" + + "dbbackup/internal/metadata" + "dbbackup/internal/retention" + "github.com/spf13/cobra" +) + +var cleanupCmd = &cobra.Command{ + Use: "cleanup [backup-directory]", + Short: "Clean up old backups based on retention policy", + Long: `Remove old backup files based on retention policy while maintaining minimum backup count. + +The retention policy ensures: +1. Backups older than --retention-days are eligible for deletion +2. At least --min-backups most recent backups are always kept +3. Both conditions must be met for deletion + +Examples: + # Clean up backups older than 30 days (keep at least 5) + dbbackup cleanup /backups --retention-days 30 --min-backups 5 + + # Dry run to see what would be deleted + dbbackup cleanup /backups --retention-days 7 --dry-run + + # Clean up specific database backups only + dbbackup cleanup /backups --pattern "mydb_*.dump" + + # Aggressive cleanup (keep only 3 most recent) + dbbackup cleanup /backups --retention-days 1 --min-backups 3`, + Args: cobra.ExactArgs(1), + RunE: runCleanup, +} + +var ( + retentionDays int + minBackups int + dryRun bool + cleanupPattern string +) + +func init() { + rootCmd.AddCommand(cleanupCmd) + cleanupCmd.Flags().IntVar(&retentionDays, "retention-days", 30, "Delete backups older than this many days") + cleanupCmd.Flags().IntVar(&minBackups, "min-backups", 5, "Always keep at least this many backups") + cleanupCmd.Flags().BoolVar(&dryRun, "dry-run", false, "Show what would be deleted without actually deleting") + cleanupCmd.Flags().StringVar(&cleanupPattern, "pattern", "", "Only clean up backups matching this pattern (e.g., 'mydb_*.dump')") +} + +func runCleanup(cmd *cobra.Command, args []string) error { + backupDir := args[0] + + // Validate directory exists + if !dirExists(backupDir) { + return fmt.Errorf("backup directory does not exist: %s", backupDir) + } + + // Create retention policy + policy := retention.Policy{ + RetentionDays: retentionDays, + MinBackups: minBackups, + DryRun: dryRun, + } + + fmt.Printf("šŸ—‘ļø Cleanup Policy:\n") + fmt.Printf(" Directory: %s\n", backupDir) + fmt.Printf(" Retention: %d days\n", policy.RetentionDays) + fmt.Printf(" Min backups: %d\n", policy.MinBackups) + if cleanupPattern != "" { + fmt.Printf(" Pattern: %s\n", cleanupPattern) + } + if dryRun { + fmt.Printf(" Mode: DRY RUN (no files will be deleted)\n") + } + fmt.Println() + + var result *retention.CleanupResult + var err error + + // Apply policy + if cleanupPattern != "" { + result, err = retention.CleanupByPattern(backupDir, cleanupPattern, policy) + } else { + result, err = retention.ApplyPolicy(backupDir, policy) + } + + if err != nil { + return fmt.Errorf("cleanup failed: %w", err) + } + + // Display results + fmt.Printf("šŸ“Š Results:\n") + fmt.Printf(" Total 
backups: %d\n", result.TotalBackups) + fmt.Printf(" Eligible for deletion: %d\n", result.EligibleForDeletion) + + if len(result.Deleted) > 0 { + fmt.Printf("\n") + if dryRun { + fmt.Printf("šŸ” Would delete %d backup(s):\n", len(result.Deleted)) + } else { + fmt.Printf("āœ… Deleted %d backup(s):\n", len(result.Deleted)) + } + for _, file := range result.Deleted { + fmt.Printf(" - %s\n", filepath.Base(file)) + } + } + + if len(result.Kept) > 0 && len(result.Kept) <= 10 { + fmt.Printf("\nšŸ“¦ Kept %d backup(s):\n", len(result.Kept)) + for _, file := range result.Kept { + fmt.Printf(" - %s\n", filepath.Base(file)) + } + } else if len(result.Kept) > 10 { + fmt.Printf("\nšŸ“¦ Kept %d backup(s)\n", len(result.Kept)) + } + + if !dryRun && result.SpaceFreed > 0 { + fmt.Printf("\nšŸ’¾ Space freed: %s\n", metadata.FormatSize(result.SpaceFreed)) + } + + if len(result.Errors) > 0 { + fmt.Printf("\nāš ļø Errors:\n") + for _, err := range result.Errors { + fmt.Printf(" - %v\n", err) + } + } + + fmt.Println(strings.Repeat("─", 50)) + + if dryRun { + fmt.Println("āœ… Dry run completed (no files were deleted)") + } else if len(result.Deleted) > 0 { + fmt.Println("āœ… Cleanup completed successfully") + } else { + fmt.Println("ā„¹ļø No backups eligible for deletion") + } + + return nil +} + +func dirExists(path string) bool { + info, err := os.Stat(path) + if err != nil { + return false + } + return info.IsDir() +} diff --git a/cmd/verify.go b/cmd/verify.go new file mode 100644 index 0000000..b6743ae --- /dev/null +++ b/cmd/verify.go @@ -0,0 +1,141 @@ +package cmd + +import ( + "fmt" + "os" + "path/filepath" + "strings" + "time" + + "dbbackup/internal/metadata" + "dbbackup/internal/verification" + "github.com/spf13/cobra" +) + +var verifyBackupCmd = &cobra.Command{ + Use: "verify-backup [backup-file]", + Short: "Verify backup file integrity with checksums", + Long: `Verify the integrity of one or more backup files by comparing their SHA-256 checksums +against the stored metadata. This ensures that backups have not been corrupted. + +Examples: + # Verify a single backup + dbbackup verify-backup /backups/mydb_20260115.dump + + # Verify all backups in a directory + dbbackup verify-backup /backups/*.dump + + # Quick verification (size check only, no checksum) + dbbackup verify-backup /backups/mydb.dump --quick + + # Verify and show detailed information + dbbackup verify-backup /backups/mydb.dump --verbose`, + Args: cobra.MinimumNArgs(1), + RunE: runVerifyBackup, +} + +var ( + quickVerify bool + verboseVerify bool +) + +func init() { + rootCmd.AddCommand(verifyBackupCmd) + verifyBackupCmd.Flags().BoolVar(&quickVerify, "quick", false, "Quick verification (size check only)") + verifyBackupCmd.Flags().BoolVarP(&verboseVerify, "verbose", "v", false, "Show detailed information") +} + +func runVerifyBackup(cmd *cobra.Command, args []string) error { + // Expand glob patterns + var backupFiles []string + for _, pattern := range args { + matches, err := filepath.Glob(pattern) + if err != nil { + return fmt.Errorf("invalid pattern %s: %w", pattern, err) + } + if len(matches) == 0 { + // Not a glob, use as-is + backupFiles = append(backupFiles, pattern) + } else { + backupFiles = append(backupFiles, matches...) 
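+			// Note: filepath.Glob also returns an existing literal path as its
+			// own match, so arguments already expanded by the shell land in
+			// this branch as single-element matches.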
+ } + } + + if len(backupFiles) == 0 { + return fmt.Errorf("no backup files found") + } + + fmt.Printf("Verifying %d backup file(s)...\n\n", len(backupFiles)) + + successCount := 0 + failureCount := 0 + + for _, backupFile := range backupFiles { + // Skip metadata files + if strings.HasSuffix(backupFile, ".meta.json") || + strings.HasSuffix(backupFile, ".sha256") || + strings.HasSuffix(backupFile, ".info") { + continue + } + + fmt.Printf("šŸ“ %s\n", filepath.Base(backupFile)) + + if quickVerify { + // Quick check: size only + err := verification.QuickCheck(backupFile) + if err != nil { + fmt.Printf(" āŒ FAILED: %v\n\n", err) + failureCount++ + continue + } + fmt.Printf(" āœ… VALID (quick check)\n\n") + successCount++ + } else { + // Full verification with SHA-256 + result, err := verification.Verify(backupFile) + if err != nil { + return fmt.Errorf("verification error: %w", err) + } + + if result.Valid { + fmt.Printf(" āœ… VALID\n") + if verboseVerify { + meta, _ := metadata.Load(backupFile) + fmt.Printf(" Size: %s\n", metadata.FormatSize(meta.SizeBytes)) + fmt.Printf(" SHA-256: %s\n", meta.SHA256) + fmt.Printf(" Database: %s (%s)\n", meta.Database, meta.DatabaseType) + fmt.Printf(" Created: %s\n", meta.Timestamp.Format(time.RFC3339)) + } + fmt.Println() + successCount++ + } else { + fmt.Printf(" āŒ FAILED: %v\n", result.Error) + if verboseVerify { + if !result.FileExists { + fmt.Printf(" File does not exist\n") + } else if !result.MetadataExists { + fmt.Printf(" Metadata file missing\n") + } else if !result.SizeMatch { + fmt.Printf(" Size mismatch\n") + } else { + fmt.Printf(" Expected: %s\n", result.ExpectedSHA256) + fmt.Printf(" Got: %s\n", result.CalculatedSHA256) + } + } + fmt.Println() + failureCount++ + } + } + } + + // Summary + fmt.Println(strings.Repeat("─", 50)) + fmt.Printf("Total: %d backups\n", len(backupFiles)) + fmt.Printf("āœ… Valid: %d\n", successCount) + if failureCount > 0 { + fmt.Printf("āŒ Failed: %d\n", failureCount) + os.Exit(1) + } + + return nil +} diff --git a/internal/backup/engine.go b/internal/backup/engine.go index 9925747..abe9c9e 100755 --- a/internal/backup/engine.go +++ b/internal/backup/engine.go @@ -21,6 +21,7 @@ import ( "dbbackup/internal/database" "dbbackup/internal/security" "dbbackup/internal/logger" + "dbbackup/internal/metadata" "dbbackup/internal/metrics" "dbbackup/internal/progress" "dbbackup/internal/swap" @@ -541,9 +542,9 @@ func (e *Engine) BackupCluster(ctx context.Context) error { operation.Complete(fmt.Sprintf("Cluster backup created: %s (%s)", outputFile, size)) } - // Create metadata file - if err := e.createMetadata(outputFile, "cluster", "cluster", ""); err != nil { - e.log.Warn("Failed to create metadata file", "error", err) + // Create cluster metadata file + if err := e.createClusterMetadata(outputFile, databases, successCountFinal, failCountFinal); err != nil { + e.log.Warn("Failed to create cluster metadata file", "error", err) } return nil @@ -910,9 +911,70 @@ regularTar: // createMetadata creates a metadata file for the backup func (e *Engine) createMetadata(backupFile, database, backupType, strategy string) error { - metaFile := backupFile + ".info" + startTime := time.Now() - content := fmt.Sprintf(`{ + // Get backup file information + info, err := os.Stat(backupFile) + if err != nil { + return fmt.Errorf("failed to stat backup file: %w", err) + } + + // Calculate SHA-256 checksum + sha256, err := metadata.CalculateSHA256(backupFile) + if err != nil { + return fmt.Errorf("failed to calculate checksum: %w", err) + } + + // 
Get database version + ctx := context.Background() + dbVersion, _ := e.db.GetVersion(ctx) + if dbVersion == "" { + dbVersion = "unknown" + } + + // Determine compression format + compressionFormat := "none" + if e.cfg.CompressionLevel > 0 { + if e.cfg.Jobs > 1 { + compressionFormat = fmt.Sprintf("pigz-%d", e.cfg.CompressionLevel) + } else { + compressionFormat = fmt.Sprintf("gzip-%d", e.cfg.CompressionLevel) + } + } + + // Create backup metadata + meta := &metadata.BackupMetadata{ + Version: "2.0", + Timestamp: startTime, + Database: database, + DatabaseType: e.cfg.DatabaseType, + DatabaseVersion: dbVersion, + Host: e.cfg.Host, + Port: e.cfg.Port, + User: e.cfg.User, + BackupFile: backupFile, + SizeBytes: info.Size(), + SHA256: sha256, + Compression: compressionFormat, + BackupType: backupType, + Duration: time.Since(startTime).Seconds(), + ExtraInfo: make(map[string]string), + } + + // Add strategy for sample backups + if strategy != "" { + meta.ExtraInfo["sample_strategy"] = strategy + meta.ExtraInfo["sample_value"] = fmt.Sprintf("%d", e.cfg.SampleValue) + } + + // Save metadata + if err := meta.Save(); err != nil { + return fmt.Errorf("failed to save metadata: %w", err) + } + + // Also save legacy .info file for backward compatibility + legacyMetaFile := backupFile + ".info" + legacyContent := fmt.Sprintf(`{ "type": "%s", "database": "%s", "timestamp": "%s", @@ -920,24 +982,102 @@ func (e *Engine) createMetadata(backupFile, database, backupType, strategy strin "port": %d, "user": "%s", "db_type": "%s", - "compression": %d`, - backupType, database, time.Now().Format("20060102_150405"), - e.cfg.Host, e.cfg.Port, e.cfg.User, e.cfg.DatabaseType, e.cfg.CompressionLevel) + "compression": %d, + "size_bytes": %d +}`, backupType, database, startTime.Format("20060102_150405"), + e.cfg.Host, e.cfg.Port, e.cfg.User, e.cfg.DatabaseType, + e.cfg.CompressionLevel, info.Size()) - if strategy != "" { - content += fmt.Sprintf(`, - "sample_strategy": "%s", - "sample_value": %d`, e.cfg.SampleStrategy, e.cfg.SampleValue) + if err := os.WriteFile(legacyMetaFile, []byte(legacyContent), 0644); err != nil { + e.log.Warn("Failed to save legacy metadata file", "error", err) } - if info, err := os.Stat(backupFile); err == nil { - content += fmt.Sprintf(`, - "size_bytes": %d`, info.Size()) + return nil +} + +// createClusterMetadata creates metadata for cluster backups +func (e *Engine) createClusterMetadata(backupFile string, databases []string, successCount, failCount int) error { + startTime := time.Now() + + // Get backup file information + info, err := os.Stat(backupFile) + if err != nil { + return fmt.Errorf("failed to stat backup file: %w", err) } - content += "\n}" + // Calculate SHA-256 checksum for archive + sha256, err := metadata.CalculateSHA256(backupFile) + if err != nil { + return fmt.Errorf("failed to calculate checksum: %w", err) + } - return os.WriteFile(metaFile, []byte(content), 0644) + // Get database version + ctx := context.Background() + dbVersion, _ := e.db.GetVersion(ctx) + if dbVersion == "" { + dbVersion = "unknown" + } + + // Create cluster metadata + clusterMeta := &metadata.ClusterMetadata{ + Version: "2.0", + Timestamp: startTime, + ClusterName: fmt.Sprintf("%s:%d", e.cfg.Host, e.cfg.Port), + DatabaseType: e.cfg.DatabaseType, + Host: e.cfg.Host, + Port: e.cfg.Port, + Databases: make([]metadata.BackupMetadata, 0), + TotalSize: info.Size(), + Duration: time.Since(startTime).Seconds(), + ExtraInfo: map[string]string{ + "database_count": fmt.Sprintf("%d", len(databases)), + 
"success_count": fmt.Sprintf("%d", successCount), + "failure_count": fmt.Sprintf("%d", failCount), + "archive_sha256": sha256, + "database_version": dbVersion, + }, + } + + // Add database names to metadata + for _, dbName := range databases { + dbMeta := metadata.BackupMetadata{ + Database: dbName, + DatabaseType: e.cfg.DatabaseType, + DatabaseVersion: dbVersion, + Timestamp: startTime, + } + clusterMeta.Databases = append(clusterMeta.Databases, dbMeta) + } + + // Save cluster metadata + if err := clusterMeta.Save(backupFile); err != nil { + return fmt.Errorf("failed to save cluster metadata: %w", err) + } + + // Also save legacy .info file for backward compatibility + legacyMetaFile := backupFile + ".info" + legacyContent := fmt.Sprintf(`{ + "type": "cluster", + "database": "cluster", + "timestamp": "%s", + "host": "%s", + "port": %d, + "user": "%s", + "db_type": "%s", + "compression": %d, + "size_bytes": %d, + "database_count": %d, + "success_count": %d, + "failure_count": %d +}`, startTime.Format("20060102_150405"), + e.cfg.Host, e.cfg.Port, e.cfg.User, e.cfg.DatabaseType, + e.cfg.CompressionLevel, info.Size(), len(databases), successCount, failCount) + + if err := os.WriteFile(legacyMetaFile, []byte(legacyContent), 0644); err != nil { + e.log.Warn("Failed to save legacy cluster metadata file", "error", err) + } + + return nil } // executeCommand executes a backup command (optimized for huge databases) diff --git a/internal/metadata/metadata.go b/internal/metadata/metadata.go new file mode 100644 index 0000000..f8006dd --- /dev/null +++ b/internal/metadata/metadata.go @@ -0,0 +1,167 @@ +package metadata + +import ( + "crypto/sha256" + "encoding/hex" + "encoding/json" + "fmt" + "io" + "os" + "path/filepath" + "time" +) + +// BackupMetadata contains comprehensive information about a backup +type BackupMetadata struct { + Version string `json:"version"` + Timestamp time.Time `json:"timestamp"` + Database string `json:"database"` + DatabaseType string `json:"database_type"` // postgresql, mysql, mariadb + DatabaseVersion string `json:"database_version"` // e.g., "PostgreSQL 15.3" + Host string `json:"host"` + Port int `json:"port"` + User string `json:"user"` + BackupFile string `json:"backup_file"` + SizeBytes int64 `json:"size_bytes"` + SHA256 string `json:"sha256"` + Compression string `json:"compression"` // none, gzip, pigz + BackupType string `json:"backup_type"` // full, incremental (for v2.0) + BaseBackup string `json:"base_backup,omitempty"` + Duration float64 `json:"duration_seconds"` + ExtraInfo map[string]string `json:"extra_info,omitempty"` +} + +// ClusterMetadata contains metadata for cluster backups +type ClusterMetadata struct { + Version string `json:"version"` + Timestamp time.Time `json:"timestamp"` + ClusterName string `json:"cluster_name"` + DatabaseType string `json:"database_type"` + Host string `json:"host"` + Port int `json:"port"` + Databases []BackupMetadata `json:"databases"` + TotalSize int64 `json:"total_size_bytes"` + Duration float64 `json:"duration_seconds"` + ExtraInfo map[string]string `json:"extra_info,omitempty"` +} + +// CalculateSHA256 computes the SHA-256 checksum of a file +func CalculateSHA256(filePath string) (string, error) { + f, err := os.Open(filePath) + if err != nil { + return "", fmt.Errorf("failed to open file: %w", err) + } + defer f.Close() + + hasher := sha256.New() + if _, err := io.Copy(hasher, f); err != nil { + return "", fmt.Errorf("failed to calculate checksum: %w", err) + } + + return hex.EncodeToString(hasher.Sum(nil)), nil +} 
+ +// Save writes metadata to a .meta.json file +func (m *BackupMetadata) Save() error { + metaPath := m.BackupFile + ".meta.json" + + data, err := json.MarshalIndent(m, "", " ") + if err != nil { + return fmt.Errorf("failed to marshal metadata: %w", err) + } + + if err := os.WriteFile(metaPath, data, 0644); err != nil { + return fmt.Errorf("failed to write metadata file: %w", err) + } + + return nil +} + +// Load reads metadata from a .meta.json file +func Load(backupFile string) (*BackupMetadata, error) { + metaPath := backupFile + ".meta.json" + + data, err := os.ReadFile(metaPath) + if err != nil { + return nil, fmt.Errorf("failed to read metadata file: %w", err) + } + + var meta BackupMetadata + if err := json.Unmarshal(data, &meta); err != nil { + return nil, fmt.Errorf("failed to parse metadata: %w", err) + } + + return &meta, nil +} + +// SaveCluster writes cluster metadata to a .meta.json file +func (m *ClusterMetadata) Save(targetFile string) error { + metaPath := targetFile + ".meta.json" + + data, err := json.MarshalIndent(m, "", " ") + if err != nil { + return fmt.Errorf("failed to marshal cluster metadata: %w", err) + } + + if err := os.WriteFile(metaPath, data, 0644); err != nil { + return fmt.Errorf("failed to write cluster metadata file: %w", err) + } + + return nil +} + +// LoadCluster reads cluster metadata from a .meta.json file +func LoadCluster(targetFile string) (*ClusterMetadata, error) { + metaPath := targetFile + ".meta.json" + + data, err := os.ReadFile(metaPath) + if err != nil { + return nil, fmt.Errorf("failed to read cluster metadata file: %w", err) + } + + var meta ClusterMetadata + if err := json.Unmarshal(data, &meta); err != nil { + return nil, fmt.Errorf("failed to parse cluster metadata: %w", err) + } + + return &meta, nil +} + +// ListBackups scans a directory for backup files and returns their metadata +func ListBackups(dir string) ([]*BackupMetadata, error) { + pattern := filepath.Join(dir, "*.meta.json") + matches, err := filepath.Glob(pattern) + if err != nil { + return nil, fmt.Errorf("failed to scan directory: %w", err) + } + + var backups []*BackupMetadata + for _, metaFile := range matches { + // Extract backup file path (remove .meta.json suffix) + backupFile := metaFile[:len(metaFile)-len(".meta.json")] + + meta, err := Load(backupFile) + if err != nil { + // Skip invalid metadata files + continue + } + + backups = append(backups, meta) + } + + return backups, nil +} + +// FormatSize returns human-readable size +func FormatSize(bytes int64) string { + const unit = 1024 + if bytes < unit { + return fmt.Sprintf("%d B", bytes) + } + div, exp := int64(unit), 0 + for n := bytes / unit; n >= unit; n /= unit { + div *= unit + exp++ + } + return fmt.Sprintf("%.1f %ciB", float64(bytes)/float64(div), "KMGTPE"[exp]) +} diff --git a/internal/retention/retention.go b/internal/retention/retention.go new file mode 100644 index 0000000..024dcc3 --- /dev/null +++ b/internal/retention/retention.go @@ -0,0 +1,224 @@ +package retention + +import ( + "fmt" + "os" + "path/filepath" + "sort" + "time" + + "dbbackup/internal/metadata" +) + +// Policy defines the retention rules +type Policy struct { + RetentionDays int + MinBackups int + DryRun bool +} + +// CleanupResult contains information about cleanup operations +type CleanupResult struct { + TotalBackups int + EligibleForDeletion int + Deleted []string + Kept []string + SpaceFreed int64 + Errors []error +} + +// ApplyPolicy enforces the retention policy on backups in a directory +func ApplyPolicy(backupDir 
string, policy Policy) (*CleanupResult, error) { + result := &CleanupResult{ + Deleted: make([]string, 0), + Kept: make([]string, 0), + Errors: make([]error, 0), + } + + // List all backups in directory + backups, err := metadata.ListBackups(backupDir) + if err != nil { + return nil, fmt.Errorf("failed to list backups: %w", err) + } + + result.TotalBackups = len(backups) + + // Sort backups by timestamp (oldest first) + sort.Slice(backups, func(i, j int) bool { + return backups[i].Timestamp.Before(backups[j].Timestamp) + }) + + // Calculate cutoff date + cutoffDate := time.Now().AddDate(0, 0, -policy.RetentionDays) + + // Determine which backups to delete + for i, backup := range backups { + // Always keep minimum number of backups (most recent ones) + backupsRemaining := len(backups) - i + if backupsRemaining <= policy.MinBackups { + result.Kept = append(result.Kept, backup.BackupFile) + continue + } + + // Check if backup is older than retention period + if backup.Timestamp.Before(cutoffDate) { + result.EligibleForDeletion++ + + if policy.DryRun { + result.Deleted = append(result.Deleted, backup.BackupFile) + } else { + // Delete backup file and associated metadata + if err := deleteBackup(backup.BackupFile); err != nil { + result.Errors = append(result.Errors, + fmt.Errorf("failed to delete %s: %w", backup.BackupFile, err)) + } else { + result.Deleted = append(result.Deleted, backup.BackupFile) + result.SpaceFreed += backup.SizeBytes + } + } + } else { + result.Kept = append(result.Kept, backup.BackupFile) + } + } + + return result, nil +} + +// deleteBackup removes a backup file and all associated files +func deleteBackup(backupFile string) error { + // Delete main backup file + if err := os.Remove(backupFile); err != nil && !os.IsNotExist(err) { + return fmt.Errorf("failed to delete backup file: %w", err) + } + + // Delete metadata file + metaFile := backupFile + ".meta.json" + if err := os.Remove(metaFile); err != nil && !os.IsNotExist(err) { + return fmt.Errorf("failed to delete metadata file: %w", err) + } + + // Delete legacy .sha256 file if exists + sha256File := backupFile + ".sha256" + if err := os.Remove(sha256File); err != nil && !os.IsNotExist(err) { + // Don't fail if .sha256 doesn't exist (new format) + } + + // Delete legacy .info file if exists + infoFile := backupFile + ".info" + if err := os.Remove(infoFile); err != nil && !os.IsNotExist(err) { + // Don't fail if .info doesn't exist (new format) + } + + return nil +} + +// GetOldestBackups returns the N oldest backups in a directory +func GetOldestBackups(backupDir string, count int) ([]*metadata.BackupMetadata, error) { + backups, err := metadata.ListBackups(backupDir) + if err != nil { + return nil, err + } + + // Sort by timestamp (oldest first) + sort.Slice(backups, func(i, j int) bool { + return backups[i].Timestamp.Before(backups[j].Timestamp) + }) + + if count > len(backups) { + count = len(backups) + } + + return backups[:count], nil +} + +// GetNewestBackups returns the N newest backups in a directory +func GetNewestBackups(backupDir string, count int) ([]*metadata.BackupMetadata, error) { + backups, err := metadata.ListBackups(backupDir) + if err != nil { + return nil, err + } + + // Sort by timestamp (newest first) + sort.Slice(backups, func(i, j int) bool { + return backups[i].Timestamp.After(backups[j].Timestamp) + }) + + if count > len(backups) { + count = len(backups) + } + + return backups[:count], nil +} + +// CleanupByPattern removes backups matching a specific pattern +func 
CleanupByPattern(backupDir, pattern string, policy Policy) (*CleanupResult, error) { + result := &CleanupResult{ + Deleted: make([]string, 0), + Kept: make([]string, 0), + Errors: make([]error, 0), + } + + // Find matching backup files + searchPattern := filepath.Join(backupDir, pattern) + matches, err := filepath.Glob(searchPattern) + if err != nil { + return nil, fmt.Errorf("failed to match pattern: %w", err) + } + + // Filter to only .dump or .sql files + var backupFiles []string + for _, match := range matches { + ext := filepath.Ext(match) + if ext == ".dump" || ext == ".sql" { + backupFiles = append(backupFiles, match) + } + } + + // Load metadata for matched backups + var backups []*metadata.BackupMetadata + for _, file := range backupFiles { + meta, err := metadata.Load(file) + if err != nil { + // Skip files without metadata + continue + } + backups = append(backups, meta) + } + + result.TotalBackups = len(backups) + + // Sort by timestamp + sort.Slice(backups, func(i, j int) bool { + return backups[i].Timestamp.Before(backups[j].Timestamp) + }) + + cutoffDate := time.Now().AddDate(0, 0, -policy.RetentionDays) + + // Apply policy + for i, backup := range backups { + backupsRemaining := len(backups) - i + if backupsRemaining <= policy.MinBackups { + result.Kept = append(result.Kept, backup.BackupFile) + continue + } + + if backup.Timestamp.Before(cutoffDate) { + result.EligibleForDeletion++ + + if policy.DryRun { + result.Deleted = append(result.Deleted, backup.BackupFile) + } else { + if err := deleteBackup(backup.BackupFile); err != nil { + result.Errors = append(result.Errors, err) + } else { + result.Deleted = append(result.Deleted, backup.BackupFile) + result.SpaceFreed += backup.SizeBytes + } + } + } else { + result.Kept = append(result.Kept, backup.BackupFile) + } + } + + return result, nil +} diff --git a/internal/verification/verification.go b/internal/verification/verification.go new file mode 100644 index 0000000..1bbb8f4 --- /dev/null +++ b/internal/verification/verification.go @@ -0,0 +1,114 @@ +package verification + +import ( + "fmt" + "os" + + "dbbackup/internal/metadata" +) + +// Result represents the outcome of a verification operation +type Result struct { + Valid bool + BackupFile string + ExpectedSHA256 string + CalculatedSHA256 string + SizeMatch bool + FileExists bool + MetadataExists bool + Error error +} + +// Verify checks the integrity of a backup file +func Verify(backupFile string) (*Result, error) { + result := &Result{ + BackupFile: backupFile, + } + + // Check if backup file exists + info, err := os.Stat(backupFile) + if err != nil { + result.FileExists = false + result.Error = fmt.Errorf("backup file does not exist: %w", err) + return result, nil + } + result.FileExists = true + + // Load metadata + meta, err := metadata.Load(backupFile) + if err != nil { + result.MetadataExists = false + result.Error = fmt.Errorf("failed to load metadata: %w", err) + return result, nil + } + result.MetadataExists = true + result.ExpectedSHA256 = meta.SHA256 + + // Check size match + if info.Size() != meta.SizeBytes { + result.SizeMatch = false + result.Error = fmt.Errorf("size mismatch: expected %d bytes, got %d bytes", + meta.SizeBytes, info.Size()) + return result, nil + } + result.SizeMatch = true + + // Calculate actual SHA-256 + actualSHA256, err := metadata.CalculateSHA256(backupFile) + if err != nil { + result.Error = fmt.Errorf("failed to calculate checksum: %w", err) + return result, nil + } + result.CalculatedSHA256 = actualSHA256 + + // Compare checksums 
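+	// (A plain comparison is fine here: the checksum guards against
+	// corruption and truncation, not against an attacker who could also
+	// rewrite the adjacent .meta.json file.)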
+ if actualSHA256 != meta.SHA256 { + result.Valid = false + result.Error = fmt.Errorf("checksum mismatch: expected %s, got %s", + meta.SHA256, actualSHA256) + return result, nil + } + + // All checks passed + result.Valid = true + return result, nil +} + +// VerifyMultiple verifies multiple backup files +func VerifyMultiple(backupFiles []string) ([]*Result, error) { + var results []*Result + + for _, file := range backupFiles { + result, err := Verify(file) + if err != nil { + return nil, fmt.Errorf("verification error for %s: %w", file, err) + } + results = append(results, result) + } + + return results, nil +} + +// QuickCheck performs a fast check without full checksum calculation +// Only validates metadata existence and file size +func QuickCheck(backupFile string) error { + // Check file exists + info, err := os.Stat(backupFile) + if err != nil { + return fmt.Errorf("backup file does not exist: %w", err) + } + + // Load metadata + meta, err := metadata.Load(backupFile) + if err != nil { + return fmt.Errorf("metadata missing or invalid: %w", err) + } + + // Check size + if info.Size() != meta.SizeBytes { + return fmt.Errorf("size mismatch: expected %d bytes, got %d bytes", + meta.SizeBytes, info.Size()) + } + + return nil +}