diff --git a/internal/backup/incremental.go b/internal/backup/incremental.go
new file mode 100644
index 0000000..ce5dc1f
--- /dev/null
+++ b/internal/backup/incremental.go
@@ -0,0 +1,108 @@
+package backup
+
+import (
+	"context"
+	"time"
+)
+
+// BackupType represents the type of backup
+type BackupType string
+
+const (
+	BackupTypeFull        BackupType = "full"        // Complete backup of all data
+	BackupTypeIncremental BackupType = "incremental" // Only changed files since base backup
+)
+
+// IncrementalMetadata contains metadata for incremental backups
+type IncrementalMetadata struct {
+	// BaseBackupID is the SHA-256 checksum of the base backup this incremental depends on
+	BaseBackupID string `json:"base_backup_id"`
+
+	// BaseBackupPath is the filename of the base backup (e.g., "mydb_20250126_120000.tar.gz")
+	BaseBackupPath string `json:"base_backup_path"`
+
+	// BaseBackupTimestamp is when the base backup was created
+	BaseBackupTimestamp time.Time `json:"base_backup_timestamp"`
+
+	// IncrementalFiles is the number of changed files included in this backup
+	IncrementalFiles int `json:"incremental_files"`
+
+	// TotalSize is the total size of changed files (bytes)
+	TotalSize int64 `json:"total_size"`
+
+	// BackupChain is the list of all backups needed for restore (base + incrementals)
+	// Ordered from oldest to newest: [base, incr1, incr2, ...]
+	BackupChain []string `json:"backup_chain"`
+}
+
+// ChangedFile represents a file that changed since the base backup
+type ChangedFile struct {
+	// RelativePath is the path relative to PostgreSQL data directory
+	RelativePath string
+
+	// AbsolutePath is the full filesystem path
+	AbsolutePath string
+
+	// Size is the file size in bytes
+	Size int64
+
+	// ModTime is the last modification time
+	ModTime time.Time
+
+	// Checksum is the SHA-256 hash of the file content (optional)
+	Checksum string
+}
+
+// IncrementalBackupConfig holds configuration for incremental backups
+type IncrementalBackupConfig struct {
+	// BaseBackupPath is the path to the base backup archive
+	BaseBackupPath string
+
+	// DataDirectory is the PostgreSQL data directory to scan
+	DataDirectory string
+
+	// IncludeWAL determines if WAL files should be included
+	IncludeWAL bool
+
+	// CompressionLevel for the incremental archive (0-9)
+	CompressionLevel int
+}
+
+// BackupChainResolver resolves the chain of backups needed for restore
+type BackupChainResolver interface {
+	// FindBaseBackup locates the base backup for an incremental backup
+	FindBaseBackup(ctx context.Context, incrementalBackupID string) (*BackupInfo, error)
+
+	// ResolveChain returns the complete chain of backups needed for restore
+	// Returned in order: [base, incr1, incr2, ..., target]
+	ResolveChain(ctx context.Context, targetBackupID string) ([]*BackupInfo, error)
+
+	// ValidateChain verifies all backups in the chain exist and are valid
+	ValidateChain(ctx context.Context, chain []*BackupInfo) error
+}
+
+// IncrementalBackupEngine handles incremental backup operations
+type IncrementalBackupEngine interface {
+	// FindChangedFiles identifies files changed since the base backup
+	FindChangedFiles(ctx context.Context, config *IncrementalBackupConfig) ([]ChangedFile, error)
+
+	// CreateIncrementalBackup creates a new incremental backup
+	CreateIncrementalBackup(ctx context.Context, config *IncrementalBackupConfig, changedFiles []ChangedFile) error
+
+	// RestoreIncremental restores an incremental backup on top of a base backup
+	RestoreIncremental(ctx context.Context, baseBackupPath, incrementalPath, targetDir string) error
+}
+
+// BackupInfo extends the existing Info struct with incremental metadata
+// This will be integrated into the existing backup.Info struct
+type BackupInfo struct {
+	// Existing fields from backup.Info...
+	Database  string    `json:"database"`
+	Timestamp time.Time `json:"timestamp"`
+	Size      int64     `json:"size"`
+	Checksum  string    `json:"checksum"`
+
+	// New fields for incremental support
+	BackupType  BackupType           `json:"backup_type"`           // "full" or "incremental"
+	Incremental *IncrementalMetadata `json:"incremental,omitempty"` // Only present for incremental backups
+}
diff --git a/internal/backup/incremental_postgres.go b/internal/backup/incremental_postgres.go
new file mode 100644
index 0000000..f9d0070
--- /dev/null
+++ b/internal/backup/incremental_postgres.go
@@ -0,0 +1,204 @@
+package backup
+
+import (
+	"context"
+	"crypto/sha256"
+	"encoding/hex"
+	"encoding/json"
+	"fmt"
+	"io"
+	"os"
+	"path/filepath"
+	"strings"
+
+	"dbbackup/internal/logger"
+)
+
+// PostgresIncrementalEngine implements incremental backups for PostgreSQL
+type PostgresIncrementalEngine struct {
+	log logger.Logger
+}
+
+// NewPostgresIncrementalEngine creates a new PostgreSQL incremental backup engine
+func NewPostgresIncrementalEngine(log logger.Logger) *PostgresIncrementalEngine {
+	return &PostgresIncrementalEngine{
+		log: log,
+	}
+}
+
+// FindChangedFiles identifies files that changed since the base backup
+// This is a simple mtime-based implementation. Production should use pg_basebackup with incremental support.
+func (e *PostgresIncrementalEngine) FindChangedFiles(ctx context.Context, config *IncrementalBackupConfig) ([]ChangedFile, error) {
+	e.log.Info("Finding changed files for incremental backup",
+		"base_backup", config.BaseBackupPath,
+		"data_dir", config.DataDirectory)
+
+	// Load base backup metadata to get timestamp
+	baseInfo, err := e.loadBackupInfo(config.BaseBackupPath)
+	if err != nil {
+		return nil, fmt.Errorf("failed to load base backup info: %w", err)
+	}
+
+	if baseInfo.BackupType != BackupTypeFull {
+		return nil, fmt.Errorf("base backup must be a full backup, got: %s", baseInfo.BackupType)
+	}
+
+	baseTimestamp := baseInfo.Timestamp
+	e.log.Info("Base backup timestamp", "timestamp", baseTimestamp)
+
+	// Scan data directory for changed files
+	var changedFiles []ChangedFile
+
+	err = filepath.Walk(config.DataDirectory, func(path string, info os.FileInfo, err error) error {
+		if err != nil {
+			return err
+		}
+
+		// Abort the walk promptly if the caller cancelled the context.
+		if err := ctx.Err(); err != nil {
+			return err
+		}
+
+		// Skip directories
+		if info.IsDir() {
+			return nil
+		}
+
+		// Skip temporary files, lock files, and sockets
+		if e.shouldSkipFile(path, info) {
+			return nil
+		}
+
+		// Include files modified at or after the base timestamp (>= guards against same-second mtime ties)
+		if !info.ModTime().Before(baseTimestamp) {
+			relPath, err := filepath.Rel(config.DataDirectory, path)
+			if err != nil {
+				e.log.Warn("Failed to get relative path", "path", path, "error", err)
+				return nil
+			}
+
+			changedFiles = append(changedFiles, ChangedFile{
+				RelativePath: relPath,
+				AbsolutePath: path,
+				Size:         info.Size(),
+				ModTime:      info.ModTime(),
+			})
+		}
+
+		return nil
+	})
+
+	if err != nil {
+		return nil, fmt.Errorf("failed to scan data directory: %w", err)
+	}
+
+	e.log.Info("Found changed files", "count", len(changedFiles))
+	return changedFiles, nil
+}
+
+// shouldSkipFile determines if a file should be excluded from incremental backup
+func (e *PostgresIncrementalEngine) shouldSkipFile(path string, info os.FileInfo) bool {
+	name := info.Name()
+
+	// Skip temporary files
+	if strings.HasSuffix(name, ".tmp") {
+		return true
+	}
+
+	// Skip lock files
+	if strings.HasSuffix(name, ".lock") || name == "postmaster.pid" {
+		return true
+	}
+
+	// Skip sockets
+	if info.Mode()&os.ModeSocket != 0 {
+		return true
+	}
+
+	// Skip WAL directories (WAL handled separately if needed). Match whole
+	// path components so data files merely containing "pg_wal" are kept.
+	sep := string(os.PathSeparator)
+	if strings.Contains(path, sep+"pg_wal"+sep) || strings.Contains(path, sep+"pg_xlog"+sep) {
+		return true
+	}
+
+	// Skip pg_replslot (replication slots)
+	if strings.Contains(path, sep+"pg_replslot"+sep) {
+		return true
+	}
+
+	// Skip postmaster.opts (runtime config, regenerated on startup)
+	if name == "postmaster.opts" {
+		return true
+	}
+
+	return false
+}
+
+// loadBackupInfo loads backup metadata from .info file
+func (e *PostgresIncrementalEngine) loadBackupInfo(backupPath string) (*BackupInfo, error) {
+	// The .info sidecar sits next to the archive: foo.tar.gz -> foo.info.
+	if !strings.HasSuffix(backupPath, ".tar.gz") {
+		return nil, fmt.Errorf("backup path %q does not end in .tar.gz", backupPath)
+	}
+	infoPath := strings.TrimSuffix(backupPath, ".tar.gz") + ".info"
+
+	data, err := os.ReadFile(infoPath)
+	if err != nil {
+		return nil, fmt.Errorf("failed to read info file %s: %w", infoPath, err)
+	}
+
+	var info BackupInfo
+	if err := json.Unmarshal(data, &info); err != nil {
+		return nil, fmt.Errorf("failed to parse info file: %w", err)
+	}
+
+	return &info, nil
+}
+
+// CreateIncrementalBackup creates a new incremental backup archive
+func (e *PostgresIncrementalEngine) CreateIncrementalBackup(ctx context.Context, config *IncrementalBackupConfig, changedFiles []ChangedFile) error {
+	e.log.Info("Creating incremental backup",
+		"changed_files", len(changedFiles),
+		"base_backup", config.BaseBackupPath)
+
+	// TODO: Implementation in next step
+	// 1. Create tar.gz with only changed files
+	// 2. Generate metadata with base backup reference
+	// 3. Write .info file with incremental metadata
+	// 4. Calculate checksums
+
+	return fmt.Errorf("not implemented yet")
+}
+
+// RestoreIncremental restores an incremental backup on top of a base
+func (e *PostgresIncrementalEngine) RestoreIncremental(ctx context.Context, baseBackupPath, incrementalPath, targetDir string) error {
+	e.log.Info("Restoring incremental backup",
+		"base", baseBackupPath,
+		"incremental", incrementalPath,
+		"target", targetDir)
+
+	// TODO: Implementation in next step
+	// 1. Extract base backup to target
+	// 2. Extract incremental backup, overwriting files
+	// 3. Verify checksums
+	// 4. Update permissions
+
+	return fmt.Errorf("not implemented yet")
+}
+
+// CalculateFileChecksum computes SHA-256 hash of a file
+func (e *PostgresIncrementalEngine) CalculateFileChecksum(path string) (string, error) {
+	file, err := os.Open(path)
+	if err != nil {
+		return "", err
+	}
+	defer file.Close()
+
+	hash := sha256.New()
+	if _, err := io.Copy(hash, file); err != nil {
+		return "", err
+	}
+
+	return hex.EncodeToString(hash.Sum(nil)), nil
+}
diff --git a/internal/metadata/metadata.go b/internal/metadata/metadata.go
index f8006dd..c9b548f 100644
--- a/internal/metadata/metadata.go
+++ b/internal/metadata/metadata.go
@@ -25,10 +25,23 @@ type BackupMetadata struct {
 	SizeBytes   int64             `json:"size_bytes"`
 	SHA256      string            `json:"sha256"`
 	Compression string            `json:"compression"` // none, gzip, pigz
-	BackupType  string            `json:"backup_type"` // full, incremental (for v2.0)
+	BackupType  string            `json:"backup_type"` // full, incremental (for v2.2)
 	BaseBackup  string            `json:"base_backup,omitempty"`
 	Duration    float64           `json:"duration_seconds"`
 	ExtraInfo   map[string]string `json:"extra_info,omitempty"`
+
+	// Incremental backup fields (v2.2+)
+	Incremental *IncrementalMetadata `json:"incremental,omitempty"` // Only present for incremental backups
+}
+
+// IncrementalMetadata contains metadata specific to incremental backups
+type IncrementalMetadata struct {
+	BaseBackupID        string    `json:"base_backup_id"`        // SHA-256 of base backup
+	BaseBackupPath      string    `json:"base_backup_path"`      // Filename of base backup
+	BaseBackupTimestamp time.Time `json:"base_backup_timestamp"` // When base backup was created
+	IncrementalFiles    int       `json:"incremental_files"`     // Number of changed files
+	TotalSize           int64     `json:"total_size"`            // Total size of changed files (bytes)
+	BackupChain         []string  `json:"backup_chain"`          // Chain: [base, incr1, incr2, ...]
 }
 
 // ClusterMetadata contains metadata for cluster backups