feat: Phase 3A - Incremental backup scaffolding (types, interfaces, metadata)

Added foundational types for PostgreSQL incremental backups:

Types & Interfaces (internal/backup/incremental.go):
- BackupType enum: full vs incremental
- IncrementalMetadata struct with base backup reference
- ChangedFile struct for tracking modifications
- BackupChainResolver interface for restore chain logic
- IncrementalBackupEngine interface

PostgreSQL Implementation (internal/backup/incremental_postgres.go):
- PostgresIncrementalEngine for file-level incrementals
- FindChangedFiles() - mtime-based change detection
- shouldSkipFile() - exclude temp/lock/socket files, WAL (pg_wal/pg_xlog), pg_replslot, and postmaster.opts
- loadBackupInfo() - read base backup metadata
- Stubs for CreateIncrementalBackup() and RestoreIncremental()

Metadata Extension (internal/metadata/metadata.go):
- Added IncrementalMetadata to BackupMetadata
- Fields: base_backup_id, backup_chain, incremental_files
- Tracks parent backup and restore dependencies

Next Steps:
- Add --backup-type incremental flag to CLI
- Implement backup chain resolution
- Write integration tests

Status: SCAFFOLDING ONLY - not functional yet
This commit is contained in:
2025-11-26 06:22:54 +00:00
parent b460a709a7
commit 1d4aa24817
3 changed files with 316 additions and 1 deletions

View File

@@ -0,0 +1,108 @@
package backup
import (
"context"
"time"
)
// BackupType names the kind of backup an archive holds.
type BackupType string

// Supported backup kinds.
const (
	// BackupTypeFull marks a complete backup of all data.
	BackupTypeFull BackupType = "full"

	// BackupTypeIncremental marks a backup that holds only the files changed
	// since its base backup.
	BackupTypeIncremental BackupType = "incremental"
)
// IncrementalMetadata describes how an incremental backup relates to the
// backup it was taken against, plus summary counters for the changed data.
type IncrementalMetadata struct {
	// BaseBackupID identifies the base backup this incremental depends on;
	// it is the base backup's SHA-256 checksum.
	BaseBackupID string `json:"base_backup_id"`

	// BaseBackupPath holds the base backup's filename,
	// e.g. "mydb_20250126_120000.tar.gz".
	BaseBackupPath string `json:"base_backup_path"`

	// BaseBackupTimestamp records the creation time of the base backup.
	BaseBackupTimestamp time.Time `json:"base_backup_timestamp"`

	// IncrementalFiles counts the changed files contained in this backup.
	IncrementalFiles int `json:"incremental_files"`

	// TotalSize is the combined size, in bytes, of the changed files.
	TotalSize int64 `json:"total_size"`

	// BackupChain lists every backup required for a restore, oldest first:
	// [base, incr1, incr2, ...].
	BackupChain []string `json:"backup_chain"`
}
// ChangedFile describes one file detected as modified since the base backup.
type ChangedFile struct {
	// RelativePath locates the file relative to the PostgreSQL data directory.
	RelativePath string

	// AbsolutePath is the file's full path on the filesystem.
	AbsolutePath string

	// Size is the length of the file in bytes.
	Size int64

	// ModTime is when the file was last modified.
	ModTime time.Time

	// Checksum optionally carries the SHA-256 hash of the file content.
	Checksum string
}
// IncrementalBackupConfig collects the settings used when taking an
// incremental backup.
type IncrementalBackupConfig struct {
	// BaseBackupPath points at the archive of the base backup.
	BaseBackupPath string

	// DataDirectory is the PostgreSQL data directory that gets scanned.
	DataDirectory string

	// IncludeWAL controls whether WAL files should be part of the backup.
	IncludeWAL bool

	// CompressionLevel is the compression level (0-9) for the incremental
	// archive.
	CompressionLevel int
}
// BackupChainResolver works out which backups are required to restore a
// given backup.
type BackupChainResolver interface {
	// FindBaseBackup returns the base backup that the given incremental
	// backup was taken against.
	FindBaseBackup(ctx context.Context, incrementalBackupID string) (*BackupInfo, error)

	// ResolveChain returns every backup needed to restore targetBackupID,
	// ordered [base, incr1, incr2, ..., target].
	ResolveChain(ctx context.Context, targetBackupID string) ([]*BackupInfo, error)

	// ValidateChain checks that each backup in chain exists and is valid.
	ValidateChain(ctx context.Context, chain []*BackupInfo) error
}
// IncrementalBackupEngine is the set of operations an incremental backup
// implementation provides.
type IncrementalBackupEngine interface {
	// FindChangedFiles reports the files that changed since the base backup
	// named in config.
	FindChangedFiles(ctx context.Context, config *IncrementalBackupConfig) ([]ChangedFile, error)

	// CreateIncrementalBackup produces a new incremental backup from the
	// given changed files.
	CreateIncrementalBackup(ctx context.Context, config *IncrementalBackupConfig, changedFiles []ChangedFile) error

	// RestoreIncremental applies an incremental backup on top of a base
	// backup, restoring into targetDir.
	RestoreIncremental(ctx context.Context, baseBackupPath string, incrementalPath string, targetDir string) error
}
// BackupInfo carries the basic description of a backup together with the
// fields needed for incremental support. It is intended to be folded into
// the existing backup.Info struct later.
type BackupInfo struct {
	// Fields mirrored from the existing backup.Info.
	Database  string    `json:"database"`
	Timestamp time.Time `json:"timestamp"`
	Size      int64     `json:"size"`
	Checksum  string    `json:"checksum"`

	// BackupType is either "full" or "incremental".
	BackupType BackupType `json:"backup_type"`

	// Incremental is set only for incremental backups and is omitted from
	// the JSON output when nil.
	Incremental *IncrementalMetadata `json:"incremental,omitempty"`
}

View File

@@ -0,0 +1,194 @@
package backup
import (
"context"
"crypto/sha256"
"encoding/hex"
"encoding/json"
"fmt"
"io"
"os"
"path/filepath"
"strings"
"dbbackup/internal/logger"
)
// PostgresIncrementalEngine is the PostgreSQL implementation of file-level
// incremental backups.
type PostgresIncrementalEngine struct {
	// log receives the engine's progress and diagnostic messages.
	log logger.Logger
}
// NewPostgresIncrementalEngine returns a PostgreSQL incremental backup engine
// that writes its messages to log.
func NewPostgresIncrementalEngine(log logger.Logger) *PostgresIncrementalEngine {
	engine := new(PostgresIncrementalEngine)
	engine.log = log
	return engine
}
// FindChangedFiles identifies files that changed since the base backup.
//
// It loads the base backup's metadata, requires that backup to be a full
// backup, and then walks config.DataDirectory, reporting every file that is
// not excluded by shouldSkipFile and whose modification time is strictly
// after the base backup's timestamp. Directories themselves are never
// reported.
//
// This is a simple mtime-based implementation. Production should use
// pg_basebackup with incremental support.
//
// An error is returned when config is nil, the base backup metadata cannot
// be loaded, the base backup is not a full backup, ctx is cancelled during
// the scan, or the directory walk fails.
func (e *PostgresIncrementalEngine) FindChangedFiles(ctx context.Context, config *IncrementalBackupConfig) ([]ChangedFile, error) {
	// Guard before the first dereference so a missing config is an error,
	// not a panic.
	if config == nil {
		return nil, fmt.Errorf("incremental backup config is nil")
	}

	e.log.Info("Finding changed files for incremental backup",
		"base_backup", config.BaseBackupPath,
		"data_dir", config.DataDirectory)

	// Load base backup metadata to get timestamp
	baseInfo, err := e.loadBackupInfo(config.BaseBackupPath)
	if err != nil {
		return nil, fmt.Errorf("failed to load base backup info: %w", err)
	}
	if baseInfo.BackupType != BackupTypeFull {
		return nil, fmt.Errorf("base backup must be a full backup, got: %s", baseInfo.BackupType)
	}

	baseTimestamp := baseInfo.Timestamp
	e.log.Info("Base backup timestamp", "timestamp", baseTimestamp)

	// Scan data directory for changed files
	var changedFiles []ChangedFile
	err = filepath.Walk(config.DataDirectory, func(path string, info os.FileInfo, err error) error {
		// Stop promptly when the caller cancels; a data directory can hold
		// a very large number of files.
		if ctxErr := ctx.Err(); ctxErr != nil {
			return ctxErr
		}
		if err != nil {
			return err
		}

		// Skip directories
		if info.IsDir() {
			return nil
		}

		// Skip temporary files, lock files, and sockets
		if e.shouldSkipFile(path, info) {
			return nil
		}

		// Only files modified after the base backup are of interest.
		if !info.ModTime().After(baseTimestamp) {
			return nil
		}

		relPath, err := filepath.Rel(config.DataDirectory, path)
		if err != nil {
			// Dropping a changed file would yield an incomplete backup, so
			// fail the scan instead of skipping the file.
			return fmt.Errorf("failed to get relative path for %s: %w", path, err)
		}

		changedFiles = append(changedFiles, ChangedFile{
			RelativePath: relPath,
			AbsolutePath: path,
			Size:         info.Size(),
			ModTime:      info.ModTime(),
		})
		return nil
	})
	if err != nil {
		return nil, fmt.Errorf("failed to scan data directory: %w", err)
	}

	e.log.Info("Found changed files", "count", len(changedFiles))
	return changedFiles, nil
}
// shouldSkipFile determines if a file should be excluded from incremental
// backup.
//
// path is the file's path as produced by the directory walk and info is its
// file info. The function returns true for:
//   - temporary files (name ends in ".tmp")
//   - lock files (name ends in ".lock") and postmaster.pid
//   - postmaster.opts (runtime config, regenerated on startup)
//   - sockets
//   - anything with a path component named pg_wal, pg_xlog or pg_replslot
//
// Directory names are compared as whole path components rather than as
// substrings, so unrelated names that merely contain "pg_wal" (for example a
// parent directory called "pg_wal_archive") do not cause files to be skipped.
func (e *PostgresIncrementalEngine) shouldSkipFile(path string, info os.FileInfo) bool {
	name := info.Name()

	switch {
	case strings.HasSuffix(name, ".tmp"):
		// Temporary files
		return true
	case strings.HasSuffix(name, ".lock"), name == "postmaster.pid":
		// Lock files
		return true
	case name == "postmaster.opts":
		// Runtime config, regenerated on startup
		return true
	case info.Mode()&os.ModeSocket != 0:
		// Sockets
		return true
	}

	// Normalise separators so the component split behaves the same on every
	// platform.
	for _, component := range strings.Split(filepath.ToSlash(path), "/") {
		switch component {
		case "pg_wal", "pg_xlog":
			// WAL is handled separately if needed.
			return true
		case "pg_replslot":
			// Replication slots
			return true
		}
	}
	return false
}
// loadBackupInfo reads and decodes the JSON ".info" file that sits next to a
// backup archive.
//
// The info file path is backupPath with a trailing ".tar.gz" removed (when
// present) and ".info" appended. An error is returned when the file cannot be
// read or does not contain valid JSON.
func (e *PostgresIncrementalEngine) loadBackupInfo(backupPath string) (*BackupInfo, error) {
	infoPath := strings.TrimSuffix(backupPath, ".tar.gz") + ".info"

	raw, readErr := os.ReadFile(infoPath)
	if readErr != nil {
		return nil, fmt.Errorf("failed to read info file %s: %w", infoPath, readErr)
	}

	parsed := new(BackupInfo)
	if parseErr := json.Unmarshal(raw, parsed); parseErr != nil {
		return nil, fmt.Errorf("failed to parse info file: %w", parseErr)
	}
	return parsed, nil
}
// CreateIncrementalBackup is meant to write a new incremental backup archive
// from changedFiles. It is currently a stub: it logs the request and always
// returns a "not implemented yet" error.
func (e *PostgresIncrementalEngine) CreateIncrementalBackup(ctx context.Context, config *IncrementalBackupConfig, changedFiles []ChangedFile) error {
	fileCount := len(changedFiles)
	e.log.Info("Creating incremental backup",
		"changed_files", fileCount,
		"base_backup", config.BaseBackupPath)

	// TODO: Implementation in next step. Planned work:
	//   1. Create tar.gz with only changed files
	//   2. Generate metadata with base backup reference
	//   3. Write .info file with incremental metadata
	//   4. Calculate checksums
	return fmt.Errorf("not implemented yet")
}
// RestoreIncremental is meant to restore an incremental backup on top of its
// base backup into targetDir. It is currently a stub: it logs the request and
// always returns a "not implemented yet" error.
func (e *PostgresIncrementalEngine) RestoreIncremental(ctx context.Context, baseBackupPath, incrementalPath, targetDir string) error {
	e.log.Info(
		"Restoring incremental backup",
		"base", baseBackupPath,
		"incremental", incrementalPath,
		"target", targetDir,
	)

	// TODO: Implementation in next step. Planned work:
	//   1. Extract base backup to target
	//   2. Extract incremental backup, overwriting files
	//   3. Verify checksums
	//   4. Update permissions
	return fmt.Errorf("not implemented yet")
}
// CalculateFileChecksum streams the file at path through SHA-256 and returns
// the digest as a lowercase hex string. Errors from opening or reading the
// file are returned unchanged.
func (e *PostgresIncrementalEngine) CalculateFileChecksum(path string) (string, error) {
	f, err := os.Open(path)
	if err != nil {
		return "", err
	}
	defer f.Close()

	hasher := sha256.New()
	_, err = io.Copy(hasher, f)
	if err != nil {
		return "", err
	}

	digest := hasher.Sum(nil)
	return hex.EncodeToString(digest), nil
}

View File

@@ -25,10 +25,23 @@ type BackupMetadata struct {
SizeBytes int64 `json:"size_bytes"` SizeBytes int64 `json:"size_bytes"`
SHA256 string `json:"sha256"` SHA256 string `json:"sha256"`
Compression string `json:"compression"` // none, gzip, pigz Compression string `json:"compression"` // none, gzip, pigz
BackupType string `json:"backup_type"` // full, incremental (for v2.0) BackupType string `json:"backup_type"` // full, incremental (for v2.2)
BaseBackup string `json:"base_backup,omitempty"` BaseBackup string `json:"base_backup,omitempty"`
Duration float64 `json:"duration_seconds"` Duration float64 `json:"duration_seconds"`
ExtraInfo map[string]string `json:"extra_info,omitempty"` ExtraInfo map[string]string `json:"extra_info,omitempty"`
// Incremental backup fields (v2.2+)
Incremental *IncrementalMetadata `json:"incremental,omitempty"` // Only present for incremental backups
}
// IncrementalMetadata contains metadata specific to incremental backups
type IncrementalMetadata struct {
BaseBackupID string `json:"base_backup_id"` // SHA-256 of base backup
BaseBackupPath string `json:"base_backup_path"` // Filename of base backup
BaseBackupTimestamp time.Time `json:"base_backup_timestamp"` // When base backup was created
IncrementalFiles int `json:"incremental_files"` // Number of changed files
TotalSize int64 `json:"total_size"` // Total size of changed files (bytes)
BackupChain []string `json:"backup_chain"` // Chain: [base, incr1, incr2, ...]
} }
// ClusterMetadata contains metadata for cluster backups // ClusterMetadata contains metadata for cluster backups