feat: Phase 3A - Incremental backup scaffolding (types, interfaces, metadata)
Added foundational types for PostgreSQL incremental backups:

Types & Interfaces (internal/backup/incremental.go):
- BackupType enum: full vs incremental
- IncrementalMetadata struct with base backup reference
- ChangedFile struct for tracking modifications
- BackupChainResolver interface for restore chain logic
- IncrementalBackupEngine interface

PostgreSQL Implementation (internal/backup/incremental_postgres.go):
- PostgresIncrementalEngine for file-level incrementals
- FindChangedFiles() - mtime-based change detection
- shouldSkipFile() - exclude temp/lock/socket files
- loadBackupInfo() - read base backup metadata
- Stubs for CreateIncrementalBackup() and RestoreIncremental()

Metadata Extension (internal/metadata/metadata.go):
- Added IncrementalMetadata to BackupMetadata
- Fields: base_backup_id, backup_chain, incremental_files
- Tracks parent backup and restore dependencies

Next Steps:
- Add --backup-type incremental flag to CLI
- Implement backup chain resolution
- Write integration tests

Status: SCAFFOLDING ONLY - not functional yet
This commit is contained in:
108
internal/backup/incremental.go
Normal file
108
internal/backup/incremental.go
Normal file
@@ -0,0 +1,108 @@
|
||||
package backup
|
||||
|
||||
import (
|
||||
"context"
|
||||
"time"
|
||||
)
|
||||
|
||||
// BackupType names the kind of backup an archive holds.
type BackupType string

// Supported backup kinds.
const (
	// BackupTypeFull marks a complete backup of all data.
	BackupTypeFull BackupType = "full"

	// BackupTypeIncremental marks a backup that holds only the files changed
	// since its base backup.
	BackupTypeIncremental BackupType = "incremental"
)
|
||||
|
||||
// IncrementalMetadata describes how an incremental backup relates to the
// base backup it was taken against.
type IncrementalMetadata struct {
	// BaseBackupID identifies the base backup this incremental depends on;
	// it is that backup's SHA-256 checksum.
	BaseBackupID string `json:"base_backup_id"`

	// BaseBackupPath is the base backup's filename,
	// e.g. "mydb_20250126_120000.tar.gz".
	BaseBackupPath string `json:"base_backup_path"`

	// BaseBackupTimestamp records when the base backup was created.
	BaseBackupTimestamp time.Time `json:"base_backup_timestamp"`

	// IncrementalFiles counts the changed files included in this backup.
	IncrementalFiles int `json:"incremental_files"`

	// TotalSize is the combined size, in bytes, of the changed files.
	TotalSize int64 `json:"total_size"`

	// BackupChain lists every backup needed for a restore (base plus
	// incrementals), oldest first: [base, incr1, incr2, ...].
	BackupChain []string `json:"backup_chain"`
}
|
||||
|
||||
// ChangedFile records one file that changed since the base backup.
type ChangedFile struct {
	// RelativePath locates the file relative to the PostgreSQL data directory.
	RelativePath string

	// AbsolutePath is the file's full path on the filesystem.
	AbsolutePath string

	// Size is the file's length in bytes.
	Size int64

	// ModTime is when the file was last modified.
	ModTime time.Time

	// Checksum optionally carries the SHA-256 hash of the file's content.
	Checksum string
}
|
||||
|
||||
// IncrementalBackupConfig collects the settings for one incremental backup run.
type IncrementalBackupConfig struct {
	// BaseBackupPath points at the base backup archive.
	BaseBackupPath string

	// DataDirectory is the PostgreSQL data directory that gets scanned.
	DataDirectory string

	// IncludeWAL selects whether WAL files should be part of the backup.
	IncludeWAL bool

	// CompressionLevel is the level used for the incremental archive (0-9).
	CompressionLevel int
}
|
||||
|
||||
// BackupChainResolver resolves the chain of backups needed for restore
|
||||
type BackupChainResolver interface {
|
||||
// FindBaseBackup locates the base backup for an incremental backup
|
||||
FindBaseBackup(ctx context.Context, incrementalBackupID string) (*BackupInfo, error)
|
||||
|
||||
// ResolveChain returns the complete chain of backups needed for restore
|
||||
// Returned in order: [base, incr1, incr2, ..., target]
|
||||
ResolveChain(ctx context.Context, targetBackupID string) ([]*BackupInfo, error)
|
||||
|
||||
// ValidateChain verifies all backups in the chain exist and are valid
|
||||
ValidateChain(ctx context.Context, chain []*BackupInfo) error
|
||||
}
|
||||
|
||||
// IncrementalBackupEngine handles incremental backup operations
|
||||
type IncrementalBackupEngine interface {
|
||||
// FindChangedFiles identifies files changed since the base backup
|
||||
FindChangedFiles(ctx context.Context, config *IncrementalBackupConfig) ([]ChangedFile, error)
|
||||
|
||||
// CreateIncrementalBackup creates a new incremental backup
|
||||
CreateIncrementalBackup(ctx context.Context, config *IncrementalBackupConfig, changedFiles []ChangedFile) error
|
||||
|
||||
// RestoreIncremental restores an incremental backup on top of a base backup
|
||||
RestoreIncremental(ctx context.Context, baseBackupPath, incrementalPath, targetDir string) error
|
||||
}
|
||||
|
||||
// BackupInfo extends the existing Info struct with incremental metadata
|
||||
// This will be integrated into the existing backup.Info struct
|
||||
type BackupInfo struct {
|
||||
// Existing fields from backup.Info...
|
||||
Database string `json:"database"`
|
||||
Timestamp time.Time `json:"timestamp"`
|
||||
Size int64 `json:"size"`
|
||||
Checksum string `json:"checksum"`
|
||||
|
||||
// New fields for incremental support
|
||||
BackupType BackupType `json:"backup_type"` // "full" or "incremental"
|
||||
Incremental *IncrementalMetadata `json:"incremental,omitempty"` // Only present for incremental backups
|
||||
}
|
||||
194
internal/backup/incremental_postgres.go
Normal file
194
internal/backup/incremental_postgres.go
Normal file
@@ -0,0 +1,194 @@
|
||||
package backup
|
||||
|
||||
import (
|
||||
"context"
|
||||
"crypto/sha256"
|
||||
"encoding/hex"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"io"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
|
||||
"dbbackup/internal/logger"
|
||||
)
|
||||
|
||||
// PostgresIncrementalEngine implements incremental backups for PostgreSQL
|
||||
type PostgresIncrementalEngine struct {
|
||||
log logger.Logger
|
||||
}
|
||||
|
||||
// NewPostgresIncrementalEngine creates a new PostgreSQL incremental backup engine
|
||||
func NewPostgresIncrementalEngine(log logger.Logger) *PostgresIncrementalEngine {
|
||||
return &PostgresIncrementalEngine{
|
||||
log: log,
|
||||
}
|
||||
}
|
||||
|
||||
// FindChangedFiles identifies files that changed since the base backup
|
||||
// This is a simple mtime-based implementation. Production should use pg_basebackup with incremental support.
|
||||
func (e *PostgresIncrementalEngine) FindChangedFiles(ctx context.Context, config *IncrementalBackupConfig) ([]ChangedFile, error) {
|
||||
e.log.Info("Finding changed files for incremental backup",
|
||||
"base_backup", config.BaseBackupPath,
|
||||
"data_dir", config.DataDirectory)
|
||||
|
||||
// Load base backup metadata to get timestamp
|
||||
baseInfo, err := e.loadBackupInfo(config.BaseBackupPath)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to load base backup info: %w", err)
|
||||
}
|
||||
|
||||
if baseInfo.BackupType != BackupTypeFull {
|
||||
return nil, fmt.Errorf("base backup must be a full backup, got: %s", baseInfo.BackupType)
|
||||
}
|
||||
|
||||
baseTimestamp := baseInfo.Timestamp
|
||||
e.log.Info("Base backup timestamp", "timestamp", baseTimestamp)
|
||||
|
||||
// Scan data directory for changed files
|
||||
var changedFiles []ChangedFile
|
||||
|
||||
err = filepath.Walk(config.DataDirectory, func(path string, info os.FileInfo, err error) error {
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// Skip directories
|
||||
if info.IsDir() {
|
||||
return nil
|
||||
}
|
||||
|
||||
// Skip temporary files, lock files, and sockets
|
||||
if e.shouldSkipFile(path, info) {
|
||||
return nil
|
||||
}
|
||||
|
||||
// Check if file was modified after base backup
|
||||
if info.ModTime().After(baseTimestamp) {
|
||||
relPath, err := filepath.Rel(config.DataDirectory, path)
|
||||
if err != nil {
|
||||
e.log.Warn("Failed to get relative path", "path", path, "error", err)
|
||||
return nil
|
||||
}
|
||||
|
||||
changedFiles = append(changedFiles, ChangedFile{
|
||||
RelativePath: relPath,
|
||||
AbsolutePath: path,
|
||||
Size: info.Size(),
|
||||
ModTime: info.ModTime(),
|
||||
})
|
||||
}
|
||||
|
||||
return nil
|
||||
})
|
||||
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to scan data directory: %w", err)
|
||||
}
|
||||
|
||||
e.log.Info("Found changed files", "count", len(changedFiles))
|
||||
return changedFiles, nil
|
||||
}
|
||||
|
||||
// shouldSkipFile determines if a file should be excluded from incremental backup
|
||||
func (e *PostgresIncrementalEngine) shouldSkipFile(path string, info os.FileInfo) bool {
|
||||
name := info.Name()
|
||||
|
||||
// Skip temporary files
|
||||
if strings.HasSuffix(name, ".tmp") {
|
||||
return true
|
||||
}
|
||||
|
||||
// Skip lock files
|
||||
if strings.HasSuffix(name, ".lock") || name == "postmaster.pid" {
|
||||
return true
|
||||
}
|
||||
|
||||
// Skip sockets
|
||||
if info.Mode()&os.ModeSocket != 0 {
|
||||
return true
|
||||
}
|
||||
|
||||
// Skip pg_wal symlink target (WAL handled separately if needed)
|
||||
if strings.Contains(path, "pg_wal") || strings.Contains(path, "pg_xlog") {
|
||||
return true
|
||||
}
|
||||
|
||||
// Skip pg_replslot (replication slots)
|
||||
if strings.Contains(path, "pg_replslot") {
|
||||
return true
|
||||
}
|
||||
|
||||
// Skip postmaster.opts (runtime config, regenerated on startup)
|
||||
if name == "postmaster.opts" {
|
||||
return true
|
||||
}
|
||||
|
||||
return false
|
||||
}
|
||||
|
||||
// loadBackupInfo loads backup metadata from .info file
|
||||
func (e *PostgresIncrementalEngine) loadBackupInfo(backupPath string) (*BackupInfo, error) {
|
||||
// Remove .tar.gz extension and add .info
|
||||
infoPath := strings.TrimSuffix(backupPath, ".tar.gz") + ".info"
|
||||
|
||||
data, err := os.ReadFile(infoPath)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to read info file %s: %w", infoPath, err)
|
||||
}
|
||||
|
||||
var info BackupInfo
|
||||
if err := json.Unmarshal(data, &info); err != nil {
|
||||
return nil, fmt.Errorf("failed to parse info file: %w", err)
|
||||
}
|
||||
|
||||
return &info, nil
|
||||
}
|
||||
|
||||
// CreateIncrementalBackup creates a new incremental backup archive
|
||||
func (e *PostgresIncrementalEngine) CreateIncrementalBackup(ctx context.Context, config *IncrementalBackupConfig, changedFiles []ChangedFile) error {
|
||||
e.log.Info("Creating incremental backup",
|
||||
"changed_files", len(changedFiles),
|
||||
"base_backup", config.BaseBackupPath)
|
||||
|
||||
// TODO: Implementation in next step
|
||||
// 1. Create tar.gz with only changed files
|
||||
// 2. Generate metadata with base backup reference
|
||||
// 3. Write .info file with incremental metadata
|
||||
// 4. Calculate checksums
|
||||
|
||||
return fmt.Errorf("not implemented yet")
|
||||
}
|
||||
|
||||
// RestoreIncremental restores an incremental backup on top of a base
|
||||
func (e *PostgresIncrementalEngine) RestoreIncremental(ctx context.Context, baseBackupPath, incrementalPath, targetDir string) error {
|
||||
e.log.Info("Restoring incremental backup",
|
||||
"base", baseBackupPath,
|
||||
"incremental", incrementalPath,
|
||||
"target", targetDir)
|
||||
|
||||
// TODO: Implementation in next step
|
||||
// 1. Extract base backup to target
|
||||
// 2. Extract incremental backup, overwriting files
|
||||
// 3. Verify checksums
|
||||
// 4. Update permissions
|
||||
|
||||
return fmt.Errorf("not implemented yet")
|
||||
}
|
||||
|
||||
// CalculateFileChecksum computes SHA-256 hash of a file
|
||||
func (e *PostgresIncrementalEngine) CalculateFileChecksum(path string) (string, error) {
|
||||
file, err := os.Open(path)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
defer file.Close()
|
||||
|
||||
hash := sha256.New()
|
||||
if _, err := io.Copy(hash, file); err != nil {
|
||||
return "", err
|
||||
}
|
||||
|
||||
return hex.EncodeToString(hash.Sum(nil)), nil
|
||||
}
|
||||
@@ -25,10 +25,23 @@ type BackupMetadata struct {
|
||||
SizeBytes int64 `json:"size_bytes"`
|
||||
SHA256 string `json:"sha256"`
|
||||
Compression string `json:"compression"` // none, gzip, pigz
|
||||
BackupType string `json:"backup_type"` // full, incremental (for v2.0)
|
||||
BackupType string `json:"backup_type"` // full, incremental (for v2.2)
|
||||
BaseBackup string `json:"base_backup,omitempty"`
|
||||
Duration float64 `json:"duration_seconds"`
|
||||
ExtraInfo map[string]string `json:"extra_info,omitempty"`
|
||||
|
||||
// Incremental backup fields (v2.2+)
|
||||
Incremental *IncrementalMetadata `json:"incremental,omitempty"` // Only present for incremental backups
|
||||
}
|
||||
|
||||
// IncrementalMetadata holds the metadata that only incremental backups carry.
type IncrementalMetadata struct {
	// BaseBackupID is the SHA-256 of the base backup.
	BaseBackupID string `json:"base_backup_id"`

	// BaseBackupPath is the filename of the base backup.
	BaseBackupPath string `json:"base_backup_path"`

	// BaseBackupTimestamp is when the base backup was created.
	BaseBackupTimestamp time.Time `json:"base_backup_timestamp"`

	// IncrementalFiles is the number of changed files.
	IncrementalFiles int `json:"incremental_files"`

	// TotalSize is the total size of the changed files, in bytes.
	TotalSize int64 `json:"total_size"`

	// BackupChain is the restore chain: [base, incr1, incr2, ...].
	BackupChain []string `json:"backup_chain"`
}
|
||||
|
||||
// ClusterMetadata contains metadata for cluster backups
|
||||
|
||||
Reference in New Issue
Block a user