feat: add embedded systemd installer and Prometheus metrics
Some checks failed
CI/CD / Test (push) Successful in 2m42s
CI/CD / Lint (push) Successful in 2m50s
CI/CD / Build (amd64, darwin) (push) Successful in 2m0s
CI/CD / Build (amd64, linux) (push) Successful in 1m58s
CI/CD / Build (arm64, darwin) (push) Successful in 2m1s
CI/CD / Build (arm64, linux) (push) Has been cancelled
Some checks failed
CI/CD / Test (push) Successful in 2m42s
CI/CD / Lint (push) Successful in 2m50s
CI/CD / Build (amd64, darwin) (push) Successful in 2m0s
CI/CD / Build (amd64, linux) (push) Successful in 1m58s
CI/CD / Build (arm64, darwin) (push) Successful in 2m1s
CI/CD / Build (arm64, linux) (push) Has been cancelled
Systemd Integration:
- New 'dbbackup install' command creates service/timer units
- Supports single-database and cluster backup modes
- Automatic dbbackup user/group creation with proper permissions
- Hardened service units with security features
- Template units with configurable OnCalendar schedules
- 'dbbackup uninstall' for clean removal

Prometheus Metrics:
- 'dbbackup metrics export' for textfile collector format
- 'dbbackup metrics serve' runs HTTP exporter on port 9399
- Metrics: last_success_timestamp, rpo_seconds, backup_total, etc.
- Integration with node_exporter textfile collector
- --with-metrics flag during install

Technical:
- Systemd templates embedded with //go:embed
- Service units include ReadWritePaths, OOMScoreAdjust
- Metrics exporter caches with 30s TTL
- Graceful shutdown on SIGTERM
This commit is contained in:
237
cmd/install.go
Normal file
237
cmd/install.go
Normal file
@@ -0,0 +1,237 @@
|
||||
package cmd
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"os"
|
||||
"os/signal"
|
||||
"strings"
|
||||
"syscall"
|
||||
|
||||
"dbbackup/internal/installer"
|
||||
|
||||
"github.com/spf13/cobra"
|
||||
)
|
||||
|
||||
// Values of the "install" command flags. They are bound to the command in
// init and read by runInstall.
var (
	// installInstance is the instance name (--instance, -i).
	installInstance string
	// installSchedule is the timer schedule, either an OnCalendar expression
	// or one of the shortcuts understood by expandSchedule (--schedule, -s).
	installSchedule string
	// installBackupType selects "single" or "cluster" mode (--backup-type, -t).
	installBackupType string
	// installUser is the system user that runs backups (--user).
	installUser string
	// installGroup is the system group of the backup user (--group).
	installGroup string
	// installBackupDir is the directory that receives backups (--backup-dir).
	installBackupDir string
	// installConfigPath is the path of the config file (--config-path).
	installConfigPath string
	// installTimeout is the backup timeout in seconds (--timeout).
	installTimeout int
	// installWithMetrics requests the Prometheus metrics exporter (--with-metrics).
	installWithMetrics bool
	// installMetricsPort is the port used by the metrics exporter (--metrics-port).
	installMetricsPort int
	// installDryRun shows what would be installed without changing anything (--dry-run).
	installDryRun bool
	// installStatus switches the command to status reporting (--status).
	installStatus bool
)

// uninstallPurge is the value of the "uninstall" command's --purge flag:
// when set, configuration files are removed as well.
var uninstallPurge bool
|
||||
|
||||
// installCmd represents the install command
|
||||
var installCmd = &cobra.Command{
|
||||
Use: "install",
|
||||
Short: "Install dbbackup as a systemd service",
|
||||
Long: `Install dbbackup as a systemd service with automatic scheduling.
|
||||
|
||||
This command creates systemd service and timer units for automated database backups.
|
||||
It supports both single database and cluster backup modes.
|
||||
|
||||
Examples:
|
||||
# Interactive installation (will prompt for options)
|
||||
sudo dbbackup install
|
||||
|
||||
# Install cluster backup running daily at 2am
|
||||
sudo dbbackup install --backup-type cluster --schedule "daily"
|
||||
|
||||
# Install single database backup with custom schedule
|
||||
sudo dbbackup install --instance production --backup-type single --schedule "*-*-* 03:00:00"
|
||||
|
||||
# Install with Prometheus metrics exporter
|
||||
sudo dbbackup install --with-metrics --metrics-port 9399
|
||||
|
||||
# Check installation status
|
||||
dbbackup install --status
|
||||
|
||||
# Dry-run to see what would be installed
|
||||
sudo dbbackup install --dry-run
|
||||
|
||||
Schedule format (OnCalendar):
|
||||
daily - Every day at midnight
|
||||
weekly - Every Monday at midnight
|
||||
*-*-* 02:00:00 - Every day at 2am
|
||||
*-*-* 02,14:00 - Twice daily at 2am and 2pm
|
||||
Mon *-*-* 03:00 - Every Monday at 3am
|
||||
`,
|
||||
RunE: func(cmd *cobra.Command, args []string) error {
|
||||
// Handle --status flag
|
||||
if installStatus {
|
||||
return runInstallStatus(cmd.Context())
|
||||
}
|
||||
|
||||
return runInstall(cmd.Context())
|
||||
},
|
||||
}
|
||||
|
||||
// uninstallCmd represents the uninstall command
|
||||
var uninstallCmd = &cobra.Command{
|
||||
Use: "uninstall [instance]",
|
||||
Short: "Uninstall dbbackup systemd service",
|
||||
Long: `Uninstall dbbackup systemd service and timer.
|
||||
|
||||
Examples:
|
||||
# Uninstall default instance
|
||||
sudo dbbackup uninstall
|
||||
|
||||
# Uninstall specific instance
|
||||
sudo dbbackup uninstall production
|
||||
|
||||
# Uninstall and remove all configuration
|
||||
sudo dbbackup uninstall --purge
|
||||
`,
|
||||
RunE: func(cmd *cobra.Command, args []string) error {
|
||||
instance := "cluster"
|
||||
if len(args) > 0 {
|
||||
instance = args[0]
|
||||
}
|
||||
return runUninstall(cmd.Context(), instance)
|
||||
},
|
||||
}
|
||||
|
||||
func init() {
|
||||
rootCmd.AddCommand(installCmd)
|
||||
rootCmd.AddCommand(uninstallCmd)
|
||||
|
||||
// Install flags
|
||||
installCmd.Flags().StringVarP(&installInstance, "instance", "i", "", "Instance name (e.g., production, staging)")
|
||||
installCmd.Flags().StringVarP(&installSchedule, "schedule", "s", "daily", "Backup schedule (OnCalendar format)")
|
||||
installCmd.Flags().StringVarP(&installBackupType, "backup-type", "t", "cluster", "Backup type: single or cluster")
|
||||
installCmd.Flags().StringVar(&installUser, "user", "dbbackup", "System user to run backups")
|
||||
installCmd.Flags().StringVar(&installGroup, "group", "dbbackup", "System group for backup user")
|
||||
installCmd.Flags().StringVar(&installBackupDir, "backup-dir", "/var/lib/dbbackup/backups", "Directory for backups")
|
||||
installCmd.Flags().StringVar(&installConfigPath, "config-path", "/etc/dbbackup/dbbackup.conf", "Path to config file")
|
||||
installCmd.Flags().IntVar(&installTimeout, "timeout", 3600, "Backup timeout in seconds")
|
||||
installCmd.Flags().BoolVar(&installWithMetrics, "with-metrics", false, "Install Prometheus metrics exporter")
|
||||
installCmd.Flags().IntVar(&installMetricsPort, "metrics-port", 9399, "Prometheus metrics port")
|
||||
installCmd.Flags().BoolVar(&installDryRun, "dry-run", false, "Show what would be installed without making changes")
|
||||
installCmd.Flags().BoolVar(&installStatus, "status", false, "Show installation status")
|
||||
|
||||
// Uninstall flags
|
||||
uninstallCmd.Flags().BoolVar(&uninstallPurge, "purge", false, "Also remove configuration files")
|
||||
}
|
||||
|
||||
func runInstall(ctx context.Context) error {
|
||||
// Create context with signal handling
|
||||
ctx, cancel := signal.NotifyContext(ctx, os.Interrupt, syscall.SIGTERM)
|
||||
defer cancel()
|
||||
|
||||
// Expand schedule shortcuts
|
||||
schedule := expandSchedule(installSchedule)
|
||||
|
||||
// Create installer
|
||||
inst := installer.NewInstaller(log, installDryRun)
|
||||
|
||||
// Set up options
|
||||
opts := installer.InstallOptions{
|
||||
Instance: installInstance,
|
||||
BackupType: installBackupType,
|
||||
Schedule: schedule,
|
||||
User: installUser,
|
||||
Group: installGroup,
|
||||
BackupDir: installBackupDir,
|
||||
ConfigPath: installConfigPath,
|
||||
TimeoutSeconds: installTimeout,
|
||||
WithMetrics: installWithMetrics,
|
||||
MetricsPort: installMetricsPort,
|
||||
}
|
||||
|
||||
// For cluster backup, override instance
|
||||
if installBackupType == "cluster" {
|
||||
opts.Instance = "cluster"
|
||||
}
|
||||
|
||||
return inst.Install(ctx, opts)
|
||||
}
|
||||
|
||||
func runUninstall(ctx context.Context, instance string) error {
|
||||
ctx, cancel := signal.NotifyContext(ctx, os.Interrupt, syscall.SIGTERM)
|
||||
defer cancel()
|
||||
|
||||
inst := installer.NewInstaller(log, false)
|
||||
return inst.Uninstall(ctx, instance, uninstallPurge)
|
||||
}
|
||||
|
||||
func runInstallStatus(ctx context.Context) error {
|
||||
inst := installer.NewInstaller(log, false)
|
||||
|
||||
// Check cluster status
|
||||
clusterStatus, err := inst.Status(ctx, "cluster")
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
fmt.Println()
|
||||
fmt.Println("📦 DBBackup Installation Status")
|
||||
fmt.Println(strings.Repeat("═", 50))
|
||||
|
||||
if clusterStatus.Installed {
|
||||
fmt.Println()
|
||||
fmt.Println("🔹 Cluster Backup:")
|
||||
fmt.Printf(" Service: %s\n", formatStatus(clusterStatus.Installed, clusterStatus.Active))
|
||||
fmt.Printf(" Timer: %s\n", formatStatus(clusterStatus.TimerEnabled, clusterStatus.TimerActive))
|
||||
if clusterStatus.NextRun != "" {
|
||||
fmt.Printf(" Next run: %s\n", clusterStatus.NextRun)
|
||||
}
|
||||
if clusterStatus.LastRun != "" {
|
||||
fmt.Printf(" Last run: %s\n", clusterStatus.LastRun)
|
||||
}
|
||||
} else {
|
||||
fmt.Println()
|
||||
fmt.Println("❌ No systemd services installed")
|
||||
fmt.Println()
|
||||
fmt.Println("Run 'sudo dbbackup install' to install as a systemd service")
|
||||
}
|
||||
|
||||
// Check for exporter
|
||||
if _, err := os.Stat("/etc/systemd/system/dbbackup-exporter.service"); err == nil {
|
||||
exporterStatus, err := inst.Status(ctx, "exporter")
|
||||
fmt.Println()
|
||||
fmt.Println("🔹 Metrics Exporter:")
|
||||
if err == nil && exporterStatus != nil {
|
||||
fmt.Printf(" Service: %s\n", formatStatus(true, exporterStatus.Active))
|
||||
} else {
|
||||
fmt.Printf(" Service: installed (status unknown)\n")
|
||||
}
|
||||
}
|
||||
|
||||
fmt.Println()
|
||||
return nil
|
||||
}
|
||||
|
||||
// formatStatus renders a unit's state for the status report: "not installed"
// when installed is false (active is then ignored), otherwise an active or
// inactive marker depending on active.
func formatStatus(installed, active bool) string {
	switch {
	case !installed:
		return "not installed"
	case active:
		return "✅ active"
	default:
		return "⚪ inactive"
	}
}
|
||||
|
||||
// expandSchedule translates the schedule shortcuts "hourly", "daily",
// "weekly" and "monthly" (matched case-insensitively) into explicit
// OnCalendar expressions. Any other value is returned unchanged.
func expandSchedule(schedule string) string {
	switch strings.ToLower(schedule) {
	case "hourly":
		return "*-*-* *:00:00"
	case "daily":
		return "*-*-* 02:00:00"
	case "weekly":
		return "Mon *-*-* 02:00:00"
	case "monthly":
		return "*-*-01 02:00:00"
	default:
		// Not a shortcut: hand the caller's expression back untouched.
		return schedule
	}
}
|
||||
138
cmd/metrics.go
Normal file
138
cmd/metrics.go
Normal file
@@ -0,0 +1,138 @@
|
||||
package cmd
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"os"
|
||||
"os/signal"
|
||||
"syscall"
|
||||
|
||||
"dbbackup/internal/prometheus"
|
||||
|
||||
"github.com/spf13/cobra"
|
||||
)
|
||||
|
||||
// Values of the "metrics" subcommand flags, bound in init.
var (
	// metricsInstance is the instance name used for metrics labels; both
	// "export" and "serve" bind their --instance flag to it.
	metricsInstance string
	// metricsOutput is the textfile path written by "export" (--output, -o).
	metricsOutput string
	// metricsPort is the HTTP port used by "serve" (--port, -p).
	metricsPort int
)
|
||||
|
||||
// metricsCmd represents the metrics command
|
||||
var metricsCmd = &cobra.Command{
|
||||
Use: "metrics",
|
||||
Short: "Prometheus metrics management",
|
||||
Long: `Prometheus metrics management for dbbackup.
|
||||
|
||||
Export metrics to a textfile for node_exporter, or run an HTTP server
|
||||
for direct Prometheus scraping.`,
|
||||
}
|
||||
|
||||
// metricsExportCmd exports metrics to a textfile
|
||||
var metricsExportCmd = &cobra.Command{
|
||||
Use: "export",
|
||||
Short: "Export metrics to textfile",
|
||||
Long: `Export Prometheus metrics to a textfile for node_exporter.
|
||||
|
||||
The textfile collector in node_exporter can scrape metrics from files
|
||||
in a designated directory (typically /var/lib/node_exporter/textfile_collector/).
|
||||
|
||||
Examples:
|
||||
# Export metrics to default location
|
||||
dbbackup metrics export
|
||||
|
||||
# Export with custom output path
|
||||
dbbackup metrics export --output /var/lib/dbbackup/metrics/dbbackup.prom
|
||||
|
||||
# Export for specific instance
|
||||
dbbackup metrics export --instance production --output /var/lib/dbbackup/metrics/production.prom
|
||||
|
||||
After export, configure node_exporter with:
|
||||
--collector.textfile.directory=/var/lib/dbbackup/metrics/
|
||||
`,
|
||||
RunE: func(cmd *cobra.Command, args []string) error {
|
||||
return runMetricsExport(cmd.Context())
|
||||
},
|
||||
}
|
||||
|
||||
// metricsServeCmd runs the HTTP metrics server
|
||||
var metricsServeCmd = &cobra.Command{
|
||||
Use: "serve",
|
||||
Short: "Run Prometheus HTTP server",
|
||||
Long: `Run an HTTP server exposing Prometheus metrics.
|
||||
|
||||
This starts a long-running daemon that serves metrics at /metrics.
|
||||
Prometheus can scrape this endpoint directly.
|
||||
|
||||
Examples:
|
||||
# Start server on default port 9399
|
||||
dbbackup metrics serve
|
||||
|
||||
# Start server on custom port
|
||||
dbbackup metrics serve --port 9100
|
||||
|
||||
# Run as systemd service (installed via 'dbbackup install --with-metrics')
|
||||
sudo systemctl start dbbackup-exporter
|
||||
|
||||
Endpoints:
|
||||
/metrics - Prometheus metrics
|
||||
/health - Health check (returns 200 OK)
|
||||
/ - Service info page
|
||||
`,
|
||||
RunE: func(cmd *cobra.Command, args []string) error {
|
||||
return runMetricsServe(cmd.Context())
|
||||
},
|
||||
}
|
||||
|
||||
func init() {
|
||||
rootCmd.AddCommand(metricsCmd)
|
||||
metricsCmd.AddCommand(metricsExportCmd)
|
||||
metricsCmd.AddCommand(metricsServeCmd)
|
||||
|
||||
// Export flags
|
||||
metricsExportCmd.Flags().StringVar(&metricsInstance, "instance", "default", "Instance name for metrics labels")
|
||||
metricsExportCmd.Flags().StringVarP(&metricsOutput, "output", "o", "/var/lib/dbbackup/metrics/dbbackup.prom", "Output file path")
|
||||
|
||||
// Serve flags
|
||||
metricsServeCmd.Flags().StringVar(&metricsInstance, "instance", "default", "Instance name for metrics labels")
|
||||
metricsServeCmd.Flags().IntVarP(&metricsPort, "port", "p", 9399, "HTTP server port")
|
||||
}
|
||||
|
||||
func runMetricsExport(ctx context.Context) error {
|
||||
// Open catalog
|
||||
cat, err := openCatalog()
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to open catalog: %w", err)
|
||||
}
|
||||
defer cat.Close()
|
||||
|
||||
// Create metrics writer
|
||||
writer := prometheus.NewMetricsWriter(log, cat, metricsInstance)
|
||||
|
||||
// Write textfile
|
||||
if err := writer.WriteTextfile(metricsOutput); err != nil {
|
||||
return fmt.Errorf("failed to write metrics: %w", err)
|
||||
}
|
||||
|
||||
log.Info("Exported metrics to textfile", "path", metricsOutput, "instance", metricsInstance)
|
||||
return nil
|
||||
}
|
||||
|
||||
func runMetricsServe(ctx context.Context) error {
|
||||
// Setup signal handling
|
||||
ctx, cancel := signal.NotifyContext(ctx, os.Interrupt, syscall.SIGTERM)
|
||||
defer cancel()
|
||||
|
||||
// Open catalog
|
||||
cat, err := openCatalog()
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to open catalog: %w", err)
|
||||
}
|
||||
defer cat.Close()
|
||||
|
||||
// Create exporter
|
||||
exporter := prometheus.NewExporter(log, cat, metricsInstance, metricsPort)
|
||||
|
||||
// Run server (blocks until context is cancelled)
|
||||
return exporter.Serve(ctx)
|
||||
}
|
||||
Reference in New Issue
Block a user