feat: add embedded systemd installer and Prometheus metrics
Some checks failed
CI/CD / Test (push) Successful in 2m42s
CI/CD / Lint (push) Successful in 2m50s
CI/CD / Build (amd64, darwin) (push) Successful in 2m0s
CI/CD / Build (amd64, linux) (push) Successful in 1m58s
CI/CD / Build (arm64, darwin) (push) Successful in 2m1s
CI/CD / Build (arm64, linux) (push) Has been cancelled
Some checks failed
CI/CD / Test (push) Successful in 2m42s
CI/CD / Lint (push) Successful in 2m50s
CI/CD / Build (amd64, darwin) (push) Successful in 2m0s
CI/CD / Build (amd64, linux) (push) Successful in 1m58s
CI/CD / Build (arm64, darwin) (push) Successful in 2m1s
CI/CD / Build (arm64, linux) (push) Has been cancelled
Systemd Integration: - New 'dbbackup install' command creates service/timer units - Supports single-database and cluster backup modes - Automatic dbbackup user/group creation with proper permissions - Hardened service units with security features - Template units with configurable OnCalendar schedules - 'dbbackup uninstall' for clean removal Prometheus Metrics: - 'dbbackup metrics export' for textfile collector format - 'dbbackup metrics serve' runs HTTP exporter on port 9399 - Metrics: last_success_timestamp, rpo_seconds, backup_total, etc. - Integration with node_exporter textfile collector - --with-metrics flag during install Technical: - Systemd templates embedded with //go:embed - Service units include ReadWritePaths, OOMScoreAdjust - Metrics exporter caches with 30s TTL - Graceful shutdown on SIGTERM
This commit is contained in:
138
cmd/metrics.go
Normal file
138
cmd/metrics.go
Normal file
@@ -0,0 +1,138 @@
|
||||
package cmd
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"os"
|
||||
"os/signal"
|
||||
"syscall"
|
||||
|
||||
"dbbackup/internal/prometheus"
|
||||
|
||||
"github.com/spf13/cobra"
|
||||
)
|
||||
|
||||
var (
|
||||
metricsInstance string
|
||||
metricsOutput string
|
||||
metricsPort int
|
||||
)
|
||||
|
||||
// metricsCmd represents the metrics command
|
||||
var metricsCmd = &cobra.Command{
|
||||
Use: "metrics",
|
||||
Short: "Prometheus metrics management",
|
||||
Long: `Prometheus metrics management for dbbackup.
|
||||
|
||||
Export metrics to a textfile for node_exporter, or run an HTTP server
|
||||
for direct Prometheus scraping.`,
|
||||
}
|
||||
|
||||
// metricsExportCmd exports metrics to a textfile
|
||||
var metricsExportCmd = &cobra.Command{
|
||||
Use: "export",
|
||||
Short: "Export metrics to textfile",
|
||||
Long: `Export Prometheus metrics to a textfile for node_exporter.
|
||||
|
||||
The textfile collector in node_exporter can scrape metrics from files
|
||||
in a designated directory (typically /var/lib/node_exporter/textfile_collector/).
|
||||
|
||||
Examples:
|
||||
# Export metrics to default location
|
||||
dbbackup metrics export
|
||||
|
||||
# Export with custom output path
|
||||
dbbackup metrics export --output /var/lib/dbbackup/metrics/dbbackup.prom
|
||||
|
||||
# Export for specific instance
|
||||
dbbackup metrics export --instance production --output /var/lib/dbbackup/metrics/production.prom
|
||||
|
||||
After export, configure node_exporter with:
|
||||
--collector.textfile.directory=/var/lib/dbbackup/metrics/
|
||||
`,
|
||||
RunE: func(cmd *cobra.Command, args []string) error {
|
||||
return runMetricsExport(cmd.Context())
|
||||
},
|
||||
}
|
||||
|
||||
// metricsServeCmd runs the HTTP metrics server
|
||||
var metricsServeCmd = &cobra.Command{
|
||||
Use: "serve",
|
||||
Short: "Run Prometheus HTTP server",
|
||||
Long: `Run an HTTP server exposing Prometheus metrics.
|
||||
|
||||
This starts a long-running daemon that serves metrics at /metrics.
|
||||
Prometheus can scrape this endpoint directly.
|
||||
|
||||
Examples:
|
||||
# Start server on default port 9399
|
||||
dbbackup metrics serve
|
||||
|
||||
# Start server on custom port
|
||||
dbbackup metrics serve --port 9100
|
||||
|
||||
# Run as systemd service (installed via 'dbbackup install --with-metrics')
|
||||
sudo systemctl start dbbackup-exporter
|
||||
|
||||
Endpoints:
|
||||
/metrics - Prometheus metrics
|
||||
/health - Health check (returns 200 OK)
|
||||
/ - Service info page
|
||||
`,
|
||||
RunE: func(cmd *cobra.Command, args []string) error {
|
||||
return runMetricsServe(cmd.Context())
|
||||
},
|
||||
}
|
||||
|
||||
func init() {
|
||||
rootCmd.AddCommand(metricsCmd)
|
||||
metricsCmd.AddCommand(metricsExportCmd)
|
||||
metricsCmd.AddCommand(metricsServeCmd)
|
||||
|
||||
// Export flags
|
||||
metricsExportCmd.Flags().StringVar(&metricsInstance, "instance", "default", "Instance name for metrics labels")
|
||||
metricsExportCmd.Flags().StringVarP(&metricsOutput, "output", "o", "/var/lib/dbbackup/metrics/dbbackup.prom", "Output file path")
|
||||
|
||||
// Serve flags
|
||||
metricsServeCmd.Flags().StringVar(&metricsInstance, "instance", "default", "Instance name for metrics labels")
|
||||
metricsServeCmd.Flags().IntVarP(&metricsPort, "port", "p", 9399, "HTTP server port")
|
||||
}
|
||||
|
||||
func runMetricsExport(ctx context.Context) error {
|
||||
// Open catalog
|
||||
cat, err := openCatalog()
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to open catalog: %w", err)
|
||||
}
|
||||
defer cat.Close()
|
||||
|
||||
// Create metrics writer
|
||||
writer := prometheus.NewMetricsWriter(log, cat, metricsInstance)
|
||||
|
||||
// Write textfile
|
||||
if err := writer.WriteTextfile(metricsOutput); err != nil {
|
||||
return fmt.Errorf("failed to write metrics: %w", err)
|
||||
}
|
||||
|
||||
log.Info("Exported metrics to textfile", "path", metricsOutput, "instance", metricsInstance)
|
||||
return nil
|
||||
}
|
||||
|
||||
func runMetricsServe(ctx context.Context) error {
|
||||
// Setup signal handling
|
||||
ctx, cancel := signal.NotifyContext(ctx, os.Interrupt, syscall.SIGTERM)
|
||||
defer cancel()
|
||||
|
||||
// Open catalog
|
||||
cat, err := openCatalog()
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to open catalog: %w", err)
|
||||
}
|
||||
defer cat.Close()
|
||||
|
||||
// Create exporter
|
||||
exporter := prometheus.NewExporter(log, cat, metricsInstance, metricsPort)
|
||||
|
||||
// Run server (blocks until context is cancelled)
|
||||
return exporter.Serve(ctx)
|
||||
}
|
||||
Reference in New Issue
Block a user