feat: add embedded systemd installer and Prometheus metrics
Some checks failed
CI/CD / Test (push) Successful in 2m42s
CI/CD / Lint (push) Successful in 2m50s
CI/CD / Build (amd64, darwin) (push) Successful in 2m0s
CI/CD / Build (amd64, linux) (push) Successful in 1m58s
CI/CD / Build (arm64, darwin) (push) Successful in 2m1s
CI/CD / Build (arm64, linux) (push) Has been cancelled
Some checks failed
CI/CD / Test (push) Successful in 2m42s
CI/CD / Lint (push) Successful in 2m50s
CI/CD / Build (amd64, darwin) (push) Successful in 2m0s
CI/CD / Build (amd64, linux) (push) Successful in 1m58s
CI/CD / Build (arm64, darwin) (push) Successful in 2m1s
CI/CD / Build (arm64, linux) (push) Has been cancelled
Systemd Integration: - New 'dbbackup install' command creates service/timer units - Supports single-database and cluster backup modes - Automatic dbbackup user/group creation with proper permissions - Hardened service units with security features - Template units with configurable OnCalendar schedules - 'dbbackup uninstall' for clean removal Prometheus Metrics: - 'dbbackup metrics export' for textfile collector format - 'dbbackup metrics serve' runs HTTP exporter on port 9399 - Metrics: last_success_timestamp, rpo_seconds, backup_total, etc. - Integration with node_exporter textfile collector - --with-metrics flag during install Technical: - Systemd templates embedded with //go:embed - Service units include ReadWritePaths, OOMScoreAdjust - Metrics exporter caches with 30s TTL - Graceful shutdown on SIGTERM
This commit is contained in:
174
internal/prometheus/exporter.go
Normal file
174
internal/prometheus/exporter.go
Normal file
@@ -0,0 +1,174 @@
|
||||
// Package prometheus provides Prometheus metrics for dbbackup
|
||||
package prometheus
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"net/http"
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
"dbbackup/internal/catalog"
|
||||
"dbbackup/internal/logger"
|
||||
)
|
||||
|
||||
// Exporter provides an HTTP endpoint for Prometheus metrics
|
||||
type Exporter struct {
|
||||
log logger.Logger
|
||||
catalog catalog.Catalog
|
||||
instance string
|
||||
port int
|
||||
|
||||
mu sync.RWMutex
|
||||
cachedData string
|
||||
lastRefresh time.Time
|
||||
refreshTTL time.Duration
|
||||
}
|
||||
|
||||
// NewExporter creates a new Prometheus exporter
|
||||
func NewExporter(log logger.Logger, cat catalog.Catalog, instance string, port int) *Exporter {
|
||||
return &Exporter{
|
||||
log: log,
|
||||
catalog: cat,
|
||||
instance: instance,
|
||||
port: port,
|
||||
refreshTTL: 30 * time.Second,
|
||||
}
|
||||
}
|
||||
|
||||
// Serve starts the HTTP server and blocks until context is cancelled
|
||||
func (e *Exporter) Serve(ctx context.Context) error {
|
||||
mux := http.NewServeMux()
|
||||
|
||||
// /metrics endpoint
|
||||
mux.HandleFunc("/metrics", e.handleMetrics)
|
||||
|
||||
// /health endpoint
|
||||
mux.HandleFunc("/health", e.handleHealth)
|
||||
|
||||
// / root with info
|
||||
mux.HandleFunc("/", e.handleRoot)
|
||||
|
||||
addr := fmt.Sprintf(":%d", e.port)
|
||||
srv := &http.Server{
|
||||
Addr: addr,
|
||||
Handler: mux,
|
||||
ReadTimeout: 10 * time.Second,
|
||||
WriteTimeout: 30 * time.Second,
|
||||
IdleTimeout: 60 * time.Second,
|
||||
}
|
||||
|
||||
// Start refresh goroutine
|
||||
go e.refreshLoop(ctx)
|
||||
|
||||
// Graceful shutdown
|
||||
go func() {
|
||||
<-ctx.Done()
|
||||
e.log.Info("Shutting down metrics server...")
|
||||
shutdownCtx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
|
||||
defer cancel()
|
||||
if err := srv.Shutdown(shutdownCtx); err != nil {
|
||||
e.log.Error("Server shutdown error", "error", err)
|
||||
}
|
||||
}()
|
||||
|
||||
e.log.Info("Starting Prometheus metrics server", "addr", addr)
|
||||
if err := srv.ListenAndServe(); err != http.ErrServerClosed {
|
||||
return fmt.Errorf("server error: %w", err)
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// handleMetrics handles /metrics endpoint
|
||||
func (e *Exporter) handleMetrics(w http.ResponseWriter, r *http.Request) {
|
||||
e.mu.RLock()
|
||||
data := e.cachedData
|
||||
e.mu.RUnlock()
|
||||
|
||||
if data == "" {
|
||||
// Force refresh if cache is empty
|
||||
if err := e.refresh(); err != nil {
|
||||
http.Error(w, "Failed to collect metrics", http.StatusInternalServerError)
|
||||
return
|
||||
}
|
||||
e.mu.RLock()
|
||||
data = e.cachedData
|
||||
e.mu.RUnlock()
|
||||
}
|
||||
|
||||
w.Header().Set("Content-Type", "text/plain; version=0.0.4; charset=utf-8")
|
||||
w.WriteHeader(http.StatusOK)
|
||||
w.Write([]byte(data))
|
||||
}
|
||||
|
||||
// handleHealth handles /health endpoint
|
||||
func (e *Exporter) handleHealth(w http.ResponseWriter, r *http.Request) {
|
||||
w.Header().Set("Content-Type", "application/json")
|
||||
w.WriteHeader(http.StatusOK)
|
||||
w.Write([]byte(`{"status":"ok","service":"dbbackup-exporter"}`))
|
||||
}
|
||||
|
||||
// handleRoot handles / endpoint
|
||||
func (e *Exporter) handleRoot(w http.ResponseWriter, r *http.Request) {
|
||||
if r.URL.Path != "/" {
|
||||
http.NotFound(w, r)
|
||||
return
|
||||
}
|
||||
|
||||
w.Header().Set("Content-Type", "text/html")
|
||||
w.WriteHeader(http.StatusOK)
|
||||
w.Write([]byte(`<!DOCTYPE html>
|
||||
<html>
|
||||
<head>
|
||||
<title>DBBackup Exporter</title>
|
||||
</head>
|
||||
<body>
|
||||
<h1>DBBackup Prometheus Exporter</h1>
|
||||
<p>This is a Prometheus metrics exporter for DBBackup.</p>
|
||||
<ul>
|
||||
<li><a href="/metrics">/metrics</a> - Prometheus metrics</li>
|
||||
<li><a href="/health">/health</a> - Health check</li>
|
||||
</ul>
|
||||
</body>
|
||||
</html>`))
|
||||
}
|
||||
|
||||
// refreshLoop periodically refreshes the metrics cache
|
||||
func (e *Exporter) refreshLoop(ctx context.Context) {
|
||||
ticker := time.NewTicker(e.refreshTTL)
|
||||
defer ticker.Stop()
|
||||
|
||||
// Initial refresh
|
||||
if err := e.refresh(); err != nil {
|
||||
e.log.Error("Initial metrics refresh failed", "error", err)
|
||||
}
|
||||
|
||||
for {
|
||||
select {
|
||||
case <-ctx.Done():
|
||||
return
|
||||
case <-ticker.C:
|
||||
if err := e.refresh(); err != nil {
|
||||
e.log.Error("Metrics refresh failed", "error", err)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// refresh updates the cached metrics
|
||||
func (e *Exporter) refresh() error {
|
||||
writer := NewMetricsWriter(e.log, e.catalog, e.instance)
|
||||
data, err := writer.GenerateMetricsString()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
e.mu.Lock()
|
||||
e.cachedData = data
|
||||
e.lastRefresh = time.Now()
|
||||
e.mu.Unlock()
|
||||
|
||||
e.log.Debug("Refreshed metrics cache")
|
||||
return nil
|
||||
}
|
||||
Reference in New Issue
Block a user