Compare commits

5 Commits

| Author | SHA1 | Date |
|---|---|---|
| | 4cace277eb | |
| | d28871f3f4 | |
| | 0a593e7dc6 | |
| | 71f137a96f | |
| | 9b35d21bdb | |
CHANGELOG.md (43 changes)
@@ -5,6 +5,49 @@ All notable changes to dbbackup will be documented in this file.
 
 The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
 and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
 
+## [5.1.22] - 2026-02-01
+
+### Added
+
+- **Restore Metrics for Prometheus/Grafana** - Restore performance can now be monitored:
+  - `dbbackup_restore_total{status="success|failure"}` - Total restore count
+  - `dbbackup_restore_duration_seconds{profile, parallel_jobs}` - Restore duration
+  - `dbbackup_restore_parallel_jobs{profile}` - Jobs used (shows whether turbo=8 is in effect)
+  - `dbbackup_restore_size_bytes` - Restored archive size
+  - `dbbackup_restore_last_timestamp` - Last restore time
+
+- **Grafana Dashboard: Restore Operations Section**
+  - Total Successful/Failed Restores
+  - Parallel Jobs Used (red if 1 = slow, green if 8 = turbo)
+  - Last Restore Duration with thresholds
+  - Restore Duration Over Time graph
+  - Parallel Jobs per Restore bar chart
+
+- **Restore Engine Metrics Recording**
+  - All single-database and cluster restores now record metrics
+  - Stored in `~/.dbbackup/restore_metrics.json`
+  - The Prometheus exporter reads and exposes these metrics
+
+## [5.1.21] - 2026-02-01
+
+### Fixed
+
+- **Complete verification of the profile system** - Full code-path analysis confirms turbo works:
+  - CLI: `--profile turbo` → `config.ApplyProfile()` → `cfg.Jobs=8` → `pg_restore --jobs=8`
+  - TUI: Settings → `ApplyResourceProfile()` → `cpu.GetProfileByName("turbo")` → `cfg.Jobs=8`
+- Updated help text for the `restore cluster` command to show a turbo example
+- Updated the flag description to list all profiles: conservative, balanced, turbo, max-performance
+
+## [5.1.20] - 2026-02-01
+
+### Fixed
+
+- **CRITICAL: the "turbo" and "max-performance" profiles were not recognized by the restore command**
+  - `profile.go` only knew: conservative, balanced, aggressive, potato
+  - The "turbo" profile returned an "unknown profile" error and silently fell back to "balanced"
+  - The "balanced" profile has `Jobs: 0`, which became `Jobs: 1` after the default fallback
+  - **Result: `--profile turbo` was ignored and the restore ran with `--jobs=1` (single-threaded)**
+  - Added turbo profile: Jobs=8, ParallelDBs=2
+  - Added max-performance profile: Jobs=8, ParallelDBs=4
+  - `--profile turbo` now correctly uses `pg_restore --jobs=8`
+
 ## [5.1.19] - 2026-02-01
 
 ### Fixed
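For reference, a turbo cluster restore is invoked exactly as in the help-text example added later in this diff:

```
dbbackup restore cluster cluster_backup.tar.gz --profile=turbo --confirm
```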
@@ -437,14 +437,6 @@ func formatBool(b *bool) string {
 	return "false"
 }
 
-// formatExportDuration formats *time.Duration to string
-func formatExportDuration(d *time.Duration) string {
-	if d == nil {
-		return ""
-	}
-	return d.String()
-}
-
 // formatTimeSpan formats a duration in human-readable form
 func formatTimeSpan(d time.Duration) string {
 	days := int(d.Hours() / 24)
@@ -100,9 +100,8 @@ func runGenerateMan(cmd *cobra.Command, args []string) error {
 		}
 	}()
 
-	filename := filepath.Join(outputDir, c.CommandPath()+".1")
 	// Replace spaces with hyphens for filename
-	filename = filepath.Join(outputDir, filepath.Base(c.CommandPath())+".1")
+	filename := filepath.Join(outputDir, filepath.Base(c.CommandPath())+".1")
 
 	f, err := os.Create(filename)
 	if err != nil {
@@ -32,7 +32,7 @@ var (
 	restoreCreate      bool
 	restoreJobs        int
 	restoreParallelDBs int    // Number of parallel database restores
-	restoreProfile     string // Resource profile: conservative, balanced, aggressive
+	restoreProfile     string // Resource profile: conservative, balanced, aggressive, turbo, max-performance
 	restoreTarget      string
 	restoreVerbose     bool
 	restoreNoProgress  bool
@@ -186,6 +186,9 @@ Examples:
   # Maximum performance (dedicated server)
   dbbackup restore cluster cluster_backup.tar.gz --profile=aggressive --confirm
 
+  # TURBO: 8 parallel jobs for fastest restore (like pg_restore -j8)
+  dbbackup restore cluster cluster_backup.tar.gz --profile=turbo --confirm
+
   # Use parallel decompression
   dbbackup restore cluster cluster_backup.tar.gz --jobs 4 --confirm
 
@@ -319,7 +322,7 @@ func init() {
 	restoreSingleCmd.Flags().BoolVar(&restoreClean, "clean", false, "Drop and recreate target database")
 	restoreSingleCmd.Flags().BoolVar(&restoreCreate, "create", false, "Create target database if it doesn't exist")
 	restoreSingleCmd.Flags().StringVar(&restoreTarget, "target", "", "Target database name (defaults to original)")
-	restoreSingleCmd.Flags().StringVar(&restoreProfile, "profile", "balanced", "Resource profile: conservative (--parallel=1, low memory), balanced, aggressive (max performance)")
+	restoreSingleCmd.Flags().StringVar(&restoreProfile, "profile", "balanced", "Resource profile: conservative, balanced, turbo (--jobs=8), max-performance")
 	restoreSingleCmd.Flags().BoolVar(&restoreVerbose, "verbose", false, "Show detailed restore progress")
 	restoreSingleCmd.Flags().BoolVar(&restoreNoProgress, "no-progress", false, "Disable progress indicators")
 	restoreSingleCmd.Flags().StringVar(&restoreEncryptionKeyFile, "encryption-key-file", "", "Path to encryption key file (required for encrypted backups)")
@@ -337,7 +340,7 @@ func init() {
 	restoreClusterCmd.Flags().BoolVar(&restoreDryRun, "dry-run", false, "Show what would be done without executing")
 	restoreClusterCmd.Flags().BoolVar(&restoreForce, "force", false, "Skip safety checks and confirmations")
 	restoreClusterCmd.Flags().BoolVar(&restoreCleanCluster, "clean-cluster", false, "Drop all existing user databases before restore (disaster recovery)")
-	restoreClusterCmd.Flags().StringVar(&restoreProfile, "profile", "conservative", "Resource profile: conservative (single-threaded, prevents lock issues), balanced (auto-detect), aggressive (max speed)")
+	restoreClusterCmd.Flags().StringVar(&restoreProfile, "profile", "conservative", "Resource profile: conservative, balanced, turbo (--jobs=8), max-performance")
 	restoreClusterCmd.Flags().IntVar(&restoreJobs, "jobs", 0, "Number of parallel decompression jobs (0 = auto, overrides profile)")
 	restoreClusterCmd.Flags().IntVar(&restoreParallelDBs, "parallel-dbs", 0, "Number of databases to restore in parallel (0 = use profile, 1 = sequential, -1 = auto-detect, overrides profile)")
 	restoreClusterCmd.Flags().StringVar(&restoreWorkdir, "workdir", "", "Working directory for extraction (use when system disk is small, e.g. /mnt/storage/restore_tmp)")
@@ -44,13 +44,6 @@ Examples:
   dbbackup retention-simulator --days 30 --format json`,
 }
 
-var retentionSimulatorRunCmd = &cobra.Command{
-	Use:   "simulate",
-	Short: "Run retention simulation",
-	Long:  `Run retention policy simulation and show results.`,
-	RunE:  runRetentionSimulator,
-}
-
 var retentionSimulatorCompareCmd = &cobra.Command{
 	Use:   "compare",
 	Short: "Compare multiple retention strategies",
@@ -245,10 +245,7 @@ func outputTimerTable(timers []TimerInfo) {
 	fmt.Println("=====================================================")
 
 	for _, timer := range timers {
-		name := timer.Unit
-		if strings.HasSuffix(name, ".timer") {
-			name = strings.TrimSuffix(name, ".timer")
-		}
+		name := strings.TrimSuffix(timer.Unit, ".timer")
 
 		fmt.Printf("\n[TIMER] %s\n", name)
 		fmt.Printf("  Status: %s\n", timer.Active)
@@ -941,7 +941,9 @@
       "justifyMode": "auto",
       "orientation": "auto",
       "reduceOptions": {
-        "calcs": ["lastNotNull"],
+        "calcs": [
+          "lastNotNull"
+        ],
         "fields": "",
        "values": false
       },
@@ -1002,7 +1004,9 @@
       "justifyMode": "auto",
       "orientation": "auto",
       "reduceOptions": {
-        "calcs": ["lastNotNull"],
+        "calcs": [
+          "lastNotNull"
+        ],
         "fields": "",
         "values": false
       },
@@ -1063,7 +1067,9 @@
       "justifyMode": "auto",
       "orientation": "auto",
       "reduceOptions": {
-        "calcs": ["lastNotNull"],
+        "calcs": [
+          "lastNotNull"
+        ],
         "fields": "",
         "values": false
       },
@@ -1124,7 +1130,9 @@
       "justifyMode": "auto",
       "orientation": "auto",
       "reduceOptions": {
-        "calcs": ["lastNotNull"],
+        "calcs": [
+          "lastNotNull"
+        ],
         "fields": "",
         "values": false
       },
@@ -1185,7 +1193,9 @@
       "justifyMode": "auto",
       "orientation": "auto",
       "reduceOptions": {
-        "calcs": ["lastNotNull"],
+        "calcs": [
+          "lastNotNull"
+        ],
         "fields": "",
         "values": false
       },
@@ -1246,7 +1256,9 @@
       "justifyMode": "auto",
       "orientation": "auto",
       "reduceOptions": {
-        "calcs": ["lastNotNull"],
+        "calcs": [
+          "lastNotNull"
+        ],
         "fields": "",
         "values": false
       },
@@ -1307,7 +1319,9 @@
       "justifyMode": "auto",
       "orientation": "auto",
       "reduceOptions": {
-        "calcs": ["lastNotNull"],
+        "calcs": [
+          "lastNotNull"
+        ],
         "fields": "",
         "values": false
       },
@@ -1526,6 +1540,893 @@
       ],
       "title": "Dedup Storage Over Time",
       "type": "timeseries"
     },
+    {
+      "collapsed": false,
+      "gridPos": {
+        "h": 1,
+        "w": 24,
+        "x": 0,
+        "y": 35
+      },
+      "id": 400,
+      "panels": [],
+      "title": "Point-in-Time Recovery (PITR)",
+      "type": "row"
+    },
+    {
+      "datasource": {
+        "type": "prometheus",
+        "uid": "${DS_PROMETHEUS}"
+      },
+      "description": "Whether PITR is enabled for this database",
+      "fieldConfig": {
+        "defaults": {
+          "color": {
+            "mode": "thresholds"
+          },
+          "mappings": [
+            {
+              "options": {
+                "0": {
+                  "color": "red",
+                  "text": "Disabled"
+                },
+                "1": {
+                  "color": "green",
+                  "text": "Enabled"
+                }
+              },
+              "type": "value"
+            }
+          ],
+          "thresholds": {
+            "mode": "absolute",
+            "steps": [
+              {
+                "color": "red",
+                "value": null
+              },
+              {
+                "color": "green",
+                "value": 1
+              }
+            ]
+          }
+        },
+        "overrides": []
+      },
+      "gridPos": {
+        "h": 4,
+        "w": 4,
+        "x": 0,
+        "y": 36
+      },
+      "id": 401,
+      "options": {
+        "colorMode": "value",
+        "graphMode": "none",
+        "justifyMode": "center",
+        "orientation": "auto",
+        "reduceOptions": {
+          "calcs": [
+            "lastNotNull"
+          ],
+          "fields": "",
+          "values": false
+        },
+        "textMode": "auto"
+      },
+      "pluginVersion": "10.2.0",
+      "targets": [
+        {
+          "datasource": {
+            "type": "prometheus",
+            "uid": "${DS_PROMETHEUS}"
+          },
+          "editorMode": "code",
+          "expr": "dbbackup_pitr_enabled{server=~\"$server\"}",
+          "legendFormat": "{{server}}",
+          "range": true,
+          "refId": "A"
+        }
+      ],
+      "title": "PITR Status",
+      "type": "stat"
+    },
+    {
+      "datasource": {
+        "type": "prometheus",
+        "uid": "${DS_PROMETHEUS}"
+      },
+      "description": "Seconds since last archive was created",
+      "fieldConfig": {
+        "defaults": {
+          "color": {
+            "mode": "thresholds"
+          },
+          "mappings": [],
+          "thresholds": {
+            "mode": "absolute",
+            "steps": [
+              {
+                "color": "green",
+                "value": null
+              },
+              {
+                "color": "yellow",
+                "value": 300
+              },
+              {
+                "color": "red",
+                "value": 3600
+              }
+            ]
+          },
+          "unit": "s"
+        },
+        "overrides": []
+      },
+      "gridPos": {
+        "h": 4,
+        "w": 5,
+        "x": 4,
+        "y": 36
+      },
+      "id": 402,
+      "options": {
+        "colorMode": "value",
+        "graphMode": "area",
+        "justifyMode": "center",
+        "orientation": "auto",
+        "reduceOptions": {
+          "calcs": [
+            "lastNotNull"
+          ],
+          "fields": "",
+          "values": false
+        },
+        "textMode": "auto"
+      },
+      "pluginVersion": "10.2.0",
+      "targets": [
+        {
+          "datasource": {
+            "type": "prometheus",
+            "uid": "${DS_PROMETHEUS}"
+          },
+          "editorMode": "code",
+          "expr": "dbbackup_pitr_archive_lag_seconds{server=~\"$server\"}",
+          "legendFormat": "{{server}}",
+          "range": true,
+          "refId": "A"
+        }
+      ],
+      "title": "Archive Lag",
+      "type": "stat"
+    },
+    {
+      "datasource": {
+        "type": "prometheus",
+        "uid": "${DS_PROMETHEUS}"
+      },
+      "description": "Whether the WAL/binlog chain is valid (no gaps)",
+      "fieldConfig": {
+        "defaults": {
+          "color": {
+            "mode": "thresholds"
+          },
+          "mappings": [
+            {
+              "options": {
+                "0": {
+                  "color": "red",
+                  "text": "BROKEN"
+                },
+                "1": {
+                  "color": "green",
+                  "text": "VALID"
+                }
+              },
+              "type": "value"
+            }
+          ],
+          "thresholds": {
+            "mode": "absolute",
+            "steps": [
+              {
+                "color": "red",
+                "value": null
+              },
+              {
+                "color": "green",
+                "value": 1
+              }
+            ]
+          }
+        },
+        "overrides": []
+      },
+      "gridPos": {
+        "h": 4,
+        "w": 4,
+        "x": 9,
+        "y": 36
+      },
+      "id": 403,
+      "options": {
+        "colorMode": "value",
+        "graphMode": "none",
+        "justifyMode": "center",
+        "orientation": "auto",
+        "reduceOptions": {
+          "calcs": [
+            "lastNotNull"
+          ],
+          "fields": "",
+          "values": false
+        },
+        "textMode": "auto"
+      },
+      "pluginVersion": "10.2.0",
+      "targets": [
+        {
+          "datasource": {
+            "type": "prometheus",
+            "uid": "${DS_PROMETHEUS}"
+          },
+          "editorMode": "code",
+          "expr": "dbbackup_pitr_chain_valid{server=~\"$server\"}",
+          "legendFormat": "{{server}}",
+          "range": true,
+          "refId": "A"
+        }
+      ],
+      "title": "Chain Status",
+      "type": "stat"
+    },
+    {
+      "datasource": {
+        "type": "prometheus",
+        "uid": "${DS_PROMETHEUS}"
+      },
+      "description": "Number of gaps in the WAL/binlog chain (should be 0)",
+      "fieldConfig": {
+        "defaults": {
+          "color": {
+            "mode": "thresholds"
+          },
+          "mappings": [],
+          "thresholds": {
+            "mode": "absolute",
+            "steps": [
+              {
+                "color": "green",
+                "value": null
+              },
+              {
+                "color": "red",
+                "value": 1
+              }
+            ]
+          }
+        },
+        "overrides": []
+      },
+      "gridPos": {
+        "h": 4,
+        "w": 4,
+        "x": 13,
+        "y": 36
+      },
+      "id": 404,
+      "options": {
+        "colorMode": "value",
+        "graphMode": "none",
+        "justifyMode": "center",
+        "orientation": "auto",
+        "reduceOptions": {
+          "calcs": [
+            "lastNotNull"
+          ],
+          "fields": "",
+          "values": false
+        },
+        "textMode": "auto"
+      },
+      "pluginVersion": "10.2.0",
+      "targets": [
+        {
+          "datasource": {
+            "type": "prometheus",
+            "uid": "${DS_PROMETHEUS}"
+          },
+          "editorMode": "code",
+          "expr": "dbbackup_pitr_gap_count{server=~\"$server\"}",
+          "legendFormat": "{{server}}",
+          "range": true,
+          "refId": "A"
+        }
+      ],
+      "title": "Gap Count",
+      "type": "stat"
+    },
+    {
+      "datasource": {
+        "type": "prometheus",
+        "uid": "${DS_PROMETHEUS}"
+      },
+      "description": "Recovery window in minutes (time between oldest and newest archive)",
+      "fieldConfig": {
+        "defaults": {
+          "color": {
+            "mode": "thresholds"
+          },
+          "mappings": [],
+          "thresholds": {
+            "mode": "absolute",
+            "steps": [
+              {
+                "color": "red",
+                "value": null
+              },
+              {
+                "color": "yellow",
+                "value": 60
+              },
+              {
+                "color": "green",
+                "value": 1440
+              }
+            ]
+          },
+          "unit": "m"
+        },
+        "overrides": []
+      },
+      "gridPos": {
+        "h": 4,
+        "w": 7,
+        "x": 17,
+        "y": 36
+      },
+      "id": 405,
+      "options": {
+        "colorMode": "value",
+        "graphMode": "area",
+        "justifyMode": "center",
+        "orientation": "auto",
+        "reduceOptions": {
+          "calcs": [
+            "lastNotNull"
+          ],
+          "fields": "",
+          "values": false
+        },
+        "textMode": "auto"
+      },
+      "pluginVersion": "10.2.0",
+      "targets": [
+        {
+          "datasource": {
+            "type": "prometheus",
+            "uid": "${DS_PROMETHEUS}"
+          },
+          "editorMode": "code",
+          "expr": "dbbackup_pitr_recovery_window_minutes{server=~\"$server\"}",
+          "legendFormat": "{{server}}",
+          "range": true,
+          "refId": "A"
+        }
+      ],
+      "title": "Recovery Window",
+      "type": "stat"
+    },
+    {
+      "collapsed": false,
+      "gridPos": {
+        "h": 1,
+        "w": 24,
+        "x": 0,
+        "y": 40
+      },
+      "id": 300,
+      "panels": [],
+      "title": "Restore Operations",
+      "type": "row"
+    },
+    {
+      "datasource": {
+        "type": "prometheus",
+        "uid": "${DS_PROMETHEUS}"
+      },
+      "description": "Total successful restores",
+      "fieldConfig": {
+        "defaults": {
+          "color": {
+            "mode": "thresholds"
+          },
+          "mappings": [],
+          "thresholds": {
+            "mode": "absolute",
+            "steps": [
+              {
+                "color": "green",
+                "value": null
+              }
+            ]
+          }
+        },
+        "overrides": []
+      },
+      "gridPos": {
+        "h": 4,
+        "w": 6,
+        "x": 0,
+        "y": 41
+      },
+      "id": 301,
+      "options": {
+        "colorMode": "value",
+        "graphMode": "none",
+        "justifyMode": "auto",
+        "orientation": "auto",
+        "reduceOptions": {
+          "calcs": [
+            "lastNotNull"
+          ],
+          "fields": "",
+          "values": false
+        },
+        "textMode": "auto"
+      },
+      "pluginVersion": "10.2.0",
+      "targets": [
+        {
+          "datasource": {
+            "type": "prometheus",
+            "uid": "${DS_PROMETHEUS}"
+          },
+          "editorMode": "code",
+          "expr": "sum(dbbackup_restore_total{server=~\"$server\", status=\"success\"})",
+          "legendFormat": "Successful",
+          "range": true,
+          "refId": "A"
+        }
+      ],
+      "title": "Total Successful Restores",
+      "type": "stat"
+    },
+    {
+      "datasource": {
+        "type": "prometheus",
+        "uid": "${DS_PROMETHEUS}"
+      },
+      "description": "Total failed restores",
+      "fieldConfig": {
+        "defaults": {
+          "color": {
+            "mode": "thresholds"
+          },
+          "mappings": [],
+          "thresholds": {
+            "mode": "absolute",
+            "steps": [
+              {
+                "color": "green",
+                "value": null
+              },
+              {
+                "color": "red",
+                "value": 1
+              }
+            ]
+          }
+        },
+        "overrides": []
+      },
+      "gridPos": {
+        "h": 4,
+        "w": 6,
+        "x": 6,
+        "y": 41
+      },
+      "id": 302,
+      "options": {
+        "colorMode": "value",
+        "graphMode": "none",
+        "justifyMode": "auto",
+        "orientation": "auto",
+        "reduceOptions": {
+          "calcs": [
+            "lastNotNull"
+          ],
+          "fields": "",
+          "values": false
+        },
+        "textMode": "auto"
+      },
+      "pluginVersion": "10.2.0",
+      "targets": [
+        {
+          "datasource": {
+            "type": "prometheus",
+            "uid": "${DS_PROMETHEUS}"
+          },
+          "editorMode": "code",
+          "expr": "sum(dbbackup_restore_total{server=~\"$server\", status=\"failure\"})",
+          "legendFormat": "Failed",
+          "range": true,
+          "refId": "A"
+        }
+      ],
+      "title": "Total Failed Restores",
+      "type": "stat"
+    },
+    {
+      "datasource": {
+        "type": "prometheus",
+        "uid": "${DS_PROMETHEUS}"
+      },
+      "description": "Parallel jobs used in last restore. TURBO=8, balanced=auto",
+      "fieldConfig": {
+        "defaults": {
+          "color": {
+            "mode": "thresholds"
+          },
+          "mappings": [
+            {
+              "options": {
+                "1": {
+                  "color": "red",
+                  "index": 0,
+                  "text": "1 (SLOW!)"
+                },
+                "2": {
+                  "color": "yellow",
+                  "index": 1,
+                  "text": "2"
+                },
+                "4": {
+                  "color": "light-green",
+                  "index": 2,
+                  "text": "4"
+                },
+                "8": {
+                  "color": "green",
+                  "index": 3,
+                  "text": "8 (TURBO)"
+                }
+              },
+              "type": "value"
+            }
+          ],
+          "thresholds": {
+            "mode": "absolute",
+            "steps": [
+              {
+                "color": "red",
+                "value": null
+              },
+              {
+                "color": "yellow",
+                "value": 2
+              },
+              {
+                "color": "green",
+                "value": 4
+              }
+            ]
+          }
+        },
+        "overrides": []
+      },
+      "gridPos": {
+        "h": 4,
+        "w": 6,
+        "x": 12,
+        "y": 41
+      },
+      "id": 303,
+      "options": {
+        "colorMode": "background",
+        "graphMode": "none",
+        "justifyMode": "auto",
+        "orientation": "auto",
+        "reduceOptions": {
+          "calcs": [
+            "lastNotNull"
+          ],
+          "fields": "",
+          "values": false
+        },
+        "textMode": "auto"
+      },
+      "pluginVersion": "10.2.0",
+      "targets": [
+        {
+          "datasource": {
+            "type": "prometheus",
+            "uid": "${DS_PROMETHEUS}"
+          },
+          "editorMode": "code",
+          "expr": "dbbackup_restore_parallel_jobs{server=~\"$server\"}",
+          "legendFormat": "{{database}} ({{profile}})",
+          "range": true,
+          "refId": "A"
+        }
+      ],
+      "title": "Parallel Jobs Used",
+      "type": "stat"
+    },
+    {
+      "datasource": {
+        "type": "prometheus",
+        "uid": "${DS_PROMETHEUS}"
+      },
+      "description": "Last restore duration. Green <1h, Yellow <4h, Red >4h",
+      "fieldConfig": {
+        "defaults": {
+          "color": {
+            "mode": "thresholds"
+          },
+          "mappings": [],
+          "thresholds": {
+            "mode": "absolute",
+            "steps": [
+              {
+                "color": "green",
+                "value": null
+              },
+              {
+                "color": "yellow",
+                "value": 3600
+              },
+              {
+                "color": "red",
+                "value": 14400
+              }
+            ]
+          },
+          "unit": "s"
+        },
+        "overrides": []
+      },
+      "gridPos": {
+        "h": 4,
+        "w": 6,
+        "x": 18,
+        "y": 41
+      },
+      "id": 304,
+      "options": {
+        "colorMode": "value",
+        "graphMode": "area",
+        "justifyMode": "auto",
+        "orientation": "auto",
+        "reduceOptions": {
+          "calcs": [
+            "lastNotNull"
+          ],
+          "fields": "",
+          "values": false
+        },
+        "textMode": "auto"
+      },
+      "pluginVersion": "10.2.0",
+      "targets": [
+        {
+          "datasource": {
+            "type": "prometheus",
+            "uid": "${DS_PROMETHEUS}"
+          },
+          "editorMode": "code",
+          "expr": "dbbackup_restore_duration_seconds{server=~\"$server\"}",
+          "legendFormat": "{{database}}",
+          "range": true,
+          "refId": "A"
+        }
+      ],
+      "title": "Last Restore Duration",
+      "type": "stat"
+    },
+    {
+      "datasource": {
+        "type": "prometheus",
+        "uid": "${DS_PROMETHEUS}"
+      },
+      "description": "Restore duration over time with 4h threshold",
+      "fieldConfig": {
+        "defaults": {
+          "color": {
+            "mode": "palette-classic"
+          },
+          "custom": {
+            "axisBorderShow": false,
+            "axisCenteredZero": false,
+            "axisColorMode": "text",
+            "axisLabel": "",
+            "axisPlacement": "auto",
+            "barAlignment": 0,
+            "drawStyle": "line",
+            "fillOpacity": 10,
+            "gradientMode": "none",
+            "hideFrom": {
+              "legend": false,
+              "tooltip": false,
+              "viz": false
+            },
+            "insertNulls": false,
+            "lineInterpolation": "linear",
+            "lineWidth": 2,
+            "pointSize": 5,
+            "scaleDistribution": {
+              "type": "linear"
+            },
+            "showPoints": "auto",
+            "spanNulls": false,
+            "stacking": {
+              "group": "A",
+              "mode": "none"
+            },
+            "thresholdsStyle": {
+              "mode": "line"
+            }
+          },
+          "mappings": [],
+          "thresholds": {
+            "mode": "absolute",
+            "steps": [
+              {
+                "color": "green",
+                "value": null
+              },
+              {
+                "color": "red",
+                "value": 14400
+              }
+            ]
+          },
+          "unit": "s"
+        },
+        "overrides": []
+      },
+      "gridPos": {
+        "h": 8,
+        "w": 12,
+        "x": 0,
+        "y": 45
+      },
+      "id": 305,
+      "options": {
+        "legend": {
+          "calcs": [
+            "mean",
+            "max"
+          ],
+          "displayMode": "table",
+          "placement": "bottom",
+          "showLegend": true
+        },
+        "tooltip": {
+          "mode": "multi",
+          "sort": "desc"
+        }
+      },
+      "pluginVersion": "10.2.0",
+      "targets": [
+        {
+          "datasource": {
+            "type": "prometheus",
+            "uid": "${DS_PROMETHEUS}"
+          },
+          "editorMode": "code",
+          "expr": "dbbackup_restore_duration_seconds{server=~\"$server\"}",
+          "legendFormat": "{{database}} ({{profile}}, jobs={{parallel_jobs}})",
+          "range": true,
+          "refId": "A"
+        }
+      ],
+      "title": "Restore Duration Over Time",
+      "type": "timeseries"
+    },
+    {
+      "datasource": {
+        "type": "prometheus",
+        "uid": "${DS_PROMETHEUS}"
+      },
+      "description": "Parallel jobs used per restore - shows if turbo mode (8 jobs) is being used",
+      "fieldConfig": {
+        "defaults": {
+          "color": {
+            "mode": "palette-classic"
+          },
+          "custom": {
+            "axisBorderShow": false,
+            "axisCenteredZero": false,
+            "axisColorMode": "text",
+            "axisLabel": "Parallel Jobs",
+            "axisPlacement": "auto",
+            "barAlignment": 0,
+            "drawStyle": "bars",
+            "fillOpacity": 100,
+            "gradientMode": "none",
+            "hideFrom": {
+              "legend": false,
+              "tooltip": false,
+              "viz": false
+            },
+            "insertNulls": false,
+            "lineInterpolation": "linear",
+            "lineWidth": 1,
+            "pointSize": 5,
+            "scaleDistribution": {
+              "type": "linear"
+            },
+            "showPoints": "never",
+            "spanNulls": false,
+            "stacking": {
+              "group": "A",
+              "mode": "none"
+            },
+            "thresholdsStyle": {
+              "mode": "line+area"
+            }
+          },
+          "mappings": [],
+          "max": 10,
+          "min": 0,
+          "thresholds": {
+            "mode": "absolute",
+            "steps": [
+              {
+                "color": "red",
+                "value": null
+              },
+              {
+                "color": "transparent",
+                "value": 4
+              }
+            ]
+          }
+        },
+        "overrides": []
+      },
+      "gridPos": {
+        "h": 8,
+        "w": 12,
+        "x": 12,
+        "y": 45
+      },
+      "id": 306,
+      "options": {
+        "legend": {
+          "calcs": [],
+          "displayMode": "list",
+          "placement": "bottom",
+          "showLegend": true
+        },
+        "tooltip": {
+          "mode": "single",
+          "sort": "none"
+        }
+      },
+      "pluginVersion": "10.2.0",
+      "targets": [
+        {
+          "datasource": {
+            "type": "prometheus",
+            "uid": "${DS_PROMETHEUS}"
+          },
+          "editorMode": "code",
+          "expr": "dbbackup_restore_parallel_jobs{server=~\"$server\"}",
+          "legendFormat": "{{database}} ({{profile}})",
+          "range": true,
+          "refId": "A"
+        }
+      ],
+      "title": "Parallel Jobs per Restore",
+      "type": "timeseries"
+    }
   ],
   "refresh": "1m",
@@ -1585,4 +2486,4 @@
   "uid": "dbbackup-overview",
   "version": 1,
   "weekStart": ""
-}
+}
@@ -31,6 +31,19 @@ type Entry struct {
 	RetentionPolicy string            `json:"retention_policy,omitempty"` // daily, weekly, monthly, yearly
 	Tags            map[string]string `json:"tags,omitempty"`
 	Metadata        map[string]string `json:"metadata,omitempty"`
+	RestoreInfo     *RestoreInfo      `json:"restore_info,omitempty"` // Info about restore operations
+	Path            string            `json:"path,omitempty"`         // Alias for BackupPath
 }
 
+// RestoreInfo contains information about a restore operation
+type RestoreInfo struct {
+	Success      bool          `json:"success"`
+	CompletedAt  time.Time     `json:"completed_at"`
+	Duration     time.Duration `json:"duration"`
+	ParallelJobs int           `json:"parallel_jobs"`
+	Profile      string        `json:"profile"`
+	TargetDB     string        `json:"target_db,omitempty"`
+	ErrorMessage string        `json:"error_message,omitempty"`
+}
+
 // BackupStatus represents the state of a backup
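With these tags, a catalog entry's `restore_info` serializes along these lines (illustrative values; `duration` is a Go `time.Duration` and therefore encodes as nanoseconds):

```json
{
  "success": true,
  "completed_at": "2026-02-01T12:34:56Z",
  "duration": 412500000000,
  "parallel_jobs": 8,
  "profile": "turbo",
  "target_db": "appdb"
}
```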
@@ -56,8 +56,29 @@ func GetRestoreProfile(profileName string) (*RestoreProfile, error) {
 			MemoryConservative: true,
 		}, nil
 
+	case "turbo":
+		// TURBO MODE: Maximum parallelism for fastest restore
+		// Matches native pg_restore -j8 performance
+		return &RestoreProfile{
+			Name:               "turbo",
+			ParallelDBs:        2, // 2 DBs in parallel (I/O balanced)
+			Jobs:               8, // pg_restore --jobs=8
+			DisableProgress:    false,
+			MemoryConservative: false,
+		}, nil
+
+	case "max-performance":
+		// Maximum performance for high-end servers
+		return &RestoreProfile{
+			Name:               "max-performance",
+			ParallelDBs:        4,
+			Jobs:               8,
+			DisableProgress:    false,
+			MemoryConservative: false,
+		}, nil
+
 	default:
-		return nil, fmt.Errorf("unknown profile: %s (valid: conservative, balanced, aggressive)", profileName)
+		return nil, fmt.Errorf("unknown profile: %s (valid: conservative, balanced, aggressive, turbo, max-performance)", profileName)
 	}
 }
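A minimal caller sketch for the new profiles (placed in the same package as `GetRestoreProfile`; the surrounding wiring is an assumption, not part of the diff):

```go
// resolveJobs returns the pg_restore --jobs value for a named profile.
// Hypothetical helper for illustration only.
func resolveJobs(name string) (int, error) {
	p, err := GetRestoreProfile(name) // e.g. "turbo" -> Jobs: 8, ParallelDBs: 2
	if err != nil {
		return 0, err // "unknown profile: ..." for names not in the switch
	}
	return p.Jobs, nil
}
```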
@@ -927,8 +927,10 @@ func (e *MySQLNativeEngine) backupRoutines(ctx context.Context, w io.Writer, dat
 			continue // Skip routines we can't read
 		}
 
-		// Write routine header
-		header := fmt.Sprintf("\n--\n-- %s `%s`\n--\n\n", strings.Title(strings.ToLower(routineType)), routineName)
+		// Write routine header (capitalize first letter manually to avoid deprecated strings.Title)
+		routineTypeLower := strings.ToLower(routineType)
+		routineTypeTitle := strings.ToUpper(routineTypeLower[:1]) + routineTypeLower[1:]
+		header := fmt.Sprintf("\n--\n-- %s `%s`\n--\n\n", routineTypeTitle, routineName)
 		if _, err := w.Write([]byte(header)); err != nil {
 			return err
 		}
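`strings.Title` has been deprecated since Go 1.18, so the first letter is upper-cased manually; for the ASCII routine types MySQL reports this is equivalent, as a standalone sketch shows:

```go
package main

import (
	"fmt"
	"strings"
)

func main() {
	for _, routineType := range []string{"PROCEDURE", "FUNCTION"} {
		lower := strings.ToLower(routineType)
		// Upper-case only the first byte; safe here because the input is ASCII
		fmt.Println(strings.ToUpper(lower[:1]) + lower[1:])
	}
	// Prints:
	// Procedure
	// Function
}
```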
@@ -1,9 +1,12 @@
 package native
 
 import (
+	"bufio"
+	"bytes"
 	"context"
 	"fmt"
 	"io"
+	"strings"
 	"time"
 
 	"dbbackup/internal/logger"
@@ -99,17 +102,138 @@ func (r *PostgreSQLRestoreEngine) Restore(ctx context.Context, source io.Reader,
 		EngineUsed: "postgresql_native",
 	}
 
-	// TODO: Implement PostgreSQL restore logic
-	// This is a basic implementation - would need to:
-	// 1. Parse SQL statements from source
-	// 2. Execute schema creation statements
-	// 3. Handle COPY data import
-	// 4. Execute data import statements
-	// 5. Handle errors appropriately
-	// 6. Report progress
+	if options == nil {
+		options = &RestoreOptions{}
+	}
+
+	// Acquire connection for restore operations
+	conn, err := r.engine.pool.Acquire(ctx)
+	if err != nil {
+		return result, fmt.Errorf("failed to acquire connection: %w", err)
+	}
+	defer conn.Release()
+
+	// Parse and execute SQL statements from the backup
+	scanner := bufio.NewScanner(source)
+	scanner.Buffer(make([]byte, 1024*1024), 10*1024*1024) // 10MB max line
+
+	var (
+		stmtBuffer    bytes.Buffer
+		inCopyMode    bool
+		copyTableName string
+		copyData      bytes.Buffer
+		stmtCount     int64
+		rowsRestored  int64
+	)
+
+	for scanner.Scan() {
+		line := scanner.Text()
+
+		// Handle COPY data mode
+		if inCopyMode {
+			if line == "\\." {
+				// End of COPY data - execute the COPY FROM
+				if copyData.Len() > 0 {
+					copySQL := fmt.Sprintf("COPY %s FROM STDIN", copyTableName)
+					tag, err := conn.Conn().PgConn().CopyFrom(ctx, strings.NewReader(copyData.String()), copySQL)
+					if err != nil {
+						if options.ContinueOnError {
+							r.engine.log.Warn("COPY failed, continuing", "table", copyTableName, "error", err)
+						} else {
+							return result, fmt.Errorf("COPY to %s failed: %w", copyTableName, err)
+						}
+					} else {
+						rowsRestored += tag.RowsAffected()
+					}
+				}
+				copyData.Reset()
+				inCopyMode = false
+				copyTableName = ""
+				continue
+			}
+			copyData.WriteString(line)
+			copyData.WriteByte('\n')
+			continue
+		}
+
+		// Check for COPY statement start
+		if strings.HasPrefix(strings.ToUpper(strings.TrimSpace(line)), "COPY ") && strings.HasSuffix(strings.TrimSpace(line), "FROM stdin;") {
+			// Extract table name from COPY statement
+			parts := strings.Fields(line)
+			if len(parts) >= 2 {
+				copyTableName = parts[1]
+				inCopyMode = true
+				stmtCount++
+				if options.ProgressCallback != nil {
+					options.ProgressCallback(&RestoreProgress{
+						Operation:        "COPY",
+						CurrentObject:    copyTableName,
+						ObjectsCompleted: stmtCount,
+						RowsProcessed:    rowsRestored,
+					})
+				}
+				continue
+			}
+		}
+
+		// Skip comments and empty lines for regular statements
+		trimmed := strings.TrimSpace(line)
+		if trimmed == "" || strings.HasPrefix(trimmed, "--") {
+			continue
+		}
+
+		// Accumulate statement
+		stmtBuffer.WriteString(line)
+		stmtBuffer.WriteByte('\n')
+
+		// Check if statement is complete (ends with ;)
+		if strings.HasSuffix(trimmed, ";") {
+			stmt := stmtBuffer.String()
+			stmtBuffer.Reset()
+
+			// Skip data statements if schema-only mode
+			if options.SchemaOnly && (strings.HasPrefix(strings.ToUpper(trimmed), "INSERT") ||
+				strings.HasPrefix(strings.ToUpper(trimmed), "COPY")) {
+				continue
+			}
+
+			// Skip schema statements if data-only mode
+			if options.DataOnly && !strings.HasPrefix(strings.ToUpper(trimmed), "INSERT") &&
+				!strings.HasPrefix(strings.ToUpper(trimmed), "COPY") {
+				continue
+			}
+
+			// Execute the statement
+			_, err := conn.Exec(ctx, stmt)
+			if err != nil {
+				if options.ContinueOnError {
+					r.engine.log.Warn("Statement failed, continuing", "error", err)
+				} else {
+					return result, fmt.Errorf("statement execution failed: %w", err)
+				}
+			}
+			stmtCount++
+
+			if options.ProgressCallback != nil && stmtCount%100 == 0 {
+				options.ProgressCallback(&RestoreProgress{
+					Operation:        "SQL",
+					ObjectsCompleted: stmtCount,
+					RowsProcessed:    rowsRestored,
+				})
+			}
+		}
+	}
+
+	if err := scanner.Err(); err != nil {
+		return result, fmt.Errorf("error reading backup: %w", err)
+	}
 
 	result.Duration = time.Since(startTime)
-	return result, fmt.Errorf("PostgreSQL restore not yet implemented")
+	result.ObjectsProcessed = int(stmtCount)
+	result.BytesProcessed = rowsRestored
+	r.engine.log.Info("Restore completed", "statements", stmtCount, "rows", rowsRestored, "duration", result.Duration)
+
+	return result, nil
 }
 
 // Ping checks database connectivity
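A sketch of driving this streaming restore path (the engine value comes from elsewhere in the package; treat the wiring as an assumption):

```go
package native

import (
	"context"
	"log"
	"os"
)

// restoreFromFile streams a plain-SQL dump through Restore above.
// Hypothetical helper for illustration; not part of the diff.
func restoreFromFile(ctx context.Context, eng *PostgreSQLRestoreEngine, path string) error {
	f, err := os.Open(path)
	if err != nil {
		return err
	}
	defer f.Close()

	opts := &RestoreOptions{
		ContinueOnError: true, // warn-and-continue, matching the loop above
		ProgressCallback: func(p *RestoreProgress) {
			log.Printf("%s: %d objects, %d rows", p.Operation, p.ObjectsCompleted, p.RowsProcessed)
		},
	}
	_, err = eng.Restore(ctx, f, opts)
	return err
}
```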
@@ -149,17 +273,121 @@ func (r *MySQLRestoreEngine) Restore(ctx context.Context, source io.Reader, opti
 		EngineUsed: "mysql_native",
 	}
 
-	// TODO: Implement MySQL restore logic
-	// This is a basic implementation - would need to:
-	// 1. Parse SQL statements from source
-	// 2. Execute CREATE DATABASE statements
-	// 3. Execute schema creation statements
-	// 4. Execute data import statements
-	// 5. Handle MySQL-specific syntax
-	// 6. Report progress
+	if options == nil {
+		options = &RestoreOptions{}
+	}
+
+	// Parse and execute SQL statements from the backup
+	scanner := bufio.NewScanner(source)
+	scanner.Buffer(make([]byte, 1024*1024), 10*1024*1024) // 10MB max line
+
+	var (
+		stmtBuffer   bytes.Buffer
+		stmtCount    int64
+		rowsRestored int64
+		inMultiLine  bool
+		delimiter    = ";"
+	)
+
+	// Disable foreign key checks if requested
+	if options.DisableForeignKeys {
+		if _, err := r.engine.db.ExecContext(ctx, "SET FOREIGN_KEY_CHECKS = 0"); err != nil {
+			r.engine.log.Warn("Failed to disable foreign key checks", "error", err)
+		}
+		defer func() {
+			_, _ = r.engine.db.ExecContext(ctx, "SET FOREIGN_KEY_CHECKS = 1")
+		}()
+	}
+
+	for scanner.Scan() {
+		line := scanner.Text()
+		trimmed := strings.TrimSpace(line)
+
+		// Skip comments and empty lines
+		if trimmed == "" || strings.HasPrefix(trimmed, "--") || strings.HasPrefix(trimmed, "/*") {
+			continue
+		}
+
+		// Handle DELIMITER changes (common in MySQL dumps)
+		if strings.HasPrefix(strings.ToUpper(trimmed), "DELIMITER ") {
+			delimiter = strings.TrimSpace(strings.TrimPrefix(trimmed, "DELIMITER "))
+			if delimiter == "" {
+				delimiter = ";"
+			}
+			continue
+		}
+
+		// Accumulate statement
+		stmtBuffer.WriteString(line)
+		stmtBuffer.WriteByte('\n')
+
+		// Check if statement is complete
+		if strings.HasSuffix(trimmed, delimiter) {
+			stmt := strings.TrimSuffix(stmtBuffer.String(), delimiter+"\n")
+			stmt = strings.TrimSuffix(stmt, delimiter)
+			stmtBuffer.Reset()
+			inMultiLine = false
+
+			upperStmt := strings.ToUpper(strings.TrimSpace(stmt))
+
+			// Skip data statements if schema-only mode
+			if options.SchemaOnly && strings.HasPrefix(upperStmt, "INSERT") {
+				continue
+			}
+
+			// Skip schema statements if data-only mode
+			if options.DataOnly && !strings.HasPrefix(upperStmt, "INSERT") {
+				continue
+			}
+
+			// Execute the statement
+			res, err := r.engine.db.ExecContext(ctx, stmt)
+			if err != nil {
+				if options.ContinueOnError {
+					r.engine.log.Warn("Statement failed, continuing", "error", err)
+				} else {
+					return result, fmt.Errorf("statement execution failed: %w", err)
+				}
+			} else {
+				if rows, _ := res.RowsAffected(); rows > 0 {
+					rowsRestored += rows
+				}
+			}
+			stmtCount++
+
+			if options.ProgressCallback != nil && stmtCount%100 == 0 {
+				options.ProgressCallback(&RestoreProgress{
+					Operation:        "SQL",
+					ObjectsCompleted: stmtCount,
+					RowsProcessed:    rowsRestored,
+				})
+			}
+		} else {
+			inMultiLine = true
+		}
+	}
+
+	// Handle any remaining statement
+	if stmtBuffer.Len() > 0 && !inMultiLine {
+		stmt := stmtBuffer.String()
+		if _, err := r.engine.db.ExecContext(ctx, stmt); err != nil {
+			if !options.ContinueOnError {
+				return result, fmt.Errorf("final statement failed: %w", err)
+			}
+		}
+		stmtCount++
+	}
+
+	if err := scanner.Err(); err != nil {
+		return result, fmt.Errorf("error reading backup: %w", err)
+	}
 
 	result.Duration = time.Since(startTime)
-	return result, fmt.Errorf("MySQL restore not yet implemented")
+	result.ObjectsProcessed = int(stmtCount)
+	result.BytesProcessed = rowsRestored
+	r.engine.log.Info("Restore completed", "statements", stmtCount, "rows", rowsRestored, "duration", result.Duration)
+
+	return result, nil
 }
 
 // Ping checks database connectivity
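The MySQL path mirrors the PostgreSQL one; for dumps with cross-table foreign keys, the `DisableForeignKeys` option wraps the run in `SET FOREIGN_KEY_CHECKS = 0/1` as the hunk above shows (sketch; engine construction is outside this hunk):

```go
opts := &RestoreOptions{
	DisableForeignKeys: true,
	ContinueOnError:    true,
}
_, err := mysqlEngine.Restore(ctx, dumpReader, opts) // mysqlEngine: a *MySQLRestoreEngine
```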
@@ -189,28 +189,3 @@ func (pi *ProgressInfo) FormatSummary() string {
 
 	return fmt.Sprintf("%s elapsed", formatDuration(pi.ElapsedTime))
 }
-
-// Helper function to format bytes
-func formatProgressBytes(bytes int64) string {
-	const unit = 1024
-	if bytes < unit {
-		return fmt.Sprintf("%d B", bytes)
-	}
-	div, exp := int64(unit), 0
-	for n := bytes / unit; n >= unit; n /= unit {
-		div *= unit
-		exp++
-	}
-	return fmt.Sprintf("%.1f %cB", float64(bytes)/float64(div), "KMGTPE"[exp])
-}
-
-// Helper function to format duration
-func formatProgressDuration(d time.Duration) string {
-	if d < time.Minute {
-		return fmt.Sprintf("%.0fs", d.Seconds())
-	}
-	if d < time.Hour {
-		return fmt.Sprintf("%.1fm", d.Minutes())
-	}
-	return fmt.Sprintf("%.1fh", d.Hours())
-}
@@ -3,12 +3,16 @@ package prometheus
 
 import (
 	"context"
+	"encoding/json"
 	"fmt"
 	"net/http"
+	"os"
+	"path/filepath"
 	"sync"
 	"time"
 
 	"dbbackup/internal/catalog"
+	"dbbackup/internal/dedup"
 	"dbbackup/internal/logger"
 )
@@ -21,6 +25,11 @@ type Exporter struct {
 	version   string
 	gitCommit string
 
+	// Optional paths for PITR and dedup metrics
+	pitrConfigPaths []string // Paths to check for pitr_config.json
+	dedupBasePath   string   // Base path for dedup store
+	dedupIndexPath  string   // Path to dedup index DB (for NFS/CIFS)
+
 	mu          sync.RWMutex
 	cachedData  string
 	lastRefresh time.Time
@@ -40,14 +49,41 @@ func NewExporter(log logger.Logger, cat catalog.Catalog, instance string, port i
 
 // NewExporterWithVersion creates a new Prometheus exporter with version info
 func NewExporterWithVersion(log logger.Logger, cat catalog.Catalog, instance string, port int, version, gitCommit string) *Exporter {
+	// Auto-detect PITR and dedup paths based on hostname
+	hostname, _ := os.Hostname()
+	shortHostname := hostname
+	if idx := len(hostname); idx > 0 {
+		// Extract short hostname (e.g., mysql01 from mysql01.uuxo.net)
+		for i, c := range hostname {
+			if c == '.' {
+				shortHostname = hostname[:i]
+				break
+			}
+		}
+	}
+
+	// Common PITR config locations
+	pitrPaths := []string{
+		fmt.Sprintf("/mnt/smb-%s/backups/binlog_archive/pitr_config.json", shortHostname),
+		fmt.Sprintf("/mnt/smb-%s/backups/wal_archive/pitr_config.json", shortHostname),
+		"/var/lib/dbbackup/pitr_config.json",
+	}
+
+	// Common dedup locations
+	dedupBase := fmt.Sprintf("/mnt/smb-%s/backups/dedup", shortHostname)
+	dedupIndex := "/var/lib/dbbackup/dedup-index.db"
+
 	return &Exporter{
-		log:        log,
-		catalog:    cat,
-		instance:   instance,
-		port:       port,
-		version:    version,
-		gitCommit:  gitCommit,
-		refreshTTL: 30 * time.Second,
+		log:             log,
+		catalog:         cat,
+		instance:        instance,
+		port:            port,
+		version:         version,
+		gitCommit:       gitCommit,
+		refreshTTL:      30 * time.Second,
+		pitrConfigPaths: pitrPaths,
+		dedupBasePath:   dedupBase,
+		dedupIndexPath:  dedupIndex,
 	}
 }
@@ -179,6 +215,19 @@ func (e *Exporter) refresh() error {
 		return err
 	}
 
+	// Collect PITR metrics if available
+	pitrMetrics := e.collectPITRMetrics()
+	if len(pitrMetrics) > 0 {
+		pitrWriter := NewPITRMetricsWriter(e.log, e.instance)
+		data += "\n" + pitrWriter.FormatPITRMetrics(pitrMetrics)
+	}
+
+	// Collect dedup metrics if available
+	dedupData := e.collectDedupMetrics()
+	if dedupData != "" {
+		data += "\n" + dedupData
+	}
+
 	e.mu.Lock()
 	e.cachedData = data
 	e.lastRefresh = time.Now()
@@ -187,3 +236,141 @@
 	e.log.Debug("Refreshed metrics cache")
 	return nil
 }
+
+// PITRConfigFile represents the PITR configuration file structure
+type PITRConfigFile struct {
+	ArchiveDir      string    `json:"archive_dir"`
+	ArchiveInterval string    `json:"archive_interval"`
+	Compression     bool      `json:"compression"`
+	CreatedAt       time.Time `json:"created_at"`
+	Enabled         bool      `json:"enabled"`
+	Encryption      bool      `json:"encryption"`
+	GTIDMode        bool      `json:"gtid_mode"`
+	RetentionDays   int       `json:"retention_days"`
+	ServerID        int       `json:"server_id"`
+	ServerType      string    `json:"server_type"`
+	ServerVersion   string    `json:"server_version"`
+}
+
+// collectPITRMetrics collects PITR metrics from config files and archive directories
+func (e *Exporter) collectPITRMetrics() []PITRMetrics {
+	var metrics []PITRMetrics
+
+	for _, configPath := range e.pitrConfigPaths {
+		data, err := os.ReadFile(configPath)
+		if err != nil {
+			continue // Config not found at this path
+		}
+
+		var config PITRConfigFile
+		if err := json.Unmarshal(data, &config); err != nil {
+			e.log.Warn("Failed to parse PITR config", "path", configPath, "error", err)
+			continue
+		}
+
+		if !config.Enabled {
+			continue
+		}
+
+		// Get archive directory stats
+		archiveDir := config.ArchiveDir
+		if archiveDir == "" {
+			archiveDir = filepath.Dir(configPath)
+		}
+
+		// Count archive files and get timestamps
+		archiveCount := 0
+		var archiveSize int64
+		var oldestArchive, newestArchive time.Time
+		var gapCount int
+
+		entries, err := os.ReadDir(archiveDir)
+		if err == nil {
+			var lastSeq int
+			for _, entry := range entries {
+				if entry.IsDir() {
+					continue
+				}
+				name := entry.Name()
+				// Match binlog/WAL files (mysql-bin.*, mariadb-bin.*, or WAL segment names)
+				if len(name) > 4 && (name[:4] == "mysq" || name[:4] == "mari" || len(name) == 24) {
+					archiveCount++
+					info, err := entry.Info()
+					if err == nil {
+						archiveSize += info.Size()
+						modTime := info.ModTime()
+						if oldestArchive.IsZero() || modTime.Before(oldestArchive) {
+							oldestArchive = modTime
+						}
+						if newestArchive.IsZero() || modTime.After(newestArchive) {
+							newestArchive = modTime
+						}
+					}
+					// Simple gap detection for binlog files
+					var seq int
+					if _, err := fmt.Sscanf(name, "mysql-bin.%d", &seq); err == nil {
+						if lastSeq > 0 && seq > lastSeq+1 {
+							gapCount++
+						}
+						lastSeq = seq
+					}
+				}
+			}
+		}
+
+		// Calculate archive lag
+		archiveLag := float64(0)
+		if !newestArchive.IsZero() {
+			archiveLag = time.Since(newestArchive).Seconds()
+		}
+
+		// Calculate recovery window (time between oldest and newest archive)
+		recoveryMinutes := float64(0)
+		if !oldestArchive.IsZero() && !newestArchive.IsZero() {
+			recoveryMinutes = newestArchive.Sub(oldestArchive).Minutes()
+		}
+
+		// Determine database name from archive path
+		dbName := "cluster"
+		if config.ServerType == "mariadb" || config.ServerType == "mysql" {
+			dbName = "mysql"
+		} else if config.ServerType == "postgres" {
+			dbName = "postgres"
+		}
+
+		metrics = append(metrics, PITRMetrics{
+			Database:        dbName,
+			Engine:          config.ServerType,
+			Enabled:         config.Enabled,
+			LastArchived:    newestArchive,
+			ArchiveLag:      archiveLag,
+			ArchiveCount:    archiveCount,
+			ArchiveSize:     archiveSize,
+			ChainValid:      gapCount == 0,
+			GapCount:        gapCount,
+			RecoveryMinutes: recoveryMinutes,
+		})
+
+		e.log.Debug("Collected PITR metrics", "database", dbName, "archives", archiveCount, "lag", archiveLag)
+	}
+
+	return metrics
+}
+
+// collectDedupMetrics collects deduplication metrics if dedup store exists
+func (e *Exporter) collectDedupMetrics() string {
+	// Check if dedup directory exists
+	if _, err := os.Stat(e.dedupBasePath); os.IsNotExist(err) {
+		return ""
+	}
+
+	// Try to collect dedup metrics
+	metrics, err := dedup.CollectMetrics(e.dedupBasePath, e.dedupIndexPath)
+	if err != nil {
+		e.log.Debug("Could not collect dedup metrics", "error", err)
+		return ""
+	}
+
+	// Format as Prometheus metrics
+	return dedup.FormatPrometheusMetrics(metrics, e.instance)
+}
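For reference, a `pitr_config.json` matching the struct above would look like this (field names from the JSON tags; values illustrative):

```json
{
  "archive_dir": "/mnt/smb-mysql01/backups/binlog_archive",
  "archive_interval": "5m",
  "compression": true,
  "created_at": "2026-01-15T08:00:00Z",
  "enabled": true,
  "encryption": false,
  "gtid_mode": true,
  "retention_days": 7,
  "server_id": 1,
  "server_type": "mariadb",
  "server_version": "10.11"
}
```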
@@ -3,6 +3,7 @@ package prometheus
 
 import (
 	"context"
+	"encoding/json"
 	"fmt"
 	"os"
 	"path/filepath"
@@ -64,6 +65,22 @@
 	PITRBaseCount int // Count of PITR base backups
 }
 
+// RestoreMetrics holds metrics for restore operations
+type RestoreMetrics struct {
+	Database      string
+	Engine        string
+	LastRestore   time.Time
+	LastDuration  time.Duration
+	LastSize      int64
+	ParallelJobs  int    // Number of parallel jobs used (--jobs)
+	Profile       string // Profile used (turbo, balanced, etc.)
+	TotalRestores int
+	SuccessCount  int
+	FailureCount  int
+	LastStatus    string // "success", "failure"
+	SourceArchive string // Path/name of source archive
+}
+
 // PITRMetrics holds PITR-specific metrics for a database
 type PITRMetrics struct {
 	Database string
@@ -195,6 +212,154 @@ func (m *MetricsWriter) collectMetrics() ([]BackupMetrics, error) {
 	return result, nil
 }
 
+// collectRestoreMetrics collects restore operation metrics from catalog
+func (m *MetricsWriter) collectRestoreMetrics() []RestoreMetrics {
+	if m.catalog == nil {
+		return nil
+	}
+
+	// Try to get restore history from catalog
+	ctx := context.Background()
+	entries, err := m.catalog.List(ctx, "", 0)
+	if err != nil {
+		m.log.Warn("Failed to list catalog for restore metrics", "error", err)
+		return nil
+	}
+
+	// Group by database - look for restore entries
+	byDB := make(map[string]*RestoreMetrics)
+
+	for _, e := range entries {
+		// Check if this is a restore operation (has restore metadata)
+		if e.RestoreInfo == nil {
+			continue
+		}
+
+		dbName := e.Database
+		if dbName == "" {
+			dbName = "cluster"
+		}
+
+		rm, exists := byDB[dbName]
+		if !exists {
+			rm = &RestoreMetrics{
+				Database: dbName,
+				Engine:   e.DatabaseType,
+			}
+			byDB[dbName] = rm
+		}
+
+		rm.TotalRestores++
+		if e.RestoreInfo.Success {
+			rm.SuccessCount++
+			if e.RestoreInfo.CompletedAt.After(rm.LastRestore) {
+				rm.LastRestore = e.RestoreInfo.CompletedAt
+				rm.LastDuration = e.RestoreInfo.Duration
+				rm.LastSize = e.SizeBytes
+				rm.ParallelJobs = e.RestoreInfo.ParallelJobs
+				rm.Profile = e.RestoreInfo.Profile
+				rm.LastStatus = "success"
+				rm.SourceArchive = e.Path
+			}
+		} else {
+			rm.FailureCount++
+			if e.RestoreInfo.CompletedAt.After(rm.LastRestore) {
+				rm.LastRestore = e.RestoreInfo.CompletedAt
+				rm.LastStatus = "failure"
+			}
+		}
+	}
+
+	// Also read from restore_metrics.json file (written by restore engine)
+	m.loadRestoreMetricsFromFile(byDB)
+
+	// Convert to slice
+	result := make([]RestoreMetrics, 0, len(byDB))
+	for _, rm := range byDB {
+		result = append(result, *rm)
+	}
+	sort.Slice(result, func(i, j int) bool {
+		return result[i].Database < result[j].Database
+	})
+
+	return result
+}
+
+// loadRestoreMetricsFromFile reads restore metrics from JSON file
+func (m *MetricsWriter) loadRestoreMetricsFromFile(byDB map[string]*RestoreMetrics) {
+	// Try common locations for restore_metrics.json
+	homeDir, _ := os.UserHomeDir()
+	paths := []string{
+		filepath.Join(homeDir, ".dbbackup", "restore_metrics.json"),
+		"/var/lib/dbbackup/restore_metrics.json",
+	}
+
+	for _, path := range paths {
+		data, err := os.ReadFile(path)
+		if err != nil {
+			continue
+		}
+
+		var metricsFile struct {
+			Records []struct {
+				Database     string        `json:"database"`
+				Engine       string        `json:"engine"`
+				CompletedAt  time.Time     `json:"completed_at"`
+				Duration     time.Duration `json:"duration_ns"`
+				DurationSecs float64       `json:"duration_seconds"`
+				SizeBytes    int64         `json:"size_bytes"`
+				ParallelJobs int           `json:"parallel_jobs"`
+				Profile      string        `json:"profile"`
+				Success      bool          `json:"success"`
+				SourceFile   string        `json:"source_file"`
+			} `json:"records"`
+		}
+
+		if err := json.Unmarshal(data, &metricsFile); err != nil {
+			m.log.Warn("Failed to parse restore_metrics.json", "error", err)
+			continue
+		}
+
+		// Process records
+		for _, rec := range metricsFile.Records {
+			dbName := rec.Database
+			if dbName == "" {
+				dbName = "unknown"
+			}
+
+			rm, exists := byDB[dbName]
+			if !exists {
+				rm = &RestoreMetrics{
+					Database: dbName,
+					Engine:   rec.Engine,
+				}
+				byDB[dbName] = rm
+			}
+
+			rm.TotalRestores++
+			if rec.Success {
+				rm.SuccessCount++
+				if rec.CompletedAt.After(rm.LastRestore) {
+					rm.LastRestore = rec.CompletedAt
+					rm.LastDuration = time.Duration(rec.DurationSecs * float64(time.Second))
+					rm.LastSize = rec.SizeBytes
+					rm.ParallelJobs = rec.ParallelJobs
+					rm.Profile = rec.Profile
+					rm.LastStatus = "success"
+					rm.SourceArchive = rec.SourceFile
+				}
+			} else {
+				rm.FailureCount++
+				if rec.CompletedAt.After(rm.LastRestore) {
+					rm.LastRestore = rec.CompletedAt
+					rm.LastStatus = "failure"
+				}
+			}
+		}
+		break // Found and processed file
+	}
+}
+
 // formatMetrics formats metrics in Prometheus exposition format
 func (m *MetricsWriter) formatMetrics(metrics []BackupMetrics) string {
 	var b strings.Builder
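An example `restore_metrics.json` that this loader accepts (keys from the inline struct tags; values illustrative):

```json
{
  "records": [
    {
      "database": "appdb",
      "engine": "postgresql",
      "completed_at": "2026-02-01T12:34:56Z",
      "duration_seconds": 412.5,
      "size_bytes": 1073741824,
      "parallel_jobs": 8,
      "profile": "turbo",
      "success": true,
      "source_file": "appdb_20260201.dump.gz"
    }
  ]
}
```

Note that `LastDuration` is rebuilt from `duration_seconds`, so the `duration_ns` field may be omitted.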
@ -319,6 +484,64 @@ func (m *MetricsWriter) formatMetrics(metrics []BackupMetrics) string {
|
||||
}
|
||||
b.WriteString("\n")
|
||||
|
||||
// ========== RESTORE METRICS ==========
|
||||
restoreMetrics := m.collectRestoreMetrics()
|
||||
|
||||
// dbbackup_restore_total
|
||||
b.WriteString("# HELP dbbackup_restore_total Total number of restore operations by status\n")
|
||||
b.WriteString("# TYPE dbbackup_restore_total counter\n")
|
||||
for _, rm := range restoreMetrics {
|
||||
b.WriteString(fmt.Sprintf("dbbackup_restore_total{server=%q,database=%q,status=\"success\"} %d\n",
|
||||
m.instance, rm.Database, rm.SuccessCount))
|
||||
b.WriteString(fmt.Sprintf("dbbackup_restore_total{server=%q,database=%q,status=\"failure\"} %d\n",
|
||||
m.instance, rm.Database, rm.FailureCount))
|
||||
}
|
||||
b.WriteString("\n")
|
||||
|
||||
// dbbackup_restore_duration_seconds
|
||||
b.WriteString("# HELP dbbackup_restore_duration_seconds Duration of last restore operation in seconds\n")
|
||||
b.WriteString("# TYPE dbbackup_restore_duration_seconds gauge\n")
|
||||
for _, rm := range restoreMetrics {
|
||||
if rm.LastDuration > 0 {
|
||||
b.WriteString(fmt.Sprintf("dbbackup_restore_duration_seconds{server=%q,database=%q,profile=%q,parallel_jobs=\"%d\"} %.2f\n",
|
||||
m.instance, rm.Database, rm.Profile, rm.ParallelJobs, rm.LastDuration.Seconds()))
|
||||
}
|
||||
}
|
||||
b.WriteString("\n")
|
||||
|
||||
// dbbackup_restore_parallel_jobs
|
||||
b.WriteString("# HELP dbbackup_restore_parallel_jobs Number of parallel jobs used in last restore\n")
|
||||
b.WriteString("# TYPE dbbackup_restore_parallel_jobs gauge\n")
|
||||
for _, rm := range restoreMetrics {
|
||||
if rm.ParallelJobs > 0 {
|
||||
b.WriteString(fmt.Sprintf("dbbackup_restore_parallel_jobs{server=%q,database=%q,profile=%q} %d\n",
|
||||
m.instance, rm.Database, rm.Profile, rm.ParallelJobs))
|
||||
}
|
||||
}
|
||||
b.WriteString("\n")
|
||||
|
||||
// dbbackup_restore_size_bytes
|
||||
b.WriteString("# HELP dbbackup_restore_size_bytes Size of last restored archive in bytes\n")
|
||||
b.WriteString("# TYPE dbbackup_restore_size_bytes gauge\n")
|
||||
for _, rm := range restoreMetrics {
|
||||
if rm.LastSize > 0 {
|
||||
b.WriteString(fmt.Sprintf("dbbackup_restore_size_bytes{server=%q,database=%q} %d\n",
|
||||
m.instance, rm.Database, rm.LastSize))
|
||||
}
|
||||
}
|
||||
b.WriteString("\n")
|
||||
|
||||
// dbbackup_restore_last_timestamp
|
||||
b.WriteString("# HELP dbbackup_restore_last_timestamp Unix timestamp of last restore operation\n")
|
||||
b.WriteString("# TYPE dbbackup_restore_last_timestamp gauge\n")
|
||||
for _, rm := range restoreMetrics {
|
||||
if !rm.LastRestore.IsZero() {
|
||||
b.WriteString(fmt.Sprintf("dbbackup_restore_last_timestamp{server=%q,database=%q,status=%q} %d\n",
|
||||
m.instance, rm.Database, rm.LastStatus, rm.LastRestore.Unix()))
|
||||
}
|
||||
}
|
||||
b.WriteString("\n")
|
||||
|
||||
// dbbackup_scrape_timestamp
|
||||
b.WriteString("# HELP dbbackup_scrape_timestamp Unix timestamp when metrics were collected\n")
|
||||
b.WriteString("# TYPE dbbackup_scrape_timestamp gauge\n")
|
||||
|
||||
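Scraping this exporter for a single database yields output along these lines (label values illustrative):

dbbackup_restore_total{server="db01",database="appdb",status="success"} 12
dbbackup_restore_total{server="db01",database="appdb",status="failure"} 1
dbbackup_restore_duration_seconds{server="db01",database="appdb",profile="turbo",parallel_jobs="8"} 95.00
dbbackup_restore_parallel_jobs{server="db01",database="appdb",profile="turbo"} 8
dbbackup_restore_size_bytes{server="db01",database="appdb"} 734003200
dbbackup_restore_last_timestamp{server="db01",database="appdb",status="success"} 1769947200

Typical PromQL over these series (metric and label names as emitted above; the scrape setup is assumed):

# restores that ran single-threaded — candidates for a faster profile
dbbackup_restore_parallel_jobs == 1

# failed restores over the last 24 hours, per database
increase(dbbackup_restore_total{status="failure"}[24h])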
@ -188,6 +188,7 @@ func (la *loggerAdapter) Debug(msg string, args ...any) {

// RestoreSingle restores a single database from an archive
func (e *Engine) RestoreSingle(ctx context.Context, archivePath, targetDB string, cleanFirst, createIfMissing bool) error {
	operation := e.log.StartOperation("Single Database Restore")
	startTime := time.Now()

	// Validate and sanitize archive path
	validArchivePath, pathErr := security.ValidateArchivePath(archivePath)
@ -197,6 +198,12 @@ func (e *Engine) RestoreSingle(ctx context.Context, archivePath, targetDB string
	}
	archivePath = validArchivePath

	// Get archive size for metrics
	var archiveSize int64
	if fi, err := os.Stat(archivePath); err == nil {
		archiveSize = fi.Size()
	}

	// Validate archive exists
	if _, err := os.Stat(archivePath); os.IsNotExist(err) {
		operation.Fail("Archive not found")
@ -269,6 +276,33 @@ func (e *Engine) RestoreSingle(ctx context.Context, archivePath, targetDB string
		return fmt.Errorf("unsupported archive format: %s", format)
	}

	// Record restore metrics for Prometheus
	duration := time.Since(startTime)
	dbType := "postgresql"
	if format == FormatMySQLSQL || format == FormatMySQLSQLGz {
		dbType = "mysql"
	}
	record := RestoreRecord{
		Database:     targetDB,
		Engine:       dbType,
		StartedAt:    startTime,
		CompletedAt:  time.Now(),
		Duration:     duration,
		SizeBytes:    archiveSize,
		ParallelJobs: e.cfg.Jobs,
		Profile:      e.cfg.ResourceProfile,
		Success:      err == nil,
		SourceFile:   filepath.Base(archivePath),
		TargetDB:     targetDB,
		IsCluster:    false,
	}
	if err != nil {
		record.ErrorMessage = err.Error()
	}
	if recordErr := RecordRestore(record); recordErr != nil {
		e.log.Warn("Failed to record restore metrics", "error", recordErr)
	}

	if err != nil {
		e.progress.Fail(fmt.Sprintf("Restore failed: %v", err))
		operation.Fail(fmt.Sprintf("Restore failed: %v", err))
@ -1056,6 +1090,7 @@ func (e *Engine) RestoreSingleFromCluster(ctx context.Context, clusterArchivePat
// This avoids double extraction when ValidateAndExtractCluster was already called
func (e *Engine) RestoreCluster(ctx context.Context, archivePath string, preExtractedPath ...string) error {
	operation := e.log.StartOperation("Cluster Restore")
	clusterStartTime := time.Now()

	// 🚀 LOG ACTUAL PERFORMANCE SETTINGS - helps debug slow restores
	profile := e.cfg.GetCurrentProfile()
@ -1829,14 +1864,60 @@ func (e *Engine) RestoreCluster(ctx context.Context, archivePath string, preExtr
		e.progress.Fail(fmt.Sprintf("Cluster restore: %d succeeded, %d failed out of %d total", successCountFinal, failCountFinal, totalDBs))
		operation.Complete(fmt.Sprintf("Partial restore: %d/%d databases succeeded", successCountFinal, totalDBs))

		// Record cluster restore metrics (partial failure)
		e.recordClusterRestoreMetrics(clusterStartTime, archivePath, totalDBs, successCountFinal, false, restoreErrors.Error())

		return fmt.Errorf("cluster restore completed with %d failures:\n%s", failCountFinal, restoreErrors.Error())
	}

	e.progress.Complete(fmt.Sprintf("Cluster restored successfully: %d databases", successCountFinal))
	operation.Complete(fmt.Sprintf("Restored %d databases from cluster archive", successCountFinal))

	// Record cluster restore metrics (success)
	e.recordClusterRestoreMetrics(clusterStartTime, archivePath, totalDBs, successCountFinal, true, "")

	return nil
}

// recordClusterRestoreMetrics records metrics for cluster restore operations
func (e *Engine) recordClusterRestoreMetrics(startTime time.Time, archivePath string, totalDBs, successCount int, success bool, errorMsg string) {
	duration := time.Since(startTime)

	// Get archive size
	var archiveSize int64
	if fi, err := os.Stat(archivePath); err == nil {
		archiveSize = fi.Size()
	}

	record := RestoreRecord{
		Database:     "cluster",
		Engine:       "postgresql",
		StartedAt:    startTime,
		CompletedAt:  time.Now(),
		Duration:     duration,
		SizeBytes:    archiveSize,
		ParallelJobs: e.cfg.Jobs,
		Profile:      e.cfg.ResourceProfile,
		Success:      success,
		SourceFile:   filepath.Base(archivePath),
		IsCluster:    true,
		ErrorMessage: errorMsg,
	}

	if recordErr := RecordRestore(record); recordErr != nil {
		e.log.Warn("Failed to record cluster restore metrics", "error", recordErr)
	}

	// Log performance summary; guard the per-database average against
	// a division-by-zero panic when no databases were found in the archive
	avgPerDB := time.Duration(0)
	if totalDBs > 0 {
		avgPerDB = (duration / time.Duration(totalDBs)).Round(time.Second)
	}
	e.log.Info("📊 RESTORE PERFORMANCE SUMMARY",
		"total_duration", duration.Round(time.Second),
		"databases", totalDBs,
		"successful", successCount,
		"parallel_jobs", e.cfg.Jobs,
		"profile", e.cfg.ResourceProfile,
		"avg_per_db", avgPerDB)
}

// extractArchive extracts a tar.gz archive with progress reporting
func (e *Engine) extractArchive(ctx context.Context, archivePath, destDir string) error {
	// If progress callback is set, use Go's archive/tar for progress tracking
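With a structured logger, the summary above renders roughly as follows — the exact key=value layout depends on the logger backend, and the numbers are illustrative (250s across 5 databases gives 50s each):

📊 RESTORE PERFORMANCE SUMMARY total_duration=4m10s databases=5 successful=5 parallel_jobs=8 profile=turbo avg_per_db=50s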
220
internal/restore/metrics.go
Normal file
@ -0,0 +1,220 @@
// Package restore - metrics recording for restore operations
package restore

import (
	"encoding/json"
	"fmt"
	"os"
	"path/filepath"
	"strings"
	"sync"
	"time"
)

// RestoreRecord represents a single restore operation for metrics
type RestoreRecord struct {
	Database     string        `json:"database"`
	Engine       string        `json:"engine"` // postgresql, mysql
	StartedAt    time.Time     `json:"started_at"`
	CompletedAt  time.Time     `json:"completed_at"`
	Duration     time.Duration `json:"duration_ns"`
	DurationSecs float64       `json:"duration_seconds"`
	SizeBytes    int64         `json:"size_bytes"`
	ParallelJobs int           `json:"parallel_jobs"`
	Profile      string        `json:"profile"`
	Success      bool          `json:"success"`
	ErrorMessage string        `json:"error_message,omitempty"`
	SourceFile   string        `json:"source_file"`
	TargetDB     string        `json:"target_db,omitempty"`
	IsCluster    bool          `json:"is_cluster"`
	Server       string        `json:"server"` // hostname
}

// RestoreMetricsFile holds all restore records for Prometheus scraping
type RestoreMetricsFile struct {
	Records   []RestoreRecord `json:"records"`
	UpdatedAt time.Time       `json:"updated_at"`
	mu        sync.Mutex
}

var (
	metricsFile     *RestoreMetricsFile
	metricsFilePath string
	metricsOnce     sync.Once
)

// InitMetrics initializes the restore metrics system
func InitMetrics(dataDir string) error {
	metricsOnce.Do(func() {
		metricsFilePath = filepath.Join(dataDir, "restore_metrics.json")
		metricsFile = &RestoreMetricsFile{
			Records: make([]RestoreRecord, 0),
		}
		// Try to load existing metrics
		_ = metricsFile.load()
	})
	return nil
}

// RecordRestore records a restore operation for Prometheus metrics
func RecordRestore(record RestoreRecord) error {
	if metricsFile == nil {
		// Auto-initialize with default path if not initialized
		homeDir, _ := os.UserHomeDir()
		dataDir := filepath.Join(homeDir, ".dbbackup")
		if err := InitMetrics(dataDir); err != nil {
			return err
		}
	}

	metricsFile.mu.Lock()
	defer metricsFile.mu.Unlock()

	// Calculate duration in seconds
	record.DurationSecs = record.Duration.Seconds()

	// Get hostname for server label
	if record.Server == "" {
		hostname, _ := os.Hostname()
		record.Server = hostname
	}

	// Append record
	metricsFile.Records = append(metricsFile.Records, record)

	// Keep only last 1000 records to prevent unbounded growth
	if len(metricsFile.Records) > 1000 {
		metricsFile.Records = metricsFile.Records[len(metricsFile.Records)-1000:]
	}

	metricsFile.UpdatedAt = time.Now()

	return metricsFile.save()
}

// GetMetrics returns all restore metrics
func GetMetrics() []RestoreRecord {
	if metricsFile == nil {
		return nil
	}
	metricsFile.mu.Lock()
	defer metricsFile.mu.Unlock()
	result := make([]RestoreRecord, len(metricsFile.Records))
	copy(result, metricsFile.Records)
	return result
}

// GetLatestByDatabase returns the most recent restore for each database
func GetLatestByDatabase() map[string]RestoreRecord {
	records := GetMetrics()
	result := make(map[string]RestoreRecord)
	for _, r := range records {
		existing, exists := result[r.Database]
		if !exists || r.CompletedAt.After(existing.CompletedAt) {
			result[r.Database] = r
		}
	}
	return result
}

func (m *RestoreMetricsFile) load() error {
	data, err := os.ReadFile(metricsFilePath)
	if err != nil {
		if os.IsNotExist(err) {
			return nil // OK, no previous data
		}
		return err
	}
	return json.Unmarshal(data, m)
}

func (m *RestoreMetricsFile) save() error {
	// Ensure directory exists
	if err := os.MkdirAll(filepath.Dir(metricsFilePath), 0755); err != nil {
		return fmt.Errorf("failed to create metrics directory: %w", err)
	}

	data, err := json.MarshalIndent(m, "", " ")
	if err != nil {
		return err
	}

	// Atomic write
	tmpPath := metricsFilePath + ".tmp"
	if err := os.WriteFile(tmpPath, data, 0644); err != nil {
		return err
	}
	return os.Rename(tmpPath, metricsFilePath)
}

// FormatPrometheusMetrics outputs restore metrics in Prometheus format
func FormatPrometheusMetrics() string {
	latest := GetLatestByDatabase()
	if len(latest) == 0 {
		return ""
	}

	var b strings.Builder

	// Aggregate totals
	successByDB := make(map[string]int)
	failureByDB := make(map[string]int)
	for _, r := range GetMetrics() {
		if r.Success {
			successByDB[r.Database]++
		} else {
			failureByDB[r.Database]++
		}
	}

	b.WriteString("# HELP dbbackup_restore_total Total number of restore operations\n")
	b.WriteString("# TYPE dbbackup_restore_total counter\n")
	for db, count := range successByDB {
		rec := latest[db]
		b.WriteString(fmt.Sprintf("dbbackup_restore_total{server=%q,database=%q,status=\"success\"} %d\n",
			rec.Server, db, count))
	}
	for db, count := range failureByDB {
		rec := latest[db]
		b.WriteString(fmt.Sprintf("dbbackup_restore_total{server=%q,database=%q,status=\"failure\"} %d\n",
			rec.Server, db, count))
	}
	b.WriteString("\n")

	b.WriteString("# HELP dbbackup_restore_duration_seconds Duration of last restore in seconds\n")
	b.WriteString("# TYPE dbbackup_restore_duration_seconds gauge\n")
	for db, rec := range latest {
		b.WriteString(fmt.Sprintf("dbbackup_restore_duration_seconds{server=%q,database=%q,profile=%q,parallel_jobs=\"%d\"} %.2f\n",
			rec.Server, db, rec.Profile, rec.ParallelJobs, rec.DurationSecs))
	}
	b.WriteString("\n")

	b.WriteString("# HELP dbbackup_restore_parallel_jobs Number of parallel jobs used\n")
	b.WriteString("# TYPE dbbackup_restore_parallel_jobs gauge\n")
	for db, rec := range latest {
		b.WriteString(fmt.Sprintf("dbbackup_restore_parallel_jobs{server=%q,database=%q,profile=%q} %d\n",
			rec.Server, db, rec.Profile, rec.ParallelJobs))
	}
	b.WriteString("\n")

	b.WriteString("# HELP dbbackup_restore_size_bytes Size of restored archive in bytes\n")
	b.WriteString("# TYPE dbbackup_restore_size_bytes gauge\n")
	for db, rec := range latest {
		b.WriteString(fmt.Sprintf("dbbackup_restore_size_bytes{server=%q,database=%q} %d\n",
			rec.Server, db, rec.SizeBytes))
	}
	b.WriteString("\n")

	b.WriteString("# HELP dbbackup_restore_last_timestamp Unix timestamp of last restore\n")
	b.WriteString("# TYPE dbbackup_restore_last_timestamp gauge\n")
	for db, rec := range latest {
		status := "success"
		if !rec.Success {
			status = "failure"
		}
		b.WriteString(fmt.Sprintf("dbbackup_restore_last_timestamp{server=%q,database=%q,status=%q} %d\n",
			rec.Server, db, status, rec.CompletedAt.Unix()))
	}

	return b.String()
}
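Pulling the new package together, a minimal caller-side sketch — the import path is assumed from the repository layout (internal packages are importable from within the module), values are illustrative, and error handling is elided:

package main

import (
	"fmt"
	"time"

	"dbbackup/internal/restore" // assumed module path
)

func main() {
	// Optional: RecordRestore falls back to ~/.dbbackup when InitMetrics was never called.
	_ = restore.InitMetrics("/var/lib/dbbackup")

	start := time.Now()
	// ... run the actual restore here ...

	_ = restore.RecordRestore(restore.RestoreRecord{
		Database:     "appdb",
		Engine:       "postgresql",
		StartedAt:    start,
		CompletedAt:  time.Now(),
		Duration:     time.Since(start),
		SizeBytes:    734003200,
		ParallelJobs: 8,
		Profile:      "turbo",
		Success:      true,
		SourceFile:   "appdb_20260201.dump.gz",
	})

	// Emit the Prometheus exposition text for everything recorded so far.
	fmt.Print(restore.FormatPrometheusMetrics())
}

RecordRestore fills in DurationSecs and the Server label itself, so callers only supply what they know.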
@ -17,7 +17,7 @@ import (

// CatalogDashboardView displays an interactive catalog browser
type CatalogDashboardView struct {
	catalog catalog.Catalog
	_       catalog.Catalog // Stored for future use
	entries   []*catalog.Entry
	databases []string
	cursor    int
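A side note on the `_ catalog.Catalog` change: Go permits blank-identifier struct fields; they occupy a position in the struct but can never be read or written by name, so the field documents intent rather than actually storing anything retrievable. A tiny sketch:

type holder struct {
	_ int // blank field: reserves a slot, but no code can reference it
	n int
}

func demo() int {
	h := holder{n: 1} // keyed literal; the blank field simply cannot be set by name
	return h.n
}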
@ -223,10 +223,7 @@ func (s *ScheduleView) View() string {

	// Display timers
	for _, timer := range s.timers {
		name := timer.Name
		if strings.HasSuffix(name, ".timer") {
			name = strings.TrimSuffix(name, ".timer")
		}
		name := strings.TrimSuffix(timer.Name, ".timer")

		b.WriteString(successStyle.Render(fmt.Sprintf("[TIMER] %s", name)))
		b.WriteString("\n")
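The replacement line relies on a stdlib guarantee: strings.TrimSuffix returns the string unchanged when the suffix is absent, so the HasSuffix guard it replaces was redundant. For instance:

strings.TrimSuffix("backup.timer", ".timer") // "backup"
strings.TrimSuffix("backup", ".timer")       // "backup" (unchanged)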