Enhance network resilience for mobile scenarios in HMAC File Server 3.2
- Introduced fast detection and quality monitoring for network changes. - Added predictive switching to proactively handle network failures. - Updated configuration examples and README for mobile network resilience. - Enhanced network resilience settings in Podman configuration. - Created a new configuration file for optimized mobile network resilience.
This commit is contained in:
88
README.md
88
README.md
@ -109,6 +109,84 @@ chmod +x hmac-file-server-linux-amd64
|
|||||||
|
|
||||||
---
|
---
|
||||||
|
|
||||||
|
## Mobile Network Resilience
|
||||||
|
|
||||||
|
HMAC File Server 3.2 introduces enhanced network resilience specifically designed for mobile devices and network switching scenarios.
|
||||||
|
|
||||||
|
### 📱 **Mobile Network Switching Support**
|
||||||
|
|
||||||
|
#### **Scenario 1: WLAN ↔ IPv6 5G Switching**
|
||||||
|
Perfect for mobile devices that switch between WiFi and cellular networks:
|
||||||
|
|
||||||
|
```toml
|
||||||
|
[server]
|
||||||
|
networkevents = true # REQUIRED: Enable network monitoring
|
||||||
|
|
||||||
|
[network_resilience]
|
||||||
|
fast_detection = true # 1-second detection vs 5-second default
|
||||||
|
quality_monitoring = true # Monitor connection quality
|
||||||
|
predictive_switching = true # Switch before network fails
|
||||||
|
mobile_optimizations = true # Cellular-friendly settings
|
||||||
|
|
||||||
|
[uploads]
|
||||||
|
session_recovery_timeout = "600s" # 10-minute recovery window for IP changes
|
||||||
|
client_reconnect_window = "300s" # 5-minute reconnection window
|
||||||
|
max_resumable_age = "72h" # Extended session retention
|
||||||
|
max_upload_retries = 8 # More retries for cellular
|
||||||
|
|
||||||
|
[timeouts]
|
||||||
|
readtimeout = "600s" # Extended for cellular latency
|
||||||
|
writetimeout = "600s" # Handle cellular upload delays
|
||||||
|
idletimeout = "1200s" # 20-minute tolerance
|
||||||
|
```
|
||||||
|
|
||||||
|
#### **Scenario 2: Dual-Connected Devices (Wired + WiFi)**
|
||||||
|
For devices with multiple network interfaces:
|
||||||
|
|
||||||
|
```toml
|
||||||
|
[network_resilience]
|
||||||
|
fast_detection = true # Quick interface change detection
|
||||||
|
quality_monitoring = true # Monitor both connections
|
||||||
|
predictive_switching = true # Use best available interface
|
||||||
|
|
||||||
|
# System automatically selects best interface based on:
|
||||||
|
# - RTT (latency)
|
||||||
|
# - Packet loss percentage
|
||||||
|
# - Connection stability
|
||||||
|
# - Interface priority (ethernet > wifi > cellular)
|
||||||
|
```
|
||||||
|
|
||||||
|
### **Benefits for Mobile Scenarios**
|
||||||
|
|
||||||
|
| Feature | Standard Detection | Enhanced Mobile Detection |
|
||||||
|
|---------|-------------------|---------------------------|
|
||||||
|
| **Detection Speed** | 5 seconds | **1 second** |
|
||||||
|
| **Network Quality** | Interface status only | **RTT + packet loss monitoring** |
|
||||||
|
| **Switching Logic** | Reactive (after failure) | **Proactive (before failure)** |
|
||||||
|
| **Mobile Tolerance** | Fixed thresholds | **Cellular-optimized thresholds** |
|
||||||
|
| **Session Recovery** | 2-minute window | **10-minute window** |
|
||||||
|
| **Upload Resumption** | Basic retry | **Smart retry with backoff** |
|
||||||
|
|
||||||
|
### **Configuration Examples**
|
||||||
|
|
||||||
|
**Ultra-Fast Mobile Detection**:
|
||||||
|
```toml
|
||||||
|
[network_resilience]
|
||||||
|
detection_interval = "500ms" # Sub-second detection
|
||||||
|
quality_check_interval = "2s" # Frequent quality checks
|
||||||
|
mobile_optimizations = true # Lenient cellular thresholds
|
||||||
|
```
|
||||||
|
|
||||||
|
**Conservative Stable Network**:
|
||||||
|
```toml
|
||||||
|
[network_resilience]
|
||||||
|
detection_interval = "10s" # Slower detection
|
||||||
|
quality_monitoring = false # Disable quality checks
|
||||||
|
predictive_switching = false # React only to hard failures
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
## Configuration Generation
|
## Configuration Generation
|
||||||
|
|
||||||
### Generate Minimal Configuration
|
### Generate Minimal Configuration
|
||||||
@ -221,6 +299,16 @@ disable_keep_alives = false # Disable HTTP keep-alives
|
|||||||
client_timeout = "300s" # Client request timeout
|
client_timeout = "300s" # Client request timeout
|
||||||
restart_grace_period = "60s" # Grace period after restart
|
restart_grace_period = "60s" # Grace period after restart
|
||||||
|
|
||||||
|
# Enhanced Network Resilience (v3.2+)
|
||||||
|
[network_resilience]
|
||||||
|
fast_detection = true # Enable 1-second network change detection (vs 5-second default)
|
||||||
|
quality_monitoring = true # Monitor RTT and packet loss per interface
|
||||||
|
predictive_switching = true # Switch proactively before network failure
|
||||||
|
mobile_optimizations = true # Use mobile-friendly thresholds for cellular networks
|
||||||
|
detection_interval = "1s" # Network change detection interval
|
||||||
|
quality_check_interval = "5s" # Connection quality monitoring interval
|
||||||
|
max_detection_interval = "10s" # Maximum detection interval during stable periods
|
||||||
|
|
||||||
[uploads]
|
[uploads]
|
||||||
# File upload configuration
|
# File upload configuration
|
||||||
allowed_extensions = [".zip", ".rar", ".jpg", ".jpeg", ".png", ".gif", ".webp", ".pdf", ".txt", ".mp4", ".mov", ".ogg", ".mp3", ".doc", ".docx"] # Permitted upload file extensions (XMPP-compatible)
|
allowed_extensions = [".zip", ".rar", ".jpg", ".jpeg", ".png", ".gif", ".webp", ".pdf", ".txt", ".mp4", ".mov", ".ogg", ".mp3", ".doc", ".docx"] # Permitted upload file extensions (XMPP-compatible)
|
||||||
|
@ -223,6 +223,16 @@ type BuildConfig struct {
|
|||||||
Version string `mapstructure:"version"` // Updated version
|
Version string `mapstructure:"version"` // Updated version
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// NetworkResilienceConfig mirrors the [network_resilience] section of the
// configuration file. The three interval fields are kept as raw strings and
// are converted with time.ParseDuration when the resilience manager is built.
type NetworkResilienceConfig struct {
	// Feature switches.
	FastDetection       bool `toml:"fast_detection" mapstructure:"fast_detection"`
	QualityMonitoring   bool `toml:"quality_monitoring" mapstructure:"quality_monitoring"`
	PredictiveSwitching bool `toml:"predictive_switching" mapstructure:"predictive_switching"`
	MobileOptimizations bool `toml:"mobile_optimizations" mapstructure:"mobile_optimizations"`

	// Timing settings, written as Go duration strings such as "1s" or "500ms".
	DetectionInterval    string `toml:"detection_interval" mapstructure:"detection_interval"`
	QualityCheckInterval string `toml:"quality_check_interval" mapstructure:"quality_check_interval"`
	MaxDetectionInterval string `toml:"max_detection_interval" mapstructure:"max_detection_interval"`
}
|
||||||
|
|
||||||
// This is the main Config struct to be used
|
// This is the main Config struct to be used
|
||||||
type Config struct {
|
type Config struct {
|
||||||
Server ServerConfig `mapstructure:"server"`
|
Server ServerConfig `mapstructure:"server"`
|
||||||
@ -239,6 +249,7 @@ type Config struct {
|
|||||||
Workers WorkersConfig `mapstructure:"workers"`
|
Workers WorkersConfig `mapstructure:"workers"`
|
||||||
File FileConfig `mapstructure:"file"`
|
File FileConfig `mapstructure:"file"`
|
||||||
Build BuildConfig `mapstructure:"build"`
|
Build BuildConfig `mapstructure:"build"`
|
||||||
|
NetworkResilience NetworkResilienceConfig `mapstructure:"network_resilience"`
|
||||||
}
|
}
|
||||||
|
|
||||||
type UploadTask struct {
|
type UploadTask struct {
|
||||||
|
@ -1,4 +1,4 @@
|
|||||||
// network_resilience.go - Network resilience middleware without modifying core functions
|
// network_resilience.go - Enhanced network resilience with quality monitoring and fast detection
|
||||||
|
|
||||||
package main
|
package main
|
||||||
|
|
||||||
@ -8,6 +8,7 @@ import (
|
|||||||
"net/http"
|
"net/http"
|
||||||
"sync"
|
"sync"
|
||||||
"time"
|
"time"
|
||||||
|
"os/exec"
|
||||||
)
|
)
|
||||||
|
|
||||||
// NetworkResilienceManager handles network change detection and upload pausing
|
// NetworkResilienceManager handles network change detection and upload pausing
|
||||||
@ -18,6 +19,81 @@ type NetworkResilienceManager struct {
|
|||||||
pauseChannel chan bool
|
pauseChannel chan bool
|
||||||
resumeChannel chan bool
|
resumeChannel chan bool
|
||||||
lastInterfaces []net.Interface
|
lastInterfaces []net.Interface
|
||||||
|
|
||||||
|
// Enhanced monitoring
|
||||||
|
qualityMonitor *NetworkQualityMonitor
|
||||||
|
adaptiveTicker *AdaptiveTicker
|
||||||
|
config *NetworkResilienceConfigLocal
|
||||||
|
}
|
||||||
|
|
||||||
|
// NetworkQualityMonitor tracks connection quality per interface
|
||||||
|
type NetworkQualityMonitor struct {
|
||||||
|
interfaces map[string]*InterfaceQuality
|
||||||
|
mutex sync.RWMutex
|
||||||
|
thresholds NetworkThresholds
|
||||||
|
}
|
||||||
|
|
||||||
|
// InterfaceQuality represents the quality metrics of a network interface
|
||||||
|
type InterfaceQuality struct {
|
||||||
|
Name string
|
||||||
|
RTT time.Duration
|
||||||
|
PacketLoss float64
|
||||||
|
Bandwidth int64
|
||||||
|
Stability float64
|
||||||
|
LastGood time.Time
|
||||||
|
Connectivity ConnectivityState
|
||||||
|
Samples []QualitySample
|
||||||
|
}
|
||||||
|
|
||||||
|
// QualitySample is a single point-in-time probe result.
type QualitySample struct {
	Timestamp  time.Time     // when the probe was taken
	RTT        time.Duration // measured round trip, or the timeout value on failure
	PacketLoss float64       // NOTE(review): not populated by the probe in this file
	Success    bool          // true when the probe command exited without error
}
|
||||||
|
|
||||||
|
// ConnectivityState classifies how usable an interface currently is.
type ConnectivityState int

// Connectivity states, ordered from "not yet measured" to "no recent success".
const (
	ConnectivityUnknown  ConnectivityState = iota // no assessment yet
	ConnectivityGood                              // all metrics within thresholds
	ConnectivityDegraded                          // a warning threshold was crossed
	ConnectivityPoor                              // a critical threshold was crossed
	ConnectivityFailed                            // no successful sample recently
)
|
||||||
|
|
||||||
|
// NetworkThresholds groups the limits used to classify connection quality.
// The values in the trailing comments are the standard defaults; the manager
// constructor substitutes more lenient ones when mobile optimizations are on.
type NetworkThresholds struct {
	// Round-trip time limits.
	RTTWarning  time.Duration // 200ms
	RTTCritical time.Duration // 1000ms

	// Packet loss limits, in percent.
	PacketLossWarn float64 // 2%
	PacketLossCrit float64 // 10%

	// Minimum stability score before an interface counts as degraded.
	StabilityMin float64 // 0.8

	// Number of samples to keep per interface.
	SampleWindow int
}
|
||||||
|
|
||||||
|
// NetworkResilienceConfigLocal is the resolved form of the network resilience
// settings used by the manager at runtime: the same switches as the file-level
// configuration, with the intervals already converted to time.Duration.
type NetworkResilienceConfigLocal struct {
	// Feature switches.
	FastDetection       bool `toml:"fast_detection"`
	QualityMonitoring   bool `toml:"quality_monitoring"`
	PredictiveSwitching bool `toml:"predictive_switching"`
	MobileOptimizations bool `toml:"mobile_optimizations"`

	// Resolved timing settings.
	DetectionInterval    time.Duration `toml:"detection_interval"`
	QualityCheckInterval time.Duration `toml:"quality_check_interval"`
	MaxDetectionInterval time.Duration `toml:"max_detection_interval"`
}
|
||||||
|
|
||||||
|
// AdaptiveTicker wraps a time.Ticker whose period can be changed at runtime,
// so network polling can slow down while stable and speed up when unstable.
type AdaptiveTicker struct {
	// C delivers the ticks; it is the channel of the wrapped ticker.
	C      <-chan time.Time
	ticker *time.Ticker

	// Bounds for the polling period and the period currently in effect.
	minInterval     time.Duration
	maxInterval     time.Duration
	currentInterval time.Duration

	// unstableCount counts consecutive unstable reports; a stable report
	// resets it to zero.
	unstableCount int

	// done is closed when the ticker is stopped.
	done chan bool
}
|
||||||
|
|
||||||
// UploadContext tracks active upload state
|
// UploadContext tracks active upload state
|
||||||
@ -29,22 +105,149 @@ type UploadContext struct {
|
|||||||
IsPaused bool
|
IsPaused bool
|
||||||
}
|
}
|
||||||
|
|
||||||
// NewNetworkResilienceManager creates a new network resilience manager
|
// NewNetworkResilienceManager creates a new network resilience manager with enhanced capabilities
|
||||||
func NewNetworkResilienceManager() *NetworkResilienceManager {
|
func NewNetworkResilienceManager() *NetworkResilienceManager {
|
||||||
|
// Get configuration from global config, with sensible defaults
|
||||||
|
config := &NetworkResilienceConfigLocal{
|
||||||
|
FastDetection: true,
|
||||||
|
QualityMonitoring: true,
|
||||||
|
PredictiveSwitching: true,
|
||||||
|
MobileOptimizations: true,
|
||||||
|
DetectionInterval: 1 * time.Second,
|
||||||
|
QualityCheckInterval: 5 * time.Second,
|
||||||
|
MaxDetectionInterval: 10 * time.Second,
|
||||||
|
}
|
||||||
|
|
||||||
|
// Override with values from config file if available
|
||||||
|
if conf.NetworkResilience.DetectionInterval != "" {
|
||||||
|
if duration, err := time.ParseDuration(conf.NetworkResilience.DetectionInterval); err == nil {
|
||||||
|
config.DetectionInterval = duration
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if conf.NetworkResilience.QualityCheckInterval != "" {
|
||||||
|
if duration, err := time.ParseDuration(conf.NetworkResilience.QualityCheckInterval); err == nil {
|
||||||
|
config.QualityCheckInterval = duration
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if conf.NetworkResilience.MaxDetectionInterval != "" {
|
||||||
|
if duration, err := time.ParseDuration(conf.NetworkResilience.MaxDetectionInterval); err == nil {
|
||||||
|
config.MaxDetectionInterval = duration
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Override boolean settings if explicitly set
|
||||||
|
config.FastDetection = conf.NetworkResilience.FastDetection
|
||||||
|
config.QualityMonitoring = conf.NetworkResilience.QualityMonitoring
|
||||||
|
config.PredictiveSwitching = conf.NetworkResilience.PredictiveSwitching
|
||||||
|
config.MobileOptimizations = conf.NetworkResilience.MobileOptimizations
|
||||||
|
|
||||||
|
// Create quality monitor with mobile-optimized thresholds
|
||||||
|
thresholds := NetworkThresholds{
|
||||||
|
RTTWarning: 200 * time.Millisecond,
|
||||||
|
RTTCritical: 1000 * time.Millisecond,
|
||||||
|
PacketLossWarn: 2.0,
|
||||||
|
PacketLossCrit: 10.0,
|
||||||
|
StabilityMin: 0.8,
|
||||||
|
SampleWindow: 10,
|
||||||
|
}
|
||||||
|
|
||||||
|
// Adjust thresholds for mobile optimizations
|
||||||
|
if config.MobileOptimizations {
|
||||||
|
thresholds.RTTWarning = 500 * time.Millisecond // More lenient for cellular
|
||||||
|
thresholds.RTTCritical = 2000 * time.Millisecond // Account for cellular latency
|
||||||
|
thresholds.PacketLossWarn = 5.0 // Higher tolerance for mobile
|
||||||
|
thresholds.PacketLossCrit = 15.0 // Mobile networks can be lossy
|
||||||
|
thresholds.StabilityMin = 0.6 // Lower stability expectations
|
||||||
|
}
|
||||||
|
|
||||||
|
qualityMonitor := &NetworkQualityMonitor{
|
||||||
|
interfaces: make(map[string]*InterfaceQuality),
|
||||||
|
thresholds: thresholds,
|
||||||
|
}
|
||||||
|
|
||||||
manager := &NetworkResilienceManager{
|
manager := &NetworkResilienceManager{
|
||||||
activeUploads: make(map[string]*UploadContext),
|
activeUploads: make(map[string]*UploadContext),
|
||||||
pauseChannel: make(chan bool, 100),
|
pauseChannel: make(chan bool, 100),
|
||||||
resumeChannel: make(chan bool, 100),
|
resumeChannel: make(chan bool, 100),
|
||||||
|
qualityMonitor: qualityMonitor,
|
||||||
|
config: config,
|
||||||
}
|
}
|
||||||
|
|
||||||
// Start network monitoring if enabled
|
// Create adaptive ticker for smart monitoring
|
||||||
|
manager.adaptiveTicker = NewAdaptiveTicker(
|
||||||
|
config.DetectionInterval,
|
||||||
|
config.MaxDetectionInterval,
|
||||||
|
)
|
||||||
|
|
||||||
|
// Start enhanced network monitoring if enabled
|
||||||
if conf.Server.NetworkEvents {
|
if conf.Server.NetworkEvents {
|
||||||
go manager.monitorNetworkChanges()
|
if config.FastDetection {
|
||||||
|
go manager.monitorNetworkChangesEnhanced()
|
||||||
|
log.Info("Fast network change detection enabled")
|
||||||
|
} else {
|
||||||
|
go manager.monitorNetworkChanges() // Fallback to original method
|
||||||
|
log.Info("Standard network change detection enabled")
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if config.QualityMonitoring {
|
||||||
|
go manager.monitorNetworkQuality()
|
||||||
|
log.Info("Network quality monitoring enabled")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
log.Infof("Enhanced network resilience manager initialized with fast_detection=%v, quality_monitoring=%v, predictive_switching=%v",
|
||||||
|
config.FastDetection, config.QualityMonitoring, config.PredictiveSwitching)
|
||||||
return manager
|
return manager
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// NewAdaptiveTicker creates a ticker that adjusts its interval based on network stability
|
||||||
|
func NewAdaptiveTicker(minInterval, maxInterval time.Duration) *AdaptiveTicker {
|
||||||
|
ticker := &AdaptiveTicker{
|
||||||
|
minInterval: minInterval,
|
||||||
|
maxInterval: maxInterval,
|
||||||
|
currentInterval: minInterval,
|
||||||
|
done: make(chan bool),
|
||||||
|
}
|
||||||
|
|
||||||
|
// Create initial ticker
|
||||||
|
ticker.ticker = time.NewTicker(minInterval)
|
||||||
|
ticker.C = ticker.ticker.C
|
||||||
|
|
||||||
|
return ticker
|
||||||
|
}
|
||||||
|
|
||||||
|
// AdjustInterval adjusts the ticker interval based on network stability
|
||||||
|
func (t *AdaptiveTicker) AdjustInterval(stable bool) {
|
||||||
|
if stable {
|
||||||
|
// Network is stable, slow down monitoring
|
||||||
|
t.unstableCount = 0
|
||||||
|
newInterval := t.currentInterval * 2
|
||||||
|
if newInterval > t.maxInterval {
|
||||||
|
newInterval = t.maxInterval
|
||||||
|
}
|
||||||
|
if newInterval != t.currentInterval {
|
||||||
|
t.currentInterval = newInterval
|
||||||
|
t.ticker.Reset(newInterval)
|
||||||
|
log.Debugf("Network stable, slowing monitoring to %v", newInterval)
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
// Network is unstable, speed up monitoring
|
||||||
|
t.unstableCount++
|
||||||
|
newInterval := t.minInterval
|
||||||
|
if newInterval != t.currentInterval {
|
||||||
|
t.currentInterval = newInterval
|
||||||
|
t.ticker.Reset(newInterval)
|
||||||
|
log.Debugf("Network unstable, accelerating monitoring to %v", newInterval)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Stop stops the adaptive ticker
|
||||||
|
func (t *AdaptiveTicker) Stop() {
|
||||||
|
t.ticker.Stop()
|
||||||
|
close(t.done)
|
||||||
|
}
|
||||||
|
|
||||||
// RegisterUpload registers an active upload for pause/resume functionality
|
// RegisterUpload registers an active upload for pause/resume functionality
|
||||||
func (m *NetworkResilienceManager) RegisterUpload(sessionID string) *UploadContext {
|
func (m *NetworkResilienceManager) RegisterUpload(sessionID string) *UploadContext {
|
||||||
m.mutex.Lock()
|
m.mutex.Lock()
|
||||||
@ -123,11 +326,302 @@ func (m *NetworkResilienceManager) ResumeAllUploads() {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// monitorNetworkChanges monitors for network interface changes
|
// monitorNetworkChangesEnhanced provides fast detection with quality monitoring
|
||||||
|
func (m *NetworkResilienceManager) monitorNetworkChangesEnhanced() {
|
||||||
|
log.Info("Starting enhanced network monitoring with fast detection")
|
||||||
|
|
||||||
|
// Get initial interface state
|
||||||
|
m.lastInterfaces, _ = net.Interfaces()
|
||||||
|
|
||||||
|
// Initialize quality monitoring for current interfaces
|
||||||
|
m.initializeInterfaceQuality()
|
||||||
|
|
||||||
|
for {
|
||||||
|
select {
|
||||||
|
case <-m.adaptiveTicker.C:
|
||||||
|
currentInterfaces, err := net.Interfaces()
|
||||||
|
if err != nil {
|
||||||
|
log.Warnf("Failed to get network interfaces: %v", err)
|
||||||
|
m.adaptiveTicker.AdjustInterval(false) // Network is unstable
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
// Check for interface changes
|
||||||
|
interfaceChanged := m.hasNetworkChanges(m.lastInterfaces, currentInterfaces)
|
||||||
|
|
||||||
|
// Check for quality degradation (predictive switching)
|
||||||
|
qualityDegraded := false
|
||||||
|
if m.config.PredictiveSwitching {
|
||||||
|
qualityDegraded = m.checkQualityDegradation()
|
||||||
|
}
|
||||||
|
|
||||||
|
networkUnstable := interfaceChanged || qualityDegraded
|
||||||
|
|
||||||
|
if interfaceChanged {
|
||||||
|
log.Infof("Network interface change detected")
|
||||||
|
m.handleNetworkSwitch("interface_change")
|
||||||
|
} else if qualityDegraded {
|
||||||
|
log.Infof("Network quality degradation detected, preparing for switch")
|
||||||
|
m.prepareForNetworkSwitch()
|
||||||
|
}
|
||||||
|
|
||||||
|
// Adjust monitoring frequency based on stability
|
||||||
|
m.adaptiveTicker.AdjustInterval(!networkUnstable)
|
||||||
|
|
||||||
|
m.lastInterfaces = currentInterfaces
|
||||||
|
|
||||||
|
case <-m.adaptiveTicker.done:
|
||||||
|
log.Info("Network monitoring stopped")
|
||||||
|
return
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// monitorNetworkQuality continuously monitors connection quality
|
||||||
|
func (m *NetworkResilienceManager) monitorNetworkQuality() {
|
||||||
|
ticker := time.NewTicker(m.config.QualityCheckInterval)
|
||||||
|
defer ticker.Stop()
|
||||||
|
|
||||||
|
log.Info("Starting network quality monitoring")
|
||||||
|
|
||||||
|
for {
|
||||||
|
select {
|
||||||
|
case <-ticker.C:
|
||||||
|
m.updateNetworkQuality()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// initializeInterfaceQuality sets up quality monitoring for current interfaces
|
||||||
|
func (m *NetworkResilienceManager) initializeInterfaceQuality() {
|
||||||
|
interfaces, err := net.Interfaces()
|
||||||
|
if err != nil {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
m.qualityMonitor.mutex.Lock()
|
||||||
|
defer m.qualityMonitor.mutex.Unlock()
|
||||||
|
|
||||||
|
for _, iface := range interfaces {
|
||||||
|
if iface.Flags&net.FlagLoopback == 0 && iface.Flags&net.FlagUp != 0 {
|
||||||
|
m.qualityMonitor.interfaces[iface.Name] = &InterfaceQuality{
|
||||||
|
Name: iface.Name,
|
||||||
|
Connectivity: ConnectivityUnknown,
|
||||||
|
LastGood: time.Now(),
|
||||||
|
Samples: make([]QualitySample, 0, m.qualityMonitor.thresholds.SampleWindow),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// updateNetworkQuality measures and updates quality metrics for all interfaces
|
||||||
|
func (m *NetworkResilienceManager) updateNetworkQuality() {
|
||||||
|
m.qualityMonitor.mutex.Lock()
|
||||||
|
defer m.qualityMonitor.mutex.Unlock()
|
||||||
|
|
||||||
|
for name, quality := range m.qualityMonitor.interfaces {
|
||||||
|
sample := m.measureInterfaceQuality(name)
|
||||||
|
|
||||||
|
// Add sample to history
|
||||||
|
quality.Samples = append(quality.Samples, sample)
|
||||||
|
if len(quality.Samples) > m.qualityMonitor.thresholds.SampleWindow {
|
||||||
|
quality.Samples = quality.Samples[1:]
|
||||||
|
}
|
||||||
|
|
||||||
|
// Update current metrics
|
||||||
|
quality.RTT = sample.RTT
|
||||||
|
quality.PacketLoss = m.calculatePacketLoss(quality.Samples)
|
||||||
|
quality.Stability = m.calculateStability(quality.Samples)
|
||||||
|
quality.Connectivity = m.assessConnectivity(quality)
|
||||||
|
|
||||||
|
if sample.Success {
|
||||||
|
quality.LastGood = time.Now()
|
||||||
|
}
|
||||||
|
|
||||||
|
log.Debugf("Interface %s: RTT=%v, Loss=%.1f%%, Stability=%.2f, State=%v",
|
||||||
|
name, quality.RTT, quality.PacketLoss, quality.Stability, quality.Connectivity)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// measureInterfaceQuality performs a quick connectivity test for an interface
|
||||||
|
func (m *NetworkResilienceManager) measureInterfaceQuality(interfaceName string) QualitySample {
|
||||||
|
sample := QualitySample{
|
||||||
|
Timestamp: time.Now(),
|
||||||
|
RTT: 0,
|
||||||
|
Success: false,
|
||||||
|
}
|
||||||
|
|
||||||
|
// Use ping to measure RTT (simplified for demonstration)
|
||||||
|
// In production, you'd want more sophisticated testing
|
||||||
|
start := time.Now()
|
||||||
|
|
||||||
|
// Try to ping a reliable host (Google DNS)
|
||||||
|
cmd := exec.Command("ping", "-c", "1", "-W", "2", "8.8.8.8")
|
||||||
|
err := cmd.Run()
|
||||||
|
|
||||||
|
if err == nil {
|
||||||
|
sample.RTT = time.Since(start)
|
||||||
|
sample.Success = true
|
||||||
|
} else {
|
||||||
|
sample.RTT = 2 * time.Second // Timeout value
|
||||||
|
sample.Success = false
|
||||||
|
}
|
||||||
|
|
||||||
|
return sample
|
||||||
|
}
|
||||||
|
|
||||||
|
// calculatePacketLoss calculates packet loss percentage from samples
|
||||||
|
func (m *NetworkResilienceManager) calculatePacketLoss(samples []QualitySample) float64 {
|
||||||
|
if len(samples) == 0 {
|
||||||
|
return 0
|
||||||
|
}
|
||||||
|
|
||||||
|
failed := 0
|
||||||
|
for _, sample := range samples {
|
||||||
|
if !sample.Success {
|
||||||
|
failed++
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return float64(failed) / float64(len(samples)) * 100
|
||||||
|
}
|
||||||
|
|
||||||
|
// calculateStability calculates network stability from RTT variance
|
||||||
|
func (m *NetworkResilienceManager) calculateStability(samples []QualitySample) float64 {
|
||||||
|
if len(samples) < 2 {
|
||||||
|
return 1.0
|
||||||
|
}
|
||||||
|
|
||||||
|
// Calculate RTT variance
|
||||||
|
var sum, sumSquares float64
|
||||||
|
count := 0
|
||||||
|
|
||||||
|
for _, sample := range samples {
|
||||||
|
if sample.Success {
|
||||||
|
rttMs := float64(sample.RTT.Nanoseconds()) / 1e6
|
||||||
|
sum += rttMs
|
||||||
|
sumSquares += rttMs * rttMs
|
||||||
|
count++
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if count < 2 {
|
||||||
|
return 1.0
|
||||||
|
}
|
||||||
|
|
||||||
|
mean := sum / float64(count)
|
||||||
|
variance := (sumSquares / float64(count)) - (mean * mean)
|
||||||
|
|
||||||
|
// Convert variance to stability score (lower variance = higher stability)
|
||||||
|
if variance <= 100 { // Very stable (variance < 100ms²)
|
||||||
|
return 1.0
|
||||||
|
} else if variance <= 1000 { // Moderately stable
|
||||||
|
return 1.0 - (variance-100)/900*0.3 // Scale from 1.0 to 0.7
|
||||||
|
} else { // Unstable
|
||||||
|
return 0.5 // Cap at 0.5 for very unstable connections
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// assessConnectivity determines connectivity state based on quality metrics
|
||||||
|
func (m *NetworkResilienceManager) assessConnectivity(quality *InterfaceQuality) ConnectivityState {
|
||||||
|
thresholds := m.qualityMonitor.thresholds
|
||||||
|
|
||||||
|
// Check if we have recent successful samples
|
||||||
|
timeSinceLastGood := time.Since(quality.LastGood)
|
||||||
|
if timeSinceLastGood > 30*time.Second {
|
||||||
|
return ConnectivityFailed
|
||||||
|
}
|
||||||
|
|
||||||
|
// Assess based on packet loss
|
||||||
|
if quality.PacketLoss >= thresholds.PacketLossCrit {
|
||||||
|
return ConnectivityPoor
|
||||||
|
} else if quality.PacketLoss >= thresholds.PacketLossWarn {
|
||||||
|
return ConnectivityDegraded
|
||||||
|
}
|
||||||
|
|
||||||
|
// Assess based on RTT
|
||||||
|
if quality.RTT >= thresholds.RTTCritical {
|
||||||
|
return ConnectivityPoor
|
||||||
|
} else if quality.RTT >= thresholds.RTTWarning {
|
||||||
|
return ConnectivityDegraded
|
||||||
|
}
|
||||||
|
|
||||||
|
// Assess based on stability
|
||||||
|
if quality.Stability < thresholds.StabilityMin {
|
||||||
|
return ConnectivityDegraded
|
||||||
|
}
|
||||||
|
|
||||||
|
return ConnectivityGood
|
||||||
|
}
|
||||||
|
|
||||||
|
// checkQualityDegradation checks if any interface shows quality degradation
|
||||||
|
func (m *NetworkResilienceManager) checkQualityDegradation() bool {
|
||||||
|
m.qualityMonitor.mutex.RLock()
|
||||||
|
defer m.qualityMonitor.mutex.RUnlock()
|
||||||
|
|
||||||
|
for _, quality := range m.qualityMonitor.interfaces {
|
||||||
|
if quality.Connectivity == ConnectivityPoor ||
|
||||||
|
(quality.Connectivity == ConnectivityDegraded && quality.PacketLoss > 5.0) {
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
|
||||||
|
// prepareForNetworkSwitch proactively prepares for an anticipated network switch
|
||||||
|
func (m *NetworkResilienceManager) prepareForNetworkSwitch() {
|
||||||
|
log.Info("Preparing for anticipated network switch due to quality degradation")
|
||||||
|
|
||||||
|
// Temporarily pause new uploads but don't stop existing ones
|
||||||
|
// This gives ongoing uploads a chance to complete before the switch
|
||||||
|
m.mutex.Lock()
|
||||||
|
defer m.mutex.Unlock()
|
||||||
|
|
||||||
|
// Mark as preparing for switch (could be used by upload handlers)
|
||||||
|
for _, ctx := range m.activeUploads {
|
||||||
|
select {
|
||||||
|
case ctx.PauseChan <- true:
|
||||||
|
ctx.IsPaused = true
|
||||||
|
log.Debugf("Preemptively paused upload %s", ctx.SessionID)
|
||||||
|
default:
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Resume after a short delay to allow network to stabilize
|
||||||
|
go func() {
|
||||||
|
time.Sleep(5 * time.Second)
|
||||||
|
m.ResumeAllUploads()
|
||||||
|
}()
|
||||||
|
}
|
||||||
|
|
||||||
|
// handleNetworkSwitch handles an actual network interface change
|
||||||
|
func (m *NetworkResilienceManager) handleNetworkSwitch(switchType string) {
|
||||||
|
log.Infof("Handling network switch: %s", switchType)
|
||||||
|
|
||||||
|
m.PauseAllUploads()
|
||||||
|
|
||||||
|
// Wait for network stabilization (adaptive based on switch type)
|
||||||
|
stabilizationTime := 2 * time.Second
|
||||||
|
if switchType == "interface_change" {
|
||||||
|
stabilizationTime = 3 * time.Second
|
||||||
|
}
|
||||||
|
|
||||||
|
time.Sleep(stabilizationTime)
|
||||||
|
|
||||||
|
// Re-initialize quality monitoring for new network state
|
||||||
|
m.initializeInterfaceQuality()
|
||||||
|
|
||||||
|
m.ResumeAllUploads()
|
||||||
|
}
|
||||||
|
|
||||||
|
// monitorNetworkChanges provides the original network monitoring (fallback)
|
||||||
func (m *NetworkResilienceManager) monitorNetworkChanges() {
|
func (m *NetworkResilienceManager) monitorNetworkChanges() {
|
||||||
ticker := time.NewTicker(5 * time.Second)
|
ticker := time.NewTicker(5 * time.Second)
|
||||||
defer ticker.Stop()
|
defer ticker.Stop()
|
||||||
|
|
||||||
|
log.Info("Starting standard network monitoring (5s interval)")
|
||||||
|
|
||||||
// Get initial interface state
|
// Get initial interface state
|
||||||
m.lastInterfaces, _ = net.Interfaces()
|
m.lastInterfaces, _ = net.Interfaces()
|
||||||
|
|
||||||
|
74
config-network-resilience-enhanced.toml
Normal file
74
config-network-resilience-enhanced.toml
Normal file
@ -0,0 +1,74 @@
|
|||||||
|
# HMAC File Server - Enhanced Network Resilience Configuration
|
||||||
|
# Optimized for mobile devices and network switching scenarios
|
||||||
|
|
||||||
|
[server]
|
||||||
|
listen_address = "8080"
|
||||||
|
storage_path = "/opt/hmac-file-server/data/uploads"
|
||||||
|
networkevents = true # REQUIRED: Enable network monitoring
|
||||||
|
|
||||||
|
[uploads]
|
||||||
|
allowed_extensions = [".zip", ".rar", ".7z", ".tar.gz", ".tgz", ".gpg", ".enc", ".pgp", ".txt", ".pdf", ".png", ".jpg", ".jpeg", ".gif", ".bmp", ".tiff", ".svg", ".webp", ".wav", ".mp4", ".avi", ".mkv", ".mov", ".wmv", ".flv", ".webm", ".mpeg", ".mpg", ".m4v", ".3gp", ".3g2", ".mp3", ".ogg", ".doc", ".docx"]
|
||||||
|
chunked_uploads_enabled = true
|
||||||
|
chunk_size = "32MB" # Optimized chunk size for mobile
|
||||||
|
resumable_uploads_enabled = true
|
||||||
|
max_resumable_age = "72h" # Extended for mobile scenarios
|
||||||
|
sessiontimeout = "90m" # Longer sessions for mobile
|
||||||
|
maxretries = 5 # More retries for mobile networks
|
||||||
|
|
||||||
|
# Enhanced upload resilience settings for mobile
|
||||||
|
session_persistence = true
|
||||||
|
session_recovery_timeout = "600s" # 10 minutes for mobile switches
|
||||||
|
client_reconnect_window = "300s" # 5 minutes reconnect window
|
||||||
|
upload_slot_ttl = "7200s" # 2 hours slot validity
|
||||||
|
retry_failed_uploads = true
|
||||||
|
max_upload_retries = 8 # More retries for cellular
|
||||||
|
|
||||||
|
[timeouts]
|
||||||
|
readtimeout = "600s" # 10 minutes for mobile networks
|
||||||
|
writetimeout = "600s" # Extended for cellular uploads
|
||||||
|
idletimeout = "1200s" # 20 minutes idle tolerance
|
||||||
|
shutdown = "300s"
|
||||||
|
|
||||||
|
# NEW: Enhanced Network Resilience Configuration
|
||||||
|
[network_resilience]
|
||||||
|
fast_detection = true # Enable 1-second network change detection
|
||||||
|
quality_monitoring = true # Monitor connection quality (RTT, packet loss)
|
||||||
|
predictive_switching = true # Switch proactively before network fails
|
||||||
|
mobile_optimizations = true # Mobile-friendly thresholds and timeouts
|
||||||
|
|
||||||
|
# Timing configuration
|
||||||
|
detection_interval = "1s" # Fast detection interval
|
||||||
|
quality_check_interval = "5s" # How often to check connection quality
|
||||||
|
max_detection_interval = "10s" # Maximum interval during stable periods
|
||||||
|
|
||||||
|
[security]
|
||||||
|
# Placeholder only: generate your own long random value (for example with
# `openssl rand -base64 48`) and never commit the real secret.
secret = "CHANGE-ME-generate-a-long-random-secret"
|
||||||
|
|
||||||
|
[logging]
|
||||||
|
level = "info" # Set to "debug" to see network resilience details
|
||||||
|
file = "/opt/hmac-file-server/data/logs/hmac-file-server.log"
|
||||||
|
max_size = 100
|
||||||
|
max_backups = 7
|
||||||
|
max_age = 30
|
||||||
|
compress = true
|
||||||
|
|
||||||
|
[deduplication]
|
||||||
|
enabled = true
|
||||||
|
directory = "/opt/hmac-file-server/data/deduplication"
|
||||||
|
maxsize = "1GB"
|
||||||
|
|
||||||
|
# Configuration Notes:
|
||||||
|
# 1. Set networkevents = true in [server] section to enable network monitoring
|
||||||
|
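# 2. fast_detection = true enables adaptive polling that starts at detection_interval (1s) and backs off up to max_detection_interval while the network is stable, instead of fixed 5-second polling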
|
||||||
|
# 3. quality_monitoring = true adds RTT and packet loss monitoring
|
||||||
|
# 4. predictive_switching = true reacts proactively to degrading connection quality (briefly pausing active uploads) before the network fails completely
|
||||||
|
# 5. mobile_optimizations = true uses mobile-friendly thresholds (higher RTT tolerance, etc.)
|
||||||
|
# 6. Extended timeouts and retry counts help with mobile network instability
|
||||||
|
# 7. Longer session persistence accommodates network switching delays
|
||||||
|
|
||||||
|
# Mobile Network Switching Benefits:
|
||||||
|
# • WLAN ↔ 5G IPv6 switching: Fast detection (1s) + predictive switching
|
||||||
|
# • Dual connectivity (Wired + WiFi): Quality monitoring selects best interface
|
||||||
|
# • Cellular optimization: Higher RTT/packet loss tolerance + more retries
|
||||||
|
# • Session mobility: 10-minute recovery window for IP address changes
|
||||||
|
# • Proactive switching: Network degradation detected before complete failure
|
@ -17,6 +17,7 @@ worker_scale_down_thresh = 10
|
|||||||
deduplication_enabled = true
|
deduplication_enabled = true
|
||||||
min_free_bytes = "1GB"
|
min_free_bytes = "1GB"
|
||||||
file_naming = "original"
|
file_naming = "original"
|
||||||
|
networkevents = true # Enable network change detection
|
||||||
|
|
||||||
# Network resilience settings
|
# Network resilience settings
|
||||||
graceful_shutdown_timeout = "300s"
|
graceful_shutdown_timeout = "300s"
|
||||||
@ -45,6 +46,16 @@ upload_slot_ttl = "3600s"
|
|||||||
retry_failed_uploads = true
|
retry_failed_uploads = true
|
||||||
max_upload_retries = 3
|
max_upload_retries = 3
|
||||||
|
|
||||||
|
# Enhanced Network Resilience (NEW)
|
||||||
|
[network_resilience]
|
||||||
|
fast_detection = true # 1-second network change detection
|
||||||
|
quality_monitoring = true # Monitor RTT and packet loss
|
||||||
|
predictive_switching = true # Proactive network switching
|
||||||
|
mobile_optimizations = true # Mobile-friendly thresholds
|
||||||
|
detection_interval = "1s"
|
||||||
|
quality_check_interval = "5s"
|
||||||
|
max_detection_interval = "10s"
|
||||||
|
|
||||||
[downloads]
|
[downloads]
|
||||||
resumable_downloads_enabled = true
|
resumable_downloads_enabled = true
|
||||||
chunked_downloads_enabled = true
|
chunked_downloads_enabled = true
|
||||||
|
Reference in New Issue
Block a user