Implement network resilience features for improved upload stability during network changes

- Enable network events by default in configuration
- Integrate network resilience manager into upload handling
- Add support for automatic upload pause/resume during WLAN to 5G transitions
- Enhance documentation with network resilience settings and testing procedures
- Create a test script for validating network resilience functionality
This commit is contained in:
2025-08-24 13:32:44 +00:00
parent 3887feb12c
commit 91128f2861
9 changed files with 792 additions and 22 deletions

View File

@ -33,6 +33,7 @@ func DefaultConfig() *Config {
EnableDynamicWorkers: true,
WorkerScaleUpThresh: 40, // Optimized from previous session
WorkerScaleDownThresh: 10,
NetworkEvents: true, // Enable network resilience by default
},
Uploads: UploadsConfig{
AllowedExtensions: []string{".zip", ".rar", ".7z", ".tar.gz", ".tgz", ".gpg", ".enc", ".pgp", ".txt", ".pdf", ".png", ".jpg", ".jpeg"},
@ -104,6 +105,14 @@ func DefaultConfig() *Config {
NumWorkers: 4,
UploadQueueSize: 100, // Optimized from previous session
},
NetworkResilience: NetworkResilienceConfig{
FastDetection: true, // Enable fast 1-second detection
QualityMonitoring: true, // Monitor connection quality
PredictiveSwitching: true, // Switch before complete failure
MobileOptimizations: true, // Mobile-friendly thresholds
DetectionInterval: "1s", // Fast detection
QualityCheckInterval: "5s", // Regular quality checks
},
File: FileConfig{},
Build: BuildConfig{
Version: "3.2",
@ -254,13 +263,26 @@ worker_scale_up_thresh = 40
worker_scale_down_thresh = 10
[uploads]
allowed_extensions = [".zip", ".rar", ".7z", ".tar.gz", ".tgz", ".gpg", ".enc", ".pgp"]
allowed_extensions = [".zip", ".rar", ".7z", ".tar.gz", ".tgz", ".gpg", ".enc", ".pgp", ".txt", ".pdf", ".jpg", ".jpeg", ".png", ".gif", ".webp", ".mp4", ".mov", ".ogg", ".mp3", ".doc", ".docx"]
chunked_uploads_enabled = true
chunk_size = "10MB"
resumable_uploads_enabled = true
max_resumable_age = "48h"
sessiontimeout = "60m"
maxretries = 3
networkevents = false # Enable network event monitoring for resilience
# Upload resilience and session management
session_persistence = true # Persist sessions across restarts
session_recovery_timeout = "300s" # Session recovery timeout after network changes
client_reconnect_window = "120s" # Time window for client reconnection
upload_slot_ttl = "3600s" # Upload slot validity time
retry_failed_uploads = true # Auto-retry failed uploads
max_upload_retries = 3 # Maximum retry attempts
allow_session_resume = true # Allow resume from different IPs
session_persistence_duration = "24h" # How long to keep session data
detect_duplicate_uploads = true # Detect same upload from different IPs
merge_duplicate_sessions = true # Merge sessions from same client
[downloads]
allowed_extensions = [".txt", ".pdf", ".png", ".jpg", ".jpeg", ".gif", ".bmp", ".tiff", ".svg", ".webp"]
@ -322,6 +344,45 @@ redishealthcheckinterval = "120s"
numworkers = 4
uploadqueuesize = 100
# Network Resilience Configuration (v3.2+)
[network_resilience]
enabled = true # Enable network resilience system
fast_detection = true # Enable 1-second network change detection
quality_monitoring = true # Monitor RTT and packet loss per interface
predictive_switching = true # Switch proactively before network failure
mobile_optimizations = true # Use mobile-friendly thresholds for cellular networks
upload_resilience = true # Resume uploads across network changes
detection_interval = "1s" # Network change detection interval
quality_check_interval = "5s" # Connection quality monitoring interval
max_detection_interval = "10s" # Maximum detection interval during stable periods
network_change_threshold = 3 # Switches required to trigger network change
interface_stability_time = "30s" # Time to wait before marking interface stable
upload_pause_timeout = "5m" # Maximum time to pause uploads during network changes
upload_retry_timeout = "10m" # Maximum time to retry uploads after network changes
rtt_warning_threshold = "200ms" # RTT threshold for warning
rtt_critical_threshold = "1000ms" # RTT threshold for critical
packet_loss_warning_threshold = 2.0 # Packet loss % for warning
packet_loss_critical_threshold = 10.0 # Packet loss % for critical
# Multi-Interface Support (v3.2+)
multi_interface_enabled = false # Enable multi-interface management
interface_priority = ["eth0", "wlan0", "wwan0", "ppp0"] # Interface priority order
auto_switch_enabled = true # Enable automatic interface switching
switch_threshold_latency = "500ms" # Latency threshold for switching
switch_threshold_packet_loss = 5.0 # Packet loss threshold for switching
quality_degradation_threshold = 0.5 # Quality degradation threshold
max_switch_attempts = 3 # Maximum switch attempts per detection
switch_detection_interval = "10s" # Switch detection interval
# Client Network Support (v3.2+)
[client_network_support]
session_based_tracking = false # Track sessions by ID instead of IP
allow_ip_changes = true # Allow session continuation from different IPs
session_migration_timeout = "5m" # Time to wait for client reconnection
max_ip_changes_per_session = 10 # Maximum IP changes per session
client_connection_detection = false # Detect client network type
adapt_to_client_network = false # Optimize parameters based on client connection
[build]
version = "3.2"
`

View File

@ -748,10 +748,11 @@ func main() {
ctx, cancel := context.WithCancel(context.Background())
defer cancel()
if conf.Server.NetworkEvents { // Corrected field name
go monitorNetwork(ctx) // Assuming monitorNetwork is defined in helpers.go or elsewhere
go handleNetworkEvents(ctx) // Assuming handleNetworkEvents is defined in helpers.go or elsewhere
}
// Legacy network monitoring disabled - now handled by NetworkResilienceManager
// if conf.Server.NetworkEvents { // Corrected field name
// go monitorNetwork(ctx) // OLD: Basic network monitoring (replaced by NetworkResilienceManager)
// go handleNetworkEvents(ctx) // OLD: Basic event logging (replaced by NetworkResilienceManager)
// }
go updateSystemMetrics(ctx)
if conf.ClamAV.ClamAVEnabled {
@ -1640,8 +1641,21 @@ func handleUpload(w http.ResponseWriter, r *http.Request) {
}
defer dst.Close()
// Copy file content
written, err := io.Copy(dst, file)
// Register upload with network resilience manager for WLAN/5G switching support
var uploadCtx *UploadContext
var sessionID string
if networkManager != nil {
sessionID = r.Header.Get("X-Upload-Session-ID")
if sessionID == "" {
sessionID = fmt.Sprintf("upload_%s_%d", getClientIP(r), time.Now().UnixNano())
}
uploadCtx = networkManager.RegisterUpload(sessionID)
defer networkManager.UnregisterUpload(sessionID)
log.Debugf("Registered upload with network resilience: %s", sessionID)
}
// Copy file content with network resilience support
written, err := copyWithNetworkResilience(dst, file, uploadCtx)
if err != nil {
http.Error(w, fmt.Sprintf("Error saving file: %v", err), http.StatusInternalServerError)
uploadErrorsTotal.Inc()

View File

@ -4,11 +4,13 @@ package main
import (
"context"
"fmt"
"io"
"net"
"net/http"
"os/exec"
"sync"
"time"
"os/exec"
)
// NetworkResilienceManager handles network change detection and upload pausing
@ -841,3 +843,56 @@ func InitializeNetworkResilience() {
ConfigureEnhancedTimeouts()
log.Info("Network resilience system initialized")
}
// copyWithNetworkResilience copies src to dst in 32KB chunks, like io.Copy,
// while honouring the pause, resume and cancel signals carried by uploadCtx.
//
// Before every read the function polls uploadCtx without blocking:
//   - a receive on PauseChan marks the upload as paused and parks the copy
//     until either ResumeChan or CancelChan fires;
//   - a receive on CancelChan aborts the copy with an error;
//   - otherwise the next chunk is copied.
//
// When uploadCtx is nil there is nothing to coordinate with and the call is
// delegated to io.Copy.
//
// It returns the number of bytes written to dst and the first error
// encountered. Reaching io.EOF on src is a normal end of stream and is
// reported as a nil error. A short write is reported as io.ErrShortWrite.
//
// NOTE(review): IsPaused is written here without synchronisation; if other
// goroutines read it, confirm that UploadContext guards the field.
func copyWithNetworkResilience(dst io.Writer, src io.Reader, uploadCtx *UploadContext) (int64, error) {
	if uploadCtx == nil {
		// Fallback to regular copy if no network resilience
		return io.Copy(dst, src)
	}

	const bufferSize = 32 * 1024 // 32KB buffer
	buf := make([]byte, bufferSize)
	var written int64

	for {
		// Check for network resilience signals before each read. The default
		// case keeps this poll non-blocking so a quiet network costs nothing.
		select {
		case <-uploadCtx.PauseChan:
			log.Debug("Upload paused due to network change, waiting for resume...")
			uploadCtx.IsPaused = true

			// Wait for resume, but keep listening for cancellation too:
			// waiting on ResumeChan alone would leave a cancelled upload
			// parked here forever, holding the handler and its open file.
			select {
			case <-uploadCtx.ResumeChan:
				uploadCtx.IsPaused = false
				log.Debug("Upload resumed after network stabilization")
			case <-uploadCtx.CancelChan:
				uploadCtx.IsPaused = false
				return written, fmt.Errorf("upload cancelled due to network issues")
			}
		case <-uploadCtx.CancelChan:
			return written, fmt.Errorf("upload cancelled due to network issues")
		default:
			// Continue with upload
		}

		nr, readErr := src.Read(buf)
		if nr > 0 {
			// Flush whatever was read before looking at readErr: a Reader may
			// return data together with an error (including io.EOF).
			nw, writeErr := dst.Write(buf[:nr])
			if nw > 0 {
				written += int64(nw)
			}
			if writeErr != nil {
				return written, writeErr
			}
			if nr != nw {
				return written, io.ErrShortWrite
			}
		}

		if readErr == io.EOF {
			// Normal end of stream.
			return written, nil
		}
		if readErr != nil {
			return written, readErr
		}
	}
}