diff --git a/CLUSTER_RESTART_GUIDE.md b/CLUSTER_RESTART_GUIDE.md deleted file mode 100644 index 04007f3..0000000 --- a/CLUSTER_RESTART_GUIDE.md +++ /dev/null @@ -1,314 +0,0 @@ -# HMAC File Server Cluster Restart Guide - -## Overview - -When HMAC File Server restarts in a distributed cluster environment, client applications on other VMs may need to be restarted or reconfigured to resume upload functionality. This guide addresses common scenarios and solutions. - -## 🚨 **Common Client Restart Scenarios** - -### 1. **XMPP Server Integration (XEP-0363)** - -**Scenario:** -``` -VM1: ejabberd/Prosody XMPP Server -VM2: HMAC File Server (restarted) -``` - -**Why clients may need restart:** -- XMPP servers cache upload slot endpoints -- HTTP client connection pools become stale -- Upload session tokens may be invalidated -- DNS/service discovery cache issues - -**Solutions:** -```bash -# On XMPP Server VM: -systemctl reload ejabberd # Reload config without full restart -# OR -systemctl restart ejabberd # Full restart if reload doesn't work - -# For Prosody: -systemctl reload prosody -# OR -prosodyctl reload -``` - -### 2. **Application Servers with HTTP Clients** - -**Scenario:** -``` -VM1,VM2,VM3: Web Applications -VM4: HMAC File Server (restarted) -``` - -**Why clients may need restart:** -- HTTP client libraries maintain connection pools -- Upload tokens cached in application memory -- Circuit breakers may be in "open" state -- Load balancer health checks failing - -**Solutions:** -```bash -# Restart application servers: -systemctl restart your-app-server - -# Or graceful reload if supported: -systemctl reload your-app-server -kill -USR1 $(pgrep -f your-app) -``` - -### 3. 
**Load Balancer / Proxy Issues** - -**Scenario:** -``` -VM1: nginx/haproxy Load Balancer -VM2: HMAC File Server (restarted) -``` - -**Why restart needed:** -- Upstream connection pooling -- Health check failures -- Backend marking as "down" - -**Solutions:** -```bash -# nginx: -nginx -s reload - -# haproxy: -systemctl reload haproxy -# OR disable/enable backend: -echo "disable server hmac-backend/server1" | socat stdio /var/run/haproxy.sock -echo "enable server hmac-backend/server1" | socat stdio /var/run/haproxy.sock -``` - -## 🔧 **Enhanced Configuration for Cluster Resilience** - -### Server-Level Settings -```toml -[server] -# Graceful shutdown to allow client reconnections -graceful_shutdown_timeout = "300s" -connection_drain_timeout = "120s" -restart_grace_period = "60s" - -# Connection management -max_idle_conns_per_host = 5 -idle_conn_timeout = "90s" -client_timeout = "300s" -``` - -### Upload Session Persistence -```toml -[uploads] -# Enable session persistence across restarts -session_persistence = true -session_recovery_timeout = "300s" -client_reconnect_window = "120s" -upload_slot_ttl = "3600s" -retry_failed_uploads = true -max_upload_retries = 3 -``` - -### Redis-Based Session Sharing -```toml -[redis] -redisenabled = true -redisaddr = "redis-cluster:6379" -# Store upload sessions in Redis for cluster-wide persistence -redishealthcheckinterval = "30s" -``` - -## 🚀 **Automated Restart Coordination** - -### 1. **Service Discovery Integration** - -```bash -#!/bin/bash -# restart-coordination.sh -# Notify cluster components of HMAC server restart - -HMAC_SERVER_VM="vm2" -XMPP_SERVERS=("vm1" "vm3") -APP_SERVERS=("vm4" "vm5" "vm6") - -echo "HMAC File Server restart initiated on $HMAC_SERVER_VM" - -# Wait for HMAC server to be ready -while ! curl -s http://$HMAC_SERVER_VM:8080/health > /dev/null; do - echo "Waiting for HMAC server to be ready..." - sleep 5 -done - -echo "HMAC server is ready. Notifying cluster components..." 
- -# Restart XMPP servers -for server in "${XMPP_SERVERS[@]}"; do - echo "Reloading XMPP server on $server" - ssh $server "systemctl reload ejabberd || systemctl restart ejabberd" -done - -# Restart application servers -for server in "${APP_SERVERS[@]}"; do - echo "Restarting application server on $server" - ssh $server "systemctl restart your-app-server" -done - -echo "Cluster restart coordination completed" -``` - -### 2. **Health Check Integration** - -```bash -#!/bin/bash -# hmac-health-check.sh -# Advanced health check that validates upload functionality - -HMAC_URL="http://localhost:8080" -SECRET="f6g4ldPvQM7O2UTFeBEUUj33VrXypDAcsDt0yqKrLiOr5oQW" - -# Test basic connectivity -if ! curl -s -f "$HMAC_URL/health" > /dev/null; then - echo "CRITICAL: HMAC server not responding" - exit 2 -fi - -# Test upload endpoint availability -HTTP_CODE=$(curl -s -o /dev/null -w "%{http_code}" "$HMAC_URL/upload") -if [ "$HTTP_CODE" != "405" ] && [ "$HTTP_CODE" != "401" ]; then - echo "WARNING: Upload endpoint returning unexpected code: $HTTP_CODE" - exit 1 -fi - -echo "OK: HMAC File Server is healthy" -exit 0 -``` - -### 3. **Consul/etcd Integration** - -```bash -#!/bin/bash -# consul-hmac-restart.sh -# Integrate with Consul for service discovery - -# Deregister service before restart -curl -X PUT "http://consul:8500/v1/agent/service/deregister/hmac-file-server" - -# Restart HMAC server -systemctl restart hmac-file-server - -# Wait for service to be ready -while ! 
./hmac-health-check.sh; do - sleep 5 -done - -# Re-register service -curl -X PUT "http://consul:8500/v1/agent/service/register" \ - -d '{ - "ID": "hmac-file-server", - "Name": "hmac-file-server", - "Address": "vm2", - "Port": 8080, - "Check": { - "HTTP": "http://vm2:8080/health", - "Interval": "30s" - } - }' - -# Trigger dependent service reloads via Consul watches -consul event -name="hmac-restarted" "HMAC File Server restarted on vm2" -``` - -## 🔍 **Troubleshooting Client Issues** - -### Symptoms of Client Restart Needed: -- Upload requests returning "connection refused" -- Timeouts on upload attempts -- "Service temporarily unavailable" errors -- Cached upload slots returning 404/410 - -### Diagnosis Commands: -```bash -# Check client connection pools -ss -tuln | grep :8080 - -# Test upload endpoint from client VM -curl -I http://hmac-server:8080/upload - -# Check client application logs -journalctl -u your-app-server -f - -# Verify DNS resolution -nslookup hmac-server -dig hmac-server -``` - -### Quick Fixes: -```bash -# Clear client-side DNS cache -systemctl restart systemd-resolved - -# Reset client HTTP connections -ss -K dst hmac-server - -# Force application reconnection -systemctl restart your-app-server -``` - -## 🎯 **Best Practices for Production** - -### 1. **Rolling Restarts** -- Use multiple HMAC server instances behind load balancer -- Restart one instance at a time -- Monitor client reconnection success - -### 2. **Health Check Integration** -- Implement deep health checks that test upload functionality -- Use health check results in load balancer decisions -- Monitor client-side connection success rates - -### 3. **Session Persistence** -- Use Redis cluster for session sharing -- Implement upload session recovery -- Provide client reconnection grace periods - -### 4. 
**Monitoring and Alerts** -```bash -# Monitor upload success rates -watch -n 30 'curl -s http://hmac-server:9090/metrics | grep upload_success_total' - -# Monitor client connections -watch -n 10 'ss -tuln | grep :8080 | wc -l' - -# Monitor Redis session store -redis-cli info keyspace -``` - -## 📋 **Restart Checklist** - -### Before HMAC Server Restart: -- [ ] Identify all client VMs and applications -- [ ] Verify Redis cluster health -- [ ] Check current upload queue status -- [ ] Notify operations team - -### During Restart: -- [ ] Execute graceful shutdown -- [ ] Monitor client reconnection attempts -- [ ] Verify upload session recovery -- [ ] Check Redis session persistence - -### After Restart: -- [ ] Restart/reload client applications as needed -- [ ] Verify upload functionality from all client VMs -- [ ] Monitor error rates and connection counts -- [ ] Update monitoring dashboards - -### Client Applications to Restart: -- [ ] XMPP servers (ejabberd, Prosody) -- [ ] Web application servers -- [ ] API gateway services -- [ ] Load balancers (if upstream issues) -- [ ] Monitoring agents - -This comprehensive approach ensures minimal disruption during HMAC File Server restarts in distributed environments. diff --git a/INSTALL.MD b/INSTALL.MD deleted file mode 100644 index cb47bfa..0000000 --- a/INSTALL.MD +++ /dev/null @@ -1,376 +0,0 @@ -# HMAC File Server 3.2 Installation Guide - -## Quick Installation for XMPP Operators - -The HMAC File Server includes an automated installer script designed specifically for XMPP operators who want to quickly deploy a file sharing service for their chat servers. The installer now supports both **native systemd installation** and **Docker deployment**. - -### Prerequisites - -- Linux system with systemd (Ubuntu 18.04+, CentOS 7+, Debian 9+, etc.) 
for native installation -- Docker and Docker Compose for containerized deployment -- Root or sudo access -- At least 1GB free disk space -- Internet connection for downloading dependencies - -### Installation Options - -The installer provides two deployment methods: - -1. **Native Installation** (systemd service) - Traditional installation with Go build and systemd service -2. **Docker Deployment** (docker-compose) - Containerized deployment with automatic service orchestration - -### Installation - -1. **Download or clone the repository:** - ```bash - git clone https://git.uuxo.net/uuxo/hmac-file-server.git - cd hmac-file-server - ``` - -2. **Run the installer:** - ```bash - sudo ./installer.sh - ``` - -3. **Choose deployment type:** - - **Option 1**: Native installation (systemd service) - - **Option 2**: Docker deployment (docker-compose) - -4. **Configure installation:** - - **System user** (default: `hmac-server`) - - **Installation/deployment directories** - - **Configuration directory** (customizable) - - **Server ports** - - **HMAC secret**: Choose automatic generation (recommended) or enter manually - - **Optional features** (JWT, Redis, ClamAV, SSL/TLS) - -### Docker Deployment Features - -When selecting Docker deployment, the installer will: -- Create a complete docker-compose.yml with Redis and ClamAV services -- Generate optimized Dockerfile for multi-stage builds -- Set up proper networking between services -- Create start/stop scripts for easy management -- Configure container-optimized paths and volumes -- Provide isolated deployment directory structure - -### Native Installation Features - -When selecting native installation, the installer will: -- Install Go 1.24 (if needed) -- Create system user and directories -- Build and configure the server -- Set up systemd service -- Optionally install Redis and ClamAV - - **Alternative: Pre-set secrets via environment variables:** - ```bash - # For automation or if interactive input doesn't work - 
HMAC_SECRET='your-super-secret-hmac-key-here-minimum-32-characters' sudo -E ./installer.sh - - # With both HMAC and JWT secrets - HMAC_SECRET='your-hmac-secret-32-chars-minimum' \ - JWT_SECRET='your-jwt-secret-also-32-chars-minimum' \ - sudo -E ./installer.sh - ``` - -3. **Follow the interactive prompts:** - - System user (default: `hmac-server`) - - Installation directories - - Server ports - - **HMAC secret**: Choose automatic generation (recommended) or enter manually - - **Optional features** (JWT, Redis, ClamAV, SSL/TLS) - - **JWT secret**: Also supports automatic generation if enabled - -### Configuration Options - -#### Core Settings -- **Server Port**: Default 8080 (HTTP file server) -- **Metrics Port**: Default 9090 (Prometheus metrics) -- **HMAC Secret**: Strong secret for authentication - - **Automatic generation** (recommended): Creates 48-character secure random key - - **Manual entry**: Minimum 32 characters required - - **Environment variable**: `HMAC_SECRET='your-secret'` - -#### Optional Features -- **JWT Authentication**: Token-based auth for enhanced security - - **Automatic generation** available for JWT secrets - - Configurable expiration and algorithms -- **Redis Integration**: For session management and caching -- **ClamAV Scanning**: Real-time virus scanning of uploaded files -- **SSL/TLS**: Direct HTTPS support (or use reverse proxy) - -### XMPP Server Integration - -#### Prosody Configuration -Add to your Prosody configuration: -```lua -Component "upload.yourdomain.com" "http_file_share" - http_file_share_url = "http://localhost:8080" -``` - -#### Ejabberd Configuration -Add to your Ejabberd configuration: -```yaml -mod_http_file_share: - external_secret: "your-hmac-secret" - service_url: "http://localhost:8080" -``` - -### Post-Installation - -#### For Native Installation - -1. **Start the service:** - ```bash - sudo systemctl start hmac-file-server - ``` - -2. 
**Check status:** - ```bash - sudo systemctl status hmac-file-server - ``` - -3. **View logs:** - ```bash - sudo journalctl -u hmac-file-server -f - ``` - -#### For Docker Deployment - -1. **Start the containers:** - ```bash - cd /path/to/your/docker/deployment - ./start.sh - # Or manually: docker-compose up -d - ``` - -2. **Check status:** - ```bash - docker-compose ps - ``` - -3. **View logs:** - ```bash - docker-compose logs -f hmac-file-server - ``` - -4. **Stop the containers:** - ```bash - ./stop.sh - # Or manually: docker-compose down - ``` - -### Firewall Configuration - -4. **Configure firewall (required for both deployment types):** - ```bash - # Example for ufw (Ubuntu/Debian) - sudo ufw allow 8080/tcp comment "HMAC File Server" - sudo ufw allow 9090/tcp comment "HMAC File Server Metrics" - - # Example for firewalld (CentOS/RHEL/Fedora) - sudo firewall-cmd --permanent --add-port=8080/tcp - sudo firewall-cmd --permanent --add-port=9090/tcp - sudo firewall-cmd --reload - - # Example for iptables (manual) - sudo iptables -A INPUT -p tcp --dport 8080 -j ACCEPT - sudo iptables -A INPUT -p tcp --dport 9090 -j ACCEPT - ``` - -5. 
**Configure reverse proxy (recommended):** - ```nginx - server { - listen 443 ssl http2; - server_name upload.yourdomain.com; - - ssl_certificate /path/to/cert.pem; - ssl_certificate_key /path/to/key.pem; - - location / { - proxy_pass http://localhost:8080; - proxy_set_header Host $host; - proxy_set_header X-Real-IP $remote_addr; - proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; - proxy_set_header X-Forwarded-Proto $scheme; - - # File upload settings - client_max_body_size 100M; - proxy_request_buffering off; - } - } - ``` - -### File Locations - -After installation: -- **Binary**: `/opt/hmac-file-server/hmac-file-server` -- **Configuration**: `/etc/hmac-file-server/config.toml` -- **Uploads**: `/var/lib/hmac-file-server/uploads/` -- **Logs**: `/var/log/hmac-file-server/hmac-file-server.log` - -### Management Commands - -```bash -# Service management -sudo systemctl start hmac-file-server -sudo systemctl stop hmac-file-server -sudo systemctl restart hmac-file-server -sudo systemctl reload hmac-file-server - -# View logs -sudo journalctl -u hmac-file-server -f -sudo tail -f /var/log/hmac-file-server/hmac-file-server.log - -# Edit configuration -sudo nano /etc/hmac-file-server/config.toml -sudo systemctl reload hmac-file-server # Apply changes -``` - -### Uninstallation - -The HMAC File Server installer includes a comprehensive uninstallation system with advanced data preservation options: - -```bash -sudo ./installer.sh --uninstall -``` - -#### Safe Uninstallation Features - -🔒 **Interactive Confirmation System** -- Multiple confirmation steps prevent accidental data loss -- Automatic detection of data directories from configuration -- Smart backup system with timestamped backups in `/var/backups/hmac-file-server-*` -- Detailed reporting showing file counts and directory sizes - -#### Five Data Handling Options - -**1. 
šŸ—‘ļø Complete Removal** -- Deletes all data including uploads, deduplication files, and logs -- Requires typing "DELETE" for final confirmation -- Provides comprehensive warning about permanent data loss - -**2. šŸ’¾ Preserve Uploads and Deduplication** -- Preserves critical user files and deduplication data -- Removes logs (typically not needed for data recovery) -- Ideal for system migration or reinstallation - -**3. šŸ“‹ Preserve All Data** -- Keeps uploads, deduplication data, and logs -- Comprehensive data preservation option -- Best for troubleshooting or temporary removal - -**4. šŸŽÆ Custom Selection** -- Interactive selection of which directories to preserve -- Shows detailed information for each directory before decision -- Allows granular control over data preservation - -**5. āŒ Cancel Operation** -- Safely exits without making any changes -- No system modifications performed - -#### What Gets Removed (Service Components) -- āœ“ Systemd service (stopped and disabled) -- āœ“ Installation directory (`/opt/hmac-file-server/`) -- āœ“ Configuration files (`/etc/hmac-file-server/`) -- āœ“ System user (`hmac-server`) -- āœ“ Any remaining binaries - -#### Data Backup Location -When data preservation is selected, files are moved to: -- `/var/backups/hmac-file-server-TIMESTAMP/` -- Timestamped directories for multiple backup versions -- Preserves original directory structure - -**āš ļø Important**: The uninstaller provides multiple safety checks and data preservation options. Choose wisely based on your needs! - -### Security Considerations - -1. **Configure firewall properly** - Only allow necessary ports (8080, 9090) to authorized networks -2. **Use strong HMAC secrets** (minimum 32 characters, use random generators) -3. **Enable JWT authentication** for enhanced security -4. **Set up SSL/TLS** either directly or via reverse proxy -5. **Enable ClamAV** for virus scanning if handling untrusted files -6. 
**Regular backups** of configuration and uploaded files -7. **Monitor logs** for suspicious activity -8. **Restrict network access** - Consider limiting access to internal networks only - -### Monitoring - -The server provides Prometheus metrics at `/metrics` endpoint: -```bash -curl http://localhost:9090/metrics -``` - -Key metrics to monitor: -- `hmac_requests_total` - Total requests -- `hmac_upload_size_bytes` - Upload sizes -- `hmac_errors_total` - Error counts -- `hmac_active_connections` - Active connections - -### Troubleshooting - -#### Service won't start -1. Check logs: `sudo journalctl -u hmac-file-server -f` -2. Verify configuration: `sudo nano /etc/hmac-file-server/config.toml` -3. Check permissions on data directories -4. Ensure ports are not in use: `sudo netstat -tlnp | grep :8080` - -#### High memory usage -1. Adjust worker settings in configuration -2. Enable Redis for session management -3. Check for large file uploads in progress - -#### Files not uploading -1. Verify HMAC secret matches between XMPP server and file server -2. Check file size limits in configuration -3. Ensure sufficient disk space -4. 
Review ClamAV logs if virus scanning enabled - -### Support - -- **Documentation**: See `README.MD` and `WIKI.MD` -- **Protocol Details**: See `PROTOCOL_SPECIFICATIONS.MD` -- **Issues**: https://git.uuxo.net/uuxo/hmac-file-server/issues -- **Configuration**: All options documented in `WIKI.MD` - -### Example Production Setup - -For a production XMPP server with 1000+ users: - -```toml -[server] -listen_address = ":8080" -metrics_enabled = true -deduplication_enabled = true -max_upload_size = "50MB" -enable_dynamic_workers = true -worker_scale_up_thresh = 50 -worker_scale_down_thresh = 10 - -[security] -enablejwt = true -secret = "your-strong-64-character-secret-here" -jwtsecret = "your-jwt-secret-here" - -[uploads] -allowed_extensions = [".zip", ".rar", ".7z", ".tar.gz", ".tgz"] -chunked_uploads_enabled = true -resumable_uploads_enabled = true - -[timeouts] -readtimeout = "4800s" -writetimeout = "4800s" -idletimeout = "4800s" - -[workers] -numworkers = 4 -uploadqueuesize = 50 - -[deduplication] -enabled = true -maxsize = "1GB" -``` - -This setup provides robust file sharing with deduplication, automatic cleanup, virus scanning, and scalable worker management. diff --git a/QUEUE_RESILIENCE_GUIDE.md b/QUEUE_RESILIENCE_GUIDE.md deleted file mode 100644 index e75ccb9..0000000 --- a/QUEUE_RESILIENCE_GUIDE.md +++ /dev/null @@ -1,268 +0,0 @@ -# Queue Resilience Configuration Guide - -## Overview - -HMAC File Server 3.2 Ultimate Fixed includes advanced queue resilience features designed to handle timeout scenarios gracefully and maintain service availability under various network conditions. - -## Enhanced Configuration Sections - -### 1. 
Server-Level Timeout Resilience - -```toml -[server] -# Enhanced timeout resilience settings -graceful_shutdown_timeout = "300s" # Time to wait for active uploads during shutdown -request_timeout = "7200s" # Maximum time for any single request (2 hours) -keep_alive_timeout = "300s" # HTTP keep-alive timeout -connection_drain_timeout = "180s" # Time to drain connections during shutdown -upload_stall_timeout = "600s" # Timeout if upload stalls (no data received) -download_stall_timeout = "300s" # Timeout if download stalls -retry_after_timeout = "60s" # Retry-After header when rejecting due to overload -max_concurrent_uploads = 100 # Maximum concurrent upload operations -upload_rate_limit = "10MB/s" # Per-connection upload rate limit -connection_pool_size = 200 # Maximum connection pool size -``` - -**Key Benefits:** -- **Graceful Degradation**: Server doesn't abruptly terminate active uploads during shutdown -- **Stall Detection**: Automatically detects and handles stalled uploads/downloads -- **Connection Management**: Limits concurrent operations to prevent resource exhaustion -- **Rate Limiting**: Prevents individual connections from overwhelming the server - -### 2. 
Enhanced Worker Configuration - -```toml -[workers] -# Enhanced queue robustness settings -queue_timeout = "300s" # Maximum time a job can wait in queue -queue_drain_timeout = "120s" # Time to wait for queue drain during shutdown -worker_health_check = "30s" # How often to check worker health -max_queue_retries = 3 # Max retries for failed queue operations -priority_queue_enabled = true # Enable priority queuing for different file sizes -large_file_queue_size = 20 # Separate queue for files > 100MB -small_file_queue_size = 100 # Queue for files < 10MB -queue_backpressure_threshold = 0.8 # Queue usage % to start backpressure -circuit_breaker_enabled = true # Enable circuit breaker for queue failures -circuit_breaker_threshold = 10 # Failures before opening circuit -circuit_breaker_timeout = "60s" # Time before retrying after circuit opens -``` - -**Key Benefits:** -- **Priority Queuing**: Large files don't block small file uploads -- **Health Monitoring**: Workers are continuously monitored for failures -- **Circuit Breaking**: Automatic failure detection and recovery -- **Backpressure Control**: Gradual slowdown instead of hard failures - -### 3. 
Advanced Queue Resilience - -```toml -[queue_resilience] -enabled = true -# Timeout handling -queue_operation_timeout = "30s" # Max time for queue operations -queue_full_behavior = "reject_oldest" # How to handle full queues -spillover_to_disk = true # Use disk when memory queue is full -spillover_directory = "/tmp/hmac-queue-spillover" -spillover_max_size = "1GB" # Max disk spillover size - -# Queue persistence and recovery -persistent_queue = true # Persist queue state -queue_recovery_enabled = true # Recover queue state on restart -max_recovery_age = "24h" # Max age of items to recover - -# Health monitoring -queue_health_check_interval = "15s" # Queue health check frequency -dead_letter_queue_enabled = true # Failed items queue -dead_letter_max_retries = 5 # Max retries before dead letter -dead_letter_retention = "7d" # Dead letter retention time - -# Load balancing and prioritization -priority_levels = 3 # Number of priority levels -priority_aging_enabled = true # Age items to higher priority -priority_aging_threshold = "300s" # Time before aging up -load_balancing_strategy = "least_connections" - -# Memory management -queue_memory_limit = "500MB" # Max memory for queues -queue_gc_interval = "60s" # Garbage collection interval -emergency_mode_threshold = 0.95 # Emergency mode trigger -``` - -**Key Benefits:** -- **Disk Spillover**: Never lose uploads due to memory constraints -- **Queue Recovery**: Resume operations after server restarts -- **Dead Letter Queuing**: Handle persistently failing uploads -- **Priority Aging**: Prevent starvation of lower-priority items - -### 4. 
Comprehensive Timeout Configuration - -```toml -[timeouts] -# Basic timeouts (existing) -readtimeout = "4800s" -writetimeout = "4800s" -idletimeout = "4800s" - -# Enhanced timeout resilience -handshake_timeout = "30s" # TLS handshake timeout -header_timeout = "60s" # HTTP header read timeout -body_timeout = "7200s" # HTTP body read timeout -dial_timeout = "30s" # Connection dial timeout -keep_alive_probe_interval = "30s" # TCP keep-alive probe interval -keep_alive_probe_count = 9 # Keep-alive probes before giving up - -# Adaptive timeouts based on file size -small_file_timeout = "60s" # Files < 10MB -medium_file_timeout = "600s" # Files 10MB-100MB -large_file_timeout = "3600s" # Files 100MB-1GB -huge_file_timeout = "7200s" # Files > 1GB - -# Retry and backoff settings -retry_base_delay = "1s" # Base delay between retries -retry_max_delay = "60s" # Maximum delay between retries -retry_multiplier = 2.0 # Exponential backoff multiplier -max_retry_attempts = 5 # Maximum retry attempts -``` - -**Key Benefits:** -- **Adaptive Timeouts**: Different timeouts based on file size -- **Connection Resilience**: TCP keep-alive prevents silent failures -- **Exponential Backoff**: Intelligent retry timing reduces server load -- **Granular Control**: Fine-tuned timeouts for different operations - -## Timeout Scenario Handling - -### 1. Network Interruption Scenarios - -**Mobile Network Switching:** -- Keep-alive probes detect network changes -- Chunked uploads can resume after network restoration -- Upload sessions persist through network interruptions - -**Slow Network Conditions:** -- Adaptive timeouts prevent premature termination -- Rate limiting prevents network saturation -- Progress monitoring detects actual stalls vs. slow transfers - -### 2. 
Server Overload Scenarios - -**High Load Conditions:** -- Circuit breaker prevents cascade failures -- Backpressure slows down new requests gracefully -- Priority queuing ensures critical uploads continue - -**Memory Pressure:** -- Disk spillover prevents memory exhaustion -- Queue garbage collection manages memory usage -- Emergency mode provides last-resort protection - -### 3. Application Restart Scenarios - -**Graceful Shutdown:** -- Active uploads get time to complete -- Queue state is persisted before shutdown -- Connections are drained properly - -**Recovery After Restart:** -- Queue state is restored from persistence -- Upload sessions are recovered -- Dead letter items are reprocessed - -## Monitoring and Observability - -### Queue Health Metrics - -The enhanced configuration provides comprehensive metrics: - -- **Queue Length**: Current items in each queue -- **Queue Processing Time**: Time items spend in queue -- **Worker Health**: Individual worker status and performance -- **Circuit Breaker State**: Open/closed status and failure counts -- **Spillover Usage**: Disk spillover utilization -- **Dead Letter Queue**: Failed item counts and reasons - -### Log Messages - -Enhanced logging provides visibility into queue operations: - -``` -INFO: Queue backpressure activated (80% full) -WARN: Circuit breaker opened for upload queue (10 consecutive failures) -INFO: Spillover activated: 50MB written to disk -ERROR: Dead letter queue: Upload failed after 5 retries -INFO: Queue recovery: Restored 23 items from persistence -``` - -## Best Practices - -### 1. 
Configuration Tuning - -**For High-Volume Servers:** -```toml -uploadqueuesize = 200 -large_file_queue_size = 50 -small_file_queue_size = 500 -max_concurrent_uploads = 200 -queue_memory_limit = "1GB" -``` - -**For Memory-Constrained Environments:** -```toml -uploadqueuesize = 50 -spillover_to_disk = true -queue_memory_limit = "200MB" -emergency_mode_threshold = 0.85 -``` - -**For Mobile/Unreliable Networks:** -```toml -keep_alive_probe_interval = "15s" -upload_stall_timeout = "300s" -max_retry_attempts = 8 -retry_max_delay = "120s" -``` - -### 2. Monitoring Setup - -**Essential Metrics to Monitor:** -- Queue length trends -- Worker health status -- Circuit breaker activations -- Spillover usage -- Dead letter queue growth - -**Alert Thresholds:** -- Queue length > 80% capacity -- Circuit breaker open for > 5 minutes -- Dead letter queue growth > 10 items/hour -- Spillover usage > 50% of limit - -### 3. Troubleshooting - -**Common Issues and Solutions:** - -**Frequent Timeouts:** -- Check network stability -- Increase adaptive timeouts for file size -- Enable more aggressive keep-alive settings - -**Queue Backlogs:** -- Monitor worker health -- Check for resource constraints -- Consider increasing worker count - -**Memory Issues:** -- Enable disk spillover -- Reduce queue memory limit -- Increase garbage collection frequency - -## Implementation Notes - -The enhanced queue resilience features are designed to be: - -1. **Backward Compatible**: Existing configurations continue to work -2. **Opt-in**: Features can be enabled individually -3. **Performance Conscious**: Minimal overhead when not actively needed -4. **Configurable**: All aspects can be tuned for specific environments - -These enhancements make HMAC File Server significantly more robust in handling timeout scenarios while maintaining high performance and reliability. 
diff --git a/QUEUE_RESILIENCE_SUMMARY.md b/QUEUE_RESILIENCE_SUMMARY.md deleted file mode 100644 index 1942249..0000000 --- a/QUEUE_RESILIENCE_SUMMARY.md +++ /dev/null @@ -1,245 +0,0 @@ -# HMAC File Server Queue Resilience Enhancement Summary - -## Overview - -I've reviewed and enhanced the queuing system in HMAC File Server 3.2 Ultimate Fixed to make it significantly more robust in handling timeout scenarios. The improvements span multiple layers: configuration, queue management, worker health, and failure recovery. - -## Key Problems Addressed - -### 1. **Timeout-Related Queue Failures** -- **Problem**: Queued uploads timing out during network interruptions -- **Solution**: Adaptive timeouts based on file size, keep-alive monitoring, and resumable uploads - -### 2. **Queue Overflow During High Load** -- **Problem**: Memory queues filling up and rejecting new uploads -- **Solution**: Disk spillover, priority queuing, and backpressure control - -### 3. **Worker Health and Failure Detection** -- **Problem**: Failed workers blocking queue processing -- **Solution**: Continuous health monitoring, circuit breakers, and automatic recovery - -### 4. 
**Network Interruption Recovery** -- **Problem**: Lost uploads during network switching or disconnections -- **Solution**: Persistent queue state, upload session recovery, and graceful degradation - -## Enhanced Configuration Structure - -### Server-Level Resilience (`[server]` section) -```toml -# NEW: Advanced timeout handling -graceful_shutdown_timeout = "300s" # Complete active uploads before shutdown -request_timeout = "7200s" # 2-hour maximum for large files -upload_stall_timeout = "600s" # Detect stalled uploads -max_concurrent_uploads = 100 # Prevent resource exhaustion -connection_pool_size = 200 # Manage connection resources -``` - -### Enhanced Worker Management (`[workers]` section) -```toml -# NEW: Queue robustness features -queue_timeout = "300s" # Max queue wait time -priority_queue_enabled = true # Separate queues by file size -large_file_queue_size = 20 # Dedicated large file queue -circuit_breaker_enabled = true # Automatic failure detection -queue_backpressure_threshold = 0.8 # Gradual slowdown vs hard rejection -``` - -### Advanced Queue Resilience (`[queue_resilience]` section - NEW) -```toml -# Spillover and persistence -spillover_to_disk = true # Use disk when memory is full -persistent_queue = true # Survive server restarts -queue_recovery_enabled = true # Restore queue state after restart - -# Health monitoring -dead_letter_queue_enabled = true # Handle persistently failing uploads -queue_health_check_interval = "15s" # Continuous monitoring -emergency_mode_threshold = 0.95 # Last-resort protection - -# Priority management -priority_levels = 3 # High/Medium/Low priority queues -priority_aging_enabled = true # Prevent starvation -load_balancing_strategy = "least_connections" -``` - -### Comprehensive Timeout Configuration (`[timeouts]` section) -```toml -# NEW: Adaptive timeouts by file size -small_file_timeout = "60s" # < 10MB files -medium_file_timeout = "600s" # 10MB-100MB files -large_file_timeout = "3600s" # 100MB-1GB files 
-huge_file_timeout = "7200s" # > 1GB files - -# NEW: Connection resilience -keep_alive_probe_interval = "30s" # Detect network issues -keep_alive_probe_count = 9 # Retries before giving up - -# NEW: Intelligent retry logic -retry_base_delay = "1s" # Exponential backoff starting point -retry_max_delay = "60s" # Maximum backoff delay -max_retry_attempts = 5 # Retry limit -``` - -## Core Resilience Features - -### 1. **Multi-Tier Queue Architecture** -- **High Priority Queue**: Small files, urgent uploads -- **Medium Priority Queue**: Regular uploads -- **Low Priority Queue**: Large files, background uploads -- **Disk Spillover**: Unlimited capacity fallback -- **Dead Letter Queue**: Failed uploads for manual intervention - -### 2. **Intelligent Timeout Management** -- **Adaptive Timeouts**: Different limits based on file size -- **Progress Monitoring**: Distinguish between slow and stalled transfers -- **Keep-Alive Probing**: Early detection of network issues -- **Graceful Degradation**: Slow down rather than fail hard - -### 3. **Circuit Breaker Pattern** -- **Failure Detection**: Automatic detection of systemic issues -- **Fail-Fast**: Prevent cascade failures during outages -- **Auto-Recovery**: Intelligent retry after issues resolve -- **Metrics Integration**: Observable failure patterns - -### 4. **Worker Health Monitoring** -- **Continuous Monitoring**: Regular health checks for all workers -- **Performance Tracking**: Average processing time and error rates -- **Automatic Recovery**: Restart failed workers automatically -- **Load Balancing**: Route work to healthiest workers - -### 5. 
**Queue Persistence and Recovery** -- **State Persistence**: Queue contents survive server restarts -- **Session Recovery**: Resume interrupted uploads automatically -- **Redis Integration**: Distributed queue state for clustering -- **Disk Fallback**: Local persistence when Redis unavailable - -## Timeout Scenario Handling - -### Network Interruption Recovery -``` -User uploads 1GB file → Network switches from WiFi to 4G -ā”œā”€ā”€ Upload session persisted to Redis/disk -ā”œā”€ā”€ Keep-alive probes detect network change -ā”œā”€ā”€ Upload pauses gracefully (no data loss) -ā”œā”€ā”€ Network restored after 30 seconds -ā”œā”€ā”€ Upload session recovered from persistence -└── Upload resumes from last completed chunk -``` - -### Server Overload Protection -``` -100 concurrent uploads overwhelm server -ā”œā”€ā”€ Queue reaches 80% capacity (backpressure threshold) -ā”œā”€ā”€ New uploads get delayed (not rejected) -ā”œā”€ā”€ Circuit breaker monitors failure rate -ā”œā”€ā”€ Large files moved to disk spillover -ā”œā”€ā”€ Priority queue ensures small files continue -└── System degrades gracefully under load -``` - -### Application Restart Robustness -``` -Server restart during active uploads -ā”œā”€ā”€ Graceful shutdown waits 300s for completion -ā”œā”€ā”€ Active upload sessions persisted to disk -ā”œā”€ā”€ Queue state saved to Redis/disk -ā”œā”€ā”€ Server restarts with new configuration -ā”œā”€ā”€ Queue state restored from persistence -ā”œā”€ā”€ Upload sessions recovered automatically -└── Clients resume uploads seamlessly -``` - -## Performance Impact - -### Memory Usage -- **Queue Memory Limit**: Configurable cap on queue memory usage -- **Spillover Efficiency**: Only activates when memory queues full -- **Garbage Collection**: Regular cleanup of expired items - -### CPU Overhead -- **Health Monitoring**: Lightweight checks every 15-30 seconds -- **Circuit Breaker**: O(1) operations with atomic counters -- **Priority Aging**: Batched operations to minimize impact - -### 
Disk I/O -- **Spillover Optimization**: Sequential writes, batch operations -- **Persistence Strategy**: Asynchronous writes, configurable intervals -- **Recovery Efficiency**: Parallel restoration of queue state - -## Monitoring and Observability - -### Key Metrics Exposed -``` -# Queue health metrics -hmac_queue_length{priority="high|medium|low"} -hmac_queue_processing_time_seconds -hmac_spillover_items_total -hmac_circuit_breaker_state{state="open|closed|half_open"} - -# Worker health metrics -hmac_worker_health_status{worker_id="1",status="healthy|slow|failed"} -hmac_worker_processed_total{worker_id="1"} -hmac_worker_errors_total{worker_id="1"} - -# Timeout and retry metrics -hmac_timeouts_total{type="upload|download|queue"} -hmac_retries_total{reason="timeout|network|server_error"} -hmac_dead_letter_items_total -``` - -### Enhanced Logging -``` -INFO: Queue backpressure activated (queue 80% full) -WARN: Circuit breaker opened after 10 consecutive failures -INFO: Spillover activated: 156 items moved to disk -ERROR: Upload failed after 5 retries, moved to dead letter queue -INFO: Worker 3 marked as unhealthy (error rate 67%) -INFO: Queue recovery completed: 23 items restored from persistence -``` - -## Implementation Benefits - -### 1. **Zero Data Loss** -- Persistent queues survive server restarts -- Spillover prevents queue overflow -- Dead letter queue captures failed items - -### 2. **Graceful Degradation** -- Backpressure instead of hard rejections -- Priority queuing maintains service for small files -- Circuit breakers prevent cascade failures - -### 3. **Network Resilience** -- Keep-alive probing detects network issues early -- Adaptive timeouts handle slow connections -- Upload session recovery survives interruptions - -### 4. **Operational Visibility** -- Comprehensive metrics for monitoring -- Detailed logging for troubleshooting -- Health dashboards for proactive management - -### 5. 
**Tunable Performance** -- All aspects configurable per environment -- Resource limits prevent system exhaustion -- Emergency modes provide last-resort protection - -## Migration and Deployment - -### Backward Compatibility -- All new features are opt-in -- Existing configurations continue working -- Gradual migration path available - -### Configuration Validation -- Startup validation of all timeout values -- Warnings for suboptimal configurations -- Auto-adjustment for invalid settings - -### Testing Recommendations -- Load testing with various file sizes -- Network interruption simulation -- Server restart scenarios -- Memory pressure testing - -This comprehensive queue resilience enhancement makes HMAC File Server 3.2 Ultimate Fixed significantly more robust in handling timeout scenarios while maintaining high performance and providing excellent operational visibility. diff --git a/config-clean.toml b/config-default.toml similarity index 100% rename from config-clean.toml rename to config-default.toml diff --git a/monitor_nginx.sh b/monitor_nginx.sh new file mode 100644 index 0000000..e69de29 diff --git a/monitor_server.sh b/monitor_server.sh new file mode 100644 index 0000000..e69de29 diff --git a/monitor_uploads.sh b/monitor_uploads.sh new file mode 100644 index 0000000..e69de29 diff --git a/test_upload.txt b/test_upload.txt deleted file mode 100644 index 249f6d8..0000000 --- a/test_upload.txt +++ /dev/null @@ -1 +0,0 @@ -Hello, HMAC File Server! Fri Jul 18 11:35:16 AM UTC 2025 diff --git a/tests/README.md b/tests/README.md deleted file mode 100644 index a1c487d..0000000 --- a/tests/README.md +++ /dev/null @@ -1,86 +0,0 @@ -# HMAC File Server Test Suite - -This directory contains test scripts, monitoring tools, and test data files for the HMAC File Server. 
- -## Test Scripts - -### Protocol Testing -- `test_final_xmpp.sh` - Complete XEP-0363 protocol testing (all variants: v1, v2, v3, token) -- `test_xmpp_simulation.sh` - XMPP client simulation for upload testing -- `test_url_formats.sh` - URL format validation and testing -- `verify_xmpp_upload.sh` - XMPP upload verification script - -### Performance Testing -- `comprehensive_upload_test.sh` - Comprehensive upload performance testing -- `test_upload_queue.sh` - Queue performance and concurrent upload testing -- `test_upload_completion.sh` - Upload completion and reliability testing - -### Feature Testing -- `test_deduplication.sh` - File deduplication functionality testing -- `test_direct_connection.sh` - Direct server connection testing -- `test_path_discovery.sh` - Path discovery and routing testing - -### Debugging & Monitoring -- `debug_upload.sh` - Upload debugging and troubleshooting script -- `monitor_server.sh` - Server status and performance monitoring -- `monitor_nginx.sh` - Nginx proxy monitoring -- `monitor_uploads.sh` - Upload activity monitoring - -## Test Data Files - -### Small Test Files -- `test_1mb.txt` / `test_1mb.bin` - 1MB test files for basic functionality -- `test_upload.txt` - Small text file for quick testing -- `chunk_0.bin` - Chunked upload test data - -### Large Test Files -- `test_50mb.bin` - 50MB file for medium-size upload testing -- `test_215mb.bin` - 215MB file for large upload testing -- `test_4gb.bin` / `test_4gb.txt` - 4GB files for stress testing - -## Analysis Tools - -- `xep0363_analysis.ipynb` - Jupyter notebook for XEP-0363 protocol analysis - -## Usage Examples - -### Quick Protocol Test -```bash -cd tests -./test_final_xmpp.sh -``` - -### Performance Testing -```bash -cd tests -./comprehensive_upload_test.sh -./test_upload_queue.sh -``` - -### Deduplication Testing -```bash -cd tests -./test_deduplication.sh -``` - -### Monitor Server -```bash -cd tests -./monitor_server.sh -``` - -## Test Environment - -These tests are 
designed to work with: -- HMAC File Server 3.2 -- nginx reverse proxy (standard configuration) -- Extended timeout settings (4800s) -- Deduplication enabled -- Dynamic worker scaling - -## Notes - -- Large test files (4GB) are intended for stress testing extended timeout configurations -- All scripts include proper error handling and cleanup -- Monitor scripts provide real-time status information -- Test scripts validate both success and failure scenarios diff --git a/tests/chunk_0.bin b/tests/chunk_0.bin deleted file mode 100644 index ee149f2..0000000 Binary files a/tests/chunk_0.bin and /dev/null differ diff --git a/tests/comprehensive_upload_test.sh b/tests/comprehensive_upload_test.sh deleted file mode 100755 index ed61de0..0000000 --- a/tests/comprehensive_upload_test.sh +++ /dev/null @@ -1,267 +0,0 @@ -#!/bin/bash - -# Comprehensive XMPP Upload Test Script -# Tests multiple upload scenarios with real-time debugging - -echo "=== COMPREHENSIVE UPLOAD TEST SCRIPT ===" -echo "This script will test multiple upload scenarios while monitoring logs" -echo "Date: $(date)" -echo "" - -# Configuration -SERVER_URL="https://share.uuxo.net" -LOCAL_URL="http://localhost:8080" -SECRET="f6g4ldPvQM7O2UTFeBEUUj33VrXypDAcsDt0yqKrLiOr5oQW" -TEST_DIR="/tmp/upload_tests" - -# Create test directory -mkdir -p "$TEST_DIR" -cd "$TEST_DIR" - -# Function to generate HMAC signature for v3 protocol -generate_v3_signature() { - local method="$1" - local expires="$2" - local path="$3" - local message="${method}\n${expires}\n${path}" - echo -n "$message" | openssl dgst -sha256 -hmac "$SECRET" -hex | cut -d' ' -f2 -} - -# Function to start log monitoring -start_monitoring() { - echo "Starting log monitoring in background..." 
- - # Kill any existing monitoring - pkill -f "tail.*hmac-file-server" 2>/dev/null - pkill -f "tail.*nginx.*share" 2>/dev/null - - # Start nginx monitoring - echo "=== NGINX ACCESS LOG ===" > /tmp/nginx_monitor.log - sudo tail -f /var/log/nginx/share_access.log >> /tmp/nginx_monitor.log 2>&1 & - NGINX_PID=$! - - # Start server monitoring - echo "=== HMAC SERVER LOG ===" > /tmp/server_monitor.log - sudo tail -f /var/log/hmac-file-server/hmac-file-server.log >> /tmp/server_monitor.log 2>&1 & - SERVER_PID=$! - - sleep 1 - echo "Monitoring started (nginx PID: $NGINX_PID, server PID: $SERVER_PID)" -} - -# Function to stop monitoring and show results -stop_monitoring() { - echo "Stopping monitors..." - kill $NGINX_PID $SERVER_PID 2>/dev/null - sleep 1 - - echo "" - echo "=== NGINX LOG RESULTS ===" - tail -10 /tmp/nginx_monitor.log 2>/dev/null || echo "No nginx activity detected" - - echo "" - echo "=== SERVER LOG RESULTS ===" - tail -10 /tmp/server_monitor.log 2>/dev/null || echo "No server activity detected" - echo "" -} - -# Function to create test files -create_test_files() { - echo "Creating test files..." 
- - # Small file (1KB) - echo "This is a small test file for upload testing" > small_test.txt - echo "Content: Basic text file" >> small_test.txt - - # Medium file (1MB) - dd if=/dev/zero of=medium_test.bin bs=1024 count=1024 2>/dev/null - - # Large file (5MB) - dd if=/dev/zero of=large_test.bin bs=1024 count=5120 2>/dev/null - - # Video file simulation (1MB with .mp4 extension) - cp medium_test.bin test_video.mp4 - - echo "Test files created:" - ls -lh *.txt *.bin *.mp4 2>/dev/null - echo "" -} - -# Function to test different upload protocols -test_upload_protocol() { - local protocol="$1" - local filename="$2" - local description="$3" - - echo "--- Testing $protocol Protocol: $description ---" - - # Generate test parameters - local expires=$(date -d "+1 hour" +%s) - local path="/test_${protocol}/${filename}" - local url="" - local signature="" - - case "$protocol" in - "v3") - signature=$(generate_v3_signature "PUT" "$expires" "$path") - url="${SERVER_URL}${path}?v3=${signature}&expires=${expires}" - ;; - "v2") - signature=$(echo -n "PUT${path}" | openssl dgst -sha256 -hmac "$SECRET" -hex | cut -d' ' -f2) - url="${SERVER_URL}${path}?v2=${signature}" - ;; - "v1") - signature=$(echo -n "PUT${path}" | openssl dgst -sha256 -hmac "$SECRET" -hex | cut -d' ' -f2) - url="${SERVER_URL}${path}?v=${signature}" - ;; - "token") - signature=$(echo -n "PUT${path}" | openssl dgst -sha256 -hmac "$SECRET" -hex | cut -d' ' -f2) - url="${SERVER_URL}${path}?token=${signature}" - ;; - esac - - echo "URL: $url" - echo "File: $filename ($(stat -f%z "$filename" 2>/dev/null || stat -c%s "$filename")bytes)" - - # Start monitoring for this test - echo "Starting upload test..." 
- - # Perform upload - local start_time=$(date +%s.%N) - local response=$(curl -s -w "HTTPSTATUS:%{http_code};TIME:%{time_total}" \ - -X PUT \ - --data-binary "@$filename" \ - -H "User-Agent: XMPP-Upload-Test/1.0" \ - -H "Content-Type: application/octet-stream" \ - "$url" 2>&1) - local end_time=$(date +%s.%N) - - # Parse response - local http_code=$(echo "$response" | grep -o "HTTPSTATUS:[0-9]*" | cut -d: -f2) - local time_total=$(echo "$response" | grep -o "TIME:[0-9.]*" | cut -d: -f2) - local body=$(echo "$response" | sed 's/HTTPSTATUS:[0-9]*;TIME:[0-9.]*$//') - - # Calculate duration - local duration=$(echo "$end_time - $start_time" | bc 2>/dev/null || echo "N/A") - - echo "Result: HTTP $http_code (${time_total}s)" - if [[ "$http_code" =~ ^[45] ]]; then - echo "Error body: $body" - elif [[ "$http_code" == "200" ]]; then - echo "āœ… SUCCESS: Upload completed" - echo "Response: $body" - else - echo "Response: $body" - fi - - echo "Duration: ${duration}s" - echo "" - - # Brief pause to separate log entries - sleep 2 -} - -# Function to test deduplication -test_deduplication() { - echo "--- Testing Deduplication ---" - echo "Uploading the same file twice to test deduplication logic" - - # First upload - echo "1. First upload (should create new file):" - test_upload_protocol "v3" "small_test.txt" "Dedup Test #1" - - # Second upload (should deduplicate) - echo "2. 
Second upload (should deduplicate):" - test_upload_protocol "v3" "small_test.txt" "Dedup Test #2" -} - -# Function to test storage scenarios -test_storage_scenarios() { - echo "--- Testing Different Storage Scenarios ---" - - # Test small file - test_upload_protocol "v3" "small_test.txt" "Small File (1KB)" - - # Test medium file - test_upload_protocol "v3" "medium_test.bin" "Medium File (1MB)" - - # Test video file - test_upload_protocol "v3" "test_video.mp4" "Video File (.mp4)" - - # Test large file - test_upload_protocol "v3" "large_test.bin" "Large File (5MB)" -} - -# Function to test all protocols -test_all_protocols() { - echo "--- Testing All XEP-0363 Protocol Variants ---" - - test_upload_protocol "v3" "small_test.txt" "XEP-0363 v3 (mod_http_upload_external)" - test_upload_protocol "v2" "small_test.txt" "XEP-0363 v2 (extended)" - test_upload_protocol "v1" "small_test.txt" "XEP-0363 v1 (basic)" - test_upload_protocol "token" "small_test.txt" "XEP-0363 token (alternative)" -} - -# Function to show current configuration -show_configuration() { - echo "=== Current Server Configuration ===" - echo "Deduplication: $(sudo grep deduplication_enabled /etc/hmac-file-server/config.toml | cut -d'=' -f2 | tr -d ' ')" - echo "Max Upload: $(sudo grep max_upload_size /etc/hmac-file-server/config.toml | cut -d'"' -f2)" - echo "ClamAV: $(sudo grep clamavenabled /etc/hmac-file-server/config.toml | cut -d'=' -f2 | tr -d ' ')" - echo "Global Extensions: $(sudo grep global_extensions /etc/hmac-file-server/config.toml | cut -d'[' -f2 | cut -d']' -f1)" - echo "Log Level: $(sudo grep 'level =' /etc/hmac-file-server/config.toml | cut -d'"' -f2)" - echo "Server Status: $(systemctl is-active hmac-file-server)" - echo "" -} - -# Function to cleanup -cleanup() { - echo "Cleaning up..." 
- stop_monitoring - rm -rf "$TEST_DIR" 2>/dev/null - echo "Cleanup complete" -} - -# Trap for cleanup on exit -trap cleanup EXIT - -# Main execution -main() { - show_configuration - create_test_files - start_monitoring - - echo "=== STARTING COMPREHENSIVE UPLOAD TESTS ===" - echo "Monitor logs in real-time:" - echo " nginx: tail -f /tmp/nginx_monitor.log" - echo " server: tail -f /tmp/server_monitor.log" - echo "" - - # Test 1: Protocol variants - echo "šŸ”„ TEST 1: All Protocol Variants" - test_all_protocols - - # Test 2: Storage scenarios - echo "šŸ”„ TEST 2: Storage Scenarios" - test_storage_scenarios - - # Test 3: Deduplication - echo "šŸ”„ TEST 3: Deduplication" - test_deduplication - - echo "=== TEST SUMMARY ===" - echo "All tests completed. Check the results above." - echo "If you see HTTP 401 errors, that's expected (HMAC signature validation)." - echo "If you see HTTP 200 responses, uploads are working!" - echo "If you see no nginx log entries, requests aren't reaching the server." - echo "" - - stop_monitoring - - echo "Log files saved to:" - echo " nginx: /tmp/nginx_monitor.log" - echo " server: /tmp/server_monitor.log" -} - -# Run main function -main "$@" diff --git a/tests/debug_upload.sh b/tests/debug_upload.sh deleted file mode 100755 index 03ce0bb..0000000 --- a/tests/debug_upload.sh +++ /dev/null @@ -1,105 +0,0 @@ -#!/bin/bash - -# Simple test to debug the 49% upload stop issue - -set -e - -echo "[DEBUG-TEST] Starting server..." -./hmac-file-server --config config-network-resilience.toml > debug_server.log 2>&1 & -SERVER_PID=$! - -# Wait for server to start -sleep 3 - -# Check if server is running -if ! kill -0 $SERVER_PID 2>/dev/null; then - echo "[ERROR] Server failed to start" - cat debug_server.log - exit 1 -fi - -cleanup() { - echo "[DEBUG-TEST] Cleaning up..." - kill $SERVER_PID 2>/dev/null || true - rm -f debug_server.log -} - -trap cleanup EXIT - -echo "[DEBUG-TEST] Testing 50MB chunked upload..." 
- -# Calculate HMAC signature -SECRET="your-super-secret-hmac-key-minimum-32-characters-long" -MESSAGE="/chunked-upload" -SIGNATURE=$(echo -n "$MESSAGE" | openssl dgst -sha256 -hmac "$SECRET" | cut -d' ' -f2) - -# Start session -echo "[DEBUG-TEST] Creating session..." -SESSION_RESPONSE=$(curl -s -X POST \ - -H "X-Filename: test_50mb.bin" \ - -H "X-Total-Size: 52428800" \ - -H "X-Signature: $SIGNATURE" \ - http://localhost:8080/chunked-upload) - -echo "[DEBUG-TEST] Session response: $SESSION_RESPONSE" - -SESSION_ID=$(echo "$SESSION_RESPONSE" | grep -o '"session_id":"[^"]*"' | cut -d'"' -f4) -if [ -z "$SESSION_ID" ]; then - echo "[ERROR] Failed to get session ID" - exit 1 -fi - -echo "[DEBUG-TEST] Session ID: $SESSION_ID" - -# Upload first few chunks to see what happens -CHUNK_SIZE=5242880 # 5MB -for i in {0..12}; do # Upload first 13 chunks (65MB worth, should trigger completion) - OFFSET=$((i * CHUNK_SIZE)) - - echo "[DEBUG-TEST] Creating chunk $i..." - dd if=test_50mb.bin of=chunk_$i.bin bs=$CHUNK_SIZE skip=$i count=1 2>/dev/null || { - # Handle the last chunk - REMAINING=$((52428800 - OFFSET)) - if [ $REMAINING -gt 0 ]; then - dd if=test_50mb.bin of=chunk_$i.bin bs=1 skip=$OFFSET count=$REMAINING 2>/dev/null - else - echo "[DEBUG-TEST] No more data for chunk $i" - break - fi - } - - CHUNK_SIZE_ACTUAL=$(stat -f%z chunk_$i.bin 2>/dev/null || stat -c%s chunk_$i.bin 2>/dev/null) - echo "[DEBUG-TEST] Uploading chunk $i (size: $CHUNK_SIZE_ACTUAL bytes)..." 
- - UPLOAD_RESPONSE=$(curl -s -w "\n%{http_code}" -X PUT \ - -H "X-Upload-Session-ID: $SESSION_ID" \ - -H "X-Chunk-Number: $i" \ - --data-binary @chunk_$i.bin \ - http://localhost:8080/chunked-upload) - - echo "[DEBUG-TEST] Upload response for chunk $i:" - echo "$UPLOAD_RESPONSE" - echo "---" - - # Check server logs for debug output - echo "[DEBUG-TEST] Recent server logs:" - tail -5 debug_server.log - echo "---" - - # Check if complete - if echo "$UPLOAD_RESPONSE" | grep -q '"complete":true'; then - echo "[DEBUG-TEST] āœ… Upload completed at chunk $i" - rm -f chunk_*.bin - exit 0 - fi - - rm -f chunk_$i.bin - sleep 1 -done - -echo "[DEBUG-TEST] Upload did not complete. Checking status..." -STATUS_RESPONSE=$(curl -s "http://localhost:8080/upload-status?session_id=$SESSION_ID") -echo "[DEBUG-TEST] Final status: $STATUS_RESPONSE" - -echo "[DEBUG-TEST] Full server logs:" -cat debug_server.log diff --git a/tests/monitor_nginx.sh b/tests/monitor_nginx.sh deleted file mode 100755 index c04ca56..0000000 --- a/tests/monitor_nginx.sh +++ /dev/null @@ -1,34 +0,0 @@ -#!/bin/bash - -# Terminal 1: nginx Monitoring Script -echo "=== NGINX ACCESS LOG MONITOR ===" -echo "Monitoring: /var/log/nginx/share_access.log" -echo "Press Ctrl+C to stop" -echo "" -echo "Waiting for upload requests..." -echo "$(date): Monitor started" -echo "" - -# Monitor nginx access logs with timestamps -sudo tail -f /var/log/nginx/share_access.log | while read line; do - if [[ -n "$line" ]]; then - echo "[$(date '+%H:%M:%S')] NGINX: $line" - - # Highlight important patterns - if echo "$line" | grep -q "PUT"; then - echo "*** PUT REQUEST DETECTED ***" - fi - - if echo "$line" | grep -q " 401 "; then - echo "!!! AUTH FAILURE (401) !!!" 
- fi - - if echo "$line" | grep -q " 200 "; then - echo "āœ… SUCCESS (200) āœ…" - fi - - if echo "$line" | grep -q " 40[0-9] \| 50[0-9] "; then - echo "āŒ ERROR RESPONSE āŒ" - fi - fi -done diff --git a/tests/monitor_server.sh b/tests/monitor_server.sh deleted file mode 100755 index 0842767..0000000 --- a/tests/monitor_server.sh +++ /dev/null @@ -1,52 +0,0 @@ -#!/bin/bash - -# Terminal 2: HMAC Server Monitoring Script -echo "=== HMAC SERVER LOG MONITOR ===" -echo "Monitoring: /var/log/hmac-file-server/hmac-file-server.log" -echo "Press Ctrl+C to stop" -echo "" -echo "Waiting for upload activity..." -echo "$(date): Monitor started" -echo "" - -# Monitor server logs with filtering and highlighting -sudo tail -f /var/log/hmac-file-server/hmac-file-server.log | while read line; do - # Skip debug worker messages unless they're important - if echo "$line" | grep -q "DEBUG.*Worker\|NumWorkers\|NumScanWorkers" && ! echo "$line" | grep -q "upload\|error\|fail"; then - continue - fi - - if [[ -n "$line" ]]; then - echo "[$(date '+%H:%M:%S')] SERVER: $line" - - # Highlight upload-related activity - if echo "$line" | grep -qi "upload\|PUT\|POST"; then - echo "šŸ“¤ UPLOAD ACTIVITY DETECTED" - fi - - # Highlight HMAC validation - if echo "$line" | grep -qi "hmac\|auth\|signature"; then - echo "šŸ” HMAC VALIDATION ACTIVITY" - fi - - # Highlight deduplication - if echo "$line" | grep -qi "dedup"; then - echo "šŸ”— DEDUPLICATION ACTIVITY" - fi - - # Highlight errors - if echo "$line" | grep -qi "error\|fail\|fatal"; then - echo "āŒ ERROR DETECTED āŒ" - fi - - # Highlight success - if echo "$line" | grep -qi "success"; then - echo "āœ… SUCCESS DETECTED āœ…" - fi - - # Highlight file operations - if echo "$line" | grep -qi "file.*created\|file.*stored\|file.*saved"; then - echo "šŸ’¾ FILE STORAGE ACTIVITY" - fi - fi -done diff --git a/tests/monitor_uploads.sh b/tests/monitor_uploads.sh deleted file mode 100755 index 985b0b5..0000000 --- a/tests/monitor_uploads.sh +++ /dev/null 
@@ -1,61 +0,0 @@ -#!/bin/bash - -# Monitor script to watch for XMPP upload activity -# This will help verify that our performance optimizations are working - -echo "=== HMAC File Server Upload Monitor ===" -echo "Watching for upload activity on share.uuxo.net..." -echo "Press Ctrl+C to stop" -echo "" - -# Function to show current configuration status -show_status() { - echo "Current Configuration Status:" - echo "- Max Upload Size: $(grep max_upload_size /etc/hmac-file-server/config.toml | cut -d'"' -f2)" - echo "- ClamAV Enabled: $(grep clamavenabled /etc/hmac-file-server/config.toml | cut -d'=' -f2 | tr -d ' ')" - echo "- Deduplication: $(grep deduplication_enabled /etc/hmac-file-server/config.toml | cut -d'=' -f2 | tr -d ' ')" - echo "- File Naming: $(grep file_naming /etc/hmac-file-server/config.toml | cut -d'"' -f2)" - echo "" -} - -# Function to monitor logs -monitor_logs() { - echo "Starting real-time log monitoring..." - echo "Monitoring multiple log sources:" - echo "1. HMAC Server logs (/var/log/hmac-file-server/hmac-file-server.log)" - echo "2. Share nginx access logs (/var/log/nginx/share_access.log)" - echo "3. Share nginx error logs (/var/log/nginx/share_error.log)" - echo "" - - # Run tail on multiple files simultaneously - sudo tail -f /var/log/hmac-file-server/hmac-file-server.log \ - /var/log/nginx/share_access.log \ - /var/log/nginx/share_error.log 2>/dev/null | \ - while read line; do - timestamp=$(date '+%H:%M:%S') - echo "[$timestamp] $line" - - # Highlight important upload events - if echo "$line" | grep -qi "PUT\|upload\|POST"; then - echo "*** UPLOAD ACTIVITY DETECTED ***" - fi - - if echo "$line" | grep -qi "error\|failed\|timeout"; then - echo "!!! ERROR/ISSUE DETECTED !!!" 
- fi - - if echo "$line" | grep -qi "clamav\|scan"; then - echo ">>> ClamAV ACTIVITY <<<" - fi - - if echo "$line" | grep -qi "dedup"; then - echo ">>> DEDUPLICATION ACTIVITY <<<" - fi - done -} - -# Show current status -show_status - -# Start monitoring -monitor_logs diff --git a/tests/test_1mb.bin b/tests/test_1mb.bin deleted file mode 100644 index 9e0f96a..0000000 Binary files a/tests/test_1mb.bin and /dev/null differ diff --git a/tests/test_1mb.txt b/tests/test_1mb.txt deleted file mode 100644 index a2e74b5..0000000 --- a/tests/test_1mb.txt +++ /dev/null @@ -1 +0,0 @@ -Hello, HMAC File Server! Do 17. Jul 18:59:11 CEST 2025 diff --git a/tests/test_215mb.bin b/tests/test_215mb.bin deleted file mode 100644 index 0c8322c..0000000 Binary files a/tests/test_215mb.bin and /dev/null differ diff --git a/tests/test_4gb.bin b/tests/test_4gb.bin deleted file mode 100644 index 451971a..0000000 Binary files a/tests/test_4gb.bin and /dev/null differ diff --git a/tests/test_4gb.txt b/tests/test_4gb.txt deleted file mode 100644 index a2e74b5..0000000 --- a/tests/test_4gb.txt +++ /dev/null @@ -1 +0,0 @@ -Hello, HMAC File Server! Do 17. Jul 18:59:11 CEST 2025 diff --git a/tests/test_50mb.bin b/tests/test_50mb.bin deleted file mode 100644 index 514783a..0000000 Binary files a/tests/test_50mb.bin and /dev/null differ diff --git a/tests/test_upload.txt b/tests/test_upload.txt deleted file mode 100644 index dc4023c..0000000 --- a/tests/test_upload.txt +++ /dev/null @@ -1 +0,0 @@ -Hello, HMAC File Server! Thu Jul 17 05:40:07 PM UTC 2025 diff --git a/tests/verify_xmpp_upload.sh b/tests/verify_xmpp_upload.sh deleted file mode 100755 index cd53142..0000000 --- a/tests/verify_xmpp_upload.sh +++ /dev/null @@ -1,83 +0,0 @@ -#!/bin/bash - -# XMPP Upload Verification Script -# Tests HMAC validation and upload process - -echo "=== XMPP Upload Verification ===" -echo "Testing HMAC File Server configuration for XMPP uploads" -echo "" - -# Configuration check -echo "1. 
Configuration Status:" -echo " Secret configured: $(sudo grep -c "secret.*f6g4ldPvQM7O2UTFeBEUUj33VrXypDAcsDt0yqKrLiOr5oQW" /etc/hmac-file-server/config.toml > /dev/null && echo "āœ… YES" || echo "āŒ NO")" -echo " Deduplication limit: $(sudo grep maxsize /etc/hmac-file-server/config.toml | cut -d'"' -f2)" -echo " Max upload size: $(sudo grep max_upload_size /etc/hmac-file-server/config.toml | cut -d'"' -f2)" -echo " ClamAV enabled: $(sudo grep clamavenabled /etc/hmac-file-server/config.toml | cut -d'=' -f2 | tr -d ' ')" -echo "" - -# Server status -echo "2. Server Status:" -echo " Service status: $(systemctl is-active hmac-file-server)" -echo " Health endpoint: $(curl -s -w "%{http_code}" http://localhost:8080/health -o /dev/null)" -echo " Process running: $(pgrep -f hmac-file-server > /dev/null && echo "āœ… YES" || echo "āŒ NO")" -echo "" - -# Network connectivity -echo "3. Network Configuration:" -echo " nginx stream (443→4443): $(sudo netstat -tlnp | grep :443 | grep -q nginx && echo "āœ… ACTIVE" || echo "āŒ NOT FOUND")" -echo " nginx HTTP (4443→8080): $(sudo netstat -tlnp | grep :4443 | grep -q nginx && echo "āœ… ACTIVE" || echo "āŒ NOT FOUND")" -echo " HMAC server (8080): $(sudo netstat -tlnp | grep :8080 | grep -q hmac && echo "āœ… LISTENING" || echo "āŒ NOT LISTENING")" -echo "" - -# XEP-0363 protocol support -echo "4. XEP-0363 Protocol Support:" -echo " v1 support: āœ… YES (basic XEP-0363)" -echo " v2 support: āœ… YES (extended XEP-0363)" -echo " v3 support: āœ… YES (mod_http_upload_external)" -echo " Token support: āœ… YES (alternative auth)" -echo "" - -# HMAC signature validation -echo "5. HMAC Signature Features:" -echo " Grace period for XMPP clients: āœ… 2 hours" -echo " Extended grace for large files: āœ… Dynamic (2min/100MB)" -echo " Maximum grace period: āœ… 4 hours" -echo " Client detection: āœ… Gajim, Dino, Conversations" -echo "" - -# Upload optimization status -echo "6. 
Upload Optimizations:" -echo " Large file deduplication: āœ… SKIPPED (>1GB)" -echo " ClamAV scanning: āœ… DISABLED" -echo " nginx timeouts: āœ… 4800s (80 minutes)" -echo " File naming: āœ… ORIGINAL (proper MIME types)" -echo "" - -# Recent activity check -echo "7. Recent Activity:" -RECENT_LOGS=$(sudo tail -5 /var/log/hmac-file-server/hmac-file-server.log 2>/dev/null | grep -v "DEBUG\|Worker" | wc -l) -echo " Recent server logs: $RECENT_LOGS entries" - -NGINX_ACTIVITY=$(sudo tail -5 /var/log/nginx/share_access.log 2>/dev/null | wc -l) -echo " Recent nginx activity: $NGINX_ACTIVITY requests" - -echo "" -echo "8. Troubleshooting:" -echo " If uploads still show 'endless encryption':" -echo " → Check if upload is actually starting (monitor nginx logs)" -echo " → Verify ejabberd is sending correct HMAC signatures" -echo " → Test with smaller file first to isolate the issue" -echo " → Monitor real-time: /root/hmac-file-server/monitor_uploads.sh" -echo "" - -# Test suggestions -echo "9. Next Steps:" -echo " 1. Try uploading a small test file first" -echo " 2. Monitor logs during upload: sudo tail -f /var/log/nginx/share_access.log" -echo " 3. Check HMAC signature validation in server logs" -echo " 4. Verify ejabberd cluster is generating valid upload URLs" -echo "" - -echo "=== Verification Complete ===" -echo "All optimizations are in place. The 1GB deduplication limit should" -echo "eliminate the 'endless encryption' delay for your large video files." diff --git a/tests/xep0363_analysis.ipynb b/tests/xep0363_analysis.ipynb deleted file mode 100644 index 99453c0..0000000 --- a/tests/xep0363_analysis.ipynb +++ /dev/null @@ -1,556 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "id": "d4b71234", - "metadata": {}, - "source": [ - "# XEP-0363 HTTP File Upload Analysis for HMAC File Server\n", - "\n", - "## Problem Statement\n", - "Large file uploads (970MB) through XMPP clients (Gajim, Dino, Conversations) are failing with \"bad gateway\" errors. 
This analysis examines XEP-0363 specification compliance and identifies configuration issues.\n", - "\n", - "## Analysis Scope\n", - "- XEP-0363 specification requirements\n", - "- HMAC file server configuration\n", - "- Prosody mod_http_file_share comparison\n", - "- XMPP client implementation differences\n", - "- Large file upload optimization\n", - "\n", - "## Current Issue\n", - "- File size: 970MB\n", - "- Error: Gateway timeout\n", - "- Clients affected: Gajim, Dino, Conversations\n", - "- Server: HMAC File Server 3.2 with nginx proxy" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "760564a7", - "metadata": {}, - "outputs": [], - "source": [ - "# Import Required Libraries\n", - "import requests\n", - "import json\n", - "import toml\n", - "import xml.etree.ElementTree as ET\n", - "import re\n", - "import pandas as pd\n", - "from datetime import datetime\n", - "import subprocess\n", - "import os\n", - "from pathlib import Path\n", - "\n", - "print(\"Libraries imported successfully\")\n", - "print(f\"Analysis started at: {datetime.now()}\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "30355db7", - "metadata": {}, - "outputs": [], - "source": [ - "# Parse TOML Configuration\n", - "config_path = \"/etc/hmac-file-server/config.toml\"\n", - "dockerenv_config = \"/root/hmac-file-server/dockerenv/config/config.toml\"\n", - "\n", - "try:\n", - " # Try production config first\n", - " with open(config_path, 'r') as f:\n", - " config = toml.load(f)\n", - " config_source = \"Production\"\n", - "except FileNotFoundError:\n", - " # Fallback to dockerenv config\n", - " with open(dockerenv_config, 'r') as f:\n", - " config = toml.load(f)\n", - " config_source = \"Development\"\n", - "\n", - "print(f\"Configuration loaded from: {config_source}\")\n", - "print(\"\\n=== Key Upload Settings ===\")\n", - "print(f\"Max Upload Size: {config['server'].get('max_upload_size', 'Not set')}\")\n", - "print(f\"Max Header Bytes: 
{config['server'].get('max_header_bytes', 'Not set')}\")\n", - "print(f\"Read Timeout: {config.get('timeouts', {}).get('readtimeout', 'Not set')}\")\n", - "print(f\"Write Timeout: {config.get('timeouts', {}).get('writetimeout', 'Not set')}\")\n", - "print(f\"Chunked Uploads: {config.get('uploads', {}).get('chunked_uploads_enabled', 'Not set')}\")\n", - "print(f\"Chunk Size: {config.get('uploads', {}).get('chunk_size', 'Not set')}\")\n", - "\n", - "# Store for later analysis\n", - "server_config = config" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "831143c1", - "metadata": {}, - "outputs": [], - "source": [ - "# Download and Parse XEP-0363 Specification\n", - "print(\"=== XEP-0363 Key Requirements Analysis ===\")\n", - "\n", - "# Key requirements from XEP-0363 specification\n", - "xep0363_requirements = {\n", - " \"slot_request\": {\n", - " \"method\": \"IQ-get\",\n", - " \"namespace\": \"urn:xmpp:http:upload:0\",\n", - " \"required_attributes\": [\"filename\", \"size\"],\n", - " \"optional_attributes\": [\"content-type\"]\n", - " },\n", - " \"slot_response\": {\n", - " \"put_url\": \"HTTPS URL for upload\",\n", - " \"get_url\": \"HTTPS URL for download\", \n", - " \"headers\": [\"Authorization\", \"Cookie\", \"Expires\"]\n", - " },\n", - " \"upload_requirements\": {\n", - " \"method\": \"HTTP PUT\",\n", - " \"content_length_match\": \"MUST match size in slot request\",\n", - " \"content_type_match\": \"SHOULD match if specified\",\n", - " \"success_code\": \"201 Created\",\n", - " \"timeout_recommendation\": \"~300s for PUT URL validity\"\n", - " },\n", - " \"error_conditions\": {\n", - " \"file_too_large\": \"not-acceptable + file-too-large\",\n", - " \"quota_exceeded\": \"resource-constraint + retry element\",\n", - " \"auth_failure\": \"forbidden\"\n", - " }\n", - "}\n", - "\n", - "print(\"āœ… Slot Request Process:\")\n", - "print(\" 1. Client sends IQ-get with filename, size, content-type\")\n", - "print(\" 2. 
Server responds with PUT/GET URLs + optional headers\")\n", - "print(\" 3. Client performs HTTP PUT to upload URL\")\n", - "print(\" 4. Server returns 201 Created on success\")\n", - "\n", - "print(\"\\nāœ… Critical Requirements:\")\n", - "print(\" - Content-Length MUST match slot request size\")\n", - "print(\" - HTTPS required for both PUT and GET URLs\")\n", - "print(\" - Server SHOULD reject if Content-Type doesn't match\")\n", - "print(\" - PUT URL timeout ~300s recommended\")\n", - "\n", - "print(\"\\nāš ļø Large File Considerations:\")\n", - "print(\" - No chunking specified in XEP-0363\")\n", - "print(\" - Single HTTP PUT for entire file\")\n", - "print(\" - Server timeouts critical for large files\")\n", - "print(\" - Client must handle long upload times\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "8d1af4e5", - "metadata": {}, - "outputs": [], - "source": [ - "# Analyze Prosody mod_http_file_share Documentation\n", - "print(\"=== Prosody mod_http_file_share Settings ===\")\n", - "\n", - "prosody_defaults = {\n", - " \"http_file_share_size_limit\": \"10*1024*1024\", # 10 MiB\n", - " \"http_file_share_daily_quota\": \"100*1024*1024\", # 100 MiB\n", - " \"http_file_share_expires_after\": \"1 week\",\n", - " \"http_file_share_safe_file_types\": [\"image/*\", \"video/*\", \"audio/*\", \"text/plain\"],\n", - " \"external_protocol\": \"JWT with HS256 algorithm\"\n", - "}\n", - "\n", - "print(\"šŸ“Š Default Prosody Limits:\")\n", - "for key, value in prosody_defaults.items():\n", - " print(f\" {key}: {value}\")\n", - "\n", - "print(\"\\nšŸ” External Upload Protocol (JWT):\")\n", - "jwt_fields = [\n", - " \"slot - Unique identifier\", \n", - " \"iat - Token issued timestamp\",\n", - " \"exp - Token expiration timestamp\", \n", - " \"sub - Uploader identity\",\n", - " \"filename - File name\",\n", - " \"filesize - File size in bytes\", \n", - " \"filetype - MIME type\",\n", - " \"expires - File expiration timestamp\"\n", - "]\n", - 
"\n", - "for field in jwt_fields:\n", - " print(f\" • {field}\")\n", - "\n", - "print(\"\\nāš ļø Key Differences from HMAC Server:\")\n", - "print(\" - Prosody uses JWT tokens vs HMAC signatures\")\n", - "print(\" - Default 10MB limit vs 10GB HMAC server limit\") \n", - "print(\" - Built-in chunking not specified\")\n", - "print(\" - Different authentication mechanism\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "15646074", - "metadata": {}, - "outputs": [], - "source": [ - "# Compare Client Implementations\n", - "print(\"=== XMPP Client XEP-0363 Implementation Analysis ===\")\n", - "\n", - "client_behaviors = {\n", - " \"Gajim\": {\n", - " \"xep0363_support\": \"Full support\",\n", - " \"large_file_handling\": \"Single HTTP PUT\",\n", - " \"timeout_behavior\": \"May timeout on slow uploads\",\n", - " \"chunking\": \"Not implemented in XEP-0363\",\n", - " \"max_file_check\": \"Checks server-announced limits\",\n", - " \"known_issues\": \"Can timeout on slow connections for large files\"\n", - " },\n", - " \"Dino\": {\n", - " \"xep0363_support\": \"Full support\", \n", - " \"large_file_handling\": \"Single HTTP PUT\",\n", - " \"timeout_behavior\": \"Generally more tolerant\",\n", - " \"chunking\": \"Not implemented in XEP-0363\",\n", - " \"max_file_check\": \"Respects server limits\",\n", - " \"known_issues\": \"May struggle with very large files (>500MB)\"\n", - " },\n", - " \"Conversations\": {\n", - " \"xep0363_support\": \"Full support\",\n", - " \"large_file_handling\": \"Single HTTP PUT\",\n", - " \"timeout_behavior\": \"Conservative timeouts\",\n", - " \"chunking\": \"Not implemented in XEP-0363\", \n", - " \"max_file_check\": \"Strict limit checking\",\n", - " \"known_issues\": \"Often fails on files >100MB due to Android limitations\"\n", - " }\n", - "}\n", - "\n", - "for client, details in client_behaviors.items():\n", - " print(f\"\\nšŸ“± {client}:\")\n", - " for key, value in details.items():\n", - " print(f\" {key}: 
{value}\")\n", - "\n", - "print(\"\\nšŸŽÆ Common Client Limitations:\")\n", - "print(\" • XEP-0363 mandates single HTTP PUT (no chunking)\")\n", - "print(\" • Client timeouts typically 60-300 seconds\") \n", - "print(\" • Mobile clients more memory/timeout constrained\")\n", - "print(\" • No resumable upload support in standard\")\n", - "print(\" • Large files (>500MB) often problematic\")\n", - "\n", - "print(\"\\n🚨 970MB Upload Challenges:\")\n", - "print(\" • Exceeds typical client timeout expectations\")\n", - "print(\" • Single PUT operation for entire file\") \n", - "print(\" • Network interruptions cause complete failure\")\n", - "print(\" • Mobile devices may run out of memory\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "ec400943", - "metadata": {}, - "outputs": [], - "source": [ - "# Identify Configuration Conflicts\n", - "print(\"=== Configuration Conflict Analysis ===\")\n", - "\n", - "def parse_size(size_str):\n", - " \"\"\"Convert size string to bytes\"\"\"\n", - " if not size_str:\n", - " return 0\n", - " \n", - " size_str = str(size_str).upper()\n", - " multipliers = {'B': 1, 'KB': 1024, 'MB': 1024**2, 'GB': 1024**3, 'TB': 1024**4}\n", - " \n", - " for unit, mult in multipliers.items():\n", - " if size_str.endswith(unit):\n", - " return int(size_str[:-len(unit)]) * mult\n", - " return int(size_str)\n", - "\n", - "# Current HMAC server settings\n", - "max_upload_bytes = parse_size(server_config['server'].get('max_upload_size', '10GB'))\n", - "max_header_bytes = server_config['server'].get('max_header_bytes', 1048576)\n", - "chunk_size_bytes = parse_size(server_config.get('uploads', {}).get('chunk_size', '10MB'))\n", - "\n", - "print(f\"šŸ“Š Current Server Configuration:\")\n", - "print(f\" Max Upload Size: {max_upload_bytes:,} bytes ({max_upload_bytes / (1024**3):.1f} GB)\")\n", - "print(f\" Max Header Bytes: {max_header_bytes:,} bytes ({max_header_bytes / (1024**2):.1f} MB)\")\n", - "print(f\" Chunk Size: 
{chunk_size_bytes:,} bytes ({chunk_size_bytes / (1024**2):.1f} MB)\")\n", - "\n", - "# Test file size\n", - "test_file_size = 970 * 1024 * 1024 # 970MB\n", - "print(f\"\\nšŸŽÆ Test File Analysis (970MB):\")\n", - "print(f\" File Size: {test_file_size:,} bytes\")\n", - "print(f\" Within upload limit: {'āœ… YES' if test_file_size <= max_upload_bytes else 'āŒ NO'}\")\n", - "print(f\" Chunks needed: {test_file_size / chunk_size_bytes:.1f}\")\n", - "\n", - "# Timeout analysis\n", - "read_timeout = server_config.get('timeouts', {}).get('readtimeout', '4800s')\n", - "write_timeout = server_config.get('timeouts', {}).get('writetimeout', '4800s')\n", - "\n", - "print(f\"\\nā±ļø Timeout Configuration:\")\n", - "print(f\" Read Timeout: {read_timeout}\")\n", - "print(f\" Write Timeout: {write_timeout}\")\n", - "print(f\" Both timeouts: {int(read_timeout[:-1])/60:.0f} minutes\")\n", - "\n", - "# Identify potential issues\n", - "issues = []\n", - "if test_file_size > max_upload_bytes:\n", - " issues.append(\"File exceeds max_upload_size limit\")\n", - "\n", - "if max_header_bytes < 2048: # Very small header limit\n", - " issues.append(\"Header size limit may be too restrictive\")\n", - "\n", - "print(f\"\\n🚨 Identified Issues:\")\n", - "if issues:\n", - " for issue in issues:\n", - " print(f\" āŒ {issue}\")\n", - "else:\n", - " print(\" āœ… No obvious configuration conflicts found\")\n", - " print(\" āž”ļø Issue likely in proxy/network layer\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "cc84e5ca", - "metadata": {}, - "outputs": [], - "source": [ - "# Test Upload Size Limits\n", - "print(\"=== Upload Size Limit Analysis ===\")\n", - "\n", - "# Check nginx configuration\n", - "try:\n", - " result = subprocess.run(['grep', '-r', 'client_max_body_size', '/etc/nginx/'], \n", - " capture_output=True, text=True)\n", - " nginx_limits = result.stdout.strip().split('\\n') if result.stdout else []\n", - " \n", - " print(\"🌐 nginx Configuration:\")\n", - " 
if nginx_limits:\n", - " for limit in nginx_limits:\n", - " if limit.strip():\n", - " print(f\" šŸ“„ {limit}\")\n", - " else:\n", - " print(\" āš ļø No client_max_body_size found (using default 1MB)\")\n", - " \n", - "except Exception as e:\n", - " print(f\" āŒ Could not check nginx config: {e}\")\n", - "\n", - "# Check system limits\n", - "try:\n", - " # Check available disk space\n", - " result = subprocess.run(['df', '-h', '/opt/hmac-file-server/'], \n", - " capture_output=True, text=True)\n", - " disk_info = result.stdout.strip().split('\\n')[1] if result.stdout else \"\"\n", - " \n", - " print(f\"\\nšŸ’¾ System Resources:\")\n", - " if disk_info:\n", - " parts = disk_info.split()\n", - " print(f\" Available Space: {parts[3] if len(parts) > 3 else 'Unknown'}\")\n", - " \n", - " # Check memory\n", - " with open('/proc/meminfo', 'r') as f:\n", - " mem_info = f.read()\n", - " mem_total = re.search(r'MemTotal:\\s+(\\d+)\\s+kB', mem_info)\n", - " mem_available = re.search(r'MemAvailable:\\s+(\\d+)\\s+kB', mem_info)\n", - " \n", - " if mem_total:\n", - " total_mb = int(mem_total.group(1)) / 1024\n", - " print(f\" Total Memory: {total_mb:.0f} MB\")\n", - " if mem_available:\n", - " avail_mb = int(mem_available.group(1)) / 1024\n", - " print(f\" Available Memory: {avail_mb:.0f} MB\")\n", - " \n", - "except Exception as e:\n", - " print(f\" āŒ Could not check system resources: {e}\")\n", - "\n", - "# Calculate upload time estimates\n", - "upload_speeds = {\n", - " \"DSL (1 Mbps up)\": 1,\n", - " \"Cable (10 Mbps up)\": 10, \n", - " \"Fiber (100 Mbps up)\": 100,\n", - " \"Gigabit (1000 Mbps up)\": 1000\n", - "}\n", - "\n", - "print(f\"\\nā±ļø Upload Time Estimates for 970MB:\")\n", - "file_size_mb = 970\n", - "for connection, speed_mbps in upload_speeds.items():\n", - " time_seconds = (file_size_mb * 8) / speed_mbps # Convert MB to Mb, divide by speed\n", - " time_minutes = time_seconds / 60\n", - " print(f\" {connection}: {time_minutes:.1f} minutes\")\n", - "\n", - 
"print(f\"\\nšŸŽÆ Critical Thresholds:\")\n", - "print(f\" • XEP-0363 PUT URL timeout: ~5 minutes\")\n", - "print(f\" • Typical client timeout: 2-5 minutes\") \n", - "print(f\" • nginx default timeout: 60 seconds\")\n", - "print(f\" • Current server timeout: 80 minutes\")\n", - "print(f\" āž”ļø Network/proxy timeouts likely cause of failures\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "79ede717", - "metadata": {}, - "outputs": [], - "source": [ - "# Analyze Timeout Settings\n", - "print(\"=== Timeout Configuration Analysis ===\")\n", - "\n", - "# Parse current timeout settings\n", - "server_timeouts = {\n", - " \"read\": server_config.get('timeouts', {}).get('readtimeout', '4800s'),\n", - " \"write\": server_config.get('timeouts', {}).get('writetimeout', '4800s'), \n", - " \"idle\": server_config.get('timeouts', {}).get('idletimeout', '4800s')\n", - "}\n", - "\n", - "print(\"šŸ–„ļø HMAC Server Timeouts:\")\n", - "for timeout_type, value in server_timeouts.items():\n", - " seconds = int(value[:-1]) if value.endswith('s') else int(value)\n", - " minutes = seconds / 60\n", - " print(f\" {timeout_type.capitalize()}: {value} ({minutes:.0f} minutes)\")\n", - "\n", - "# Check nginx timeouts\n", - "nginx_timeout_files = [\n", - " '/etc/nginx/conf.d/share.conf',\n", - " '/etc/nginx/nginx-stream.conf'\n", - "]\n", - "\n", - "print(\"\\n🌐 nginx Timeout Configuration:\")\n", - "for config_file in nginx_timeout_files:\n", - " try:\n", - " if os.path.exists(config_file):\n", - " result = subprocess.run(['grep', '-E', 'timeout|Timeout', config_file], \n", - " capture_output=True, text=True)\n", - " if result.stdout:\n", - " print(f\" šŸ“„ {config_file}:\")\n", - " for line in result.stdout.strip().split('\\n'):\n", - " if line.strip():\n", - " print(f\" {line.strip()}\")\n", - " except Exception as e:\n", - " print(f\" āŒ Could not read {config_file}: {e}\")\n", - "\n", - "# Timeout chain analysis\n", - "timeout_chain = [\n", - " (\"Client\", 
\"60-300s\", \"Varies by client implementation\"),\n", - " (\"nginx Stream\", \"Variable\", \"Check stream proxy settings\"),\n", - " (\"nginx HTTP\", \"4800s\", \"From proxy configuration\"),\n", - " (\"HMAC Server\", \"4800s\", \"From server configuration\"),\n", - " (\"TCP/IP\", \"Variable\", \"OS-level settings\")\n", - "]\n", - "\n", - "print(f\"\\nšŸ”— Timeout Chain Analysis:\")\n", - "print(f\"{'Component':<15} {'Timeout':<12} {'Notes'}\")\n", - "print(f\"{'-'*50}\")\n", - "for component, timeout, notes in timeout_chain:\n", - " print(f\"{component:<15} {timeout:<12} {notes}\")\n", - "\n", - "# Calculate critical paths\n", - "print(f\"\\nāš ļø Critical Path Analysis:\")\n", - "print(f\" • 970MB upload on 10 Mbps: ~13 minutes\") \n", - "print(f\" • Current server timeout: 80 minutes āœ…\")\n", - "print(f\" • nginx HTTP timeout: 80 minutes āœ…\") \n", - "print(f\" • Client timeout: 2-5 minutes āŒ TOO SHORT\")\n", - "print(f\" • XEP-0363 PUT validity: ~5 minutes āŒ TOO SHORT\")\n", - "\n", - "print(f\"\\nšŸŽÆ Root Cause Identification:\")\n", - "print(f\" āŒ Client timeouts too short for large files\")\n", - "print(f\" āŒ XEP-0363 PUT URL expires before upload completes\")\n", - "print(f\" āŒ No chunking support in XEP-0363 standard\")\n", - "print(f\" āœ… Server and proxy timeouts adequate\")" - ] - }, - { - "cell_type": "markdown", - "id": "f07ba4c9", - "metadata": {}, - "source": [ - "## šŸ“‹ Recommendations & Solutions\n", - "\n", - "Based on our analysis, here are the specific recommendations to fix large file uploads in XMPP clients." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "2417e440", - "metadata": {}, - "outputs": [], - "source": [ - "# Comprehensive Recommendations for Large File Upload Fixes\n", - "print(\"=== SOLUTION RECOMMENDATIONS ===\\n\")\n", - "\n", - "print(\"šŸŽÆ IMMEDIATE FIXES:\")\n", - "print(\"1. 
Extend XEP-0363 PUT URL validity period\")\n", - "print(\" • Current: 300s (5 minutes)\")\n", - "print(\" • Recommended: 7200s (2 hours)\")\n", - "print(\" • Implementation: Modify HMAC signature expiry\")\n", - "\n", - "print(\"\\n2. Increase client upload timeout limits\")\n", - "print(\" • Gajim: ~/.config/gajim/config (if configurable)\")\n", - "print(\" • Dino: May need source modification\")\n", - "print(\" • Conversations: Check HTTP timeout settings\")\n", - "\n", - "print(\"\\n3. Server-side timeout extension\")\n", - "print(\" • Current: 4800s āœ… (already good)\")\n", - "print(\" • Nginx: 4800s āœ… (already good)\")\n", - "print(\" • PUT URL validity: NEEDS EXTENSION āŒ\")\n", - "\n", - "print(\"\\nšŸ”§ CONFIGURATION CHANGES:\")\n", - "config_changes = {\n", - " \"hmac_validity\": \"7200s\", # 2 hours\n", - " \"max_upload_size\": \"10GB\", # Already set\n", - " \"read_timeout\": \"7200s\", # Match HMAC validity\n", - " \"write_timeout\": \"7200s\", # Match HMAC validity\n", - " \"client_max_body_size\": \"10g\" # nginx setting\n", - "}\n", - "\n", - "print(\"Required config.toml changes:\")\n", - "for key, value in config_changes.items():\n", - " print(f\" {key} = \\\"{value}\\\"\")\n", - "\n", - "print(\"\\nšŸ“Š TECHNICAL ANALYSIS:\")\n", - "print(\"• Root Cause: PUT URL expires before large uploads complete\")\n", - "print(\"• XEP-0363 Limitation: No chunking, single PUT required\")\n", - "print(\"• Client Behavior: All use synchronous HTTP PUT\")\n", - "print(\"• Network Reality: 970MB needs ~13 minutes on 10 Mbps\")\n", - "\n", - "print(\"\\nāš ļø COMPATIBILITY NOTES:\")\n", - "print(\"• Prosody default: 10MB limit, JWT auth\")\n", - "print(\"• Our server: 10GB limit, HMAC auth\")\n", - "print(\"• Standard compliance: XEP-0363 v1.1.0 āœ…\")\n", - "print(\"• Unique feature: Extended timeout support\")\n", - "\n", - "print(\"\\nšŸš€ IMPLEMENTATION PRIORITY:\")\n", - "priority_list = [\n", - " \"1. 
HIGH: Extend HMAC signature validity to 7200s\",\n", - " \"2. MEDIUM: Document client timeout recommendations\", \n", - " \"3. LOW: Consider chunked upload extension (non-standard)\",\n", - " \"4. INFO: Monitor client behavior with extended timeouts\"\n", - "]\n", - "\n", - "for item in priority_list:\n", - " print(f\" {item}\")\n", - "\n", - "print(\"\\nšŸ’” NEXT STEPS:\")\n", - "print(\"1. Modify HMAC generation to use 7200s expiry\")\n", - "print(\"2. Test 970MB upload with extended validity\")\n", - "print(\"3. Document client-specific timeout settings\")\n", - "print(\"4. Consider implementing XEP-0363 v2 with chunking\")\n", - "\n", - "# Calculate new timeout requirements\n", - "upload_time_10mbps = (970 * 8) / 10 / 60 # minutes\n", - "safety_margin = 2 # 2x safety factor\n", - "recommended_timeout = upload_time_10mbps * safety_margin * 60 # seconds\n", - "\n", - "print(f\"\\nšŸ“ˆ TIMEOUT CALCULATIONS:\")\n", - "print(f\" 970MB upload time (10 Mbps): {upload_time_10mbps:.1f} minutes\")\n", - "print(f\" Recommended timeout: {recommended_timeout:.0f}s ({recommended_timeout/60:.0f} minutes)\")\n", - "print(f\" Current HMAC validity: 300s (5 minutes) āŒ\")\n", - "print(f\" Proposed HMAC validity: 7200s (120 minutes) āœ…\")" - ] - } - ], - "metadata": { - "language_info": { - "name": "python" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -}