# XEP-0363 HTTP File Upload Analysis for HMAC File Server

## Problem Statement
Large file uploads (970MB) through XMPP clients (Gajim, Dino, Conversations) are failing with "bad gateway" errors. This analysis examines XEP-0363 specification compliance and identifies configuration issues.

## Analysis Scope
- XEP-0363 specification requirements
- HMAC file server configuration
- Prosody mod_http_file_share comparison
- XMPP client implementation differences
- Large file upload optimization

## Current Issue
- File size: 970MB
- Error: Gateway timeout
- Clients affected: Gajim, Dino, Conversations
- Server: HMAC File Server 3.2 with nginx proxy

In [None]:
# Import Required Libraries
import requests
import json
import toml
import xml.etree.ElementTree as ET
import re
import pandas as pd
from datetime import datetime
import subprocess
import os
from pathlib import Path

print("Libraries imported successfully")
print(f"Analysis started at: {datetime.now()}")

In [None]:
# Parse TOML Configuration
config_path = "/etc/hmac-file-server/config.toml"
dockerenv_config = "/root/hmac-file-server/dockerenv/config/config.toml"

try:
    # Try production config first
    with open(config_path, 'r') as f:
        config = toml.load(f)
    config_source = "Production"
except FileNotFoundError:
    # Fallback to dockerenv config
    with open(dockerenv_config, 'r') as f:
        config = toml.load(f)
    config_source = "Development"

print(f"Configuration loaded from: {config_source}")
print("\n=== Key Upload Settings ===")
print(f"Max Upload Size: {config['server'].get('max_upload_size', 'Not set')}")
print(f"Max Header Bytes: {config['server'].get('max_header_bytes', 'Not set')}")
print(f"Read Timeout: {config.get('timeouts', {}).get('readtimeout', 'Not set')}")
print(f"Write Timeout: {config.get('timeouts', {}).get('writetimeout', 'Not set')}")
print(f"Chunked Uploads: {config.get('uploads', {}).get('chunked_uploads_enabled', 'Not set')}")
print(f"Chunk Size: {config.get('uploads', {}).get('chunk_size', 'Not set')}")

# Store for later analysis
server_config = config

In [None]:
# Download and Parse XEP-0363 Specification
print("=== XEP-0363 Key Requirements Analysis ===")

# Key requirements from XEP-0363 specification
xep0363_requirements = {
    "slot_request": {
        "method": "IQ-get",
        "namespace": "urn:xmpp:http:upload:0",
        "required_attributes": ["filename", "size"],
        "optional_attributes": ["content-type"]
    },
    "slot_response": {
        "put_url": "HTTPS URL for upload",
        "get_url": "HTTPS URL for download", 
        "headers": ["Authorization", "Cookie", "Expires"]
    },
    "upload_requirements": {
        "method": "HTTP PUT",
        "content_length_match": "MUST match size in slot request",
        "content_type_match": "SHOULD match if specified",
        "success_code": "201 Created",
        "timeout_recommendation": "~300s for PUT URL validity"
    },
    "error_conditions": {
        "file_too_large": "not-acceptable + file-too-large",
        "quota_exceeded": "resource-constraint + retry element",
        "auth_failure": "forbidden"
    }
}

print("‚úÖ Slot Request Process:")
print("  1. Client sends IQ-get with filename, size, content-type")
print("  2. Server responds with PUT/GET URLs + optional headers")
print("  3. Client performs HTTP PUT to upload URL")
print("  4. Server returns 201 Created on success")

print("\n‚úÖ Critical Requirements:")
print("  - Content-Length MUST match slot request size")
print("  - HTTPS required for both PUT and GET URLs")
print("  - Server SHOULD reject if Content-Type doesn't match")
print("  - PUT URL timeout ~300s recommended")

print("\n‚ö†Ô∏è  Large File Considerations:")
print("  - No chunking specified in XEP-0363")
print("  - Single HTTP PUT for entire file")
print("  - Server timeouts critical for large files")
print("  - Client must handle long upload times")

In [None]:
# Analyze Prosody mod_http_file_share Documentation
print("=== Prosody mod_http_file_share Settings ===")

prosody_defaults = {
    "http_file_share_size_limit": "10*1024*1024",  # 10 MiB
    "http_file_share_daily_quota": "100*1024*1024",  # 100 MiB
    "http_file_share_expires_after": "1 week",
    "http_file_share_safe_file_types": ["image/*", "video/*", "audio/*", "text/plain"],
    "external_protocol": "JWT with HS256 algorithm"
}

print("üìä Default Prosody Limits:")
for key, value in prosody_defaults.items():
    print(f"  {key}: {value}")

print("\nüîç External Upload Protocol (JWT):")
jwt_fields = [
    "slot - Unique identifier", 
    "iat - Token issued timestamp",
    "exp - Token expiration timestamp", 
    "sub - Uploader identity",
    "filename - File name",
    "filesize - File size in bytes", 
    "filetype - MIME type",
    "expires - File expiration timestamp"
]

for field in jwt_fields:
    print(f"  ‚Ä¢ {field}")

print("\n‚ö†Ô∏è  Key Differences from HMAC Server:")
print("  - Prosody uses JWT tokens vs HMAC signatures")
print("  - Default 10MB limit vs 10GB HMAC server limit") 
print("  - Built-in chunking not specified")
print("  - Different authentication mechanism")

In [None]:
# Compare Client Implementations
print("=== XMPP Client XEP-0363 Implementation Analysis ===")

client_behaviors = {
    "Gajim": {
        "xep0363_support": "Full support",
        "large_file_handling": "Single HTTP PUT",
        "timeout_behavior": "May timeout on slow uploads",
        "chunking": "Not implemented in XEP-0363",
        "max_file_check": "Checks server-announced limits",
        "known_issues": "Can timeout on slow connections for large files"
    },
    "Dino": {
        "xep0363_support": "Full support", 
        "large_file_handling": "Single HTTP PUT",
        "timeout_behavior": "Generally more tolerant",
        "chunking": "Not implemented in XEP-0363",
        "max_file_check": "Respects server limits",
        "known_issues": "May struggle with very large files (>500MB)"
    },
    "Conversations": {
        "xep0363_support": "Full support",
        "large_file_handling": "Single HTTP PUT",
        "timeout_behavior": "Conservative timeouts",
        "chunking": "Not implemented in XEP-0363", 
        "max_file_check": "Strict limit checking",
        "known_issues": "Often fails on files >100MB due to Android limitations"
    }
}

for client, details in client_behaviors.items():
    print(f"\nüì± {client}:")
    for key, value in details.items():
        print(f"   {key}: {value}")

print("\nüéØ Common Client Limitations:")
print("  ‚Ä¢ XEP-0363 mandates single HTTP PUT (no chunking)")
print("  ‚Ä¢ Client timeouts typically 60-300 seconds") 
print("  ‚Ä¢ Mobile clients more memory/timeout constrained")
print("  ‚Ä¢ No resumable upload support in standard")
print("  ‚Ä¢ Large files (>500MB) often problematic")

print("\nüö® 970MB Upload Challenges:")
print("  ‚Ä¢ Exceeds typical client timeout expectations")
print("  ‚Ä¢ Single PUT operation for entire file") 
print("  ‚Ä¢ Network interruptions cause complete failure")
print("  ‚Ä¢ Mobile devices may run out of memory")

In [None]:
# Identify Configuration Conflicts
print("=== Configuration Conflict Analysis ===")

def parse_size(size_str):
    """Convert size string to bytes"""
    if not size_str:
        return 0
    
    size_str = str(size_str).upper()
    multipliers = {'B': 1, 'KB': 1024, 'MB': 1024**2, 'GB': 1024**3, 'TB': 1024**4}
    
    for unit, mult in multipliers.items():
        if size_str.endswith(unit):
            return int(size_str[:-len(unit)]) * mult
    return int(size_str)

# Current HMAC server settings
max_upload_bytes = parse_size(server_config['server'].get('max_upload_size', '10GB'))
max_header_bytes = server_config['server'].get('max_header_bytes', 1048576)
chunk_size_bytes = parse_size(server_config.get('uploads', {}).get('chunk_size', '10MB'))

print(f"üìä Current Server Configuration:")
print(f"  Max Upload Size: {max_upload_bytes:,} bytes ({max_upload_bytes / (1024**3):.1f} GB)")
print(f"  Max Header Bytes: {max_header_bytes:,} bytes ({max_header_bytes / (1024**2):.1f} MB)")
print(f"  Chunk Size: {chunk_size_bytes:,} bytes ({chunk_size_bytes / (1024**2):.1f} MB)")

# Test file size
test_file_size = 970 * 1024 * 1024  # 970MB
print(f"\nüéØ Test File Analysis (970MB):")
print(f"  File Size: {test_file_size:,} bytes")
print(f"  Within upload limit: {'‚úÖ YES' if test_file_size <= max_upload_bytes else '‚ùå NO'}")
print(f"  Chunks needed: {test_file_size / chunk_size_bytes:.1f}")

# Timeout analysis
read_timeout = server_config.get('timeouts', {}).get('readtimeout', '4800s')
write_timeout = server_config.get('timeouts', {}).get('writetimeout', '4800s')

print(f"\n‚è±Ô∏è  Timeout Configuration:")
print(f"  Read Timeout: {read_timeout}")
print(f"  Write Timeout: {write_timeout}")
print(f"  Both timeouts: {int(read_timeout[:-1])/60:.0f} minutes")

# Identify potential issues
issues = []
if test_file_size > max_upload_bytes:
    issues.append("File exceeds max_upload_size limit")

if max_header_bytes < 2048:  # Very small header limit
    issues.append("Header size limit may be too restrictive")

print(f"\nüö® Identified Issues:")
if issues:
    for issue in issues:
        print(f"  ‚ùå {issue}")
else:
    print("  ‚úÖ No obvious configuration conflicts found")
    print("  ‚û°Ô∏è  Issue likely in proxy/network layer")

In [None]:
# Test Upload Size Limits
print("=== Upload Size Limit Analysis ===")

# Check nginx configuration
try:
    result = subprocess.run(['grep', '-r', 'client_max_body_size', '/etc/nginx/'], 
                          capture_output=True, text=True)
    nginx_limits = result.stdout.strip().split('\n') if result.stdout else []
    
    print("üåê nginx Configuration:")
    if nginx_limits:
        for limit in nginx_limits:
            if limit.strip():
                print(f"  üìÑ {limit}")
    else:
        print("  ‚ö†Ô∏è  No client_max_body_size found (using default 1MB)")
        
except Exception as e:
    print(f"  ‚ùå Could not check nginx config: {e}")

# Check system limits
try:
    # Check available disk space
    result = subprocess.run(['df', '-h', '/opt/hmac-file-server/'], 
                          capture_output=True, text=True)
    disk_info = result.stdout.strip().split('\n')[1] if result.stdout else ""
    
    print(f"\nüíæ System Resources:")
    if disk_info:
        parts = disk_info.split()
        print(f"  Available Space: {parts[3] if len(parts) > 3 else 'Unknown'}")
    
    # Check memory
    with open('/proc/meminfo', 'r') as f:
        mem_info = f.read()
        mem_total = re.search(r'MemTotal:\s+(\d+)\s+kB', mem_info)
        mem_available = re.search(r'MemAvailable:\s+(\d+)\s+kB', mem_info)
        
        if mem_total:
            total_mb = int(mem_total.group(1)) / 1024
            print(f"  Total Memory: {total_mb:.0f} MB")
        if mem_available:
            avail_mb = int(mem_available.group(1)) / 1024
            print(f"  Available Memory: {avail_mb:.0f} MB")
            
except Exception as e:
    print(f"  ‚ùå Could not check system resources: {e}")

# Calculate upload time estimates
upload_speeds = {
    "DSL (1 Mbps up)": 1,
    "Cable (10 Mbps up)": 10, 
    "Fiber (100 Mbps up)": 100,
    "Gigabit (1000 Mbps up)": 1000
}

print(f"\n‚è±Ô∏è  Upload Time Estimates for 970MB:")
file_size_mb = 970
for connection, speed_mbps in upload_speeds.items():
    time_seconds = (file_size_mb * 8) / speed_mbps  # Convert MB to Mb, divide by speed
    time_minutes = time_seconds / 60
    print(f"  {connection}: {time_minutes:.1f} minutes")

print(f"\nüéØ Critical Thresholds:")
print(f"  ‚Ä¢ XEP-0363 PUT URL timeout: ~5 minutes")
print(f"  ‚Ä¢ Typical client timeout: 2-5 minutes") 
print(f"  ‚Ä¢ nginx default timeout: 60 seconds")
print(f"  ‚Ä¢ Current server timeout: 80 minutes")
print(f"  ‚û°Ô∏è  Network/proxy timeouts likely cause of failures")

In [None]:
# Analyze Timeout Settings
print("=== Timeout Configuration Analysis ===")

# Parse current timeout settings
server_timeouts = {
    "read": server_config.get('timeouts', {}).get('readtimeout', '4800s'),
    "write": server_config.get('timeouts', {}).get('writetimeout', '4800s'), 
    "idle": server_config.get('timeouts', {}).get('idletimeout', '4800s')
}

print("üñ•Ô∏è  HMAC Server Timeouts:")
for timeout_type, value in server_timeouts.items():
    seconds = int(value[:-1]) if value.endswith('s') else int(value)
    minutes = seconds / 60
    print(f"  {timeout_type.capitalize()}: {value} ({minutes:.0f} minutes)")

# Check nginx timeouts
nginx_timeout_files = [
    '/etc/nginx/conf.d/share.conf',
    '/etc/nginx/nginx-stream.conf'
]

print("\nüåê nginx Timeout Configuration:")
for config_file in nginx_timeout_files:
    try:
        if os.path.exists(config_file):
            result = subprocess.run(['grep', '-E', 'timeout|Timeout', config_file], 
                                  capture_output=True, text=True)
            if result.stdout:
                print(f"  üìÑ {config_file}:")
                for line in result.stdout.strip().split('\n'):
                    if line.strip():
                        print(f"    {line.strip()}")
    except Exception as e:
        print(f"  ‚ùå Could not read {config_file}: {e}")

# Timeout chain analysis
timeout_chain = [
    ("Client", "60-300s", "Varies by client implementation"),
    ("nginx Stream", "Variable", "Check stream proxy settings"),
    ("nginx HTTP", "4800s", "From proxy configuration"),
    ("HMAC Server", "4800s", "From server configuration"),
    ("TCP/IP", "Variable", "OS-level settings")
]

print(f"\nüîó Timeout Chain Analysis:")
print(f"{'Component':<15} {'Timeout':<12} {'Notes'}")
print(f"{'-'*50}")
for component, timeout, notes in timeout_chain:
    print(f"{component:<15} {timeout:<12} {notes}")

# Calculate critical paths
print(f"\n‚ö†Ô∏è  Critical Path Analysis:")
print(f"  ‚Ä¢ 970MB upload on 10 Mbps: ~13 minutes") 
print(f"  ‚Ä¢ Current server timeout: 80 minutes ‚úÖ")
print(f"  ‚Ä¢ nginx HTTP timeout: 80 minutes ‚úÖ") 
print(f"  ‚Ä¢ Client timeout: 2-5 minutes ‚ùå TOO SHORT")
print(f"  ‚Ä¢ XEP-0363 PUT validity: ~5 minutes ‚ùå TOO SHORT")

print(f"\nüéØ Root Cause Identification:")
print(f"  ‚ùå Client timeouts too short for large files")
print(f"  ‚ùå XEP-0363 PUT URL expires before upload completes")
print(f"  ‚ùå No chunking support in XEP-0363 standard")
print(f"  ‚úÖ Server and proxy timeouts adequate")

## üìã Recommendations & Solutions

Based on our analysis, here are the specific recommendations to fix large file uploads in XMPP clients.

In [None]:
# Comprehensive Recommendations for Large File Upload Fixes
print("=== SOLUTION RECOMMENDATIONS ===\n")

print("üéØ IMMEDIATE FIXES:")
print("1. Extend XEP-0363 PUT URL validity period")
print("   ‚Ä¢ Current: 300s (5 minutes)")
print("   ‚Ä¢ Recommended: 7200s (2 hours)")
print("   ‚Ä¢ Implementation: Modify HMAC signature expiry")

print("\n2. Increase client upload timeout limits")
print("   ‚Ä¢ Gajim: ~/.config/gajim/config (if configurable)")
print("   ‚Ä¢ Dino: May need source modification")
print("   ‚Ä¢ Conversations: Check HTTP timeout settings")

print("\n3. Server-side timeout extension")
print("   ‚Ä¢ Current: 4800s ‚úÖ (already good)")
print("   ‚Ä¢ Nginx: 4800s ‚úÖ (already good)")
print("   ‚Ä¢ PUT URL validity: NEEDS EXTENSION ‚ùå")

print("\nüîß CONFIGURATION CHANGES:")
config_changes = {
    "hmac_validity": "7200s",  # 2 hours
    "max_upload_size": "10GB",  # Already set
    "read_timeout": "7200s",   # Match HMAC validity
    "write_timeout": "7200s",  # Match HMAC validity
    "client_max_body_size": "10g"  # nginx setting
}

print("Required config.toml changes:")
for key, value in config_changes.items():
    print(f"  {key} = \"{value}\"")

print("\nüìä TECHNICAL ANALYSIS:")
print("‚Ä¢ Root Cause: PUT URL expires before large uploads complete")
print("‚Ä¢ XEP-0363 Limitation: No chunking, single PUT required")
print("‚Ä¢ Client Behavior: All use synchronous HTTP PUT")
print("‚Ä¢ Network Reality: 970MB needs ~13 minutes on 10 Mbps")

print("\n‚ö†Ô∏è  COMPATIBILITY NOTES:")
print("‚Ä¢ Prosody default: 10MB limit, JWT auth")
print("‚Ä¢ Our server: 10GB limit, HMAC auth")
print("‚Ä¢ Standard compliance: XEP-0363 v1.1.0 ‚úÖ")
print("‚Ä¢ Unique feature: Extended timeout support")

print("\nüöÄ IMPLEMENTATION PRIORITY:")
priority_list = [
    "1. HIGH: Extend HMAC signature validity to 7200s",
    "2. MEDIUM: Document client timeout recommendations", 
    "3. LOW: Consider chunked upload extension (non-standard)",
    "4. INFO: Monitor client behavior with extended timeouts"
]

for item in priority_list:
    print(f"  {item}")

print("\nüí° NEXT STEPS:")
print("1. Modify HMAC generation to use 7200s expiry")
print("2. Test 970MB upload with extended validity")
print("3. Document client-specific timeout settings")
print("4. Consider implementing XEP-0363 v2 with chunking")

# Calculate new timeout requirements
upload_time_10mbps = (970 * 8) / 10 / 60  # minutes
safety_margin = 2  # 2x safety factor
recommended_timeout = upload_time_10mbps * safety_margin * 60  # seconds

print(f"\nüìà TIMEOUT CALCULATIONS:")
print(f"  970MB upload time (10 Mbps): {upload_time_10mbps:.1f} minutes")
print(f"  Recommended timeout: {recommended_timeout:.0f}s ({recommended_timeout/60:.0f} minutes)")
print(f"  Current HMAC validity: 300s (5 minutes) ‚ùå")
print(f"  Proposed HMAC validity: 7200s (120 minutes) ‚úÖ")