v5.8.17: Add PostgreSQL connection timeouts as hang safeguard
Some checks failed
CI/CD / Test (push) Successful in 3m6s
CI/CD / Lint (push) Successful in 1m10s
CI/CD / Integration Tests (push) Successful in 56s
CI/CD / Native Engine Tests (push) Successful in 51s
CI/CD / Build Binary (push) Successful in 43s
CI/CD / Test Release Build (push) Successful in 1m17s
CI/CD / Release Binaries (push) Failing after 9m55s

- Set statement_timeout=1h, lock_timeout=5min, idle_in_transaction_session_timeout=10min
- These server-side timeouts ensure stuck queries abort even if context cancellation fails
- Additional defense-in-depth for TUI cluster restore hang issue
- Add test_cancel.sh for verifying cancellation behavior
This commit is contained in:
2026-02-05 11:43:20 +00:00
parent 0d85caea53
commit 084b8bd279
3 changed files with 69 additions and 1 deletions

View File

@ -108,6 +108,14 @@ func NewParallelRestoreEngineWithContext(ctx context.Context, config *PostgreSQL
// Default is 1 minute which causes hangs on Ctrl+C
poolConfig.HealthCheckPeriod = 5 * time.Second
// CRITICAL: Set connection-level timeouts to ensure queries can be cancelled
// This prevents infinite hangs on slow/stuck operations
poolConfig.ConnConfig.RuntimeParams = map[string]string{
"statement_timeout": "3600000", // 1 hour max per statement (in ms)
"lock_timeout": "300000", // 5 min max wait for locks (in ms)
"idle_in_transaction_session_timeout": "600000", // 10 min idle timeout (in ms)
}
// Use the provided context so pool health checks stop when context is cancelled
pool, err := pgxpool.NewWithConfig(ctx, poolConfig)
if err != nil {

View File

@ -16,7 +16,7 @@ import (
// Build information (set by ldflags)
var (
version = "5.8.16"
version = "5.8.17"
buildTime = "unknown"
gitCommit = "unknown"
)

60
test_cancel.sh Executable file
View File

@ -0,0 +1,60 @@
#!/bin/bash
# Smoke test for restore cancellation behavior.
#
# Generates a synthetic SQL dump, runs a time-boxed `restore single` against it
# to detect hangs, then prints instructions for a manual Ctrl+C check.
# Run this BEFORE deploying to enterprise machine.
#
# Optional environment overrides:
#   DBBACKUP_BIN     binary under test        (default: ./bin/dbbackup_linux_amd64)
#   TEST_SQL         path of generated dump   (default: /tmp/test_large_dump.sql)
#   STATEMENT_COUNT  padding statements       (default: 50000)
#
# Exit status: 0 when the script ran to completion, 1 when a prerequisite
# (binary, `timeout`, valid STATEMENT_COUNT) is missing.
set -euo pipefail

BINARY="${DBBACKUP_BIN:-./bin/dbbackup_linux_amd64}"
TEST_SQL="${TEST_SQL:-/tmp/test_large_dump.sql}"
STATEMENT_COUNT="${STATEMENT_COUNT:-50000}"
PARSE_TIMEOUT_SECS=5
KILL_GRACE_SECS=2

echo "🧪 Testing TUI Cancellation Behavior"
echo "====================================="

# Fail fast on missing prerequisites. Without these checks `timeout` exits with
# 126/127 and the run would be misreported as "no hang".
if [ ! -x "$BINARY" ]; then
    echo "❌ Binary not found or not executable: $BINARY" >&2
    exit 1
fi
if ! command -v timeout > /dev/null 2>&1; then
    echo "❌ 'timeout' (GNU coreutils) is required but was not found" >&2
    exit 1
fi
case "$STATEMENT_COUNT" in
    '' | *[!0-9]*)
        echo "❌ STATEMENT_COUNT must be a non-negative integer, got: $STATEMENT_COUNT" >&2
        exit 1
        ;;
esac

# Create a test SQL file that simulates a large dump.
# Everything is written through a single redirect so the file is opened once
# instead of once per generated statement.
echo "Creating test SQL file with $STATEMENT_COUNT statements..."
{
    cat << 'EOF'
-- Test pg_dumpall format
SET statement_timeout = 0;
SET lock_timeout = 0;
SET idle_in_transaction_session_timeout = 0;
SET client_encoding = 'UTF8';
SET standard_conforming_strings = on;
EOF
    for ((i = 1; i <= STATEMENT_COUNT; i++)); do
        printf 'SELECT %d; -- padding line to simulate large file\n' "$i"
    done
    echo "-- End of test dump"
} > "$TEST_SQL"
echo "✅ Created $TEST_SQL ($(wc -l < "$TEST_SQL") lines)"

# Test 1: Verify parsing can be cancelled
echo ""
echo "Test 1: Parsing Cancellation ($PARSE_TIMEOUT_SECS second timeout)"
echo "------------------------------------------------"
# -k escalates to SIGKILL if the process ignores SIGTERM, so this test cannot
# hang itself. The status is captured explicitly so `set -e` does not abort and
# a verdict is printed for every outcome, including success.
status=0
timeout -k "$KILL_GRACE_SECS" "$PARSE_TIMEOUT_SECS" \
    "$BINARY" restore single "$TEST_SQL" --target testdb_noexist 2>&1 || status=$?

case "$status" in
    124 | 137)
        # 124: timed out and stopped on SIGTERM; 137: had to be SIGKILLed.
        echo "⚠️ TIMEOUT - parsing took longer than $PARSE_TIMEOUT_SECS seconds (potential hang)"
        echo " This may indicate the fix is not working"
        ;;
    125 | 126 | 127)
        # timeout itself failed, or the command could not be invoked/found.
        echo "❌ Could not run $BINARY (timeout exit status $status)" >&2
        exit 1
        ;;
    *)
        echo "✅ Command completed or failed normally (no hang)"
        ;;
esac

# Test 2: TUI interrupt test (requires manual verification)
echo ""
echo "Test 2: TUI Interrupt Test (MANUAL)"
echo "------------------------------------"
echo "Run this command and press Ctrl+C after 2-3 seconds:"
echo ""
echo " $BINARY restore single $TEST_SQL --target testdb"
echo ""
echo "Expected: Should exit cleanly within 1-2 seconds of Ctrl+C"
echo "Problem: If it hangs for 30+ seconds, the fix didn't work"

# Cleanup: the dump is left in place because the manual test above needs it;
# only the removal command is printed.
echo ""
echo "Cleanup: rm $TEST_SQL"
echo ""
echo "🏁 Test complete!"