#!/bin/bash

# Optimized Large Database Creator - 50GB target
# More efficient approach using PostgreSQL's built-in functions

set -e

DB_NAME="testdb_50gb"
TARGET_SIZE_GB=50

echo "=================================================="
echo "OPTIMIZED Large Test Database Creator"
echo "Database: $DB_NAME"
echo "Target Size: ${TARGET_SIZE_GB}GB"
echo "=================================================="

# Check available space (assumes the PostgreSQL data directory lives on /)
AVAILABLE_GB=$(df -k / | tail -1 | awk '{print int($4/1024/1024)}')
echo "Available disk space: ${AVAILABLE_GB}GB"

if [ "$AVAILABLE_GB" -lt $((TARGET_SIZE_GB + 20)) ]; then
    echo "❌ ERROR: Insufficient disk space. Need at least $((TARGET_SIZE_GB + 20))GB free (target plus a 20GB buffer)"
    exit 1
fi

echo "✅ Sufficient disk space available"

echo ""
echo "1. Creating optimized database schema..."

# Drop and recreate the database (the drop requires no active connections)
sudo -u postgres psql -c "DROP DATABASE IF EXISTS $DB_NAME;" 2>/dev/null || true
sudo -u postgres psql -c "CREATE DATABASE $DB_NAME;"

# Create optimized schema for rapid data generation
sudo -u postgres psql -d "$DB_NAME" << 'EOF'
-- Large blob table with efficient storage
CREATE TABLE mega_blobs (
    id BIGSERIAL PRIMARY KEY,
    chunk_id INTEGER NOT NULL,
    blob_data BYTEA NOT NULL,
    created_at TIMESTAMP DEFAULT NOW()
);

-- Massive text table for document storage
CREATE TABLE big_documents (
    id BIGSERIAL PRIMARY KEY,
    doc_name VARCHAR(100),
    content TEXT NOT NULL,
    metadata JSONB,
    created_at TIMESTAMP DEFAULT NOW()
);

-- High-volume metrics table
CREATE TABLE huge_metrics (
    id BIGSERIAL PRIMARY KEY,
    timestamp TIMESTAMP NOT NULL,
    sensor_id INTEGER NOT NULL,
    metric_type VARCHAR(50) NOT NULL,
    value_data TEXT NOT NULL, -- Large text field
    binary_payload BYTEA,
    created_at TIMESTAMP DEFAULT NOW()
);

-- Indexes for realism
CREATE INDEX idx_mega_blobs_chunk ON mega_blobs(chunk_id);
CREATE INDEX idx_big_docs_name ON big_documents(doc_name);
CREATE INDEX idx_huge_metrics_timestamp ON huge_metrics(timestamp);
CREATE INDEX idx_huge_metrics_sensor ON huge_metrics(sensor_id);
EOF
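
# Note: TEXT/BYTEA values over ~2KB are TOASTed and compressed by default, so
# the highly repetitive payloads generated below can occupy far less disk than
# their raw size suggests. If the final size falls short of the target, one
# option is to store the blob column uncompressed before loading it:
#   sudo -u postgres psql -d "$DB_NAME" -c \
#       "ALTER TABLE mega_blobs ALTER COLUMN blob_data SET STORAGE EXTERNAL;"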

echo "✅ Optimized schema created"

echo ""
echo "2. Generating large-scale data using PostgreSQL's generate_series..."

# Strategy: Use PostgreSQL's efficient bulk operations
echo "Inserting massive text documents (targeting ~20GB)..."
sudo -u postgres psql -d "$DB_NAME" << 'EOF'
-- Insert 2 million large text documents (~20GB estimated on disk)
INSERT INTO big_documents (doc_name, content, metadata)
SELECT
    'doc_' || generate_series,
    -- Each document: ~25KB of raw text (50 repeats of a ~500-byte paragraph);
    -- TOAST compression shrinks this considerably on disk
    repeat('Lorem ipsum dolor sit amet, consectetur adipiscing elit. ' ||
           'Sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. ' ||
           'Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris. ' ||
           'Duis aute irure dolor in reprehenderit in voluptate velit esse cillum. ' ||
           'Excepteur sint occaecat cupidatat non proident, sunt in culpa qui. ' ||
           'Nulla pariatur. Sed ut perspiciatis unde omnis iste natus error sit. ' ||
           'At vero eos et accusamus et iusto odio dignissimos ducimus qui blanditiis. ' ||
           'Document content section ' || generate_series || '. ', 50),
    ('{"doc_type": "test", "size_category": "large", "batch": ' || (generate_series / 10000) ||
     ', "tags": ["bulk_data", "test_doc", "large_dataset"]}')::jsonb
FROM generate_series(1, 2000000);
EOF
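
# Optional spot check of raw (pre-compression) document length on a 0.1% sample:
#   sudo -u postgres psql -d "$DB_NAME" -tAc \
#       "SELECT pg_size_pretty(avg(octet_length(content))::bigint)
#        FROM big_documents TABLESAMPLE SYSTEM (0.1);"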

echo "✅ Large documents inserted"

# Check current size
CURRENT_SIZE=$(sudo -u postgres psql -d "$DB_NAME" -tAc "SELECT pg_database_size('$DB_NAME') / 1024 / 1024 / 1024.0;" 2>/dev/null)
echo "Current database size: ${CURRENT_SIZE}GB"

echo "Inserting high-volume metrics data (targeting additional ~15GB)..."

sudo -u postgres psql -d "$DB_NAME" << 'EOF'
-- Insert 5 million metrics records with large payloads (~15GB estimated)
INSERT INTO huge_metrics (timestamp, sensor_id, metric_type, value_data, binary_payload)
SELECT
    NOW() - (generate_series * INTERVAL '1 second'),
    generate_series % 10000, -- 10,000 different sensors
    CASE (generate_series % 5)
        WHEN 0 THEN 'temperature'
        WHEN 1 THEN 'humidity'
        WHEN 2 THEN 'pressure'
        WHEN 3 THEN 'vibration'
        ELSE 'electromagnetic'
    END,
    -- Large JSON-like text payload (~1.6KB each)
    '{"readings": [' ||
    '{"timestamp": "' || (NOW() - (generate_series * INTERVAL '1 second'))::text ||
    '", "value": ' || (random() * 1000)::int ||
    ', "quality": "good", "metadata": "' || repeat('data_', 20) || '"},' ||
    '{"timestamp": "' || (NOW() - ((generate_series + 1) * INTERVAL '1 second'))::text ||
    '", "value": ' || (random() * 1000)::int ||
    ', "quality": "good", "metadata": "' || repeat('data_', 20) || '"},' ||
    '{"timestamp": "' || (NOW() - ((generate_series + 2) * INTERVAL '1 second'))::text ||
    '", "value": ' || (random() * 1000)::int ||
    ', "quality": "good", "metadata": "' || repeat('data_', 20) || '"}' ||
    '], "sensor_info": "' || repeat('sensor_metadata_', 30) ||
    '", "calibration": "' || repeat('calibration_data_', 25) || '"}',
    -- Binary payload (~0.6KB each); convert_to() renders the text as bytea,
    -- since PostgreSQL has no direct text-to-bytea cast
    convert_to(repeat('BINARY_SENSOR_DATA_CHUNK_', 25), 'UTF8')
FROM generate_series(1, 5000000);
EOF
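
# A quick, optional look at how much of huge_metrics sits in TOAST storage:
#   sudo -u postgres psql -d "$DB_NAME" -c \
#       "SELECT pg_size_pretty(pg_table_size('huge_metrics') - pg_relation_size('huge_metrics'));"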

echo "✅ Metrics data inserted"

# Check size again
CURRENT_SIZE=$(sudo -u postgres psql -d "$DB_NAME" -tAc "SELECT pg_database_size('$DB_NAME') / 1024 / 1024 / 1024.0;" 2>/dev/null)
echo "Current database size: ${CURRENT_SIZE}GB"

echo "Inserting binary blob data to reach 50GB target..."

# Calculate the remaining size needed (falls back to ~15GB if bc is missing)
REMAINING_GB=$(echo "$TARGET_SIZE_GB - $CURRENT_SIZE" | bc -l 2>/dev/null || echo "15")
REMAINING_MB=$(echo "$REMAINING_GB * 1024" | bc -l 2>/dev/null || echo "15360")

echo "Need approximately ${REMAINING_GB}GB more data..."

# Insert binary blobs to fill remaining space
# (unquoted heredoc delimiter so the shell expands ${REMAINING_MB})
sudo -u postgres psql -d "$DB_NAME" << EOF
-- Insert large binary chunks to reach target size
-- Each blob is ~4MB raw (a 41-byte string repeated 100,000 times)
INSERT INTO mega_blobs (chunk_id, blob_data)
SELECT
    generate_series,
    -- convert_to() renders the text as bytea (no direct text-to-bytea cast exists)
    convert_to(repeat('LARGE_BINARY_CHUNK_FOR_TESTING_PURPOSES_', 100000), 'UTF8')
FROM generate_series(1, ${REMAINING_MB%.*} / 4); -- one ~4MB row per 4MB still needed
EOF
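
# Because the repeated string above compresses extremely well under TOAST, a
# single pass can stop well short of the target. A sketch of an optional
# top-up loop that appends ~160MB of poorly-compressible hex text per pass:
#
#   while [ "$(sudo -u postgres psql -d "$DB_NAME" -tAc \
#           "SELECT (pg_database_size('$DB_NAME') / 1024 / 1024 / 1024)::int;")" \
#           -lt "$TARGET_SIZE_GB" ]; do
#       sudo -u postgres psql -d "$DB_NAME" -c \
#           "INSERT INTO mega_blobs (chunk_id, blob_data)
#            SELECT g, convert_to(string_agg(md5((g * 100000 + i)::text), ''), 'UTF8')
#            FROM generate_series(1, 200) g, generate_series(1, 25000) i
#            GROUP BY g;"
#   done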

echo "✅ Binary blob data inserted"

echo ""
echo "3. Final optimization and statistics..."

# Analyze tables for accurate statistics
sudo -u postgres psql -d "$DB_NAME" << 'EOF'
-- Update table statistics
ANALYZE big_documents;
ANALYZE huge_metrics;
ANALYZE mega_blobs;

-- Vacuum to optimize storage
VACUUM ANALYZE;
EOF
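
# Plain VACUUM makes dead space reusable inside PostgreSQL but does not return
# it to the OS, which is fine here since a large on-disk footprint is the goal.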

echo ""
echo "4. Final database metrics..."

sudo -u postgres psql -d "$DB_NAME" << 'EOF'
-- Database size breakdown
SELECT
    'TOTAL DATABASE SIZE' as component,
    pg_size_pretty(pg_database_size(current_database())) as size,
    ROUND(pg_database_size(current_database()) / 1024.0 / 1024.0 / 1024.0, 2) || ' GB' as size_gb
UNION ALL
SELECT
    'big_documents table',
    pg_size_pretty(pg_total_relation_size('big_documents')),
    ROUND(pg_total_relation_size('big_documents') / 1024.0 / 1024.0 / 1024.0, 2) || ' GB'
UNION ALL
SELECT
    'huge_metrics table',
    pg_size_pretty(pg_total_relation_size('huge_metrics')),
    ROUND(pg_total_relation_size('huge_metrics') / 1024.0 / 1024.0 / 1024.0, 2) || ' GB'
UNION ALL
SELECT
    'mega_blobs table',
    pg_size_pretty(pg_total_relation_size('mega_blobs')),
    ROUND(pg_total_relation_size('mega_blobs') / 1024.0 / 1024.0 / 1024.0, 2) || ' GB';

-- Row counts
SELECT
    'TABLE ROWS' as metric,
    '' as value,
    '' as extra
UNION ALL
SELECT
    'big_documents',
    COUNT(*)::text,
    'rows'
FROM big_documents
UNION ALL
SELECT
    'huge_metrics',
    COUNT(*)::text,
    'rows'
FROM huge_metrics
UNION ALL
SELECT
    'mega_blobs',
    COUNT(*)::text,
    'rows'
FROM mega_blobs;
EOF

FINAL_SIZE=$(sudo -u postgres psql -d "$DB_NAME" -tAc "SELECT pg_size_pretty(pg_database_size('$DB_NAME'));" 2>/dev/null)
FINAL_GB=$(sudo -u postgres psql -d "$DB_NAME" -tAc "SELECT ROUND(pg_database_size('$DB_NAME') / 1024.0 / 1024.0 / 1024.0, 2);" 2>/dev/null)

echo ""
echo "=================================================="
echo "✅ LARGE DATABASE CREATION COMPLETED!"
echo "=================================================="
echo "Database Name: $DB_NAME"
echo "Final Size: $FINAL_SIZE (${FINAL_GB}GB)"
echo "Target: ${TARGET_SIZE_GB}GB"
echo "=================================================="

echo ""
echo "🧪 Ready for testing large database operations:"
echo ""
echo "# Test single database backup:"
echo "time sudo -u postgres ./dbbackup backup single $DB_NAME --confirm"
echo ""
echo "# Test cluster backup (includes this large DB):"
echo "time sudo -u postgres ./dbbackup backup cluster --confirm"
echo ""
echo "# Monitor backup progress:"
echo "watch 'ls -lah /backup/ 2>/dev/null || ls -lah ./*.dump* ./*.tar.gz 2>/dev/null'"
echo ""
echo "# Check database size anytime:"
echo "sudo -u postgres psql -d $DB_NAME -c \"SELECT pg_size_pretty(pg_database_size('$DB_NAME'));\""