Performance Tuning
Optimize entropyDB for maximum performance with query tuning, indexing, and configuration
Overview
Performance tuning areas:
- Query Optimization: EXPLAIN, query rewriting
- Indexing Strategies: B-tree, GIN, BRIN, covering indexes
- Configuration Tuning: memory, connections, parallelism
- Connection Pooling: PgBouncer, HikariCP
- Maintenance: VACUUM, ANALYZE, reindexing
Query Optimization
-- Analyze query performance
EXPLAIN (ANALYZE, BUFFERS, VERBOSE)
SELECT u.username, COUNT(o.order_id)
FROM users u
LEFT JOIN orders o ON u.id = o.user_id
WHERE u.created_at > '2024-01-01'
GROUP BY u.username
HAVING COUNT(o.order_id) > 5;
-- Key metrics to look for:
-- - Seq Scan (bad for large tables)
-- - Index Scan (good)
-- - Execution Time
-- - Buffers (shared hit vs read)
-- Identify slow queries
SELECT
query,
calls,
total_exec_time / 1000 as total_time_sec,
mean_exec_time as avg_time_ms,
max_exec_time as max_time_ms
FROM pg_stat_statements
ORDER BY total_exec_time DESC
LIMIT 10;
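pg_stat_statements is an extension and is typically not enabled out of the box; assuming entropyDB follows PostgreSQL here, it must be preloaded and created before the query above returns any rows.
-- Enable pg_stat_statements (changing shared_preload_libraries requires a restart)
ALTER SYSTEM SET shared_preload_libraries = 'pg_stat_statements';
-- After the restart:
CREATE EXTENSION IF NOT EXISTS pg_stat_statements;
-- Reset collected statistics when starting a new measurement window
SELECT pg_stat_statements_reset();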
-- Query rewriting for better performance
-- BAD: N+1 query pattern
SELECT * FROM users;
-- Then for each user:
SELECT * FROM orders WHERE user_id = ?;
-- GOOD: Single query with JOIN
SELECT u.*,
json_agg(o.*) as orders
FROM users u
LEFT JOIN orders o ON u.id = o.user_id
GROUP BY u.id;
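Note that json_agg(o.*) produces an array containing a single NULL for users with no orders; a small variant (a sketch using standard PostgreSQL aggregate FILTER syntax) returns an empty array instead:
-- Exclude the NULL row produced by the LEFT JOIN for users without orders
SELECT u.*,
COALESCE(json_agg(o.*) FILTER (WHERE o.order_id IS NOT NULL), '[]') as orders
FROM users u
LEFT JOIN orders o ON u.id = o.user_id
GROUP BY u.id;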
-- Use CTEs for complex queries
WITH recent_users AS (
SELECT id, username
FROM users
WHERE last_login > NOW() - INTERVAL '30 days'
),
user_orders AS (
SELECT user_id, COUNT(*) as order_count
FROM orders
WHERE created_at > NOW() - INTERVAL '30 days'
GROUP BY user_id
)
SELECT ru.username, COALESCE(uo.order_count, 0) as orders
FROM recent_users ru
LEFT JOIN user_orders uo ON ru.id = uo.user_id;
Indexing Strategies
-- B-tree indexes (default)
CREATE INDEX idx_users_email ON users(email);
CREATE INDEX idx_orders_user_created ON orders(user_id, created_at);
-- Covering indexes (include additional columns)
CREATE INDEX idx_orders_covering ON orders(user_id, created_at)
INCLUDE (total, status);
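Queries that touch only the indexed and INCLUDEd columns can be answered from the covering index alone; with a recently vacuumed table, EXPLAIN should report an Index Only Scan. A sketch:
-- Served entirely from idx_orders_covering (look for "Index Only Scan" in the plan)
EXPLAIN (ANALYZE)
SELECT created_at, total, status
FROM orders
WHERE user_id = 123
ORDER BY created_at;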
-- Partial indexes (for filtered queries)
CREATE INDEX idx_active_users ON users(email)
WHERE status = 'active';
CREATE INDEX idx_recent_orders ON orders(created_at)
WHERE created_at > '2024-01-01';
-- GIN indexes (for full-text search, JSONB)
CREATE INDEX idx_documents_content ON documents
USING GIN (to_tsvector('english', content));
CREATE INDEX idx_user_metadata ON users
USING GIN (metadata jsonb_path_ops);
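Example queries that can use these GIN indexes (column names such as title and the JSON keys are illustrative):
-- Full-text search via the expression index
SELECT id, title
FROM documents
WHERE to_tsvector('english', content) @@ to_tsquery('english', 'tuning & performance');
-- JSONB containment; jsonb_path_ops indexes support the @> operator
SELECT id, username
FROM users
WHERE metadata @> '{"plan": "enterprise"}';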
-- BRIN indexes (for large sequential data)
CREATE INDEX idx_logs_timestamp ON logs
USING BRIN (timestamp);
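BRIN indexes store per-block-range summaries, so they pay off on large, append-only tables where the indexed column correlates with physical row order; a range scan like the one below can then skip most blocks:
-- Range query that benefits from the BRIN index on time-ordered logs
SELECT count(*)
FROM logs
WHERE timestamp >= '2024-06-01' AND timestamp < '2024-06-02';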
-- Multi-column indexes (column order matters)
CREATE INDEX idx_orders_composite ON orders(user_id, status, created_at);
-- Use for queries like:
SELECT * FROM orders
WHERE user_id = 123 AND status = 'pending'
ORDER BY created_at;
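Conversely, the same index is of little use when the leading column is missing from the predicate:
-- Cannot use idx_orders_composite efficiently: user_id (the leading column) is not filtered
SELECT * FROM orders
WHERE status = 'pending'
ORDER BY created_at;
-- Add an index led by status, or include user_id in the WHERE clause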
-- Find missing indexes
SELECT
schemaname,
relname AS tablename,
seq_scan,
seq_tup_read,
idx_scan,
seq_tup_read / NULLIF(seq_scan, 0) as rows_per_scan
FROM pg_stat_user_tables
WHERE seq_scan > 1000
AND seq_tup_read / NULLIF(seq_scan, 0) > 10000
ORDER BY seq_tup_read DESC;
-- Find unused indexes
SELECT
schemaname,
relname AS tablename,
indexrelname AS indexname,
idx_scan,
pg_size_pretty(pg_relation_size(indexrelid)) as size
FROM pg_stat_user_indexes
WHERE idx_scan = 0
AND indexrelname !~ '^pg_'
ORDER BY pg_relation_size(indexrelid) DESC;
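Once an index is confirmed unused (check every node that serves reads, since these statistics are per instance), it can be dropped without blocking writes; the index name below is hypothetical:
-- Remove an unused index without taking a long exclusive lock on the table
DROP INDEX CONCURRENTLY IF EXISTS idx_orders_old_status;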
Configuration Tuning
-- Memory settings (entropydb.conf)
# Total RAM: 16GB example
shared_buffers = 4GB                   # 25% of RAM
effective_cache_size = 12GB            # 75% of RAM
work_mem = 64MB                        # Per operation
maintenance_work_mem = 1GB             # For VACUUM, CREATE INDEX
temp_buffers = 16MB                    # Temp tables
# Connection settings
max_connections = 200
max_worker_processes = 8
max_parallel_workers_per_gather = 4
max_parallel_workers = 8
max_parallel_maintenance_workers = 4
# WAL settings
wal_buffers = 16MB
min_wal_size = 1GB
max_wal_size = 4GB
wal_compression = on
checkpoint_timeout = 15min
checkpoint_completion_target = 0.9
# Query planner
random_page_cost = 1.1                 # For SSD
effective_io_concurrency = 200         # For SSD
default_statistics_target = 100
# Logging
log_min_duration_statement = 1000      # Log slow queries (>1s)
log_line_prefix = '%t [%p]: [%l-1] user=%u,db=%d,app=%a,client=%h '
-- Apply changes (pg_reload_conf() covers reloadable settings; shared_buffers needs a restart)
ALTER SYSTEM SET shared_buffers = '4GB';
ALTER SYSTEM SET work_mem = '64MB';
SELECT pg_reload_conf();
-- Per-session tuning
SET work_mem = '256MB';        -- For heavy query
SET random_page_cost = 1.1;
SET enable_seqscan = off;      -- Force index usage (debug only)
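To verify which values are actually in effect and where they came from, query pg_settings:
-- Check effective values and their source (configuration file, ALTER SYSTEM, session, ...)
SELECT name, setting, unit, source
FROM pg_settings
WHERE name IN ('shared_buffers', 'work_mem', 'effective_cache_size', 'max_connections');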
Connection Pooling
-- PgBouncer configuration (pgbouncer.ini)
[databases]
entropydb = host=localhost port=5432 dbname=mydb
[pgbouncer]
pool_mode = transaction        # or session, statement
max_client_conn = 1000         # Total client connections
default_pool_size = 25         # Per database/user pair
reserve_pool_size = 5
reserve_pool_timeout = 3
max_db_connections = 100
max_user_connections = 100
# Start PgBouncer
pgbouncer /etc/pgbouncer/pgbouncer.ini
# Application connection string (note the PgBouncer port, 6432)
postgres://user:pass@localhost:6432/entropydb
-- Monitor connections
SELECT
datname,
count(*) as connections,
min(backend_start) as oldest_connection
FROM pg_stat_activity
WHERE state != 'idle'
GROUP BY datname;
-- Kill idle connections
SELECT pg_terminate_backend(pid)
FROM pg_stat_activity
WHERE state = 'idle'
AND state_change < NOW() - INTERVAL '1 hour'
AND pid != pg_backend_pid();
-- Connection pool sizing
-- Formula: connections = (core_count * 2) + effective_spindle_count
-- Example: (8 cores * 2) + 1 = 17 connections per node
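A quick check of connection headroom against the configured limit (a sketch, assuming PostgreSQL-compatible catalogs):
-- Client connections in use vs. configured max_connections
SELECT count(*) AS used,
current_setting('max_connections')::int AS max_allowed,
current_setting('max_connections')::int - count(*) AS headroom
FROM pg_stat_activity
WHERE backend_type = 'client backend';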
Maintenance Operations
-- Regular VACUUM and ANALYZE
VACUUM ANALYZE users;
-- Aggressive VACUUM (reclaims space; takes an exclusive lock on the table)
VACUUM FULL users;
-- Auto-vacuum tuning
ALTER TABLE users SET (
autovacuum_vacuum_scale_factor = 0.1,
autovacuum_analyze_scale_factor = 0.05,
autovacuum_vacuum_threshold = 50,
autovacuum_analyze_threshold = 50
);
-- Check bloat
SELECT
schemaname,
relname AS tablename,
pg_size_pretty(pg_total_relation_size(schemaname||'.'||relname)) as size,
n_dead_tup,
n_live_tup,
round(n_dead_tup * 100.0 / NULLIF(n_live_tup + n_dead_tup, 0), 2) as dead_pct
FROM pg_stat_user_tables
WHERE n_dead_tup > 1000
ORDER BY n_dead_tup DESC;
-- Reindex when necessary
REINDEX TABLE users;
REINDEX INDEX CONCURRENTLY idx_users_email;
-- Update statistics
ANALYZE users;
ANALYZE; -- All tables
-- Monitor vacuum progress
SELECT
phase,
round(100.0 * heap_blks_scanned / heap_blks_total, 1) AS scan_pct,
round(100.0 * heap_blks_vacuumed / heap_blks_total, 1) AS vacuum_pct
FROM pg_stat_progress_vacuum;
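To confirm that autovacuum is keeping up, check when each table was last vacuumed and analyzed; tables that never appear are candidates for a manual pass:
-- Last (auto)vacuum and (auto)analyze per table, least recently vacuumed first
SELECT relname,
last_vacuum, last_autovacuum,
last_analyze, last_autoanalyze
FROM pg_stat_user_tables
ORDER BY GREATEST(last_vacuum, last_autovacuum) NULLS FIRST;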
Best Practices
Query Optimization
- Always EXPLAIN slow queries
- Use appropriate indexes
- Avoid N+1 query patterns
- Use connection pooling
Maintenance
- Regular VACUUM and ANALYZE
- Monitor table bloat
- Update statistics frequently
- Reindex when needed