←Back to Documentation
Scaling & Sharding
Scale entropyDB horizontally to handle billions of rows and petabytes of data
Overview
entropyDB provides multiple scaling strategies:
- • Horizontal Scaling: Add nodes to distribute load
- • Sharding: Partition data across multiple nodes
- • Read Replicas: Scale read operations independently
- • Auto-Scaling: Automatically adjust cluster size
- • Rebalancing: Redistribute data as cluster grows
Horizontal Scaling
Adding Nodes to Cluster
Scale your cluster by adding new nodes:
# Check current cluster status entropy-admin cluster status Output: Cluster: production Nodes: 3 Status: healthy Total Storage: 1.5 TB Total Memory: 96 GB # Add new node to cluster entropy-admin node add \ --host new-node.example.com \ --port 5432 \ --role data # Verify node joined entropy-admin node list # Monitor rebalancing progress entropy-admin rebalance status # Scale using Kubernetes kubectl scale statefulset entropydb --replicas=5 # Or using Helm helm upgrade entropydb entropydb/entropydb \ --set replicaCount=5 \ --reuse-values
Node Roles
# Configure node roles # entropydb.conf # Data node - stores data shards node_role = 'data' shard_count = 16 replication_factor = 3 # Coordinator node - handles query routing node_role = 'coordinator' enable_query_routing = true connection_pool_size = 1000 # Compute node - handles heavy computations node_role = 'compute' enable_parallel_query = true max_worker_processes = 16 # Check node roles SELECT node_id, node_role, status, shard_count FROM entropy_cluster_nodes;
Sharding Strategies
Hash Sharding
Distribute data evenly using hash functions:
-- Create hash-sharded table CREATE TABLE users ( user_id BIGINT PRIMARY KEY, username TEXT, email TEXT, created_at TIMESTAMP ) PARTITION BY HASH (user_id); -- entropyDB automatically creates shards -- Data is distributed using: shard = hash(user_id) % shard_count -- Query automatically routes to correct shard SELECT * FROM users WHERE user_id = 12345; -- Check shard distribution SELECT shard_id, row_count, size_bytes, node_id FROM entropy_shard_stats WHERE table_name = 'users' ORDER BY shard_id; -- Multi-shard queries work transparently SELECT COUNT(*) FROM users WHERE created_at > NOW() - INTERVAL '7 days';
Range Sharding
-- Create range-sharded table (good for time-series)
CREATE TABLE events (
event_id BIGINT,
event_time TIMESTAMP,
event_type TEXT,
data JSONB
) PARTITION BY RANGE (event_time);
-- Create partitions manually
CREATE TABLE events_2024_01 PARTITION OF events
FOR VALUES FROM ('2024-01-01') TO ('2024-02-01');
CREATE TABLE events_2024_02 PARTITION OF events
FOR VALUES FROM ('2024-02-01') TO ('2024-03-01');
-- Or auto-create partitions
ALTER TABLE events SET (
auto_partition = true,
partition_interval = '1 month',
partition_retention = '12 months'
);
-- Queries leverage partition pruning
SELECT * FROM events
WHERE event_time >= '2024-01-15' AND event_time < '2024-01-21';
-- Only scans events_2024_01 partition
-- Drop old partitions
DROP TABLE events_2023_01;
Geo-Distributed Sharding
-- Create geo-sharded table
CREATE TABLE orders (
order_id BIGINT PRIMARY KEY,
user_id BIGINT,
region TEXT,
total DECIMAL,
created_at TIMESTAMP
) PARTITION BY LIST (region);
-- Create regional partitions
CREATE TABLE orders_us PARTITION OF orders
FOR VALUES IN ('us-east', 'us-west', 'us-central')
WITH (location = 'us-west-2');
CREATE TABLE orders_eu PARTITION OF orders
FOR VALUES IN ('eu-west', 'eu-central', 'eu-north')
WITH (location = 'eu-central-1');
CREATE TABLE orders_asia PARTITION OF orders
FOR VALUES IN ('ap-south', 'ap-east', 'ap-southeast')
WITH (location = 'ap-southeast-1');
-- Queries automatically route to nearest region
SELECT * FROM orders WHERE region = 'us-east';
-- Multi-region queries
SELECT region, COUNT(*), SUM(total)
FROM orders
WHERE created_at > NOW() - INTERVAL '7 days'
GROUP BY region;
Read Replicas
Creating Read Replicas
-- Enable read replicas on primary
ALTER SYSTEM SET wal_level = 'logical';
ALTER SYSTEM SET max_wal_senders = 10;
ALTER SYSTEM SET max_replication_slots = 10;
-- Create replication user
CREATE USER replicator WITH REPLICATION PASSWORD 'secure_password';
-- On replica server
entropy-admin create-replica \
--primary-host primary.example.com \
--primary-port 5432 \
--replication-user replicator \
--replication-password secure_password \
--data-dir /var/lib/entropydb/replica
-- Configure load balancing
CREATE SERVER read_pool FOREIGN DATA WRAPPER postgres_fdw
OPTIONS (
hosts 'replica1.example.com,replica2.example.com,replica3.example.com',
port '5432',
dbname 'mydb',
load_balance 'random'
);
-- Route read queries to replicas
-- Application-level routing
const pool = new Pool({
master: { host: 'primary.example.com', port: 5432 },
replicas: [
{ host: 'replica1.example.com', port: 5432 },
{ host: 'replica2.example.com', port: 5432 },
{ host: 'replica3.example.com', port: 5432 }
],
readPreference: 'replica'
});
// Writes go to primary
await pool.query('INSERT INTO users (user_id, username) VALUES ($1, $2)', [1, 'alice']);
// Reads go to replicas
await pool.query('SELECT * FROM users WHERE id = $1', [1]);
Monitoring Replication Lag
-- Check replication lag
SELECT
application_name,
client_addr,
state,
sent_lsn,
write_lsn,
flush_lsn,
replay_lsn,
sync_state,
COALESCE(
EXTRACT(EPOCH FROM (now() - pg_last_xact_replay_timestamp())),
0
) as lag_seconds
FROM pg_stat_replication;
-- Alert if lag exceeds threshold
CREATE OR REPLACE FUNCTION check_replication_lag()
RETURNS void AS $$
DECLARE
max_lag NUMERIC;
BEGIN
SELECT MAX(EXTRACT(EPOCH FROM (now() - pg_last_xact_replay_timestamp())))
INTO max_lag
FROM pg_stat_replication;
IF max_lag > 10 THEN
RAISE WARNING 'Replication lag exceeds 10 seconds: % seconds', max_lag;
END IF;
END;
$$ LANGUAGE plpgsql;
-- Set up monitoring
SELECT cron.schedule('check-replication-lag', '*/1 * * * *',
'SELECT check_replication_lag()');
Auto-Scaling
Kubernetes HPA
# Horizontal Pod Autoscaler
apiVersion: autoscaling/v2
kind: HorizontalPodAutoscaler
metadata:
name: entropydb-hpa
namespace: entropydb
spec:
scaleTargetRef:
apiVersion: apps/v1
kind: StatefulSet
name: entropydb
minReplicas: 3
maxReplicas: 10
metrics:
- type: Resource
resource:
name: cpu
target:
type: Utilization
averageUtilization: 70
- type: Resource
resource:
name: memory
target:
type: Utilization
averageUtilization: 80
- type: Pods
pods:
metric:
name: entropydb_active_connections
target:
type: AverageValue
averageValue: "800"
behavior:
scaleUp:
stabilizationWindowSeconds: 60
policies:
- type: Percent
value: 50
periodSeconds: 60
- type: Pods
value: 2
periodSeconds: 60
scaleDown:
stabilizationWindowSeconds: 300
policies:
- type: Percent
value: 10
periodSeconds: 60
Cloud Auto-Scaling
# AWS Auto Scaling
resource "aws_autoscaling_group" "entropydb" {
name = "entropydb-asg"
min_size = 3
max_size = 10
desired_capacity = 3
vpc_zone_identifier = aws_subnet.private[*].id
launch_template {
id = aws_launch_template.entropydb.id
version = "$Latest"
}
tag {
key = "Name"
value = "entropydb-node"
propagate_at_launch = true
}
}
resource "aws_autoscaling_policy" "scale_up" {
name = "entropydb-scale-up"
scaling_adjustment = 2
adjustment_type = "ChangeInCapacity"
cooldown = 300
autoscaling_group_name = aws_autoscaling_group.entropydb.name
}
resource "aws_cloudwatch_metric_alarm" "high_cpu" {
alarm_name = "entropydb-high-cpu"
comparison_operator = "GreaterThanThreshold"
evaluation_periods = "2"
metric_name = "CPUUtilization"
namespace = "AWS/EC2"
period = "120"
statistic = "Average"
threshold = "70"
alarm_actions = [aws_autoscaling_policy.scale_up.arn]
}
Data Rebalancing
Manual Rebalancing
-- Check shard distribution SELECT node_id, COUNT(*) as shard_count, SUM(size_bytes) as total_size, AVG(row_count) as avg_rows FROM entropy_shard_stats GROUP BY node_id ORDER BY shard_count DESC; -- Trigger manual rebalance SELECT entropy_rebalance_cluster( strategy => 'size_based', -- or 'count_based', 'load_based' max_parallel => 4, throttle_mbps => 100 ); -- Monitor rebalancing progress SELECT rebalance_id, status, shards_moved, shards_total, bytes_moved, bytes_total, started_at, estimated_completion FROM entropy_rebalance_status; -- Pause rebalancing SELECT entropy_rebalance_pause(); -- Resume rebalancing SELECT entropy_rebalance_resume(); -- Cancel rebalancing SELECT entropy_rebalance_cancel();
Automatic Rebalancing
-- Enable automatic rebalancing
ALTER SYSTEM SET auto_rebalance = true;
ALTER SYSTEM SET rebalance_threshold = 0.2; -- 20% imbalance triggers rebalance
ALTER SYSTEM SET rebalance_schedule = '0 2 * * 0'; -- Weekly at 2 AM Sunday
-- Configure rebalancing policy
CREATE REBALANCE POLICY balanced_load AS
TRIGGER WHEN (
SELECT MAX(shard_count) - MIN(shard_count)
FROM (
SELECT node_id, COUNT(*) as shard_count
FROM entropy_shard_stats
GROUP BY node_id
) AS node_shards
) > 5
EXECUTE
SELECT entropy_rebalance_cluster(
strategy => 'load_based',
max_parallel => 2,
throttle_mbps => 50
);
-- View rebalancing history
SELECT
rebalance_id,
trigger_reason,
shards_moved,
duration,
completed_at
FROM entropy_rebalance_history
ORDER BY completed_at DESC
LIMIT 10;
Performance Optimization
Connection Pooling
# Install PgBouncer for connection pooling
# pgbouncer.ini
[databases]
mydb = host=entropydb-node1,entropydb-node2,entropydb-node3 \
port=5432 dbname=mydb
[pgbouncer]
pool_mode = transaction
max_client_conn = 10000
default_pool_size = 25
reserve_pool_size = 5
reserve_pool_timeout = 3
# Start PgBouncer
pgbouncer -d /etc/pgbouncer/pgbouncer.ini
# Application connects to PgBouncer
const pool = new Pool({
host: 'pgbouncer.example.com',
port: 6432,
database: 'mydb',
max: 20, // Application-side pool
idleTimeoutMillis: 30000,
connectionTimeoutMillis: 2000
});
Query Routing
-- Configure query routing ALTER SYSTEM SET enable_query_routing = true; ALTER SYSTEM SET router_mode = 'adaptive'; -- or 'round_robin', 'least_connections' -- Route queries based on hints -- Route to specific node /*+ NODE(node1) */ SELECT * FROM users WHERE region = 'us-west'; -- Route to replica for read /*+ REPLICA */ SELECT * FROM products WHERE category = 'electronics'; -- Force primary for consistent reads /*+ PRIMARY */ SELECT balance FROM accounts WHERE user_id = 123; -- Parallel query across all nodes /*+ PARALLEL(4) */ SELECT COUNT(*) FROM large_table;
Best Practices
Sharding Strategy
- • Choose appropriate shard key
- • Avoid hot shards
- • Plan for future growth
- • Monitor shard distribution
Scaling Operations
- • Scale gradually to avoid disruption
- • Monitor during scale operations
- • Test scaling in staging first
- • Have rollback plan ready
Performance
- • Use connection pooling
- • Leverage read replicas
- • Optimize cross-shard queries
- • Cache frequently accessed data
Monitoring
- • Track shard metrics
- • Monitor replication lag
- • Alert on imbalances
- • Analyze query distribution