←Back to Documentation
EntropyQL
SQL with multi-model extensions for unified querying across all data types
Overview
EntropyQL is entropyDB's query language—a superset of PostgreSQL SQL that adds powerful extensions for documents, graphs, time-series, and vectors. Write familiar SQL while accessing all data models in a single query.
Key Features
- 100% PostgreSQL compatible for relational queries
- Native JSON operators for document queries
- Graph traversal syntax for relationship queries
- Vector similarity search functions
- Time-series aggregations and window functions
Basic SQL Queries
-- Standard SQL works exactly as expected
SELECT id, name, email
FROM users
WHERE active = true
ORDER BY created_at DESC
LIMIT 10;
-- Joins, aggregations, subqueries
SELECT
u.name,
COUNT(o.id) as order_count,
SUM(o.total) as total_spent
FROM users u
LEFT JOIN orders o ON u.id = o.user_id
GROUP BY u.name
HAVING COUNT(o.id) > 5;
-- CTEs and window functions
WITH monthly_sales AS (
SELECT
DATE_TRUNC('month', created_at) as month,
SUM(total) as revenue
FROM orders
GROUP BY month
)
SELECT
month,
revenue,
LAG(revenue) OVER (ORDER BY month) as prev_month,
revenue - LAG(revenue) OVER (ORDER BY month) as growth
FROM monthly_sales;Document Queries (JSON/JSONB)
Querying JSON Fields
-- Access nested JSON fields
SELECT
id,
data->>'name' as name,
data->'address'->>'city' as city,
data->'preferences'->>'theme' as theme
FROM users;
-- Filter by JSON content
SELECT * FROM products
WHERE data @> '{"category": "electronics"}';
-- Array containment
SELECT * FROM posts
WHERE data->'tags' @> '["featured"]';
-- JSON exists operator
SELECT * FROM users
WHERE data ? 'premium_member';JSON Aggregation
-- Build JSON objects
SELECT
category,
jsonb_agg(
jsonb_build_object(
'name', name,
'price', price
)
) as products
FROM products
GROUP BY category;
-- JSON array aggregation
SELECT
user_id,
jsonb_agg(DISTINCT data->>'tag') as all_tags
FROM posts
GROUP BY user_id;Graph Queries
Path Traversal
-- Find all friends (1-hop) SELECT p2.name FROM person p1 JOIN follows f ON p1.id = f.follower_id JOIN person p2 ON f.followee_id = p2.id WHERE p1.name = 'Alice'; -- Multi-hop traversal with CTE WITH RECURSIVE connections AS ( SELECT id, name, 1 as depth FROM person WHERE name = 'Alice' UNION ALL SELECT p.id, p.name, c.depth + 1 FROM connections c JOIN follows f ON c.id = f.follower_id JOIN person p ON f.followee_id = p.id WHERE c.depth < 3 ) SELECT DISTINCT name, MIN(depth) as distance FROM connections WHERE name != 'Alice' GROUP BY name ORDER BY distance;
Pattern Matching
-- Find mutual friends SELECT p.name as mutual_friend FROM person p WHERE EXISTS ( SELECT 1 FROM follows f1 WHERE f1.follower_id = 1 AND f1.followee_id = p.id ) AND EXISTS ( SELECT 1 FROM follows f2 WHERE f2.follower_id = 2 AND f2.followee_id = p.id );
Vector Search
-- Similarity search with cosine distance
SELECT
id,
title,
1 - (embedding <=> query_vector) as similarity
FROM documents
ORDER BY embedding <=> query_vector
LIMIT 10;
-- L2 distance (Euclidean)
SELECT id, title
FROM documents
ORDER BY embedding <-> '[0.1, 0.2, 0.3, ...]'::vector
LIMIT 5;
-- Inner product
SELECT id, title
FROM documents
ORDER BY embedding <#> query_embedding DESC
LIMIT 10;
-- Hybrid search (vector + text)
SELECT
d.id,
d.title,
(0.7 * (1 - (d.embedding <=> $1))) +
(0.3 * ts_rank(d.search_vector, query)) as score
FROM documents d,
to_tsquery('english', $2) query
WHERE d.search_vector @@ query
ORDER BY score DESC
LIMIT 20;Time-Series Queries
Time Bucketing
-- Aggregate by time intervals
SELECT
time_bucket('5 minutes', timestamp) as bucket,
AVG(temperature) as avg_temp,
MAX(temperature) as max_temp,
MIN(temperature) as min_temp
FROM sensor_data
WHERE timestamp > NOW() - INTERVAL '24 hours'
GROUP BY bucket
ORDER BY bucket;
-- Moving average
SELECT
timestamp,
temperature,
AVG(temperature) OVER (
ORDER BY timestamp
ROWS BETWEEN 9 PRECEDING AND CURRENT ROW
) as moving_avg_10
FROM sensor_data;Continuous Aggregates
-- Create materialized aggregate view
CREATE MATERIALIZED VIEW hourly_metrics
WITH (timescaledb.continuous) AS
SELECT
time_bucket('1 hour', timestamp) as hour,
device_id,
AVG(value) as avg_value,
MAX(value) as max_value,
COUNT(*) as count
FROM sensor_data
GROUP BY hour, device_id;
-- Query pre-aggregated data (much faster)
SELECT * FROM hourly_metrics
WHERE hour > NOW() - INTERVAL '7 days';Cross-Model Queries
Combine multiple data models in a single query:
-- Join relational, document, graph, and time-series data
SELECT
u.name, -- Relational
u.profile->>'bio' as bio, -- Document (JSON)
COUNT(DISTINCT f.followee_id) as following, -- Graph
AVG(e.metrics->>'session_duration')::float as avg_session, -- Time-series
vector_similarity(u.preferences_embedding,
$1::vector) as preference_match -- Vector
FROM users u
LEFT JOIN follows f ON u.id = f.follower_id
LEFT JOIN events e ON u.id = e.user_id
AND e.timestamp > NOW() - INTERVAL '30 days'
WHERE u.profile->>'account_type' = 'premium'
AND u.preferences_embedding IS NOT NULL
GROUP BY u.id, u.name, u.profile, u.preferences_embedding
HAVING COUNT(DISTINCT f.followee_id) > 10
ORDER BY preference_match DESC
LIMIT 50;Advanced Features
Full-Text Search
-- Text search with ranking
SELECT
title,
ts_rank(search_vector, query) as rank
FROM documents,
to_tsquery('english', 'database & performance') query
WHERE search_vector @@ query
ORDER BY rank DESC;
-- Phrase search
SELECT * FROM articles
WHERE to_tsvector('english', content) @@
phraseto_tsquery('english', 'machine learning');Geospatial Queries
-- PostGIS extension support SELECT name, ST_Distance(location, ST_MakePoint(-73.935242, 40.730610)) as distance FROM places WHERE ST_DWithin( location, ST_MakePoint(-73.935242, 40.730610), 1000 -- meters ) ORDER BY distance;
Array Operations
-- Array functions SELECT name, array_length(tags, 1) as tag_count, tags && ARRAY['featured', 'new'] as has_special_tag FROM products WHERE 'electronics' = ANY(tags); -- Unnest arrays SELECT product_id, unnest(tags) as tag FROM products;
Query Optimization
Use EXPLAIN ANALYZE
EXPLAIN ANALYZE SELECT * FROM users WHERE email = 'user@example.com';
Create Appropriate Indexes
-- B-tree for equality/range CREATE INDEX idx_users_email ON users(email); -- GIN for JSONB CREATE INDEX idx_users_data ON users USING GIN (data); -- HNSW for vectors CREATE INDEX idx_docs_embedding ON documents USING hnsw (embedding vector_cosine_ops);
Use Prepared Statements
PREPARE get_user (int) AS SELECT * FROM users WHERE id = $1; EXECUTE get_user(123);
Best Practices
1. Use Parameterized Queries
Always use $1, $2 placeholders instead of string concatenation to prevent SQL injection.
2. Limit Result Sets
Always use LIMIT for queries that might return large result sets.
3. Index Foreign Keys
Create indexes on foreign key columns used in joins.
4. Avoid SELECT *
Only select the columns you need to reduce network transfer and improve performance.
5. Use CTEs for Readability
Common Table Expressions make complex queries more maintainable.