EntropyQL

SQL with multi-model extensions for unified querying across all data types

Overview

EntropyQL is entropyDB's query language—a superset of PostgreSQL SQL that adds powerful extensions for documents, graphs, time-series, and vectors. Write familiar SQL while accessing all data models in a single query.

Key Features

100% PostgreSQL compatible for relational queries
Native JSON operators for document queries
Graph traversal syntax for relationship queries
Vector similarity search functions
Time-series aggregations and window functions

Basic SQL Queries

-- Standard SQL works exactly as expected
SELECT id, name, email 
FROM users 
WHERE active = true 
ORDER BY created_at DESC 
LIMIT 10;

-- Joins, aggregations, subqueries
SELECT 
  u.name,
  COUNT(o.id) as order_count,
  SUM(o.total) as total_spent
FROM users u
LEFT JOIN orders o ON u.id = o.user_id
GROUP BY u.name
HAVING COUNT(o.id) > 5;

-- CTEs and window functions
WITH monthly_sales AS (
  SELECT 
    DATE_TRUNC('month', created_at) as month,
    SUM(total) as revenue
  FROM orders
  GROUP BY month
)
SELECT 
  month,
  revenue,
  LAG(revenue) OVER (ORDER BY month) as prev_month,
  revenue - LAG(revenue) OVER (ORDER BY month) as growth
FROM monthly_sales;

Document Queries (JSON/JSONB)

Querying JSON Fields

-- Access nested JSON fields
SELECT 
  id,
  data->>'name' as name,
  data->'address'->>'city' as city,
  data->'preferences'->>'theme' as theme
FROM users;

-- Filter by JSON content
SELECT * FROM products
WHERE data @> '{"category": "electronics"}';

-- Array containment
SELECT * FROM posts
WHERE data->'tags' @> '["featured"]';

-- JSON exists operator
SELECT * FROM users
WHERE data ? 'premium_member';

JSON Aggregation

-- Build JSON objects
SELECT 
  category,
  jsonb_agg(
    jsonb_build_object(
      'name', name,
      'price', price
    )
  ) as products
FROM products
GROUP BY category;

-- JSON array aggregation
SELECT 
  user_id,
  jsonb_agg(DISTINCT data->>'tag') as all_tags
FROM posts
GROUP BY user_id;

Graph Queries

Path Traversal

-- Find all friends (1-hop)
SELECT p2.name
FROM person p1
JOIN follows f ON p1.id = f.follower_id
JOIN person p2 ON f.followee_id = p2.id
WHERE p1.name = 'Alice';

-- Multi-hop traversal with CTE
WITH RECURSIVE connections AS (
  SELECT id, name, 1 as depth
  FROM person WHERE name = 'Alice'
  
  UNION ALL
  
  SELECT p.id, p.name, c.depth + 1
  FROM connections c
  JOIN follows f ON c.id = f.follower_id
  JOIN person p ON f.followee_id = p.id
  WHERE c.depth < 3
)
SELECT DISTINCT name, MIN(depth) as distance
FROM connections
WHERE name != 'Alice'
GROUP BY name
ORDER BY distance;

Pattern Matching

-- Find mutual friends
SELECT p.name as mutual_friend
FROM person p
WHERE EXISTS (
  SELECT 1 FROM follows f1 
  WHERE f1.follower_id = 1 AND f1.followee_id = p.id
)
AND EXISTS (
  SELECT 1 FROM follows f2
  WHERE f2.follower_id = 2 AND f2.followee_id = p.id
);

Vector Search

-- Similarity search with cosine distance
SELECT 
  id,
  title,
  1 - (embedding <=> query_vector) as similarity
FROM documents
ORDER BY embedding <=> query_vector
LIMIT 10;

-- L2 distance (Euclidean)
SELECT id, title
FROM documents
ORDER BY embedding <-> '[0.1, 0.2, 0.3, ...]'::vector
LIMIT 5;

-- Inner product
SELECT id, title
FROM documents
ORDER BY embedding <#> query_embedding DESC
LIMIT 10;

-- Hybrid search (vector + text)
SELECT 
  d.id,
  d.title,
  (0.7 * (1 - (d.embedding <=> $1))) + 
  (0.3 * ts_rank(d.search_vector, query)) as score
FROM documents d,
     to_tsquery('english', $2) query
WHERE d.search_vector @@ query
ORDER BY score DESC
LIMIT 20;

Time-Series Queries

Time Bucketing

-- Aggregate by time intervals
SELECT 
  time_bucket('5 minutes', timestamp) as bucket,
  AVG(temperature) as avg_temp,
  MAX(temperature) as max_temp,
  MIN(temperature) as min_temp
FROM sensor_data
WHERE timestamp > NOW() - INTERVAL '24 hours'
GROUP BY bucket
ORDER BY bucket;

-- Moving average
SELECT 
  timestamp,
  temperature,
  AVG(temperature) OVER (
    ORDER BY timestamp
    ROWS BETWEEN 9 PRECEDING AND CURRENT ROW
  ) as moving_avg_10
FROM sensor_data;

Continuous Aggregates

-- Create materialized aggregate view
CREATE MATERIALIZED VIEW hourly_metrics
WITH (timescaledb.continuous) AS
SELECT 
  time_bucket('1 hour', timestamp) as hour,
  device_id,
  AVG(value) as avg_value,
  MAX(value) as max_value,
  COUNT(*) as count
FROM sensor_data
GROUP BY hour, device_id;

-- Query pre-aggregated data (much faster)
SELECT * FROM hourly_metrics
WHERE hour > NOW() - INTERVAL '7 days';

Cross-Model Queries

Combine multiple data models in a single query:

-- Join relational, document, graph, and time-series data
SELECT 
  u.name,                                    -- Relational
  u.profile->>'bio' as bio,                 -- Document (JSON)
  COUNT(DISTINCT f.followee_id) as following, -- Graph
  AVG(e.metrics->>'session_duration')::float as avg_session, -- Time-series
  vector_similarity(u.preferences_embedding, 
                    $1::vector) as preference_match  -- Vector
FROM users u
LEFT JOIN follows f ON u.id = f.follower_id
LEFT JOIN events e ON u.id = e.user_id 
  AND e.timestamp > NOW() - INTERVAL '30 days'
WHERE u.profile->>'account_type' = 'premium'
  AND u.preferences_embedding IS NOT NULL
GROUP BY u.id, u.name, u.profile, u.preferences_embedding
HAVING COUNT(DISTINCT f.followee_id) > 10
ORDER BY preference_match DESC
LIMIT 50;

Advanced Features

Full-Text Search

-- Text search with ranking
SELECT 
  title,
  ts_rank(search_vector, query) as rank
FROM documents,
     to_tsquery('english', 'database & performance') query
WHERE search_vector @@ query
ORDER BY rank DESC;

-- Phrase search
SELECT * FROM articles
WHERE to_tsvector('english', content) @@ 
      phraseto_tsquery('english', 'machine learning');

Geospatial Queries

-- PostGIS extension support
SELECT 
  name,
  ST_Distance(location, ST_MakePoint(-73.935242, 40.730610)) as distance
FROM places
WHERE ST_DWithin(
  location,
  ST_MakePoint(-73.935242, 40.730610),
  1000  -- meters
)
ORDER BY distance;

Array Operations

-- Array functions
SELECT 
  name,
  array_length(tags, 1) as tag_count,
  tags && ARRAY['featured', 'new'] as has_special_tag
FROM products
WHERE 'electronics' = ANY(tags);

-- Unnest arrays
SELECT 
  product_id,
  unnest(tags) as tag
FROM products;

Query Optimization

Use EXPLAIN ANALYZE

EXPLAIN ANALYZE
SELECT * FROM users WHERE email = 'user@example.com';

Create Appropriate Indexes

-- B-tree for equality/range
CREATE INDEX idx_users_email ON users(email);

-- GIN for JSONB
CREATE INDEX idx_users_data ON users USING GIN (data);

-- HNSW for vectors
CREATE INDEX idx_docs_embedding ON documents 
USING hnsw (embedding vector_cosine_ops);

Use Prepared Statements

PREPARE get_user (int) AS
  SELECT * FROM users WHERE id = $1;

EXECUTE get_user(123);

Best Practices

1. Use Parameterized Queries

Always use $1, $2 placeholders instead of string concatenation to prevent SQL injection.

2. Limit Result Sets

Always use LIMIT for queries that might return large result sets.

3. Index Foreign Keys

Create indexes on foreign key columns used in joins.

4. Avoid SELECT *

Only select the columns you need to reduce network transfer and improve performance.

5. Use CTEs for Readability

Common Table Expressions make complex queries more maintainable.

Next Steps

→ Complete SQL Reference → Advanced Graph Queries → Vector Search Guide