←Back to Documentation
Graph Queries
Advanced graph traversals, path finding, and graph algorithms in entropyDB
Overview
Graph query capabilities include:
- Path Traversals: Find paths between nodes
- Pattern Matching: Cypher-like syntax
- Graph Algorithms: PageRank, centrality, community detection
- Recursive Queries: CTEs for graph traversal
- Subgraph Extraction: Extract portions of graphs
Basic Graph Pattern Matching
-- Match pattern: user follows user
MATCH (a:User)-[:FOLLOWS]->(b:User)
WHERE a.username = 'alice'
RETURN b.username, b.email;
-- Match with multiple relationships
MATCH (a:User)-[:FOLLOWS]->(b:User)-[:FOLLOWS]->(c:User)
WHERE a.username = 'alice'
RETURN c.username, COUNT(*) as mutual_connections;
-- Match bidirectional relationship
MATCH (a:User)-[:FRIENDS]-(b:User)
WHERE a.username = 'alice'
RETURN b.username;
-- Match with properties on edges
MATCH (u:User)-[f:FOLLOWS {since: '2024'}]->(other:User)
RETURN u.username, other.username, f.since;
-- Optional match (like LEFT JOIN)
MATCH (u:User)
OPTIONAL MATCH (u)-[:FOLLOWS]->(follower)
RETURN u.username, COUNT(follower) as follower_count;Path Finding
-- Shortest path between two nodes
MATCH path = shortestPath(
(a:User {username: 'alice'})-[:FOLLOWS*]-(b:User {username: 'bob'})
)
RETURN path, length(path) as distance;
-- All paths up to length 5
MATCH path = (a:User {username: 'alice'})-[:FOLLOWS*1..5]-(b:User {username: 'bob'})
RETURN path, length(path) as distance
ORDER BY distance;
-- Find all paths (not just shortest)
MATCH path = allShortestPaths(
(a:User {username: 'alice'})-[:FOLLOWS*]-(b:User {username: 'bob'})
)
RETURN path;
-- Path with specific edge types
MATCH path = shortestPath(
(a:City {name: 'San Francisco'})-[:ROAD|HIGHWAY*]-(b:City {name: 'New York'})
)
RETURN path,
reduce(dist = 0, r IN relationships(path) | dist + r.distance) as total_distance;
-- Variable length relationships
MATCH (u:User {username: 'alice'})-[:FOLLOWS*2..4]->(friend_of_friend)
RETURN DISTINCT friend_of_friend.username;Recursive CTEs for Graphs
-- Find all reachable nodes
WITH RECURSIVE reachable AS (
-- Base case: start node
SELECT id, username, 0 as depth
FROM users
WHERE username = 'alice'
UNION
-- Recursive case: follow edges
SELECT u.id, u.username, r.depth + 1
FROM users u
JOIN follows f ON f.followed_id = u.id
JOIN reachable r ON f.follower_id = r.id
WHERE r.depth < 5
)
SELECT DISTINCT username, MIN(depth) as shortest_distance
FROM reachable
GROUP BY username
ORDER BY shortest_distance;
-- Find organizational hierarchy
WITH RECURSIVE org_tree AS (
-- Root: CEO
SELECT id, name, manager_id, 0 as level, ARRAY[id] as path
FROM employees
WHERE manager_id IS NULL
UNION ALL
-- Subordinates
SELECT e.id, e.name, e.manager_id, ot.level + 1, ot.path || e.id
FROM employees e
JOIN org_tree ot ON e.manager_id = ot.id
WHERE NOT e.id = ANY(ot.path) -- Prevent cycles
)
SELECT
REPEAT(' ', level) || name as hierarchy,
level,
array_to_string(path, ' -> ') as path_string
FROM org_tree
ORDER BY path;
-- Detect cycles in graph
WITH RECURSIVE paths AS (
SELECT
from_node as start,
to_node as current,
ARRAY[from_node, to_node] as path,
false as cycle_detected
FROM edges
UNION ALL
SELECT
p.start,
e.to_node,
p.path || e.to_node,
e.to_node = ANY(p.path) as cycle_detected
FROM paths p
JOIN edges e ON e.from_node = p.current
WHERE NOT p.cycle_detected
AND array_length(p.path, 1) < 10
)
SELECT start, path
FROM paths
WHERE cycle_detected
LIMIT 10;Graph Algorithms
-- PageRank
SELECT entropy_pagerank(
'users', -- node table
'follows', -- edge table
'follower_id', -- source column
'followed_id', -- target column
damping => 0.85,
iterations => 20
) as node_id, pagerank_score
ORDER BY pagerank_score DESC
LIMIT 10;
-- Betweenness Centrality
SELECT
user_id,
username,
entropy_betweenness_centrality(
'social_graph',
user_id
) as centrality
FROM users
ORDER BY centrality DESC
LIMIT 10;
-- Community Detection (Louvain Algorithm)
SELECT
user_id,
entropy_community_detection(
graph_name => 'social_network',
algorithm => 'louvain'
) as community_id
FROM users;
-- Degree Centrality
SELECT
u.username,
COUNT(DISTINCT f1.followed_id) as out_degree,
COUNT(DISTINCT f2.follower_id) as in_degree,
COUNT(DISTINCT f1.followed_id) + COUNT(DISTINCT f2.follower_id) as total_degree
FROM users u
LEFT JOIN follows f1 ON u.id = f1.follower_id
LEFT JOIN follows f2 ON u.id = f2.followed_id
GROUP BY u.id, u.username
ORDER BY total_degree DESC;
-- Connected Components
SELECT entropy_connected_components(
'nodes_table',
'edges_table',
'source_col',
'target_col'
) as component_id, node_id
ORDER BY component_id;
-- Triangle Counting (clustering coefficient)
SELECT
u.username,
entropy_triangle_count(u.id, 'follows') as triangles,
entropy_clustering_coefficient(u.id, 'follows') as clustering_coef
FROM users u
ORDER BY triangles DESC;Subgraph Extraction
-- Extract k-hop neighborhood
CREATE TEMP TABLE alice_network AS
WITH RECURSIVE neighborhood AS (
SELECT id, username, 0 as hop
FROM users
WHERE username = 'alice'
UNION
SELECT u.id, u.username, n.hop + 1
FROM users u
JOIN follows f ON f.followed_id = u.id
JOIN neighborhood n ON f.follower_id = n.id
WHERE n.hop < 2 -- 2-hop neighborhood
)
SELECT * FROM neighborhood;
-- Get edges in subgraph
SELECT f.*
FROM follows f
WHERE f.follower_id IN (SELECT id FROM alice_network)
AND f.followed_id IN (SELECT id FROM alice_network);
-- Extract subgraph by node properties
MATCH (n:User)-[r]-(m:User)
WHERE n.location = 'San Francisco'
AND m.location = 'San Francisco'
RETURN n, r, m;
-- Time-based subgraph
MATCH (a)-[r:INTERACTED]->(b)
WHERE r.timestamp > now() - INTERVAL '30 days'
RETURN a, r, b;
-- Extract influential subgraph
WITH influential_users AS (
SELECT user_id
FROM (
SELECT user_id, entropy_pagerank(...) as score
FROM users
) ranked
WHERE score > 0.01
)
SELECT *
FROM follows
WHERE follower_id IN (SELECT user_id FROM influential_users)
AND followed_id IN (SELECT user_id FROM influential_users);Best Practices
Performance
- Index graph node and edge columns
- Limit traversal depth
- Use materialized views for frequent patterns
- Batch graph algorithms
Modeling
- Design schema for query patterns
- Use labels for node types
- Store edge properties efficiently
- Consider bidirectional edges