Back to Documentation

Graph Queries

Advanced graph traversals, path finding, and graph algorithms in entropyDB

Overview

Graph query capabilities include:

  • Path Traversals: Find paths between nodes
  • Pattern Matching: Cypher-like syntax
  • Graph Algorithms: PageRank, centrality, community detection
  • Recursive Queries: CTEs for graph traversal
  • Subgraph Extraction: Extract portions of graphs

Basic Graph Pattern Matching

-- Match pattern: user follows user
MATCH (a:User)-[:FOLLOWS]->(b:User)
WHERE a.username = 'alice'
RETURN b.username, b.email;

-- Match with multiple relationships
MATCH (a:User)-[:FOLLOWS]->(b:User)-[:FOLLOWS]->(c:User)
WHERE a.username = 'alice'
RETURN c.username, COUNT(*) as mutual_connections;

-- Match bidirectional relationship
MATCH (a:User)-[:FRIENDS]-(b:User)
WHERE a.username = 'alice'
RETURN b.username;

-- Match with properties on edges
MATCH (u:User)-[f:FOLLOWS {since: '2024'}]->(other:User)
RETURN u.username, other.username, f.since;

-- Optional match (like LEFT JOIN)
MATCH (u:User)
OPTIONAL MATCH (u)-[:FOLLOWS]->(follower)
RETURN u.username, COUNT(follower) as follower_count;

Path Finding

-- Shortest path between two nodes
MATCH path = shortestPath(
  (a:User {username: 'alice'})-[:FOLLOWS*]-(b:User {username: 'bob'})
)
RETURN path, length(path) as distance;

-- All paths up to length 5
MATCH path = (a:User {username: 'alice'})-[:FOLLOWS*1..5]-(b:User {username: 'bob'})
RETURN path, length(path) as distance
ORDER BY distance;

-- Find all paths (not just shortest)
MATCH path = allShortestPaths(
  (a:User {username: 'alice'})-[:FOLLOWS*]-(b:User {username: 'bob'})
)
RETURN path;

-- Path with specific edge types
MATCH path = shortestPath(
  (a:City {name: 'San Francisco'})-[:ROAD|HIGHWAY*]-(b:City {name: 'New York'})
)
RETURN path, 
       reduce(dist = 0, r IN relationships(path) | dist + r.distance) as total_distance;

-- Variable length relationships
MATCH (u:User {username: 'alice'})-[:FOLLOWS*2..4]->(friend_of_friend)
RETURN DISTINCT friend_of_friend.username;

Recursive CTEs for Graphs

-- Find all reachable nodes
WITH RECURSIVE reachable AS (
  -- Base case: start node
  SELECT id, username, 0 as depth
  FROM users
  WHERE username = 'alice'
  
  UNION
  
  -- Recursive case: follow edges
  SELECT u.id, u.username, r.depth + 1
  FROM users u
  JOIN follows f ON f.followed_id = u.id
  JOIN reachable r ON f.follower_id = r.id
  WHERE r.depth < 5
)
SELECT DISTINCT username, MIN(depth) as shortest_distance
FROM reachable
GROUP BY username
ORDER BY shortest_distance;

-- Find organizational hierarchy
WITH RECURSIVE org_tree AS (
  -- Root: CEO
  SELECT id, name, manager_id, 0 as level, ARRAY[id] as path
  FROM employees
  WHERE manager_id IS NULL
  
  UNION ALL
  
  -- Subordinates
  SELECT e.id, e.name, e.manager_id, ot.level + 1, ot.path || e.id
  FROM employees e
  JOIN org_tree ot ON e.manager_id = ot.id
  WHERE NOT e.id = ANY(ot.path)  -- Prevent cycles
)
SELECT 
  REPEAT('  ', level) || name as hierarchy,
  level,
  array_to_string(path, ' -> ') as path_string
FROM org_tree
ORDER BY path;

-- Detect cycles in graph
WITH RECURSIVE paths AS (
  SELECT 
    from_node as start,
    to_node as current,
    ARRAY[from_node, to_node] as path,
    false as cycle_detected
  FROM edges
  
  UNION ALL
  
  SELECT 
    p.start,
    e.to_node,
    p.path || e.to_node,
    e.to_node = ANY(p.path) as cycle_detected
  FROM paths p
  JOIN edges e ON e.from_node = p.current
  WHERE NOT p.cycle_detected
    AND array_length(p.path, 1) < 10
)
SELECT start, path
FROM paths
WHERE cycle_detected
LIMIT 10;

Graph Algorithms

-- PageRank
SELECT entropy_pagerank(
  'users',           -- node table
  'follows',         -- edge table
  'follower_id',     -- source column
  'followed_id',     -- target column
  damping => 0.85,
  iterations => 20
) as node_id, pagerank_score
ORDER BY pagerank_score DESC
LIMIT 10;

-- Betweenness Centrality
SELECT 
  user_id,
  username,
  entropy_betweenness_centrality(
    'social_graph',
    user_id
  ) as centrality
FROM users
ORDER BY centrality DESC
LIMIT 10;

-- Community Detection (Louvain Algorithm)
SELECT 
  user_id,
  entropy_community_detection(
    graph_name => 'social_network',
    algorithm => 'louvain'
  ) as community_id
FROM users;

-- Degree Centrality
SELECT 
  u.username,
  COUNT(DISTINCT f1.followed_id) as out_degree,
  COUNT(DISTINCT f2.follower_id) as in_degree,
  COUNT(DISTINCT f1.followed_id) + COUNT(DISTINCT f2.follower_id) as total_degree
FROM users u
LEFT JOIN follows f1 ON u.id = f1.follower_id
LEFT JOIN follows f2 ON u.id = f2.followed_id
GROUP BY u.id, u.username
ORDER BY total_degree DESC;

-- Connected Components
SELECT entropy_connected_components(
  'nodes_table',
  'edges_table',
  'source_col',
  'target_col'
) as component_id, node_id
ORDER BY component_id;

-- Triangle Counting (clustering coefficient)
SELECT 
  u.username,
  entropy_triangle_count(u.id, 'follows') as triangles,
  entropy_clustering_coefficient(u.id, 'follows') as clustering_coef
FROM users u
ORDER BY triangles DESC;

Subgraph Extraction

-- Extract k-hop neighborhood
CREATE TEMP TABLE alice_network AS
WITH RECURSIVE neighborhood AS (
  SELECT id, username, 0 as hop
  FROM users
  WHERE username = 'alice'
  
  UNION
  
  SELECT u.id, u.username, n.hop + 1
  FROM users u
  JOIN follows f ON f.followed_id = u.id
  JOIN neighborhood n ON f.follower_id = n.id
  WHERE n.hop < 2  -- 2-hop neighborhood
)
SELECT * FROM neighborhood;

-- Get edges in subgraph
SELECT f.*
FROM follows f
WHERE f.follower_id IN (SELECT id FROM alice_network)
  AND f.followed_id IN (SELECT id FROM alice_network);

-- Extract subgraph by node properties
MATCH (n:User)-[r]-(m:User)
WHERE n.location = 'San Francisco' 
  AND m.location = 'San Francisco'
RETURN n, r, m;

-- Time-based subgraph
MATCH (a)-[r:INTERACTED]->(b)
WHERE r.timestamp > now() - INTERVAL '30 days'
RETURN a, r, b;

-- Extract influential subgraph
WITH influential_users AS (
  SELECT user_id
  FROM (
    SELECT user_id, entropy_pagerank(...) as score
    FROM users
  ) ranked
  WHERE score > 0.01
)
SELECT *
FROM follows
WHERE follower_id IN (SELECT user_id FROM influential_users)
  AND followed_id IN (SELECT user_id FROM influential_users);

Best Practices

Performance

  • Index graph node and edge columns
  • Limit traversal depth
  • Use materialized views for frequent patterns
  • Batch graph algorithms

Modeling

  • Design schema for query patterns
  • Use labels for node types
  • Store edge properties efficiently
  • Consider bidirectional edges

Next Steps