Graph Queries

Advanced graph traversals, path finding, and graph algorithms in entropyDB

Overview

Graph query capabilities include:

Path Traversals: Find paths between nodes
Pattern Matching: Cypher-like syntax
Graph Algorithms: PageRank, centrality, community detection
Recursive Queries: CTEs for graph traversal
Subgraph Extraction: Extract portions of graphs

Basic Graph Pattern Matching

-- Match pattern: user follows user
MATCH (a:User)-[:FOLLOWS]->(b:User)
WHERE a.username = 'alice'
RETURN b.username, b.email;

-- Match with multiple relationships
MATCH (a:User)-[:FOLLOWS]->(b:User)-[:FOLLOWS]->(c:User)
WHERE a.username = 'alice'
RETURN c.username, COUNT(*) as mutual_connections;

-- Match bidirectional relationship
MATCH (a:User)-[:FRIENDS]-(b:User)
WHERE a.username = 'alice'
RETURN b.username;

-- Match with properties on edges
MATCH (u:User)-[f:FOLLOWS {since: '2024'}]->(other:User)
RETURN u.username, other.username, f.since;

-- Optional match (like LEFT JOIN)
MATCH (u:User)
OPTIONAL MATCH (u)-[:FOLLOWS]->(follower)
RETURN u.username, COUNT(follower) as follower_count;

Path Finding

-- Shortest path between two nodes
MATCH path = shortestPath(
  (a:User {username: 'alice'})-[:FOLLOWS*]-(b:User {username: 'bob'})
)
RETURN path, length(path) as distance;

-- All paths up to length 5
MATCH path = (a:User {username: 'alice'})-[:FOLLOWS*1..5]-(b:User {username: 'bob'})
RETURN path, length(path) as distance
ORDER BY distance;

-- Find all paths (not just shortest)
MATCH path = allShortestPaths(
  (a:User {username: 'alice'})-[:FOLLOWS*]-(b:User {username: 'bob'})
)
RETURN path;

-- Path with specific edge types
MATCH path = shortestPath(
  (a:City {name: 'San Francisco'})-[:ROAD|HIGHWAY*]-(b:City {name: 'New York'})
)
RETURN path, 
       reduce(dist = 0, r IN relationships(path) | dist + r.distance) as total_distance;

-- Variable length relationships
MATCH (u:User {username: 'alice'})-[:FOLLOWS*2..4]->(friend_of_friend)
RETURN DISTINCT friend_of_friend.username;

Recursive CTEs for Graphs

-- Find all reachable nodes
WITH RECURSIVE reachable AS (
  -- Base case: start node
  SELECT id, username, 0 as depth
  FROM users
  WHERE username = 'alice'
  
  UNION
  
  -- Recursive case: follow edges
  SELECT u.id, u.username, r.depth + 1
  FROM users u
  JOIN follows f ON f.followed_id = u.id
  JOIN reachable r ON f.follower_id = r.id
  WHERE r.depth < 5
)
SELECT DISTINCT username, MIN(depth) as shortest_distance
FROM reachable
GROUP BY username
ORDER BY shortest_distance;

-- Find organizational hierarchy
WITH RECURSIVE org_tree AS (
  -- Root: CEO
  SELECT id, name, manager_id, 0 as level, ARRAY[id] as path
  FROM employees
  WHERE manager_id IS NULL
  
  UNION ALL
  
  -- Subordinates
  SELECT e.id, e.name, e.manager_id, ot.level + 1, ot.path || e.id
  FROM employees e
  JOIN org_tree ot ON e.manager_id = ot.id
  WHERE NOT e.id = ANY(ot.path)  -- Prevent cycles
)
SELECT 
  REPEAT('  ', level) || name as hierarchy,
  level,
  array_to_string(path, ' -> ') as path_string
FROM org_tree
ORDER BY path;

-- Detect cycles in graph
WITH RECURSIVE paths AS (
  SELECT 
    from_node as start,
    to_node as current,
    ARRAY[from_node, to_node] as path,
    false as cycle_detected
  FROM edges
  
  UNION ALL
  
  SELECT 
    p.start,
    e.to_node,
    p.path || e.to_node,
    e.to_node = ANY(p.path) as cycle_detected
  FROM paths p
  JOIN edges e ON e.from_node = p.current
  WHERE NOT p.cycle_detected
    AND array_length(p.path, 1) < 10
)
SELECT start, path
FROM paths
WHERE cycle_detected
LIMIT 10;

Graph Algorithms

-- PageRank
SELECT entropy_pagerank(
  'users',           -- node table
  'follows',         -- edge table
  'follower_id',     -- source column
  'followed_id',     -- target column
  damping => 0.85,
  iterations => 20
) as node_id, pagerank_score
ORDER BY pagerank_score DESC
LIMIT 10;

-- Betweenness Centrality
SELECT 
  user_id,
  username,
  entropy_betweenness_centrality(
    'social_graph',
    user_id
  ) as centrality
FROM users
ORDER BY centrality DESC
LIMIT 10;

-- Community Detection (Louvain Algorithm)
SELECT 
  user_id,
  entropy_community_detection(
    graph_name => 'social_network',
    algorithm => 'louvain'
  ) as community_id
FROM users;

-- Degree Centrality
SELECT 
  u.username,
  COUNT(DISTINCT f1.followed_id) as out_degree,
  COUNT(DISTINCT f2.follower_id) as in_degree,
  COUNT(DISTINCT f1.followed_id) + COUNT(DISTINCT f2.follower_id) as total_degree
FROM users u
LEFT JOIN follows f1 ON u.id = f1.follower_id
LEFT JOIN follows f2 ON u.id = f2.followed_id
GROUP BY u.id, u.username
ORDER BY total_degree DESC;

-- Connected Components
SELECT entropy_connected_components(
  'nodes_table',
  'edges_table',
  'source_col',
  'target_col'
) as component_id, node_id
ORDER BY component_id;

-- Triangle Counting (clustering coefficient)
SELECT 
  u.username,
  entropy_triangle_count(u.id, 'follows') as triangles,
  entropy_clustering_coefficient(u.id, 'follows') as clustering_coef
FROM users u
ORDER BY triangles DESC;

Subgraph Extraction

-- Extract k-hop neighborhood
CREATE TEMP TABLE alice_network AS
WITH RECURSIVE neighborhood AS (
  SELECT id, username, 0 as hop
  FROM users
  WHERE username = 'alice'
  
  UNION
  
  SELECT u.id, u.username, n.hop + 1
  FROM users u
  JOIN follows f ON f.followed_id = u.id
  JOIN neighborhood n ON f.follower_id = n.id
  WHERE n.hop < 2  -- 2-hop neighborhood
)
SELECT * FROM neighborhood;

-- Get edges in subgraph
SELECT f.*
FROM follows f
WHERE f.follower_id IN (SELECT id FROM alice_network)
  AND f.followed_id IN (SELECT id FROM alice_network);

-- Extract subgraph by node properties
MATCH (n:User)-[r]-(m:User)
WHERE n.location = 'San Francisco' 
  AND m.location = 'San Francisco'
RETURN n, r, m;

-- Time-based subgraph
MATCH (a)-[r:INTERACTED]->(b)
WHERE r.timestamp > now() - INTERVAL '30 days'
RETURN a, r, b;

-- Extract influential subgraph
WITH influential_users AS (
  SELECT user_id
  FROM (
    SELECT user_id, entropy_pagerank(...) as score
    FROM users
  ) ranked
  WHERE score > 0.01
)
SELECT *
FROM follows
WHERE follower_id IN (SELECT user_id FROM influential_users)
  AND followed_id IN (SELECT user_id FROM influential_users);

Best Practices

Performance

Index graph node and edge columns
Limit traversal depth
Use materialized views for frequent patterns
Batch graph algorithms

Modeling

Design schema for query patterns
Use labels for node types
Store edge properties efficiently
Consider bidirectional edges

Next Steps

Graph Model

Graph data model

Time Series

Time series queries

Cross-Model Joins

Join across models