Property Graphs

Model and query complex relationships with graph database capabilities

Overview

entropyDB supports property graphs for modeling complex, highly connected data. Query relationships efficiently with path traversals and pattern matching.

Key Features

Labeled property graph model
Cypher-like query language
Efficient path traversals
Pattern matching and graph algorithms

Creating Graph Schema

-- Create vertex table (nodes)
CREATE TABLE person (
  id SERIAL PRIMARY KEY,
  name TEXT NOT NULL,
  age INT,
  properties JSONB,
  created_at TIMESTAMP DEFAULT NOW()
);

-- Create edge table (relationships)
CREATE TABLE knows (
  id SERIAL PRIMARY KEY,
  from_person_id INT REFERENCES person(id),
  to_person_id INT REFERENCES person(id),
  since DATE,
  strength FLOAT,
  properties JSONB,
  created_at TIMESTAMP DEFAULT NOW()
);

-- Indexes for efficient traversals
CREATE INDEX idx_knows_from ON knows(from_person_id);
CREATE INDEX idx_knows_to ON knows(to_person_id);
CREATE INDEX idx_knows_both ON knows(from_person_id, to_person_id);

-- Additional vertex types
CREATE TABLE company (
  id SERIAL PRIMARY KEY,
  name TEXT NOT NULL,
  industry TEXT,
  properties JSONB
);

CREATE TABLE works_at (
  id SERIAL PRIMARY KEY,
  person_id INT REFERENCES person(id),
  company_id INT REFERENCES company(id),
  role TEXT,
  start_date DATE,
  end_date DATE
);

Basic Graph Operations

Creating Vertices and Edges

-- Create vertices
INSERT INTO person (name, age, properties)
VALUES 
  ('Alice', 30, '{"city": "NYC", "occupation": "Engineer"}'::jsonb),
  ('Bob', 28, '{"city": "SF", "occupation": "Designer"}'::jsonb),
  ('Charlie', 35, '{"city": "LA", "occupation": "Manager"}'::jsonb);

-- Create edges
INSERT INTO knows (from_person_id, to_person_id, since, strength)
VALUES
  (1, 2, '2020-01-15', 0.8),
  (1, 3, '2019-06-20', 0.6),
  (2, 3, '2021-03-10', 0.9);

Finding Neighbors

-- Find all friends of Alice
SELECT p2.name, k.since, k.strength
FROM person p1
JOIN knows k ON p1.id = k.from_person_id
JOIN person p2 ON k.to_person_id = p2.id
WHERE p1.name = 'Alice';

-- Bidirectional friends
SELECT DISTINCT p2.name
FROM person p1
JOIN knows k ON p1.id = k.from_person_id OR p1.id = k.to_person_id
JOIN person p2 ON (p2.id = k.to_person_id OR p2.id = k.from_person_id)
WHERE p1.name = 'Alice' AND p2.id != p1.id;

Path Traversals

2-Hop Traversal (Friends of Friends)

-- Find friends of friends
SELECT DISTINCT p3.name
FROM person p1
JOIN knows k1 ON p1.id = k1.from_person_id
JOIN person p2 ON k1.to_person_id = p2.id
JOIN knows k2 ON p2.id = k2.from_person_id
JOIN person p3 ON k2.to_person_id = p3.id
WHERE p1.name = 'Alice'
  AND p3.id != p1.id  -- Exclude self
  AND p3.id NOT IN (  -- Exclude direct friends
    SELECT to_person_id FROM knows WHERE from_person_id = p1.id
  );

Variable-Length Paths

-- Recursive CTE for multi-hop paths
WITH RECURSIVE path AS (
  -- Base case: direct connections
  SELECT 
    k.from_person_id,
    k.to_person_id,
    ARRAY[k.from_person_id, k.to_person_id] as path_nodes,
    1 as depth
  FROM knows k
  WHERE k.from_person_id = 1  -- Starting from Alice
  
  UNION ALL
  
  -- Recursive case: extend path
  SELECT 
    p.from_person_id,
    k.to_person_id,
    p.path_nodes || k.to_person_id,
    p.depth + 1
  FROM path p
  JOIN knows k ON p.to_person_id = k.from_person_id
  WHERE k.to_person_id != ALL(p.path_nodes)  -- Avoid cycles
    AND p.depth < 5  -- Maximum depth
)
SELECT 
  pe.name as start_person,
  p.depth,
  array_agg(per.name ORDER BY p.depth) as path
FROM path p
JOIN person pe ON p.from_person_id = pe.id
JOIN person per ON per.id = ANY(p.path_nodes)
GROUP BY pe.name, p.depth
ORDER BY p.depth;

Shortest Path

-- Find shortest path between two people
WITH RECURSIVE shortest_path AS (
  SELECT 
    k.from_person_id,
    k.to_person_id,
    ARRAY[k.from_person_id, k.to_person_id] as path,
    1 as length
  FROM knows k
  WHERE k.from_person_id = 1  -- Alice
  
  UNION ALL
  
  SELECT 
    sp.from_person_id,
    k.to_person_id,
    sp.path || k.to_person_id,
    sp.length + 1
  FROM shortest_path sp
  JOIN knows k ON sp.to_person_id = k.from_person_id
  WHERE k.to_person_id != ALL(sp.path)
    AND sp.length < 10
)
SELECT 
  path,
  length
FROM shortest_path
WHERE to_person_id = 5  -- Target person (Charlie)
ORDER BY length
LIMIT 1;

Pattern Matching

Triangle Detection

-- Find triangles (A knows B, B knows C, C knows A)
SELECT 
  p1.name as person1,
  p2.name as person2,
  p3.name as person3
FROM knows k1
JOIN knows k2 ON k1.to_person_id = k2.from_person_id
JOIN knows k3 ON k2.to_person_id = k3.from_person_id
JOIN person p1 ON k1.from_person_id = p1.id
JOIN person p2 ON k2.from_person_id = p2.id
JOIN person p3 ON k3.from_person_id = p3.id
WHERE k3.to_person_id = k1.from_person_id
  AND k1.from_person_id < k2.from_person_id
  AND k2.from_person_id < k3.from_person_id;

Common Friends

-- Find mutual connections
SELECT 
  p.name as mutual_friend,
  COUNT(*) as connection_count
FROM knows k1
JOIN knows k2 ON k1.to_person_id = k2.to_person_id
JOIN person p ON k1.to_person_id = p.id
WHERE k1.from_person_id = 1  -- Alice
  AND k2.from_person_id = 2  -- Bob
  AND k1.to_person_id NOT IN (1, 2)
GROUP BY p.name
ORDER BY connection_count DESC;

Community Detection

-- Find densely connected subgraphs
WITH person_connections AS (
  SELECT 
    from_person_id as person_id,
    COUNT(*) as connection_count
  FROM knows
  GROUP BY from_person_id
)
SELECT 
  p.name,
  pc.connection_count,
  AVG(k.strength) as avg_strength
FROM person_connections pc
JOIN person p ON pc.person_id = p.id
JOIN knows k ON pc.person_id = k.from_person_id
GROUP BY p.name, pc.connection_count
HAVING pc.connection_count > 3
ORDER BY pc.connection_count DESC, avg_strength DESC;

Graph Algorithms

PageRank

-- Simplified PageRank calculation
WITH RECURSIVE pagerank AS (
  -- Initialize all nodes with equal rank
  SELECT 
    id,
    1.0 / (SELECT COUNT(*) FROM person)::float as rank,
    0 as iteration
  FROM person
  
  UNION ALL
  
  -- Iterative rank calculation
  SELECT 
    p.id,
    0.15 / (SELECT COUNT(*) FROM person)::float + 
    0.85 * COALESCE(SUM(pr.rank / out_degree.degree), 0) as rank,
    pr.iteration + 1 as iteration
  FROM person p
  CROSS JOIN (SELECT MAX(iteration) as max_iter FROM pagerank) mi
  LEFT JOIN knows k ON p.id = k.to_person_id
  LEFT JOIN pagerank pr ON k.from_person_id = pr.id 
    AND pr.iteration = mi.max_iter
  LEFT JOIN (
    SELECT from_person_id, COUNT(*) as degree
    FROM knows GROUP BY from_person_id
  ) out_degree ON k.from_person_id = out_degree.from_person_id
  WHERE pr.iteration < 10  -- Number of iterations
  GROUP BY p.id, pr.iteration
)
SELECT 
  p.name,
  pr.rank
FROM (
  SELECT DISTINCT ON (id) *
  FROM pagerank
  ORDER BY id, iteration DESC
) pr
JOIN person p ON pr.id = p.id
ORDER BY pr.rank DESC;

Centrality Metrics

-- Degree centrality
SELECT 
  p.name,
  COUNT(k.id) as degree
FROM person p
LEFT JOIN knows k ON p.id = k.from_person_id
GROUP BY p.name
ORDER BY degree DESC;

-- Betweenness centrality (simplified)
-- Counts how often a node appears in shortest paths
WITH path_counts AS (
  SELECT 
    unnest(path) as node_id,
    COUNT(*) as path_count
  FROM shortest_path  -- From previous CTE
  GROUP BY unnest(path)
)
SELECT 
  p.name,
  COALESCE(pc.path_count, 0) as betweenness
FROM person p
LEFT JOIN path_counts pc ON p.id = pc.node_id
ORDER BY betweenness DESC;

Performance Optimization

Index Strategy

Always index both directions of edges

CREATE INDEX idx_from ON edges(from_id);
CREATE INDEX idx_to ON edges(to_id);
CREATE INDEX idx_from_to ON edges(from_id, to_id);

Limit Depth

Always set maximum depth in recursive queries to prevent infinite loops

Materialized Views

Cache expensive graph computations

CREATE MATERIALIZED VIEW person_degrees AS
SELECT person_id, COUNT(*) as degree
FROM knows GROUP BY person_id;

CREATE INDEX idx_person_degrees 
ON person_degrees(degree DESC);

Partitioning

For very large graphs, partition edges by source or time range

Best Practices

Model Relationships Explicitly

Create separate edge tables for each relationship type for clarity and performance.

Use Properties Wisely

Store frequently queried attributes as columns, use JSONB for flexible metadata.

Prevent Cycles

Always check for visited nodes in path queries to avoid infinite recursion.

Start Small

Test traversal queries on subsets before running on entire graph.

Monitor Query Performance

Graph queries can be expensive. Use EXPLAIN ANALYZE and set statement timeouts.

Next Steps

→ Advanced Graph Queries → Cross-Model Joins → Performance Tuning