Property Graphs
Model and query complex relationships with graph database capabilities
Overview
entropyDB supports property graphs for modeling complex, highly connected data. Query relationships efficiently with path traversals and pattern matching.
Key Features
- Labeled property graph model
- Cypher-like query language
- Efficient path traversals
- Pattern matching and graph algorithms
Creating Graph Schema
-- Create vertex table (nodes) CREATE TABLE person ( id SERIAL PRIMARY KEY, name TEXT NOT NULL, age INT, properties JSONB, created_at TIMESTAMP DEFAULT NOW() ); -- Create edge table (relationships) CREATE TABLE knows ( id SERIAL PRIMARY KEY, from_person_id INT REFERENCES person(id), to_person_id INT REFERENCES person(id), since DATE, strength FLOAT, properties JSONB, created_at TIMESTAMP DEFAULT NOW() ); -- Indexes for efficient traversals CREATE INDEX idx_knows_from ON knows(from_person_id); CREATE INDEX idx_knows_to ON knows(to_person_id); CREATE INDEX idx_knows_both ON knows(from_person_id, to_person_id); -- Additional vertex types CREATE TABLE company ( id SERIAL PRIMARY KEY, name TEXT NOT NULL, industry TEXT, properties JSONB ); CREATE TABLE works_at ( id SERIAL PRIMARY KEY, person_id INT REFERENCES person(id), company_id INT REFERENCES company(id), role TEXT, start_date DATE, end_date DATE );
Basic Graph Operations
Creating Vertices and Edges
-- Create vertices
INSERT INTO person (name, age, properties)
VALUES
('Alice', 30, '{"city": "NYC", "occupation": "Engineer"}'::jsonb),
('Bob', 28, '{"city": "SF", "occupation": "Designer"}'::jsonb),
('Charlie', 35, '{"city": "LA", "occupation": "Manager"}'::jsonb);
-- Create edges
INSERT INTO knows (from_person_id, to_person_id, since, strength)
VALUES
(1, 2, '2020-01-15', 0.8),
(1, 3, '2019-06-20', 0.6),
(2, 3, '2021-03-10', 0.9);Finding Neighbors
-- Find all friends of Alice SELECT p2.name, k.since, k.strength FROM person p1 JOIN knows k ON p1.id = k.from_person_id JOIN person p2 ON k.to_person_id = p2.id WHERE p1.name = 'Alice'; -- Bidirectional friends SELECT DISTINCT p2.name FROM person p1 JOIN knows k ON p1.id = k.from_person_id OR p1.id = k.to_person_id JOIN person p2 ON (p2.id = k.to_person_id OR p2.id = k.from_person_id) WHERE p1.name = 'Alice' AND p2.id != p1.id;
Path Traversals
2-Hop Traversal (Friends of Friends)
-- Find friends of friends
SELECT DISTINCT p3.name
FROM person p1
JOIN knows k1 ON p1.id = k1.from_person_id
JOIN person p2 ON k1.to_person_id = p2.id
JOIN knows k2 ON p2.id = k2.from_person_id
JOIN person p3 ON k2.to_person_id = p3.id
WHERE p1.name = 'Alice'
AND p3.id != p1.id -- Exclude self
AND p3.id NOT IN ( -- Exclude direct friends
SELECT to_person_id FROM knows WHERE from_person_id = p1.id
);Variable-Length Paths
-- Recursive CTE for multi-hop paths
WITH RECURSIVE path AS (
-- Base case: direct connections
SELECT
k.from_person_id,
k.to_person_id,
ARRAY[k.from_person_id, k.to_person_id] as path_nodes,
1 as depth
FROM knows k
WHERE k.from_person_id = 1 -- Starting from Alice
UNION ALL
-- Recursive case: extend path
SELECT
p.from_person_id,
k.to_person_id,
p.path_nodes || k.to_person_id,
p.depth + 1
FROM path p
JOIN knows k ON p.to_person_id = k.from_person_id
WHERE k.to_person_id != ALL(p.path_nodes) -- Avoid cycles
AND p.depth < 5 -- Maximum depth
)
SELECT
pe.name as start_person,
p.depth,
array_agg(per.name ORDER BY p.depth) as path
FROM path p
JOIN person pe ON p.from_person_id = pe.id
JOIN person per ON per.id = ANY(p.path_nodes)
GROUP BY pe.name, p.depth
ORDER BY p.depth;Shortest Path
-- Find shortest path between two people
WITH RECURSIVE shortest_path AS (
SELECT
k.from_person_id,
k.to_person_id,
ARRAY[k.from_person_id, k.to_person_id] as path,
1 as length
FROM knows k
WHERE k.from_person_id = 1 -- Alice
UNION ALL
SELECT
sp.from_person_id,
k.to_person_id,
sp.path || k.to_person_id,
sp.length + 1
FROM shortest_path sp
JOIN knows k ON sp.to_person_id = k.from_person_id
WHERE k.to_person_id != ALL(sp.path)
AND sp.length < 10
)
SELECT
path,
length
FROM shortest_path
WHERE to_person_id = 5 -- Target person (Charlie)
ORDER BY length
LIMIT 1;Pattern Matching
Triangle Detection
-- Find triangles (A knows B, B knows C, C knows A) SELECT p1.name as person1, p2.name as person2, p3.name as person3 FROM knows k1 JOIN knows k2 ON k1.to_person_id = k2.from_person_id JOIN knows k3 ON k2.to_person_id = k3.from_person_id JOIN person p1 ON k1.from_person_id = p1.id JOIN person p2 ON k2.from_person_id = p2.id JOIN person p3 ON k3.from_person_id = p3.id WHERE k3.to_person_id = k1.from_person_id AND k1.from_person_id < k2.from_person_id AND k2.from_person_id < k3.from_person_id;
Common Friends
-- Find mutual connections SELECT p.name as mutual_friend, COUNT(*) as connection_count FROM knows k1 JOIN knows k2 ON k1.to_person_id = k2.to_person_id JOIN person p ON k1.to_person_id = p.id WHERE k1.from_person_id = 1 -- Alice AND k2.from_person_id = 2 -- Bob AND k1.to_person_id NOT IN (1, 2) GROUP BY p.name ORDER BY connection_count DESC;
Community Detection
-- Find densely connected subgraphs
WITH person_connections AS (
SELECT
from_person_id as person_id,
COUNT(*) as connection_count
FROM knows
GROUP BY from_person_id
)
SELECT
p.name,
pc.connection_count,
AVG(k.strength) as avg_strength
FROM person_connections pc
JOIN person p ON pc.person_id = p.id
JOIN knows k ON pc.person_id = k.from_person_id
GROUP BY p.name, pc.connection_count
HAVING pc.connection_count > 3
ORDER BY pc.connection_count DESC, avg_strength DESC;Graph Algorithms
PageRank
-- Simplified PageRank calculation
WITH RECURSIVE pagerank AS (
-- Initialize all nodes with equal rank
SELECT
id,
1.0 / (SELECT COUNT(*) FROM person)::float as rank,
0 as iteration
FROM person
UNION ALL
-- Iterative rank calculation
SELECT
p.id,
0.15 / (SELECT COUNT(*) FROM person)::float +
0.85 * COALESCE(SUM(pr.rank / out_degree.degree), 0) as rank,
pr.iteration + 1 as iteration
FROM person p
CROSS JOIN (SELECT MAX(iteration) as max_iter FROM pagerank) mi
LEFT JOIN knows k ON p.id = k.to_person_id
LEFT JOIN pagerank pr ON k.from_person_id = pr.id
AND pr.iteration = mi.max_iter
LEFT JOIN (
SELECT from_person_id, COUNT(*) as degree
FROM knows GROUP BY from_person_id
) out_degree ON k.from_person_id = out_degree.from_person_id
WHERE pr.iteration < 10 -- Number of iterations
GROUP BY p.id, pr.iteration
)
SELECT
p.name,
pr.rank
FROM (
SELECT DISTINCT ON (id) *
FROM pagerank
ORDER BY id, iteration DESC
) pr
JOIN person p ON pr.id = p.id
ORDER BY pr.rank DESC;Centrality Metrics
-- Degree centrality
SELECT
p.name,
COUNT(k.id) as degree
FROM person p
LEFT JOIN knows k ON p.id = k.from_person_id
GROUP BY p.name
ORDER BY degree DESC;
-- Betweenness centrality (simplified)
-- Counts how often a node appears in shortest paths
WITH path_counts AS (
SELECT
unnest(path) as node_id,
COUNT(*) as path_count
FROM shortest_path -- From previous CTE
GROUP BY unnest(path)
)
SELECT
p.name,
COALESCE(pc.path_count, 0) as betweenness
FROM person p
LEFT JOIN path_counts pc ON p.id = pc.node_id
ORDER BY betweenness DESC;Performance Optimization
Index Strategy
Always index both directions of edges
CREATE INDEX idx_from ON edges(from_id); CREATE INDEX idx_to ON edges(to_id); CREATE INDEX idx_from_to ON edges(from_id, to_id);
Limit Depth
Always set maximum depth in recursive queries to prevent infinite loops
Materialized Views
Cache expensive graph computations
CREATE MATERIALIZED VIEW person_degrees AS SELECT person_id, COUNT(*) as degree FROM knows GROUP BY person_id; CREATE INDEX idx_person_degrees ON person_degrees(degree DESC);
Partitioning
For very large graphs, partition edges by source or time range
Best Practices
Model Relationships Explicitly
Create separate edge tables for each relationship type for clarity and performance.
Use Properties Wisely
Store frequently queried attributes as columns, use JSONB for flexible metadata.
Prevent Cycles
Always check for visited nodes in path queries to avoid infinite recursion.
Start Small
Test traversal queries on subsets before running on entire graph.
Monitor Query Performance
Graph queries can be expensive. Use EXPLAIN ANALYZE and set statement timeouts.