Back to Documentation

Cross-Model Joins

Join relational, document, graph, key-value, and time-series data in a single query

Overview

Cross-model join capabilities:

  • Relational + Document: Join SQL tables with JSON collections
  • Relational + Graph: Join tables with graph relationships
  • Document + Time Series: Enrich time data with metadata
  • Graph + Key-Value: Combine relationships with cache data
  • All Models: Query across all five data models

Relational + Document Joins

-- Join SQL table with JSON collection
SELECT 
  u.id,
  u.username,
  p.profile->>'bio' as bio,
  p.profile->'preferences'->>'theme' as theme
FROM users u
JOIN user_profiles_json p ON u.id = (p.document->>'user_id')::int
WHERE u.active = true;

-- Extract nested JSON fields
SELECT 
  o.order_id,
  o.total,
  od.document->>'product_name' as product,
  (od.document->>'quantity')::int as quantity,
  (od.document->>'price')::decimal as price
FROM orders o
JOIN order_details_json od ON o.order_id = (od.document->>'order_id')::int
WHERE o.status = 'completed';

-- Aggregate across models
SELECT 
  c.customer_name,
  COUNT(o.order_id) as order_count,
  SUM((od.document->>'total')::decimal) as total_spent
FROM customers c
JOIN orders o ON c.id = o.customer_id
JOIN order_summaries_json od ON o.order_id = (od.document->>'order_id')::int
GROUP BY c.customer_name
ORDER BY total_spent DESC;

-- Filter on JSON attributes
SELECT u.username, d.document
FROM users u
JOIN documents d ON u.id = (d.document->>'author_id')::int
WHERE d.document->>'status' = 'published'
  AND (d.document->>'view_count')::int > 1000;

Relational + Graph Joins

-- Join table with graph relationships
SELECT 
  u.username,
  u.email,
  f.followed_username,
  fu.email as followed_email
FROM users u
JOIN (
  MATCH (a:User)-[:FOLLOWS]->(b:User)
  RETURN a.username as follower_username, b.username as followed_username
) f ON u.username = f.follower_username
JOIN users fu ON f.followed_username = fu.username
WHERE u.active = true;

-- Combine SQL aggregations with graph traversal
WITH friend_network AS (
  MATCH (u:User {username: 'alice'})-[:FRIENDS*1..2]-(friend:User)
  RETURN DISTINCT friend.user_id as friend_id
)
SELECT 
  u.username,
  COUNT(o.order_id) as order_count,
  SUM(o.total) as total_spent
FROM users u
JOIN friend_network fn ON u.id = fn.friend_id
LEFT JOIN orders o ON u.id = o.user_id
GROUP BY u.username;

-- Enrich graph with table data
SELECT 
  path,
  start_user.email,
  start_user.signup_date,
  end_user.email,
  end_user.signup_date
FROM (
  MATCH path = shortestPath(
    (start:User)-[:FRIENDS*]-(end:User)
  )
  WHERE start.username = 'alice' AND end.username = 'bob'
  RETURN path, start.user_id as start_id, end.user_id as end_id
) g
JOIN users start_user ON g.start_id = start_user.id
JOIN users end_user ON g.end_id = end_user.id;

Document + Time Series Joins

-- Enrich time series with metadata
SELECT 
  ts.timestamp,
  ts.value,
  s.document->>'name' as sensor_name,
  s.document->>'location' as location,
  s.document->'calibration'->>'last_calibrated' as last_calibrated
FROM sensor_readings ts
JOIN sensors_json s ON ts.sensor_id = (s.document->>'sensor_id')::int
WHERE ts.timestamp > NOW() - INTERVAL '1 hour'
  AND s.document->>'status' = 'active';

-- Aggregate time series by document attributes
SELECT 
  s.document->>'building' as building,
  s.document->>'floor' as floor,
  time_bucket('1 hour', ts.timestamp) as hour,
  AVG(ts.temperature) as avg_temp
FROM sensor_readings ts
JOIN sensors_json s ON ts.sensor_id = (s.document->>'sensor_id')::int
WHERE ts.timestamp > NOW() - INTERVAL '24 hours'
GROUP BY building, floor, hour
ORDER BY hour;

-- Complex event detection with metadata
SELECT 
  ts.timestamp,
  ts.value,
  s.document->>'name' as sensor,
  a.document->>'severity' as alert_severity,
  a.document->>'notification_email' as notify_email
FROM sensor_readings ts
JOIN sensors_json s ON ts.sensor_id = (s.document->>'sensor_id')::int
JOIN alert_rules_json a ON s.document->>'type' = a.document->>'sensor_type'
WHERE ts.value > (a.document->>'threshold')::float
  AND ts.timestamp > NOW() - INTERVAL '5 minutes';

Multi-Model Complex Queries

-- Combine all five data models
WITH 
  -- Graph: Social network
  social_connections AS (
    MATCH (u:User)-[:FRIENDS]-(friend:User)
    WHERE u.username = 'alice'
    RETURN DISTINCT friend.user_id as friend_id
  ),
  -- Document: User preferences
  preferences AS (
    SELECT 
      (document->>'user_id')::int as user_id,
      document->'interests' as interests
    FROM user_preferences_json
  ),
  -- Time Series: Recent activity
  recent_activity AS (
    SELECT 
      user_id,
      COUNT(*) as activity_count
    FROM user_events
    WHERE timestamp > NOW() - INTERVAL '7 days'
    GROUP BY user_id
  )
-- Relational: Combine everything
SELECT 
  u.username,
  u.email,
  p.interests,
  ra.activity_count,
  kv.value as cached_score
FROM users u
JOIN social_connections sc ON u.id = sc.friend_id
LEFT JOIN preferences p ON u.id = p.user_id
LEFT JOIN recent_activity ra ON u.id = ra.user_id
LEFT JOIN kv_store kv ON kv.key = CONCAT('user_score:', u.id)
ORDER BY ra.activity_count DESC NULLS LAST;

-- Recommendation system across models
WITH 
  user_purchases AS (
    SELECT user_id, jsonb_agg(product_id) as purchased_products
    FROM orders o
    JOIN order_items oi ON o.order_id = oi.order_id
    WHERE user_id = 123
    GROUP BY user_id
  ),
  similar_users AS (
    MATCH (u:User {user_id: 123})-[:SIMILAR_TO]-(similar:User)
    RETURN similar.user_id as similar_user_id
  ),
  trending_products AS (
    SELECT 
      product_id,
      COUNT(*) as purchase_count
    FROM order_items oi
    JOIN orders o ON oi.order_id = o.order_id
    WHERE o.created_at > NOW() - INTERVAL '30 days'
    GROUP BY product_id
    ORDER BY purchase_count DESC
    LIMIT 100
  )
SELECT 
  p.product_id,
  pd.document->>'name' as product_name,
  pd.document->>'category' as category,
  tp.purchase_count,
  COALESCE(pr.avg_rating, 0) as avg_rating
FROM trending_products tp
JOIN products p ON tp.product_id = p.product_id
JOIN product_details_json pd ON p.product_id = (pd.document->>'product_id')::int
LEFT JOIN product_ratings pr ON p.product_id = pr.product_id
WHERE NOT EXISTS (
  SELECT 1 FROM user_purchases up 
  WHERE up.purchased_products @> to_jsonb(p.product_id)
)
ORDER BY tp.purchase_count * COALESCE(pr.avg_rating, 3) DESC
LIMIT 10;

Performance Optimization

-- Create indexes for cross-model joins
CREATE INDEX idx_json_user_id ON user_profiles_json ((document->>'user_id'));
CREATE INDEX idx_sensor_readings_sensor_id ON sensor_readings(sensor_id, timestamp DESC);
CREATE INDEX idx_graph_user_id ON graph_nodes(properties->>'user_id');

-- Materialize frequent cross-model joins
CREATE MATERIALIZED VIEW user_activity_summary AS
SELECT 
  u.id,
  u.username,
  up.document->'preferences' as preferences,
  COUNT(DISTINCT o.order_id) as order_count,
  COUNT(DISTINCT f.followed_id) as following_count
FROM users u
LEFT JOIN user_profiles_json up ON u.id = (up.document->>'user_id')::int
LEFT JOIN orders o ON u.id = o.user_id
LEFT JOIN follows f ON u.id = f.follower_id
GROUP BY u.id, u.username, up.document;

-- Query plan optimization
EXPLAIN (ANALYZE, BUFFERS) 
SELECT ... -- Your cross-model query
FROM ...;

-- Use CTEs for complex cross-model queries
WITH doc_data AS (
  SELECT ... FROM documents
),
graph_data AS (
  MATCH ... RETURN ...
),
ts_data AS (
  SELECT ... FROM time_series
)
SELECT ... FROM relational_table
JOIN doc_data ...
JOIN graph_data ...
JOIN ts_data ...;

Best Practices

Performance

  • • Index join columns in all models
  • • Use CTEs for complex queries
  • • Materialize frequent joins
  • • Profile query execution plans

Design

  • • Design schema for join patterns
  • • Use consistent key formats
  • • Consider denormalization
  • • Cache frequently joined data

Next Steps