Back to Documentation

Document Store

Flexible schema with JSON/JSONB for document-oriented data

Overview

entropyDB provides native JSONB support for storing and querying document-oriented data. Store complex nested structures with flexible schemas while maintaining SQL query capabilities.

Key Features

  • Binary JSON storage for efficient operations
  • Rich query operators and indexing
  • Schema validation with CHECK constraints
  • Full-text search capabilities

Creating Document Tables

-- Simple document table
CREATE TABLE products (
  id SERIAL PRIMARY KEY,
  data JSONB NOT NULL,
  created_at TIMESTAMP DEFAULT NOW()
);

-- With validation
CREATE TABLE users (
  id SERIAL PRIMARY KEY,
  profile JSONB NOT NULL,
  CONSTRAINT valid_profile CHECK (
    profile ? 'email' AND
    profile ? 'username'
  )
);

-- Hybrid approach: structured + flexible
CREATE TABLE orders (
  id SERIAL PRIMARY KEY,
  user_id INT NOT NULL,
  total DECIMAL(10,2),
  items JSONB,  -- Flexible order items
  metadata JSONB,  -- Additional attributes
  created_at TIMESTAMP DEFAULT NOW()
);

Inserting Documents

-- Insert JSON document
INSERT INTO products (data) VALUES ('{
  "name": "Laptop",
  "price": 999.99,
  "specs": {
    "cpu": "Intel i7",
    "ram": "16GB",
    "storage": "512GB SSD"
  },
  "tags": ["electronics", "computers"],
  "inStock": true
}'::jsonb);

-- Insert from application
INSERT INTO users (profile) VALUES (
  jsonb_build_object(
    'username', 'john_doe',
    'email', 'john@example.com',
    'preferences', jsonb_build_object(
      'theme', 'dark',
      'notifications', true
    )
  )
);

Querying Documents

Accessing Fields

-- Get top-level field (as JSON)
SELECT data->'name' FROM products;

-- Get top-level field (as text)
SELECT data->>'name' as product_name FROM products;

-- Access nested fields
SELECT 
  data->>'name' as name,
  data->'specs'->>'cpu' as cpu,
  data->'specs'->>'ram' as ram
FROM products;

Filtering

-- Exact match
SELECT * FROM products
WHERE data @> '{"inStock": true}';

-- Contains key
SELECT * FROM products
WHERE data ? 'specs';

-- Array contains
SELECT * FROM products
WHERE data->'tags' @> '["electronics"]';

-- Comparison operators
SELECT * FROM products
WHERE (data->>'price')::numeric > 500;

Complex Queries

-- Multiple conditions
SELECT * FROM products
WHERE data @> '{"inStock": true}'
  AND (data->>'price')::numeric BETWEEN 500 AND 1500
  AND data->'tags' @> '["electronics"]';

-- Pattern matching
SELECT * FROM products
WHERE data->>'name' ILIKE '%laptop%';

-- Array operations
SELECT * FROM products
WHERE jsonb_array_length(data->'tags') > 2;

Updating Documents

-- Update entire document
UPDATE products
SET data = '{"name": "New Product", "price": 299.99}'::jsonb
WHERE id = 1;

-- Update specific field
UPDATE products
SET data = jsonb_set(data, '{price}', '899.99'::jsonb)
WHERE id = 1;

-- Update nested field
UPDATE products
SET data = jsonb_set(
  data,
  '{specs,ram}',
  '"32GB"'::jsonb
)
WHERE id = 1;

-- Add new field
UPDATE products
SET data = data || '{"warranty": "2 years"}'::jsonb
WHERE id = 1;

-- Remove field
UPDATE products
SET data = data - 'oldField'
WHERE id = 1;

-- Increment numeric field
UPDATE products
SET data = jsonb_set(
  data,
  '{views}',
  to_jsonb((data->>'views')::int + 1)
)
WHERE id = 1;

Indexing JSON Data

GIN Index (Recommended)

-- Index entire JSONB column
CREATE INDEX idx_products_data ON products USING GIN (data);

-- Index specific path
CREATE INDEX idx_products_tags 
ON products USING GIN ((data->'tags'));

-- Expression index
CREATE INDEX idx_products_price 
ON products ((data->>'price'));

B-tree Index for Specific Fields

-- For exact lookups on specific fields
CREATE INDEX idx_products_name 
ON products ((data->>'name'));

-- For range queries on numeric fields
CREATE INDEX idx_products_price_btree 
ON products (((data->>'price')::numeric));

Working with Arrays

-- Expand array to rows
SELECT 
  id,
  jsonb_array_elements_text(data->'tags') as tag
FROM products;

-- Array aggregation
SELECT 
  data->>'category' as category,
  jsonb_agg(data->>'name') as products
FROM products
GROUP BY category;

-- Filter by array element
SELECT * FROM products
WHERE data->'tags' ? 'featured';

-- Array length
SELECT * FROM products
WHERE jsonb_array_length(data->'tags') > 3;

-- Append to array
UPDATE products
SET data = jsonb_set(
  data,
  '{tags}',
  (data->'tags') || '["new-tag"]'::jsonb
)
WHERE id = 1;

Schema Validation

-- Ensure required fields exist
ALTER TABLE products
ADD CONSTRAINT require_name 
CHECK (data ? 'name');

-- Validate field types
ALTER TABLE products
ADD CONSTRAINT valid_price
CHECK ((data->>'price')::numeric > 0);

-- Complex validation
ALTER TABLE users
ADD CONSTRAINT valid_user_profile CHECK (
  data ? 'email' AND
  data ? 'username' AND
  jsonb_typeof(data->'preferences') = 'object' AND
  (data->>'email') ~* '^[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Z|a-z]{2,}$'
);

Full-Text Search

-- Create search column
ALTER TABLE products 
ADD COLUMN search_vector tsvector
GENERATED ALWAYS AS (
  to_tsvector('english', 
    coalesce(data->>'name', '') || ' ' ||
    coalesce(data->>'description', '')
  )
) STORED;

-- Create GIN index for full-text search
CREATE INDEX idx_products_search 
ON products USING GIN (search_vector);

-- Search
SELECT 
  data->>'name' as name,
  ts_rank(search_vector, query) as rank
FROM products,
     to_tsquery('english', 'laptop & fast') query
WHERE search_vector @@ query
ORDER BY rank DESC;

Best Practices

1. Use JSONB, Not JSON

JSONB is binary and supports indexing. JSON is just text storage.

2. Index Frequently Queried Paths

Create GIN indexes on JSONB columns you query often.

3. Validate Critical Fields

Use CHECK constraints for required fields and data types.

4. Normalize When Appropriate

Don't store everything in JSON. Use relational tables for frequently joined data.

5. Keep Documents Reasonably Sized

Very large JSONB documents can impact performance. Consider splitting into multiple rows.

Next Steps