←Back to Documentation
Vector Indexing
High-performance vector indexing with HNSW, IVF, and quantization
Overview
Vector indexing methods:
- • HNSW: Hierarchical Navigable Small World graphs
- • IVF: Inverted File Index with quantization
- • Flat Index: Exact search for small datasets
- • Quantization: Compress vectors for efficiency
- • Hybrid Indexes: Combine multiple strategies
HNSW Index
-- Create an HNSW index (pgvector).
-- Build-time parameters go in WITH; search quality is a runtime GUC
-- (hnsw.ef_search), NOT an index storage parameter — the original example
-- passed ef_search in WITH, which pgvector rejects as unrecognized.
CREATE INDEX idx_documents_embedding ON documents
USING hnsw (embedding vector_cosine_ops)
WITH (
    m = 16,               -- Max connections per layer (higher = better recall, slower build)
    ef_construction = 64  -- Size of dynamic candidate list (higher = better build quality)
);

-- Supported distance metrics (operator class must match the query operator).
CREATE INDEX idx_cosine ON documents USING hnsw (embedding vector_cosine_ops);
CREATE INDEX idx_l2 ON documents USING hnsw (embedding vector_l2_ops);
CREATE INDEX idx_inner_product ON documents USING hnsw (embedding vector_ip_ops);

-- Tune for your use case.

-- Small datasets (< 100K rows): lighter graph, faster build.
CREATE INDEX idx_small ON documents
USING hnsw (embedding vector_cosine_ops)
WITH (m = 8, ef_construction = 32);

-- Large datasets (> 1M rows): denser graph for better recall.
CREATE INDEX idx_large ON documents
USING hnsw (embedding vector_cosine_ops)
WITH (m = 32, ef_construction = 128);

-- High recall requirement: dense graph at build time, plus a high
-- hnsw.ef_search at query time (see SET below).
CREATE INDEX idx_high_recall ON documents
USING hnsw (embedding vector_cosine_ops)
WITH (m = 48, ef_construction = 256);

-- Fast build time: trade some recall for build speed.
CREATE INDEX idx_fast_build ON documents
USING hnsw (embedding vector_cosine_ops)
WITH (m = 12, ef_construction = 40);

-- Query with the index; ef_search is adjusted per session or transaction.
SET hnsw.ef_search = 100;  -- Higher = better recall, slower search
SELECT title,
       1 - (embedding <=> '[0.1, 0.2, ...]') AS similarity
FROM documents
ORDER BY embedding <=> '[0.1, 0.2, ...]'
LIMIT 10;
IVF Index
-- Create an IVF (inverted file) index.
CREATE INDEX idx_documents_ivf ON documents
USING ivfflat (embedding vector_cosine_ops)
WITH (
    lists = 100  -- Number of clusters; sqrt(num_rows) is a good starting point
);

-- For 1M vectors:
CREATE INDEX idx_ivf_large ON documents
USING ivfflat (embedding vector_cosine_ops)
WITH (lists = 1000);

-- Query with IVF; probes controls how many lists are scanned per query.
SET ivfflat.probes = 10;  -- Higher = better recall, slower queries
SELECT title,
       1 - (embedding <=> '[0.1, 0.2, ...]') AS similarity
FROM documents
ORDER BY embedding <=> '[0.1, 0.2, ...]'
LIMIT 10;

-- Trade-offs:
--   More lists  = faster queries, slower index build
--   More probes = better recall, slower queries

-- Optimal lists calculation:
SELECT SQRT(COUNT(*)) AS recommended_lists FROM documents;

-- Rebuild the index after significant data changes (IVF centroids go stale).
REINDEX INDEX idx_documents_ivf;
Product Quantization
-- Create index with product quantization (PQ).
-- NOTE(review): ivfflat_pq is not a stock pgvector access method — confirm
-- which extension provides it before relying on this example.
CREATE INDEX idx_documents_pq ON documents
USING ivfflat_pq (embedding vector_cosine_ops)
WITH (
lists = 1000,
pq_segments = 8, -- Divide vector into segments
pq_bits = 8 -- Bits per segment (256 centroids)
);
-- Memory savings calculation:
-- Original:  1M vectors * 1536 dims * 4 bytes ≈ 6.1GB
-- Quantized: 1M vectors * 8 segments * 1 byte = 8MB (~768x reduction)
-- Binary quantization for extreme compression (1 bit per dimension).
-- NOTE(review): binary_quantization is not a stock pgvector access method —
-- verify it is available in the target database before use.
CREATE INDEX idx_documents_binary ON documents
USING binary_quantization (embedding)
WITH (method = 'hamming');
-- Scalar quantization (int8): store each dimension as one signed byte.
-- NOTE(review): SET STORAGE QUANTIZED is nonstandard — stock PostgreSQL only
-- accepts PLAIN/EXTERNAL/EXTENDED/MAIN here, and ALTER COLUMN ... SET STORAGE
-- takes no WITH clause. Confirm the target engine/extension supports this.
ALTER TABLE documents
ALTER COLUMN embedding
SET STORAGE QUANTIZED
WITH (quantization_type = 'int8');
-- Query quantized index.
-- NOTE(review): entropy_quantized_similarity() and the <~> operator are not
-- stock pgvector — confirm which extension defines them before use.
SELECT title,
entropy_quantized_similarity(embedding, '[0.1, 0.2, ...]') as similarity
FROM documents
ORDER BY embedding <~> '[0.1, 0.2, ...]' -- Quantized distance operator
LIMIT 10;
Index Management
-- View index statistics for vector indexes.
SELECT schemaname,
       tablename,
       indexname,
       idx_scan AS times_used,
       idx_tup_read AS tuples_read,
       idx_tup_fetch AS tuples_fetched,
       pg_size_pretty(pg_relation_size(indexrelid)) AS index_size
FROM pg_stat_user_indexes
WHERE indexname LIKE '%embedding%';

-- Monitor index build progress.
SELECT phase,
       round(100.0 * blocks_done / nullif(blocks_total, 0), 1) AS "% complete",
       blocks_done,
       blocks_total
FROM pg_stat_progress_create_index
WHERE command = 'CREATE INDEX';

-- Concurrent index creation (does not block writes to the table).
CREATE INDEX CONCURRENTLY idx_documents_embedding ON documents
USING hnsw (embedding vector_cosine_ops);

-- Drop and recreate with new parameters.
DROP INDEX idx_documents_embedding;
CREATE INDEX idx_documents_embedding ON documents
USING hnsw (embedding vector_cosine_ops)
WITH (m = 24, ef_construction = 80);

-- Vacuum and analyze after bulk changes.
VACUUM ANALYZE documents;

-- Check index size and usage (large, rarely-scanned indexes are bloat candidates).
SELECT schemaname,
       tablename,
       indexname,
       pg_size_pretty(pg_relation_size(indexrelid)) AS index_size,
       idx_scan,
       idx_tup_read / NULLIF(idx_scan, 0) AS tuples_per_scan
FROM pg_stat_user_indexes
WHERE schemaname = 'public'
ORDER BY pg_relation_size(indexrelid) DESC;
Performance Tuning
-- Benchmark different index configurations.
-- Returns one row per tested index type with the measured k-NN query latency.
-- Fixes over the previous version:
--   1. "index_type" in the final SELECT was ambiguous in plpgsql (it collides
--      with the OUT parameter of the same name) and raised at runtime.
--   2. clock_timestamp() - statement_timestamp() was evaluated once per
--      result row and did not time the search itself; latency is now measured
--      by bracketing the query with clock_timestamp().
-- NOTE: recall is a placeholder constant (0.95) — computing true recall
-- requires comparing against exact (flat-index) results.
CREATE OR REPLACE FUNCTION benchmark_vector_index(
    query_vector VECTOR,   -- probe vector to search with
    k INT DEFAULT 10       -- number of nearest neighbors to fetch
) RETURNS TABLE(
    index_type TEXT,
    avg_latency_ms DECIMAL,
    recall DECIMAL
) AS $$
DECLARE
    t_start TIMESTAMPTZ;
    t_end   TIMESTAMPTZ;
BEGIN
    -- Test HNSW: time the k-NN query as a whole.
    t_start := clock_timestamp();
    PERFORM 1
    FROM documents
    ORDER BY embedding <=> query_vector
    LIMIT k;
    t_end := clock_timestamp();

    RETURN QUERY
    SELECT 'HNSW'::TEXT,
           (EXTRACT(EPOCH FROM (t_end - t_start)) * 1000)::DECIMAL,
           0.95::DECIMAL;

    -- Add similar tests for IVF, PQ, etc.
END;
$$ LANGUAGE plpgsql;
-- Run benchmark
SELECT * FROM benchmark_vector_index('[0.1, 0.2, ...]'::vector);
-- Query plan analysis: confirm the vector index is actually used
-- (expect an Index Scan on the hnsw/ivfflat index, not a Seq Scan).
EXPLAIN (ANALYZE, BUFFERS)
SELECT title
FROM documents
ORDER BY embedding <=> '[0.1, 0.2, ...]'
LIMIT 10;
-- Optimize settings
-- NOTE(review): the values below are examples — size them to the host's RAM.
SET work_mem = '256MB'; -- per-sort/hash memory for queries
SET maintenance_work_mem = '2GB'; -- memory available to index builds
SET effective_cache_size = '16GB'; -- planner hint: total cache available
SET random_page_cost = 1.1; -- assume fast (SSD) random I/O
-- Parallel index build
SET max_parallel_maintenance_workers = 4;
CREATE INDEX idx_documents_parallel
ON documents USING hnsw (embedding vector_cosine_ops);
Best Practices
Index Selection
- • HNSW: Best overall performance
- • IVF: Good for very large datasets
- • Quantization: Reduce memory usage
- • Benchmark before choosing
Maintenance
- • Monitor index size and usage
- • Rebuild after bulk updates
- • Tune parameters for workload
- • Use concurrent builds