\set ECHO queries \timing -- prepare test table DROP TABLE IF EXISTS ivfflat_test_table; CREATE TABLE ivfflat_test_table AS TABLE documents WITH NO DATA; INSERT INTO ivfflat_test_table SELECT * FROM documents; CREATE INDEX ON ivfflat_test_table (_id); -- needed later for random tuple queries -- tune index build params SET max_parallel_maintenance_workers = 7; SET maintenance_work_mem = '8GB'; -- create ivfflat index for the supported distance metrics -- the formulat for lists is # rows / 1000 or sqrt(# rows) if # rows > 1 million -- we have 1 million embeddings of vector size 1536 in column embeddings of table documents -- so we use 1000 lists CREATE INDEX ON ivfflat_test_table USING ivfflat (embeddings vector_l2_ops) WITH (lists = 1000); CREATE INDEX ON ivfflat_test_table USING ivfflat (embeddings vector_ip_ops) WITH (lists = 1000); CREATE INDEX ON ivfflat_test_table USING ivfflat (embeddings vector_cosine_ops) WITH (lists = 1000); CREATE INDEX ON ivfflat_test_table USING ivfflat (embeddings::halfvec(1536) halfvec_l2_ops) WITH (lists = 1000); CREATE INDEX ON ivfflat_test_table USING ivfflat ((binary_quantize(embeddings)::bit(1536)) bit_hamming_ops) WITH (lists = 1000); \d ivfflat_test_table -- show all indexes built on the table SELECT idx.relname AS index_name, tbl.relname AS table_name, am.amname AS access_method, a.attname AS column_name, opc.opcname AS operator_class FROM pg_index i JOIN pg_class idx ON idx.oid = i.indexrelid JOIN pg_class tbl ON tbl.oid = i.indrelid JOIN pg_am am ON am.oid = idx.relam JOIN pg_attribute a ON a.attrelid = tbl.oid AND a.attnum = ANY(i.indkey) JOIN pg_opclass opc ON opc.oid = i.indclass[0] WHERE tbl.relname = 'ivfflat_test_table' AND a.attname = 'embeddings'; -- show table sizes \dt+