mirror of
https://github.com/neondatabase/neon.git
synced 2025-12-22 21:59:59 +00:00
This includes a compatibility patch that is needed because pgvector now skips WAL-logging during the index build, and WAL-logs the index only in one go at the end. That's how GIN, GiST and SP-GIST index builds work in core PostgreSQL too, but we need some Neon-specific calls to mark the beginning and end of those build phases. pgvector is the first index AM that does that with parallel workers, so I had to modify those functions in the Neon extension to be aware of parallel workers. Only the leader needs to create the underlying file and perform the WAL-logging. (In principle, the parallel workers could participate in the WAL-logging too, but pgvector doesn't do that. This will need some further work if that changes). The previous attempt at this (#6592) missed that parallel workers needed those changes, and segfaulted in parallel build that spilled to disk. Testing ------- We don't have a place for regression tests of extensions at the moment. I tested this manually with the following script: ``` CREATE EXTENSION IF NOT EXISTS vector; DROP TABLE IF EXISTS tst; CREATE TABLE tst (i serial, v vector(3)); INSERT INTO tst (v) SELECT ARRAY[random(), random(), random()] FROM generate_series(1, 15000) g; -- Serial build, in memory ALTER TABLE tst SET (parallel_workers=0); SET maintenance_work_mem='50 MB'; CREATE INDEX idx ON tst USING hnsw (v vector_l2_ops); -- Test that the index works. (The table contents are random, and the -- search is approximate anyway, so we cannot check the exact values. -- For now, just eyeball that they look reasonable) set enable_seqscan=off; explain SELECT * FROM tst ORDER BY v <-> ARRAY[0, 0, 0]::vector LIMIT 5; SELECT * FROM tst ORDER BY v <-> ARRAY[0, 0, 0]::vector LIMIT 5; DROP INDEX idx; -- Serial build, spills to on disk ALTER TABLE tst SET (parallel_workers=0); SET maintenance_work_mem='5 MB'; CREATE INDEX idx ON tst USING hnsw (v vector_l2_ops); SELECT * FROM tst ORDER BY v <-> ARRAY[0, 0, 0]::vector LIMIT 5; DROP INDEX idx; -- Parallel build, in memory ALTER TABLE tst SET (parallel_workers=4); SET maintenance_work_mem='50 MB'; CREATE INDEX idx ON tst USING hnsw (v vector_l2_ops); SELECT * FROM tst ORDER BY v <-> ARRAY[0, 0, 0]::vector LIMIT 5; DROP INDEX idx; -- Parallel build, spills to disk ALTER TABLE tst SET (parallel_workers=4); SET maintenance_work_mem='5 MB'; CREATE INDEX idx ON tst USING hnsw (v vector_l2_ops); SELECT * FROM tst ORDER BY v <-> ARRAY[0, 0, 0]::vector LIMIT 5; DROP INDEX idx; ```
29 lines
352 B
Plaintext
29 lines
352 B
Plaintext
*
|
|
|
|
# Files
|
|
!Cargo.lock
|
|
!Cargo.toml
|
|
!Makefile
|
|
!rust-toolchain.toml
|
|
!scripts/combine_control_files.py
|
|
!scripts/ninstall.sh
|
|
!vm-cgconfig.conf
|
|
|
|
# Directories
|
|
!.cargo/
|
|
!.config/
|
|
!compute_tools/
|
|
!control_plane/
|
|
!libs/
|
|
!neon_local/
|
|
!pageserver/
|
|
!patches/
|
|
!pgxn/
|
|
!proxy/
|
|
!s3_scrubber/
|
|
!safekeeper/
|
|
!storage_broker/
|
|
!trace/
|
|
!vendor/postgres-*/
|
|
!workspace_hack/
|