mirror of
https://github.com/neondatabase/neon.git
synced 2026-01-10 23:12:54 +00:00
This includes a compatibility patch that is needed because pgvector now skips WAL-logging during the index build, and WAL-logs the index only in one go at the end. That's how GIN, GiST and SP-GiST index builds work in core PostgreSQL too, but we need some Neon-specific calls to mark the beginning and end of those build phases. pgvector is the first index AM that does that with parallel workers, so I had to modify those functions in the Neon extension to be aware of parallel workers. Only the leader needs to create the underlying file and perform the WAL-logging. (In principle, the parallel workers could participate in the WAL-logging too, but pgvector doesn't do that. This will need some further work if that changes.) The previous attempt at this (#6592) missed that parallel workers needed those changes, and segfaulted in a parallel build that spilled to disk. Testing ------- We don't have a place for regression tests of extensions at the moment. I tested this manually with the following script: ``` CREATE EXTENSION IF NOT EXISTS vector; DROP TABLE IF EXISTS tst; CREATE TABLE tst (i serial, v vector(3)); INSERT INTO tst (v) SELECT ARRAY[random(), random(), random()] FROM generate_series(1, 15000) g; -- Serial build, in memory ALTER TABLE tst SET (parallel_workers=0); SET maintenance_work_mem='50 MB'; CREATE INDEX idx ON tst USING hnsw (v vector_l2_ops); -- Test that the index works. (The table contents are random, and the -- search is approximate anyway, so we cannot check the exact values. 
-- For now, just eyeball that they look reasonable) set enable_seqscan=off; explain SELECT * FROM tst ORDER BY v <-> ARRAY[0, 0, 0]::vector LIMIT 5; SELECT * FROM tst ORDER BY v <-> ARRAY[0, 0, 0]::vector LIMIT 5; DROP INDEX idx; -- Serial build, spills to disk ALTER TABLE tst SET (parallel_workers=0); SET maintenance_work_mem='5 MB'; CREATE INDEX idx ON tst USING hnsw (v vector_l2_ops); SELECT * FROM tst ORDER BY v <-> ARRAY[0, 0, 0]::vector LIMIT 5; DROP INDEX idx; -- Parallel build, in memory ALTER TABLE tst SET (parallel_workers=4); SET maintenance_work_mem='50 MB'; CREATE INDEX idx ON tst USING hnsw (v vector_l2_ops); SELECT * FROM tst ORDER BY v <-> ARRAY[0, 0, 0]::vector LIMIT 5; DROP INDEX idx; -- Parallel build, spills to disk ALTER TABLE tst SET (parallel_workers=4); SET maintenance_work_mem='5 MB'; CREATE INDEX idx ON tst USING hnsw (v vector_l2_ops); SELECT * FROM tst ORDER BY v <-> ARRAY[0, 0, 0]::vector LIMIT 5; DROP INDEX idx; ```
79 lines
2.1 KiB
Diff
79 lines
2.1 KiB
Diff
From 0b0194a57bd0f3598bd57dbedd0df3932330169d Mon Sep 17 00:00:00 2001
|
|
From: Heikki Linnakangas <heikki.linnakangas@iki.fi>
|
|
Date: Fri, 2 Feb 2024 22:26:45 +0200
|
|
Subject: [PATCH 1/1] Make v0.6.0 work with Neon
|
|
|
|
Now that the WAL-logging happens as a separate step at the end of the
|
|
build, we need a few neon-specific hints to make it work.
|
|
---
|
|
src/hnswbuild.c | 36 ++++++++++++++++++++++++++++++++++++
|
|
1 file changed, 36 insertions(+)
|
|
|
|
diff --git a/src/hnswbuild.c b/src/hnswbuild.c
|
|
index 680789b..ec54dea 100644
|
|
--- a/src/hnswbuild.c
|
|
+++ b/src/hnswbuild.c
|
|
@@ -840,9 +840,17 @@ HnswParallelBuildMain(dsm_segment *seg, shm_toc *toc)
|
|
|
|
hnswarea = shm_toc_lookup(toc, PARALLEL_KEY_HNSW_AREA, false);
|
|
|
|
+#ifdef NEON_SMGR
|
|
+ smgr_start_unlogged_build(RelationGetSmgr(indexRel));
|
|
+#endif
|
|
+
|
|
/* Perform inserts */
|
|
HnswParallelScanAndInsert(heapRel, indexRel, hnswshared, hnswarea, false);
|
|
|
|
+#ifdef NEON_SMGR
|
|
+ smgr_finish_unlogged_build_phase_1(RelationGetSmgr(indexRel));
|
|
+#endif
|
|
+
|
|
/* Close relations within worker */
|
|
index_close(indexRel, indexLockmode);
|
|
table_close(heapRel, heapLockmode);
|
|
@@ -1089,13 +1097,41 @@ BuildIndex(Relation heap, Relation index, IndexInfo *indexInfo,
|
|
SeedRandom(42);
|
|
#endif
|
|
|
|
+#ifdef NEON_SMGR
|
|
+ smgr_start_unlogged_build(RelationGetSmgr(index));
|
|
+#endif
|
|
+
|
|
InitBuildState(buildstate, heap, index, indexInfo, forkNum);
|
|
|
|
BuildGraph(buildstate, forkNum);
|
|
|
|
+#ifdef NEON_SMGR
|
|
+ smgr_finish_unlogged_build_phase_1(RelationGetSmgr(index));
|
|
+#endif
|
|
+
|
|
if (RelationNeedsWAL(index))
|
|
+ {
|
|
log_newpage_range(index, forkNum, 0, RelationGetNumberOfBlocks(index), true);
|
|
|
|
+#ifdef NEON_SMGR
|
|
+ {
|
|
+#if PG_VERSION_NUM >= 160000
|
|
+ RelFileLocator rlocator = RelationGetSmgr(index)->smgr_rlocator.locator;
|
|
+#else
|
|
+ RelFileNode rlocator = RelationGetSmgr(index)->smgr_rnode.node;
|
|
+#endif
|
|
+
|
|
+ SetLastWrittenLSNForBlockRange(XactLastRecEnd, rlocator,
|
|
+ MAIN_FORKNUM, 0, RelationGetNumberOfBlocks(index));
|
|
+ SetLastWrittenLSNForRelation(XactLastRecEnd, rlocator, MAIN_FORKNUM);
|
|
+ }
|
|
+#endif
|
|
+ }
|
|
+
|
|
+#ifdef NEON_SMGR
|
|
+ smgr_end_unlogged_build(RelationGetSmgr(index));
|
|
+#endif
|
|
+
|
|
FreeBuildState(buildstate);
|
|
}
|
|
|
|
--
|
|
2.39.2
|
|
|