Enable parallel execution of extension tests (#12118)

## Problem

Extension tests were previously run sequentially, resulting in
unnecessary wait time and underutilization of available CPU cores.

## Summary of changes

Tests are now executed by a configurable number of parallel workers,
each running against its own compute and database branch. This roughly
halves the overall test time (on my laptop the parallel run takes 173s
versus 340s for the sequential one) and puts more concurrent load on
the pageserver, providing better test coverage.
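
The degree of parallelism is controlled by the `PARALLEL_COMPUTES` environment variable (CI pins it to 3, as the workflow hunk below shows). As a rough usage sketch, assuming the docker-compose test driver modified in this PR is invoked directly:

```bash
# Sketch: run the extension tests for Postgres 16 with three parallel
# computes. The script name/location is an assumption; adjust to where
# the docker-compose test driver lives in your checkout.
PARALLEL_COMPUTES=3 TEST_VERSION_ONLY=16 ./docker_compose_test.sh
```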

---------

Co-authored-by: Alexander Bayandin <alexander@neon.tech>
Co-authored-by: Devin AI <158243242+devin-ai-integration[bot]@users.noreply.github.com>
Co-authored-by: Alexey Masterov <alexey.masterov@databricks.com>
Authored by a-masterov, committed via GitHub on 2025-07-08 13:28:39 +02:00
parent f51ed4a2c4, commit 59e393aef3
8 changed files with 144 additions and 72 deletions

**CI workflow** (run the docker-compose extension tests with three parallel computes):

```diff
@@ -986,6 +986,7 @@ jobs:
       - name: Verify docker-compose example and test extensions
         timeout-minutes: 60
         env:
+          PARALLEL_COMPUTES: 3
           TAG: >-
             ${{
               needs.meta.outputs.run-kind == 'compute-rc-pr'
```

**.gitignore** (the parallel compose file is generated on the fly and must not be committed):

```diff
@@ -15,6 +15,7 @@ neon.iml
 /.neon
 /integration_tests/.neon
 compaction-suite-results.*
+docker-compose/docker-compose-parallel.yml
 # Coverage
 *.profraw
```

**Test-image Dockerfile** (install GNU `parallel`; the default `PGHOST` now points at the renamed `compute1` service):

```diff
@@ -1915,10 +1915,10 @@ RUN cd /ext-src/pg_repack-src && patch -p1 </ext-src/pg_repack.patch && rm -f /e
 COPY --chmod=755 docker-compose/run-tests.sh /run-tests.sh
 RUN echo /usr/local/pgsql/lib > /etc/ld.so.conf.d/00-neon.conf && /sbin/ldconfig
-RUN apt-get update && apt-get install -y libtap-parser-sourcehandler-pgtap-perl jq \
+RUN apt-get update && apt-get install -y libtap-parser-sourcehandler-pgtap-perl jq parallel \
     && apt clean && rm -rf /ext-src/*.tar.gz /ext-src/*.patch /var/lib/apt/lists/*
 ENV PATH=/usr/local/pgsql/bin:$PATH
-ENV PGHOST=compute
+ENV PGHOST=compute1
 ENV PGPORT=55433
 ENV PGUSER=cloud_admin
 ENV PGDATABASE=postgres
```
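
Since these defaults are baked into the test image, a bare `psql` inside the `neon-test-extensions` container already targets the first compute. A minimal connectivity check, assuming the compose stack below is up:

```bash
# PGHOST=compute1, PGPORT=55433, PGUSER=cloud_admin, and PGDATABASE=postgres
# come from the ENV lines above, so psql needs no extra flags here.
docker compose exec neon-test-extensions psql -c 'SELECT 1'
```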

**Compute startup script (`/shell/compute.sh`)** (parallel computes skip the timeline lookup, so each one ends up generating fresh IDs):

```diff
@@ -54,14 +54,16 @@ else
   printf '%s\n' "${result}" | jq .
 fi
-echo "Check if a timeline present"
-PARAMS=(
-  -X GET
-  -H "Content-Type: application/json"
-  "http://pageserver:9898/v1/tenant/${tenant_id}/timeline"
-)
-timeline_id=$(curl "${PARAMS[@]}" | jq -r .[0].timeline_id)
-if [[ -z "${timeline_id}" || "${timeline_id}" = null ]]; then
+if [[ "${RUN_PARALLEL:-false}" != "true" ]]; then
+  echo "Check if a timeline present"
+  PARAMS=(
+    -X GET
+    -H "Content-Type: application/json"
+    "http://pageserver:9898/v1/tenant/${tenant_id}/timeline"
+  )
+  timeline_id=$(curl "${PARAMS[@]}" | jq -r .[0].timeline_id)
+fi
+if [[ -z "${timeline_id:-}" || "${timeline_id:-}" = null ]]; then
   generate_id timeline_id
   PARAMS=(
     -sbf
```
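
The hunk relies on `generate_id`, a helper defined earlier in this script but outside the hunk. Purely as an illustration of its contract — it stores a fresh random ID in the variable named by its argument — a stand-in could look like this (an assumption, not the repository's actual code):

```bash
# Hypothetical stand-in for the script's generate_id helper. Assumes Neon
# tenant/timeline IDs are 16 random bytes rendered as 32 hex characters.
generate_id() {
    local -n out_ref=${1}  # bash 4.3+ nameref to the caller's variable
    out_ref=$(head -c 16 /dev/urandom | od -An -tx1 | tr -d ' \n')
}

generate_id timeline_id  # e.g. timeline_id=3f9c... (32 hex chars)
echo "${timeline_id}"
```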

**docker-compose.yml** (rename `compute` to `compute1`, keep a `compute` network alias for compatibility, and make the readiness check cover all computes):

```diff
@@ -142,7 +142,7 @@ services:
       - "storage_broker"
       - "--listen-addr=0.0.0.0:50051"
-  compute:
+  compute1:
     restart: always
     build:
       context: ./compute_wrapper/
@@ -152,6 +152,7 @@ services:
         - TAG=${COMPUTE_TAG:-${TAG:-latest}}
         - http_proxy=${http_proxy:-}
         - https_proxy=${https_proxy:-}
+    image: built-compute
     environment:
       - PG_VERSION=${PG_VERSION:-16}
       - TENANT_ID=${TENANT_ID:-}
@@ -166,6 +167,11 @@ services:
       - 3080:3080 # http endpoints
     entrypoint:
       - "/shell/compute.sh"
+    # Add an alias for compute1 for compatibility
+    networks:
+      default:
+        aliases:
+          - compute
     depends_on:
       - safekeeper1
       - safekeeper2
@@ -174,15 +180,20 @@ services:
   compute_is_ready:
     image: postgres:latest
+    environment:
+      - PARALLEL_COMPUTES=1
     entrypoint:
-      - "/bin/bash"
+      - "/bin/sh"
       - "-c"
     command:
-      - "until pg_isready -h compute -p 55433 -U cloud_admin ; do
-         echo 'Waiting to start compute...' && sleep 1;
-         done"
+      - "for i in $(seq 1 $${PARALLEL_COMPUTES}); do
+           until pg_isready -h compute$$i -p 55433 -U cloud_admin ; do
+             sleep 1;
+           done;
+         done;
+         echo All computes are started"
     depends_on:
-      - compute
+      - compute1
 
   neon-test-extensions:
     profiles: ["test-extensions"]
@@ -196,4 +207,4 @@ services:
     command:
       - sleep 3600
     depends_on:
-      - compute
+      - compute1
```
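
Note the doubled `$$` in the `command` string, which stops docker compose from interpolating the variables itself. With the escaping removed, the readiness gate is equivalent to this sketch (host names assume the compose network above):

```bash
# Equivalent of the compute_is_ready one-liner: block until every compute
# accepts connections, then emit the message the test driver greps for.
PARALLEL_COMPUTES=${PARALLEL_COMPUTES:-1}
for i in $(seq 1 "${PARALLEL_COMPUTES}"); do
    until pg_isready -h "compute${i}" -p 55433 -U cloud_admin; do
        sleep 1
    done
done
echo "All computes are started"
```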

**docker-compose test driver** (generate `docker-compose-parallel.yml` on the fly, wait for every compute, and stage test data once before fanning it out to each compute):

```diff
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
 # A basic test to ensure Docker images are built correctly.
 # Build a wrapper around the compute, start all services and run a simple SQL query.
@@ -13,9 +13,36 @@
 #
 set -eux -o pipefail
+cd "$(dirname "${0}")"
 
 export COMPOSE_FILE='docker-compose.yml'
 export COMPOSE_PROFILES=test-extensions
-cd "$(dirname "${0}")"
+export PARALLEL_COMPUTES=${PARALLEL_COMPUTES:-1}
+READY_MESSAGE="All computes are started"
+COMPUTES=()
+for i in $(seq 1 "${PARALLEL_COMPUTES}"); do
+  COMPUTES+=("compute${i}")
+done
+CURRENT_TMPDIR=$(mktemp -d)
+trap 'rm -rf ${CURRENT_TMPDIR} docker-compose-parallel.yml' EXIT
+if [[ ${PARALLEL_COMPUTES} -gt 1 ]]; then
+  export COMPOSE_FILE=docker-compose-parallel.yml
+  cp docker-compose.yml docker-compose-parallel.yml
+  # Replace the environment variable PARALLEL_COMPUTES with the actual value
+  yq eval -i ".services.compute_is_ready.environment |= map(select(. | test(\"^PARALLEL_COMPUTES=\") | not)) + [\"PARALLEL_COMPUTES=${PARALLEL_COMPUTES}\"]" ${COMPOSE_FILE}
+  for i in $(seq 2 "${PARALLEL_COMPUTES}"); do
+    # Duplicate compute1 as compute${i} for parallel execution
+    yq eval -i ".services.compute${i} = .services.compute1" ${COMPOSE_FILE}
+    # We don't need these sections, so delete them
+    yq eval -i "(del .services.compute${i}.build) | (del .services.compute${i}.ports) | (del .services.compute${i}.networks)" ${COMPOSE_FILE}
+    # Let compute1 be the only dependency
+    yq eval -i ".services.compute${i}.depends_on = [\"compute1\"]" ${COMPOSE_FILE}
+    # Set RUN_PARALLEL=true for the cloned computes: they will generate their own
+    # tenant_id and timeline_id to avoid using the same ones as other computes
+    yq eval -i ".services.compute${i}.environment += [\"RUN_PARALLEL=true\"]" ${COMPOSE_FILE}
+    # Remove TENANT_ID and TIMELINE_ID from the environment variables of the
+    # generated computes; they will create new ones anyway
+    yq eval -i ".services.compute${i}.environment |= map(select(. | (test(\"^TENANT_ID=\") or test(\"^TIMELINE_ID=\")) | not))" ${COMPOSE_FILE}
+  done
+fi
 
 PSQL_OPTION="-h localhost -U cloud_admin -p 55433 -d postgres"
 
 function cleanup() {
@@ -27,11 +54,11 @@ function cleanup() {
 for pg_version in ${TEST_VERSION_ONLY-14 15 16 17}; do
   pg_version=${pg_version/v/}
-  echo "clean up containers if exists"
+  echo "clean up containers if exist"
   cleanup
   PG_TEST_VERSION=$((pg_version < 16 ? 16 : pg_version))
-  PG_VERSION=${pg_version} PG_TEST_VERSION=${PG_TEST_VERSION} docker compose up --quiet-pull --build -d
+  PG_VERSION=${pg_version} PG_TEST_VERSION=${PG_TEST_VERSION} docker compose build compute1
+  PG_VERSION=${pg_version} PG_TEST_VERSION=${PG_TEST_VERSION} docker compose up --quiet-pull -d
 
   echo "wait until the compute is ready. timeout after 60s. "
   cnt=0
   while sleep 3; do
@@ -41,45 +68,50 @@ for pg_version in ${TEST_VERSION_ONLY-14 15 16 17}; do
       echo "timeout before the compute is ready."
       exit 1
     fi
-    if docker compose logs "compute_is_ready" | grep -q "accepting connections"; then
+    if docker compose logs compute_is_ready | grep -q "${READY_MESSAGE}"; then
       echo "OK. The compute is ready to connect."
       echo "execute simple queries."
-      docker compose exec compute /bin/bash -c "psql ${PSQL_OPTION} -c 'SELECT 1'"
+      for compute in "${COMPUTES[@]}"; do
+        docker compose exec "${compute}" /bin/bash -c "psql ${PSQL_OPTION} -c 'SELECT 1'"
+      done
       break
     fi
   done
 
   if [[ ${pg_version} -ge 16 ]]; then
-    # This is required for the pg_hint_plan test, to prevent flaky log message causing the test to fail
-    # It cannot be moved to Dockerfile now because the database directory is created after the start of the container
-    echo Adding dummy config
-    docker compose exec compute touch /var/db/postgres/compute/compute_ctl_temp_override.conf
-    # Prepare for the PostGIS test
-    docker compose exec compute mkdir -p /tmp/pgis_reg/pgis_reg_tmp
-    TMPDIR=$(mktemp -d)
-    docker compose cp neon-test-extensions:/ext-src/postgis-src/raster/test "${TMPDIR}"
-    docker compose cp neon-test-extensions:/ext-src/postgis-src/regress/00-regress-install "${TMPDIR}"
-    docker compose exec compute mkdir -p /ext-src/postgis-src/raster /ext-src/postgis-src/regress /ext-src/postgis-src/regress/00-regress-install
-    docker compose cp "${TMPDIR}/test" compute:/ext-src/postgis-src/raster/test
-    docker compose cp "${TMPDIR}/00-regress-install" compute:/ext-src/postgis-src/regress
-    rm -rf "${TMPDIR}"
-    # The following block copies the files for the pg_hint_plan test to the compute node for the extension test in an isolated docker-compose environment
-    TMPDIR=$(mktemp -d)
-    docker compose cp neon-test-extensions:/ext-src/pg_hint_plan-src/data "${TMPDIR}/data"
-    docker compose cp "${TMPDIR}/data" compute:/ext-src/pg_hint_plan-src/
-    rm -rf "${TMPDIR}"
-    # The following block does the same for the contrib/file_fdw test
-    TMPDIR=$(mktemp -d)
-    docker compose cp neon-test-extensions:/postgres/contrib/file_fdw/data "${TMPDIR}/data"
-    docker compose cp "${TMPDIR}/data" compute:/postgres/contrib/file_fdw/data
-    rm -rf "${TMPDIR}"
+    mkdir "${CURRENT_TMPDIR}"/{pg_hint_plan-src,file_fdw,postgis-src}
+    docker compose cp neon-test-extensions:/ext-src/postgis-src/raster/test "${CURRENT_TMPDIR}/postgis-src/test"
+    docker compose cp neon-test-extensions:/ext-src/postgis-src/regress/00-regress-install "${CURRENT_TMPDIR}/postgis-src/00-regress-install"
+    docker compose cp neon-test-extensions:/ext-src/pg_hint_plan-src/data "${CURRENT_TMPDIR}/pg_hint_plan-src/data"
+    docker compose cp neon-test-extensions:/postgres/contrib/file_fdw/data "${CURRENT_TMPDIR}/file_fdw/data"
+
+    for compute in "${COMPUTES[@]}"; do
+      # This is required for the pg_hint_plan test, to prevent a flaky log message causing the test to fail.
+      # It cannot be moved to the Dockerfile because the database directory is created after the container starts.
+      echo Adding dummy config on "${compute}"
+      docker compose exec "${compute}" touch /var/db/postgres/compute/compute_ctl_temp_override.conf
+      # Prepare for the PostGIS test
+      docker compose exec "${compute}" mkdir -p /tmp/pgis_reg/pgis_reg_tmp /ext-src/postgis-src/raster /ext-src/postgis-src/regress /ext-src/postgis-src/regress/00-regress-install
+      docker compose cp "${CURRENT_TMPDIR}/postgis-src/test" "${compute}":/ext-src/postgis-src/raster/test
+      docker compose cp "${CURRENT_TMPDIR}/postgis-src/00-regress-install" "${compute}":/ext-src/postgis-src/regress
+      # The following line copies the files for the pg_hint_plan test to the compute node for the extension test in an isolated docker-compose environment
+      docker compose cp "${CURRENT_TMPDIR}/pg_hint_plan-src/data" "${compute}":/ext-src/pg_hint_plan-src/
+      # The following line does the same for the contrib/file_fdw test
+      docker compose cp "${CURRENT_TMPDIR}/file_fdw/data" "${compute}":/postgres/contrib/file_fdw/data
+    done
     # Apply patches
     docker compose exec -T neon-test-extensions bash -c "(cd /postgres && patch -p1)" <"../compute/patches/contrib_pg${pg_version}.patch"
     # We are running tests now
     rm -f testout.txt testout_contrib.txt
+    # We want to run the longest tests first to better utilize parallelization and reduce overall test time.
+    # Tests listed in the RUN_FIRST variable will be run before others.
+    # If parallelization is not used, this environment variable will be ignored.
     docker compose exec -e USE_PGXS=1 -e SKIP=timescaledb-src,rdkit-src,pg_jsonschema-src,kq_imcx-src,wal2json_2_5-src,rag_jina_reranker_v1_tiny_en-src,rag_bge_small_en_v15-src \
+      -e RUN_FIRST=hll-src,postgis-src,pgtap-src -e PARALLEL_COMPUTES="${PARALLEL_COMPUTES}" \
       neon-test-extensions /run-tests.sh /ext-src | tee testout.txt && EXT_SUCCESS=1 || EXT_SUCCESS=0
     docker compose exec -e SKIP=start-scripts,postgres_fdw,ltree_plpython,jsonb_plpython,jsonb_plperl,hstore_plpython,hstore_plperl,dblink,bool_plperl \
+      -e PARALLEL_COMPUTES="${PARALLEL_COMPUTES}" \
       neon-test-extensions /run-tests.sh /postgres/contrib | tee testout_contrib.txt && CONTRIB_SUCCESS=1 || CONTRIB_SUCCESS=0
     if [[ ${EXT_SUCCESS} -eq 0 || ${CONTRIB_SUCCESS} -eq 0 ]]; then
       CONTRIB_FAILED=
```
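
To sanity-check what the yq transforms produce, the generated file can be inspected with the same yq binary the script uses. A sketch, assuming `docker-compose-parallel.yml` was generated with `PARALLEL_COMPUTES` of at least 2:

```bash
# The clones should appear next to compute1, with their build/ports/networks
# sections stripped and RUN_PARALLEL=true added to their environment.
yq eval '.services | keys' docker-compose-parallel.yml
yq eval '.services.compute2.environment' docker-compose-parallel.yml
```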

**run-tests.sh** (dispatch tests across the computes with GNU parallel when `PARALLEL_COMPUTES` is greater than 1; collect failures in a shared file):

```diff
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
 set -x
 
 if [[ -v BENCHMARK_CONNSTR ]]; then
@@ -26,8 +26,9 @@ if [[ -v BENCHMARK_CONNSTR ]]; then
   fi
 fi
 REGULAR_USER=false
-while getopts r arg; do
-  case $arg in
+PARALLEL_COMPUTES=${PARALLEL_COMPUTES:-1}
+while getopts pr arg; do
+  case ${arg} in
     r)
       REGULAR_USER=true
       shift $((OPTIND-1))
@@ -41,26 +42,49 @@ extdir=${1}
 
 cd "${extdir}" || exit 2
 FAILED=
-LIST=$( (echo -e "${SKIP//","/"\n"}"; ls) | sort | uniq -u)
-for d in ${LIST}; do
-    [ -d "${d}" ] || continue
-    if ! psql -w -c "select 1" >/dev/null; then
-        FAILED="${d} ${FAILED}"
-        break
-    fi
-    if [[ ${REGULAR_USER} = true ]] && [ -f "${d}"/regular-test.sh ]; then
-        "${d}/regular-test.sh" || FAILED="${d} ${FAILED}"
-        continue
-    fi
+export FAILED_FILE=/tmp/failed
+rm -f ${FAILED_FILE}
+mapfile -t LIST < <( (echo -e "${SKIP//","/"\n"}"; ls) | sort | uniq -u)
+if [[ ${PARALLEL_COMPUTES} -gt 1 ]]; then
+    # Avoid errors if RUN_FIRST is not defined
+    RUN_FIRST=${RUN_FIRST:-}
+    # Move entries listed in the RUN_FIRST variable to the beginning
+    ORDERED_LIST=$(printf "%s\n" "${LIST[@]}" | grep -x -Ff <(echo -e "${RUN_FIRST//,/$'\n'}"); printf "%s\n" "${LIST[@]}" | grep -vx -Ff <(echo -e "${RUN_FIRST//,/$'\n'}"))
+    parallel -j"${PARALLEL_COMPUTES}" "[[ -d {} ]] || exit 0
+      export PGHOST=compute{%}
+      if ! psql -c 'select 1'>/dev/null; then
+        exit 1
+      fi
+      echo Running on \${PGHOST}
+      if [[ -f ${extdir}/{}/neon-test.sh ]]; then
+        echo Running from script
+        ${extdir}/{}/neon-test.sh || echo {} >> ${FAILED_FILE};
+      else
+        echo Running using make;
+        USE_PGXS=1 make -C {} installcheck || echo {} >> ${FAILED_FILE};
+      fi" ::: ${ORDERED_LIST}
+    [[ ! -f ${FAILED_FILE} ]] && exit 0
+else
+    for d in "${LIST[@]}"; do
+        [ -d "${d}" ] || continue
+        if ! psql -w -c "select 1" >/dev/null; then
+            FAILED="${d} ${FAILED}"
+            break
+        fi
+        if [[ ${REGULAR_USER} = true ]] && [ -f "${d}"/regular-test.sh ]; then
+            "${d}/regular-test.sh" || FAILED="${d} ${FAILED}"
+            continue
+        fi
         if [ -f "${d}/neon-test.sh" ]; then
             "${d}/neon-test.sh" || FAILED="${d} ${FAILED}"
         else
             USE_PGXS=1 make -C "${d}" installcheck || FAILED="${d} ${FAILED}"
         fi
     done
-[ -z "${FAILED}" ] && exit 0
-for d in ${FAILED}; do
+    [[ -z ${FAILED} ]] && exit 0
+fi
+for d in ${FAILED} $([[ ! -f ${FAILED_FILE} ]] || cat ${FAILED_FILE}); do
     cat "$(find $d -name regression.diffs)"
 done
@@ -68,4 +92,5 @@ for postgis_diff in /tmp/pgis_reg/*_diff; do
     cat "${postgis_diff}"
 done
 echo "${FAILED}"
+cat ${FAILED_FILE}
 exit 1
```
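
In the parallel branch, GNU parallel's `{%}` replacement string expands to the job slot number (1 through the `-j` limit), so `PGHOST=compute{%}` pins each concurrently running test to its own compute. The slot-to-host mapping is easy to see in isolation:

```bash
# With -j3 there are three slots; each running job owns one slot for its
# duration, so no two concurrent jobs ever share a compute host.
parallel -j3 'echo "slot {%}: test {} would run against compute{%}"' \
    ::: hll-src postgis-src pgtap-src pg_repack-src
```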

**Compute restart helper** (switch `restart_compute` to the renamed `compute1` service):

```diff
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
 set -eux -o pipefail
 cd "$(dirname "${0}")"
 # Takes a variable name as argument. The result is stored in that variable.
@@ -60,8 +60,8 @@ function check_timeline() {
 # Restarts the compute node with the required compute tag and timeline.
 # Accepts the tag for the compute node and the timeline as parameters.
 function restart_compute() {
-    docker compose down compute compute_is_ready
-    COMPUTE_TAG=${1} TENANT_ID=${tenant_id} TIMELINE_ID=${2} docker compose up --quiet-pull -d --build compute compute_is_ready
+    docker compose down compute1 compute_is_ready
+    COMPUTE_TAG=${1} TENANT_ID=${tenant_id} TIMELINE_ID=${2} docker compose up --quiet-pull -d --build compute1 compute_is_ready
     wait_for_ready
     check_timeline ${2}
 }
```