diff --git a/.github/actionlint.yml b/.github/actionlint.yml
index 25b2fc702a..8a4bcaf811 100644
--- a/.github/actionlint.yml
+++ b/.github/actionlint.yml
@@ -31,7 +31,7 @@ config-variables:
- NEON_PROD_AWS_ACCOUNT_ID
- PGREGRESS_PG16_PROJECT_ID
- PGREGRESS_PG17_PROJECT_ID
- - PREWARM_PGBENCH_SIZE
+ - PREWARM_PROJECT_ID
- REMOTE_STORAGE_AZURE_CONTAINER
- REMOTE_STORAGE_AZURE_REGION
- SLACK_CICD_CHANNEL_ID
diff --git a/.github/workflows/benchbase_tpcc.yml b/.github/workflows/benchbase_tpcc.yml
new file mode 100644
index 0000000000..3a36a97bb1
--- /dev/null
+++ b/.github/workflows/benchbase_tpcc.yml
@@ -0,0 +1,384 @@
+name: TPC-C like benchmark using benchbase
+
+on:
+ schedule:
+ # * is a special character in YAML so you have to quote this string
+ # ┌───────────── minute (0 - 59)
+ # │ ┌───────────── hour (0 - 23)
+ # │ │ ┌───────────── day of the month (1 - 31)
+ # │ │ │ ┌───────────── month (1 - 12 or JAN-DEC)
+ # │ │ │ │ ┌───────────── day of the week (0 - 6 or SUN-SAT)
+ - cron: '0 6 * * *' # run once a day at 6 AM UTC
+ workflow_dispatch: # adds ability to run this manually
+
+defaults:
+ run:
+ shell: bash -euxo pipefail {0}
+
+concurrency:
+ # Allow only one workflow globally because we do not want to be too noisy in the production environment
+ group: benchbase-tpcc-workflow
+ cancel-in-progress: false
+
+permissions:
+ contents: read
+
+jobs:
+ benchbase-tpcc:
+ strategy:
+ fail-fast: false # allow other variants to continue even if one fails
+ matrix:
+ include:
+ - warehouses: 50 # defines the number of warehouses and is used to compute the number of terminals
+ max_rate: 800 # measured max TPS at this scale factor, based on experiments. Adjust if performance is better/worse
+ min_cu: 0.25 # simulate the free tier plan (0.25 - 2 CU)
+ max_cu: 2
+ - warehouses: 500 # serverless plan (2-8 CU)
+ max_rate: 2000
+ min_cu: 2
+ max_cu: 8
+ - warehouses: 1000 # business plan (2-16 CU)
+ max_rate: 2900
+ min_cu: 2
+ max_cu: 16
+ max-parallel: 1 # we want to run each workload size sequentially to avoid noisy neighbors
+ permissions:
+ contents: write
+ statuses: write
+ id-token: write # aws-actions/configure-aws-credentials
+ env:
+ PG_CONFIG: /tmp/neon/pg_install/v17/bin/pg_config
+ PSQL: /tmp/neon/pg_install/v17/bin/psql
+ PG_17_LIB_PATH: /tmp/neon/pg_install/v17/lib
+ POSTGRES_VERSION: 17
+ runs-on: [ self-hosted, us-east-2, x64 ]
+ timeout-minutes: 1440
+
+ steps:
+ - name: Harden the runner (Audit all outbound calls)
+ uses: step-security/harden-runner@4d991eb9b905ef189e4c376166672c3f2f230481 # v2.11.0
+ with:
+ egress-policy: audit
+
+ - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
+
+ - name: Configure AWS credentials # necessary to download artefacts
+ uses: aws-actions/configure-aws-credentials@e3dd6a429d7300a6a4c196c26e071d42e0343502 # v4.0.2
+ with:
+ aws-region: eu-central-1
+ role-to-assume: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
+ role-duration-seconds: 18000 # 5 hours is currently the max associated with the IAM role
+
+ - name: Download Neon artifact
+ uses: ./.github/actions/download
+ with:
+ name: neon-${{ runner.os }}-${{ runner.arch }}-release-artifact
+ path: /tmp/neon/
+ prefix: latest
+ aws-oidc-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
+
+ - name: Create Neon Project
+ id: create-neon-project-tpcc
+ uses: ./.github/actions/neon-project-create
+ with:
+ region_id: aws-us-east-2
+ postgres_version: ${{ env.POSTGRES_VERSION }}
+ compute_units: '[${{ matrix.min_cu }}, ${{ matrix.max_cu }}]'
+ api_key: ${{ secrets.NEON_PRODUCTION_API_KEY_4_BENCHMARKS }}
+ api_host: console.neon.tech # production (!)
+
+ - name: Initialize Neon project
+ env:
+ BENCHMARK_TPCC_CONNSTR: ${{ steps.create-neon-project-tpcc.outputs.dsn }}
+ PROJECT_ID: ${{ steps.create-neon-project-tpcc.outputs.project_id }}
+ run: |
+ echo "Initializing Neon project with project_id: ${PROJECT_ID}"
+ export LD_LIBRARY_PATH=${PG_17_LIB_PATH}
+
+ # Retry logic for psql connection with 1 minute sleep between attempts
+ for attempt in {1..3}; do
+ echo "Attempt ${attempt}/3: Creating extensions in Neon project"
+ if ${PSQL} "${BENCHMARK_TPCC_CONNSTR}" -c "CREATE EXTENSION IF NOT EXISTS neon; CREATE EXTENSION IF NOT EXISTS neon_utils;"; then
+ echo "Successfully created extensions"
+ break
+ else
+ echo "Failed to create extensions on attempt ${attempt}"
+ if [ ${attempt} -lt 3 ]; then
+ echo "Waiting 60 seconds before retry..."
+ sleep 60
+ else
+ echo "All attempts failed, exiting"
+ exit 1
+ fi
+ fi
+ done
+
+ echo "BENCHMARK_TPCC_CONNSTR=${BENCHMARK_TPCC_CONNSTR}" >> $GITHUB_ENV
+
+ - name: Generate BenchBase workload configuration
+ env:
+ WAREHOUSES: ${{ matrix.warehouses }}
+ MAX_RATE: ${{ matrix.max_rate }}
+ run: |
+ echo "Generating BenchBase configs for warehouses: ${WAREHOUSES}, max_rate: ${MAX_RATE}"
+
+ # Extract hostname and password from connection string
+ # Format: postgresql://username:password@hostname/database?params (no port for Neon)
+ HOSTNAME=$(echo "${BENCHMARK_TPCC_CONNSTR}" | sed -n 's|.*://[^:]*:[^@]*@\([^/]*\)/.*|\1|p')
+ PASSWORD=$(echo "${BENCHMARK_TPCC_CONNSTR}" | sed -n 's|.*://[^:]*:\([^@]*\)@.*|\1|p')
+
+ echo "Extracted hostname: ${HOSTNAME}"
+
+ # Use runner temp (NVMe) as working directory
+ cd "${RUNNER_TEMP}"
+
+ # Copy the generator script
+ cp "${GITHUB_WORKSPACE}/test_runner/performance/benchbase_tpc_c_helpers/generate_workload_size.py" .
+
+ # Generate configs and scripts
+ python3 generate_workload_size.py \
+ --warehouses ${WAREHOUSES} \
+ --max-rate ${MAX_RATE} \
+ --hostname ${HOSTNAME} \
+ --password ${PASSWORD} \
+ --runner-arch ${{ runner.arch }}
+
+ # Fix path mismatch: move generated configs and scripts to expected locations
+ mv ../configs ./configs
+ mv ../scripts ./scripts
+
+ - name: Prepare database (load data)
+ env:
+ WAREHOUSES: ${{ matrix.warehouses }}
+ run: |
+ cd "${RUNNER_TEMP}"
+
+ echo "Loading ${WAREHOUSES} warehouses into database..."
+
+ # Run the loader script and capture output to log file while preserving stdout/stderr
+ ./scripts/load_${WAREHOUSES}_warehouses.sh 2>&1 | tee "load_${WAREHOUSES}_warehouses.log"
+
+ echo "Database loading completed"
+
+ - name: Run TPC-C benchmark (warmup phase, then benchmark at 70% of configured max TPS)
+ env:
+ WAREHOUSES: ${{ matrix.warehouses }}
+ run: |
+ cd "${RUNNER_TEMP}"
+
+ echo "Running TPC-C benchmark with ${WAREHOUSES} warehouses..."
+
+ # Run the optimal rate benchmark
+ ./scripts/execute_${WAREHOUSES}_warehouses_opt_rate.sh
+
+ echo "Benchmark execution completed"
+
+ - name: Run TPC-C benchmark (warmup phase, then ramp down TPS and up again in 5 minute intervals)
+
+ env:
+ WAREHOUSES: ${{ matrix.warehouses }}
+ run: |
+ cd "${RUNNER_TEMP}"
+
+ echo "Running TPC-C ramp-down-up with ${WAREHOUSES} warehouses..."
+
+ # Run the ramp down/up benchmark
+ ./scripts/execute_${WAREHOUSES}_warehouses_ramp_up.sh
+
+ echo "Benchmark execution completed"
+
+ - name: Process results (upload to test results database and generate diagrams)
+ env:
+ WAREHOUSES: ${{ matrix.warehouses }}
+ MIN_CU: ${{ matrix.min_cu }}
+ MAX_CU: ${{ matrix.max_cu }}
+ PROJECT_ID: ${{ steps.create-neon-project-tpcc.outputs.project_id }}
+ REVISION: ${{ github.sha }}
+ PERF_DB_CONNSTR: ${{ secrets.PERF_TEST_RESULT_CONNSTR }}
+ run: |
+ cd "${RUNNER_TEMP}"
+
+ echo "Creating temporary Python environment for results processing..."
+
+ # Create temporary virtual environment
+ python3 -m venv temp_results_env
+ source temp_results_env/bin/activate
+
+ # Install required packages in virtual environment
+ pip install matplotlib pandas psycopg2-binary
+
+ echo "Copying results processing scripts..."
+
+ # Copy both processing scripts
+ cp "${GITHUB_WORKSPACE}/test_runner/performance/benchbase_tpc_c_helpers/generate_diagrams.py" .
+ cp "${GITHUB_WORKSPACE}/test_runner/performance/benchbase_tpc_c_helpers/upload_results_to_perf_test_results.py" .
+
+ echo "Processing load phase metrics..."
+
+ # Find and process load log
+ LOAD_LOG=$(find . -name "load_${WAREHOUSES}_warehouses.log" -type f | head -1)
+ if [ -n "$LOAD_LOG" ]; then
+ echo "Processing load metrics from: $LOAD_LOG"
+ python upload_results_to_perf_test_results.py \
+ --load-log "$LOAD_LOG" \
+ --run-type "load" \
+ --warehouses "${WAREHOUSES}" \
+ --min-cu "${MIN_CU}" \
+ --max-cu "${MAX_CU}" \
+ --project-id "${PROJECT_ID}" \
+ --revision "${REVISION}" \
+ --connection-string "${PERF_DB_CONNSTR}"
+ else
+ echo "Warning: Load log file not found: load_${WAREHOUSES}_warehouses.log"
+ fi
+
+ echo "Processing warmup results for optimal rate..."
+
+ # Find and process warmup results
+ WARMUP_CSV=$(find results_warmup -name "*.results.csv" -type f | head -1)
+ WARMUP_JSON=$(find results_warmup -name "*.summary.json" -type f | head -1)
+
+ if [ -n "$WARMUP_CSV" ] && [ -n "$WARMUP_JSON" ]; then
+ echo "Generating warmup diagram from: $WARMUP_CSV"
+ python generate_diagrams.py \
+ --input-csv "$WARMUP_CSV" \
+ --output-svg "warmup_${WAREHOUSES}_warehouses_performance.svg" \
+ --title-suffix "Warmup at max TPS"
+
+ echo "Uploading warmup metrics from: $WARMUP_JSON"
+ python upload_results_to_perf_test_results.py \
+ --summary-json "$WARMUP_JSON" \
+ --results-csv "$WARMUP_CSV" \
+ --run-type "warmup" \
+ --min-cu "${MIN_CU}" \
+ --max-cu "${MAX_CU}" \
+ --project-id "${PROJECT_ID}" \
+ --revision "${REVISION}" \
+ --connection-string "${PERF_DB_CONNSTR}"
+ else
+ echo "Warning: Missing warmup results files (CSV: $WARMUP_CSV, JSON: $WARMUP_JSON)"
+ fi
+
+ echo "Processing optimal rate results..."
+
+ # Find and process optimal rate results
+ OPTRATE_CSV=$(find results_opt_rate -name "*.results.csv" -type f | head -1)
+ OPTRATE_JSON=$(find results_opt_rate -name "*.summary.json" -type f | head -1)
+
+ if [ -n "$OPTRATE_CSV" ] && [ -n "$OPTRATE_JSON" ]; then
+ echo "Generating optimal rate diagram from: $OPTRATE_CSV"
+ python generate_diagrams.py \
+ --input-csv "$OPTRATE_CSV" \
+ --output-svg "benchmark_${WAREHOUSES}_warehouses_performance.svg" \
+ --title-suffix "70% of max TPS"
+
+ echo "Uploading optimal rate metrics from: $OPTRATE_JSON"
+ python upload_results_to_perf_test_results.py \
+ --summary-json "$OPTRATE_JSON" \
+ --results-csv "$OPTRATE_CSV" \
+ --run-type "opt-rate" \
+ --min-cu "${MIN_CU}" \
+ --max-cu "${MAX_CU}" \
+ --project-id "${PROJECT_ID}" \
+ --revision "${REVISION}" \
+ --connection-string "${PERF_DB_CONNSTR}"
+ else
+ echo "Warning: Missing optimal rate results files (CSV: $OPTRATE_CSV, JSON: $OPTRATE_JSON)"
+ fi
+
+ echo "Processing warmup 2 results for ramp down/up phase..."
+
+ # Find and process warmup results
+ WARMUP_CSV=$(find results_warmup -name "*.results.csv" -type f | tail -1)
+ WARMUP_JSON=$(find results_warmup -name "*.summary.json" -type f | tail -1)
+
+ if [ -n "$WARMUP_CSV" ] && [ -n "$WARMUP_JSON" ]; then
+ echo "Generating warmup diagram from: $WARMUP_CSV"
+ python generate_diagrams.py \
+ --input-csv "$WARMUP_CSV" \
+ --output-svg "warmup_2_${WAREHOUSES}_warehouses_performance.svg" \
+ --title-suffix "Warmup at max TPS"
+
+ echo "Uploading warmup metrics from: $WARMUP_JSON"
+ python upload_results_to_perf_test_results.py \
+ --summary-json "$WARMUP_JSON" \
+ --results-csv "$WARMUP_CSV" \
+ --run-type "warmup" \
+ --min-cu "${MIN_CU}" \
+ --max-cu "${MAX_CU}" \
+ --project-id "${PROJECT_ID}" \
+ --revision "${REVISION}" \
+ --connection-string "${PERF_DB_CONNSTR}"
+ else
+ echo "Warning: Missing warmup results files (CSV: $WARMUP_CSV, JSON: $WARMUP_JSON)"
+ fi
+
+ echo "Processing ramp results..."
+
+ # Find and process ramp results
+ RAMPUP_CSV=$(find results_ramp_up -name "*.results.csv" -type f | head -1)
+ RAMPUP_JSON=$(find results_ramp_up -name "*.summary.json" -type f | head -1)
+
+ if [ -n "$RAMPUP_CSV" ] && [ -n "$RAMPUP_JSON" ]; then
+ echo "Generating ramp diagram from: $RAMPUP_CSV"
+ python generate_diagrams.py \
+ --input-csv "$RAMPUP_CSV" \
+ --output-svg "ramp_${WAREHOUSES}_warehouses_performance.svg" \
+ --title-suffix "ramp TPS down and up in 5 minute intervals"
+
+ echo "Uploading ramp metrics from: $RAMPUP_JSON"
+ python upload_results_to_perf_test_results.py \
+ --summary-json "$RAMPUP_JSON" \
+ --results-csv "$RAMPUP_CSV" \
+ --run-type "ramp-up" \
+ --min-cu "${MIN_CU}" \
+ --max-cu "${MAX_CU}" \
+ --project-id "${PROJECT_ID}" \
+ --revision "${REVISION}" \
+ --connection-string "${PERF_DB_CONNSTR}"
+ else
+ echo "Warning: Missing ramp results files (CSV: $RAMPUP_CSV, JSON: $RAMPUP_JSON)"
+ fi
+
+ # Deactivate and clean up virtual environment
+ deactivate
+ rm -rf temp_results_env
+ rm upload_results_to_perf_test_results.py
+
+ echo "Results processing completed and environment cleaned up"
+
+ - name: Set date for upload
+ id: set-date
+ run: echo "date=$(date +%Y-%m-%d)" >> $GITHUB_OUTPUT
+
+ - name: Configure AWS credentials # necessary to upload results
+ uses: aws-actions/configure-aws-credentials@e3dd6a429d7300a6a4c196c26e071d42e0343502 # v4.0.2
+ with:
+ aws-region: us-east-2
+ role-to-assume: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
+ role-duration-seconds: 900 # 900 is minimum value
+
+ - name: Upload benchmark results to S3
+ env:
+ S3_BUCKET: neon-public-benchmark-results
+ S3_PREFIX: benchbase-tpc-c/${{ steps.set-date.outputs.date }}/${{ github.run_id }}/${{ matrix.warehouses }}-warehouses
+ run: |
+ echo "Redacting passwords from configuration files before upload..."
+
+ # Mask all passwords in XML config files
+ find "${RUNNER_TEMP}/configs" -name "*.xml" -type f -exec sed -i 's|[^<]*|redacted|g' {} \;
+
+ echo "Uploading benchmark results to s3://${S3_BUCKET}/${S3_PREFIX}/"
+
+ # Upload the entire benchmark directory recursively
+ aws s3 cp --only-show-errors --recursive "${RUNNER_TEMP}" s3://${S3_BUCKET}/${S3_PREFIX}/
+
+ echo "Upload completed"
+
+ - name: Delete Neon Project
+ if: ${{ always() }}
+ uses: ./.github/actions/neon-project-delete
+ with:
+ project_id: ${{ steps.create-neon-project-tpcc.outputs.project_id }}
+ api_key: ${{ secrets.NEON_PRODUCTION_API_KEY_4_BENCHMARKS }}
+ api_host: console.neon.tech # production (!)
\ No newline at end of file
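Note on the "Generate BenchBase workload configuration" and "Upload benchmark results to S3" steps above: the hostname/password extraction is done with sed over the Neon DSN (which, as the comment says, has no explicit port), and the upload step masks the `<password>` elements in the generated BenchBase XML configs. Below is a minimal Python sketch of the same two transformations, for illustration only; the helper names and the example DSN are hypothetical and not part of the workflow.

```python
from urllib.parse import urlsplit
import re

def split_dsn(dsn: str) -> tuple[str, str]:
    """Return (hostname, password) from a postgresql:// DSN with no explicit port."""
    parts = urlsplit(dsn)  # netloc parsing works for any scheme that uses '//'
    return parts.hostname or "", parts.password or ""

def redact_passwords(xml_text: str) -> str:
    """Replace the contents of every <password> element, mirroring the sed call above."""
    return re.sub(r"<password>[^<]*</password>", "<password>redacted</password>", xml_text)

host, pw = split_dsn("postgresql://user:secret@ep-example.us-east-2.aws.neon.tech/neondb?sslmode=require")
print(host)                                            # ep-example.us-east-2.aws.neon.tech
print(redact_passwords(f"<password>{pw}</password>"))  # <password>redacted</password>
```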
diff --git a/.github/workflows/benchmarking.yml b/.github/workflows/benchmarking.yml
index df80bad579..c9a998bd4e 100644
--- a/.github/workflows/benchmarking.yml
+++ b/.github/workflows/benchmarking.yml
@@ -418,7 +418,7 @@ jobs:
statuses: write
id-token: write # aws-actions/configure-aws-credentials
env:
- PGBENCH_SIZE: ${{ vars.PREWARM_PGBENCH_SIZE }}
+ PROJECT_ID: ${{ vars.PREWARM_PROJECT_ID }}
POSTGRES_DISTRIB_DIR: /tmp/neon/pg_install
DEFAULT_PG_VERSION: 17
TEST_OUTPUT: /tmp/test_output
diff --git a/.github/workflows/build-build-tools-image.yml b/.github/workflows/build-build-tools-image.yml
index 24e4c8fa3d..5e53d8231f 100644
--- a/.github/workflows/build-build-tools-image.yml
+++ b/.github/workflows/build-build-tools-image.yml
@@ -146,7 +146,9 @@ jobs:
with:
file: build-tools/Dockerfile
context: .
- provenance: false
+ attests: |
+ type=provenance,mode=max
+ type=sbom,generator=docker.io/docker/buildkit-syft-scanner:1
push: true
pull: true
build-args: |
diff --git a/.github/workflows/build_and_test.yml b/.github/workflows/build_and_test.yml
index f237a991cc..0dcbd1c6dd 100644
--- a/.github/workflows/build_and_test.yml
+++ b/.github/workflows/build_and_test.yml
@@ -634,7 +634,9 @@ jobs:
DEBIAN_VERSION=bookworm
secrets: |
SUBZERO_ACCESS_TOKEN=${{ secrets.CI_ACCESS_TOKEN }}
- provenance: false
+ attests: |
+ type=provenance,mode=max
+ type=sbom,generator=docker.io/docker/buildkit-syft-scanner:1
push: true
pull: true
file: Dockerfile
@@ -747,7 +749,9 @@ jobs:
PG_VERSION=${{ matrix.version.pg }}
BUILD_TAG=${{ needs.meta.outputs.release-tag || needs.meta.outputs.build-tag }}
DEBIAN_VERSION=${{ matrix.version.debian }}
- provenance: false
+ attests: |
+ type=provenance,mode=max
+ type=sbom,generator=docker.io/docker/buildkit-syft-scanner:1
push: true
pull: true
file: compute/compute-node.Dockerfile
@@ -766,7 +770,9 @@ jobs:
PG_VERSION=${{ matrix.version.pg }}
BUILD_TAG=${{ needs.meta.outputs.release-tag || needs.meta.outputs.build-tag }}
DEBIAN_VERSION=${{ matrix.version.debian }}
- provenance: false
+ attests: |
+ type=provenance,mode=max
+ type=sbom,generator=docker.io/docker/buildkit-syft-scanner:1
push: true
pull: true
file: compute/compute-node.Dockerfile
diff --git a/.github/workflows/pg-clients.yml b/.github/workflows/pg-clients.yml
index 6efe0b4c8c..40b2c51624 100644
--- a/.github/workflows/pg-clients.yml
+++ b/.github/workflows/pg-clients.yml
@@ -48,8 +48,20 @@ jobs:
uses: ./.github/workflows/build-build-tools-image.yml
secrets: inherit
+ generate-ch-tmppw:
+ runs-on: ubuntu-22.04
+ outputs:
+ tmp_val: ${{ steps.pwgen.outputs.tmp_val }}
+ steps:
+ - name: Generate a random password
+ id: pwgen
+ run: |
+ set +x
+ p=$(dd if=/dev/random bs=14 count=1 2>/dev/null | base64)
+ echo tmp_val="${p//\//}" >> "${GITHUB_OUTPUT}"
+
test-logical-replication:
- needs: [ build-build-tools-image ]
+ needs: [ build-build-tools-image, generate-ch-tmppw ]
runs-on: ubuntu-22.04
container:
@@ -60,16 +72,21 @@ jobs:
options: --init --user root
services:
clickhouse:
- image: clickhouse/clickhouse-server:24.6.3.64
+ image: clickhouse/clickhouse-server:25.6
+ env:
+ CLICKHOUSE_PASSWORD: ${{ needs.generate-ch-tmppw.outputs.tmp_val }}
+ PGSSLCERT: /tmp/postgresql.crt
ports:
- 9000:9000
- 8123:8123
zookeeper:
- image: quay.io/debezium/zookeeper:2.7
+ image: quay.io/debezium/zookeeper:3.1.3.Final
ports:
- 2181:2181
+ - 2888:2888
+ - 3888:3888
kafka:
- image: quay.io/debezium/kafka:2.7
+ image: quay.io/debezium/kafka:3.1.3.Final
env:
ZOOKEEPER_CONNECT: "zookeeper:2181"
KAFKA_ADVERTISED_LISTENERS: PLAINTEXT://kafka:9092
@@ -79,7 +96,7 @@ jobs:
ports:
- 9092:9092
debezium:
- image: quay.io/debezium/connect:2.7
+ image: quay.io/debezium/connect:3.1.3.Final
env:
BOOTSTRAP_SERVERS: kafka:9092
GROUP_ID: 1
@@ -125,6 +142,7 @@ jobs:
aws-oidc-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
env:
BENCHMARK_CONNSTR: ${{ steps.create-neon-project.outputs.dsn }}
+ CLICKHOUSE_PASSWORD: ${{ needs.generate-ch-tmppw.outputs.tmp_val }}
- name: Delete Neon Project
if: always()
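The `generate-ch-tmppw` job above derives a throwaway ClickHouSE password from 14 bytes of /dev/random, base64-encodes them, and strips '/' so the value is safe to pass around as a job output and env var. A roughly equivalent sketch using Python's standard library, shown only for illustration (not used by the workflow):

```python
import base64
import secrets

def throwaway_password(nbytes: int = 14) -> str:
    """Random bytes, base64-encoded, with '/' removed — like `dd if=/dev/random bs=14 count=1 | base64`."""
    return base64.b64encode(secrets.token_bytes(nbytes)).decode().replace("/", "")

print(throwaway_password())
```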
diff --git a/Cargo.lock b/Cargo.lock
index 133ca5def9..9a0cc9076a 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -211,11 +211,11 @@ dependencies = [
[[package]]
name = "async-lock"
-version = "3.2.0"
+version = "3.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "7125e42787d53db9dd54261812ef17e937c95a51e4d291373b670342fa44310c"
+checksum = "ff6e472cdea888a4bd64f342f09b3f50e1886d32afe8df3d663c01140b811b18"
dependencies = [
- "event-listener 4.0.0",
+ "event-listener 5.4.0",
"event-listener-strategy",
"pin-project-lite",
]
@@ -1404,9 +1404,9 @@ dependencies = [
[[package]]
name = "concurrent-queue"
-version = "2.3.0"
+version = "2.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "f057a694a54f12365049b0958a1685bb52d567f5593b355fbf685838e873d400"
+checksum = "4ca0197aee26d1ae37445ee532fefce43251d24cc7c166799f4d46817f1d3973"
dependencies = [
"crossbeam-utils",
]
@@ -2232,9 +2232,9 @@ checksum = "0206175f82b8d6bf6652ff7d71a1e27fd2e4efde587fd368662814d6ec1d9ce0"
[[package]]
name = "event-listener"
-version = "4.0.0"
+version = "5.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "770d968249b5d99410d61f5bf89057f3199a077a04d087092f58e7d10692baae"
+checksum = "3492acde4c3fc54c845eaab3eed8bd00c7a7d881f78bfc801e43a93dec1331ae"
dependencies = [
"concurrent-queue",
"parking",
@@ -2243,11 +2243,11 @@ dependencies = [
[[package]]
name = "event-listener-strategy"
-version = "0.4.0"
+version = "0.5.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "958e4d70b6d5e81971bebec42271ec641e7ff4e170a6fa605f2b8a8b65cb97d3"
+checksum = "8be9f3dfaaffdae2972880079a491a1a8bb7cbed0b8dd7a347f668b4150a3b93"
dependencies = [
- "event-listener 4.0.0",
+ "event-listener 5.4.0",
"pin-project-lite",
]
@@ -2516,6 +2516,20 @@ version = "0.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "304de19db7028420975a296ab0fcbbc8e69438c4ed254a1e41e2a7f37d5f0e0a"
+[[package]]
+name = "generator"
+version = "0.8.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d18470a76cb7f8ff746cf1f7470914f900252ec36bbc40b569d74b1258446827"
+dependencies = [
+ "cc",
+ "cfg-if",
+ "libc",
+ "log",
+ "rustversion",
+ "windows 0.61.3",
+]
+
[[package]]
name = "generic-array"
version = "0.14.7"
@@ -2834,7 +2848,7 @@ checksum = "f9c7c7c8ac16c798734b8a24560c1362120597c40d5e1459f09498f8f6c8f2ba"
dependencies = [
"cfg-if",
"libc",
- "windows",
+ "windows 0.52.0",
]
[[package]]
@@ -3105,7 +3119,7 @@ dependencies = [
"iana-time-zone-haiku",
"js-sys",
"wasm-bindgen",
- "windows-core",
+ "windows-core 0.52.0",
]
[[package]]
@@ -3656,6 +3670,19 @@ version = "0.4.26"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "30bde2b3dc3671ae49d8e2e9f044c7c005836e7a023ee57cffa25ab82764bb9e"
+[[package]]
+name = "loom"
+version = "0.7.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "419e0dc8046cb947daa77eb95ae174acfbddb7673b4151f56d1eed8e93fbfaca"
+dependencies = [
+ "cfg-if",
+ "generator",
+ "scoped-tls",
+ "tracing",
+ "tracing-subscriber",
+]
+
[[package]]
name = "lru"
version = "0.12.3"
@@ -3872,6 +3899,25 @@ dependencies = [
"windows-sys 0.52.0",
]
+[[package]]
+name = "moka"
+version = "0.12.10"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "a9321642ca94a4282428e6ea4af8cc2ca4eac48ac7a6a4ea8f33f76d0ce70926"
+dependencies = [
+ "crossbeam-channel",
+ "crossbeam-epoch",
+ "crossbeam-utils",
+ "loom",
+ "parking_lot 0.12.1",
+ "portable-atomic",
+ "rustc_version",
+ "smallvec",
+ "tagptr",
+ "thiserror 1.0.69",
+ "uuid",
+]
+
[[package]]
name = "multimap"
version = "0.8.3"
@@ -5031,8 +5077,6 @@ dependencies = [
"crc32c",
"criterion",
"env_logger",
- "log",
- "memoffset 0.9.0",
"once_cell",
"postgres",
"postgres_ffi_types",
@@ -5385,7 +5429,6 @@ dependencies = [
"futures",
"gettid",
"hashbrown 0.14.5",
- "hashlink",
"hex",
"hmac",
"hostname",
@@ -5407,6 +5450,7 @@ dependencies = [
"lasso",
"measured",
"metrics",
+ "moka",
"once_cell",
"opentelemetry",
"ouroboros",
@@ -5473,6 +5517,7 @@ dependencies = [
"workspace_hack",
"x509-cert",
"zerocopy 0.8.24",
+ "zeroize",
]
[[package]]
@@ -6420,6 +6465,12 @@ dependencies = [
"pin-project-lite",
]
+[[package]]
+name = "scoped-tls"
+version = "1.0.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "e1cf6437eb19a8f4a6cc0f7dca544973b0b78843adbfeb3683d1a94a0024a294"
+
[[package]]
name = "scopeguard"
version = "1.1.0"
@@ -7269,6 +7320,12 @@ dependencies = [
"winapi",
]
+[[package]]
+name = "tagptr"
+version = "0.2.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "7b2093cf4c8eb1e67749a6762251bc9cd836b6fc171623bd0a9d324d37af2417"
+
[[package]]
name = "tar"
version = "0.4.40"
@@ -8638,10 +8695,32 @@ version = "0.52.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e48a53791691ab099e5e2ad123536d0fff50652600abaf43bbf952894110d0be"
dependencies = [
- "windows-core",
+ "windows-core 0.52.0",
"windows-targets 0.52.6",
]
+[[package]]
+name = "windows"
+version = "0.61.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "9babd3a767a4c1aef6900409f85f5d53ce2544ccdfaa86dad48c91782c6d6893"
+dependencies = [
+ "windows-collections",
+ "windows-core 0.61.2",
+ "windows-future",
+ "windows-link",
+ "windows-numerics",
+]
+
+[[package]]
+name = "windows-collections"
+version = "0.2.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "3beeceb5e5cfd9eb1d76b381630e82c4241ccd0d27f1a39ed41b2760b255c5e8"
+dependencies = [
+ "windows-core 0.61.2",
+]
+
[[package]]
name = "windows-core"
version = "0.52.0"
@@ -8651,6 +8730,86 @@ dependencies = [
"windows-targets 0.52.6",
]
+[[package]]
+name = "windows-core"
+version = "0.61.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "c0fdd3ddb90610c7638aa2b3a3ab2904fb9e5cdbecc643ddb3647212781c4ae3"
+dependencies = [
+ "windows-implement",
+ "windows-interface",
+ "windows-link",
+ "windows-result",
+ "windows-strings",
+]
+
+[[package]]
+name = "windows-future"
+version = "0.2.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "fc6a41e98427b19fe4b73c550f060b59fa592d7d686537eebf9385621bfbad8e"
+dependencies = [
+ "windows-core 0.61.2",
+ "windows-link",
+ "windows-threading",
+]
+
+[[package]]
+name = "windows-implement"
+version = "0.60.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "a47fddd13af08290e67f4acabf4b459f647552718f683a7b415d290ac744a836"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "syn 2.0.100",
+]
+
+[[package]]
+name = "windows-interface"
+version = "0.59.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "bd9211b69f8dcdfa817bfd14bf1c97c9188afa36f4750130fcdf3f400eca9fa8"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "syn 2.0.100",
+]
+
+[[package]]
+name = "windows-link"
+version = "0.1.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "5e6ad25900d524eaabdbbb96d20b4311e1e7ae1699af4fb28c17ae66c80d798a"
+
+[[package]]
+name = "windows-numerics"
+version = "0.2.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "9150af68066c4c5c07ddc0ce30421554771e528bde427614c61038bc2c92c2b1"
+dependencies = [
+ "windows-core 0.61.2",
+ "windows-link",
+]
+
+[[package]]
+name = "windows-result"
+version = "0.3.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "56f42bd332cc6c8eac5af113fc0c1fd6a8fd2aa08a0119358686e5160d0586c6"
+dependencies = [
+ "windows-link",
+]
+
+[[package]]
+name = "windows-strings"
+version = "0.4.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "56e6c93f3a0c3b36176cb1327a4958a0353d5d166c2a35cb268ace15e91d3b57"
+dependencies = [
+ "windows-link",
+]
+
[[package]]
name = "windows-sys"
version = "0.48.0"
@@ -8709,6 +8868,15 @@ dependencies = [
"windows_x86_64_msvc 0.52.6",
]
+[[package]]
+name = "windows-threading"
+version = "0.1.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b66463ad2e0ea3bbf808b7f1d371311c80e115c0b71d60efc142cafbcfb057a6"
+dependencies = [
+ "windows-link",
+]
+
[[package]]
name = "windows_aarch64_gnullvm"
version = "0.48.0"
@@ -8845,6 +9013,8 @@ dependencies = [
"clap",
"clap_builder",
"const-oid",
+ "crossbeam-epoch",
+ "crossbeam-utils",
"crypto-bigint 0.5.5",
"der 0.7.8",
"deranged",
@@ -8890,6 +9060,7 @@ dependencies = [
"once_cell",
"p256 0.13.2",
"parquet",
+ "portable-atomic",
"prettyplease",
"proc-macro2",
"prost 0.13.5",
diff --git a/Cargo.toml b/Cargo.toml
index 18236a81f5..3f23086797 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -46,10 +46,10 @@ members = [
"libs/proxy/json",
"libs/proxy/postgres-protocol2",
"libs/proxy/postgres-types2",
+ "libs/proxy/subzero_core",
"libs/proxy/tokio-postgres2",
"endpoint_storage",
"pgxn/neon/communicator",
- "proxy/subzero_core",
]
[workspace.package]
@@ -135,7 +135,7 @@ lock_api = "0.4.13"
md5 = "0.7.0"
measured = { version = "0.0.22", features=["lasso"] }
measured-process = { version = "0.0.22" }
-memoffset = "0.9"
+moka = { version = "0.12", features = ["sync"] }
nix = { version = "0.30.1", features = ["dir", "fs", "mman", "process", "socket", "signal", "poll"] }
# Do not update to >= 7.0.0, at least. The update will have a significant impact
# on compute startup metrics (start_postgres_ms), >= 25% degradation.
@@ -233,9 +233,10 @@ uuid = { version = "1.6.1", features = ["v4", "v7", "serde"] }
walkdir = "2.3.2"
rustls-native-certs = "0.8"
whoami = "1.5.1"
-zerocopy = { version = "0.8", features = ["derive", "simd"] }
json-structural-diff = { version = "0.2.0" }
x509-cert = { version = "0.2.5" }
+zerocopy = { version = "0.8", features = ["derive", "simd"] }
+zeroize = "1.8"
## TODO replace this with tracing
env_logger = "0.11"
diff --git a/Dockerfile b/Dockerfile
index 654ae72e56..63cc954873 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -103,7 +103,7 @@ RUN --mount=type=secret,uid=1000,id=SUBZERO_ACCESS_TOKEN \
&& if [ -s /run/secrets/SUBZERO_ACCESS_TOKEN ]; then \
export CARGO_FEATURES="rest_broker"; \
fi \
- && RUSTFLAGS="-Clinker=clang -Clink-arg=-fuse-ld=mold -Clink-arg=-Wl,--no-rosegment -Cforce-frame-pointers=yes ${ADDITIONAL_RUSTFLAGS}" cargo build \
+ && RUSTFLAGS="-Clinker=clang -Clink-arg=-fuse-ld=mold -Clink-arg=-Wl,--no-rosegment -Cforce-frame-pointers=yes ${ADDITIONAL_RUSTFLAGS}" cargo auditable build \
--features $CARGO_FEATURES \
--bin pg_sni_router \
--bin pageserver \
diff --git a/build-tools/Dockerfile b/build-tools/Dockerfile
index b5fe642e6f..c9760f610b 100644
--- a/build-tools/Dockerfile
+++ b/build-tools/Dockerfile
@@ -39,13 +39,13 @@ COPY build-tools/patches/pgcopydbv017.patch /pgcopydbv017.patch
RUN if [ "${DEBIAN_VERSION}" = "bookworm" ]; then \
set -e && \
- apt update && \
- apt install -y --no-install-recommends \
+ apt-get update && \
+ apt-get install -y --no-install-recommends \
ca-certificates wget gpg && \
wget -qO - https://www.postgresql.org/media/keys/ACCC4CF8.asc | gpg --dearmor -o /usr/share/keyrings/postgresql-keyring.gpg && \
echo "deb [signed-by=/usr/share/keyrings/postgresql-keyring.gpg] http://apt.postgresql.org/pub/repos/apt bookworm-pgdg main" > /etc/apt/sources.list.d/pgdg.list && \
apt-get update && \
- apt install -y --no-install-recommends \
+ apt-get install -y --no-install-recommends \
build-essential \
autotools-dev \
libedit-dev \
@@ -89,8 +89,7 @@ RUN useradd -ms /bin/bash nonroot -b /home
# Use strict mode for bash to catch errors early
SHELL ["/bin/bash", "-euo", "pipefail", "-c"]
-RUN mkdir -p /pgcopydb/bin && \
- mkdir -p /pgcopydb/lib && \
+RUN mkdir -p /pgcopydb/{bin,lib} && \
chmod -R 755 /pgcopydb && \
chown -R nonroot:nonroot /pgcopydb
@@ -106,8 +105,8 @@ RUN echo 'Acquire::Retries "5";' > /etc/apt/apt.conf.d/80-retries && \
# 'gdb' is included so that we get backtraces of core dumps produced in
# regression tests
RUN set -e \
- && apt update \
- && apt install -y \
+ && apt-get update \
+ && apt-get install -y --no-install-recommends \
autoconf \
automake \
bison \
@@ -183,22 +182,22 @@ RUN curl -sL "https://github.com/peak/s5cmd/releases/download/v${S5CMD_VERSION}/
ENV LLVM_VERSION=20
RUN curl -fsSL 'https://apt.llvm.org/llvm-snapshot.gpg.key' | apt-key add - \
&& echo "deb http://apt.llvm.org/${DEBIAN_VERSION}/ llvm-toolchain-${DEBIAN_VERSION}-${LLVM_VERSION} main" > /etc/apt/sources.list.d/llvm.stable.list \
- && apt update \
- && apt install -y clang-${LLVM_VERSION} llvm-${LLVM_VERSION} \
+ && apt-get update \
+ && apt-get install -y --no-install-recommends clang-${LLVM_VERSION} llvm-${LLVM_VERSION} \
&& bash -c 'for f in /usr/bin/clang*-${LLVM_VERSION} /usr/bin/llvm*-${LLVM_VERSION}; do ln -s "${f}" "${f%-${LLVM_VERSION}}"; done' \
&& rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/*
# Install node
ENV NODE_VERSION=24
RUN curl -fsSL https://deb.nodesource.com/setup_${NODE_VERSION}.x | bash - \
- && apt install -y nodejs \
+ && apt-get install -y --no-install-recommends nodejs \
&& rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/*
# Install docker
RUN curl -fsSL https://download.docker.com/linux/ubuntu/gpg | gpg --dearmor -o /usr/share/keyrings/docker-archive-keyring.gpg \
&& echo "deb [arch=$(dpkg --print-architecture) signed-by=/usr/share/keyrings/docker-archive-keyring.gpg] https://download.docker.com/linux/debian ${DEBIAN_VERSION} stable" > /etc/apt/sources.list.d/docker.list \
- && apt update \
- && apt install -y docker-ce docker-ce-cli \
+ && apt-get update \
+ && apt-get install -y --no-install-recommends docker-ce docker-ce-cli \
&& rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/*
# Configure sudo & docker
@@ -215,12 +214,11 @@ RUN curl "https://awscli.amazonaws.com/awscli-exe-linux-$(uname -m).zip" -o "aws
# Mold: A Modern Linker
ENV MOLD_VERSION=v2.37.1
RUN set -e \
- && git clone https://github.com/rui314/mold.git \
+ && git clone -b "${MOLD_VERSION}" --depth 1 https://github.com/rui314/mold.git \
&& mkdir mold/build \
- && cd mold/build \
- && git checkout ${MOLD_VERSION} \
+ && cd mold/build \
&& cmake -DCMAKE_BUILD_TYPE=Release -DCMAKE_CXX_COMPILER=clang++ .. \
- && cmake --build . -j $(nproc) \
+ && cmake --build . -j "$(nproc)" \
&& cmake --install . \
&& cd .. \
&& rm -rf mold
@@ -254,7 +252,7 @@ ENV ICU_VERSION=67.1
ENV ICU_PREFIX=/usr/local/icu
# Download and build static ICU
-RUN wget -O /tmp/libicu-${ICU_VERSION}.tgz https://github.com/unicode-org/icu/releases/download/release-${ICU_VERSION//./-}/icu4c-${ICU_VERSION//./_}-src.tgz && \
+RUN wget -O "/tmp/libicu-${ICU_VERSION}.tgz" https://github.com/unicode-org/icu/releases/download/release-${ICU_VERSION//./-}/icu4c-${ICU_VERSION//./_}-src.tgz && \
echo "94a80cd6f251a53bd2a997f6f1b5ac6653fe791dfab66e1eb0227740fb86d5dc /tmp/libicu-${ICU_VERSION}.tgz" | sha256sum --check && \
mkdir /tmp/icu && \
pushd /tmp/icu && \
@@ -265,8 +263,7 @@ RUN wget -O /tmp/libicu-${ICU_VERSION}.tgz https://github.com/unicode-org/icu/re
make install && \
popd && \
rm -rf icu && \
- rm -f /tmp/libicu-${ICU_VERSION}.tgz && \
- popd
+ rm -f /tmp/libicu-${ICU_VERSION}.tgz
# Switch to nonroot user
USER nonroot:nonroot
@@ -279,19 +276,19 @@ ENV PYTHON_VERSION=3.11.12 \
PYENV_ROOT=/home/nonroot/.pyenv \
PATH=/home/nonroot/.pyenv/shims:/home/nonroot/.pyenv/bin:/home/nonroot/.poetry/bin:$PATH
RUN set -e \
- && cd $HOME \
+ && cd "$HOME" \
&& curl -sSO https://raw.githubusercontent.com/pyenv/pyenv-installer/master/bin/pyenv-installer \
&& chmod +x pyenv-installer \
&& ./pyenv-installer \
&& export PYENV_ROOT=/home/nonroot/.pyenv \
&& export PATH="$PYENV_ROOT/bin:$PATH" \
&& export PATH="$PYENV_ROOT/shims:$PATH" \
- && pyenv install ${PYTHON_VERSION} \
- && pyenv global ${PYTHON_VERSION} \
+ && pyenv install "${PYTHON_VERSION}" \
+ && pyenv global "${PYTHON_VERSION}" \
&& python --version \
- && pip install --upgrade pip \
+ && pip install --no-cache-dir --upgrade pip \
&& pip --version \
- && pip install pipenv wheel poetry
+ && pip install --no-cache-dir pipenv wheel poetry
# Switch to nonroot user (again)
USER nonroot:nonroot
@@ -302,6 +299,7 @@ WORKDIR /home/nonroot
ENV RUSTC_VERSION=1.88.0
ENV RUSTUP_HOME="/home/nonroot/.rustup"
ENV PATH="/home/nonroot/.cargo/bin:${PATH}"
+ARG CARGO_AUDITABLE_VERSION=0.7.0
ARG RUSTFILT_VERSION=0.2.1
ARG CARGO_HAKARI_VERSION=0.9.36
ARG CARGO_DENY_VERSION=0.18.2
@@ -317,14 +315,16 @@ RUN curl -sSO https://static.rust-lang.org/rustup/dist/$(uname -m)-unknown-linux
. "$HOME/.cargo/env" && \
cargo --version && rustup --version && \
rustup component add llvm-tools rustfmt clippy && \
- cargo install rustfilt --locked --version ${RUSTFILT_VERSION} && \
- cargo install cargo-hakari --locked --version ${CARGO_HAKARI_VERSION} && \
- cargo install cargo-deny --locked --version ${CARGO_DENY_VERSION} && \
- cargo install cargo-hack --locked --version ${CARGO_HACK_VERSION} && \
- cargo install cargo-nextest --locked --version ${CARGO_NEXTEST_VERSION} && \
- cargo install cargo-chef --locked --version ${CARGO_CHEF_VERSION} && \
- cargo install diesel_cli --locked --version ${CARGO_DIESEL_CLI_VERSION} \
- --features postgres-bundled --no-default-features && \
+ cargo install cargo-auditable --locked --version "${CARGO_AUDITABLE_VERSION}" && \
+ cargo auditable install cargo-auditable --locked --version "${CARGO_AUDITABLE_VERSION}" --force && \
+ cargo auditable install rustfilt --version "${RUSTFILT_VERSION}" && \
+ cargo auditable install cargo-hakari --locked --version "${CARGO_HAKARI_VERSION}" && \
+ cargo auditable install cargo-deny --locked --version "${CARGO_DENY_VERSION}" && \
+ cargo auditable install cargo-hack --locked --version "${CARGO_HACK_VERSION}" && \
+ cargo auditable install cargo-nextest --locked --version "${CARGO_NEXTEST_VERSION}" && \
+ cargo auditable install cargo-chef --locked --version "${CARGO_CHEF_VERSION}" && \
+ cargo auditable install diesel_cli --locked --version "${CARGO_DIESEL_CLI_VERSION}" \
+ --features postgres-bundled --no-default-features && \
rm -rf /home/nonroot/.cargo/registry && \
rm -rf /home/nonroot/.cargo/git
diff --git a/compute/patches/pg_repack.patch b/compute/patches/pg_repack.patch
index 10ed1054ff..b8a057e222 100644
--- a/compute/patches/pg_repack.patch
+++ b/compute/patches/pg_repack.patch
@@ -1,5 +1,11 @@
+commit 5eb393810cf7c7bafa4e394dad2e349e2a8cb2cb
+Author: Alexey Masterov
+Date: Mon Jul 28 18:11:02 2025 +0200
+
+ Patch for pg_repack
+
diff --git a/regress/Makefile b/regress/Makefile
-index bf6edcb..89b4c7f 100644
+index bf6edcb..110e734 100644
--- a/regress/Makefile
+++ b/regress/Makefile
@@ -17,7 +17,7 @@ INTVERSION := $(shell echo $$(($$(echo $(VERSION).0 | sed 's/\([[:digit:]]\{1,\}
@@ -7,18 +13,36 @@ index bf6edcb..89b4c7f 100644
#
-REGRESS := init-extension repack-setup repack-run error-on-invalid-idx no-error-on-invalid-idx after-schema repack-check nosuper tablespace get_order_by trigger
-+REGRESS := init-extension repack-setup repack-run error-on-invalid-idx no-error-on-invalid-idx after-schema repack-check nosuper get_order_by trigger
++REGRESS := init-extension noautovacuum repack-setup repack-run error-on-invalid-idx no-error-on-invalid-idx after-schema repack-check nosuper get_order_by trigger autovacuum
USE_PGXS = 1 # use pgxs if not in contrib directory
PGXS := $(shell $(PG_CONFIG) --pgxs)
-diff --git a/regress/expected/init-extension.out b/regress/expected/init-extension.out
-index 9f2e171..f6e4f8d 100644
---- a/regress/expected/init-extension.out
-+++ b/regress/expected/init-extension.out
-@@ -1,3 +1,2 @@
- SET client_min_messages = warning;
- CREATE EXTENSION pg_repack;
--RESET client_min_messages;
+diff --git a/regress/expected/autovacuum.out b/regress/expected/autovacuum.out
+new file mode 100644
+index 0000000..e7f2363
+--- /dev/null
++++ b/regress/expected/autovacuum.out
+@@ -0,0 +1,7 @@
++ALTER SYSTEM SET autovacuum='on';
++SELECT pg_reload_conf();
++ pg_reload_conf
++----------------
++ t
++(1 row)
++
+diff --git a/regress/expected/noautovacuum.out b/regress/expected/noautovacuum.out
+new file mode 100644
+index 0000000..fc7978e
+--- /dev/null
++++ b/regress/expected/noautovacuum.out
+@@ -0,0 +1,7 @@
++ALTER SYSTEM SET autovacuum='off';
++SELECT pg_reload_conf();
++ pg_reload_conf
++----------------
++ t
++(1 row)
++
diff --git a/regress/expected/nosuper.out b/regress/expected/nosuper.out
index 8d0a94e..63b68bf 100644
--- a/regress/expected/nosuper.out
@@ -50,14 +74,22 @@ index 8d0a94e..63b68bf 100644
INFO: repacking table "public.tbl_cluster"
ERROR: query failed: ERROR: current transaction is aborted, commands ignored until end of transaction block
DETAIL: query was: RESET lock_timeout
-diff --git a/regress/sql/init-extension.sql b/regress/sql/init-extension.sql
-index 9f2e171..f6e4f8d 100644
---- a/regress/sql/init-extension.sql
-+++ b/regress/sql/init-extension.sql
-@@ -1,3 +1,2 @@
- SET client_min_messages = warning;
- CREATE EXTENSION pg_repack;
--RESET client_min_messages;
+diff --git a/regress/sql/autovacuum.sql b/regress/sql/autovacuum.sql
+new file mode 100644
+index 0000000..a8eda63
+--- /dev/null
++++ b/regress/sql/autovacuum.sql
+@@ -0,0 +1,2 @@
++ALTER SYSTEM SET autovacuum='on';
++SELECT pg_reload_conf();
+diff --git a/regress/sql/noautovacuum.sql b/regress/sql/noautovacuum.sql
+new file mode 100644
+index 0000000..13d4836
+--- /dev/null
++++ b/regress/sql/noautovacuum.sql
+@@ -0,0 +1,2 @@
++ALTER SYSTEM SET autovacuum='off';
++SELECT pg_reload_conf();
diff --git a/regress/sql/nosuper.sql b/regress/sql/nosuper.sql
index 072f0fa..dbe60f8 100644
--- a/regress/sql/nosuper.sql
diff --git a/compute_tools/README.md b/compute_tools/README.md
index 446b441c18..e92e5920b9 100644
--- a/compute_tools/README.md
+++ b/compute_tools/README.md
@@ -54,11 +54,11 @@ stateDiagram-v2
Running --> TerminationPendingImmediate : Requested termination
Running --> ConfigurationPending : Received a /configure request with spec
Running --> RefreshConfigurationPending : Received a /refresh_configuration request, compute node will pull a new spec and reconfigure
- RefreshConfigurationPending --> Running : Compute has been re-configured
+ RefreshConfigurationPending --> RefreshConfiguration : Received compute spec and started configuration
+ RefreshConfiguration --> Running : Compute has been re-configured
+ RefreshConfiguration --> RefreshConfigurationPending : Configuration failed and to be retried
TerminationPendingFast --> Terminated : Terminated compute with 30s delay for cplane to inspect status
TerminationPendingImmediate --> Terminated : Terminated compute immediately
- Running --> TerminationPending : Requested termination
- TerminationPending --> Terminated : Terminated compute
Failed --> RefreshConfigurationPending : Received a /refresh_configuration request
Failed --> [*] : Compute exited
Terminated --> [*] : Compute exited
diff --git a/compute_tools/src/bin/compute_ctl.rs b/compute_tools/src/bin/compute_ctl.rs
index 83a2e6dc68..2b4802f309 100644
--- a/compute_tools/src/bin/compute_ctl.rs
+++ b/compute_tools/src/bin/compute_ctl.rs
@@ -49,10 +49,10 @@ use compute_tools::compute::{
BUILD_TAG, ComputeNode, ComputeNodeParams, forward_termination_signal,
};
use compute_tools::extension_server::get_pg_version_string;
-use compute_tools::logger::*;
use compute_tools::params::*;
use compute_tools::pg_isready::get_pg_isready_bin;
use compute_tools::spec::*;
+use compute_tools::{hadron_metrics, installed_extensions, logger::*};
use rlimit::{Resource, setrlimit};
use signal_hook::consts::{SIGINT, SIGQUIT, SIGTERM};
use signal_hook::iterator::Signals;
@@ -82,6 +82,15 @@ struct Cli {
#[arg(long, default_value_t = 3081)]
pub internal_http_port: u16,
+ /// Backwards-compatible --http-port for Hadron deployments. Functionally the
+ /// same as --external-http-port.
+ #[arg(
+ long,
+ conflicts_with = "external_http_port",
+ conflicts_with = "internal_http_port"
+ )]
+ pub http_port: Option<u16>,
+
#[arg(short = 'D', long, value_name = "DATADIR")]
pub pgdata: String,
@@ -181,6 +190,26 @@ impl Cli {
}
}
+// Hadron helpers to get compatible compute_ctl http ports from Cli. The old `--http-port`
+// arg is used and acts the same as `--external-http-port`. The internal http port is defined
+// to be http_port + 1. Hadron runs in the dblet environment which uses the host network, so
+// we need to be careful with the ports to choose.
+fn get_external_http_port(cli: &Cli) -> u16 {
+ if cli.lakebase_mode {
+ return cli.http_port.unwrap_or(cli.external_http_port);
+ }
+ cli.external_http_port
+}
+fn get_internal_http_port(cli: &Cli) -> u16 {
+ if cli.lakebase_mode {
+ return cli
+ .http_port
+ .map(|p| p + 1)
+ .unwrap_or(cli.internal_http_port);
+ }
+ cli.internal_http_port
+}
+
fn main() -> Result<()> {
let cli = Cli::parse();
@@ -205,10 +234,18 @@ fn main() -> Result<()> {
// enable core dumping for all child processes
setrlimit(Resource::CORE, rlimit::INFINITY, rlimit::INFINITY)?;
+ if cli.lakebase_mode {
+ installed_extensions::initialize_metrics();
+ hadron_metrics::initialize_metrics();
+ }
+
let connstr = Url::parse(&cli.connstr).context("cannot parse connstr as a URL")?;
let config = get_config(&cli)?;
+ let external_http_port = get_external_http_port(&cli);
+ let internal_http_port = get_internal_http_port(&cli);
+
let compute_node = ComputeNode::new(
ComputeNodeParams {
compute_id: cli.compute_id,
@@ -217,8 +254,8 @@ fn main() -> Result<()> {
pgdata: cli.pgdata.clone(),
pgbin: cli.pgbin.clone(),
pgversion: get_pg_version_string(&cli.pgbin),
- external_http_port: cli.external_http_port,
- internal_http_port: cli.internal_http_port,
+ external_http_port,
+ internal_http_port,
remote_ext_base_url: cli.remote_ext_base_url.clone(),
resize_swap_on_bind: cli.resize_swap_on_bind,
set_disk_quota_for_fs: cli.set_disk_quota_for_fs,
diff --git a/compute_tools/src/compute.rs b/compute_tools/src/compute.rs
index e3ac887e9c..27d33d8cd8 100644
--- a/compute_tools/src/compute.rs
+++ b/compute_tools/src/compute.rs
@@ -6,7 +6,8 @@ use compute_api::responses::{
LfcPrewarmState, PromoteState, TlsConfig,
};
use compute_api::spec::{
- ComputeAudit, ComputeFeature, ComputeMode, ComputeSpec, ExtVersion, PageserverProtocol, PgIdent,
+ ComputeAudit, ComputeFeature, ComputeMode, ComputeSpec, ExtVersion, GenericOption,
+ PageserverProtocol, PgIdent, Role,
};
use futures::StreamExt;
use futures::future::join_all;
@@ -41,8 +42,9 @@ use utils::shard::{ShardCount, ShardIndex, ShardNumber};
use crate::configurator::launch_configurator;
use crate::disk_quota::set_disk_quota;
+use crate::hadron_metrics::COMPUTE_ATTACHED;
use crate::installed_extensions::get_installed_extensions;
-use crate::logger::startup_context_from_env;
+use crate::logger::{self, startup_context_from_env};
use crate::lsn_lease::launch_lsn_lease_bg_task_for_static;
use crate::metrics::COMPUTE_CTL_UP;
use crate::monitor::launch_monitor;
@@ -412,6 +414,130 @@ struct StartVmMonitorResult {
vm_monitor: Option>>,
}
+// BEGIN_HADRON
+/// This function creates roles that are used by Databricks.
+/// These roles do not need to be bootstrapped at PG Compute provisioning time.
+/// The auth method for these roles is configured in databricks_pg_hba.conf in the universe repository.
+pub(crate) fn create_databricks_roles() -> Vec<String> {
+ let roles = vec![
+ // Role for prometheus_stats_exporter
+ Role {
+ name: "databricks_monitor".to_string(),
+ // This uses a "local" connection and the auth method for that is "trust", so no password is needed.
+ encrypted_password: None,
+ options: Some(vec![GenericOption {
+ name: "IN ROLE pg_monitor".to_string(),
+ value: None,
+ vartype: "string".to_string(),
+ }]),
+ },
+ // Role for brickstore control plane
+ Role {
+ name: "databricks_control_plane".to_string(),
+ // Certificate user does not need password.
+ encrypted_password: None,
+ options: Some(vec![GenericOption {
+ name: "SUPERUSER".to_string(),
+ value: None,
+ vartype: "string".to_string(),
+ }]),
+ },
+ // Role for brickstore httpgateway.
+ Role {
+ name: "databricks_gateway".to_string(),
+ // Certificate user does not need password.
+ encrypted_password: None,
+ options: None,
+ },
+ ];
+
+ roles
+ .into_iter()
+ .map(|role| {
+ let query = format!(
+ r#"
+ DO $$
+ BEGIN
+ IF NOT EXISTS (
+ SELECT FROM pg_catalog.pg_roles WHERE rolname = '{}')
+ THEN
+ CREATE ROLE {} {};
+ END IF;
+ END
+ $$;"#,
+ role.name,
+ role.name.pg_quote(),
+ role.to_pg_options(),
+ );
+ query
+ })
+ .collect()
+}
+
+/// Databricks-specific environment variables to be passed to the `postgres` sub-process.
+pub struct DatabricksEnvVars {
+ /// The Databricks "endpoint ID" of the compute instance. Used by `postgres` to check
+ /// the token scopes of internal auth tokens.
+ pub endpoint_id: String,
+ /// Hostname of the Databricks workspace URL this compute instance belongs to.
+ /// Used by postgres to verify Databricks PAT tokens.
+ pub workspace_host: String,
+
+ pub lakebase_mode: bool,
+}
+
+impl DatabricksEnvVars {
+ pub fn new(
+ compute_spec: &ComputeSpec,
+ compute_id: Option<&String>,
+ instance_id: Option<String>,
+ lakebase_mode: bool,
+ ) -> Self {
+ let endpoint_id = if let Some(instance_id) = instance_id {
+ // Use instance_id as endpoint_id if it is set. This code path is for PuPr model.
+ instance_id
+ } else {
+ // Use compute_id as endpoint_id if instance_id is not set. The code path is for PrPr model.
+ // compute_id is a string format of "{endpoint_id}/{compute_idx}"
+ // endpoint_id is a uuid. We only need to pass down endpoint_id to postgres.
+ // Panics if compute_id is not set or not in the expected format.
+ compute_id.unwrap().split('/').next().unwrap().to_string()
+ };
+ let workspace_host = compute_spec
+ .databricks_settings
+ .as_ref()
+ .map(|s| s.databricks_workspace_host.clone())
+ .unwrap_or("".to_string());
+ Self {
+ endpoint_id,
+ workspace_host,
+ lakebase_mode,
+ }
+ }
+
+ /// Constants for the names of Databricks-specific postgres environment variables.
+ const DATABRICKS_ENDPOINT_ID_ENVVAR: &'static str = "DATABRICKS_ENDPOINT_ID";
+ const DATABRICKS_WORKSPACE_HOST_ENVVAR: &'static str = "DATABRICKS_WORKSPACE_HOST";
+
+ /// Convert DatabricksEnvVars to a list of string pairs that can be passed as env vars. Consumes `self`.
+ pub fn to_env_var_list(self) -> Vec<(String, String)> {
+ if !self.lakebase_mode {
+ // In neon env, we don't need to pass down the env vars to postgres.
+ return vec![];
+ }
+ vec![
+ (
+ Self::DATABRICKS_ENDPOINT_ID_ENVVAR.to_string(),
+ self.endpoint_id.clone(),
+ ),
+ (
+ Self::DATABRICKS_WORKSPACE_HOST_ENVVAR.to_string(),
+ self.workspace_host.clone(),
+ ),
+ ]
+ }
+}
+
impl ComputeNode {
pub fn new(params: ComputeNodeParams, config: ComputeConfig) -> Result<Self> {
let connstr = params.connstr.as_str();
@@ -448,7 +574,11 @@ impl ComputeNode {
let mut new_state = ComputeState::new();
if let Some(spec) = config.spec {
let pspec = ParsedSpec::try_from(spec).map_err(|msg| anyhow::anyhow!(msg))?;
- new_state.pspec = Some(pspec);
+ if params.lakebase_mode {
+ ComputeNode::set_spec(¶ms, &mut new_state, pspec);
+ } else {
+ new_state.pspec = Some(pspec);
+ }
}
Ok(ComputeNode {
@@ -1046,7 +1176,14 @@ impl ComputeNode {
// If it is something different then create_dir() will error out anyway.
let pgdata = &self.params.pgdata;
let _ok = fs::remove_dir_all(pgdata);
- fs::create_dir(pgdata)?;
+ if self.params.lakebase_mode {
+ // Ignore creation errors if the directory already exists (e.g. mounting it ahead of time).
+ // If it is something different then PG startup will error out anyway.
+ let _ok = fs::create_dir(pgdata);
+ } else {
+ fs::create_dir(pgdata)?;
+ }
+
fs::set_permissions(pgdata, fs::Permissions::from_mode(0o700))?;
Ok(())
@@ -1410,6 +1547,8 @@ impl ComputeNode {
let pgdata_path = Path::new(&self.params.pgdata);
let tls_config = self.tls_config(&pspec.spec);
+ let databricks_settings = spec.databricks_settings.as_ref();
+ let postgres_port = self.params.connstr.port();
// Remove/create an empty pgdata directory and put configuration there.
self.create_pgdata()?;
@@ -1417,8 +1556,11 @@ impl ComputeNode {
pgdata_path,
&self.params,
&pspec.spec,
+ postgres_port,
self.params.internal_http_port,
tls_config,
+ databricks_settings,
+ self.params.lakebase_mode,
)?;
// Syncing safekeepers is only safe with primary nodes: if a primary
@@ -1458,8 +1600,28 @@ impl ComputeNode {
)
})?;
- // Update pg_hba.conf received with basebackup.
- update_pg_hba(pgdata_path, None)?;
+ if let Some(settings) = databricks_settings {
+ copy_tls_certificates(
+ &settings.pg_compute_tls_settings.key_file,
+ &settings.pg_compute_tls_settings.cert_file,
+ pgdata_path,
+ )?;
+
+ // Update pg_hba.conf received with basebackup including additional databricks settings.
+ update_pg_hba(pgdata_path, Some(&settings.databricks_pg_hba))?;
+ update_pg_ident(pgdata_path, Some(&settings.databricks_pg_ident))?;
+ } else {
+ // Update pg_hba.conf received with basebackup.
+ update_pg_hba(pgdata_path, None)?;
+ }
+
+ if let Some(databricks_settings) = spec.databricks_settings.as_ref() {
+ copy_tls_certificates(
+ &databricks_settings.pg_compute_tls_settings.key_file,
+ &databricks_settings.pg_compute_tls_settings.cert_file,
+ pgdata_path,
+ )?;
+ }
// Place pg_dynshmem under /dev/shm. This allows us to use
// 'dynamic_shared_memory_type = mmap' so that the files are placed in
@@ -1500,7 +1662,7 @@ impl ComputeNode {
// symlink doesn't affect anything.
//
// See https://github.com/neondatabase/autoscaling/issues/800
- std::fs::remove_dir(pgdata_path.join("pg_dynshmem"))?;
+ std::fs::remove_dir_all(pgdata_path.join("pg_dynshmem"))?;
symlink("/dev/shm/", pgdata_path.join("pg_dynshmem"))?;
match spec.mode {
@@ -1515,6 +1677,12 @@ impl ComputeNode {
/// Start and stop a postgres process to warm up the VM for startup.
pub fn prewarm_postgres_vm_memory(&self) -> Result<()> {
+ if self.params.lakebase_mode {
+ // We are running in Hadron mode. Disabling this prewarming step for now as it could run
+ // into dblet port conflicts and also doesn't add much value with our current infra.
+ info!("Skipping postgres prewarming in Hadron mode");
+ return Ok(());
+ }
info!("prewarming VM memory");
// Create pgdata
@@ -1572,14 +1740,36 @@ impl ComputeNode {
pub fn start_postgres(&self, storage_auth_token: Option) -> Result {
let pgdata_path = Path::new(&self.params.pgdata);
+ let env_vars: Vec<(String, String)> = if self.params.lakebase_mode {
+ let databricks_env_vars = {
+ let state = self.state.lock().unwrap();
+ let spec = &state.pspec.as_ref().unwrap().spec;
+ DatabricksEnvVars::new(
+ spec,
+ Some(&self.params.compute_id),
+ self.params.instance_id.clone(),
+ self.params.lakebase_mode,
+ )
+ };
+
+ info!(
+ "Starting Postgres for databricks endpoint id: {}",
+ &databricks_env_vars.endpoint_id
+ );
+
+ let mut env_vars = databricks_env_vars.to_env_var_list();
+ env_vars.extend(storage_auth_token.map(|t| ("NEON_AUTH_TOKEN".to_string(), t)));
+ env_vars
+ } else if let Some(storage_auth_token) = &storage_auth_token {
+ vec![("NEON_AUTH_TOKEN".to_owned(), storage_auth_token.to_owned())]
+ } else {
+ vec![]
+ };
+
// Run postgres as a child process.
let mut pg = maybe_cgexec(&self.params.pgbin)
.args(["-D", &self.params.pgdata])
- .envs(if let Some(storage_auth_token) = &storage_auth_token {
- vec![("NEON_AUTH_TOKEN", storage_auth_token)]
- } else {
- vec![]
- })
+ .envs(env_vars)
.stderr(Stdio::piped())
.spawn()
.expect("cannot start postgres process");
@@ -1731,7 +1921,15 @@ impl ComputeNode {
/// Do initial configuration of the already started Postgres.
#[instrument(skip_all)]
pub fn apply_config(&self, compute_state: &ComputeState) -> Result<()> {
- let conf = self.get_tokio_conn_conf(Some("compute_ctl:apply_config"));
+ let mut conf = self.get_tokio_conn_conf(Some("compute_ctl:apply_config"));
+
+ if self.params.lakebase_mode {
+ // Set a 2-minute statement_timeout for the session applying config. The individual SQL statements
+ // used in apply_spec_sql() should not take long (they are just creating users and installing
+ // extensions). If any of them are stuck for an extended period of time it usually indicates a
+ // pageserver connectivity problem and we should bail out.
+ conf.options("-c statement_timeout=2min");
+ }
let conf = Arc::new(conf);
let spec = Arc::new(
@@ -1882,12 +2080,16 @@ impl ComputeNode {
// Write new config
let pgdata_path = Path::new(&self.params.pgdata);
+ let postgres_port = self.params.connstr.port();
config::write_postgres_conf(
pgdata_path,
&self.params,
&spec,
+ postgres_port,
self.params.internal_http_port,
tls_config,
+ spec.databricks_settings.as_ref(),
+ self.params.lakebase_mode,
)?;
self.pg_reload_conf()?;
@@ -1993,6 +2195,7 @@ impl ComputeNode {
// wait
ComputeStatus::Init
| ComputeStatus::Configuration
+ | ComputeStatus::RefreshConfiguration
| ComputeStatus::RefreshConfigurationPending
| ComputeStatus::Empty => {
state = self.state_changed.wait(state).unwrap();
@@ -2044,7 +2247,17 @@ impl ComputeNode {
pub fn check_for_core_dumps(&self) -> Result<()> {
let core_dump_dir = match std::env::consts::OS {
"macos" => Path::new("/cores/"),
- _ => Path::new(&self.params.pgdata),
+ // BEGIN HADRON
+ // NB: Read core dump files from a fixed location outside of
+ // the data directory since `compute_ctl` wipes the data directory
+ // across container restarts.
+ _ => {
+ if self.params.lakebase_mode {
+ Path::new("/databricks/logs/brickstore")
+ } else {
+ Path::new(&self.params.pgdata)
+ }
+ } // END HADRON
};
// Collect core dump paths if any
@@ -2357,7 +2570,7 @@ LIMIT 100",
if let Some(libs) = spec.cluster.settings.find("shared_preload_libraries") {
libs_vec = libs
.split(&[',', '\'', ' '])
- .filter(|s| *s != "neon" && !s.is_empty())
+ .filter(|s| *s != "neon" && *s != "databricks_auth" && !s.is_empty())
.map(str::to_string)
.collect();
}
@@ -2376,7 +2589,7 @@ LIMIT 100",
if let Some(libs) = shared_preload_libraries_line.split("='").nth(1) {
preload_libs_vec = libs
.split(&[',', '\'', ' '])
- .filter(|s| *s != "neon" && !s.is_empty())
+ .filter(|s| *s != "neon" && *s != "databricks_auth" && !s.is_empty())
.map(str::to_string)
.collect();
}
@@ -2550,6 +2763,34 @@ LIMIT 100",
);
}
}
+
+ /// Set the compute spec and update related metrics.
+ /// This is the central place where pspec is updated.
+ pub fn set_spec(params: &ComputeNodeParams, state: &mut ComputeState, pspec: ParsedSpec) {
+ state.pspec = Some(pspec);
+ ComputeNode::update_attached_metric(params, state);
+ let _ = logger::update_ids(¶ms.instance_id, &Some(params.compute_id.clone()));
+ }
+
+ pub fn update_attached_metric(params: &ComputeNodeParams, state: &mut ComputeState) {
+ // Update the pg_cctl_attached gauge when all identifiers are available.
+ if let Some(instance_id) = ¶ms.instance_id {
+ if let Some(pspec) = &state.pspec {
+ // Clear all values in the metric
+ COMPUTE_ATTACHED.reset();
+
+ // Set new metric value
+ COMPUTE_ATTACHED
+ .with_label_values(&[
+ ¶ms.compute_id,
+ instance_id,
+ &pspec.tenant_id.to_string(),
+ &pspec.timeline_id.to_string(),
+ ])
+ .set(1);
+ }
+ }
+ }
}
pub async fn installed_extensions(conf: tokio_postgres::Config) -> Result<()> {
diff --git a/compute_tools/src/config.rs b/compute_tools/src/config.rs
index dd46353343..55a1eda0b7 100644
--- a/compute_tools/src/config.rs
+++ b/compute_tools/src/config.rs
@@ -7,11 +7,14 @@ use std::io::prelude::*;
use std::path::Path;
use compute_api::responses::TlsConfig;
-use compute_api::spec::{ComputeAudit, ComputeMode, ComputeSpec, GenericOption};
+use compute_api::spec::{
+ ComputeAudit, ComputeMode, ComputeSpec, DatabricksSettings, GenericOption,
+};
use crate::compute::ComputeNodeParams;
use crate::pg_helpers::{
- GenericOptionExt, GenericOptionsSearch, PgOptionsSerialize, escape_conf_value,
+ DatabricksSettingsExt as _, GenericOptionExt, GenericOptionsSearch, PgOptionsSerialize,
+ escape_conf_value,
};
use crate::tls::{self, SERVER_CRT, SERVER_KEY};
@@ -40,12 +43,16 @@ pub fn line_in_file(path: &Path, line: &str) -> Result {
}
/// Create or completely rewrite configuration file specified by `path`
+#[allow(clippy::too_many_arguments)]
pub fn write_postgres_conf(
pgdata_path: &Path,
params: &ComputeNodeParams,
spec: &ComputeSpec,
+ postgres_port: Option<u16>,
extension_server_port: u16,
tls_config: &Option<TlsConfig>,
+ databricks_settings: Option<&DatabricksSettings>,
+ lakebase_mode: bool,
) -> Result<()> {
let path = pgdata_path.join("postgresql.conf");
// File::create() destroys the file content if it exists.
@@ -285,6 +292,24 @@ pub fn write_postgres_conf(
writeln!(file, "log_destination='stderr,syslog'")?;
}
+ if lakebase_mode {
+ // Explicitly set the port based on the connstr, overriding any previous port setting.
+ // Note: It is important that we don't specify a different port again after this.
+ let port = postgres_port.expect("port must be present in connstr");
+ writeln!(file, "port = {port}")?;
+
+ // These are Databricks-specific settings.
+ // They should be near the end of the file so that they can override any settings above,
+ // but before the `compute_ctl_temp_override.conf` include below.
+ // `compute_ctl_temp_override.conf` is intended to override any settings above during specific operations,
+ // so to prevent potential breakage in the future we keep these settings above it.
+ writeln!(file, "# Databricks settings start")?;
+ if let Some(settings) = databricks_settings {
+ writeln!(file, "{}", settings.as_pg_settings())?;
+ }
+ writeln!(file, "# Databricks settings end")?;
+ }
+
// This is essential to keep this line at the end of the file,
// because it is intended to override any settings above.
writeln!(file, "include_if_exists = 'compute_ctl_temp_override.conf'")?;
diff --git a/compute_tools/src/configurator.rs b/compute_tools/src/configurator.rs
index 864335fd2c..feca8337b2 100644
--- a/compute_tools/src/configurator.rs
+++ b/compute_tools/src/configurator.rs
@@ -2,6 +2,7 @@ use std::fs::File;
use std::thread;
use std::{path::Path, sync::Arc};
+use anyhow::Result;
use compute_api::responses::{ComputeConfig, ComputeStatus};
use tracing::{error, info, instrument};
@@ -13,6 +14,10 @@ fn configurator_main_loop(compute: &Arc) {
info!("waiting for reconfiguration requests");
loop {
let mut state = compute.state.lock().unwrap();
+ /* BEGIN_HADRON */
+ // RefreshConfiguration should only be used inside the loop
+ assert_ne!(state.status, ComputeStatus::RefreshConfiguration);
+ /* END_HADRON */
if compute.params.lakebase_mode {
while state.status != ComputeStatus::ConfigurationPending
@@ -54,53 +59,81 @@ fn configurator_main_loop(compute: &Arc) {
info!(
"compute node suspects its configuration is out of date, now refreshing configuration"
);
- // Drop the lock guard here to avoid holding the lock while downloading spec from the control plane / HCC.
- // This is the only thread that can move compute_ctl out of the `RefreshConfigurationPending` state, so it
+ state.set_status(ComputeStatus::RefreshConfiguration, &compute.state_changed);
+ // Drop the lock guard here to avoid holding the lock while downloading config from the control plane / HCC.
+ // This is the only thread that can move compute_ctl out of the `RefreshConfiguration` state, so it
// is safe to drop the lock like this.
drop(state);
- let spec = if let Some(config_path) = &compute.params.config_path_test_only {
- // This path is only to make testing easier. In production we always get the spec from the HCC.
- info!(
- "reloading config.json from path: {}",
- config_path.to_string_lossy()
- );
- let path = Path::new(config_path);
- if let Ok(file) = File::open(path) {
- match serde_json::from_reader::<_, ComputeConfig>(file) {
- Ok(config) => config.spec,
- Err(e) => {
- error!("could not parse spec file: {}", e);
- None
- }
- }
- } else {
- error!(
- "could not open config file at path: {}",
+ let get_config_result: anyhow::Result<ComputeConfig> =
+ if let Some(config_path) = &compute.params.config_path_test_only {
+ // This path is only to make testing easier. In production we always get the config from the HCC.
+ info!(
+ "reloading config.json from path: {}",
config_path.to_string_lossy()
);
- None
- }
- } else if let Some(control_plane_uri) = &compute.params.control_plane_uri {
- match get_config_from_control_plane(control_plane_uri, &compute.params.compute_id) {
- Ok(config) => config.spec,
- Err(e) => {
- error!("could not get config from control plane: {}", e);
- None
+ let path = Path::new(config_path);
+ if let Ok(file) = File::open(path) {
+ match serde_json::from_reader::<_, ComputeConfig>(file) {
+ Ok(config) => Ok(config),
+ Err(e) => {
+ error!("could not parse config file: {}", e);
+ Err(anyhow::anyhow!("could not parse config file: {}", e))
+ }
+ }
+ } else {
+ error!(
+ "could not open config file at path: {:?}",
+ config_path.to_string_lossy()
+ );
+ Err(anyhow::anyhow!(
+ "could not open config file at path: {}",
+ config_path.to_string_lossy()
+ ))
}
- }
- } else {
- None
- };
+ } else if let Some(control_plane_uri) = &compute.params.control_plane_uri {
+ get_config_from_control_plane(control_plane_uri, &compute.params.compute_id)
+ } else {
+ Err(anyhow::anyhow!("config_path_test_only is not set"))
+ };
- if let Some(spec) = spec {
- if let Ok(pspec) = ParsedSpec::try_from(spec) {
+ // Parse any received ComputeSpec and transpose the result into a Result