diff --git a/.github/actionlint.yml b/.github/actionlint.yml
index 25b2fc702a..8a4bcaf811 100644
--- a/.github/actionlint.yml
+++ b/.github/actionlint.yml
@@ -31,7 +31,7 @@ config-variables:
- NEON_PROD_AWS_ACCOUNT_ID
- PGREGRESS_PG16_PROJECT_ID
- PGREGRESS_PG17_PROJECT_ID
- - PREWARM_PGBENCH_SIZE
+ - PREWARM_PROJECT_ID
- REMOTE_STORAGE_AZURE_CONTAINER
- REMOTE_STORAGE_AZURE_REGION
- SLACK_CICD_CHANNEL_ID
diff --git a/.github/workflows/benchbase_tpcc.yml b/.github/workflows/benchbase_tpcc.yml
new file mode 100644
index 0000000000..3a36a97bb1
--- /dev/null
+++ b/.github/workflows/benchbase_tpcc.yml
@@ -0,0 +1,384 @@
+name: TPC-C like benchmark using benchbase
+
+on:
+ schedule:
+ # * is a special character in YAML so you have to quote this string
+ # ┌───────────── minute (0 - 59)
+ # │ ┌───────────── hour (0 - 23)
+ # │ │ ┌───────────── day of the month (1 - 31)
+ # │ │ │ ┌───────────── month (1 - 12 or JAN-DEC)
+ # │ │ │ │ ┌───────────── day of the week (0 - 6 or SUN-SAT)
+ - cron: '0 6 * * *' # run once a day at 6 AM UTC
+ workflow_dispatch: # adds ability to run this manually
+
+defaults:
+ run:
+ shell: bash -euxo pipefail {0}
+
+concurrency:
+  # Allow only one workflow run globally because we do not want to be too noisy in the production environment
+ group: benchbase-tpcc-workflow
+ cancel-in-progress: false
+
+permissions:
+ contents: read
+
+jobs:
+ benchbase-tpcc:
+ strategy:
+ fail-fast: false # allow other variants to continue even if one fails
+ matrix:
+ include:
+          - warehouses: 50 # defines the number of warehouses and is used to compute the number of terminals (see note below)
+            max_rate: 800 # measured max TPS at this scale factor, based on experiments; adjust if performance changes
+            min_cu: 0.25 # simulate the free tier plan (0.25-2 CU)
+ max_cu: 2
+ - warehouses: 500 # serverless plan (2-8 CU)
+ max_rate: 2000
+ min_cu: 2
+ max_cu: 8
+ - warehouses: 1000 # business plan (2-16 CU)
+ max_rate: 2900
+ min_cu: 2
+ max_cu: 16
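+          # Note: the terminal count is derived from the warehouse count by generate_workload_size.py;
+          # TPC-C pairs 10 terminals (one per district) with each warehouse, assuming the generator
+          # follows that convention.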
+ max-parallel: 1 # we want to run each workload size sequentially to avoid noisy neighbors
+ permissions:
+ contents: write
+ statuses: write
+ id-token: write # aws-actions/configure-aws-credentials
+ env:
+ PG_CONFIG: /tmp/neon/pg_install/v17/bin/pg_config
+ PSQL: /tmp/neon/pg_install/v17/bin/psql
+ PG_17_LIB_PATH: /tmp/neon/pg_install/v17/lib
+ POSTGRES_VERSION: 17
+ runs-on: [ self-hosted, us-east-2, x64 ]
+ timeout-minutes: 1440
+
+ steps:
+ - name: Harden the runner (Audit all outbound calls)
+ uses: step-security/harden-runner@4d991eb9b905ef189e4c376166672c3f2f230481 # v2.11.0
+ with:
+ egress-policy: audit
+
+ - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
+
+ - name: Configure AWS credentials # necessary to download artefacts
+ uses: aws-actions/configure-aws-credentials@e3dd6a429d7300a6a4c196c26e071d42e0343502 # v4.0.2
+ with:
+ aws-region: eu-central-1
+ role-to-assume: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
+          role-duration-seconds: 18000 # 5 hours is currently the maximum allowed for this IAM role
+
+ - name: Download Neon artifact
+ uses: ./.github/actions/download
+ with:
+ name: neon-${{ runner.os }}-${{ runner.arch }}-release-artifact
+ path: /tmp/neon/
+ prefix: latest
+ aws-oidc-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
+
+ - name: Create Neon Project
+ id: create-neon-project-tpcc
+ uses: ./.github/actions/neon-project-create
+ with:
+ region_id: aws-us-east-2
+ postgres_version: ${{ env.POSTGRES_VERSION }}
+ compute_units: '[${{ matrix.min_cu }}, ${{ matrix.max_cu }}]'
+ api_key: ${{ secrets.NEON_PRODUCTION_API_KEY_4_BENCHMARKS }}
+ api_host: console.neon.tech # production (!)
+
+ - name: Initialize Neon project
+ env:
+ BENCHMARK_TPCC_CONNSTR: ${{ steps.create-neon-project-tpcc.outputs.dsn }}
+ PROJECT_ID: ${{ steps.create-neon-project-tpcc.outputs.project_id }}
+ run: |
+ echo "Initializing Neon project with project_id: ${PROJECT_ID}"
+ export LD_LIBRARY_PATH=${PG_17_LIB_PATH}
+
+ # Retry logic for psql connection with 1 minute sleep between attempts
+ for attempt in {1..3}; do
+ echo "Attempt ${attempt}/3: Creating extensions in Neon project"
+ if ${PSQL} "${BENCHMARK_TPCC_CONNSTR}" -c "CREATE EXTENSION IF NOT EXISTS neon; CREATE EXTENSION IF NOT EXISTS neon_utils;"; then
+ echo "Successfully created extensions"
+ break
+ else
+ echo "Failed to create extensions on attempt ${attempt}"
+ if [ ${attempt} -lt 3 ]; then
+ echo "Waiting 60 seconds before retry..."
+ sleep 60
+ else
+ echo "All attempts failed, exiting"
+ exit 1
+ fi
+ fi
+ done
+
+ echo "BENCHMARK_TPCC_CONNSTR=${BENCHMARK_TPCC_CONNSTR}" >> $GITHUB_ENV
+
+ - name: Generate BenchBase workload configuration
+ env:
+ WAREHOUSES: ${{ matrix.warehouses }}
+ MAX_RATE: ${{ matrix.max_rate }}
+ run: |
+ echo "Generating BenchBase configs for warehouses: ${WAREHOUSES}, max_rate: ${MAX_RATE}"
+
+ # Extract hostname and password from connection string
+ # Format: postgresql://username:password@hostname/database?params (no port for Neon)
+ HOSTNAME=$(echo "${BENCHMARK_TPCC_CONNSTR}" | sed -n 's|.*://[^:]*:[^@]*@\([^/]*\)/.*|\1|p')
+ PASSWORD=$(echo "${BENCHMARK_TPCC_CONNSTR}" | sed -n 's|.*://[^:]*:\([^@]*\)@.*|\1|p')
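+          # For illustration, a hypothetical DSN postgresql://neondb_owner:s3cret@ep-example-123456.us-east-2.aws.neon.tech/neondb
+          # yields HOSTNAME=ep-example-123456.us-east-2.aws.neon.tech and PASSWORD=s3cret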
+
+ echo "Extracted hostname: ${HOSTNAME}"
+
+ # Use runner temp (NVMe) as working directory
+ cd "${RUNNER_TEMP}"
+
+ # Copy the generator script
+ cp "${GITHUB_WORKSPACE}/test_runner/performance/benchbase_tpc_c_helpers/generate_workload_size.py" .
+
+ # Generate configs and scripts
+ python3 generate_workload_size.py \
+ --warehouses ${WAREHOUSES} \
+ --max-rate ${MAX_RATE} \
+ --hostname ${HOSTNAME} \
+ --password ${PASSWORD} \
+ --runner-arch ${{ runner.arch }}
+
+ # Fix path mismatch: move generated configs and scripts to expected locations
+ mv ../configs ./configs
+ mv ../scripts ./scripts
+
+ - name: Prepare database (load data)
+ env:
+ WAREHOUSES: ${{ matrix.warehouses }}
+ run: |
+ cd "${RUNNER_TEMP}"
+
+ echo "Loading ${WAREHOUSES} warehouses into database..."
+
+ # Run the loader script and capture output to log file while preserving stdout/stderr
+ ./scripts/load_${WAREHOUSES}_warehouses.sh 2>&1 | tee "load_${WAREHOUSES}_warehouses.log"
+
+ echo "Database loading completed"
+
+      - name: Run TPC-C benchmark (warmup phase, then benchmark at 70% of configured max TPS)
+ env:
+ WAREHOUSES: ${{ matrix.warehouses }}
+ run: |
+ cd "${RUNNER_TEMP}"
+
+ echo "Running TPC-C benchmark with ${WAREHOUSES} warehouses..."
+
+ # Run the optimal rate benchmark
+ ./scripts/execute_${WAREHOUSES}_warehouses_opt_rate.sh
+
+ echo "Benchmark execution completed"
+
+      - name: Run TPC-C benchmark (warmup phase, then ramp TPS down and up again in 5-minute intervals)
+ env:
+ WAREHOUSES: ${{ matrix.warehouses }}
+ run: |
+ cd "${RUNNER_TEMP}"
+
+ echo "Running TPC-C ramp-down-up with ${WAREHOUSES} warehouses..."
+
+          # Run the ramp-down/up benchmark
+ ./scripts/execute_${WAREHOUSES}_warehouses_ramp_up.sh
+
+ echo "Benchmark execution completed"
+
+ - name: Process results (upload to test results database and generate diagrams)
+ env:
+ WAREHOUSES: ${{ matrix.warehouses }}
+ MIN_CU: ${{ matrix.min_cu }}
+ MAX_CU: ${{ matrix.max_cu }}
+ PROJECT_ID: ${{ steps.create-neon-project-tpcc.outputs.project_id }}
+ REVISION: ${{ github.sha }}
+ PERF_DB_CONNSTR: ${{ secrets.PERF_TEST_RESULT_CONNSTR }}
+ run: |
+ cd "${RUNNER_TEMP}"
+
+ echo "Creating temporary Python environment for results processing..."
+
+ # Create temporary virtual environment
+ python3 -m venv temp_results_env
+ source temp_results_env/bin/activate
+
+ # Install required packages in virtual environment
+ pip install matplotlib pandas psycopg2-binary
+
+ echo "Copying results processing scripts..."
+
+ # Copy both processing scripts
+ cp "${GITHUB_WORKSPACE}/test_runner/performance/benchbase_tpc_c_helpers/generate_diagrams.py" .
+ cp "${GITHUB_WORKSPACE}/test_runner/performance/benchbase_tpc_c_helpers/upload_results_to_perf_test_results.py" .
+
+ echo "Processing load phase metrics..."
+
+ # Find and process load log
+ LOAD_LOG=$(find . -name "load_${WAREHOUSES}_warehouses.log" -type f | head -1)
+ if [ -n "$LOAD_LOG" ]; then
+ echo "Processing load metrics from: $LOAD_LOG"
+ python upload_results_to_perf_test_results.py \
+ --load-log "$LOAD_LOG" \
+ --run-type "load" \
+ --warehouses "${WAREHOUSES}" \
+ --min-cu "${MIN_CU}" \
+ --max-cu "${MAX_CU}" \
+ --project-id "${PROJECT_ID}" \
+ --revision "${REVISION}" \
+ --connection-string "${PERF_DB_CONNSTR}"
+ else
+ echo "Warning: Load log file not found: load_${WAREHOUSES}_warehouses.log"
+ fi
+
+ echo "Processing warmup results for optimal rate..."
+
+ # Find and process warmup results
+ WARMUP_CSV=$(find results_warmup -name "*.results.csv" -type f | head -1)
+ WARMUP_JSON=$(find results_warmup -name "*.summary.json" -type f | head -1)
+
+ if [ -n "$WARMUP_CSV" ] && [ -n "$WARMUP_JSON" ]; then
+ echo "Generating warmup diagram from: $WARMUP_CSV"
+ python generate_diagrams.py \
+ --input-csv "$WARMUP_CSV" \
+ --output-svg "warmup_${WAREHOUSES}_warehouses_performance.svg" \
+ --title-suffix "Warmup at max TPS"
+
+ echo "Uploading warmup metrics from: $WARMUP_JSON"
+ python upload_results_to_perf_test_results.py \
+ --summary-json "$WARMUP_JSON" \
+ --results-csv "$WARMUP_CSV" \
+ --run-type "warmup" \
+ --min-cu "${MIN_CU}" \
+ --max-cu "${MAX_CU}" \
+ --project-id "${PROJECT_ID}" \
+ --revision "${REVISION}" \
+ --connection-string "${PERF_DB_CONNSTR}"
+ else
+ echo "Warning: Missing warmup results files (CSV: $WARMUP_CSV, JSON: $WARMUP_JSON)"
+ fi
+
+ echo "Processing optimal rate results..."
+
+ # Find and process optimal rate results
+ OPTRATE_CSV=$(find results_opt_rate -name "*.results.csv" -type f | head -1)
+ OPTRATE_JSON=$(find results_opt_rate -name "*.summary.json" -type f | head -1)
+
+ if [ -n "$OPTRATE_CSV" ] && [ -n "$OPTRATE_JSON" ]; then
+ echo "Generating optimal rate diagram from: $OPTRATE_CSV"
+ python generate_diagrams.py \
+ --input-csv "$OPTRATE_CSV" \
+ --output-svg "benchmark_${WAREHOUSES}_warehouses_performance.svg" \
+ --title-suffix "70% of max TPS"
+
+ echo "Uploading optimal rate metrics from: $OPTRATE_JSON"
+ python upload_results_to_perf_test_results.py \
+ --summary-json "$OPTRATE_JSON" \
+ --results-csv "$OPTRATE_CSV" \
+ --run-type "opt-rate" \
+ --min-cu "${MIN_CU}" \
+ --max-cu "${MAX_CU}" \
+ --project-id "${PROJECT_ID}" \
+ --revision "${REVISION}" \
+ --connection-string "${PERF_DB_CONNSTR}"
+ else
+ echo "Warning: Missing optimal rate results files (CSV: $OPTRATE_CSV, JSON: $OPTRATE_JSON)"
+ fi
+
+ echo "Processing warmup 2 results for ramp down/up phase..."
+
+ # Find and process warmup results
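+          # Two warmup runs have been written to results_warmup by now (one before the opt-rate
+          # phase, one before the ramp phase); tail -1 below is intended to pick the second run's files.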
+ WARMUP_CSV=$(find results_warmup -name "*.results.csv" -type f | tail -1)
+ WARMUP_JSON=$(find results_warmup -name "*.summary.json" -type f | tail -1)
+
+ if [ -n "$WARMUP_CSV" ] && [ -n "$WARMUP_JSON" ]; then
+ echo "Generating warmup diagram from: $WARMUP_CSV"
+ python generate_diagrams.py \
+ --input-csv "$WARMUP_CSV" \
+ --output-svg "warmup_2_${WAREHOUSES}_warehouses_performance.svg" \
+ --title-suffix "Warmup at max TPS"
+
+ echo "Uploading warmup metrics from: $WARMUP_JSON"
+ python upload_results_to_perf_test_results.py \
+ --summary-json "$WARMUP_JSON" \
+ --results-csv "$WARMUP_CSV" \
+ --run-type "warmup" \
+ --min-cu "${MIN_CU}" \
+ --max-cu "${MAX_CU}" \
+ --project-id "${PROJECT_ID}" \
+ --revision "${REVISION}" \
+ --connection-string "${PERF_DB_CONNSTR}"
+ else
+ echo "Warning: Missing warmup results files (CSV: $WARMUP_CSV, JSON: $WARMUP_JSON)"
+ fi
+
+ echo "Processing ramp results..."
+
+ # Find and process ramp results
+ RAMPUP_CSV=$(find results_ramp_up -name "*.results.csv" -type f | head -1)
+ RAMPUP_JSON=$(find results_ramp_up -name "*.summary.json" -type f | head -1)
+
+ if [ -n "$RAMPUP_CSV" ] && [ -n "$RAMPUP_JSON" ]; then
+ echo "Generating ramp diagram from: $RAMPUP_CSV"
+ python generate_diagrams.py \
+ --input-csv "$RAMPUP_CSV" \
+ --output-svg "ramp_${WAREHOUSES}_warehouses_performance.svg" \
+ --title-suffix "ramp TPS down and up in 5 minute intervals"
+
+ echo "Uploading ramp metrics from: $RAMPUP_JSON"
+ python upload_results_to_perf_test_results.py \
+ --summary-json "$RAMPUP_JSON" \
+ --results-csv "$RAMPUP_CSV" \
+ --run-type "ramp-up" \
+ --min-cu "${MIN_CU}" \
+ --max-cu "${MAX_CU}" \
+ --project-id "${PROJECT_ID}" \
+ --revision "${REVISION}" \
+ --connection-string "${PERF_DB_CONNSTR}"
+ else
+ echo "Warning: Missing ramp results files (CSV: $RAMPUP_CSV, JSON: $RAMPUP_JSON)"
+ fi
+
+ # Deactivate and clean up virtual environment
+ deactivate
+ rm -rf temp_results_env
+ rm upload_results_to_perf_test_results.py
+
+ echo "Results processing completed and environment cleaned up"
+
+ - name: Set date for upload
+ id: set-date
+ run: echo "date=$(date +%Y-%m-%d)" >> $GITHUB_OUTPUT
+
+ - name: Configure AWS credentials # necessary to upload results
+ uses: aws-actions/configure-aws-credentials@e3dd6a429d7300a6a4c196c26e071d42e0343502 # v4.0.2
+ with:
+ aws-region: us-east-2
+ role-to-assume: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
+ role-duration-seconds: 900 # 900 is minimum value
+
+ - name: Upload benchmark results to S3
+ env:
+ S3_BUCKET: neon-public-benchmark-results
+ S3_PREFIX: benchbase-tpc-c/${{ steps.set-date.outputs.date }}/${{ github.run_id }}/${{ matrix.warehouses }}-warehouses
+ run: |
+ echo "Redacting passwords from configuration files before upload..."
+
+ # Mask all passwords in XML config files
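+          # e.g. a hypothetical <password>s3cret</password> entry becomes <password>redacted</password>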
+ find "${RUNNER_TEMP}/configs" -name "*.xml" -type f -exec sed -i 's|[^<]*|redacted|g' {} \;
+
+ echo "Uploading benchmark results to s3://${S3_BUCKET}/${S3_PREFIX}/"
+
+ # Upload the entire benchmark directory recursively
+ aws s3 cp --only-show-errors --recursive "${RUNNER_TEMP}" s3://${S3_BUCKET}/${S3_PREFIX}/
+
+ echo "Upload completed"
+
+ - name: Delete Neon Project
+ if: ${{ always() }}
+ uses: ./.github/actions/neon-project-delete
+ with:
+ project_id: ${{ steps.create-neon-project-tpcc.outputs.project_id }}
+ api_key: ${{ secrets.NEON_PRODUCTION_API_KEY_4_BENCHMARKS }}
+ api_host: console.neon.tech # production (!)
\ No newline at end of file
diff --git a/.github/workflows/benchmarking.yml b/.github/workflows/benchmarking.yml
index df80bad579..c9a998bd4e 100644
--- a/.github/workflows/benchmarking.yml
+++ b/.github/workflows/benchmarking.yml
@@ -418,7 +418,7 @@ jobs:
statuses: write
id-token: write # aws-actions/configure-aws-credentials
env:
- PGBENCH_SIZE: ${{ vars.PREWARM_PGBENCH_SIZE }}
+ PROJECT_ID: ${{ vars.PREWARM_PROJECT_ID }}
POSTGRES_DISTRIB_DIR: /tmp/neon/pg_install
DEFAULT_PG_VERSION: 17
TEST_OUTPUT: /tmp/test_output
diff --git a/.github/workflows/pg-clients.yml b/.github/workflows/pg-clients.yml
index 6efe0b4c8c..b6b4eca2b8 100644
--- a/.github/workflows/pg-clients.yml
+++ b/.github/workflows/pg-clients.yml
@@ -48,8 +48,20 @@ jobs:
uses: ./.github/workflows/build-build-tools-image.yml
secrets: inherit
+ generate-ch-tmppw:
+ runs-on: ubuntu-22.04
+ outputs:
+ tmp_val: ${{ steps.pwgen.outputs.tmp_val }}
+ steps:
+ - name: Generate a random password
+ id: pwgen
+ run: |
+ set +x
+ p=$(dd if=/dev/random bs=14 count=1 2>/dev/null | base64)
+ echo tmp_val="${p//\//}" >> "${GITHUB_OUTPUT}"
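+          # base64 output may contain '/', so ${p//\//} strips any slashes to keep the
+          # password shell- and URL-safe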
+
test-logical-replication:
- needs: [ build-build-tools-image ]
+ needs: [ build-build-tools-image, generate-ch-tmppw ]
runs-on: ubuntu-22.04
container:
@@ -60,16 +72,20 @@ jobs:
options: --init --user root
services:
clickhouse:
- image: clickhouse/clickhouse-server:24.6.3.64
+ image: clickhouse/clickhouse-server:24.8
+ env:
+ CLICKHOUSE_PASSWORD: ${{ needs.generate-ch-tmppw.outputs.tmp_val }}
ports:
- 9000:9000
- 8123:8123
zookeeper:
- image: quay.io/debezium/zookeeper:2.7
+ image: quay.io/debezium/zookeeper:3.1.3.Final
ports:
- 2181:2181
+ - 2888:2888
+ - 3888:3888
kafka:
- image: quay.io/debezium/kafka:2.7
+ image: quay.io/debezium/kafka:3.1.3.Final
env:
ZOOKEEPER_CONNECT: "zookeeper:2181"
KAFKA_ADVERTISED_LISTENERS: PLAINTEXT://kafka:9092
@@ -79,7 +95,7 @@ jobs:
ports:
- 9092:9092
debezium:
- image: quay.io/debezium/connect:2.7
+ image: quay.io/debezium/connect:3.1.3.Final
env:
BOOTSTRAP_SERVERS: kafka:9092
GROUP_ID: 1
@@ -125,6 +141,7 @@ jobs:
aws-oidc-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
env:
BENCHMARK_CONNSTR: ${{ steps.create-neon-project.outputs.dsn }}
+ CLICKHOUSE_PASSWORD: ${{ needs.generate-ch-tmppw.outputs.tmp_val }}
- name: Delete Neon Project
if: always()
diff --git a/.github/workflows/proxy-benchmark.yml b/.github/workflows/proxy-benchmark.yml
index 0ae93ce295..e48fe41b45 100644
--- a/.github/workflows/proxy-benchmark.yml
+++ b/.github/workflows/proxy-benchmark.yml
@@ -3,7 +3,7 @@ name: Periodic proxy performance test on unit-perf-aws-arm runners
on:
push: # TODO: remove after testing
branches:
- - test-proxy-bench # Runs on pushes to branches starting with test-proxy-bench
+      - test-proxy-bench # Runs on pushes to the test-proxy-bench branch
# schedule:
# * is a special character in YAML so you have to quote this string
# ┌───────────── minute (0 - 59)
@@ -32,7 +32,7 @@ jobs:
statuses: write
contents: write
pull-requests: write
- runs-on: [self-hosted, unit-perf-aws-arm]
+ runs-on: [ self-hosted, unit-perf-aws-arm ]
timeout-minutes: 60 # 1h timeout
container:
image: ghcr.io/neondatabase/build-tools:pinned-bookworm
@@ -55,30 +55,58 @@ jobs:
{
echo "PROXY_BENCH_PATH=$PROXY_BENCH_PATH"
echo "NEON_DIR=${RUNNER_TEMP}/neon"
+ echo "NEON_PROXY_PATH=${RUNNER_TEMP}/neon/bin/proxy"
echo "TEST_OUTPUT=${PROXY_BENCH_PATH}/test_output"
echo ""
} >> "$GITHUB_ENV"
- - name: Run proxy-bench
- run: ${PROXY_BENCH_PATH}/run.sh
+ - name: Cache poetry deps
+ uses: actions/cache@v4
+ with:
+ path: ~/.cache/pypoetry/virtualenvs
+ key: v2-${{ runner.os }}-${{ runner.arch }}-python-deps-bookworm-${{ hashFiles('poetry.lock') }}
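+          # keyed on poetry.lock so cached virtualenvs are invalidated whenever dependencies change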
- - name: Ingest Bench Results # neon repo script
+ - name: Install Python deps
+ shell: bash -euxo pipefail {0}
+ run: ./scripts/pysync
+
+      - name: Show ulimits
+ shell: bash -euxo pipefail {0}
+ run: |
+ ulimit -a
+
+ - name: Run proxy-bench
+ working-directory: ${{ env.PROXY_BENCH_PATH }}
+ run: ./run.sh --with-grafana --bare-metal
+
+ - name: Ingest Bench Results
if: always()
+ working-directory: ${{ env.NEON_DIR }}
run: |
mkdir -p $TEST_OUTPUT
python $NEON_DIR/scripts/proxy_bench_results_ingest.py --out $TEST_OUTPUT
- name: Push Metrics to Proxy perf database
+ shell: bash -euxo pipefail {0}
if: always()
env:
PERF_TEST_RESULT_CONNSTR: "${{ secrets.PROXY_TEST_RESULT_CONNSTR }}"
REPORT_FROM: $TEST_OUTPUT
+ working-directory: ${{ env.NEON_DIR }}
run: $NEON_DIR/scripts/generate_and_push_perf_report.sh
- - name: Docker cleanup
- if: always()
- run: docker compose down
-
- name: Notify Failure
if: failure()
- run: echo "Proxy bench job failed" && exit 1
\ No newline at end of file
+ run: echo "Proxy bench job failed" && exit 1
+
+ - name: Cleanup Test Resources
+ if: always()
+ shell: bash -euxo pipefail {0}
+ run: |
+ # Cleanup the test resources
+ if [[ -d "${TEST_OUTPUT}" ]]; then
+            rm -rf "${TEST_OUTPUT}"
+ fi
+ if [[ -d "${PROXY_BENCH_PATH}/test_output" ]]; then
+            rm -rf "${PROXY_BENCH_PATH}/test_output"
+ fi
\ No newline at end of file
diff --git a/Cargo.lock b/Cargo.lock
index b35fd7d074..065e7c5bd8 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -250,11 +250,11 @@ dependencies = [
[[package]]
name = "async-lock"
-version = "3.2.0"
+version = "3.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "7125e42787d53db9dd54261812ef17e937c95a51e4d291373b670342fa44310c"
+checksum = "ff6e472cdea888a4bd64f342f09b3f50e1886d32afe8df3d663c01140b811b18"
dependencies = [
- "event-listener 4.0.0",
+ "event-listener 5.4.0",
"event-listener-strategy",
"pin-project-lite",
]
@@ -1483,6 +1483,7 @@ dependencies = [
"tower-http",
"tower-otel",
"tracing",
+ "tracing-appender",
"tracing-opentelemetry",
"tracing-subscriber",
"tracing-utils",
@@ -1498,9 +1499,9 @@ dependencies = [
[[package]]
name = "concurrent-queue"
-version = "2.3.0"
+version = "2.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "f057a694a54f12365049b0958a1685bb52d567f5593b355fbf685838e873d400"
+checksum = "4ca0197aee26d1ae37445ee532fefce43251d24cc7c166799f4d46817f1d3973"
dependencies = [
"crossbeam-utils",
]
@@ -2349,9 +2350,9 @@ checksum = "0206175f82b8d6bf6652ff7d71a1e27fd2e4efde587fd368662814d6ec1d9ce0"
[[package]]
name = "event-listener"
-version = "4.0.0"
+version = "5.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "770d968249b5d99410d61f5bf89057f3199a077a04d087092f58e7d10692baae"
+checksum = "3492acde4c3fc54c845eaab3eed8bd00c7a7d881f78bfc801e43a93dec1331ae"
dependencies = [
"concurrent-queue",
"parking",
@@ -2360,11 +2361,11 @@ dependencies = [
[[package]]
name = "event-listener-strategy"
-version = "0.4.0"
+version = "0.5.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "958e4d70b6d5e81971bebec42271ec641e7ff4e170a6fa605f2b8a8b65cb97d3"
+checksum = "8be9f3dfaaffdae2972880079a491a1a8bb7cbed0b8dd7a347f668b4150a3b93"
dependencies = [
- "event-listener 4.0.0",
+ "event-listener 5.4.0",
"pin-project-lite",
]
@@ -2639,6 +2640,20 @@ version = "0.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "304de19db7028420975a296ab0fcbbc8e69438c4ed254a1e41e2a7f37d5f0e0a"
+[[package]]
+name = "generator"
+version = "0.8.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d18470a76cb7f8ff746cf1f7470914f900252ec36bbc40b569d74b1258446827"
+dependencies = [
+ "cc",
+ "cfg-if",
+ "libc",
+ "log",
+ "rustversion",
+ "windows 0.61.3",
+]
+
[[package]]
name = "generic-array"
version = "0.14.7"
@@ -2966,7 +2981,7 @@ checksum = "f9c7c7c8ac16c798734b8a24560c1362120597c40d5e1459f09498f8f6c8f2ba"
dependencies = [
"cfg-if",
"libc",
- "windows",
+ "windows 0.52.0",
]
[[package]]
@@ -3237,7 +3252,7 @@ dependencies = [
"iana-time-zone-haiku",
"js-sys",
"wasm-bindgen",
- "windows-core",
+ "windows-core 0.52.0",
]
[[package]]
@@ -3794,6 +3809,19 @@ version = "0.4.26"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "30bde2b3dc3671ae49d8e2e9f044c7c005836e7a023ee57cffa25ab82764bb9e"
+[[package]]
+name = "loom"
+version = "0.7.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "419e0dc8046cb947daa77eb95ae174acfbddb7673b4151f56d1eed8e93fbfaca"
+dependencies = [
+ "cfg-if",
+ "generator",
+ "scoped-tls",
+ "tracing",
+ "tracing-subscriber",
+]
+
[[package]]
name = "lru"
version = "0.12.3"
@@ -4010,6 +4038,25 @@ dependencies = [
"windows-sys 0.52.0",
]
+[[package]]
+name = "moka"
+version = "0.12.10"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "a9321642ca94a4282428e6ea4af8cc2ca4eac48ac7a6a4ea8f33f76d0ce70926"
+dependencies = [
+ "crossbeam-channel",
+ "crossbeam-epoch",
+ "crossbeam-utils",
+ "loom",
+ "parking_lot 0.12.1",
+ "portable-atomic",
+ "rustc_version",
+ "smallvec",
+ "tagptr",
+ "thiserror 1.0.69",
+ "uuid",
+]
+
[[package]]
name = "multimap"
version = "0.8.3"
@@ -5179,7 +5226,6 @@ dependencies = [
"criterion",
"env_logger",
"log",
- "memoffset 0.9.0",
"once_cell",
"postgres",
"postgres_ffi_types",
@@ -5532,7 +5578,6 @@ dependencies = [
"futures",
"gettid",
"hashbrown 0.14.5",
- "hashlink",
"hex",
"hmac",
"hostname",
@@ -5554,6 +5599,7 @@ dependencies = [
"lasso",
"measured",
"metrics",
+ "moka",
"once_cell",
"opentelemetry",
"ouroboros",
@@ -5620,6 +5666,7 @@ dependencies = [
"workspace_hack",
"x509-cert",
"zerocopy 0.8.24",
+ "zeroize",
]
[[package]]
@@ -6577,6 +6624,12 @@ dependencies = [
"pin-project-lite",
]
+[[package]]
+name = "scoped-tls"
+version = "1.0.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "e1cf6437eb19a8f4a6cc0f7dca544973b0b78843adbfeb3683d1a94a0024a294"
+
[[package]]
name = "scopeguard"
version = "1.1.0"
@@ -7427,6 +7480,12 @@ dependencies = [
"winapi",
]
+[[package]]
+name = "tagptr"
+version = "0.2.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "7b2093cf4c8eb1e67749a6762251bc9cd836b6fc171623bd0a9d324d37af2417"
+
[[package]]
name = "tar"
version = "0.4.40"
@@ -8093,11 +8152,12 @@ dependencies = [
[[package]]
name = "tracing-appender"
-version = "0.2.2"
+version = "0.2.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "09d48f71a791638519505cefafe162606f706c25592e4bde4d97600c0195312e"
+checksum = "3566e8ce28cc0a3fe42519fc80e6b4c943cc4c8cef275620eb8dac2d3d4e06cf"
dependencies = [
"crossbeam-channel",
+ "thiserror 1.0.69",
"time",
"tracing-subscriber",
]
@@ -8818,10 +8878,32 @@ version = "0.52.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e48a53791691ab099e5e2ad123536d0fff50652600abaf43bbf952894110d0be"
dependencies = [
- "windows-core",
+ "windows-core 0.52.0",
"windows-targets 0.52.6",
]
+[[package]]
+name = "windows"
+version = "0.61.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "9babd3a767a4c1aef6900409f85f5d53ce2544ccdfaa86dad48c91782c6d6893"
+dependencies = [
+ "windows-collections",
+ "windows-core 0.61.2",
+ "windows-future",
+ "windows-link",
+ "windows-numerics",
+]
+
+[[package]]
+name = "windows-collections"
+version = "0.2.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "3beeceb5e5cfd9eb1d76b381630e82c4241ccd0d27f1a39ed41b2760b255c5e8"
+dependencies = [
+ "windows-core 0.61.2",
+]
+
[[package]]
name = "windows-core"
version = "0.52.0"
@@ -8831,6 +8913,86 @@ dependencies = [
"windows-targets 0.52.6",
]
+[[package]]
+name = "windows-core"
+version = "0.61.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "c0fdd3ddb90610c7638aa2b3a3ab2904fb9e5cdbecc643ddb3647212781c4ae3"
+dependencies = [
+ "windows-implement",
+ "windows-interface",
+ "windows-link",
+ "windows-result",
+ "windows-strings",
+]
+
+[[package]]
+name = "windows-future"
+version = "0.2.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "fc6a41e98427b19fe4b73c550f060b59fa592d7d686537eebf9385621bfbad8e"
+dependencies = [
+ "windows-core 0.61.2",
+ "windows-link",
+ "windows-threading",
+]
+
+[[package]]
+name = "windows-implement"
+version = "0.60.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "a47fddd13af08290e67f4acabf4b459f647552718f683a7b415d290ac744a836"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "syn 2.0.100",
+]
+
+[[package]]
+name = "windows-interface"
+version = "0.59.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "bd9211b69f8dcdfa817bfd14bf1c97c9188afa36f4750130fcdf3f400eca9fa8"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "syn 2.0.100",
+]
+
+[[package]]
+name = "windows-link"
+version = "0.1.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "5e6ad25900d524eaabdbbb96d20b4311e1e7ae1699af4fb28c17ae66c80d798a"
+
+[[package]]
+name = "windows-numerics"
+version = "0.2.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "9150af68066c4c5c07ddc0ce30421554771e528bde427614c61038bc2c92c2b1"
+dependencies = [
+ "windows-core 0.61.2",
+ "windows-link",
+]
+
+[[package]]
+name = "windows-result"
+version = "0.3.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "56f42bd332cc6c8eac5af113fc0c1fd6a8fd2aa08a0119358686e5160d0586c6"
+dependencies = [
+ "windows-link",
+]
+
+[[package]]
+name = "windows-strings"
+version = "0.4.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "56e6c93f3a0c3b36176cb1327a4958a0353d5d166c2a35cb268ace15e91d3b57"
+dependencies = [
+ "windows-link",
+]
+
[[package]]
name = "windows-sys"
version = "0.48.0"
@@ -8889,6 +9051,15 @@ dependencies = [
"windows_x86_64_msvc 0.52.6",
]
+[[package]]
+name = "windows-threading"
+version = "0.1.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b66463ad2e0ea3bbf808b7f1d371311c80e115c0b71d60efc142cafbcfb057a6"
+dependencies = [
+ "windows-link",
+]
+
[[package]]
name = "windows_aarch64_gnullvm"
version = "0.48.0"
@@ -9025,6 +9196,8 @@ dependencies = [
"clap",
"clap_builder",
"const-oid",
+ "crossbeam-epoch",
+ "crossbeam-utils",
"crypto-bigint 0.5.5",
"der 0.7.8",
"deranged",
@@ -9071,6 +9244,7 @@ dependencies = [
"once_cell",
"p256 0.13.2",
"parquet",
+ "portable-atomic",
"prettyplease",
"proc-macro2",
"prost 0.13.5",
diff --git a/Cargo.toml b/Cargo.toml
index 1de261ed06..3744115ebf 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -46,10 +46,10 @@ members = [
"libs/proxy/json",
"libs/proxy/postgres-protocol2",
"libs/proxy/postgres-types2",
+ "libs/proxy/subzero_core",
"libs/proxy/tokio-postgres2",
"endpoint_storage",
"pgxn/neon/communicator",
- "proxy/subzero_core",
]
[workspace.package]
@@ -135,7 +135,7 @@ lock_api = "0.4.13"
md5 = "0.7.0"
measured = { version = "0.0.22", features=["lasso"] }
measured-process = { version = "0.0.22" }
-memoffset = "0.9"
+moka = { version = "0.12", features = ["sync"] }
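+# moka provides the in-memory cache now used by proxy (replacing its hashlink-based cache,
+# per the Cargo.lock changes); the "sync" feature enables moka's thread-safe synchronous Cache API.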
nix = { version = "0.30.1", features = ["dir", "fs", "mman", "process", "socket", "signal", "poll"] }
# Do not update to >= 7.0.0, at least. The update will have a significant impact
# on compute startup metrics (start_postgres_ms), >= 25% degradation.
@@ -146,7 +146,7 @@ oid-registry = "0.7.1"
once_cell = "1.13"
opentelemetry = "0.30"
opentelemetry_sdk = "0.30"
-opentelemetry-otlp = { version = "0.30", default-features = false, features = ["http-proto", "trace", "http", "reqwest-client"] }
+opentelemetry-otlp = { version = "0.30", default-features = false, features = ["http-proto", "trace", "http", "reqwest-blocking-client"] }
opentelemetry-semantic-conventions = "0.30"
parking_lot = "0.12"
parquet = { version = "53", default-features = false, features = ["zstd"] }
@@ -224,6 +224,7 @@ tracing-log = "0.2"
tracing-opentelemetry = "0.31"
tracing-serde = "0.2.0"
tracing-subscriber = { version = "0.3", default-features = false, features = ["smallvec", "fmt", "tracing-log", "std", "env-filter", "json"] }
+tracing-appender = "0.2.3"
try-lock = "0.2.5"
test-log = { version = "0.2.17", default-features = false, features = ["log"] }
twox-hash = { version = "1.6.3", default-features = false }
@@ -234,10 +235,11 @@ uuid = { version = "1.6.1", features = ["v4", "v7", "serde"] }
walkdir = "2.3.2"
rustls-native-certs = "0.8"
whoami = "1.5.1"
-zerocopy = { version = "0.8", features = ["derive", "simd"] }
json-structural-diff = { version = "0.2.0" }
x509-cert = { version = "0.2.5" }
x509-parser = "0.16"
+zerocopy = { version = "0.8", features = ["derive", "simd"] }
+zeroize = "1.8"
## TODO replace this with tracing
env_logger = "0.11"
diff --git a/build-tools/Dockerfile b/build-tools/Dockerfile
index b5fe642e6f..87966591c1 100644
--- a/build-tools/Dockerfile
+++ b/build-tools/Dockerfile
@@ -39,13 +39,13 @@ COPY build-tools/patches/pgcopydbv017.patch /pgcopydbv017.patch
RUN if [ "${DEBIAN_VERSION}" = "bookworm" ]; then \
set -e && \
- apt update && \
- apt install -y --no-install-recommends \
+ apt-get update && \
+ apt-get install -y --no-install-recommends \
ca-certificates wget gpg && \
wget -qO - https://www.postgresql.org/media/keys/ACCC4CF8.asc | gpg --dearmor -o /usr/share/keyrings/postgresql-keyring.gpg && \
echo "deb [signed-by=/usr/share/keyrings/postgresql-keyring.gpg] http://apt.postgresql.org/pub/repos/apt bookworm-pgdg main" > /etc/apt/sources.list.d/pgdg.list && \
apt-get update && \
- apt install -y --no-install-recommends \
+ apt-get install -y --no-install-recommends \
build-essential \
autotools-dev \
libedit-dev \
@@ -89,8 +89,7 @@ RUN useradd -ms /bin/bash nonroot -b /home
# Use strict mode for bash to catch errors early
SHELL ["/bin/bash", "-euo", "pipefail", "-c"]
-RUN mkdir -p /pgcopydb/bin && \
- mkdir -p /pgcopydb/lib && \
+RUN mkdir -p /pgcopydb/{bin,lib} && \
chmod -R 755 /pgcopydb && \
chown -R nonroot:nonroot /pgcopydb
@@ -106,8 +105,8 @@ RUN echo 'Acquire::Retries "5";' > /etc/apt/apt.conf.d/80-retries && \
# 'gdb' is included so that we get backtraces of core dumps produced in
# regression tests
RUN set -e \
- && apt update \
- && apt install -y \
+ && apt-get update \
+ && apt-get install -y --no-install-recommends \
autoconf \
automake \
bison \
@@ -183,22 +182,22 @@ RUN curl -sL "https://github.com/peak/s5cmd/releases/download/v${S5CMD_VERSION}/
ENV LLVM_VERSION=20
RUN curl -fsSL 'https://apt.llvm.org/llvm-snapshot.gpg.key' | apt-key add - \
&& echo "deb http://apt.llvm.org/${DEBIAN_VERSION}/ llvm-toolchain-${DEBIAN_VERSION}-${LLVM_VERSION} main" > /etc/apt/sources.list.d/llvm.stable.list \
- && apt update \
- && apt install -y clang-${LLVM_VERSION} llvm-${LLVM_VERSION} \
+ && apt-get update \
+ && apt-get install -y --no-install-recommends clang-${LLVM_VERSION} llvm-${LLVM_VERSION} \
&& bash -c 'for f in /usr/bin/clang*-${LLVM_VERSION} /usr/bin/llvm*-${LLVM_VERSION}; do ln -s "${f}" "${f%-${LLVM_VERSION}}"; done' \
&& rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/*
# Install node
ENV NODE_VERSION=24
RUN curl -fsSL https://deb.nodesource.com/setup_${NODE_VERSION}.x | bash - \
- && apt install -y nodejs \
+ && apt-get install -y --no-install-recommends nodejs \
&& rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/*
# Install docker
RUN curl -fsSL https://download.docker.com/linux/ubuntu/gpg | gpg --dearmor -o /usr/share/keyrings/docker-archive-keyring.gpg \
&& echo "deb [arch=$(dpkg --print-architecture) signed-by=/usr/share/keyrings/docker-archive-keyring.gpg] https://download.docker.com/linux/debian ${DEBIAN_VERSION} stable" > /etc/apt/sources.list.d/docker.list \
- && apt update \
- && apt install -y docker-ce docker-ce-cli \
+ && apt-get update \
+ && apt-get install -y --no-install-recommends docker-ce docker-ce-cli \
&& rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/*
# Configure sudo & docker
@@ -215,12 +214,11 @@ RUN curl "https://awscli.amazonaws.com/awscli-exe-linux-$(uname -m).zip" -o "aws
# Mold: A Modern Linker
ENV MOLD_VERSION=v2.37.1
RUN set -e \
- && git clone https://github.com/rui314/mold.git \
+ && git clone -b "${MOLD_VERSION}" --depth 1 https://github.com/rui314/mold.git \
&& mkdir mold/build \
- && cd mold/build \
- && git checkout ${MOLD_VERSION} \
+ && cd mold/build \
&& cmake -DCMAKE_BUILD_TYPE=Release -DCMAKE_CXX_COMPILER=clang++ .. \
- && cmake --build . -j $(nproc) \
+ && cmake --build . -j "$(nproc)" \
&& cmake --install . \
&& cd .. \
&& rm -rf mold
@@ -254,7 +252,7 @@ ENV ICU_VERSION=67.1
ENV ICU_PREFIX=/usr/local/icu
# Download and build static ICU
-RUN wget -O /tmp/libicu-${ICU_VERSION}.tgz https://github.com/unicode-org/icu/releases/download/release-${ICU_VERSION//./-}/icu4c-${ICU_VERSION//./_}-src.tgz && \
+RUN wget -O "/tmp/libicu-${ICU_VERSION}.tgz" https://github.com/unicode-org/icu/releases/download/release-${ICU_VERSION//./-}/icu4c-${ICU_VERSION//./_}-src.tgz && \
echo "94a80cd6f251a53bd2a997f6f1b5ac6653fe791dfab66e1eb0227740fb86d5dc /tmp/libicu-${ICU_VERSION}.tgz" | sha256sum --check && \
mkdir /tmp/icu && \
pushd /tmp/icu && \
@@ -265,8 +263,7 @@ RUN wget -O /tmp/libicu-${ICU_VERSION}.tgz https://github.com/unicode-org/icu/re
make install && \
popd && \
rm -rf icu && \
- rm -f /tmp/libicu-${ICU_VERSION}.tgz && \
- popd
+ rm -f /tmp/libicu-${ICU_VERSION}.tgz
# Switch to nonroot user
USER nonroot:nonroot
@@ -279,19 +276,19 @@ ENV PYTHON_VERSION=3.11.12 \
PYENV_ROOT=/home/nonroot/.pyenv \
PATH=/home/nonroot/.pyenv/shims:/home/nonroot/.pyenv/bin:/home/nonroot/.poetry/bin:$PATH
RUN set -e \
- && cd $HOME \
+ && cd "$HOME" \
&& curl -sSO https://raw.githubusercontent.com/pyenv/pyenv-installer/master/bin/pyenv-installer \
&& chmod +x pyenv-installer \
&& ./pyenv-installer \
&& export PYENV_ROOT=/home/nonroot/.pyenv \
&& export PATH="$PYENV_ROOT/bin:$PATH" \
&& export PATH="$PYENV_ROOT/shims:$PATH" \
- && pyenv install ${PYTHON_VERSION} \
- && pyenv global ${PYTHON_VERSION} \
+ && pyenv install "${PYTHON_VERSION}" \
+ && pyenv global "${PYTHON_VERSION}" \
&& python --version \
- && pip install --upgrade pip \
+ && pip install --no-cache-dir --upgrade pip \
&& pip --version \
- && pip install pipenv wheel poetry
+ && pip install --no-cache-dir pipenv wheel poetry
# Switch to nonroot user (again)
USER nonroot:nonroot
@@ -317,13 +314,13 @@ RUN curl -sSO https://static.rust-lang.org/rustup/dist/$(uname -m)-unknown-linux
. "$HOME/.cargo/env" && \
cargo --version && rustup --version && \
rustup component add llvm-tools rustfmt clippy && \
- cargo install rustfilt --locked --version ${RUSTFILT_VERSION} && \
- cargo install cargo-hakari --locked --version ${CARGO_HAKARI_VERSION} && \
- cargo install cargo-deny --locked --version ${CARGO_DENY_VERSION} && \
- cargo install cargo-hack --locked --version ${CARGO_HACK_VERSION} && \
- cargo install cargo-nextest --locked --version ${CARGO_NEXTEST_VERSION} && \
- cargo install cargo-chef --locked --version ${CARGO_CHEF_VERSION} && \
- cargo install diesel_cli --locked --version ${CARGO_DIESEL_CLI_VERSION} \
+ cargo install rustfilt --locked --version "${RUSTFILT_VERSION}" && \
+ cargo install cargo-hakari --locked --version "${CARGO_HAKARI_VERSION}" && \
+ cargo install cargo-deny --locked --version "${CARGO_DENY_VERSION}" && \
+ cargo install cargo-hack --locked --version "${CARGO_HACK_VERSION}" && \
+ cargo install cargo-nextest --locked --version "${CARGO_NEXTEST_VERSION}" && \
+ cargo install cargo-chef --locked --version "${CARGO_CHEF_VERSION}" && \
+ cargo install diesel_cli --locked --version "${CARGO_DIESEL_CLI_VERSION}" \
--features postgres-bundled --no-default-features && \
rm -rf /home/nonroot/.cargo/registry && \
rm -rf /home/nonroot/.cargo/git
diff --git a/build-tools/package-lock.json b/build-tools/package-lock.json
index b2c44ed9b4..0d48345fd5 100644
--- a/build-tools/package-lock.json
+++ b/build-tools/package-lock.json
@@ -6,7 +6,7 @@
"": {
"name": "build-tools",
"devDependencies": {
- "@redocly/cli": "1.34.4",
+ "@redocly/cli": "1.34.5",
"@sourcemeta/jsonschema": "10.0.0"
}
},
@@ -472,9 +472,9 @@
}
},
"node_modules/@redocly/cli": {
- "version": "1.34.4",
- "resolved": "https://registry.npmjs.org/@redocly/cli/-/cli-1.34.4.tgz",
- "integrity": "sha512-seH/GgrjSB1EeOsgJ/4Ct6Jk2N7sh12POn/7G8UQFARMyUMJpe1oHtBwT2ndfp4EFCpgBAbZ/82Iw6dwczNxEA==",
+ "version": "1.34.5",
+ "resolved": "https://registry.npmjs.org/@redocly/cli/-/cli-1.34.5.tgz",
+ "integrity": "sha512-5IEwxs7SGP5KEXjBKLU8Ffdz9by/KqNSeBk6YUVQaGxMXK//uYlTJIPntgUXbo1KAGG2d2q2XF8y4iFz6qNeiw==",
"dev": true,
"license": "MIT",
"dependencies": {
@@ -484,14 +484,14 @@
"@opentelemetry/sdk-trace-node": "1.26.0",
"@opentelemetry/semantic-conventions": "1.27.0",
"@redocly/config": "^0.22.0",
- "@redocly/openapi-core": "1.34.4",
- "@redocly/respect-core": "1.34.4",
+ "@redocly/openapi-core": "1.34.5",
+ "@redocly/respect-core": "1.34.5",
"abort-controller": "^3.0.0",
"chokidar": "^3.5.1",
"colorette": "^1.2.0",
"core-js": "^3.32.1",
"dotenv": "16.4.7",
- "form-data": "^4.0.0",
+ "form-data": "^4.0.4",
"get-port-please": "^3.0.1",
"glob": "^7.1.6",
"handlebars": "^4.7.6",
@@ -522,9 +522,9 @@
"license": "MIT"
},
"node_modules/@redocly/openapi-core": {
- "version": "1.34.4",
- "resolved": "https://registry.npmjs.org/@redocly/openapi-core/-/openapi-core-1.34.4.tgz",
- "integrity": "sha512-hf53xEgpXIgWl3b275PgZU3OTpYh1RoD2LHdIfQ1JzBNTWsiNKczTEsI/4Tmh2N1oq9YcphhSMyk3lDh85oDjg==",
+ "version": "1.34.5",
+ "resolved": "https://registry.npmjs.org/@redocly/openapi-core/-/openapi-core-1.34.5.tgz",
+ "integrity": "sha512-0EbE8LRbkogtcCXU7liAyC00n9uNG9hJ+eMyHFdUsy9lB/WGqnEBgwjA9q2cyzAVcdTkQqTBBU1XePNnN3OijA==",
"dev": true,
"license": "MIT",
"dependencies": {
@@ -544,21 +544,21 @@
}
},
"node_modules/@redocly/respect-core": {
- "version": "1.34.4",
- "resolved": "https://registry.npmjs.org/@redocly/respect-core/-/respect-core-1.34.4.tgz",
- "integrity": "sha512-MitKyKyQpsizA4qCVv+MjXL4WltfhFQAoiKiAzrVR1Kusro3VhYb6yJuzoXjiJhR0ukLP5QOP19Vcs7qmj9dZg==",
+ "version": "1.34.5",
+ "resolved": "https://registry.npmjs.org/@redocly/respect-core/-/respect-core-1.34.5.tgz",
+ "integrity": "sha512-GheC/g/QFztPe9UA9LamooSplQuy9pe0Yr8XGTqkz0ahivLDl7svoy/LSQNn1QH3XGtLKwFYMfTwFR2TAYyh5Q==",
"dev": true,
"license": "MIT",
"dependencies": {
"@faker-js/faker": "^7.6.0",
"@redocly/ajv": "8.11.2",
- "@redocly/openapi-core": "1.34.4",
+ "@redocly/openapi-core": "1.34.5",
"better-ajv-errors": "^1.2.0",
"colorette": "^2.0.20",
"concat-stream": "^2.0.0",
"cookie": "^0.7.2",
"dotenv": "16.4.7",
- "form-data": "4.0.0",
+ "form-data": "^4.0.4",
"jest-diff": "^29.3.1",
"jest-matcher-utils": "^29.3.1",
"js-yaml": "4.1.0",
@@ -582,21 +582,6 @@
"dev": true,
"license": "MIT"
},
- "node_modules/@redocly/respect-core/node_modules/form-data": {
- "version": "4.0.0",
- "resolved": "https://registry.npmjs.org/form-data/-/form-data-4.0.0.tgz",
- "integrity": "sha512-ETEklSGi5t0QMZuiXoA/Q6vcnxcLQP5vdugSpuAyi6SVGi2clPPp+xgEhuMaHC+zGgn31Kd235W35f7Hykkaww==",
- "dev": true,
- "license": "MIT",
- "dependencies": {
- "asynckit": "^0.4.0",
- "combined-stream": "^1.0.8",
- "mime-types": "^2.1.12"
- },
- "engines": {
- "node": ">= 6"
- }
- },
"node_modules/@sinclair/typebox": {
"version": "0.27.8",
"resolved": "https://registry.npmjs.org/@sinclair/typebox/-/typebox-0.27.8.tgz",
@@ -1345,9 +1330,9 @@
"license": "MIT"
},
"node_modules/form-data": {
- "version": "4.0.3",
- "resolved": "https://registry.npmjs.org/form-data/-/form-data-4.0.3.tgz",
- "integrity": "sha512-qsITQPfmvMOSAdeyZ+12I1c+CKSstAFAwu+97zrnWAbIr5u8wfsExUzCesVLC8NgHuRUqNN4Zy6UPWUTRGslcA==",
+ "version": "4.0.4",
+ "resolved": "https://registry.npmjs.org/form-data/-/form-data-4.0.4.tgz",
+ "integrity": "sha512-KrGhL9Q4zjj0kiUt5OO4Mr/A/jlI2jDYs5eHBpYHPcBEVSiipAvn2Ko2HnPe20rmcuuvMHNdZFp+4IlGTMF0Ow==",
"dev": true,
"license": "MIT",
"dependencies": {
diff --git a/build-tools/package.json b/build-tools/package.json
index 000969c672..2dc1359075 100644
--- a/build-tools/package.json
+++ b/build-tools/package.json
@@ -2,7 +2,7 @@
"name": "build-tools",
"private": true,
"devDependencies": {
- "@redocly/cli": "1.34.4",
+ "@redocly/cli": "1.34.5",
"@sourcemeta/jsonschema": "10.0.0"
}
}
diff --git a/compute/vm-image-spec-bookworm.yaml b/compute/vm-image-spec-bookworm.yaml
index 267e4c83b5..5f27b6bf9d 100644
--- a/compute/vm-image-spec-bookworm.yaml
+++ b/compute/vm-image-spec-bookworm.yaml
@@ -26,7 +26,13 @@ commands:
- name: postgres-exporter
user: nobody
sysvInitAction: respawn
- shell: 'DATA_SOURCE_NAME="user=cloud_admin sslmode=disable dbname=postgres application_name=postgres-exporter pgaudit.log=none" /bin/postgres_exporter --config.file=/etc/postgres_exporter.yml'
+      # Turn off the database collector (`--no-collector.database`); we don't use the `pg_database_size_bytes` metric anyway, see
+      # https://github.com/neondatabase/flux-fleet/blob/5e19b3fd897667b70d9a7ad4aa06df0ca22b49ff/apps/base/compute-metrics/scrape-compute-pg-exporter-neon.yaml#L29
+      # The collector is enabled by default and does not filter out invalid databases, see
+      # https://github.com/prometheus-community/postgres_exporter/blob/06a553c8166512c9d9c5ccf257b0f9bba8751dbc/collector/pg_database.go#L67
+      # so when it hits one, it starts spamming logs with errors like:
+      # ERROR: [NEON_SMGR] [reqid d9700000018] could not read db size of db 705302 from page server at lsn 5/A2457EB0
+ shell: 'DATA_SOURCE_NAME="user=cloud_admin sslmode=disable dbname=postgres application_name=postgres-exporter pgaudit.log=none" /bin/postgres_exporter --no-collector.database --config.file=/etc/postgres_exporter.yml'
- name: pgbouncer-exporter
user: postgres
sysvInitAction: respawn
diff --git a/compute/vm-image-spec-bullseye.yaml b/compute/vm-image-spec-bullseye.yaml
index 2b6e77b656..cf26ace72a 100644
--- a/compute/vm-image-spec-bullseye.yaml
+++ b/compute/vm-image-spec-bullseye.yaml
@@ -26,7 +26,13 @@ commands:
- name: postgres-exporter
user: nobody
sysvInitAction: respawn
- shell: 'DATA_SOURCE_NAME="user=cloud_admin sslmode=disable dbname=postgres application_name=postgres-exporter pgaudit.log=none" /bin/postgres_exporter --config.file=/etc/postgres_exporter.yml'
+      # Turn off the database collector (`--no-collector.database`); we don't use the `pg_database_size_bytes` metric anyway, see
+      # https://github.com/neondatabase/flux-fleet/blob/5e19b3fd897667b70d9a7ad4aa06df0ca22b49ff/apps/base/compute-metrics/scrape-compute-pg-exporter-neon.yaml#L29
+      # The collector is enabled by default and does not filter out invalid databases, see
+      # https://github.com/prometheus-community/postgres_exporter/blob/06a553c8166512c9d9c5ccf257b0f9bba8751dbc/collector/pg_database.go#L67
+      # so when it hits one, it starts spamming logs with errors like:
+      # ERROR: [NEON_SMGR] [reqid d9700000018] could not read db size of db 705302 from page server at lsn 5/A2457EB0
+ shell: 'DATA_SOURCE_NAME="user=cloud_admin sslmode=disable dbname=postgres application_name=postgres-exporter pgaudit.log=none" /bin/postgres_exporter --no-collector.database --config.file=/etc/postgres_exporter.yml'
- name: pgbouncer-exporter
user: postgres
sysvInitAction: respawn
diff --git a/compute_tools/Cargo.toml b/compute_tools/Cargo.toml
index 496471acc7..558760b0ad 100644
--- a/compute_tools/Cargo.toml
+++ b/compute_tools/Cargo.toml
@@ -62,6 +62,7 @@ tokio-stream.workspace = true
tonic.workspace = true
tower-otel.workspace = true
tracing.workspace = true
+tracing-appender.workspace = true
tracing-opentelemetry.workspace = true
tracing-subscriber.workspace = true
tracing-utils.workspace = true
diff --git a/compute_tools/README.md b/compute_tools/README.md
index 49f1368f0e..e92e5920b9 100644
--- a/compute_tools/README.md
+++ b/compute_tools/README.md
@@ -52,8 +52,14 @@ stateDiagram-v2
Init --> Running : Started Postgres
Running --> TerminationPendingFast : Requested termination
Running --> TerminationPendingImmediate : Requested termination
+ Running --> ConfigurationPending : Received a /configure request with spec
+    Running --> RefreshConfigurationPending : Received a /refresh_configuration request, so the compute node will pull a new spec and reconfigure
+ RefreshConfigurationPending --> RefreshConfiguration: Received compute spec and started configuration
+ RefreshConfiguration --> Running : Compute has been re-configured
+    RefreshConfiguration --> RefreshConfigurationPending : Configuration failed and will be retried
TerminationPendingFast --> Terminated : Terminated compute with 30s delay for cplane to inspect status
TerminationPendingImmediate --> Terminated : Terminated compute immediately
+ Failed --> RefreshConfigurationPending : Received a /refresh_configuration request
Failed --> [*] : Compute exited
Terminated --> [*] : Compute exited
```
diff --git a/compute_tools/src/bin/compute_ctl.rs b/compute_tools/src/bin/compute_ctl.rs
index 04723d6f3d..9c86aba531 100644
--- a/compute_tools/src/bin/compute_ctl.rs
+++ b/compute_tools/src/bin/compute_ctl.rs
@@ -49,9 +49,10 @@ use compute_tools::compute::{
BUILD_TAG, ComputeNode, ComputeNodeParams, forward_termination_signal,
};
use compute_tools::extension_server::get_pg_version_string;
-use compute_tools::logger::*;
use compute_tools::params::*;
+use compute_tools::pg_isready::get_pg_isready_bin;
use compute_tools::spec::*;
+use compute_tools::{hadron_metrics, installed_extensions, logger::*};
use rlimit::{Resource, setrlimit};
use signal_hook::consts::{SIGINT, SIGQUIT, SIGTERM};
use signal_hook::iterator::Signals;
@@ -194,11 +195,19 @@ fn main() -> Result<()> {
.build()?;
let _rt_guard = runtime.enter();
- let tracing_provider = init(cli.dev)?;
+ let mut log_dir = None;
+ if cli.lakebase_mode {
+ log_dir = std::env::var("COMPUTE_CTL_LOG_DIRECTORY").ok();
+ }
+
+ let (tracing_provider, _file_logs_guard) = init(cli.dev, log_dir)?;
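+    // Keep _file_logs_guard alive for the whole process: with tracing-appender's
+    // non-blocking writer, dropping the guard is what flushes buffered log lines.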
// enable core dumping for all child processes
setrlimit(Resource::CORE, rlimit::INFINITY, rlimit::INFINITY)?;
+ installed_extensions::initialize_metrics();
+ hadron_metrics::initialize_metrics();
+
let connstr = Url::parse(&cli.connstr).context("cannot parse connstr as a URL")?;
let config = get_config(&cli)?;
@@ -226,7 +235,12 @@ fn main() -> Result<()> {
cli.installed_extensions_collection_interval,
)),
pg_init_timeout: cli.pg_init_timeout.map(Duration::from_secs),
+ pg_isready_bin: get_pg_isready_bin(&cli.pgbin),
+ instance_id: std::env::var("INSTANCE_ID").ok(),
lakebase_mode: cli.lakebase_mode,
+ build_tag: BUILD_TAG.to_string(),
+ control_plane_uri: cli.control_plane_uri,
+ config_path_test_only: cli.config,
},
config,
)?;
@@ -238,8 +252,14 @@ fn main() -> Result<()> {
deinit_and_exit(tracing_provider, exit_code);
}
-fn init(dev_mode: bool) -> Result