Compare commits

..

1 Commits

Author SHA1 Message Date
Konstantin Knizhnik
d05eabaedb Change expected error message for test_physical_replication_config_mismatch_max_locks_per_transaction 2025-01-13 21:51:08 +02:00
351 changed files with 7085 additions and 21820 deletions

View File

@@ -3,7 +3,6 @@ name: Bug Template
about: Used for describing bugs
title: ''
labels: t/bug
type: Bug
assignees: ''
---

View File

@@ -4,7 +4,6 @@ about: A set of related tasks contributing towards specific outcome, comprising
more than 1 week of work.
title: 'Epic: '
labels: t/Epic
type: Epic
assignees: ''
---

View File

@@ -4,7 +4,6 @@ self-hosted-runner:
- large
- large-arm64
- small
- small-metal
- small-arm64
- us-east-2
config-variables:
@@ -26,5 +25,3 @@ config-variables:
- PGREGRESS_PG17_PROJECT_ID
- SLACK_ON_CALL_QA_STAGING_STREAM
- DEV_AWS_OIDC_ROLE_MANAGE_BENCHMARK_EC2_VMS_ARN
- SLACK_ON_CALL_STORAGE_STAGING_STREAM
- SLACK_CICD_CHANNEL_ID

View File

@@ -17,34 +17,6 @@ inputs:
compute_units:
description: '[Min, Max] compute units'
default: '[1, 1]'
# settings below only needed if you want the project to be sharded from the beginning
shard_split_project:
description: 'by default new projects are not shard-split, specify true to shard-split'
required: false
default: 'false'
admin_api_key:
description: 'Admin API Key needed for shard-splitting. Must be specified if shard_split_project is true'
required: false
shard_count:
description: 'Number of shards to split the project into, only applies if shard_split_project is true'
required: false
default: '8'
stripe_size:
description: 'Stripe size, optional, in 8kiB pages. e.g. set 2048 for 16MB stripes. Default is 128 MiB, only applies if shard_split_project is true'
required: false
default: '32768'
psql_path:
description: 'Path to psql binary - it is caller responsibility to provision the psql binary'
required: false
default: '/tmp/neon/pg_install/v16/bin/psql'
libpq_lib_path:
description: 'Path to directory containing libpq library - it is caller responsibility to provision the libpq library'
required: false
default: '/tmp/neon/pg_install/v16/lib'
project_settings:
description: 'A JSON object with project settings'
required: false
default: '{}'
outputs:
dsn:
@@ -76,7 +48,7 @@ runs:
\"provisioner\": \"k8s-neonvm\",
\"autoscaling_limit_min_cu\": ${MIN_CU},
\"autoscaling_limit_max_cu\": ${MAX_CU},
\"settings\": ${PROJECT_SETTINGS}
\"settings\": { }
}
}")
@@ -91,23 +63,6 @@ runs:
echo "project_id=${project_id}" >> $GITHUB_OUTPUT
echo "Project ${project_id} has been created"
if [ "${SHARD_SPLIT_PROJECT}" = "true" ]; then
# determine tenant ID
TENANT_ID=`${PSQL} ${dsn} -t -A -c "SHOW neon.tenant_id"`
echo "Splitting project ${project_id} with tenant_id ${TENANT_ID} into $((SHARD_COUNT)) shards with stripe size $((STRIPE_SIZE))"
echo "Sending PUT request to https://${API_HOST}/regions/${REGION_ID}/api/v1/admin/storage/proxy/control/v1/tenant/${TENANT_ID}/shard_split"
echo "with body {\"new_shard_count\": $((SHARD_COUNT)), \"new_stripe_size\": $((STRIPE_SIZE))}"
# we need an ADMIN API KEY to invoke storage controller API for shard splitting (bash -u above checks that the variable is set)
curl -X PUT \
"https://${API_HOST}/regions/${REGION_ID}/api/v1/admin/storage/proxy/control/v1/tenant/${TENANT_ID}/shard_split" \
-H "Accept: application/json" -H "Content-Type: application/json" -H "Authorization: Bearer ${ADMIN_API_KEY}" \
-d "{\"new_shard_count\": $SHARD_COUNT, \"new_stripe_size\": $STRIPE_SIZE}"
fi
env:
API_HOST: ${{ inputs.api_host }}
API_KEY: ${{ inputs.api_key }}
@@ -115,10 +70,3 @@ runs:
POSTGRES_VERSION: ${{ inputs.postgres_version }}
MIN_CU: ${{ fromJSON(inputs.compute_units)[0] }}
MAX_CU: ${{ fromJSON(inputs.compute_units)[1] }}
SHARD_SPLIT_PROJECT: ${{ inputs.shard_split_project }}
ADMIN_API_KEY: ${{ inputs.admin_api_key }}
SHARD_COUNT: ${{ inputs.shard_count }}
STRIPE_SIZE: ${{ inputs.stripe_size }}
PSQL: ${{ inputs.psql_path }}
LD_LIBRARY_PATH: ${{ inputs.libpq_lib_path }}
PROJECT_SETTINGS: ${{ inputs.project_settings }}

View File

@@ -1,5 +1,4 @@
rust_code: ['**/*.rs', '**/Cargo.toml', '**/Cargo.lock']
rust_dependencies: ['**/Cargo.lock']
v14: ['vendor/postgres-v14/**', 'Makefile', 'pgxn/**']
v15: ['vendor/postgres-v15/**', 'Makefile', 'pgxn/**']

View File

@@ -17,7 +17,7 @@ jobs:
strategy:
fail-fast: false
matrix:
platform: [ aws-rds-postgres, aws-aurora-serverless-v2-postgres, neon, neon_pg17 ]
platform: [ aws-rds-postgres, aws-aurora-serverless-v2-postgres, neon ]
database: [ clickbench, tpch, userexample ]
env:
@@ -41,9 +41,6 @@ jobs:
neon)
CONNSTR=${{ secrets.BENCHMARK_CAPTEST_CONNSTR }}
;;
neon_pg17)
CONNSTR=${{ secrets.BENCHMARK_CAPTEST_CONNSTR_PG17 }}
;;
aws-rds-postgres)
CONNSTR=${{ secrets.BENCHMARK_RDS_POSTGRES_CONNSTR }}
;;

View File

@@ -158,6 +158,8 @@ jobs:
- name: Run cargo build
run: |
PQ_LIB_DIR=$(pwd)/pg_install/v16/lib
export PQ_LIB_DIR
${cov_prefix} mold -run cargo build $CARGO_FLAGS $CARGO_FEATURES --bins --tests
# Do install *before* running rust tests because they might recompile the
@@ -215,6 +217,8 @@ jobs:
env:
NEXTEST_RETRIES: 3
run: |
PQ_LIB_DIR=$(pwd)/pg_install/v16/lib
export PQ_LIB_DIR
LD_LIBRARY_PATH=$(pwd)/pg_install/v17/lib
export LD_LIBRARY_PATH
@@ -225,13 +229,8 @@ jobs:
${cov_prefix} cargo nextest run $CARGO_FLAGS $CARGO_FEATURES -E '!package(pageserver)'
# run pageserver tests with different settings
for get_vectored_concurrent_io in sequential sidecar-task; do
for io_engine in std-fs tokio-epoll-uring ; do
NEON_PAGESERVER_UNIT_TEST_GET_VECTORED_CONCURRENT_IO=$get_vectored_concurrent_io \
NEON_PAGESERVER_UNIT_TEST_VIRTUAL_FILE_IOENGINE=$io_engine \
${cov_prefix} \
cargo nextest run $CARGO_FLAGS $CARGO_FEATURES -E 'package(pageserver)'
done
for io_engine in std-fs tokio-epoll-uring ; do
NEON_PAGESERVER_UNIT_TEST_VIRTUAL_FILE_IOENGINE=$io_engine ${cov_prefix} cargo nextest run $CARGO_FLAGS $CARGO_FEATURES -E 'package(pageserver)'
done
# Run separate tests for real S3
@@ -267,26 +266,6 @@ jobs:
path: /tmp/neon
aws-oicd-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
- name: Check diesel schema
if: inputs.build-type == 'release' && inputs.arch == 'x64'
env:
DATABASE_URL: postgresql://localhost:1235/storage_controller
POSTGRES_DISTRIB_DIR: /tmp/neon/pg_install
run: |
/tmp/neon/bin/neon_local init
/tmp/neon/bin/neon_local storage_controller start
diesel print-schema > storage_controller/src/schema.rs
if [ -n "$(git diff storage_controller/src/schema.rs)" ]; then
echo >&2 "Uncommitted changes in diesel schema"
git diff .
exit 1
fi
/tmp/neon/bin/neon_local storage_controller stop
# XXX: keep this after the binaries.list is formed, so the coverage can properly work later
- name: Merge and upload coverage data
if: inputs.build-type == 'debug'
@@ -335,7 +314,6 @@ jobs:
CHECK_ONDISK_DATA_COMPATIBILITY: nonempty
BUILD_TAG: ${{ inputs.build-tag }}
PAGESERVER_VIRTUAL_FILE_IO_ENGINE: tokio-epoll-uring
PAGESERVER_GET_VECTORED_CONCURRENT_IO: sidecar-task
USE_LFC: ${{ matrix.lfc_state == 'with-lfc' && 'true' || 'false' }}
# Temporary disable this step until we figure out why it's so flaky

View File

@@ -1,89 +0,0 @@
name: Check Codestyle Rust
on:
workflow_call:
inputs:
build-tools-image:
description: "build-tools image"
required: true
type: string
archs:
description: "Json array of architectures to run on"
type: string
defaults:
run:
shell: bash -euxo pipefail {0}
# No permission for GITHUB_TOKEN by default; the **minimal required** set of permissions should be granted in each job.
permissions: {}
jobs:
check-codestyle-rust:
strategy:
matrix:
arch: ${{ fromJson(inputs.archs) }}
runs-on: ${{ fromJson(format('["self-hosted", "{0}"]', matrix.arch == 'arm64' && 'small-arm64' || 'small')) }}
container:
image: ${{ inputs.build-tools-image }}
credentials:
username: ${{ secrets.NEON_DOCKERHUB_USERNAME }}
password: ${{ secrets.NEON_DOCKERHUB_PASSWORD }}
options: --init
steps:
- name: Checkout
uses: actions/checkout@v4
with:
submodules: true
- name: Cache cargo deps
uses: actions/cache@v4
with:
path: |
~/.cargo/registry
!~/.cargo/registry/src
~/.cargo/git
target
key: v1-${{ runner.os }}-${{ runner.arch }}-cargo-${{ hashFiles('./Cargo.lock') }}-${{ hashFiles('./rust-toolchain.toml') }}-rust
# Some of our rust modules use FFI and need those to be checked
- name: Get postgres headers
run: make postgres-headers -j$(nproc)
# cargo hack runs the given cargo subcommand (clippy in this case) for all feature combinations.
# This will catch compiler & clippy warnings in all feature combinations.
# TODO: use cargo hack for build and test as well, but, that's quite expensive.
# NB: keep clippy args in sync with ./run_clippy.sh
#
# The only difference between "clippy --debug" and "clippy --release" is that in --release mode,
# #[cfg(debug_assertions)] blocks are not built. It's not worth building everything for second
# time just for that, so skip "clippy --release".
- run: |
CLIPPY_COMMON_ARGS="$( source .neon_clippy_args; echo "$CLIPPY_COMMON_ARGS")"
if [ "$CLIPPY_COMMON_ARGS" = "" ]; then
echo "No clippy args found in .neon_clippy_args"
exit 1
fi
echo "CLIPPY_COMMON_ARGS=${CLIPPY_COMMON_ARGS}" >> $GITHUB_ENV
- name: Run cargo clippy (debug)
run: cargo hack --features default --ignore-unknown-features --feature-powerset clippy $CLIPPY_COMMON_ARGS
- name: Check documentation generation
run: cargo doc --workspace --no-deps --document-private-items
env:
RUSTDOCFLAGS: "-Dwarnings -Arustdoc::private_intra_doc_links"
# Use `${{ !cancelled() }}` to run quck tests after the longer clippy run
- name: Check formatting
if: ${{ !cancelled() }}
run: cargo fmt --all -- --check
# https://github.com/facebookincubator/cargo-guppy/tree/bec4e0eb29dcd1faac70b1b5360267fc02bf830e/tools/cargo-hakari#2-keep-the-workspace-hack-up-to-date-in-ci
- name: Check rust dependencies
if: ${{ !cancelled() }}
run: |
cargo hakari generate --diff # workspace-hack Cargo.toml is up-to-date
cargo hakari manage-deps --dry-run # all workspace crates depend on workspace-hack

View File

@@ -94,9 +94,7 @@ jobs:
echo "LABELS_TO_ADD=${LABELS_TO_ADD}" >> ${GITHUB_OUTPUT}
echo "LABELS_TO_REMOVE=${LABELS_TO_REMOVE}" >> ${GITHUB_OUTPUT}
- uses: actions/checkout@v4
with:
ref: ${{ github.event.pull_request.head.sha }}
- run: gh pr checkout "${PR_NUMBER}"
- run: git checkout -b "${BRANCH}"

View File

@@ -63,15 +63,11 @@ jobs:
fail-fast: false
matrix:
include:
- PG_VERSION: 16
- DEFAULT_PG_VERSION: 16
PLATFORM: "neon-staging"
region_id: ${{ github.event.inputs.region_id || 'aws-us-east-2' }}
RUNNER: [ self-hosted, us-east-2, x64 ]
- PG_VERSION: 17
PLATFORM: "neon-staging"
region_id: ${{ github.event.inputs.region_id || 'aws-us-east-2' }}
RUNNER: [ self-hosted, us-east-2, x64 ]
- PG_VERSION: 16
- DEFAULT_PG_VERSION: 16
PLATFORM: "azure-staging"
region_id: 'azure-eastus2'
RUNNER: [ self-hosted, eastus2, x64 ]
@@ -79,7 +75,7 @@ jobs:
TEST_PG_BENCH_DURATIONS_MATRIX: "300"
TEST_PG_BENCH_SCALES_MATRIX: "10,100"
POSTGRES_DISTRIB_DIR: /tmp/neon/pg_install
PG_VERSION: ${{ matrix.PG_VERSION }}
DEFAULT_PG_VERSION: ${{ matrix.DEFAULT_PG_VERSION }}
TEST_OUTPUT: /tmp/test_output
BUILD_TYPE: remote
SAVE_PERF_REPORT: ${{ github.event.inputs.save_perf_report || ( github.ref_name == 'main' ) }}
@@ -116,7 +112,7 @@ jobs:
uses: ./.github/actions/neon-project-create
with:
region_id: ${{ matrix.region_id }}
postgres_version: ${{ env.PG_VERSION }}
postgres_version: ${{ env.DEFAULT_PG_VERSION }}
api_key: ${{ secrets.NEON_STAGING_API_KEY }}
- name: Run benchmark
@@ -126,7 +122,7 @@ jobs:
test_selection: performance
run_in_parallel: false
save_perf_report: ${{ env.SAVE_PERF_REPORT }}
pg_version: ${{ env.PG_VERSION }}
pg_version: ${{ env.DEFAULT_PG_VERSION }}
aws-oicd-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
# Set --sparse-ordering option of pytest-order plugin
# to ensure tests are running in order of appears in the file.
@@ -317,11 +313,7 @@ jobs:
{ "pg_version": 16, "region_id": "azure-eastus2", "platform": "neonvm-azure-captest-freetier", "db_size": "3gb" ,"runner": '"$runner_azure"', "image": "neondatabase/build-tools:pinned-bookworm" },
{ "pg_version": 16, "region_id": "azure-eastus2", "platform": "neonvm-azure-captest-new", "db_size": "10gb","runner": '"$runner_azure"', "image": "neondatabase/build-tools:pinned-bookworm" },
{ "pg_version": 16, "region_id": "azure-eastus2", "platform": "neonvm-azure-captest-new", "db_size": "50gb","runner": '"$runner_azure"', "image": "neondatabase/build-tools:pinned-bookworm" },
{ "pg_version": 16, "region_id": "'"$region_id_default"'", "platform": "neonvm-captest-sharding-reuse", "db_size": "50gb","runner": '"$runner_default"', "image": "'"$image_default"'" },
{ "pg_version": 17, "region_id": "'"$region_id_default"'", "platform": "neonvm-captest-freetier", "db_size": "3gb" ,"runner": '"$runner_default"', "image": "'"$image_default"'" },
{ "pg_version": 17, "region_id": "'"$region_id_default"'", "platform": "neonvm-captest-new", "db_size": "10gb","runner": '"$runner_default"', "image": "'"$image_default"'" },
{ "pg_version": 17, "region_id": "'"$region_id_default"'", "platform": "neonvm-captest-new-many-tables","db_size": "10gb","runner": '"$runner_default"', "image": "'"$image_default"'" },
{ "pg_version": 17, "region_id": "'"$region_id_default"'", "platform": "neonvm-captest-new", "db_size": "50gb","runner": '"$runner_default"', "image": "'"$image_default"'" }]
{ "pg_version": 16, "region_id": "'"$region_id_default"'", "platform": "neonvm-captest-sharding-reuse", "db_size": "50gb","runner": '"$runner_default"', "image": "'"$image_default"'" }]
}'
if [ "$(date +%A)" = "Saturday" ] || [ ${RUN_AWS_RDS_AND_AURORA} = "true" ]; then
@@ -337,15 +329,12 @@ jobs:
matrix='{
"platform": [
"neonvm-captest-reuse"
],
"pg_version" : [
16,17
]
}'
if [ "$(date +%A)" = "Saturday" ] || [ ${RUN_AWS_RDS_AND_AURORA} = "true" ]; then
matrix=$(echo "$matrix" | jq '.include += [{ "pg_version": 16, "platform": "rds-postgres" },
{ "pg_version": 16, "platform": "rds-aurora" }]')
matrix=$(echo "$matrix" | jq '.include += [{ "platform": "rds-postgres" },
{ "platform": "rds-aurora" }]')
fi
echo "matrix=$(echo "$matrix" | jq --compact-output '.')" >> $GITHUB_OUTPUT
@@ -357,14 +346,14 @@ jobs:
"platform": [
"neonvm-captest-reuse"
],
"pg_version" : [
16,17
"scale": [
"10"
]
}'
if [ "$(date +%A)" = "Saturday" ] || [ ${RUN_AWS_RDS_AND_AURORA} = "true" ]; then
matrix=$(echo "$matrix" | jq '.include += [{ "pg_version": 16, "platform": "rds-postgres" },
{ "pg_version": 16, "platform": "rds-aurora" }]')
matrix=$(echo "$matrix" | jq '.include += [{ "platform": "rds-postgres", "scale": "10" },
{ "platform": "rds-aurora", "scale": "10" }]')
fi
echo "matrix=$(echo "$matrix" | jq --compact-output '.')" >> $GITHUB_OUTPUT
@@ -389,7 +378,7 @@ jobs:
TEST_PG_BENCH_DURATIONS_MATRIX: "60m"
TEST_PG_BENCH_SCALES_MATRIX: ${{ matrix.db_size }}
POSTGRES_DISTRIB_DIR: /tmp/neon/pg_install
PG_VERSION: ${{ matrix.pg_version }}
DEFAULT_PG_VERSION: ${{ matrix.pg_version }}
TEST_OUTPUT: /tmp/test_output
BUILD_TYPE: remote
SAVE_PERF_REPORT: ${{ github.event.inputs.save_perf_report || ( github.ref_name == 'main' ) }}
@@ -427,7 +416,7 @@ jobs:
uses: ./.github/actions/neon-project-create
with:
region_id: ${{ matrix.region_id }}
postgres_version: ${{ env.PG_VERSION }}
postgres_version: ${{ env.DEFAULT_PG_VERSION }}
api_key: ${{ secrets.NEON_STAGING_API_KEY }}
compute_units: ${{ (contains(matrix.platform, 'captest-freetier') && '[0.25, 0.25]') || '[1, 1]' }}
@@ -458,7 +447,7 @@ jobs:
echo "connstr=${CONNSTR}" >> $GITHUB_OUTPUT
# we want to compare Neon project OLTP throughput and latency at scale factor 10 GB
# we want to compare Neon project OLTP throughput and latency at scale factor 10 GB
# without (neonvm-captest-new)
# and with (neonvm-captest-new-many-tables) many relations in the database
- name: Create many relations before the run
@@ -470,7 +459,7 @@ jobs:
run_in_parallel: false
save_perf_report: ${{ env.SAVE_PERF_REPORT }}
extra_params: -m remote_cluster --timeout 21600 -k test_perf_many_relations
pg_version: ${{ env.PG_VERSION }}
pg_version: ${{ env.DEFAULT_PG_VERSION }}
aws-oicd-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
env:
BENCHMARK_CONNSTR: ${{ steps.set-up-connstr.outputs.connstr }}
@@ -486,7 +475,7 @@ jobs:
run_in_parallel: false
save_perf_report: ${{ env.SAVE_PERF_REPORT }}
extra_params: -m remote_cluster --timeout 21600 -k test_pgbench_remote_init
pg_version: ${{ env.PG_VERSION }}
pg_version: ${{ env.DEFAULT_PG_VERSION }}
aws-oicd-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
env:
BENCHMARK_CONNSTR: ${{ steps.set-up-connstr.outputs.connstr }}
@@ -501,7 +490,7 @@ jobs:
run_in_parallel: false
save_perf_report: ${{ env.SAVE_PERF_REPORT }}
extra_params: -m remote_cluster --timeout 21600 -k test_pgbench_remote_simple_update
pg_version: ${{ env.PG_VERSION }}
pg_version: ${{ env.DEFAULT_PG_VERSION }}
aws-oicd-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
env:
BENCHMARK_CONNSTR: ${{ steps.set-up-connstr.outputs.connstr }}
@@ -516,7 +505,7 @@ jobs:
run_in_parallel: false
save_perf_report: ${{ env.SAVE_PERF_REPORT }}
extra_params: -m remote_cluster --timeout 21600 -k test_pgbench_remote_select_only
pg_version: ${{ env.PG_VERSION }}
pg_version: ${{ env.DEFAULT_PG_VERSION }}
aws-oicd-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
env:
BENCHMARK_CONNSTR: ${{ steps.set-up-connstr.outputs.connstr }}
@@ -560,19 +549,14 @@ jobs:
include:
- PLATFORM: "neonvm-captest-pgvector"
RUNNER: [ self-hosted, us-east-2, x64 ]
postgres_version: 16
- PLATFORM: "neonvm-captest-pgvector-pg17"
RUNNER: [ self-hosted, us-east-2, x64 ]
postgres_version: 17
- PLATFORM: "azure-captest-pgvector"
RUNNER: [ self-hosted, eastus2, x64 ]
postgres_version: 16
env:
TEST_PG_BENCH_DURATIONS_MATRIX: "15m"
TEST_PG_BENCH_SCALES_MATRIX: "1"
POSTGRES_DISTRIB_DIR: /tmp/neon/pg_install
PG_VERSION: ${{ matrix.postgres_version }}
DEFAULT_PG_VERSION: 16
TEST_OUTPUT: /tmp/test_output
BUILD_TYPE: remote
@@ -590,20 +574,32 @@ jobs:
steps:
- uses: actions/checkout@v4
- name: Configure AWS credentials
uses: aws-actions/configure-aws-credentials@v4
with:
aws-region: eu-central-1
role-to-assume: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
role-duration-seconds: 18000 # 5 hours
# until https://github.com/neondatabase/neon/issues/8275 is fixed we temporarily install postgresql-16
# instead of using Neon artifacts containing pgbench
- name: Install postgresql-16 where pytest expects it
run: |
# Just to make it easier to test things locally on macOS (with arm64)
arch=$(uname -m | sed 's/x86_64/amd64/g' | sed 's/aarch64/arm64/g')
- name: Download Neon artifact
uses: ./.github/actions/download
with:
name: neon-${{ runner.os }}-${{ runner.arch }}-release-artifact
path: /tmp/neon/
prefix: latest
aws-oicd-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
cd /home/nonroot
wget -q "https://apt.postgresql.org/pub/repos/apt/pool/main/p/postgresql-17/libpq5_17.2-1.pgdg120+1_${arch}.deb"
wget -q "https://apt.postgresql.org/pub/repos/apt/pool/main/p/postgresql-16/postgresql-client-16_16.6-1.pgdg120+1_${arch}.deb"
wget -q "https://apt.postgresql.org/pub/repos/apt/pool/main/p/postgresql-16/postgresql-16_16.6-1.pgdg120+1_${arch}.deb"
dpkg -x libpq5_17.2-1.pgdg120+1_${arch}.deb pg
dpkg -x postgresql-16_16.6-1.pgdg120+1_${arch}.deb pg
dpkg -x postgresql-client-16_16.6-1.pgdg120+1_${arch}.deb pg
mkdir -p /tmp/neon/pg_install/v16/bin
ln -s /home/nonroot/pg/usr/lib/postgresql/16/bin/pgbench /tmp/neon/pg_install/v16/bin/pgbench
ln -s /home/nonroot/pg/usr/lib/postgresql/16/bin/psql /tmp/neon/pg_install/v16/bin/psql
ln -s /home/nonroot/pg/usr/lib/$(uname -m)-linux-gnu /tmp/neon/pg_install/v16/lib
LD_LIBRARY_PATH="/home/nonroot/pg/usr/lib/$(uname -m)-linux-gnu:${LD_LIBRARY_PATH:-}"
export LD_LIBRARY_PATH
echo "LD_LIBRARY_PATH=${LD_LIBRARY_PATH}" >> ${GITHUB_ENV}
/tmp/neon/pg_install/v16/bin/pgbench --version
/tmp/neon/pg_install/v16/bin/psql --version
- name: Set up Connection String
id: set-up-connstr
@@ -612,9 +608,6 @@ jobs:
neonvm-captest-pgvector)
CONNSTR=${{ secrets.BENCHMARK_PGVECTOR_CONNSTR }}
;;
neonvm-captest-pgvector-pg17)
CONNSTR=${{ secrets.BENCHMARK_PGVECTOR_CONNSTR_PG17 }}
;;
azure-captest-pgvector)
CONNSTR=${{ secrets.BENCHMARK_PGVECTOR_CONNSTR_AZURE }}
;;
@@ -626,6 +619,13 @@ jobs:
echo "connstr=${CONNSTR}" >> $GITHUB_OUTPUT
- name: Configure AWS credentials
uses: aws-actions/configure-aws-credentials@v4
with:
aws-region: eu-central-1
role-to-assume: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
role-duration-seconds: 18000 # 5 hours
- name: Benchmark pgvector hnsw indexing
uses: ./.github/actions/run-python-test-set
with:
@@ -634,7 +634,7 @@ jobs:
run_in_parallel: false
save_perf_report: ${{ env.SAVE_PERF_REPORT }}
extra_params: -m remote_cluster --timeout 21600 -k test_pgvector_indexing
pg_version: ${{ env.PG_VERSION }}
pg_version: ${{ env.DEFAULT_PG_VERSION }}
aws-oicd-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
env:
VIP_VAP_ACCESS_TOKEN: "${{ secrets.VIP_VAP_ACCESS_TOKEN }}"
@@ -649,7 +649,7 @@ jobs:
run_in_parallel: false
save_perf_report: ${{ env.SAVE_PERF_REPORT }}
extra_params: -m remote_cluster --timeout 21600
pg_version: ${{ env.PG_VERSION }}
pg_version: ${{ env.DEFAULT_PG_VERSION }}
aws-oicd-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
env:
BENCHMARK_CONNSTR: ${{ steps.set-up-connstr.outputs.connstr }}
@@ -696,7 +696,7 @@ jobs:
env:
POSTGRES_DISTRIB_DIR: /tmp/neon/pg_install
PG_VERSION: ${{ matrix.pg_version }}
DEFAULT_PG_VERSION: 16
TEST_OUTPUT: /tmp/test_output
TEST_OLAP_COLLECT_EXPLAIN: ${{ github.event.inputs.collect_olap_explain }}
TEST_OLAP_COLLECT_PG_STAT_STATEMENTS: ${{ github.event.inputs.collect_pg_stat_statements }}
@@ -739,18 +739,7 @@ jobs:
run: |
case "${PLATFORM}" in
neonvm-captest-reuse)
case "${PG_VERSION}" in
16)
CONNSTR=${{ secrets.BENCHMARK_CAPTEST_CLICKBENCH_10M_CONNSTR }}
;;
17)
CONNSTR=${{ secrets.BENCHMARK_CAPTEST_CLICKBENCH_CONNSTR_PG17 }}
;;
*)
echo >&2 "Unsupported PG_VERSION=${PG_VERSION} for PLATFORM=${PLATFORM}"
exit 1
;;
esac
CONNSTR=${{ secrets.BENCHMARK_CAPTEST_CLICKBENCH_10M_CONNSTR }}
;;
rds-aurora)
CONNSTR=${{ secrets.BENCHMARK_RDS_AURORA_CLICKBENCH_10M_CONNSTR }}
@@ -774,7 +763,7 @@ jobs:
run_in_parallel: false
save_perf_report: ${{ env.SAVE_PERF_REPORT }}
extra_params: -m remote_cluster --timeout 43200 -k test_clickbench
pg_version: ${{ env.PG_VERSION }}
pg_version: ${{ env.DEFAULT_PG_VERSION }}
aws-oicd-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
env:
VIP_VAP_ACCESS_TOKEN: "${{ secrets.VIP_VAP_ACCESS_TOKEN }}"
@@ -810,7 +799,7 @@ jobs:
# We might change it after https://github.com/neondatabase/neon/issues/2900.
#
# *_TPCH_S10_CONNSTR: DB generated with scale factor 10 (~10 GB)
# if: ${{ !cancelled() && (github.event.inputs.run_only_pgvector_tests == 'false' || github.event.inputs.run_only_pgvector_tests == null) }}
if: ${{ !cancelled() && (github.event.inputs.run_only_pgvector_tests == 'false' || github.event.inputs.run_only_pgvector_tests == null) }}
permissions:
contents: write
statuses: write
@@ -823,11 +812,12 @@ jobs:
env:
POSTGRES_DISTRIB_DIR: /tmp/neon/pg_install
PG_VERSION: ${{ matrix.pg_version }}
DEFAULT_PG_VERSION: 16
TEST_OUTPUT: /tmp/test_output
BUILD_TYPE: remote
SAVE_PERF_REPORT: ${{ github.event.inputs.save_perf_report || ( github.ref_name == 'main' ) }}
PLATFORM: ${{ matrix.platform }}
TEST_OLAP_SCALE: ${{ matrix.scale }}
runs-on: [ self-hosted, us-east-2, x64 ]
container:
@@ -859,24 +849,13 @@ jobs:
run: |
case "${PLATFORM}" in
neonvm-captest-reuse)
case "${PG_VERSION}" in
16)
CONNSTR_SECRET_NAME="BENCHMARK_CAPTEST_TPCH_S10_CONNSTR"
;;
17)
CONNSTR_SECRET_NAME="BENCHMARK_CAPTEST_TPCH_CONNSTR_PG17"
;;
*)
echo >&2 "Unsupported PG_VERSION=${PG_VERSION} for PLATFORM=${PLATFORM}"
exit 1
;;
esac
ENV_PLATFORM=CAPTEST_TPCH
;;
rds-aurora)
CONNSTR_SECRET_NAME="BENCHMARK_RDS_AURORA_TPCH_S10_CONNSTR"
ENV_PLATFORM=RDS_AURORA_TPCH
;;
rds-postgres)
CONNSTR_SECRET_NAME="BENCHMARK_RDS_POSTGRES_TPCH_S10_CONNSTR"
ENV_PLATFORM=RDS_POSTGRES_TPCH
;;
*)
echo >&2 "Unknown PLATFORM=${PLATFORM}. Allowed only 'neonvm-captest-reuse', 'rds-aurora', or 'rds-postgres'"
@@ -884,6 +863,7 @@ jobs:
;;
esac
CONNSTR_SECRET_NAME="BENCHMARK_${ENV_PLATFORM}_S${TEST_OLAP_SCALE}_CONNSTR"
echo "CONNSTR_SECRET_NAME=${CONNSTR_SECRET_NAME}" >> $GITHUB_ENV
- name: Set up Connection String
@@ -901,13 +881,13 @@ jobs:
run_in_parallel: false
save_perf_report: ${{ env.SAVE_PERF_REPORT }}
extra_params: -m remote_cluster --timeout 21600 -k test_tpch
pg_version: ${{ env.PG_VERSION }}
pg_version: ${{ env.DEFAULT_PG_VERSION }}
aws-oicd-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
env:
VIP_VAP_ACCESS_TOKEN: "${{ secrets.VIP_VAP_ACCESS_TOKEN }}"
PERF_TEST_RESULT_CONNSTR: "${{ secrets.PERF_TEST_RESULT_CONNSTR }}"
BENCHMARK_CONNSTR: ${{ steps.set-up-connstr.outputs.connstr }}
TEST_OLAP_SCALE: 10
TEST_OLAP_SCALE: ${{ matrix.scale }}
- name: Create Allure report
id: create-allure-report
@@ -929,7 +909,7 @@ jobs:
SLACK_BOT_TOKEN: ${{ secrets.SLACK_BOT_TOKEN }}
user-examples-compare:
# if: ${{ !cancelled() && (github.event.inputs.run_only_pgvector_tests == 'false' || github.event.inputs.run_only_pgvector_tests == null) }}
if: ${{ !cancelled() && (github.event.inputs.run_only_pgvector_tests == 'false' || github.event.inputs.run_only_pgvector_tests == null) }}
permissions:
contents: write
statuses: write
@@ -942,7 +922,7 @@ jobs:
env:
POSTGRES_DISTRIB_DIR: /tmp/neon/pg_install
PG_VERSION: ${{ matrix.pg_version }}
DEFAULT_PG_VERSION: 16
TEST_OUTPUT: /tmp/test_output
BUILD_TYPE: remote
SAVE_PERF_REPORT: ${{ github.event.inputs.save_perf_report || ( github.ref_name == 'main' ) }}
@@ -979,18 +959,7 @@ jobs:
run: |
case "${PLATFORM}" in
neonvm-captest-reuse)
case "${PG_VERSION}" in
16)
CONNSTR=${{ secrets.BENCHMARK_USER_EXAMPLE_CAPTEST_CONNSTR }}
;;
17)
CONNSTR=${{ secrets.BENCHMARK_CAPTEST_USER_EXAMPLE_CONNSTR_PG17 }}
;;
*)
echo >&2 "Unsupported PG_VERSION=${PG_VERSION} for PLATFORM=${PLATFORM}"
exit 1
;;
esac
CONNSTR=${{ secrets.BENCHMARK_USER_EXAMPLE_CAPTEST_CONNSTR }}
;;
rds-aurora)
CONNSTR=${{ secrets.BENCHMARK_USER_EXAMPLE_RDS_AURORA_CONNSTR }}
@@ -1014,7 +983,7 @@ jobs:
run_in_parallel: false
save_perf_report: ${{ env.SAVE_PERF_REPORT }}
extra_params: -m remote_cluster --timeout 21600 -k test_user_examples
pg_version: ${{ env.PG_VERSION }}
pg_version: ${{ env.DEFAULT_PG_VERSION }}
aws-oicd-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
env:
VIP_VAP_ACCESS_TOKEN: "${{ secrets.VIP_VAP_ACCESS_TOKEN }}"

View File

@@ -235,7 +235,7 @@ jobs:
echo 'CPPFLAGS=-I/usr/local/opt/openssl@3/include' >> $GITHUB_ENV
- name: Run cargo build (only for v17)
run: cargo build --all --release -j$(sysctl -n hw.ncpu)
run: PQ_LIB_DIR=$(pwd)/pg_install/v17/lib cargo build --all --release -j$(sysctl -n hw.ncpu)
- name: Check that no warnings are produced (only for v17)
run: ./run_clippy.sh

View File

@@ -45,26 +45,6 @@ jobs:
run cancel-previous-in-concurrency-group.yml \
--field concurrency_group="${{ env.E2E_CONCURRENCY_GROUP }}"
files-changed:
needs: [ check-permissions ]
runs-on: [ self-hosted, small ]
timeout-minutes: 3
outputs:
check-rust-dependencies: ${{ steps.files-changed.outputs.rust_dependencies }}
steps:
- name: Checkout
uses: actions/checkout@v4
with:
submodules: true
- name: Check for file changes
uses: dorny/paths-filter@de90cc6fb38fc0963ad72b210f1f284cd68cea36 # v3.0.2
id: files-changed
with:
token: ${{ secrets.GITHUB_TOKEN }}
filters: .github/file-filters.yaml
tag:
needs: [ check-permissions ]
runs-on: [ self-hosted, small ]
@@ -184,19 +164,77 @@ jobs:
check-codestyle-rust:
needs: [ check-permissions, build-build-tools-image ]
uses: ./.github/workflows/_check-codestyle-rust.yml
with:
build-tools-image: ${{ needs.build-build-tools-image.outputs.image }}-bookworm
archs: '["x64", "arm64"]'
secrets: inherit
strategy:
matrix:
arch: [ x64, arm64 ]
runs-on: ${{ fromJson(format('["self-hosted", "{0}"]', matrix.arch == 'arm64' && 'small-arm64' || 'small')) }}
check-dependencies-rust:
needs: [ files-changed, build-build-tools-image ]
if: ${{ needs.files-changed.outputs.check-rust-dependencies == 'true' }}
uses: ./.github/workflows/cargo-deny.yml
with:
build-tools-image: ${{ needs.build-build-tools-image.outputs.image }}-bookworm
secrets: inherit
container:
image: ${{ needs.build-build-tools-image.outputs.image }}-bookworm
credentials:
username: ${{ secrets.NEON_DOCKERHUB_USERNAME }}
password: ${{ secrets.NEON_DOCKERHUB_PASSWORD }}
options: --init
steps:
- name: Checkout
uses: actions/checkout@v4
with:
submodules: true
- name: Cache cargo deps
uses: actions/cache@v4
with:
path: |
~/.cargo/registry
!~/.cargo/registry/src
~/.cargo/git
target
key: v1-${{ runner.os }}-${{ runner.arch }}-cargo-${{ hashFiles('./Cargo.lock') }}-${{ hashFiles('./rust-toolchain.toml') }}-rust
# Some of our rust modules use FFI and need those to be checked
- name: Get postgres headers
run: make postgres-headers -j$(nproc)
# cargo hack runs the given cargo subcommand (clippy in this case) for all feature combinations.
# This will catch compiler & clippy warnings in all feature combinations.
# TODO: use cargo hack for build and test as well, but, that's quite expensive.
# NB: keep clippy args in sync with ./run_clippy.sh
#
# The only difference between "clippy --debug" and "clippy --release" is that in --release mode,
# #[cfg(debug_assertions)] blocks are not built. It's not worth building everything for second
# time just for that, so skip "clippy --release".
- run: |
CLIPPY_COMMON_ARGS="$( source .neon_clippy_args; echo "$CLIPPY_COMMON_ARGS")"
if [ "$CLIPPY_COMMON_ARGS" = "" ]; then
echo "No clippy args found in .neon_clippy_args"
exit 1
fi
echo "CLIPPY_COMMON_ARGS=${CLIPPY_COMMON_ARGS}" >> $GITHUB_ENV
- name: Run cargo clippy (debug)
run: cargo hack --features default --ignore-unknown-features --feature-powerset clippy $CLIPPY_COMMON_ARGS
- name: Check documentation generation
run: cargo doc --workspace --no-deps --document-private-items
env:
RUSTDOCFLAGS: "-Dwarnings -Arustdoc::private_intra_doc_links"
# Use `${{ !cancelled() }}` to run quck tests after the longer clippy run
- name: Check formatting
if: ${{ !cancelled() }}
run: cargo fmt --all -- --check
# https://github.com/facebookincubator/cargo-guppy/tree/bec4e0eb29dcd1faac70b1b5360267fc02bf830e/tools/cargo-hakari#2-keep-the-workspace-hack-up-to-date-in-ci
- name: Check rust dependencies
if: ${{ !cancelled() }}
run: |
cargo hakari generate --diff # workspace-hack Cargo.toml is up-to-date
cargo hakari manage-deps --dry-run # all workspace crates depend on workspace-hack
# https://github.com/EmbarkStudios/cargo-deny
- name: Check rust licenses/bans/advisories/sources
if: ${{ !cancelled() }}
run: cargo deny check --hide-inclusion-graph
build-and-test-locally:
needs: [ tag, build-build-tools-image ]
@@ -270,7 +308,7 @@ jobs:
statuses: write
contents: write
pull-requests: write
runs-on: [ self-hosted, small-metal ]
runs-on: [ self-hosted, small ]
container:
image: ${{ needs.build-build-tools-image.outputs.image }}-bookworm
credentials:
@@ -308,22 +346,25 @@ jobs:
# XXX: no coverage data handling here, since benchmarks are run on release builds,
# while coverage is currently collected for the debug ones
report-benchmarks-results-to-slack:
report-benchmarks-failures:
needs: [ benchmarks, create-test-report ]
if: github.ref_name == 'main' && !cancelled() && contains(fromJSON('["success", "failure"]'), needs.benchmarks.result)
if: github.ref_name == 'main' && failure() && needs.benchmarks.result == 'failure'
permissions:
id-token: write # aws-actions/configure-aws-credentials
statuses: write
contents: write
pull-requests: write
runs-on: ubuntu-22.04
steps:
- uses: slackapi/slack-github-action@v2
- uses: slackapi/slack-github-action@v1
with:
method: chat.postMessage
token: ${{ secrets.SLACK_BOT_TOKEN }}
payload: |
channel: "${{ vars.SLACK_ON_CALL_STORAGE_STAGING_STREAM }}"
text: |
Benchmarks on main: *${{ needs.benchmarks.result }}*
- <${{ needs.create-test-report.outputs.report-url }}|Allure report>
- <${{ github.event.head_commit.url }}|${{ github.sha }}>
channel-id: C060CNA47S9 # on-call-staging-storage-stream
slack-message: |
Benchmarks failed on main <${{ github.event.head_commit.url }}|${{ github.sha }}>
<${{ needs.create-test-report.outputs.report-url }}|Allure report>
env:
SLACK_BOT_TOKEN: ${{ secrets.SLACK_BOT_TOKEN }}
create-test-report:
needs: [ check-permissions, build-and-test-locally, coverage-report, build-build-tools-image, benchmarks ]
@@ -682,7 +723,7 @@ jobs:
push: true
pull: true
file: compute/compute-node.Dockerfile
target: extension-tests
target: neon-pg-ext-test
cache-from: type=registry,ref=cache.neon.build/compute-node-${{ matrix.version.pg }}:cache-${{ matrix.version.debian }}-${{ matrix.arch }}
tags: |
neondatabase/neon-test-extensions-${{ matrix.version.pg }}:${{needs.tag.outputs.build-tag}}-${{ matrix.version.debian }}-${{ matrix.arch }}
@@ -814,17 +855,6 @@ jobs:
username: ${{ secrets.NEON_DOCKERHUB_USERNAME }}
password: ${{ secrets.NEON_DOCKERHUB_PASSWORD }}
- name: Get the last compute release tag
id: get-last-compute-release-tag
env:
GH_TOKEN: ${{ secrets.CI_ACCESS_TOKEN }}
run: |
tag=$(gh api -q '[.[].tag_name | select(startswith("release-compute"))][0]'\
-H "Accept: application/vnd.github+json" \
-H "X-GitHub-Api-Version: 2022-11-28" \
"/repos/${{ github.repository }}/releases")
echo tag=${tag} >> ${GITHUB_OUTPUT}
# `neondatabase/neon` contains multiple binaries, all of them use the same input for the version into the same version formatting library.
# Pick pageserver as currently the only binary with extra "version" features printed in the string to verify.
# Regular pageserver version string looks like
@@ -856,28 +886,14 @@ jobs:
TEST_VERSION_ONLY: ${{ matrix.pg_version }}
run: ./docker-compose/docker_compose_test.sh
- name: Print logs and clean up docker-compose test
if: always()
run: |
docker compose --profile test-extensions -f ./docker-compose/docker-compose.yml logs || true
docker compose --profile test-extensions -f ./docker-compose/docker-compose.yml down
- name: Test extension upgrade
timeout-minutes: 20
if: ${{ needs.tag.outputs.build-tag == github.run_id }}
env:
NEWTAG: ${{ needs.tag.outputs.build-tag }}
OLDTAG: ${{ steps.get-last-compute-release-tag.outputs.tag }}
run: ./docker-compose/test_extensions_upgrade.sh
- name: Print logs and clean up
if: always()
run: |
docker compose --profile test-extensions -f ./docker-compose/docker-compose.yml logs || true
docker compose --profile test-extensions -f ./docker-compose/docker-compose.yml down
docker compose -f ./docker-compose/docker-compose.yml logs || 0
docker compose -f ./docker-compose/docker-compose.yml down
promote-images-dev:
needs: [ check-permissions, tag, vm-compute-node-image, neon-image ]
needs: [ check-permissions, tag, vm-compute-node-image ]
runs-on: ubuntu-22.04
permissions:
@@ -912,7 +928,7 @@ jobs:
done
promote-images-prod:
needs: [ check-permissions, tag, test-images, promote-images-dev ]
needs: [ check-permissions, tag, test-images, vm-compute-node-image ]
runs-on: ubuntu-22.04
if: github.ref_name == 'main' || github.ref_name == 'release' || github.ref_name == 'release-proxy' || github.ref_name == 'release-compute'
@@ -1103,7 +1119,6 @@ jobs:
retries: 5
script: |
const tag = "${{ needs.tag.outputs.build-tag }}";
const branch = "${{ github.ref_name }}";
try {
const existingRef = await github.rest.git.getRef({
@@ -1132,6 +1147,12 @@ jobs:
console.log(`Tag ${tag} created successfully.`);
}
// TODO: check how GitHub releases looks for proxy/compute releases and enable them if they're ok
if (context.ref !== 'refs/heads/release') {
console.log(`GitHub release skipped for ${context.ref}.`);
return;
}
try {
const existingRelease = await github.rest.repos.getReleaseByTag({
owner: context.repo.owner,
@@ -1146,48 +1167,11 @@ jobs:
}
console.log(`Release for tag ${tag} does not exist. Creating it...`);
// Find the PR number using the commit SHA
const pullRequests = await github.rest.pulls.list({
owner: context.repo.owner,
repo: context.repo.repo,
state: 'closed',
base: branch,
});
const pr = pullRequests.data.find(pr => pr.merge_commit_sha === context.sha);
const prNumber = pr ? pr.number : null;
// Find the previous release on the branch
const releases = await github.rest.repos.listReleases({
owner: context.repo.owner,
repo: context.repo.repo,
per_page: 100,
});
const branchReleases = releases.data
.filter((release) => {
const regex = new RegExp(`^${branch}-\\d+$`);
return regex.test(release.tag_name) && !release.draft && !release.prerelease;
})
.sort((a, b) => new Date(b.created_at) - new Date(a.created_at));
const previousTag = branchReleases.length > 0 ? branchReleases[0].tag_name : null;
const releaseNotes = [
prNumber
? `Release PR https://github.com/${context.repo.owner}/${context.repo.repo}/pull/${prNumber}.`
: 'Release PR not found.',
previousTag
? `Diff with the previous release https://github.com/${context.repo.owner}/${context.repo.repo}/compare/${previousTag}...${tag}.`
: `No previous release found on branch ${branch}.`,
].join('\n\n');
await github.rest.repos.createRelease({
owner: context.repo.owner,
repo: context.repo.repo,
tag_name: tag,
body: releaseNotes,
generate_release_notes: true,
});
console.log(`Release for tag ${tag} created successfully.`);
}
@@ -1360,8 +1344,6 @@ jobs:
- build-and-test-locally
- check-codestyle-python
- check-codestyle-rust
- check-dependencies-rust
- files-changed
- promote-images-dev
- test-images
- trigger-custom-extensions-build-and-wait
@@ -1374,11 +1356,4 @@ jobs:
if: |
contains(needs.*.result, 'failure')
|| contains(needs.*.result, 'cancelled')
|| (needs.check-dependencies-rust.result == 'skipped' && needs.files-changed.outputs.check-rust-dependencies == 'true')
|| needs.build-and-test-locally.result == 'skipped'
|| needs.check-codestyle-python.result == 'skipped'
|| needs.check-codestyle-rust.result == 'skipped'
|| needs.files-changed.result == 'skipped'
|| needs.promote-images-dev.result == 'skipped'
|| needs.test-images.result == 'skipped'
|| needs.trigger-custom-extensions-build-and-wait.result == 'skipped'
|| contains(needs.*.result, 'skipped')

View File

@@ -1,57 +0,0 @@
name: cargo deny checks
on:
workflow_call:
inputs:
build-tools-image:
required: false
type: string
schedule:
- cron: '0 0 * * *'
jobs:
cargo-deny:
strategy:
matrix:
ref: >-
${{
fromJSON(
github.event_name == 'schedule'
&& '["main","release","release-proxy","release-compute"]'
|| format('["{0}"]', github.sha)
)
}}
runs-on: [self-hosted, small]
container:
image: ${{ inputs.build-tools-image || 'neondatabase/build-tools:pinned' }}
credentials:
username: ${{ secrets.NEON_DOCKERHUB_USERNAME }}
password: ${{ secrets.NEON_DOCKERHUB_PASSWORD }}
options: --init
steps:
- name: Checkout
uses: actions/checkout@v4
with:
ref: ${{ matrix.ref }}
- name: Check rust licenses/bans/advisories/sources
env:
CARGO_DENY_TARGET: >-
${{ github.event_name == 'schedule' && 'advisories' || 'all' }}
run: cargo deny check --hide-inclusion-graph $CARGO_DENY_TARGET
- name: Post to a Slack channel
if: ${{ github.event_name == 'schedule' && failure() }}
uses: slackapi/slack-github-action@v2
with:
method: chat.postMessage
token: ${{ secrets.SLACK_BOT_TOKEN }}
payload: |
channel: ${{ vars.SLACK_CICD_CHANNEL_ID }}
text: |
Periodic cargo-deny on ${{ matrix.ref }}: ${{ job.status }}
<${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}|GitHub Run>
Pinging @oncall-devprod.

View File

@@ -28,24 +28,7 @@ jobs:
strategy:
fail-fast: false # allow other variants to continue even if one fails
matrix:
include:
- target_project: new_empty_project_stripe_size_2048
stripe_size: 2048 # 16 MiB
postgres_version: 16
- target_project: new_empty_project_stripe_size_32768
stripe_size: 32768 # 256 MiB # note that this is different from null because using null will shard_split the project only if it reaches the threshold
# while here it is sharded from the beginning with a shard size of 256 MiB
postgres_version: 16
- target_project: new_empty_project
stripe_size: null # run with neon defaults which will shard split only when reaching the threshold
postgres_version: 16
- target_project: new_empty_project
stripe_size: null # run with neon defaults which will shard split only when reaching the threshold
postgres_version: 17
- target_project: large_existing_project
stripe_size: null # cannot re-shared or choose different stripe size for existing, already sharded project
postgres_version: 16
max-parallel: 1 # we want to run each stripe size sequentially to be able to compare the results
target_project: [new_empty_project, large_existing_project]
permissions:
contents: write
statuses: write
@@ -84,21 +67,17 @@ jobs:
aws-oicd-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
- name: Create Neon Project
if: ${{ startsWith(matrix.target_project, 'new_empty_project') }}
if: ${{ matrix.target_project == 'new_empty_project' }}
id: create-neon-project-ingest-target
uses: ./.github/actions/neon-project-create
with:
region_id: aws-us-east-2
postgres_version: ${{ matrix.postgres_version }}
postgres_version: 16
compute_units: '[7, 7]' # we want to test large compute here to avoid compute-side bottleneck
api_key: ${{ secrets.NEON_STAGING_API_KEY }}
shard_split_project: ${{ matrix.stripe_size != null && 'true' || 'false' }}
admin_api_key: ${{ secrets.NEON_STAGING_ADMIN_API_KEY }}
shard_count: 8
stripe_size: ${{ matrix.stripe_size }}
- name: Initialize Neon project
if: ${{ startsWith(matrix.target_project, 'new_empty_project') }}
if: ${{ matrix.target_project == 'new_empty_project' }}
env:
BENCHMARK_INGEST_TARGET_CONNSTR: ${{ steps.create-neon-project-ingest-target.outputs.dsn }}
NEW_PROJECT_ID: ${{ steps.create-neon-project-ingest-target.outputs.project_id }}
@@ -151,7 +130,7 @@ jobs:
test_selection: performance/test_perf_ingest_using_pgcopydb.py
run_in_parallel: false
extra_params: -s -m remote_cluster --timeout 86400 -k test_ingest_performance_using_pgcopydb
pg_version: v${{ matrix.postgres_version }}
pg_version: v16
save_perf_report: true
aws-oicd-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
env:
@@ -167,7 +146,7 @@ jobs:
${PSQL} "${BENCHMARK_INGEST_TARGET_CONNSTR}" -c "\dt+"
- name: Delete Neon Project
if: ${{ always() && startsWith(matrix.target_project, 'new_empty_project') }}
if: ${{ always() && matrix.target_project == 'new_empty_project' }}
uses: ./.github/actions/neon-project-delete
with:
project_id: ${{ steps.create-neon-project-ingest-target.outputs.project_id }}

View File

@@ -114,7 +114,7 @@ jobs:
run: make walproposer-lib -j$(nproc)
- name: Produce the build stats
run: cargo build --all --release --timings -j$(nproc)
run: PQ_LIB_DIR=$(pwd)/pg_install/v17/lib cargo build --all --release --timings -j$(nproc)
- name: Configure AWS credentials
uses: aws-actions/configure-aws-credentials@v4

View File

@@ -12,8 +12,8 @@ on:
pull_request:
paths:
- '.github/workflows/pg-clients.yml'
- 'test_runner/pg_clients/**/*.py'
- 'test_runner/logical_repl/**/*.py'
- 'test_runner/pg_clients/**'
- 'test_runner/logical_repl/**'
- 'poetry.lock'
workflow_dispatch:
@@ -104,8 +104,6 @@ jobs:
with:
api_key: ${{ secrets.NEON_STAGING_API_KEY }}
postgres_version: ${{ env.DEFAULT_PG_VERSION }}
project_settings: >-
{"enable_logical_replication": true}
- name: Run tests
uses: ./.github/actions/run-python-test-set

View File

@@ -1,12 +1,6 @@
name: Pre-merge checks
on:
pull_request:
paths:
- .github/workflows/_check-codestyle-python.yml
- .github/workflows/_check-codestyle-rust.yml
- .github/workflows/build-build-tools-image.yml
- .github/workflows/pre-merge-checks.yml
merge_group:
branches:
- main
@@ -23,10 +17,8 @@ jobs:
runs-on: ubuntu-22.04
outputs:
python-changed: ${{ steps.python-src.outputs.any_changed }}
rust-changed: ${{ steps.rust-src.outputs.any_changed }}
steps:
- uses: actions/checkout@v4
- uses: tj-actions/changed-files@4edd678ac3f81e2dc578756871e4d00c19191daf # v45.0.4
id: python-src
with:
@@ -38,31 +30,14 @@ jobs:
poetry.lock
pyproject.toml
- uses: tj-actions/changed-files@4edd678ac3f81e2dc578756871e4d00c19191daf # v45.0.4
id: rust-src
with:
files: |
.github/workflows/_check-codestyle-rust.yml
.github/workflows/build-build-tools-image.yml
.github/workflows/pre-merge-checks.yml
**/**.rs
**/Cargo.toml
Cargo.toml
Cargo.lock
- name: PRINT ALL CHANGED FILES FOR DEBUG PURPOSES
env:
PYTHON_CHANGED_FILES: ${{ steps.python-src.outputs.all_changed_files }}
RUST_CHANGED_FILES: ${{ steps.rust-src.outputs.all_changed_files }}
run: |
echo "${PYTHON_CHANGED_FILES}"
echo "${RUST_CHANGED_FILES}"
build-build-tools-image:
if: |
false
|| needs.get-changed-files.outputs.python-changed == 'true'
|| needs.get-changed-files.outputs.rust-changed == 'true'
if: needs.get-changed-files.outputs.python-changed == 'true'
needs: [ get-changed-files ]
uses: ./.github/workflows/build-build-tools-image.yml
with:
@@ -80,30 +55,18 @@ jobs:
build-tools-image: ${{ needs.build-build-tools-image.outputs.image }}-bookworm-x64
secrets: inherit
check-codestyle-rust:
if: needs.get-changed-files.outputs.rust-changed == 'true'
needs: [ get-changed-files, build-build-tools-image ]
uses: ./.github/workflows/_check-codestyle-rust.yml
with:
# `-bookworm-x64` suffix should match the combination in `build-build-tools-image`
build-tools-image: ${{ needs.build-build-tools-image.outputs.image }}-bookworm-x64
archs: '["x64"]'
secrets: inherit
# To get items from the merge queue merged into main we need to satisfy "Status checks that are required".
# Currently we require 2 jobs (checks with exact name):
# - conclusion
# - neon-cloud-e2e
conclusion:
# Do not run job on Pull Requests as it interferes with the `conclusion` job from the `build_and_test` workflow
if: always() && github.event_name == 'merge_group'
if: always()
permissions:
statuses: write # for `github.repos.createCommitStatus(...)`
contents: write
needs:
- get-changed-files
- check-codestyle-python
- check-codestyle-rust
runs-on: ubuntu-22.04
steps:
- name: Create fake `neon-cloud-e2e` check
@@ -128,8 +91,6 @@ jobs:
- name: Fail the job if any of the dependencies do not succeed or skipped
run: exit 1
if: |
false
|| (needs.check-codestyle-python.result == 'skipped' && needs.get-changed-files.outputs.python-changed == 'true')
|| (needs.check-codestyle-rust.result == 'skipped' && needs.get-changed-files.outputs.rust-changed == 'true')
(contains(needs.check-codestyle-python.result, 'skipped') && needs.get-changed-files.outputs.python-changed == 'true')
|| contains(needs.*.result, 'failure')
|| contains(needs.*.result, 'cancelled')

View File

@@ -3,9 +3,8 @@ name: Create Release Branch
on:
schedule:
# It should be kept in sync with if-condition in jobs
- cron: '0 6 * * THU' # Proxy release
- cron: '0 6 * * FRI' # Storage release
- cron: '0 7 * * FRI' # Compute release
- cron: '0 6 * * THU' # Proxy release
workflow_dispatch:
inputs:
create-storage-release-branch:
@@ -56,7 +55,7 @@ jobs:
ci-access-token: ${{ secrets.CI_ACCESS_TOKEN }}
create-compute-release-branch:
if: ${{ github.event.schedule == '0 7 * * FRI' || inputs.create-compute-release-branch }}
if: inputs.create-compute-release-branch
permissions:
contents: write

572
Cargo.lock generated

File diff suppressed because it is too large Load Diff

View File

@@ -54,7 +54,6 @@ async-compression = { version = "0.4.0", features = ["tokio", "gzip", "zstd"] }
atomic-take = "1.1.0"
backtrace = "0.3.74"
flate2 = "1.0.26"
assert-json-diff = "2"
async-stream = "0.3"
async-trait = "0.1"
aws-config = { version = "1.5", default-features = false, features=["rustls", "sso"] }
@@ -66,7 +65,7 @@ aws-smithy-types = "1.2"
aws-credential-types = "1.2.0"
aws-sigv4 = { version = "1.2", features = ["sign-http"] }
aws-types = "1.3"
axum = { version = "0.8.1", features = ["ws"] }
axum = { version = "0.7.9", features = ["ws"] }
base64 = "0.13.0"
bincode = "1.3"
bindgen = "0.70"
@@ -78,10 +77,10 @@ camino = "1.1.6"
cfg-if = "1.0.0"
chrono = { version = "0.4", default-features = false, features = ["clock"] }
clap = { version = "4.0", features = ["derive", "env"] }
clashmap = { version = "1.0", features = ["raw-api"] }
comfy-table = "7.1"
const_format = "0.2"
crc32c = "0.6"
dashmap = { version = "5.5.0", features = ["raw-api"] }
diatomic-waker = { version = "0.2.3" }
either = "1.8"
enum-map = "2.4.2"
@@ -124,7 +123,7 @@ measured = { version = "0.0.22", features=["lasso"] }
measured-process = { version = "0.0.22" }
memoffset = "0.9"
nix = { version = "0.27", features = ["dir", "fs", "process", "socket", "signal", "poll"] }
notify = "8.0.0"
notify = "6.0.0"
num_cpus = "1.15"
num-traits = "0.2.15"
once_cell = "1.13"
@@ -178,7 +177,7 @@ test-context = "0.3"
thiserror = "1.0"
tikv-jemallocator = { version = "0.6", features = ["profiling", "stats", "unprefixed_malloc_on_supported_platforms"] }
tikv-jemalloc-ctl = { version = "0.6", features = ["stats"] }
tokio = { version = "1.41", features = ["macros"] }
tokio = { version = "1.17", features = ["macros"] }
tokio-epoll-uring = { git = "https://github.com/neondatabase/tokio-epoll-uring.git" , branch = "main" }
tokio-io-timeout = "1.2.0"
tokio-postgres-rustls = "0.12.0"
@@ -188,15 +187,13 @@ tokio-tar = "0.3"
tokio-util = { version = "0.7.10", features = ["io", "rt"] }
toml = "0.8"
toml_edit = "0.22"
tonic = {version = "0.12.3", default-features = false, features = ["channel", "tls", "tls-roots"]}
tonic = {version = "0.12.3", features = ["tls", "tls-roots"]}
tower = { version = "0.5.2", default-features = false }
tower-http = { version = "0.6.2", features = ["request-id", "trace"] }
tower-service = "0.3.3"
tracing = "0.1"
tracing-error = "0.2"
tracing-log = "0.2"
tracing-opentelemetry = "0.28"
tracing-serde = "0.2.0"
tracing-subscriber = { version = "0.3", default-features = false, features = ["smallvec", "fmt", "tracing-log", "std", "env-filter", "json"] }
try-lock = "0.2.5"
twox-hash = { version = "1.6.3", default-features = false }

View File

@@ -45,7 +45,7 @@ COPY --chown=nonroot . .
ARG ADDITIONAL_RUSTFLAGS
RUN set -e \
&& RUSTFLAGS="-Clinker=clang -Clink-arg=-fuse-ld=mold -Clink-arg=-Wl,--no-rosegment -Cforce-frame-pointers=yes ${ADDITIONAL_RUSTFLAGS}" cargo build \
&& PQ_LIB_DIR=$(pwd)/pg_install/v${STABLE_PG_VERSION}/lib RUSTFLAGS="-Clinker=clang -Clink-arg=-fuse-ld=mold -Clink-arg=-Wl,--no-rosegment -Cforce-frame-pointers=yes ${ADDITIONAL_RUSTFLAGS}" cargo build \
--bin pg_sni_router \
--bin pageserver \
--bin pagectl \
@@ -64,7 +64,6 @@ ARG DEFAULT_PG_VERSION
WORKDIR /data
RUN set -e \
&& echo 'Acquire::Retries "5";' > /etc/apt/apt.conf.d/80-retries \
&& apt update \
&& apt install -y \
libreadline-dev \
@@ -73,7 +72,6 @@ RUN set -e \
# System postgres for use with client libraries (e.g. in storage controller)
postgresql-15 \
openssl \
&& rm -f /etc/apt/apt.conf.d/80-retries \
&& rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/* \
&& useradd -d /data neon \
&& chown -R neon:neon /data

View File

@@ -64,6 +64,8 @@ CARGO_BUILD_FLAGS += $(filter -j1,$(MAKEFLAGS))
CARGO_CMD_PREFIX += $(if $(filter n,$(MAKEFLAGS)),,+)
# Force cargo not to print progress bar
CARGO_CMD_PREFIX += CARGO_TERM_PROGRESS_WHEN=never CI=1
# Set PQ_LIB_DIR to make sure `storage_controller` get linked with bundled libpq (through diesel)
CARGO_CMD_PREFIX += PQ_LIB_DIR=$(POSTGRES_INSTALL_DIR)/v16/lib
CACHEDIR_TAG_CONTENTS := "Signature: 8a477f597d28d172789f06886806bc55"

View File

@@ -21,10 +21,8 @@ The Neon storage engine consists of two major components:
See developer documentation in [SUMMARY.md](/docs/SUMMARY.md) for more information.
## Running a local development environment
## Running local installation
Neon can be run on a workstation for small experiments and to test code changes, by
following these instructions.
#### Installing dependencies on Linux
1. Install build dependencies and other applicable packages
@@ -240,7 +238,7 @@ postgres=# select * from t;
> cargo neon stop
```
More advanced usages can be found at [Local Development Control Plane (`neon_local`))](./control_plane/README.md).
More advanced usages can be found at [Control Plane and Neon Local](./control_plane/README.md).
#### Handling build failures

View File

@@ -3,15 +3,6 @@ ARG DEBIAN_VERSION=bookworm
FROM debian:bookworm-slim AS pgcopydb_builder
ARG DEBIAN_VERSION
# Use strict mode for bash to catch errors early
SHELL ["/bin/bash", "-euo", "pipefail", "-c"]
# By default, /bin/sh used in debian images will treat '\n' as eol,
# but as we use bash as SHELL, and built-in echo in bash requires '-e' flag for that.
RUN echo 'Acquire::Retries "5";' > /etc/apt/apt.conf.d/80-retries && \
echo -e "retry_connrefused = on\ntimeout=15\ntries=5\n" > /root/.wgetrc && \
echo -e "--retry-connrefused\n--connect-timeout 15\n--retry 5\n--max-time 300\n" > /root/.curlrc
RUN if [ "${DEBIAN_VERSION}" = "bookworm" ]; then \
set -e && \
apt update && \
@@ -60,8 +51,7 @@ ARG DEBIAN_VERSION
# Add nonroot user
RUN useradd -ms /bin/bash nonroot -b /home
# Use strict mode for bash to catch errors early
SHELL ["/bin/bash", "-euo", "pipefail", "-c"]
SHELL ["/bin/bash", "-c"]
RUN mkdir -p /pgcopydb/bin && \
mkdir -p /pgcopydb/lib && \
@@ -71,10 +61,6 @@ RUN mkdir -p /pgcopydb/bin && \
COPY --from=pgcopydb_builder /usr/lib/postgresql/16/bin/pgcopydb /pgcopydb/bin/pgcopydb
COPY --from=pgcopydb_builder /pgcopydb/lib/libpq.so.5 /pgcopydb/lib/libpq.so.5
RUN echo 'Acquire::Retries "5";' > /etc/apt/apt.conf.d/80-retries && \
echo -e "retry_connrefused = on\ntimeout=15\ntries=5\n" > /root/.wgetrc && \
echo -e "--retry-connrefused\n--connect-timeout 15\n--retry 5\n--max-time 300\n" > /root/.curlrc
# System deps
#
# 'gdb' is included so that we get backtraces of core dumps produced in
@@ -129,7 +115,7 @@ RUN set -e \
# Keep the version the same as in compute/compute-node.Dockerfile and
# test_runner/regress/test_compute_metrics.py.
ENV SQL_EXPORTER_VERSION=0.17.0
ENV SQL_EXPORTER_VERSION=0.16.0
RUN curl -fsSL \
"https://github.com/burningalchemist/sql_exporter/releases/download/${SQL_EXPORTER_VERSION}/sql_exporter-${SQL_EXPORTER_VERSION}.linux-$(case "$(uname -m)" in x86_64) echo amd64;; aarch64) echo arm64;; esac).tar.gz" \
--output sql_exporter.tar.gz \
@@ -196,14 +182,8 @@ RUN set -e \
# It includes several bug fixes on top on v2.0 release (https://github.com/linux-test-project/lcov/compare/v2.0...master)
# And patches from us:
# - Generates json file with code coverage summary (https://github.com/neondatabase/lcov/commit/426e7e7a22f669da54278e9b55e6d8caabd00af0.tar.gz)
RUN set +o pipefail && \
for package in Capture::Tiny DateTime Devel::Cover Digest::MD5 File::Spec JSON::XS Memory::Process Time::HiRes JSON; do \
yes | perl -MCPAN -e "CPAN::Shell->notest('install', '$package')";\
done && \
set -o pipefail
# Split into separate step to debug flaky failures here
RUN wget https://github.com/neondatabase/lcov/archive/426e7e7a22f669da54278e9b55e6d8caabd00af0.tar.gz -O lcov.tar.gz \
&& ls -laht lcov.tar.gz && sha256sum lcov.tar.gz \
RUN for package in Capture::Tiny DateTime Devel::Cover Digest::MD5 File::Spec JSON::XS Memory::Process Time::HiRes JSON; do yes | perl -MCPAN -e "CPAN::Shell->notest('install', '$package')"; done \
&& wget https://github.com/neondatabase/lcov/archive/426e7e7a22f669da54278e9b55e6d8caabd00af0.tar.gz -O lcov.tar.gz \
&& echo "61a22a62e20908b8b9e27d890bd0ea31f567a7b9668065589266371dcbca0992 lcov.tar.gz" | sha256sum --check \
&& mkdir -p lcov && tar -xzf lcov.tar.gz -C lcov --strip-components=1 \
&& cd lcov \
@@ -238,8 +218,6 @@ RUN wget -O /tmp/libicu-${ICU_VERSION}.tgz https://github.com/unicode-org/icu/re
USER nonroot:nonroot
WORKDIR /home/nonroot
RUN echo -e "--retry-connrefused\n--connect-timeout 15\n--retry 5\n--max-time 300\n" > /home/nonroot/.curlrc
# Python
ENV PYTHON_VERSION=3.11.10 \
PYENV_ROOT=/home/nonroot/.pyenv \
@@ -265,7 +243,7 @@ WORKDIR /home/nonroot
# Rust
# Please keep the version of llvm (installed above) in sync with rust llvm (`rustc --version --verbose | grep LLVM`)
ENV RUSTC_VERSION=1.84.1
ENV RUSTC_VERSION=1.84.0
ENV RUSTUP_HOME="/home/nonroot/.rustup"
ENV PATH="/home/nonroot/.cargo/bin:${PATH}"
ARG RUSTFILT_VERSION=0.2.1
@@ -273,7 +251,6 @@ ARG CARGO_HAKARI_VERSION=0.9.33
ARG CARGO_DENY_VERSION=0.16.2
ARG CARGO_HACK_VERSION=0.6.33
ARG CARGO_NEXTEST_VERSION=0.9.85
ARG CARGO_DIESEL_CLI_VERSION=2.2.6
RUN curl -sSO https://static.rust-lang.org/rustup/dist/$(uname -m)-unknown-linux-gnu/rustup-init && whoami && \
chmod +x rustup-init && \
./rustup-init -y --default-toolchain ${RUSTC_VERSION} && \
@@ -287,8 +264,6 @@ RUN curl -sSO https://static.rust-lang.org/rustup/dist/$(uname -m)-unknown-linux
cargo install cargo-deny --locked --version ${CARGO_DENY_VERSION} && \
cargo install cargo-hack --version ${CARGO_HACK_VERSION} && \
cargo install cargo-nextest --version ${CARGO_NEXTEST_VERSION} && \
cargo install diesel_cli --version ${CARGO_DIESEL_CLI_VERSION} \
--features postgres-bundled --no-default-features && \
rm -rf /home/nonroot/.cargo/registry && \
rm -rf /home/nonroot/.cargo/git

File diff suppressed because it is too large Load Diff

View File

@@ -19,8 +19,6 @@ max_prepared_statements=0
admin_users=postgres
unix_socket_dir=/tmp/
unix_socket_mode=0777
; required for pgbouncer_exporter
ignore_startup_parameters=extra_float_digits
;; Disable connection logging. It produces a lot of logs that no one looks at,
;; and we can get similar log entries from the proxy too. We had incidents in

View File

@@ -1,242 +0,0 @@
diff --git a/contrib/amcheck/expected/check_heap.out b/contrib/amcheck/expected/check_heap.out
index 979e5e8..2375b45 100644
--- a/contrib/amcheck/expected/check_heap.out
+++ b/contrib/amcheck/expected/check_heap.out
@@ -80,12 +80,9 @@ INSERT INTO heaptest (a, b)
-- same transaction. The heaptest table is smaller than the default
-- wal_skip_threshold, so a wal_level=minimal commit reads the table into
-- shared_buffers. A transaction delays that and excludes any autovacuum.
-SET allow_in_place_tablespaces = true;
-CREATE TABLESPACE regress_test_stats_tblspc LOCATION '';
SELECT sum(reads) AS stats_bulkreads_before
FROM pg_stat_io WHERE context = 'bulkread' \gset
BEGIN;
-ALTER TABLE heaptest SET TABLESPACE regress_test_stats_tblspc;
-- Check that valid options are not rejected nor corruption reported
-- for a non-empty table
SELECT * FROM verify_heapam(relation := 'heaptest', skip := 'none');
@@ -118,14 +115,6 @@ SELECT pg_stat_force_next_flush();
(1 row)
-SELECT sum(reads) AS stats_bulkreads_after
- FROM pg_stat_io WHERE context = 'bulkread' \gset
-SELECT :stats_bulkreads_after > :stats_bulkreads_before;
- ?column?
-----------
- t
-(1 row)
-
CREATE ROLE regress_heaptest_role;
-- verify permissions are checked (error due to function not callable)
SET ROLE regress_heaptest_role;
@@ -233,7 +222,6 @@ ERROR: cannot check relation "test_foreign_table"
DETAIL: This operation is not supported for foreign tables.
-- cleanup
DROP TABLE heaptest;
-DROP TABLESPACE regress_test_stats_tblspc;
DROP TABLE test_partition;
DROP TABLE test_partitioned;
DROP OWNED BY regress_heaptest_role; -- permissions
diff --git a/contrib/amcheck/sql/check_heap.sql b/contrib/amcheck/sql/check_heap.sql
index 1745bae..3b429c3 100644
--- a/contrib/amcheck/sql/check_heap.sql
+++ b/contrib/amcheck/sql/check_heap.sql
@@ -40,12 +40,9 @@ INSERT INTO heaptest (a, b)
-- same transaction. The heaptest table is smaller than the default
-- wal_skip_threshold, so a wal_level=minimal commit reads the table into
-- shared_buffers. A transaction delays that and excludes any autovacuum.
-SET allow_in_place_tablespaces = true;
-CREATE TABLESPACE regress_test_stats_tblspc LOCATION '';
SELECT sum(reads) AS stats_bulkreads_before
FROM pg_stat_io WHERE context = 'bulkread' \gset
BEGIN;
-ALTER TABLE heaptest SET TABLESPACE regress_test_stats_tblspc;
-- Check that valid options are not rejected nor corruption reported
-- for a non-empty table
SELECT * FROM verify_heapam(relation := 'heaptest', skip := 'none');
@@ -58,9 +55,6 @@ COMMIT;
-- ALTER TABLE ... SET TABLESPACE ...
-- causing an additional bulkread, which should be reflected in pg_stat_io.
SELECT pg_stat_force_next_flush();
-SELECT sum(reads) AS stats_bulkreads_after
- FROM pg_stat_io WHERE context = 'bulkread' \gset
-SELECT :stats_bulkreads_after > :stats_bulkreads_before;
CREATE ROLE regress_heaptest_role;
@@ -140,7 +134,6 @@ SELECT * FROM verify_heapam('test_foreign_table',
-- cleanup
DROP TABLE heaptest;
-DROP TABLESPACE regress_test_stats_tblspc;
DROP TABLE test_partition;
DROP TABLE test_partitioned;
DROP OWNED BY regress_heaptest_role; -- permissions
diff --git a/contrib/citext/expected/create_index_acl.out b/contrib/citext/expected/create_index_acl.out
index 33be13a..70a406c 100644
--- a/contrib/citext/expected/create_index_acl.out
+++ b/contrib/citext/expected/create_index_acl.out
@@ -5,9 +5,6 @@
-- owner having as few applicable privileges as possible. (The privileges.sql
-- regress_sro_user tests look for the opposite defect; they confirm that
-- DefineIndex() uses the table owner userid where necessary.)
-SET allow_in_place_tablespaces = true;
-CREATE TABLESPACE regress_create_idx_tblspace LOCATION '';
-RESET allow_in_place_tablespaces;
BEGIN;
CREATE ROLE regress_minimal;
CREATE SCHEMA s;
@@ -49,11 +46,9 @@ ALTER TABLE s.x OWNER TO regress_minimal;
-- Empty-table DefineIndex()
CREATE UNIQUE INDEX u0rows ON s.x USING btree
((s.index_this_expr(y, s.const())) COLLATE s.coll s.citext_pattern_ops)
- TABLESPACE regress_create_idx_tblspace
WHERE s.index_row_if(y);
ALTER TABLE s.x ADD CONSTRAINT e0rows EXCLUDE USING btree
((s.index_this_expr(y, s.const())) COLLATE s.coll WITH s.=)
- USING INDEX TABLESPACE regress_create_idx_tblspace
WHERE (s.index_row_if(y));
-- Make the table nonempty.
INSERT INTO s.x VALUES ('foo'), ('bar');
@@ -66,11 +61,9 @@ RESET search_path;
GRANT EXECUTE ON FUNCTION s.index_this_expr TO regress_minimal;
CREATE UNIQUE INDEX u2rows ON s.x USING btree
((s.index_this_expr(y, s.const())) COLLATE s.coll s.citext_pattern_ops)
- TABLESPACE regress_create_idx_tblspace
WHERE s.index_row_if(y);
ALTER TABLE s.x ADD CONSTRAINT e2rows EXCLUDE USING btree
((s.index_this_expr(y, s.const())) COLLATE s.coll WITH s.=)
- USING INDEX TABLESPACE regress_create_idx_tblspace
WHERE (s.index_row_if(y));
-- Shall not find s.coll via search_path, despite the s.const->public.setter
-- call having set search_path=s during expression planning. Suppress the
@@ -78,9 +71,7 @@ ALTER TABLE s.x ADD CONSTRAINT e2rows EXCLUDE USING btree
\set VERBOSITY sqlstate
ALTER TABLE s.x ADD CONSTRAINT underqualified EXCLUDE USING btree
((s.index_this_expr(y, s.const())) COLLATE coll WITH s.=)
- USING INDEX TABLESPACE regress_create_idx_tblspace
WHERE (s.index_row_if(y));
ERROR: 42704
\set VERBOSITY default
ROLLBACK;
-DROP TABLESPACE regress_create_idx_tblspace;
diff --git a/contrib/citext/sql/create_index_acl.sql b/contrib/citext/sql/create_index_acl.sql
index 10b5225..ae442e1 100644
--- a/contrib/citext/sql/create_index_acl.sql
+++ b/contrib/citext/sql/create_index_acl.sql
@@ -6,10 +6,6 @@
-- regress_sro_user tests look for the opposite defect; they confirm that
-- DefineIndex() uses the table owner userid where necessary.)
-SET allow_in_place_tablespaces = true;
-CREATE TABLESPACE regress_create_idx_tblspace LOCATION '';
-RESET allow_in_place_tablespaces;
-
BEGIN;
CREATE ROLE regress_minimal;
CREATE SCHEMA s;
@@ -51,11 +47,9 @@ ALTER TABLE s.x OWNER TO regress_minimal;
-- Empty-table DefineIndex()
CREATE UNIQUE INDEX u0rows ON s.x USING btree
((s.index_this_expr(y, s.const())) COLLATE s.coll s.citext_pattern_ops)
- TABLESPACE regress_create_idx_tblspace
WHERE s.index_row_if(y);
ALTER TABLE s.x ADD CONSTRAINT e0rows EXCLUDE USING btree
((s.index_this_expr(y, s.const())) COLLATE s.coll WITH s.=)
- USING INDEX TABLESPACE regress_create_idx_tblspace
WHERE (s.index_row_if(y));
-- Make the table nonempty.
INSERT INTO s.x VALUES ('foo'), ('bar');
@@ -68,11 +62,9 @@ RESET search_path;
GRANT EXECUTE ON FUNCTION s.index_this_expr TO regress_minimal;
CREATE UNIQUE INDEX u2rows ON s.x USING btree
((s.index_this_expr(y, s.const())) COLLATE s.coll s.citext_pattern_ops)
- TABLESPACE regress_create_idx_tblspace
WHERE s.index_row_if(y);
ALTER TABLE s.x ADD CONSTRAINT e2rows EXCLUDE USING btree
((s.index_this_expr(y, s.const())) COLLATE s.coll WITH s.=)
- USING INDEX TABLESPACE regress_create_idx_tblspace
WHERE (s.index_row_if(y));
-- Shall not find s.coll via search_path, despite the s.const->public.setter
-- call having set search_path=s during expression planning. Suppress the
@@ -80,9 +72,7 @@ ALTER TABLE s.x ADD CONSTRAINT e2rows EXCLUDE USING btree
\set VERBOSITY sqlstate
ALTER TABLE s.x ADD CONSTRAINT underqualified EXCLUDE USING btree
((s.index_this_expr(y, s.const())) COLLATE coll WITH s.=)
- USING INDEX TABLESPACE regress_create_idx_tblspace
WHERE (s.index_row_if(y));
\set VERBOSITY default
ROLLBACK;
-DROP TABLESPACE regress_create_idx_tblspace;
diff --git a/contrib/file_fdw/expected/file_fdw.out b/contrib/file_fdw/expected/file_fdw.out
index 72304e0..ebe131b 100644
--- a/contrib/file_fdw/expected/file_fdw.out
+++ b/contrib/file_fdw/expected/file_fdw.out
@@ -4,6 +4,7 @@
-- directory paths are passed to us in environment variables
\getenv abs_srcdir PG_ABS_SRCDIR
-- Clean up in case a prior regression run failed
+SET compute_query_id TO 'off';
SET client_min_messages TO 'warning';
DROP ROLE IF EXISTS regress_file_fdw_superuser, regress_file_fdw_user, regress_no_priv_user;
RESET client_min_messages;
diff --git a/contrib/file_fdw/sql/file_fdw.sql b/contrib/file_fdw/sql/file_fdw.sql
index f0548e1..848a08c 100644
--- a/contrib/file_fdw/sql/file_fdw.sql
+++ b/contrib/file_fdw/sql/file_fdw.sql
@@ -6,6 +6,7 @@
\getenv abs_srcdir PG_ABS_SRCDIR
-- Clean up in case a prior regression run failed
+SET compute_query_id TO 'off';
SET client_min_messages TO 'warning';
DROP ROLE IF EXISTS regress_file_fdw_superuser, regress_file_fdw_user, regress_no_priv_user;
RESET client_min_messages;
diff --git a/contrib/pageinspect/expected/gist.out b/contrib/pageinspect/expected/gist.out
index d1adbab..38b52ac 100644
--- a/contrib/pageinspect/expected/gist.out
+++ b/contrib/pageinspect/expected/gist.out
@@ -10,25 +10,6 @@ BEGIN;
CREATE TABLE test_gist AS SELECT point(i,i) p, i::text t FROM
generate_series(1,1000) i;
CREATE INDEX test_gist_idx ON test_gist USING gist (p);
--- Page 0 is the root, the rest are leaf pages
-SELECT * FROM gist_page_opaque_info(get_raw_page('test_gist_idx', 0));
- lsn | nsn | rightlink | flags
------+-----+------------+-------
- 0/1 | 0/0 | 4294967295 | {}
-(1 row)
-
-SELECT * FROM gist_page_opaque_info(get_raw_page('test_gist_idx', 1));
- lsn | nsn | rightlink | flags
------+-----+------------+--------
- 0/1 | 0/0 | 4294967295 | {leaf}
-(1 row)
-
-SELECT * FROM gist_page_opaque_info(get_raw_page('test_gist_idx', 2));
- lsn | nsn | rightlink | flags
------+-----+-----------+--------
- 0/1 | 0/0 | 1 | {leaf}
-(1 row)
-
COMMIT;
SELECT * FROM gist_page_items(get_raw_page('test_gist_idx', 0), 'test_gist_idx');
itemoffset | ctid | itemlen | dead | keys
diff --git a/contrib/pageinspect/sql/gist.sql b/contrib/pageinspect/sql/gist.sql
index d263542..607992f 100644
--- a/contrib/pageinspect/sql/gist.sql
+++ b/contrib/pageinspect/sql/gist.sql
@@ -12,11 +12,6 @@ CREATE TABLE test_gist AS SELECT point(i,i) p, i::text t FROM
generate_series(1,1000) i;
CREATE INDEX test_gist_idx ON test_gist USING gist (p);
--- Page 0 is the root, the rest are leaf pages
-SELECT * FROM gist_page_opaque_info(get_raw_page('test_gist_idx', 0));
-SELECT * FROM gist_page_opaque_info(get_raw_page('test_gist_idx', 1));
-SELECT * FROM gist_page_opaque_info(get_raw_page('test_gist_idx', 2));
-
COMMIT;
SELECT * FROM gist_page_items(get_raw_page('test_gist_idx', 0), 'test_gist_idx');

View File

@@ -1,196 +0,0 @@
diff --git a/contrib/amcheck/expected/check_heap.out b/contrib/amcheck/expected/check_heap.out
index 979e5e8..2375b45 100644
--- a/contrib/amcheck/expected/check_heap.out
+++ b/contrib/amcheck/expected/check_heap.out
@@ -80,12 +80,9 @@ INSERT INTO heaptest (a, b)
-- same transaction. The heaptest table is smaller than the default
-- wal_skip_threshold, so a wal_level=minimal commit reads the table into
-- shared_buffers. A transaction delays that and excludes any autovacuum.
-SET allow_in_place_tablespaces = true;
-CREATE TABLESPACE regress_test_stats_tblspc LOCATION '';
SELECT sum(reads) AS stats_bulkreads_before
FROM pg_stat_io WHERE context = 'bulkread' \gset
BEGIN;
-ALTER TABLE heaptest SET TABLESPACE regress_test_stats_tblspc;
-- Check that valid options are not rejected nor corruption reported
-- for a non-empty table
SELECT * FROM verify_heapam(relation := 'heaptest', skip := 'none');
@@ -118,14 +115,6 @@ SELECT pg_stat_force_next_flush();
(1 row)
-SELECT sum(reads) AS stats_bulkreads_after
- FROM pg_stat_io WHERE context = 'bulkread' \gset
-SELECT :stats_bulkreads_after > :stats_bulkreads_before;
- ?column?
-----------
- t
-(1 row)
-
CREATE ROLE regress_heaptest_role;
-- verify permissions are checked (error due to function not callable)
SET ROLE regress_heaptest_role;
@@ -233,7 +222,6 @@ ERROR: cannot check relation "test_foreign_table"
DETAIL: This operation is not supported for foreign tables.
-- cleanup
DROP TABLE heaptest;
-DROP TABLESPACE regress_test_stats_tblspc;
DROP TABLE test_partition;
DROP TABLE test_partitioned;
DROP OWNED BY regress_heaptest_role; -- permissions
diff --git a/contrib/amcheck/sql/check_heap.sql b/contrib/amcheck/sql/check_heap.sql
index 1745bae..3b429c3 100644
--- a/contrib/amcheck/sql/check_heap.sql
+++ b/contrib/amcheck/sql/check_heap.sql
@@ -40,12 +40,9 @@ INSERT INTO heaptest (a, b)
-- same transaction. The heaptest table is smaller than the default
-- wal_skip_threshold, so a wal_level=minimal commit reads the table into
-- shared_buffers. A transaction delays that and excludes any autovacuum.
-SET allow_in_place_tablespaces = true;
-CREATE TABLESPACE regress_test_stats_tblspc LOCATION '';
SELECT sum(reads) AS stats_bulkreads_before
FROM pg_stat_io WHERE context = 'bulkread' \gset
BEGIN;
-ALTER TABLE heaptest SET TABLESPACE regress_test_stats_tblspc;
-- Check that valid options are not rejected nor corruption reported
-- for a non-empty table
SELECT * FROM verify_heapam(relation := 'heaptest', skip := 'none');
@@ -58,9 +55,6 @@ COMMIT;
-- ALTER TABLE ... SET TABLESPACE ...
-- causing an additional bulkread, which should be reflected in pg_stat_io.
SELECT pg_stat_force_next_flush();
-SELECT sum(reads) AS stats_bulkreads_after
- FROM pg_stat_io WHERE context = 'bulkread' \gset
-SELECT :stats_bulkreads_after > :stats_bulkreads_before;
CREATE ROLE regress_heaptest_role;
@@ -140,7 +134,6 @@ SELECT * FROM verify_heapam('test_foreign_table',
-- cleanup
DROP TABLE heaptest;
-DROP TABLESPACE regress_test_stats_tblspc;
DROP TABLE test_partition;
DROP TABLE test_partitioned;
DROP OWNED BY regress_heaptest_role; -- permissions
diff --git a/contrib/citext/expected/create_index_acl.out b/contrib/citext/expected/create_index_acl.out
index 33be13a..70a406c 100644
--- a/contrib/citext/expected/create_index_acl.out
+++ b/contrib/citext/expected/create_index_acl.out
@@ -5,9 +5,6 @@
-- owner having as few applicable privileges as possible. (The privileges.sql
-- regress_sro_user tests look for the opposite defect; they confirm that
-- DefineIndex() uses the table owner userid where necessary.)
-SET allow_in_place_tablespaces = true;
-CREATE TABLESPACE regress_create_idx_tblspace LOCATION '';
-RESET allow_in_place_tablespaces;
BEGIN;
CREATE ROLE regress_minimal;
CREATE SCHEMA s;
@@ -49,11 +46,9 @@ ALTER TABLE s.x OWNER TO regress_minimal;
-- Empty-table DefineIndex()
CREATE UNIQUE INDEX u0rows ON s.x USING btree
((s.index_this_expr(y, s.const())) COLLATE s.coll s.citext_pattern_ops)
- TABLESPACE regress_create_idx_tblspace
WHERE s.index_row_if(y);
ALTER TABLE s.x ADD CONSTRAINT e0rows EXCLUDE USING btree
((s.index_this_expr(y, s.const())) COLLATE s.coll WITH s.=)
- USING INDEX TABLESPACE regress_create_idx_tblspace
WHERE (s.index_row_if(y));
-- Make the table nonempty.
INSERT INTO s.x VALUES ('foo'), ('bar');
@@ -66,11 +61,9 @@ RESET search_path;
GRANT EXECUTE ON FUNCTION s.index_this_expr TO regress_minimal;
CREATE UNIQUE INDEX u2rows ON s.x USING btree
((s.index_this_expr(y, s.const())) COLLATE s.coll s.citext_pattern_ops)
- TABLESPACE regress_create_idx_tblspace
WHERE s.index_row_if(y);
ALTER TABLE s.x ADD CONSTRAINT e2rows EXCLUDE USING btree
((s.index_this_expr(y, s.const())) COLLATE s.coll WITH s.=)
- USING INDEX TABLESPACE regress_create_idx_tblspace
WHERE (s.index_row_if(y));
-- Shall not find s.coll via search_path, despite the s.const->public.setter
-- call having set search_path=s during expression planning. Suppress the
@@ -78,9 +71,7 @@ ALTER TABLE s.x ADD CONSTRAINT e2rows EXCLUDE USING btree
\set VERBOSITY sqlstate
ALTER TABLE s.x ADD CONSTRAINT underqualified EXCLUDE USING btree
((s.index_this_expr(y, s.const())) COLLATE coll WITH s.=)
- USING INDEX TABLESPACE regress_create_idx_tblspace
WHERE (s.index_row_if(y));
ERROR: 42704
\set VERBOSITY default
ROLLBACK;
-DROP TABLESPACE regress_create_idx_tblspace;
diff --git a/contrib/citext/sql/create_index_acl.sql b/contrib/citext/sql/create_index_acl.sql
index 10b5225..ae442e1 100644
--- a/contrib/citext/sql/create_index_acl.sql
+++ b/contrib/citext/sql/create_index_acl.sql
@@ -6,10 +6,6 @@
-- regress_sro_user tests look for the opposite defect; they confirm that
-- DefineIndex() uses the table owner userid where necessary.)
-SET allow_in_place_tablespaces = true;
-CREATE TABLESPACE regress_create_idx_tblspace LOCATION '';
-RESET allow_in_place_tablespaces;
-
BEGIN;
CREATE ROLE regress_minimal;
CREATE SCHEMA s;
@@ -51,11 +47,9 @@ ALTER TABLE s.x OWNER TO regress_minimal;
-- Empty-table DefineIndex()
CREATE UNIQUE INDEX u0rows ON s.x USING btree
((s.index_this_expr(y, s.const())) COLLATE s.coll s.citext_pattern_ops)
- TABLESPACE regress_create_idx_tblspace
WHERE s.index_row_if(y);
ALTER TABLE s.x ADD CONSTRAINT e0rows EXCLUDE USING btree
((s.index_this_expr(y, s.const())) COLLATE s.coll WITH s.=)
- USING INDEX TABLESPACE regress_create_idx_tblspace
WHERE (s.index_row_if(y));
-- Make the table nonempty.
INSERT INTO s.x VALUES ('foo'), ('bar');
@@ -68,11 +62,9 @@ RESET search_path;
GRANT EXECUTE ON FUNCTION s.index_this_expr TO regress_minimal;
CREATE UNIQUE INDEX u2rows ON s.x USING btree
((s.index_this_expr(y, s.const())) COLLATE s.coll s.citext_pattern_ops)
- TABLESPACE regress_create_idx_tblspace
WHERE s.index_row_if(y);
ALTER TABLE s.x ADD CONSTRAINT e2rows EXCLUDE USING btree
((s.index_this_expr(y, s.const())) COLLATE s.coll WITH s.=)
- USING INDEX TABLESPACE regress_create_idx_tblspace
WHERE (s.index_row_if(y));
-- Shall not find s.coll via search_path, despite the s.const->public.setter
-- call having set search_path=s during expression planning. Suppress the
@@ -80,9 +72,7 @@ ALTER TABLE s.x ADD CONSTRAINT e2rows EXCLUDE USING btree
\set VERBOSITY sqlstate
ALTER TABLE s.x ADD CONSTRAINT underqualified EXCLUDE USING btree
((s.index_this_expr(y, s.const())) COLLATE coll WITH s.=)
- USING INDEX TABLESPACE regress_create_idx_tblspace
WHERE (s.index_row_if(y));
\set VERBOSITY default
ROLLBACK;
-DROP TABLESPACE regress_create_idx_tblspace;
diff --git a/contrib/file_fdw/expected/file_fdw.out b/contrib/file_fdw/expected/file_fdw.out
index 86c148a..81bdb2c 100644
--- a/contrib/file_fdw/expected/file_fdw.out
+++ b/contrib/file_fdw/expected/file_fdw.out
@@ -4,6 +4,7 @@
-- directory paths are passed to us in environment variables
\getenv abs_srcdir PG_ABS_SRCDIR
-- Clean up in case a prior regression run failed
+SET compute_query_id TO 'off';
SET client_min_messages TO 'warning';
DROP ROLE IF EXISTS regress_file_fdw_superuser, regress_file_fdw_user, regress_no_priv_user;
RESET client_min_messages;
diff --git a/contrib/file_fdw/sql/file_fdw.sql b/contrib/file_fdw/sql/file_fdw.sql
index f0548e1..848a08c 100644
--- a/contrib/file_fdw/sql/file_fdw.sql
+++ b/contrib/file_fdw/sql/file_fdw.sql
@@ -6,6 +6,7 @@
\getenv abs_srcdir PG_ABS_SRCDIR
-- Clean up in case a prior regression run failed
+SET compute_query_id TO 'off';
SET client_min_messages TO 'warning';
DROP ROLE IF EXISTS regress_file_fdw_superuser, regress_file_fdw_user, regress_no_priv_user;
RESET client_min_messages;

View File

@@ -1,19 +0,0 @@
commit ec6a491d126882966a696f9ad5d3698935361d55
Author: Alexey Masterov <alexeymasterov@neon.tech>
Date: Tue Dec 17 10:25:00 2024 +0100
Changes required to run tests on Neon
diff --git a/test/expected/permissions_functions.out b/test/expected/permissions_functions.out
index 1e9fbc2..94cbe25 100644
--- a/test/expected/permissions_functions.out
+++ b/test/expected/permissions_functions.out
@@ -64,7 +64,7 @@ begin;
select current_user;
current_user
--------------
- postgres
+ cloud_admin
(1 row)
-- revoke default access from the public role for new functions

View File

@@ -1,24 +1,8 @@
diff --git a/Makefile b/Makefile
index 7a4b88c..56678af 100644
--- a/Makefile
+++ b/Makefile
@@ -3,7 +3,10 @@ EXTVERSION = 0.8.0
MODULE_big = vector
DATA = $(wildcard sql/*--*--*.sql)
-DATA_built = sql/$(EXTENSION)--$(EXTVERSION).sql
+# This change is needed to install different per-version SQL files
+# like pgvector--0.8.0.sql and pgvector--0.7.4.sql
+# The corresponding file is downloaded during the Docker image build process
+DATA_built = sql/$(EXTENSION)--$(EXTVERSION).sql sql/vector--0.7.4.sql
OBJS = src/bitutils.o src/bitvec.o src/halfutils.o src/halfvec.o src/hnsw.o src/hnswbuild.o src/hnswinsert.o src/hnswscan.o src/hnswutils.o src/hnswvacuum.o src/ivfbuild.o src/ivfflat.o src/ivfinsert.o src/ivfkmeans.o src/ivfscan.o src/ivfutils.o src/ivfvacuum.o src/sparsevec.o src/vector.o
HEADERS = src/halfvec.h src/sparsevec.h src/vector.h
diff --git a/src/hnswbuild.c b/src/hnswbuild.c
index b667478..fc1897c 100644
index dcfb2bd..d5189ee 100644
--- a/src/hnswbuild.c
+++ b/src/hnswbuild.c
@@ -843,9 +843,17 @@ HnswParallelBuildMain(dsm_segment *seg, shm_toc *toc)
@@ -860,9 +860,17 @@ HnswParallelBuildMain(dsm_segment *seg, shm_toc *toc)
hnswarea = shm_toc_lookup(toc, PARALLEL_KEY_HNSW_AREA, false);
@@ -36,7 +20,7 @@ index b667478..fc1897c 100644
/* Close relations within worker */
index_close(indexRel, indexLockmode);
table_close(heapRel, heapLockmode);
@@ -1100,12 +1108,38 @@ BuildIndex(Relation heap, Relation index, IndexInfo *indexInfo,
@@ -1117,12 +1125,38 @@ BuildIndex(Relation heap, Relation index, IndexInfo *indexInfo,
SeedRandom(42);
#endif

View File

@@ -27,10 +27,6 @@ commands:
user: nobody
sysvInitAction: respawn
shell: 'DATA_SOURCE_NAME="user=cloud_admin sslmode=disable dbname=postgres application_name=postgres-exporter" /bin/postgres_exporter --config.file=/etc/postgres_exporter.yml'
- name: pgbouncer-exporter
user: postgres
sysvInitAction: respawn
shell: '/bin/pgbouncer_exporter --pgBouncer.connectionString="postgres:///pgbouncer?host=/tmp&port=6432&dbname=pgbouncer&user=pgbouncer"'
- name: sql-exporter
user: nobody
sysvInitAction: respawn

View File

@@ -27,10 +27,6 @@ commands:
user: nobody
sysvInitAction: respawn
shell: 'DATA_SOURCE_NAME="user=cloud_admin sslmode=disable dbname=postgres application_name=postgres-exporter" /bin/postgres_exporter --config.file=/etc/postgres_exporter.yml'
- name: pgbouncer-exporter
user: postgres
sysvInitAction: respawn
shell: '/bin/pgbouncer_exporter --pgBouncer.connectionString="postgres:///pgbouncer?host=/tmp&port=6432&dbname=pgbouncer&user=pgbouncer"'
- name: sql-exporter
user: nobody
sysvInitAction: respawn

View File

@@ -51,7 +51,6 @@ tracing-subscriber.workspace = true
tracing-utils.workspace = true
thiserror.workspace = true
url.workspace = true
uuid.workspace = true
prometheus.workspace = true
postgres_initdb.workspace = true

View File

@@ -34,7 +34,6 @@
//! -r http://pg-ext-s3-gateway \
//! ```
use std::collections::HashMap;
use std::ffi::OsString;
use std::fs::File;
use std::path::Path;
use std::process::exit;
@@ -45,7 +44,7 @@ use std::{thread, time::Duration};
use anyhow::{Context, Result};
use chrono::Utc;
use clap::Parser;
use clap::Arg;
use compute_tools::disk_quota::set_disk_quota;
use compute_tools::lsn_lease::launch_lsn_lease_bg_task_for_static;
use signal_hook::consts::{SIGQUIT, SIGTERM};
@@ -74,76 +73,11 @@ use utils::failpoint_support;
// in-case of not-set environment var
const BUILD_TAG_DEFAULT: &str = "latest";
// Compatibility hack: if the control plane specified any remote-ext-config
// use the default value for extension storage proxy gateway.
// Remove this once the control plane is updated to pass the gateway URL
fn parse_remote_ext_config(arg: &str) -> Result<String> {
if arg.starts_with("http") {
Ok(arg.trim_end_matches('/').to_string())
} else {
Ok("http://pg-ext-s3-gateway".to_string())
}
}
#[derive(Parser)]
#[command(rename_all = "kebab-case")]
struct Cli {
#[arg(short = 'b', long, default_value = "postgres", env = "POSTGRES_PATH")]
pub pgbin: String,
#[arg(short = 'r', long, value_parser = parse_remote_ext_config)]
pub remote_ext_config: Option<String>,
#[arg(long, default_value_t = 3080)]
pub http_port: u16,
#[arg(short = 'D', long, value_name = "DATADIR")]
pub pgdata: String,
#[arg(short = 'C', long, value_name = "DATABASE_URL")]
pub connstr: String,
#[cfg(target_os = "linux")]
#[arg(long, default_value = "neon-postgres")]
pub cgroup: String,
#[cfg(target_os = "linux")]
#[arg(
long,
default_value = "host=localhost port=5432 dbname=postgres user=cloud_admin sslmode=disable application_name=vm-monitor"
)]
pub filecache_connstr: String,
#[cfg(target_os = "linux")]
#[arg(long, default_value = "0.0.0.0:10301")]
pub vm_monitor_addr: String,
#[arg(long, action = clap::ArgAction::SetTrue)]
pub resize_swap_on_bind: bool,
#[arg(long)]
pub set_disk_quota_for_fs: Option<String>,
#[arg(short = 's', long = "spec", group = "spec")]
pub spec_json: Option<String>,
#[arg(short = 'S', long, group = "spec-path")]
pub spec_path: Option<OsString>,
#[arg(short = 'i', long, group = "compute-id", conflicts_with_all = ["spec", "spec-path"])]
pub compute_id: Option<String>,
#[arg(short = 'p', long, conflicts_with_all = ["spec", "spec-path"], requires = "compute-id", value_name = "CONTROL_PLANE_API_BASE_URL")]
pub control_plane_uri: Option<String>,
}
fn main() -> Result<()> {
let cli = Cli::parse();
let build_tag = init()?;
let scenario = failpoint_support::init();
let (build_tag, clap_args) = init()?;
// enable core dumping for all child processes
setrlimit(Resource::CORE, rlimit::INFINITY, rlimit::INFINITY)?;
@@ -151,11 +85,13 @@ fn main() -> Result<()> {
// Enter startup tracing context
let _startup_context_guard = startup_context_from_env();
let cli_spec = try_spec_from_cli(&cli)?;
let cli_args = process_cli(&clap_args)?;
let compute = wait_spec(build_tag, &cli, cli_spec)?;
let cli_spec = try_spec_from_cli(&clap_args, &cli_args)?;
start_postgres(&cli, compute)?
let wait_spec_result = wait_spec(build_tag, cli_args, cli_spec)?;
start_postgres(&clap_args, wait_spec_result)?
// Startup is finished, exit the startup tracing span
};
@@ -172,7 +108,7 @@ fn main() -> Result<()> {
deinit_and_exit(wait_pg_result);
}
fn init() -> Result<String> {
fn init() -> Result<(String, clap::ArgMatches)> {
init_tracing_and_logging(DEFAULT_LOG_LEVEL)?;
let mut signals = Signals::new([SIGINT, SIGTERM, SIGQUIT])?;
@@ -187,7 +123,66 @@ fn init() -> Result<String> {
.to_string();
info!("build_tag: {build_tag}");
Ok(build_tag)
Ok((build_tag, cli().get_matches()))
}
fn process_cli(matches: &clap::ArgMatches) -> Result<ProcessCliResult> {
let pgbin_default = "postgres";
let pgbin = matches
.get_one::<String>("pgbin")
.map(|s| s.as_str())
.unwrap_or(pgbin_default);
let ext_remote_storage = matches
.get_one::<String>("remote-ext-config")
// Compatibility hack: if the control plane specified any remote-ext-config
// use the default value for extension storage proxy gateway.
// Remove this once the control plane is updated to pass the gateway URL
.map(|conf| {
if conf.starts_with("http") {
conf.trim_end_matches('/')
} else {
"http://pg-ext-s3-gateway"
}
});
let http_port = *matches
.get_one::<u16>("http-port")
.expect("http-port is required");
let pgdata = matches
.get_one::<String>("pgdata")
.expect("PGDATA path is required");
let connstr = matches
.get_one::<String>("connstr")
.expect("Postgres connection string is required");
let spec_json = matches.get_one::<String>("spec");
let spec_path = matches.get_one::<String>("spec-path");
let resize_swap_on_bind = matches.get_flag("resize-swap-on-bind");
let set_disk_quota_for_fs = matches.get_one::<String>("set-disk-quota-for-fs");
Ok(ProcessCliResult {
connstr,
pgdata,
pgbin,
ext_remote_storage,
http_port,
spec_json,
spec_path,
resize_swap_on_bind,
set_disk_quota_for_fs,
})
}
struct ProcessCliResult<'clap> {
connstr: &'clap str,
pgdata: &'clap str,
pgbin: &'clap str,
ext_remote_storage: Option<&'clap str>,
http_port: u16,
spec_json: Option<&'clap String>,
spec_path: Option<&'clap String>,
resize_swap_on_bind: bool,
set_disk_quota_for_fs: Option<&'clap String>,
}
fn startup_context_from_env() -> Option<opentelemetry::ContextGuard> {
@@ -240,9 +235,19 @@ fn startup_context_from_env() -> Option<opentelemetry::ContextGuard> {
}
}
fn try_spec_from_cli(cli: &Cli) -> Result<CliSpecParams> {
fn try_spec_from_cli(
matches: &clap::ArgMatches,
ProcessCliResult {
spec_json,
spec_path,
..
}: &ProcessCliResult,
) -> Result<CliSpecParams> {
let compute_id = matches.get_one::<String>("compute-id");
let control_plane_uri = matches.get_one::<String>("control-plane-uri");
// First, try to get cluster spec from the cli argument
if let Some(ref spec_json) = cli.spec_json {
if let Some(spec_json) = spec_json {
info!("got spec from cli argument {}", spec_json);
return Ok(CliSpecParams {
spec: Some(serde_json::from_str(spec_json)?),
@@ -251,7 +256,7 @@ fn try_spec_from_cli(cli: &Cli) -> Result<CliSpecParams> {
}
// Second, try to read it from the file if path is provided
if let Some(ref spec_path) = cli.spec_path {
if let Some(spec_path) = spec_path {
let file = File::open(Path::new(spec_path))?;
return Ok(CliSpecParams {
spec: Some(serde_json::from_reader(file)?),
@@ -259,20 +264,17 @@ fn try_spec_from_cli(cli: &Cli) -> Result<CliSpecParams> {
});
}
if cli.compute_id.is_none() {
let Some(compute_id) = compute_id else {
panic!(
"compute spec should be provided by one of the following ways: \
--spec OR --spec-path OR --control-plane-uri and --compute-id"
);
};
if cli.control_plane_uri.is_none() {
let Some(control_plane_uri) = control_plane_uri else {
panic!("must specify both --control-plane-uri and --compute-id or none");
};
match get_spec_from_control_plane(
cli.control_plane_uri.as_ref().unwrap(),
cli.compute_id.as_ref().unwrap(),
) {
match get_spec_from_control_plane(control_plane_uri, compute_id) {
Ok(spec) => Ok(CliSpecParams {
spec,
live_config_allowed: true,
@@ -296,12 +298,21 @@ struct CliSpecParams {
fn wait_spec(
build_tag: String,
cli: &Cli,
ProcessCliResult {
connstr,
pgdata,
pgbin,
ext_remote_storage,
resize_swap_on_bind,
set_disk_quota_for_fs,
http_port,
..
}: ProcessCliResult,
CliSpecParams {
spec,
live_config_allowed,
}: CliSpecParams,
) -> Result<Arc<ComputeNode>> {
) -> Result<WaitSpecResult> {
let mut new_state = ComputeState::new();
let spec_set;
@@ -313,7 +324,7 @@ fn wait_spec(
} else {
spec_set = false;
}
let connstr = Url::parse(&cli.connstr).context("cannot parse connstr as a URL")?;
let connstr = Url::parse(connstr).context("cannot parse connstr as a URL")?;
let conn_conf = postgres::config::Config::from_str(connstr.as_str())
.context("cannot build postgres config from connstr")?;
let tokio_conn_conf = tokio_postgres::config::Config::from_str(connstr.as_str())
@@ -322,14 +333,14 @@ fn wait_spec(
connstr,
conn_conf,
tokio_conn_conf,
pgdata: cli.pgdata.clone(),
pgbin: cli.pgbin.clone(),
pgversion: get_pg_version_string(&cli.pgbin),
http_port: cli.http_port,
pgdata: pgdata.to_string(),
pgbin: pgbin.to_string(),
pgversion: get_pg_version_string(pgbin),
http_port,
live_config_allowed,
state: Mutex::new(new_state),
state_changed: Condvar::new(),
ext_remote_storage: cli.remote_ext_config.clone(),
ext_remote_storage: ext_remote_storage.map(|s| s.to_string()),
ext_download_progress: RwLock::new(HashMap::new()),
build_tag,
};
@@ -346,7 +357,7 @@ fn wait_spec(
// Launch http service first, so that we can serve control-plane requests
// while configuration is still in progress.
let _http_handle =
launch_http_server(cli.http_port, &compute).expect("cannot launch http endpoint thread");
launch_http_server(http_port, &compute).expect("cannot launch http endpoint thread");
if !spec_set {
// No spec provided, hang waiting for it.
@@ -378,12 +389,27 @@ fn wait_spec(
launch_lsn_lease_bg_task_for_static(&compute);
Ok(compute)
Ok(WaitSpecResult {
compute,
resize_swap_on_bind,
set_disk_quota_for_fs: set_disk_quota_for_fs.cloned(),
})
}
struct WaitSpecResult {
compute: Arc<ComputeNode>,
resize_swap_on_bind: bool,
set_disk_quota_for_fs: Option<String>,
}
fn start_postgres(
cli: &Cli,
compute: Arc<ComputeNode>,
// need to allow unused because `matches` is only used if target_os = "linux"
#[allow(unused_variables)] matches: &clap::ArgMatches,
WaitSpecResult {
compute,
resize_swap_on_bind,
set_disk_quota_for_fs,
}: WaitSpecResult,
) -> Result<(Option<PostgresHandle>, StartPostgresResult)> {
// We got all we need, update the state.
let mut state = compute.state.lock().unwrap();
@@ -411,7 +437,7 @@ fn start_postgres(
let mut delay_exit = false;
// Resize swap to the desired size if the compute spec says so
if let (Some(size_bytes), true) = (swap_size_bytes, cli.resize_swap_on_bind) {
if let (Some(size_bytes), true) = (swap_size_bytes, resize_swap_on_bind) {
// To avoid 'swapoff' hitting postgres startup, we need to run resize-swap to completion
// *before* starting postgres.
//
@@ -438,9 +464,9 @@ fn start_postgres(
// Set disk quota if the compute spec says so
if let (Some(disk_quota_bytes), Some(disk_quota_fs_mountpoint)) =
(disk_quota_bytes, cli.set_disk_quota_for_fs.as_ref())
(disk_quota_bytes, set_disk_quota_for_fs)
{
match set_disk_quota(disk_quota_bytes, disk_quota_fs_mountpoint) {
match set_disk_quota(disk_quota_bytes, &disk_quota_fs_mountpoint) {
Ok(()) => {
let size_mib = disk_quota_bytes as f32 / (1 << 20) as f32; // just for more coherent display.
info!(%disk_quota_bytes, %size_mib, "set disk quota");
@@ -483,7 +509,13 @@ fn start_postgres(
if #[cfg(target_os = "linux")] {
use std::env;
use tokio_util::sync::CancellationToken;
let vm_monitor_addr = matches
.get_one::<String>("vm-monitor-addr")
.expect("--vm-monitor-addr should always be set because it has a default arg");
let file_cache_connstr = matches.get_one::<String>("filecache-connstr");
let cgroup = matches.get_one::<String>("cgroup");
// Only make a runtime if we need to.
// Note: it seems like you can make a runtime in an inner scope and
// if you start a task in it it won't be dropped. However, make it
// in the outermost scope just to be safe.
@@ -506,15 +538,15 @@ fn start_postgres(
let pgconnstr = if disable_lfc_resizing.unwrap_or(false) {
None
} else {
Some(cli.filecache_connstr.clone())
file_cache_connstr.cloned()
};
let vm_monitor = rt.as_ref().map(|rt| {
rt.spawn(vm_monitor::start(
Box::leak(Box::new(vm_monitor::Args {
cgroup: Some(cli.cgroup.clone()),
cgroup: cgroup.cloned(),
pgconnstr,
addr: cli.vm_monitor_addr.clone(),
addr: vm_monitor_addr.clone(),
})),
token.clone(),
))
@@ -670,6 +702,105 @@ fn deinit_and_exit(WaitPostgresResult { exit_code }: WaitPostgresResult) -> ! {
exit(exit_code.unwrap_or(1))
}
fn cli() -> clap::Command {
// Env variable is set by `cargo`
let version = option_env!("CARGO_PKG_VERSION").unwrap_or("unknown");
clap::Command::new("compute_ctl")
.version(version)
.arg(
Arg::new("http-port")
.long("http-port")
.value_name("HTTP_PORT")
.default_value("3080")
.value_parser(clap::value_parser!(u16))
.required(false),
)
.arg(
Arg::new("connstr")
.short('C')
.long("connstr")
.value_name("DATABASE_URL")
.required(true),
)
.arg(
Arg::new("pgdata")
.short('D')
.long("pgdata")
.value_name("DATADIR")
.required(true),
)
.arg(
Arg::new("pgbin")
.short('b')
.long("pgbin")
.default_value("postgres")
.value_name("POSTGRES_PATH"),
)
.arg(
Arg::new("spec")
.short('s')
.long("spec")
.value_name("SPEC_JSON"),
)
.arg(
Arg::new("spec-path")
.short('S')
.long("spec-path")
.value_name("SPEC_PATH"),
)
.arg(
Arg::new("compute-id")
.short('i')
.long("compute-id")
.value_name("COMPUTE_ID"),
)
.arg(
Arg::new("control-plane-uri")
.short('p')
.long("control-plane-uri")
.value_name("CONTROL_PLANE_API_BASE_URI"),
)
.arg(
Arg::new("remote-ext-config")
.short('r')
.long("remote-ext-config")
.value_name("REMOTE_EXT_CONFIG"),
)
// TODO(fprasx): we currently have default arguments because the cloud PR
// to pass them in hasn't been merged yet. We should get rid of them once
// the PR is merged.
.arg(
Arg::new("vm-monitor-addr")
.long("vm-monitor-addr")
.default_value("0.0.0.0:10301")
.value_name("VM_MONITOR_ADDR"),
)
.arg(
Arg::new("cgroup")
.long("cgroup")
.default_value("neon-postgres")
.value_name("CGROUP"),
)
.arg(
Arg::new("filecache-connstr")
.long("filecache-connstr")
.default_value(
"host=localhost port=5432 dbname=postgres user=cloud_admin sslmode=disable application_name=vm-monitor",
)
.value_name("FILECACHE_CONNSTR"),
)
.arg(
Arg::new("resize-swap-on-bind")
.long("resize-swap-on-bind")
.action(clap::ArgAction::SetTrue),
)
.arg(
Arg::new("set-disk-quota-for-fs")
.long("set-disk-quota-for-fs")
.value_name("SET_DISK_QUOTA_FOR_FS")
)
}
/// When compute_ctl is killed, send also termination signal to sync-safekeepers
/// to prevent leakage. TODO: it is better to convert compute_ctl to async and
/// wait for termination which would be easy then.
@@ -679,14 +810,7 @@ fn handle_exit_signal(sig: i32) {
exit(1);
}
#[cfg(test)]
mod test {
use clap::CommandFactory;
use super::Cli;
#[test]
fn verify_cli() {
Cli::command().debug_assert()
}
#[test]
fn verify_cli() {
cli().debug_assert()
}

View File

@@ -31,7 +31,7 @@ use camino::{Utf8Path, Utf8PathBuf};
use clap::Parser;
use compute_tools::extension_server::{get_pg_version, PostgresMajorVersion};
use nix::unistd::Pid;
use tracing::{error, info, info_span, warn, Instrument};
use tracing::{info, info_span, warn, Instrument};
use utils::fs_ext::is_directory_empty;
#[path = "fast_import/aws_s3_sync.rs"]
@@ -41,25 +41,16 @@ mod child_stdio_to_log;
#[path = "fast_import/s3_uri.rs"]
mod s3_uri;
const PG_WAIT_TIMEOUT: std::time::Duration = std::time::Duration::from_secs(600);
const PG_WAIT_RETRY_INTERVAL: std::time::Duration = std::time::Duration::from_millis(300);
#[derive(clap::Parser)]
struct Args {
#[clap(long)]
working_directory: Utf8PathBuf,
#[clap(long, env = "NEON_IMPORTER_S3_PREFIX")]
s3_prefix: Option<s3_uri::S3Uri>,
#[clap(long)]
source_connection_string: Option<String>,
#[clap(short, long)]
interactive: bool,
s3_prefix: s3_uri::S3Uri,
#[clap(long)]
pg_bin_dir: Utf8PathBuf,
#[clap(long)]
pg_lib_dir: Utf8PathBuf,
#[clap(long)]
pg_port: Option<u16>, // port to run postgres on, 5432 is default
}
#[serde_with::serde_as]
@@ -76,13 +67,6 @@ enum EncryptionSecret {
KMS { key_id: String },
}
// copied from pageserver_api::config::defaults::DEFAULT_LOCALE to avoid dependency just for a constant
const DEFAULT_LOCALE: &str = if cfg!(target_os = "macos") {
"C"
} else {
"C.UTF-8"
};
#[tokio::main]
pub(crate) async fn main() -> anyhow::Result<()> {
utils::logging::init(
@@ -93,74 +77,30 @@ pub(crate) async fn main() -> anyhow::Result<()> {
info!("starting");
let args = Args::parse();
let Args {
working_directory,
s3_prefix,
pg_bin_dir,
pg_lib_dir,
} = Args::parse();
// Validate arguments
if args.s3_prefix.is_none() && args.source_connection_string.is_none() {
anyhow::bail!("either s3_prefix or source_connection_string must be specified");
}
if args.s3_prefix.is_some() && args.source_connection_string.is_some() {
anyhow::bail!("only one of s3_prefix or source_connection_string can be specified");
}
let aws_config = aws_config::load_defaults(BehaviorVersion::v2024_03_28()).await;
let working_directory = args.working_directory;
let pg_bin_dir = args.pg_bin_dir;
let pg_lib_dir = args.pg_lib_dir;
let pg_port = args.pg_port.unwrap_or_else(|| {
info!("pg_port not specified, using default 5432");
5432
});
// Initialize AWS clients only if s3_prefix is specified
let (aws_config, kms_client) = if args.s3_prefix.is_some() {
let config = aws_config::load_defaults(BehaviorVersion::v2024_03_28()).await;
let kms = aws_sdk_kms::Client::new(&config);
(Some(config), Some(kms))
} else {
(None, None)
};
// Get source connection string either from S3 spec or direct argument
let source_connection_string = if let Some(s3_prefix) = &args.s3_prefix {
let spec: Spec = {
let spec_key = s3_prefix.append("/spec.json");
let s3_client = aws_sdk_s3::Client::new(aws_config.as_ref().unwrap());
let object = s3_client
.get_object()
.bucket(&spec_key.bucket)
.key(spec_key.key)
.send()
.await
.context("get spec from s3")?
.body
.collect()
.await
.context("download spec body")?;
serde_json::from_slice(&object.into_bytes()).context("parse spec as json")?
};
match spec.encryption_secret {
EncryptionSecret::KMS { key_id } => {
let mut output = kms_client
.unwrap()
.decrypt()
.key_id(key_id)
.ciphertext_blob(aws_sdk_s3::primitives::Blob::new(
spec.source_connstring_ciphertext_base64,
))
.send()
.await
.context("decrypt source connection string")?;
let plaintext = output
.plaintext
.take()
.context("get plaintext source connection string")?;
String::from_utf8(plaintext.into_inner())
.context("parse source connection string as utf8")?
}
}
} else {
args.source_connection_string.unwrap()
let spec: Spec = {
let spec_key = s3_prefix.append("/spec.json");
let s3_client = aws_sdk_s3::Client::new(&aws_config);
let object = s3_client
.get_object()
.bucket(&spec_key.bucket)
.key(spec_key.key)
.send()
.await
.context("get spec from s3")?
.body
.collect()
.await
.context("download spec body")?;
serde_json::from_slice(&object.into_bytes()).context("parse spec as json")?
};
match tokio::fs::create_dir(&working_directory).await {
@@ -183,6 +123,15 @@ pub(crate) async fn main() -> anyhow::Result<()> {
.await
.context("create pgdata directory")?;
//
// Setup clients
//
let aws_config = aws_config::load_defaults(BehaviorVersion::v2024_03_28()).await;
let kms_client = aws_sdk_kms::Client::new(&aws_config);
//
// Initialize pgdata
//
let pgbin = pg_bin_dir.join("postgres");
let pg_version = match get_pg_version(pgbin.as_ref()) {
PostgresMajorVersion::V14 => 14,
@@ -193,7 +142,7 @@ pub(crate) async fn main() -> anyhow::Result<()> {
let superuser = "cloud_admin"; // XXX: this shouldn't be hard-coded
postgres_initdb::do_run_initdb(postgres_initdb::RunInitdbArgs {
superuser,
locale: DEFAULT_LOCALE, // XXX: this shouldn't be hard-coded,
locale: "en_US.UTF-8", // XXX: this shouldn't be hard-coded,
pg_version,
initdb_bin: pg_bin_dir.join("initdb").as_ref(),
library_search_path: &pg_lib_dir, // TODO: is this right? Prob works in compute image, not sure about neon_local.
@@ -210,7 +159,6 @@ pub(crate) async fn main() -> anyhow::Result<()> {
let mut postgres_proc = tokio::process::Command::new(pgbin)
.arg("-D")
.arg(&pgdata_dir)
.args(["-p", &format!("{pg_port}")])
.args(["-c", "wal_level=minimal"])
.args(["-c", "shared_buffers=10GB"])
.args(["-c", "max_wal_senders=0"])
@@ -222,15 +170,8 @@ pub(crate) async fn main() -> anyhow::Result<()> {
.args(["-c", &format!("max_parallel_workers={nproc}")])
.args(["-c", &format!("max_parallel_workers_per_gather={nproc}")])
.args(["-c", &format!("max_worker_processes={nproc}")])
.args([
"-c",
&format!(
"effective_io_concurrency={}",
if cfg!(target_os = "macos") { 0 } else { 100 }
),
])
.args(["-c", "effective_io_concurrency=100"])
.env_clear()
.env("LD_LIBRARY_PATH", &pg_lib_dir)
.stdout(std::process::Stdio::piped())
.stderr(std::process::Stdio::piped())
.spawn()
@@ -244,58 +185,44 @@ pub(crate) async fn main() -> anyhow::Result<()> {
)
.instrument(info_span!("postgres")),
);
// Create neondb database in the running postgres
let restore_pg_connstring =
format!("host=localhost port={pg_port} user={superuser} dbname=postgres");
let start_time = std::time::Instant::now();
format!("host=localhost port=5432 user={superuser} dbname=postgres");
loop {
if start_time.elapsed() > PG_WAIT_TIMEOUT {
error!(
"timeout exceeded: failed to poll postgres and create database within 10 minutes"
);
std::process::exit(1);
}
match tokio_postgres::connect(&restore_pg_connstring, tokio_postgres::NoTls).await {
Ok((client, connection)) => {
// Spawn the connection handling task to maintain the connection
tokio::spawn(async move {
if let Err(e) = connection.await {
warn!("connection error: {}", e);
}
});
match client.simple_query("CREATE DATABASE neondb;").await {
Ok(_) => {
info!("created neondb database");
break;
}
Err(e) => {
warn!(
"failed to create database: {}, retying in {}s",
e,
PG_WAIT_RETRY_INTERVAL.as_secs_f32()
);
tokio::time::sleep(PG_WAIT_RETRY_INTERVAL).await;
continue;
}
}
}
Err(_) => {
info!(
"postgres not ready yet, retrying in {}s",
PG_WAIT_RETRY_INTERVAL.as_secs_f32()
);
tokio::time::sleep(PG_WAIT_RETRY_INTERVAL).await;
continue;
}
let res = tokio_postgres::connect(&restore_pg_connstring, tokio_postgres::NoTls).await;
if res.is_ok() {
info!("postgres is ready, could connect to it");
break;
}
}
let restore_pg_connstring = restore_pg_connstring.replace("dbname=postgres", "dbname=neondb");
//
// Decrypt connection string
//
let source_connection_string = {
match spec.encryption_secret {
EncryptionSecret::KMS { key_id } => {
let mut output = kms_client
.decrypt()
.key_id(key_id)
.ciphertext_blob(aws_sdk_s3::primitives::Blob::new(
spec.source_connstring_ciphertext_base64,
))
.send()
.await
.context("decrypt source connection string")?;
let plaintext = output
.plaintext
.take()
.context("get plaintext source connection string")?;
String::from_utf8(plaintext.into_inner())
.context("parse source connection string as utf8")?
}
}
};
//
// Start the work
//
let dumpdir = working_directory.join("dumpdir");
@@ -329,7 +256,6 @@ pub(crate) async fn main() -> anyhow::Result<()> {
.arg(&source_connection_string)
// how we run it
.env_clear()
.env("LD_LIBRARY_PATH", &pg_lib_dir)
.kill_on_drop(true)
.stdout(std::process::Stdio::piped())
.stderr(std::process::Stdio::piped())
@@ -363,7 +289,6 @@ pub(crate) async fn main() -> anyhow::Result<()> {
.arg(&dumpdir)
// how we run it
.env_clear()
.env("LD_LIBRARY_PATH", &pg_lib_dir)
.kill_on_drop(true)
.stdout(std::process::Stdio::piped())
.stderr(std::process::Stdio::piped())
@@ -385,12 +310,6 @@ pub(crate) async fn main() -> anyhow::Result<()> {
}
}
// If interactive mode, wait for Ctrl+C
if args.interactive {
info!("Running in interactive mode. Press Ctrl+C to shut down.");
tokio::signal::ctrl_c().await.context("wait for ctrl-c")?;
}
info!("shutdown postgres");
{
nix::sys::signal::kill(
@@ -406,24 +325,21 @@ pub(crate) async fn main() -> anyhow::Result<()> {
.context("wait for postgres to shut down")?;
}
// Only sync if s3_prefix was specified
if let Some(s3_prefix) = args.s3_prefix {
info!("upload pgdata");
aws_s3_sync::sync(Utf8Path::new(&pgdata_dir), &s3_prefix.append("/pgdata/"))
.await
.context("sync dump directory to destination")?;
info!("upload pgdata");
aws_s3_sync::sync(Utf8Path::new(&pgdata_dir), &s3_prefix.append("/pgdata/"))
.await
.context("sync dump directory to destination")?;
info!("write status");
{
let status_dir = working_directory.join("status");
std::fs::create_dir(&status_dir).context("create status directory")?;
let status_file = status_dir.join("pgdata");
std::fs::write(&status_file, serde_json::json!({"done": true}).to_string())
.context("write status file")?;
aws_s3_sync::sync(&status_dir, &s3_prefix.append("/status/"))
.await
.context("sync status directory to destination")?;
}
info!("write status");
{
let status_dir = working_directory.join("status");
std::fs::create_dir(&status_dir).context("create status directory")?;
let status_file = status_dir.join("pgdata");
std::fs::write(&status_file, serde_json::json!({"done": true}).to_string())
.context("write status file")?;
aws_s3_sync::sync(&status_dir, &s3_prefix.append("/status/"))
.await
.context("sync status directory to destination")?;
}
Ok(())

View File

@@ -41,14 +41,14 @@ use crate::local_proxy;
use crate::pg_helpers::*;
use crate::spec::*;
use crate::spec_apply::ApplySpecPhase::{
CreateAndAlterDatabases, CreateAndAlterRoles, CreateAvailabilityCheck, CreateSchemaNeon,
CreateSuperUser, DropInvalidDatabases, DropRoles, FinalizeDropLogicalSubscriptions,
HandleNeonExtension, HandleOtherExtensions, RenameAndDeleteDatabases, RenameRoles,
RunInEachDatabase,
CreateAndAlterDatabases, CreateAndAlterRoles, CreateAvailabilityCheck, CreateSuperUser,
DropInvalidDatabases, DropRoles, HandleNeonExtension, HandleOtherExtensions,
RenameAndDeleteDatabases, RenameRoles, RunInEachDatabase,
};
use crate::spec_apply::PerDatabasePhase;
use crate::spec_apply::PerDatabasePhase::{
ChangeSchemaPerms, DeleteDBRoleReferences, DropLogicalSubscriptions, HandleAnonExtension,
ChangeSchemaPerms, DeleteDBRoleReferences, DropSubscriptionsForDeletedDatabases,
HandleAnonExtension,
};
use crate::spec_apply::{apply_operations, MutableApplyContext, DB};
use crate::sync_sk::{check_if_synced, ping_safekeeper};
@@ -340,15 +340,6 @@ impl ComputeNode {
self.state.lock().unwrap().status
}
pub fn get_timeline_id(&self) -> Option<TimelineId> {
self.state
.lock()
.unwrap()
.pspec
.as_ref()
.map(|s| s.timeline_id)
}
// Remove `pgdata` directory and create it again with right permissions.
fn create_pgdata(&self) -> Result<()> {
// Ignore removal error, likely it is a 'No such file or directory (os error 2)'.
@@ -938,48 +929,6 @@ impl ComputeNode {
.map(|role| (role.name.clone(), role))
.collect::<HashMap<String, Role>>();
// Check if we need to drop subscriptions before starting the endpoint.
//
// It is important to do this operation exactly once when endpoint starts on a new branch.
// Otherwise, we may drop not inherited, but newly created subscriptions.
//
// We cannot rely only on spec.drop_subscriptions_before_start flag,
// because if for some reason compute restarts inside VM,
// it will start again with the same spec and flag value.
//
// To handle this, we save the fact of the operation in the database
// in the neon.drop_subscriptions_done table.
// If the table does not exist, we assume that the operation was never performed, so we must do it.
// If table exists, we check if the operation was performed on the current timelilne.
//
let mut drop_subscriptions_done = false;
if spec.drop_subscriptions_before_start {
let timeline_id = self.get_timeline_id().context("timeline_id must be set")?;
let query = format!("select 1 from neon.drop_subscriptions_done where timeline_id = '{}'", timeline_id);
info!("Checking if drop subscription operation was already performed for timeline_id: {}", timeline_id);
drop_subscriptions_done = match
client.simple_query(&query).await {
Ok(result) => {
matches!(&result[0], postgres::SimpleQueryMessage::Row(_))
},
Err(e) =>
{
match e.code() {
Some(&SqlState::UNDEFINED_TABLE) => false,
_ => {
// We don't expect any other error here, except for the schema/table not existing
error!("Error checking if drop subscription operation was already performed: {}", e);
return Err(e.into());
}
}
}
}
};
let jwks_roles = Arc::new(
spec.as_ref()
.local_proxy_config
@@ -1047,7 +996,7 @@ impl ComputeNode {
jwks_roles.clone(),
concurrency_token.clone(),
db,
[DropLogicalSubscriptions].to_vec(),
[DropSubscriptionsForDeletedDatabases].to_vec(),
);
Ok(spawn(fut))
@@ -1075,7 +1024,6 @@ impl ComputeNode {
CreateAndAlterRoles,
RenameAndDeleteDatabases,
CreateAndAlterDatabases,
CreateSchemaNeon,
] {
info!("Applying phase {:?}", &phase);
apply_operations(
@@ -1116,17 +1064,6 @@ impl ComputeNode {
}
let conf = Arc::new(conf);
let mut phases = vec![
DeleteDBRoleReferences,
ChangeSchemaPerms,
HandleAnonExtension,
];
if spec.drop_subscriptions_before_start && !drop_subscriptions_done {
info!("Adding DropLogicalSubscriptions phase because drop_subscriptions_before_start is set");
phases.push(DropLogicalSubscriptions);
}
let fut = Self::apply_spec_sql_db(
spec.clone(),
conf,
@@ -1134,7 +1071,12 @@ impl ComputeNode {
jwks_roles.clone(),
concurrency_token.clone(),
db,
phases,
[
DeleteDBRoleReferences,
ChangeSchemaPerms,
HandleAnonExtension,
]
.to_vec(),
);
Ok(spawn(fut))
@@ -1146,20 +1088,12 @@ impl ComputeNode {
handle.await??;
}
let mut phases = vec![
for phase in vec![
HandleOtherExtensions,
HandleNeonExtension, // This step depends on CreateSchemaNeon
HandleNeonExtension,
CreateAvailabilityCheck,
DropRoles,
];
// This step depends on CreateSchemaNeon
if spec.drop_subscriptions_before_start && !drop_subscriptions_done {
info!("Adding FinalizeDropLogicalSubscriptions phase because drop_subscriptions_before_start is set");
phases.push(FinalizeDropLogicalSubscriptions);
}
for phase in phases {
] {
debug!("Applying phase {:?}", &phase);
apply_operations(
spec.clone(),
@@ -1529,14 +1463,6 @@ impl ComputeNode {
Ok(())
},
)?;
let postgresql_conf_path = pgdata_path.join("postgresql.conf");
if config::line_in_file(
&postgresql_conf_path,
"neon.disable_logical_replication_subscribers=false",
)? {
info!("updated postgresql.conf to set neon.disable_logical_replication_subscribers=false");
}
self.pg_reload_conf()?;
}
self.post_apply_config()?;

View File

@@ -129,13 +129,6 @@ pub fn write_postgres_conf(
writeln!(file, "neon.extension_server_port={}", extension_server_port)?;
if spec.drop_subscriptions_before_start {
writeln!(file, "neon.disable_logical_replication_subscribers=true")?;
} else {
// be explicit about the default value
writeln!(file, "neon.disable_logical_replication_subscribers=false")?;
}
// This is essential to keep this line at the end of the file,
// because it is intended to override any settings above.
writeln!(file, "include_if_exists = 'compute_ctl_temp_override.conf'")?;

View File

@@ -85,8 +85,6 @@ use tracing::info;
use tracing::log::warn;
use zstd::stream::read::Decoder;
use crate::metrics::{REMOTE_EXT_REQUESTS_TOTAL, UNKNOWN_HTTP_STATUS};
fn get_pg_config(argument: &str, pgbin: &str) -> String {
// gives the result of `pg_config [argument]`
// where argument is a flag like `--version` or `--sharedir`
@@ -258,60 +256,23 @@ pub fn create_control_files(remote_extensions: &RemoteExtSpec, pgbin: &str) {
async fn download_extension_tar(ext_remote_storage: &str, ext_path: &str) -> Result<Bytes> {
let uri = format!("{}/{}", ext_remote_storage, ext_path);
info!("Download extension {} from uri {}", ext_path, uri);
info!("Download extension {:?} from uri {:?}", ext_path, uri);
match do_extension_server_request(&uri).await {
Ok(resp) => {
info!("Successfully downloaded remote extension data {}", ext_path);
REMOTE_EXT_REQUESTS_TOTAL
.with_label_values(&[&StatusCode::OK.to_string()])
.inc();
Ok(resp)
}
Err((msg, status)) => {
REMOTE_EXT_REQUESTS_TOTAL
.with_label_values(&[&status])
.inc();
bail!(msg);
}
}
}
let resp = reqwest::get(uri).await?;
// Do a single remote extensions server request.
// Return result or (error message + stringified status code) in case of any failures.
async fn do_extension_server_request(uri: &str) -> Result<Bytes, (String, String)> {
let resp = reqwest::get(uri).await.map_err(|e| {
(
format!(
"could not perform remote extensions server request: {:?}",
e
),
UNKNOWN_HTTP_STATUS.to_string(),
)
})?;
let status = resp.status();
match status {
match resp.status() {
StatusCode::OK => match resp.bytes().await {
Ok(resp) => Ok(resp),
Err(e) => Err((
format!("could not read remote extensions server response: {:?}", e),
// It's fine to return and report error with status as 200 OK,
// because we still failed to read the response.
status.to_string(),
)),
Ok(resp) => {
info!("Download extension {:?} completed successfully", ext_path);
Ok(resp)
}
Err(e) => bail!("could not deserialize remote extension response: {}", e),
},
StatusCode::SERVICE_UNAVAILABLE => Err((
"remote extensions server is temporarily unavailable".to_string(),
status.to_string(),
)),
_ => Err((
format!(
"unexpected remote extensions server response status code: {}",
status
),
status.to_string(),
)),
StatusCode::SERVICE_UNAVAILABLE => bail!("remote extension is temporarily unavailable"),
_ => bail!(
"unexpected remote extension response status code: {}",
resp.status()
),
}
}

View File

@@ -1,6 +1,9 @@
use std::ops::{Deref, DerefMut};
use axum::extract::{rejection::JsonRejection, FromRequest, Request};
use axum::{
async_trait,
extract::{rejection::JsonRejection, FromRequest, Request},
};
use compute_api::responses::GenericAPIError;
use http::StatusCode;
@@ -9,6 +12,7 @@ use http::StatusCode;
#[derive(Debug, Clone, Copy, Default)]
pub(crate) struct Json<T>(pub T);
#[async_trait]
impl<S, T> FromRequest<S> for Json<T>
where
axum::Json<T>: FromRequest<S, Rejection = JsonRejection>,

View File

@@ -1,6 +1,9 @@
use std::ops::{Deref, DerefMut};
use axum::extract::{rejection::PathRejection, FromRequestParts};
use axum::{
async_trait,
extract::{rejection::PathRejection, FromRequestParts},
};
use compute_api::responses::GenericAPIError;
use http::{request::Parts, StatusCode};
@@ -9,6 +12,7 @@ use http::{request::Parts, StatusCode};
#[derive(Debug, Clone, Copy, Default)]
pub(crate) struct Path<T>(pub T);
#[async_trait]
impl<S, T> FromRequestParts<S> for Path<T>
where
axum::extract::Path<T>: FromRequestParts<S, Rejection = PathRejection>,

View File

@@ -1,6 +1,9 @@
use std::ops::{Deref, DerefMut};
use axum::extract::{rejection::QueryRejection, FromRequestParts};
use axum::{
async_trait,
extract::{rejection::QueryRejection, FromRequestParts},
};
use compute_api::responses::GenericAPIError;
use http::{request::Parts, StatusCode};
@@ -9,6 +12,7 @@ use http::{request::Parts, StatusCode};
#[derive(Debug, Clone, Copy, Default)]
pub(crate) struct Query<T>(pub T);
#[async_trait]
impl<S, T> FromRequestParts<S> for Query<T>
where
axum::extract::Query<T>: FromRequestParts<S, Rejection = QueryRejection>,

View File

@@ -68,6 +68,35 @@ paths:
schema:
$ref: "#/components/schemas/ComputeInsights"
/installed_extensions:
get:
tags:
- Info
summary: Get installed extensions.
description: ""
operationId: getInstalledExtensions
responses:
200:
description: List of installed extensions
content:
application/json:
schema:
$ref: "#/components/schemas/InstalledExtensions"
/info:
get:
tags:
- Info
summary: Get info about the compute pod / VM.
description: ""
operationId: getInfo
responses:
200:
description: Info
content:
application/json:
schema:
$ref: "#/components/schemas/Info"
/dbs_and_roles:
get:
tags:

View File

@@ -17,8 +17,7 @@ use crate::{
#[derive(Debug, Clone, Deserialize)]
pub(in crate::http) struct ExtensionServerParams {
#[serde(default)]
is_library: bool,
is_library: Option<bool>,
}
/// Download a remote extension.
@@ -52,7 +51,7 @@ pub(in crate::http) async fn download_extension(
remote_extensions.get_ext(
&filename,
params.is_library,
params.is_library.unwrap_or(false),
&compute.build_tag,
&compute.pgversion,
)

View File

@@ -0,0 +1,11 @@
use axum::response::Response;
use compute_api::responses::InfoResponse;
use http::StatusCode;
use crate::http::JsonResponse;
/// Get information about the physical characteristics about the compute.
pub(in crate::http) async fn get_info() -> Response {
let num_cpus = num_cpus::get_physical();
JsonResponse::success(StatusCode::OK, &InfoResponse { num_cpus })
}

View File

@@ -0,0 +1,33 @@
use std::sync::Arc;
use axum::{extract::State, response::Response};
use compute_api::responses::ComputeStatus;
use http::StatusCode;
use tokio::task;
use crate::{compute::ComputeNode, http::JsonResponse, installed_extensions};
/// Get a list of installed extensions.
pub(in crate::http) async fn get_installed_extensions(
State(compute): State<Arc<ComputeNode>>,
) -> Response {
let status = compute.get_status();
if status != ComputeStatus::Running {
return JsonResponse::invalid_status(status);
}
let conf = compute.get_conn_conf(None);
let res = task::spawn_blocking(move || installed_extensions::get_installed_extensions(conf))
.await
.unwrap();
match res {
Ok(installed_extensions) => {
JsonResponse::success(StatusCode::OK, Some(installed_extensions))
}
Err(e) => JsonResponse::error(
StatusCode::INTERNAL_SERVER_ERROR,
format!("failed to get list of installed extensions: {e}"),
),
}
}

View File

@@ -2,16 +2,17 @@ use axum::{body::Body, response::Response};
use http::header::CONTENT_TYPE;
use http::StatusCode;
use metrics::proto::MetricFamily;
use metrics::{Encoder, TextEncoder};
use metrics::Encoder;
use metrics::TextEncoder;
use crate::{http::JsonResponse, metrics::collect};
use crate::{http::JsonResponse, installed_extensions};
/// Expose Prometheus metrics.
pub(in crate::http) async fn get_metrics() -> Response {
// When we call TextEncoder::encode() below, it will immediately return an
// error if a metric family has no metrics, so we need to preemptively
// filter out metric families with no metrics.
let metrics = collect()
let metrics = installed_extensions::collect()
.into_iter()
.filter(|m| !m.get_metric().is_empty())
.collect::<Vec<MetricFamily>>();

View File

@@ -10,7 +10,9 @@ pub(in crate::http) mod extension_server;
pub(in crate::http) mod extensions;
pub(in crate::http) mod failpoints;
pub(in crate::http) mod grants;
pub(in crate::http) mod info;
pub(in crate::http) mod insights;
pub(in crate::http) mod installed_extensions;
pub(in crate::http) mod metrics;
pub(in crate::http) mod metrics_json;
pub(in crate::http) mod status;

View File

@@ -1,14 +1,15 @@
use std::{
net::{IpAddr, Ipv6Addr, SocketAddr},
sync::Arc,
sync::{
atomic::{AtomicU64, Ordering},
Arc,
},
thread,
time::Duration,
};
use anyhow::Result;
use axum::{
extract::Request,
middleware::{self, Next},
response::{IntoResponse, Response},
routing::{get, post},
Router,
@@ -16,13 +17,16 @@ use axum::{
use http::StatusCode;
use tokio::net::TcpListener;
use tower::ServiceBuilder;
use tower_http::{request_id::PropagateRequestIdLayer, trace::TraceLayer};
use tower_http::{
request_id::{MakeRequestId, PropagateRequestIdLayer, RequestId, SetRequestIdLayer},
trace::TraceLayer,
};
use tracing::{debug, error, info, Span};
use uuid::Uuid;
use super::routes::{
check_writability, configure, database_schema, dbs_and_roles, extension_server, extensions,
grants, insights, metrics, metrics_json, status, terminate,
grants, info as info_route, insights, installed_extensions, metrics, metrics_json, status,
terminate,
};
use crate::compute::ComputeNode;
@@ -30,36 +34,47 @@ async fn handle_404() -> Response {
StatusCode::NOT_FOUND.into_response()
}
const X_REQUEST_ID: &str = "x-request-id";
#[derive(Clone, Default)]
struct ComputeMakeRequestId(Arc<AtomicU64>);
/// This middleware function allows compute_ctl to generate its own request ID
/// if one isn't supplied. The control plane will always send one as a UUID. The
/// neon Postgres extension on the other hand does not send one.
async fn maybe_add_request_id_header(mut request: Request, next: Next) -> Response {
let headers = request.headers_mut();
impl MakeRequestId for ComputeMakeRequestId {
fn make_request_id<B>(
&mut self,
_request: &http::Request<B>,
) -> Option<tower_http::request_id::RequestId> {
let request_id = self
.0
.fetch_add(1, Ordering::SeqCst)
.to_string()
.parse()
.unwrap();
if headers.get(X_REQUEST_ID).is_none() {
headers.append(X_REQUEST_ID, Uuid::new_v4().to_string().parse().unwrap());
Some(RequestId::new(request_id))
}
next.run(request).await
}
/// Run the HTTP server and wait on it forever.
#[tokio::main]
async fn serve(port: u16, compute: Arc<ComputeNode>) {
const X_REQUEST_ID: &str = "x-request-id";
let mut app = Router::new()
.route("/check_writability", post(check_writability::is_writable))
.route("/configure", post(configure::configure))
.route("/database_schema", get(database_schema::get_schema_dump))
.route("/dbs_and_roles", get(dbs_and_roles::get_catalog_objects))
.route(
"/extension_server/{*filename}",
"/extension_server/*filename",
post(extension_server::download_extension),
)
.route("/extensions", post(extensions::install_extension))
.route("/grants", post(grants::add_grant))
.route("/info", get(info_route::get_info))
.route("/insights", get(insights::get_insights))
.route(
"/installed_extensions",
get(installed_extensions::get_installed_extensions),
)
.route("/metrics", get(metrics::get_metrics))
.route("/metrics.json", get(metrics_json::get_metrics))
.route("/status", get(status::get_status))
@@ -67,8 +82,9 @@ async fn serve(port: u16, compute: Arc<ComputeNode>) {
.fallback(handle_404)
.layer(
ServiceBuilder::new()
// Add this middleware since we assume the request ID exists
.layer(middleware::from_fn(maybe_add_request_id_header))
.layer(SetRequestIdLayer::x_request_id(
ComputeMakeRequestId::default(),
))
.layer(
TraceLayer::new_for_http()
.on_request(|request: &http::Request<_>, _span: &Span| {

View File

@@ -1,10 +1,13 @@
use compute_api::responses::{InstalledExtension, InstalledExtensions};
use metrics::proto::MetricFamily;
use std::collections::HashMap;
use anyhow::Result;
use postgres::{Client, NoTls};
use crate::metrics::INSTALLED_EXTENSIONS;
use metrics::core::Collector;
use metrics::{register_uint_gauge_vec, UIntGaugeVec};
use once_cell::sync::Lazy;
/// We don't reuse get_existing_dbs() just for code clarity
/// and to make database listing query here more explicit.
@@ -99,3 +102,16 @@ pub fn get_installed_extensions(mut conf: postgres::config::Config) -> Result<In
extensions: extensions_map.into_values().collect(),
})
}
static INSTALLED_EXTENSIONS: Lazy<UIntGaugeVec> = Lazy::new(|| {
register_uint_gauge_vec!(
"compute_installed_extensions",
"Number of databases where the version of extension is installed",
&["extension_name", "version", "owned_by_superuser"]
)
.expect("failed to define a metric")
});
pub fn collect() -> Vec<MetricFamily> {
INSTALLED_EXTENSIONS.collect()
}

View File

@@ -16,7 +16,6 @@ pub mod extension_server;
pub mod installed_extensions;
pub mod local_proxy;
pub mod lsn_lease;
pub mod metrics;
mod migration;
pub mod monitor;
pub mod params;

View File

@@ -1,70 +0,0 @@
use metrics::core::Collector;
use metrics::proto::MetricFamily;
use metrics::{register_int_counter_vec, register_uint_gauge_vec, IntCounterVec, UIntGaugeVec};
use once_cell::sync::Lazy;
pub(crate) static INSTALLED_EXTENSIONS: Lazy<UIntGaugeVec> = Lazy::new(|| {
register_uint_gauge_vec!(
"compute_installed_extensions",
"Number of databases where the version of extension is installed",
&["extension_name", "version", "owned_by_superuser"]
)
.expect("failed to define a metric")
});
// Normally, any HTTP API request is described by METHOD (e.g. GET, POST, etc.) + PATH,
// but for all our APIs we defined a 'slug'/method/operationId in the OpenAPI spec.
// And it's fair to call it a 'RPC' (Remote Procedure Call).
pub enum CPlaneRequestRPC {
GetSpec,
}
impl CPlaneRequestRPC {
pub fn as_str(&self) -> &str {
match self {
CPlaneRequestRPC::GetSpec => "GetSpec",
}
}
}
pub const UNKNOWN_HTTP_STATUS: &str = "unknown";
pub(crate) static CPLANE_REQUESTS_TOTAL: Lazy<IntCounterVec> = Lazy::new(|| {
register_int_counter_vec!(
"compute_ctl_cplane_requests_total",
"Total number of control plane requests made by compute_ctl by status",
&["rpc", "http_status"]
)
.expect("failed to define a metric")
});
/// Total number of failed database migrations. Per-compute, this is actually a boolean metric,
/// either empty or with a single value (1, migration_id) because we stop at the first failure.
/// Yet, the sum over the fleet will provide the total number of failures.
pub(crate) static DB_MIGRATION_FAILED: Lazy<IntCounterVec> = Lazy::new(|| {
register_int_counter_vec!(
"compute_ctl_db_migration_failed_total",
"Total number of failed database migrations",
&["migration_id"]
)
.expect("failed to define a metric")
});
pub(crate) static REMOTE_EXT_REQUESTS_TOTAL: Lazy<IntCounterVec> = Lazy::new(|| {
register_int_counter_vec!(
"compute_ctl_remote_ext_requests_total",
"Total number of requests made by compute_ctl to download extensions from S3 proxy by status",
// Do not use any labels like extension name yet.
// We can add them later if needed.
&["http_status"]
)
.expect("failed to define a metric")
});
pub fn collect() -> Vec<MetricFamily> {
let mut metrics = INSTALLED_EXTENSIONS.collect();
metrics.extend(CPLANE_REQUESTS_TOTAL.collect());
metrics.extend(REMOTE_EXT_REQUESTS_TOTAL.collect());
metrics.extend(DB_MIGRATION_FAILED.collect());
metrics
}

View File

@@ -1,9 +1,7 @@
use anyhow::{Context, Result};
use fail::fail_point;
use postgres::{Client, Transaction};
use tracing::{error, info};
use crate::metrics::DB_MIGRATION_FAILED;
use tracing::info;
/// Runs a series of migrations on a target database
pub(crate) struct MigrationRunner<'m> {
@@ -80,31 +78,24 @@ impl<'m> MigrationRunner<'m> {
Ok(())
}
/// Run an individual migration in a separate transaction block.
fn run_migration(client: &mut Client, migration_id: i64, migration: &str) -> Result<()> {
let mut txn = client
.transaction()
.with_context(|| format!("begin transaction for migration {migration_id}"))?;
/// Run an individual migration
fn run_migration(txn: &mut Transaction, migration_id: i64, migration: &str) -> Result<()> {
if migration.starts_with("-- SKIP") {
info!("Skipping migration id={}", migration_id);
// Even though we are skipping the migration, updating the
// migration ID should help keep logic easy to understand when
// trying to understand the state of a cluster.
Self::update_migration_id(&mut txn, migration_id)?;
Self::update_migration_id(txn, migration_id)?;
} else {
info!("Running migration id={}:\n{}\n", migration_id, migration);
txn.simple_query(migration)
.with_context(|| format!("apply migration {migration_id}"))?;
Self::update_migration_id(&mut txn, migration_id)?;
Self::update_migration_id(txn, migration_id)?;
}
txn.commit()
.with_context(|| format!("commit transaction for migration {migration_id}"))?;
Ok(())
}
@@ -118,20 +109,19 @@ impl<'m> MigrationRunner<'m> {
// The index lags the migration ID by 1, so the current migration
// ID is also the next index
let migration_id = (current_migration + 1) as i64;
let migration = self.migrations[current_migration];
match Self::run_migration(self.client, migration_id, migration) {
Ok(_) => {
info!("Finished migration id={}", migration_id);
}
Err(e) => {
error!("Failed to run migration id={}: {:?}", migration_id, e);
DB_MIGRATION_FAILED
.with_label_values(&[migration_id.to_string().as_str()])
.inc();
return Err(e);
}
}
let mut txn = self
.client
.transaction()
.with_context(|| format!("begin transaction for migration {migration_id}"))?;
Self::run_migration(&mut txn, migration_id, self.migrations[current_migration])
.with_context(|| format!("running migration {migration_id}"))?;
txn.commit()
.with_context(|| format!("commit transaction for migration {migration_id}"))?;
info!("Finished migration id={}", migration_id);
current_migration += 1;
}

View File

@@ -6,7 +6,6 @@ use std::path::Path;
use tracing::{error, info, instrument, warn};
use crate::config;
use crate::metrics::{CPlaneRequestRPC, CPLANE_REQUESTS_TOTAL, UNKNOWN_HTTP_STATUS};
use crate::migration::MigrationRunner;
use crate::params::PG_HBA_ALL_MD5;
use crate::pg_helpers::*;
@@ -20,7 +19,7 @@ use compute_api::spec::ComputeSpec;
fn do_control_plane_request(
uri: &str,
jwt: &str,
) -> Result<ControlPlaneSpecResponse, (bool, String, String)> {
) -> Result<ControlPlaneSpecResponse, (bool, String)> {
let resp = reqwest::blocking::Client::new()
.get(uri)
.header("Authorization", format!("Bearer {}", jwt))
@@ -28,42 +27,35 @@ fn do_control_plane_request(
.map_err(|e| {
(
true,
format!("could not perform spec request to control plane: {:?}", e),
UNKNOWN_HTTP_STATUS.to_string(),
format!("could not perform spec request to control plane: {}", e),
)
})?;
let status = resp.status();
match status {
match resp.status() {
StatusCode::OK => match resp.json::<ControlPlaneSpecResponse>() {
Ok(spec_resp) => Ok(spec_resp),
Err(e) => Err((
true,
format!("could not deserialize control plane response: {:?}", e),
status.to_string(),
format!("could not deserialize control plane response: {}", e),
)),
},
StatusCode::SERVICE_UNAVAILABLE => Err((
true,
"control plane is temporarily unavailable".to_string(),
status.to_string(),
)),
StatusCode::SERVICE_UNAVAILABLE => {
Err((true, "control plane is temporarily unavailable".to_string()))
}
StatusCode::BAD_GATEWAY => {
// We have a problem with intermittent 502 errors now
// https://github.com/neondatabase/cloud/issues/2353
// It's fine to retry GET request in this case.
Err((
true,
"control plane request failed with 502".to_string(),
status.to_string(),
))
Err((true, "control plane request failed with 502".to_string()))
}
// Another code, likely 500 or 404, means that compute is unknown to the control plane
// or some internal failure happened. Doesn't make much sense to retry in this case.
_ => Err((
false,
format!("unexpected control plane response status code: {}", status),
status.to_string(),
format!(
"unexpected control plane response status code: {}",
resp.status()
),
)),
}
}
@@ -91,28 +83,17 @@ pub fn get_spec_from_control_plane(
// - got spec -> return Ok(Some(spec))
while attempt < 4 {
spec = match do_control_plane_request(&cp_uri, &jwt) {
Ok(spec_resp) => {
CPLANE_REQUESTS_TOTAL
.with_label_values(&[
CPlaneRequestRPC::GetSpec.as_str(),
&StatusCode::OK.to_string(),
])
.inc();
match spec_resp.status {
ControlPlaneComputeStatus::Empty => Ok(None),
ControlPlaneComputeStatus::Attached => {
if let Some(spec) = spec_resp.spec {
Ok(Some(spec))
} else {
bail!("compute is attached, but spec is empty")
}
Ok(spec_resp) => match spec_resp.status {
ControlPlaneComputeStatus::Empty => Ok(None),
ControlPlaneComputeStatus::Attached => {
if let Some(spec) = spec_resp.spec {
Ok(Some(spec))
} else {
bail!("compute is attached, but spec is empty")
}
}
}
Err((retry, msg, status)) => {
CPLANE_REQUESTS_TOTAL
.with_label_values(&[CPlaneRequestRPC::GetSpec.as_str(), &status])
.inc();
},
Err((retry, msg)) => {
if retry {
Err(anyhow!(msg))
} else {

View File

@@ -47,7 +47,7 @@ pub enum PerDatabasePhase {
DeleteDBRoleReferences,
ChangeSchemaPerms,
HandleAnonExtension,
DropLogicalSubscriptions,
DropSubscriptionsForDeletedDatabases,
}
#[derive(Clone, Debug)]
@@ -58,13 +58,11 @@ pub enum ApplySpecPhase {
CreateAndAlterRoles,
RenameAndDeleteDatabases,
CreateAndAlterDatabases,
CreateSchemaNeon,
RunInEachDatabase { db: DB, subphase: PerDatabasePhase },
HandleOtherExtensions,
HandleNeonExtension,
CreateAvailabilityCheck,
DropRoles,
FinalizeDropLogicalSubscriptions,
}
pub struct Operation {
@@ -333,7 +331,7 @@ async fn get_operations<'a>(
// NB: there could be other db states, which prevent us from dropping
// the database. For example, if db is used by any active subscription
// or replication slot.
// Such cases are handled in the DropLogicalSubscriptions
// Such cases are handled in the DropSubscriptionsForDeletedDatabases
// phase. We do all the cleanup before actually dropping the database.
let drop_db_query: String = format!(
"DROP DATABASE IF EXISTS {} WITH (FORCE)",
@@ -444,19 +442,13 @@ async fn get_operations<'a>(
Ok(Box::new(operations))
}
ApplySpecPhase::CreateSchemaNeon => Ok(Box::new(once(Operation {
query: String::from("CREATE SCHEMA IF NOT EXISTS neon"),
comment: Some(String::from(
"create schema for neon extension and utils tables",
)),
}))),
ApplySpecPhase::RunInEachDatabase { db, subphase } => {
match subphase {
PerDatabasePhase::DropLogicalSubscriptions => {
PerDatabasePhase::DropSubscriptionsForDeletedDatabases => {
match &db {
DB::UserDB(db) => {
let drop_subscription_query: String = format!(
include_str!("sql/drop_subscriptions.sql"),
include_str!("sql/drop_subscription_for_drop_dbs.sql"),
datname_str = escape_literal(&db.name),
);
@@ -674,6 +666,10 @@ async fn get_operations<'a>(
}
ApplySpecPhase::HandleNeonExtension => {
let operations = vec![
Operation {
query: String::from("CREATE SCHEMA IF NOT EXISTS neon"),
comment: Some(String::from("init: add schema for extension")),
},
Operation {
query: String::from("CREATE EXTENSION IF NOT EXISTS neon WITH SCHEMA neon"),
comment: Some(String::from(
@@ -716,9 +712,5 @@ async fn get_operations<'a>(
Ok(Box::new(operations))
}
ApplySpecPhase::FinalizeDropLogicalSubscriptions => Ok(Box::new(once(Operation {
query: String::from(include_str!("sql/finalize_drop_subscriptions.sql")),
comment: None,
}))),
}
}

View File

@@ -1,21 +0,0 @@
DO $$
BEGIN
IF NOT EXISTS(
SELECT 1
FROM pg_catalog.pg_tables
WHERE tablename = 'drop_subscriptions_done'
AND schemaname = 'neon'
)
THEN
CREATE TABLE neon.drop_subscriptions_done
(id serial primary key, timeline_id text);
END IF;
-- preserve the timeline_id of the last drop_subscriptions run
-- to ensure that the cleanup of a timeline is executed only once.
-- use upsert to avoid the table bloat in case of cascade branching (branch of a branch)
INSERT INTO neon.drop_subscriptions_done VALUES (1, current_setting('neon.timeline_id'))
ON CONFLICT (id) DO UPDATE
SET timeline_id = current_setting('neon.timeline_id');
END
$$

View File

@@ -1,10 +1,6 @@
# Local Development Control Plane (`neon_local`)
# Control Plane and Neon Local
This crate contains tools to start a Neon development environment locally. This utility can be used with the `cargo neon` command. This is a convenience to invoke
the `neon_local` binary.
**Note**: this is a dev/test tool -- a minimal control plane suitable for testing
code changes locally, but not suitable for running production systems.
This crate contains tools to start a Neon development environment locally. This utility can be used with the `cargo neon` command.
## Example: Start with Postgres 16

View File

@@ -1357,7 +1357,6 @@ async fn handle_endpoint(subcmd: &EndpointCmd, env: &local_env::LocalEnv) -> Res
args.pg_version,
mode,
!args.update_catalog,
false,
)?;
}
EndpointCmd::Start(args) => {

View File

@@ -76,7 +76,6 @@ pub struct EndpointConf {
http_port: u16,
pg_version: u32,
skip_pg_catalog_updates: bool,
drop_subscriptions_before_start: bool,
features: Vec<ComputeFeature>,
}
@@ -144,7 +143,6 @@ impl ComputeControlPlane {
pg_version: u32,
mode: ComputeMode,
skip_pg_catalog_updates: bool,
drop_subscriptions_before_start: bool,
) -> Result<Arc<Endpoint>> {
let pg_port = pg_port.unwrap_or_else(|| self.get_port());
let http_port = http_port.unwrap_or_else(|| self.get_port() + 1);
@@ -164,7 +162,6 @@ impl ComputeControlPlane {
// with this we basically test a case of waking up an idle compute, where
// we also skip catalog updates in the cloud.
skip_pg_catalog_updates,
drop_subscriptions_before_start,
features: vec![],
});
@@ -180,7 +177,6 @@ impl ComputeControlPlane {
pg_port,
pg_version,
skip_pg_catalog_updates,
drop_subscriptions_before_start,
features: vec![],
})?,
)?;
@@ -244,7 +240,6 @@ pub struct Endpoint {
// Optimizations
skip_pg_catalog_updates: bool,
drop_subscriptions_before_start: bool,
// Feature flags
features: Vec<ComputeFeature>,
}
@@ -296,7 +291,6 @@ impl Endpoint {
tenant_id: conf.tenant_id,
pg_version: conf.pg_version,
skip_pg_catalog_updates: conf.skip_pg_catalog_updates,
drop_subscriptions_before_start: conf.drop_subscriptions_before_start,
features: conf.features,
})
}
@@ -631,7 +625,6 @@ impl Endpoint {
shard_stripe_size: Some(shard_stripe_size),
local_proxy_config: None,
reconfigure_concurrency: 1,
drop_subscriptions_before_start: self.drop_subscriptions_before_start,
};
let spec_path = self.endpoint_path().join("spec.json");
std::fs::write(spec_path, serde_json::to_string_pretty(&spec)?)?;

View File

@@ -347,31 +347,11 @@ impl PageServerNode {
.map(|x| x.parse::<usize>())
.transpose()
.context("Failed to parse 'compaction_threshold' as an integer")?,
compaction_upper_limit: settings
.remove("compaction_upper_limit")
.map(|x| x.parse::<usize>())
.transpose()
.context("Failed to parse 'compaction_upper_limit' as an integer")?,
compaction_algorithm: settings
.remove("compaction_algorithm")
.map(serde_json::from_str)
.transpose()
.context("Failed to parse 'compaction_algorithm' json")?,
l0_flush_delay_threshold: settings
.remove("l0_flush_delay_threshold")
.map(|x| x.parse::<usize>())
.transpose()
.context("Failed to parse 'l0_flush_delay_threshold' as an integer")?,
l0_flush_wait_upload: settings
.remove("l0_flush_wait_upload")
.map(|x| x.parse::<bool>())
.transpose()
.context("Failed to parse 'l0_flush_wait_upload' as a boolean")?,
l0_flush_stall_threshold: settings
.remove("l0_flush_stall_threshold")
.map(|x| x.parse::<usize>())
.transpose()
.context("Failed to parse 'l0_flush_stall_threshold' as an integer")?,
gc_horizon: settings
.remove("gc_horizon")
.map(|x| x.parse::<u64>())
@@ -388,11 +368,6 @@ impl PageServerNode {
.map(|x| x.parse::<u8>())
.transpose()
.context("Failed to parse 'image_creation_check_threshold' as integer")?,
image_creation_preempt_threshold: settings
.remove("image_creation_preempt_threshold")
.map(|x| x.parse::<usize>())
.transpose()
.context("Failed to parse 'image_creation_preempt_threshold' as integer")?,
pitr_interval: settings.remove("pitr_interval").map(|x| x.to_string()),
walreceiver_connect_timeout: settings
.remove("walreceiver_connect_timeout")
@@ -443,26 +418,6 @@ impl PageServerNode {
.map(serde_json::from_str)
.transpose()
.context("parse `wal_receiver_protocol_override` from json")?,
rel_size_v2_enabled: settings
.remove("rel_size_v2_enabled")
.map(|x| x.parse::<bool>())
.transpose()
.context("Failed to parse 'rel_size_v2_enabled' as bool")?,
gc_compaction_enabled: settings
.remove("gc_compaction_enabled")
.map(|x| x.parse::<bool>())
.transpose()
.context("Failed to parse 'gc_compaction_enabled' as bool")?,
gc_compaction_initial_threshold_kb: settings
.remove("gc_compaction_initial_threshold_kb")
.map(|x| x.parse::<u64>())
.transpose()
.context("Failed to parse 'gc_compaction_initial_threshold_kb' as integer")?,
gc_compaction_ratio_percent: settings
.remove("gc_compaction_ratio_percent")
.map(|x| x.parse::<u64>())
.transpose()
.context("Failed to parse 'gc_compaction_ratio_percent' as integer")?,
};
if !settings.is_empty() {
bail!("Unrecognized tenant settings: {settings:?}")

View File

@@ -822,7 +822,10 @@ impl StorageController {
self.dispatch(
Method::PUT,
format!("control/v1/tenant/{tenant_shard_id}/migrate"),
Some(TenantShardMigrateRequest { node_id }),
Some(TenantShardMigrateRequest {
tenant_shard_id,
node_id,
}),
)
.await
}

View File

@@ -1,17 +1,12 @@
use futures::StreamExt;
use std::{
collections::{HashMap, HashSet},
str::FromStr,
time::Duration,
};
use std::{str::FromStr, time::Duration};
use clap::{Parser, Subcommand};
use pageserver_api::{
controller_api::{
AvailabilityZone, NodeAvailabilityWrapper, NodeDescribeResponse, NodeShardResponse,
SafekeeperDescribeResponse, SafekeeperSchedulingPolicyRequest, ShardSchedulingPolicy,
ShardsPreferredAzsRequest, ShardsPreferredAzsResponse, SkSchedulingPolicy,
TenantCreateRequest, TenantDescribeResponse, TenantPolicyRequest,
SafekeeperDescribeResponse, ShardSchedulingPolicy, TenantCreateRequest,
TenantDescribeResponse, TenantPolicyRequest,
},
models::{
EvictionPolicy, EvictionPolicyLayerAccessThreshold, LocationConfigSecondary,
@@ -117,13 +112,6 @@ enum Command {
#[arg(long)]
node: NodeId,
},
/// Migrate the secondary location for a tenant shard to a specific pageserver.
TenantShardMigrateSecondary {
#[arg(long)]
tenant_shard_id: TenantShardId,
#[arg(long)]
node: NodeId,
},
/// Cancel any ongoing reconciliation for this shard
TenantShardCancelReconcile {
#[arg(long)]
@@ -158,12 +146,6 @@ enum Command {
#[arg(long)]
tenant_id: TenantId,
},
TenantSetPreferredAz {
#[arg(long)]
tenant_id: TenantId,
#[arg(long)]
preferred_az: Option<String>,
},
/// Uncleanly drop a tenant from the storage controller: this doesn't delete anything from pageservers. Appropriate
/// if you e.g. used `tenant-warmup` by mistake on a tenant ID that doesn't really exist, or is in some other region.
TenantDrop {
@@ -232,13 +214,6 @@ enum Command {
},
/// List safekeepers known to the storage controller
Safekeepers {},
/// Set the scheduling policy of the specified safekeeper
SafekeeperScheduling {
#[arg(long)]
node_id: NodeId,
#[arg(long)]
scheduling_policy: SkSchedulingPolicyArg,
},
}
#[derive(Parser)]
@@ -291,17 +266,6 @@ impl FromStr for PlacementPolicyArg {
}
}
#[derive(Debug, Clone)]
struct SkSchedulingPolicyArg(SkSchedulingPolicy);
impl FromStr for SkSchedulingPolicyArg {
type Err = anyhow::Error;
fn from_str(s: &str) -> Result<Self, Self::Err> {
SkSchedulingPolicy::from_str(s).map(Self)
}
}
#[derive(Debug, Clone)]
struct ShardSchedulingPolicyArg(ShardSchedulingPolicy);
@@ -431,12 +395,11 @@ async fn main() -> anyhow::Result<()> {
resp.sort_by(|a, b| a.listen_http_addr.cmp(&b.listen_http_addr));
let mut table = comfy_table::Table::new();
table.set_header(["Id", "Hostname", "AZ", "Scheduling", "Availability"]);
table.set_header(["Id", "Hostname", "Scheduling", "Availability"]);
for node in resp {
table.add_row([
format!("{}", node.id),
node.listen_http_addr,
node.availability_zone_id,
format!("{:?}", node.scheduling),
format!("{:?}", node.availability),
]);
@@ -496,65 +459,33 @@ async fn main() -> anyhow::Result<()> {
println!("{table}");
}
Command::Tenants { node_id: None } => {
// Set up output formatting
let mut resp = storcon_client
.dispatch::<(), Vec<TenantDescribeResponse>>(
Method::GET,
"control/v1/tenant".to_string(),
None,
)
.await?;
resp.sort_by(|a, b| a.tenant_id.cmp(&b.tenant_id));
let mut table = comfy_table::Table::new();
table.set_header([
"TenantId",
"Preferred AZ",
"ShardCount",
"StripeSize",
"Placement",
"Scheduling",
]);
// Pagination loop over listing API
let mut start_after = None;
const LIMIT: usize = 1000;
loop {
let path = match start_after {
None => format!("control/v1/tenant?limit={LIMIT}"),
Some(start_after) => {
format!("control/v1/tenant?limit={LIMIT}&start_after={start_after}")
}
};
let resp = storcon_client
.dispatch::<(), Vec<TenantDescribeResponse>>(Method::GET, path, None)
.await?;
if resp.is_empty() {
// End of data reached
break;
}
// Give some visual feedback while we're building up the table (comfy_table doesn't have
// streaming output)
if resp.len() >= LIMIT {
eprint!(".");
}
start_after = Some(resp.last().unwrap().tenant_id);
for tenant in resp {
let shard_zero = tenant.shards.into_iter().next().unwrap();
table.add_row([
format!("{}", tenant.tenant_id),
shard_zero
.preferred_az_id
.as_ref()
.cloned()
.unwrap_or("".to_string()),
format!("{}", shard_zero.tenant_shard_id.shard_count.literal()),
format!("{:?}", tenant.stripe_size),
format!("{:?}", tenant.policy),
format!("{:?}", shard_zero.scheduling_policy),
]);
}
}
// Terminate progress dots
if table.row_count() > LIMIT {
eprint!("");
for tenant in resp {
let shard_zero = tenant.shards.into_iter().next().unwrap();
table.add_row([
format!("{}", tenant.tenant_id),
format!("{}", shard_zero.tenant_shard_id.shard_count.literal()),
format!("{:?}", tenant.stripe_size),
format!("{:?}", tenant.policy),
format!("{:?}", shard_zero.scheduling_policy),
]);
}
println!("{table}");
@@ -609,7 +540,10 @@ async fn main() -> anyhow::Result<()> {
tenant_shard_id,
node,
} => {
let req = TenantShardMigrateRequest { node_id: node };
let req = TenantShardMigrateRequest {
tenant_shard_id,
node_id: node,
};
storcon_client
.dispatch::<TenantShardMigrateRequest, TenantShardMigrateResponse>(
@@ -619,20 +553,6 @@ async fn main() -> anyhow::Result<()> {
)
.await?;
}
Command::TenantShardMigrateSecondary {
tenant_shard_id,
node,
} => {
let req = TenantShardMigrateRequest { node_id: node };
storcon_client
.dispatch::<TenantShardMigrateRequest, TenantShardMigrateResponse>(
Method::PUT,
format!("control/v1/tenant/{tenant_shard_id}/migrate_secondary"),
Some(req),
)
.await?;
}
Command::TenantShardCancelReconcile { tenant_shard_id } => {
storcon_client
.dispatch::<(), ()>(
@@ -676,19 +596,6 @@ async fn main() -> anyhow::Result<()> {
None,
)
.await?;
let nodes = storcon_client
.dispatch::<(), Vec<NodeDescribeResponse>>(
Method::GET,
"control/v1/node".to_string(),
None,
)
.await?;
let nodes = nodes
.into_iter()
.map(|n| (n.id, n))
.collect::<HashMap<_, _>>();
println!("Tenant {tenant_id}");
let mut table = comfy_table::Table::new();
table.add_row(["Policy", &format!("{:?}", policy)]);
@@ -697,14 +604,7 @@ async fn main() -> anyhow::Result<()> {
println!("{table}");
println!("Shards:");
let mut table = comfy_table::Table::new();
table.set_header([
"Shard",
"Attached",
"Attached AZ",
"Secondary",
"Last error",
"status",
]);
table.set_header(["Shard", "Attached", "Secondary", "Last error", "status"]);
for shard in shards {
let secondary = shard
.node_secondary
@@ -727,18 +627,11 @@ async fn main() -> anyhow::Result<()> {
}
let status = status_parts.join(",");
let attached_node = shard
.node_attached
.as_ref()
.map(|id| nodes.get(id).expect("Shard references nonexistent node"));
table.add_row([
format!("{}", shard.tenant_shard_id),
attached_node
.map(|n| format!("{} ({})", n.listen_http_addr, n.id))
.unwrap_or(String::new()),
attached_node
.map(|n| n.availability_zone_id.clone())
shard
.node_attached
.map(|n| format!("{}", n))
.unwrap_or(String::new()),
secondary,
shard.last_error,
@@ -747,66 +640,6 @@ async fn main() -> anyhow::Result<()> {
}
println!("{table}");
}
Command::TenantSetPreferredAz {
tenant_id,
preferred_az,
} => {
// First learn about the tenant's shards
let describe_response = storcon_client
.dispatch::<(), TenantDescribeResponse>(
Method::GET,
format!("control/v1/tenant/{tenant_id}"),
None,
)
.await?;
// Learn about nodes to validate the AZ ID
let nodes = storcon_client
.dispatch::<(), Vec<NodeDescribeResponse>>(
Method::GET,
"control/v1/node".to_string(),
None,
)
.await?;
if let Some(preferred_az) = &preferred_az {
let azs = nodes
.into_iter()
.map(|n| (n.availability_zone_id))
.collect::<HashSet<_>>();
if !azs.contains(preferred_az) {
anyhow::bail!(
"AZ {} not found on any node: known AZs are: {:?}",
preferred_az,
azs
);
}
} else {
// Make it obvious to the user that since they've omitted an AZ, we're clearing it
eprintln!("Clearing preferred AZ for tenant {}", tenant_id);
}
// Construct a request that modifies all the tenant's shards
let req = ShardsPreferredAzsRequest {
preferred_az_ids: describe_response
.shards
.into_iter()
.map(|s| {
(
s.tenant_shard_id,
preferred_az.clone().map(AvailabilityZone),
)
})
.collect(),
};
storcon_client
.dispatch::<ShardsPreferredAzsRequest, ShardsPreferredAzsResponse>(
Method::PUT,
"control/v1/preferred_azs".to_string(),
Some(req),
)
.await?;
}
Command::TenantWarmup { tenant_id } => {
let describe_response = storcon_client
.dispatch::<(), TenantDescribeResponse>(
@@ -1082,7 +915,10 @@ async fn main() -> anyhow::Result<()> {
.dispatch::<TenantShardMigrateRequest, TenantShardMigrateResponse>(
Method::PUT,
format!("control/v1/tenant/{}/migrate", mv.tenant_shard_id),
Some(TenantShardMigrateRequest { node_id: mv.to }),
Some(TenantShardMigrateRequest {
tenant_shard_id: mv.tenant_shard_id,
node_id: mv.to,
}),
)
.await
.map_err(|e| (mv.tenant_shard_id, mv.from, mv.to, e))
@@ -1221,23 +1057,6 @@ async fn main() -> anyhow::Result<()> {
}
println!("{table}");
}
Command::SafekeeperScheduling {
node_id,
scheduling_policy,
} => {
let scheduling_policy = scheduling_policy.0;
storcon_client
.dispatch::<SafekeeperSchedulingPolicyRequest, ()>(
Method::POST,
format!("control/v1/safekeeper/{node_id}/scheduling_policy"),
Some(SafekeeperSchedulingPolicyRequest { scheduling_policy }),
)
.await?;
println!(
"Scheduling policy of {node_id} set to {}",
String::from(scheduling_policy)
);
}
}
Ok(())

View File

@@ -32,7 +32,6 @@ reason = "the marvin attack only affects private key decryption, not public key
# https://embarkstudios.github.io/cargo-deny/checks/licenses/cfg.html
[licenses]
allow = [
"0BSD",
"Apache-2.0",
"BSD-2-Clause",
"BSD-3-Clause",
@@ -42,8 +41,8 @@ allow = [
"MIT",
"MPL-2.0",
"OpenSSL",
"Unicode-DFS-2016",
"Unicode-3.0",
"Zlib",
]
confidence-threshold = 0.8
exceptions = [

View File

@@ -7,12 +7,14 @@ FROM $REPOSITORY/${COMPUTE_IMAGE}:$TAG
ARG COMPUTE_IMAGE
USER root
RUN echo 'Acquire::Retries "5";' > /etc/apt/apt.conf.d/80-retries && \
apt-get update && \
RUN apt-get update && \
apt-get install -y curl \
jq \
python3-pip \
netcat-openbsd
#Faker is required for the pg_anon test
RUN case $COMPUTE_IMAGE in compute-node-v17) OPT="--break-system-packages";; *) OPT= ;; esac && pip3 install $OPT Faker
#This is required for the pg_hintplan test
RUN mkdir -p /ext-src/pg_hint_plan-src /postgres/contrib/file_fdw && chown postgres /ext-src/pg_hint_plan-src /postgres/contrib/file_fdw
RUN mkdir -p /ext-src/pg_hint_plan-src && chown postgres /ext-src/pg_hint_plan-src
USER postgres
USER postgres

View File

@@ -20,55 +20,30 @@ while ! nc -z pageserver 6400; do
done
echo "Page server is ready."
cp ${SPEC_FILE_ORG} ${SPEC_FILE}
echo "Create a tenant and timeline"
generate_id tenant_id
PARAMS=(
-X PUT
-H "Content-Type: application/json"
-d "{\"mode\": \"AttachedSingle\", \"generation\": 1, \"tenant_conf\": {}}"
"http://pageserver:9898/v1/tenant/${tenant_id}/location_config"
)
result=$(curl "${PARAMS[@]}")
echo $result | jq .
if [ -n "${TENANT_ID:-}" ] && [ -n "${TIMELINE_ID:-}" ]; then
tenant_id=${TENANT_ID}
timeline_id=${TIMELINE_ID}
else
echo "Check if a tenant present"
PARAMS=(
-X GET
-H "Content-Type: application/json"
"http://pageserver:9898/v1/tenant"
)
tenant_id=$(curl "${PARAMS[@]}" | jq -r .[0].id)
if [ -z "${tenant_id}" ] || [ "${tenant_id}" = null ]; then
echo "Create a tenant"
generate_id tenant_id
PARAMS=(
-X PUT
-H "Content-Type: application/json"
-d "{\"mode\": \"AttachedSingle\", \"generation\": 1, \"tenant_conf\": {}}"
"http://pageserver:9898/v1/tenant/${tenant_id}/location_config"
)
result=$(curl "${PARAMS[@]}")
echo $result | jq .
fi
echo "Check if a timeline present"
PARAMS=(
-X GET
-H "Content-Type: application/json"
"http://pageserver:9898/v1/tenant/${tenant_id}/timeline"
)
timeline_id=$(curl "${PARAMS[@]}" | jq -r .[0].timeline_id)
if [ -z "${timeline_id}" ] || [ "${timeline_id}" = null ]; then
generate_id timeline_id
PARAMS=(
-sbf
-X POST
-H "Content-Type: application/json"
-d "{\"new_timeline_id\": \"${timeline_id}\", \"pg_version\": ${PG_VERSION}}"
"http://pageserver:9898/v1/tenant/${tenant_id}/timeline/"
)
result=$(curl "${PARAMS[@]}")
echo $result | jq .
fi
fi
generate_id timeline_id
PARAMS=(
-sbf
-X POST
-H "Content-Type: application/json"
-d "{\"new_timeline_id\": \"${timeline_id}\", \"pg_version\": ${PG_VERSION}}"
"http://pageserver:9898/v1/tenant/${tenant_id}/timeline/"
)
result=$(curl "${PARAMS[@]}")
echo $result | jq .
echo "Overwrite tenant id and timeline id in spec file"
sed -i "s/TENANT_ID/${tenant_id}/" ${SPEC_FILE}
sed "s/TENANT_ID/${tenant_id}/" ${SPEC_FILE_ORG} > ${SPEC_FILE}
sed -i "s/TIMELINE_ID/${timeline_id}/" ${SPEC_FILE}
cat ${SPEC_FILE}

View File

@@ -149,13 +149,11 @@ services:
args:
- REPOSITORY=${REPOSITORY:-neondatabase}
- COMPUTE_IMAGE=compute-node-v${PG_VERSION:-16}
- TAG=${COMPUTE_TAG:-${TAG:-latest}}
- http_proxy=${http_proxy:-}
- https_proxy=${https_proxy:-}
- TAG=${TAG:-latest}
- http_proxy=$http_proxy
- https_proxy=$https_proxy
environment:
- PG_VERSION=${PG_VERSION:-16}
- TENANT_ID=${TENANT_ID:-}
- TIMELINE_ID=${TIMELINE_ID:-}
#- RUST_BACKTRACE=1
# Mount the test files directly, for faster editing cycle.
volumes:
@@ -187,8 +185,6 @@ services:
neon-test-extensions:
profiles: ["test-extensions"]
image: ${REPOSITORY:-neondatabase}/neon-test-extensions-v${PG_TEST_VERSION:-16}:${TAG:-latest}
environment:
- PGPASSWORD=cloud_admin
entrypoint:
- "/bin/bash"
- "-c"

View File

@@ -18,10 +18,14 @@ cd $(dirname $0)
COMPUTE_CONTAINER_NAME=docker-compose-compute-1
TEST_CONTAINER_NAME=docker-compose-neon-test-extensions-1
PSQL_OPTION="-h localhost -U cloud_admin -p 55433 -d postgres"
: ${http_proxy:=}
: ${https_proxy:=}
export http_proxy https_proxy
cleanup() {
echo "show container information"
docker ps
docker compose --profile test-extensions -f $COMPOSE_FILE logs
echo "stop containers..."
docker compose --profile test-extensions -f $COMPOSE_FILE down
}
@@ -31,7 +35,13 @@ for pg_version in ${TEST_VERSION_ONLY-14 15 16 17}; do
echo "clean up containers if exists"
cleanup
PG_TEST_VERSION=$((pg_version < 16 ? 16 : pg_version))
PG_VERSION=$pg_version PG_TEST_VERSION=$PG_TEST_VERSION docker compose --profile test-extensions -f $COMPOSE_FILE up --quiet-pull --build -d
# The support of pg_anon not yet added to PG17, so we have to add the corresponding option for other PG versions
if [ "${pg_version}" -ne 17 ]; then
SPEC_PATH="compute_wrapper/var/db/postgres/specs"
mv $SPEC_PATH/spec.json $SPEC_PATH/spec.bak
jq '.cluster.settings += [{"name": "session_preload_libraries","value": "anon","vartype": "string"}]' "${SPEC_PATH}/spec.bak" > "${SPEC_PATH}/spec.json"
fi
PG_VERSION=$pg_version PG_TEST_VERSION=$PG_TEST_VERSION docker compose --profile test-extensions -f $COMPOSE_FILE up --build -d
echo "wait until the compute is ready. timeout after 60s. "
cnt=0
@@ -40,6 +50,7 @@ for pg_version in ${TEST_VERSION_ONLY-14 15 16 17}; do
cnt=`expr $cnt + 3`
if [ $cnt -gt 60 ]; then
echo "timeout before the compute is ready."
cleanup
exit 1
fi
if docker compose --profile test-extensions -f $COMPOSE_FILE logs "compute_is_ready" | grep -q "accepting connections"; then
@@ -51,46 +62,52 @@ for pg_version in ${TEST_VERSION_ONLY-14 15 16 17}; do
done
if [ $pg_version -ge 16 ]; then
docker cp ext-src $TEST_CONTAINER_NAME:/
docker exec $TEST_CONTAINER_NAME bash -c "apt update && apt install -y libtap-parser-sourcehandler-pgtap-perl"
echo Enabling trust connection
docker exec $COMPUTE_CONTAINER_NAME bash -c "sed -i '\$d' /var/db/postgres/compute/pg_hba.conf && echo -e 'host\t all\t all\t all\t trust' >> /var/db/postgres/compute/pg_hba.conf && psql $PSQL_OPTION -c 'select pg_reload_conf()' "
echo Adding postgres role
docker exec $COMPUTE_CONTAINER_NAME psql $PSQL_OPTION -c "CREATE ROLE postgres SUPERUSER LOGIN"
# This is required for the pg_hint_plan test, to prevent flaky log message causing the test to fail
# It cannot be moved to Dockerfile now because the database directory is created after the start of the container
echo Adding dummy config
docker exec $COMPUTE_CONTAINER_NAME touch /var/db/postgres/compute/compute_ctl_temp_override.conf
# The following block copies the files for the pg_hintplan test to the compute node for the extension test in an isolated docker-compose environment
# This block is required for the pg_anon extension test.
# The test assumes that it is running on the same host with the postgres engine.
# In our case it's not true, that's why we are copying files to the compute node
TMPDIR=$(mktemp -d)
# Add support for pg_anon for pg_v16
if [ $pg_version -ne 17 ]; then
docker cp $TEST_CONTAINER_NAME:/ext-src/pg_anon-src/data $TMPDIR/data
echo -e '1\t too \t many \t tabs' > $TMPDIR/data/bad.csv
docker cp $TMPDIR/data $COMPUTE_CONTAINER_NAME:/tmp/tmp_anon_alternate_data
rm -rf $TMPDIR
fi
TMPDIR=$(mktemp -d)
# The following block does the same for the pg_hintplan test
docker cp $TEST_CONTAINER_NAME:/ext-src/pg_hint_plan-src/data $TMPDIR/data
docker cp $TMPDIR/data $COMPUTE_CONTAINER_NAME:/ext-src/pg_hint_plan-src/
rm -rf $TMPDIR
# The following block does the same for the contrib/file_fdw test
TMPDIR=$(mktemp -d)
docker cp $TEST_CONTAINER_NAME:/postgres/contrib/file_fdw/data $TMPDIR/data
docker cp $TMPDIR/data $COMPUTE_CONTAINER_NAME:/postgres/contrib/file_fdw/data
rm -rf $TMPDIR
# Apply patches
cat ../compute/patches/contrib_pg${pg_version}.patch | docker exec -i $TEST_CONTAINER_NAME bash -c "(cd /postgres && patch -p1)"
# We are running tests now
rm -f testout.txt testout_contrib.txt
docker exec -e USE_PGXS=1 -e SKIP=timescaledb-src,rdkit-src,postgis-src,pgx_ulid-src,pgtap-src,pg_tiktoken-src,pg_jsonschema-src,kq_imcx-src,wal2json_2_5-src \
$TEST_CONTAINER_NAME /run-tests.sh /ext-src | tee testout.txt && EXT_SUCCESS=1 || EXT_SUCCESS=0
docker exec -e SKIP=start-scripts,postgres_fdw,ltree_plpython,jsonb_plpython,jsonb_plperl,hstore_plpython,hstore_plperl,dblink,bool_plperl \
$TEST_CONTAINER_NAME /run-tests.sh /postgres/contrib | tee testout_contrib.txt && CONTRIB_SUCCESS=1 || CONTRIB_SUCCESS=0
if [ $EXT_SUCCESS -eq 0 ] || [ $CONTRIB_SUCCESS -eq 0 ]; then
CONTRIB_FAILED=
FAILED=
[ $EXT_SUCCESS -eq 0 ] && FAILED=$(tail -1 testout.txt | awk '{for(i=1;i<=NF;i++){print "/ext-src/"$i;}}')
[ $CONTRIB_SUCCESS -eq 0 ] && CONTRIB_FAILED=$(tail -1 testout_contrib.txt | awk '{for(i=0;i<=NF;i++){print "/postgres/contrib/"$i;}}')
for d in $FAILED $CONTRIB_FAILED; do
dn="$(basename $d)"
rm -rf $dn
mkdir $dn
docker cp $TEST_CONTAINER_NAME:$d/regression.diffs $dn || [ $? -eq 1 ]
docker cp $TEST_CONTAINER_NAME:$d/regression.out $dn || [ $? -eq 1 ]
cat $dn/regression.out $dn/regression.diffs || true
rm -rf $dn
if docker exec -e SKIP=timescaledb-src,rdkit-src,postgis-src,pgx_ulid-src,pgtap-src,pg_tiktoken-src,pg_jsonschema-src,pg_graphql-src,kq_imcx-src,wal2json_2_5-src \
$TEST_CONTAINER_NAME /run-tests.sh | tee testout.txt
then
cleanup
else
FAILED=$(tail -1 testout.txt)
for d in $FAILED
do
mkdir $d
docker cp $TEST_CONTAINER_NAME:/ext-src/$d/regression.diffs $d || true
docker cp $TEST_CONTAINER_NAME:/ext-src/$d/regression.out $d || true
cat $d/regression.out $d/regression.diffs || true
done
rm -rf $FAILED
cleanup
exit 1
fi
fi
cleanup
# Restore the original spec.json
if [ "$pg_version" -ne 17 ]; then
mv "$SPEC_PATH/spec.bak" "$SPEC_PATH/spec.json"
fi
done

View File

@@ -1,5 +0,0 @@
#!/bin/sh
set -ex
cd "$(dirname ${0})"
PG_REGRESS=$(dirname "$(pg_config --pgxs)")/../test/regress/pg_regress
${PG_REGRESS} --use-existing --inputdir=./ --bindir='/usr/local/pgsql/bin' --dbname=contrib_regression add_agg agg_oob auto_sparse card_op cast_shape copy_binary cumulative_add_cardinality_correction cumulative_add_comprehensive_promotion cumulative_add_sparse_edge cumulative_add_sparse_random cumulative_add_sparse_step cumulative_union_comprehensive cumulative_union_explicit_explicit cumulative_union_explicit_promotion cumulative_union_probabilistic_probabilistic cumulative_union_sparse_full_representation cumulative_union_sparse_promotion cumulative_union_sparse_sparse disable_hashagg equal explicit_thresh hash hash_any meta_func murmur_bigint murmur_bytea nosparse notequal scalar_oob storedproc transaction typmod typmod_insert union_op

View File

@@ -1,27 +0,0 @@
diff --git a/expected/hypopg.out b/expected/hypopg.out
index 90121d0..859260b 100644
--- a/expected/hypopg.out
+++ b/expected/hypopg.out
@@ -11,7 +11,8 @@ BEGIN
END;
$_$
LANGUAGE plpgsql;
-CREATE EXTENSION hypopg;
+CREATE EXTENSION IF NOT EXISTS hypopg;
+NOTICE: extension "hypopg" already exists, skipping
CREATE TABLE hypo (id integer, val text, "Id2" bigint);
INSERT INTO hypo SELECT i, 'line ' || i
FROM generate_series(1,100000) f(i);
diff --git a/test/sql/hypopg.sql b/test/sql/hypopg.sql
index 99722b0..8d6bacb 100644
--- a/test/sql/hypopg.sql
+++ b/test/sql/hypopg.sql
@@ -12,7 +12,7 @@ END;
$_$
LANGUAGE plpgsql;
-CREATE EXTENSION hypopg;
+CREATE EXTENSION IF NOT EXISTS hypopg;
CREATE TABLE hypo (id integer, val text, "Id2" bigint);

View File

@@ -1,6 +0,0 @@
#!/bin/sh
set -ex
cd "$(dirname ${0})"
patch -p1 <test-upgrade.patch
PG_REGRESS=$(dirname "$(pg_config --pgxs)")/../test/regress/pg_regress
${PG_REGRESS} --inputdir=./ --bindir='/usr/local/pgsql/bin' --use-existing --inputdir=test --dbname=contrib_regression hypopg hypo_brin hypo_index_part hypo_include hypo_hash hypo_hide_index

View File

@@ -1,23 +0,0 @@
diff --git a/expected/ip4r.out b/expected/ip4r.out
index 7527af3..b38ed29 100644
--- a/expected/ip4r.out
+++ b/expected/ip4r.out
@@ -1,6 +1,5 @@
--
/*CUT-HERE*/
-CREATE EXTENSION ip4r;
-- Check whether any of our opclasses fail amvalidate
DO $d$
DECLARE
diff --git a/sql/ip4r.sql b/sql/ip4r.sql
index 65c49ec..24ade09 100644
--- a/sql/ip4r.sql
+++ b/sql/ip4r.sql
@@ -1,7 +1,6 @@
--
/*CUT-HERE*/
-CREATE EXTENSION ip4r;
-- Check whether any of our opclasses fail amvalidate

View File

@@ -1,6 +0,0 @@
#!/bin/sh
set -ex
cd "$(dirname ${0})"
patch -p1 <test-upgrade.patch
PG_REGRESS=$(dirname "$(pg_config --pgxs)")/../test/regress/pg_regress
${PG_REGRESS} --use-existing --inputdir=./ --bindir='/usr/local/pgsql/bin' --dbname=contrib_regression ip4r ip4r-softerr ip4r-v11

View File

@@ -1,75 +0,0 @@
diff --git a/expected/pg_cron-test.out b/expected/pg_cron-test.out
index d79d542..1663886 100644
--- a/expected/pg_cron-test.out
+++ b/expected/pg_cron-test.out
@@ -1,30 +1,3 @@
-CREATE EXTENSION pg_cron VERSION '1.0';
-SELECT extversion FROM pg_extension WHERE extname='pg_cron';
- extversion
-------------
- 1.0
-(1 row)
-
--- Test binary compatibility with v1.4 function signature.
-ALTER EXTENSION pg_cron UPDATE TO '1.4';
-SELECT cron.unschedule(job_name := 'no_such_job');
-ERROR: could not find valid entry for job 'no_such_job'
-SELECT cron.schedule('testjob', '* * * * *', 'SELECT 1');
- schedule
-----------
- 1
-(1 row)
-
-SELECT cron.unschedule('testjob');
- unschedule
-------------
- t
-(1 row)
-
--- Test cache invalidation
-DROP EXTENSION pg_cron;
-CREATE EXTENSION pg_cron VERSION '1.4';
-ALTER EXTENSION pg_cron UPDATE;
-- Vacuum every day at 10:00am (GMT)
SELECT cron.schedule('0 10 * * *', 'VACUUM');
schedule
@@ -300,8 +273,3 @@ SELECT jobid, jobname, schedule, command FROM cron.job ORDER BY jobid;
SELECT cron.schedule('bad-last-dom-job1', '0 11 $foo * *', 'VACUUM FULL');
ERROR: invalid schedule: 0 11 $foo * *
HINT: Use cron format (e.g. 5 4 * * *), or interval format '[1-59] seconds'
--- cleaning
-DROP EXTENSION pg_cron;
-drop user pgcron_cront;
-drop database pgcron_dbno;
-drop database pgcron_dbyes;
diff --git a/sql/pg_cron-test.sql b/sql/pg_cron-test.sql
index 45f94d9..241cf73 100644
--- a/sql/pg_cron-test.sql
+++ b/sql/pg_cron-test.sql
@@ -1,17 +1,3 @@
-CREATE EXTENSION pg_cron VERSION '1.0';
-SELECT extversion FROM pg_extension WHERE extname='pg_cron';
--- Test binary compatibility with v1.4 function signature.
-ALTER EXTENSION pg_cron UPDATE TO '1.4';
-SELECT cron.unschedule(job_name := 'no_such_job');
-SELECT cron.schedule('testjob', '* * * * *', 'SELECT 1');
-SELECT cron.unschedule('testjob');
-
--- Test cache invalidation
-DROP EXTENSION pg_cron;
-CREATE EXTENSION pg_cron VERSION '1.4';
-
-ALTER EXTENSION pg_cron UPDATE;
-
-- Vacuum every day at 10:00am (GMT)
SELECT cron.schedule('0 10 * * *', 'VACUUM');
@@ -156,8 +142,3 @@ SELECT jobid, jobname, schedule, command FROM cron.job ORDER BY jobid;
-- invalid last of day job
SELECT cron.schedule('bad-last-dom-job1', '0 11 $foo * *', 'VACUUM FULL');
--- cleaning
-DROP EXTENSION pg_cron;
-drop user pgcron_cront;
-drop database pgcron_dbno;
-drop database pgcron_dbyes;

View File

@@ -1,6 +0,0 @@
#!/bin/sh
set -ex
cd "$(dirname ${0})"
patch -p1 <test-upgrade.patch
PG_REGRESS=$(dirname "$(pg_config --pgxs)")/../test/regress/pg_regress
${PG_REGRESS} --use-existing --inputdir=./ --bindir='/usr/local/pgsql/bin' --dbname=contrib_regression pg_cron-test

View File

@@ -1,13 +0,0 @@
#!/bin/bash
set -ex
cd "$(dirname "${0}")"
dropdb --if-exists contrib_regression
createdb contrib_regression
PGXS="$(dirname "$(pg_config --pgxs)" )"
REGRESS="${PGXS}/../test/regress/pg_regress"
TESTDIR="test"
TESTS=$(ls "${TESTDIR}/sql" | sort )
TESTS=${TESTS//\.sql/}
psql -v ON_ERROR_STOP=1 -f test/fixtures.sql -d contrib_regression
${REGRESS} --use-existing --dbname=contrib_regression --inputdir=${TESTDIR} ${TESTS}

View File

@@ -1,18 +0,0 @@
diff --git a/expected/pg_ivm.out b/expected/pg_ivm.out
index e8798ee..cca58d0 100644
--- a/expected/pg_ivm.out
+++ b/expected/pg_ivm.out
@@ -1,4 +1,3 @@
-CREATE EXTENSION pg_ivm;
GRANT ALL ON SCHEMA public TO public;
-- create a table to use as a basis for views and materialized views in various combinations
CREATE TABLE mv_base_a (i int, j int);
diff --git a/sql/pg_ivm.sql b/sql/pg_ivm.sql
index d3c1a01..9382d7f 100644
--- a/sql/pg_ivm.sql
+++ b/sql/pg_ivm.sql
@@ -1,4 +1,3 @@
-CREATE EXTENSION pg_ivm;
GRANT ALL ON SCHEMA public TO public;
-- create a table to use as a basis for views and materialized views in various combinations

View File

@@ -1,6 +0,0 @@
#!/bin/sh
set -ex
cd "$(dirname ${0})"
patch -p1 <test-upgrade.patch
PG_REGRESS=$(dirname "$(pg_config --pgxs)")/../test/regress/pg_regress
${PG_REGRESS} --use-existing --inputdir=./ --bindir='/usr/local/pgsql/bin' --dbname=contrib_regression pg_ivm create_immv refresh_immv

View File

@@ -1,25 +0,0 @@
diff --git a/expected/roaringbitmap.out b/expected/roaringbitmap.out
index de70531..a5f7c15 100644
--- a/expected/roaringbitmap.out
+++ b/expected/roaringbitmap.out
@@ -1,7 +1,6 @@
--
-- Test roaringbitmap extension
--
-CREATE EXTENSION if not exists roaringbitmap;
-- Test input and output
set roaringbitmap.output_format='array';
set extra_float_digits = 0;
diff --git a/sql/roaringbitmap.sql b/sql/roaringbitmap.sql
index a0e9c74..84bc966 100644
--- a/sql/roaringbitmap.sql
+++ b/sql/roaringbitmap.sql
@@ -2,8 +2,6 @@
-- Test roaringbitmap extension
--
-CREATE EXTENSION if not exists roaringbitmap;
-
-- Test input and output
set roaringbitmap.output_format='array';

View File

@@ -1,6 +0,0 @@
#!/bin/sh
set -ex
cd "$(dirname ${0})"
patch -p1 <test-upgrade.patch
PG_REGRESS=$(dirname "$(pg_config --pgxs)")/../test/regress/pg_regress
${PG_REGRESS} --use-existing --inputdir=./ --bindir='/usr/local/pgsql/bin' --dbname=contrib_regression roaringbitmap

View File

@@ -1,24 +0,0 @@
diff --git a/test/sql/base.sql b/test/sql/base.sql
index af599d8..2eed91b 100644
--- a/test/sql/base.sql
+++ b/test/sql/base.sql
@@ -2,7 +2,6 @@
BEGIN;
\i test/pgtap-core.sql
-\i sql/semver.sql
SELECT plan(334);
--SELECT * FROM no_plan();
diff --git a/test/sql/corpus.sql b/test/sql/corpus.sql
index 1f5f637..a519905 100644
--- a/test/sql/corpus.sql
+++ b/test/sql/corpus.sql
@@ -4,7 +4,6 @@ BEGIN;
-- Test the SemVer corpus from https://regex101.com/r/Ly7O1x/3/.
\i test/pgtap-core.sql
-\i sql/semver.sql
SELECT plan(71);
--SELECT * FROM no_plan();

View File

@@ -1,6 +0,0 @@
#!/bin/sh
set -ex
cd "$(dirname ${0})"
patch -p1 <test-upgrade.patch
PG_REGRESS=$(dirname "$(pg_config --pgxs)")/../test/regress/pg_regress
${PG_REGRESS} --use-existing --inputdir=./ --bindir='/usr/local/pgsql/bin' --inputdir=test --dbname=contrib_regression base corpus

View File

@@ -1,5 +0,0 @@
#!/bin/sh
set -ex
cd "$(dirname ${0})"
PG_REGRESS=$(dirname "$(pg_config --pgxs)")/../test/regress/pg_regress
${PG_REGRESS} --use-existing --inputdir=./ --bindir='/usr/local/pgsql/bin' --inputdir=test --dbname=contrib_regression 002_uuid_generate_v7 003_uuid_v7_to_timestamptz 004_uuid_timestamptz_to_v7 005_uuid_v7_to_timestamp 006_uuid_timestamp_to_v7

View File

@@ -1,4 +0,0 @@
#!/bin/bash
set -ex
cd "$(dirname "${0}")"
pg_prove test.sql

View File

@@ -1,15 +0,0 @@
diff --git a/test.sql b/test.sql
index d7a0ca8..f15bc76 100644
--- a/test.sql
+++ b/test.sql
@@ -9,9 +9,7 @@
\set ON_ERROR_STOP true
\set QUIET 1
-CREATE EXTENSION pgcrypto;
-CREATE EXTENSION pgtap;
-CREATE EXTENSION pgjwt;
+CREATE EXTENSION IF NOT EXISTS pgtap;
BEGIN;
SELECT plan(23);

View File

@@ -1,5 +0,0 @@
#!/bin/sh
set -ex
cd "$(dirname ${0})"
patch -p1 <test-upgrade.patch
pg_prove test.sql

View File

@@ -1,5 +0,0 @@
#!/bin/sh
set -ex
cd "$(dirname ${0})"
PG_REGRESS=$(dirname "$(pg_config --pgxs)")/../test/regress/pg_regress
${PG_REGRESS} --inputdir=./ --bindir='/usr/local/pgsql/bin' --inputdir=test --use-existing --dbname=contrib_regression bit btree cast copy halfvec hnsw_bit hnsw_halfvec hnsw_sparsevec hnsw_vector ivfflat_bit ivfflat_halfvec ivfflat_vector sparsevec vector_type

View File

@@ -1,5 +0,0 @@
#!/bin/sh
set -ex
cd "$(dirname ${0})"
PG_REGRESS=$(dirname "$(pg_config --pgxs)")/../test/regress/pg_regress
${PG_REGRESS} --inputdir=./ --bindir='/usr/local/pgsql/bin' --use-existing --dbname=contrib_regression plv8 plv8-errors scalar_args inline json startup_pre startup varparam json_conv jsonb_conv window guc es6 arraybuffer composites currentresource startup_perms bytea find_function_perms memory_limits reset show array_spread regression dialect bigint procedure

View File

@@ -1,5 +0,0 @@
#!/bin/sh
set -ex
cd "$(dirname ${0})"
PG_REGRESS=$(dirname "$(pg_config --pgxs)")/../test/regress/pg_regress
${PG_REGRESS} --inputdir=./ --bindir='/usr/local/pgsql/bin' --use-existing --dbname=contrib_regression extension tables unit binary unicode prefix units time temperature functions language_functions round derived compare aggregate iec custom crosstab convert

View File

@@ -1,5 +0,0 @@
#!/bin/sh
set -ex
cd "$(dirname ${0})"
PG_REGRESS=$(dirname "$(pg_config --pgxs)")/../test/regress/pg_regress
${PG_REGRESS} --use-existing --inputdir=./ --bindir='/usr/local/pgsql/bin' --dbname=contrib_regression prefix falcon explain queries

View File

@@ -1,19 +0,0 @@
diff --git a/expected/rum.out b/expected/rum.out
index 5966d19..8860b79 100644
--- a/expected/rum.out
+++ b/expected/rum.out
@@ -1,4 +1,3 @@
-CREATE EXTENSION rum;
CREATE TABLE test_rum( t text, a tsvector );
CREATE TRIGGER tsvectorupdate
BEFORE UPDATE OR INSERT ON test_rum
diff --git a/sql/rum.sql b/sql/rum.sql
index 8414bb9..898e6ab 100644
--- a/sql/rum.sql
+++ b/sql/rum.sql
@@ -1,5 +1,3 @@
-CREATE EXTENSION rum;
-
CREATE TABLE test_rum( t text, a tsvector );
CREATE TRIGGER tsvectorupdate

View File

@@ -1,6 +0,0 @@
#!/bin/sh
set -ex
cd "$(dirname ${0})"
patch -p1 <test-upgrade.patch
PG_REGRESS=$(dirname "$(pg_config --pgxs)")/../test/regress/pg_regress
${PG_REGRESS} --inputdir=./ --bindir='/usr/local/pgsql/bin' --use-existing --dbname=contrib_regression rum rum_validate rum_hash ruminv timestamp orderby orderby_hash altorder altorder_hash limits int2 int4 int8 float4 float8 money oid time timetz date interval macaddr inet cidr text varchar char bytea bit varbit numeric rum_weight expr array

View File

@@ -1,22 +1,14 @@
#!/bin/bash
set -x
extdir=${1}
cd "${extdir}" || exit 2
cd /ext-src || exit 2
FAILED=
LIST=$( (echo -e "${SKIP//","/"\n"}"; ls) | sort | uniq -u)
for d in ${LIST}; do
[ -d "${d}" ] || continue
if ! psql -w -c "select 1" >/dev/null; then
FAILED="${d} ${FAILED}"
break
fi
if [ -f "${d}/neon-test.sh" ]; then
"${d}/neon-test.sh" || FAILED="${d} ${FAILED}"
else
LIST=$( (echo -e "${SKIP//","/"\n"}"; ls -d -- *-src) | sort | uniq -u)
for d in ${LIST}
do
[ -d "${d}" ] || continue
psql -c "select 1" >/dev/null || break
USE_PGXS=1 make -C "${d}" installcheck || FAILED="${d} ${FAILED}"
fi
done
[ -z "${FAILED}" ] && exit 0
echo "${FAILED}"

View File

@@ -1,94 +0,0 @@
#!/bin/bash
set -eux -o pipefail
cd "$(dirname "${0}")"
# Takes a variable name as argument. The result is stored in that variable.
generate_id() {
local -n resvar=$1
printf -v resvar '%08x%08x%08x%08x' $SRANDOM $SRANDOM $SRANDOM $SRANDOM
}
if [ -z ${OLDTAG+x} ] || [ -z ${NEWTAG+x} ] || [ -z "${OLDTAG}" ] || [ -z "${NEWTAG}" ]; then
echo OLDTAG and NEWTAG must be defined
exit 1
fi
export PG_VERSION=${PG_VERSION:-16}
function wait_for_ready {
TIME=0
while ! docker compose logs compute_is_ready | grep -q "accepting connections" && [ ${TIME} -le 300 ] ; do
((TIME += 1 ))
sleep 1
done
if [ ${TIME} -gt 300 ]; then
echo Time is out.
exit 2
fi
}
function create_extensions() {
for ext in ${1}; do
docker compose exec neon-test-extensions psql -X -v ON_ERROR_STOP=1 -d contrib_regression -c "CREATE EXTENSION IF NOT EXISTS ${ext} CASCADE"
done
}
EXTENSIONS='[
{"extname": "plv8", "extdir": "plv8-src"},
{"extname": "vector", "extdir": "pgvector-src"},
{"extname": "unit", "extdir": "postgresql-unit-src"},
{"extname": "hypopg", "extdir": "hypopg-src"},
{"extname": "rum", "extdir": "rum-src"},
{"extname": "ip4r", "extdir": "ip4r-src"},
{"extname": "prefix", "extdir": "prefix-src"},
{"extname": "hll", "extdir": "hll-src"},
{"extname": "pg_cron", "extdir": "pg_cron-src"},
{"extname": "pg_uuidv7", "extdir": "pg_uuidv7-src"},
{"extname": "roaringbitmap", "extdir": "pg_roaringbitmap-src"},
{"extname": "semver", "extdir": "pg_semver-src"},
{"extname": "pg_ivm", "extdir": "pg_ivm-src"},
{"extname": "pgjwt", "extdir": "pgjwt-src"}
]'
EXTNAMES=$(echo ${EXTENSIONS} | jq -r '.[].extname' | paste -sd ' ' -)
TAG=${NEWTAG} docker compose --profile test-extensions up --quiet-pull --build -d
wait_for_ready
docker compose exec neon-test-extensions psql -c "DROP DATABASE IF EXISTS contrib_regression"
docker compose exec neon-test-extensions psql -c "CREATE DATABASE contrib_regression"
create_extensions "${EXTNAMES}"
query="select json_object_agg(extname,extversion) from pg_extension where extname in ('${EXTNAMES// /\',\'}')"
new_vers=$(docker compose exec neon-test-extensions psql -Aqt -d contrib_regression -c "$query")
docker compose --profile test-extensions down
TAG=${OLDTAG} docker compose --profile test-extensions up --quiet-pull --build -d --force-recreate
wait_for_ready
docker compose cp ext-src neon-test-extensions:/
docker compose exec neon-test-extensions psql -c "DROP DATABASE IF EXISTS contrib_regression"
docker compose exec neon-test-extensions psql -c "CREATE DATABASE contrib_regression"
create_extensions "${EXTNAMES}"
query="select pge.extname from pg_extension pge join (select key as extname, value as extversion from json_each_text('${new_vers}')) x on pge.extname=x.extname and pge.extversion <> x.extversion"
exts=$(docker compose exec neon-test-extensions psql -Aqt -d contrib_regression -c "$query")
if [ -z "${exts}" ]; then
echo "No extensions were upgraded"
else
tenant_id=$(docker compose exec neon-test-extensions psql -Aqt -c "SHOW neon.tenant_id")
timeline_id=$(docker compose exec neon-test-extensions psql -Aqt -c "SHOW neon.timeline_id")
for ext in ${exts}; do
echo Testing ${ext}...
EXTDIR=$(echo ${EXTENSIONS} | jq -r '.[] | select(.extname=="'${ext}'") | .extdir')
generate_id new_timeline_id
PARAMS=(
-sbf
-X POST
-H "Content-Type: application/json"
-d "{\"new_timeline_id\": \"${new_timeline_id}\", \"pg_version\": ${PG_VERSION}, \"ancestor_timeline_id\": \"${timeline_id}\"}"
"http://127.0.0.1:9898/v1/tenant/${tenant_id}/timeline/"
)
result=$(curl "${PARAMS[@]}")
echo $result | jq .
TENANT_ID=${tenant_id} TIMELINE_ID=${new_timeline_id} TAG=${OLDTAG} docker compose down compute compute_is_ready
COMPUTE_TAG=${NEWTAG} TAG=${OLDTAG} TENANT_ID=${tenant_id} TIMELINE_ID=${new_timeline_id} docker compose up --quiet-pull -d --build compute compute_is_ready
wait_for_ready
TID=$(docker compose exec neon-test-extensions psql -Aqt -c "SHOW neon.timeline_id")
if [ ${TID} != ${new_timeline_id} ]; then
echo Timeline mismatch
exit 1
fi
docker compose exec neon-test-extensions psql -d contrib_regression -c "\dx ${ext}"
docker compose exec neon-test-extensions sh -c /ext-src/${EXTDIR}/test-upgrade.sh
docker compose exec neon-test-extensions psql -d contrib_regression -c "alter extension ${ext} update"
docker compose exec neon-test-extensions psql -d contrib_regression -c "\dx ${ext}"
done
fi

View File

@@ -81,7 +81,7 @@ configuration generation in them is less than its current one. Namely, it
refuses to vote, to truncate WAL in `handle_elected` and to accept WAL. In
response it sends its current configuration generation to let walproposer know.
Safekeeper gets `PUT /v1/tenants/{tenant_id}/timelines/{timeline_id}/configuration`
Safekeeper gets `PUT /v1/tenants/{tenant_id}/timelines/{timeline_id}/configuration`
accepting `Configuration`. Safekeeper switches to the given conf it is higher than its
current one and ignores it otherwise. In any case it replies with
```
@@ -103,7 +103,7 @@ currently and tries to communicate with all of them. However, the list does not
define consensus members. Instead, on start walproposer tracks highest
configuration it receives from `AcceptorGreeting`s. Once it assembles greetings
from majority of `sk_set` and majority of `new_sk_set` (if it is present), it
establishes this configuration as its own and moves to voting.
establishes this configuration as its own and moves to voting.
It should stop talking to safekeepers not listed in the configuration at this
point, though it is not unsafe to continue doing so.
@@ -119,7 +119,7 @@ refusal to accept due to configuration change) it simply restarts.
The following algorithm can be executed anywhere having access to configuration
storage and safekeepers. It is safe to interrupt / restart it and run multiple
instances of it concurrently, though likely one of them won't make
progress then. It accepts `desired_set: Vec<NodeId>` as input.
progress then. It accepts `desired_set: Vec<NodeId>` as input.
Algorithm will refuse to make the change if it encounters previous interrupted
change attempt, but in this case it will try to finish it.
@@ -140,7 +140,7 @@ storage are reachable.
safe. Failed CAS aborts the procedure.
4) Call `PUT` `configuration` on safekeepers from the current set,
delivering them `joint_conf`. Collecting responses from majority is required
to proceed. If any response returned generation higher than
to proceed. If any response returned generation higher than
`joint_conf.generation`, abort (another switch raced us). Otherwise, choose
max `<last_log_term, flush_lsn>` among responses and establish it as
(in memory) `sync_position`. Also choose max `term` and establish it as (in
@@ -149,49 +149,49 @@ storage are reachable.
without ack from the new set. Similarly, we'll bump term on new majority
to `sync_term` so that two computes with the same term are never elected.
4) Initialize timeline on safekeeper(s) from `new_sk_set` where it
doesn't exist yet by doing `pull_timeline` from the majority of the
doesn't exist yet by doing `pull_timeline` from the majority of the
current set. Doing that on majority of `new_sk_set` is enough to
proceed, but it is reasonable to ensure that all `new_sk_set` members
are initialized -- if some of them are down why are we migrating there?
5) Call `POST` `bump_term(sync_term)` on safekeepers from the new set.
5) Call `POST` `bump_term(sync_term)` on safekeepers from the new set.
Success on majority is enough.
6) Repeatedly call `PUT` `configuration` on safekeepers from the new set,
delivering them `joint_conf` and collecting their positions. This will
switch them to the `joint_conf` which generally won't be needed
switch them to the `joint_conf` which generally won't be needed
because `pull_timeline` already includes it and plus additionally would be
broadcast by compute. More importantly, we may proceed to the next step
only when `<last_log_term, flush_lsn>` on the majority of the new set reached
`sync_position`. Similarly, on the happy path no waiting is not needed because
only when `<last_log_term, flush_lsn>` on the majority of the new set reached
`sync_position`. Similarly, on the happy path no waiting is not needed because
`pull_timeline` already includes it. However, we should double
check to be safe. For example, timeline could have been created earlier e.g.
manually or after try-to-migrate, abort, try-to-migrate-again sequence.
7) Create `new_conf: Configuration` incrementing `join_conf` generation and having new
safekeeper set as `sk_set` and None `new_sk_set`. Write it to configuration
manually or after try-to-migrate, abort, try-to-migrate-again sequence.
7) Create `new_conf: Configuration` incrementing `join_conf` generation and having new
safekeeper set as `sk_set` and None `new_sk_set`. Write it to configuration
storage under one more CAS.
8) Call `PUT` `configuration` on safekeepers from the new set,
delivering them `new_conf`. It is enough to deliver it to the majority
delivering them `new_conf`. It is enough to deliver it to the majority
of the new set; the rest can be updated by compute.
I haven't put huge effort to make the description above very precise, because it
is natural language prone to interpretations anyway. Instead I'd like to make TLA+
spec of it.
Description above focuses on safety. To make the flow practical and live, here a few more
Description above focuses on safety. To make the flow practical and live, here a few more
considerations.
1) It makes sense to ping new set to ensure it we are migrating to live node(s) before
1) It makes sense to ping new set to ensure it we are migrating to live node(s) before
step 3.
2) If e.g. accidentally wrong new sk set has been specified, before CAS in step `6` is completed
2) If e.g. accidentally wrong new sk set has been specified, before CAS in step `6` is completed
it is safe to rollback to the old conf with one more CAS.
3) On step 4 timeline might be already created on members of the new set for various reasons;
3) On step 4 timeline might be already created on members of the new set for various reasons;
the simplest is the procedure restart. There are more complicated scenarious like mentioned
in step 5. Deleting and re-doing `pull_timeline` is generally unsafe without involving
generations, so seems simpler to treat existing timeline as success. However, this also
in step 5. Deleting and re-doing `pull_timeline` is generally unsafe without involving
generations, so seems simpler to treat existing timeline as success. However, this also
has a disadvantage: you might imagine an surpassingly unlikely schedule where condition in
the step 5 is never reached until compute is (re)awaken up to synchronize new member(s).
I don't think we'll observe this in practice, but can add waking up compute if needed.
4) In the end timeline should be locally deleted on the safekeeper(s) which are
in the old set but not in the new one, unless they are unreachable. To be
safe this also should be done under generation number (deletion proceeds only if
safe this also should be done under generation number (deletion proceeds only if
current configuration is <= than one in request and safekeeper is not memeber of it).
5) If current conf fetched on step 1 is already not joint and members equal to `desired_set`,
jump to step 7, using it as `new_conf`.
@@ -202,87 +202,47 @@ The procedure ought to be driven from somewhere. Obvious candidates are control
plane and storage_controller; and as each of them already has db we don't want
yet another storage. I propose to manage safekeepers in storage_controller
because 1) since it is in rust it simplifies simulation testing (more on this
below) 2) it already manages pageservers.
below) 2) it already manages pageservers.
This assumes that migration will be fully usable only after we migrate all
tenants/timelines to storage_controller. It is discussible whether we want also
to manage pageserver attachments for all of these, but likely we do.
This requires us to define storcon <-> cplane interface and changes.
This requires us to define storcon <-> cplane interface.
### storage_controller <-> control plane interface and changes
### storage_controller <-> control plane interface
First of all, control plane should
[change](https://neondb.slack.com/archives/C03438W3FLZ/p1719226543199829)
storing safekeepers per timeline instead of per tenant because we can't migrate
tenants atomically.
tenants atomically.
The important question is how updated configuration is delivered from
storage_controller to control plane to provide it to computes. As always, there
are two options, pull and push. Let's do it the same push as with pageserver
`/notify-attach` because 1) it keeps storage_controller out of critical compute
start path 2) uniformity. It makes storage_controller responsible for retrying
notifying control plane until it succeeds.
start path 2) provides easier upgrade: there won't be such a thing as 'timeline
managed by control plane / storcon', cplane just takes the value out of its db
when needed 3) uniformity. It makes storage_controller responsible for retrying notifying
control plane until it succeeds.
It is not needed for the control plane to fully know the `Configuration`. It is
enough for it to only to be aware of the list of safekeepers in the latest
configuration to supply it to compute, plus associated generation number to
protect from stale update requests and to also pass it to compute.
So, cplane `/notify-safekeepers` for the timeline can accept JSON like
```
{
tenant_id: String,
timeline_id: String,
generation: u32,
safekeepers: Vec<SafekeeperId>,
}
```
where `SafekeeperId` is
```
{
node_id: u64,
host: String
}
```
In principle `host` is redundant, but may be useful for observability.
The request updates list of safekeepers in the db if the provided conf
generation is higher (the cplane db should also store generations for this).
Similarly to
[`/notify-attach`](https://www.notion.so/neondatabase/Storage-Controller-Control-Plane-interface-6de56dd310a043bfa5c2f5564fa98365),
it should update db which makes the call successful, and then try to schedule
`apply_config` if possible, it is ok if not. storage_controller should rate
limit calling the endpoint, but likely this won't be needed, as migration
So, cplane `/notify-safekeepers` for the timeline accepts `Configuration` and
updates it in the db if the provided conf generation is higher (the cplane db
should also store generations for this). Similarly to [`/notify-attach`](https://www.notion.so/neondatabase/Storage-Controller-Control-Plane-interface-6de56dd310a043bfa5c2f5564fa98365), it
should update db which makes the call successful, and then try to schedule
`apply_config` if possible, it is ok if not. storage_controller
should rate limit calling the endpoint, but likely this won't be needed, as migration
throughput is limited by `pull_timeline`.
Timeline (branch) creation in cplane should call storage_controller POST
`tenant/:tenant_id/timeline` like it currently does for sharded tenants.
Response should be augmented with `safekeepers_generation` and `safekeepers`
fields like described in `/notify-safekeepers` above. Initially (currently)
these fields may be absent; in this case cplane chooses safekeepers on its own
like it currently does. The call should be retried until succeeds.
Response should be augmented with `safekeeper_conf: Configuration`. The call
should be retried until succeeds.
Timeline deletion and tenant deletion in cplane should call appropriate
storage_controller endpoints like it currently does for sharded tenants. The
calls should be retried until they succeed.
When compute receives safekeepers list from control plane it needs to know the
generation to checked whether it should be updated (note that compute may get
safekeeper list from either cplane or safekeepers). Currently `neon.safekeepers`
GUC is just a comma separates list of `host:port`. Let's prefix it with
`g#<generation>:` to this end, so it will look like
```
g#42:safekeeper-0.eu-central-1.aws.neon.tech:6401,safekeeper-2.eu-central-1.aws.neon.tech:6401,safekeeper-1.eu-central-1.aws.neon.tech:6401
```
To summarize, list of cplane changes:
- per tenant -> per timeline safekeepers management and addition of int `safekeeper_generation` field.
- `/notify-safekeepers` endpoint.
- Branch creation call may return list of safekeepers and when it is
present cplane should adopt it instead of choosing on its own like it does currently.
- `neon.safekeepers` GUC should be prefixed with `g#<generation>:`.
### storage_controller implementation
Current 'load everything on startup and keep in memory' easy design is fine.
@@ -400,10 +360,10 @@ source safekeeper might fail, which is not a problem if we are going to
decomission the node but leaves garbage otherwise. I'd propose in the first version
1) Don't attempt deletion at all if node status is `offline`.
2) If it failed, just issue warning.
And add PUT `/control/v1/safekeepers/:node_id/scrub` endpoint which would find and
remove garbage timelines for manual use. It will 1) list all timelines on the
safekeeper 2) compare each one against configuration storage: if timeline
doesn't exist at all (had been deleted), it can be deleted. Otherwise, it can
And add PUT `/control/v1/safekeepers/:node_id/scrub` endpoint which would find and
remove garbage timelines for manual use. It will 1) list all timelines on the
safekeeper 2) compare each one against configuration storage: if timeline
doesn't exist at all (had been deleted), it can be deleted. Otherwise, it can
be deleted under generation number if node is not member of current generation.
Automating this is untrivial; we'd need to register all potential missing
@@ -452,8 +412,8 @@ There should be following layers of tests:
3) Since simulation testing injects at relatively high level points (not
syscalls), it omits some code, in particular `pull_timeline`. Thus it is
better to have basic tests covering whole system as well. Extended version of
`test_restarts_under_load` would do: start background load and do migration
under it, then restart endpoint and check that no reported commits
`test_restarts_under_load` would do: start background load and do migration
under it, then restart endpoint and check that no reported commits
had been lost. I'd also add one more creating classic network split scenario, with
one compute talking to AC and another to BD while migration from nodes ABC to ABD
happens.
@@ -462,51 +422,35 @@ There should be following layers of tests:
## Order of implementation and rollout
Note that
Note that
- Control plane parts and integration with it is fully independent from everything else
(tests would use simulation and neon_local).
- It is reasonable to make compute <-> safekeepers protocol change
independent of enabling generations.
- There is a lot of infra work making storage_controller aware of timelines and safekeepers
and its impl/rollout should be separate from migration itself.
- Initially walproposer can just stop working while it observes joint configuration.
- Initially walproposer can just stop working while it observers joint configuration.
Such window would be typically very short anyway.
- Obviously we want to test the whole thing thoroughly on staging and only then
gradually enable in prod.
Let's have the following implementation bits for gradual rollout:
- compute gets `neon.safekeepers_proto_version` flag.
Initially both compute and safekeepers will be able to talk both
versions so that we can delay force restart of them and for
simplicity of rollback in case it is needed.
- storcon gets `-set-safekeepers` config option disabled by
default. Timeline creation request chooses safekeepers
(and returns them in response to cplane) only when it is set to
true.
- control_plane [see above](storage_controller-<->-control-plane interface-and-changes)
prefixes `neon.safekeepers` GUC with generation number. When it is 0
(or prefix not present at all), walproposer behaves as currently, committing on
the provided safekeeper list -- generations are disabled.
If it is non 0 it follows this RFC rules.
- We provide a script for manual migration to storage controller.
It selects timeline(s) from control plane (specified or all of them) db
and calls special import endpoint on storage controller which is very
similar to timeline creation: it inserts into the db, sets
configuration to initial on the safekeepers, calls cplane
`notify-safekeepers`.
To rollout smoothly, both walproposer and safekeeper should have flag
`configurations_enabled`; when set to false, they would work as currently, i.e.
walproposer is able to commit on whatever safekeeper set it is provided. Until
all timelines are managed by storcon we'd need to use current script to migrate
and update/drop entries in the storage_controller database if it has any.
Then the rollout for a region would be:
- Current situation: safekeepers are choosen by control_plane.
- We manually migrate some timelines, test moving them around.
- Then we enable `--set-safekeepers` so that all new timelines
are on storage controller.
- Finally migrate all existing timelines using the script (no
compute should be speaking old proto version at this point).
Safekeepers would need to be able to talk both current and new protocol version
with compute to reduce number of computes restarted in prod once v2 protocol is
deployed (though before completely switching we'd need to force this).
Let's have the following rollout order:
- storage_controller becomes aware of safekeepers;
- storage_controller gets timeline creation for new timelines and deletion requests, but
doesn't manage all timelines yet. Migration can be tested on these new timelines.
To keep control plane and storage_controller databases in sync while control
plane still chooses the safekeepers initially (until all timelines are imported
it can choose better), `TimelineCreateRequest` can get optional safekeepers
field with safekeepers chosen by cplane.
- Then we can import all existing timelines from control plane to
storage_controller and gradually enable configurations region by region.
Until all timelines are managed by storcon we'd need to use current ad hoc
script to migrate if needed. To keep state clean, all storage controller managed
timelines must be migrated before that, or controller db and configurations
state of safekeepers dropped manually.
Very rough implementation order:
- Add concept of configurations to safekeepers (including control file),
@@ -514,10 +458,10 @@ Very rough implementation order:
- Implement walproposer changes, including protocol.
- Implement storconn part. Use it in neon_local (and pytest).
- Make cplane store safekeepers per timeline instead of per tenant.
- Implement cplane/storcon integration. Route branch creation/deletion
- Implement cplane/storcon integration. Route branch creation/deletion
through storcon. Then we can test migration of new branches.
- Finally import existing branches. Then we can drop cplane
safekeeper selection code. Gradually enable configurations at
- Finally import existing branches. Then we can drop cplane
safekeeper selection code. Gradually enable configurations at
computes and safekeepers. Before that, all computes must talk only
v3 protocol version.

View File

@@ -1,247 +0,0 @@
# CPU and Memory Profiling
Created 2025-01-12 by Erik Grinaker.
See also [internal user guide](https://www.notion.so/neondatabase/Storage-CPU-Memory-Profiling-14bf189e004780228ec7d04442742324?pvs=4).
## Summary
This document proposes a standard cross-team pattern for CPU and memory profiling across
applications and languages, using the [pprof](https://github.com/google/pprof) profile format.
It enables both ad hoc profiles via HTTP endpoints, and continuous profiling across the fleet via
[Grafana Cloud Profiles](https://grafana.com/docs/grafana-cloud/monitor-applications/profiles/).
Continuous profiling incurs an overhead of about 0.1% CPU usage and 3% slower heap allocations.
## Motivation
CPU and memory profiles are crucial observability tools for understanding performance issues,
resource exhaustion, and resource costs. They allow answering questions like:
* Why is this process using 100% CPU?
* How do I make this go faster?
* Why did this process run out of memory?
* Why are we paying for all these CPU cores and memory chips?
Go has [first-class support](https://pkg.go.dev/net/http/pprof) for profiling included in its
standard library, using the [pprof](https://github.com/google/pprof) profile format and associated
tooling.
This is not the case for Rust and C, where obtaining profiles can be rather cumbersome. It requires
installing and running additional tools like `perf` as root on production nodes, with analysis tools
that can be hard to use and often don't give good results. This is not only annoying, but can also
significantly affect the resolution time of production incidents.
This proposal will:
* Provide CPU and heap profiles in pprof format via HTTP API.
* Record continuous profiles in Grafana for aggregate historical analysis.
* Make it easy for anyone to see a flamegraph in less than one minute.
* Be reasonably consistent across teams and services (Rust, Go, C).
## Non Goals (For Now)
* [Additional profile types](https://grafana.com/docs/pyroscope/next/configure-client/profile-types/)
like mutexes, locks, goroutines, etc.
* [Runtime trace integration](https://grafana.com/docs/pyroscope/next/configure-client/trace-span-profiles/).
* [Profile-guided optimization](https://en.wikipedia.org/wiki/Profile-guided_optimization).
## Using Profiles
Ready-to-use profiles can be obtained using e.g. `curl`. For Rust services:
```
$ curl localhost:9898/profile/cpu >profile.pb.gz
```
pprof profiles can be explored using the [`pprof`](https://github.com/google/pprof) web UI, which
provides flamegraphs, call graphs, plain text listings, and more:
```
$ pprof -http :6060 <profile>
```
Some endpoints (e.g. Rust-based ones) can also generate flamegraph SVGs directly:
```
$ curl localhost:9898/profile/cpu?format=svg >profile.svg
$ open profile.svg
```
Continuous profiles are available in Grafana under Explore → Profiles → Explore Profiles
(currently only in [staging](https://neonstaging.grafana.net/a/grafana-pyroscope-app/profiles-explorer)).
## API Requirements
* HTTP endpoints that return a profile in pprof format (with symbols).
* CPU: records a profile over the request time interval (`seconds` query parameter).
* Memory: returns the current in-use heap allocations.
* Unauthenticated, as it should not expose user data or pose a denial-of-service risk.
* Default sample frequency should not impact service (maximum 5% CPU overhead).
* Linux-compatibility.
Nice to have:
* Return flamegraph SVG directly from the HTTP endpoint if requested.
* Configurable sample frequency for CPU profiles.
* Historical heap allocations, by count and bytes.
* macOS-compatiblity.
## Rust Profiling
[`libs/utils/src/http/endpoint.rs`](https://github.com/neondatabase/neon/blob/8327f68043e692c77f70d6a6dafa463636c01578/libs/utils/src/http/endpoint.rs)
contains ready-to-use HTTP endpoints for CPU and memory profiling:
[`profile_cpu_handler`](https://github.com/neondatabase/neon/blob/8327f68043e692c77f70d6a6dafa463636c01578/libs/utils/src/http/endpoint.rs#L338) and [`profile_heap_handler`](https://github.com/neondatabase/neon/blob/8327f68043e692c77f70d6a6dafa463636c01578/libs/utils/src/http/endpoint.rs#L416).
### CPU
CPU profiles are provided by [pprof-rs](https://github.com/tikv/pprof-rs) via
[`profile_cpu_handler`](https://github.com/neondatabase/neon/blob/8327f68043e692c77f70d6a6dafa463636c01578/libs/utils/src/http/endpoint.rs#L338).
Expose it unauthenticated at `/profile/cpu`.
Parameters:
* `format`: profile output format (`pprof` or `svg`; default `pprof`).
* `seconds`: duration to collect profile over, in seconds (default `5`).
* `frequency`: how often to sample thread stacks, in Hz (default `99`).
* `force`: if `true`, cancel a running profile and start a new one (default `false`).
Works on Linux and macOS.
### Memory
Use the jemalloc allocator via [`tikv-jemallocator`](https://github.com/tikv/jemallocator),
and enable profiling with samples every 2 MB allocated:
```rust
#[global_allocator]
static GLOBAL: tikv_jemallocator::Jemalloc = tikv_jemallocator::Jemalloc;
#[allow(non_upper_case_globals)]
#[export_name = "malloc_conf"]
pub static malloc_conf: &[u8] = b"prof:true,prof_active:true,lg_prof_sample:21\0";
```
pprof profiles are generated by
[`jemalloc-pprof`](https://github.com/polarsignals/rust-jemalloc-pprof) via
[`profile_heap_handler`](https://github.com/neondatabase/neon/blob/8327f68043e692c77f70d6a6dafa463636c01578/libs/utils/src/http/endpoint.rs#L416).
Expose it unauthenticated at `/profile/heap`.
Parameters:
* `format`: profile output format (`pprof`, `svg`, or `jemalloc`; default `pprof`).
Works on Linux only, due to [jemalloc limitations](https://github.com/jemalloc/jemalloc/issues/26).
## Go Profiling
The Go standard library includes pprof profiling via HTTP API in
[`net/http/pprof`](https://pkg.go.dev/net/http/pprof). Expose it unauthenticated at
`/debug/pprof`.
Works on Linux and macOS.
### CPU
Via `/debug/pprof/profile`. Parameters:
* `debug`: profile output format (`0` is pprof, `1` or above is plaintext; default `0`).
* `seconds`: duration to collect profile over, in seconds (default `30`).
Does not support a frequency parameter (see [#57488](https://github.com/golang/go/issues/57488)),
and defaults to 100 Hz. A lower frequency can be hardcoded via `SetCPUProfileRate`, but the default
is likely ok (estimated 1% overhead).
### Memory
Via `/debug/pprof/heap`. Parameters:
* `seconds`: take a delta profile over the given duration, in seconds (default `0`).
* `gc`: if `1`, garbage collect before taking profile.
## C Profiling
[gperftools](https://github.com/gperftools/gperftools) provides in-process CPU and heap profiling
with pprof output.
However, continuous profiling of PostgreSQL is expensive (many computes), and has limited value
since we don't own the internals anyway.
Ad hoc profiling might still be useful, but the compute team considers existing tooling sufficient,
so this is not a priority at the moment.
## Grafana Continuous Profiling
[Grafana Alloy](https://grafana.com/docs/alloy/latest/) continually scrapes CPU and memory profiles
across the fleet, and archives them as time series. This can be used to analyze resource usage over
time, either in aggregate or zoomed in to specific events and nodes.
Profiles are retained for 30 days. Profile ingestion volume for CPU+heap at 60-second intervals
is about 0.5 GB/node/day, or about $0.25/node/day = $7.5/node/month ($0.50/GB).
It is currently enabled in [staging](https://neonstaging.grafana.net/a/grafana-pyroscope-app/profiles-explorer)
for Pageserver and Safekeeper.
### Scraping
* CPU profiling: 59 seconds at 19 Hz every 60 seconds.
* Heap profiling: heap snapshot with 2 MB frequency every 60 seconds.
There are two main approaches that can be taken for CPU profiles:
* Continuous low-frequency profiles (e.g. 19 Hz for 60 seconds every 60 seconds).
* Occasional high-frequency profiles (e.g. 99 Hz for 5 seconds every 60 seconds).
We choose continuous low-frequency profiles where possible. This has a fixed low overhead, instead
of a spiky high overhead. It likely also gives a more representative view of resource usage.
However, a 19 Hz rate gives a minimum resolution of 52.6 ms per sample, which may be larger than the
actual runtime of small functions. Note that Go does not support a frequency parameter, so we must
use a fixed frequency for all profiles via `SetCPUProfileRate()` (default 100 Hz).
Only one CPU profile can be taken at a time. With continuous profiling, one will always be running.
To allow also taking an ad hoc CPU profile, the Rust endpoint supports a `force` query parameter to
cancel a running profile and start a new one.
### Overhead
With Rust:
* CPU profiles at 19 Hz frequency: 0.1% overhead.
* Heap profiles at 2 MB frequency: 3% allocation overhead.
* Profile call/encoding/symbolization: 20 ms every 60 seconds, or 0.03% of 1 CPU (for Pageserver).
* Profile symbolization caches: 125 MB memory, or 0.4% of 32 GB (for Pageserver).
Benchmarks with pprof-rs showed that the CPU time for taking a stack trace of a 40-frame stack was
11 µs using the `frame-pointer` feature, and 1.4 µs using `libunwind` with DWARF. `libunwind` saw
frequent seg faults, so we use `frame-pointer` and build binaries with frame pointers (negligible
overhead).
CPU profiles work by installing an `ITIMER_PROF` for the process, which triggers a `SIGPROF` signal
after a given amount of cumulative CPU time across all CPUs. The signal handler will run for one
of the currently executing threads and take a stack trace. Thus, a 19 Hz profile will take 1 stack
trace every 52.6 ms CPU time -- assuming 11 µs for a stack trace, this is 0.02% overhead, but
likely 0.1% in practice (given e.g. context switches).
Heap profiles work by probabilistically taking a stack trace on allocations, adjusted for the
allocation size. A 1 MB allocation takes about 15 µs in benchmarks, and a stack trace about 1 µs,
so we can estimate that a 2 MB sampling frequency has about 3% allocation overhead -- this is
consistent with benchmarks. This is significantly larger than CPU profiles, but mitigated by the
fact that performance-sensitive code will avoid allocations as far as possible.
Profile symbolization uses in-memory caches for symbol lookups. These take about 125 MB for
Pageserver.
## Alternatives Considered
* eBPF profiles.
* Don't require instrumenting the binary.
* Use less resources.
* Can profile in kernel space too.
* Supported by Grafana.
* Less information about stack frames and spans.
* Limited tooling for local analysis.
* Does not support heap profiles.
* Does not work on macOS.
* [Polar Signals](https://www.polarsignals.com) instead of Grafana.
* We already use Grafana for everything else. Appears good enough.

View File

@@ -1,255 +0,0 @@
#
Created on Aug 2024
Implemented on Jan 2025
## Summary
Data in large tenants is split up between multiple pageservers according to key hashes, as
introduced in the [sharding RFC](031-sharding-static.md) and [shard splitting RFC](032-shard-splitting.md).
Whereas currently we send all WAL to all pageserver shards, and each shard filters out the data that it needs,
in this RFC we add a mechanism to filter the WAL on the safekeeper, so that each shard receives
only the data it needs.
This will place some extra CPU load on the safekeepers, in exchange for reducing the network bandwidth
for ingesting WAL back to scaling as O(1) with shard count, rather than O(N_shards).
## Motivation
1. Large databases require higher shard counts. Whereas currently we run with up to 8 shards for tenants
with a few TB of storage, the next order of magnitude capacity increase will require tens of shards, such
that sending all WAL to all shards is impractical in terms of bandwidth.
2. For contemporary database sizes (~2TB), the pageserver is the bottleneck for ingest: since each
shard has to decode and process the whole WAL, sharding doesn't fully relieve this bottleneck. To achieve significantly higher ingest speeds, we need to filter the WAL earlier so that each pageserver
only has to process relevant parts.
## Non Goals (if relevant)
We do not seek to introduce multiple WALs per timeline, or to share the work of handling a timeline's
WAL across safekeepers (beyond simple 3x replication). This RFC may be thought of as an incremental
move of the ingestion bottleneck up the stack: instead of high write rates bottlenecking on the
pageserver, they will bottleneck on the safekeeper.
## Impacted components (e.g. pageserver, safekeeper, console, etc)
Safekeeper, pageserver.
There will be no control plane or storage controller coordination needed, as pageservers will directly
indicate their sharding parameters to the safekeeper when subscribing for WAL.
## Proposed implementation
Terminology:
- "Data pages" refers to postgres relation blocks, and SLRU blocks.
- "Metadata pages" refers to everything else the pageserver stores, such as relation sizes and
directories of relations.
### Phase 1: Refactor ingest
Currently, pageserver ingest code is structured approximately as follows:
1. `handle_walreceiver_connection` reads a stream of binary WAL records off a network
socket
2. `WalIngest::ingest_record` to translate the record into a series of page-level modifications
3. `DatadirModification` accumulates page updates from several `ingest_record` calls, and when
its `commit()` method is called, flushes these into a Timeline's open `InMemoryLayer`.
This process currently assumes access to a pageserver `Timeline` throughout `ingest_record` and
from `DatadirModification`, which is used to do read-modify-write cycles on metadata pages
such as relation sizes and the master DBDIR page. It also assumes that records are ingested
strictly one after the other: they cannot be ingested in parallel because each record assumes
that earlier records' changes have already been applied to `Timeline`.
This code will be refactored to disentangle the simple, fast decode of relation page writes
from the more complex logic for updating internal metadata. An intermediate representation
called `InterpretedWalRecords` will be introduced. This is similar to the internal state of
a `DatadirModification`, but does not require access to a Timeline. Instead of storing
metadata updates as materialized writes to pages, it will accumulate these as abstract operations,
for example rather than including a write to a relation size key, this structure will include
an operation that indicates "Update relation _foo_'s size to the max of its current value and
_bar_", such that these may be applied later to a real Timeline.
The `DatadirModification` will be aware of the `EphemeralFile` format, so that as it accumulates
simple page writes of relation blocks, it can write them directly into a buffer in the serialized
format. This will avoid the need to later deserialize/reserialize this data when passing the
structure between safekeeper and pageserver.
The new pipeline will be:
1. `handle_walreceiver_connection` reads a stream of binary WAL records off a network
2. A `InterpretedWalRecords` is generated from the incoming WAL records. This does not
require a reference to a Timeline.
3. The logic that is current spread between `WalIngest` and `DatadirModification` for updating
metadata will be refactored to consume the metadata operations from the `InterpretedWalRecords`
and turn them into literal writes to metadata pages. This part must be done sequentially.
4. The resulting buffer of metadata page writes is combined with the buffer of relation block
writes, and written into the `InMemoryLayer`.
Implemented in:
1. https://github.com/neondatabase/neon/pull/9472
2. https://github.com/neondatabase/neon/pull/9504
3. https://github.com/neondatabase/neon/pull/9524
### Phase 2: Decode & filter on safekeeper
In the previous phase, the ingest code was modified to be able to do most of its work without access to
a Timeline: this first stage of ingest simply converts a series of binary wal records into
a buffer of relation/SLRU page writes, and a buffer of abstract metadata writes.
The modified ingest code may be transplanted from pageserver to safekeeper (probably via a
shared crate). The safekeeper->pageserver network protocol is modified to:
- in subscription requests, send the `ShardIdentity` from the pageserver to the safekeeper
- in responses, transmit a `InterpretedWalRecords` instead of a raw `WalRecord`.
- use the `ShardIdentity` to filter the `ProcessedWalIngest` to relevant content for
the subscribing shard before transmitting it.
The overall behavior of the pageserver->safekeeper interaction remains the same, in terms of
consistent LSN feedback, and connection management. Only the payload of the subscriptions
changes, to express an LSN range of WAL as a filtered `ProcessedWalIngest` instead of the
raw data.
The ingest code on the pageserver can now skip the part where it does the first phase of
processing, as it will receive pre-processed, compressed data off the wire.
Note that `InterpretedWalRecord` batches multiple `InterpretedWalRecord(s)` in the same network
message. Safekeeper reads WAL in chunks of 16 blocks and then decodes as many Postgres WAL records
as possible. Each Postgres WAL record maps to one `InterpretedWalRecord` for potentially multiple shards.
Hence, the size of the batch is given by the number of Postgres WAL records that fit in 16 blocks.
The protocol needs to support evolution. Protobuf was chosen here with the view that, in the future,
we may migrate it to GRPC altogether
Implemented in:
1. https://github.com/neondatabase/neon/pull/9746
2. https://github.com/neondatabase/neon/pull/9821
### Phase 3: Fan out interpreted WAL
In the previous phase, the initial processing of WAL was moved to the safekeeper, but it is still
done once for each shard: this will generate O(N_shards) CPU work on the safekeeper (especially
when considering converting to Protobuf format and compression).
To avoid this, we fan-out WAL from one (tenant, timeline, shard) to all other shards subscribed on
the same safekeeper. Under normal operation, the WAL will be read from disk, decoded and interpreted
_only_ once per (safekeeper, timeline).
When the first shard of a sharded timeline subscribes to a given safekeeper a task is spawned
for the WAL reader (`InterpretedWalReader`). This task reads WAL, decodes, interprets it and sends
it to the sender (`InterpretedWalSender`). The sender is a future that is polled from the connection
task. When further shards subscribe on the safekeeper they will attach themselves to the existing WAL reader.
There's two cases to consider:
1. The shard's requested `start_lsn` is ahead of the current position of the WAL reader. In this case, the shard
will start receiving data when the reader reaches that LSN. The intuition here is that there's little to gain
by letting shards "front-run" since compute backpressure is based on the laggard LSN.
2. The shard's requested `start_lsn` is below the current position of the WAL reader. In this case, the WAL reader
gets reset to this requested position (same intuition). Special care is taken such that advanced shards do not receive
interpreted WAL records below their current position.
The approach above implies that there is at most one WAL reader per (tenant, timeline) on a given safekeeper at any point in time.
If this turns out to be operationally problematic, there's a trick we can deploy: `--max-delta-for-fanout` is an optional safekeeper
argument that controls the max absolute delta between a new shard and the current WAL position of the WAL reader. If the absolute
delta is above that value, a new reader is spawned. Note that there's currently no concurrency control on the number of WAL readers,
so it's recommended to use large values to avoid pushing CPU utilisation too high.
Unsharded tenants do not spawn a separate task for the interpreted WAL reader since there's no benefit to it. Instead they poll
the reader and sender concurrently from the connection task.
Shard splits are interesting here because it is the only case when the same shard might have two subscriptions at the same time.
This is handled by giving readers a unique identifier. Both shards will receive the same data while respecting their requested start
position.
Implemented in:
1. https://github.com/neondatabase/neon/pull/10190
## Deployment
Each phase shall be deployed independently. Special care should be taken around protocol changes.
## Observability Tips
* The safekeeper logs the protocol requested by the pageserver
along with the pageserver ID, tenant, timeline and shard: `starting streaming from`.
* There's metrics for the number of wal readers:
* `safekeeper_wal_readers{kind="task", target=~"pageserver.*"}` gives the number of wal reader tasks for each SK
* `safekeeper_wal_readers{kind="future", target=~"pageserver.*"}` gives the numer of wal readers polled inline by each SK
* `safekeeper_interpreted_wal_reader_tasks` gives the number of wal reader tasks per tenant, timeline
* Interesting log lines for the fan-out reader:
* `Spawning interpreted`: first shard creates the interpreted wal reader
* `Fanning out`: a subsequent shard attaches itself to an interpreted wal reader
* `Aborting interpreted`: all senders have finished and the reader task is being aborted
## Future Optimizations
This sections describes some improvement areas which may be revisited in the future.
### Buffering of Interpreted WAL
The interpreted WAL reader may buffer interpreted WAL records in user space to help with serving
subscriptions that are lagging behind the current position of the reader.
Counterpoints:
* Safekeepers serve many thousands of timelines and allocating a buffer for each might be wasteful,
especially given that it would go unused on the happy path.
* WAL is buffered in the kernel page cache. Usually we'd only pay the CPU cost of decoding and interpreting.
### Tweaking the Pagserver Safekeeper Selection Algorithm
We could make the pageserver aware of which safekeeper's already host shards for the timeline along
with their current WAL positions. The pageserver should then prefer safkeepers that are in the same
AZ _and_ already have a shard with a position close to the desired start position.
We currently run one safekeeper per AZ, so the point is mute until that changes.
### Pipelining first ingest phase
The first ingest phase is a stateless transformation of a binary WAL record into a pre-processed
output per shard. To put multiple CPUs to work, we may pipeline this processing up to some defined buffer
depth.
## Alternatives considered
### Give safekeepers enough state to fully decode WAL
In this RFC, we only do the first phase of ingest on the safekeeper, because this is
the phase that is stateless. Subsequent changes then happen on the pageserver, with
access to the `Timeline` state.
We could do more work on the safekeeper if we transmitted metadata state to the safekeeper
when subscribing to the WAL: for example, by telling the safekeeper all the relation sizes,
so that it could then generate all the metadata writes for relation sizes.
We avoid doing this for several reasons:
1. Complexity: it's a more invasive protocol change
2. Decoupling: having the safekeeper understand the `ProcessedWalIngest` already somewhat
infects it with knowledge of the pageserver, but this is mainly an abstract structure
that describes postgres writes. However, if we taught the safekeeper about the exact
way that pageserver deals with metadata keys, this would be a much tighter coupling.
3. Load: once the WAL has been processed to the point that it can be split between shards,
it is preferable to share out work on the remaining shards rather than adding extra CPU
load to the safekeeper.
### Do pre-processing on the compute instead of the safekeeper
Since our first stage of ingest is stateless, it could be done at any stage in the pipeline,
all the way up to the compute.
We choose not to do this, because it is useful for the safekeeper to store the raw WAL rather
than just the preprocessed WAL:
- The safekeeper still needs to be able to serve raw WAL back to postgres for e.g. physical replication
- It simplifies our paxos implementation to have the offset in the write log be literally
the same as the LSN
- Raw WAL must have a stable protocol since we might have to re-ingest it at arbitrary points in the future.
Storing raw WAL give us more flexibility to evolve the pageserver, safekeeper protocol.
### Do wal pre-processing on shard 0 or a separate service, send it to other shards from there
If we wanted to keep the safekeepers as entirely pure stores of raw WAL bytes, then
we could do the initial decode and shard-splitting in some other location:
- Shard 0 could subscribe to the full WAL and then send writes to other shards
- A new intermediate service between the safekeeper and pageserver could do the splitting.
So why not?
- Extra network hop from shard 0 to the final destination shard
- Clearly there is more infrastructure involved here compared with doing it inline on the safekeeper.
- Safekeepers already have very light CPU load: typical cloud instances shapes with appropriate
disks for the safekeepers effectively have "free" CPU resources.
- Doing extra work on shard 0 would complicate scheduling of shards on pageservers, because
shard 0 would have significantly higher CPU load under write workloads than other shards.

View File

@@ -15,6 +15,11 @@ pub struct GenericAPIError {
pub error: String,
}
#[derive(Debug, Clone, Serialize)]
pub struct InfoResponse {
pub num_cpus: usize,
}
#[derive(Debug, Clone, Serialize)]
pub struct ExtensionInstallResponse {
pub extension: PgIdent,

View File

@@ -138,13 +138,6 @@ pub struct ComputeSpec {
/// enough spare connections for reconfiguration process to succeed.
#[serde(default = "default_reconfigure_concurrency")]
pub reconfigure_concurrency: usize,
/// If set to true, the compute_ctl will drop all subscriptions before starting the
/// compute. This is needed when we start an endpoint on a branch, so that child
/// would not compete with parent branch subscriptions
/// over the same replication content from publisher.
#[serde(default)] // Default false
pub drop_subscriptions_before_start: bool,
}
/// Feature flag to signal `compute_ctl` to enable certain experimental functionality.
@@ -204,16 +197,14 @@ impl RemoteExtSpec {
// Check if extension is present in public or custom.
// If not, then it is not allowed to be used by this compute.
if !self
.public_extensions
.as_ref()
.is_some_and(|exts| exts.iter().any(|e| e == ext_name))
&& !self
.custom_extensions
.as_ref()
.is_some_and(|exts| exts.iter().any(|e| e == ext_name))
{
return Err(anyhow::anyhow!("extension {} is not found", real_ext_name));
if let Some(public_extensions) = &self.public_extensions {
if !public_extensions.contains(&real_ext_name.to_string()) {
if let Some(custom_extensions) = &self.custom_extensions {
if !custom_extensions.contains(&real_ext_name.to_string()) {
return Err(anyhow::anyhow!("extension {} is not found", real_ext_name));
}
}
}
}
match self.extension_data.get(real_ext_name) {
@@ -342,96 +333,6 @@ mod tests {
use super::*;
use std::fs::File;
#[test]
fn allow_installing_remote_extensions() {
let rspec: RemoteExtSpec = serde_json::from_value(serde_json::json!({
"public_extensions": null,
"custom_extensions": null,
"library_index": {},
"extension_data": {},
}))
.unwrap();
rspec
.get_ext("ext", false, "latest", "v17")
.expect_err("Extension should not be found");
let rspec: RemoteExtSpec = serde_json::from_value(serde_json::json!({
"public_extensions": [],
"custom_extensions": null,
"library_index": {},
"extension_data": {},
}))
.unwrap();
rspec
.get_ext("ext", false, "latest", "v17")
.expect_err("Extension should not be found");
let rspec: RemoteExtSpec = serde_json::from_value(serde_json::json!({
"public_extensions": [],
"custom_extensions": [],
"library_index": {
"ext": "ext"
},
"extension_data": {
"ext": {
"control_data": {
"ext.control": ""
},
"archive_path": ""
}
},
}))
.unwrap();
rspec
.get_ext("ext", false, "latest", "v17")
.expect_err("Extension should not be found");
let rspec: RemoteExtSpec = serde_json::from_value(serde_json::json!({
"public_extensions": [],
"custom_extensions": ["ext"],
"library_index": {
"ext": "ext"
},
"extension_data": {
"ext": {
"control_data": {
"ext.control": ""
},
"archive_path": ""
}
},
}))
.unwrap();
rspec
.get_ext("ext", false, "latest", "v17")
.expect("Extension should be found");
let rspec: RemoteExtSpec = serde_json::from_value(serde_json::json!({
"public_extensions": ["ext"],
"custom_extensions": [],
"library_index": {
"ext": "ext"
},
"extension_data": {
"ext": {
"control_data": {
"ext.control": ""
},
"archive_path": ""
}
},
}))
.unwrap();
rspec
.get_ext("ext", false, "latest", "v17")
.expect("Extension should be found");
}
#[test]
fn parse_spec_file() {
let file = File::open("tests/cluster_spec.json").unwrap();

View File

@@ -120,7 +120,6 @@ pub struct ConfigToml {
pub no_sync: Option<bool>,
pub wal_receiver_protocol: PostgresClientProtocol,
pub page_service_pipelining: PageServicePipeliningConfig,
pub get_vectored_concurrent_io: GetVectoredConcurrentIo,
}
#[derive(Debug, Clone, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
@@ -159,25 +158,6 @@ pub enum PageServiceProtocolPipelinedExecutionStrategy {
Tasks,
}
#[derive(Debug, Clone, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
#[serde(tag = "mode", rename_all = "kebab-case")]
#[serde(deny_unknown_fields)]
pub enum GetVectoredConcurrentIo {
/// The read path is fully sequential: layers are visited
/// one after the other and IOs are issued and waited upon
/// from the same task that traverses the layers.
Sequential,
/// The read path still traverses layers sequentially, and
/// index blocks will be read into the PS PageCache from
/// that task, with waiting.
/// But data IOs are dispatched and waited upon from a sidecar
/// task so that the traversing task can continue to traverse
/// layers while the IOs are in flight.
/// If the PS PageCache miss rate is low, this improves
/// throughput dramatically.
SidecarTask,
}
pub mod statvfs {
pub mod mock {
#[derive(Debug, Clone, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
@@ -254,26 +234,9 @@ pub struct TenantConfigToml {
// Duration::ZERO means automatic compaction is disabled.
#[serde(with = "humantime_serde")]
pub compaction_period: Duration,
/// Level0 delta layer threshold for compaction.
// Level0 delta layer threshold for compaction.
pub compaction_threshold: usize,
/// Controls the amount of L0 included in a single compaction iteration.
/// The unit is `checkpoint_distance`, i.e., a size.
/// We add L0s to the set of layers to compact until their cumulative
/// size exceeds `compaction_upper_limit * checkpoint_distance`.
pub compaction_upper_limit: usize,
pub compaction_algorithm: crate::models::CompactionAlgorithmSettings,
/// Level0 delta layer threshold at which to delay layer flushes for compaction backpressure,
/// such that they take 2x as long, and start waiting for layer flushes during ephemeral layer
/// rolls. This helps compaction keep up with WAL ingestion, and avoids read amplification
/// blowing up. Should be >compaction_threshold. 0 to disable. Disabled by default.
pub l0_flush_delay_threshold: Option<usize>,
/// Level0 delta layer threshold at which to stall layer flushes. Must be >compaction_threshold
/// to avoid deadlock. 0 to disable. Disabled by default.
pub l0_flush_stall_threshold: Option<usize>,
/// If true, Level0 delta layer flushes will wait for S3 upload before flushing the next
/// layer. This is a temporary backpressure mechanism which should be removed once
/// l0_flush_{delay,stall}_threshold is fully enabled.
pub l0_flush_wait_upload: bool,
// Determines how much history is retained, to allow
// branching and read replicas at an older point in time.
// The unit is #of bytes of WAL.
@@ -323,10 +286,6 @@ pub struct TenantConfigToml {
// Expresed in multiples of checkpoint distance.
pub image_layer_creation_check_threshold: u8,
// How many multiples of L0 `compaction_threshold` will preempt image layer creation and do L0 compaction.
// Set to 0 to disable preemption.
pub image_creation_preempt_threshold: usize,
/// The length for an explicit LSN lease request.
/// Layers needed to reconstruct pages at LSN will not be GC-ed during this interval.
#[serde(with = "humantime_serde")]
@@ -342,20 +301,6 @@ pub struct TenantConfigToml {
pub timeline_offloading: bool,
pub wal_receiver_protocol_override: Option<PostgresClientProtocol>,
/// Enable rel_size_v2 for this tenant. Once enabled, the tenant will persist this information into
/// `index_part.json`, and it cannot be reversed.
pub rel_size_v2_enabled: Option<bool>,
// gc-compaction related configs
/// Enable automatic gc-compaction trigger on this tenant.
pub gc_compaction_enabled: bool,
/// The initial threshold for gc-compaction in KB. Once the total size of layers below the gc-horizon is above this threshold,
/// gc-compaction will be triggered.
pub gc_compaction_initial_threshold_kb: u64,
/// The ratio that triggers the auto gc-compaction. If (the total size of layers between L2 LSN and gc-horizon) / (size below the L2 LSN)
/// is above this ratio, gc-compaction will be triggered.
pub gc_compaction_ratio_percent: u64,
}
pub mod defaults {
@@ -505,11 +450,6 @@ impl Default for ConfigToml {
execution: PageServiceProtocolPipelinedExecutionStrategy::ConcurrentFutures,
})
},
get_vectored_concurrent_io: if !cfg!(test) {
GetVectoredConcurrentIo::Sequential
} else {
GetVectoredConcurrentIo::SidecarTask
},
}
}
}
@@ -532,17 +472,9 @@ pub mod tenant_conf_defaults {
pub const DEFAULT_COMPACTION_PERIOD: &str = "20 s";
pub const DEFAULT_COMPACTION_THRESHOLD: usize = 10;
// This value needs to be tuned to avoid OOM. We have 3/4 of the total CPU threads to do background works, that's 16*3/4=9 on
// most of our pageservers. Compaction ~50 layers requires about 2GB memory (could be reduced later by optimizing L0 hole
// calculation to avoid loading all keys into the memory). So with this config, we can get a maximum peak compaction usage of 18GB.
pub const DEFAULT_COMPACTION_UPPER_LIMIT: usize = 50;
pub const DEFAULT_COMPACTION_ALGORITHM: crate::models::CompactionAlgorithm =
crate::models::CompactionAlgorithm::Legacy;
pub const DEFAULT_L0_FLUSH_WAIT_UPLOAD: bool = true;
pub const DEFAULT_GC_HORIZON: u64 = 64 * 1024 * 1024;
// Large DEFAULT_GC_PERIOD is fine as long as PITR_INTERVAL is larger.
@@ -551,10 +483,6 @@ pub mod tenant_conf_defaults {
// Relevant: https://github.com/neondatabase/neon/issues/3394
pub const DEFAULT_GC_PERIOD: &str = "1 hr";
pub const DEFAULT_IMAGE_CREATION_THRESHOLD: usize = 3;
// If there are more than threshold * compaction_threshold (that is 3 * 10 in the default config) L0 layers, image
// layer creation will end immediately. Set to 0 to disable. The target default will be 3 once we
// want to enable this feature.
pub const DEFAULT_IMAGE_CREATION_PREEMPT_THRESHOLD: usize = 0;
pub const DEFAULT_PITR_INTERVAL: &str = "7 days";
pub const DEFAULT_WALRECEIVER_CONNECT_TIMEOUT: &str = "10 seconds";
pub const DEFAULT_WALRECEIVER_LAGGING_WAL_TIMEOUT: &str = "10 seconds";
@@ -566,9 +494,6 @@ pub mod tenant_conf_defaults {
// By default ingest enough WAL for two new L0 layers before checking if new image
// image layers should be created.
pub const DEFAULT_IMAGE_LAYER_CREATION_CHECK_THRESHOLD: u8 = 2;
pub const DEFAULT_GC_COMPACTION_ENABLED: bool = false;
pub const DEFAULT_GC_COMPACTION_INITIAL_THRESHOLD_KB: u64 = 10240000;
pub const DEFAULT_GC_COMPACTION_RATIO_PERCENT: u64 = 100;
}
impl Default for TenantConfigToml {
@@ -582,13 +507,9 @@ impl Default for TenantConfigToml {
compaction_period: humantime::parse_duration(DEFAULT_COMPACTION_PERIOD)
.expect("cannot parse default compaction period"),
compaction_threshold: DEFAULT_COMPACTION_THRESHOLD,
compaction_upper_limit: DEFAULT_COMPACTION_UPPER_LIMIT,
compaction_algorithm: crate::models::CompactionAlgorithmSettings {
kind: DEFAULT_COMPACTION_ALGORITHM,
},
l0_flush_delay_threshold: None,
l0_flush_stall_threshold: None,
l0_flush_wait_upload: DEFAULT_L0_FLUSH_WAIT_UPLOAD,
gc_horizon: DEFAULT_GC_HORIZON,
gc_period: humantime::parse_duration(DEFAULT_GC_PERIOD)
.expect("cannot parse default gc period"),
@@ -613,15 +534,10 @@ impl Default for TenantConfigToml {
lazy_slru_download: false,
timeline_get_throttle: crate::models::ThrottleConfig::disabled(),
image_layer_creation_check_threshold: DEFAULT_IMAGE_LAYER_CREATION_CHECK_THRESHOLD,
image_creation_preempt_threshold: DEFAULT_IMAGE_CREATION_PREEMPT_THRESHOLD,
lsn_lease_length: LsnLease::DEFAULT_LENGTH,
lsn_lease_length_for_ts: LsnLease::DEFAULT_LENGTH_FOR_TS,
timeline_offloading: false,
wal_receiver_protocol_override: None,
rel_size_v2_enabled: None,
gc_compaction_enabled: DEFAULT_GC_COMPACTION_ENABLED,
gc_compaction_initial_threshold_kb: DEFAULT_GC_COMPACTION_INITIAL_THRESHOLD_KB,
gc_compaction_ratio_percent: DEFAULT_GC_COMPACTION_RATIO_PERCENT,
}
}
}

View File

@@ -87,7 +87,7 @@ impl Display for AvailabilityZone {
#[derive(Serialize, Deserialize)]
pub struct ShardsPreferredAzsRequest {
#[serde(flatten)]
pub preferred_az_ids: HashMap<TenantShardId, Option<AvailabilityZone>>,
pub preferred_az_ids: HashMap<TenantShardId, AvailabilityZone>,
}
#[derive(Serialize, Deserialize)]
@@ -144,8 +144,6 @@ pub struct NodeDescribeResponse {
pub availability: NodeAvailabilityWrapper,
pub scheduling: NodeSchedulingPolicy,
pub availability_zone_id: String,
pub listen_http_addr: String,
pub listen_http_port: u16,
@@ -181,6 +179,7 @@ pub struct TenantDescribeResponseShard {
/// specifies some constraints, e.g. asking it to get off particular node(s)
#[derive(Serialize, Deserialize, Debug)]
pub struct TenantShardMigrateRequest {
pub tenant_shard_id: TenantShardId,
pub node_id: NodeId,
}
@@ -324,7 +323,7 @@ impl From<NodeSchedulingPolicy> for String {
#[derive(Serialize, Deserialize, Clone, Copy, Eq, PartialEq, Debug)]
pub enum SkSchedulingPolicy {
Active,
Pause,
Disabled,
Decomissioned,
}
@@ -334,13 +333,9 @@ impl FromStr for SkSchedulingPolicy {
fn from_str(s: &str) -> Result<Self, Self::Err> {
Ok(match s {
"active" => Self::Active,
"pause" => Self::Pause,
"disabled" => Self::Disabled,
"decomissioned" => Self::Decomissioned,
_ => {
return Err(anyhow::anyhow!(
"Unknown scheduling policy '{s}', try active,pause,decomissioned"
))
}
_ => return Err(anyhow::anyhow!("Unknown scheduling state '{s}'")),
})
}
}
@@ -350,7 +345,7 @@ impl From<SkSchedulingPolicy> for String {
use SkSchedulingPolicy::*;
match value {
Active => "active",
Pause => "pause",
Disabled => "disabled",
Decomissioned => "decomissioned",
}
.to_string()
@@ -373,16 +368,6 @@ pub enum PlacementPolicy {
Detached,
}
impl PlacementPolicy {
pub fn want_secondaries(&self) -> usize {
match self {
PlacementPolicy::Attached(secondary_count) => *secondary_count,
PlacementPolicy::Secondary => 1,
PlacementPolicy::Detached => 0,
}
}
}
#[derive(Serialize, Deserialize, Debug)]
pub struct TenantShardMigrateResponse {}
@@ -420,6 +405,8 @@ pub struct MetadataHealthListOutdatedResponse {
}
/// Publicly exposed safekeeper description
///
/// The `active` flag which we have in the DB is not included on purpose: it is deprecated.
#[derive(Serialize, Deserialize, Clone)]
pub struct SafekeeperDescribeResponse {
pub id: NodeId,
@@ -435,11 +422,6 @@ pub struct SafekeeperDescribeResponse {
pub scheduling_policy: SkSchedulingPolicy,
}
#[derive(Serialize, Deserialize, Clone)]
pub struct SafekeeperSchedulingPolicyRequest {
pub scheduling_policy: SkSchedulingPolicy,
}
#[cfg(test)]
mod test {
use super::*;

Some files were not shown because too many files have changed in this diff Show More