Mirror of https://github.com/neondatabase/neon.git (synced 2026-02-03 18:50:38 +00:00)

Compare commits: lfc_bug_fi ... max_locks (1 commit)

| Author | SHA1 | Date |
|---|---|---|
|  | d05eabaedb |  |
.github/ISSUE_TEMPLATE/bug-template.md (vendored): 1 line changed

@@ -3,7 +3,6 @@ name: Bug Template
about: Used for describing bugs
title: ''
labels: t/bug
type: Bug
assignees: ''

---
.github/ISSUE_TEMPLATE/epic-template.md (vendored): 1 line changed

@@ -4,7 +4,6 @@ about: A set of related tasks contributing towards specific outcome, comprising
more than 1 week of work.
title: 'Epic: '
labels: t/Epic
type: Epic
assignees: ''

---
.github/actionlint.yml (vendored): 3 lines changed

@@ -4,7 +4,6 @@ self-hosted-runner:
- large
- large-arm64
- small
- small-metal
- small-arm64
- us-east-2
config-variables:

@@ -26,5 +25,3 @@ config-variables:
- PGREGRESS_PG17_PROJECT_ID
- SLACK_ON_CALL_QA_STAGING_STREAM
- DEV_AWS_OIDC_ROLE_MANAGE_BENCHMARK_EC2_VMS_ARN
- SLACK_ON_CALL_STORAGE_STAGING_STREAM
- SLACK_CICD_CHANNEL_ID
.github/actions/neon-project-create/action.yml (vendored): 54 lines changed

@@ -17,34 +17,6 @@ inputs:
compute_units:
description: '[Min, Max] compute units'
default: '[1, 1]'
# settings below only needed if you want the project to be sharded from the beginning
shard_split_project:
description: 'by default new projects are not shard-split, specify true to shard-split'
required: false
default: 'false'
admin_api_key:
description: 'Admin API Key needed for shard-splitting. Must be specified if shard_split_project is true'
required: false
shard_count:
description: 'Number of shards to split the project into, only applies if shard_split_project is true'
required: false
default: '8'
stripe_size:
description: 'Stripe size, optional, in 8kiB pages. e.g. set 2048 for 16MB stripes. Default is 128 MiB, only applies if shard_split_project is true'
required: false
default: '32768'
psql_path:
description: 'Path to psql binary - it is caller responsibility to provision the psql binary'
required: false
default: '/tmp/neon/pg_install/v16/bin/psql'
libpq_lib_path:
description: 'Path to directory containing libpq library - it is caller responsibility to provision the libpq library'
required: false
default: '/tmp/neon/pg_install/v16/lib'
project_settings:
description: 'A JSON object with project settings'
required: false
default: '{}'

outputs:
dsn:

@@ -76,7 +48,7 @@ runs:
\"provisioner\": \"k8s-neonvm\",
\"autoscaling_limit_min_cu\": ${MIN_CU},
\"autoscaling_limit_max_cu\": ${MAX_CU},
\"settings\": ${PROJECT_SETTINGS}
\"settings\": { }
}
}")

@@ -91,23 +63,6 @@ runs:
echo "project_id=${project_id}" >> $GITHUB_OUTPUT

echo "Project ${project_id} has been created"

if [ "${SHARD_SPLIT_PROJECT}" = "true" ]; then
# determine tenant ID
TENANT_ID=`${PSQL} ${dsn} -t -A -c "SHOW neon.tenant_id"`

echo "Splitting project ${project_id} with tenant_id ${TENANT_ID} into $((SHARD_COUNT)) shards with stripe size $((STRIPE_SIZE))"

echo "Sending PUT request to https://${API_HOST}/regions/${REGION_ID}/api/v1/admin/storage/proxy/control/v1/tenant/${TENANT_ID}/shard_split"
echo "with body {\"new_shard_count\": $((SHARD_COUNT)), \"new_stripe_size\": $((STRIPE_SIZE))}"

# we need an ADMIN API KEY to invoke storage controller API for shard splitting (bash -u above checks that the variable is set)
curl -X PUT \
"https://${API_HOST}/regions/${REGION_ID}/api/v1/admin/storage/proxy/control/v1/tenant/${TENANT_ID}/shard_split" \
-H "Accept: application/json" -H "Content-Type: application/json" -H "Authorization: Bearer ${ADMIN_API_KEY}" \
-d "{\"new_shard_count\": $SHARD_COUNT, \"new_stripe_size\": $STRIPE_SIZE}"
fi

env:
API_HOST: ${{ inputs.api_host }}
API_KEY: ${{ inputs.api_key }}

@@ -115,10 +70,3 @@ runs:
POSTGRES_VERSION: ${{ inputs.postgres_version }}
MIN_CU: ${{ fromJSON(inputs.compute_units)[0] }}
MAX_CU: ${{ fromJSON(inputs.compute_units)[1] }}
SHARD_SPLIT_PROJECT: ${{ inputs.shard_split_project }}
ADMIN_API_KEY: ${{ inputs.admin_api_key }}
SHARD_COUNT: ${{ inputs.shard_count }}
STRIPE_SIZE: ${{ inputs.stripe_size }}
PSQL: ${{ inputs.psql_path }}
LD_LIBRARY_PATH: ${{ inputs.libpq_lib_path }}
PROJECT_SETTINGS: ${{ inputs.project_settings }}
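The stripe_size input above is given in 8 KiB pages, so the page count maps to stripe bytes by simple multiplication. A minimal shell sketch of that arithmetic, using only the numbers that appear in this diff (2048 from the description's example, 32768 from the action's default):

# stripe size in MiB = pages * 8 KiB / 1024
echo $(( 2048  * 8 / 1024 ))   # 16  -> the "set 2048 for 16MB stripes" example
echo $(( 32768 * 8 / 1024 ))   # 256 -> the default of 32768 pages, i.e. 256 MiB stripes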
.github/file-filters.yaml (vendored): 1 line changed

@@ -1,5 +1,4 @@
rust_code: ['**/*.rs', '**/Cargo.toml', '**/Cargo.lock']
rust_dependencies: ['**/Cargo.lock']

v14: ['vendor/postgres-v14/**', 'Makefile', 'pgxn/**']
v15: ['vendor/postgres-v15/**', 'Makefile', 'pgxn/**']

@@ -17,7 +17,7 @@ jobs:
strategy:
fail-fast: false
matrix:
platform: [ aws-rds-postgres, aws-aurora-serverless-v2-postgres, neon, neon_pg17 ]
platform: [ aws-rds-postgres, aws-aurora-serverless-v2-postgres, neon ]
database: [ clickbench, tpch, userexample ]

env:

@@ -41,9 +41,6 @@ jobs:
neon)
CONNSTR=${{ secrets.BENCHMARK_CAPTEST_CONNSTR }}
;;
neon_pg17)
CONNSTR=${{ secrets.BENCHMARK_CAPTEST_CONNSTR_PG17 }}
;;
aws-rds-postgres)
CONNSTR=${{ secrets.BENCHMARK_RDS_POSTGRES_CONNSTR }}
;;
.github/workflows/_build-and-test-locally.yml (vendored): 34 lines changed

@@ -158,6 +158,8 @@ jobs:

- name: Run cargo build
run: |
PQ_LIB_DIR=$(pwd)/pg_install/v16/lib
export PQ_LIB_DIR
${cov_prefix} mold -run cargo build $CARGO_FLAGS $CARGO_FEATURES --bins --tests

# Do install *before* running rust tests because they might recompile the

@@ -215,6 +217,8 @@ jobs:
env:
NEXTEST_RETRIES: 3
run: |
PQ_LIB_DIR=$(pwd)/pg_install/v16/lib
export PQ_LIB_DIR
LD_LIBRARY_PATH=$(pwd)/pg_install/v17/lib
export LD_LIBRARY_PATH

@@ -225,13 +229,8 @@ jobs:
${cov_prefix} cargo nextest run $CARGO_FLAGS $CARGO_FEATURES -E '!package(pageserver)'

# run pageserver tests with different settings
for get_vectored_concurrent_io in sequential sidecar-task; do
for io_engine in std-fs tokio-epoll-uring ; do
NEON_PAGESERVER_UNIT_TEST_GET_VECTORED_CONCURRENT_IO=$get_vectored_concurrent_io \
NEON_PAGESERVER_UNIT_TEST_VIRTUAL_FILE_IOENGINE=$io_engine \
${cov_prefix} \
cargo nextest run $CARGO_FLAGS $CARGO_FEATURES -E 'package(pageserver)'
done
for io_engine in std-fs tokio-epoll-uring ; do
NEON_PAGESERVER_UNIT_TEST_VIRTUAL_FILE_IOENGINE=$io_engine ${cov_prefix} cargo nextest run $CARGO_FLAGS $CARGO_FEATURES -E 'package(pageserver)'
done

# Run separate tests for real S3

@@ -267,26 +266,6 @@ jobs:
path: /tmp/neon
aws-oicd-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}

- name: Check diesel schema
if: inputs.build-type == 'release' && inputs.arch == 'x64'
env:
DATABASE_URL: postgresql://localhost:1235/storage_controller
POSTGRES_DISTRIB_DIR: /tmp/neon/pg_install
run: |
/tmp/neon/bin/neon_local init
/tmp/neon/bin/neon_local storage_controller start

diesel print-schema > storage_controller/src/schema.rs

if [ -n "$(git diff storage_controller/src/schema.rs)" ]; then
echo >&2 "Uncommitted changes in diesel schema"

git diff .
exit 1
fi

/tmp/neon/bin/neon_local storage_controller stop

# XXX: keep this after the binaries.list is formed, so the coverage can properly work later
- name: Merge and upload coverage data
if: inputs.build-type == 'debug'

@@ -335,7 +314,6 @@ jobs:
CHECK_ONDISK_DATA_COMPATIBILITY: nonempty
BUILD_TAG: ${{ inputs.build-tag }}
PAGESERVER_VIRTUAL_FILE_IO_ENGINE: tokio-epoll-uring
PAGESERVER_GET_VECTORED_CONCURRENT_IO: sidecar-task
USE_LFC: ${{ matrix.lfc_state == 'with-lfc' && 'true' || 'false' }}

# Temporary disable this step until we figure out why it's so flaky
.github/workflows/_check-codestyle-rust.yml (vendored): 89 lines changed

@@ -1,89 +0,0 @@
name: Check Codestyle Rust

on:
workflow_call:
inputs:
build-tools-image:
description: "build-tools image"
required: true
type: string
archs:
description: "Json array of architectures to run on"
type: string


defaults:
run:
shell: bash -euxo pipefail {0}

# No permission for GITHUB_TOKEN by default; the **minimal required** set of permissions should be granted in each job.
permissions: {}

jobs:
check-codestyle-rust:
strategy:
matrix:
arch: ${{ fromJson(inputs.archs) }}
runs-on: ${{ fromJson(format('["self-hosted", "{0}"]', matrix.arch == 'arm64' && 'small-arm64' || 'small')) }}

container:
image: ${{ inputs.build-tools-image }}
credentials:
username: ${{ secrets.NEON_DOCKERHUB_USERNAME }}
password: ${{ secrets.NEON_DOCKERHUB_PASSWORD }}
options: --init

steps:
- name: Checkout
uses: actions/checkout@v4
with:
submodules: true

- name: Cache cargo deps
uses: actions/cache@v4
with:
path: |
~/.cargo/registry
!~/.cargo/registry/src
~/.cargo/git
target
key: v1-${{ runner.os }}-${{ runner.arch }}-cargo-${{ hashFiles('./Cargo.lock') }}-${{ hashFiles('./rust-toolchain.toml') }}-rust

# Some of our rust modules use FFI and need those to be checked
- name: Get postgres headers
run: make postgres-headers -j$(nproc)

# cargo hack runs the given cargo subcommand (clippy in this case) for all feature combinations.
# This will catch compiler & clippy warnings in all feature combinations.
# TODO: use cargo hack for build and test as well, but, that's quite expensive.
# NB: keep clippy args in sync with ./run_clippy.sh
#
# The only difference between "clippy --debug" and "clippy --release" is that in --release mode,
# #[cfg(debug_assertions)] blocks are not built. It's not worth building everything for second
# time just for that, so skip "clippy --release".
- run: |
CLIPPY_COMMON_ARGS="$( source .neon_clippy_args; echo "$CLIPPY_COMMON_ARGS")"
if [ "$CLIPPY_COMMON_ARGS" = "" ]; then
echo "No clippy args found in .neon_clippy_args"
exit 1
fi
echo "CLIPPY_COMMON_ARGS=${CLIPPY_COMMON_ARGS}" >> $GITHUB_ENV
- name: Run cargo clippy (debug)
run: cargo hack --features default --ignore-unknown-features --feature-powerset clippy $CLIPPY_COMMON_ARGS

- name: Check documentation generation
run: cargo doc --workspace --no-deps --document-private-items
env:
RUSTDOCFLAGS: "-Dwarnings -Arustdoc::private_intra_doc_links"

# Use `${{ !cancelled() }}` to run quck tests after the longer clippy run
- name: Check formatting
if: ${{ !cancelled() }}
run: cargo fmt --all -- --check

# https://github.com/facebookincubator/cargo-guppy/tree/bec4e0eb29dcd1faac70b1b5360267fc02bf830e/tools/cargo-hakari#2-keep-the-workspace-hack-up-to-date-in-ci
- name: Check rust dependencies
if: ${{ !cancelled() }}
run: |
cargo hakari generate --diff # workspace-hack Cargo.toml is up-to-date
cargo hakari manage-deps --dry-run # all workspace crates depend on workspace-hack
.github/workflows/approved-for-ci-run.yml (vendored): 4 lines changed

@@ -94,9 +94,7 @@ jobs:
echo "LABELS_TO_ADD=${LABELS_TO_ADD}" >> ${GITHUB_OUTPUT}
echo "LABELS_TO_REMOVE=${LABELS_TO_REMOVE}" >> ${GITHUB_OUTPUT}

- uses: actions/checkout@v4
with:
ref: ${{ github.event.pull_request.head.sha }}
- run: gh pr checkout "${PR_NUMBER}"

- run: git checkout -b "${BRANCH}"
.github/workflows/benchmarking.yml (vendored): 171 lines changed

@@ -63,15 +63,11 @@ jobs:
fail-fast: false
matrix:
include:
- PG_VERSION: 16
- DEFAULT_PG_VERSION: 16
PLATFORM: "neon-staging"
region_id: ${{ github.event.inputs.region_id || 'aws-us-east-2' }}
RUNNER: [ self-hosted, us-east-2, x64 ]
- PG_VERSION: 17
PLATFORM: "neon-staging"
region_id: ${{ github.event.inputs.region_id || 'aws-us-east-2' }}
RUNNER: [ self-hosted, us-east-2, x64 ]
- PG_VERSION: 16
- DEFAULT_PG_VERSION: 16
PLATFORM: "azure-staging"
region_id: 'azure-eastus2'
RUNNER: [ self-hosted, eastus2, x64 ]

@@ -79,7 +75,7 @@ jobs:
TEST_PG_BENCH_DURATIONS_MATRIX: "300"
TEST_PG_BENCH_SCALES_MATRIX: "10,100"
POSTGRES_DISTRIB_DIR: /tmp/neon/pg_install
PG_VERSION: ${{ matrix.PG_VERSION }}
DEFAULT_PG_VERSION: ${{ matrix.DEFAULT_PG_VERSION }}
TEST_OUTPUT: /tmp/test_output
BUILD_TYPE: remote
SAVE_PERF_REPORT: ${{ github.event.inputs.save_perf_report || ( github.ref_name == 'main' ) }}

@@ -116,7 +112,7 @@ jobs:
uses: ./.github/actions/neon-project-create
with:
region_id: ${{ matrix.region_id }}
postgres_version: ${{ env.PG_VERSION }}
postgres_version: ${{ env.DEFAULT_PG_VERSION }}
api_key: ${{ secrets.NEON_STAGING_API_KEY }}

- name: Run benchmark

@@ -126,7 +122,7 @@ jobs:
test_selection: performance
run_in_parallel: false
save_perf_report: ${{ env.SAVE_PERF_REPORT }}
pg_version: ${{ env.PG_VERSION }}
pg_version: ${{ env.DEFAULT_PG_VERSION }}
aws-oicd-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
# Set --sparse-ordering option of pytest-order plugin
# to ensure tests are running in order of appears in the file.

@@ -317,11 +313,7 @@ jobs:
{ "pg_version": 16, "region_id": "azure-eastus2", "platform": "neonvm-azure-captest-freetier", "db_size": "3gb" ,"runner": '"$runner_azure"', "image": "neondatabase/build-tools:pinned-bookworm" },
{ "pg_version": 16, "region_id": "azure-eastus2", "platform": "neonvm-azure-captest-new", "db_size": "10gb","runner": '"$runner_azure"', "image": "neondatabase/build-tools:pinned-bookworm" },
{ "pg_version": 16, "region_id": "azure-eastus2", "platform": "neonvm-azure-captest-new", "db_size": "50gb","runner": '"$runner_azure"', "image": "neondatabase/build-tools:pinned-bookworm" },
{ "pg_version": 16, "region_id": "'"$region_id_default"'", "platform": "neonvm-captest-sharding-reuse", "db_size": "50gb","runner": '"$runner_default"', "image": "'"$image_default"'" },
{ "pg_version": 17, "region_id": "'"$region_id_default"'", "platform": "neonvm-captest-freetier", "db_size": "3gb" ,"runner": '"$runner_default"', "image": "'"$image_default"'" },
{ "pg_version": 17, "region_id": "'"$region_id_default"'", "platform": "neonvm-captest-new", "db_size": "10gb","runner": '"$runner_default"', "image": "'"$image_default"'" },
{ "pg_version": 17, "region_id": "'"$region_id_default"'", "platform": "neonvm-captest-new-many-tables","db_size": "10gb","runner": '"$runner_default"', "image": "'"$image_default"'" },
{ "pg_version": 17, "region_id": "'"$region_id_default"'", "platform": "neonvm-captest-new", "db_size": "50gb","runner": '"$runner_default"', "image": "'"$image_default"'" }]
{ "pg_version": 16, "region_id": "'"$region_id_default"'", "platform": "neonvm-captest-sharding-reuse", "db_size": "50gb","runner": '"$runner_default"', "image": "'"$image_default"'" }]
}'

if [ "$(date +%A)" = "Saturday" ] || [ ${RUN_AWS_RDS_AND_AURORA} = "true" ]; then

@@ -337,15 +329,12 @@ jobs:
matrix='{
"platform": [
"neonvm-captest-reuse"
],
"pg_version" : [
16,17
]
}'

if [ "$(date +%A)" = "Saturday" ] || [ ${RUN_AWS_RDS_AND_AURORA} = "true" ]; then
matrix=$(echo "$matrix" | jq '.include += [{ "pg_version": 16, "platform": "rds-postgres" },
{ "pg_version": 16, "platform": "rds-aurora" }]')
matrix=$(echo "$matrix" | jq '.include += [{ "platform": "rds-postgres" },
{ "platform": "rds-aurora" }]')
fi

echo "matrix=$(echo "$matrix" | jq --compact-output '.')" >> $GITHUB_OUTPUT

@@ -357,14 +346,14 @@ jobs:
"platform": [
"neonvm-captest-reuse"
],
"pg_version" : [
16,17
"scale": [
"10"
]
}'

if [ "$(date +%A)" = "Saturday" ] || [ ${RUN_AWS_RDS_AND_AURORA} = "true" ]; then
matrix=$(echo "$matrix" | jq '.include += [{ "pg_version": 16, "platform": "rds-postgres" },
{ "pg_version": 16, "platform": "rds-aurora" }]')
matrix=$(echo "$matrix" | jq '.include += [{ "platform": "rds-postgres", "scale": "10" },
{ "platform": "rds-aurora", "scale": "10" }]')
fi

echo "matrix=$(echo "$matrix" | jq --compact-output '.')" >> $GITHUB_OUTPUT

@@ -389,7 +378,7 @@ jobs:
TEST_PG_BENCH_DURATIONS_MATRIX: "60m"
TEST_PG_BENCH_SCALES_MATRIX: ${{ matrix.db_size }}
POSTGRES_DISTRIB_DIR: /tmp/neon/pg_install
PG_VERSION: ${{ matrix.pg_version }}
DEFAULT_PG_VERSION: ${{ matrix.pg_version }}
TEST_OUTPUT: /tmp/test_output
BUILD_TYPE: remote
SAVE_PERF_REPORT: ${{ github.event.inputs.save_perf_report || ( github.ref_name == 'main' ) }}

@@ -427,7 +416,7 @@ jobs:
uses: ./.github/actions/neon-project-create
with:
region_id: ${{ matrix.region_id }}
postgres_version: ${{ env.PG_VERSION }}
postgres_version: ${{ env.DEFAULT_PG_VERSION }}
api_key: ${{ secrets.NEON_STAGING_API_KEY }}
compute_units: ${{ (contains(matrix.platform, 'captest-freetier') && '[0.25, 0.25]') || '[1, 1]' }}

@@ -458,7 +447,7 @@ jobs:

echo "connstr=${CONNSTR}" >> $GITHUB_OUTPUT

# we want to compare Neon project OLTP throughput and latency at scale factor 10 GB
# we want to compare Neon project OLTP throughput and latency at scale factor 10 GB
# without (neonvm-captest-new)
# and with (neonvm-captest-new-many-tables) many relations in the database
- name: Create many relations before the run

@@ -470,7 +459,7 @@ jobs:
run_in_parallel: false
save_perf_report: ${{ env.SAVE_PERF_REPORT }}
extra_params: -m remote_cluster --timeout 21600 -k test_perf_many_relations
pg_version: ${{ env.PG_VERSION }}
pg_version: ${{ env.DEFAULT_PG_VERSION }}
aws-oicd-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
env:
BENCHMARK_CONNSTR: ${{ steps.set-up-connstr.outputs.connstr }}

@@ -486,7 +475,7 @@ jobs:
run_in_parallel: false
save_perf_report: ${{ env.SAVE_PERF_REPORT }}
extra_params: -m remote_cluster --timeout 21600 -k test_pgbench_remote_init
pg_version: ${{ env.PG_VERSION }}
pg_version: ${{ env.DEFAULT_PG_VERSION }}
aws-oicd-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
env:
BENCHMARK_CONNSTR: ${{ steps.set-up-connstr.outputs.connstr }}

@@ -501,7 +490,7 @@ jobs:
run_in_parallel: false
save_perf_report: ${{ env.SAVE_PERF_REPORT }}
extra_params: -m remote_cluster --timeout 21600 -k test_pgbench_remote_simple_update
pg_version: ${{ env.PG_VERSION }}
pg_version: ${{ env.DEFAULT_PG_VERSION }}
aws-oicd-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
env:
BENCHMARK_CONNSTR: ${{ steps.set-up-connstr.outputs.connstr }}

@@ -516,7 +505,7 @@ jobs:
run_in_parallel: false
save_perf_report: ${{ env.SAVE_PERF_REPORT }}
extra_params: -m remote_cluster --timeout 21600 -k test_pgbench_remote_select_only
pg_version: ${{ env.PG_VERSION }}
pg_version: ${{ env.DEFAULT_PG_VERSION }}
aws-oicd-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
env:
BENCHMARK_CONNSTR: ${{ steps.set-up-connstr.outputs.connstr }}

@@ -560,19 +549,14 @@ jobs:
include:
- PLATFORM: "neonvm-captest-pgvector"
RUNNER: [ self-hosted, us-east-2, x64 ]
postgres_version: 16
- PLATFORM: "neonvm-captest-pgvector-pg17"
RUNNER: [ self-hosted, us-east-2, x64 ]
postgres_version: 17
- PLATFORM: "azure-captest-pgvector"
RUNNER: [ self-hosted, eastus2, x64 ]
postgres_version: 16

env:
TEST_PG_BENCH_DURATIONS_MATRIX: "15m"
TEST_PG_BENCH_SCALES_MATRIX: "1"
POSTGRES_DISTRIB_DIR: /tmp/neon/pg_install
PG_VERSION: ${{ matrix.postgres_version }}
DEFAULT_PG_VERSION: 16
TEST_OUTPUT: /tmp/test_output
BUILD_TYPE: remote

@@ -590,20 +574,32 @@ jobs:
steps:
- uses: actions/checkout@v4

- name: Configure AWS credentials
uses: aws-actions/configure-aws-credentials@v4
with:
aws-region: eu-central-1
role-to-assume: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
role-duration-seconds: 18000 # 5 hours
# until https://github.com/neondatabase/neon/issues/8275 is fixed we temporarily install postgresql-16
# instead of using Neon artifacts containing pgbench
- name: Install postgresql-16 where pytest expects it
run: |
# Just to make it easier to test things locally on macOS (with arm64)
arch=$(uname -m | sed 's/x86_64/amd64/g' | sed 's/aarch64/arm64/g')

- name: Download Neon artifact
uses: ./.github/actions/download
with:
name: neon-${{ runner.os }}-${{ runner.arch }}-release-artifact
path: /tmp/neon/
prefix: latest
aws-oicd-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
cd /home/nonroot
wget -q "https://apt.postgresql.org/pub/repos/apt/pool/main/p/postgresql-17/libpq5_17.2-1.pgdg120+1_${arch}.deb"
wget -q "https://apt.postgresql.org/pub/repos/apt/pool/main/p/postgresql-16/postgresql-client-16_16.6-1.pgdg120+1_${arch}.deb"
wget -q "https://apt.postgresql.org/pub/repos/apt/pool/main/p/postgresql-16/postgresql-16_16.6-1.pgdg120+1_${arch}.deb"
dpkg -x libpq5_17.2-1.pgdg120+1_${arch}.deb pg
dpkg -x postgresql-16_16.6-1.pgdg120+1_${arch}.deb pg
dpkg -x postgresql-client-16_16.6-1.pgdg120+1_${arch}.deb pg

mkdir -p /tmp/neon/pg_install/v16/bin
ln -s /home/nonroot/pg/usr/lib/postgresql/16/bin/pgbench /tmp/neon/pg_install/v16/bin/pgbench
ln -s /home/nonroot/pg/usr/lib/postgresql/16/bin/psql /tmp/neon/pg_install/v16/bin/psql
ln -s /home/nonroot/pg/usr/lib/$(uname -m)-linux-gnu /tmp/neon/pg_install/v16/lib

LD_LIBRARY_PATH="/home/nonroot/pg/usr/lib/$(uname -m)-linux-gnu:${LD_LIBRARY_PATH:-}"
export LD_LIBRARY_PATH
echo "LD_LIBRARY_PATH=${LD_LIBRARY_PATH}" >> ${GITHUB_ENV}

/tmp/neon/pg_install/v16/bin/pgbench --version
/tmp/neon/pg_install/v16/bin/psql --version

- name: Set up Connection String
id: set-up-connstr

@@ -612,9 +608,6 @@ jobs:
neonvm-captest-pgvector)
CONNSTR=${{ secrets.BENCHMARK_PGVECTOR_CONNSTR }}
;;
neonvm-captest-pgvector-pg17)
CONNSTR=${{ secrets.BENCHMARK_PGVECTOR_CONNSTR_PG17 }}
;;
azure-captest-pgvector)
CONNSTR=${{ secrets.BENCHMARK_PGVECTOR_CONNSTR_AZURE }}
;;

@@ -626,6 +619,13 @@ jobs:

echo "connstr=${CONNSTR}" >> $GITHUB_OUTPUT

- name: Configure AWS credentials
uses: aws-actions/configure-aws-credentials@v4
with:
aws-region: eu-central-1
role-to-assume: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
role-duration-seconds: 18000 # 5 hours

- name: Benchmark pgvector hnsw indexing
uses: ./.github/actions/run-python-test-set
with:

@@ -634,7 +634,7 @@ jobs:
run_in_parallel: false
save_perf_report: ${{ env.SAVE_PERF_REPORT }}
extra_params: -m remote_cluster --timeout 21600 -k test_pgvector_indexing
pg_version: ${{ env.PG_VERSION }}
pg_version: ${{ env.DEFAULT_PG_VERSION }}
aws-oicd-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
env:
VIP_VAP_ACCESS_TOKEN: "${{ secrets.VIP_VAP_ACCESS_TOKEN }}"

@@ -649,7 +649,7 @@ jobs:
run_in_parallel: false
save_perf_report: ${{ env.SAVE_PERF_REPORT }}
extra_params: -m remote_cluster --timeout 21600
pg_version: ${{ env.PG_VERSION }}
pg_version: ${{ env.DEFAULT_PG_VERSION }}
aws-oicd-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
env:
BENCHMARK_CONNSTR: ${{ steps.set-up-connstr.outputs.connstr }}

@@ -696,7 +696,7 @@ jobs:

env:
POSTGRES_DISTRIB_DIR: /tmp/neon/pg_install
PG_VERSION: ${{ matrix.pg_version }}
DEFAULT_PG_VERSION: 16
TEST_OUTPUT: /tmp/test_output
TEST_OLAP_COLLECT_EXPLAIN: ${{ github.event.inputs.collect_olap_explain }}
TEST_OLAP_COLLECT_PG_STAT_STATEMENTS: ${{ github.event.inputs.collect_pg_stat_statements }}

@@ -739,18 +739,7 @@ jobs:
run: |
case "${PLATFORM}" in
neonvm-captest-reuse)
case "${PG_VERSION}" in
16)
CONNSTR=${{ secrets.BENCHMARK_CAPTEST_CLICKBENCH_10M_CONNSTR }}
;;
17)
CONNSTR=${{ secrets.BENCHMARK_CAPTEST_CLICKBENCH_CONNSTR_PG17 }}
;;
*)
echo >&2 "Unsupported PG_VERSION=${PG_VERSION} for PLATFORM=${PLATFORM}"
exit 1
;;
esac
CONNSTR=${{ secrets.BENCHMARK_CAPTEST_CLICKBENCH_10M_CONNSTR }}
;;
rds-aurora)
CONNSTR=${{ secrets.BENCHMARK_RDS_AURORA_CLICKBENCH_10M_CONNSTR }}

@@ -774,7 +763,7 @@ jobs:
run_in_parallel: false
save_perf_report: ${{ env.SAVE_PERF_REPORT }}
extra_params: -m remote_cluster --timeout 43200 -k test_clickbench
pg_version: ${{ env.PG_VERSION }}
pg_version: ${{ env.DEFAULT_PG_VERSION }}
aws-oicd-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
env:
VIP_VAP_ACCESS_TOKEN: "${{ secrets.VIP_VAP_ACCESS_TOKEN }}"

@@ -810,7 +799,7 @@ jobs:
# We might change it after https://github.com/neondatabase/neon/issues/2900.
#
# *_TPCH_S10_CONNSTR: DB generated with scale factor 10 (~10 GB)
# if: ${{ !cancelled() && (github.event.inputs.run_only_pgvector_tests == 'false' || github.event.inputs.run_only_pgvector_tests == null) }}
if: ${{ !cancelled() && (github.event.inputs.run_only_pgvector_tests == 'false' || github.event.inputs.run_only_pgvector_tests == null) }}
permissions:
contents: write
statuses: write

@@ -823,11 +812,12 @@ jobs:

env:
POSTGRES_DISTRIB_DIR: /tmp/neon/pg_install
PG_VERSION: ${{ matrix.pg_version }}
DEFAULT_PG_VERSION: 16
TEST_OUTPUT: /tmp/test_output
BUILD_TYPE: remote
SAVE_PERF_REPORT: ${{ github.event.inputs.save_perf_report || ( github.ref_name == 'main' ) }}
PLATFORM: ${{ matrix.platform }}
TEST_OLAP_SCALE: ${{ matrix.scale }}

runs-on: [ self-hosted, us-east-2, x64 ]
container:

@@ -859,24 +849,13 @@ jobs:
run: |
case "${PLATFORM}" in
neonvm-captest-reuse)
case "${PG_VERSION}" in
16)
CONNSTR_SECRET_NAME="BENCHMARK_CAPTEST_TPCH_S10_CONNSTR"
;;
17)
CONNSTR_SECRET_NAME="BENCHMARK_CAPTEST_TPCH_CONNSTR_PG17"
;;
*)
echo >&2 "Unsupported PG_VERSION=${PG_VERSION} for PLATFORM=${PLATFORM}"
exit 1
;;
esac
ENV_PLATFORM=CAPTEST_TPCH
;;
rds-aurora)
CONNSTR_SECRET_NAME="BENCHMARK_RDS_AURORA_TPCH_S10_CONNSTR"
ENV_PLATFORM=RDS_AURORA_TPCH
;;
rds-postgres)
CONNSTR_SECRET_NAME="BENCHMARK_RDS_POSTGRES_TPCH_S10_CONNSTR"
ENV_PLATFORM=RDS_POSTGRES_TPCH
;;
*)
echo >&2 "Unknown PLATFORM=${PLATFORM}. Allowed only 'neonvm-captest-reuse', 'rds-aurora', or 'rds-postgres'"

@@ -884,6 +863,7 @@ jobs:
;;
esac

CONNSTR_SECRET_NAME="BENCHMARK_${ENV_PLATFORM}_S${TEST_OLAP_SCALE}_CONNSTR"
echo "CONNSTR_SECRET_NAME=${CONNSTR_SECRET_NAME}" >> $GITHUB_ENV

- name: Set up Connection String

@@ -901,13 +881,13 @@ jobs:
run_in_parallel: false
save_perf_report: ${{ env.SAVE_PERF_REPORT }}
extra_params: -m remote_cluster --timeout 21600 -k test_tpch
pg_version: ${{ env.PG_VERSION }}
pg_version: ${{ env.DEFAULT_PG_VERSION }}
aws-oicd-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
env:
VIP_VAP_ACCESS_TOKEN: "${{ secrets.VIP_VAP_ACCESS_TOKEN }}"
PERF_TEST_RESULT_CONNSTR: "${{ secrets.PERF_TEST_RESULT_CONNSTR }}"
BENCHMARK_CONNSTR: ${{ steps.set-up-connstr.outputs.connstr }}
TEST_OLAP_SCALE: 10
TEST_OLAP_SCALE: ${{ matrix.scale }}

- name: Create Allure report
id: create-allure-report

@@ -929,7 +909,7 @@ jobs:
SLACK_BOT_TOKEN: ${{ secrets.SLACK_BOT_TOKEN }}

user-examples-compare:
# if: ${{ !cancelled() && (github.event.inputs.run_only_pgvector_tests == 'false' || github.event.inputs.run_only_pgvector_tests == null) }}
if: ${{ !cancelled() && (github.event.inputs.run_only_pgvector_tests == 'false' || github.event.inputs.run_only_pgvector_tests == null) }}
permissions:
contents: write
statuses: write

@@ -942,7 +922,7 @@ jobs:

env:
POSTGRES_DISTRIB_DIR: /tmp/neon/pg_install
PG_VERSION: ${{ matrix.pg_version }}
DEFAULT_PG_VERSION: 16
TEST_OUTPUT: /tmp/test_output
BUILD_TYPE: remote
SAVE_PERF_REPORT: ${{ github.event.inputs.save_perf_report || ( github.ref_name == 'main' ) }}

@@ -979,18 +959,7 @@ jobs:
run: |
case "${PLATFORM}" in
neonvm-captest-reuse)
case "${PG_VERSION}" in
16)
CONNSTR=${{ secrets.BENCHMARK_USER_EXAMPLE_CAPTEST_CONNSTR }}
;;
17)
CONNSTR=${{ secrets.BENCHMARK_CAPTEST_USER_EXAMPLE_CONNSTR_PG17 }}
;;
*)
echo >&2 "Unsupported PG_VERSION=${PG_VERSION} for PLATFORM=${PLATFORM}"
exit 1
;;
esac
CONNSTR=${{ secrets.BENCHMARK_USER_EXAMPLE_CAPTEST_CONNSTR }}
;;
rds-aurora)
CONNSTR=${{ secrets.BENCHMARK_USER_EXAMPLE_RDS_AURORA_CONNSTR }}

@@ -1014,7 +983,7 @@ jobs:
run_in_parallel: false
save_perf_report: ${{ env.SAVE_PERF_REPORT }}
extra_params: -m remote_cluster --timeout 21600 -k test_user_examples
pg_version: ${{ env.PG_VERSION }}
pg_version: ${{ env.DEFAULT_PG_VERSION }}
aws-oicd-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
env:
VIP_VAP_ACCESS_TOKEN: "${{ secrets.VIP_VAP_ACCESS_TOKEN }}"
.github/workflows/build-macos.yml (vendored): 2 lines changed

@@ -235,7 +235,7 @@ jobs:
echo 'CPPFLAGS=-I/usr/local/opt/openssl@3/include' >> $GITHUB_ENV

- name: Run cargo build (only for v17)
run: cargo build --all --release -j$(sysctl -n hw.ncpu)
run: PQ_LIB_DIR=$(pwd)/pg_install/v17/lib cargo build --all --release -j$(sysctl -n hw.ncpu)

- name: Check that no warnings are produced (only for v17)
run: ./run_clippy.sh
.github/workflows/build_and_test.yml (vendored): 221 lines changed

@@ -45,26 +45,6 @@ jobs:
run cancel-previous-in-concurrency-group.yml \
--field concurrency_group="${{ env.E2E_CONCURRENCY_GROUP }}"

files-changed:
needs: [ check-permissions ]
runs-on: [ self-hosted, small ]
timeout-minutes: 3
outputs:
check-rust-dependencies: ${{ steps.files-changed.outputs.rust_dependencies }}

steps:
- name: Checkout
uses: actions/checkout@v4
with:
submodules: true

- name: Check for file changes
uses: dorny/paths-filter@de90cc6fb38fc0963ad72b210f1f284cd68cea36 # v3.0.2
id: files-changed
with:
token: ${{ secrets.GITHUB_TOKEN }}
filters: .github/file-filters.yaml

tag:
needs: [ check-permissions ]
runs-on: [ self-hosted, small ]

@@ -184,19 +164,77 @@ jobs:

check-codestyle-rust:
needs: [ check-permissions, build-build-tools-image ]
uses: ./.github/workflows/_check-codestyle-rust.yml
with:
build-tools-image: ${{ needs.build-build-tools-image.outputs.image }}-bookworm
archs: '["x64", "arm64"]'
secrets: inherit
strategy:
matrix:
arch: [ x64, arm64 ]
runs-on: ${{ fromJson(format('["self-hosted", "{0}"]', matrix.arch == 'arm64' && 'small-arm64' || 'small')) }}

check-dependencies-rust:
needs: [ files-changed, build-build-tools-image ]
if: ${{ needs.files-changed.outputs.check-rust-dependencies == 'true' }}
uses: ./.github/workflows/cargo-deny.yml
with:
build-tools-image: ${{ needs.build-build-tools-image.outputs.image }}-bookworm
secrets: inherit
container:
image: ${{ needs.build-build-tools-image.outputs.image }}-bookworm
credentials:
username: ${{ secrets.NEON_DOCKERHUB_USERNAME }}
password: ${{ secrets.NEON_DOCKERHUB_PASSWORD }}
options: --init

steps:
- name: Checkout
uses: actions/checkout@v4
with:
submodules: true

- name: Cache cargo deps
uses: actions/cache@v4
with:
path: |
~/.cargo/registry
!~/.cargo/registry/src
~/.cargo/git
target
key: v1-${{ runner.os }}-${{ runner.arch }}-cargo-${{ hashFiles('./Cargo.lock') }}-${{ hashFiles('./rust-toolchain.toml') }}-rust

# Some of our rust modules use FFI and need those to be checked
- name: Get postgres headers
run: make postgres-headers -j$(nproc)

# cargo hack runs the given cargo subcommand (clippy in this case) for all feature combinations.
# This will catch compiler & clippy warnings in all feature combinations.
# TODO: use cargo hack for build and test as well, but, that's quite expensive.
# NB: keep clippy args in sync with ./run_clippy.sh
#
# The only difference between "clippy --debug" and "clippy --release" is that in --release mode,
# #[cfg(debug_assertions)] blocks are not built. It's not worth building everything for second
# time just for that, so skip "clippy --release".
- run: |
CLIPPY_COMMON_ARGS="$( source .neon_clippy_args; echo "$CLIPPY_COMMON_ARGS")"
if [ "$CLIPPY_COMMON_ARGS" = "" ]; then
echo "No clippy args found in .neon_clippy_args"
exit 1
fi
echo "CLIPPY_COMMON_ARGS=${CLIPPY_COMMON_ARGS}" >> $GITHUB_ENV
- name: Run cargo clippy (debug)
run: cargo hack --features default --ignore-unknown-features --feature-powerset clippy $CLIPPY_COMMON_ARGS

- name: Check documentation generation
run: cargo doc --workspace --no-deps --document-private-items
env:
RUSTDOCFLAGS: "-Dwarnings -Arustdoc::private_intra_doc_links"

# Use `${{ !cancelled() }}` to run quck tests after the longer clippy run
- name: Check formatting
if: ${{ !cancelled() }}
run: cargo fmt --all -- --check

# https://github.com/facebookincubator/cargo-guppy/tree/bec4e0eb29dcd1faac70b1b5360267fc02bf830e/tools/cargo-hakari#2-keep-the-workspace-hack-up-to-date-in-ci
- name: Check rust dependencies
if: ${{ !cancelled() }}
run: |
cargo hakari generate --diff # workspace-hack Cargo.toml is up-to-date
cargo hakari manage-deps --dry-run # all workspace crates depend on workspace-hack

# https://github.com/EmbarkStudios/cargo-deny
- name: Check rust licenses/bans/advisories/sources
if: ${{ !cancelled() }}
run: cargo deny check --hide-inclusion-graph

build-and-test-locally:
needs: [ tag, build-build-tools-image ]

@@ -270,7 +308,7 @@ jobs:
statuses: write
contents: write
pull-requests: write
runs-on: [ self-hosted, small-metal ]
runs-on: [ self-hosted, small ]
container:
image: ${{ needs.build-build-tools-image.outputs.image }}-bookworm
credentials:

@@ -308,22 +346,25 @@ jobs:
# XXX: no coverage data handling here, since benchmarks are run on release builds,
# while coverage is currently collected for the debug ones

report-benchmarks-results-to-slack:
report-benchmarks-failures:
needs: [ benchmarks, create-test-report ]
if: github.ref_name == 'main' && !cancelled() && contains(fromJSON('["success", "failure"]'), needs.benchmarks.result)
if: github.ref_name == 'main' && failure() && needs.benchmarks.result == 'failure'
permissions:
id-token: write # aws-actions/configure-aws-credentials
statuses: write
contents: write
pull-requests: write
runs-on: ubuntu-22.04

steps:
- uses: slackapi/slack-github-action@v2
- uses: slackapi/slack-github-action@v1
with:
method: chat.postMessage
token: ${{ secrets.SLACK_BOT_TOKEN }}
payload: |
channel: "${{ vars.SLACK_ON_CALL_STORAGE_STAGING_STREAM }}"
text: |
Benchmarks on main: *${{ needs.benchmarks.result }}*
- <${{ needs.create-test-report.outputs.report-url }}|Allure report>
- <${{ github.event.head_commit.url }}|${{ github.sha }}>
channel-id: C060CNA47S9 # on-call-staging-storage-stream
slack-message: |
Benchmarks failed on main <${{ github.event.head_commit.url }}|${{ github.sha }}>
<${{ needs.create-test-report.outputs.report-url }}|Allure report>
env:
SLACK_BOT_TOKEN: ${{ secrets.SLACK_BOT_TOKEN }}

create-test-report:
needs: [ check-permissions, build-and-test-locally, coverage-report, build-build-tools-image, benchmarks ]

@@ -682,7 +723,7 @@ jobs:
push: true
pull: true
file: compute/compute-node.Dockerfile
target: extension-tests
target: neon-pg-ext-test
cache-from: type=registry,ref=cache.neon.build/compute-node-${{ matrix.version.pg }}:cache-${{ matrix.version.debian }}-${{ matrix.arch }}
tags: |
neondatabase/neon-test-extensions-${{ matrix.version.pg }}:${{needs.tag.outputs.build-tag}}-${{ matrix.version.debian }}-${{ matrix.arch }}

@@ -814,17 +855,6 @@ jobs:
username: ${{ secrets.NEON_DOCKERHUB_USERNAME }}
password: ${{ secrets.NEON_DOCKERHUB_PASSWORD }}

- name: Get the last compute release tag
id: get-last-compute-release-tag
env:
GH_TOKEN: ${{ secrets.CI_ACCESS_TOKEN }}
run: |
tag=$(gh api -q '[.[].tag_name | select(startswith("release-compute"))][0]'\
-H "Accept: application/vnd.github+json" \
-H "X-GitHub-Api-Version: 2022-11-28" \
"/repos/${{ github.repository }}/releases")
echo tag=${tag} >> ${GITHUB_OUTPUT}

# `neondatabase/neon` contains multiple binaries, all of them use the same input for the version into the same version formatting library.
# Pick pageserver as currently the only binary with extra "version" features printed in the string to verify.
# Regular pageserver version string looks like

@@ -856,28 +886,14 @@ jobs:
TEST_VERSION_ONLY: ${{ matrix.pg_version }}
run: ./docker-compose/docker_compose_test.sh

- name: Print logs and clean up docker-compose test
if: always()
run: |
docker compose --profile test-extensions -f ./docker-compose/docker-compose.yml logs || true
docker compose --profile test-extensions -f ./docker-compose/docker-compose.yml down

- name: Test extension upgrade
timeout-minutes: 20
if: ${{ needs.tag.outputs.build-tag == github.run_id }}
env:
NEWTAG: ${{ needs.tag.outputs.build-tag }}
OLDTAG: ${{ steps.get-last-compute-release-tag.outputs.tag }}
run: ./docker-compose/test_extensions_upgrade.sh

- name: Print logs and clean up
if: always()
run: |
docker compose --profile test-extensions -f ./docker-compose/docker-compose.yml logs || true
docker compose --profile test-extensions -f ./docker-compose/docker-compose.yml down
docker compose -f ./docker-compose/docker-compose.yml logs || 0
docker compose -f ./docker-compose/docker-compose.yml down

promote-images-dev:
needs: [ check-permissions, tag, vm-compute-node-image, neon-image ]
needs: [ check-permissions, tag, vm-compute-node-image ]
runs-on: ubuntu-22.04

permissions:

@@ -912,7 +928,7 @@ jobs:
done

promote-images-prod:
needs: [ check-permissions, tag, test-images, promote-images-dev ]
needs: [ check-permissions, tag, test-images, vm-compute-node-image ]
runs-on: ubuntu-22.04
if: github.ref_name == 'main' || github.ref_name == 'release' || github.ref_name == 'release-proxy' || github.ref_name == 'release-compute'

@@ -1103,7 +1119,6 @@ jobs:
retries: 5
script: |
const tag = "${{ needs.tag.outputs.build-tag }}";
const branch = "${{ github.ref_name }}";

try {
const existingRef = await github.rest.git.getRef({

@@ -1132,6 +1147,12 @@ jobs:
console.log(`Tag ${tag} created successfully.`);
}

// TODO: check how GitHub releases looks for proxy/compute releases and enable them if they're ok
if (context.ref !== 'refs/heads/release') {
console.log(`GitHub release skipped for ${context.ref}.`);
return;
}

try {
const existingRelease = await github.rest.repos.getReleaseByTag({
owner: context.repo.owner,

@@ -1146,48 +1167,11 @@ jobs:
}

console.log(`Release for tag ${tag} does not exist. Creating it...`);

// Find the PR number using the commit SHA
const pullRequests = await github.rest.pulls.list({
owner: context.repo.owner,
repo: context.repo.repo,
state: 'closed',
base: branch,
});

const pr = pullRequests.data.find(pr => pr.merge_commit_sha === context.sha);
const prNumber = pr ? pr.number : null;

// Find the previous release on the branch
const releases = await github.rest.repos.listReleases({
owner: context.repo.owner,
repo: context.repo.repo,
per_page: 100,
});

const branchReleases = releases.data
.filter((release) => {
const regex = new RegExp(`^${branch}-\\d+$`);
return regex.test(release.tag_name) && !release.draft && !release.prerelease;
})
.sort((a, b) => new Date(b.created_at) - new Date(a.created_at));

const previousTag = branchReleases.length > 0 ? branchReleases[0].tag_name : null;

const releaseNotes = [
prNumber
? `Release PR https://github.com/${context.repo.owner}/${context.repo.repo}/pull/${prNumber}.`
: 'Release PR not found.',
previousTag
? `Diff with the previous release https://github.com/${context.repo.owner}/${context.repo.repo}/compare/${previousTag}...${tag}.`
: `No previous release found on branch ${branch}.`,
].join('\n\n');

await github.rest.repos.createRelease({
owner: context.repo.owner,
repo: context.repo.repo,
tag_name: tag,
body: releaseNotes,
generate_release_notes: true,
});
console.log(`Release for tag ${tag} created successfully.`);
}

@@ -1360,8 +1344,6 @@ jobs:
- build-and-test-locally
- check-codestyle-python
- check-codestyle-rust
- check-dependencies-rust
- files-changed
- promote-images-dev
- test-images
- trigger-custom-extensions-build-and-wait

@@ -1374,11 +1356,4 @@ jobs:
if: |
contains(needs.*.result, 'failure')
|| contains(needs.*.result, 'cancelled')
|| (needs.check-dependencies-rust.result == 'skipped' && needs.files-changed.outputs.check-rust-dependencies == 'true')
|| needs.build-and-test-locally.result == 'skipped'
|| needs.check-codestyle-python.result == 'skipped'
|| needs.check-codestyle-rust.result == 'skipped'
|| needs.files-changed.result == 'skipped'
|| needs.promote-images-dev.result == 'skipped'
|| needs.test-images.result == 'skipped'
|| needs.trigger-custom-extensions-build-and-wait.result == 'skipped'
|| contains(needs.*.result, 'skipped')
.github/workflows/cargo-deny.yml (vendored): 57 lines changed

@@ -1,57 +0,0 @@
name: cargo deny checks

on:
workflow_call:
inputs:
build-tools-image:
required: false
type: string
schedule:
- cron: '0 0 * * *'

jobs:
cargo-deny:
strategy:
matrix:
ref: >-
${{
fromJSON(
github.event_name == 'schedule'
&& '["main","release","release-proxy","release-compute"]'
|| format('["{0}"]', github.sha)
)
}}

runs-on: [self-hosted, small]

container:
image: ${{ inputs.build-tools-image || 'neondatabase/build-tools:pinned' }}
credentials:
username: ${{ secrets.NEON_DOCKERHUB_USERNAME }}
password: ${{ secrets.NEON_DOCKERHUB_PASSWORD }}
options: --init

steps:
- name: Checkout
uses: actions/checkout@v4
with:
ref: ${{ matrix.ref }}

- name: Check rust licenses/bans/advisories/sources
env:
CARGO_DENY_TARGET: >-
${{ github.event_name == 'schedule' && 'advisories' || 'all' }}
run: cargo deny check --hide-inclusion-graph $CARGO_DENY_TARGET

- name: Post to a Slack channel
if: ${{ github.event_name == 'schedule' && failure() }}
uses: slackapi/slack-github-action@v2
with:
method: chat.postMessage
token: ${{ secrets.SLACK_BOT_TOKEN }}
payload: |
channel: ${{ vars.SLACK_CICD_CHANNEL_ID }}
text: |
Periodic cargo-deny on ${{ matrix.ref }}: ${{ job.status }}
<${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}|GitHub Run>
Pinging @oncall-devprod.
.github/workflows/ingest_benchmark.yml (vendored): 33 lines changed

@@ -28,24 +28,7 @@ jobs:
strategy:
fail-fast: false # allow other variants to continue even if one fails
matrix:
include:
- target_project: new_empty_project_stripe_size_2048
stripe_size: 2048 # 16 MiB
postgres_version: 16
- target_project: new_empty_project_stripe_size_32768
stripe_size: 32768 # 256 MiB # note that this is different from null because using null will shard_split the project only if it reaches the threshold
# while here it is sharded from the beginning with a shard size of 256 MiB
postgres_version: 16
- target_project: new_empty_project
stripe_size: null # run with neon defaults which will shard split only when reaching the threshold
postgres_version: 16
- target_project: new_empty_project
stripe_size: null # run with neon defaults which will shard split only when reaching the threshold
postgres_version: 17
- target_project: large_existing_project
stripe_size: null # cannot re-shared or choose different stripe size for existing, already sharded project
postgres_version: 16
max-parallel: 1 # we want to run each stripe size sequentially to be able to compare the results
target_project: [new_empty_project, large_existing_project]
permissions:
contents: write
statuses: write

@@ -84,21 +67,17 @@ jobs:
aws-oicd-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}

- name: Create Neon Project
if: ${{ startsWith(matrix.target_project, 'new_empty_project') }}
if: ${{ matrix.target_project == 'new_empty_project' }}
id: create-neon-project-ingest-target
uses: ./.github/actions/neon-project-create
with:
region_id: aws-us-east-2
postgres_version: ${{ matrix.postgres_version }}
postgres_version: 16
compute_units: '[7, 7]' # we want to test large compute here to avoid compute-side bottleneck
api_key: ${{ secrets.NEON_STAGING_API_KEY }}
shard_split_project: ${{ matrix.stripe_size != null && 'true' || 'false' }}
admin_api_key: ${{ secrets.NEON_STAGING_ADMIN_API_KEY }}
shard_count: 8
stripe_size: ${{ matrix.stripe_size }}

- name: Initialize Neon project
if: ${{ startsWith(matrix.target_project, 'new_empty_project') }}
if: ${{ matrix.target_project == 'new_empty_project' }}
env:
BENCHMARK_INGEST_TARGET_CONNSTR: ${{ steps.create-neon-project-ingest-target.outputs.dsn }}
NEW_PROJECT_ID: ${{ steps.create-neon-project-ingest-target.outputs.project_id }}

@@ -151,7 +130,7 @@ jobs:
test_selection: performance/test_perf_ingest_using_pgcopydb.py
run_in_parallel: false
extra_params: -s -m remote_cluster --timeout 86400 -k test_ingest_performance_using_pgcopydb
pg_version: v${{ matrix.postgres_version }}
pg_version: v16
save_perf_report: true
aws-oicd-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
env:

@@ -167,7 +146,7 @@ jobs:
${PSQL} "${BENCHMARK_INGEST_TARGET_CONNSTR}" -c "\dt+"

- name: Delete Neon Project
if: ${{ always() && startsWith(matrix.target_project, 'new_empty_project') }}
if: ${{ always() && matrix.target_project == 'new_empty_project' }}
uses: ./.github/actions/neon-project-delete
with:
project_id: ${{ steps.create-neon-project-ingest-target.outputs.project_id }}
.github/workflows/neon_extra_builds.yml (vendored): 2 lines changed

@@ -114,7 +114,7 @@ jobs:
run: make walproposer-lib -j$(nproc)

- name: Produce the build stats
run: cargo build --all --release --timings -j$(nproc)
run: PQ_LIB_DIR=$(pwd)/pg_install/v17/lib cargo build --all --release --timings -j$(nproc)

- name: Configure AWS credentials
uses: aws-actions/configure-aws-credentials@v4
.github/workflows/pg-clients.yml (vendored): 6 lines changed

@@ -12,8 +12,8 @@ on:
pull_request:
paths:
- '.github/workflows/pg-clients.yml'
- 'test_runner/pg_clients/**/*.py'
- 'test_runner/logical_repl/**/*.py'
- 'test_runner/pg_clients/**'
- 'test_runner/logical_repl/**'
- 'poetry.lock'
workflow_dispatch:

@@ -104,8 +104,6 @@ jobs:
with:
api_key: ${{ secrets.NEON_STAGING_API_KEY }}
postgres_version: ${{ env.DEFAULT_PG_VERSION }}
project_settings: >-
{"enable_logical_replication": true}

- name: Run tests
uses: ./.github/actions/run-python-test-set
.github/workflows/pre-merge-checks.yml (vendored): 45 lines changed

@@ -1,12 +1,6 @@
name: Pre-merge checks

on:
pull_request:
paths:
- .github/workflows/_check-codestyle-python.yml
- .github/workflows/_check-codestyle-rust.yml
- .github/workflows/build-build-tools-image.yml
- .github/workflows/pre-merge-checks.yml
merge_group:
branches:
- main

@@ -23,10 +17,8 @@ jobs:
runs-on: ubuntu-22.04
outputs:
python-changed: ${{ steps.python-src.outputs.any_changed }}
rust-changed: ${{ steps.rust-src.outputs.any_changed }}
steps:
- uses: actions/checkout@v4

- uses: tj-actions/changed-files@4edd678ac3f81e2dc578756871e4d00c19191daf # v45.0.4
id: python-src
with:

@@ -38,31 +30,14 @@ jobs:
poetry.lock
pyproject.toml

- uses: tj-actions/changed-files@4edd678ac3f81e2dc578756871e4d00c19191daf # v45.0.4
id: rust-src
with:
files: |
.github/workflows/_check-codestyle-rust.yml
.github/workflows/build-build-tools-image.yml
.github/workflows/pre-merge-checks.yml
**/**.rs
**/Cargo.toml
Cargo.toml
Cargo.lock

- name: PRINT ALL CHANGED FILES FOR DEBUG PURPOSES
env:
PYTHON_CHANGED_FILES: ${{ steps.python-src.outputs.all_changed_files }}
RUST_CHANGED_FILES: ${{ steps.rust-src.outputs.all_changed_files }}
run: |
echo "${PYTHON_CHANGED_FILES}"
echo "${RUST_CHANGED_FILES}"

build-build-tools-image:
if: |
false
|| needs.get-changed-files.outputs.python-changed == 'true'
|| needs.get-changed-files.outputs.rust-changed == 'true'
if: needs.get-changed-files.outputs.python-changed == 'true'
needs: [ get-changed-files ]
uses: ./.github/workflows/build-build-tools-image.yml
with:

@@ -80,30 +55,18 @@ jobs:
build-tools-image: ${{ needs.build-build-tools-image.outputs.image }}-bookworm-x64
secrets: inherit

check-codestyle-rust:
if: needs.get-changed-files.outputs.rust-changed == 'true'
needs: [ get-changed-files, build-build-tools-image ]
uses: ./.github/workflows/_check-codestyle-rust.yml
with:
# `-bookworm-x64` suffix should match the combination in `build-build-tools-image`
build-tools-image: ${{ needs.build-build-tools-image.outputs.image }}-bookworm-x64
archs: '["x64"]'
secrets: inherit

# To get items from the merge queue merged into main we need to satisfy "Status checks that are required".
# Currently we require 2 jobs (checks with exact name):
# - conclusion
# - neon-cloud-e2e
conclusion:
# Do not run job on Pull Requests as it interferes with the `conclusion` job from the `build_and_test` workflow
if: always() && github.event_name == 'merge_group'
if: always()
permissions:
statuses: write # for `github.repos.createCommitStatus(...)`
contents: write
needs:
- get-changed-files
- check-codestyle-python
- check-codestyle-rust
runs-on: ubuntu-22.04
steps:
- name: Create fake `neon-cloud-e2e` check

@@ -128,8 +91,6 @@ jobs:
- name: Fail the job if any of the dependencies do not succeed or skipped
run: exit 1
if: |
false
|| (needs.check-codestyle-python.result == 'skipped' && needs.get-changed-files.outputs.python-changed == 'true')
|| (needs.check-codestyle-rust.result == 'skipped' && needs.get-changed-files.outputs.rust-changed == 'true')
(contains(needs.check-codestyle-python.result, 'skipped') && needs.get-changed-files.outputs.python-changed == 'true')
|| contains(needs.*.result, 'failure')
|| contains(needs.*.result, 'cancelled')
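The pre-merge-checks workflow above satisfies the required neon-cloud-e2e status check by writing a commit status (the `statuses: write # for github.repos.createCommitStatus(...)` permission and the "Create fake neon-cloud-e2e check" step). As a rough, hedged sketch only (not the workflow's actual implementation, which uses actions/github-script), the same commit-status API call could be made from a shell step with the gh CLI, assuming GH_TOKEN with statuses: write and the usual GITHUB_REPOSITORY / GITHUB_SHA variables are available:

# Hypothetical sketch: mark the required "neon-cloud-e2e" check as successful for this commit.
gh api \
  --method POST \
  "/repos/${GITHUB_REPOSITORY}/statuses/${GITHUB_SHA}" \
  -f state=success \
  -f context=neon-cloud-e2e \
  -f description="Fake check for merge queue"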
5 .github/workflows/release.yml vendored
@@ -3,9 +3,8 @@ name: Create Release Branch
on:
schedule:
# It should be kept in sync with if-condition in jobs
- cron: '0 6 * * THU' # Proxy release
- cron: '0 6 * * FRI' # Storage release
- cron: '0 7 * * FRI' # Compute release
- cron: '0 6 * * THU' # Proxy release
workflow_dispatch:
inputs:
create-storage-release-branch:
@@ -56,7 +55,7 @@ jobs:
ci-access-token: ${{ secrets.CI_ACCESS_TOKEN }}

create-compute-release-branch:
if: ${{ github.event.schedule == '0 7 * * FRI' || inputs.create-compute-release-branch }}
if: inputs.create-compute-release-branch

permissions:
contents: write

572 Cargo.lock generated
File diff suppressed because it is too large
13 Cargo.toml
@@ -54,7 +54,6 @@ async-compression = { version = "0.4.0", features = ["tokio", "gzip", "zstd"] }
atomic-take = "1.1.0"
backtrace = "0.3.74"
flate2 = "1.0.26"
assert-json-diff = "2"
async-stream = "0.3"
async-trait = "0.1"
aws-config = { version = "1.5", default-features = false, features=["rustls", "sso"] }
@@ -66,7 +65,7 @@ aws-smithy-types = "1.2"
aws-credential-types = "1.2.0"
aws-sigv4 = { version = "1.2", features = ["sign-http"] }
aws-types = "1.3"
axum = { version = "0.8.1", features = ["ws"] }
axum = { version = "0.7.9", features = ["ws"] }
base64 = "0.13.0"
bincode = "1.3"
bindgen = "0.70"
@@ -78,10 +77,10 @@ camino = "1.1.6"
cfg-if = "1.0.0"
chrono = { version = "0.4", default-features = false, features = ["clock"] }
clap = { version = "4.0", features = ["derive", "env"] }
clashmap = { version = "1.0", features = ["raw-api"] }
comfy-table = "7.1"
const_format = "0.2"
crc32c = "0.6"
dashmap = { version = "5.5.0", features = ["raw-api"] }
diatomic-waker = { version = "0.2.3" }
either = "1.8"
enum-map = "2.4.2"
@@ -124,7 +123,7 @@ measured = { version = "0.0.22", features=["lasso"] }
measured-process = { version = "0.0.22" }
memoffset = "0.9"
nix = { version = "0.27", features = ["dir", "fs", "process", "socket", "signal", "poll"] }
notify = "8.0.0"
notify = "6.0.0"
num_cpus = "1.15"
num-traits = "0.2.15"
once_cell = "1.13"
@@ -178,7 +177,7 @@ test-context = "0.3"
thiserror = "1.0"
tikv-jemallocator = { version = "0.6", features = ["profiling", "stats", "unprefixed_malloc_on_supported_platforms"] }
tikv-jemalloc-ctl = { version = "0.6", features = ["stats"] }
tokio = { version = "1.41", features = ["macros"] }
tokio = { version = "1.17", features = ["macros"] }
tokio-epoll-uring = { git = "https://github.com/neondatabase/tokio-epoll-uring.git" , branch = "main" }
tokio-io-timeout = "1.2.0"
tokio-postgres-rustls = "0.12.0"
@@ -188,15 +187,13 @@ tokio-tar = "0.3"
tokio-util = { version = "0.7.10", features = ["io", "rt"] }
toml = "0.8"
toml_edit = "0.22"
tonic = {version = "0.12.3", default-features = false, features = ["channel", "tls", "tls-roots"]}
tonic = {version = "0.12.3", features = ["tls", "tls-roots"]}
tower = { version = "0.5.2", default-features = false }
tower-http = { version = "0.6.2", features = ["request-id", "trace"] }
tower-service = "0.3.3"
tracing = "0.1"
tracing-error = "0.2"
tracing-log = "0.2"
tracing-opentelemetry = "0.28"
tracing-serde = "0.2.0"
tracing-subscriber = { version = "0.3", default-features = false, features = ["smallvec", "fmt", "tracing-log", "std", "env-filter", "json"] }
try-lock = "0.2.5"
twox-hash = { version = "1.6.3", default-features = false }

@@ -45,7 +45,7 @@ COPY --chown=nonroot . .

ARG ADDITIONAL_RUSTFLAGS
RUN set -e \
&& RUSTFLAGS="-Clinker=clang -Clink-arg=-fuse-ld=mold -Clink-arg=-Wl,--no-rosegment -Cforce-frame-pointers=yes ${ADDITIONAL_RUSTFLAGS}" cargo build \
&& PQ_LIB_DIR=$(pwd)/pg_install/v${STABLE_PG_VERSION}/lib RUSTFLAGS="-Clinker=clang -Clink-arg=-fuse-ld=mold -Clink-arg=-Wl,--no-rosegment -Cforce-frame-pointers=yes ${ADDITIONAL_RUSTFLAGS}" cargo build \
--bin pg_sni_router \
--bin pageserver \
--bin pagectl \
@@ -64,7 +64,6 @@ ARG DEFAULT_PG_VERSION
WORKDIR /data

RUN set -e \
&& echo 'Acquire::Retries "5";' > /etc/apt/apt.conf.d/80-retries \
&& apt update \
&& apt install -y \
libreadline-dev \
@@ -73,7 +72,6 @@ RUN set -e \
# System postgres for use with client libraries (e.g. in storage controller)
postgresql-15 \
openssl \
&& rm -f /etc/apt/apt.conf.d/80-retries \
&& rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/* \
&& useradd -d /data neon \
&& chown -R neon:neon /data

2 Makefile
@@ -64,6 +64,8 @@ CARGO_BUILD_FLAGS += $(filter -j1,$(MAKEFLAGS))
CARGO_CMD_PREFIX += $(if $(filter n,$(MAKEFLAGS)),,+)
# Force cargo not to print progress bar
CARGO_CMD_PREFIX += CARGO_TERM_PROGRESS_WHEN=never CI=1
# Set PQ_LIB_DIR to make sure `storage_controller` get linked with bundled libpq (through diesel)
CARGO_CMD_PREFIX += PQ_LIB_DIR=$(POSTGRES_INSTALL_DIR)/v16/lib

CACHEDIR_TAG_CONTENTS := "Signature: 8a477f597d28d172789f06886806bc55"

@@ -21,10 +21,8 @@ The Neon storage engine consists of two major components:

See developer documentation in [SUMMARY.md](/docs/SUMMARY.md) for more information.

## Running a local development environment
## Running local installation

Neon can be run on a workstation for small experiments and to test code changes, by
following these instructions.

#### Installing dependencies on Linux
1. Install build dependencies and other applicable packages
@@ -240,7 +238,7 @@ postgres=# select * from t;
> cargo neon stop
```

More advanced usages can be found at [Local Development Control Plane (`neon_local`))](./control_plane/README.md).
More advanced usages can be found at [Control Plane and Neon Local](./control_plane/README.md).

#### Handling build failures

@@ -3,15 +3,6 @@ ARG DEBIAN_VERSION=bookworm
|
||||
FROM debian:bookworm-slim AS pgcopydb_builder
|
||||
ARG DEBIAN_VERSION
|
||||
|
||||
# Use strict mode for bash to catch errors early
|
||||
SHELL ["/bin/bash", "-euo", "pipefail", "-c"]
|
||||
|
||||
# By default, /bin/sh used in debian images will treat '\n' as eol,
|
||||
# but as we use bash as SHELL, and built-in echo in bash requires '-e' flag for that.
|
||||
RUN echo 'Acquire::Retries "5";' > /etc/apt/apt.conf.d/80-retries && \
|
||||
echo -e "retry_connrefused = on\ntimeout=15\ntries=5\n" > /root/.wgetrc && \
|
||||
echo -e "--retry-connrefused\n--connect-timeout 15\n--retry 5\n--max-time 300\n" > /root/.curlrc
|
||||
|
||||
RUN if [ "${DEBIAN_VERSION}" = "bookworm" ]; then \
|
||||
set -e && \
|
||||
apt update && \
|
||||
@@ -60,8 +51,7 @@ ARG DEBIAN_VERSION
|
||||
|
||||
# Add nonroot user
|
||||
RUN useradd -ms /bin/bash nonroot -b /home
|
||||
# Use strict mode for bash to catch errors early
|
||||
SHELL ["/bin/bash", "-euo", "pipefail", "-c"]
|
||||
SHELL ["/bin/bash", "-c"]
|
||||
|
||||
RUN mkdir -p /pgcopydb/bin && \
|
||||
mkdir -p /pgcopydb/lib && \
|
||||
@@ -71,10 +61,6 @@ RUN mkdir -p /pgcopydb/bin && \
|
||||
COPY --from=pgcopydb_builder /usr/lib/postgresql/16/bin/pgcopydb /pgcopydb/bin/pgcopydb
|
||||
COPY --from=pgcopydb_builder /pgcopydb/lib/libpq.so.5 /pgcopydb/lib/libpq.so.5
|
||||
|
||||
RUN echo 'Acquire::Retries "5";' > /etc/apt/apt.conf.d/80-retries && \
|
||||
echo -e "retry_connrefused = on\ntimeout=15\ntries=5\n" > /root/.wgetrc && \
|
||||
echo -e "--retry-connrefused\n--connect-timeout 15\n--retry 5\n--max-time 300\n" > /root/.curlrc
|
||||
|
||||
# System deps
|
||||
#
|
||||
# 'gdb' is included so that we get backtraces of core dumps produced in
|
||||
@@ -129,7 +115,7 @@ RUN set -e \
|
||||
|
||||
# Keep the version the same as in compute/compute-node.Dockerfile and
|
||||
# test_runner/regress/test_compute_metrics.py.
|
||||
ENV SQL_EXPORTER_VERSION=0.17.0
|
||||
ENV SQL_EXPORTER_VERSION=0.16.0
|
||||
RUN curl -fsSL \
|
||||
"https://github.com/burningalchemist/sql_exporter/releases/download/${SQL_EXPORTER_VERSION}/sql_exporter-${SQL_EXPORTER_VERSION}.linux-$(case "$(uname -m)" in x86_64) echo amd64;; aarch64) echo arm64;; esac).tar.gz" \
|
||||
--output sql_exporter.tar.gz \
|
||||
@@ -196,14 +182,8 @@ RUN set -e \
|
||||
# It includes several bug fixes on top on v2.0 release (https://github.com/linux-test-project/lcov/compare/v2.0...master)
|
||||
# And patches from us:
|
||||
# - Generates json file with code coverage summary (https://github.com/neondatabase/lcov/commit/426e7e7a22f669da54278e9b55e6d8caabd00af0.tar.gz)
|
||||
RUN set +o pipefail && \
|
||||
for package in Capture::Tiny DateTime Devel::Cover Digest::MD5 File::Spec JSON::XS Memory::Process Time::HiRes JSON; do \
|
||||
yes | perl -MCPAN -e "CPAN::Shell->notest('install', '$package')";\
|
||||
done && \
|
||||
set -o pipefail
|
||||
# Split into separate step to debug flaky failures here
|
||||
RUN wget https://github.com/neondatabase/lcov/archive/426e7e7a22f669da54278e9b55e6d8caabd00af0.tar.gz -O lcov.tar.gz \
|
||||
&& ls -laht lcov.tar.gz && sha256sum lcov.tar.gz \
|
||||
RUN for package in Capture::Tiny DateTime Devel::Cover Digest::MD5 File::Spec JSON::XS Memory::Process Time::HiRes JSON; do yes | perl -MCPAN -e "CPAN::Shell->notest('install', '$package')"; done \
|
||||
&& wget https://github.com/neondatabase/lcov/archive/426e7e7a22f669da54278e9b55e6d8caabd00af0.tar.gz -O lcov.tar.gz \
|
||||
&& echo "61a22a62e20908b8b9e27d890bd0ea31f567a7b9668065589266371dcbca0992 lcov.tar.gz" | sha256sum --check \
|
||||
&& mkdir -p lcov && tar -xzf lcov.tar.gz -C lcov --strip-components=1 \
|
||||
&& cd lcov \
|
||||
@@ -238,8 +218,6 @@ RUN wget -O /tmp/libicu-${ICU_VERSION}.tgz https://github.com/unicode-org/icu/re
|
||||
USER nonroot:nonroot
|
||||
WORKDIR /home/nonroot
|
||||
|
||||
RUN echo -e "--retry-connrefused\n--connect-timeout 15\n--retry 5\n--max-time 300\n" > /home/nonroot/.curlrc
|
||||
|
||||
# Python
|
||||
ENV PYTHON_VERSION=3.11.10 \
|
||||
PYENV_ROOT=/home/nonroot/.pyenv \
|
||||
@@ -265,7 +243,7 @@ WORKDIR /home/nonroot
|
||||
|
||||
# Rust
|
||||
# Please keep the version of llvm (installed above) in sync with rust llvm (`rustc --version --verbose | grep LLVM`)
|
||||
ENV RUSTC_VERSION=1.84.1
|
||||
ENV RUSTC_VERSION=1.84.0
|
||||
ENV RUSTUP_HOME="/home/nonroot/.rustup"
|
||||
ENV PATH="/home/nonroot/.cargo/bin:${PATH}"
|
||||
ARG RUSTFILT_VERSION=0.2.1
|
||||
@@ -273,7 +251,6 @@ ARG CARGO_HAKARI_VERSION=0.9.33
|
||||
ARG CARGO_DENY_VERSION=0.16.2
|
||||
ARG CARGO_HACK_VERSION=0.6.33
|
||||
ARG CARGO_NEXTEST_VERSION=0.9.85
|
||||
ARG CARGO_DIESEL_CLI_VERSION=2.2.6
|
||||
RUN curl -sSO https://static.rust-lang.org/rustup/dist/$(uname -m)-unknown-linux-gnu/rustup-init && whoami && \
|
||||
chmod +x rustup-init && \
|
||||
./rustup-init -y --default-toolchain ${RUSTC_VERSION} && \
|
||||
@@ -287,8 +264,6 @@ RUN curl -sSO https://static.rust-lang.org/rustup/dist/$(uname -m)-unknown-linux
|
||||
cargo install cargo-deny --locked --version ${CARGO_DENY_VERSION} && \
|
||||
cargo install cargo-hack --version ${CARGO_HACK_VERSION} && \
|
||||
cargo install cargo-nextest --version ${CARGO_NEXTEST_VERSION} && \
|
||||
cargo install diesel_cli --version ${CARGO_DIESEL_CLI_VERSION} \
|
||||
--features postgres-bundled --no-default-features && \
|
||||
rm -rf /home/nonroot/.cargo/registry && \
|
||||
rm -rf /home/nonroot/.cargo/git
|
||||
|
||||
|
||||
File diff suppressed because it is too large
@@ -19,8 +19,6 @@ max_prepared_statements=0
admin_users=postgres
unix_socket_dir=/tmp/
unix_socket_mode=0777
; required for pgbouncer_exporter
ignore_startup_parameters=extra_float_digits

;; Disable connection logging. It produces a lot of logs that no one looks at,
;; and we can get similar log entries from the proxy too. We had incidents in

@@ -1,242 +0,0 @@
|
||||
diff --git a/contrib/amcheck/expected/check_heap.out b/contrib/amcheck/expected/check_heap.out
|
||||
index 979e5e8..2375b45 100644
|
||||
--- a/contrib/amcheck/expected/check_heap.out
|
||||
+++ b/contrib/amcheck/expected/check_heap.out
|
||||
@@ -80,12 +80,9 @@ INSERT INTO heaptest (a, b)
|
||||
-- same transaction. The heaptest table is smaller than the default
|
||||
-- wal_skip_threshold, so a wal_level=minimal commit reads the table into
|
||||
-- shared_buffers. A transaction delays that and excludes any autovacuum.
|
||||
-SET allow_in_place_tablespaces = true;
|
||||
-CREATE TABLESPACE regress_test_stats_tblspc LOCATION '';
|
||||
SELECT sum(reads) AS stats_bulkreads_before
|
||||
FROM pg_stat_io WHERE context = 'bulkread' \gset
|
||||
BEGIN;
|
||||
-ALTER TABLE heaptest SET TABLESPACE regress_test_stats_tblspc;
|
||||
-- Check that valid options are not rejected nor corruption reported
|
||||
-- for a non-empty table
|
||||
SELECT * FROM verify_heapam(relation := 'heaptest', skip := 'none');
|
||||
@@ -118,14 +115,6 @@ SELECT pg_stat_force_next_flush();
|
||||
|
||||
(1 row)
|
||||
|
||||
-SELECT sum(reads) AS stats_bulkreads_after
|
||||
- FROM pg_stat_io WHERE context = 'bulkread' \gset
|
||||
-SELECT :stats_bulkreads_after > :stats_bulkreads_before;
|
||||
- ?column?
|
||||
-----------
|
||||
- t
|
||||
-(1 row)
|
||||
-
|
||||
CREATE ROLE regress_heaptest_role;
|
||||
-- verify permissions are checked (error due to function not callable)
|
||||
SET ROLE regress_heaptest_role;
|
||||
@@ -233,7 +222,6 @@ ERROR: cannot check relation "test_foreign_table"
|
||||
DETAIL: This operation is not supported for foreign tables.
|
||||
-- cleanup
|
||||
DROP TABLE heaptest;
|
||||
-DROP TABLESPACE regress_test_stats_tblspc;
|
||||
DROP TABLE test_partition;
|
||||
DROP TABLE test_partitioned;
|
||||
DROP OWNED BY regress_heaptest_role; -- permissions
|
||||
diff --git a/contrib/amcheck/sql/check_heap.sql b/contrib/amcheck/sql/check_heap.sql
|
||||
index 1745bae..3b429c3 100644
|
||||
--- a/contrib/amcheck/sql/check_heap.sql
|
||||
+++ b/contrib/amcheck/sql/check_heap.sql
|
||||
@@ -40,12 +40,9 @@ INSERT INTO heaptest (a, b)
|
||||
-- same transaction. The heaptest table is smaller than the default
|
||||
-- wal_skip_threshold, so a wal_level=minimal commit reads the table into
|
||||
-- shared_buffers. A transaction delays that and excludes any autovacuum.
|
||||
-SET allow_in_place_tablespaces = true;
|
||||
-CREATE TABLESPACE regress_test_stats_tblspc LOCATION '';
|
||||
SELECT sum(reads) AS stats_bulkreads_before
|
||||
FROM pg_stat_io WHERE context = 'bulkread' \gset
|
||||
BEGIN;
|
||||
-ALTER TABLE heaptest SET TABLESPACE regress_test_stats_tblspc;
|
||||
-- Check that valid options are not rejected nor corruption reported
|
||||
-- for a non-empty table
|
||||
SELECT * FROM verify_heapam(relation := 'heaptest', skip := 'none');
|
||||
@@ -58,9 +55,6 @@ COMMIT;
|
||||
-- ALTER TABLE ... SET TABLESPACE ...
|
||||
-- causing an additional bulkread, which should be reflected in pg_stat_io.
|
||||
SELECT pg_stat_force_next_flush();
|
||||
-SELECT sum(reads) AS stats_bulkreads_after
|
||||
- FROM pg_stat_io WHERE context = 'bulkread' \gset
|
||||
-SELECT :stats_bulkreads_after > :stats_bulkreads_before;
|
||||
|
||||
CREATE ROLE regress_heaptest_role;
|
||||
|
||||
@@ -140,7 +134,6 @@ SELECT * FROM verify_heapam('test_foreign_table',
|
||||
|
||||
-- cleanup
|
||||
DROP TABLE heaptest;
|
||||
-DROP TABLESPACE regress_test_stats_tblspc;
|
||||
DROP TABLE test_partition;
|
||||
DROP TABLE test_partitioned;
|
||||
DROP OWNED BY regress_heaptest_role; -- permissions
|
||||
diff --git a/contrib/citext/expected/create_index_acl.out b/contrib/citext/expected/create_index_acl.out
|
||||
index 33be13a..70a406c 100644
|
||||
--- a/contrib/citext/expected/create_index_acl.out
|
||||
+++ b/contrib/citext/expected/create_index_acl.out
|
||||
@@ -5,9 +5,6 @@
|
||||
-- owner having as few applicable privileges as possible. (The privileges.sql
|
||||
-- regress_sro_user tests look for the opposite defect; they confirm that
|
||||
-- DefineIndex() uses the table owner userid where necessary.)
|
||||
-SET allow_in_place_tablespaces = true;
|
||||
-CREATE TABLESPACE regress_create_idx_tblspace LOCATION '';
|
||||
-RESET allow_in_place_tablespaces;
|
||||
BEGIN;
|
||||
CREATE ROLE regress_minimal;
|
||||
CREATE SCHEMA s;
|
||||
@@ -49,11 +46,9 @@ ALTER TABLE s.x OWNER TO regress_minimal;
|
||||
-- Empty-table DefineIndex()
|
||||
CREATE UNIQUE INDEX u0rows ON s.x USING btree
|
||||
((s.index_this_expr(y, s.const())) COLLATE s.coll s.citext_pattern_ops)
|
||||
- TABLESPACE regress_create_idx_tblspace
|
||||
WHERE s.index_row_if(y);
|
||||
ALTER TABLE s.x ADD CONSTRAINT e0rows EXCLUDE USING btree
|
||||
((s.index_this_expr(y, s.const())) COLLATE s.coll WITH s.=)
|
||||
- USING INDEX TABLESPACE regress_create_idx_tblspace
|
||||
WHERE (s.index_row_if(y));
|
||||
-- Make the table nonempty.
|
||||
INSERT INTO s.x VALUES ('foo'), ('bar');
|
||||
@@ -66,11 +61,9 @@ RESET search_path;
|
||||
GRANT EXECUTE ON FUNCTION s.index_this_expr TO regress_minimal;
|
||||
CREATE UNIQUE INDEX u2rows ON s.x USING btree
|
||||
((s.index_this_expr(y, s.const())) COLLATE s.coll s.citext_pattern_ops)
|
||||
- TABLESPACE regress_create_idx_tblspace
|
||||
WHERE s.index_row_if(y);
|
||||
ALTER TABLE s.x ADD CONSTRAINT e2rows EXCLUDE USING btree
|
||||
((s.index_this_expr(y, s.const())) COLLATE s.coll WITH s.=)
|
||||
- USING INDEX TABLESPACE regress_create_idx_tblspace
|
||||
WHERE (s.index_row_if(y));
|
||||
-- Shall not find s.coll via search_path, despite the s.const->public.setter
|
||||
-- call having set search_path=s during expression planning. Suppress the
|
||||
@@ -78,9 +71,7 @@ ALTER TABLE s.x ADD CONSTRAINT e2rows EXCLUDE USING btree
|
||||
\set VERBOSITY sqlstate
|
||||
ALTER TABLE s.x ADD CONSTRAINT underqualified EXCLUDE USING btree
|
||||
((s.index_this_expr(y, s.const())) COLLATE coll WITH s.=)
|
||||
- USING INDEX TABLESPACE regress_create_idx_tblspace
|
||||
WHERE (s.index_row_if(y));
|
||||
ERROR: 42704
|
||||
\set VERBOSITY default
|
||||
ROLLBACK;
|
||||
-DROP TABLESPACE regress_create_idx_tblspace;
|
||||
diff --git a/contrib/citext/sql/create_index_acl.sql b/contrib/citext/sql/create_index_acl.sql
|
||||
index 10b5225..ae442e1 100644
|
||||
--- a/contrib/citext/sql/create_index_acl.sql
|
||||
+++ b/contrib/citext/sql/create_index_acl.sql
|
||||
@@ -6,10 +6,6 @@
|
||||
-- regress_sro_user tests look for the opposite defect; they confirm that
|
||||
-- DefineIndex() uses the table owner userid where necessary.)
|
||||
|
||||
-SET allow_in_place_tablespaces = true;
|
||||
-CREATE TABLESPACE regress_create_idx_tblspace LOCATION '';
|
||||
-RESET allow_in_place_tablespaces;
|
||||
-
|
||||
BEGIN;
|
||||
CREATE ROLE regress_minimal;
|
||||
CREATE SCHEMA s;
|
||||
@@ -51,11 +47,9 @@ ALTER TABLE s.x OWNER TO regress_minimal;
|
||||
-- Empty-table DefineIndex()
|
||||
CREATE UNIQUE INDEX u0rows ON s.x USING btree
|
||||
((s.index_this_expr(y, s.const())) COLLATE s.coll s.citext_pattern_ops)
|
||||
- TABLESPACE regress_create_idx_tblspace
|
||||
WHERE s.index_row_if(y);
|
||||
ALTER TABLE s.x ADD CONSTRAINT e0rows EXCLUDE USING btree
|
||||
((s.index_this_expr(y, s.const())) COLLATE s.coll WITH s.=)
|
||||
- USING INDEX TABLESPACE regress_create_idx_tblspace
|
||||
WHERE (s.index_row_if(y));
|
||||
-- Make the table nonempty.
|
||||
INSERT INTO s.x VALUES ('foo'), ('bar');
|
||||
@@ -68,11 +62,9 @@ RESET search_path;
|
||||
GRANT EXECUTE ON FUNCTION s.index_this_expr TO regress_minimal;
|
||||
CREATE UNIQUE INDEX u2rows ON s.x USING btree
|
||||
((s.index_this_expr(y, s.const())) COLLATE s.coll s.citext_pattern_ops)
|
||||
- TABLESPACE regress_create_idx_tblspace
|
||||
WHERE s.index_row_if(y);
|
||||
ALTER TABLE s.x ADD CONSTRAINT e2rows EXCLUDE USING btree
|
||||
((s.index_this_expr(y, s.const())) COLLATE s.coll WITH s.=)
|
||||
- USING INDEX TABLESPACE regress_create_idx_tblspace
|
||||
WHERE (s.index_row_if(y));
|
||||
-- Shall not find s.coll via search_path, despite the s.const->public.setter
|
||||
-- call having set search_path=s during expression planning. Suppress the
|
||||
@@ -80,9 +72,7 @@ ALTER TABLE s.x ADD CONSTRAINT e2rows EXCLUDE USING btree
|
||||
\set VERBOSITY sqlstate
|
||||
ALTER TABLE s.x ADD CONSTRAINT underqualified EXCLUDE USING btree
|
||||
((s.index_this_expr(y, s.const())) COLLATE coll WITH s.=)
|
||||
- USING INDEX TABLESPACE regress_create_idx_tblspace
|
||||
WHERE (s.index_row_if(y));
|
||||
\set VERBOSITY default
|
||||
ROLLBACK;
|
||||
|
||||
-DROP TABLESPACE regress_create_idx_tblspace;
|
||||
diff --git a/contrib/file_fdw/expected/file_fdw.out b/contrib/file_fdw/expected/file_fdw.out
|
||||
index 72304e0..ebe131b 100644
|
||||
--- a/contrib/file_fdw/expected/file_fdw.out
|
||||
+++ b/contrib/file_fdw/expected/file_fdw.out
|
||||
@@ -4,6 +4,7 @@
|
||||
-- directory paths are passed to us in environment variables
|
||||
\getenv abs_srcdir PG_ABS_SRCDIR
|
||||
-- Clean up in case a prior regression run failed
|
||||
+SET compute_query_id TO 'off';
|
||||
SET client_min_messages TO 'warning';
|
||||
DROP ROLE IF EXISTS regress_file_fdw_superuser, regress_file_fdw_user, regress_no_priv_user;
|
||||
RESET client_min_messages;
|
||||
diff --git a/contrib/file_fdw/sql/file_fdw.sql b/contrib/file_fdw/sql/file_fdw.sql
|
||||
index f0548e1..848a08c 100644
|
||||
--- a/contrib/file_fdw/sql/file_fdw.sql
|
||||
+++ b/contrib/file_fdw/sql/file_fdw.sql
|
||||
@@ -6,6 +6,7 @@
|
||||
\getenv abs_srcdir PG_ABS_SRCDIR
|
||||
|
||||
-- Clean up in case a prior regression run failed
|
||||
+SET compute_query_id TO 'off';
|
||||
SET client_min_messages TO 'warning';
|
||||
DROP ROLE IF EXISTS regress_file_fdw_superuser, regress_file_fdw_user, regress_no_priv_user;
|
||||
RESET client_min_messages;
|
||||
diff --git a/contrib/pageinspect/expected/gist.out b/contrib/pageinspect/expected/gist.out
|
||||
index d1adbab..38b52ac 100644
|
||||
--- a/contrib/pageinspect/expected/gist.out
|
||||
+++ b/contrib/pageinspect/expected/gist.out
|
||||
@@ -10,25 +10,6 @@ BEGIN;
|
||||
CREATE TABLE test_gist AS SELECT point(i,i) p, i::text t FROM
|
||||
generate_series(1,1000) i;
|
||||
CREATE INDEX test_gist_idx ON test_gist USING gist (p);
|
||||
--- Page 0 is the root, the rest are leaf pages
|
||||
-SELECT * FROM gist_page_opaque_info(get_raw_page('test_gist_idx', 0));
|
||||
- lsn | nsn | rightlink | flags
|
||||
------+-----+------------+-------
|
||||
- 0/1 | 0/0 | 4294967295 | {}
|
||||
-(1 row)
|
||||
-
|
||||
-SELECT * FROM gist_page_opaque_info(get_raw_page('test_gist_idx', 1));
|
||||
- lsn | nsn | rightlink | flags
|
||||
------+-----+------------+--------
|
||||
- 0/1 | 0/0 | 4294967295 | {leaf}
|
||||
-(1 row)
|
||||
-
|
||||
-SELECT * FROM gist_page_opaque_info(get_raw_page('test_gist_idx', 2));
|
||||
- lsn | nsn | rightlink | flags
|
||||
------+-----+-----------+--------
|
||||
- 0/1 | 0/0 | 1 | {leaf}
|
||||
-(1 row)
|
||||
-
|
||||
COMMIT;
|
||||
SELECT * FROM gist_page_items(get_raw_page('test_gist_idx', 0), 'test_gist_idx');
|
||||
itemoffset | ctid | itemlen | dead | keys
|
||||
diff --git a/contrib/pageinspect/sql/gist.sql b/contrib/pageinspect/sql/gist.sql
|
||||
index d263542..607992f 100644
|
||||
--- a/contrib/pageinspect/sql/gist.sql
|
||||
+++ b/contrib/pageinspect/sql/gist.sql
|
||||
@@ -12,11 +12,6 @@ CREATE TABLE test_gist AS SELECT point(i,i) p, i::text t FROM
|
||||
generate_series(1,1000) i;
|
||||
CREATE INDEX test_gist_idx ON test_gist USING gist (p);
|
||||
|
||||
--- Page 0 is the root, the rest are leaf pages
|
||||
-SELECT * FROM gist_page_opaque_info(get_raw_page('test_gist_idx', 0));
|
||||
-SELECT * FROM gist_page_opaque_info(get_raw_page('test_gist_idx', 1));
|
||||
-SELECT * FROM gist_page_opaque_info(get_raw_page('test_gist_idx', 2));
|
||||
-
|
||||
COMMIT;
|
||||
|
||||
SELECT * FROM gist_page_items(get_raw_page('test_gist_idx', 0), 'test_gist_idx');
|
||||
@@ -1,196 +0,0 @@
|
||||
diff --git a/contrib/amcheck/expected/check_heap.out b/contrib/amcheck/expected/check_heap.out
|
||||
index 979e5e8..2375b45 100644
|
||||
--- a/contrib/amcheck/expected/check_heap.out
|
||||
+++ b/contrib/amcheck/expected/check_heap.out
|
||||
@@ -80,12 +80,9 @@ INSERT INTO heaptest (a, b)
|
||||
-- same transaction. The heaptest table is smaller than the default
|
||||
-- wal_skip_threshold, so a wal_level=minimal commit reads the table into
|
||||
-- shared_buffers. A transaction delays that and excludes any autovacuum.
|
||||
-SET allow_in_place_tablespaces = true;
|
||||
-CREATE TABLESPACE regress_test_stats_tblspc LOCATION '';
|
||||
SELECT sum(reads) AS stats_bulkreads_before
|
||||
FROM pg_stat_io WHERE context = 'bulkread' \gset
|
||||
BEGIN;
|
||||
-ALTER TABLE heaptest SET TABLESPACE regress_test_stats_tblspc;
|
||||
-- Check that valid options are not rejected nor corruption reported
|
||||
-- for a non-empty table
|
||||
SELECT * FROM verify_heapam(relation := 'heaptest', skip := 'none');
|
||||
@@ -118,14 +115,6 @@ SELECT pg_stat_force_next_flush();
|
||||
|
||||
(1 row)
|
||||
|
||||
-SELECT sum(reads) AS stats_bulkreads_after
|
||||
- FROM pg_stat_io WHERE context = 'bulkread' \gset
|
||||
-SELECT :stats_bulkreads_after > :stats_bulkreads_before;
|
||||
- ?column?
|
||||
-----------
|
||||
- t
|
||||
-(1 row)
|
||||
-
|
||||
CREATE ROLE regress_heaptest_role;
|
||||
-- verify permissions are checked (error due to function not callable)
|
||||
SET ROLE regress_heaptest_role;
|
||||
@@ -233,7 +222,6 @@ ERROR: cannot check relation "test_foreign_table"
|
||||
DETAIL: This operation is not supported for foreign tables.
|
||||
-- cleanup
|
||||
DROP TABLE heaptest;
|
||||
-DROP TABLESPACE regress_test_stats_tblspc;
|
||||
DROP TABLE test_partition;
|
||||
DROP TABLE test_partitioned;
|
||||
DROP OWNED BY regress_heaptest_role; -- permissions
|
||||
diff --git a/contrib/amcheck/sql/check_heap.sql b/contrib/amcheck/sql/check_heap.sql
|
||||
index 1745bae..3b429c3 100644
|
||||
--- a/contrib/amcheck/sql/check_heap.sql
|
||||
+++ b/contrib/amcheck/sql/check_heap.sql
|
||||
@@ -40,12 +40,9 @@ INSERT INTO heaptest (a, b)
|
||||
-- same transaction. The heaptest table is smaller than the default
|
||||
-- wal_skip_threshold, so a wal_level=minimal commit reads the table into
|
||||
-- shared_buffers. A transaction delays that and excludes any autovacuum.
|
||||
-SET allow_in_place_tablespaces = true;
|
||||
-CREATE TABLESPACE regress_test_stats_tblspc LOCATION '';
|
||||
SELECT sum(reads) AS stats_bulkreads_before
|
||||
FROM pg_stat_io WHERE context = 'bulkread' \gset
|
||||
BEGIN;
|
||||
-ALTER TABLE heaptest SET TABLESPACE regress_test_stats_tblspc;
|
||||
-- Check that valid options are not rejected nor corruption reported
|
||||
-- for a non-empty table
|
||||
SELECT * FROM verify_heapam(relation := 'heaptest', skip := 'none');
|
||||
@@ -58,9 +55,6 @@ COMMIT;
|
||||
-- ALTER TABLE ... SET TABLESPACE ...
|
||||
-- causing an additional bulkread, which should be reflected in pg_stat_io.
|
||||
SELECT pg_stat_force_next_flush();
|
||||
-SELECT sum(reads) AS stats_bulkreads_after
|
||||
- FROM pg_stat_io WHERE context = 'bulkread' \gset
|
||||
-SELECT :stats_bulkreads_after > :stats_bulkreads_before;
|
||||
|
||||
CREATE ROLE regress_heaptest_role;
|
||||
|
||||
@@ -140,7 +134,6 @@ SELECT * FROM verify_heapam('test_foreign_table',
|
||||
|
||||
-- cleanup
|
||||
DROP TABLE heaptest;
|
||||
-DROP TABLESPACE regress_test_stats_tblspc;
|
||||
DROP TABLE test_partition;
|
||||
DROP TABLE test_partitioned;
|
||||
DROP OWNED BY regress_heaptest_role; -- permissions
|
||||
diff --git a/contrib/citext/expected/create_index_acl.out b/contrib/citext/expected/create_index_acl.out
|
||||
index 33be13a..70a406c 100644
|
||||
--- a/contrib/citext/expected/create_index_acl.out
|
||||
+++ b/contrib/citext/expected/create_index_acl.out
|
||||
@@ -5,9 +5,6 @@
|
||||
-- owner having as few applicable privileges as possible. (The privileges.sql
|
||||
-- regress_sro_user tests look for the opposite defect; they confirm that
|
||||
-- DefineIndex() uses the table owner userid where necessary.)
|
||||
-SET allow_in_place_tablespaces = true;
|
||||
-CREATE TABLESPACE regress_create_idx_tblspace LOCATION '';
|
||||
-RESET allow_in_place_tablespaces;
|
||||
BEGIN;
|
||||
CREATE ROLE regress_minimal;
|
||||
CREATE SCHEMA s;
|
||||
@@ -49,11 +46,9 @@ ALTER TABLE s.x OWNER TO regress_minimal;
|
||||
-- Empty-table DefineIndex()
|
||||
CREATE UNIQUE INDEX u0rows ON s.x USING btree
|
||||
((s.index_this_expr(y, s.const())) COLLATE s.coll s.citext_pattern_ops)
|
||||
- TABLESPACE regress_create_idx_tblspace
|
||||
WHERE s.index_row_if(y);
|
||||
ALTER TABLE s.x ADD CONSTRAINT e0rows EXCLUDE USING btree
|
||||
((s.index_this_expr(y, s.const())) COLLATE s.coll WITH s.=)
|
||||
- USING INDEX TABLESPACE regress_create_idx_tblspace
|
||||
WHERE (s.index_row_if(y));
|
||||
-- Make the table nonempty.
|
||||
INSERT INTO s.x VALUES ('foo'), ('bar');
|
||||
@@ -66,11 +61,9 @@ RESET search_path;
|
||||
GRANT EXECUTE ON FUNCTION s.index_this_expr TO regress_minimal;
|
||||
CREATE UNIQUE INDEX u2rows ON s.x USING btree
|
||||
((s.index_this_expr(y, s.const())) COLLATE s.coll s.citext_pattern_ops)
|
||||
- TABLESPACE regress_create_idx_tblspace
|
||||
WHERE s.index_row_if(y);
|
||||
ALTER TABLE s.x ADD CONSTRAINT e2rows EXCLUDE USING btree
|
||||
((s.index_this_expr(y, s.const())) COLLATE s.coll WITH s.=)
|
||||
- USING INDEX TABLESPACE regress_create_idx_tblspace
|
||||
WHERE (s.index_row_if(y));
|
||||
-- Shall not find s.coll via search_path, despite the s.const->public.setter
|
||||
-- call having set search_path=s during expression planning. Suppress the
|
||||
@@ -78,9 +71,7 @@ ALTER TABLE s.x ADD CONSTRAINT e2rows EXCLUDE USING btree
|
||||
\set VERBOSITY sqlstate
|
||||
ALTER TABLE s.x ADD CONSTRAINT underqualified EXCLUDE USING btree
|
||||
((s.index_this_expr(y, s.const())) COLLATE coll WITH s.=)
|
||||
- USING INDEX TABLESPACE regress_create_idx_tblspace
|
||||
WHERE (s.index_row_if(y));
|
||||
ERROR: 42704
|
||||
\set VERBOSITY default
|
||||
ROLLBACK;
|
||||
-DROP TABLESPACE regress_create_idx_tblspace;
|
||||
diff --git a/contrib/citext/sql/create_index_acl.sql b/contrib/citext/sql/create_index_acl.sql
|
||||
index 10b5225..ae442e1 100644
|
||||
--- a/contrib/citext/sql/create_index_acl.sql
|
||||
+++ b/contrib/citext/sql/create_index_acl.sql
|
||||
@@ -6,10 +6,6 @@
|
||||
-- regress_sro_user tests look for the opposite defect; they confirm that
|
||||
-- DefineIndex() uses the table owner userid where necessary.)
|
||||
|
||||
-SET allow_in_place_tablespaces = true;
|
||||
-CREATE TABLESPACE regress_create_idx_tblspace LOCATION '';
|
||||
-RESET allow_in_place_tablespaces;
|
||||
-
|
||||
BEGIN;
|
||||
CREATE ROLE regress_minimal;
|
||||
CREATE SCHEMA s;
|
||||
@@ -51,11 +47,9 @@ ALTER TABLE s.x OWNER TO regress_minimal;
|
||||
-- Empty-table DefineIndex()
|
||||
CREATE UNIQUE INDEX u0rows ON s.x USING btree
|
||||
((s.index_this_expr(y, s.const())) COLLATE s.coll s.citext_pattern_ops)
|
||||
- TABLESPACE regress_create_idx_tblspace
|
||||
WHERE s.index_row_if(y);
|
||||
ALTER TABLE s.x ADD CONSTRAINT e0rows EXCLUDE USING btree
|
||||
((s.index_this_expr(y, s.const())) COLLATE s.coll WITH s.=)
|
||||
- USING INDEX TABLESPACE regress_create_idx_tblspace
|
||||
WHERE (s.index_row_if(y));
|
||||
-- Make the table nonempty.
|
||||
INSERT INTO s.x VALUES ('foo'), ('bar');
|
||||
@@ -68,11 +62,9 @@ RESET search_path;
|
||||
GRANT EXECUTE ON FUNCTION s.index_this_expr TO regress_minimal;
|
||||
CREATE UNIQUE INDEX u2rows ON s.x USING btree
|
||||
((s.index_this_expr(y, s.const())) COLLATE s.coll s.citext_pattern_ops)
|
||||
- TABLESPACE regress_create_idx_tblspace
|
||||
WHERE s.index_row_if(y);
|
||||
ALTER TABLE s.x ADD CONSTRAINT e2rows EXCLUDE USING btree
|
||||
((s.index_this_expr(y, s.const())) COLLATE s.coll WITH s.=)
|
||||
- USING INDEX TABLESPACE regress_create_idx_tblspace
|
||||
WHERE (s.index_row_if(y));
|
||||
-- Shall not find s.coll via search_path, despite the s.const->public.setter
|
||||
-- call having set search_path=s during expression planning. Suppress the
|
||||
@@ -80,9 +72,7 @@ ALTER TABLE s.x ADD CONSTRAINT e2rows EXCLUDE USING btree
|
||||
\set VERBOSITY sqlstate
|
||||
ALTER TABLE s.x ADD CONSTRAINT underqualified EXCLUDE USING btree
|
||||
((s.index_this_expr(y, s.const())) COLLATE coll WITH s.=)
|
||||
- USING INDEX TABLESPACE regress_create_idx_tblspace
|
||||
WHERE (s.index_row_if(y));
|
||||
\set VERBOSITY default
|
||||
ROLLBACK;
|
||||
|
||||
-DROP TABLESPACE regress_create_idx_tblspace;
|
||||
diff --git a/contrib/file_fdw/expected/file_fdw.out b/contrib/file_fdw/expected/file_fdw.out
|
||||
index 86c148a..81bdb2c 100644
|
||||
--- a/contrib/file_fdw/expected/file_fdw.out
|
||||
+++ b/contrib/file_fdw/expected/file_fdw.out
|
||||
@@ -4,6 +4,7 @@
|
||||
-- directory paths are passed to us in environment variables
|
||||
\getenv abs_srcdir PG_ABS_SRCDIR
|
||||
-- Clean up in case a prior regression run failed
|
||||
+SET compute_query_id TO 'off';
|
||||
SET client_min_messages TO 'warning';
|
||||
DROP ROLE IF EXISTS regress_file_fdw_superuser, regress_file_fdw_user, regress_no_priv_user;
|
||||
RESET client_min_messages;
|
||||
diff --git a/contrib/file_fdw/sql/file_fdw.sql b/contrib/file_fdw/sql/file_fdw.sql
|
||||
index f0548e1..848a08c 100644
|
||||
--- a/contrib/file_fdw/sql/file_fdw.sql
|
||||
+++ b/contrib/file_fdw/sql/file_fdw.sql
|
||||
@@ -6,6 +6,7 @@
|
||||
\getenv abs_srcdir PG_ABS_SRCDIR
|
||||
|
||||
-- Clean up in case a prior regression run failed
|
||||
+SET compute_query_id TO 'off';
|
||||
SET client_min_messages TO 'warning';
|
||||
DROP ROLE IF EXISTS regress_file_fdw_superuser, regress_file_fdw_user, regress_no_priv_user;
|
||||
RESET client_min_messages;
|
||||
@@ -1,19 +0,0 @@
|
||||
commit ec6a491d126882966a696f9ad5d3698935361d55
|
||||
Author: Alexey Masterov <alexeymasterov@neon.tech>
|
||||
Date: Tue Dec 17 10:25:00 2024 +0100
|
||||
|
||||
Changes required to run tests on Neon
|
||||
|
||||
diff --git a/test/expected/permissions_functions.out b/test/expected/permissions_functions.out
|
||||
index 1e9fbc2..94cbe25 100644
|
||||
--- a/test/expected/permissions_functions.out
|
||||
+++ b/test/expected/permissions_functions.out
|
||||
@@ -64,7 +64,7 @@ begin;
|
||||
select current_user;
|
||||
current_user
|
||||
--------------
|
||||
- postgres
|
||||
+ cloud_admin
|
||||
(1 row)
|
||||
|
||||
-- revoke default access from the public role for new functions
|
||||
@@ -1,24 +1,8 @@
diff --git a/Makefile b/Makefile
index 7a4b88c..56678af 100644
--- a/Makefile
+++ b/Makefile
@@ -3,7 +3,10 @@ EXTVERSION = 0.8.0

MODULE_big = vector
DATA = $(wildcard sql/*--*--*.sql)
-DATA_built = sql/$(EXTENSION)--$(EXTVERSION).sql
+# This change is needed to install different per-version SQL files
+# like pgvector--0.8.0.sql and pgvector--0.7.4.sql
+# The corresponding file is downloaded during the Docker image build process
+DATA_built = sql/$(EXTENSION)--$(EXTVERSION).sql sql/vector--0.7.4.sql
OBJS = src/bitutils.o src/bitvec.o src/halfutils.o src/halfvec.o src/hnsw.o src/hnswbuild.o src/hnswinsert.o src/hnswscan.o src/hnswutils.o src/hnswvacuum.o src/ivfbuild.o src/ivfflat.o src/ivfinsert.o src/ivfkmeans.o src/ivfscan.o src/ivfutils.o src/ivfvacuum.o src/sparsevec.o src/vector.o
HEADERS = src/halfvec.h src/sparsevec.h src/vector.h

diff --git a/src/hnswbuild.c b/src/hnswbuild.c
index b667478..fc1897c 100644
index dcfb2bd..d5189ee 100644
--- a/src/hnswbuild.c
+++ b/src/hnswbuild.c
@@ -843,9 +843,17 @@ HnswParallelBuildMain(dsm_segment *seg, shm_toc *toc)
@@ -860,9 +860,17 @@ HnswParallelBuildMain(dsm_segment *seg, shm_toc *toc)

hnswarea = shm_toc_lookup(toc, PARALLEL_KEY_HNSW_AREA, false);

@@ -36,7 +20,7 @@ index b667478..fc1897c 100644
/* Close relations within worker */
index_close(indexRel, indexLockmode);
table_close(heapRel, heapLockmode);
@@ -1100,12 +1108,38 @@ BuildIndex(Relation heap, Relation index, IndexInfo *indexInfo,
@@ -1117,12 +1125,38 @@ BuildIndex(Relation heap, Relation index, IndexInfo *indexInfo,
SeedRandom(42);
#endif

@@ -27,10 +27,6 @@ commands:
|
||||
user: nobody
|
||||
sysvInitAction: respawn
|
||||
shell: 'DATA_SOURCE_NAME="user=cloud_admin sslmode=disable dbname=postgres application_name=postgres-exporter" /bin/postgres_exporter --config.file=/etc/postgres_exporter.yml'
|
||||
- name: pgbouncer-exporter
|
||||
user: postgres
|
||||
sysvInitAction: respawn
|
||||
shell: '/bin/pgbouncer_exporter --pgBouncer.connectionString="postgres:///pgbouncer?host=/tmp&port=6432&dbname=pgbouncer&user=pgbouncer"'
|
||||
- name: sql-exporter
|
||||
user: nobody
|
||||
sysvInitAction: respawn
|
||||
|
||||
@@ -27,10 +27,6 @@ commands:
|
||||
user: nobody
|
||||
sysvInitAction: respawn
|
||||
shell: 'DATA_SOURCE_NAME="user=cloud_admin sslmode=disable dbname=postgres application_name=postgres-exporter" /bin/postgres_exporter --config.file=/etc/postgres_exporter.yml'
|
||||
- name: pgbouncer-exporter
|
||||
user: postgres
|
||||
sysvInitAction: respawn
|
||||
shell: '/bin/pgbouncer_exporter --pgBouncer.connectionString="postgres:///pgbouncer?host=/tmp&port=6432&dbname=pgbouncer&user=pgbouncer"'
|
||||
- name: sql-exporter
|
||||
user: nobody
|
||||
sysvInitAction: respawn
|
||||
|
||||
@@ -51,7 +51,6 @@ tracing-subscriber.workspace = true
tracing-utils.workspace = true
thiserror.workspace = true
url.workspace = true
uuid.workspace = true
prometheus.workspace = true

postgres_initdb.workspace = true

@@ -34,7 +34,6 @@
|
||||
//! -r http://pg-ext-s3-gateway \
|
||||
//! ```
|
||||
use std::collections::HashMap;
|
||||
use std::ffi::OsString;
|
||||
use std::fs::File;
|
||||
use std::path::Path;
|
||||
use std::process::exit;
|
||||
@@ -45,7 +44,7 @@ use std::{thread, time::Duration};
|
||||
|
||||
use anyhow::{Context, Result};
|
||||
use chrono::Utc;
|
||||
use clap::Parser;
|
||||
use clap::Arg;
|
||||
use compute_tools::disk_quota::set_disk_quota;
|
||||
use compute_tools::lsn_lease::launch_lsn_lease_bg_task_for_static;
|
||||
use signal_hook::consts::{SIGQUIT, SIGTERM};
|
||||
@@ -74,76 +73,11 @@ use utils::failpoint_support;
|
||||
// in-case of not-set environment var
|
||||
const BUILD_TAG_DEFAULT: &str = "latest";
|
||||
|
||||
// Compatibility hack: if the control plane specified any remote-ext-config
// use the default value for extension storage proxy gateway.
// Remove this once the control plane is updated to pass the gateway URL
fn parse_remote_ext_config(arg: &str) -> Result<String> {
    if arg.starts_with("http") {
        Ok(arg.trim_end_matches('/').to_string())
    } else {
        Ok("http://pg-ext-s3-gateway".to_string())
    }
}

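// Editor's illustration (not part of the diff): a minimal sketch of how the
// compatibility hack above behaves, assuming `parse_remote_ext_config` is in
// scope as shown. The gateway URL used here is a made-up example.
#[cfg(test)]
mod remote_ext_config_behaviour {
    use super::parse_remote_ext_config;

    #[test]
    fn keeps_an_explicit_gateway_url_minus_trailing_slash() {
        let url = parse_remote_ext_config("http://my-gateway:8080/").unwrap();
        assert_eq!(url, "http://my-gateway:8080");
    }

    #[test]
    fn falls_back_to_the_default_gateway_for_anything_else() {
        let url = parse_remote_ext_config("legacy-value").unwrap();
        assert_eq!(url, "http://pg-ext-s3-gateway");
    }
}
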
#[derive(Parser)]
|
||||
#[command(rename_all = "kebab-case")]
|
||||
struct Cli {
|
||||
#[arg(short = 'b', long, default_value = "postgres", env = "POSTGRES_PATH")]
|
||||
pub pgbin: String,
|
||||
|
||||
#[arg(short = 'r', long, value_parser = parse_remote_ext_config)]
|
||||
pub remote_ext_config: Option<String>,
|
||||
|
||||
#[arg(long, default_value_t = 3080)]
|
||||
pub http_port: u16,
|
||||
|
||||
#[arg(short = 'D', long, value_name = "DATADIR")]
|
||||
pub pgdata: String,
|
||||
|
||||
#[arg(short = 'C', long, value_name = "DATABASE_URL")]
|
||||
pub connstr: String,
|
||||
|
||||
#[cfg(target_os = "linux")]
|
||||
#[arg(long, default_value = "neon-postgres")]
|
||||
pub cgroup: String,
|
||||
|
||||
#[cfg(target_os = "linux")]
|
||||
#[arg(
|
||||
long,
|
||||
default_value = "host=localhost port=5432 dbname=postgres user=cloud_admin sslmode=disable application_name=vm-monitor"
|
||||
)]
|
||||
pub filecache_connstr: String,
|
||||
|
||||
#[cfg(target_os = "linux")]
|
||||
#[arg(long, default_value = "0.0.0.0:10301")]
|
||||
pub vm_monitor_addr: String,
|
||||
|
||||
#[arg(long, action = clap::ArgAction::SetTrue)]
|
||||
pub resize_swap_on_bind: bool,
|
||||
|
||||
#[arg(long)]
|
||||
pub set_disk_quota_for_fs: Option<String>,
|
||||
|
||||
#[arg(short = 's', long = "spec", group = "spec")]
|
||||
pub spec_json: Option<String>,
|
||||
|
||||
#[arg(short = 'S', long, group = "spec-path")]
|
||||
pub spec_path: Option<OsString>,
|
||||
|
||||
#[arg(short = 'i', long, group = "compute-id", conflicts_with_all = ["spec", "spec-path"])]
|
||||
pub compute_id: Option<String>,
|
||||
|
||||
#[arg(short = 'p', long, conflicts_with_all = ["spec", "spec-path"], requires = "compute-id", value_name = "CONTROL_PLANE_API_BASE_URL")]
|
||||
pub control_plane_uri: Option<String>,
|
||||
}
|
||||
|
||||
fn main() -> Result<()> {
|
||||
let cli = Cli::parse();
|
||||
|
||||
let build_tag = init()?;
|
||||
|
||||
let scenario = failpoint_support::init();
|
||||
|
||||
let (build_tag, clap_args) = init()?;
|
||||
|
||||
// enable core dumping for all child processes
|
||||
setrlimit(Resource::CORE, rlimit::INFINITY, rlimit::INFINITY)?;
|
||||
|
||||
@@ -151,11 +85,13 @@ fn main() -> Result<()> {
|
||||
// Enter startup tracing context
|
||||
let _startup_context_guard = startup_context_from_env();
|
||||
|
||||
let cli_spec = try_spec_from_cli(&cli)?;
|
||||
let cli_args = process_cli(&clap_args)?;
|
||||
|
||||
let compute = wait_spec(build_tag, &cli, cli_spec)?;
|
||||
let cli_spec = try_spec_from_cli(&clap_args, &cli_args)?;
|
||||
|
||||
start_postgres(&cli, compute)?
|
||||
let wait_spec_result = wait_spec(build_tag, cli_args, cli_spec)?;
|
||||
|
||||
start_postgres(&clap_args, wait_spec_result)?
|
||||
|
||||
// Startup is finished, exit the startup tracing span
|
||||
};
|
||||
@@ -172,7 +108,7 @@ fn main() -> Result<()> {
|
||||
deinit_and_exit(wait_pg_result);
|
||||
}
|
||||
|
||||
fn init() -> Result<String> {
|
||||
fn init() -> Result<(String, clap::ArgMatches)> {
|
||||
init_tracing_and_logging(DEFAULT_LOG_LEVEL)?;
|
||||
|
||||
let mut signals = Signals::new([SIGINT, SIGTERM, SIGQUIT])?;
|
||||
@@ -187,7 +123,66 @@ fn init() -> Result<String> {
|
||||
.to_string();
|
||||
info!("build_tag: {build_tag}");
|
||||
|
||||
Ok(build_tag)
|
||||
Ok((build_tag, cli().get_matches()))
|
||||
}
|
||||
|
||||
fn process_cli(matches: &clap::ArgMatches) -> Result<ProcessCliResult> {
|
||||
let pgbin_default = "postgres";
|
||||
let pgbin = matches
|
||||
.get_one::<String>("pgbin")
|
||||
.map(|s| s.as_str())
|
||||
.unwrap_or(pgbin_default);
|
||||
|
||||
let ext_remote_storage = matches
|
||||
.get_one::<String>("remote-ext-config")
|
||||
// Compatibility hack: if the control plane specified any remote-ext-config
|
||||
// use the default value for extension storage proxy gateway.
|
||||
// Remove this once the control plane is updated to pass the gateway URL
|
||||
.map(|conf| {
|
||||
if conf.starts_with("http") {
|
||||
conf.trim_end_matches('/')
|
||||
} else {
|
||||
"http://pg-ext-s3-gateway"
|
||||
}
|
||||
});
|
||||
|
||||
let http_port = *matches
|
||||
.get_one::<u16>("http-port")
|
||||
.expect("http-port is required");
|
||||
let pgdata = matches
|
||||
.get_one::<String>("pgdata")
|
||||
.expect("PGDATA path is required");
|
||||
let connstr = matches
|
||||
.get_one::<String>("connstr")
|
||||
.expect("Postgres connection string is required");
|
||||
let spec_json = matches.get_one::<String>("spec");
|
||||
let spec_path = matches.get_one::<String>("spec-path");
|
||||
let resize_swap_on_bind = matches.get_flag("resize-swap-on-bind");
|
||||
let set_disk_quota_for_fs = matches.get_one::<String>("set-disk-quota-for-fs");
|
||||
|
||||
Ok(ProcessCliResult {
|
||||
connstr,
|
||||
pgdata,
|
||||
pgbin,
|
||||
ext_remote_storage,
|
||||
http_port,
|
||||
spec_json,
|
||||
spec_path,
|
||||
resize_swap_on_bind,
|
||||
set_disk_quota_for_fs,
|
||||
})
|
||||
}
|
||||
|
||||
struct ProcessCliResult<'clap> {
|
||||
connstr: &'clap str,
|
||||
pgdata: &'clap str,
|
||||
pgbin: &'clap str,
|
||||
ext_remote_storage: Option<&'clap str>,
|
||||
http_port: u16,
|
||||
spec_json: Option<&'clap String>,
|
||||
spec_path: Option<&'clap String>,
|
||||
resize_swap_on_bind: bool,
|
||||
set_disk_quota_for_fs: Option<&'clap String>,
|
||||
}
|
||||
|
||||
fn startup_context_from_env() -> Option<opentelemetry::ContextGuard> {
|
||||
@@ -240,9 +235,19 @@ fn startup_context_from_env() -> Option<opentelemetry::ContextGuard> {
|
||||
}
|
||||
}
|
||||
|
||||
fn try_spec_from_cli(cli: &Cli) -> Result<CliSpecParams> {
|
||||
fn try_spec_from_cli(
|
||||
matches: &clap::ArgMatches,
|
||||
ProcessCliResult {
|
||||
spec_json,
|
||||
spec_path,
|
||||
..
|
||||
}: &ProcessCliResult,
|
||||
) -> Result<CliSpecParams> {
|
||||
let compute_id = matches.get_one::<String>("compute-id");
|
||||
let control_plane_uri = matches.get_one::<String>("control-plane-uri");
|
||||
|
||||
// First, try to get cluster spec from the cli argument
|
||||
if let Some(ref spec_json) = cli.spec_json {
|
||||
if let Some(spec_json) = spec_json {
|
||||
info!("got spec from cli argument {}", spec_json);
|
||||
return Ok(CliSpecParams {
|
||||
spec: Some(serde_json::from_str(spec_json)?),
|
||||
@@ -251,7 +256,7 @@ fn try_spec_from_cli(cli: &Cli) -> Result<CliSpecParams> {
|
||||
}
|
||||
|
||||
// Second, try to read it from the file if path is provided
|
||||
if let Some(ref spec_path) = cli.spec_path {
|
||||
if let Some(spec_path) = spec_path {
|
||||
let file = File::open(Path::new(spec_path))?;
|
||||
return Ok(CliSpecParams {
|
||||
spec: Some(serde_json::from_reader(file)?),
|
||||
@@ -259,20 +264,17 @@ fn try_spec_from_cli(cli: &Cli) -> Result<CliSpecParams> {
|
||||
});
|
||||
}
|
||||
|
||||
if cli.compute_id.is_none() {
|
||||
let Some(compute_id) = compute_id else {
|
||||
panic!(
|
||||
"compute spec should be provided by one of the following ways: \
|
||||
--spec OR --spec-path OR --control-plane-uri and --compute-id"
|
||||
);
|
||||
};
|
||||
if cli.control_plane_uri.is_none() {
|
||||
let Some(control_plane_uri) = control_plane_uri else {
|
||||
panic!("must specify both --control-plane-uri and --compute-id or none");
|
||||
};
|
||||
|
||||
match get_spec_from_control_plane(
|
||||
cli.control_plane_uri.as_ref().unwrap(),
|
||||
cli.compute_id.as_ref().unwrap(),
|
||||
) {
|
||||
match get_spec_from_control_plane(control_plane_uri, compute_id) {
|
||||
Ok(spec) => Ok(CliSpecParams {
|
||||
spec,
|
||||
live_config_allowed: true,
|
||||
@@ -296,12 +298,21 @@ struct CliSpecParams {
|
||||
|
||||
fn wait_spec(
|
||||
build_tag: String,
|
||||
cli: &Cli,
|
||||
ProcessCliResult {
|
||||
connstr,
|
||||
pgdata,
|
||||
pgbin,
|
||||
ext_remote_storage,
|
||||
resize_swap_on_bind,
|
||||
set_disk_quota_for_fs,
|
||||
http_port,
|
||||
..
|
||||
}: ProcessCliResult,
|
||||
CliSpecParams {
|
||||
spec,
|
||||
live_config_allowed,
|
||||
}: CliSpecParams,
|
||||
) -> Result<Arc<ComputeNode>> {
|
||||
) -> Result<WaitSpecResult> {
|
||||
let mut new_state = ComputeState::new();
|
||||
let spec_set;
|
||||
|
||||
@@ -313,7 +324,7 @@ fn wait_spec(
|
||||
} else {
|
||||
spec_set = false;
|
||||
}
|
||||
let connstr = Url::parse(&cli.connstr).context("cannot parse connstr as a URL")?;
|
||||
let connstr = Url::parse(connstr).context("cannot parse connstr as a URL")?;
|
||||
let conn_conf = postgres::config::Config::from_str(connstr.as_str())
|
||||
.context("cannot build postgres config from connstr")?;
|
||||
let tokio_conn_conf = tokio_postgres::config::Config::from_str(connstr.as_str())
|
||||
@@ -322,14 +333,14 @@ fn wait_spec(
|
||||
connstr,
|
||||
conn_conf,
|
||||
tokio_conn_conf,
|
||||
pgdata: cli.pgdata.clone(),
|
||||
pgbin: cli.pgbin.clone(),
|
||||
pgversion: get_pg_version_string(&cli.pgbin),
|
||||
http_port: cli.http_port,
|
||||
pgdata: pgdata.to_string(),
|
||||
pgbin: pgbin.to_string(),
|
||||
pgversion: get_pg_version_string(pgbin),
|
||||
http_port,
|
||||
live_config_allowed,
|
||||
state: Mutex::new(new_state),
|
||||
state_changed: Condvar::new(),
|
||||
ext_remote_storage: cli.remote_ext_config.clone(),
|
||||
ext_remote_storage: ext_remote_storage.map(|s| s.to_string()),
|
||||
ext_download_progress: RwLock::new(HashMap::new()),
|
||||
build_tag,
|
||||
};
|
||||
@@ -346,7 +357,7 @@ fn wait_spec(
|
||||
// Launch http service first, so that we can serve control-plane requests
|
||||
// while configuration is still in progress.
|
||||
let _http_handle =
|
||||
launch_http_server(cli.http_port, &compute).expect("cannot launch http endpoint thread");
|
||||
launch_http_server(http_port, &compute).expect("cannot launch http endpoint thread");
|
||||
|
||||
if !spec_set {
|
||||
// No spec provided, hang waiting for it.
|
||||
@@ -378,12 +389,27 @@ fn wait_spec(
|
||||
|
||||
launch_lsn_lease_bg_task_for_static(&compute);
|
||||
|
||||
Ok(compute)
|
||||
Ok(WaitSpecResult {
|
||||
compute,
|
||||
resize_swap_on_bind,
|
||||
set_disk_quota_for_fs: set_disk_quota_for_fs.cloned(),
|
||||
})
|
||||
}
|
||||
|
||||
struct WaitSpecResult {
|
||||
compute: Arc<ComputeNode>,
|
||||
resize_swap_on_bind: bool,
|
||||
set_disk_quota_for_fs: Option<String>,
|
||||
}
|
||||
|
||||
fn start_postgres(
|
||||
cli: &Cli,
|
||||
compute: Arc<ComputeNode>,
|
||||
// need to allow unused because `matches` is only used if target_os = "linux"
|
||||
#[allow(unused_variables)] matches: &clap::ArgMatches,
|
||||
WaitSpecResult {
|
||||
compute,
|
||||
resize_swap_on_bind,
|
||||
set_disk_quota_for_fs,
|
||||
}: WaitSpecResult,
|
||||
) -> Result<(Option<PostgresHandle>, StartPostgresResult)> {
|
||||
// We got all we need, update the state.
|
||||
let mut state = compute.state.lock().unwrap();
|
||||
@@ -411,7 +437,7 @@ fn start_postgres(
|
||||
let mut delay_exit = false;
|
||||
|
||||
// Resize swap to the desired size if the compute spec says so
|
||||
if let (Some(size_bytes), true) = (swap_size_bytes, cli.resize_swap_on_bind) {
|
||||
if let (Some(size_bytes), true) = (swap_size_bytes, resize_swap_on_bind) {
|
||||
// To avoid 'swapoff' hitting postgres startup, we need to run resize-swap to completion
|
||||
// *before* starting postgres.
|
||||
//
|
||||
@@ -438,9 +464,9 @@ fn start_postgres(
|
||||
|
||||
// Set disk quota if the compute spec says so
|
||||
if let (Some(disk_quota_bytes), Some(disk_quota_fs_mountpoint)) =
|
||||
(disk_quota_bytes, cli.set_disk_quota_for_fs.as_ref())
|
||||
(disk_quota_bytes, set_disk_quota_for_fs)
|
||||
{
|
||||
match set_disk_quota(disk_quota_bytes, disk_quota_fs_mountpoint) {
|
||||
match set_disk_quota(disk_quota_bytes, &disk_quota_fs_mountpoint) {
|
||||
Ok(()) => {
|
||||
let size_mib = disk_quota_bytes as f32 / (1 << 20) as f32; // just for more coherent display.
|
||||
info!(%disk_quota_bytes, %size_mib, "set disk quota");
|
||||
@@ -483,7 +509,13 @@ fn start_postgres(
|
||||
if #[cfg(target_os = "linux")] {
|
||||
use std::env;
|
||||
use tokio_util::sync::CancellationToken;
|
||||
let vm_monitor_addr = matches
|
||||
.get_one::<String>("vm-monitor-addr")
|
||||
.expect("--vm-monitor-addr should always be set because it has a default arg");
|
||||
let file_cache_connstr = matches.get_one::<String>("filecache-connstr");
|
||||
let cgroup = matches.get_one::<String>("cgroup");
|
||||
|
||||
// Only make a runtime if we need to.
|
||||
// Note: it seems like you can make a runtime in an inner scope and
|
||||
// if you start a task in it it won't be dropped. However, make it
|
||||
// in the outermost scope just to be safe.
|
||||
@@ -506,15 +538,15 @@ fn start_postgres(
|
||||
let pgconnstr = if disable_lfc_resizing.unwrap_or(false) {
|
||||
None
|
||||
} else {
|
||||
Some(cli.filecache_connstr.clone())
|
||||
file_cache_connstr.cloned()
|
||||
};
|
||||
|
||||
let vm_monitor = rt.as_ref().map(|rt| {
|
||||
rt.spawn(vm_monitor::start(
|
||||
Box::leak(Box::new(vm_monitor::Args {
|
||||
cgroup: Some(cli.cgroup.clone()),
|
||||
cgroup: cgroup.cloned(),
|
||||
pgconnstr,
|
||||
addr: cli.vm_monitor_addr.clone(),
|
||||
addr: vm_monitor_addr.clone(),
|
||||
})),
|
||||
token.clone(),
|
||||
))
|
||||
@@ -670,6 +702,105 @@ fn deinit_and_exit(WaitPostgresResult { exit_code }: WaitPostgresResult) -> ! {
|
||||
exit(exit_code.unwrap_or(1))
|
||||
}
|
||||
|
||||
fn cli() -> clap::Command {
|
||||
// Env variable is set by `cargo`
|
||||
let version = option_env!("CARGO_PKG_VERSION").unwrap_or("unknown");
|
||||
clap::Command::new("compute_ctl")
|
||||
.version(version)
|
||||
.arg(
|
||||
Arg::new("http-port")
|
||||
.long("http-port")
|
||||
.value_name("HTTP_PORT")
|
||||
.default_value("3080")
|
||||
.value_parser(clap::value_parser!(u16))
|
||||
.required(false),
|
||||
)
|
||||
.arg(
|
||||
Arg::new("connstr")
|
||||
.short('C')
|
||||
.long("connstr")
|
||||
.value_name("DATABASE_URL")
|
||||
.required(true),
|
||||
)
|
||||
.arg(
|
||||
Arg::new("pgdata")
|
||||
.short('D')
|
||||
.long("pgdata")
|
||||
.value_name("DATADIR")
|
||||
.required(true),
|
||||
)
|
||||
.arg(
|
||||
Arg::new("pgbin")
|
||||
.short('b')
|
||||
.long("pgbin")
|
||||
.default_value("postgres")
|
||||
.value_name("POSTGRES_PATH"),
|
||||
)
|
||||
.arg(
|
||||
Arg::new("spec")
|
||||
.short('s')
|
||||
.long("spec")
|
||||
.value_name("SPEC_JSON"),
|
||||
)
|
||||
.arg(
|
||||
Arg::new("spec-path")
|
||||
.short('S')
|
||||
.long("spec-path")
|
||||
.value_name("SPEC_PATH"),
|
||||
)
|
||||
.arg(
|
||||
Arg::new("compute-id")
|
||||
.short('i')
|
||||
.long("compute-id")
|
||||
.value_name("COMPUTE_ID"),
|
||||
)
|
||||
.arg(
|
||||
Arg::new("control-plane-uri")
|
||||
.short('p')
|
||||
.long("control-plane-uri")
|
||||
.value_name("CONTROL_PLANE_API_BASE_URI"),
|
||||
)
|
||||
.arg(
|
||||
Arg::new("remote-ext-config")
|
||||
.short('r')
|
||||
.long("remote-ext-config")
|
||||
.value_name("REMOTE_EXT_CONFIG"),
|
||||
)
|
||||
// TODO(fprasx): we currently have default arguments because the cloud PR
|
||||
// to pass them in hasn't been merged yet. We should get rid of them once
|
||||
// the PR is merged.
|
||||
.arg(
|
||||
Arg::new("vm-monitor-addr")
|
||||
.long("vm-monitor-addr")
|
||||
.default_value("0.0.0.0:10301")
|
||||
.value_name("VM_MONITOR_ADDR"),
|
||||
)
|
||||
.arg(
|
||||
Arg::new("cgroup")
|
||||
.long("cgroup")
|
||||
.default_value("neon-postgres")
|
||||
.value_name("CGROUP"),
|
||||
)
|
||||
.arg(
|
||||
Arg::new("filecache-connstr")
|
||||
.long("filecache-connstr")
|
||||
.default_value(
|
||||
"host=localhost port=5432 dbname=postgres user=cloud_admin sslmode=disable application_name=vm-monitor",
|
||||
)
|
||||
.value_name("FILECACHE_CONNSTR"),
|
||||
)
|
||||
.arg(
|
||||
Arg::new("resize-swap-on-bind")
|
||||
.long("resize-swap-on-bind")
|
||||
.action(clap::ArgAction::SetTrue),
|
||||
)
|
||||
.arg(
|
||||
Arg::new("set-disk-quota-for-fs")
|
||||
.long("set-disk-quota-for-fs")
|
||||
.value_name("SET_DISK_QUOTA_FOR_FS")
|
||||
)
|
||||
}
|
||||
|
||||
/// When compute_ctl is killed, send also termination signal to sync-safekeepers
|
||||
/// to prevent leakage. TODO: it is better to convert compute_ctl to async and
|
||||
/// wait for termination which would be easy then.
|
||||
@@ -679,14 +810,7 @@ fn handle_exit_signal(sig: i32) {
    exit(1);
}

#[cfg(test)]
mod test {
    use clap::CommandFactory;

    use super::Cli;

    #[test]
    fn verify_cli() {
        Cli::command().debug_assert()
    }
    #[test]
    fn verify_cli() {
        cli().debug_assert()
    }

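// Editor's illustration (not part of the diff): the hunk above replaces the
// derive-based `Cli::command().debug_assert()` check with the builder-style
// `cli().debug_assert()`. A self-contained sketch of that builder pattern,
// with invented command and argument names, looks roughly like this:
use clap::{Arg, Command};

fn example_cli() -> Command {
    Command::new("example_ctl")
        .arg(
            Arg::new("connstr")
                .short('C')
                .long("connstr")
                .value_name("DATABASE_URL")
                .required(true),
        )
        .arg(
            Arg::new("http-port")
                .long("http-port")
                .default_value("3080")
                .value_parser(clap::value_parser!(u16)),
        )
}

#[test]
fn example_cli_is_well_formed() {
    // Panics if the argument definitions are inconsistent (duplicate ids,
    // conflicting short flags, and so on), which is what `debug_assert`
    // checks at test time.
    example_cli().debug_assert();
}
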
@@ -31,7 +31,7 @@ use camino::{Utf8Path, Utf8PathBuf};
|
||||
use clap::Parser;
|
||||
use compute_tools::extension_server::{get_pg_version, PostgresMajorVersion};
|
||||
use nix::unistd::Pid;
|
||||
use tracing::{error, info, info_span, warn, Instrument};
|
||||
use tracing::{info, info_span, warn, Instrument};
|
||||
use utils::fs_ext::is_directory_empty;
|
||||
|
||||
#[path = "fast_import/aws_s3_sync.rs"]
|
||||
@@ -41,25 +41,16 @@ mod child_stdio_to_log;
|
||||
#[path = "fast_import/s3_uri.rs"]
|
||||
mod s3_uri;
|
||||
|
||||
const PG_WAIT_TIMEOUT: std::time::Duration = std::time::Duration::from_secs(600);
|
||||
const PG_WAIT_RETRY_INTERVAL: std::time::Duration = std::time::Duration::from_millis(300);
|
||||
|
||||
#[derive(clap::Parser)]
|
||||
struct Args {
|
||||
#[clap(long)]
|
||||
working_directory: Utf8PathBuf,
|
||||
#[clap(long, env = "NEON_IMPORTER_S3_PREFIX")]
|
||||
s3_prefix: Option<s3_uri::S3Uri>,
|
||||
#[clap(long)]
|
||||
source_connection_string: Option<String>,
|
||||
#[clap(short, long)]
|
||||
interactive: bool,
|
||||
s3_prefix: s3_uri::S3Uri,
|
||||
#[clap(long)]
|
||||
pg_bin_dir: Utf8PathBuf,
|
||||
#[clap(long)]
|
||||
pg_lib_dir: Utf8PathBuf,
|
||||
#[clap(long)]
|
||||
pg_port: Option<u16>, // port to run postgres on, 5432 is default
|
||||
}
|
||||
|
||||
#[serde_with::serde_as]
|
||||
@@ -76,13 +67,6 @@ enum EncryptionSecret {
|
||||
KMS { key_id: String },
|
||||
}
|
||||
|
||||
// copied from pageserver_api::config::defaults::DEFAULT_LOCALE to avoid dependency just for a constant
|
||||
const DEFAULT_LOCALE: &str = if cfg!(target_os = "macos") {
|
||||
"C"
|
||||
} else {
|
||||
"C.UTF-8"
|
||||
};
|
||||
|
||||
#[tokio::main]
|
||||
pub(crate) async fn main() -> anyhow::Result<()> {
|
||||
utils::logging::init(
|
||||
@@ -93,74 +77,30 @@ pub(crate) async fn main() -> anyhow::Result<()> {
|
||||
|
||||
info!("starting");
|
||||
|
||||
let args = Args::parse();
|
||||
let Args {
|
||||
working_directory,
|
||||
s3_prefix,
|
||||
pg_bin_dir,
|
||||
pg_lib_dir,
|
||||
} = Args::parse();
|
||||
|
||||
// Validate arguments
|
||||
if args.s3_prefix.is_none() && args.source_connection_string.is_none() {
|
||||
anyhow::bail!("either s3_prefix or source_connection_string must be specified");
|
||||
}
|
||||
if args.s3_prefix.is_some() && args.source_connection_string.is_some() {
|
||||
anyhow::bail!("only one of s3_prefix or source_connection_string can be specified");
|
||||
}
|
||||
let aws_config = aws_config::load_defaults(BehaviorVersion::v2024_03_28()).await;
|
||||
|
||||
let working_directory = args.working_directory;
|
||||
let pg_bin_dir = args.pg_bin_dir;
|
||||
let pg_lib_dir = args.pg_lib_dir;
|
||||
let pg_port = args.pg_port.unwrap_or_else(|| {
|
||||
info!("pg_port not specified, using default 5432");
|
||||
5432
|
||||
});
|
||||
|
||||
// Initialize AWS clients only if s3_prefix is specified
|
||||
let (aws_config, kms_client) = if args.s3_prefix.is_some() {
|
||||
let config = aws_config::load_defaults(BehaviorVersion::v2024_03_28()).await;
|
||||
let kms = aws_sdk_kms::Client::new(&config);
|
||||
(Some(config), Some(kms))
|
||||
} else {
|
||||
(None, None)
|
||||
};
|
||||
|
||||
// Get source connection string either from S3 spec or direct argument
|
||||
let source_connection_string = if let Some(s3_prefix) = &args.s3_prefix {
|
||||
let spec: Spec = {
|
||||
let spec_key = s3_prefix.append("/spec.json");
|
||||
let s3_client = aws_sdk_s3::Client::new(aws_config.as_ref().unwrap());
|
||||
let object = s3_client
|
||||
.get_object()
|
||||
.bucket(&spec_key.bucket)
|
||||
.key(spec_key.key)
|
||||
.send()
|
||||
.await
|
||||
.context("get spec from s3")?
|
||||
.body
|
||||
.collect()
|
||||
.await
|
||||
.context("download spec body")?;
|
||||
serde_json::from_slice(&object.into_bytes()).context("parse spec as json")?
|
||||
};
|
||||
|
||||
match spec.encryption_secret {
|
||||
EncryptionSecret::KMS { key_id } => {
|
||||
let mut output = kms_client
|
||||
.unwrap()
|
||||
.decrypt()
|
||||
.key_id(key_id)
|
||||
.ciphertext_blob(aws_sdk_s3::primitives::Blob::new(
|
||||
spec.source_connstring_ciphertext_base64,
|
||||
))
|
||||
.send()
|
||||
.await
|
||||
.context("decrypt source connection string")?;
|
||||
let plaintext = output
|
||||
.plaintext
|
||||
.take()
|
||||
.context("get plaintext source connection string")?;
|
||||
String::from_utf8(plaintext.into_inner())
|
||||
.context("parse source connection string as utf8")?
|
||||
}
|
||||
}
|
||||
} else {
|
||||
args.source_connection_string.unwrap()
|
||||
let spec: Spec = {
|
||||
let spec_key = s3_prefix.append("/spec.json");
|
||||
let s3_client = aws_sdk_s3::Client::new(&aws_config);
|
||||
let object = s3_client
|
||||
.get_object()
|
||||
.bucket(&spec_key.bucket)
|
||||
.key(spec_key.key)
|
||||
.send()
|
||||
.await
|
||||
.context("get spec from s3")?
|
||||
.body
|
||||
.collect()
|
||||
.await
|
||||
.context("download spec body")?;
|
||||
serde_json::from_slice(&object.into_bytes()).context("parse spec as json")?
|
||||
};
|
||||
|
||||
match tokio::fs::create_dir(&working_directory).await {
|
||||
@@ -183,6 +123,15 @@ pub(crate) async fn main() -> anyhow::Result<()> {
|
||||
.await
|
||||
.context("create pgdata directory")?;
|
||||
|
||||
//
|
||||
// Setup clients
|
||||
//
|
||||
let aws_config = aws_config::load_defaults(BehaviorVersion::v2024_03_28()).await;
|
||||
let kms_client = aws_sdk_kms::Client::new(&aws_config);
|
||||
|
||||
//
|
||||
// Initialize pgdata
|
||||
//
|
||||
let pgbin = pg_bin_dir.join("postgres");
|
||||
let pg_version = match get_pg_version(pgbin.as_ref()) {
|
||||
PostgresMajorVersion::V14 => 14,
|
||||
@@ -193,7 +142,7 @@ pub(crate) async fn main() -> anyhow::Result<()> {
|
||||
let superuser = "cloud_admin"; // XXX: this shouldn't be hard-coded
|
||||
postgres_initdb::do_run_initdb(postgres_initdb::RunInitdbArgs {
|
||||
superuser,
|
||||
locale: DEFAULT_LOCALE, // XXX: this shouldn't be hard-coded,
|
||||
locale: "en_US.UTF-8", // XXX: this shouldn't be hard-coded,
|
||||
pg_version,
|
||||
initdb_bin: pg_bin_dir.join("initdb").as_ref(),
|
||||
library_search_path: &pg_lib_dir, // TODO: is this right? Prob works in compute image, not sure about neon_local.
|
||||
@@ -210,7 +159,6 @@ pub(crate) async fn main() -> anyhow::Result<()> {
|
||||
let mut postgres_proc = tokio::process::Command::new(pgbin)
|
||||
.arg("-D")
|
||||
.arg(&pgdata_dir)
|
||||
.args(["-p", &format!("{pg_port}")])
|
||||
.args(["-c", "wal_level=minimal"])
|
||||
.args(["-c", "shared_buffers=10GB"])
|
||||
.args(["-c", "max_wal_senders=0"])
|
||||
@@ -222,15 +170,8 @@ pub(crate) async fn main() -> anyhow::Result<()> {
|
||||
.args(["-c", &format!("max_parallel_workers={nproc}")])
|
||||
.args(["-c", &format!("max_parallel_workers_per_gather={nproc}")])
|
||||
.args(["-c", &format!("max_worker_processes={nproc}")])
|
||||
.args([
|
||||
"-c",
|
||||
&format!(
|
||||
"effective_io_concurrency={}",
|
||||
if cfg!(target_os = "macos") { 0 } else { 100 }
|
||||
),
|
||||
])
|
||||
.args(["-c", "effective_io_concurrency=100"])
|
||||
.env_clear()
|
||||
.env("LD_LIBRARY_PATH", &pg_lib_dir)
|
||||
.stdout(std::process::Stdio::piped())
|
||||
.stderr(std::process::Stdio::piped())
|
||||
.spawn()
|
||||
@@ -244,58 +185,44 @@ pub(crate) async fn main() -> anyhow::Result<()> {
|
||||
)
|
||||
.instrument(info_span!("postgres")),
|
||||
);
|
||||
|
||||
// Create neondb database in the running postgres
|
||||
let restore_pg_connstring =
|
||||
format!("host=localhost port={pg_port} user={superuser} dbname=postgres");
|
||||
|
||||
let start_time = std::time::Instant::now();
|
||||
|
||||
format!("host=localhost port=5432 user={superuser} dbname=postgres");
|
||||
loop {
|
||||
if start_time.elapsed() > PG_WAIT_TIMEOUT {
|
||||
error!(
|
||||
"timeout exceeded: failed to poll postgres and create database within 10 minutes"
|
||||
);
|
||||
std::process::exit(1);
|
||||
}
|
||||
|
||||
match tokio_postgres::connect(&restore_pg_connstring, tokio_postgres::NoTls).await {
|
||||
Ok((client, connection)) => {
|
||||
// Spawn the connection handling task to maintain the connection
|
||||
tokio::spawn(async move {
|
||||
if let Err(e) = connection.await {
|
||||
warn!("connection error: {}", e);
|
||||
}
|
||||
});
|
||||
|
||||
match client.simple_query("CREATE DATABASE neondb;").await {
|
||||
Ok(_) => {
|
||||
info!("created neondb database");
|
||||
break;
|
||||
}
|
||||
Err(e) => {
|
||||
warn!(
|
||||
"failed to create database: {}, retying in {}s",
|
||||
e,
|
||||
PG_WAIT_RETRY_INTERVAL.as_secs_f32()
|
||||
);
|
||||
tokio::time::sleep(PG_WAIT_RETRY_INTERVAL).await;
|
||||
continue;
|
||||
}
|
||||
}
|
||||
}
|
||||
Err(_) => {
|
||||
info!(
|
||||
"postgres not ready yet, retrying in {}s",
|
||||
PG_WAIT_RETRY_INTERVAL.as_secs_f32()
|
||||
);
|
||||
tokio::time::sleep(PG_WAIT_RETRY_INTERVAL).await;
|
||||
continue;
|
||||
}
|
||||
let res = tokio_postgres::connect(&restore_pg_connstring, tokio_postgres::NoTls).await;
|
||||
if res.is_ok() {
|
||||
info!("postgres is ready, could connect to it");
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
let restore_pg_connstring = restore_pg_connstring.replace("dbname=postgres", "dbname=neondb");
|
||||
//
|
||||
// Decrypt connection string
|
||||
//
|
||||
let source_connection_string = {
|
||||
match spec.encryption_secret {
|
||||
EncryptionSecret::KMS { key_id } => {
|
||||
let mut output = kms_client
|
||||
.decrypt()
|
||||
.key_id(key_id)
|
||||
.ciphertext_blob(aws_sdk_s3::primitives::Blob::new(
|
||||
spec.source_connstring_ciphertext_base64,
|
||||
))
|
||||
.send()
|
||||
.await
|
||||
.context("decrypt source connection string")?;
|
||||
let plaintext = output
|
||||
.plaintext
|
||||
.take()
|
||||
.context("get plaintext source connection string")?;
|
||||
String::from_utf8(plaintext.into_inner())
|
||||
.context("parse source connection string as utf8")?
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
//
|
||||
// Start the work
|
||||
//
|
||||
|
||||
let dumpdir = working_directory.join("dumpdir");
|
||||
|
||||
@@ -329,7 +256,6 @@ pub(crate) async fn main() -> anyhow::Result<()> {
|
||||
.arg(&source_connection_string)
|
||||
// how we run it
|
||||
.env_clear()
|
||||
.env("LD_LIBRARY_PATH", &pg_lib_dir)
|
||||
.kill_on_drop(true)
|
||||
.stdout(std::process::Stdio::piped())
|
||||
.stderr(std::process::Stdio::piped())
|
||||
@@ -363,7 +289,6 @@ pub(crate) async fn main() -> anyhow::Result<()> {
|
||||
.arg(&dumpdir)
|
||||
// how we run it
|
||||
.env_clear()
|
||||
.env("LD_LIBRARY_PATH", &pg_lib_dir)
|
||||
.kill_on_drop(true)
|
||||
.stdout(std::process::Stdio::piped())
|
||||
.stderr(std::process::Stdio::piped())
|
||||
@@ -385,12 +310,6 @@ pub(crate) async fn main() -> anyhow::Result<()> {
|
||||
}
|
||||
}
|
||||
|
||||
// If interactive mode, wait for Ctrl+C
|
||||
if args.interactive {
|
||||
info!("Running in interactive mode. Press Ctrl+C to shut down.");
|
||||
tokio::signal::ctrl_c().await.context("wait for ctrl-c")?;
|
||||
}
|
||||
|
||||
info!("shutdown postgres");
|
||||
{
|
||||
nix::sys::signal::kill(
|
||||
@@ -406,24 +325,21 @@ pub(crate) async fn main() -> anyhow::Result<()> {
|
||||
.context("wait for postgres to shut down")?;
|
||||
}
|
||||
|
||||
// Only sync if s3_prefix was specified
|
||||
if let Some(s3_prefix) = args.s3_prefix {
|
||||
info!("upload pgdata");
|
||||
aws_s3_sync::sync(Utf8Path::new(&pgdata_dir), &s3_prefix.append("/pgdata/"))
|
||||
.await
|
||||
.context("sync dump directory to destination")?;
|
||||
info!("upload pgdata");
|
||||
aws_s3_sync::sync(Utf8Path::new(&pgdata_dir), &s3_prefix.append("/pgdata/"))
|
||||
.await
|
||||
.context("sync dump directory to destination")?;
|
||||
|
||||
info!("write status");
|
||||
{
|
||||
let status_dir = working_directory.join("status");
|
||||
std::fs::create_dir(&status_dir).context("create status directory")?;
|
||||
let status_file = status_dir.join("pgdata");
|
||||
std::fs::write(&status_file, serde_json::json!({"done": true}).to_string())
|
||||
.context("write status file")?;
|
||||
aws_s3_sync::sync(&status_dir, &s3_prefix.append("/status/"))
|
||||
.await
|
||||
.context("sync status directory to destination")?;
|
||||
}
|
||||
info!("write status");
|
||||
{
|
||||
let status_dir = working_directory.join("status");
|
||||
std::fs::create_dir(&status_dir).context("create status directory")?;
|
||||
let status_file = status_dir.join("pgdata");
|
||||
std::fs::write(&status_file, serde_json::json!({"done": true}).to_string())
|
||||
.context("write status file")?;
|
||||
aws_s3_sync::sync(&status_dir, &s3_prefix.append("/status/"))
|
||||
.await
|
||||
.context("sync status directory to destination")?;
|
||||
}
|
||||
|
||||
Ok(())
|
||||
|
||||
@@ -41,14 +41,14 @@ use crate::local_proxy;
|
||||
use crate::pg_helpers::*;
|
||||
use crate::spec::*;
|
||||
use crate::spec_apply::ApplySpecPhase::{
|
||||
CreateAndAlterDatabases, CreateAndAlterRoles, CreateAvailabilityCheck, CreateSchemaNeon,
|
||||
CreateSuperUser, DropInvalidDatabases, DropRoles, FinalizeDropLogicalSubscriptions,
|
||||
HandleNeonExtension, HandleOtherExtensions, RenameAndDeleteDatabases, RenameRoles,
|
||||
RunInEachDatabase,
|
||||
CreateAndAlterDatabases, CreateAndAlterRoles, CreateAvailabilityCheck, CreateSuperUser,
|
||||
DropInvalidDatabases, DropRoles, HandleNeonExtension, HandleOtherExtensions,
|
||||
RenameAndDeleteDatabases, RenameRoles, RunInEachDatabase,
|
||||
};
|
||||
use crate::spec_apply::PerDatabasePhase;
|
||||
use crate::spec_apply::PerDatabasePhase::{
|
||||
ChangeSchemaPerms, DeleteDBRoleReferences, DropLogicalSubscriptions, HandleAnonExtension,
|
||||
ChangeSchemaPerms, DeleteDBRoleReferences, DropSubscriptionsForDeletedDatabases,
|
||||
HandleAnonExtension,
|
||||
};
|
||||
use crate::spec_apply::{apply_operations, MutableApplyContext, DB};
|
||||
use crate::sync_sk::{check_if_synced, ping_safekeeper};
|
||||
@@ -340,15 +340,6 @@ impl ComputeNode {
|
||||
self.state.lock().unwrap().status
|
||||
}
|
||||
|
||||
pub fn get_timeline_id(&self) -> Option<TimelineId> {
|
||||
self.state
|
||||
.lock()
|
||||
.unwrap()
|
||||
.pspec
|
||||
.as_ref()
|
||||
.map(|s| s.timeline_id)
|
||||
}
|
||||
|
||||
// Remove `pgdata` directory and create it again with right permissions.
|
||||
fn create_pgdata(&self) -> Result<()> {
|
||||
// Ignore removal error, likely it is a 'No such file or directory (os error 2)'.
|
||||
@@ -938,48 +929,6 @@ impl ComputeNode {
|
||||
.map(|role| (role.name.clone(), role))
|
||||
.collect::<HashMap<String, Role>>();
|
||||
|
||||
// Check if we need to drop subscriptions before starting the endpoint.
|
||||
//
|
||||
// It is important to do this operation exactly once when endpoint starts on a new branch.
|
||||
// Otherwise, we may drop not inherited, but newly created subscriptions.
|
||||
//
|
||||
// We cannot rely only on spec.drop_subscriptions_before_start flag,
|
||||
// because if for some reason compute restarts inside VM,
|
||||
// it will start again with the same spec and flag value.
|
||||
//
|
||||
// To handle this, we save the fact of the operation in the database
|
||||
// in the neon.drop_subscriptions_done table.
|
||||
// If the table does not exist, we assume that the operation was never performed, so we must do it.
|
||||
// If table exists, we check if the operation was performed on the current timelilne.
|
||||
//
|
||||
let mut drop_subscriptions_done = false;
|
||||
|
||||
if spec.drop_subscriptions_before_start {
|
||||
let timeline_id = self.get_timeline_id().context("timeline_id must be set")?;
|
||||
let query = format!("select 1 from neon.drop_subscriptions_done where timeline_id = '{}'", timeline_id);
|
||||
|
||||
info!("Checking if drop subscription operation was already performed for timeline_id: {}", timeline_id);
|
||||
|
||||
drop_subscriptions_done = match
|
||||
client.simple_query(&query).await {
|
||||
Ok(result) => {
|
||||
matches!(&result[0], postgres::SimpleQueryMessage::Row(_))
|
||||
},
|
||||
Err(e) =>
|
||||
{
|
||||
match e.code() {
|
||||
Some(&SqlState::UNDEFINED_TABLE) => false,
|
||||
_ => {
|
||||
// We don't expect any other error here, except for the schema/table not existing
|
||||
error!("Error checking if drop subscription operation was already performed: {}", e);
|
||||
return Err(e.into());
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
let jwks_roles = Arc::new(
|
||||
spec.as_ref()
|
||||
.local_proxy_config
|
||||
@@ -1047,7 +996,7 @@ impl ComputeNode {
|
||||
jwks_roles.clone(),
|
||||
concurrency_token.clone(),
|
||||
db,
|
||||
[DropLogicalSubscriptions].to_vec(),
|
||||
[DropSubscriptionsForDeletedDatabases].to_vec(),
|
||||
);
|
||||
|
||||
Ok(spawn(fut))
|
||||
@@ -1075,7 +1024,6 @@ impl ComputeNode {
|
||||
CreateAndAlterRoles,
|
||||
RenameAndDeleteDatabases,
|
||||
CreateAndAlterDatabases,
|
||||
CreateSchemaNeon,
|
||||
] {
|
||||
info!("Applying phase {:?}", &phase);
|
||||
apply_operations(
|
||||
@@ -1116,17 +1064,6 @@ impl ComputeNode {
|
||||
}
|
||||
|
||||
let conf = Arc::new(conf);
|
||||
let mut phases = vec![
|
||||
DeleteDBRoleReferences,
|
||||
ChangeSchemaPerms,
|
||||
HandleAnonExtension,
|
||||
];
|
||||
|
||||
if spec.drop_subscriptions_before_start && !drop_subscriptions_done {
|
||||
info!("Adding DropLogicalSubscriptions phase because drop_subscriptions_before_start is set");
|
||||
phases.push(DropLogicalSubscriptions);
|
||||
}
|
||||
|
||||
let fut = Self::apply_spec_sql_db(
|
||||
spec.clone(),
|
||||
conf,
|
||||
@@ -1134,7 +1071,12 @@ impl ComputeNode {
|
||||
jwks_roles.clone(),
|
||||
concurrency_token.clone(),
|
||||
db,
|
||||
phases,
|
||||
[
|
||||
DeleteDBRoleReferences,
|
||||
ChangeSchemaPerms,
|
||||
HandleAnonExtension,
|
||||
]
|
||||
.to_vec(),
|
||||
);
|
||||
|
||||
Ok(spawn(fut))
|
||||
@@ -1146,20 +1088,12 @@ impl ComputeNode {
|
||||
handle.await??;
|
||||
}
|
||||
|
||||
let mut phases = vec![
|
||||
for phase in vec![
|
||||
HandleOtherExtensions,
|
||||
HandleNeonExtension, // This step depends on CreateSchemaNeon
|
||||
HandleNeonExtension,
|
||||
CreateAvailabilityCheck,
|
||||
DropRoles,
|
||||
];
|
||||
|
||||
// This step depends on CreateSchemaNeon
|
||||
if spec.drop_subscriptions_before_start && !drop_subscriptions_done {
|
||||
info!("Adding FinalizeDropLogicalSubscriptions phase because drop_subscriptions_before_start is set");
|
||||
phases.push(FinalizeDropLogicalSubscriptions);
|
||||
}
|
||||
|
||||
for phase in phases {
|
||||
] {
|
||||
debug!("Applying phase {:?}", &phase);
|
||||
apply_operations(
|
||||
spec.clone(),
|
||||
@@ -1529,14 +1463,6 @@ impl ComputeNode {
|
||||
Ok(())
|
||||
},
|
||||
)?;
|
||||
|
||||
let postgresql_conf_path = pgdata_path.join("postgresql.conf");
|
||||
if config::line_in_file(
|
||||
&postgresql_conf_path,
|
||||
"neon.disable_logical_replication_subscribers=false",
|
||||
)? {
|
||||
info!("updated postgresql.conf to set neon.disable_logical_replication_subscribers=false");
|
||||
}
|
||||
self.pg_reload_conf()?;
|
||||
}
|
||||
self.post_apply_config()?;
|
||||
|
||||
@@ -129,13 +129,6 @@ pub fn write_postgres_conf(
|
||||
|
||||
writeln!(file, "neon.extension_server_port={}", extension_server_port)?;
|
||||
|
||||
if spec.drop_subscriptions_before_start {
|
||||
writeln!(file, "neon.disable_logical_replication_subscribers=true")?;
|
||||
} else {
|
||||
// be explicit about the default value
|
||||
writeln!(file, "neon.disable_logical_replication_subscribers=false")?;
|
||||
}
|
||||
|
||||
// This is essential to keep this line at the end of the file,
|
||||
// because it is intended to override any settings above.
|
||||
writeln!(file, "include_if_exists = 'compute_ctl_temp_override.conf'")?;
|
||||
|
||||
@@ -85,8 +85,6 @@ use tracing::info;
|
||||
use tracing::log::warn;
|
||||
use zstd::stream::read::Decoder;
|
||||
|
||||
use crate::metrics::{REMOTE_EXT_REQUESTS_TOTAL, UNKNOWN_HTTP_STATUS};
|
||||
|
||||
fn get_pg_config(argument: &str, pgbin: &str) -> String {
|
||||
// gives the result of `pg_config [argument]`
|
||||
// where argument is a flag like `--version` or `--sharedir`
|
||||
@@ -258,60 +256,23 @@ pub fn create_control_files(remote_extensions: &RemoteExtSpec, pgbin: &str) {
|
||||
async fn download_extension_tar(ext_remote_storage: &str, ext_path: &str) -> Result<Bytes> {
|
||||
let uri = format!("{}/{}", ext_remote_storage, ext_path);
|
||||
|
||||
info!("Download extension {} from uri {}", ext_path, uri);
|
||||
info!("Download extension {:?} from uri {:?}", ext_path, uri);
|
||||
|
||||
match do_extension_server_request(&uri).await {
|
||||
Ok(resp) => {
|
||||
info!("Successfully downloaded remote extension data {}", ext_path);
|
||||
REMOTE_EXT_REQUESTS_TOTAL
|
||||
.with_label_values(&[&StatusCode::OK.to_string()])
|
||||
.inc();
|
||||
Ok(resp)
|
||||
}
|
||||
Err((msg, status)) => {
|
||||
REMOTE_EXT_REQUESTS_TOTAL
|
||||
.with_label_values(&[&status])
|
||||
.inc();
|
||||
bail!(msg);
|
||||
}
|
||||
}
|
||||
}
|
||||
let resp = reqwest::get(uri).await?;
|
||||
|
||||
// Do a single remote extensions server request.
|
||||
// Return result or (error message + stringified status code) in case of any failures.
|
||||
async fn do_extension_server_request(uri: &str) -> Result<Bytes, (String, String)> {
|
||||
let resp = reqwest::get(uri).await.map_err(|e| {
|
||||
(
|
||||
format!(
|
||||
"could not perform remote extensions server request: {:?}",
|
||||
e
|
||||
),
|
||||
UNKNOWN_HTTP_STATUS.to_string(),
|
||||
)
|
||||
})?;
|
||||
let status = resp.status();
|
||||
|
||||
match status {
|
||||
match resp.status() {
|
||||
StatusCode::OK => match resp.bytes().await {
|
||||
Ok(resp) => Ok(resp),
|
||||
Err(e) => Err((
|
||||
format!("could not read remote extensions server response: {:?}", e),
|
||||
// It's fine to return and report error with status as 200 OK,
|
||||
// because we still failed to read the response.
|
||||
status.to_string(),
|
||||
)),
|
||||
Ok(resp) => {
|
||||
info!("Download extension {:?} completed successfully", ext_path);
|
||||
Ok(resp)
|
||||
}
|
||||
Err(e) => bail!("could not deserialize remote extension response: {}", e),
|
||||
},
|
||||
StatusCode::SERVICE_UNAVAILABLE => Err((
|
||||
"remote extensions server is temporarily unavailable".to_string(),
|
||||
status.to_string(),
|
||||
)),
|
||||
_ => Err((
|
||||
format!(
|
||||
"unexpected remote extensions server response status code: {}",
|
||||
status
|
||||
),
|
||||
status.to_string(),
|
||||
)),
|
||||
StatusCode::SERVICE_UNAVAILABLE => bail!("remote extension is temporarily unavailable"),
|
||||
_ => bail!(
|
||||
"unexpected remote extension response status code: {}",
|
||||
resp.status()
|
||||
),
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -1,6 +1,9 @@
|
||||
use std::ops::{Deref, DerefMut};
|
||||
|
||||
use axum::extract::{rejection::JsonRejection, FromRequest, Request};
|
||||
use axum::{
|
||||
async_trait,
|
||||
extract::{rejection::JsonRejection, FromRequest, Request},
|
||||
};
|
||||
use compute_api::responses::GenericAPIError;
|
||||
use http::StatusCode;
|
||||
|
||||
@@ -9,6 +12,7 @@ use http::StatusCode;
|
||||
#[derive(Debug, Clone, Copy, Default)]
|
||||
pub(crate) struct Json<T>(pub T);
|
||||
|
||||
#[async_trait]
|
||||
impl<S, T> FromRequest<S> for Json<T>
|
||||
where
|
||||
axum::Json<T>: FromRequest<S, Rejection = JsonRejection>,
|
||||
|
||||
@@ -1,6 +1,9 @@
|
||||
use std::ops::{Deref, DerefMut};
|
||||
|
||||
use axum::extract::{rejection::PathRejection, FromRequestParts};
|
||||
use axum::{
|
||||
async_trait,
|
||||
extract::{rejection::PathRejection, FromRequestParts},
|
||||
};
|
||||
use compute_api::responses::GenericAPIError;
|
||||
use http::{request::Parts, StatusCode};
|
||||
|
||||
@@ -9,6 +12,7 @@ use http::{request::Parts, StatusCode};
|
||||
#[derive(Debug, Clone, Copy, Default)]
|
||||
pub(crate) struct Path<T>(pub T);
|
||||
|
||||
#[async_trait]
|
||||
impl<S, T> FromRequestParts<S> for Path<T>
|
||||
where
|
||||
axum::extract::Path<T>: FromRequestParts<S, Rejection = PathRejection>,
|
||||
|
||||
@@ -1,6 +1,9 @@
|
||||
use std::ops::{Deref, DerefMut};
|
||||
|
||||
use axum::extract::{rejection::QueryRejection, FromRequestParts};
|
||||
use axum::{
|
||||
async_trait,
|
||||
extract::{rejection::QueryRejection, FromRequestParts},
|
||||
};
|
||||
use compute_api::responses::GenericAPIError;
|
||||
use http::{request::Parts, StatusCode};
|
||||
|
||||
@@ -9,6 +12,7 @@ use http::{request::Parts, StatusCode};
|
||||
#[derive(Debug, Clone, Copy, Default)]
|
||||
pub(crate) struct Query<T>(pub T);
|
||||
|
||||
#[async_trait]
|
||||
impl<S, T> FromRequestParts<S> for Query<T>
|
||||
where
|
||||
axum::extract::Query<T>: FromRequestParts<S, Rejection = QueryRejection>,
|
||||
|
||||
@@ -68,6 +68,35 @@ paths:
|
||||
schema:
|
||||
$ref: "#/components/schemas/ComputeInsights"
|
||||
|
||||
/installed_extensions:
|
||||
get:
|
||||
tags:
|
||||
- Info
|
||||
summary: Get installed extensions.
|
||||
description: ""
|
||||
operationId: getInstalledExtensions
|
||||
responses:
|
||||
200:
|
||||
description: List of installed extensions
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
$ref: "#/components/schemas/InstalledExtensions"
|
||||
/info:
|
||||
get:
|
||||
tags:
|
||||
- Info
|
||||
summary: Get info about the compute pod / VM.
|
||||
description: ""
|
||||
operationId: getInfo
|
||||
responses:
|
||||
200:
|
||||
description: Info
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
$ref: "#/components/schemas/Info"
|
||||
|
||||
/dbs_and_roles:
|
||||
get:
|
||||
tags:
|
||||
|
||||
@@ -17,8 +17,7 @@ use crate::{
|
||||
|
||||
#[derive(Debug, Clone, Deserialize)]
|
||||
pub(in crate::http) struct ExtensionServerParams {
|
||||
#[serde(default)]
|
||||
is_library: bool,
|
||||
is_library: Option<bool>,
|
||||
}
|
||||
|
||||
/// Download a remote extension.
|
||||
@@ -52,7 +51,7 @@ pub(in crate::http) async fn download_extension(
|
||||
|
||||
remote_extensions.get_ext(
|
||||
&filename,
|
||||
params.is_library,
|
||||
params.is_library.unwrap_or(false),
|
||||
&compute.build_tag,
|
||||
&compute.pgversion,
|
||||
)
|
||||
|
||||
11
compute_tools/src/http/routes/info.rs
Normal file
11
compute_tools/src/http/routes/info.rs
Normal file
@@ -0,0 +1,11 @@
|
||||
use axum::response::Response;
|
||||
use compute_api::responses::InfoResponse;
|
||||
use http::StatusCode;
|
||||
|
||||
use crate::http::JsonResponse;
|
||||
|
||||
/// Get information about the physical characteristics about the compute.
|
||||
pub(in crate::http) async fn get_info() -> Response {
|
||||
let num_cpus = num_cpus::get_physical();
|
||||
JsonResponse::success(StatusCode::OK, &InfoResponse { num_cpus })
|
||||
}
|
||||
33
compute_tools/src/http/routes/installed_extensions.rs
Normal file
33
compute_tools/src/http/routes/installed_extensions.rs
Normal file
@@ -0,0 +1,33 @@
|
||||
use std::sync::Arc;
|
||||
|
||||
use axum::{extract::State, response::Response};
|
||||
use compute_api::responses::ComputeStatus;
|
||||
use http::StatusCode;
|
||||
use tokio::task;
|
||||
|
||||
use crate::{compute::ComputeNode, http::JsonResponse, installed_extensions};
|
||||
|
||||
/// Get a list of installed extensions.
|
||||
pub(in crate::http) async fn get_installed_extensions(
|
||||
State(compute): State<Arc<ComputeNode>>,
|
||||
) -> Response {
|
||||
let status = compute.get_status();
|
||||
if status != ComputeStatus::Running {
|
||||
return JsonResponse::invalid_status(status);
|
||||
}
|
||||
|
||||
let conf = compute.get_conn_conf(None);
|
||||
let res = task::spawn_blocking(move || installed_extensions::get_installed_extensions(conf))
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
match res {
|
||||
Ok(installed_extensions) => {
|
||||
JsonResponse::success(StatusCode::OK, Some(installed_extensions))
|
||||
}
|
||||
Err(e) => JsonResponse::error(
|
||||
StatusCode::INTERNAL_SERVER_ERROR,
|
||||
format!("failed to get list of installed extensions: {e}"),
|
||||
),
|
||||
}
|
||||
}
|
||||
@@ -2,16 +2,17 @@ use axum::{body::Body, response::Response};
|
||||
use http::header::CONTENT_TYPE;
|
||||
use http::StatusCode;
|
||||
use metrics::proto::MetricFamily;
|
||||
use metrics::{Encoder, TextEncoder};
|
||||
use metrics::Encoder;
|
||||
use metrics::TextEncoder;
|
||||
|
||||
use crate::{http::JsonResponse, metrics::collect};
|
||||
use crate::{http::JsonResponse, installed_extensions};
|
||||
|
||||
/// Expose Prometheus metrics.
|
||||
pub(in crate::http) async fn get_metrics() -> Response {
|
||||
// When we call TextEncoder::encode() below, it will immediately return an
|
||||
// error if a metric family has no metrics, so we need to preemptively
|
||||
// filter out metric families with no metrics.
|
||||
let metrics = collect()
|
||||
let metrics = installed_extensions::collect()
|
||||
.into_iter()
|
||||
.filter(|m| !m.get_metric().is_empty())
|
||||
.collect::<Vec<MetricFamily>>();
|
||||
|
||||
@@ -10,7 +10,9 @@ pub(in crate::http) mod extension_server;
|
||||
pub(in crate::http) mod extensions;
|
||||
pub(in crate::http) mod failpoints;
|
||||
pub(in crate::http) mod grants;
|
||||
pub(in crate::http) mod info;
|
||||
pub(in crate::http) mod insights;
|
||||
pub(in crate::http) mod installed_extensions;
|
||||
pub(in crate::http) mod metrics;
|
||||
pub(in crate::http) mod metrics_json;
|
||||
pub(in crate::http) mod status;
|
||||
|
||||
@@ -1,14 +1,15 @@
|
||||
use std::{
|
||||
net::{IpAddr, Ipv6Addr, SocketAddr},
|
||||
sync::Arc,
|
||||
sync::{
|
||||
atomic::{AtomicU64, Ordering},
|
||||
Arc,
|
||||
},
|
||||
thread,
|
||||
time::Duration,
|
||||
};
|
||||
|
||||
use anyhow::Result;
|
||||
use axum::{
|
||||
extract::Request,
|
||||
middleware::{self, Next},
|
||||
response::{IntoResponse, Response},
|
||||
routing::{get, post},
|
||||
Router,
|
||||
@@ -16,13 +17,16 @@ use axum::{
|
||||
use http::StatusCode;
|
||||
use tokio::net::TcpListener;
|
||||
use tower::ServiceBuilder;
|
||||
use tower_http::{request_id::PropagateRequestIdLayer, trace::TraceLayer};
|
||||
use tower_http::{
|
||||
request_id::{MakeRequestId, PropagateRequestIdLayer, RequestId, SetRequestIdLayer},
|
||||
trace::TraceLayer,
|
||||
};
|
||||
use tracing::{debug, error, info, Span};
|
||||
use uuid::Uuid;
|
||||
|
||||
use super::routes::{
|
||||
check_writability, configure, database_schema, dbs_and_roles, extension_server, extensions,
|
||||
grants, insights, metrics, metrics_json, status, terminate,
|
||||
grants, info as info_route, insights, installed_extensions, metrics, metrics_json, status,
|
||||
terminate,
|
||||
};
|
||||
use crate::compute::ComputeNode;
|
||||
|
||||
@@ -30,36 +34,47 @@ async fn handle_404() -> Response {
|
||||
StatusCode::NOT_FOUND.into_response()
|
||||
}
|
||||
|
||||
const X_REQUEST_ID: &str = "x-request-id";
|
||||
#[derive(Clone, Default)]
|
||||
struct ComputeMakeRequestId(Arc<AtomicU64>);
|
||||
|
||||
/// This middleware function allows compute_ctl to generate its own request ID
|
||||
/// if one isn't supplied. The control plane will always send one as a UUID. The
|
||||
/// neon Postgres extension on the other hand does not send one.
|
||||
async fn maybe_add_request_id_header(mut request: Request, next: Next) -> Response {
|
||||
let headers = request.headers_mut();
|
||||
impl MakeRequestId for ComputeMakeRequestId {
|
||||
fn make_request_id<B>(
|
||||
&mut self,
|
||||
_request: &http::Request<B>,
|
||||
) -> Option<tower_http::request_id::RequestId> {
|
||||
let request_id = self
|
||||
.0
|
||||
.fetch_add(1, Ordering::SeqCst)
|
||||
.to_string()
|
||||
.parse()
|
||||
.unwrap();
|
||||
|
||||
if headers.get(X_REQUEST_ID).is_none() {
|
||||
headers.append(X_REQUEST_ID, Uuid::new_v4().to_string().parse().unwrap());
|
||||
Some(RequestId::new(request_id))
|
||||
}
|
||||
|
||||
next.run(request).await
|
||||
}
|
||||
|
||||
/// Run the HTTP server and wait on it forever.
|
||||
#[tokio::main]
|
||||
async fn serve(port: u16, compute: Arc<ComputeNode>) {
|
||||
const X_REQUEST_ID: &str = "x-request-id";
|
||||
|
||||
let mut app = Router::new()
|
||||
.route("/check_writability", post(check_writability::is_writable))
|
||||
.route("/configure", post(configure::configure))
|
||||
.route("/database_schema", get(database_schema::get_schema_dump))
|
||||
.route("/dbs_and_roles", get(dbs_and_roles::get_catalog_objects))
|
||||
.route(
|
||||
"/extension_server/{*filename}",
|
||||
"/extension_server/*filename",
|
||||
post(extension_server::download_extension),
|
||||
)
|
||||
.route("/extensions", post(extensions::install_extension))
|
||||
.route("/grants", post(grants::add_grant))
|
||||
.route("/info", get(info_route::get_info))
|
||||
.route("/insights", get(insights::get_insights))
|
||||
.route(
|
||||
"/installed_extensions",
|
||||
get(installed_extensions::get_installed_extensions),
|
||||
)
|
||||
.route("/metrics", get(metrics::get_metrics))
|
||||
.route("/metrics.json", get(metrics_json::get_metrics))
|
||||
.route("/status", get(status::get_status))
|
||||
@@ -67,8 +82,9 @@ async fn serve(port: u16, compute: Arc<ComputeNode>) {
|
||||
.fallback(handle_404)
|
||||
.layer(
|
||||
ServiceBuilder::new()
|
||||
// Add this middleware since we assume the request ID exists
|
||||
.layer(middleware::from_fn(maybe_add_request_id_header))
|
||||
.layer(SetRequestIdLayer::x_request_id(
|
||||
ComputeMakeRequestId::default(),
|
||||
))
|
||||
.layer(
|
||||
TraceLayer::new_for_http()
|
||||
.on_request(|request: &http::Request<_>, _span: &Span| {
|
||||
|
||||
@@ -1,10 +1,13 @@
|
||||
use compute_api::responses::{InstalledExtension, InstalledExtensions};
|
||||
use metrics::proto::MetricFamily;
|
||||
use std::collections::HashMap;
|
||||
|
||||
use anyhow::Result;
|
||||
use postgres::{Client, NoTls};
|
||||
|
||||
use crate::metrics::INSTALLED_EXTENSIONS;
|
||||
use metrics::core::Collector;
|
||||
use metrics::{register_uint_gauge_vec, UIntGaugeVec};
|
||||
use once_cell::sync::Lazy;
|
||||
|
||||
/// We don't reuse get_existing_dbs() just for code clarity
|
||||
/// and to make database listing query here more explicit.
|
||||
@@ -99,3 +102,16 @@ pub fn get_installed_extensions(mut conf: postgres::config::Config) -> Result<In
|
||||
extensions: extensions_map.into_values().collect(),
|
||||
})
|
||||
}
|
||||
|
||||
static INSTALLED_EXTENSIONS: Lazy<UIntGaugeVec> = Lazy::new(|| {
|
||||
register_uint_gauge_vec!(
|
||||
"compute_installed_extensions",
|
||||
"Number of databases where the version of extension is installed",
|
||||
&["extension_name", "version", "owned_by_superuser"]
|
||||
)
|
||||
.expect("failed to define a metric")
|
||||
});
|
||||
|
||||
pub fn collect() -> Vec<MetricFamily> {
|
||||
INSTALLED_EXTENSIONS.collect()
|
||||
}
|
||||
|
||||
@@ -16,7 +16,6 @@ pub mod extension_server;
|
||||
pub mod installed_extensions;
|
||||
pub mod local_proxy;
|
||||
pub mod lsn_lease;
|
||||
pub mod metrics;
|
||||
mod migration;
|
||||
pub mod monitor;
|
||||
pub mod params;
|
||||
|
||||
@@ -1,70 +0,0 @@
|
||||
use metrics::core::Collector;
|
||||
use metrics::proto::MetricFamily;
|
||||
use metrics::{register_int_counter_vec, register_uint_gauge_vec, IntCounterVec, UIntGaugeVec};
|
||||
use once_cell::sync::Lazy;
|
||||
|
||||
pub(crate) static INSTALLED_EXTENSIONS: Lazy<UIntGaugeVec> = Lazy::new(|| {
|
||||
register_uint_gauge_vec!(
|
||||
"compute_installed_extensions",
|
||||
"Number of databases where the version of extension is installed",
|
||||
&["extension_name", "version", "owned_by_superuser"]
|
||||
)
|
||||
.expect("failed to define a metric")
|
||||
});
|
||||
|
||||
// Normally, any HTTP API request is described by METHOD (e.g. GET, POST, etc.) + PATH,
|
||||
// but for all our APIs we defined a 'slug'/method/operationId in the OpenAPI spec.
|
||||
// And it's fair to call it a 'RPC' (Remote Procedure Call).
|
||||
pub enum CPlaneRequestRPC {
|
||||
GetSpec,
|
||||
}
|
||||
|
||||
impl CPlaneRequestRPC {
|
||||
pub fn as_str(&self) -> &str {
|
||||
match self {
|
||||
CPlaneRequestRPC::GetSpec => "GetSpec",
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub const UNKNOWN_HTTP_STATUS: &str = "unknown";
|
||||
|
||||
pub(crate) static CPLANE_REQUESTS_TOTAL: Lazy<IntCounterVec> = Lazy::new(|| {
|
||||
register_int_counter_vec!(
|
||||
"compute_ctl_cplane_requests_total",
|
||||
"Total number of control plane requests made by compute_ctl by status",
|
||||
&["rpc", "http_status"]
|
||||
)
|
||||
.expect("failed to define a metric")
|
||||
});
|
||||
|
||||
/// Total number of failed database migrations. Per-compute, this is actually a boolean metric,
|
||||
/// either empty or with a single value (1, migration_id) because we stop at the first failure.
|
||||
/// Yet, the sum over the fleet will provide the total number of failures.
|
||||
pub(crate) static DB_MIGRATION_FAILED: Lazy<IntCounterVec> = Lazy::new(|| {
|
||||
register_int_counter_vec!(
|
||||
"compute_ctl_db_migration_failed_total",
|
||||
"Total number of failed database migrations",
|
||||
&["migration_id"]
|
||||
)
|
||||
.expect("failed to define a metric")
|
||||
});
|
||||
|
||||
pub(crate) static REMOTE_EXT_REQUESTS_TOTAL: Lazy<IntCounterVec> = Lazy::new(|| {
|
||||
register_int_counter_vec!(
|
||||
"compute_ctl_remote_ext_requests_total",
|
||||
"Total number of requests made by compute_ctl to download extensions from S3 proxy by status",
|
||||
// Do not use any labels like extension name yet.
|
||||
// We can add them later if needed.
|
||||
&["http_status"]
|
||||
)
|
||||
.expect("failed to define a metric")
|
||||
});
|
||||
|
||||
pub fn collect() -> Vec<MetricFamily> {
|
||||
let mut metrics = INSTALLED_EXTENSIONS.collect();
|
||||
metrics.extend(CPLANE_REQUESTS_TOTAL.collect());
|
||||
metrics.extend(REMOTE_EXT_REQUESTS_TOTAL.collect());
|
||||
metrics.extend(DB_MIGRATION_FAILED.collect());
|
||||
metrics
|
||||
}
|
||||
@@ -1,9 +1,7 @@
|
||||
use anyhow::{Context, Result};
|
||||
use fail::fail_point;
|
||||
use postgres::{Client, Transaction};
|
||||
use tracing::{error, info};
|
||||
|
||||
use crate::metrics::DB_MIGRATION_FAILED;
|
||||
use tracing::info;
|
||||
|
||||
/// Runs a series of migrations on a target database
|
||||
pub(crate) struct MigrationRunner<'m> {
|
||||
@@ -80,31 +78,24 @@ impl<'m> MigrationRunner<'m> {
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Run an individual migration in a separate transaction block.
|
||||
fn run_migration(client: &mut Client, migration_id: i64, migration: &str) -> Result<()> {
|
||||
let mut txn = client
|
||||
.transaction()
|
||||
.with_context(|| format!("begin transaction for migration {migration_id}"))?;
|
||||
|
||||
/// Run an individual migration
|
||||
fn run_migration(txn: &mut Transaction, migration_id: i64, migration: &str) -> Result<()> {
|
||||
if migration.starts_with("-- SKIP") {
|
||||
info!("Skipping migration id={}", migration_id);
|
||||
|
||||
// Even though we are skipping the migration, updating the
|
||||
// migration ID should help keep logic easy to understand when
|
||||
// trying to understand the state of a cluster.
|
||||
Self::update_migration_id(&mut txn, migration_id)?;
|
||||
Self::update_migration_id(txn, migration_id)?;
|
||||
} else {
|
||||
info!("Running migration id={}:\n{}\n", migration_id, migration);
|
||||
|
||||
txn.simple_query(migration)
|
||||
.with_context(|| format!("apply migration {migration_id}"))?;
|
||||
|
||||
Self::update_migration_id(&mut txn, migration_id)?;
|
||||
Self::update_migration_id(txn, migration_id)?;
|
||||
}
|
||||
|
||||
txn.commit()
|
||||
.with_context(|| format!("commit transaction for migration {migration_id}"))?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
@@ -118,20 +109,19 @@ impl<'m> MigrationRunner<'m> {
|
||||
// The index lags the migration ID by 1, so the current migration
|
||||
// ID is also the next index
|
||||
let migration_id = (current_migration + 1) as i64;
|
||||
let migration = self.migrations[current_migration];
|
||||
|
||||
match Self::run_migration(self.client, migration_id, migration) {
|
||||
Ok(_) => {
|
||||
info!("Finished migration id={}", migration_id);
|
||||
}
|
||||
Err(e) => {
|
||||
error!("Failed to run migration id={}: {:?}", migration_id, e);
|
||||
DB_MIGRATION_FAILED
|
||||
.with_label_values(&[migration_id.to_string().as_str()])
|
||||
.inc();
|
||||
return Err(e);
|
||||
}
|
||||
}
|
||||
let mut txn = self
|
||||
.client
|
||||
.transaction()
|
||||
.with_context(|| format!("begin transaction for migration {migration_id}"))?;
|
||||
|
||||
Self::run_migration(&mut txn, migration_id, self.migrations[current_migration])
|
||||
.with_context(|| format!("running migration {migration_id}"))?;
|
||||
|
||||
txn.commit()
|
||||
.with_context(|| format!("commit transaction for migration {migration_id}"))?;
|
||||
|
||||
info!("Finished migration id={}", migration_id);
|
||||
|
||||
current_migration += 1;
|
||||
}
|
||||
|
||||
@@ -6,7 +6,6 @@ use std::path::Path;
|
||||
use tracing::{error, info, instrument, warn};
|
||||
|
||||
use crate::config;
|
||||
use crate::metrics::{CPlaneRequestRPC, CPLANE_REQUESTS_TOTAL, UNKNOWN_HTTP_STATUS};
|
||||
use crate::migration::MigrationRunner;
|
||||
use crate::params::PG_HBA_ALL_MD5;
|
||||
use crate::pg_helpers::*;
|
||||
@@ -20,7 +19,7 @@ use compute_api::spec::ComputeSpec;
|
||||
fn do_control_plane_request(
|
||||
uri: &str,
|
||||
jwt: &str,
|
||||
) -> Result<ControlPlaneSpecResponse, (bool, String, String)> {
|
||||
) -> Result<ControlPlaneSpecResponse, (bool, String)> {
|
||||
let resp = reqwest::blocking::Client::new()
|
||||
.get(uri)
|
||||
.header("Authorization", format!("Bearer {}", jwt))
|
||||
@@ -28,42 +27,35 @@ fn do_control_plane_request(
|
||||
.map_err(|e| {
|
||||
(
|
||||
true,
|
||||
format!("could not perform spec request to control plane: {:?}", e),
|
||||
UNKNOWN_HTTP_STATUS.to_string(),
|
||||
format!("could not perform spec request to control plane: {}", e),
|
||||
)
|
||||
})?;
|
||||
|
||||
let status = resp.status();
|
||||
match status {
|
||||
match resp.status() {
|
||||
StatusCode::OK => match resp.json::<ControlPlaneSpecResponse>() {
|
||||
Ok(spec_resp) => Ok(spec_resp),
|
||||
Err(e) => Err((
|
||||
true,
|
||||
format!("could not deserialize control plane response: {:?}", e),
|
||||
status.to_string(),
|
||||
format!("could not deserialize control plane response: {}", e),
|
||||
)),
|
||||
},
|
||||
StatusCode::SERVICE_UNAVAILABLE => Err((
|
||||
true,
|
||||
"control plane is temporarily unavailable".to_string(),
|
||||
status.to_string(),
|
||||
)),
|
||||
StatusCode::SERVICE_UNAVAILABLE => {
|
||||
Err((true, "control plane is temporarily unavailable".to_string()))
|
||||
}
|
||||
StatusCode::BAD_GATEWAY => {
|
||||
// We have a problem with intermittent 502 errors now
|
||||
// https://github.com/neondatabase/cloud/issues/2353
|
||||
// It's fine to retry GET request in this case.
|
||||
Err((
|
||||
true,
|
||||
"control plane request failed with 502".to_string(),
|
||||
status.to_string(),
|
||||
))
|
||||
Err((true, "control plane request failed with 502".to_string()))
|
||||
}
|
||||
// Another code, likely 500 or 404, means that compute is unknown to the control plane
|
||||
// or some internal failure happened. Doesn't make much sense to retry in this case.
|
||||
_ => Err((
|
||||
false,
|
||||
format!("unexpected control plane response status code: {}", status),
|
||||
status.to_string(),
|
||||
format!(
|
||||
"unexpected control plane response status code: {}",
|
||||
resp.status()
|
||||
),
|
||||
)),
|
||||
}
|
||||
}
|
||||
@@ -91,28 +83,17 @@ pub fn get_spec_from_control_plane(
|
||||
// - got spec -> return Ok(Some(spec))
|
||||
while attempt < 4 {
|
||||
spec = match do_control_plane_request(&cp_uri, &jwt) {
|
||||
Ok(spec_resp) => {
|
||||
CPLANE_REQUESTS_TOTAL
|
||||
.with_label_values(&[
|
||||
CPlaneRequestRPC::GetSpec.as_str(),
|
||||
&StatusCode::OK.to_string(),
|
||||
])
|
||||
.inc();
|
||||
match spec_resp.status {
|
||||
ControlPlaneComputeStatus::Empty => Ok(None),
|
||||
ControlPlaneComputeStatus::Attached => {
|
||||
if let Some(spec) = spec_resp.spec {
|
||||
Ok(Some(spec))
|
||||
} else {
|
||||
bail!("compute is attached, but spec is empty")
|
||||
}
|
||||
Ok(spec_resp) => match spec_resp.status {
|
||||
ControlPlaneComputeStatus::Empty => Ok(None),
|
||||
ControlPlaneComputeStatus::Attached => {
|
||||
if let Some(spec) = spec_resp.spec {
|
||||
Ok(Some(spec))
|
||||
} else {
|
||||
bail!("compute is attached, but spec is empty")
|
||||
}
|
||||
}
|
||||
}
|
||||
Err((retry, msg, status)) => {
|
||||
CPLANE_REQUESTS_TOTAL
|
||||
.with_label_values(&[CPlaneRequestRPC::GetSpec.as_str(), &status])
|
||||
.inc();
|
||||
},
|
||||
Err((retry, msg)) => {
|
||||
if retry {
|
||||
Err(anyhow!(msg))
|
||||
} else {
|
||||
|
||||
@@ -47,7 +47,7 @@ pub enum PerDatabasePhase {
|
||||
DeleteDBRoleReferences,
|
||||
ChangeSchemaPerms,
|
||||
HandleAnonExtension,
|
||||
DropLogicalSubscriptions,
|
||||
DropSubscriptionsForDeletedDatabases,
|
||||
}
|
||||
|
||||
#[derive(Clone, Debug)]
|
||||
@@ -58,13 +58,11 @@ pub enum ApplySpecPhase {
|
||||
CreateAndAlterRoles,
|
||||
RenameAndDeleteDatabases,
|
||||
CreateAndAlterDatabases,
|
||||
CreateSchemaNeon,
|
||||
RunInEachDatabase { db: DB, subphase: PerDatabasePhase },
|
||||
HandleOtherExtensions,
|
||||
HandleNeonExtension,
|
||||
CreateAvailabilityCheck,
|
||||
DropRoles,
|
||||
FinalizeDropLogicalSubscriptions,
|
||||
}
|
||||
|
||||
pub struct Operation {
|
||||
@@ -333,7 +331,7 @@ async fn get_operations<'a>(
|
||||
// NB: there could be other db states, which prevent us from dropping
|
||||
// the database. For example, if db is used by any active subscription
|
||||
// or replication slot.
|
||||
// Such cases are handled in the DropLogicalSubscriptions
|
||||
// Such cases are handled in the DropSubscriptionsForDeletedDatabases
|
||||
// phase. We do all the cleanup before actually dropping the database.
|
||||
let drop_db_query: String = format!(
|
||||
"DROP DATABASE IF EXISTS {} WITH (FORCE)",
|
||||
@@ -444,19 +442,13 @@ async fn get_operations<'a>(
|
||||
|
||||
Ok(Box::new(operations))
|
||||
}
|
||||
ApplySpecPhase::CreateSchemaNeon => Ok(Box::new(once(Operation {
|
||||
query: String::from("CREATE SCHEMA IF NOT EXISTS neon"),
|
||||
comment: Some(String::from(
|
||||
"create schema for neon extension and utils tables",
|
||||
)),
|
||||
}))),
|
||||
ApplySpecPhase::RunInEachDatabase { db, subphase } => {
|
||||
match subphase {
|
||||
PerDatabasePhase::DropLogicalSubscriptions => {
|
||||
PerDatabasePhase::DropSubscriptionsForDeletedDatabases => {
|
||||
match &db {
|
||||
DB::UserDB(db) => {
|
||||
let drop_subscription_query: String = format!(
|
||||
include_str!("sql/drop_subscriptions.sql"),
|
||||
include_str!("sql/drop_subscription_for_drop_dbs.sql"),
|
||||
datname_str = escape_literal(&db.name),
|
||||
);
|
||||
|
||||
@@ -674,6 +666,10 @@ async fn get_operations<'a>(
|
||||
}
|
||||
ApplySpecPhase::HandleNeonExtension => {
|
||||
let operations = vec![
|
||||
Operation {
|
||||
query: String::from("CREATE SCHEMA IF NOT EXISTS neon"),
|
||||
comment: Some(String::from("init: add schema for extension")),
|
||||
},
|
||||
Operation {
|
||||
query: String::from("CREATE EXTENSION IF NOT EXISTS neon WITH SCHEMA neon"),
|
||||
comment: Some(String::from(
|
||||
@@ -716,9 +712,5 @@ async fn get_operations<'a>(
|
||||
|
||||
Ok(Box::new(operations))
|
||||
}
|
||||
ApplySpecPhase::FinalizeDropLogicalSubscriptions => Ok(Box::new(once(Operation {
|
||||
query: String::from(include_str!("sql/finalize_drop_subscriptions.sql")),
|
||||
comment: None,
|
||||
}))),
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,21 +0,0 @@
|
||||
DO $$
|
||||
BEGIN
|
||||
IF NOT EXISTS(
|
||||
SELECT 1
|
||||
FROM pg_catalog.pg_tables
|
||||
WHERE tablename = 'drop_subscriptions_done'
|
||||
AND schemaname = 'neon'
|
||||
)
|
||||
THEN
|
||||
CREATE TABLE neon.drop_subscriptions_done
|
||||
(id serial primary key, timeline_id text);
|
||||
END IF;
|
||||
|
||||
-- preserve the timeline_id of the last drop_subscriptions run
|
||||
-- to ensure that the cleanup of a timeline is executed only once.
|
||||
-- use upsert to avoid the table bloat in case of cascade branching (branch of a branch)
|
||||
INSERT INTO neon.drop_subscriptions_done VALUES (1, current_setting('neon.timeline_id'))
|
||||
ON CONFLICT (id) DO UPDATE
|
||||
SET timeline_id = current_setting('neon.timeline_id');
|
||||
END
|
||||
$$
|
||||
@@ -1,10 +1,6 @@
|
||||
# Local Development Control Plane (`neon_local`)
|
||||
# Control Plane and Neon Local
|
||||
|
||||
This crate contains tools to start a Neon development environment locally. This utility can be used with the `cargo neon` command. This is a convenience to invoke
|
||||
the `neon_local` binary.
|
||||
|
||||
**Note**: this is a dev/test tool -- a minimal control plane suitable for testing
|
||||
code changes locally, but not suitable for running production systems.
|
||||
This crate contains tools to start a Neon development environment locally. This utility can be used with the `cargo neon` command.
|
||||
|
||||
## Example: Start with Postgres 16
|
||||
|
||||
|
||||
@@ -1357,7 +1357,6 @@ async fn handle_endpoint(subcmd: &EndpointCmd, env: &local_env::LocalEnv) -> Res
|
||||
args.pg_version,
|
||||
mode,
|
||||
!args.update_catalog,
|
||||
false,
|
||||
)?;
|
||||
}
|
||||
EndpointCmd::Start(args) => {
|
||||
|
||||
@@ -76,7 +76,6 @@ pub struct EndpointConf {
|
||||
http_port: u16,
|
||||
pg_version: u32,
|
||||
skip_pg_catalog_updates: bool,
|
||||
drop_subscriptions_before_start: bool,
|
||||
features: Vec<ComputeFeature>,
|
||||
}
|
||||
|
||||
@@ -144,7 +143,6 @@ impl ComputeControlPlane {
|
||||
pg_version: u32,
|
||||
mode: ComputeMode,
|
||||
skip_pg_catalog_updates: bool,
|
||||
drop_subscriptions_before_start: bool,
|
||||
) -> Result<Arc<Endpoint>> {
|
||||
let pg_port = pg_port.unwrap_or_else(|| self.get_port());
|
||||
let http_port = http_port.unwrap_or_else(|| self.get_port() + 1);
|
||||
@@ -164,7 +162,6 @@ impl ComputeControlPlane {
|
||||
// with this we basically test a case of waking up an idle compute, where
|
||||
// we also skip catalog updates in the cloud.
|
||||
skip_pg_catalog_updates,
|
||||
drop_subscriptions_before_start,
|
||||
features: vec![],
|
||||
});
|
||||
|
||||
@@ -180,7 +177,6 @@ impl ComputeControlPlane {
|
||||
pg_port,
|
||||
pg_version,
|
||||
skip_pg_catalog_updates,
|
||||
drop_subscriptions_before_start,
|
||||
features: vec![],
|
||||
})?,
|
||||
)?;
|
||||
@@ -244,7 +240,6 @@ pub struct Endpoint {
|
||||
// Optimizations
|
||||
skip_pg_catalog_updates: bool,
|
||||
|
||||
drop_subscriptions_before_start: bool,
|
||||
// Feature flags
|
||||
features: Vec<ComputeFeature>,
|
||||
}
|
||||
@@ -296,7 +291,6 @@ impl Endpoint {
|
||||
tenant_id: conf.tenant_id,
|
||||
pg_version: conf.pg_version,
|
||||
skip_pg_catalog_updates: conf.skip_pg_catalog_updates,
|
||||
drop_subscriptions_before_start: conf.drop_subscriptions_before_start,
|
||||
features: conf.features,
|
||||
})
|
||||
}
|
||||
@@ -631,7 +625,6 @@ impl Endpoint {
|
||||
shard_stripe_size: Some(shard_stripe_size),
|
||||
local_proxy_config: None,
|
||||
reconfigure_concurrency: 1,
|
||||
drop_subscriptions_before_start: self.drop_subscriptions_before_start,
|
||||
};
|
||||
let spec_path = self.endpoint_path().join("spec.json");
|
||||
std::fs::write(spec_path, serde_json::to_string_pretty(&spec)?)?;
|
||||
|
||||
@@ -347,31 +347,11 @@ impl PageServerNode {
|
||||
.map(|x| x.parse::<usize>())
|
||||
.transpose()
|
||||
.context("Failed to parse 'compaction_threshold' as an integer")?,
|
||||
compaction_upper_limit: settings
|
||||
.remove("compaction_upper_limit")
|
||||
.map(|x| x.parse::<usize>())
|
||||
.transpose()
|
||||
.context("Failed to parse 'compaction_upper_limit' as an integer")?,
|
||||
compaction_algorithm: settings
|
||||
.remove("compaction_algorithm")
|
||||
.map(serde_json::from_str)
|
||||
.transpose()
|
||||
.context("Failed to parse 'compaction_algorithm' json")?,
|
||||
l0_flush_delay_threshold: settings
|
||||
.remove("l0_flush_delay_threshold")
|
||||
.map(|x| x.parse::<usize>())
|
||||
.transpose()
|
||||
.context("Failed to parse 'l0_flush_delay_threshold' as an integer")?,
|
||||
l0_flush_wait_upload: settings
|
||||
.remove("l0_flush_wait_upload")
|
||||
.map(|x| x.parse::<bool>())
|
||||
.transpose()
|
||||
.context("Failed to parse 'l0_flush_wait_upload' as a boolean")?,
|
||||
l0_flush_stall_threshold: settings
|
||||
.remove("l0_flush_stall_threshold")
|
||||
.map(|x| x.parse::<usize>())
|
||||
.transpose()
|
||||
.context("Failed to parse 'l0_flush_stall_threshold' as an integer")?,
|
||||
gc_horizon: settings
|
||||
.remove("gc_horizon")
|
||||
.map(|x| x.parse::<u64>())
|
||||
@@ -388,11 +368,6 @@ impl PageServerNode {
|
||||
.map(|x| x.parse::<u8>())
|
||||
.transpose()
|
||||
.context("Failed to parse 'image_creation_check_threshold' as integer")?,
|
||||
image_creation_preempt_threshold: settings
|
||||
.remove("image_creation_preempt_threshold")
|
||||
.map(|x| x.parse::<usize>())
|
||||
.transpose()
|
||||
.context("Failed to parse 'image_creation_preempt_threshold' as integer")?,
|
||||
pitr_interval: settings.remove("pitr_interval").map(|x| x.to_string()),
|
||||
walreceiver_connect_timeout: settings
|
||||
.remove("walreceiver_connect_timeout")
|
||||
@@ -443,26 +418,6 @@ impl PageServerNode {
|
||||
.map(serde_json::from_str)
|
||||
.transpose()
|
||||
.context("parse `wal_receiver_protocol_override` from json")?,
|
||||
rel_size_v2_enabled: settings
|
||||
.remove("rel_size_v2_enabled")
|
||||
.map(|x| x.parse::<bool>())
|
||||
.transpose()
|
||||
.context("Failed to parse 'rel_size_v2_enabled' as bool")?,
|
||||
gc_compaction_enabled: settings
|
||||
.remove("gc_compaction_enabled")
|
||||
.map(|x| x.parse::<bool>())
|
||||
.transpose()
|
||||
.context("Failed to parse 'gc_compaction_enabled' as bool")?,
|
||||
gc_compaction_initial_threshold_kb: settings
|
||||
.remove("gc_compaction_initial_threshold_kb")
|
||||
.map(|x| x.parse::<u64>())
|
||||
.transpose()
|
||||
.context("Failed to parse 'gc_compaction_initial_threshold_kb' as integer")?,
|
||||
gc_compaction_ratio_percent: settings
|
||||
.remove("gc_compaction_ratio_percent")
|
||||
.map(|x| x.parse::<u64>())
|
||||
.transpose()
|
||||
.context("Failed to parse 'gc_compaction_ratio_percent' as integer")?,
|
||||
};
|
||||
if !settings.is_empty() {
|
||||
bail!("Unrecognized tenant settings: {settings:?}")
|
||||
|
||||
@@ -822,7 +822,10 @@ impl StorageController {
|
||||
self.dispatch(
|
||||
Method::PUT,
|
||||
format!("control/v1/tenant/{tenant_shard_id}/migrate"),
|
||||
Some(TenantShardMigrateRequest { node_id }),
|
||||
Some(TenantShardMigrateRequest {
|
||||
tenant_shard_id,
|
||||
node_id,
|
||||
}),
|
||||
)
|
||||
.await
|
||||
}
|
||||
|
||||
@@ -1,17 +1,12 @@
|
||||
use futures::StreamExt;
|
||||
use std::{
|
||||
collections::{HashMap, HashSet},
|
||||
str::FromStr,
|
||||
time::Duration,
|
||||
};
|
||||
use std::{str::FromStr, time::Duration};
|
||||
|
||||
use clap::{Parser, Subcommand};
|
||||
use pageserver_api::{
|
||||
controller_api::{
|
||||
AvailabilityZone, NodeAvailabilityWrapper, NodeDescribeResponse, NodeShardResponse,
|
||||
SafekeeperDescribeResponse, SafekeeperSchedulingPolicyRequest, ShardSchedulingPolicy,
|
||||
ShardsPreferredAzsRequest, ShardsPreferredAzsResponse, SkSchedulingPolicy,
|
||||
TenantCreateRequest, TenantDescribeResponse, TenantPolicyRequest,
|
||||
SafekeeperDescribeResponse, ShardSchedulingPolicy, TenantCreateRequest,
|
||||
TenantDescribeResponse, TenantPolicyRequest,
|
||||
},
|
||||
models::{
|
||||
EvictionPolicy, EvictionPolicyLayerAccessThreshold, LocationConfigSecondary,
|
||||
@@ -117,13 +112,6 @@ enum Command {
|
||||
#[arg(long)]
|
||||
node: NodeId,
|
||||
},
|
||||
/// Migrate the secondary location for a tenant shard to a specific pageserver.
|
||||
TenantShardMigrateSecondary {
|
||||
#[arg(long)]
|
||||
tenant_shard_id: TenantShardId,
|
||||
#[arg(long)]
|
||||
node: NodeId,
|
||||
},
|
||||
/// Cancel any ongoing reconciliation for this shard
|
||||
TenantShardCancelReconcile {
|
||||
#[arg(long)]
|
||||
@@ -158,12 +146,6 @@ enum Command {
|
||||
#[arg(long)]
|
||||
tenant_id: TenantId,
|
||||
},
|
||||
TenantSetPreferredAz {
|
||||
#[arg(long)]
|
||||
tenant_id: TenantId,
|
||||
#[arg(long)]
|
||||
preferred_az: Option<String>,
|
||||
},
|
||||
/// Uncleanly drop a tenant from the storage controller: this doesn't delete anything from pageservers. Appropriate
|
||||
/// if you e.g. used `tenant-warmup` by mistake on a tenant ID that doesn't really exist, or is in some other region.
|
||||
TenantDrop {
|
||||
@@ -232,13 +214,6 @@ enum Command {
|
||||
},
|
||||
/// List safekeepers known to the storage controller
|
||||
Safekeepers {},
|
||||
/// Set the scheduling policy of the specified safekeeper
|
||||
SafekeeperScheduling {
|
||||
#[arg(long)]
|
||||
node_id: NodeId,
|
||||
#[arg(long)]
|
||||
scheduling_policy: SkSchedulingPolicyArg,
|
||||
},
|
||||
}
|
||||
|
||||
#[derive(Parser)]
|
||||
@@ -291,17 +266,6 @@ impl FromStr for PlacementPolicyArg {
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
struct SkSchedulingPolicyArg(SkSchedulingPolicy);
|
||||
|
||||
impl FromStr for SkSchedulingPolicyArg {
|
||||
type Err = anyhow::Error;
|
||||
|
||||
fn from_str(s: &str) -> Result<Self, Self::Err> {
|
||||
SkSchedulingPolicy::from_str(s).map(Self)
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
struct ShardSchedulingPolicyArg(ShardSchedulingPolicy);
|
||||
|
||||
@@ -431,12 +395,11 @@ async fn main() -> anyhow::Result<()> {
|
||||
resp.sort_by(|a, b| a.listen_http_addr.cmp(&b.listen_http_addr));
|
||||
|
||||
let mut table = comfy_table::Table::new();
|
||||
table.set_header(["Id", "Hostname", "AZ", "Scheduling", "Availability"]);
|
||||
table.set_header(["Id", "Hostname", "Scheduling", "Availability"]);
|
||||
for node in resp {
|
||||
table.add_row([
|
||||
format!("{}", node.id),
|
||||
node.listen_http_addr,
|
||||
node.availability_zone_id,
|
||||
format!("{:?}", node.scheduling),
|
||||
format!("{:?}", node.availability),
|
||||
]);
|
||||
@@ -496,65 +459,33 @@ async fn main() -> anyhow::Result<()> {
|
||||
println!("{table}");
|
||||
}
|
||||
Command::Tenants { node_id: None } => {
|
||||
// Set up output formatting
|
||||
let mut resp = storcon_client
|
||||
.dispatch::<(), Vec<TenantDescribeResponse>>(
|
||||
Method::GET,
|
||||
"control/v1/tenant".to_string(),
|
||||
None,
|
||||
)
|
||||
.await?;
|
||||
|
||||
resp.sort_by(|a, b| a.tenant_id.cmp(&b.tenant_id));
|
||||
|
||||
let mut table = comfy_table::Table::new();
|
||||
table.set_header([
|
||||
"TenantId",
|
||||
"Preferred AZ",
|
||||
"ShardCount",
|
||||
"StripeSize",
|
||||
"Placement",
|
||||
"Scheduling",
|
||||
]);
|
||||
|
||||
// Pagination loop over listing API
|
||||
let mut start_after = None;
|
||||
const LIMIT: usize = 1000;
|
||||
loop {
|
||||
let path = match start_after {
|
||||
None => format!("control/v1/tenant?limit={LIMIT}"),
|
||||
Some(start_after) => {
|
||||
format!("control/v1/tenant?limit={LIMIT}&start_after={start_after}")
|
||||
}
|
||||
};
|
||||
|
||||
let resp = storcon_client
|
||||
.dispatch::<(), Vec<TenantDescribeResponse>>(Method::GET, path, None)
|
||||
.await?;
|
||||
|
||||
if resp.is_empty() {
|
||||
// End of data reached
|
||||
break;
|
||||
}
|
||||
|
||||
// Give some visual feedback while we're building up the table (comfy_table doesn't have
|
||||
// streaming output)
|
||||
if resp.len() >= LIMIT {
|
||||
eprint!(".");
|
||||
}
|
||||
|
||||
start_after = Some(resp.last().unwrap().tenant_id);
|
||||
|
||||
for tenant in resp {
|
||||
let shard_zero = tenant.shards.into_iter().next().unwrap();
|
||||
table.add_row([
|
||||
format!("{}", tenant.tenant_id),
|
||||
shard_zero
|
||||
.preferred_az_id
|
||||
.as_ref()
|
||||
.cloned()
|
||||
.unwrap_or("".to_string()),
|
||||
format!("{}", shard_zero.tenant_shard_id.shard_count.literal()),
|
||||
format!("{:?}", tenant.stripe_size),
|
||||
format!("{:?}", tenant.policy),
|
||||
format!("{:?}", shard_zero.scheduling_policy),
|
||||
]);
|
||||
}
|
||||
}
|
||||
|
||||
// Terminate progress dots
|
||||
if table.row_count() > LIMIT {
|
||||
eprint!("");
|
||||
for tenant in resp {
|
||||
let shard_zero = tenant.shards.into_iter().next().unwrap();
|
||||
table.add_row([
|
||||
format!("{}", tenant.tenant_id),
|
||||
format!("{}", shard_zero.tenant_shard_id.shard_count.literal()),
|
||||
format!("{:?}", tenant.stripe_size),
|
||||
format!("{:?}", tenant.policy),
|
||||
format!("{:?}", shard_zero.scheduling_policy),
|
||||
]);
|
||||
}
|
||||
|
||||
println!("{table}");
|
||||
@@ -609,7 +540,10 @@ async fn main() -> anyhow::Result<()> {
|
||||
tenant_shard_id,
|
||||
node,
|
||||
} => {
|
||||
let req = TenantShardMigrateRequest { node_id: node };
|
||||
let req = TenantShardMigrateRequest {
|
||||
tenant_shard_id,
|
||||
node_id: node,
|
||||
};
|
||||
|
||||
storcon_client
|
||||
.dispatch::<TenantShardMigrateRequest, TenantShardMigrateResponse>(
|
||||
@@ -619,20 +553,6 @@ async fn main() -> anyhow::Result<()> {
|
||||
)
|
||||
.await?;
|
||||
}
|
||||
Command::TenantShardMigrateSecondary {
|
||||
tenant_shard_id,
|
||||
node,
|
||||
} => {
|
||||
let req = TenantShardMigrateRequest { node_id: node };
|
||||
|
||||
storcon_client
|
||||
.dispatch::<TenantShardMigrateRequest, TenantShardMigrateResponse>(
|
||||
Method::PUT,
|
||||
format!("control/v1/tenant/{tenant_shard_id}/migrate_secondary"),
|
||||
Some(req),
|
||||
)
|
||||
.await?;
|
||||
}
|
||||
Command::TenantShardCancelReconcile { tenant_shard_id } => {
|
||||
storcon_client
|
||||
.dispatch::<(), ()>(
|
||||
@@ -676,19 +596,6 @@ async fn main() -> anyhow::Result<()> {
|
||||
None,
|
||||
)
|
||||
.await?;
|
||||
|
||||
let nodes = storcon_client
|
||||
.dispatch::<(), Vec<NodeDescribeResponse>>(
|
||||
Method::GET,
|
||||
"control/v1/node".to_string(),
|
||||
None,
|
||||
)
|
||||
.await?;
|
||||
let nodes = nodes
|
||||
.into_iter()
|
||||
.map(|n| (n.id, n))
|
||||
.collect::<HashMap<_, _>>();
|
||||
|
||||
println!("Tenant {tenant_id}");
|
||||
let mut table = comfy_table::Table::new();
|
||||
table.add_row(["Policy", &format!("{:?}", policy)]);
|
||||
@@ -697,14 +604,7 @@ async fn main() -> anyhow::Result<()> {
|
||||
println!("{table}");
|
||||
println!("Shards:");
|
||||
let mut table = comfy_table::Table::new();
|
||||
table.set_header([
|
||||
"Shard",
|
||||
"Attached",
|
||||
"Attached AZ",
|
||||
"Secondary",
|
||||
"Last error",
|
||||
"status",
|
||||
]);
|
||||
table.set_header(["Shard", "Attached", "Secondary", "Last error", "status"]);
|
||||
for shard in shards {
|
||||
let secondary = shard
|
||||
.node_secondary
|
||||
@@ -727,18 +627,11 @@ async fn main() -> anyhow::Result<()> {
|
||||
}
|
||||
let status = status_parts.join(",");
|
||||
|
||||
let attached_node = shard
|
||||
.node_attached
|
||||
.as_ref()
|
||||
.map(|id| nodes.get(id).expect("Shard references nonexistent node"));
|
||||
|
||||
table.add_row([
|
||||
format!("{}", shard.tenant_shard_id),
|
||||
attached_node
|
||||
.map(|n| format!("{} ({})", n.listen_http_addr, n.id))
|
||||
.unwrap_or(String::new()),
|
||||
attached_node
|
||||
.map(|n| n.availability_zone_id.clone())
|
||||
shard
|
||||
.node_attached
|
||||
.map(|n| format!("{}", n))
|
||||
.unwrap_or(String::new()),
|
||||
secondary,
|
||||
shard.last_error,
|
||||
@@ -747,66 +640,6 @@ async fn main() -> anyhow::Result<()> {
|
||||
}
|
||||
println!("{table}");
|
||||
}
|
||||
Command::TenantSetPreferredAz {
|
||||
tenant_id,
|
||||
preferred_az,
|
||||
} => {
|
||||
// First learn about the tenant's shards
|
||||
let describe_response = storcon_client
|
||||
.dispatch::<(), TenantDescribeResponse>(
|
||||
Method::GET,
|
||||
format!("control/v1/tenant/{tenant_id}"),
|
||||
None,
|
||||
)
|
||||
.await?;
|
||||
|
||||
// Learn about nodes to validate the AZ ID
|
||||
let nodes = storcon_client
|
||||
.dispatch::<(), Vec<NodeDescribeResponse>>(
|
||||
Method::GET,
|
||||
"control/v1/node".to_string(),
|
||||
None,
|
||||
)
|
||||
.await?;
|
||||
|
||||
if let Some(preferred_az) = &preferred_az {
|
||||
let azs = nodes
|
||||
.into_iter()
|
||||
.map(|n| (n.availability_zone_id))
|
||||
.collect::<HashSet<_>>();
|
||||
if !azs.contains(preferred_az) {
|
||||
anyhow::bail!(
|
||||
"AZ {} not found on any node: known AZs are: {:?}",
|
||||
preferred_az,
|
||||
azs
|
||||
);
|
||||
}
|
||||
} else {
|
||||
// Make it obvious to the user that since they've omitted an AZ, we're clearing it
|
||||
eprintln!("Clearing preferred AZ for tenant {}", tenant_id);
|
||||
}
|
||||
|
||||
// Construct a request that modifies all the tenant's shards
|
||||
let req = ShardsPreferredAzsRequest {
|
||||
preferred_az_ids: describe_response
|
||||
.shards
|
||||
.into_iter()
|
||||
.map(|s| {
|
||||
(
|
||||
s.tenant_shard_id,
|
||||
preferred_az.clone().map(AvailabilityZone),
|
||||
)
|
||||
})
|
||||
.collect(),
|
||||
};
|
||||
storcon_client
|
||||
.dispatch::<ShardsPreferredAzsRequest, ShardsPreferredAzsResponse>(
|
||||
Method::PUT,
|
||||
"control/v1/preferred_azs".to_string(),
|
||||
Some(req),
|
||||
)
|
||||
.await?;
|
||||
}
|
||||
Command::TenantWarmup { tenant_id } => {
|
||||
let describe_response = storcon_client
|
||||
.dispatch::<(), TenantDescribeResponse>(
|
||||
@@ -1082,7 +915,10 @@ async fn main() -> anyhow::Result<()> {
|
||||
.dispatch::<TenantShardMigrateRequest, TenantShardMigrateResponse>(
|
||||
Method::PUT,
|
||||
format!("control/v1/tenant/{}/migrate", mv.tenant_shard_id),
|
||||
Some(TenantShardMigrateRequest { node_id: mv.to }),
|
||||
Some(TenantShardMigrateRequest {
|
||||
tenant_shard_id: mv.tenant_shard_id,
|
||||
node_id: mv.to,
|
||||
}),
|
||||
)
|
||||
.await
|
||||
.map_err(|e| (mv.tenant_shard_id, mv.from, mv.to, e))
|
||||
@@ -1221,23 +1057,6 @@ async fn main() -> anyhow::Result<()> {
|
||||
}
|
||||
println!("{table}");
|
||||
}
|
||||
Command::SafekeeperScheduling {
|
||||
node_id,
|
||||
scheduling_policy,
|
||||
} => {
|
||||
let scheduling_policy = scheduling_policy.0;
|
||||
storcon_client
|
||||
.dispatch::<SafekeeperSchedulingPolicyRequest, ()>(
|
||||
Method::POST,
|
||||
format!("control/v1/safekeeper/{node_id}/scheduling_policy"),
|
||||
Some(SafekeeperSchedulingPolicyRequest { scheduling_policy }),
|
||||
)
|
||||
.await?;
|
||||
println!(
|
||||
"Scheduling policy of {node_id} set to {}",
|
||||
String::from(scheduling_policy)
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
Ok(())
|
||||
|
||||
@@ -32,7 +32,6 @@ reason = "the marvin attack only affects private key decryption, not public key
|
||||
# https://embarkstudios.github.io/cargo-deny/checks/licenses/cfg.html
|
||||
[licenses]
|
||||
allow = [
|
||||
"0BSD",
|
||||
"Apache-2.0",
|
||||
"BSD-2-Clause",
|
||||
"BSD-3-Clause",
|
||||
@@ -42,8 +41,8 @@ allow = [
|
||||
"MIT",
|
||||
"MPL-2.0",
|
||||
"OpenSSL",
|
||||
"Unicode-DFS-2016",
|
||||
"Unicode-3.0",
|
||||
"Zlib",
|
||||
]
|
||||
confidence-threshold = 0.8
|
||||
exceptions = [
|
||||
|
||||
@@ -7,12 +7,14 @@ FROM $REPOSITORY/${COMPUTE_IMAGE}:$TAG
|
||||
ARG COMPUTE_IMAGE
|
||||
|
||||
USER root
|
||||
RUN echo 'Acquire::Retries "5";' > /etc/apt/apt.conf.d/80-retries && \
|
||||
apt-get update && \
|
||||
RUN apt-get update && \
|
||||
apt-get install -y curl \
|
||||
jq \
|
||||
python3-pip \
|
||||
netcat-openbsd
|
||||
#Faker is required for the pg_anon test
|
||||
RUN case $COMPUTE_IMAGE in compute-node-v17) OPT="--break-system-packages";; *) OPT= ;; esac && pip3 install $OPT Faker
|
||||
#This is required for the pg_hintplan test
|
||||
RUN mkdir -p /ext-src/pg_hint_plan-src /postgres/contrib/file_fdw && chown postgres /ext-src/pg_hint_plan-src /postgres/contrib/file_fdw
|
||||
RUN mkdir -p /ext-src/pg_hint_plan-src && chown postgres /ext-src/pg_hint_plan-src
|
||||
|
||||
USER postgres
|
||||
USER postgres
|
||||
@@ -20,55 +20,30 @@ while ! nc -z pageserver 6400; do
|
||||
done
|
||||
echo "Page server is ready."
|
||||
|
||||
cp ${SPEC_FILE_ORG} ${SPEC_FILE}
|
||||
echo "Create a tenant and timeline"
|
||||
generate_id tenant_id
|
||||
PARAMS=(
|
||||
-X PUT
|
||||
-H "Content-Type: application/json"
|
||||
-d "{\"mode\": \"AttachedSingle\", \"generation\": 1, \"tenant_conf\": {}}"
|
||||
"http://pageserver:9898/v1/tenant/${tenant_id}/location_config"
|
||||
)
|
||||
result=$(curl "${PARAMS[@]}")
|
||||
echo $result | jq .
|
||||
|
||||
if [ -n "${TENANT_ID:-}" ] && [ -n "${TIMELINE_ID:-}" ]; then
|
||||
tenant_id=${TENANT_ID}
|
||||
timeline_id=${TIMELINE_ID}
|
||||
else
|
||||
echo "Check if a tenant present"
|
||||
PARAMS=(
|
||||
-X GET
|
||||
-H "Content-Type: application/json"
|
||||
"http://pageserver:9898/v1/tenant"
|
||||
)
|
||||
tenant_id=$(curl "${PARAMS[@]}" | jq -r .[0].id)
|
||||
if [ -z "${tenant_id}" ] || [ "${tenant_id}" = null ]; then
|
||||
echo "Create a tenant"
|
||||
generate_id tenant_id
|
||||
PARAMS=(
|
||||
-X PUT
|
||||
-H "Content-Type: application/json"
|
||||
-d "{\"mode\": \"AttachedSingle\", \"generation\": 1, \"tenant_conf\": {}}"
|
||||
"http://pageserver:9898/v1/tenant/${tenant_id}/location_config"
|
||||
)
|
||||
result=$(curl "${PARAMS[@]}")
|
||||
echo $result | jq .
|
||||
fi
|
||||
|
||||
echo "Check if a timeline present"
|
||||
PARAMS=(
|
||||
-X GET
|
||||
-H "Content-Type: application/json"
|
||||
"http://pageserver:9898/v1/tenant/${tenant_id}/timeline"
|
||||
)
|
||||
timeline_id=$(curl "${PARAMS[@]}" | jq -r .[0].timeline_id)
|
||||
if [ -z "${timeline_id}" ] || [ "${timeline_id}" = null ]; then
|
||||
generate_id timeline_id
|
||||
PARAMS=(
|
||||
-sbf
|
||||
-X POST
|
||||
-H "Content-Type: application/json"
|
||||
-d "{\"new_timeline_id\": \"${timeline_id}\", \"pg_version\": ${PG_VERSION}}"
|
||||
"http://pageserver:9898/v1/tenant/${tenant_id}/timeline/"
|
||||
)
|
||||
result=$(curl "${PARAMS[@]}")
|
||||
echo $result | jq .
|
||||
fi
|
||||
fi
|
||||
generate_id timeline_id
|
||||
PARAMS=(
|
||||
-sbf
|
||||
-X POST
|
||||
-H "Content-Type: application/json"
|
||||
-d "{\"new_timeline_id\": \"${timeline_id}\", \"pg_version\": ${PG_VERSION}}"
|
||||
"http://pageserver:9898/v1/tenant/${tenant_id}/timeline/"
|
||||
)
|
||||
result=$(curl "${PARAMS[@]}")
|
||||
echo $result | jq .
|
||||
|
||||
echo "Overwrite tenant id and timeline id in spec file"
|
||||
sed -i "s/TENANT_ID/${tenant_id}/" ${SPEC_FILE}
|
||||
sed "s/TENANT_ID/${tenant_id}/" ${SPEC_FILE_ORG} > ${SPEC_FILE}
|
||||
sed -i "s/TIMELINE_ID/${timeline_id}/" ${SPEC_FILE}
|
||||
|
||||
cat ${SPEC_FILE}
|
||||
|
||||
@@ -149,13 +149,11 @@ services:
|
||||
args:
|
||||
- REPOSITORY=${REPOSITORY:-neondatabase}
|
||||
- COMPUTE_IMAGE=compute-node-v${PG_VERSION:-16}
|
||||
- TAG=${COMPUTE_TAG:-${TAG:-latest}}
|
||||
- http_proxy=${http_proxy:-}
|
||||
- https_proxy=${https_proxy:-}
|
||||
- TAG=${TAG:-latest}
|
||||
- http_proxy=$http_proxy
|
||||
- https_proxy=$https_proxy
|
||||
environment:
|
||||
- PG_VERSION=${PG_VERSION:-16}
|
||||
- TENANT_ID=${TENANT_ID:-}
|
||||
- TIMELINE_ID=${TIMELINE_ID:-}
|
||||
#- RUST_BACKTRACE=1
|
||||
# Mount the test files directly, for faster editing cycle.
|
||||
volumes:
|
||||
@@ -187,8 +185,6 @@ services:
|
||||
neon-test-extensions:
|
||||
profiles: ["test-extensions"]
|
||||
image: ${REPOSITORY:-neondatabase}/neon-test-extensions-v${PG_TEST_VERSION:-16}:${TAG:-latest}
|
||||
environment:
|
||||
- PGPASSWORD=cloud_admin
|
||||
entrypoint:
|
||||
- "/bin/bash"
|
||||
- "-c"
|
||||
|
||||
@@ -18,10 +18,14 @@ cd $(dirname $0)
|
||||
COMPUTE_CONTAINER_NAME=docker-compose-compute-1
|
||||
TEST_CONTAINER_NAME=docker-compose-neon-test-extensions-1
|
||||
PSQL_OPTION="-h localhost -U cloud_admin -p 55433 -d postgres"
|
||||
: ${http_proxy:=}
|
||||
: ${https_proxy:=}
|
||||
export http_proxy https_proxy
|
||||
|
||||
cleanup() {
|
||||
echo "show container information"
|
||||
docker ps
|
||||
docker compose --profile test-extensions -f $COMPOSE_FILE logs
|
||||
echo "stop containers..."
|
||||
docker compose --profile test-extensions -f $COMPOSE_FILE down
|
||||
}
|
||||
@@ -31,7 +35,13 @@ for pg_version in ${TEST_VERSION_ONLY-14 15 16 17}; do
|
||||
echo "clean up containers if exists"
|
||||
cleanup
|
||||
PG_TEST_VERSION=$((pg_version < 16 ? 16 : pg_version))
|
||||
PG_VERSION=$pg_version PG_TEST_VERSION=$PG_TEST_VERSION docker compose --profile test-extensions -f $COMPOSE_FILE up --quiet-pull --build -d
|
||||
# The support of pg_anon not yet added to PG17, so we have to add the corresponding option for other PG versions
|
||||
if [ "${pg_version}" -ne 17 ]; then
|
||||
SPEC_PATH="compute_wrapper/var/db/postgres/specs"
|
||||
mv $SPEC_PATH/spec.json $SPEC_PATH/spec.bak
|
||||
jq '.cluster.settings += [{"name": "session_preload_libraries","value": "anon","vartype": "string"}]' "${SPEC_PATH}/spec.bak" > "${SPEC_PATH}/spec.json"
|
||||
fi
|
||||
PG_VERSION=$pg_version PG_TEST_VERSION=$PG_TEST_VERSION docker compose --profile test-extensions -f $COMPOSE_FILE up --build -d
|
||||
|
||||
echo "wait until the compute is ready. timeout after 60s. "
|
||||
cnt=0
|
||||
@@ -40,6 +50,7 @@ for pg_version in ${TEST_VERSION_ONLY-14 15 16 17}; do
|
||||
cnt=`expr $cnt + 3`
|
||||
if [ $cnt -gt 60 ]; then
|
||||
echo "timeout before the compute is ready."
|
||||
cleanup
|
||||
exit 1
|
||||
fi
|
||||
if docker compose --profile test-extensions -f $COMPOSE_FILE logs "compute_is_ready" | grep -q "accepting connections"; then
|
||||
@@ -51,46 +62,52 @@ for pg_version in ${TEST_VERSION_ONLY-14 15 16 17}; do
|
||||
done
|
||||
|
||||
if [ $pg_version -ge 16 ]; then
|
||||
docker cp ext-src $TEST_CONTAINER_NAME:/
|
||||
docker exec $TEST_CONTAINER_NAME bash -c "apt update && apt install -y libtap-parser-sourcehandler-pgtap-perl"
|
||||
echo Enabling trust connection
|
||||
docker exec $COMPUTE_CONTAINER_NAME bash -c "sed -i '\$d' /var/db/postgres/compute/pg_hba.conf && echo -e 'host\t all\t all\t all\t trust' >> /var/db/postgres/compute/pg_hba.conf && psql $PSQL_OPTION -c 'select pg_reload_conf()' "
|
||||
echo Adding postgres role
|
||||
docker exec $COMPUTE_CONTAINER_NAME psql $PSQL_OPTION -c "CREATE ROLE postgres SUPERUSER LOGIN"
|
||||
# This is required for the pg_hint_plan test, to prevent flaky log message causing the test to fail
|
||||
# It cannot be moved to Dockerfile now because the database directory is created after the start of the container
|
||||
echo Adding dummy config
|
||||
docker exec $COMPUTE_CONTAINER_NAME touch /var/db/postgres/compute/compute_ctl_temp_override.conf
|
||||
# The following block copies the files for the pg_hintplan test to the compute node for the extension test in an isolated docker-compose environment
|
||||
# This block is required for the pg_anon extension test.
|
||||
# The test assumes that it is running on the same host with the postgres engine.
|
||||
# In our case it's not true, that's why we are copying files to the compute node
|
||||
TMPDIR=$(mktemp -d)
|
||||
# Add support for pg_anon for pg_v16
|
||||
if [ $pg_version -ne 17 ]; then
|
||||
docker cp $TEST_CONTAINER_NAME:/ext-src/pg_anon-src/data $TMPDIR/data
|
||||
echo -e '1\t too \t many \t tabs' > $TMPDIR/data/bad.csv
|
||||
docker cp $TMPDIR/data $COMPUTE_CONTAINER_NAME:/tmp/tmp_anon_alternate_data
|
||||
rm -rf $TMPDIR
|
||||
fi
|
||||
TMPDIR=$(mktemp -d)
|
||||
# The following block does the same for the pg_hintplan test
|
||||
docker cp $TEST_CONTAINER_NAME:/ext-src/pg_hint_plan-src/data $TMPDIR/data
|
||||
docker cp $TMPDIR/data $COMPUTE_CONTAINER_NAME:/ext-src/pg_hint_plan-src/
|
||||
rm -rf $TMPDIR
|
||||
# The following block does the same for the contrib/file_fdw test
|
||||
TMPDIR=$(mktemp -d)
|
||||
docker cp $TEST_CONTAINER_NAME:/postgres/contrib/file_fdw/data $TMPDIR/data
|
||||
docker cp $TMPDIR/data $COMPUTE_CONTAINER_NAME:/postgres/contrib/file_fdw/data
|
||||
rm -rf $TMPDIR
|
||||
# Apply patches
|
||||
cat ../compute/patches/contrib_pg${pg_version}.patch | docker exec -i $TEST_CONTAINER_NAME bash -c "(cd /postgres && patch -p1)"
|
||||
# We are running tests now
|
||||
rm -f testout.txt testout_contrib.txt
|
||||
docker exec -e USE_PGXS=1 -e SKIP=timescaledb-src,rdkit-src,postgis-src,pgx_ulid-src,pgtap-src,pg_tiktoken-src,pg_jsonschema-src,kq_imcx-src,wal2json_2_5-src \
|
||||
$TEST_CONTAINER_NAME /run-tests.sh /ext-src | tee testout.txt && EXT_SUCCESS=1 || EXT_SUCCESS=0
|
||||
docker exec -e SKIP=start-scripts,postgres_fdw,ltree_plpython,jsonb_plpython,jsonb_plperl,hstore_plpython,hstore_plperl,dblink,bool_plperl \
|
||||
$TEST_CONTAINER_NAME /run-tests.sh /postgres/contrib | tee testout_contrib.txt && CONTRIB_SUCCESS=1 || CONTRIB_SUCCESS=0
|
||||
if [ $EXT_SUCCESS -eq 0 ] || [ $CONTRIB_SUCCESS -eq 0 ]; then
|
||||
CONTRIB_FAILED=
|
||||
FAILED=
|
||||
[ $EXT_SUCCESS -eq 0 ] && FAILED=$(tail -1 testout.txt | awk '{for(i=1;i<=NF;i++){print "/ext-src/"$i;}}')
|
||||
[ $CONTRIB_SUCCESS -eq 0 ] && CONTRIB_FAILED=$(tail -1 testout_contrib.txt | awk '{for(i=0;i<=NF;i++){print "/postgres/contrib/"$i;}}')
|
||||
for d in $FAILED $CONTRIB_FAILED; do
|
||||
dn="$(basename $d)"
|
||||
rm -rf $dn
|
||||
mkdir $dn
|
||||
docker cp $TEST_CONTAINER_NAME:$d/regression.diffs $dn || [ $? -eq 1 ]
|
||||
docker cp $TEST_CONTAINER_NAME:$d/regression.out $dn || [ $? -eq 1 ]
|
||||
cat $dn/regression.out $dn/regression.diffs || true
|
||||
rm -rf $dn
|
||||
if docker exec -e SKIP=timescaledb-src,rdkit-src,postgis-src,pgx_ulid-src,pgtap-src,pg_tiktoken-src,pg_jsonschema-src,pg_graphql-src,kq_imcx-src,wal2json_2_5-src \
|
||||
$TEST_CONTAINER_NAME /run-tests.sh | tee testout.txt
|
||||
then
|
||||
cleanup
|
||||
else
|
||||
FAILED=$(tail -1 testout.txt)
|
||||
for d in $FAILED
|
||||
do
|
||||
mkdir $d
|
||||
docker cp $TEST_CONTAINER_NAME:/ext-src/$d/regression.diffs $d || true
|
||||
docker cp $TEST_CONTAINER_NAME:/ext-src/$d/regression.out $d || true
|
||||
cat $d/regression.out $d/regression.diffs || true
|
||||
done
|
||||
rm -rf $FAILED
|
||||
cleanup
|
||||
exit 1
|
||||
fi
|
||||
fi
|
||||
cleanup
|
||||
# Restore the original spec.json
|
||||
if [ "$pg_version" -ne 17 ]; then
|
||||
mv "$SPEC_PATH/spec.bak" "$SPEC_PATH/spec.json"
|
||||
fi
|
||||
done
|
||||
|
||||
@@ -1,5 +0,0 @@
|
||||
#!/bin/sh
|
||||
set -ex
|
||||
cd "$(dirname ${0})"
|
||||
PG_REGRESS=$(dirname "$(pg_config --pgxs)")/../test/regress/pg_regress
|
||||
${PG_REGRESS} --use-existing --inputdir=./ --bindir='/usr/local/pgsql/bin' --dbname=contrib_regression add_agg agg_oob auto_sparse card_op cast_shape copy_binary cumulative_add_cardinality_correction cumulative_add_comprehensive_promotion cumulative_add_sparse_edge cumulative_add_sparse_random cumulative_add_sparse_step cumulative_union_comprehensive cumulative_union_explicit_explicit cumulative_union_explicit_promotion cumulative_union_probabilistic_probabilistic cumulative_union_sparse_full_representation cumulative_union_sparse_promotion cumulative_union_sparse_sparse disable_hashagg equal explicit_thresh hash hash_any meta_func murmur_bigint murmur_bytea nosparse notequal scalar_oob storedproc transaction typmod typmod_insert union_op
|
||||
@@ -1,27 +0,0 @@
|
||||
diff --git a/expected/hypopg.out b/expected/hypopg.out
|
||||
index 90121d0..859260b 100644
|
||||
--- a/expected/hypopg.out
|
||||
+++ b/expected/hypopg.out
|
||||
@@ -11,7 +11,8 @@ BEGIN
|
||||
END;
|
||||
$_$
|
||||
LANGUAGE plpgsql;
|
||||
-CREATE EXTENSION hypopg;
|
||||
+CREATE EXTENSION IF NOT EXISTS hypopg;
|
||||
+NOTICE: extension "hypopg" already exists, skipping
|
||||
CREATE TABLE hypo (id integer, val text, "Id2" bigint);
|
||||
INSERT INTO hypo SELECT i, 'line ' || i
|
||||
FROM generate_series(1,100000) f(i);
|
||||
diff --git a/test/sql/hypopg.sql b/test/sql/hypopg.sql
|
||||
index 99722b0..8d6bacb 100644
|
||||
--- a/test/sql/hypopg.sql
|
||||
+++ b/test/sql/hypopg.sql
|
||||
@@ -12,7 +12,7 @@ END;
|
||||
$_$
|
||||
LANGUAGE plpgsql;
|
||||
|
||||
-CREATE EXTENSION hypopg;
|
||||
+CREATE EXTENSION IF NOT EXISTS hypopg;
|
||||
|
||||
CREATE TABLE hypo (id integer, val text, "Id2" bigint);
|
||||
|
||||
@@ -1,6 +0,0 @@
|
||||
#!/bin/sh
|
||||
set -ex
|
||||
cd "$(dirname ${0})"
|
||||
patch -p1 <test-upgrade.patch
|
||||
PG_REGRESS=$(dirname "$(pg_config --pgxs)")/../test/regress/pg_regress
|
||||
${PG_REGRESS} --inputdir=./ --bindir='/usr/local/pgsql/bin' --use-existing --inputdir=test --dbname=contrib_regression hypopg hypo_brin hypo_index_part hypo_include hypo_hash hypo_hide_index
|
||||
@@ -1,23 +0,0 @@
|
||||
diff --git a/expected/ip4r.out b/expected/ip4r.out
|
||||
index 7527af3..b38ed29 100644
|
||||
--- a/expected/ip4r.out
|
||||
+++ b/expected/ip4r.out
|
||||
@@ -1,6 +1,5 @@
|
||||
--
|
||||
/*CUT-HERE*/
|
||||
-CREATE EXTENSION ip4r;
|
||||
-- Check whether any of our opclasses fail amvalidate
|
||||
DO $d$
|
||||
DECLARE
|
||||
diff --git a/sql/ip4r.sql b/sql/ip4r.sql
|
||||
index 65c49ec..24ade09 100644
|
||||
--- a/sql/ip4r.sql
|
||||
+++ b/sql/ip4r.sql
|
||||
@@ -1,7 +1,6 @@
|
||||
--
|
||||
|
||||
/*CUT-HERE*/
|
||||
-CREATE EXTENSION ip4r;
|
||||
|
||||
-- Check whether any of our opclasses fail amvalidate
|
||||
|
||||
@@ -1,6 +0,0 @@
|
||||
#!/bin/sh
|
||||
set -ex
|
||||
cd "$(dirname ${0})"
|
||||
patch -p1 <test-upgrade.patch
|
||||
PG_REGRESS=$(dirname "$(pg_config --pgxs)")/../test/regress/pg_regress
|
||||
${PG_REGRESS} --use-existing --inputdir=./ --bindir='/usr/local/pgsql/bin' --dbname=contrib_regression ip4r ip4r-softerr ip4r-v11
|
||||
@@ -1,75 +0,0 @@
|
||||
diff --git a/expected/pg_cron-test.out b/expected/pg_cron-test.out
|
||||
index d79d542..1663886 100644
|
||||
--- a/expected/pg_cron-test.out
|
||||
+++ b/expected/pg_cron-test.out
|
||||
@@ -1,30 +1,3 @@
|
||||
-CREATE EXTENSION pg_cron VERSION '1.0';
|
||||
-SELECT extversion FROM pg_extension WHERE extname='pg_cron';
|
||||
- extversion
|
||||
-------------
|
||||
- 1.0
|
||||
-(1 row)
|
||||
-
|
||||
--- Test binary compatibility with v1.4 function signature.
|
||||
-ALTER EXTENSION pg_cron UPDATE TO '1.4';
|
||||
-SELECT cron.unschedule(job_name := 'no_such_job');
|
||||
-ERROR: could not find valid entry for job 'no_such_job'
|
||||
-SELECT cron.schedule('testjob', '* * * * *', 'SELECT 1');
|
||||
- schedule
|
||||
-----------
|
||||
- 1
|
||||
-(1 row)
|
||||
-
|
||||
-SELECT cron.unschedule('testjob');
|
||||
- unschedule
|
||||
-------------
|
||||
- t
|
||||
-(1 row)
|
||||
-
|
||||
--- Test cache invalidation
|
||||
-DROP EXTENSION pg_cron;
|
||||
-CREATE EXTENSION pg_cron VERSION '1.4';
|
||||
-ALTER EXTENSION pg_cron UPDATE;
|
||||
-- Vacuum every day at 10:00am (GMT)
|
||||
SELECT cron.schedule('0 10 * * *', 'VACUUM');
|
||||
schedule
|
||||
@@ -300,8 +273,3 @@ SELECT jobid, jobname, schedule, command FROM cron.job ORDER BY jobid;
|
||||
SELECT cron.schedule('bad-last-dom-job1', '0 11 $foo * *', 'VACUUM FULL');
|
||||
ERROR: invalid schedule: 0 11 $foo * *
|
||||
HINT: Use cron format (e.g. 5 4 * * *), or interval format '[1-59] seconds'
|
||||
--- cleaning
|
||||
-DROP EXTENSION pg_cron;
|
||||
-drop user pgcron_cront;
|
||||
-drop database pgcron_dbno;
|
||||
-drop database pgcron_dbyes;
|
||||
diff --git a/sql/pg_cron-test.sql b/sql/pg_cron-test.sql
|
||||
index 45f94d9..241cf73 100644
|
||||
--- a/sql/pg_cron-test.sql
|
||||
+++ b/sql/pg_cron-test.sql
|
||||
@@ -1,17 +1,3 @@
|
||||
-CREATE EXTENSION pg_cron VERSION '1.0';
|
||||
-SELECT extversion FROM pg_extension WHERE extname='pg_cron';
|
||||
--- Test binary compatibility with v1.4 function signature.
|
||||
-ALTER EXTENSION pg_cron UPDATE TO '1.4';
|
||||
-SELECT cron.unschedule(job_name := 'no_such_job');
|
||||
-SELECT cron.schedule('testjob', '* * * * *', 'SELECT 1');
|
||||
-SELECT cron.unschedule('testjob');
|
||||
-
|
||||
--- Test cache invalidation
|
||||
-DROP EXTENSION pg_cron;
|
||||
-CREATE EXTENSION pg_cron VERSION '1.4';
|
||||
-
|
||||
-ALTER EXTENSION pg_cron UPDATE;
|
||||
-
|
||||
-- Vacuum every day at 10:00am (GMT)
|
||||
SELECT cron.schedule('0 10 * * *', 'VACUUM');
|
||||
|
||||
@@ -156,8 +142,3 @@ SELECT jobid, jobname, schedule, command FROM cron.job ORDER BY jobid;
|
||||
-- invalid last of day job
|
||||
SELECT cron.schedule('bad-last-dom-job1', '0 11 $foo * *', 'VACUUM FULL');
|
||||
|
||||
--- cleaning
|
||||
-DROP EXTENSION pg_cron;
|
||||
-drop user pgcron_cront;
|
||||
-drop database pgcron_dbno;
|
||||
-drop database pgcron_dbyes;
|
||||
@@ -1,6 +0,0 @@
|
||||
#!/bin/sh
|
||||
set -ex
|
||||
cd "$(dirname ${0})"
|
||||
patch -p1 <test-upgrade.patch
|
||||
PG_REGRESS=$(dirname "$(pg_config --pgxs)")/../test/regress/pg_regress
|
||||
${PG_REGRESS} --use-existing --inputdir=./ --bindir='/usr/local/pgsql/bin' --dbname=contrib_regression pg_cron-test
|
||||
@@ -1,13 +0,0 @@
|
||||
#!/bin/bash
|
||||
set -ex
|
||||
cd "$(dirname "${0}")"
|
||||
dropdb --if-exists contrib_regression
|
||||
createdb contrib_regression
|
||||
PGXS="$(dirname "$(pg_config --pgxs)" )"
|
||||
REGRESS="${PGXS}/../test/regress/pg_regress"
|
||||
TESTDIR="test"
|
||||
TESTS=$(ls "${TESTDIR}/sql" | sort )
|
||||
TESTS=${TESTS//\.sql/}
|
||||
psql -v ON_ERROR_STOP=1 -f test/fixtures.sql -d contrib_regression
|
||||
${REGRESS} --use-existing --dbname=contrib_regression --inputdir=${TESTDIR} ${TESTS}
|
||||
|
||||
@@ -1,18 +0,0 @@
|
||||
diff --git a/expected/pg_ivm.out b/expected/pg_ivm.out
|
||||
index e8798ee..cca58d0 100644
|
||||
--- a/expected/pg_ivm.out
|
||||
+++ b/expected/pg_ivm.out
|
||||
@@ -1,4 +1,3 @@
|
||||
-CREATE EXTENSION pg_ivm;
|
||||
GRANT ALL ON SCHEMA public TO public;
|
||||
-- create a table to use as a basis for views and materialized views in various combinations
|
||||
CREATE TABLE mv_base_a (i int, j int);
|
||||
diff --git a/sql/pg_ivm.sql b/sql/pg_ivm.sql
|
||||
index d3c1a01..9382d7f 100644
|
||||
--- a/sql/pg_ivm.sql
|
||||
+++ b/sql/pg_ivm.sql
|
||||
@@ -1,4 +1,3 @@
|
||||
-CREATE EXTENSION pg_ivm;
|
||||
GRANT ALL ON SCHEMA public TO public;
|
||||
|
||||
-- create a table to use as a basis for views and materialized views in various combinations
|
||||
@@ -1,6 +0,0 @@
|
||||
#!/bin/sh
|
||||
set -ex
|
||||
cd "$(dirname ${0})"
|
||||
patch -p1 <test-upgrade.patch
|
||||
PG_REGRESS=$(dirname "$(pg_config --pgxs)")/../test/regress/pg_regress
|
||||
${PG_REGRESS} --use-existing --inputdir=./ --bindir='/usr/local/pgsql/bin' --dbname=contrib_regression pg_ivm create_immv refresh_immv
|
||||
@@ -1,25 +0,0 @@
|
||||
diff --git a/expected/roaringbitmap.out b/expected/roaringbitmap.out
|
||||
index de70531..a5f7c15 100644
|
||||
--- a/expected/roaringbitmap.out
|
||||
+++ b/expected/roaringbitmap.out
|
||||
@@ -1,7 +1,6 @@
|
||||
--
|
||||
-- Test roaringbitmap extension
|
||||
--
|
||||
-CREATE EXTENSION if not exists roaringbitmap;
|
||||
-- Test input and output
|
||||
set roaringbitmap.output_format='array';
|
||||
set extra_float_digits = 0;
|
||||
diff --git a/sql/roaringbitmap.sql b/sql/roaringbitmap.sql
|
||||
index a0e9c74..84bc966 100644
|
||||
--- a/sql/roaringbitmap.sql
|
||||
+++ b/sql/roaringbitmap.sql
|
||||
@@ -2,8 +2,6 @@
|
||||
-- Test roaringbitmap extension
|
||||
--
|
||||
|
||||
-CREATE EXTENSION if not exists roaringbitmap;
|
||||
-
|
||||
-- Test input and output
|
||||
|
||||
set roaringbitmap.output_format='array';
|
||||
@@ -1,6 +0,0 @@
|
||||
#!/bin/sh
|
||||
set -ex
|
||||
cd "$(dirname ${0})"
|
||||
patch -p1 <test-upgrade.patch
|
||||
PG_REGRESS=$(dirname "$(pg_config --pgxs)")/../test/regress/pg_regress
|
||||
${PG_REGRESS} --use-existing --inputdir=./ --bindir='/usr/local/pgsql/bin' --dbname=contrib_regression roaringbitmap
|
||||
@@ -1,24 +0,0 @@
|
||||
diff --git a/test/sql/base.sql b/test/sql/base.sql
|
||||
index af599d8..2eed91b 100644
|
||||
--- a/test/sql/base.sql
|
||||
+++ b/test/sql/base.sql
|
||||
@@ -2,7 +2,6 @@
|
||||
BEGIN;
|
||||
|
||||
\i test/pgtap-core.sql
|
||||
-\i sql/semver.sql
|
||||
|
||||
SELECT plan(334);
|
||||
--SELECT * FROM no_plan();
|
||||
diff --git a/test/sql/corpus.sql b/test/sql/corpus.sql
|
||||
index 1f5f637..a519905 100644
|
||||
--- a/test/sql/corpus.sql
|
||||
+++ b/test/sql/corpus.sql
|
||||
@@ -4,7 +4,6 @@ BEGIN;
|
||||
-- Test the SemVer corpus from https://regex101.com/r/Ly7O1x/3/.
|
||||
|
||||
\i test/pgtap-core.sql
|
||||
-\i sql/semver.sql
|
||||
|
||||
SELECT plan(71);
|
||||
--SELECT * FROM no_plan();
|
||||
@@ -1,6 +0,0 @@
|
||||
#!/bin/sh
|
||||
set -ex
|
||||
cd "$(dirname ${0})"
|
||||
patch -p1 <test-upgrade.patch
|
||||
PG_REGRESS=$(dirname "$(pg_config --pgxs)")/../test/regress/pg_regress
|
||||
${PG_REGRESS} --use-existing --inputdir=./ --bindir='/usr/local/pgsql/bin' --inputdir=test --dbname=contrib_regression base corpus
|
||||
@@ -1,5 +0,0 @@
|
||||
#!/bin/sh
|
||||
set -ex
|
||||
cd "$(dirname ${0})"
|
||||
PG_REGRESS=$(dirname "$(pg_config --pgxs)")/../test/regress/pg_regress
|
||||
${PG_REGRESS} --use-existing --inputdir=./ --bindir='/usr/local/pgsql/bin' --inputdir=test --dbname=contrib_regression 002_uuid_generate_v7 003_uuid_v7_to_timestamptz 004_uuid_timestamptz_to_v7 005_uuid_v7_to_timestamp 006_uuid_timestamp_to_v7
|
||||
@@ -1,4 +0,0 @@
|
||||
#!/bin/bash
|
||||
set -ex
|
||||
cd "$(dirname "${0}")"
|
||||
pg_prove test.sql
|
||||
@@ -1,15 +0,0 @@
|
||||
diff --git a/test.sql b/test.sql
|
||||
index d7a0ca8..f15bc76 100644
|
||||
--- a/test.sql
|
||||
+++ b/test.sql
|
||||
@@ -9,9 +9,7 @@
|
||||
\set ON_ERROR_STOP true
|
||||
\set QUIET 1
|
||||
|
||||
-CREATE EXTENSION pgcrypto;
|
||||
-CREATE EXTENSION pgtap;
|
||||
-CREATE EXTENSION pgjwt;
|
||||
+CREATE EXTENSION IF NOT EXISTS pgtap;
|
||||
|
||||
BEGIN;
|
||||
SELECT plan(23);
|
||||
@@ -1,5 +0,0 @@
|
||||
#!/bin/sh
|
||||
set -ex
|
||||
cd "$(dirname ${0})"
|
||||
patch -p1 <test-upgrade.patch
|
||||
pg_prove test.sql
|
||||
@@ -1,5 +0,0 @@
|
||||
#!/bin/sh
|
||||
set -ex
|
||||
cd "$(dirname ${0})"
|
||||
PG_REGRESS=$(dirname "$(pg_config --pgxs)")/../test/regress/pg_regress
|
||||
${PG_REGRESS} --inputdir=./ --bindir='/usr/local/pgsql/bin' --inputdir=test --use-existing --dbname=contrib_regression bit btree cast copy halfvec hnsw_bit hnsw_halfvec hnsw_sparsevec hnsw_vector ivfflat_bit ivfflat_halfvec ivfflat_vector sparsevec vector_type
|
||||
@@ -1,5 +0,0 @@
|
||||
#!/bin/sh
|
||||
set -ex
|
||||
cd "$(dirname ${0})"
|
||||
PG_REGRESS=$(dirname "$(pg_config --pgxs)")/../test/regress/pg_regress
|
||||
${PG_REGRESS} --inputdir=./ --bindir='/usr/local/pgsql/bin' --use-existing --dbname=contrib_regression plv8 plv8-errors scalar_args inline json startup_pre startup varparam json_conv jsonb_conv window guc es6 arraybuffer composites currentresource startup_perms bytea find_function_perms memory_limits reset show array_spread regression dialect bigint procedure
|
||||
@@ -1,5 +0,0 @@
|
||||
#!/bin/sh
|
||||
set -ex
|
||||
cd "$(dirname ${0})"
|
||||
PG_REGRESS=$(dirname "$(pg_config --pgxs)")/../test/regress/pg_regress
|
||||
${PG_REGRESS} --inputdir=./ --bindir='/usr/local/pgsql/bin' --use-existing --dbname=contrib_regression extension tables unit binary unicode prefix units time temperature functions language_functions round derived compare aggregate iec custom crosstab convert
|
||||
@@ -1,5 +0,0 @@
|
||||
#!/bin/sh
|
||||
set -ex
|
||||
cd "$(dirname ${0})"
|
||||
PG_REGRESS=$(dirname "$(pg_config --pgxs)")/../test/regress/pg_regress
|
||||
${PG_REGRESS} --use-existing --inputdir=./ --bindir='/usr/local/pgsql/bin' --dbname=contrib_regression prefix falcon explain queries
|
||||
@@ -1,19 +0,0 @@
|
||||
diff --git a/expected/rum.out b/expected/rum.out
|
||||
index 5966d19..8860b79 100644
|
||||
--- a/expected/rum.out
|
||||
+++ b/expected/rum.out
|
||||
@@ -1,4 +1,3 @@
|
||||
-CREATE EXTENSION rum;
|
||||
CREATE TABLE test_rum( t text, a tsvector );
|
||||
CREATE TRIGGER tsvectorupdate
|
||||
BEFORE UPDATE OR INSERT ON test_rum
|
||||
diff --git a/sql/rum.sql b/sql/rum.sql
|
||||
index 8414bb9..898e6ab 100644
|
||||
--- a/sql/rum.sql
|
||||
+++ b/sql/rum.sql
|
||||
@@ -1,5 +1,3 @@
|
||||
-CREATE EXTENSION rum;
|
||||
-
|
||||
CREATE TABLE test_rum( t text, a tsvector );
|
||||
|
||||
CREATE TRIGGER tsvectorupdate
|
||||
@@ -1,6 +0,0 @@
|
||||
#!/bin/sh
|
||||
set -ex
|
||||
cd "$(dirname ${0})"
|
||||
patch -p1 <test-upgrade.patch
|
||||
PG_REGRESS=$(dirname "$(pg_config --pgxs)")/../test/regress/pg_regress
|
||||
${PG_REGRESS} --inputdir=./ --bindir='/usr/local/pgsql/bin' --use-existing --dbname=contrib_regression rum rum_validate rum_hash ruminv timestamp orderby orderby_hash altorder altorder_hash limits int2 int4 int8 float4 float8 money oid time timetz date interval macaddr inet cidr text varchar char bytea bit varbit numeric rum_weight expr array
|
||||
@@ -1,22 +1,14 @@
|
||||
#!/bin/bash
|
||||
set -x
|
||||
|
||||
extdir=${1}
|
||||
|
||||
cd "${extdir}" || exit 2
|
||||
cd /ext-src || exit 2
|
||||
FAILED=
|
||||
LIST=$( (echo -e "${SKIP//","/"\n"}"; ls) | sort | uniq -u)
|
||||
for d in ${LIST}; do
|
||||
[ -d "${d}" ] || continue
|
||||
if ! psql -w -c "select 1" >/dev/null; then
|
||||
FAILED="${d} ${FAILED}"
|
||||
break
|
||||
fi
|
||||
if [ -f "${d}/neon-test.sh" ]; then
|
||||
"${d}/neon-test.sh" || FAILED="${d} ${FAILED}"
|
||||
else
|
||||
LIST=$( (echo -e "${SKIP//","/"\n"}"; ls -d -- *-src) | sort | uniq -u)
|
||||
for d in ${LIST}
|
||||
do
|
||||
[ -d "${d}" ] || continue
|
||||
psql -c "select 1" >/dev/null || break
|
||||
USE_PGXS=1 make -C "${d}" installcheck || FAILED="${d} ${FAILED}"
|
||||
fi
|
||||
done
|
||||
[ -z "${FAILED}" ] && exit 0
|
||||
echo "${FAILED}"
|
||||
|
||||
@@ -1,94 +0,0 @@
|
||||
#!/bin/bash
|
||||
set -eux -o pipefail
|
||||
cd "$(dirname "${0}")"
|
||||
# Takes a variable name as argument. The result is stored in that variable.
|
||||
generate_id() {
|
||||
local -n resvar=$1
|
||||
printf -v resvar '%08x%08x%08x%08x' $SRANDOM $SRANDOM $SRANDOM $SRANDOM
|
||||
}
|
||||
if [ -z ${OLDTAG+x} ] || [ -z ${NEWTAG+x} ] || [ -z "${OLDTAG}" ] || [ -z "${NEWTAG}" ]; then
|
||||
echo OLDTAG and NEWTAG must be defined
|
||||
exit 1
|
||||
fi
|
||||
export PG_VERSION=${PG_VERSION:-16}
|
||||
function wait_for_ready {
|
||||
TIME=0
|
||||
while ! docker compose logs compute_is_ready | grep -q "accepting connections" && [ ${TIME} -le 300 ] ; do
|
||||
((TIME += 1 ))
|
||||
sleep 1
|
||||
done
|
||||
if [ ${TIME} -gt 300 ]; then
|
||||
echo Time is out.
|
||||
exit 2
|
||||
fi
|
||||
}
|
||||
function create_extensions() {
|
||||
for ext in ${1}; do
|
||||
docker compose exec neon-test-extensions psql -X -v ON_ERROR_STOP=1 -d contrib_regression -c "CREATE EXTENSION IF NOT EXISTS ${ext} CASCADE"
|
||||
done
|
||||
}
|
||||
EXTENSIONS='[
|
||||
{"extname": "plv8", "extdir": "plv8-src"},
|
||||
{"extname": "vector", "extdir": "pgvector-src"},
|
||||
{"extname": "unit", "extdir": "postgresql-unit-src"},
|
||||
{"extname": "hypopg", "extdir": "hypopg-src"},
|
||||
{"extname": "rum", "extdir": "rum-src"},
|
||||
{"extname": "ip4r", "extdir": "ip4r-src"},
|
||||
{"extname": "prefix", "extdir": "prefix-src"},
|
||||
{"extname": "hll", "extdir": "hll-src"},
|
||||
{"extname": "pg_cron", "extdir": "pg_cron-src"},
|
||||
{"extname": "pg_uuidv7", "extdir": "pg_uuidv7-src"},
|
||||
{"extname": "roaringbitmap", "extdir": "pg_roaringbitmap-src"},
|
||||
{"extname": "semver", "extdir": "pg_semver-src"},
|
||||
{"extname": "pg_ivm", "extdir": "pg_ivm-src"},
|
||||
{"extname": "pgjwt", "extdir": "pgjwt-src"}
|
||||
]'
|
||||
EXTNAMES=$(echo ${EXTENSIONS} | jq -r '.[].extname' | paste -sd ' ' -)
|
||||
TAG=${NEWTAG} docker compose --profile test-extensions up --quiet-pull --build -d
|
||||
wait_for_ready
|
||||
docker compose exec neon-test-extensions psql -c "DROP DATABASE IF EXISTS contrib_regression"
|
||||
docker compose exec neon-test-extensions psql -c "CREATE DATABASE contrib_regression"
|
||||
create_extensions "${EXTNAMES}"
|
||||
query="select json_object_agg(extname,extversion) from pg_extension where extname in ('${EXTNAMES// /\',\'}')"
|
||||
new_vers=$(docker compose exec neon-test-extensions psql -Aqt -d contrib_regression -c "$query")
|
||||
docker compose --profile test-extensions down
|
||||
TAG=${OLDTAG} docker compose --profile test-extensions up --quiet-pull --build -d --force-recreate
|
||||
wait_for_ready
|
||||
docker compose cp ext-src neon-test-extensions:/
|
||||
docker compose exec neon-test-extensions psql -c "DROP DATABASE IF EXISTS contrib_regression"
|
||||
docker compose exec neon-test-extensions psql -c "CREATE DATABASE contrib_regression"
|
||||
create_extensions "${EXTNAMES}"
|
||||
query="select pge.extname from pg_extension pge join (select key as extname, value as extversion from json_each_text('${new_vers}')) x on pge.extname=x.extname and pge.extversion <> x.extversion"
|
||||
exts=$(docker compose exec neon-test-extensions psql -Aqt -d contrib_regression -c "$query")
|
||||
if [ -z "${exts}" ]; then
|
||||
echo "No extensions were upgraded"
|
||||
else
|
||||
tenant_id=$(docker compose exec neon-test-extensions psql -Aqt -c "SHOW neon.tenant_id")
|
||||
timeline_id=$(docker compose exec neon-test-extensions psql -Aqt -c "SHOW neon.timeline_id")
|
||||
for ext in ${exts}; do
|
||||
echo Testing ${ext}...
|
||||
EXTDIR=$(echo ${EXTENSIONS} | jq -r '.[] | select(.extname=="'${ext}'") | .extdir')
|
||||
generate_id new_timeline_id
|
||||
PARAMS=(
|
||||
-sbf
|
||||
-X POST
|
||||
-H "Content-Type: application/json"
|
||||
-d "{\"new_timeline_id\": \"${new_timeline_id}\", \"pg_version\": ${PG_VERSION}, \"ancestor_timeline_id\": \"${timeline_id}\"}"
|
||||
"http://127.0.0.1:9898/v1/tenant/${tenant_id}/timeline/"
|
||||
)
|
||||
result=$(curl "${PARAMS[@]}")
|
||||
echo $result | jq .
|
||||
TENANT_ID=${tenant_id} TIMELINE_ID=${new_timeline_id} TAG=${OLDTAG} docker compose down compute compute_is_ready
|
||||
COMPUTE_TAG=${NEWTAG} TAG=${OLDTAG} TENANT_ID=${tenant_id} TIMELINE_ID=${new_timeline_id} docker compose up --quiet-pull -d --build compute compute_is_ready
|
||||
wait_for_ready
|
||||
TID=$(docker compose exec neon-test-extensions psql -Aqt -c "SHOW neon.timeline_id")
|
||||
if [ ${TID} != ${new_timeline_id} ]; then
|
||||
echo Timeline mismatch
|
||||
exit 1
|
||||
fi
|
||||
docker compose exec neon-test-extensions psql -d contrib_regression -c "\dx ${ext}"
|
||||
docker compose exec neon-test-extensions sh -c /ext-src/${EXTDIR}/test-upgrade.sh
|
||||
docker compose exec neon-test-extensions psql -d contrib_regression -c "alter extension ${ext} update"
|
||||
docker compose exec neon-test-extensions psql -d contrib_regression -c "\dx ${ext}"
|
||||
done
|
||||
fi
|
||||
@@ -81,7 +81,7 @@ configuration generation in them is less than its current one. Namely, it
|
||||
refuses to vote, to truncate WAL in `handle_elected` and to accept WAL. In
|
||||
response it sends its current configuration generation to let walproposer know.
|
||||
|
||||
Safekeeper gets `PUT /v1/tenants/{tenant_id}/timelines/{timeline_id}/configuration`
|
||||
accepting `Configuration`. Safekeeper switches to the given conf if it is higher than its
|
||||
current one and ignores it otherwise. In any case it replies with
|
||||
```
|
||||
@@ -103,7 +103,7 @@ currently and tries to communicate with all of them. However, the list does not
|
||||
define consensus members. Instead, on start walproposer tracks highest
|
||||
configuration it receives from `AcceptorGreeting`s. Once it assembles greetings
|
||||
from majority of `sk_set` and majority of `new_sk_set` (if it is present), it
|
||||
establishes this configuration as its own and moves to voting.
|
||||
|
||||
It should stop talking to safekeepers not listed in the configuration at this
|
||||
point, though it is not unsafe to continue doing so.
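To make the joint-quorum rule above concrete, here is a minimal sketch; the types and names are invented for the illustration and are not the actual walproposer code:
```
use std::collections::HashSet;

// Illustration-only types; the real walproposer structures differ.
type NodeId = u64;

struct Configuration {
    generation: u32,
    sk_set: Vec<NodeId>,
    new_sk_set: Option<Vec<NodeId>>,
}

// True if a strict majority of `members` is contained in `responded`.
fn majority(members: &[NodeId], responded: &HashSet<NodeId>) -> bool {
    let acks = members.iter().filter(|id| responded.contains(*id)).count();
    acks > members.len() / 2
}

// Voting may start only once a majority of the current set and, in a joint
// configuration, a majority of the new set have sent their greetings.
fn can_start_voting(conf: &Configuration, greeted: &HashSet<NodeId>) -> bool {
    majority(&conf.sk_set, greeted)
        && conf
            .new_sk_set
            .as_ref()
            .map_or(true, |new_set| majority(new_set, greeted))
}

fn main() {
    let conf = Configuration {
        generation: 7,
        sk_set: vec![1, 2, 3],
        new_sk_set: Some(vec![3, 4, 5]),
    };
    let greeted: HashSet<NodeId> = [1, 2, 3, 4].into_iter().collect();
    println!(
        "generation {}: quorum in both sets = {}",
        conf.generation,
        can_start_voting(&conf, &greeted)
    );
}
```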
|
||||
@@ -119,7 +119,7 @@ refusal to accept due to configuration change) it simply restarts.
|
||||
The following algorithm can be executed anywhere having access to configuration
|
||||
storage and safekeepers. It is safe to interrupt / restart it and run multiple
|
||||
instances of it concurrently, though likely one of them won't make
|
||||
progress then. It accepts `desired_set: Vec<NodeId>` as input.
|
||||
|
||||
Algorithm will refuse to make the change if it encounters previous interrupted
|
||||
change attempt, but in this case it will try to finish it.
|
||||
@@ -140,7 +140,7 @@ storage are reachable.
|
||||
safe. Failed CAS aborts the procedure.
|
||||
4) Call `PUT` `configuration` on safekeepers from the current set,
|
||||
delivering them `joint_conf`. Collecting responses from majority is required
|
||||
to proceed. If any response returned generation higher than
|
||||
`joint_conf.generation`, abort (another switch raced us). Otherwise, choose
|
||||
max `<last_log_term, flush_lsn>` among responses and establish it as
|
||||
(in memory) `sync_position`. Also choose max `term` and establish it as (in
|
||||
@@ -149,49 +149,49 @@ storage are reachable.
|
||||
without ack from the new set. Similarly, we'll bump term on new majority
|
||||
to `sync_term` so that two computes with the same term are never elected.
|
||||
4) Initialize timeline on safekeeper(s) from `new_sk_set` where it
|
||||
doesn't exist yet by doing `pull_timeline` from the majority of the
|
||||
current set. Doing that on majority of `new_sk_set` is enough to
|
||||
proceed, but it is reasonable to ensure that all `new_sk_set` members
|
||||
are initialized -- if some of them are down why are we migrating there?
|
||||
5) Call `POST` `bump_term(sync_term)` on safekeepers from the new set.
|
||||
Success on majority is enough.
|
||||
6) Repeatedly call `PUT` `configuration` on safekeepers from the new set,
|
||||
delivering them `joint_conf` and collecting their positions. This will
|
||||
switch them to the `joint_conf` which generally won't be needed
|
||||
because `pull_timeline` already includes it and additionally it would be
|
||||
broadcast by compute. More importantly, we may proceed to the next step
|
||||
only when `<last_log_term, flush_lsn>` on the majority of the new set reached
|
||||
`sync_position`. Similarly, on the happy path no waiting is needed because
|
||||
`pull_timeline` already includes it. However, we should double
|
||||
check to be safe. For example, timeline could have been created earlier e.g.
|
||||
manually or after try-to-migrate, abort, try-to-migrate-again sequence.
|
||||
7) Create `new_conf: Configuration` incrementing `joint_conf` generation and having new
|
||||
safekeeper set as `sk_set` and None `new_sk_set`. Write it to configuration
|
||||
storage under one more CAS.
|
||||
8) Call `PUT` `configuration` on safekeepers from the new set,
|
||||
delivering them `new_conf`. It is enough to deliver it to the majority
|
||||
of the new set; the rest can be updated by compute.
|
||||
|
||||
I haven't put huge effort into making the description above very precise, because it
|
||||
is natural language prone to interpretations anyway. Instead I'd like to make TLA+
|
||||
spec of it.
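As a rough illustration of the CAS step the algorithm leans on, here is a toy in-memory sketch; the real configuration storage is a database, and these types are assumptions made for the example:
```
use std::sync::Mutex;

// Toy in-memory stand-in for the configuration storage; illustration only.
#[derive(Clone, PartialEq, Debug)]
struct Configuration {
    generation: u32,
    sk_set: Vec<u64>,
    new_sk_set: Option<Vec<u64>>,
}

struct ConfStorage {
    current: Mutex<Configuration>,
}

impl ConfStorage {
    // Install `next` only if the stored configuration is still `expected` and
    // the generation advances by exactly one; otherwise report the current
    // value so the caller can abort (another switch raced us).
    fn cas(&self, expected: &Configuration, next: Configuration) -> Result<(), Configuration> {
        let mut cur = self.current.lock().unwrap();
        if *cur == *expected && next.generation == expected.generation + 1 {
            *cur = next;
            Ok(())
        } else {
            Err(cur.clone())
        }
    }
}

fn main() {
    let storage = ConfStorage {
        current: Mutex::new(Configuration {
            generation: 1,
            sk_set: vec![1, 2, 3],
            new_sk_set: None,
        }),
    };
    let old = storage.current.lock().unwrap().clone();
    let joint = Configuration {
        generation: 2,
        sk_set: vec![1, 2, 3],
        new_sk_set: Some(vec![1, 2, 4]),
    };
    assert!(storage.cas(&old, joint).is_ok());
    // A second CAS against the now-stale `old` value fails: the procedure aborts.
    assert!(storage
        .cas(&old, Configuration { generation: 2, sk_set: vec![1, 2, 4], new_sk_set: None })
        .is_err());
}
```
A failed CAS is exactly the signal that another instance of the procedure raced us, which is why concurrent runs stay safe.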
|
||||
|
||||
The description above focuses on safety. To make the flow practical and live, here are a few more
|
||||
considerations.
|
||||
1) It makes sense to ping the new set to ensure we are migrating to live node(s) before
|
||||
step 3.
|
||||
2) If e.g. accidentally wrong new sk set has been specified, before CAS in step `6` is completed
|
||||
it is safe to rollback to the old conf with one more CAS.
|
||||
3) On step 4 timeline might be already created on members of the new set for various reasons;
|
||||
the simplest is a procedure restart. There are more complicated scenarios like the one mentioned
|
||||
in step 5. Deleting and re-doing `pull_timeline` is generally unsafe without involving
|
||||
generations, so it seems simpler to treat an existing timeline as success. However, this also
|
||||
has a disadvantage: you might imagine a surpassingly unlikely schedule where the condition in
step 5 is never reached until a compute is (re)awakened to synchronize the new member(s).
I don't think we'll observe this in practice, but we can add waking up the compute if needed.
|
||||
4) In the end timeline should be locally deleted on the safekeeper(s) which are
|
||||
in the old set but not in the new one, unless they are unreachable. To be
|
||||
safe this also should be done under generation number (deletion proceeds only if
|
||||
current configuration is <= the one in the request and the safekeeper is not a member of it).
|
||||
5) If current conf fetched on step 1 is already not joint and members equal to `desired_set`,
|
||||
jump to step 7, using it as `new_conf`.
|
||||
@@ -202,87 +202,47 @@ The procedure ought to be driven from somewhere. Obvious candidates are control
|
||||
plane and storage_controller; and as each of them already has a db we don't want
|
||||
yet another storage. I propose to manage safekeepers in storage_controller
|
||||
because 1) since it is in rust it simplifies simulation testing (more on this
|
||||
below) 2) it already manages pageservers.
|
||||
|
||||
This assumes that migration will be fully usable only after we migrate all
|
||||
tenants/timelines to storage_controller. It is debatable whether we also want
|
||||
to manage pageserver attachments for all of these, but likely we do.
|
||||
|
||||
This requires us to define storcon <-> cplane interface and changes.
|
||||
This requires us to define storcon <-> cplane interface.
|
||||
|
||||
### storage_controller <-> control plane interface and changes
|
||||
### storage_controller <-> control plane interface
|
||||
|
||||
First of all, control plane should
|
||||
[change](https://neondb.slack.com/archives/C03438W3FLZ/p1719226543199829)
|
||||
storing safekeepers per timeline instead of per tenant because we can't migrate
|
||||
tenants atomically.
|
||||
|
||||
The important question is how updated configuration is delivered from
|
||||
storage_controller to control plane to provide it to computes. As always, there
|
||||
are two options, pull and push. Let's do it the same push as with pageserver
|
||||
`/notify-attach` because 1) it keeps storage_controller out of critical compute
|
||||
start path 2) uniformity. It makes storage_controller responsible for retrying
|
||||
notifying control plane until it succeeds.
|
||||
start path 2) provides easier upgrade: there won't be such a thing as 'timeline
|
||||
managed by control plane / storcon', cplane just takes the value out of its db
|
||||
when needed 3) uniformity. It makes storage_controller responsible for retrying notifying
|
||||
control plane until it succeeds.
|
||||
|
||||
It is not needed for the control plane to fully know the `Configuration`. It is
|
||||
enough for it to only be aware of the list of safekeepers in the latest
|
||||
configuration to supply it to compute, plus associated generation number to
|
||||
protect from stale update requests and to also pass it to compute.
|
||||
|
||||
So, cplane `/notify-safekeepers` for the timeline can accept JSON like
|
||||
```
|
||||
{
|
||||
tenant_id: String,
|
||||
timeline_id: String,
|
||||
generation: u32,
|
||||
safekeepers: Vec<SafekeeperId>,
|
||||
}
|
||||
```
|
||||
where `SafekeeperId` is
|
||||
```
|
||||
{
|
||||
node_id: u64,
|
||||
host: String
|
||||
}
|
||||
```
|
||||
In principle `host` is redundant, but may be useful for observability.
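For illustration, the JSON above could map onto types like the following (assuming `serde` with the `derive` feature and `serde_json` as dependencies; the names are not an actual API definition):
```
use serde::{Deserialize, Serialize};

// Illustrative mirror of the JSON body sketched above.
#[derive(Serialize, Deserialize, Debug)]
struct SafekeeperId {
    node_id: u64,
    host: String,
}

#[derive(Serialize, Deserialize, Debug)]
struct NotifySafekeepersRequest {
    tenant_id: String,
    timeline_id: String,
    generation: u32,
    safekeepers: Vec<SafekeeperId>,
}

fn main() {
    let req = NotifySafekeepersRequest {
        tenant_id: "0123456789abcdef0123456789abcdef".to_string(),
        timeline_id: "fedcba9876543210fedcba9876543210".to_string(),
        generation: 42,
        safekeepers: vec![SafekeeperId { node_id: 1, host: "sk-1.local".to_string() }],
    };
    // The control plane applies the update only if `generation` is higher
    // than the one it has stored for the timeline.
    println!("{}", serde_json::to_string_pretty(&req).unwrap());
}
```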
|
||||
|
||||
The request updates list of safekeepers in the db if the provided conf
|
||||
generation is higher (the cplane db should also store generations for this).
|
||||
Similarly to
|
||||
[`/notify-attach`](https://www.notion.so/neondatabase/Storage-Controller-Control-Plane-interface-6de56dd310a043bfa5c2f5564fa98365),
|
||||
it should update db which makes the call successful, and then try to schedule
|
||||
`apply_config` if possible, it is ok if not. storage_controller should rate
|
||||
limit calling the endpoint, but likely this won't be needed, as migration
|
||||
So, cplane `/notify-safekeepers` for the timeline accepts `Configuration` and
|
||||
updates it in the db if the provided conf generation is higher (the cplane db
|
||||
should also store generations for this). Similarly to [`/notify-attach`](https://www.notion.so/neondatabase/Storage-Controller-Control-Plane-interface-6de56dd310a043bfa5c2f5564fa98365), it
|
||||
should update db which makes the call successful, and then try to schedule
|
||||
`apply_config` if possible, it is ok if not. storage_controller
|
||||
should rate limit calling the endpoint, but likely this won't be needed, as migration
|
||||
throughput is limited by `pull_timeline`.
|
||||
|
||||
Timeline (branch) creation in cplane should call storage_controller POST
|
||||
`tenant/:tenant_id/timeline` like it currently does for sharded tenants.
|
||||
Response should be augmented with `safekeepers_generation` and `safekeepers`
|
||||
fields like described in `/notify-safekeepers` above. Initially (currently)
|
||||
these fields may be absent; in this case cplane chooses safekeepers on its own
|
||||
like it currently does. The call should be retried until it succeeds.
|
||||
Response should be augmented with `safekeeper_conf: Configuration`. The call
|
||||
should be retried until it succeeds.
|
||||
|
||||
Timeline deletion and tenant deletion in cplane should call appropriate
|
||||
storage_controller endpoints like it currently does for sharded tenants. The
|
||||
calls should be retried until they succeed.
|
||||
|
||||
When compute receives safekeepers list from control plane it needs to know the
|
||||
generation to check whether it should be updated (note that compute may get
|
||||
safekeeper list from either cplane or safekeepers). Currently `neon.safekeepers`
|
||||
GUC is just a comma-separated list of `host:port`. Let's prefix it with
|
||||
`g#<generation>:` to this end, so it will look like
|
||||
```
|
||||
g#42:safekeeper-0.eu-central-1.aws.neon.tech:6401,safekeeper-2.eu-central-1.aws.neon.tech:6401,safekeeper-1.eu-central-1.aws.neon.tech:6401
|
||||
```
|
||||
|
||||
To summarize, list of cplane changes:
|
||||
- per tenant -> per timeline safekeepers management and addition of int `safekeeper_generation` field.
|
||||
- `/notify-safekeepers` endpoint.
|
||||
- Branch creation call may return list of safekeepers and when it is
|
||||
present cplane should adopt it instead of choosing on its own like it does currently.
|
||||
- `neon.safekeepers` GUC should be prefixed with `g#<generation>:`.
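As a sketch of the prefix handling just described (a hypothetical helper, not the actual compute code; per this RFC a missing prefix or generation 0 means generations are disabled):
```
// Split an assumed `neon.safekeepers` value of the form
// `g#<generation>:host1:port1,host2:port2,...` into the generation and the
// plain safekeeper list. A value without the prefix is treated as
// generation 0, i.e. generations disabled.
fn parse_safekeepers_guc(value: &str) -> (u32, &str) {
    if let Some(rest) = value.strip_prefix("g#") {
        if let Some((generation, list)) = rest.split_once(':') {
            if let Ok(generation) = generation.parse::<u32>() {
                return (generation, list);
            }
        }
    }
    (0, value)
}

fn main() {
    let raw = "g#42:sk-0.local:6401,sk-1.local:6401";
    assert_eq!(
        parse_safekeepers_guc(raw),
        (42, "sk-0.local:6401,sk-1.local:6401")
    );
    println!("{:?}", parse_safekeepers_guc(raw));
}
```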
|
||||
|
||||
### storage_controller implementation
|
||||
|
||||
Current 'load everything on startup and keep in memory' easy design is fine.
|
||||
@@ -400,10 +360,10 @@ source safekeeper might fail, which is not a problem if we are going to
|
||||
decommission the node but leaves garbage otherwise. I'd propose in the first version
|
||||
1) Don't attempt deletion at all if node status is `offline`.
|
||||
2) If it failed, just issue warning.
|
||||
And add PUT `/control/v1/safekeepers/:node_id/scrub` endpoint which would find and
|
||||
remove garbage timelines for manual use. It will 1) list all timelines on the
|
||||
safekeeper 2) compare each one against configuration storage: if timeline
|
||||
doesn't exist at all (had been deleted), it can be deleted. Otherwise, it can
|
||||
be deleted under generation number if node is not member of current generation.
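A sketch of that scrub rule, with illustrative types only (not the storage controller implementation):
```
// Illustrative types and logic only.
struct Configuration {
    generation: u32,
    sk_set: Vec<u64>,
    new_sk_set: Option<Vec<u64>>,
}

// A timeline found on safekeeper `node` during scrub may be removed if it no
// longer exists in configuration storage at all, or if the request generation
// is at least the stored one and the node is not a member of that configuration.
fn may_delete(node: u64, stored: Option<&Configuration>, request_generation: u32) -> bool {
    match stored {
        None => true, // the timeline was deleted everywhere
        Some(conf) => {
            let is_member = conf.sk_set.contains(&node)
                || conf
                    .new_sk_set
                    .as_ref()
                    .map_or(false, |s| s.contains(&node));
            conf.generation <= request_generation && !is_member
        }
    }
}

fn main() {
    let conf = Configuration {
        generation: 5,
        sk_set: vec![1, 2, 3],
        new_sk_set: None,
    };
    assert!(may_delete(7, Some(&conf), 5)); // not a member: garbage, delete
    assert!(!may_delete(2, Some(&conf), 5)); // still a member: keep
    assert!(may_delete(2, None, 5)); // timeline gone from storage: delete
    println!("scrub checks passed");
}
```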
|
||||
|
||||
Automating this is nontrivial; we'd need to register all potential missing
|
||||
@@ -452,8 +412,8 @@ There should be following layers of tests:
|
||||
3) Since simulation testing injects at relatively high level points (not
|
||||
syscalls), it omits some code, in particular `pull_timeline`. Thus it is
|
||||
better to have basic tests covering whole system as well. Extended version of
|
||||
`test_restarts_under_load` would do: start background load and do migration
|
||||
under it, then restart endpoint and check that no reported commits
|
||||
had been lost. I'd also add one more creating classic network split scenario, with
|
||||
one compute talking to AC and another to BD while migration from nodes ABC to ABD
|
||||
happens.
|
||||
@@ -462,51 +422,35 @@ There should be following layers of tests:

## Order of implementation and rollout

Note that
- Control plane parts and integration with them are fully independent from everything else
  (tests would use simulation and neon_local).
- It is reasonable to make the compute <-> safekeepers protocol change
  independent of enabling generations.
- There is a lot of infra work making storage_controller aware of timelines and safekeepers,
  and its implementation/rollout should be separate from migration itself.
- Initially walproposer can just stop working while it observes a joint configuration.
  Such a window would typically be very short anyway.
- Obviously we want to test the whole thing thoroughly on staging and only then
  gradually enable it in prod.

Let's have the following implementation bits for gradual rollout:
- compute gets a `neon.safekeepers_proto_version` flag.
  Initially both compute and safekeepers will be able to talk both
  versions so that we can delay force-restarting them, and for
  simplicity of rollback in case it is needed.
- storcon gets a `--set-safekeepers` config option, disabled by
  default. The timeline creation request chooses safekeepers
  (and returns them in the response to cplane) only when it is set to
  true.
- control_plane [see above](storage_controller-<->-control-plane interface-and-changes)
  prefixes the `neon.safekeepers` GUC with the generation number. When it is 0
  (or the prefix is not present at all), walproposer behaves as currently, committing on
  the provided safekeeper list -- generations are disabled.
  If it is non-zero, it follows the rules in this RFC.
- We provide a script for manual migration to the storage controller.
  It selects timeline(s) from the control plane db (specified or all of them)
  and calls a special import endpoint on the storage controller which is very
  similar to timeline creation: it inserts into the db, sets
  the configuration to initial on the safekeepers, and calls cplane
  `notify-safekeepers`.

To roll out smoothly, both walproposer and safekeeper should have a flag
`configurations_enabled`; when set to false, they would work as currently, i.e.
walproposer is able to commit on whatever safekeeper set it is provided. Until
all timelines are managed by storcon we'd need to use the current script to migrate
and update/drop entries in the storage_controller database if it has any.

Then the rollout for a region would be:
- Current situation: safekeepers are chosen by control_plane.
- We manually migrate some timelines, test moving them around.
- Then we enable `--set-safekeepers` so that all new timelines
  are on the storage controller.
- Finally migrate all existing timelines using the script (no
  compute should be speaking the old proto version at this point).

Safekeepers would need to be able to talk both the current and the new protocol version
with compute to reduce the number of computes restarted in prod once the v2 protocol is
deployed (though before completely switching we'd need to force this).

Let's have the following rollout order:
- storage_controller becomes aware of safekeepers;
- storage_controller gets timeline creation for new timelines and deletion requests, but
  doesn't manage all timelines yet. Migration can be tested on these new timelines.
  To keep control plane and storage_controller databases in sync while the control
  plane still chooses the safekeepers initially (until all timelines are imported
  it can choose better), `TimelineCreateRequest` can get an optional safekeepers
  field with safekeepers chosen by cplane.
- Then we can import all existing timelines from the control plane to
  storage_controller and gradually enable configurations region by region.

Until all timelines are managed by storcon we'd need to use the current ad hoc
script to migrate if needed. To keep state clean, all storage-controller-managed
timelines must be migrated before that, or the controller db and the configuration
state of safekeepers dropped manually.

Very rough implementation order:
- Add the concept of configurations to safekeepers (including the control file),
@@ -514,10 +458,10 @@ Very rough implementation order:
- Implement walproposer changes, including the protocol.
- Implement the storcon part. Use it in neon_local (and pytest).
- Make cplane store safekeepers per timeline instead of per tenant.
- Implement cplane/storcon integration. Route branch creation/deletion
  through storcon. Then we can test migration of new branches.
- Finally import existing branches. Then we can drop the cplane
  safekeeper selection code. Gradually enable configurations at
  computes and safekeepers. Before that, all computes must talk only
  the v3 protocol version.


@@ -1,247 +0,0 @@
# CPU and Memory Profiling

Created 2025-01-12 by Erik Grinaker.

See also [internal user guide](https://www.notion.so/neondatabase/Storage-CPU-Memory-Profiling-14bf189e004780228ec7d04442742324?pvs=4).

## Summary

This document proposes a standard cross-team pattern for CPU and memory profiling across
applications and languages, using the [pprof](https://github.com/google/pprof) profile format.

It enables both ad hoc profiles via HTTP endpoints, and continuous profiling across the fleet via
[Grafana Cloud Profiles](https://grafana.com/docs/grafana-cloud/monitor-applications/profiles/).
Continuous profiling incurs an overhead of about 0.1% CPU usage and 3% slower heap allocations.

## Motivation

CPU and memory profiles are crucial observability tools for understanding performance issues,
resource exhaustion, and resource costs. They allow answering questions like:

* Why is this process using 100% CPU?
* How do I make this go faster?
* Why did this process run out of memory?
* Why are we paying for all these CPU cores and memory chips?

Go has [first-class support](https://pkg.go.dev/net/http/pprof) for profiling included in its
standard library, using the [pprof](https://github.com/google/pprof) profile format and associated
tooling.

This is not the case for Rust and C, where obtaining profiles can be rather cumbersome. It requires
installing and running additional tools like `perf` as root on production nodes, with analysis tools
that can be hard to use and often don't give good results. This is not only annoying, but can also
significantly affect the resolution time of production incidents.

This proposal will:

* Provide CPU and heap profiles in pprof format via HTTP API.
* Record continuous profiles in Grafana for aggregate historical analysis.
* Make it easy for anyone to see a flamegraph in less than one minute.
* Be reasonably consistent across teams and services (Rust, Go, C).

## Non Goals (For Now)

* [Additional profile types](https://grafana.com/docs/pyroscope/next/configure-client/profile-types/)
  like mutexes, locks, goroutines, etc.
* [Runtime trace integration](https://grafana.com/docs/pyroscope/next/configure-client/trace-span-profiles/).
* [Profile-guided optimization](https://en.wikipedia.org/wiki/Profile-guided_optimization).

## Using Profiles

Ready-to-use profiles can be obtained using e.g. `curl`. For Rust services:

```
$ curl localhost:9898/profile/cpu >profile.pb.gz
```

pprof profiles can be explored using the [`pprof`](https://github.com/google/pprof) web UI, which
provides flamegraphs, call graphs, plain text listings, and more:

```
$ pprof -http :6060 <profile>
```

Some endpoints (e.g. Rust-based ones) can also generate flamegraph SVGs directly:

```
$ curl localhost:9898/profile/cpu?format=svg >profile.svg
$ open profile.svg
```

Continuous profiles are available in Grafana under Explore → Profiles → Explore Profiles
(currently only in [staging](https://neonstaging.grafana.net/a/grafana-pyroscope-app/profiles-explorer)).

## API Requirements

* HTTP endpoints that return a profile in pprof format (with symbols).
  * CPU: records a profile over the request time interval (`seconds` query parameter).
  * Memory: returns the current in-use heap allocations.
* Unauthenticated, as it should not expose user data or pose a denial-of-service risk.
* Default sample frequency should not impact service (maximum 5% CPU overhead).
* Linux compatibility.

Nice to have:

* Return flamegraph SVG directly from the HTTP endpoint if requested.
* Configurable sample frequency for CPU profiles.
* Historical heap allocations, by count and bytes.
* macOS compatibility.

## Rust Profiling

[`libs/utils/src/http/endpoint.rs`](https://github.com/neondatabase/neon/blob/8327f68043e692c77f70d6a6dafa463636c01578/libs/utils/src/http/endpoint.rs)
contains ready-to-use HTTP endpoints for CPU and memory profiling:
[`profile_cpu_handler`](https://github.com/neondatabase/neon/blob/8327f68043e692c77f70d6a6dafa463636c01578/libs/utils/src/http/endpoint.rs#L338) and [`profile_heap_handler`](https://github.com/neondatabase/neon/blob/8327f68043e692c77f70d6a6dafa463636c01578/libs/utils/src/http/endpoint.rs#L416).

### CPU

CPU profiles are provided by [pprof-rs](https://github.com/tikv/pprof-rs) via
[`profile_cpu_handler`](https://github.com/neondatabase/neon/blob/8327f68043e692c77f70d6a6dafa463636c01578/libs/utils/src/http/endpoint.rs#L338).
Expose it unauthenticated at `/profile/cpu`.

Parameters:

* `format`: profile output format (`pprof` or `svg`; default `pprof`).
* `seconds`: duration to collect profile over, in seconds (default `5`).
* `frequency`: how often to sample thread stacks, in Hz (default `99`).
* `force`: if `true`, cancel a running profile and start a new one (default `false`).

Works on Linux and macOS.

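For local experimentation, a minimal sketch of collecting a CPU profile with pprof-rs directly, outside the HTTP handler. This assumes the `pprof` crate with its `flamegraph` feature and is not the handler's actual code:

```rust
// Assumed dependency: pprof = { version = "0.13", features = ["flamegraph"] }
use std::{fs::File, thread, time::Duration};

fn main() -> Result<(), Box<dyn std::error::Error>> {
    // Sample all thread stacks at 99 Hz, matching the endpoint's default frequency.
    let guard = pprof::ProfilerGuardBuilder::default()
        .frequency(99)
        .build()?;

    // ... run the workload to be profiled; here we just sleep for 5 seconds ...
    thread::sleep(Duration::from_secs(5));

    // Build the report and render it as a flamegraph SVG.
    let report = guard.report().build()?;
    report.flamegraph(File::create("profile.svg")?)?;
    Ok(())
}
```
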
### Memory

Use the jemalloc allocator via [`tikv-jemallocator`](https://github.com/tikv/jemallocator),
and enable profiling with samples every 2 MB allocated (`lg_prof_sample:21` is the base-2 log
of the sample interval in bytes, i.e. 2^21 bytes = 2 MiB):

```rust
#[global_allocator]
static GLOBAL: tikv_jemallocator::Jemalloc = tikv_jemallocator::Jemalloc;

#[allow(non_upper_case_globals)]
#[export_name = "malloc_conf"]
pub static malloc_conf: &[u8] = b"prof:true,prof_active:true,lg_prof_sample:21\0";
```

pprof profiles are generated by
[`jemalloc-pprof`](https://github.com/polarsignals/rust-jemalloc-pprof) via
[`profile_heap_handler`](https://github.com/neondatabase/neon/blob/8327f68043e692c77f70d6a6dafa463636c01578/libs/utils/src/http/endpoint.rs#L416).
Expose it unauthenticated at `/profile/heap`.

Parameters:

* `format`: profile output format (`pprof`, `svg`, or `jemalloc`; default `pprof`).

Works on Linux only, due to [jemalloc limitations](https://github.com/jemalloc/jemalloc/issues/26).

## Go Profiling

The Go standard library includes pprof profiling via HTTP API in
[`net/http/pprof`](https://pkg.go.dev/net/http/pprof). Expose it unauthenticated at
`/debug/pprof`.

Works on Linux and macOS.

### CPU

Via `/debug/pprof/profile`. Parameters:

* `debug`: profile output format (`0` is pprof, `1` or above is plaintext; default `0`).
* `seconds`: duration to collect profile over, in seconds (default `30`).

Does not support a frequency parameter (see [#57488](https://github.com/golang/go/issues/57488)),
and defaults to 100 Hz. A lower frequency can be hardcoded via `SetCPUProfileRate`, but the default
is likely ok (estimated 1% overhead).

### Memory

Via `/debug/pprof/heap`. Parameters:

* `seconds`: take a delta profile over the given duration, in seconds (default `0`).
* `gc`: if `1`, garbage collect before taking the profile.

## C Profiling

[gperftools](https://github.com/gperftools/gperftools) provides in-process CPU and heap profiling
with pprof output.

However, continuous profiling of PostgreSQL is expensive (many computes), and has limited value
since we don't own the internals anyway.

Ad hoc profiling might still be useful, but the compute team considers existing tooling sufficient,
so this is not a priority at the moment.

## Grafana Continuous Profiling

[Grafana Alloy](https://grafana.com/docs/alloy/latest/) continually scrapes CPU and memory profiles
across the fleet, and archives them as time series. This can be used to analyze resource usage over
time, either in aggregate or zoomed in to specific events and nodes.

Profiles are retained for 30 days. Profile ingestion volume for CPU+heap at 60-second intervals
is about 0.5 GB/node/day, or about $0.25/node/day = $7.5/node/month ($0.50/GB).

It is currently enabled in [staging](https://neonstaging.grafana.net/a/grafana-pyroscope-app/profiles-explorer)
for Pageserver and Safekeeper.

### Scraping

* CPU profiling: 59 seconds at 19 Hz every 60 seconds.
* Heap profiling: heap snapshot with 2 MB frequency every 60 seconds.

There are two main approaches that can be taken for CPU profiles:

* Continuous low-frequency profiles (e.g. 19 Hz for 60 seconds every 60 seconds).
* Occasional high-frequency profiles (e.g. 99 Hz for 5 seconds every 60 seconds).

We choose continuous low-frequency profiles where possible. This has a fixed low overhead, instead
of a spiky high overhead. It likely also gives a more representative view of resource usage.
However, a 19 Hz rate gives a minimum resolution of 52.6 ms per sample, which may be larger than the
actual runtime of small functions. Note that Go does not support a frequency parameter, so we must
use a fixed frequency for all profiles via `SetCPUProfileRate()` (default 100 Hz).

Only one CPU profile can be taken at a time. With continuous profiling, one will always be running.
To allow also taking an ad hoc CPU profile, the Rust endpoint supports a `force` query parameter to
cancel a running profile and start a new one.

### Overhead

With Rust:

* CPU profiles at 19 Hz frequency: 0.1% overhead.
* Heap profiles at 2 MB frequency: 3% allocation overhead.
* Profile call/encoding/symbolization: 20 ms every 60 seconds, or 0.03% of 1 CPU (for Pageserver).
* Profile symbolization caches: 125 MB memory, or 0.4% of 32 GB (for Pageserver).

Benchmarks with pprof-rs showed that the CPU time for taking a stack trace of a 40-frame stack was
11 µs using the `frame-pointer` feature, and 1.4 µs using `libunwind` with DWARF. `libunwind` saw
frequent seg faults, so we use `frame-pointer` and build binaries with frame pointers (negligible
overhead).

CPU profiles work by installing an `ITIMER_PROF` for the process, which triggers a `SIGPROF` signal
after a given amount of cumulative CPU time across all CPUs. The signal handler will run for one
of the currently executing threads and take a stack trace. Thus, a 19 Hz profile will take 1 stack
trace every 52.6 ms CPU time -- assuming 11 µs for a stack trace, this is 0.02% overhead, but
likely 0.1% in practice (given e.g. context switches).

Heap profiles work by probabilistically taking a stack trace on allocations, adjusted for the
allocation size. A 1 MB allocation takes about 15 µs in benchmarks, and a stack trace about 1 µs,
so we can estimate that a 2 MB sampling frequency has about 3% allocation overhead -- this is
consistent with benchmarks. This is significantly larger than CPU profiles, but mitigated by the
fact that performance-sensitive code will avoid allocations as far as possible.

Profile symbolization uses in-memory caches for symbol lookups. These take about 125 MB for
Pageserver.

## Alternatives Considered

* eBPF profiles.
  * Don't require instrumenting the binary.
  * Use less resources.
  * Can profile in kernel space too.
  * Supported by Grafana.
  * Less information about stack frames and spans.
  * Limited tooling for local analysis.
  * Does not support heap profiles.
  * Does not work on macOS.

* [Polar Signals](https://www.polarsignals.com) instead of Grafana.
  * We already use Grafana for everything else. Appears good enough.
@@ -1,255 +0,0 @@
#
Created in Aug 2024
Implemented in Jan 2025

## Summary

Data in large tenants is split up between multiple pageservers according to key hashes, as
introduced in the [sharding RFC](031-sharding-static.md) and [shard splitting RFC](032-shard-splitting.md).

Whereas currently we send all WAL to all pageserver shards, and each shard filters out the data that it needs,
in this RFC we add a mechanism to filter the WAL on the safekeeper, so that each shard receives
only the data it needs.

This will place some extra CPU load on the safekeepers, in exchange for reducing the network bandwidth
for ingesting WAL back to scaling as O(1) with shard count, rather than O(N_shards).

## Motivation

1. Large databases require higher shard counts. Whereas currently we run with up to 8 shards for tenants
   with a few TB of storage, the next order of magnitude capacity increase will require tens of shards, such
   that sending all WAL to all shards is impractical in terms of bandwidth.
2. For contemporary database sizes (~2TB), the pageserver is the bottleneck for ingest: since each
   shard has to decode and process the whole WAL, sharding doesn't fully relieve this bottleneck. To achieve
   significantly higher ingest speeds, we need to filter the WAL earlier so that each pageserver
   only has to process relevant parts.

## Non Goals (if relevant)

We do not seek to introduce multiple WALs per timeline, or to share the work of handling a timeline's
WAL across safekeepers (beyond simple 3x replication). This RFC may be thought of as an incremental
move of the ingestion bottleneck up the stack: instead of high write rates bottlenecking on the
pageserver, they will bottleneck on the safekeeper.

## Impacted components (e.g. pageserver, safekeeper, console, etc)

Safekeeper, pageserver.

There will be no control plane or storage controller coordination needed, as pageservers will directly
indicate their sharding parameters to the safekeeper when subscribing for WAL.

## Proposed implementation

Terminology:
- "Data pages" refers to postgres relation blocks, and SLRU blocks.
- "Metadata pages" refers to everything else the pageserver stores, such as relation sizes and
  directories of relations.

### Phase 1: Refactor ingest

Currently, pageserver ingest code is structured approximately as follows:
1. `handle_walreceiver_connection` reads a stream of binary WAL records off a network
   socket
2. `WalIngest::ingest_record` translates the record into a series of page-level modifications
3. `DatadirModification` accumulates page updates from several `ingest_record` calls, and when
   its `commit()` method is called, flushes these into a Timeline's open `InMemoryLayer`.

This process currently assumes access to a pageserver `Timeline` throughout `ingest_record` and
from `DatadirModification`, which is used to do read-modify-write cycles on metadata pages
such as relation sizes and the master DBDIR page. It also assumes that records are ingested
strictly one after the other: they cannot be ingested in parallel because each record assumes
that earlier records' changes have already been applied to `Timeline`.

This code will be refactored to disentangle the simple, fast decode of relation page writes
from the more complex logic for updating internal metadata. An intermediate representation
called `InterpretedWalRecords` will be introduced. This is similar to the internal state of
a `DatadirModification`, but does not require access to a Timeline. Instead of storing
metadata updates as materialized writes to pages, it will accumulate these as abstract operations;
for example, rather than including a write to a relation size key, this structure will include
an operation that indicates "Update relation _foo_'s size to the max of its current value and
_bar_", such that these may be applied later to a real Timeline.

The `DatadirModification` will be aware of the `EphemeralFile` format, so that as it accumulates
simple page writes of relation blocks, it can write them directly into a buffer in the serialized
format. This will avoid the need to later deserialize/reserialize this data when passing the
structure between safekeeper and pageserver.

The new pipeline will be (a toy sketch of the intermediate representation follows the list):
1. `handle_walreceiver_connection` reads a stream of binary WAL records off a network socket
2. An `InterpretedWalRecords` is generated from the incoming WAL records. This does not
   require a reference to a Timeline.
3. The logic that is currently spread between `WalIngest` and `DatadirModification` for updating
   metadata will be refactored to consume the metadata operations from the `InterpretedWalRecords`
   and turn them into literal writes to metadata pages. This part must be done sequentially.
4. The resulting buffer of metadata page writes is combined with the buffer of relation block
   writes, and written into the `InMemoryLayer`.

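Purely as an illustration of the shape of this intermediate representation, a toy, self-contained sketch with made-up field names (the real `InterpretedWalRecords` in the repo is considerably richer):

```rust
/// Toy stand-ins for the intermediate representation described above.
type Lsn = u64;

/// A write to a relation or SLRU block, already in its serialized on-disk form,
/// so the pageserver can append it to an `InMemoryLayer` without re-encoding.
struct DataPageWrite {
    key: [u8; 18], // stand-in for the pageserver key
    lsn: Lsn,
    payload: Vec<u8>,
}

/// Metadata changes kept abstract so they can be applied later against a Timeline.
enum MetadataOp {
    /// "Grow relation to at least this many blocks" (max with the current value).
    RelSizeAtLeast { rel: u32, nblocks: u32 },
    CreateRel { rel: u32 },
    DropRel { rel: u32 },
}

/// One decoded Postgres WAL record, split into the two kinds of output.
struct InterpretedRecord {
    end_lsn: Lsn,
    data_writes: Vec<DataPageWrite>,
    metadata_ops: Vec<MetadataOp>,
}
```
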
Implemented in:
1. https://github.com/neondatabase/neon/pull/9472
2. https://github.com/neondatabase/neon/pull/9504
3. https://github.com/neondatabase/neon/pull/9524

### Phase 2: Decode & filter on safekeeper

In the previous phase, the ingest code was modified to be able to do most of its work without access to
a Timeline: this first stage of ingest simply converts a series of binary WAL records into
a buffer of relation/SLRU page writes, and a buffer of abstract metadata writes.

The modified ingest code may be transplanted from pageserver to safekeeper (probably via a
shared crate). The safekeeper->pageserver network protocol is modified to:
- in subscription requests, send the `ShardIdentity` from the pageserver to the safekeeper
- in responses, transmit an `InterpretedWalRecords` instead of a raw `WalRecord`.
- use the `ShardIdentity` to filter the `ProcessedWalIngest` to relevant content for
  the subscribing shard before transmitting it (see the sketch after this list).

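A deliberately oversimplified sketch of that per-shard filtering step; the striping function below is a toy stand-in, not Neon's actual key-to-shard mapping (which lives in `ShardIdentity`):

```rust
/// Toy shard mapping: stripes of pages are assigned to shards round-robin.
/// Real parameters (stripe size, hashing) differ; this only shows the idea.
fn owning_shard(key_hash: u64, stripe_size_pages: u64, shard_count: u32) -> u32 {
    ((key_hash / stripe_size_pages) % shard_count as u64) as u32
}

/// Keep only the data page writes relevant to the subscribing shard.
fn filter_for_shard(
    writes: Vec<(u64, Vec<u8>)>, // (key hash, serialized write)
    shard: u32,
    shard_count: u32,
    stripe_size_pages: u64,
) -> Vec<(u64, Vec<u8>)> {
    writes
        .into_iter()
        .filter(|(key_hash, _)| owning_shard(*key_hash, stripe_size_pages, shard_count) == shard)
        .collect()
}
```
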
The overall behavior of the pageserver->safekeeper interaction remains the same, in terms of
consistent LSN feedback, and connection management. Only the payload of the subscriptions
changes, to express an LSN range of WAL as a filtered `ProcessedWalIngest` instead of the
raw data.

The ingest code on the pageserver can now skip the part where it does the first phase of
processing, as it will receive pre-processed, compressed data off the wire.

Note that an `InterpretedWalRecords` message batches multiple `InterpretedWalRecord`s in the same network
message. Safekeeper reads WAL in chunks of 16 blocks and then decodes as many Postgres WAL records
as possible. Each Postgres WAL record maps to one `InterpretedWalRecord` for potentially multiple shards.
Hence, the size of the batch is given by the number of Postgres WAL records that fit in 16 blocks.

The protocol needs to support evolution. Protobuf was chosen here with the view that, in the future,
we may migrate it to gRPC altogether.

Implemented in:
1. https://github.com/neondatabase/neon/pull/9746
2. https://github.com/neondatabase/neon/pull/9821

### Phase 3: Fan out interpreted WAL

In the previous phase, the initial processing of WAL was moved to the safekeeper, but it is still
done once for each shard: this will generate O(N_shards) CPU work on the safekeeper (especially
when considering converting to Protobuf format and compression).

To avoid this, we fan out WAL from one (tenant, timeline, shard) to all other shards subscribed on
the same safekeeper. Under normal operation, the WAL will be read from disk, decoded and interpreted
_only_ once per (safekeeper, timeline).

When the first shard of a sharded timeline subscribes to a given safekeeper, a task is spawned
for the WAL reader (`InterpretedWalReader`). This task reads WAL, decodes and interprets it, and sends
it to the sender (`InterpretedWalSender`). The sender is a future that is polled from the connection
task. When further shards subscribe on the safekeeper they will attach themselves to the existing WAL reader.
There are two cases to consider (a toy sketch of the decision follows the list):
1. The shard's requested `start_lsn` is ahead of the current position of the WAL reader. In this case, the shard
   will start receiving data when the reader reaches that LSN. The intuition here is that there's little to gain
   by letting shards "front-run" since compute backpressure is based on the laggard LSN.
2. The shard's requested `start_lsn` is below the current position of the WAL reader. In this case, the WAL reader
   gets reset to this requested position (same intuition). Special care is taken such that advanced shards do not receive
   interpreted WAL records below their current position.

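As a toy, self-contained sketch of that attach decision (types and names are illustrative only):

```rust
type Lsn = u64;

/// What to do when a new shard attaches to an existing interpreted WAL reader.
enum AttachAction {
    /// The shard is at or ahead of the reader: it simply starts receiving data
    /// once the reader reaches its requested start position.
    WaitUntilReached { start_at: Lsn },
    /// The shard is behind the reader: reset the reader back to the laggard's
    /// position; shards that are further ahead must ignore records below
    /// their own current position.
    ResetReaderTo { new_position: Lsn },
}

fn attach_shard(reader_position: Lsn, requested_start_lsn: Lsn) -> AttachAction {
    if requested_start_lsn >= reader_position {
        AttachAction::WaitUntilReached { start_at: requested_start_lsn }
    } else {
        AttachAction::ResetReaderTo { new_position: requested_start_lsn }
    }
}
```
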
The approach above implies that there is at most one WAL reader per (tenant, timeline) on a given safekeeper at any point in time.
If this turns out to be operationally problematic, there's a trick we can deploy: `--max-delta-for-fanout` is an optional safekeeper
argument that controls the max absolute delta between a new shard and the current WAL position of the WAL reader. If the absolute
delta is above that value, a new reader is spawned. Note that there's currently no concurrency control on the number of WAL readers,
so it's recommended to use large values to avoid pushing CPU utilisation too high.

Unsharded tenants do not spawn a separate task for the interpreted WAL reader since there's no benefit to it. Instead they poll
the reader and sender concurrently from the connection task.

Shard splits are interesting here because it is the only case when the same shard might have two subscriptions at the same time.
This is handled by giving readers a unique identifier. Both shards will receive the same data while respecting their requested start
position.

Implemented in:
1. https://github.com/neondatabase/neon/pull/10190

## Deployment

Each phase shall be deployed independently. Special care should be taken around protocol changes.

## Observability Tips

* The safekeeper logs the protocol requested by the pageserver
  along with the pageserver ID, tenant, timeline and shard: `starting streaming from`.
* There are metrics for the number of WAL readers:
  * `safekeeper_wal_readers{kind="task", target=~"pageserver.*"}` gives the number of WAL reader tasks for each SK
  * `safekeeper_wal_readers{kind="future", target=~"pageserver.*"}` gives the number of WAL readers polled inline by each SK
  * `safekeeper_interpreted_wal_reader_tasks` gives the number of WAL reader tasks per tenant, timeline
* Interesting log lines for the fan-out reader:
  * `Spawning interpreted`: first shard creates the interpreted WAL reader
  * `Fanning out`: a subsequent shard attaches itself to an interpreted WAL reader
  * `Aborting interpreted`: all senders have finished and the reader task is being aborted

## Future Optimizations

This section describes some improvement areas which may be revisited in the future.

### Buffering of Interpreted WAL

The interpreted WAL reader may buffer interpreted WAL records in user space to help with serving
subscriptions that are lagging behind the current position of the reader.

Counterpoints:
* Safekeepers serve many thousands of timelines and allocating a buffer for each might be wasteful,
  especially given that it would go unused on the happy path.
* WAL is buffered in the kernel page cache. Usually we'd only pay the CPU cost of decoding and interpreting.

### Tweaking the Pageserver Safekeeper Selection Algorithm

We could make the pageserver aware of which safekeepers already host shards for the timeline along
with their current WAL positions. The pageserver should then prefer safekeepers that are in the same
AZ _and_ already have a shard with a position close to the desired start position.

We currently run one safekeeper per AZ, so the point is moot until that changes.

### Pipelining first ingest phase

The first ingest phase is a stateless transformation of a binary WAL record into a pre-processed
output per shard. To put multiple CPUs to work, we may pipeline this processing up to some defined buffer
depth.

## Alternatives considered

### Give safekeepers enough state to fully decode WAL

In this RFC, we only do the first phase of ingest on the safekeeper, because this is
the phase that is stateless. Subsequent changes then happen on the pageserver, with
access to the `Timeline` state.

We could do more work on the safekeeper if we transmitted metadata state to the safekeeper
when subscribing to the WAL: for example, by telling the safekeeper all the relation sizes,
so that it could then generate all the metadata writes for relation sizes.

We avoid doing this for several reasons:
1. Complexity: it's a more invasive protocol change
2. Decoupling: having the safekeeper understand the `ProcessedWalIngest` already somewhat
   infects it with knowledge of the pageserver, but this is mainly an abstract structure
   that describes postgres writes. However, if we taught the safekeeper about the exact
   way that the pageserver deals with metadata keys, this would be a much tighter coupling.
3. Load: once the WAL has been processed to the point that it can be split between shards,
   it is preferable to share out work on the remaining shards rather than adding extra CPU
   load to the safekeeper.

### Do pre-processing on the compute instead of the safekeeper

Since our first stage of ingest is stateless, it could be done at any stage in the pipeline,
all the way up to the compute.

We choose not to do this, because it is useful for the safekeeper to store the raw WAL rather
than just the preprocessed WAL:
- The safekeeper still needs to be able to serve raw WAL back to postgres for e.g. physical replication
- It simplifies our paxos implementation to have the offset in the write log be literally
  the same as the LSN
- Raw WAL must have a stable protocol since we might have to re-ingest it at arbitrary points in the future.
  Storing raw WAL gives us more flexibility to evolve the pageserver/safekeeper protocol.

### Do WAL pre-processing on shard 0 or a separate service, send it to other shards from there

If we wanted to keep the safekeepers as entirely pure stores of raw WAL bytes, then
we could do the initial decode and shard-splitting in some other location:
- Shard 0 could subscribe to the full WAL and then send writes to other shards
- A new intermediate service between the safekeeper and pageserver could do the splitting.

So why not?
- Extra network hop from shard 0 to the final destination shard
- Clearly there is more infrastructure involved here compared with doing it inline on the safekeeper.
- Safekeepers already have very light CPU load: typical cloud instance shapes with appropriate
  disks for the safekeepers effectively have "free" CPU resources.
- Doing extra work on shard 0 would complicate scheduling of shards on pageservers, because
  shard 0 would have significantly higher CPU load under write workloads than other shards.
@@ -15,6 +15,11 @@ pub struct GenericAPIError {
    pub error: String,
}

#[derive(Debug, Clone, Serialize)]
pub struct InfoResponse {
    pub num_cpus: usize,
}

#[derive(Debug, Clone, Serialize)]
pub struct ExtensionInstallResponse {
    pub extension: PgIdent,

@@ -138,13 +138,6 @@ pub struct ComputeSpec {
    /// enough spare connections for reconfiguration process to succeed.
    #[serde(default = "default_reconfigure_concurrency")]
    pub reconfigure_concurrency: usize,

    /// If set to true, the compute_ctl will drop all subscriptions before starting the
    /// compute. This is needed when we start an endpoint on a branch, so that child
    /// would not compete with parent branch subscriptions
    /// over the same replication content from publisher.
    #[serde(default)] // Default false
    pub drop_subscriptions_before_start: bool,
}

/// Feature flag to signal `compute_ctl` to enable certain experimental functionality.
@@ -204,16 +197,14 @@ impl RemoteExtSpec {

        // Check if extension is present in public or custom.
        // If not, then it is not allowed to be used by this compute.
        if !self
            .public_extensions
            .as_ref()
            .is_some_and(|exts| exts.iter().any(|e| e == ext_name))
            && !self
                .custom_extensions
                .as_ref()
                .is_some_and(|exts| exts.iter().any(|e| e == ext_name))
        {
            return Err(anyhow::anyhow!("extension {} is not found", real_ext_name));
        if let Some(public_extensions) = &self.public_extensions {
            if !public_extensions.contains(&real_ext_name.to_string()) {
                if let Some(custom_extensions) = &self.custom_extensions {
                    if !custom_extensions.contains(&real_ext_name.to_string()) {
                        return Err(anyhow::anyhow!("extension {} is not found", real_ext_name));
                    }
                }
            }
        }

        match self.extension_data.get(real_ext_name) {
@@ -342,96 +333,6 @@ mod tests {
    use super::*;
    use std::fs::File;

    #[test]
    fn allow_installing_remote_extensions() {
        let rspec: RemoteExtSpec = serde_json::from_value(serde_json::json!({
            "public_extensions": null,
            "custom_extensions": null,
            "library_index": {},
            "extension_data": {},
        }))
        .unwrap();

        rspec
            .get_ext("ext", false, "latest", "v17")
            .expect_err("Extension should not be found");

        let rspec: RemoteExtSpec = serde_json::from_value(serde_json::json!({
            "public_extensions": [],
            "custom_extensions": null,
            "library_index": {},
            "extension_data": {},
        }))
        .unwrap();

        rspec
            .get_ext("ext", false, "latest", "v17")
            .expect_err("Extension should not be found");

        let rspec: RemoteExtSpec = serde_json::from_value(serde_json::json!({
            "public_extensions": [],
            "custom_extensions": [],
            "library_index": {
                "ext": "ext"
            },
            "extension_data": {
                "ext": {
                    "control_data": {
                        "ext.control": ""
                    },
                    "archive_path": ""
                }
            },
        }))
        .unwrap();

        rspec
            .get_ext("ext", false, "latest", "v17")
            .expect_err("Extension should not be found");

        let rspec: RemoteExtSpec = serde_json::from_value(serde_json::json!({
            "public_extensions": [],
            "custom_extensions": ["ext"],
            "library_index": {
                "ext": "ext"
            },
            "extension_data": {
                "ext": {
                    "control_data": {
                        "ext.control": ""
                    },
                    "archive_path": ""
                }
            },
        }))
        .unwrap();

        rspec
            .get_ext("ext", false, "latest", "v17")
            .expect("Extension should be found");

        let rspec: RemoteExtSpec = serde_json::from_value(serde_json::json!({
            "public_extensions": ["ext"],
            "custom_extensions": [],
            "library_index": {
                "ext": "ext"
            },
            "extension_data": {
                "ext": {
                    "control_data": {
                        "ext.control": ""
                    },
                    "archive_path": ""
                }
            },
        }))
        .unwrap();

        rspec
            .get_ext("ext", false, "latest", "v17")
            .expect("Extension should be found");
    }

    #[test]
    fn parse_spec_file() {
        let file = File::open("tests/cluster_spec.json").unwrap();

@@ -120,7 +120,6 @@ pub struct ConfigToml {
    pub no_sync: Option<bool>,
    pub wal_receiver_protocol: PostgresClientProtocol,
    pub page_service_pipelining: PageServicePipeliningConfig,
    pub get_vectored_concurrent_io: GetVectoredConcurrentIo,
}

#[derive(Debug, Clone, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
@@ -159,25 +158,6 @@ pub enum PageServiceProtocolPipelinedExecutionStrategy {
    Tasks,
}

#[derive(Debug, Clone, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
#[serde(tag = "mode", rename_all = "kebab-case")]
#[serde(deny_unknown_fields)]
pub enum GetVectoredConcurrentIo {
    /// The read path is fully sequential: layers are visited
    /// one after the other and IOs are issued and waited upon
    /// from the same task that traverses the layers.
    Sequential,
    /// The read path still traverses layers sequentially, and
    /// index blocks will be read into the PS PageCache from
    /// that task, with waiting.
    /// But data IOs are dispatched and waited upon from a sidecar
    /// task so that the traversing task can continue to traverse
    /// layers while the IOs are in flight.
    /// If the PS PageCache miss rate is low, this improves
    /// throughput dramatically.
    SidecarTask,
}

pub mod statvfs {
    pub mod mock {
        #[derive(Debug, Clone, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
@@ -254,26 +234,9 @@ pub struct TenantConfigToml {
    // Duration::ZERO means automatic compaction is disabled.
    #[serde(with = "humantime_serde")]
    pub compaction_period: Duration,
    /// Level0 delta layer threshold for compaction.
    // Level0 delta layer threshold for compaction.
    pub compaction_threshold: usize,
    /// Controls the amount of L0 included in a single compaction iteration.
    /// The unit is `checkpoint_distance`, i.e., a size.
    /// We add L0s to the set of layers to compact until their cumulative
    /// size exceeds `compaction_upper_limit * checkpoint_distance`.
    pub compaction_upper_limit: usize,
    pub compaction_algorithm: crate::models::CompactionAlgorithmSettings,
    /// Level0 delta layer threshold at which to delay layer flushes for compaction backpressure,
    /// such that they take 2x as long, and start waiting for layer flushes during ephemeral layer
    /// rolls. This helps compaction keep up with WAL ingestion, and avoids read amplification
    /// blowing up. Should be >compaction_threshold. 0 to disable. Disabled by default.
    pub l0_flush_delay_threshold: Option<usize>,
    /// Level0 delta layer threshold at which to stall layer flushes. Must be >compaction_threshold
    /// to avoid deadlock. 0 to disable. Disabled by default.
    pub l0_flush_stall_threshold: Option<usize>,
    /// If true, Level0 delta layer flushes will wait for S3 upload before flushing the next
    /// layer. This is a temporary backpressure mechanism which should be removed once
    /// l0_flush_{delay,stall}_threshold is fully enabled.
    pub l0_flush_wait_upload: bool,
    // Determines how much history is retained, to allow
    // branching and read replicas at an older point in time.
    // The unit is #of bytes of WAL.
@@ -323,10 +286,6 @@ pub struct TenantConfigToml {
    // Expresed in multiples of checkpoint distance.
    pub image_layer_creation_check_threshold: u8,

    // How many multiples of L0 `compaction_threshold` will preempt image layer creation and do L0 compaction.
    // Set to 0 to disable preemption.
    pub image_creation_preempt_threshold: usize,

    /// The length for an explicit LSN lease request.
    /// Layers needed to reconstruct pages at LSN will not be GC-ed during this interval.
    #[serde(with = "humantime_serde")]
@@ -342,20 +301,6 @@ pub struct TenantConfigToml {
    pub timeline_offloading: bool,

    pub wal_receiver_protocol_override: Option<PostgresClientProtocol>,

    /// Enable rel_size_v2 for this tenant. Once enabled, the tenant will persist this information into
    /// `index_part.json`, and it cannot be reversed.
    pub rel_size_v2_enabled: Option<bool>,

    // gc-compaction related configs
    /// Enable automatic gc-compaction trigger on this tenant.
    pub gc_compaction_enabled: bool,
    /// The initial threshold for gc-compaction in KB. Once the total size of layers below the gc-horizon is above this threshold,
    /// gc-compaction will be triggered.
    pub gc_compaction_initial_threshold_kb: u64,
    /// The ratio that triggers the auto gc-compaction. If (the total size of layers between L2 LSN and gc-horizon) / (size below the L2 LSN)
    /// is above this ratio, gc-compaction will be triggered.
    pub gc_compaction_ratio_percent: u64,
}

pub mod defaults {
@@ -505,11 +450,6 @@ impl Default for ConfigToml {
                    execution: PageServiceProtocolPipelinedExecutionStrategy::ConcurrentFutures,
                })
            },
            get_vectored_concurrent_io: if !cfg!(test) {
                GetVectoredConcurrentIo::Sequential
            } else {
                GetVectoredConcurrentIo::SidecarTask
            },
        }
    }
}
@@ -532,17 +472,9 @@ pub mod tenant_conf_defaults {

    pub const DEFAULT_COMPACTION_PERIOD: &str = "20 s";
    pub const DEFAULT_COMPACTION_THRESHOLD: usize = 10;

    // This value needs to be tuned to avoid OOM. We have 3/4 of the total CPU threads to do background works, that's 16*3/4=9 on
    // most of our pageservers. Compaction ~50 layers requires about 2GB memory (could be reduced later by optimizing L0 hole
    // calculation to avoid loading all keys into the memory). So with this config, we can get a maximum peak compaction usage of 18GB.
    pub const DEFAULT_COMPACTION_UPPER_LIMIT: usize = 50;

    pub const DEFAULT_COMPACTION_ALGORITHM: crate::models::CompactionAlgorithm =
        crate::models::CompactionAlgorithm::Legacy;

    pub const DEFAULT_L0_FLUSH_WAIT_UPLOAD: bool = true;

    pub const DEFAULT_GC_HORIZON: u64 = 64 * 1024 * 1024;

    // Large DEFAULT_GC_PERIOD is fine as long as PITR_INTERVAL is larger.
@@ -551,10 +483,6 @@ pub mod tenant_conf_defaults {
    // Relevant: https://github.com/neondatabase/neon/issues/3394
    pub const DEFAULT_GC_PERIOD: &str = "1 hr";
    pub const DEFAULT_IMAGE_CREATION_THRESHOLD: usize = 3;
    // If there are more than threshold * compaction_threshold (that is 3 * 10 in the default config) L0 layers, image
    // layer creation will end immediately. Set to 0 to disable. The target default will be 3 once we
    // want to enable this feature.
    pub const DEFAULT_IMAGE_CREATION_PREEMPT_THRESHOLD: usize = 0;
    pub const DEFAULT_PITR_INTERVAL: &str = "7 days";
    pub const DEFAULT_WALRECEIVER_CONNECT_TIMEOUT: &str = "10 seconds";
    pub const DEFAULT_WALRECEIVER_LAGGING_WAL_TIMEOUT: &str = "10 seconds";
@@ -566,9 +494,6 @@ pub mod tenant_conf_defaults {
    // By default ingest enough WAL for two new L0 layers before checking if new image
    // image layers should be created.
    pub const DEFAULT_IMAGE_LAYER_CREATION_CHECK_THRESHOLD: u8 = 2;
    pub const DEFAULT_GC_COMPACTION_ENABLED: bool = false;
    pub const DEFAULT_GC_COMPACTION_INITIAL_THRESHOLD_KB: u64 = 10240000;
    pub const DEFAULT_GC_COMPACTION_RATIO_PERCENT: u64 = 100;
}

impl Default for TenantConfigToml {
@@ -582,13 +507,9 @@ impl Default for TenantConfigToml {
            compaction_period: humantime::parse_duration(DEFAULT_COMPACTION_PERIOD)
                .expect("cannot parse default compaction period"),
            compaction_threshold: DEFAULT_COMPACTION_THRESHOLD,
            compaction_upper_limit: DEFAULT_COMPACTION_UPPER_LIMIT,
            compaction_algorithm: crate::models::CompactionAlgorithmSettings {
                kind: DEFAULT_COMPACTION_ALGORITHM,
            },
            l0_flush_delay_threshold: None,
            l0_flush_stall_threshold: None,
            l0_flush_wait_upload: DEFAULT_L0_FLUSH_WAIT_UPLOAD,
            gc_horizon: DEFAULT_GC_HORIZON,
            gc_period: humantime::parse_duration(DEFAULT_GC_PERIOD)
                .expect("cannot parse default gc period"),
@@ -613,15 +534,10 @@ impl Default for TenantConfigToml {
            lazy_slru_download: false,
            timeline_get_throttle: crate::models::ThrottleConfig::disabled(),
            image_layer_creation_check_threshold: DEFAULT_IMAGE_LAYER_CREATION_CHECK_THRESHOLD,
            image_creation_preempt_threshold: DEFAULT_IMAGE_CREATION_PREEMPT_THRESHOLD,
            lsn_lease_length: LsnLease::DEFAULT_LENGTH,
            lsn_lease_length_for_ts: LsnLease::DEFAULT_LENGTH_FOR_TS,
            timeline_offloading: false,
            wal_receiver_protocol_override: None,
            rel_size_v2_enabled: None,
            gc_compaction_enabled: DEFAULT_GC_COMPACTION_ENABLED,
            gc_compaction_initial_threshold_kb: DEFAULT_GC_COMPACTION_INITIAL_THRESHOLD_KB,
            gc_compaction_ratio_percent: DEFAULT_GC_COMPACTION_RATIO_PERCENT,
        }
    }
}

@@ -87,7 +87,7 @@ impl Display for AvailabilityZone {
#[derive(Serialize, Deserialize)]
pub struct ShardsPreferredAzsRequest {
    #[serde(flatten)]
    pub preferred_az_ids: HashMap<TenantShardId, Option<AvailabilityZone>>,
    pub preferred_az_ids: HashMap<TenantShardId, AvailabilityZone>,
}

#[derive(Serialize, Deserialize)]
@@ -144,8 +144,6 @@ pub struct NodeDescribeResponse {
    pub availability: NodeAvailabilityWrapper,
    pub scheduling: NodeSchedulingPolicy,

    pub availability_zone_id: String,

    pub listen_http_addr: String,
    pub listen_http_port: u16,

@@ -181,6 +179,7 @@ pub struct TenantDescribeResponseShard {
/// specifies some constraints, e.g. asking it to get off particular node(s)
#[derive(Serialize, Deserialize, Debug)]
pub struct TenantShardMigrateRequest {
    pub tenant_shard_id: TenantShardId,
    pub node_id: NodeId,
}

@@ -324,7 +323,7 @@ impl From<NodeSchedulingPolicy> for String {
#[derive(Serialize, Deserialize, Clone, Copy, Eq, PartialEq, Debug)]
pub enum SkSchedulingPolicy {
    Active,
    Pause,
    Disabled,
    Decomissioned,
}

@@ -334,13 +333,9 @@ impl FromStr for SkSchedulingPolicy {
    fn from_str(s: &str) -> Result<Self, Self::Err> {
        Ok(match s {
            "active" => Self::Active,
            "pause" => Self::Pause,
            "disabled" => Self::Disabled,
            "decomissioned" => Self::Decomissioned,
            _ => {
                return Err(anyhow::anyhow!(
                    "Unknown scheduling policy '{s}', try active,pause,decomissioned"
                ))
            }
            _ => return Err(anyhow::anyhow!("Unknown scheduling state '{s}'")),
        })
    }
}
@@ -350,7 +345,7 @@ impl From<SkSchedulingPolicy> for String {
        use SkSchedulingPolicy::*;
        match value {
            Active => "active",
            Pause => "pause",
            Disabled => "disabled",
            Decomissioned => "decomissioned",
        }
        .to_string()
@@ -373,16 +368,6 @@ pub enum PlacementPolicy {
    Detached,
}

impl PlacementPolicy {
    pub fn want_secondaries(&self) -> usize {
        match self {
            PlacementPolicy::Attached(secondary_count) => *secondary_count,
            PlacementPolicy::Secondary => 1,
            PlacementPolicy::Detached => 0,
        }
    }
}

#[derive(Serialize, Deserialize, Debug)]
pub struct TenantShardMigrateResponse {}

@@ -420,6 +405,8 @@ pub struct MetadataHealthListOutdatedResponse {
}

/// Publicly exposed safekeeper description
///
/// The `active` flag which we have in the DB is not included on purpose: it is deprecated.
#[derive(Serialize, Deserialize, Clone)]
pub struct SafekeeperDescribeResponse {
    pub id: NodeId,
@@ -435,11 +422,6 @@ pub struct SafekeeperDescribeResponse {
    pub scheduling_policy: SkSchedulingPolicy,
}

#[derive(Serialize, Deserialize, Clone)]
pub struct SafekeeperSchedulingPolicyRequest {
    pub scheduling_policy: SkSchedulingPolicy,
}

#[cfg(test)]
mod test {
    use super::*;