Compare commits

..

1 Commits

Author SHA1 Message Date
Conrad Ludgate
5019c7eb2c fix: dont hard error on initial redis failure 2025-01-30 10:26:48 +00:00
110 changed files with 1460 additions and 3825 deletions

View File

@@ -3,7 +3,6 @@ name: Bug Template
about: Used for describing bugs
title: ''
labels: t/bug
type: Bug
assignees: ''
---

View File

@@ -4,7 +4,6 @@ about: A set of related tasks contributing towards specific outcome, comprising
more than 1 week of work.
title: 'Epic: '
labels: t/Epic
type: Epic
assignees: ''
---

View File

@@ -27,4 +27,3 @@ config-variables:
- SLACK_ON_CALL_QA_STAGING_STREAM
- DEV_AWS_OIDC_ROLE_MANAGE_BENCHMARK_EC2_VMS_ARN
- SLACK_ON_CALL_STORAGE_STAGING_STREAM
- SLACK_CICD_CHANNEL_ID

View File

@@ -41,10 +41,7 @@ inputs:
description: 'Path to directory containing libpq library - it is caller responsibility to provision the libpq library'
required: false
default: '/tmp/neon/pg_install/v16/lib'
project_settings:
description: 'A JSON object with project settings'
required: false
default: '{}'
outputs:
dsn:
@@ -76,7 +73,7 @@ runs:
\"provisioner\": \"k8s-neonvm\",
\"autoscaling_limit_min_cu\": ${MIN_CU},
\"autoscaling_limit_max_cu\": ${MAX_CU},
\"settings\": ${PROJECT_SETTINGS}
\"settings\": { }
}
}")
@@ -95,12 +92,12 @@ runs:
if [ "${SHARD_SPLIT_PROJECT}" = "true" ]; then
# determine tenant ID
TENANT_ID=`${PSQL} ${dsn} -t -A -c "SHOW neon.tenant_id"`
echo "Splitting project ${project_id} with tenant_id ${TENANT_ID} into $((SHARD_COUNT)) shards with stripe size $((STRIPE_SIZE))"
echo "Sending PUT request to https://${API_HOST}/regions/${REGION_ID}/api/v1/admin/storage/proxy/control/v1/tenant/${TENANT_ID}/shard_split"
echo "with body {\"new_shard_count\": $((SHARD_COUNT)), \"new_stripe_size\": $((STRIPE_SIZE))}"
# we need an ADMIN API KEY to invoke storage controller API for shard splitting (bash -u above checks that the variable is set)
curl -X PUT \
"https://${API_HOST}/regions/${REGION_ID}/api/v1/admin/storage/proxy/control/v1/tenant/${TENANT_ID}/shard_split" \
@@ -121,4 +118,3 @@ runs:
STRIPE_SIZE: ${{ inputs.stripe_size }}
PSQL: ${{ inputs.psql_path }}
LD_LIBRARY_PATH: ${{ inputs.libpq_lib_path }}
PROJECT_SETTINGS: ${{ inputs.project_settings }}

View File

@@ -1,5 +1,4 @@
rust_code: ['**/*.rs', '**/Cargo.toml', '**/Cargo.lock']
rust_dependencies: ['**/Cargo.lock']
v14: ['vendor/postgres-v14/**', 'Makefile', 'pgxn/**']
v15: ['vendor/postgres-v15/**', 'Makefile', 'pgxn/**']

View File

@@ -267,26 +267,6 @@ jobs:
path: /tmp/neon
aws-oicd-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
- name: Check diesel schema
if: inputs.build-type == 'release' && inputs.arch == 'x64'
env:
DATABASE_URL: postgresql://localhost:1235/storage_controller
POSTGRES_DISTRIB_DIR: /tmp/neon/pg_install
run: |
/tmp/neon/bin/neon_local init
/tmp/neon/bin/neon_local storage_controller start
diesel print-schema > storage_controller/src/schema.rs
if [ -n "$(git diff storage_controller/src/schema.rs)" ]; then
echo >&2 "Uncommitted changes in diesel schema"
git diff .
exit 1
fi
/tmp/neon/bin/neon_local storage_controller stop
# XXX: keep this after the binaries.list is formed, so the coverage can properly work later
- name: Merge and upload coverage data
if: inputs.build-type == 'debug'

View File

@@ -16,9 +16,6 @@ defaults:
run:
shell: bash -euxo pipefail {0}
# No permission for GITHUB_TOKEN by default; the **minimal required** set of permissions should be granted in each job.
permissions: {}
jobs:
check-codestyle-rust:
strategy:
@@ -87,3 +84,8 @@ jobs:
run: |
cargo hakari generate --diff # workspace-hack Cargo.toml is up-to-date
cargo hakari manage-deps --dry-run # all workspace crates depend on workspace-hack
# https://github.com/EmbarkStudios/cargo-deny
- name: Check rust licenses/bans/advisories/sources
if: ${{ !cancelled() }}
run: cargo deny check --hide-inclusion-graph

View File

@@ -94,9 +94,7 @@ jobs:
echo "LABELS_TO_ADD=${LABELS_TO_ADD}" >> ${GITHUB_OUTPUT}
echo "LABELS_TO_REMOVE=${LABELS_TO_REMOVE}" >> ${GITHUB_OUTPUT}
- uses: actions/checkout@v4
with:
ref: ${{ github.event.pull_request.head.sha }}
- run: gh pr checkout "${PR_NUMBER}"
- run: git checkout -b "${BRANCH}"

View File

@@ -319,7 +319,7 @@ jobs:
{ "pg_version": 16, "region_id": "azure-eastus2", "platform": "neonvm-azure-captest-new", "db_size": "50gb","runner": '"$runner_azure"', "image": "neondatabase/build-tools:pinned-bookworm" },
{ "pg_version": 16, "region_id": "'"$region_id_default"'", "platform": "neonvm-captest-sharding-reuse", "db_size": "50gb","runner": '"$runner_default"', "image": "'"$image_default"'" },
{ "pg_version": 17, "region_id": "'"$region_id_default"'", "platform": "neonvm-captest-freetier", "db_size": "3gb" ,"runner": '"$runner_default"', "image": "'"$image_default"'" },
{ "pg_version": 17, "region_id": "'"$region_id_default"'", "platform": "neonvm-captest-new", "db_size": "10gb","runner": '"$runner_default"', "image": "'"$image_default"'" },
{ "pg_version": 17, "region_id": "'"$region_id_default"'", "platform": "neonvm-captest-new", "db_size": "10gb","runner": '"$runner_default"', "image": "'"$image_default"'" },
{ "pg_version": 17, "region_id": "'"$region_id_default"'", "platform": "neonvm-captest-new-many-tables","db_size": "10gb","runner": '"$runner_default"', "image": "'"$image_default"'" },
{ "pg_version": 17, "region_id": "'"$region_id_default"'", "platform": "neonvm-captest-new", "db_size": "50gb","runner": '"$runner_default"', "image": "'"$image_default"'" }]
}'
@@ -340,7 +340,7 @@ jobs:
],
"pg_version" : [
16,17
]
],
}'
if [ "$(date +%A)" = "Saturday" ] || [ ${RUN_AWS_RDS_AND_AURORA} = "true" ]; then
@@ -458,7 +458,7 @@ jobs:
echo "connstr=${CONNSTR}" >> $GITHUB_OUTPUT
# we want to compare Neon project OLTP throughput and latency at scale factor 10 GB
# we want to compare Neon project OLTP throughput and latency at scale factor 10 GB
# without (neonvm-captest-new)
# and with (neonvm-captest-new-many-tables) many relations in the database
- name: Create many relations before the run
@@ -590,20 +590,36 @@ jobs:
steps:
- uses: actions/checkout@v4
- name: Configure AWS credentials
uses: aws-actions/configure-aws-credentials@v4
with:
aws-region: eu-central-1
role-to-assume: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
role-duration-seconds: 18000 # 5 hours
# until https://github.com/neondatabase/neon/issues/8275 is fixed we temporarily install postgresql-16
# instead of using Neon artifacts containing pgbench
- name: Install postgresql-16 where pytest expects it
run: |
# Just to make it easier to test things locally on macOS (with arm64)
arch=$(uname -m | sed 's/x86_64/amd64/g' | sed 's/aarch64/arm64/g')
- name: Download Neon artifact
uses: ./.github/actions/download
with:
name: neon-${{ runner.os }}-${{ runner.arch }}-release-artifact
path: /tmp/neon/
prefix: latest
aws-oicd-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
cd /home/nonroot
wget -q "https://apt.postgresql.org/pub/repos/apt/pool/main/p/postgresql-17/libpq5_17.2-1.pgdg120+1_${arch}.deb"
wget -q "https://apt.postgresql.org/pub/repos/apt/pool/main/p/postgresql-16/postgresql-client-16_16.6-1.pgdg120+1_${arch}.deb"
wget -q "https://apt.postgresql.org/pub/repos/apt/pool/main/p/postgresql-16/postgresql-16_16.6-1.pgdg120+1_${arch}.deb"
dpkg -x libpq5_17.2-1.pgdg120+1_${arch}.deb pg
dpkg -x postgresql-16_16.6-1.pgdg120+1_${arch}.deb pg
dpkg -x postgresql-client-16_16.6-1.pgdg120+1_${arch}.deb pg
mkdir -p /tmp/neon/pg_install/v16/bin
mkdir -p /tmp/neon/pg_install/v17/bin
ln -s /home/nonroot/pg/usr/lib/postgresql/16/bin/pgbench /tmp/neon/pg_install/v16/bin/pgbench
ln -s /home/nonroot/pg/usr/lib/postgresql/16/bin/psql /tmp/neon/pg_install/v16/bin/psql
ln -s /home/nonroot/pg/usr/lib/$(uname -m)-linux-gnu /tmp/neon/pg_install/v16/lib
ln -s /home/nonroot/pg/usr/lib/postgresql/16/bin/pgbench /tmp/neon/pg_install/v17/bin/pgbench
ln -s /home/nonroot/pg/usr/lib/postgresql/16/bin/psql /tmp/neon/pg_install/v17/bin/psql
ln -s /home/nonroot/pg/usr/lib/$(uname -m)-linux-gnu /tmp/neon/pg_install/v17/lib
LD_LIBRARY_PATH="/home/nonroot/pg/usr/lib/$(uname -m)-linux-gnu:${LD_LIBRARY_PATH:-}"
export LD_LIBRARY_PATH
echo "LD_LIBRARY_PATH=${LD_LIBRARY_PATH}" >> ${GITHUB_ENV}
/tmp/neon/pg_install/v16/bin/pgbench --version
/tmp/neon/pg_install/v16/bin/psql --version
- name: Set up Connection String
id: set-up-connstr
@@ -626,6 +642,13 @@ jobs:
echo "connstr=${CONNSTR}" >> $GITHUB_OUTPUT
- name: Configure AWS credentials
uses: aws-actions/configure-aws-credentials@v4
with:
aws-region: eu-central-1
role-to-assume: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
role-duration-seconds: 18000 # 5 hours
- name: Benchmark pgvector hnsw indexing
uses: ./.github/actions/run-python-test-set
with:
@@ -741,10 +764,10 @@ jobs:
neonvm-captest-reuse)
case "${PG_VERSION}" in
16)
CONNSTR=${{ secrets.BENCHMARK_CAPTEST_CLICKBENCH_10M_CONNSTR }}
CONNSTR=${{ secrets.BENCHMARK_CAPTEST_CLICKBENCH_10M_CONNSTR_V16 }}
;;
17)
CONNSTR=${{ secrets.BENCHMARK_CAPTEST_CLICKBENCH_CONNSTR_PG17 }}
CONNSTR=${{ secrets.BENCHMARK_CAPTEST_CONNSTR_PG17 }}
;;
*)
echo >&2 "Unsupported PG_VERSION=${PG_VERSION} for PLATFORM=${PLATFORM}"
@@ -810,7 +833,7 @@ jobs:
# We might change it after https://github.com/neondatabase/neon/issues/2900.
#
# *_TPCH_S10_CONNSTR: DB generated with scale factor 10 (~10 GB)
# if: ${{ !cancelled() && (github.event.inputs.run_only_pgvector_tests == 'false' || github.event.inputs.run_only_pgvector_tests == null) }}
if: ${{ !cancelled() && (github.event.inputs.run_only_pgvector_tests == 'false' || github.event.inputs.run_only_pgvector_tests == null) }}
permissions:
contents: write
statuses: write
@@ -864,7 +887,7 @@ jobs:
CONNSTR_SECRET_NAME="BENCHMARK_CAPTEST_TPCH_S10_CONNSTR"
;;
17)
CONNSTR_SECRET_NAME="BENCHMARK_CAPTEST_TPCH_CONNSTR_PG17"
CONNSTR_SECRET_NAME="BENCHMARK_CAPTEST_CONNSTR_PG17"
;;
*)
echo >&2 "Unsupported PG_VERSION=${PG_VERSION} for PLATFORM=${PLATFORM}"
@@ -883,7 +906,7 @@ jobs:
exit 1
;;
esac
echo "CONNSTR_SECRET_NAME=${CONNSTR_SECRET_NAME}" >> $GITHUB_ENV
- name: Set up Connection String
@@ -929,7 +952,7 @@ jobs:
SLACK_BOT_TOKEN: ${{ secrets.SLACK_BOT_TOKEN }}
user-examples-compare:
# if: ${{ !cancelled() && (github.event.inputs.run_only_pgvector_tests == 'false' || github.event.inputs.run_only_pgvector_tests == null) }}
if: ${{ !cancelled() && (github.event.inputs.run_only_pgvector_tests == 'false' || github.event.inputs.run_only_pgvector_tests == null) }}
permissions:
contents: write
statuses: write
@@ -984,7 +1007,7 @@ jobs:
CONNSTR=${{ secrets.BENCHMARK_USER_EXAMPLE_CAPTEST_CONNSTR }}
;;
17)
CONNSTR=${{ secrets.BENCHMARK_CAPTEST_USER_EXAMPLE_CONNSTR_PG17 }}
CONNSTR=${{ secrets.BENCHMARK_CAPTEST_CONNSTR_PG17 }}
;;
*)
echo >&2 "Unsupported PG_VERSION=${PG_VERSION} for PLATFORM=${PLATFORM}"

View File

@@ -45,26 +45,6 @@ jobs:
run cancel-previous-in-concurrency-group.yml \
--field concurrency_group="${{ env.E2E_CONCURRENCY_GROUP }}"
files-changed:
needs: [ check-permissions ]
runs-on: [ self-hosted, small ]
timeout-minutes: 3
outputs:
check-rust-dependencies: ${{ steps.files-changed.outputs.rust_dependencies }}
steps:
- name: Checkout
uses: actions/checkout@v4
with:
submodules: true
- name: Check for file changes
uses: dorny/paths-filter@de90cc6fb38fc0963ad72b210f1f284cd68cea36 # v3.0.2
id: files-changed
with:
token: ${{ secrets.GITHUB_TOKEN }}
filters: .github/file-filters.yaml
tag:
needs: [ check-permissions ]
runs-on: [ self-hosted, small ]
@@ -190,14 +170,6 @@ jobs:
archs: '["x64", "arm64"]'
secrets: inherit
check-dependencies-rust:
needs: [ files-changed, build-build-tools-image ]
if: ${{ needs.files-changed.outputs.check-rust-dependencies == 'true' }}
uses: ./.github/workflows/cargo-deny.yml
with:
build-tools-image: ${{ needs.build-build-tools-image.outputs.image }}-bookworm
secrets: inherit
build-and-test-locally:
needs: [ tag, build-build-tools-image ]
strategy:
@@ -682,7 +654,7 @@ jobs:
push: true
pull: true
file: compute/compute-node.Dockerfile
target: extension-tests
target: neon-pg-ext-test
cache-from: type=registry,ref=cache.neon.build/compute-node-${{ matrix.version.pg }}:cache-${{ matrix.version.debian }}-${{ matrix.arch }}
tags: |
neondatabase/neon-test-extensions-${{ matrix.version.pg }}:${{needs.tag.outputs.build-tag}}-${{ matrix.version.debian }}-${{ matrix.arch }}
@@ -1360,8 +1332,6 @@ jobs:
- build-and-test-locally
- check-codestyle-python
- check-codestyle-rust
- check-dependencies-rust
- files-changed
- promote-images-dev
- test-images
- trigger-custom-extensions-build-and-wait
@@ -1374,11 +1344,4 @@ jobs:
if: |
contains(needs.*.result, 'failure')
|| contains(needs.*.result, 'cancelled')
|| (needs.check-dependencies-rust.result == 'skipped' && needs.files-changed.outputs.check-rust-dependencies == 'true')
|| needs.build-and-test-locally.result == 'skipped'
|| needs.check-codestyle-python.result == 'skipped'
|| needs.check-codestyle-rust.result == 'skipped'
|| needs.files-changed.result == 'skipped'
|| needs.promote-images-dev.result == 'skipped'
|| needs.test-images.result == 'skipped'
|| needs.trigger-custom-extensions-build-and-wait.result == 'skipped'
|| contains(needs.*.result, 'skipped')

View File

@@ -1,57 +0,0 @@
name: cargo deny checks
on:
workflow_call:
inputs:
build-tools-image:
required: false
type: string
schedule:
- cron: '0 0 * * *'
jobs:
cargo-deny:
strategy:
matrix:
ref: >-
${{
fromJSON(
github.event_name == 'schedule'
&& '["main","release","release-proxy","release-compute"]'
|| format('["{0}"]', github.sha)
)
}}
runs-on: [self-hosted, small]
container:
image: ${{ inputs.build-tools-image || 'neondatabase/build-tools:pinned' }}
credentials:
username: ${{ secrets.NEON_DOCKERHUB_USERNAME }}
password: ${{ secrets.NEON_DOCKERHUB_PASSWORD }}
options: --init
steps:
- name: Checkout
uses: actions/checkout@v4
with:
ref: ${{ matrix.ref }}
- name: Check rust licenses/bans/advisories/sources
env:
CARGO_DENY_TARGET: >-
${{ github.event_name == 'schedule' && 'advisories' || 'all' }}
run: cargo deny check --hide-inclusion-graph $CARGO_DENY_TARGET
- name: Post to a Slack channel
if: ${{ github.event_name == 'schedule' && failure() }}
uses: slackapi/slack-github-action@v2
with:
method: chat.postMessage
token: ${{ secrets.SLACK_BOT_TOKEN }}
payload: |
channel: ${{ vars.SLACK_CICD_CHANNEL_ID }}
text: |
Periodic cargo-deny on ${{ matrix.ref }}: ${{ job.status }}
<${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}|GitHub Run>
Pinging @oncall-devprod.

View File

@@ -12,8 +12,8 @@ on:
pull_request:
paths:
- '.github/workflows/pg-clients.yml'
- 'test_runner/pg_clients/**/*.py'
- 'test_runner/logical_repl/**/*.py'
- 'test_runner/pg_clients/**'
- 'test_runner/logical_repl/**'
- 'poetry.lock'
workflow_dispatch:
@@ -104,8 +104,6 @@ jobs:
with:
api_key: ${{ secrets.NEON_STAGING_API_KEY }}
postgres_version: ${{ env.DEFAULT_PG_VERSION }}
project_settings: >-
{"enable_logical_replication": true}
- name: Run tests
uses: ./.github/actions/run-python-test-set

View File

@@ -59,10 +59,7 @@ jobs:
echo "${RUST_CHANGED_FILES}"
build-build-tools-image:
if: |
false
|| needs.get-changed-files.outputs.python-changed == 'true'
|| needs.get-changed-files.outputs.rust-changed == 'true'
if: needs.get-changed-files.outputs.python-changed == 'true'
needs: [ get-changed-files ]
uses: ./.github/workflows/build-build-tools-image.yml
with:
@@ -95,8 +92,7 @@ jobs:
# - conclusion
# - neon-cloud-e2e
conclusion:
# Do not run job on Pull Requests as it interferes with the `conclusion` job from the `build_and_test` workflow
if: always() && github.event_name == 'merge_group'
if: always()
permissions:
statuses: write # for `github.repos.createCommitStatus(...)`
contents: write
@@ -128,8 +124,6 @@ jobs:
- name: Fail the job if any of the dependencies do not succeed or skipped
run: exit 1
if: |
false
|| (needs.check-codestyle-python.result == 'skipped' && needs.get-changed-files.outputs.python-changed == 'true')
|| (needs.check-codestyle-rust.result == 'skipped' && needs.get-changed-files.outputs.rust-changed == 'true')
(contains(needs.check-codestyle-python.result, 'skipped') && needs.get-changed-files.outputs.python-changed == 'true')
|| contains(needs.*.result, 'failure')
|| contains(needs.*.result, 'cancelled')

327
Cargo.lock generated
View File

@@ -290,9 +290,9 @@ checksum = "d468802bab17cbc0cc575e9b053f41e72aa36bfa6b7f55e3529ffa43161b97fa"
[[package]]
name = "aws-config"
version = "1.5.15"
version = "1.5.10"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "dc47e70fc35d054c8fcd296d47a61711f043ac80534a10b4f741904f81e73a90"
checksum = "9b49afaa341e8dd8577e1a2200468f98956d6eda50bcf4a53246cc00174ba924"
dependencies = [
"aws-credential-types",
"aws-runtime",
@@ -301,7 +301,7 @@ dependencies = [
"aws-sdk-sts",
"aws-smithy-async",
"aws-smithy-http",
"aws-smithy-json",
"aws-smithy-json 0.60.7",
"aws-smithy-runtime",
"aws-smithy-runtime-api",
"aws-smithy-types",
@@ -332,9 +332,9 @@ dependencies = [
[[package]]
name = "aws-runtime"
version = "1.5.4"
version = "1.4.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bee7643696e7fdd74c10f9eb42848a87fe469d35eae9c3323f80aa98f350baac"
checksum = "b5ac934720fbb46206292d2c75b57e67acfc56fe7dfd34fb9a02334af08409ea"
dependencies = [
"aws-credential-types",
"aws-sigv4",
@@ -366,7 +366,7 @@ dependencies = [
"aws-runtime",
"aws-smithy-async",
"aws-smithy-http",
"aws-smithy-json",
"aws-smithy-json 0.61.1",
"aws-smithy-query",
"aws-smithy-runtime",
"aws-smithy-runtime-api",
@@ -389,7 +389,7 @@ dependencies = [
"aws-runtime",
"aws-smithy-async",
"aws-smithy-http",
"aws-smithy-json",
"aws-smithy-json 0.61.1",
"aws-smithy-runtime",
"aws-smithy-runtime-api",
"aws-smithy-types",
@@ -414,7 +414,7 @@ dependencies = [
"aws-smithy-checksums",
"aws-smithy-eventstream",
"aws-smithy-http",
"aws-smithy-json",
"aws-smithy-json 0.61.1",
"aws-smithy-runtime",
"aws-smithy-runtime-api",
"aws-smithy-types",
@@ -437,15 +437,15 @@ dependencies = [
[[package]]
name = "aws-sdk-sso"
version = "1.57.0"
version = "1.50.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c54bab121fe1881a74c338c5f723d1592bf3b53167f80268a1274f404e1acc38"
checksum = "05ca43a4ef210894f93096039ef1d6fa4ad3edfabb3be92b80908b9f2e4b4eab"
dependencies = [
"aws-credential-types",
"aws-runtime",
"aws-smithy-async",
"aws-smithy-http",
"aws-smithy-json",
"aws-smithy-json 0.61.1",
"aws-smithy-runtime",
"aws-smithy-runtime-api",
"aws-smithy-types",
@@ -459,15 +459,15 @@ dependencies = [
[[package]]
name = "aws-sdk-ssooidc"
version = "1.58.0"
version = "1.51.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8c8234fd024f7ac61c4e44ea008029bde934250f371efe7d4a39708397b1080c"
checksum = "abaf490c2e48eed0bb8e2da2fb08405647bd7f253996e0f93b981958ea0f73b0"
dependencies = [
"aws-credential-types",
"aws-runtime",
"aws-smithy-async",
"aws-smithy-http",
"aws-smithy-json",
"aws-smithy-json 0.61.1",
"aws-smithy-runtime",
"aws-smithy-runtime-api",
"aws-smithy-types",
@@ -481,15 +481,15 @@ dependencies = [
[[package]]
name = "aws-sdk-sts"
version = "1.58.0"
version = "1.51.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ba60e1d519d6f23a9df712c04fdeadd7872ac911c84b2f62a8bda92e129b7962"
checksum = "b68fde0d69c8bfdc1060ea7da21df3e39f6014da316783336deff0a9ec28f4bf"
dependencies = [
"aws-credential-types",
"aws-runtime",
"aws-smithy-async",
"aws-smithy-http",
"aws-smithy-json",
"aws-smithy-json 0.61.1",
"aws-smithy-query",
"aws-smithy-runtime",
"aws-smithy-runtime-api",
@@ -504,9 +504,9 @@ dependencies = [
[[package]]
name = "aws-sigv4"
version = "1.2.7"
version = "1.2.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "690118821e46967b3c4501d67d7d52dd75106a9c54cf36cefa1985cedbe94e05"
checksum = "7d3820e0c08d0737872ff3c7c1f21ebbb6693d832312d6152bf18ef50a5471c2"
dependencies = [
"aws-credential-types",
"aws-smithy-eventstream",
@@ -533,9 +533,9 @@ dependencies = [
[[package]]
name = "aws-smithy-async"
version = "1.2.4"
version = "1.2.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fa59d1327d8b5053c54bf2eaae63bf629ba9e904434d0835a28ed3c0ed0a614e"
checksum = "62220bc6e97f946ddd51b5f1361f78996e704677afc518a4ff66b7a72ea1378c"
dependencies = [
"futures-util",
"pin-project-lite",
@@ -565,9 +565,9 @@ dependencies = [
[[package]]
name = "aws-smithy-eventstream"
version = "0.60.6"
version = "0.60.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8b18559a41e0c909b77625adf2b8c50de480a8041e5e4a3f5f7d177db70abc5a"
checksum = "cef7d0a272725f87e51ba2bf89f8c21e4df61b9e49ae1ac367a6d69916ef7c90"
dependencies = [
"aws-smithy-types",
"bytes",
@@ -576,9 +576,9 @@ dependencies = [
[[package]]
name = "aws-smithy-http"
version = "0.60.12"
version = "0.60.11"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7809c27ad8da6a6a68c454e651d4962479e81472aa19ae99e59f9aba1f9713cc"
checksum = "5c8bc3e8fdc6b8d07d976e301c02fe553f72a39b7a9fea820e023268467d7ab6"
dependencies = [
"aws-smithy-eventstream",
"aws-smithy-runtime-api",
@@ -597,9 +597,18 @@ dependencies = [
[[package]]
name = "aws-smithy-json"
version = "0.61.2"
version = "0.60.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "623a51127f24c30776c8b374295f2df78d92517386f77ba30773f15a30ce1422"
checksum = "4683df9469ef09468dad3473d129960119a0d3593617542b7d52086c8486f2d6"
dependencies = [
"aws-smithy-types",
]
[[package]]
name = "aws-smithy-json"
version = "0.61.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ee4e69cc50921eb913c6b662f8d909131bb3e6ad6cb6090d3a39b66fc5c52095"
dependencies = [
"aws-smithy-types",
]
@@ -616,9 +625,9 @@ dependencies = [
[[package]]
name = "aws-smithy-runtime"
version = "1.7.7"
version = "1.7.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "865f7050bbc7107a6c98a397a9fcd9413690c27fa718446967cf03b2d3ac517e"
checksum = "9f20685047ca9d6f17b994a07f629c813f08b5bce65523e47124879e60103d45"
dependencies = [
"aws-smithy-async",
"aws-smithy-http",
@@ -660,9 +669,9 @@ dependencies = [
[[package]]
name = "aws-smithy-types"
version = "1.2.12"
version = "1.2.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a28f6feb647fb5e0d5b50f0472c19a7db9462b74e2fec01bb0b44eedcc834e97"
checksum = "4fbd94a32b3a7d55d3806fe27d98d3ad393050439dd05eb53ece36ec5e3d3510"
dependencies = [
"base64-simd",
"bytes",
@@ -695,9 +704,9 @@ dependencies = [
[[package]]
name = "aws-types"
version = "1.3.4"
version = "1.3.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b0df5a18c4f951c645300d365fec53a61418bcf4650f604f85fe2a665bfaa0c2"
checksum = "5221b91b3e441e6675310829fd8984801b772cb1546ef6c0e54dec9f1ac13fef"
dependencies = [
"aws-credential-types",
"aws-smithy-async",
@@ -969,7 +978,7 @@ version = "0.70.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f49d8fed880d473ea71efb9bf597651e77201bdd4893efe54c9e5d65ae04ce6f"
dependencies = [
"bitflags 2.8.0",
"bitflags 2.4.1",
"cexpr",
"clang-sys",
"itertools 0.12.1",
@@ -997,9 +1006,9 @@ checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a"
[[package]]
name = "bitflags"
version = "2.8.0"
version = "2.4.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8f68f53c83ab957f72c32642f3868eec03eb974d1fb82e453128456482613d36"
checksum = "327762f6e5a765692301e5bb513e0d9fef63be86bbc14528052b1cd3e6f03e07"
[[package]]
name = "block-buffer"
@@ -1216,20 +1225,6 @@ version = "0.7.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "afb84c814227b90d6895e01398aee0d8033c00e7466aca416fb6a8e0eb19d8a7"
[[package]]
name = "clashmap"
version = "1.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "93bd59c81e2bd87a775ae2de75f070f7e2bfe97363a6ad652f46824564c23e4d"
dependencies = [
"crossbeam-utils",
"hashbrown 0.15.2",
"lock_api",
"parking_lot_core 0.9.8",
"polonius-the-crab",
"replace_with",
]
[[package]]
name = "colorchoice"
version = "1.0.0"
@@ -1317,7 +1312,7 @@ dependencies = [
"tar",
"thiserror 1.0.69",
"tokio",
"tokio-postgres",
"tokio-postgres 0.7.9",
"tokio-stream",
"tokio-util",
"tower 0.5.2",
@@ -1426,7 +1421,7 @@ dependencies = [
"storage_broker",
"thiserror 1.0.69",
"tokio",
"tokio-postgres",
"tokio-postgres 0.7.9",
"tokio-util",
"toml",
"toml_edit",
@@ -1566,7 +1561,7 @@ version = "0.27.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f476fe445d41c9e991fd07515a6f463074b782242ccf4a5b7b1d1012e70824df"
dependencies = [
"bitflags 2.8.0",
"bitflags 2.4.1",
"crossterm_winapi",
"libc",
"parking_lot 0.12.1",
@@ -1797,7 +1792,7 @@ version = "2.2.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ccf1bedf64cdb9643204a36dd15b19a6ce8e7aa7f7b105868e9f1fad5ffa7d12"
dependencies = [
"bitflags 2.8.0",
"bitflags 2.4.1",
"byteorder",
"chrono",
"diesel_derives",
@@ -1817,7 +1812,7 @@ dependencies = [
"futures-util",
"scoped-futures",
"tokio",
"tokio-postgres",
"tokio-postgres 0.7.12",
]
[[package]]
@@ -2561,12 +2556,6 @@ dependencies = [
"allocator-api2",
]
[[package]]
name = "hashbrown"
version = "0.15.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bf151400ff0baff5465007dd2f3e717f3fe502074ca563069ce3a6629d07b289"
[[package]]
name = "hashlink"
version = "0.9.1"
@@ -2617,15 +2606,6 @@ version = "0.4.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6fe2267d4ed49bc07b63801559be28c718ea06c4738b7a03c94df7386d2cde46"
[[package]]
name = "higher-kinded-types"
version = "0.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "561985554c8b8d4808605c90a5f1979cc6c31a5d20b78465cd59501233c6678e"
dependencies = [
"never-say-never",
]
[[package]]
name = "hmac"
version = "0.12.1"
@@ -3104,11 +3084,11 @@ dependencies = [
[[package]]
name = "inotify"
version = "0.11.0"
version = "0.9.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f37dccff2791ab604f9babef0ba14fbe0be30bd368dc541e2b08d07c8aa908f3"
checksum = "f8069d3ec154eb856955c1c0fbffefbf5f3c40a104ec912d4797314c1801abff"
dependencies = [
"bitflags 2.8.0",
"bitflags 1.3.2",
"inotify-sys",
"libc",
]
@@ -3285,9 +3265,9 @@ dependencies = [
[[package]]
name = "kqueue"
version = "1.0.8"
version = "1.0.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7447f1ca1b7b563588a205fe93dea8df60fd981423a768bc1c0ded35ed147d0c"
checksum = "2c8fc60ba15bf51257aa9807a48a61013db043fcf3a78cb0d916e8e396dcad98"
dependencies = [
"kqueue-sys",
"libc",
@@ -3295,9 +3275,9 @@ dependencies = [
[[package]]
name = "kqueue-sys"
version = "1.0.4"
version = "1.0.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ed9625ffda8729b85e45cf04090035ac368927b8cebc34898e7c120f52e4838b"
checksum = "8367585489f01bc55dd27404dcf56b95e6da061a256a666ab23be9ba96a2e587"
dependencies = [
"bitflags 1.3.2",
"libc",
@@ -3324,9 +3304,9 @@ dependencies = [
[[package]]
name = "libc"
version = "0.2.169"
version = "0.2.167"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b5aba8db14291edd000dfcc4d620c7ebfb122c613afb886ca8803fa4e128a20a"
checksum = "09d6582e104315a817dff97f75133544b2e094ee22447d2acf4a74e189ba06fc"
[[package]]
name = "libloading"
@@ -3573,14 +3553,14 @@ dependencies = [
[[package]]
name = "mio"
version = "1.0.3"
version = "0.8.11"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2886843bf800fba2e3377cff24abf6379b4c4d5c6681eaf9ea5b0d15090450bd"
checksum = "a4a650543ca06a924e8b371db273b2756685faae30f8487da1b56505a8f78b0c"
dependencies = [
"libc",
"log",
"wasi 0.11.0+wasi-snapshot-preview1",
"windows-sys 0.52.0",
"windows-sys 0.48.0",
]
[[package]]
@@ -3589,12 +3569,6 @@ version = "0.8.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e5ce46fe64a9d73be07dcbe690a38ce1b293be448fd8ce1e6c1b8062c9f72c6a"
[[package]]
name = "never-say-never"
version = "6.6.666"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "cf5a574dadd7941adeaa71823ecba5e28331b8313fb2e1c6a5c7e5981ea53ad6"
[[package]]
name = "nix"
version = "0.25.1"
@@ -3626,7 +3600,7 @@ version = "0.27.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2eb04e9c688eff1c89d72b407f168cf79bb9e867a9d3323ed6c01519eb9cc053"
dependencies = [
"bitflags 2.8.0",
"bitflags 2.4.1",
"cfg-if",
"libc",
"memoffset 0.9.0",
@@ -3644,11 +3618,12 @@ dependencies = [
[[package]]
name = "notify"
version = "8.0.0"
version = "6.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2fee8403b3d66ac7b26aee6e40a897d85dc5ce26f44da36b8b73e987cc52e943"
checksum = "6205bd8bb1e454ad2e27422015fb5e4f2bcc7e08fa8f27058670d208324a4d2d"
dependencies = [
"bitflags 2.8.0",
"bitflags 2.4.1",
"crossbeam-channel",
"filetime",
"fsevent-sys",
"inotify",
@@ -3656,17 +3631,10 @@ dependencies = [
"libc",
"log",
"mio",
"notify-types",
"walkdir",
"windows-sys 0.59.0",
"windows-sys 0.48.0",
]
[[package]]
name = "notify-types"
version = "2.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5e0826a989adedc2a244799e823aece04662b66609d96af8dff7ac6df9a8925d"
[[package]]
name = "ntapi"
version = "0.4.1"
@@ -4092,8 +4060,8 @@ dependencies = [
"pageserver_compaction",
"pin-project-lite",
"postgres",
"postgres-protocol",
"postgres-types",
"postgres-protocol 0.6.6",
"postgres-types 0.2.6",
"postgres_backend",
"postgres_connection",
"postgres_ffi",
@@ -4124,7 +4092,7 @@ dependencies = [
"tokio",
"tokio-epoll-uring",
"tokio-io-timeout",
"tokio-postgres",
"tokio-postgres 0.7.9",
"tokio-stream",
"tokio-tar",
"tokio-util",
@@ -4182,7 +4150,7 @@ dependencies = [
"serde",
"thiserror 1.0.69",
"tokio",
"tokio-postgres",
"tokio-postgres 0.7.9",
"tokio-stream",
"tokio-util",
"utils",
@@ -4478,40 +4446,48 @@ dependencies = [
"plotters-backend",
]
[[package]]
name = "polonius-the-crab"
version = "0.4.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e97ca2c89572ae41bbec1c99498251f87dd5a94e500c5ec19c382dd593dd5ce9"
dependencies = [
"higher-kinded-types",
"never-say-never",
]
[[package]]
name = "postgres"
version = "0.19.7"
source = "git+https://github.com/neondatabase/rust-postgres.git?branch=neon#1f21e7959a96a34dcfbfce1b14b73286cdadffe9"
version = "0.19.6"
source = "git+https://github.com/neondatabase/rust-postgres.git?branch=neon#8b44892f7851e705810b2cb54504325699966070"
dependencies = [
"bytes",
"fallible-iterator",
"futures-util",
"log",
"tokio",
"tokio-postgres",
"tokio-postgres 0.7.9",
]
[[package]]
name = "postgres-protocol"
version = "0.6.6"
source = "git+https://github.com/neondatabase/rust-postgres.git?branch=neon#1f21e7959a96a34dcfbfce1b14b73286cdadffe9"
source = "git+https://github.com/neondatabase/rust-postgres.git?branch=neon#8b44892f7851e705810b2cb54504325699966070"
dependencies = [
"base64 0.21.1",
"byteorder",
"bytes",
"fallible-iterator",
"hmac",
"lazy_static",
"md-5",
"memchr",
"rand 0.8.5",
"sha2",
"stringprep",
]
[[package]]
name = "postgres-protocol"
version = "0.6.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "acda0ebdebc28befa84bee35e651e4c5f09073d668c7aed4cf7e23c3cda84b23"
dependencies = [
"base64 0.22.1",
"byteorder",
"bytes",
"fallible-iterator",
"hmac",
"lazy_static",
"md-5",
"memchr",
"rand 0.8.5",
@@ -4538,12 +4514,23 @@ dependencies = [
[[package]]
name = "postgres-types"
version = "0.2.6"
source = "git+https://github.com/neondatabase/rust-postgres.git?branch=neon#1f21e7959a96a34dcfbfce1b14b73286cdadffe9"
source = "git+https://github.com/neondatabase/rust-postgres.git?branch=neon#8b44892f7851e705810b2cb54504325699966070"
dependencies = [
"bytes",
"chrono",
"fallible-iterator",
"postgres-protocol",
"postgres-protocol 0.6.6",
]
[[package]]
name = "postgres-types"
version = "0.2.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f66ea23a2d0e5734297357705193335e0a957696f34bed2f2faefacb2fec336f"
dependencies = [
"bytes",
"fallible-iterator",
"postgres-protocol 0.6.7",
]
[[package]]
@@ -4568,7 +4555,7 @@ dependencies = [
"serde",
"thiserror 1.0.69",
"tokio",
"tokio-postgres",
"tokio-postgres 0.7.9",
"tokio-postgres-rustls",
"tokio-rustls 0.26.0",
"tokio-util",
@@ -4583,7 +4570,7 @@ dependencies = [
"itertools 0.10.5",
"once_cell",
"postgres",
"tokio-postgres",
"tokio-postgres 0.7.9",
"url",
]
@@ -4677,7 +4664,7 @@ dependencies = [
"byteorder",
"bytes",
"itertools 0.10.5",
"postgres-protocol",
"postgres-protocol 0.6.6",
"rand 0.8.5",
"serde",
"thiserror 1.0.69",
@@ -4718,7 +4705,7 @@ version = "0.16.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "731e0d9356b0c25f16f33b5be79b1c57b562f141ebfcdb0ad8ac2c13a24293b4"
dependencies = [
"bitflags 2.8.0",
"bitflags 2.4.1",
"chrono",
"flate2",
"hex",
@@ -4733,7 +4720,7 @@ version = "0.16.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2d3554923a69f4ce04c4a754260c338f505ce22642d3830e049a399fc2059a29"
dependencies = [
"bitflags 2.8.0",
"bitflags 2.4.1",
"chrono",
"hex",
]
@@ -4852,9 +4839,9 @@ dependencies = [
"camino-tempfile",
"chrono",
"clap",
"clashmap",
"compute_api",
"consumption_metrics",
"dashmap 5.5.0",
"ecdsa 0.16.9",
"ed25519-dalek",
"env_logger 0.10.2",
@@ -4925,7 +4912,7 @@ dependencies = [
"tikv-jemalloc-ctl",
"tikv-jemallocator",
"tokio",
"tokio-postgres",
"tokio-postgres 0.7.9",
"tokio-postgres2",
"tokio-rustls 0.26.0",
"tokio-tungstenite 0.21.0",
@@ -5262,12 +5249,6 @@ dependencies = [
"utils",
]
[[package]]
name = "replace_with"
version = "0.1.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e3a8614ee435691de62bcffcf4a66d91b3594bf1428a5722e79103249a095690"
[[package]]
name = "reqwest"
version = "0.12.4"
@@ -5547,7 +5528,7 @@ version = "0.38.41"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d7f649912bc1495e167a6edee79151c84b1bad49748cb4f1f1167f459f6224f6"
dependencies = [
"bitflags 2.8.0",
"bitflags 2.4.1",
"errno",
"libc",
"linux-raw-sys 0.4.14",
@@ -5719,7 +5700,7 @@ dependencies = [
"pageserver_api",
"parking_lot 0.12.1",
"postgres",
"postgres-protocol",
"postgres-protocol 0.6.6",
"postgres_backend",
"postgres_ffi",
"pprof",
@@ -5743,7 +5724,7 @@ dependencies = [
"tikv-jemallocator",
"tokio",
"tokio-io-timeout",
"tokio-postgres",
"tokio-postgres 0.7.9",
"tokio-stream",
"tokio-tar",
"tokio-util",
@@ -6360,8 +6341,6 @@ dependencies = [
"rand 0.8.5",
"reqwest",
"routerify",
"rustls 0.23.18",
"rustls-native-certs 0.8.0",
"scoped-futures",
"scopeguard",
"serde",
@@ -6370,8 +6349,6 @@ dependencies = [
"strum_macros",
"thiserror 1.0.69",
"tokio",
"tokio-postgres",
"tokio-postgres-rustls",
"tokio-util",
"tracing",
"utils",
@@ -6417,7 +6394,7 @@ dependencies = [
"serde_json",
"storage_controller_client",
"tokio",
"tokio-postgres",
"tokio-postgres 0.7.9",
"tokio-postgres-rustls",
"tokio-stream",
"tokio-util",
@@ -6614,7 +6591,7 @@ dependencies = [
"fastrand 2.2.0",
"once_cell",
"rustix",
"windows-sys 0.59.0",
"windows-sys 0.52.0",
]
[[package]]
@@ -6826,20 +6803,21 @@ checksum = "1f3ccbac311fea05f86f61904b462b55fb3df8837a366dfc601a0161d0532f20"
[[package]]
name = "tokio"
version = "1.43.0"
version = "1.38.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3d61fa4ffa3de412bfea335c6ecff681de2b609ba3c77ef3e00e521813a9ed9e"
checksum = "eb2caba9f80616f438e09748d5acda951967e1ea58508ef53d9c6402485a46df"
dependencies = [
"backtrace",
"bytes",
"libc",
"mio",
"num_cpus",
"parking_lot 0.12.1",
"pin-project-lite",
"signal-hook-registry",
"socket2",
"tokio-macros",
"windows-sys 0.52.0",
"windows-sys 0.48.0",
]
[[package]]
@@ -6870,9 +6848,9 @@ dependencies = [
[[package]]
name = "tokio-macros"
version = "2.5.0"
version = "2.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6e06d43f1345a3bcd39f6a56dbb7dcab2ba47e68e8ac134855e7e2bdbaf8cab8"
checksum = "5f5ae998a069d4b5aba8ee9dad856af7d520c3699e6159b185c2acd48155d39a"
dependencies = [
"proc-macro2",
"quote",
@@ -6881,8 +6859,8 @@ dependencies = [
[[package]]
name = "tokio-postgres"
version = "0.7.10"
source = "git+https://github.com/neondatabase/rust-postgres.git?branch=neon#1f21e7959a96a34dcfbfce1b14b73286cdadffe9"
version = "0.7.9"
source = "git+https://github.com/neondatabase/rust-postgres.git?branch=neon#8b44892f7851e705810b2cb54504325699966070"
dependencies = [
"async-trait",
"byteorder",
@@ -6895,8 +6873,34 @@ dependencies = [
"percent-encoding",
"phf",
"pin-project-lite",
"postgres-protocol",
"postgres-types",
"postgres-protocol 0.6.6",
"postgres-types 0.2.6",
"rand 0.8.5",
"socket2",
"tokio",
"tokio-util",
"whoami",
]
[[package]]
name = "tokio-postgres"
version = "0.7.12"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3b5d3742945bc7d7f210693b0c58ae542c6fd47b17adbbda0885f3dcb34a6bdb"
dependencies = [
"async-trait",
"byteorder",
"bytes",
"fallible-iterator",
"futures-channel",
"futures-util",
"log",
"parking_lot 0.12.1",
"percent-encoding",
"phf",
"pin-project-lite",
"postgres-protocol 0.6.7",
"postgres-types 0.2.8",
"rand 0.8.5",
"socket2",
"tokio",
@@ -6913,7 +6917,7 @@ dependencies = [
"ring",
"rustls 0.23.18",
"tokio",
"tokio-postgres",
"tokio-postgres 0.7.9",
"tokio-rustls 0.26.0",
"x509-certificate",
]
@@ -7157,7 +7161,7 @@ version = "0.6.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "403fa3b783d4b626a8ad51d766ab03cb6d2dbfc46b1c5d4448395e6628dc9697"
dependencies = [
"bitflags 2.8.0",
"bitflags 2.4.1",
"bytes",
"http 1.1.0",
"http-body 1.0.0",
@@ -7591,7 +7595,7 @@ dependencies = [
"serde_json",
"sysinfo",
"tokio",
"tokio-postgres",
"tokio-postgres 0.7.9",
"tokio-util",
"tracing",
"tracing-subscriber",
@@ -7654,9 +7658,9 @@ dependencies = [
[[package]]
name = "walkdir"
version = "2.5.0"
version = "2.3.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "29790946404f91d9c5d06f9874efddea1dc06c5efe94541a7d6863108e3a5e4b"
checksum = "36df944cda56c7d8d8b7496af378e6b16de9284591917d307c9b4d313c44e698"
dependencies = [
"same-file",
"winapi-util",
@@ -7908,15 +7912,6 @@ dependencies = [
"windows-targets 0.52.6",
]
[[package]]
name = "windows-sys"
version = "0.59.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1e38bc4d79ed67fd075bcc251a1c39b32a1776bbe92e5bef1f0bf1f8c531853b"
dependencies = [
"windows-targets 0.52.6",
]
[[package]]
name = "windows-targets"
version = "0.48.0"

View File

@@ -77,10 +77,10 @@ camino = "1.1.6"
cfg-if = "1.0.0"
chrono = { version = "0.4", default-features = false, features = ["clock"] }
clap = { version = "4.0", features = ["derive", "env"] }
clashmap = { version = "1.0", features = ["raw-api"] }
comfy-table = "7.1"
const_format = "0.2"
crc32c = "0.6"
dashmap = { version = "5.5.0", features = ["raw-api"] }
diatomic-waker = { version = "0.2.3" }
either = "1.8"
enum-map = "2.4.2"
@@ -123,7 +123,7 @@ measured = { version = "0.0.22", features=["lasso"] }
measured-process = { version = "0.0.22" }
memoffset = "0.9"
nix = { version = "0.27", features = ["dir", "fs", "process", "socket", "signal", "poll"] }
notify = "8.0.0"
notify = "6.0.0"
num_cpus = "1.15"
num-traits = "0.2.15"
once_cell = "1.13"
@@ -177,7 +177,7 @@ test-context = "0.3"
thiserror = "1.0"
tikv-jemallocator = { version = "0.6", features = ["profiling", "stats", "unprefixed_malloc_on_supported_platforms"] }
tikv-jemalloc-ctl = { version = "0.6", features = ["stats"] }
tokio = { version = "1.41", features = ["macros"] }
tokio = { version = "1.17", features = ["macros"] }
tokio-epoll-uring = { git = "https://github.com/neondatabase/tokio-epoll-uring.git" , branch = "main" }
tokio-io-timeout = "1.2.0"
tokio-postgres-rustls = "0.12.0"

View File

@@ -253,7 +253,7 @@ WORKDIR /home/nonroot
# Rust
# Please keep the version of llvm (installed above) in sync with rust llvm (`rustc --version --verbose | grep LLVM`)
ENV RUSTC_VERSION=1.84.1
ENV RUSTC_VERSION=1.84.0
ENV RUSTUP_HOME="/home/nonroot/.rustup"
ENV PATH="/home/nonroot/.cargo/bin:${PATH}"
ARG RUSTFILT_VERSION=0.2.1
@@ -261,7 +261,6 @@ ARG CARGO_HAKARI_VERSION=0.9.33
ARG CARGO_DENY_VERSION=0.16.2
ARG CARGO_HACK_VERSION=0.6.33
ARG CARGO_NEXTEST_VERSION=0.9.85
ARG CARGO_DIESEL_CLI_VERSION=2.2.6
RUN curl -sSO https://static.rust-lang.org/rustup/dist/$(uname -m)-unknown-linux-gnu/rustup-init && whoami && \
chmod +x rustup-init && \
./rustup-init -y --default-toolchain ${RUSTC_VERSION} && \
@@ -275,8 +274,6 @@ RUN curl -sSO https://static.rust-lang.org/rustup/dist/$(uname -m)-unknown-linux
cargo install cargo-deny --locked --version ${CARGO_DENY_VERSION} && \
cargo install cargo-hack --version ${CARGO_HACK_VERSION} && \
cargo install cargo-nextest --version ${CARGO_NEXTEST_VERSION} && \
cargo install diesel_cli --version ${CARGO_DIESEL_CLI_VERSION} \
--features postgres-bundled --no-default-features && \
rm -rf /home/nonroot/.cargo/registry && \
rm -rf /home/nonroot/.cargo/git

File diff suppressed because it is too large Load Diff

View File

@@ -1,242 +0,0 @@
diff --git a/contrib/amcheck/expected/check_heap.out b/contrib/amcheck/expected/check_heap.out
index 979e5e8..2375b45 100644
--- a/contrib/amcheck/expected/check_heap.out
+++ b/contrib/amcheck/expected/check_heap.out
@@ -80,12 +80,9 @@ INSERT INTO heaptest (a, b)
-- same transaction. The heaptest table is smaller than the default
-- wal_skip_threshold, so a wal_level=minimal commit reads the table into
-- shared_buffers. A transaction delays that and excludes any autovacuum.
-SET allow_in_place_tablespaces = true;
-CREATE TABLESPACE regress_test_stats_tblspc LOCATION '';
SELECT sum(reads) AS stats_bulkreads_before
FROM pg_stat_io WHERE context = 'bulkread' \gset
BEGIN;
-ALTER TABLE heaptest SET TABLESPACE regress_test_stats_tblspc;
-- Check that valid options are not rejected nor corruption reported
-- for a non-empty table
SELECT * FROM verify_heapam(relation := 'heaptest', skip := 'none');
@@ -118,14 +115,6 @@ SELECT pg_stat_force_next_flush();
(1 row)
-SELECT sum(reads) AS stats_bulkreads_after
- FROM pg_stat_io WHERE context = 'bulkread' \gset
-SELECT :stats_bulkreads_after > :stats_bulkreads_before;
- ?column?
-----------
- t
-(1 row)
-
CREATE ROLE regress_heaptest_role;
-- verify permissions are checked (error due to function not callable)
SET ROLE regress_heaptest_role;
@@ -233,7 +222,6 @@ ERROR: cannot check relation "test_foreign_table"
DETAIL: This operation is not supported for foreign tables.
-- cleanup
DROP TABLE heaptest;
-DROP TABLESPACE regress_test_stats_tblspc;
DROP TABLE test_partition;
DROP TABLE test_partitioned;
DROP OWNED BY regress_heaptest_role; -- permissions
diff --git a/contrib/amcheck/sql/check_heap.sql b/contrib/amcheck/sql/check_heap.sql
index 1745bae..3b429c3 100644
--- a/contrib/amcheck/sql/check_heap.sql
+++ b/contrib/amcheck/sql/check_heap.sql
@@ -40,12 +40,9 @@ INSERT INTO heaptest (a, b)
-- same transaction. The heaptest table is smaller than the default
-- wal_skip_threshold, so a wal_level=minimal commit reads the table into
-- shared_buffers. A transaction delays that and excludes any autovacuum.
-SET allow_in_place_tablespaces = true;
-CREATE TABLESPACE regress_test_stats_tblspc LOCATION '';
SELECT sum(reads) AS stats_bulkreads_before
FROM pg_stat_io WHERE context = 'bulkread' \gset
BEGIN;
-ALTER TABLE heaptest SET TABLESPACE regress_test_stats_tblspc;
-- Check that valid options are not rejected nor corruption reported
-- for a non-empty table
SELECT * FROM verify_heapam(relation := 'heaptest', skip := 'none');
@@ -58,9 +55,6 @@ COMMIT;
-- ALTER TABLE ... SET TABLESPACE ...
-- causing an additional bulkread, which should be reflected in pg_stat_io.
SELECT pg_stat_force_next_flush();
-SELECT sum(reads) AS stats_bulkreads_after
- FROM pg_stat_io WHERE context = 'bulkread' \gset
-SELECT :stats_bulkreads_after > :stats_bulkreads_before;
CREATE ROLE regress_heaptest_role;
@@ -140,7 +134,6 @@ SELECT * FROM verify_heapam('test_foreign_table',
-- cleanup
DROP TABLE heaptest;
-DROP TABLESPACE regress_test_stats_tblspc;
DROP TABLE test_partition;
DROP TABLE test_partitioned;
DROP OWNED BY regress_heaptest_role; -- permissions
diff --git a/contrib/citext/expected/create_index_acl.out b/contrib/citext/expected/create_index_acl.out
index 33be13a..70a406c 100644
--- a/contrib/citext/expected/create_index_acl.out
+++ b/contrib/citext/expected/create_index_acl.out
@@ -5,9 +5,6 @@
-- owner having as few applicable privileges as possible. (The privileges.sql
-- regress_sro_user tests look for the opposite defect; they confirm that
-- DefineIndex() uses the table owner userid where necessary.)
-SET allow_in_place_tablespaces = true;
-CREATE TABLESPACE regress_create_idx_tblspace LOCATION '';
-RESET allow_in_place_tablespaces;
BEGIN;
CREATE ROLE regress_minimal;
CREATE SCHEMA s;
@@ -49,11 +46,9 @@ ALTER TABLE s.x OWNER TO regress_minimal;
-- Empty-table DefineIndex()
CREATE UNIQUE INDEX u0rows ON s.x USING btree
((s.index_this_expr(y, s.const())) COLLATE s.coll s.citext_pattern_ops)
- TABLESPACE regress_create_idx_tblspace
WHERE s.index_row_if(y);
ALTER TABLE s.x ADD CONSTRAINT e0rows EXCLUDE USING btree
((s.index_this_expr(y, s.const())) COLLATE s.coll WITH s.=)
- USING INDEX TABLESPACE regress_create_idx_tblspace
WHERE (s.index_row_if(y));
-- Make the table nonempty.
INSERT INTO s.x VALUES ('foo'), ('bar');
@@ -66,11 +61,9 @@ RESET search_path;
GRANT EXECUTE ON FUNCTION s.index_this_expr TO regress_minimal;
CREATE UNIQUE INDEX u2rows ON s.x USING btree
((s.index_this_expr(y, s.const())) COLLATE s.coll s.citext_pattern_ops)
- TABLESPACE regress_create_idx_tblspace
WHERE s.index_row_if(y);
ALTER TABLE s.x ADD CONSTRAINT e2rows EXCLUDE USING btree
((s.index_this_expr(y, s.const())) COLLATE s.coll WITH s.=)
- USING INDEX TABLESPACE regress_create_idx_tblspace
WHERE (s.index_row_if(y));
-- Shall not find s.coll via search_path, despite the s.const->public.setter
-- call having set search_path=s during expression planning. Suppress the
@@ -78,9 +71,7 @@ ALTER TABLE s.x ADD CONSTRAINT e2rows EXCLUDE USING btree
\set VERBOSITY sqlstate
ALTER TABLE s.x ADD CONSTRAINT underqualified EXCLUDE USING btree
((s.index_this_expr(y, s.const())) COLLATE coll WITH s.=)
- USING INDEX TABLESPACE regress_create_idx_tblspace
WHERE (s.index_row_if(y));
ERROR: 42704
\set VERBOSITY default
ROLLBACK;
-DROP TABLESPACE regress_create_idx_tblspace;
diff --git a/contrib/citext/sql/create_index_acl.sql b/contrib/citext/sql/create_index_acl.sql
index 10b5225..ae442e1 100644
--- a/contrib/citext/sql/create_index_acl.sql
+++ b/contrib/citext/sql/create_index_acl.sql
@@ -6,10 +6,6 @@
-- regress_sro_user tests look for the opposite defect; they confirm that
-- DefineIndex() uses the table owner userid where necessary.)
-SET allow_in_place_tablespaces = true;
-CREATE TABLESPACE regress_create_idx_tblspace LOCATION '';
-RESET allow_in_place_tablespaces;
-
BEGIN;
CREATE ROLE regress_minimal;
CREATE SCHEMA s;
@@ -51,11 +47,9 @@ ALTER TABLE s.x OWNER TO regress_minimal;
-- Empty-table DefineIndex()
CREATE UNIQUE INDEX u0rows ON s.x USING btree
((s.index_this_expr(y, s.const())) COLLATE s.coll s.citext_pattern_ops)
- TABLESPACE regress_create_idx_tblspace
WHERE s.index_row_if(y);
ALTER TABLE s.x ADD CONSTRAINT e0rows EXCLUDE USING btree
((s.index_this_expr(y, s.const())) COLLATE s.coll WITH s.=)
- USING INDEX TABLESPACE regress_create_idx_tblspace
WHERE (s.index_row_if(y));
-- Make the table nonempty.
INSERT INTO s.x VALUES ('foo'), ('bar');
@@ -68,11 +62,9 @@ RESET search_path;
GRANT EXECUTE ON FUNCTION s.index_this_expr TO regress_minimal;
CREATE UNIQUE INDEX u2rows ON s.x USING btree
((s.index_this_expr(y, s.const())) COLLATE s.coll s.citext_pattern_ops)
- TABLESPACE regress_create_idx_tblspace
WHERE s.index_row_if(y);
ALTER TABLE s.x ADD CONSTRAINT e2rows EXCLUDE USING btree
((s.index_this_expr(y, s.const())) COLLATE s.coll WITH s.=)
- USING INDEX TABLESPACE regress_create_idx_tblspace
WHERE (s.index_row_if(y));
-- Shall not find s.coll via search_path, despite the s.const->public.setter
-- call having set search_path=s during expression planning. Suppress the
@@ -80,9 +72,7 @@ ALTER TABLE s.x ADD CONSTRAINT e2rows EXCLUDE USING btree
\set VERBOSITY sqlstate
ALTER TABLE s.x ADD CONSTRAINT underqualified EXCLUDE USING btree
((s.index_this_expr(y, s.const())) COLLATE coll WITH s.=)
- USING INDEX TABLESPACE regress_create_idx_tblspace
WHERE (s.index_row_if(y));
\set VERBOSITY default
ROLLBACK;
-DROP TABLESPACE regress_create_idx_tblspace;
diff --git a/contrib/file_fdw/expected/file_fdw.out b/contrib/file_fdw/expected/file_fdw.out
index 72304e0..ebe131b 100644
--- a/contrib/file_fdw/expected/file_fdw.out
+++ b/contrib/file_fdw/expected/file_fdw.out
@@ -4,6 +4,7 @@
-- directory paths are passed to us in environment variables
\getenv abs_srcdir PG_ABS_SRCDIR
-- Clean up in case a prior regression run failed
+SET compute_query_id TO 'off';
SET client_min_messages TO 'warning';
DROP ROLE IF EXISTS regress_file_fdw_superuser, regress_file_fdw_user, regress_no_priv_user;
RESET client_min_messages;
diff --git a/contrib/file_fdw/sql/file_fdw.sql b/contrib/file_fdw/sql/file_fdw.sql
index f0548e1..848a08c 100644
--- a/contrib/file_fdw/sql/file_fdw.sql
+++ b/contrib/file_fdw/sql/file_fdw.sql
@@ -6,6 +6,7 @@
\getenv abs_srcdir PG_ABS_SRCDIR
-- Clean up in case a prior regression run failed
+SET compute_query_id TO 'off';
SET client_min_messages TO 'warning';
DROP ROLE IF EXISTS regress_file_fdw_superuser, regress_file_fdw_user, regress_no_priv_user;
RESET client_min_messages;
diff --git a/contrib/pageinspect/expected/gist.out b/contrib/pageinspect/expected/gist.out
index d1adbab..38b52ac 100644
--- a/contrib/pageinspect/expected/gist.out
+++ b/contrib/pageinspect/expected/gist.out
@@ -10,25 +10,6 @@ BEGIN;
CREATE TABLE test_gist AS SELECT point(i,i) p, i::text t FROM
generate_series(1,1000) i;
CREATE INDEX test_gist_idx ON test_gist USING gist (p);
--- Page 0 is the root, the rest are leaf pages
-SELECT * FROM gist_page_opaque_info(get_raw_page('test_gist_idx', 0));
- lsn | nsn | rightlink | flags
------+-----+------------+-------
- 0/1 | 0/0 | 4294967295 | {}
-(1 row)
-
-SELECT * FROM gist_page_opaque_info(get_raw_page('test_gist_idx', 1));
- lsn | nsn | rightlink | flags
------+-----+------------+--------
- 0/1 | 0/0 | 4294967295 | {leaf}
-(1 row)
-
-SELECT * FROM gist_page_opaque_info(get_raw_page('test_gist_idx', 2));
- lsn | nsn | rightlink | flags
------+-----+-----------+--------
- 0/1 | 0/0 | 1 | {leaf}
-(1 row)
-
COMMIT;
SELECT * FROM gist_page_items(get_raw_page('test_gist_idx', 0), 'test_gist_idx');
itemoffset | ctid | itemlen | dead | keys
diff --git a/contrib/pageinspect/sql/gist.sql b/contrib/pageinspect/sql/gist.sql
index d263542..607992f 100644
--- a/contrib/pageinspect/sql/gist.sql
+++ b/contrib/pageinspect/sql/gist.sql
@@ -12,11 +12,6 @@ CREATE TABLE test_gist AS SELECT point(i,i) p, i::text t FROM
generate_series(1,1000) i;
CREATE INDEX test_gist_idx ON test_gist USING gist (p);
--- Page 0 is the root, the rest are leaf pages
-SELECT * FROM gist_page_opaque_info(get_raw_page('test_gist_idx', 0));
-SELECT * FROM gist_page_opaque_info(get_raw_page('test_gist_idx', 1));
-SELECT * FROM gist_page_opaque_info(get_raw_page('test_gist_idx', 2));
-
COMMIT;
SELECT * FROM gist_page_items(get_raw_page('test_gist_idx', 0), 'test_gist_idx');

View File

@@ -1,196 +0,0 @@
diff --git a/contrib/amcheck/expected/check_heap.out b/contrib/amcheck/expected/check_heap.out
index 979e5e8..2375b45 100644
--- a/contrib/amcheck/expected/check_heap.out
+++ b/contrib/amcheck/expected/check_heap.out
@@ -80,12 +80,9 @@ INSERT INTO heaptest (a, b)
-- same transaction. The heaptest table is smaller than the default
-- wal_skip_threshold, so a wal_level=minimal commit reads the table into
-- shared_buffers. A transaction delays that and excludes any autovacuum.
-SET allow_in_place_tablespaces = true;
-CREATE TABLESPACE regress_test_stats_tblspc LOCATION '';
SELECT sum(reads) AS stats_bulkreads_before
FROM pg_stat_io WHERE context = 'bulkread' \gset
BEGIN;
-ALTER TABLE heaptest SET TABLESPACE regress_test_stats_tblspc;
-- Check that valid options are not rejected nor corruption reported
-- for a non-empty table
SELECT * FROM verify_heapam(relation := 'heaptest', skip := 'none');
@@ -118,14 +115,6 @@ SELECT pg_stat_force_next_flush();
(1 row)
-SELECT sum(reads) AS stats_bulkreads_after
- FROM pg_stat_io WHERE context = 'bulkread' \gset
-SELECT :stats_bulkreads_after > :stats_bulkreads_before;
- ?column?
-----------
- t
-(1 row)
-
CREATE ROLE regress_heaptest_role;
-- verify permissions are checked (error due to function not callable)
SET ROLE regress_heaptest_role;
@@ -233,7 +222,6 @@ ERROR: cannot check relation "test_foreign_table"
DETAIL: This operation is not supported for foreign tables.
-- cleanup
DROP TABLE heaptest;
-DROP TABLESPACE regress_test_stats_tblspc;
DROP TABLE test_partition;
DROP TABLE test_partitioned;
DROP OWNED BY regress_heaptest_role; -- permissions
diff --git a/contrib/amcheck/sql/check_heap.sql b/contrib/amcheck/sql/check_heap.sql
index 1745bae..3b429c3 100644
--- a/contrib/amcheck/sql/check_heap.sql
+++ b/contrib/amcheck/sql/check_heap.sql
@@ -40,12 +40,9 @@ INSERT INTO heaptest (a, b)
-- same transaction. The heaptest table is smaller than the default
-- wal_skip_threshold, so a wal_level=minimal commit reads the table into
-- shared_buffers. A transaction delays that and excludes any autovacuum.
-SET allow_in_place_tablespaces = true;
-CREATE TABLESPACE regress_test_stats_tblspc LOCATION '';
SELECT sum(reads) AS stats_bulkreads_before
FROM pg_stat_io WHERE context = 'bulkread' \gset
BEGIN;
-ALTER TABLE heaptest SET TABLESPACE regress_test_stats_tblspc;
-- Check that valid options are not rejected nor corruption reported
-- for a non-empty table
SELECT * FROM verify_heapam(relation := 'heaptest', skip := 'none');
@@ -58,9 +55,6 @@ COMMIT;
-- ALTER TABLE ... SET TABLESPACE ...
-- causing an additional bulkread, which should be reflected in pg_stat_io.
SELECT pg_stat_force_next_flush();
-SELECT sum(reads) AS stats_bulkreads_after
- FROM pg_stat_io WHERE context = 'bulkread' \gset
-SELECT :stats_bulkreads_after > :stats_bulkreads_before;
CREATE ROLE regress_heaptest_role;
@@ -140,7 +134,6 @@ SELECT * FROM verify_heapam('test_foreign_table',
-- cleanup
DROP TABLE heaptest;
-DROP TABLESPACE regress_test_stats_tblspc;
DROP TABLE test_partition;
DROP TABLE test_partitioned;
DROP OWNED BY regress_heaptest_role; -- permissions
diff --git a/contrib/citext/expected/create_index_acl.out b/contrib/citext/expected/create_index_acl.out
index 33be13a..70a406c 100644
--- a/contrib/citext/expected/create_index_acl.out
+++ b/contrib/citext/expected/create_index_acl.out
@@ -5,9 +5,6 @@
-- owner having as few applicable privileges as possible. (The privileges.sql
-- regress_sro_user tests look for the opposite defect; they confirm that
-- DefineIndex() uses the table owner userid where necessary.)
-SET allow_in_place_tablespaces = true;
-CREATE TABLESPACE regress_create_idx_tblspace LOCATION '';
-RESET allow_in_place_tablespaces;
BEGIN;
CREATE ROLE regress_minimal;
CREATE SCHEMA s;
@@ -49,11 +46,9 @@ ALTER TABLE s.x OWNER TO regress_minimal;
-- Empty-table DefineIndex()
CREATE UNIQUE INDEX u0rows ON s.x USING btree
((s.index_this_expr(y, s.const())) COLLATE s.coll s.citext_pattern_ops)
- TABLESPACE regress_create_idx_tblspace
WHERE s.index_row_if(y);
ALTER TABLE s.x ADD CONSTRAINT e0rows EXCLUDE USING btree
((s.index_this_expr(y, s.const())) COLLATE s.coll WITH s.=)
- USING INDEX TABLESPACE regress_create_idx_tblspace
WHERE (s.index_row_if(y));
-- Make the table nonempty.
INSERT INTO s.x VALUES ('foo'), ('bar');
@@ -66,11 +61,9 @@ RESET search_path;
GRANT EXECUTE ON FUNCTION s.index_this_expr TO regress_minimal;
CREATE UNIQUE INDEX u2rows ON s.x USING btree
((s.index_this_expr(y, s.const())) COLLATE s.coll s.citext_pattern_ops)
- TABLESPACE regress_create_idx_tblspace
WHERE s.index_row_if(y);
ALTER TABLE s.x ADD CONSTRAINT e2rows EXCLUDE USING btree
((s.index_this_expr(y, s.const())) COLLATE s.coll WITH s.=)
- USING INDEX TABLESPACE regress_create_idx_tblspace
WHERE (s.index_row_if(y));
-- Shall not find s.coll via search_path, despite the s.const->public.setter
-- call having set search_path=s during expression planning. Suppress the
@@ -78,9 +71,7 @@ ALTER TABLE s.x ADD CONSTRAINT e2rows EXCLUDE USING btree
\set VERBOSITY sqlstate
ALTER TABLE s.x ADD CONSTRAINT underqualified EXCLUDE USING btree
((s.index_this_expr(y, s.const())) COLLATE coll WITH s.=)
- USING INDEX TABLESPACE regress_create_idx_tblspace
WHERE (s.index_row_if(y));
ERROR: 42704
\set VERBOSITY default
ROLLBACK;
-DROP TABLESPACE regress_create_idx_tblspace;
diff --git a/contrib/citext/sql/create_index_acl.sql b/contrib/citext/sql/create_index_acl.sql
index 10b5225..ae442e1 100644
--- a/contrib/citext/sql/create_index_acl.sql
+++ b/contrib/citext/sql/create_index_acl.sql
@@ -6,10 +6,6 @@
-- regress_sro_user tests look for the opposite defect; they confirm that
-- DefineIndex() uses the table owner userid where necessary.)
-SET allow_in_place_tablespaces = true;
-CREATE TABLESPACE regress_create_idx_tblspace LOCATION '';
-RESET allow_in_place_tablespaces;
-
BEGIN;
CREATE ROLE regress_minimal;
CREATE SCHEMA s;
@@ -51,11 +47,9 @@ ALTER TABLE s.x OWNER TO regress_minimal;
-- Empty-table DefineIndex()
CREATE UNIQUE INDEX u0rows ON s.x USING btree
((s.index_this_expr(y, s.const())) COLLATE s.coll s.citext_pattern_ops)
- TABLESPACE regress_create_idx_tblspace
WHERE s.index_row_if(y);
ALTER TABLE s.x ADD CONSTRAINT e0rows EXCLUDE USING btree
((s.index_this_expr(y, s.const())) COLLATE s.coll WITH s.=)
- USING INDEX TABLESPACE regress_create_idx_tblspace
WHERE (s.index_row_if(y));
-- Make the table nonempty.
INSERT INTO s.x VALUES ('foo'), ('bar');
@@ -68,11 +62,9 @@ RESET search_path;
GRANT EXECUTE ON FUNCTION s.index_this_expr TO regress_minimal;
CREATE UNIQUE INDEX u2rows ON s.x USING btree
((s.index_this_expr(y, s.const())) COLLATE s.coll s.citext_pattern_ops)
- TABLESPACE regress_create_idx_tblspace
WHERE s.index_row_if(y);
ALTER TABLE s.x ADD CONSTRAINT e2rows EXCLUDE USING btree
((s.index_this_expr(y, s.const())) COLLATE s.coll WITH s.=)
- USING INDEX TABLESPACE regress_create_idx_tblspace
WHERE (s.index_row_if(y));
-- Shall not find s.coll via search_path, despite the s.const->public.setter
-- call having set search_path=s during expression planning. Suppress the
@@ -80,9 +72,7 @@ ALTER TABLE s.x ADD CONSTRAINT e2rows EXCLUDE USING btree
\set VERBOSITY sqlstate
ALTER TABLE s.x ADD CONSTRAINT underqualified EXCLUDE USING btree
((s.index_this_expr(y, s.const())) COLLATE coll WITH s.=)
- USING INDEX TABLESPACE regress_create_idx_tblspace
WHERE (s.index_row_if(y));
\set VERBOSITY default
ROLLBACK;
-DROP TABLESPACE regress_create_idx_tblspace;
diff --git a/contrib/file_fdw/expected/file_fdw.out b/contrib/file_fdw/expected/file_fdw.out
index 86c148a..81bdb2c 100644
--- a/contrib/file_fdw/expected/file_fdw.out
+++ b/contrib/file_fdw/expected/file_fdw.out
@@ -4,6 +4,7 @@
-- directory paths are passed to us in environment variables
\getenv abs_srcdir PG_ABS_SRCDIR
-- Clean up in case a prior regression run failed
+SET compute_query_id TO 'off';
SET client_min_messages TO 'warning';
DROP ROLE IF EXISTS regress_file_fdw_superuser, regress_file_fdw_user, regress_no_priv_user;
RESET client_min_messages;
diff --git a/contrib/file_fdw/sql/file_fdw.sql b/contrib/file_fdw/sql/file_fdw.sql
index f0548e1..848a08c 100644
--- a/contrib/file_fdw/sql/file_fdw.sql
+++ b/contrib/file_fdw/sql/file_fdw.sql
@@ -6,6 +6,7 @@
\getenv abs_srcdir PG_ABS_SRCDIR
-- Clean up in case a prior regression run failed
+SET compute_query_id TO 'off';
SET client_min_messages TO 'warning';
DROP ROLE IF EXISTS regress_file_fdw_superuser, regress_file_fdw_user, regress_no_priv_user;
RESET client_min_messages;

View File

@@ -34,7 +34,6 @@
//! -r http://pg-ext-s3-gateway \
//! ```
use std::collections::HashMap;
use std::ffi::OsString;
use std::fs::File;
use std::path::Path;
use std::process::exit;
@@ -45,7 +44,7 @@ use std::{thread, time::Duration};
use anyhow::{Context, Result};
use chrono::Utc;
use clap::Parser;
use clap::Arg;
use compute_tools::disk_quota::set_disk_quota;
use compute_tools::lsn_lease::launch_lsn_lease_bg_task_for_static;
use signal_hook::consts::{SIGQUIT, SIGTERM};
@@ -74,76 +73,11 @@ use utils::failpoint_support;
// in-case of not-set environment var
const BUILD_TAG_DEFAULT: &str = "latest";
// Compatibility hack: if the control plane specified any remote-ext-config
// use the default value for extension storage proxy gateway.
// Remove this once the control plane is updated to pass the gateway URL
fn parse_remote_ext_config(arg: &str) -> Result<String> {
if arg.starts_with("http") {
Ok(arg.trim_end_matches('/').to_string())
} else {
Ok("http://pg-ext-s3-gateway".to_string())
}
}
#[derive(Parser)]
#[command(rename_all = "kebab-case")]
struct Cli {
#[arg(short = 'b', long, default_value = "postgres", env = "POSTGRES_PATH")]
pub pgbin: String,
#[arg(short = 'r', long, value_parser = parse_remote_ext_config)]
pub remote_ext_config: Option<String>,
#[arg(long, default_value_t = 3080)]
pub http_port: u16,
#[arg(short = 'D', long, value_name = "DATADIR")]
pub pgdata: String,
#[arg(short = 'C', long, value_name = "DATABASE_URL")]
pub connstr: String,
#[cfg(target_os = "linux")]
#[arg(long, default_value = "neon-postgres")]
pub cgroup: String,
#[cfg(target_os = "linux")]
#[arg(
long,
default_value = "host=localhost port=5432 dbname=postgres user=cloud_admin sslmode=disable application_name=vm-monitor"
)]
pub filecache_connstr: String,
#[cfg(target_os = "linux")]
#[arg(long, default_value = "0.0.0.0:10301")]
pub vm_monitor_addr: String,
#[arg(long, action = clap::ArgAction::SetTrue)]
pub resize_swap_on_bind: bool,
#[arg(long)]
pub set_disk_quota_for_fs: Option<String>,
#[arg(short = 's', long = "spec", group = "spec")]
pub spec_json: Option<String>,
#[arg(short = 'S', long, group = "spec-path")]
pub spec_path: Option<OsString>,
#[arg(short = 'i', long, group = "compute-id", conflicts_with_all = ["spec", "spec-path"])]
pub compute_id: Option<String>,
#[arg(short = 'p', long, conflicts_with_all = ["spec", "spec-path"], requires = "compute-id", value_name = "CONTROL_PLANE_API_BASE_URL")]
pub control_plane_uri: Option<String>,
}
fn main() -> Result<()> {
let cli = Cli::parse();
let build_tag = init()?;
let scenario = failpoint_support::init();
let (build_tag, clap_args) = init()?;
// enable core dumping for all child processes
setrlimit(Resource::CORE, rlimit::INFINITY, rlimit::INFINITY)?;
@@ -151,11 +85,13 @@ fn main() -> Result<()> {
// Enter startup tracing context
let _startup_context_guard = startup_context_from_env();
let cli_spec = try_spec_from_cli(&cli)?;
let cli_args = process_cli(&clap_args)?;
let compute = wait_spec(build_tag, &cli, cli_spec)?;
let cli_spec = try_spec_from_cli(&clap_args, &cli_args)?;
start_postgres(&cli, compute)?
let wait_spec_result = wait_spec(build_tag, cli_args, cli_spec)?;
start_postgres(&clap_args, wait_spec_result)?
// Startup is finished, exit the startup tracing span
};
@@ -172,7 +108,7 @@ fn main() -> Result<()> {
deinit_and_exit(wait_pg_result);
}
fn init() -> Result<String> {
fn init() -> Result<(String, clap::ArgMatches)> {
init_tracing_and_logging(DEFAULT_LOG_LEVEL)?;
let mut signals = Signals::new([SIGINT, SIGTERM, SIGQUIT])?;
@@ -187,7 +123,66 @@ fn init() -> Result<String> {
.to_string();
info!("build_tag: {build_tag}");
Ok(build_tag)
Ok((build_tag, cli().get_matches()))
}
fn process_cli(matches: &clap::ArgMatches) -> Result<ProcessCliResult> {
let pgbin_default = "postgres";
let pgbin = matches
.get_one::<String>("pgbin")
.map(|s| s.as_str())
.unwrap_or(pgbin_default);
let ext_remote_storage = matches
.get_one::<String>("remote-ext-config")
// Compatibility hack: if the control plane specified any remote-ext-config
// use the default value for extension storage proxy gateway.
// Remove this once the control plane is updated to pass the gateway URL
.map(|conf| {
if conf.starts_with("http") {
conf.trim_end_matches('/')
} else {
"http://pg-ext-s3-gateway"
}
});
let http_port = *matches
.get_one::<u16>("http-port")
.expect("http-port is required");
let pgdata = matches
.get_one::<String>("pgdata")
.expect("PGDATA path is required");
let connstr = matches
.get_one::<String>("connstr")
.expect("Postgres connection string is required");
let spec_json = matches.get_one::<String>("spec");
let spec_path = matches.get_one::<String>("spec-path");
let resize_swap_on_bind = matches.get_flag("resize-swap-on-bind");
let set_disk_quota_for_fs = matches.get_one::<String>("set-disk-quota-for-fs");
Ok(ProcessCliResult {
connstr,
pgdata,
pgbin,
ext_remote_storage,
http_port,
spec_json,
spec_path,
resize_swap_on_bind,
set_disk_quota_for_fs,
})
}
struct ProcessCliResult<'clap> {
connstr: &'clap str,
pgdata: &'clap str,
pgbin: &'clap str,
ext_remote_storage: Option<&'clap str>,
http_port: u16,
spec_json: Option<&'clap String>,
spec_path: Option<&'clap String>,
resize_swap_on_bind: bool,
set_disk_quota_for_fs: Option<&'clap String>,
}
fn startup_context_from_env() -> Option<opentelemetry::ContextGuard> {
@@ -240,9 +235,19 @@ fn startup_context_from_env() -> Option<opentelemetry::ContextGuard> {
}
}
fn try_spec_from_cli(cli: &Cli) -> Result<CliSpecParams> {
fn try_spec_from_cli(
matches: &clap::ArgMatches,
ProcessCliResult {
spec_json,
spec_path,
..
}: &ProcessCliResult,
) -> Result<CliSpecParams> {
let compute_id = matches.get_one::<String>("compute-id");
let control_plane_uri = matches.get_one::<String>("control-plane-uri");
// First, try to get cluster spec from the cli argument
if let Some(ref spec_json) = cli.spec_json {
if let Some(spec_json) = spec_json {
info!("got spec from cli argument {}", spec_json);
return Ok(CliSpecParams {
spec: Some(serde_json::from_str(spec_json)?),
@@ -251,7 +256,7 @@ fn try_spec_from_cli(cli: &Cli) -> Result<CliSpecParams> {
}
// Second, try to read it from the file if path is provided
if let Some(ref spec_path) = cli.spec_path {
if let Some(spec_path) = spec_path {
let file = File::open(Path::new(spec_path))?;
return Ok(CliSpecParams {
spec: Some(serde_json::from_reader(file)?),
@@ -259,20 +264,17 @@ fn try_spec_from_cli(cli: &Cli) -> Result<CliSpecParams> {
});
}
if cli.compute_id.is_none() {
let Some(compute_id) = compute_id else {
panic!(
"compute spec should be provided by one of the following ways: \
--spec OR --spec-path OR --control-plane-uri and --compute-id"
);
};
if cli.control_plane_uri.is_none() {
let Some(control_plane_uri) = control_plane_uri else {
panic!("must specify both --control-plane-uri and --compute-id or none");
};
match get_spec_from_control_plane(
cli.control_plane_uri.as_ref().unwrap(),
cli.compute_id.as_ref().unwrap(),
) {
match get_spec_from_control_plane(control_plane_uri, compute_id) {
Ok(spec) => Ok(CliSpecParams {
spec,
live_config_allowed: true,
@@ -296,12 +298,21 @@ struct CliSpecParams {
fn wait_spec(
build_tag: String,
cli: &Cli,
ProcessCliResult {
connstr,
pgdata,
pgbin,
ext_remote_storage,
resize_swap_on_bind,
set_disk_quota_for_fs,
http_port,
..
}: ProcessCliResult,
CliSpecParams {
spec,
live_config_allowed,
}: CliSpecParams,
) -> Result<Arc<ComputeNode>> {
) -> Result<WaitSpecResult> {
let mut new_state = ComputeState::new();
let spec_set;
@@ -313,7 +324,7 @@ fn wait_spec(
} else {
spec_set = false;
}
let connstr = Url::parse(&cli.connstr).context("cannot parse connstr as a URL")?;
let connstr = Url::parse(connstr).context("cannot parse connstr as a URL")?;
let conn_conf = postgres::config::Config::from_str(connstr.as_str())
.context("cannot build postgres config from connstr")?;
let tokio_conn_conf = tokio_postgres::config::Config::from_str(connstr.as_str())
@@ -322,14 +333,14 @@ fn wait_spec(
connstr,
conn_conf,
tokio_conn_conf,
pgdata: cli.pgdata.clone(),
pgbin: cli.pgbin.clone(),
pgversion: get_pg_version_string(&cli.pgbin),
http_port: cli.http_port,
pgdata: pgdata.to_string(),
pgbin: pgbin.to_string(),
pgversion: get_pg_version_string(pgbin),
http_port,
live_config_allowed,
state: Mutex::new(new_state),
state_changed: Condvar::new(),
ext_remote_storage: cli.remote_ext_config.clone(),
ext_remote_storage: ext_remote_storage.map(|s| s.to_string()),
ext_download_progress: RwLock::new(HashMap::new()),
build_tag,
};
@@ -346,7 +357,7 @@ fn wait_spec(
// Launch http service first, so that we can serve control-plane requests
// while configuration is still in progress.
let _http_handle =
launch_http_server(cli.http_port, &compute).expect("cannot launch http endpoint thread");
launch_http_server(http_port, &compute).expect("cannot launch http endpoint thread");
if !spec_set {
// No spec provided, hang waiting for it.
@@ -378,12 +389,27 @@ fn wait_spec(
launch_lsn_lease_bg_task_for_static(&compute);
Ok(compute)
Ok(WaitSpecResult {
compute,
resize_swap_on_bind,
set_disk_quota_for_fs: set_disk_quota_for_fs.cloned(),
})
}
struct WaitSpecResult {
compute: Arc<ComputeNode>,
resize_swap_on_bind: bool,
set_disk_quota_for_fs: Option<String>,
}
fn start_postgres(
cli: &Cli,
compute: Arc<ComputeNode>,
// need to allow unused because `matches` is only used if target_os = "linux"
#[allow(unused_variables)] matches: &clap::ArgMatches,
WaitSpecResult {
compute,
resize_swap_on_bind,
set_disk_quota_for_fs,
}: WaitSpecResult,
) -> Result<(Option<PostgresHandle>, StartPostgresResult)> {
// We got all we need, update the state.
let mut state = compute.state.lock().unwrap();
@@ -411,7 +437,7 @@ fn start_postgres(
let mut delay_exit = false;
// Resize swap to the desired size if the compute spec says so
if let (Some(size_bytes), true) = (swap_size_bytes, cli.resize_swap_on_bind) {
if let (Some(size_bytes), true) = (swap_size_bytes, resize_swap_on_bind) {
// To avoid 'swapoff' hitting postgres startup, we need to run resize-swap to completion
// *before* starting postgres.
//
@@ -438,9 +464,9 @@ fn start_postgres(
// Set disk quota if the compute spec says so
if let (Some(disk_quota_bytes), Some(disk_quota_fs_mountpoint)) =
(disk_quota_bytes, cli.set_disk_quota_for_fs.as_ref())
(disk_quota_bytes, set_disk_quota_for_fs)
{
match set_disk_quota(disk_quota_bytes, disk_quota_fs_mountpoint) {
match set_disk_quota(disk_quota_bytes, &disk_quota_fs_mountpoint) {
Ok(()) => {
let size_mib = disk_quota_bytes as f32 / (1 << 20) as f32; // just for more coherent display.
info!(%disk_quota_bytes, %size_mib, "set disk quota");
@@ -483,7 +509,13 @@ fn start_postgres(
if #[cfg(target_os = "linux")] {
use std::env;
use tokio_util::sync::CancellationToken;
let vm_monitor_addr = matches
.get_one::<String>("vm-monitor-addr")
.expect("--vm-monitor-addr should always be set because it has a default arg");
let file_cache_connstr = matches.get_one::<String>("filecache-connstr");
let cgroup = matches.get_one::<String>("cgroup");
// Only make a runtime if we need to.
// Note: it seems like you can make a runtime in an inner scope and
// if you start a task in it it won't be dropped. However, make it
// in the outermost scope just to be safe.
@@ -506,15 +538,15 @@ fn start_postgres(
let pgconnstr = if disable_lfc_resizing.unwrap_or(false) {
None
} else {
Some(cli.filecache_connstr.clone())
file_cache_connstr.cloned()
};
let vm_monitor = rt.as_ref().map(|rt| {
rt.spawn(vm_monitor::start(
Box::leak(Box::new(vm_monitor::Args {
cgroup: Some(cli.cgroup.clone()),
cgroup: cgroup.cloned(),
pgconnstr,
addr: cli.vm_monitor_addr.clone(),
addr: vm_monitor_addr.clone(),
})),
token.clone(),
))
@@ -670,6 +702,105 @@ fn deinit_and_exit(WaitPostgresResult { exit_code }: WaitPostgresResult) -> ! {
exit(exit_code.unwrap_or(1))
}
fn cli() -> clap::Command {
// Env variable is set by `cargo`
let version = option_env!("CARGO_PKG_VERSION").unwrap_or("unknown");
clap::Command::new("compute_ctl")
.version(version)
.arg(
Arg::new("http-port")
.long("http-port")
.value_name("HTTP_PORT")
.default_value("3080")
.value_parser(clap::value_parser!(u16))
.required(false),
)
.arg(
Arg::new("connstr")
.short('C')
.long("connstr")
.value_name("DATABASE_URL")
.required(true),
)
.arg(
Arg::new("pgdata")
.short('D')
.long("pgdata")
.value_name("DATADIR")
.required(true),
)
.arg(
Arg::new("pgbin")
.short('b')
.long("pgbin")
.default_value("postgres")
.value_name("POSTGRES_PATH"),
)
.arg(
Arg::new("spec")
.short('s')
.long("spec")
.value_name("SPEC_JSON"),
)
.arg(
Arg::new("spec-path")
.short('S')
.long("spec-path")
.value_name("SPEC_PATH"),
)
.arg(
Arg::new("compute-id")
.short('i')
.long("compute-id")
.value_name("COMPUTE_ID"),
)
.arg(
Arg::new("control-plane-uri")
.short('p')
.long("control-plane-uri")
.value_name("CONTROL_PLANE_API_BASE_URI"),
)
.arg(
Arg::new("remote-ext-config")
.short('r')
.long("remote-ext-config")
.value_name("REMOTE_EXT_CONFIG"),
)
// TODO(fprasx): we currently have default arguments because the cloud PR
// to pass them in hasn't been merged yet. We should get rid of them once
// the PR is merged.
.arg(
Arg::new("vm-monitor-addr")
.long("vm-monitor-addr")
.default_value("0.0.0.0:10301")
.value_name("VM_MONITOR_ADDR"),
)
.arg(
Arg::new("cgroup")
.long("cgroup")
.default_value("neon-postgres")
.value_name("CGROUP"),
)
.arg(
Arg::new("filecache-connstr")
.long("filecache-connstr")
.default_value(
"host=localhost port=5432 dbname=postgres user=cloud_admin sslmode=disable application_name=vm-monitor",
)
.value_name("FILECACHE_CONNSTR"),
)
.arg(
Arg::new("resize-swap-on-bind")
.long("resize-swap-on-bind")
.action(clap::ArgAction::SetTrue),
)
.arg(
Arg::new("set-disk-quota-for-fs")
.long("set-disk-quota-for-fs")
.value_name("SET_DISK_QUOTA_FOR_FS")
)
}
/// When compute_ctl is killed, send also termination signal to sync-safekeepers
/// to prevent leakage. TODO: it is better to convert compute_ctl to async and
/// wait for termination which would be easy then.
@@ -679,14 +810,7 @@ fn handle_exit_signal(sig: i32) {
exit(1);
}
#[cfg(test)]
mod test {
use clap::CommandFactory;
use super::Cli;
#[test]
fn verify_cli() {
Cli::command().debug_assert()
}
#[test]
fn verify_cli() {
cli().debug_assert()
}

View File

@@ -258,11 +258,14 @@ pub fn create_control_files(remote_extensions: &RemoteExtSpec, pgbin: &str) {
async fn download_extension_tar(ext_remote_storage: &str, ext_path: &str) -> Result<Bytes> {
let uri = format!("{}/{}", ext_remote_storage, ext_path);
info!("Download extension {} from uri {}", ext_path, uri);
info!("Download extension {:?} from uri {:?}", ext_path, uri);
match do_extension_server_request(&uri).await {
Ok(resp) => {
info!("Successfully downloaded remote extension data {}", ext_path);
info!(
"Successfully downloaded remote extension data {:?}",
ext_path
);
REMOTE_EXT_REQUESTS_TOTAL
.with_label_values(&[&StatusCode::OK.to_string()])
.inc();
@@ -282,10 +285,7 @@ async fn download_extension_tar(ext_remote_storage: &str, ext_path: &str) -> Res
async fn do_extension_server_request(uri: &str) -> Result<Bytes, (String, String)> {
let resp = reqwest::get(uri).await.map_err(|e| {
(
format!(
"could not perform remote extensions server request: {:?}",
e
),
format!("could not perform remote extensions server request: {}", e),
UNKNOWN_HTTP_STATUS.to_string(),
)
})?;
@@ -295,7 +295,7 @@ async fn do_extension_server_request(uri: &str) -> Result<Bytes, (String, String
StatusCode::OK => match resp.bytes().await {
Ok(resp) => Ok(resp),
Err(e) => Err((
format!("could not read remote extensions server response: {:?}", e),
format!("could not read remote extensions server response: {}", e),
// It's fine to return and report error with status as 200 OK,
// because we still failed to read the response.
status.to_string(),

View File

@@ -125,7 +125,7 @@ impl<'m> MigrationRunner<'m> {
info!("Finished migration id={}", migration_id);
}
Err(e) => {
error!("Failed to run migration id={}: {:?}", migration_id, e);
error!("Failed to run migration id={}: {}", migration_id, e);
DB_MIGRATION_FAILED
.with_label_values(&[migration_id.to_string().as_str()])
.inc();

View File

@@ -28,7 +28,7 @@ fn do_control_plane_request(
.map_err(|e| {
(
true,
format!("could not perform spec request to control plane: {:?}", e),
format!("could not perform spec request to control plane: {}", e),
UNKNOWN_HTTP_STATUS.to_string(),
)
})?;
@@ -39,7 +39,7 @@ fn do_control_plane_request(
Ok(spec_resp) => Ok(spec_resp),
Err(e) => Err((
true,
format!("could not deserialize control plane response: {:?}", e),
format!("could not deserialize control plane response: {}", e),
status.to_string(),
)),
},

View File

@@ -388,11 +388,6 @@ impl PageServerNode {
.map(|x| x.parse::<u8>())
.transpose()
.context("Failed to parse 'image_creation_check_threshold' as integer")?,
image_creation_preempt_threshold: settings
.remove("image_creation_preempt_threshold")
.map(|x| x.parse::<usize>())
.transpose()
.context("Failed to parse 'image_creation_preempt_threshold' as integer")?,
pitr_interval: settings.remove("pitr_interval").map(|x| x.to_string()),
walreceiver_connect_timeout: settings
.remove("walreceiver_connect_timeout")

View File

@@ -10,8 +10,8 @@ use pageserver_api::{
controller_api::{
AvailabilityZone, NodeAvailabilityWrapper, NodeDescribeResponse, NodeShardResponse,
SafekeeperDescribeResponse, SafekeeperSchedulingPolicyRequest, ShardSchedulingPolicy,
ShardsPreferredAzsRequest, ShardsPreferredAzsResponse, SkSchedulingPolicy,
TenantCreateRequest, TenantDescribeResponse, TenantPolicyRequest,
ShardsPreferredAzsRequest, SkSchedulingPolicy, TenantCreateRequest, TenantDescribeResponse,
TenantPolicyRequest,
},
models::{
EvictionPolicy, EvictionPolicyLayerAccessThreshold, LocationConfigSecondary,
@@ -800,7 +800,7 @@ async fn main() -> anyhow::Result<()> {
.collect(),
};
storcon_client
.dispatch::<ShardsPreferredAzsRequest, ShardsPreferredAzsResponse>(
.dispatch::<ShardsPreferredAzsRequest, ()>(
Method::PUT,
"control/v1/preferred_azs".to_string(),
Some(req),

View File

@@ -13,6 +13,6 @@ RUN echo 'Acquire::Retries "5";' > /etc/apt/apt.conf.d/80-retries && \
jq \
netcat-openbsd
#This is required for the pg_hintplan test
RUN mkdir -p /ext-src/pg_hint_plan-src /postgres/contrib/file_fdw && chown postgres /ext-src/pg_hint_plan-src /postgres/contrib/file_fdw
RUN mkdir -p /ext-src/pg_hint_plan-src && chown postgres /ext-src/pg_hint_plan-src
USER postgres

View File

@@ -61,32 +61,17 @@ for pg_version in ${TEST_VERSION_ONLY-14 15 16 17}; do
docker cp $TEST_CONTAINER_NAME:/ext-src/pg_hint_plan-src/data $TMPDIR/data
docker cp $TMPDIR/data $COMPUTE_CONTAINER_NAME:/ext-src/pg_hint_plan-src/
rm -rf $TMPDIR
# The following block does the same for the contrib/file_fdw test
TMPDIR=$(mktemp -d)
docker cp $TEST_CONTAINER_NAME:/postgres/contrib/file_fdw/data $TMPDIR/data
docker cp $TMPDIR/data $COMPUTE_CONTAINER_NAME:/postgres/contrib/file_fdw/data
rm -rf $TMPDIR
# Apply patches
cat ../compute/patches/contrib_pg${pg_version}.patch | docker exec -i $TEST_CONTAINER_NAME bash -c "(cd /postgres && patch -p1)"
# We are running tests now
rm -f testout.txt testout_contrib.txt
docker exec -e USE_PGXS=1 -e SKIP=timescaledb-src,rdkit-src,postgis-src,pgx_ulid-src,pgtap-src,pg_tiktoken-src,pg_jsonschema-src,kq_imcx-src,wal2json_2_5-src \
$TEST_CONTAINER_NAME /run-tests.sh /ext-src | tee testout.txt && EXT_SUCCESS=1 || EXT_SUCCESS=0
docker exec -e SKIP=start-scripts,postgres_fdw,ltree_plpython,jsonb_plpython,jsonb_plperl,hstore_plpython,hstore_plperl,dblink,bool_plperl \
$TEST_CONTAINER_NAME /run-tests.sh /postgres/contrib | tee testout_contrib.txt && CONTRIB_SUCCESS=1 || CONTRIB_SUCCESS=0
if [ $EXT_SUCCESS -eq 0 ] || [ $CONTRIB_SUCCESS -eq 0 ]; then
CONTRIB_FAILED=
FAILED=
[ $EXT_SUCCESS -eq 0 ] && FAILED=$(tail -1 testout.txt | awk '{for(i=1;i<=NF;i++){print "/ext-src/"$i;}}')
[ $CONTRIB_SUCCESS -eq 0 ] && CONTRIB_FAILED=$(tail -1 testout_contrib.txt | awk '{for(i=0;i<=NF;i++){print "/postgres/contrib/"$i;}}')
for d in $FAILED $CONTRIB_FAILED; do
dn="$(basename $d)"
rm -rf $dn
mkdir $dn
docker cp $TEST_CONTAINER_NAME:$d/regression.diffs $dn || [ $? -eq 1 ]
docker cp $TEST_CONTAINER_NAME:$d/regression.out $dn || [ $? -eq 1 ]
cat $dn/regression.out $dn/regression.diffs || true
rm -rf $dn
if ! docker exec -e SKIP=timescaledb-src,rdkit-src,postgis-src,pgx_ulid-src,pgtap-src,pg_tiktoken-src,pg_jsonschema-src,kq_imcx-src,wal2json_2_5-src \
$TEST_CONTAINER_NAME /run-tests.sh | tee testout.txt
then
FAILED=$(tail -1 testout.txt)
for d in $FAILED
do
mkdir $d
docker cp $TEST_CONTAINER_NAME:/ext-src/$d/regression.diffs $d || true
docker cp $TEST_CONTAINER_NAME:/ext-src/$d/regression.out $d || true
cat $d/regression.out $d/regression.diffs || true
done
rm -rf $FAILED
exit 1

View File

@@ -1,11 +1,9 @@
#!/bin/bash
set -x
extdir=${1}
cd "${extdir}" || exit 2
cd /ext-src || exit 2
FAILED=
LIST=$( (echo -e "${SKIP//","/"\n"}"; ls) | sort | uniq -u)
LIST=$( (echo -e "${SKIP//","/"\n"}"; ls -d -- *-src) | sort | uniq -u)
for d in ${LIST}; do
[ -d "${d}" ] || continue
if ! psql -w -c "select 1" >/dev/null; then

View File

@@ -323,10 +323,6 @@ pub struct TenantConfigToml {
// Expresed in multiples of checkpoint distance.
pub image_layer_creation_check_threshold: u8,
// How many multiples of L0 `compaction_threshold` will preempt image layer creation and do L0 compaction.
// Set to 0 to disable preemption.
pub image_creation_preempt_threshold: usize,
/// The length for an explicit LSN lease request.
/// Layers needed to reconstruct pages at LSN will not be GC-ed during this interval.
#[serde(with = "humantime_serde")]
@@ -551,10 +547,6 @@ pub mod tenant_conf_defaults {
// Relevant: https://github.com/neondatabase/neon/issues/3394
pub const DEFAULT_GC_PERIOD: &str = "1 hr";
pub const DEFAULT_IMAGE_CREATION_THRESHOLD: usize = 3;
// If there are more than threshold * compaction_threshold (that is 3 * 10 in the default config) L0 layers, image
// layer creation will end immediately. Set to 0 to disable. The target default will be 3 once we
// want to enable this feature.
pub const DEFAULT_IMAGE_CREATION_PREEMPT_THRESHOLD: usize = 0;
pub const DEFAULT_PITR_INTERVAL: &str = "7 days";
pub const DEFAULT_WALRECEIVER_CONNECT_TIMEOUT: &str = "10 seconds";
pub const DEFAULT_WALRECEIVER_LAGGING_WAL_TIMEOUT: &str = "10 seconds";
@@ -613,7 +605,6 @@ impl Default for TenantConfigToml {
lazy_slru_download: false,
timeline_get_throttle: crate::models::ThrottleConfig::disabled(),
image_layer_creation_check_threshold: DEFAULT_IMAGE_LAYER_CREATION_CHECK_THRESHOLD,
image_creation_preempt_threshold: DEFAULT_IMAGE_CREATION_PREEMPT_THRESHOLD,
lsn_lease_length: LsnLease::DEFAULT_LENGTH,
lsn_lease_length_for_ts: LsnLease::DEFAULT_LENGTH_FOR_TS,
timeline_offloading: false,

View File

@@ -498,8 +498,6 @@ pub struct TenantConfigPatch {
#[serde(skip_serializing_if = "FieldPatch::is_noop")]
pub image_layer_creation_check_threshold: FieldPatch<u8>,
#[serde(skip_serializing_if = "FieldPatch::is_noop")]
pub image_creation_preempt_threshold: FieldPatch<usize>,
#[serde(skip_serializing_if = "FieldPatch::is_noop")]
pub lsn_lease_length: FieldPatch<String>,
#[serde(skip_serializing_if = "FieldPatch::is_noop")]
pub lsn_lease_length_for_ts: FieldPatch<String>,
@@ -546,7 +544,6 @@ pub struct TenantConfig {
pub lazy_slru_download: Option<bool>,
pub timeline_get_throttle: Option<ThrottleConfig>,
pub image_layer_creation_check_threshold: Option<u8>,
pub image_creation_preempt_threshold: Option<usize>,
pub lsn_lease_length: Option<String>,
pub lsn_lease_length_for_ts: Option<String>,
pub timeline_offloading: Option<bool>,
@@ -584,7 +581,6 @@ impl TenantConfig {
mut lazy_slru_download,
mut timeline_get_throttle,
mut image_layer_creation_check_threshold,
mut image_creation_preempt_threshold,
mut lsn_lease_length,
mut lsn_lease_length_for_ts,
mut timeline_offloading,
@@ -639,9 +635,6 @@ impl TenantConfig {
patch
.image_layer_creation_check_threshold
.apply(&mut image_layer_creation_check_threshold);
patch
.image_creation_preempt_threshold
.apply(&mut image_creation_preempt_threshold);
patch.lsn_lease_length.apply(&mut lsn_lease_length);
patch
.lsn_lease_length_for_ts
@@ -686,7 +679,6 @@ impl TenantConfig {
lazy_slru_download,
timeline_get_throttle,
image_layer_creation_check_threshold,
image_creation_preempt_threshold,
lsn_lease_length,
lsn_lease_length_for_ts,
timeline_offloading,

View File

@@ -5,24 +5,6 @@ use metrics::{IntCounter, IntCounterVec};
use once_cell::sync::Lazy;
use strum_macros::{EnumString, VariantNames};
/// Logs a critical error, similarly to `tracing::error!`. This will:
///
/// * Emit an ERROR log message with prefix "CRITICAL:" and a backtrace.
/// * Increment libmetrics_tracing_event_count{level="critical"}, and indirectly level="error".
/// * Trigger a pageable alert (via the metric above).
/// * In debug builds, panic the process.
#[macro_export]
macro_rules! critical {
($($arg:tt)*) => {
if cfg!(debug_assertions) {
panic!($($arg)*);
}
$crate::logging::TRACING_EVENT_COUNT_METRIC.inc_critical();
let backtrace = std::backtrace::Backtrace::capture();
tracing::error!("CRITICAL: {}\n{backtrace}", format!($($arg)*));
};
}
#[derive(EnumString, strum_macros::Display, VariantNames, Eq, PartialEq, Debug, Clone, Copy)]
#[strum(serialize_all = "snake_case")]
pub enum LogFormat {
@@ -43,10 +25,7 @@ impl LogFormat {
}
}
pub struct TracingEventCountMetric {
/// CRITICAL is not a `tracing` log level. Instead, we increment it in the `critical!` macro,
/// and also emit it as a regular error. These are thus double-counted, but that seems fine.
critical: IntCounter,
struct TracingEventCountMetric {
error: IntCounter,
warn: IntCounter,
info: IntCounter,
@@ -54,7 +33,7 @@ pub struct TracingEventCountMetric {
trace: IntCounter,
}
pub static TRACING_EVENT_COUNT_METRIC: Lazy<TracingEventCountMetric> = Lazy::new(|| {
static TRACING_EVENT_COUNT_METRIC: Lazy<TracingEventCountMetric> = Lazy::new(|| {
let vec = metrics::register_int_counter_vec!(
"libmetrics_tracing_event_count",
"Number of tracing events, by level",
@@ -67,7 +46,6 @@ pub static TRACING_EVENT_COUNT_METRIC: Lazy<TracingEventCountMetric> = Lazy::new
impl TracingEventCountMetric {
fn new(vec: IntCounterVec) -> Self {
Self {
critical: vec.with_label_values(&["critical"]),
error: vec.with_label_values(&["error"]),
warn: vec.with_label_values(&["warn"]),
info: vec.with_label_values(&["info"]),
@@ -76,11 +54,6 @@ impl TracingEventCountMetric {
}
}
// Allow public access from `critical!` macro.
pub fn inc_critical(&self) {
self.critical.inc();
}
fn inc_for_level(&self, level: tracing::Level) {
let counter = match level {
tracing::Level::ERROR => &self.error,

View File

@@ -177,8 +177,8 @@ impl FileCacheState {
crate::spawn_with_cancel(
token,
|res| {
if let Err(e) = res {
error!(error = format_args!("{e:#}"), "postgres error");
if let Err(error) = res {
error!(%error, "postgres error")
}
},
conn,
@@ -205,7 +205,7 @@ impl FileCacheState {
{
Ok(rows) => Ok(rows),
Err(e) => {
error!(error = format_args!("{e:#}"), "postgres error -> retrying");
error!(error = ?e, "postgres error: {e} -> retrying");
let client = FileCacheState::connect(&self.conn_str, self.token.clone())
.await

View File

@@ -191,12 +191,15 @@ async fn start_monitor(
.await;
let mut monitor = match monitor {
Ok(Ok(monitor)) => monitor,
Ok(Err(e)) => {
error!(error = format_args!("{e:#}"), "failed to create monitor");
Ok(Err(error)) => {
error!(?error, "failed to create monitor");
return;
}
Err(_) => {
error!(?timeout, "creating monitor timed out");
error!(
?timeout,
"creating monitor timed out (probably waiting to receive protocol range)"
);
return;
}
};
@@ -204,9 +207,6 @@ async fn start_monitor(
match monitor.run().await {
Ok(()) => info!("monitor was killed due to new connection"),
Err(e) => error!(
error = format_args!("{e:#}"),
"monitor terminated unexpectedly"
),
Err(e) => error!(error = ?e, "monitor terminated unexpectedly"),
}
}

View File

@@ -370,16 +370,12 @@ impl Runner {
}),
InboundMsgKind::InvalidMessage { error } => {
warn!(
error = format_args!("{error:#}"),
id, "received notification of an invalid message we sent"
%error, id, "received notification of an invalid message we sent"
);
Ok(None)
}
InboundMsgKind::InternalError { error } => {
warn!(
error = format_args!("{error:#}"),
id, "agent experienced an internal error"
);
warn!(error, id, "agent experienced an internal error");
Ok(None)
}
InboundMsgKind::HealthCheck {} => {
@@ -480,7 +476,7 @@ impl Runner {
// gives the outermost cause, and the debug impl
// pretty-prints the error, whereas {:#} contains all the
// causes, but is compact (no newlines).
warn!(error = format_args!("{e:#}"), "error handling message");
warn!(error = format!("{e:#}"), "error handling message");
OutboundMsg::new(
OutboundMsgKind::InternalError {
error: e.to_string(),
@@ -496,7 +492,7 @@ impl Runner {
.context("failed to send message")?;
}
Err(e) => warn!(
error = format_args!("{e:#}"),
error = format!("{e}"),
msg = ?msg,
"received error message"
),

View File

@@ -1472,13 +1472,7 @@ async fn layer_download_handler(
let downloaded = timeline
.download_layer(&layer_name)
.await
.map_err(|e| match e {
tenant::storage_layer::layer::DownloadError::TimelineShutdown
| tenant::storage_layer::layer::DownloadError::DownloadCancelled => {
ApiError::ShuttingDown
}
other => ApiError::InternalServerError(other.into()),
})?;
.map_err(ApiError::InternalServerError)?;
match downloaded {
Some(true) => json_response(StatusCode::OK, ()),
@@ -3175,16 +3169,12 @@ async fn put_tenant_timeline_import_basebackup(
let ctx = RequestContext::new(TaskKind::MgmtRequest, DownloadBehavior::Warn);
let tenant_shard_id = TenantShardId::unsharded(tenant_id);
let span = info_span!("import_basebackup",
tenant_id=%tenant_id, timeline_id=%timeline_id, shard_id=%tenant_shard_id.shard_slug(),
base_lsn=%base_lsn, end_lsn=%end_lsn, pg_version=%pg_version);
let span = info_span!("import_basebackup", tenant_id=%tenant_id, timeline_id=%timeline_id, base_lsn=%base_lsn, end_lsn=%end_lsn, pg_version=%pg_version);
async move {
let state = get_state(&request);
let tenant = state
.tenant_manager
.get_attached_tenant_shard(tenant_shard_id)?;
.get_attached_tenant_shard(TenantShardId::unsharded(tenant_id))?;
let broker_client = state.broker_client.clone();
@@ -3393,17 +3383,7 @@ where
let status = response.status();
info!(%status, "Cancelled request finished successfully")
}
Err(e) => match e {
ApiError::ShuttingDown | ApiError::ResourceUnavailable(_) => {
// Don't log this at error severity: they are normal during lifecycle of tenants/process
info!("Cancelled request aborted for shutdown")
}
_ => {
// Log these in a highly visible way, because we have no client to send the response to, but
// would like to know that something went wrong.
error!("Cancelled request finished with an error: {e:?}")
}
},
Err(e) => error!("Cancelled request finished with an error: {e:?}"),
}
}
// only logging for cancelled panicked request handlers is the tracing_panic_hook,

View File

@@ -263,6 +263,14 @@ pub(crate) const TENANT_HEATMAP_BASENAME: &str = "heatmap-v1.json";
/// data directory at pageserver startup can be automatically removed.
pub(crate) const TEMP_FILE_SUFFIX: &str = "___temp";
/// A marker file to mark that a timeline directory was not fully initialized.
/// If a timeline directory with this marker is encountered at pageserver startup,
/// the timeline directory and the marker file are both removed.
/// Full path: `tenants/<tenant_id>/timelines/<timeline_id>___uninit`.
pub(crate) const TIMELINE_UNINIT_MARK_SUFFIX: &str = "___uninit";
pub(crate) const TIMELINE_DELETE_MARK_SUFFIX: &str = "___delete";
pub fn is_temporary(path: &Utf8Path) -> bool {
match path.file_name() {
Some(name) => name.ends_with(TEMP_FILE_SUFFIX),
@@ -270,6 +278,25 @@ pub fn is_temporary(path: &Utf8Path) -> bool {
}
}
fn ends_with_suffix(path: &Utf8Path, suffix: &str) -> bool {
match path.file_name() {
Some(name) => name.ends_with(suffix),
None => false,
}
}
// FIXME: DO NOT ADD new query methods like this, which will have a next step of parsing timelineid
// from the directory name. Instead create type "UninitMark(TimelineId)" and only parse it once
// from the name.
pub(crate) fn is_uninit_mark(path: &Utf8Path) -> bool {
ends_with_suffix(path, TIMELINE_UNINIT_MARK_SUFFIX)
}
pub(crate) fn is_delete_mark(path: &Utf8Path) -> bool {
ends_with_suffix(path, TIMELINE_DELETE_MARK_SUFFIX)
}
/// During pageserver startup, we need to order operations not to exhaust tokio worker threads by
/// blocking.
///

View File

@@ -116,38 +116,11 @@ pub(crate) static STORAGE_TIME_GLOBAL: Lazy<HistogramVec> = Lazy::new(|| {
.expect("failed to define a metric")
});
/// Measures layers visited per read (i.e. read amplification).
///
/// NB: for a batch, we count all visited layers towards each read. While the cost of layer visits
/// are amortized across the batch, and some layers may not intersect with a given key, each visited
/// layer contributes directly to the observed latency for every read in the batch, which is what we
/// care about.
pub(crate) static LAYERS_PER_READ: Lazy<HistogramVec> = Lazy::new(|| {
register_histogram_vec!(
"pageserver_layers_per_read",
"Layers visited to serve a single read (read amplification). In a batch, all visited layers count towards every read.",
&["tenant_id", "shard_id", "timeline_id"],
// Low resolution to reduce cardinality.
vec![1.0, 5.0, 10.0, 25.0, 50.0, 100.0],
)
.expect("failed to define a metric")
});
pub(crate) static LAYERS_PER_READ_GLOBAL: Lazy<Histogram> = Lazy::new(|| {
pub(crate) static VEC_READ_NUM_LAYERS_VISITED: Lazy<Histogram> = Lazy::new(|| {
register_histogram!(
"pageserver_layers_per_read_global",
"Layers visited to serve a single read (read amplification). In a batch, all visited layers count towards every read.",
vec![1.0, 2.0, 4.0, 8.0, 16.0, 32.0, 64.0, 128.0, 256.0, 512.0, 1024.0],
)
.expect("failed to define a metric")
});
pub(crate) static DELTAS_PER_READ_GLOBAL: Lazy<Histogram> = Lazy::new(|| {
// We expect this to be low because of Postgres checkpoints. Let's see if that holds.
register_histogram!(
"pageserver_deltas_per_read_global",
"Number of delta pages applied to image page per read",
vec![0.0, 1.0, 2.0, 4.0, 8.0, 16.0, 32.0, 64.0, 128.0, 256.0],
"pageserver_layers_visited_per_vectored_read_global",
"Average number of layers visited to reconstruct one key",
vec![1.0, 4.0, 8.0, 16.0, 32.0, 64.0, 128.0, 256.0, 512.0, 1024.0],
)
.expect("failed to define a metric")
});
@@ -2659,7 +2632,6 @@ pub(crate) struct TimelineMetrics {
pub disk_consistent_lsn_gauge: IntGauge,
pub pitr_history_size: UIntGauge,
pub archival_size: UIntGauge,
pub layers_per_read: Histogram,
pub standby_horizon_gauge: IntGauge,
pub resident_physical_size_gauge: UIntGauge,
pub visible_physical_size_gauge: UIntGauge,
@@ -2757,10 +2729,6 @@ impl TimelineMetrics {
.get_metric_with_label_values(&[&tenant_id, &shard_id, &timeline_id])
.unwrap();
let layers_per_read = LAYERS_PER_READ
.get_metric_with_label_values(&[&tenant_id, &shard_id, &timeline_id])
.unwrap();
let standby_horizon_gauge = STANDBY_HORIZON
.get_metric_with_label_values(&[&tenant_id, &shard_id, &timeline_id])
.unwrap();
@@ -2825,7 +2793,6 @@ impl TimelineMetrics {
disk_consistent_lsn_gauge,
pitr_history_size,
archival_size,
layers_per_read,
standby_horizon_gauge,
resident_physical_size_gauge,
visible_physical_size_gauge,
@@ -2995,8 +2962,6 @@ impl TimelineMetrics {
}
}
let _ = LAYERS_PER_READ.remove_label_values(&[tenant_id, shard_id, timeline_id]);
let _ = EVICTIONS.remove_label_values(&[tenant_id, shard_id, timeline_id]);
let _ = AUX_FILE_SIZE.remove_label_values(&[tenant_id, shard_id, timeline_id]);
let _ = VALID_LSN_LEASE_COUNT.remove_label_values(&[tenant_id, shard_id, timeline_id]);
@@ -3947,8 +3912,7 @@ pub fn preinitialize_metrics(conf: &'static PageServerConf) {
// histograms
[
&LAYERS_PER_READ_GLOBAL,
&DELTAS_PER_READ_GLOBAL,
&VEC_READ_NUM_LAYERS_VISITED,
&WAIT_LSN_TIME,
&WAL_REDO_TIME,
&WAL_REDO_RECORDS_HISTOGRAM,

View File

@@ -46,7 +46,6 @@ use std::sync::atomic::AtomicBool;
use std::sync::Weak;
use std::time::SystemTime;
use storage_broker::BrokerClientChannel;
use timeline::compaction::CompactionOutcome;
use timeline::compaction::GcCompactionQueue;
use timeline::import_pgdata;
use timeline::offload::offload_timeline;
@@ -96,6 +95,7 @@ use crate::context::{DownloadBehavior, RequestContext};
use crate::deletion_queue::DeletionQueueClient;
use crate::deletion_queue::DeletionQueueError;
use crate::import_datadir;
use crate::is_uninit_mark;
use crate::l0_flush::L0FlushGlobalState;
use crate::metrics::CONCURRENT_INITDBS;
use crate::metrics::INITDB_RUN_TIME;
@@ -1793,7 +1793,11 @@ impl Tenant {
let entry = entry.context("read timeline dir entry")?;
let entry_path = entry.path();
let purge = if crate::is_temporary(entry_path) {
let purge = if crate::is_temporary(entry_path)
// TODO: remove uninit mark code (https://github.com/neondatabase/neon/issues/5718)
|| is_uninit_mark(entry_path)
|| crate::is_delete_mark(entry_path)
{
true
} else {
match TimelineId::try_from(entry_path.file_name()) {
@@ -2422,7 +2426,7 @@ impl Tenant {
// Make sure the freeze_and_flush reaches remote storage.
tline.remote_client.wait_completion().await.unwrap();
let tl = uninit_tl.finish_creation().await?;
let tl = uninit_tl.finish_creation()?;
// The non-test code would call tl.activate() here.
tl.set_state(TimelineState::Active);
Ok(tl)
@@ -2908,10 +2912,10 @@ impl Tenant {
self: &Arc<Self>,
cancel: &CancellationToken,
ctx: &RequestContext,
) -> Result<CompactionOutcome, timeline::CompactionError> {
) -> Result<bool, timeline::CompactionError> {
// Don't start doing work during shutdown, or when broken, we do not need those in the logs
if !self.is_active() {
return Ok(CompactionOutcome::Done);
return Ok(false);
}
{
@@ -2925,7 +2929,7 @@ impl Tenant {
// to AttachedSingle state.
if !conf.location.may_upload_layers_hint() {
info!("Skipping compaction in location state {:?}", conf.location);
return Ok(CompactionOutcome::Done);
return Ok(false);
}
}
@@ -2968,7 +2972,7 @@ impl Tenant {
// Before doing any I/O work, check our circuit breaker
if self.compaction_circuit_breaker.lock().unwrap().is_broken() {
info!("Skipping compaction due to previous failures");
return Ok(CompactionOutcome::Done);
return Ok(false);
}
let mut has_pending_task = false;
@@ -2976,10 +2980,10 @@ impl Tenant {
for (timeline_id, timeline, (can_compact, can_offload)) in &timelines_to_compact_or_offload
{
// pending_task_left == None: cannot compact, maybe still pending tasks
// pending_task_left == Some(Pending): compaction task left
// pending_task_left == Some(Done): no compaction task left
// pending_task_left == Some(true): compaction task left
// pending_task_left == Some(false): no compaction task left
let pending_task_left = if *can_compact {
let compaction_outcome = timeline
let has_pending_l0_compaction_task = timeline
.compact(cancel, EnumSet::empty(), ctx)
.instrument(info_span!("compact_timeline", %timeline_id))
.await
@@ -2997,27 +3001,27 @@ impl Tenant {
.fail(&CIRCUIT_BREAKERS_BROKEN, e);
}
})?;
if let CompactionOutcome::Pending = compaction_outcome {
Some(CompactionOutcome::Pending)
if has_pending_l0_compaction_task {
Some(true)
} else {
let queue = {
let guard = self.scheduled_compaction_tasks.lock().unwrap();
guard.get(timeline_id).cloned()
};
if let Some(queue) = queue {
let outcome = queue
let has_pending_tasks = queue
.iteration(cancel, ctx, &self.gc_block, timeline)
.await?;
Some(outcome)
Some(has_pending_tasks)
} else {
Some(CompactionOutcome::Done)
Some(false)
}
}
} else {
None
};
has_pending_task |= pending_task_left == Some(CompactionOutcome::Pending);
if pending_task_left == Some(CompactionOutcome::Done) && *can_offload {
has_pending_task |= pending_task_left.unwrap_or(false);
if pending_task_left == Some(false) && *can_offload {
pausable_failpoint!("before-timeline-auto-offload");
match offload_timeline(self, timeline)
.instrument(info_span!("offload_timeline", %timeline_id))
@@ -3037,11 +3041,7 @@ impl Tenant {
.unwrap()
.success(&CIRCUIT_BREAKERS_UNBROKEN);
Ok(if has_pending_task {
CompactionOutcome::Pending
} else {
CompactionOutcome::Done
})
Ok(has_pending_task)
}
/// Cancel scheduled compaction tasks
@@ -4702,7 +4702,7 @@ impl Tenant {
)
.await?;
let new_timeline = uninitialized_timeline.finish_creation().await?;
let new_timeline = uninitialized_timeline.finish_creation()?;
// Root timeline gets its layers during creation and uploads them along with the metadata.
// A branch timeline though, when created, can get no writes for some time, hence won't get any layers created.
@@ -4892,11 +4892,10 @@ impl Tenant {
}
// this new directory is very temporary, set to remove it immediately after bootstrap, we don't need it
let pgdata_path_deferred = pgdata_path.clone();
scopeguard::defer! {
if let Err(e) = fs::remove_dir_all(&pgdata_path_deferred) {
if let Err(e) = fs::remove_dir_all(&pgdata_path) {
// this is unlikely, but we will remove the directory on pageserver restart or another bootstrap call
error!("Failed to remove temporary initdb directory '{pgdata_path_deferred}': {e}");
error!("Failed to remove temporary initdb directory '{pgdata_path}': {e}");
}
}
if let Some(existing_initdb_timeline_id) = load_existing_initdb {
@@ -4963,7 +4962,7 @@ impl Tenant {
pgdata_lsn,
pg_version,
);
let mut raw_timeline = self
let raw_timeline = self
.prepare_new_timeline(
timeline_id,
&new_metadata,
@@ -4974,33 +4973,42 @@ impl Tenant {
.await?;
let tenant_shard_id = raw_timeline.owning_tenant.tenant_shard_id;
raw_timeline
.write(|unfinished_timeline| async move {
import_datadir::import_timeline_from_postgres_datadir(
&unfinished_timeline,
&pgdata_path,
pgdata_lsn,
ctx,
let unfinished_timeline = raw_timeline.raw_timeline()?;
// Flush the new layer files to disk, before we make the timeline as available to
// the outside world.
//
// Flush loop needs to be spawned in order to be able to flush.
unfinished_timeline.maybe_spawn_flush_loop();
import_datadir::import_timeline_from_postgres_datadir(
unfinished_timeline,
&pgdata_path,
pgdata_lsn,
ctx,
)
.await
.with_context(|| {
format!("Failed to import pgdatadir for timeline {tenant_shard_id}/{timeline_id}")
})?;
fail::fail_point!("before-checkpoint-new-timeline", |_| {
Err(CreateTimelineError::Other(anyhow::anyhow!(
"failpoint before-checkpoint-new-timeline"
)))
});
unfinished_timeline
.freeze_and_flush()
.await
.with_context(|| {
format!(
"Failed to flush after pgdatadir import for timeline {tenant_shard_id}/{timeline_id}"
)
.await
.with_context(|| {
format!(
"Failed to import pgdatadir for timeline {tenant_shard_id}/{timeline_id}"
)
})?;
fail::fail_point!("before-checkpoint-new-timeline", |_| {
Err(CreateTimelineError::Other(anyhow::anyhow!(
"failpoint before-checkpoint-new-timeline"
)))
});
Ok(())
})
.await?;
})?;
// All done!
let timeline = raw_timeline.finish_creation().await?;
let timeline = raw_timeline.finish_creation()?;
// Callers are responsible to wait for uploads to complete and for activating the timeline.
@@ -5491,9 +5499,6 @@ pub(crate) mod harness {
image_layer_creation_check_threshold: Some(
tenant_conf.image_layer_creation_check_threshold,
),
image_creation_preempt_threshold: Some(
tenant_conf.image_creation_preempt_threshold,
),
lsn_lease_length: Some(tenant_conf.lsn_lease_length),
lsn_lease_length_for_ts: Some(tenant_conf.lsn_lease_length_for_ts),
timeline_offloading: Some(tenant_conf.timeline_offloading),

View File

@@ -357,9 +357,6 @@ pub struct TenantConfOpt {
#[serde(skip_serializing_if = "Option::is_none")]
pub image_layer_creation_check_threshold: Option<u8>,
#[serde(skip_serializing_if = "Option::is_none")]
pub image_creation_preempt_threshold: Option<usize>,
#[serde(skip_serializing_if = "Option::is_none")]
#[serde(with = "humantime_serde")]
#[serde(default)]
@@ -456,9 +453,6 @@ impl TenantConfOpt {
image_layer_creation_check_threshold: self
.image_layer_creation_check_threshold
.unwrap_or(global_conf.image_layer_creation_check_threshold),
image_creation_preempt_threshold: self
.image_creation_preempt_threshold
.unwrap_or(global_conf.image_creation_preempt_threshold),
lsn_lease_length: self
.lsn_lease_length
.unwrap_or(global_conf.lsn_lease_length),
@@ -510,7 +504,6 @@ impl TenantConfOpt {
mut lazy_slru_download,
mut timeline_get_throttle,
mut image_layer_creation_check_threshold,
mut image_creation_preempt_threshold,
mut lsn_lease_length,
mut lsn_lease_length_for_ts,
mut timeline_offloading,
@@ -585,9 +578,6 @@ impl TenantConfOpt {
patch
.image_layer_creation_check_threshold
.apply(&mut image_layer_creation_check_threshold);
patch
.image_creation_preempt_threshold
.apply(&mut image_creation_preempt_threshold);
patch
.lsn_lease_length
.map(|v| humantime::parse_duration(&v))?
@@ -636,7 +626,6 @@ impl TenantConfOpt {
lazy_slru_download,
timeline_get_throttle,
image_layer_creation_check_threshold,
image_creation_preempt_threshold,
lsn_lease_length,
lsn_lease_length_for_ts,
timeline_offloading,
@@ -700,7 +689,6 @@ impl From<TenantConfOpt> for models::TenantConfig {
lazy_slru_download: value.lazy_slru_download,
timeline_get_throttle: value.timeline_get_throttle,
image_layer_creation_check_threshold: value.image_layer_creation_check_threshold,
image_creation_preempt_threshold: value.image_creation_preempt_threshold,
lsn_lease_length: value.lsn_lease_length.map(humantime),
lsn_lease_length_for_ts: value.lsn_lease_length_for_ts.map(humantime),
timeline_offloading: value.timeline_offloading,

View File

@@ -673,30 +673,12 @@ impl<'a> TenantDownloader<'a> {
HeatMapDownload::Modified(m) => m,
};
// Heatmap storage location
let heatmap_path = self.conf.tenant_heatmap_path(tenant_shard_id);
let last_heatmap = if last_download.is_none() {
match load_heatmap(&heatmap_path, ctx).await {
Ok(htm) => htm,
Err(e) => {
tracing::warn!("Couldn't load heatmap from {heatmap_path}: {e:?}");
None
}
}
} else {
None
};
let last_heatmap_timelines = last_heatmap.as_ref().map(|htm| {
htm.timelines
.iter()
.map(|tl| (tl.timeline_id, tl))
.collect::<HashMap<_, _>>()
});
let heatmap = serde_json::from_slice::<HeatMapTenant>(&heatmap_bytes)?;
// Save the heatmap: this will be useful on restart, allowing us to reconstruct
// layer metadata without having to re-download it.
let heatmap_path = self.conf.tenant_heatmap_path(tenant_shard_id);
let temp_path = path_with_suffix_extension(&heatmap_path, TEMP_FILE_SUFFIX);
let context_msg = format!("write tenant {tenant_shard_id} heatmap to {heatmap_path}");
let heatmap_path_bg = heatmap_path.clone();
@@ -725,17 +707,10 @@ impl<'a> TenantDownloader<'a> {
let timeline_state = match timeline_state {
Some(t) => t,
None => {
let last_heatmap =
last_heatmap_timelines
.as_ref()
.and_then(|last_heatmap_timelines| {
last_heatmap_timelines.get(&timeline.timeline_id).copied()
});
// We have no existing state: need to scan local disk for layers first.
let timeline_state = init_timeline_state(
self.conf,
tenant_shard_id,
last_heatmap,
timeline,
&self.secondary_state.resident_size_metric,
)
@@ -1104,12 +1079,12 @@ impl<'a> TenantDownloader<'a> {
}
}
if on_disk.metadata.generation_file_size() != layer.metadata.generation_file_size() {
if on_disk.metadata.generation_file_size() != on_disk.metadata.generation_file_size() {
tracing::info!(
"Re-downloading layer {} with changed size or generation: {:?}->{:?}",
layer.name,
on_disk.metadata.generation_file_size(),
layer.metadata.generation_file_size()
on_disk.metadata.generation_file_size()
);
return LayerAction::Download;
}
@@ -1302,7 +1277,6 @@ impl<'a> TenantDownloader<'a> {
async fn init_timeline_state(
conf: &'static PageServerConf,
tenant_shard_id: &TenantShardId,
last_heatmap: Option<&HeatMapTimeline>,
heatmap: &HeatMapTimeline,
resident_metric: &UIntGauge,
) -> SecondaryDetailTimeline {
@@ -1332,13 +1306,6 @@ async fn init_timeline_state(
let heatmap_metadata: HashMap<&LayerName, &HeatMapLayer> =
heatmap.layers.iter().map(|l| (&l.name, l)).collect();
let last_heatmap_metadata: HashMap<&LayerName, &HeatMapLayer> =
if let Some(last_heatmap) = last_heatmap {
last_heatmap.layers.iter().map(|l| (&l.name, l)).collect()
} else {
HashMap::new()
};
while let Some(dentry) = dir
.next_entry()
.await
@@ -1372,32 +1339,18 @@ async fn init_timeline_state(
match LayerName::from_str(file_name) {
Ok(name) => {
let remote_meta = heatmap_metadata.get(&name);
let last_meta = last_heatmap_metadata.get(&name);
let mut remove = false;
match remote_meta {
Some(remote_meta) => {
let last_meta_generation_file_size = last_meta
.map(|m| m.metadata.generation_file_size())
.unwrap_or(remote_meta.metadata.generation_file_size());
// TODO: checksums for layers (https://github.com/neondatabase/neon/issues/2784)
if remote_meta.metadata.generation_file_size()
!= last_meta_generation_file_size
{
tracing::info!(
"Removing local layer {name} as on-disk json metadata has different generation or file size from remote: {:?} -> {:?}",
last_meta_generation_file_size,
remote_meta.metadata.generation_file_size()
);
remove = true;
} else if local_meta.len() != remote_meta.metadata.file_size {
// This can happen in the presence of race conditions: the remote and on-disk metadata have changed, but we haven't had
// the chance yet to download the new layer to disk, before the process restarted.
tracing::info!(
if local_meta.len() != remote_meta.metadata.file_size {
// This should not happen, because we do crashsafe write-then-rename when downloading
// layers, and layers in remote storage are immutable. Remove the local file because
// we cannot trust it.
tracing::warn!(
"Removing local layer {name} with unexpected local size {} != {}",
local_meta.len(),
remote_meta.metadata.file_size
);
remove = true;
} else {
// We expect the access time to be initialized immediately afterwards, when
// the latest heatmap is applied to the state.
@@ -1419,18 +1372,15 @@ async fn init_timeline_state(
"Removing secondary local layer {} because it's absent in heatmap",
name
);
remove = true;
tokio::fs::remove_file(&dentry.path())
.await
.or_else(fs_ext::ignore_not_found)
.fatal_err(&format!(
"Removing layer {}",
dentry.path().to_string_lossy()
));
}
}
if remove {
tokio::fs::remove_file(&dentry.path())
.await
.or_else(fs_ext::ignore_not_found)
.fatal_err(&format!(
"Removing layer {}",
dentry.path().to_string_lossy()
));
}
}
Err(_) => {
// Ignore it.
@@ -1441,18 +1391,3 @@ async fn init_timeline_state(
detail
}
/// Loads a json-encoded heatmap file from the provided on-disk path
async fn load_heatmap(
path: &Utf8PathBuf,
ctx: &RequestContext,
) -> Result<Option<HeatMapTenant>, anyhow::Error> {
let mut file = match VirtualFile::open(path, ctx).await {
Ok(file) => file,
Err(e) if e.kind() == std::io::ErrorKind::NotFound => return Ok(None),
Err(e) => Err(e)?,
};
let st = file.read_to_string(ctx).await?;
let htm = serde_json::from_str(&st)?;
Ok(Some(htm))
}

View File

@@ -80,16 +80,6 @@ pub(crate) struct ValueReconstructState {
pub(crate) img: Option<(Lsn, Bytes)>,
}
impl ValueReconstructState {
/// Returns the number of page deltas applied to the page image.
pub fn num_deltas(&self) -> usize {
match self.img {
Some(_) => self.records.len(),
None => self.records.len() - 1, // omit will_init record
}
}
}
#[derive(Clone, Copy, Debug, Default, Eq, PartialEq)]
pub(crate) enum ValueReconstructSituation {
Complete,

View File

@@ -166,10 +166,6 @@ impl BatchLayerWriter {
// END: catch every error and do the recovery in the above section
Ok(generated_layers)
}
pub fn pending_layer_num(&self) -> usize {
self.generated_layer_writers.len()
}
}
/// An image writer that takes images and produces multiple image layers.

View File

@@ -340,7 +340,7 @@ impl Layer {
/// Download the layer if evicted.
///
/// Will not error when the layer is already downloaded.
pub(crate) async fn download(&self) -> Result<(), DownloadError> {
pub(crate) async fn download(&self) -> anyhow::Result<()> {
self.0.get_or_maybe_download(true, None).await?;
Ok(())
}

View File

@@ -11,7 +11,6 @@ use crate::metrics::TENANT_TASK_EVENTS;
use crate::task_mgr;
use crate::task_mgr::{TaskKind, BACKGROUND_RUNTIME};
use crate::tenant::throttle::Stats;
use crate::tenant::timeline::compaction::CompactionOutcome;
use crate::tenant::timeline::CompactionError;
use crate::tenant::{Tenant, TenantState};
use rand::Rng;
@@ -207,10 +206,10 @@ async fn compaction_loop(tenant: Arc<Tenant>, cancel: CancellationToken) {
.run(tenant.compaction_iteration(&cancel, &ctx))
.await;
match output {
Ok(outcome) => {
Ok(has_pending_task) => {
error_run_count = 0;
// schedule the next compaction immediately in case there is a pending compaction task
sleep_duration = if let CompactionOutcome::Pending = outcome {
sleep_duration = if has_pending_task {
Duration::ZERO
} else {
period

View File

@@ -18,7 +18,6 @@ use arc_swap::{ArcSwap, ArcSwapOption};
use bytes::Bytes;
use camino::Utf8Path;
use chrono::{DateTime, Utc};
use compaction::CompactionOutcome;
use enumset::EnumSet;
use fail::fail_point;
use futures::{stream::FuturesUnordered, StreamExt};
@@ -52,7 +51,6 @@ use tokio::{
};
use tokio_util::sync::CancellationToken;
use tracing::*;
use utils::rate_limit::RateLimit;
use utils::{
fs_ext,
guard_arc_swap::GuardArcSwap,
@@ -117,7 +115,7 @@ use pageserver_api::config::tenant_conf_defaults::DEFAULT_PITR_INTERVAL;
use crate::config::PageServerConf;
use crate::keyspace::{KeyPartitioning, KeySpace};
use crate::metrics::{TimelineMetrics, DELTAS_PER_READ_GLOBAL, LAYERS_PER_READ_GLOBAL};
use crate::metrics::TimelineMetrics;
use crate::pgdatadir_mapping::CalculateLogicalSizeError;
use crate::tenant::config::TenantConfOpt;
use pageserver_api::reltag::RelTag;
@@ -190,14 +188,6 @@ pub enum ImageLayerCreationMode {
Initial,
}
#[derive(Clone, Debug, Default)]
pub enum LastImageLayerCreationStatus {
Incomplete, // TODO: record the last key being processed
Complete,
#[default]
Initial,
}
impl std::fmt::Display for ImageLayerCreationMode {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
write!(f, "{:?}", self)
@@ -350,14 +340,10 @@ pub struct Timeline {
// Needed to ensure that we can't create a branch at a point that was already garbage collected
pub latest_gc_cutoff_lsn: Rcu<Lsn>,
pub(crate) gc_compaction_layer_update_lock: tokio::sync::RwLock<()>,
// List of child timelines and their branch points. This is needed to avoid
// garbage collecting data that is still needed by the child timelines.
pub(crate) gc_info: std::sync::RwLock<GcInfo>,
pub(crate) last_image_layer_creation_status: ArcSwap<LastImageLayerCreationStatus>,
// It may change across major versions so for simplicity
// keep it after running initdb for a timeline.
// It is needed in checks when we want to error on some operations
@@ -947,16 +933,9 @@ pub(crate) enum ShutdownMode {
Hard,
}
enum ImageLayerCreationOutcome {
/// We generated an image layer
Generated {
unfinished_image_layer: ImageLayerWriter,
},
/// The key range is empty
Empty,
/// (Only used in metadata image layer creation), after reading the metadata keys, we decide to skip
/// the image layer creation.
Skip,
struct ImageLayerCreationOutcome {
unfinished_image_layer: Option<ImageLayerWriter>,
next_start_key: Key,
}
/// Public interface functions
@@ -1065,7 +1044,7 @@ impl Timeline {
}
pub(crate) const MAX_GET_VECTORED_KEYS: u64 = 32;
pub(crate) const LAYERS_VISITED_WARN_THRESHOLD: u32 = 100;
pub(crate) const VEC_GET_LAYERS_VISITED_WARN_THRESH: f64 = 512.0;
/// Look up multiple page versions at a given LSN
///
@@ -1215,7 +1194,6 @@ impl Timeline {
return (key, Err(err));
}
};
DELTAS_PER_READ_GLOBAL.observe(converted.num_deltas() as f64);
// The walredo module expects the records to be descending in terms of Lsn.
// And we submit the IOs in that order, so, there shuold be no need to sort here.
@@ -1243,28 +1221,25 @@ impl Timeline {
// (this is a requirement, not a bug). Skip updating the metric in these cases
// to avoid infinite results.
if !results.is_empty() {
// Record the total number of layers visited towards each key in the batch. While some
// layers may not intersect with a given read, and the cost of layer visits are
// amortized across the batch, each visited layer contributes directly to the observed
// latency for every read in the batch, which is what we care about.
if layers_visited >= Self::LAYERS_VISITED_WARN_THRESHOLD {
static LOG_PACER: Lazy<Mutex<RateLimit>> =
let avg = layers_visited as f64 / results.len() as f64;
if avg >= Self::VEC_GET_LAYERS_VISITED_WARN_THRESH {
use utils::rate_limit::RateLimit;
static LOGGED: Lazy<Mutex<RateLimit>> =
Lazy::new(|| Mutex::new(RateLimit::new(Duration::from_secs(60))));
LOG_PACER.lock().unwrap().call(|| {
let num_keys = keyspace.total_raw_size();
let num_pages = results.len();
let mut rate_limit = LOGGED.lock().unwrap();
rate_limit.call(|| {
tracing::info!(
shard_id = %self.tenant_shard_id.shard_slug(),
lsn = %lsn,
"Vectored read for {keyspace} visited {layers_visited} layers. Returned {num_pages}/{num_keys} pages.",
);
"Vectored read for {} visited {} layers on average per key and {} in total. {}/{} pages were returned",
keyspace, avg, layers_visited, results.len(), keyspace.total_raw_size());
});
}
for _ in &results {
self.metrics.layers_per_read.observe(layers_visited as f64);
LAYERS_PER_READ_GLOBAL.observe(layers_visited as f64);
}
// Note that this is an approximation. Tracking the exact number of layers visited
// per key requires virtually unbounded memory usage and is inefficient
// (i.e. segment tree tracking each range queried from a layer)
crate::metrics::VEC_READ_NUM_LAYERS_VISITED.observe(avg);
}
Ok(results)
@@ -1680,7 +1655,7 @@ impl Timeline {
cancel: &CancellationToken,
flags: EnumSet<CompactFlags>,
ctx: &RequestContext,
) -> Result<CompactionOutcome, CompactionError> {
) -> Result<bool, CompactionError> {
self.compact_with_options(
cancel,
CompactOptions {
@@ -1702,7 +1677,7 @@ impl Timeline {
cancel: &CancellationToken,
options: CompactOptions,
ctx: &RequestContext,
) -> Result<CompactionOutcome, CompactionError> {
) -> Result<bool, CompactionError> {
// most likely the cancellation token is from background task, but in tests it could be the
// request task as well.
@@ -1722,8 +1697,8 @@ impl Timeline {
// compaction task goes over it's period (20s) which is quite often in production.
let (_guard, _permit) = tokio::select! {
tuple = prepare => { tuple },
_ = self.cancel.cancelled() => return Ok(CompactionOutcome::Done),
_ = cancel.cancelled() => return Ok(CompactionOutcome::Done),
_ = self.cancel.cancelled() => return Ok(false),
_ = cancel.cancelled() => return Ok(false),
};
let last_record_lsn = self.get_last_record_lsn();
@@ -1731,13 +1706,13 @@ impl Timeline {
// Last record Lsn could be zero in case the timeline was just created
if !last_record_lsn.is_valid() {
warn!("Skipping compaction for potentially just initialized timeline, it has invalid last record lsn: {last_record_lsn}");
return Ok(CompactionOutcome::Done);
return Ok(false);
}
let result = match self.get_compaction_algorithm_settings().kind {
CompactionAlgorithm::Tiered => {
self.compact_tiered(cancel, ctx).await?;
Ok(CompactionOutcome::Done)
Ok(false)
}
CompactionAlgorithm::Legacy => self.compact_legacy(cancel, options, ctx).await,
};
@@ -1836,7 +1811,7 @@ impl Timeline {
self.last_record_lsn.shutdown();
if let ShutdownMode::FreezeAndFlush = mode {
let do_flush = if let Some((open, frozen)) = self
if let Some((open, frozen)) = self
.layers
.read()
.await
@@ -1845,54 +1820,43 @@ impl Timeline {
.ok()
.filter(|(open, frozen)| *open || *frozen > 0)
{
if self.remote_client.is_archived() == Some(true) {
// No point flushing on shutdown for an archived timeline: it is not important
// to have it nice and fresh after our restart, and trying to flush here might
// race with trying to offload it (which also stops the flush loop)
false
} else {
tracing::info!(?open, frozen, "flushing and freezing on shutdown");
true
}
tracing::info!(?open, frozen, "flushing and freezing on shutdown");
} else {
// this is double-shutdown, it'll be a no-op
true
};
// this is double-shutdown, ignore it
}
// we shut down walreceiver above, so, we won't add anything more
// to the InMemoryLayer; freeze it and wait for all frozen layers
// to reach the disk & upload queue, then shut the upload queue and
// wait for it to drain.
if do_flush {
match self.freeze_and_flush().await {
Ok(_) => {
// drain the upload queue
// if we did not wait for completion here, it might be our shutdown process
// didn't wait for remote uploads to complete at all, as new tasks can forever
// be spawned.
//
// what is problematic is the shutting down of RemoteTimelineClient, because
// obviously it does not make sense to stop while we wait for it, but what
// about corner cases like s3 suddenly hanging up?
self.remote_client.shutdown().await;
}
Err(FlushLayerError::Cancelled) => {
// this is likely the second shutdown, ignore silently.
// TODO: this can be removed once https://github.com/neondatabase/neon/issues/5080
debug_assert!(self.cancel.is_cancelled());
}
Err(e) => {
// Non-fatal. Shutdown is infallible. Failures to flush just mean that
// we have some extra WAL replay to do next time the timeline starts.
warn!("failed to freeze and flush: {e:#}");
}
match self.freeze_and_flush().await {
Ok(_) => {
// drain the upload queue
// if we did not wait for completion here, it might be our shutdown process
// didn't wait for remote uploads to complete at all, as new tasks can forever
// be spawned.
//
// what is problematic is the shutting down of RemoteTimelineClient, because
// obviously it does not make sense to stop while we wait for it, but what
// about corner cases like s3 suddenly hanging up?
self.remote_client.shutdown().await;
}
Err(FlushLayerError::Cancelled) => {
// this is likely the second shutdown, ignore silently.
// TODO: this can be removed once https://github.com/neondatabase/neon/issues/5080
debug_assert!(self.cancel.is_cancelled());
}
Err(e) => {
// Non-fatal. Shutdown is infallible. Failures to flush just mean that
// we have some extra WAL replay to do next time the timeline starts.
warn!("failed to freeze and flush: {e:#}");
}
}
// `self.remote_client.shutdown().await` above should have already flushed everything from the queue, but
// we also do a final check here to ensure that the queue is empty.
if !self.remote_client.no_pending_work() {
warn!("still have pending work in remote upload queue, but continuing shutting down anyways");
}
// `self.remote_client.shutdown().await` above should have already flushed everything from the queue, but
// we also do a final check here to ensure that the queue is empty.
if !self.remote_client.no_pending_work() {
warn!("still have pending work in remote upload queue, but continuing shutting down anyways");
}
}
@@ -2057,16 +2021,8 @@ impl Timeline {
pub(crate) async fn download_layer(
&self,
layer_file_name: &LayerName,
) -> Result<Option<bool>, super::storage_layer::layer::DownloadError> {
let Some(layer) = self
.find_layer(layer_file_name)
.await
.map_err(|e| match e {
layer_manager::Shutdown => {
super::storage_layer::layer::DownloadError::TimelineShutdown
}
})?
else {
) -> anyhow::Result<Option<bool>> {
let Some(layer) = self.find_layer(layer_file_name).await? else {
return Ok(None);
};
@@ -2367,18 +2323,6 @@ impl Timeline {
)
}
fn get_image_creation_preempt_threshold(&self) -> usize {
let tenant_conf = self.tenant_conf.load();
tenant_conf
.tenant_conf
.image_creation_preempt_threshold
.unwrap_or(
self.conf
.default_tenant_conf
.image_creation_preempt_threshold,
)
}
/// Resolve the effective WAL receiver protocol to use for this tenant.
///
/// Priority order is:
@@ -2488,7 +2432,6 @@ impl Timeline {
shard_identity,
pg_version,
layers: Default::default(),
gc_compaction_layer_update_lock: tokio::sync::RwLock::new(()),
walredo_mgr,
walreceiver: Mutex::new(None),
@@ -2529,10 +2472,6 @@ impl Timeline {
gc_info: std::sync::RwLock::new(GcInfo::default()),
last_image_layer_creation_status: ArcSwap::new(Arc::new(
LastImageLayerCreationStatus::default(),
)),
latest_gc_cutoff_lsn: Rcu::new(metadata.latest_gc_cutoff_lsn()),
initdb_lsn: metadata.initdb_lsn(),
@@ -3536,9 +3475,6 @@ impl Timeline {
// image layer).
let _gc_cutoff_holder = timeline.get_latest_gc_cutoff_lsn();
// See `compaction::compact_with_gc` for why we need this.
let _guard = timeline.gc_compaction_layer_update_lock.read().await;
loop {
if cancel.is_cancelled() {
return Err(GetVectoredError::Cancelled);
@@ -4076,20 +4012,15 @@ impl Timeline {
}
let mut layers_to_upload = Vec::new();
let (generated_image_layers, is_complete) = self
.create_image_layers(
layers_to_upload.extend(
self.create_image_layers(
&partitions,
self.initdb_lsn,
ImageLayerCreationMode::Initial,
ctx,
LastImageLayerCreationStatus::Initial,
)
.await?;
debug_assert!(
matches!(is_complete, LastImageLayerCreationStatus::Complete),
"init image generation mode must fully cover the keyspace"
.await?,
);
layers_to_upload.extend(generated_image_layers);
(layers_to_upload, None)
} else {
@@ -4409,6 +4340,7 @@ impl Timeline {
lsn: Lsn,
ctx: &RequestContext,
img_range: Range<Key>,
start: Key,
io_concurrency: IoConcurrency,
) -> Result<ImageLayerCreationOutcome, CreateImageLayersError> {
let mut wrote_keys = false;
@@ -4496,23 +4428,26 @@ impl Timeline {
lsn
},
);
Ok(ImageLayerCreationOutcome::Generated {
unfinished_image_layer: image_layer_writer,
Ok(ImageLayerCreationOutcome {
unfinished_image_layer: Some(image_layer_writer),
next_start_key: img_range.end,
})
} else {
// Special case: the image layer may be empty if this is a sharded tenant and the
// partition does not cover any keys owned by this shard. In this case, to ensure
// we don't leave gaps between image layers, leave `start` where it is, so that the next
// layer we write will cover the key range that we just scanned.
tracing::debug!("no data in range {}-{}", img_range.start, img_range.end);
Ok(ImageLayerCreationOutcome::Empty)
Ok(ImageLayerCreationOutcome {
unfinished_image_layer: None,
next_start_key: start,
})
}
}
/// Create an image layer for metadata keys. This function produces one image layer for all metadata
/// keys for now. Because metadata keys cannot exceed basebackup size limit, the image layer for it
/// would not be too large to fit in a single image layer.
///
/// Creating image layers for metadata keys are different from relational keys. Firstly, instead of
/// iterating each key and get an image for each of them, we do a `vectored_get` scan over the sparse
/// keyspace to get all images in one run. Secondly, we use a different image layer generation metrics
/// for metadata keys than relational keys, which is the number of delta files visited during the scan.
#[allow(clippy::too_many_arguments)]
async fn create_image_layer_for_metadata_keys(
self: &Arc<Self>,
@@ -4522,13 +4457,12 @@ impl Timeline {
ctx: &RequestContext,
img_range: Range<Key>,
mode: ImageLayerCreationMode,
start: Key,
io_concurrency: IoConcurrency,
) -> Result<ImageLayerCreationOutcome, CreateImageLayersError> {
// Metadata keys image layer creation.
let mut reconstruct_state = ValuesReconstructState::new(io_concurrency);
let begin = Instant::now();
// Directly use `get_vectored_impl` to skip the max_vectored_read_key limit check. Note that the keyspace should
// not contain too many keys, otherwise this takes a lot of memory.
let data = self
.get_vectored_impl(partition.clone(), lsn, &mut reconstruct_state, ctx)
.await?;
@@ -4553,7 +4487,10 @@ impl Timeline {
);
if !trigger_generation && mode == ImageLayerCreationMode::Try {
return Ok(ImageLayerCreationOutcome::Skip);
return Ok(ImageLayerCreationOutcome {
unfinished_image_layer: None,
next_start_key: img_range.end,
});
}
if self.cancel.is_cancelled() {
return Err(CreateImageLayersError::Cancelled);
@@ -4584,12 +4521,20 @@ impl Timeline {
lsn
}
);
Ok(ImageLayerCreationOutcome::Generated {
unfinished_image_layer: image_layer_writer,
Ok(ImageLayerCreationOutcome {
unfinished_image_layer: Some(image_layer_writer),
next_start_key: img_range.end,
})
} else {
// Special case: the image layer may be empty if this is a sharded tenant and the
// partition does not cover any keys owned by this shard. In this case, to ensure
// we don't leave gaps between image layers, leave `start` where it is, so that the next
// layer we write will cover the key range that we just scanned.
tracing::debug!("no data in range {}-{}", img_range.start, img_range.end);
Ok(ImageLayerCreationOutcome::Empty)
Ok(ImageLayerCreationOutcome {
unfinished_image_layer: None,
next_start_key: start,
})
}
}
@@ -4645,8 +4590,6 @@ impl Timeline {
decision
}
/// Returns the image layers generated and an enum indicating whether the process is fully completed.
/// true = we have generate all image layers, false = we preempt the process for L0 compaction.
#[tracing::instrument(skip_all, fields(%lsn, %mode))]
async fn create_image_layers(
self: &Arc<Timeline>,
@@ -4654,8 +4597,7 @@ impl Timeline {
lsn: Lsn,
mode: ImageLayerCreationMode,
ctx: &RequestContext,
last_status: LastImageLayerCreationStatus,
) -> Result<(Vec<ResidentLayer>, LastImageLayerCreationStatus), CreateImageLayersError> {
) -> Result<Vec<ResidentLayer>, CreateImageLayersError> {
let timer = self.metrics.create_images_time_histo.start_timer();
// We need to avoid holes between generated image layers.
@@ -4669,23 +4611,10 @@ impl Timeline {
// image layers <100000000..100000099> and <200000000..200000199> are not completely covering it.
let mut start = Key::MIN;
let check_for_image_layers = if let LastImageLayerCreationStatus::Incomplete = last_status {
info!(
"resuming image layer creation: last_status={:?}",
last_status
);
true
} else {
self.should_check_if_image_layers_required(lsn)
};
let check_for_image_layers = self.should_check_if_image_layers_required(lsn);
let mut batch_image_writer = BatchLayerWriter::new(self.conf).await?;
let mut all_generated = true;
let mut partition_processed = 0;
let total_partitions = partitioning.parts.len();
for partition in partitioning.parts.iter() {
if self.cancel.is_cancelled() {
return Err(CreateImageLayersError::Cancelled);
@@ -4758,13 +4687,17 @@ impl Timeline {
.map_err(|_| CreateImageLayersError::Cancelled)?,
);
let outcome = if !compact_metadata {
let ImageLayerCreationOutcome {
unfinished_image_layer,
next_start_key,
} = if !compact_metadata {
self.create_image_layer_for_rel_blocks(
partition,
image_layer_writer,
lsn,
ctx,
img_range.clone(),
start,
io_concurrency,
)
.await?
@@ -4776,58 +4709,18 @@ impl Timeline {
ctx,
img_range.clone(),
mode,
start,
io_concurrency,
)
.await?
};
match outcome {
ImageLayerCreationOutcome::Empty => {
// No data in this partition, so we don't need to create an image layer (for now).
// The next image layer should cover this key range, so we don't advance the `start`
// key.
}
ImageLayerCreationOutcome::Generated {
start = next_start_key;
if let Some(unfinished_image_layer) = unfinished_image_layer {
batch_image_writer.add_unfinished_image_writer(
unfinished_image_layer,
} => {
batch_image_writer.add_unfinished_image_writer(
unfinished_image_layer,
img_range.clone(),
lsn,
);
// The next image layer should be generated right after this one.
start = img_range.end;
}
ImageLayerCreationOutcome::Skip => {
// We don't need to create an image layer for this partition.
// The next image layer should NOT cover this range, otherwise
// the keyspace becomes empty (reads don't go past image layers).
start = img_range.end;
}
}
partition_processed += 1;
if let ImageLayerCreationMode::Try = mode {
// We have at least made some progress
if batch_image_writer.pending_layer_num() >= 1 {
// The `Try` mode is currently only used on the compaction path. We want to avoid
// image layer generation taking too long time and blocking L0 compaction. So in this
// mode, we also inspect the current number of L0 layers and skip image layer generation
// if there are too many of them.
let num_of_l0_layers = {
let layers = self.layers.read().await;
layers.layer_map()?.level0_deltas().len()
};
let image_preempt_threshold = self.get_image_creation_preempt_threshold()
* self.get_compaction_threshold();
if image_preempt_threshold != 0 && num_of_l0_layers >= image_preempt_threshold {
tracing::info!(
"preempt image layer generation at {start} at {lsn}: too many L0 layers {num_of_l0_layers}",
);
all_generated = false;
break;
}
}
img_range,
lsn,
);
}
}
@@ -4842,35 +4735,14 @@ impl Timeline {
.open_mut()?
.track_new_image_layers(&image_layers, &self.metrics);
drop_wlock(guard);
let duration = timer.stop_and_record();
timer.stop_and_record();
// Creating image layers may have caused some previously visible layers to be covered
if !image_layers.is_empty() {
self.update_layer_visibility().await?;
}
let total_layer_size = image_layers
.iter()
.map(|l| l.metadata().file_size)
.sum::<u64>();
info!(
"created {} image layers ({} bytes) in {}s, processed {} out of {} partitions",
image_layers.len(),
total_layer_size,
duration.as_secs_f64(),
partition_processed,
total_partitions
);
Ok((
image_layers,
if all_generated {
LastImageLayerCreationStatus::Complete
} else {
LastImageLayerCreationStatus::Incomplete
},
))
Ok(image_layers)
}
/// Wait until the background initial logical size calculation is complete, or

View File

@@ -11,7 +11,7 @@ use std::sync::Arc;
use super::layer_manager::LayerManager;
use super::{
CompactFlags, CompactOptions, CreateImageLayersError, DurationRecorder, ImageLayerCreationMode,
LastImageLayerCreationStatus, RecordedDuration, Timeline,
RecordedDuration, Timeline,
};
use anyhow::{anyhow, bail, Context};
@@ -262,13 +262,13 @@ impl GcCompactionQueue {
ctx: &RequestContext,
gc_block: &GcBlock,
timeline: &Arc<Timeline>,
) -> Result<CompactionOutcome, CompactionError> {
) -> Result<bool, CompactionError> {
let _one_op_at_a_time_guard = self.consumer_lock.lock().await;
let has_pending_tasks;
let (id, item) = {
let mut guard = self.inner.lock().unwrap();
let Some((id, item)) = guard.queued.pop_front() else {
return Ok(CompactionOutcome::Done);
return Ok(false);
};
guard.running = Some((id, item.clone()));
has_pending_tasks = !guard.queued.is_empty();
@@ -323,11 +323,7 @@ impl GcCompactionQueue {
let mut guard = self.inner.lock().unwrap();
guard.running = None;
}
Ok(if has_pending_tasks {
CompactionOutcome::Pending
} else {
CompactionOutcome::Done
})
Ok(has_pending_tasks)
}
#[allow(clippy::type_complexity)]
@@ -593,17 +589,6 @@ impl CompactionStatistics {
}
}
#[derive(Default, Debug, Clone, Copy, PartialEq, Eq)]
pub enum CompactionOutcome {
#[default]
/// No layers need to be compacted after this round. Compaction doesn't need
/// to be immediately scheduled.
Done,
/// Still has pending layers to be compacted after this round. Ideally, the scheduler
/// should immediately schedule another compaction.
Pending,
}
impl Timeline {
/// TODO: cancellation
///
@@ -613,7 +598,7 @@ impl Timeline {
cancel: &CancellationToken,
options: CompactOptions,
ctx: &RequestContext,
) -> Result<CompactionOutcome, CompactionError> {
) -> Result<bool, CompactionError> {
if options
.flags
.contains(CompactFlags::EnhancedGcBottomMostCompaction)
@@ -621,7 +606,7 @@ impl Timeline {
self.compact_with_gc(cancel, options, ctx)
.await
.map_err(CompactionError::Other)?;
return Ok(CompactionOutcome::Done);
return Ok(false);
}
if options.flags.contains(CompactFlags::DryRun) {
@@ -681,9 +666,9 @@ impl Timeline {
// Define partitioning schema if needed
// 1. L0 Compact
let l0_compaction_outcome = {
let fully_compacted = {
let timer = self.metrics.compact_time_histo.start_timer();
let l0_compaction_outcome = self
let fully_compacted = self
.compact_level0(
target_file_size,
options.flags.contains(CompactFlags::ForceL0Compaction),
@@ -691,15 +676,15 @@ impl Timeline {
)
.await?;
timer.stop_and_record();
l0_compaction_outcome
fully_compacted
};
if let CompactionOutcome::Pending = l0_compaction_outcome {
if !fully_compacted {
// Yield and do not do any other kind of compaction. True means
// that we have pending L0 compaction tasks and the compaction scheduler
// will prioritize compacting this tenant/timeline again.
info!("skipping image layer generation and shard ancestor compaction due to L0 compaction did not include all layers.");
return Ok(CompactionOutcome::Pending);
return Ok(true);
}
// 2. Repartition and create image layers if necessary
@@ -724,7 +709,7 @@ impl Timeline {
.extend(sparse_partitioning.into_dense().parts);
// 3. Create new image layers for partitions that have been modified "enough".
let (image_layers, outcome) = self
let image_layers = self
.create_image_layers(
&partitioning,
lsn,
@@ -737,22 +722,10 @@ impl Timeline {
ImageLayerCreationMode::Try
},
&image_ctx,
self.last_image_layer_creation_status
.load()
.as_ref()
.clone(),
)
.await?;
self.last_image_layer_creation_status
.store(Arc::new(outcome.clone()));
self.upload_new_image_layers(image_layers)?;
if let LastImageLayerCreationStatus::Incomplete = outcome {
// Yield and do not do any other kind of compaction.
info!("skipping shard ancestor compaction due to pending image layer generation tasks (preempted by L0 compaction).");
return Ok(CompactionOutcome::Pending);
}
partitioning.parts.len()
}
Err(err) => {
@@ -780,7 +753,7 @@ impl Timeline {
self.compact_shard_ancestors(rewrite_max, ctx).await?;
}
Ok(CompactionOutcome::Done)
Ok(false)
}
/// Check for layers that are elegible to be rewritten:
@@ -1037,11 +1010,11 @@ impl Timeline {
target_file_size: u64,
force_compaction_ignore_threshold: bool,
ctx: &RequestContext,
) -> Result<CompactionOutcome, CompactionError> {
) -> Result<bool, CompactionError> {
let CompactLevel0Phase1Result {
new_layers,
deltas_to_compact,
outcome,
fully_compacted,
} = {
let phase1_span = info_span!("compact_level0_phase1");
let ctx = ctx.attached_child();
@@ -1070,12 +1043,12 @@ impl Timeline {
if new_layers.is_empty() && deltas_to_compact.is_empty() {
// nothing to do
return Ok(CompactionOutcome::Done);
return Ok(true);
}
self.finish_compact_batch(&new_layers, &Vec::new(), &deltas_to_compact)
.await?;
Ok(outcome)
Ok(fully_compacted)
}
/// Level0 files first phase of compaction, explained in the [`Self::compact_legacy`] comment.
@@ -1149,13 +1122,7 @@ impl Timeline {
// Under normal circumstances, we will accumulate up to compaction_upper_limit L0s of size
// checkpoint_distance each. To avoid edge cases using extra system resources, bound our
// work in this function to only operate on this much delta data at once.
//
// In general, compaction_threshold should be <= compaction_upper_limit, but in case that
// the constraint is not respected, we use the larger of the two.
let delta_size_limit = std::cmp::max(
self.get_compaction_upper_limit(),
self.get_compaction_threshold(),
) as u64
let delta_size_limit = self.get_compaction_upper_limit() as u64
* std::cmp::max(self.get_checkpoint_distance(), DEFAULT_CHECKPOINT_DISTANCE);
let mut fully_compacted = true;
@@ -1530,9 +1497,11 @@ impl Timeline {
.await
.map_err(CompactionError::Other)?;
} else {
let shard = self.shard_identity.shard_index();
let owner = self.shard_identity.get_shard_number(&key);
// This happens after a shard split, when we're compacting an L0 created by our parent shard
if cfg!(debug_assertions) {
panic!("key {key} does not belong on shard {shard}, owned by {owner}");
}
debug!("dropping key {key} during compaction (it belongs on shard {owner})");
}
@@ -1617,11 +1586,7 @@ impl Timeline {
.into_iter()
.map(|x| x.drop_eviction_guard())
.collect::<Vec<_>>(),
outcome: if fully_compacted {
CompactionOutcome::Done
} else {
CompactionOutcome::Pending
},
fully_compacted,
})
}
}
@@ -1632,7 +1597,7 @@ struct CompactLevel0Phase1Result {
deltas_to_compact: Vec<Layer>,
// Whether we have included all L0 layers, or selected only part of them due to the
// L0 compaction size limit.
outcome: CompactionOutcome,
fully_compacted: bool,
}
#[derive(Default)]
@@ -2948,45 +2913,10 @@ impl Timeline {
// Between the sanity check and this compaction update, there could be new layers being flushed, but it should be fine because we only
// operate on L1 layers.
{
// Gc-compaction will rewrite the history of a key. This could happen in two ways:
//
// 1. We create an image layer to replace all the deltas below the compact LSN. In this case, assume
// we have 2 delta layers A and B, both below the compact LSN. We create an image layer I to replace
// A and B at the compact LSN. If the read path finishes reading A, yields, and now we update the layer
// map, the read path then cannot find any keys below A, reporting a missing key error, while the key
// now gets stored in I at the compact LSN.
//
// --------------- ---------------
// delta1@LSN20 image1@LSN20
// --------------- (read path collects delta@LSN20, => --------------- (read path cannot find anything
// delta1@LSN10 yields) below LSN 20)
// ---------------
//
// 2. We create a delta layer to replace all the deltas below the compact LSN, and in the delta layers,
// we combines the history of a key into a single image. For example, we have deltas at LSN 1, 2, 3, 4,
// Assume one delta layer contains LSN 1, 2, 3 and the other contains LSN 4.
//
// We let gc-compaction combine delta 2, 3, 4 into an image at LSN 4, which produces a delta layer that
// contains the delta at LSN 1, the image at LSN 4. If the read path finishes reading the original delta
// layer containing 4, yields, and we update the layer map to put the delta layer.
//
// --------------- ---------------
// delta1@LSN4 image1@LSN4
// --------------- (read path collects delta@LSN4, => --------------- (read path collects LSN4 and LSN1,
// delta1@LSN1-3 yields) delta1@LSN1 which is an invalid history)
// --------------- ---------------
//
// Therefore, the gc-compaction layer update operation should wait for all ongoing reads, block all pending reads,
// and only allow reads to continue after the update is finished.
let update_guard = self.gc_compaction_layer_update_lock.write().await;
// Acquiring the update guard ensures current read operations end and new read operations are blocked.
// TODO: can we use `latest_gc_cutoff` Rcu to achieve the same effect?
let mut guard = self.layers.write().await;
guard
.open_mut()?
.finish_gc_compaction(&layer_selection, &compact_to, &self.metrics);
drop(update_guard); // Allow new reads to start ONLY after we finished updating the layer map.
.finish_gc_compaction(&layer_selection, &compact_to, &self.metrics)
};
// Schedule an index-only upload to update the `latest_gc_cutoff` in the index_part.json.
@@ -3263,7 +3193,11 @@ impl TimelineAdaptor {
ranges: self.get_keyspace(key_range, lsn, ctx).await?,
};
// TODO set proper (stateful) start. The create_image_layer_for_rel_blocks function mostly
let outcome = self
let start = Key::MIN;
let ImageLayerCreationOutcome {
unfinished_image_layer,
next_start_key: _,
} = self
.timeline
.create_image_layer_for_rel_blocks(
&keyspace,
@@ -3271,15 +3205,13 @@ impl TimelineAdaptor {
lsn,
ctx,
key_range.clone(),
start,
IoConcurrency::sequential(),
)
.await?;
if let ImageLayerCreationOutcome::Generated {
unfinished_image_layer,
} = outcome
{
let (desc, path) = unfinished_image_layer.finish(ctx).await?;
if let Some(image_layer_writer) = unfinished_image_layer {
let (desc, path) = image_layer_writer.finish(ctx).await?;
let image_layer =
Layer::finish_creating(self.timeline.conf, &self.timeline, desc, &path)?;
self.new_images.push(image_layer);

View File

@@ -1,4 +1,4 @@
use std::{collections::hash_map::Entry, fs, future::Future, sync::Arc};
use std::{collections::hash_map::Entry, fs, sync::Arc};
use anyhow::Context;
use camino::Utf8PathBuf;
@@ -8,8 +8,7 @@ use utils::{fs_ext, id::TimelineId, lsn::Lsn, sync::gate::GateGuard};
use crate::{
context::RequestContext,
import_datadir,
span::debug_assert_current_span_has_tenant_and_timeline_id,
tenant::{CreateTimelineError, CreateTimelineIdempotency, Tenant, TimelineOrOffloaded},
tenant::{CreateTimelineIdempotency, Tenant, TimelineOrOffloaded},
};
use super::Timeline;
@@ -25,9 +24,6 @@ pub struct UninitializedTimeline<'t> {
pub(crate) owning_tenant: &'t Tenant,
timeline_id: TimelineId,
raw_timeline: Option<(Arc<Timeline>, TimelineCreateGuard)>,
/// Whether we spawned the inner Timeline's tasks such that we must later shut it down
/// if aborting the timeline creation
needs_shutdown: bool,
}
impl<'t> UninitializedTimeline<'t> {
@@ -40,50 +36,6 @@ impl<'t> UninitializedTimeline<'t> {
owning_tenant,
timeline_id,
raw_timeline,
needs_shutdown: false,
}
}
/// When writing data to this timeline during creation, use this wrapper: it will take care of
/// setup of Timeline tasks required for I/O (flush loop) and making sure they are torn down
/// later.
pub(crate) async fn write<F, Fut>(&mut self, f: F) -> anyhow::Result<()>
where
F: FnOnce(Arc<Timeline>) -> Fut,
Fut: Future<Output = Result<(), CreateTimelineError>>,
{
debug_assert_current_span_has_tenant_and_timeline_id();
// Remember that we did I/O (spawned the flush loop), so that we can check we shut it down on drop
self.needs_shutdown = true;
let timeline = self.raw_timeline()?;
// Spawn flush loop so that the Timeline is ready to accept writes
timeline.maybe_spawn_flush_loop();
// Invoke the provided function, which will write some data into the new timeline
if let Err(e) = f(timeline.clone()).await {
self.abort().await;
return Err(e.into());
}
// Flush the underlying timeline's ephemeral layers to disk
if let Err(e) = timeline
.freeze_and_flush()
.await
.context("Failed to flush after timeline creation writes")
{
self.abort().await;
return Err(e);
}
Ok(())
}
pub(crate) async fn abort(&self) {
if let Some((raw_timeline, _)) = self.raw_timeline.as_ref() {
raw_timeline.shutdown(super::ShutdownMode::Hard).await;
}
}
@@ -92,13 +44,11 @@ impl<'t> UninitializedTimeline<'t> {
/// This function launches the flush loop if not already done.
///
/// The caller is responsible for activating the timeline (function `.activate()`).
pub(crate) async fn finish_creation(mut self) -> anyhow::Result<Arc<Timeline>> {
pub(crate) fn finish_creation(mut self) -> anyhow::Result<Arc<Timeline>> {
let timeline_id = self.timeline_id;
let tenant_shard_id = self.owning_tenant.tenant_shard_id;
if self.raw_timeline.is_none() {
self.abort().await;
return Err(anyhow::anyhow!(
"No timeline for initialization found for {tenant_shard_id}/{timeline_id}"
));
@@ -112,25 +62,16 @@ impl<'t> UninitializedTimeline<'t> {
.0
.get_disk_consistent_lsn();
if !new_disk_consistent_lsn.is_valid() {
self.abort().await;
return Err(anyhow::anyhow!(
"new timeline {tenant_shard_id}/{timeline_id} has invalid disk_consistent_lsn"
));
}
anyhow::ensure!(
new_disk_consistent_lsn.is_valid(),
"new timeline {tenant_shard_id}/{timeline_id} has invalid disk_consistent_lsn"
);
let mut timelines = self.owning_tenant.timelines.lock().unwrap();
match timelines.entry(timeline_id) {
Entry::Occupied(_) => {
// Unexpected, bug in the caller. Tenant is responsible for preventing concurrent creation of the same timeline.
//
// We do not call Self::abort here. Because we don't cleanly shut down our Timeline, [`Self::drop`] should
// skip trying to delete the timeline directory too.
anyhow::bail!(
Entry::Occupied(_) => anyhow::bail!(
"Found freshly initialized timeline {tenant_shard_id}/{timeline_id} in the tenant map"
)
}
),
Entry::Vacant(v) => {
// after taking here should be no fallible operations, because the drop guard will not
// cleanup after and would block for example the tenant deletion
@@ -152,31 +93,36 @@ impl<'t> UninitializedTimeline<'t> {
/// Prepares timeline data by loading it from the basebackup archive.
pub(crate) async fn import_basebackup_from_tar(
mut self,
self,
tenant: Arc<Tenant>,
copyin_read: &mut (impl tokio::io::AsyncRead + Send + Sync + Unpin),
base_lsn: Lsn,
broker_client: storage_broker::BrokerClientChannel,
ctx: &RequestContext,
) -> anyhow::Result<Arc<Timeline>> {
self.write(|raw_timeline| async move {
import_datadir::import_basebackup_from_tar(&raw_timeline, copyin_read, base_lsn, ctx)
.await
.context("Failed to import basebackup")
.map_err(CreateTimelineError::Other)?;
let raw_timeline = self.raw_timeline()?;
fail::fail_point!("before-checkpoint-new-timeline", |_| {
Err(CreateTimelineError::Other(anyhow::anyhow!(
"failpoint before-checkpoint-new-timeline"
)))
});
import_datadir::import_basebackup_from_tar(raw_timeline, copyin_read, base_lsn, ctx)
.await
.context("Failed to import basebackup")?;
Ok(())
})
.await?;
// Flush the new layer files to disk, before we make the timeline as available to
// the outside world.
//
// Flush loop needs to be spawned in order to be able to flush.
raw_timeline.maybe_spawn_flush_loop();
fail::fail_point!("before-checkpoint-new-timeline", |_| {
anyhow::bail!("failpoint before-checkpoint-new-timeline");
});
raw_timeline
.freeze_and_flush()
.await
.context("Failed to flush after basebackup import")?;
// All the data has been imported. Insert the Timeline into the tenant's timelines map
let tl = self.finish_creation().await?;
let tl = self.finish_creation()?;
tl.activate(tenant, broker_client, None, ctx);
Ok(tl)
}
@@ -197,19 +143,12 @@ impl<'t> UninitializedTimeline<'t> {
impl Drop for UninitializedTimeline<'_> {
fn drop(&mut self) {
if let Some((timeline, create_guard)) = self.raw_timeline.take() {
if let Some((_, create_guard)) = self.raw_timeline.take() {
let _entered = info_span!("drop_uninitialized_timeline", tenant_id = %self.owning_tenant.tenant_shard_id.tenant_id, shard_id = %self.owning_tenant.tenant_shard_id.shard_slug(), timeline_id = %self.timeline_id).entered();
if self.needs_shutdown && !timeline.gate.close_complete() {
// This should not happen: caller should call [`Self::abort`] on failures
tracing::warn!(
"Timeline not shut down after initialization failure, cannot clean up files"
);
} else {
// This is unusual, but can happen harmlessly if the pageserver is stopped while
// creating a timeline.
info!("Timeline got dropped without initializing, cleaning its files");
cleanup_timeline_directory(create_guard);
}
// This is unusual, but can happen harmlessly if the pageserver is stopped while
// creating a timeline.
info!("Timeline got dropped without initializing, cleaning its files");
cleanup_timeline_directory(create_guard);
}
}
}

View File

@@ -234,19 +234,6 @@ impl VirtualFile {
) -> (FullSlice<Buf>, Result<usize, Error>) {
self.inner.write_all(buf, ctx).await
}
async fn read_to_end(&mut self, buf: &mut Vec<u8>, ctx: &RequestContext) -> Result<(), Error> {
self.inner.read_to_end(buf, ctx).await
}
pub(crate) async fn read_to_string(
&mut self,
ctx: &RequestContext,
) -> Result<String, anyhow::Error> {
let mut buf = Vec::new();
self.read_to_end(&mut buf, ctx).await?;
Ok(String::from_utf8(buf)?)
}
}
/// Indicates whether to enable fsync, fdatasync, or O_SYNC/O_DSYNC when writing
@@ -1006,24 +993,6 @@ impl VirtualFileInner {
(buf, result)
})
}
async fn read_to_end(&mut self, buf: &mut Vec<u8>, ctx: &RequestContext) -> Result<(), Error> {
let mut tmp = vec![0; 128];
loop {
let slice = tmp.slice(..128);
let (slice, res) = self.read_at(slice, self.pos, ctx).await;
match res {
Ok(0) => return Ok(()),
Ok(n) => {
self.pos += n as u64;
buf.extend_from_slice(&slice[..n]);
}
Err(ref e) if e.kind() == std::io::ErrorKind::Interrupted => {}
Err(e) => return Err(e),
}
tmp = slice.into_inner();
}
}
}
// Adapted from https://doc.rust-lang.org/1.72.0/src/std/os/unix/fs.rs.html#117-135
@@ -1268,6 +1237,10 @@ impl VirtualFile {
) -> Result<crate::tenant::block_io::BlockLease<'_>, std::io::Error> {
self.inner.read_blk(blknum, ctx).await
}
async fn read_to_end(&mut self, buf: &mut Vec<u8>, ctx: &RequestContext) -> Result<(), Error> {
self.inner.read_to_end(buf, ctx).await
}
}
#[cfg(test)]
@@ -1287,6 +1260,24 @@ impl VirtualFileInner {
slice.into_inner(),
))
}
async fn read_to_end(&mut self, buf: &mut Vec<u8>, ctx: &RequestContext) -> Result<(), Error> {
let mut tmp = vec![0; 128];
loop {
let slice = tmp.slice(..128);
let (slice, res) = self.read_at(slice, self.pos, ctx).await;
match res {
Ok(0) => return Ok(()),
Ok(n) => {
self.pos += n as u64;
buf.extend_from_slice(&slice[..n]);
}
Err(ref e) if e.kind() == std::io::ErrorKind::Interrupted => {}
Err(e) => return Err(e),
}
tmp = slice.into_inner();
}
}
}
impl Drop for VirtualFileInner {

View File

@@ -24,9 +24,9 @@ bytes = { workspace = true, features = ["serde"] }
camino.workspace = true
chrono.workspace = true
clap = { workspace = true, features = ["derive", "env"] }
clashmap.workspace = true
compute_api.workspace = true
consumption_metrics.workspace = true
dashmap.workspace = true
env_logger.workspace = true
framed-websockets.workspace = true
futures.workspace = true

View File

@@ -106,7 +106,17 @@ cases where it is hard to use rows represented as objects (e.g. when several fie
Proxy determines project name from the subdomain, request to the `round-rice-566201.somedomain.tld` will be routed to the project named `round-rice-566201`. Unfortunately, `/etc/hosts` does not support domain wildcards, so we can use *.localtest.me` which resolves to `127.0.0.1`.
We will need to have a postgres instance. Assuming that we have set up docker we can set it up as follows:
Let's create self-signed certificate by running:
```sh
openssl req -new -x509 -days 365 -nodes -text -out server.crt -keyout server.key -subj "/CN=*.localtest.me"
```
Then we need to build proxy with 'testing' feature and run, e.g.:
```sh
RUST_LOG=proxy cargo run -p proxy --bin proxy --features testing -- --auth-backend postgres --auth-endpoint 'postgresql://proxy:password@endpoint.localtest.me:5432/postgres' --is-private-access-proxy true -c server.crt -k server.key
```
We will also need to have a postgres instance. Assuming that we have setted up docker we can set it up as follows:
```sh
docker run \
--detach \
@@ -123,18 +133,8 @@ docker exec -it proxy-postgres psql -U postgres -c "CREATE TABLE neon_control_pl
docker exec -it proxy-postgres psql -U postgres -c "CREATE ROLE proxy WITH SUPERUSER LOGIN PASSWORD 'password';"
```
Let's create self-signed certificate by running:
```sh
openssl req -new -x509 -days 365 -nodes -text -out server.crt -keyout server.key -subj "/CN=*.localtest.me"
```
Then we need to build proxy with 'testing' feature and run, e.g.:
```sh
RUST_LOG=proxy cargo run -p proxy --bin proxy --features testing -- --auth-backend postgres --auth-endpoint 'postgresql://postgres:proxy-postgres@127.0.0.1:5432/postgres' -c server.crt -k server.key
```
Now from client you can start a new session:
```sh
PGSSLROOTCERT=./server.crt psql "postgresql://proxy:password@endpoint.localtest.me:4432/postgres?sslmode=verify-full"
```
```

View File

@@ -7,8 +7,8 @@ use thiserror::Error;
use tokio::io::{AsyncRead, AsyncWrite};
use tracing::{info, info_span};
use super::ComputeCredentialKeys;
use crate::auth::backend::ComputeUserInfo;
use super::{ComputeCredentialKeys, ControlPlaneApi};
use crate::auth::backend::{BackendIpAllowlist, ComputeUserInfo};
use crate::auth::IpPattern;
use crate::cache::Cached;
use crate::config::AuthenticationConfig;
@@ -84,15 +84,26 @@ pub(crate) fn new_psql_session_id() -> String {
hex::encode(rand::random::<[u8; 8]>())
}
#[async_trait]
impl BackendIpAllowlist for ConsoleRedirectBackend {
async fn get_allowed_ips(
&self,
ctx: &RequestContext,
user_info: &ComputeUserInfo,
) -> auth::Result<Vec<auth::IpPattern>> {
self.api
.get_allowed_ips_and_secret(ctx, user_info)
.await
.map(|(ips, _)| ips.as_ref().clone())
.map_err(|e| e.into())
}
}
impl ConsoleRedirectBackend {
pub fn new(console_uri: reqwest::Url, api: cplane_proxy_v1::NeonControlPlaneClient) -> Self {
Self { console_uri, api }
}
pub(crate) fn get_api(&self) -> &cplane_proxy_v1::NeonControlPlaneClient {
&self.api
}
pub(crate) async fn authenticate(
&self,
ctx: &RequestContext,
@@ -180,15 +191,6 @@ async fn authenticate(
}
}
// Check if the access over the public internet is allowed, otherwise block. Note that
// the console redirect is not behind the VPC service endpoint, so we don't need to check
// the VPC endpoint ID.
if let Some(public_access_allowed) = db_info.public_access_allowed {
if !public_access_allowed {
return Err(auth::AuthError::NetworkNotAllowed);
}
}
client.write_message_noflush(&Be::NoticeResponse("Connecting to database."))?;
// This config should be self-contained, because we won't

View File

@@ -4,7 +4,7 @@ use std::sync::Arc;
use std::time::{Duration, SystemTime};
use arc_swap::ArcSwapOption;
use clashmap::ClashMap;
use dashmap::DashMap;
use jose_jwk::crypto::KeyInfo;
use reqwest::{redirect, Client};
use reqwest_retry::policies::ExponentialBackoff;
@@ -64,7 +64,7 @@ pub(crate) struct AuthRule {
pub struct JwkCache {
client: reqwest_middleware::ClientWithMiddleware,
map: ClashMap<(EndpointId, RoleName), Arc<JwkCacheEntryLock>>,
map: DashMap<(EndpointId, RoleName), Arc<JwkCacheEntryLock>>,
}
pub(crate) struct JwkCacheEntry {
@@ -469,7 +469,7 @@ impl Default for JwkCache {
JwkCache {
client,
map: ClashMap::default(),
map: DashMap::default(),
}
}
}

View File

@@ -26,12 +26,10 @@ use crate::context::RequestContext;
use crate::control_plane::client::ControlPlaneClient;
use crate::control_plane::errors::GetAuthInfoError;
use crate::control_plane::{
self, AccessBlockerFlags, AuthSecret, CachedAccessBlockerFlags, CachedAllowedIps,
CachedAllowedVpcEndpointIds, CachedNodeInfo, CachedRoleSecret, ControlPlaneApi,
self, AuthSecret, CachedAllowedIps, CachedNodeInfo, CachedRoleSecret, ControlPlaneApi,
};
use crate::intern::EndpointIdInt;
use crate::metrics::Metrics;
use crate::protocol2::ConnectionInfoExtra;
use crate::proxy::connect_compute::ComputeConnectBackend;
use crate::proxy::NeonOptions;
use crate::rate_limiter::{BucketRateLimiter, EndpointRateLimiter};
@@ -101,13 +99,6 @@ impl<T> Backend<'_, T> {
Self::Local(l) => Backend::Local(MaybeOwned::Borrowed(l)),
}
}
pub(crate) fn get_api(&self) -> &ControlPlaneClient {
match self {
Self::ControlPlane(api, _) => api,
Self::Local(_) => panic!("Local backend has no API"),
}
}
}
impl<'a, T> Backend<'a, T> {
@@ -256,6 +247,15 @@ impl AuthenticationConfig {
}
}
#[async_trait::async_trait]
pub(crate) trait BackendIpAllowlist {
async fn get_allowed_ips(
&self,
ctx: &RequestContext,
user_info: &ComputeUserInfo,
) -> auth::Result<Vec<auth::IpPattern>>;
}
/// True to its name, this function encapsulates our current auth trade-offs.
/// Here, we choose the appropriate auth flow based on circumstances.
///
@@ -282,51 +282,23 @@ async fn auth_quirks(
Ok(info) => (info, None),
};
debug!("fetching authentication info and allowlists");
debug!("fetching user's authentication info");
let (allowed_ips, maybe_secret) = api.get_allowed_ips_and_secret(ctx, &info).await?;
// check allowed list
let allowed_ips = if config.ip_allowlist_check_enabled {
let allowed_ips = api.get_allowed_ips(ctx, &info).await?;
if !check_peer_addr_is_in_list(&ctx.peer_addr(), &allowed_ips) {
return Err(auth::AuthError::ip_address_not_allowed(ctx.peer_addr()));
}
allowed_ips
} else {
Cached::new_uncached(Arc::new(vec![]))
};
// check if a VPC endpoint ID is coming in and if yes, if it's allowed
let access_blocks = api.get_block_public_or_vpc_access(ctx, &info).await?;
if config.is_vpc_acccess_proxy {
if access_blocks.vpc_access_blocked {
return Err(AuthError::NetworkNotAllowed);
}
let incoming_vpc_endpoint_id = match ctx.extra() {
None => return Err(AuthError::MissingEndpointName),
Some(ConnectionInfoExtra::Aws { vpce_id }) => {
// Convert the vcpe_id to a string
String::from_utf8(vpce_id.to_vec()).unwrap_or_default()
}
Some(ConnectionInfoExtra::Azure { link_id }) => link_id.to_string(),
};
let allowed_vpc_endpoint_ids = api.get_allowed_vpc_endpoint_ids(ctx, &info).await?;
// TODO: For now an empty VPC endpoint ID list means all are allowed. We should replace that.
if !allowed_vpc_endpoint_ids.is_empty()
&& !allowed_vpc_endpoint_ids.contains(&incoming_vpc_endpoint_id)
{
return Err(AuthError::vpc_endpoint_id_not_allowed(
incoming_vpc_endpoint_id,
));
}
} else if access_blocks.public_access_blocked {
return Err(AuthError::NetworkNotAllowed);
if config.ip_allowlist_check_enabled
&& !check_peer_addr_is_in_list(&ctx.peer_addr(), &allowed_ips)
{
return Err(auth::AuthError::ip_address_not_allowed(ctx.peer_addr()));
}
if !endpoint_rate_limiter.check(info.endpoint.clone().into(), 1) {
return Err(AuthError::too_many_connections());
}
let cached_secret = api.get_role_secret(ctx, &info).await?;
let cached_secret = match maybe_secret {
Some(secret) => secret,
None => api.get_role_secret(ctx, &info).await?,
};
let (cached_entry, secret) = cached_secret.take_value();
let secret = if let Some(secret) = secret {
@@ -468,38 +440,34 @@ impl Backend<'_, ComputeUserInfo> {
}
}
pub(crate) async fn get_allowed_ips(
pub(crate) async fn get_allowed_ips_and_secret(
&self,
ctx: &RequestContext,
) -> Result<CachedAllowedIps, GetAuthInfoError> {
match self {
Self::ControlPlane(api, user_info) => api.get_allowed_ips(ctx, user_info).await,
Self::Local(_) => Ok(Cached::new_uncached(Arc::new(vec![]))),
}
}
pub(crate) async fn get_allowed_vpc_endpoint_ids(
&self,
ctx: &RequestContext,
) -> Result<CachedAllowedVpcEndpointIds, GetAuthInfoError> {
) -> Result<(CachedAllowedIps, Option<CachedRoleSecret>), GetAuthInfoError> {
match self {
Self::ControlPlane(api, user_info) => {
api.get_allowed_vpc_endpoint_ids(ctx, user_info).await
api.get_allowed_ips_and_secret(ctx, user_info).await
}
Self::Local(_) => Ok(Cached::new_uncached(Arc::new(vec![]))),
Self::Local(_) => Ok((Cached::new_uncached(Arc::new(vec![])), None)),
}
}
}
pub(crate) async fn get_block_public_or_vpc_access(
#[async_trait::async_trait]
impl BackendIpAllowlist for Backend<'_, ()> {
async fn get_allowed_ips(
&self,
ctx: &RequestContext,
) -> Result<CachedAccessBlockerFlags, GetAuthInfoError> {
match self {
Self::ControlPlane(api, user_info) => {
api.get_block_public_or_vpc_access(ctx, user_info).await
}
Self::Local(_) => Ok(Cached::new_uncached(AccessBlockerFlags::default())),
}
user_info: &ComputeUserInfo,
) -> auth::Result<Vec<auth::IpPattern>> {
let auth_data = match self {
Self::ControlPlane(api, ()) => api.get_allowed_ips_and_secret(ctx, user_info).await,
Self::Local(_) => Ok((Cached::new_uncached(Arc::new(vec![])), None)),
};
auth_data
.map(|(ips, _)| ips.as_ref().clone())
.map_err(|e| e.into())
}
}
@@ -546,10 +514,7 @@ mod tests {
use crate::auth::{ComputeUserInfoMaybeEndpoint, IpPattern};
use crate::config::AuthenticationConfig;
use crate::context::RequestContext;
use crate::control_plane::{
self, AccessBlockerFlags, CachedAccessBlockerFlags, CachedAllowedIps,
CachedAllowedVpcEndpointIds, CachedNodeInfo, CachedRoleSecret,
};
use crate::control_plane::{self, CachedAllowedIps, CachedNodeInfo, CachedRoleSecret};
use crate::proxy::NeonOptions;
use crate::rate_limiter::{EndpointRateLimiter, RateBucketInfo};
use crate::scram::threadpool::ThreadPool;
@@ -558,8 +523,6 @@ mod tests {
struct Auth {
ips: Vec<IpPattern>,
vpc_endpoint_ids: Vec<String>,
access_blocker_flags: AccessBlockerFlags,
secret: AuthSecret,
}
@@ -572,31 +535,17 @@ mod tests {
Ok(CachedRoleSecret::new_uncached(Some(self.secret.clone())))
}
async fn get_allowed_ips(
async fn get_allowed_ips_and_secret(
&self,
_ctx: &RequestContext,
_user_info: &super::ComputeUserInfo,
) -> Result<CachedAllowedIps, control_plane::errors::GetAuthInfoError> {
Ok(CachedAllowedIps::new_uncached(Arc::new(self.ips.clone())))
}
async fn get_allowed_vpc_endpoint_ids(
&self,
_ctx: &RequestContext,
_user_info: &super::ComputeUserInfo,
) -> Result<CachedAllowedVpcEndpointIds, control_plane::errors::GetAuthInfoError> {
Ok(CachedAllowedVpcEndpointIds::new_uncached(Arc::new(
self.vpc_endpoint_ids.clone(),
)))
}
async fn get_block_public_or_vpc_access(
&self,
_ctx: &RequestContext,
_user_info: &super::ComputeUserInfo,
) -> Result<CachedAccessBlockerFlags, control_plane::errors::GetAuthInfoError> {
Ok(CachedAccessBlockerFlags::new_uncached(
self.access_blocker_flags.clone(),
) -> Result<
(CachedAllowedIps, Option<CachedRoleSecret>),
control_plane::errors::GetAuthInfoError,
> {
Ok((
CachedAllowedIps::new_uncached(Arc::new(self.ips.clone())),
Some(CachedRoleSecret::new_uncached(Some(self.secret.clone()))),
))
}
@@ -626,7 +575,6 @@ mod tests {
rate_limiter: AuthRateLimiter::new(&RateBucketInfo::DEFAULT_AUTH_SET),
rate_limit_ip_subnet: 64,
ip_allowlist_check_enabled: true,
is_vpc_acccess_proxy: false,
is_auth_broker: false,
accept_jwts: false,
console_redirect_confirmation_timeout: std::time::Duration::from_secs(5),
@@ -694,8 +642,6 @@ mod tests {
let ctx = RequestContext::test();
let api = Auth {
ips: vec![],
vpc_endpoint_ids: vec![],
access_blocker_flags: AccessBlockerFlags::default(),
secret: AuthSecret::Scram(ServerSecret::build("my-secret-password").await.unwrap()),
};
@@ -776,8 +722,6 @@ mod tests {
let ctx = RequestContext::test();
let api = Auth {
ips: vec![],
vpc_endpoint_ids: vec![],
access_blocker_flags: AccessBlockerFlags::default(),
secret: AuthSecret::Scram(ServerSecret::build("my-secret-password").await.unwrap()),
};
@@ -830,8 +774,6 @@ mod tests {
let ctx = RequestContext::test();
let api = Auth {
ips: vec![],
vpc_endpoint_ids: vec![],
access_blocker_flags: AccessBlockerFlags::default(),
secret: AuthSecret::Scram(ServerSecret::build("my-secret-password").await.unwrap()),
};

View File

@@ -55,12 +55,6 @@ pub(crate) enum AuthError {
)]
MissingEndpointName,
#[error(
"VPC endpoint ID is not specified. \
This endpoint requires a VPC endpoint ID to connect."
)]
MissingVPCEndpointId,
#[error("password authentication failed for user '{0}'")]
PasswordFailed(Box<str>),
@@ -75,15 +69,6 @@ pub(crate) enum AuthError {
)]
IpAddressNotAllowed(IpAddr),
#[error("This connection is trying to access this endpoint from a blocked network.")]
NetworkNotAllowed,
#[error(
"This VPC endpoint id {0} is not allowed to connect to this endpoint. \
Please add it to the allowed list in the Neon console."
)]
VpcEndpointIdNotAllowed(String),
#[error("Too many connections to this endpoint. Please try again later.")]
TooManyConnections,
@@ -110,10 +95,6 @@ impl AuthError {
AuthError::IpAddressNotAllowed(ip)
}
pub(crate) fn vpc_endpoint_id_not_allowed(id: String) -> Self {
AuthError::VpcEndpointIdNotAllowed(id)
}
pub(crate) fn too_many_connections() -> Self {
AuthError::TooManyConnections
}
@@ -141,11 +122,8 @@ impl UserFacingError for AuthError {
Self::BadAuthMethod(_) => self.to_string(),
Self::MalformedPassword(_) => self.to_string(),
Self::MissingEndpointName => self.to_string(),
Self::MissingVPCEndpointId => self.to_string(),
Self::Io(_) => "Internal error".to_string(),
Self::IpAddressNotAllowed(_) => self.to_string(),
Self::NetworkNotAllowed => self.to_string(),
Self::VpcEndpointIdNotAllowed(_) => self.to_string(),
Self::TooManyConnections => self.to_string(),
Self::UserTimeout(_) => self.to_string(),
Self::ConfirmationTimeout(_) => self.to_string(),
@@ -164,11 +142,8 @@ impl ReportableError for AuthError {
Self::BadAuthMethod(_) => crate::error::ErrorKind::User,
Self::MalformedPassword(_) => crate::error::ErrorKind::User,
Self::MissingEndpointName => crate::error::ErrorKind::User,
Self::MissingVPCEndpointId => crate::error::ErrorKind::User,
Self::Io(_) => crate::error::ErrorKind::ClientDisconnect,
Self::IpAddressNotAllowed(_) => crate::error::ErrorKind::User,
Self::NetworkNotAllowed => crate::error::ErrorKind::User,
Self::VpcEndpointIdNotAllowed(_) => crate::error::ErrorKind::User,
Self::TooManyConnections => crate::error::ErrorKind::RateLimit,
Self::UserTimeout(_) => crate::error::ErrorKind::User,
Self::ConfirmationTimeout(_) => crate::error::ErrorKind::User,

View File

@@ -284,7 +284,6 @@ fn build_config(args: &LocalProxyCliArgs) -> anyhow::Result<&'static ProxyConfig
rate_limiter: BucketRateLimiter::new(vec![]),
rate_limit_ip_subnet: 64,
ip_allowlist_check_enabled: true,
is_vpc_acccess_proxy: false,
is_auth_broker: false,
accept_jwts: true,
console_redirect_confirmation_timeout: Duration::ZERO,

View File

@@ -505,13 +505,6 @@ async fn main() -> anyhow::Result<()> {
}
}
if let Some(mut redis_kv_client) = redis_kv_client {
maintenance_tasks.spawn(async move {
redis_kv_client.try_connect().await?;
handle_cancel_messages(&mut redis_kv_client, rx_cancel).await
});
}
if let Some(regional_redis_client) = regional_redis_client {
let cache = api.caches.endpoints_cache.clone();
let con = regional_redis_client;
@@ -524,6 +517,15 @@ async fn main() -> anyhow::Result<()> {
}
}
if let Some(mut redis_kv_client) = redis_kv_client {
maintenance_tasks.spawn(async move {
if let Err(err) = redis_kv_client.try_connect().await {
tracing::error!(?err, "could not connect to redis")
}
handle_cancel_messages(&mut redis_kv_client, rx_cancel).await
});
}
let maintenance = loop {
// get one complete task
match futures::future::select(
@@ -630,7 +632,6 @@ fn build_config(args: &ProxyCliArgs) -> anyhow::Result<&'static ProxyConfig> {
rate_limiter: AuthRateLimiter::new(args.auth_rate_limit.clone()),
rate_limit_ip_subnet: args.auth_rate_limit_ip_subnet,
ip_allowlist_check_enabled: !args.is_private_access_proxy,
is_vpc_acccess_proxy: args.is_private_access_proxy,
is_auth_broker: args.is_auth_broker,
accept_jwts: args.is_auth_broker,
console_redirect_confirmation_timeout: args.webauth_confirmation_timeout,

View File

@@ -3,7 +3,7 @@ use std::future::pending;
use std::sync::atomic::{AtomicBool, Ordering};
use std::sync::{Arc, Mutex};
use clashmap::ClashSet;
use dashmap::DashSet;
use redis::streams::{StreamReadOptions, StreamReadReply};
use redis::{AsyncCommands, FromRedisValue, Value};
use serde::Deserialize;
@@ -55,9 +55,9 @@ impl TryFrom<&Value> for ControlPlaneEvent {
pub struct EndpointsCache {
config: EndpointCacheConfig,
endpoints: ClashSet<EndpointIdInt>,
branches: ClashSet<BranchIdInt>,
projects: ClashSet<ProjectIdInt>,
endpoints: DashSet<EndpointIdInt>,
branches: DashSet<BranchIdInt>,
projects: DashSet<ProjectIdInt>,
ready: AtomicBool,
limiter: Arc<Mutex<GlobalRateLimiter>>,
}
@@ -69,9 +69,9 @@ impl EndpointsCache {
config.limiter_info.clone(),
))),
config,
endpoints: ClashSet::new(),
branches: ClashSet::new(),
projects: ClashSet::new(),
endpoints: DashSet::new(),
branches: DashSet::new(),
projects: DashSet::new(),
ready: AtomicBool::new(false),
}
}

View File

@@ -5,7 +5,7 @@ use std::sync::Arc;
use std::time::Duration;
use async_trait::async_trait;
use clashmap::ClashMap;
use dashmap::DashMap;
use rand::{thread_rng, Rng};
use smol_str::SmolStr;
use tokio::sync::Mutex;
@@ -15,16 +15,13 @@ use tracing::{debug, info};
use super::{Cache, Cached};
use crate::auth::IpPattern;
use crate::config::ProjectInfoCacheOptions;
use crate::control_plane::{AccessBlockerFlags, AuthSecret};
use crate::intern::{AccountIdInt, EndpointIdInt, ProjectIdInt, RoleNameInt};
use crate::control_plane::AuthSecret;
use crate::intern::{EndpointIdInt, ProjectIdInt, RoleNameInt};
use crate::types::{EndpointId, RoleName};
#[async_trait]
pub(crate) trait ProjectInfoCache {
fn invalidate_allowed_ips_for_project(&self, project_id: ProjectIdInt);
fn invalidate_allowed_vpc_endpoint_ids_for_projects(&self, project_ids: Vec<ProjectIdInt>);
fn invalidate_allowed_vpc_endpoint_ids_for_org(&self, account_id: AccountIdInt);
fn invalidate_block_public_or_vpc_access_for_project(&self, project_id: ProjectIdInt);
fn invalidate_role_secret_for_project(&self, project_id: ProjectIdInt, role_name: RoleNameInt);
async fn decrement_active_listeners(&self);
async fn increment_active_listeners(&self);
@@ -54,8 +51,6 @@ impl<T> From<T> for Entry<T> {
struct EndpointInfo {
secret: std::collections::HashMap<RoleNameInt, Entry<Option<AuthSecret>>>,
allowed_ips: Option<Entry<Arc<Vec<IpPattern>>>>,
block_public_or_vpc_access: Option<Entry<AccessBlockerFlags>>,
allowed_vpc_endpoint_ids: Option<Entry<Arc<Vec<String>>>>,
}
impl EndpointInfo {
@@ -97,52 +92,9 @@ impl EndpointInfo {
}
None
}
pub(crate) fn get_allowed_vpc_endpoint_ids(
&self,
valid_since: Instant,
ignore_cache_since: Option<Instant>,
) -> Option<(Arc<Vec<String>>, bool)> {
if let Some(allowed_vpc_endpoint_ids) = &self.allowed_vpc_endpoint_ids {
if valid_since < allowed_vpc_endpoint_ids.created_at {
return Some((
allowed_vpc_endpoint_ids.value.clone(),
Self::check_ignore_cache(
ignore_cache_since,
allowed_vpc_endpoint_ids.created_at,
),
));
}
}
None
}
pub(crate) fn get_block_public_or_vpc_access(
&self,
valid_since: Instant,
ignore_cache_since: Option<Instant>,
) -> Option<(AccessBlockerFlags, bool)> {
if let Some(block_public_or_vpc_access) = &self.block_public_or_vpc_access {
if valid_since < block_public_or_vpc_access.created_at {
return Some((
block_public_or_vpc_access.value.clone(),
Self::check_ignore_cache(
ignore_cache_since,
block_public_or_vpc_access.created_at,
),
));
}
}
None
}
pub(crate) fn invalidate_allowed_ips(&mut self) {
self.allowed_ips = None;
}
pub(crate) fn invalidate_allowed_vpc_endpoint_ids(&mut self) {
self.allowed_vpc_endpoint_ids = None;
}
pub(crate) fn invalidate_block_public_or_vpc_access(&mut self) {
self.block_public_or_vpc_access = None;
}
pub(crate) fn invalidate_role_secret(&mut self, role_name: RoleNameInt) {
self.secret.remove(&role_name);
}
@@ -156,11 +108,9 @@ impl EndpointInfo {
/// One may ask, why the data is stored per project, when on the user request there is only data about the endpoint available?
/// On the cplane side updates are done per project (or per branch), so it's easier to invalidate the whole project cache.
pub struct ProjectInfoCacheImpl {
cache: ClashMap<EndpointIdInt, EndpointInfo>,
cache: DashMap<EndpointIdInt, EndpointInfo>,
project2ep: ClashMap<ProjectIdInt, HashSet<EndpointIdInt>>,
// FIXME(stefan): we need a way to GC the account2ep map.
account2ep: ClashMap<AccountIdInt, HashSet<EndpointIdInt>>,
project2ep: DashMap<ProjectIdInt, HashSet<EndpointIdInt>>,
config: ProjectInfoCacheOptions,
start_time: Instant,
@@ -170,63 +120,6 @@ pub struct ProjectInfoCacheImpl {
#[async_trait]
impl ProjectInfoCache for ProjectInfoCacheImpl {
fn invalidate_allowed_vpc_endpoint_ids_for_projects(&self, project_ids: Vec<ProjectIdInt>) {
info!(
"invalidating allowed vpc endpoint ids for projects `{}`",
project_ids
.iter()
.map(|id| id.to_string())
.collect::<Vec<_>>()
.join(", ")
);
for project_id in project_ids {
let endpoints = self
.project2ep
.get(&project_id)
.map(|kv| kv.value().clone())
.unwrap_or_default();
for endpoint_id in endpoints {
if let Some(mut endpoint_info) = self.cache.get_mut(&endpoint_id) {
endpoint_info.invalidate_allowed_vpc_endpoint_ids();
}
}
}
}
fn invalidate_allowed_vpc_endpoint_ids_for_org(&self, account_id: AccountIdInt) {
info!(
"invalidating allowed vpc endpoint ids for org `{}`",
account_id
);
let endpoints = self
.account2ep
.get(&account_id)
.map(|kv| kv.value().clone())
.unwrap_or_default();
for endpoint_id in endpoints {
if let Some(mut endpoint_info) = self.cache.get_mut(&endpoint_id) {
endpoint_info.invalidate_allowed_vpc_endpoint_ids();
}
}
}
fn invalidate_block_public_or_vpc_access_for_project(&self, project_id: ProjectIdInt) {
info!(
"invalidating block public or vpc access for project `{}`",
project_id
);
let endpoints = self
.project2ep
.get(&project_id)
.map(|kv| kv.value().clone())
.unwrap_or_default();
for endpoint_id in endpoints {
if let Some(mut endpoint_info) = self.cache.get_mut(&endpoint_id) {
endpoint_info.invalidate_block_public_or_vpc_access();
}
}
}
fn invalidate_allowed_ips_for_project(&self, project_id: ProjectIdInt) {
info!("invalidating allowed ips for project `{}`", project_id);
let endpoints = self
@@ -283,9 +176,8 @@ impl ProjectInfoCache for ProjectInfoCacheImpl {
impl ProjectInfoCacheImpl {
pub(crate) fn new(config: ProjectInfoCacheOptions) -> Self {
Self {
cache: ClashMap::new(),
project2ep: ClashMap::new(),
account2ep: ClashMap::new(),
cache: DashMap::new(),
project2ep: DashMap::new(),
config,
ttl_disabled_since_us: AtomicU64::new(u64::MAX),
start_time: Instant::now(),
@@ -334,49 +226,6 @@ impl ProjectInfoCacheImpl {
}
Some(Cached::new_uncached(value))
}
pub(crate) fn get_allowed_vpc_endpoint_ids(
&self,
endpoint_id: &EndpointId,
) -> Option<Cached<&Self, Arc<Vec<String>>>> {
let endpoint_id = EndpointIdInt::get(endpoint_id)?;
let (valid_since, ignore_cache_since) = self.get_cache_times();
let endpoint_info = self.cache.get(&endpoint_id)?;
let value = endpoint_info.get_allowed_vpc_endpoint_ids(valid_since, ignore_cache_since);
let (value, ignore_cache) = value?;
if !ignore_cache {
let cached = Cached {
token: Some((
self,
CachedLookupInfo::new_allowed_vpc_endpoint_ids(endpoint_id),
)),
value,
};
return Some(cached);
}
Some(Cached::new_uncached(value))
}
pub(crate) fn get_block_public_or_vpc_access(
&self,
endpoint_id: &EndpointId,
) -> Option<Cached<&Self, AccessBlockerFlags>> {
let endpoint_id = EndpointIdInt::get(endpoint_id)?;
let (valid_since, ignore_cache_since) = self.get_cache_times();
let endpoint_info = self.cache.get(&endpoint_id)?;
let value = endpoint_info.get_block_public_or_vpc_access(valid_since, ignore_cache_since);
let (value, ignore_cache) = value?;
if !ignore_cache {
let cached = Cached {
token: Some((
self,
CachedLookupInfo::new_block_public_or_vpc_access(endpoint_id),
)),
value,
};
return Some(cached);
}
Some(Cached::new_uncached(value))
}
pub(crate) fn insert_role_secret(
&self,
project_id: ProjectIdInt,
@@ -407,43 +256,6 @@ impl ProjectInfoCacheImpl {
self.insert_project2endpoint(project_id, endpoint_id);
self.cache.entry(endpoint_id).or_default().allowed_ips = Some(allowed_ips.into());
}
pub(crate) fn insert_allowed_vpc_endpoint_ids(
&self,
account_id: Option<AccountIdInt>,
project_id: ProjectIdInt,
endpoint_id: EndpointIdInt,
allowed_vpc_endpoint_ids: Arc<Vec<String>>,
) {
if self.cache.len() >= self.config.size {
// If there are too many entries, wait until the next gc cycle.
return;
}
if let Some(account_id) = account_id {
self.insert_account2endpoint(account_id, endpoint_id);
}
self.insert_project2endpoint(project_id, endpoint_id);
self.cache
.entry(endpoint_id)
.or_default()
.allowed_vpc_endpoint_ids = Some(allowed_vpc_endpoint_ids.into());
}
pub(crate) fn insert_block_public_or_vpc_access(
&self,
project_id: ProjectIdInt,
endpoint_id: EndpointIdInt,
access_blockers: AccessBlockerFlags,
) {
if self.cache.len() >= self.config.size {
// If there are too many entries, wait until the next gc cycle.
return;
}
self.insert_project2endpoint(project_id, endpoint_id);
self.cache
.entry(endpoint_id)
.or_default()
.block_public_or_vpc_access = Some(access_blockers.into());
}
fn insert_project2endpoint(&self, project_id: ProjectIdInt, endpoint_id: EndpointIdInt) {
if let Some(mut endpoints) = self.project2ep.get_mut(&project_id) {
endpoints.insert(endpoint_id);
@@ -452,14 +264,6 @@ impl ProjectInfoCacheImpl {
.insert(project_id, HashSet::from([endpoint_id]));
}
}
fn insert_account2endpoint(&self, account_id: AccountIdInt, endpoint_id: EndpointIdInt) {
if let Some(mut endpoints) = self.account2ep.get_mut(&account_id) {
endpoints.insert(endpoint_id);
} else {
self.account2ep
.insert(account_id, HashSet::from([endpoint_id]));
}
}
fn get_cache_times(&self) -> (Instant, Option<Instant>) {
let mut valid_since = Instant::now() - self.config.ttl;
// Only ignore cache if ttl is disabled.
@@ -498,7 +302,7 @@ impl ProjectInfoCacheImpl {
let mut removed = 0;
let shard = self.project2ep.shards()[shard].write();
for (_, endpoints) in shard.iter() {
for endpoint in endpoints {
for endpoint in endpoints.get() {
self.cache.remove(endpoint);
removed += 1;
}
@@ -530,25 +334,11 @@ impl CachedLookupInfo {
lookup_type: LookupType::AllowedIps,
}
}
pub(self) fn new_allowed_vpc_endpoint_ids(endpoint_id: EndpointIdInt) -> Self {
Self {
endpoint_id,
lookup_type: LookupType::AllowedVpcEndpointIds,
}
}
pub(self) fn new_block_public_or_vpc_access(endpoint_id: EndpointIdInt) -> Self {
Self {
endpoint_id,
lookup_type: LookupType::BlockPublicOrVpcAccess,
}
}
}
enum LookupType {
RoleSecret(RoleNameInt),
AllowedIps,
AllowedVpcEndpointIds,
BlockPublicOrVpcAccess,
}
impl Cache for ProjectInfoCacheImpl {
@@ -570,16 +360,6 @@ impl Cache for ProjectInfoCacheImpl {
endpoint_info.invalidate_allowed_ips();
}
}
LookupType::AllowedVpcEndpointIds => {
if let Some(mut endpoint_info) = self.cache.get_mut(&key.endpoint_id) {
endpoint_info.invalidate_allowed_vpc_endpoint_ids();
}
}
LookupType::BlockPublicOrVpcAccess => {
if let Some(mut endpoint_info) = self.cache.get_mut(&key.endpoint_id) {
endpoint_info.invalidate_block_public_or_vpc_access();
}
}
}
}
}

View File

@@ -1,4 +1,3 @@
use std::convert::Infallible;
use std::net::{IpAddr, SocketAddr};
use std::sync::Arc;
@@ -9,22 +8,23 @@ use pq_proto::CancelKeyData;
use serde::{Deserialize, Serialize};
use thiserror::Error;
use tokio::net::TcpStream;
use tokio::sync::{mpsc, oneshot};
use tokio::sync::mpsc;
use tracing::{debug, info};
use crate::auth::backend::ComputeUserInfo;
use crate::auth::backend::{BackendIpAllowlist, ComputeUserInfo};
use crate::auth::{check_peer_addr_is_in_list, AuthError};
use crate::config::ComputeConfig;
use crate::context::RequestContext;
use crate::control_plane::ControlPlaneApi;
use crate::error::ReportableError;
use crate::ext::LockExt;
use crate::metrics::{CancelChannelSizeGuard, CancellationRequest, Metrics, RedisMsgKind};
use crate::protocol2::ConnectionInfoExtra;
use crate::metrics::CancelChannelSizeGuard;
use crate::metrics::{CancellationRequest, Metrics, RedisMsgKind};
use crate::rate_limiter::LeakyBucketRateLimiter;
use crate::redis::keys::KeyPrefix;
use crate::redis::kv_ops::RedisKVClient;
use crate::tls::postgres_rustls::MakeRustlsConnect;
use std::convert::Infallible;
use tokio::sync::oneshot;
type IpSubnetKey = IpNet;
@@ -135,9 +135,6 @@ pub(crate) enum CancelError {
#[error("IP is not allowed")]
IpNotAllowed,
#[error("VPC endpoint id is not allowed to connect")]
VpcEndpointIdNotAllowed,
#[error("Authentication backend error")]
AuthError(#[from] AuthError),
@@ -157,9 +154,8 @@ impl ReportableError for CancelError {
}
CancelError::Postgres(_) => crate::error::ErrorKind::Compute,
CancelError::RateLimit => crate::error::ErrorKind::RateLimit,
CancelError::IpNotAllowed
| CancelError::VpcEndpointIdNotAllowed
| CancelError::NotFound => crate::error::ErrorKind::User,
CancelError::IpNotAllowed => crate::error::ErrorKind::User,
CancelError::NotFound => crate::error::ErrorKind::User,
CancelError::AuthError(_) => crate::error::ErrorKind::ControlPlane,
CancelError::InternalError => crate::error::ErrorKind::Service,
}
@@ -271,12 +267,11 @@ impl CancellationHandler {
/// Will fetch IP allowlist internally.
///
/// return Result primarily for tests
pub(crate) async fn cancel_session<T: ControlPlaneApi>(
pub(crate) async fn cancel_session<T: BackendIpAllowlist>(
&self,
key: CancelKeyData,
ctx: RequestContext,
check_ip_allowed: bool,
check_vpc_allowed: bool,
check_allowed: bool,
auth_backend: &T,
) -> Result<(), CancelError> {
let subnet_key = match ctx.peer_addr() {
@@ -311,11 +306,11 @@ impl CancellationHandler {
return Err(CancelError::NotFound);
};
if check_ip_allowed {
if check_allowed {
let ip_allowlist = auth_backend
.get_allowed_ips(&ctx, &cancel_closure.user_info)
.await
.map_err(|e| CancelError::AuthError(e.into()))?;
.map_err(CancelError::AuthError)?;
if !check_peer_addr_is_in_list(&ctx.peer_addr(), &ip_allowlist) {
// log it here since cancel_session could be spawned in a task
@@ -327,40 +322,6 @@ impl CancellationHandler {
}
}
// check if a VPC endpoint ID is coming in and if yes, if it's allowed
let access_blocks = auth_backend
.get_block_public_or_vpc_access(&ctx, &cancel_closure.user_info)
.await
.map_err(|e| CancelError::AuthError(e.into()))?;
if check_vpc_allowed {
if access_blocks.vpc_access_blocked {
return Err(CancelError::AuthError(AuthError::NetworkNotAllowed));
}
let incoming_vpc_endpoint_id = match ctx.extra() {
None => return Err(CancelError::AuthError(AuthError::MissingVPCEndpointId)),
Some(ConnectionInfoExtra::Aws { vpce_id }) => {
// Convert the vcpe_id to a string
String::from_utf8(vpce_id.to_vec()).unwrap_or_default()
}
Some(ConnectionInfoExtra::Azure { link_id }) => link_id.to_string(),
};
let allowed_vpc_endpoint_ids = auth_backend
.get_allowed_vpc_endpoint_ids(&ctx, &cancel_closure.user_info)
.await
.map_err(|e| CancelError::AuthError(e.into()))?;
// TODO: For now an empty VPC endpoint ID list means all are allowed. We should replace that.
if !allowed_vpc_endpoint_ids.is_empty()
&& !allowed_vpc_endpoint_ids.contains(&incoming_vpc_endpoint_id)
{
return Err(CancelError::VpcEndpointIdNotAllowed);
}
} else if access_blocks.public_access_blocked {
return Err(CancelError::VpcEndpointIdNotAllowed);
}
Metrics::get()
.proxy
.cancellation_requests_total

View File

@@ -68,7 +68,6 @@ pub struct AuthenticationConfig {
pub rate_limiter: AuthRateLimiter,
pub rate_limit_ip_subnet: u8,
pub ip_allowlist_check_enabled: bool,
pub is_vpc_acccess_proxy: bool,
pub jwks_cache: JwkCache,
pub is_auth_broker: bool,
pub accept_jwts: bool,

View File

@@ -182,8 +182,7 @@ pub(crate) async fn handle_client<S: AsyncRead + AsyncWrite + Unpin>(
cancel_key_data,
ctx,
config.authentication_config.ip_allowlist_check_enabled,
config.authentication_config.is_vpc_acccess_proxy,
backend.get_api(),
backend,
)
.await
.inspect_err(|e | debug!(error = ?e, "cancel_session failed")).ok();

View File

@@ -19,7 +19,7 @@ use crate::intern::{BranchIdInt, ProjectIdInt};
use crate::metrics::{
ConnectOutcome, InvalidEndpointsGroup, LatencyTimer, Metrics, Protocol, Waiting,
};
use crate::protocol2::{ConnectionInfo, ConnectionInfoExtra};
use crate::protocol2::ConnectionInfo;
use crate::types::{DbName, EndpointId, RoleName};
pub mod parquet;
@@ -312,15 +312,6 @@ impl RequestContext {
.ip()
}
pub(crate) fn extra(&self) -> Option<ConnectionInfoExtra> {
self.0
.try_lock()
.expect("should not deadlock")
.conn_info
.extra
.clone()
}
pub(crate) fn cold_start_info(&self) -> ColdStartInfo {
self.0
.try_lock()

View File

@@ -22,8 +22,7 @@ use crate::control_plane::errors::{
use crate::control_plane::locks::ApiLocks;
use crate::control_plane::messages::{ColdStartInfo, EndpointJwksResponse, Reason};
use crate::control_plane::{
AccessBlockerFlags, AuthInfo, AuthSecret, CachedAccessBlockerFlags, CachedAllowedIps,
CachedAllowedVpcEndpointIds, CachedNodeInfo, CachedRoleSecret, NodeInfo,
AuthInfo, AuthSecret, CachedAllowedIps, CachedNodeInfo, CachedRoleSecret, NodeInfo,
};
use crate::metrics::{CacheOutcome, Metrics};
use crate::rate_limiter::WakeComputeRateLimiter;
@@ -138,6 +137,9 @@ impl NeonControlPlaneClient {
}
};
// Ivan: don't know where it will be used, so I leave it here
let _endpoint_vpc_ids = body.allowed_vpc_endpoint_ids.unwrap_or_default();
let secret = if body.role_secret.is_empty() {
None
} else {
@@ -151,23 +153,10 @@ impl NeonControlPlaneClient {
.proxy
.allowed_ips_number
.observe(allowed_ips.len() as f64);
let allowed_vpc_endpoint_ids = body.allowed_vpc_endpoint_ids.unwrap_or_default();
Metrics::get()
.proxy
.allowed_vpc_endpoint_ids
.observe(allowed_vpc_endpoint_ids.len() as f64);
let block_public_connections = body.block_public_connections.unwrap_or_default();
let block_vpc_connections = body.block_vpc_connections.unwrap_or_default();
Ok(AuthInfo {
secret,
allowed_ips,
allowed_vpc_endpoint_ids,
project_id: body.project_id,
account_id: body.account_id,
access_blocker_flags: AccessBlockerFlags {
public_access_blocked: block_public_connections,
vpc_access_blocked: block_vpc_connections,
},
})
}
.inspect_err(|e| tracing::debug!(error = ?e))
@@ -310,7 +299,6 @@ impl super::ControlPlaneApi for NeonControlPlaneClient {
return Ok(role_secret);
}
let auth_info = self.do_get_auth_info(ctx, user_info).await?;
let account_id = auth_info.account_id;
if let Some(project_id) = auth_info.project_id {
let normalized_ep_int = normalized_ep.into();
self.caches.project_info.insert_role_secret(
@@ -324,35 +312,24 @@ impl super::ControlPlaneApi for NeonControlPlaneClient {
normalized_ep_int,
Arc::new(auth_info.allowed_ips),
);
self.caches.project_info.insert_allowed_vpc_endpoint_ids(
account_id,
project_id,
normalized_ep_int,
Arc::new(auth_info.allowed_vpc_endpoint_ids),
);
self.caches.project_info.insert_block_public_or_vpc_access(
project_id,
normalized_ep_int,
auth_info.access_blocker_flags,
);
ctx.set_project_id(project_id);
}
// When we just got a secret, we don't need to invalidate it.
Ok(Cached::new_uncached(auth_info.secret))
}
async fn get_allowed_ips(
async fn get_allowed_ips_and_secret(
&self,
ctx: &RequestContext,
user_info: &ComputeUserInfo,
) -> Result<CachedAllowedIps, GetAuthInfoError> {
) -> Result<(CachedAllowedIps, Option<CachedRoleSecret>), GetAuthInfoError> {
let normalized_ep = &user_info.endpoint.normalize();
if let Some(allowed_ips) = self.caches.project_info.get_allowed_ips(normalized_ep) {
Metrics::get()
.proxy
.allowed_ips_cache_misses // TODO SR: Should we rename this variable to something like allowed_ip_cache_stats?
.allowed_ips_cache_misses
.inc(CacheOutcome::Hit);
return Ok(allowed_ips);
return Ok((allowed_ips, None));
}
Metrics::get()
.proxy
@@ -360,10 +337,7 @@ impl super::ControlPlaneApi for NeonControlPlaneClient {
.inc(CacheOutcome::Miss);
let auth_info = self.do_get_auth_info(ctx, user_info).await?;
let allowed_ips = Arc::new(auth_info.allowed_ips);
let allowed_vpc_endpoint_ids = Arc::new(auth_info.allowed_vpc_endpoint_ids);
let access_blocker_flags = auth_info.access_blocker_flags;
let user = &user_info.user;
let account_id = auth_info.account_id;
if let Some(project_id) = auth_info.project_id {
let normalized_ep_int = normalized_ep.into();
self.caches.project_info.insert_role_secret(
@@ -377,136 +351,12 @@ impl super::ControlPlaneApi for NeonControlPlaneClient {
normalized_ep_int,
allowed_ips.clone(),
);
self.caches.project_info.insert_allowed_vpc_endpoint_ids(
account_id,
project_id,
normalized_ep_int,
allowed_vpc_endpoint_ids.clone(),
);
self.caches.project_info.insert_block_public_or_vpc_access(
project_id,
normalized_ep_int,
access_blocker_flags,
);
ctx.set_project_id(project_id);
}
Ok(Cached::new_uncached(allowed_ips))
}
async fn get_allowed_vpc_endpoint_ids(
&self,
ctx: &RequestContext,
user_info: &ComputeUserInfo,
) -> Result<CachedAllowedVpcEndpointIds, GetAuthInfoError> {
let normalized_ep = &user_info.endpoint.normalize();
if let Some(allowed_vpc_endpoint_ids) = self
.caches
.project_info
.get_allowed_vpc_endpoint_ids(normalized_ep)
{
Metrics::get()
.proxy
.vpc_endpoint_id_cache_stats
.inc(CacheOutcome::Hit);
return Ok(allowed_vpc_endpoint_ids);
}
Metrics::get()
.proxy
.vpc_endpoint_id_cache_stats
.inc(CacheOutcome::Miss);
let auth_info = self.do_get_auth_info(ctx, user_info).await?;
let allowed_ips = Arc::new(auth_info.allowed_ips);
let allowed_vpc_endpoint_ids = Arc::new(auth_info.allowed_vpc_endpoint_ids);
let access_blocker_flags = auth_info.access_blocker_flags;
let user = &user_info.user;
let account_id = auth_info.account_id;
if let Some(project_id) = auth_info.project_id {
let normalized_ep_int = normalized_ep.into();
self.caches.project_info.insert_role_secret(
project_id,
normalized_ep_int,
user.into(),
auth_info.secret.clone(),
);
self.caches.project_info.insert_allowed_ips(
project_id,
normalized_ep_int,
allowed_ips.clone(),
);
self.caches.project_info.insert_allowed_vpc_endpoint_ids(
account_id,
project_id,
normalized_ep_int,
allowed_vpc_endpoint_ids.clone(),
);
self.caches.project_info.insert_block_public_or_vpc_access(
project_id,
normalized_ep_int,
access_blocker_flags,
);
ctx.set_project_id(project_id);
}
Ok(Cached::new_uncached(allowed_vpc_endpoint_ids))
}
async fn get_block_public_or_vpc_access(
&self,
ctx: &RequestContext,
user_info: &ComputeUserInfo,
) -> Result<CachedAccessBlockerFlags, GetAuthInfoError> {
let normalized_ep = &user_info.endpoint.normalize();
if let Some(access_blocker_flags) = self
.caches
.project_info
.get_block_public_or_vpc_access(normalized_ep)
{
Metrics::get()
.proxy
.access_blocker_flags_cache_stats
.inc(CacheOutcome::Hit);
return Ok(access_blocker_flags);
}
Metrics::get()
.proxy
.access_blocker_flags_cache_stats
.inc(CacheOutcome::Miss);
let auth_info = self.do_get_auth_info(ctx, user_info).await?;
let allowed_ips = Arc::new(auth_info.allowed_ips);
let allowed_vpc_endpoint_ids = Arc::new(auth_info.allowed_vpc_endpoint_ids);
let access_blocker_flags = auth_info.access_blocker_flags;
let user = &user_info.user;
let account_id = auth_info.account_id;
if let Some(project_id) = auth_info.project_id {
let normalized_ep_int = normalized_ep.into();
self.caches.project_info.insert_role_secret(
project_id,
normalized_ep_int,
user.into(),
auth_info.secret.clone(),
);
self.caches.project_info.insert_allowed_ips(
project_id,
normalized_ep_int,
allowed_ips.clone(),
);
self.caches.project_info.insert_allowed_vpc_endpoint_ids(
account_id,
project_id,
normalized_ep_int,
allowed_vpc_endpoint_ids.clone(),
);
self.caches.project_info.insert_block_public_or_vpc_access(
project_id,
normalized_ep_int,
access_blocker_flags.clone(),
);
ctx.set_project_id(project_id);
}
Ok(Cached::new_uncached(access_blocker_flags))
Ok((
Cached::new_uncached(allowed_ips),
Some(Cached::new_uncached(auth_info.secret)),
))
}
#[tracing::instrument(skip_all)]

View File

@@ -13,14 +13,12 @@ use crate::auth::backend::ComputeUserInfo;
use crate::auth::IpPattern;
use crate::cache::Cached;
use crate::context::RequestContext;
use crate::control_plane::client::{
CachedAllowedIps, CachedAllowedVpcEndpointIds, CachedRoleSecret,
};
use crate::control_plane::client::{CachedAllowedIps, CachedRoleSecret};
use crate::control_plane::errors::{
ControlPlaneError, GetAuthInfoError, GetEndpointJwksError, WakeComputeError,
};
use crate::control_plane::messages::MetricsAuxInfo;
use crate::control_plane::{AccessBlockerFlags, AuthInfo, AuthSecret, CachedNodeInfo, NodeInfo};
use crate::control_plane::{AuthInfo, AuthSecret, CachedNodeInfo, NodeInfo};
use crate::error::io_error;
use crate::intern::RoleNameInt;
use crate::types::{BranchId, EndpointId, ProjectId, RoleName};
@@ -123,10 +121,7 @@ impl MockControlPlane {
Ok(AuthInfo {
secret,
allowed_ips,
allowed_vpc_endpoint_ids: vec![],
project_id: None,
account_id: None,
access_blocker_flags: AccessBlockerFlags::default(),
})
}
@@ -219,35 +214,16 @@ impl super::ControlPlaneApi for MockControlPlane {
))
}
async fn get_allowed_ips(
async fn get_allowed_ips_and_secret(
&self,
_ctx: &RequestContext,
user_info: &ComputeUserInfo,
) -> Result<CachedAllowedIps, GetAuthInfoError> {
Ok(Cached::new_uncached(Arc::new(
self.do_get_auth_info(user_info).await?.allowed_ips,
)))
}
async fn get_allowed_vpc_endpoint_ids(
&self,
_ctx: &RequestContext,
user_info: &ComputeUserInfo,
) -> Result<CachedAllowedVpcEndpointIds, super::errors::GetAuthInfoError> {
Ok(Cached::new_uncached(Arc::new(
self.do_get_auth_info(user_info)
.await?
.allowed_vpc_endpoint_ids,
)))
}
async fn get_block_public_or_vpc_access(
&self,
_ctx: &RequestContext,
user_info: &ComputeUserInfo,
) -> Result<super::CachedAccessBlockerFlags, super::errors::GetAuthInfoError> {
Ok(Cached::new_uncached(
self.do_get_auth_info(user_info).await?.access_blocker_flags,
) -> Result<(CachedAllowedIps, Option<CachedRoleSecret>), GetAuthInfoError> {
Ok((
Cached::new_uncached(Arc::new(
self.do_get_auth_info(user_info).await?.allowed_ips,
)),
None,
))
}

View File

@@ -6,7 +6,7 @@ use std::hash::Hash;
use std::sync::Arc;
use std::time::Duration;
use clashmap::ClashMap;
use dashmap::DashMap;
use tokio::time::Instant;
use tracing::{debug, info};
@@ -17,8 +17,7 @@ use crate::cache::project_info::ProjectInfoCacheImpl;
use crate::config::{CacheOptions, EndpointCacheConfig, ProjectInfoCacheOptions};
use crate::context::RequestContext;
use crate::control_plane::{
errors, CachedAccessBlockerFlags, CachedAllowedIps, CachedAllowedVpcEndpointIds,
CachedNodeInfo, CachedRoleSecret, ControlPlaneApi, NodeInfoCache,
errors, CachedAllowedIps, CachedNodeInfo, CachedRoleSecret, ControlPlaneApi, NodeInfoCache,
};
use crate::error::ReportableError;
use crate::metrics::ApiLockMetrics;
@@ -56,45 +55,17 @@ impl ControlPlaneApi for ControlPlaneClient {
}
}
async fn get_allowed_ips(
async fn get_allowed_ips_and_secret(
&self,
ctx: &RequestContext,
user_info: &ComputeUserInfo,
) -> Result<CachedAllowedIps, errors::GetAuthInfoError> {
) -> Result<(CachedAllowedIps, Option<CachedRoleSecret>), errors::GetAuthInfoError> {
match self {
Self::ProxyV1(api) => api.get_allowed_ips(ctx, user_info).await,
Self::ProxyV1(api) => api.get_allowed_ips_and_secret(ctx, user_info).await,
#[cfg(any(test, feature = "testing"))]
Self::PostgresMock(api) => api.get_allowed_ips(ctx, user_info).await,
Self::PostgresMock(api) => api.get_allowed_ips_and_secret(ctx, user_info).await,
#[cfg(test)]
Self::Test(api) => api.get_allowed_ips(),
}
}
async fn get_allowed_vpc_endpoint_ids(
&self,
ctx: &RequestContext,
user_info: &ComputeUserInfo,
) -> Result<CachedAllowedVpcEndpointIds, errors::GetAuthInfoError> {
match self {
Self::ProxyV1(api) => api.get_allowed_vpc_endpoint_ids(ctx, user_info).await,
#[cfg(any(test, feature = "testing"))]
Self::PostgresMock(api) => api.get_allowed_vpc_endpoint_ids(ctx, user_info).await,
#[cfg(test)]
Self::Test(api) => api.get_allowed_vpc_endpoint_ids(),
}
}
async fn get_block_public_or_vpc_access(
&self,
ctx: &RequestContext,
user_info: &ComputeUserInfo,
) -> Result<CachedAccessBlockerFlags, errors::GetAuthInfoError> {
match self {
Self::ProxyV1(api) => api.get_block_public_or_vpc_access(ctx, user_info).await,
#[cfg(any(test, feature = "testing"))]
Self::PostgresMock(api) => api.get_block_public_or_vpc_access(ctx, user_info).await,
#[cfg(test)]
Self::Test(api) => api.get_block_public_or_vpc_access(),
Self::Test(api) => api.get_allowed_ips_and_secret(),
}
}
@@ -131,15 +102,9 @@ impl ControlPlaneApi for ControlPlaneClient {
pub(crate) trait TestControlPlaneClient: Send + Sync + 'static {
fn wake_compute(&self) -> Result<CachedNodeInfo, errors::WakeComputeError>;
fn get_allowed_ips(&self) -> Result<CachedAllowedIps, errors::GetAuthInfoError>;
fn get_allowed_vpc_endpoint_ids(
fn get_allowed_ips_and_secret(
&self,
) -> Result<CachedAllowedVpcEndpointIds, errors::GetAuthInfoError>;
fn get_block_public_or_vpc_access(
&self,
) -> Result<CachedAccessBlockerFlags, errors::GetAuthInfoError>;
) -> Result<(CachedAllowedIps, Option<CachedRoleSecret>), errors::GetAuthInfoError>;
fn dyn_clone(&self) -> Box<dyn TestControlPlaneClient>;
}
@@ -183,7 +148,7 @@ impl ApiCaches {
/// Various caches for [`control_plane`](super).
pub struct ApiLocks<K> {
name: &'static str,
node_locks: ClashMap<K, Arc<DynamicLimiter>>,
node_locks: DashMap<K, Arc<DynamicLimiter>>,
config: RateLimiterConfig,
timeout: Duration,
epoch: std::time::Duration,
@@ -215,7 +180,7 @@ impl<K: Hash + Eq + Clone> ApiLocks<K> {
) -> prometheus::Result<Self> {
Ok(Self {
name,
node_locks: ClashMap::with_shard_amount(shards),
node_locks: DashMap::with_shard_amount(shards),
config,
timeout,
epoch,
@@ -273,7 +238,7 @@ impl<K: Hash + Eq + Clone> ApiLocks<K> {
let mut lock = shard.write();
let timer = self.metrics.reclamation_lag_seconds.start_timer();
let count = lock
.extract_if(|(_, semaphore)| Arc::strong_count(semaphore) == 1)
.extract_if(|_, semaphore| Arc::strong_count(semaphore.get_mut()) == 1)
.count();
drop(lock);
self.metrics.semaphores_unregistered.inc_by(count as u64);

View File

@@ -4,7 +4,7 @@ use measured::FixedCardinalityLabel;
use serde::{Deserialize, Serialize};
use crate::auth::IpPattern;
use crate::intern::{AccountIdInt, BranchIdInt, EndpointIdInt, ProjectIdInt, RoleNameInt};
use crate::intern::{BranchIdInt, EndpointIdInt, ProjectIdInt, RoleNameInt};
use crate::proxy::retry::CouldRetry;
/// Generic error response with human-readable description.
@@ -227,11 +227,8 @@ pub(crate) struct UserFacingMessage {
pub(crate) struct GetEndpointAccessControl {
pub(crate) role_secret: Box<str>,
pub(crate) allowed_ips: Option<Vec<IpPattern>>,
pub(crate) allowed_vpc_endpoint_ids: Option<Vec<String>>,
pub(crate) project_id: Option<ProjectIdInt>,
pub(crate) account_id: Option<AccountIdInt>,
pub(crate) block_public_connections: Option<bool>,
pub(crate) block_vpc_connections: Option<bool>,
pub(crate) allowed_vpc_endpoint_ids: Option<Vec<EndpointIdInt>>,
}
/// Response which holds compute node's `host:port` pair.
@@ -285,10 +282,6 @@ pub(crate) struct DatabaseInfo {
pub(crate) aux: MetricsAuxInfo,
#[serde(default)]
pub(crate) allowed_ips: Option<Vec<IpPattern>>,
#[serde(default)]
pub(crate) allowed_vpc_endpoint_ids: Option<Vec<String>>,
#[serde(default)]
pub(crate) public_access_allowed: Option<bool>,
}
// Manually implement debug to omit sensitive info.
@@ -300,7 +293,6 @@ impl fmt::Debug for DatabaseInfo {
.field("dbname", &self.dbname)
.field("user", &self.user)
.field("allowed_ips", &self.allowed_ips)
.field("allowed_vpc_endpoint_ids", &self.allowed_vpc_endpoint_ids)
.finish_non_exhaustive()
}
}
@@ -465,7 +457,7 @@ mod tests {
#[test]
fn parse_get_role_secret() -> anyhow::Result<()> {
// Empty `allowed_ips` and `allowed_vpc_endpoint_ids` field.
// Empty `allowed_ips` field.
let json = json!({
"role_secret": "secret",
});
@@ -475,21 +467,9 @@ mod tests {
"allowed_ips": ["8.8.8.8"],
});
serde_json::from_str::<GetEndpointAccessControl>(&json.to_string())?;
let json = json!({
"role_secret": "secret",
"allowed_vpc_endpoint_ids": ["vpce-0abcd1234567890ef"],
});
serde_json::from_str::<GetEndpointAccessControl>(&json.to_string())?;
let json = json!({
"role_secret": "secret",
"allowed_ips": ["8.8.8.8"],
"allowed_vpc_endpoint_ids": ["vpce-0abcd1234567890ef"],
});
serde_json::from_str::<GetEndpointAccessControl>(&json.to_string())?;
let json = json!({
"role_secret": "secret",
"allowed_ips": ["8.8.8.8"],
"allowed_vpc_endpoint_ids": ["vpce-0abcd1234567890ef"],
"project_id": "project",
});
serde_json::from_str::<GetEndpointAccessControl>(&json.to_string())?;

View File

@@ -19,7 +19,6 @@ use crate::cache::{Cached, TimedLru};
use crate::config::ComputeConfig;
use crate::context::RequestContext;
use crate::control_plane::messages::{ControlPlaneErrorMessage, MetricsAuxInfo};
use crate::intern::AccountIdInt;
use crate::intern::ProjectIdInt;
use crate::types::{EndpointCacheKey, EndpointId};
use crate::{compute, scram};
@@ -53,14 +52,8 @@ pub(crate) struct AuthInfo {
pub(crate) secret: Option<AuthSecret>,
/// List of IP addresses allowed for the autorization.
pub(crate) allowed_ips: Vec<IpPattern>,
/// List of VPC endpoints allowed for the autorization.
pub(crate) allowed_vpc_endpoint_ids: Vec<String>,
/// Project ID. This is used for cache invalidation.
pub(crate) project_id: Option<ProjectIdInt>,
/// Account ID. This is used for cache invalidation.
pub(crate) account_id: Option<AccountIdInt>,
/// Are public connections or VPC connections blocked?
pub(crate) access_blocker_flags: AccessBlockerFlags,
}
/// Info for establishing a connection to a compute node.
@@ -102,21 +95,11 @@ impl NodeInfo {
}
}
#[derive(Clone, Default, Eq, PartialEq, Debug)]
pub(crate) struct AccessBlockerFlags {
pub public_access_blocked: bool,
pub vpc_access_blocked: bool,
}
pub(crate) type NodeInfoCache =
TimedLru<EndpointCacheKey, Result<NodeInfo, Box<ControlPlaneErrorMessage>>>;
pub(crate) type CachedNodeInfo = Cached<&'static NodeInfoCache, NodeInfo>;
pub(crate) type CachedRoleSecret = Cached<&'static ProjectInfoCacheImpl, Option<AuthSecret>>;
pub(crate) type CachedAllowedIps = Cached<&'static ProjectInfoCacheImpl, Arc<Vec<IpPattern>>>;
pub(crate) type CachedAllowedVpcEndpointIds =
Cached<&'static ProjectInfoCacheImpl, Arc<Vec<String>>>;
pub(crate) type CachedAccessBlockerFlags =
Cached<&'static ProjectInfoCacheImpl, AccessBlockerFlags>;
/// This will allocate per each call, but the http requests alone
/// already require a few allocations, so it should be fine.
@@ -130,23 +113,11 @@ pub(crate) trait ControlPlaneApi {
user_info: &ComputeUserInfo,
) -> Result<CachedRoleSecret, errors::GetAuthInfoError>;
async fn get_allowed_ips(
async fn get_allowed_ips_and_secret(
&self,
ctx: &RequestContext,
user_info: &ComputeUserInfo,
) -> Result<CachedAllowedIps, errors::GetAuthInfoError>;
async fn get_allowed_vpc_endpoint_ids(
&self,
ctx: &RequestContext,
user_info: &ComputeUserInfo,
) -> Result<CachedAllowedVpcEndpointIds, errors::GetAuthInfoError>;
async fn get_block_public_or_vpc_access(
&self,
ctx: &RequestContext,
user_info: &ComputeUserInfo,
) -> Result<CachedAccessBlockerFlags, errors::GetAuthInfoError>;
) -> Result<(CachedAllowedIps, Option<CachedRoleSecret>), errors::GetAuthInfoError>;
async fn get_endpoint_jwks(
&self,

View File

@@ -7,7 +7,7 @@ use std::sync::OnceLock;
use lasso::{Capacity, MemoryLimits, Spur, ThreadedRodeo};
use rustc_hash::FxHasher;
use crate::types::{AccountId, BranchId, EndpointId, ProjectId, RoleName};
use crate::types::{BranchId, EndpointId, ProjectId, RoleName};
pub trait InternId: Sized + 'static {
fn get_interner() -> &'static StringInterner<Self>;
@@ -206,26 +206,6 @@ impl From<ProjectId> for ProjectIdInt {
}
}
#[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)]
pub struct AccountIdTag;
impl InternId for AccountIdTag {
fn get_interner() -> &'static StringInterner<Self> {
static ROLE_NAMES: OnceLock<StringInterner<AccountIdTag>> = OnceLock::new();
ROLE_NAMES.get_or_init(Default::default)
}
}
pub type AccountIdInt = InternedString<AccountIdTag>;
impl From<&AccountId> for AccountIdInt {
fn from(value: &AccountId) -> Self {
AccountIdTag::get_interner().get_or_intern(value)
}
}
impl From<AccountId> for AccountIdInt {
fn from(value: AccountId) -> Self {
AccountIdTag::get_interner().get_or_intern(&value)
}
}
#[cfg(test)]
#[expect(clippy::unwrap_used)]
mod tests {

View File

@@ -96,16 +96,6 @@ pub struct ProxyMetrics {
#[metric(metadata = Thresholds::with_buckets([0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 10.0, 20.0, 50.0, 100.0]))]
pub allowed_ips_number: Histogram<10>,
/// Number of cache hits/misses for VPC endpoint IDs.
pub vpc_endpoint_id_cache_stats: CounterVec<StaticLabelSet<CacheOutcome>>,
/// Number of cache hits/misses for access blocker flags.
pub access_blocker_flags_cache_stats: CounterVec<StaticLabelSet<CacheOutcome>>,
/// Number of allowed VPC endpoints IDs
#[metric(metadata = Thresholds::with_buckets([0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 10.0, 20.0, 50.0, 100.0]))]
pub allowed_vpc_endpoint_ids: Histogram<10>,
/// Number of connections (per sni).
pub accepted_connections_by_sni: CounterVec<StaticLabelSet<SniKind>>,
@@ -580,9 +570,6 @@ pub enum RedisEventsCount {
CancelSession,
PasswordUpdate,
AllowedIpsUpdate,
AllowedVpcEndpointIdsUpdateForProjects,
AllowedVpcEndpointIdsUpdateForAllProjectsInOrg,
BlockPublicOrVpcAccessUpdate,
}
pub struct ThreadPoolWorkers(usize);

View File

@@ -201,26 +201,25 @@ impl CopyBuffer {
W: AsyncWrite + ?Sized,
{
loop {
// If there is some space left in our buffer, then we try to read some
// data to continue, thus maximizing the chances of a large write.
if self.cap < self.buf.len() && !self.read_done {
// If our buffer is empty, then we need to read some data to
// continue.
if self.pos == self.cap && !self.read_done {
self.pos = 0;
self.cap = 0;
match self.poll_fill_buf(cx, reader.as_mut()) {
Poll::Ready(Ok(())) => (),
Poll::Ready(Err(err)) => return Poll::Ready(Err(ErrorDirection::Read(err))),
Poll::Pending => {
// Ignore pending reads when our buffer is not empty, because
// we can try to write data immediately.
if self.pos == self.cap {
// Try flushing when the reader has no progress to avoid deadlock
// when the reader depends on buffered writer.
if self.need_flush {
ready!(writer.as_mut().poll_flush(cx))
.map_err(ErrorDirection::Write)?;
self.need_flush = false;
}
return Poll::Pending;
// Try flushing when the reader has no progress to avoid deadlock
// when the reader depends on buffered writer.
if self.need_flush {
ready!(writer.as_mut().poll_flush(cx))
.map_err(ErrorDirection::Write)?;
self.need_flush = false;
}
return Poll::Pending;
}
}
}
@@ -247,13 +246,9 @@ impl CopyBuffer {
"writer returned length larger than input slice"
);
// All data has been written, the buffer can be considered empty again
self.pos = 0;
self.cap = 0;
// If we've written all the data and we've seen EOF, flush out the
// data and finish the transfer.
if self.read_done {
if self.pos == self.cap && self.read_done {
ready!(writer.as_mut().poll_flush(cx)).map_err(ErrorDirection::Write)?;
return Poll::Ready(Ok(self.amt));
}

View File

@@ -283,8 +283,7 @@ pub(crate) async fn handle_client<S: AsyncRead + AsyncWrite + Unpin>(
cancel_key_data,
ctx,
config.authentication_config.ip_allowlist_check_enabled,
config.authentication_config.is_vpc_acccess_proxy,
auth_backend.get_api(),
auth_backend,
)
.await
.inspect_err(|e | debug!(error = ?e, "cancel_session failed")).ok();

View File

@@ -26,7 +26,7 @@ use crate::config::{ComputeConfig, RetryConfig};
use crate::control_plane::client::{ControlPlaneClient, TestControlPlaneClient};
use crate::control_plane::messages::{ControlPlaneErrorMessage, Details, MetricsAuxInfo, Status};
use crate::control_plane::{
self, CachedAllowedIps, CachedAllowedVpcEndpointIds, CachedNodeInfo, NodeInfo, NodeInfoCache,
self, CachedAllowedIps, CachedNodeInfo, CachedRoleSecret, NodeInfo, NodeInfoCache,
};
use crate::error::ErrorKind;
use crate::tls::client_config::compute_client_config_with_certs;
@@ -526,19 +526,9 @@ impl TestControlPlaneClient for TestConnectMechanism {
}
}
fn get_allowed_ips(&self) -> Result<CachedAllowedIps, control_plane::errors::GetAuthInfoError> {
unimplemented!("not used in tests")
}
fn get_allowed_vpc_endpoint_ids(
fn get_allowed_ips_and_secret(
&self,
) -> Result<CachedAllowedVpcEndpointIds, control_plane::errors::GetAuthInfoError> {
unimplemented!("not used in tests")
}
fn get_block_public_or_vpc_access(
&self,
) -> Result<control_plane::CachedAccessBlockerFlags, control_plane::errors::GetAuthInfoError>
) -> Result<(CachedAllowedIps, Option<CachedRoleSecret>), control_plane::errors::GetAuthInfoError>
{
unimplemented!("not used in tests")
}

View File

@@ -2,7 +2,7 @@ use std::hash::Hash;
use std::sync::atomic::{AtomicUsize, Ordering};
use ahash::RandomState;
use clashmap::ClashMap;
use dashmap::DashMap;
use rand::{thread_rng, Rng};
use tokio::time::Instant;
use tracing::info;
@@ -14,7 +14,7 @@ use crate::intern::EndpointIdInt;
pub type EndpointRateLimiter = LeakyBucketRateLimiter<EndpointIdInt>;
pub struct LeakyBucketRateLimiter<Key> {
map: ClashMap<Key, LeakyBucketState, RandomState>,
map: DashMap<Key, LeakyBucketState, RandomState>,
config: utils::leaky_bucket::LeakyBucketConfig,
access_count: AtomicUsize,
}
@@ -27,7 +27,7 @@ impl<K: Hash + Eq> LeakyBucketRateLimiter<K> {
pub fn new_with_shards(config: LeakyBucketConfig, shards: usize) -> Self {
Self {
map: ClashMap::with_hasher_and_shard_amount(RandomState::new(), shards),
map: DashMap::with_hasher_and_shard_amount(RandomState::new(), shards),
config: config.into(),
access_count: AtomicUsize::new(0),
}
@@ -58,7 +58,7 @@ impl<K: Hash + Eq> LeakyBucketRateLimiter<K> {
let shard = thread_rng().gen_range(0..n);
self.map.shards()[shard]
.write()
.retain(|(_, value)| !value.bucket_is_empty(now));
.retain(|_, value| !value.get().bucket_is_empty(now));
}
}

View File

@@ -5,7 +5,7 @@ use std::sync::atomic::{AtomicUsize, Ordering};
use std::sync::Mutex;
use anyhow::bail;
use clashmap::ClashMap;
use dashmap::DashMap;
use itertools::Itertools;
use rand::rngs::StdRng;
use rand::{Rng, SeedableRng};
@@ -62,7 +62,7 @@ impl GlobalRateLimiter {
pub type WakeComputeRateLimiter = BucketRateLimiter<EndpointIdInt, StdRng, RandomState>;
pub struct BucketRateLimiter<Key, Rand = StdRng, Hasher = RandomState> {
map: ClashMap<Key, Vec<RateBucket>, Hasher>,
map: DashMap<Key, Vec<RateBucket>, Hasher>,
info: Cow<'static, [RateBucketInfo]>,
access_count: AtomicUsize,
rand: Mutex<Rand>,
@@ -202,7 +202,7 @@ impl<K: Hash + Eq, R: Rng, S: BuildHasher + Clone> BucketRateLimiter<K, R, S> {
info!(buckets = ?info, "endpoint rate limiter");
Self {
info,
map: ClashMap::with_hasher_and_shard_amount(hasher, 64),
map: DashMap::with_hasher_and_shard_amount(hasher, 64),
access_count: AtomicUsize::new(1), // start from 1 to avoid GC on the first request
rand: Mutex::new(rand),
}

View File

@@ -1,114 +0,0 @@
use core::net::IpAddr;
use std::sync::Arc;
use pq_proto::CancelKeyData;
use tokio::sync::Mutex;
use uuid::Uuid;
use super::connection_with_credentials_provider::ConnectionWithCredentialsProvider;
use crate::rate_limiter::{GlobalRateLimiter, RateBucketInfo};
pub trait CancellationPublisherMut: Send + Sync + 'static {
#[allow(async_fn_in_trait)]
async fn try_publish(
&mut self,
cancel_key_data: CancelKeyData,
session_id: Uuid,
peer_addr: IpAddr,
) -> anyhow::Result<()>;
}
pub trait CancellationPublisher: Send + Sync + 'static {
#[allow(async_fn_in_trait)]
async fn try_publish(
&self,
cancel_key_data: CancelKeyData,
session_id: Uuid,
peer_addr: IpAddr,
) -> anyhow::Result<()>;
}
impl CancellationPublisher for () {
async fn try_publish(
&self,
_cancel_key_data: CancelKeyData,
_session_id: Uuid,
_peer_addr: IpAddr,
) -> anyhow::Result<()> {
Ok(())
}
}
impl<P: CancellationPublisher> CancellationPublisherMut for P {
async fn try_publish(
&mut self,
cancel_key_data: CancelKeyData,
session_id: Uuid,
peer_addr: IpAddr,
) -> anyhow::Result<()> {
<P as CancellationPublisher>::try_publish(self, cancel_key_data, session_id, peer_addr)
.await
}
}
impl<P: CancellationPublisher> CancellationPublisher for Option<P> {
async fn try_publish(
&self,
cancel_key_data: CancelKeyData,
session_id: Uuid,
peer_addr: IpAddr,
) -> anyhow::Result<()> {
if let Some(p) = self {
p.try_publish(cancel_key_data, session_id, peer_addr).await
} else {
Ok(())
}
}
}
impl<P: CancellationPublisherMut> CancellationPublisher for Arc<Mutex<P>> {
async fn try_publish(
&self,
cancel_key_data: CancelKeyData,
session_id: Uuid,
peer_addr: IpAddr,
) -> anyhow::Result<()> {
self.lock()
.await
.try_publish(cancel_key_data, session_id, peer_addr)
.await
}
}
pub struct RedisPublisherClient {
#[allow(dead_code)]
client: ConnectionWithCredentialsProvider,
_region_id: String,
_limiter: GlobalRateLimiter,
}
impl RedisPublisherClient {
pub fn new(
client: ConnectionWithCredentialsProvider,
region_id: String,
info: &'static [RateBucketInfo],
) -> anyhow::Result<Self> {
Ok(Self {
client,
_region_id: region_id,
_limiter: GlobalRateLimiter::new(info.into()),
})
}
#[allow(dead_code)]
pub(crate) async fn try_connect(&mut self) -> anyhow::Result<()> {
match self.client.connect().await {
Ok(()) => {}
Err(e) => {
tracing::error!("failed to connect to redis: {e}");
return Err(e);
}
}
Ok(())
}
}

View File

@@ -1,8 +1,7 @@
use std::io::ErrorKind;
use anyhow::Ok;
use pq_proto::{id_to_cancel_key, CancelKeyData};
use serde::{Deserialize, Serialize};
use std::io::ErrorKind;
pub mod keyspace {
pub const CANCEL_PREFIX: &str = "cancel";

View File

@@ -1,6 +1,7 @@
use redis::{AsyncCommands, ToRedisArgs};
use super::connection_with_credentials_provider::ConnectionWithCredentialsProvider;
use crate::rate_limiter::{GlobalRateLimiter, RateBucketInfo};
pub struct RedisKVClient {

View File

@@ -1,4 +1,3 @@
pub mod cancellation_publisher;
pub mod connection_with_credentials_provider;
pub mod elasticache;
pub mod keys;

View File

@@ -10,7 +10,7 @@ use uuid::Uuid;
use super::connection_with_credentials_provider::ConnectionWithCredentialsProvider;
use crate::cache::project_info::ProjectInfoCache;
use crate::intern::{AccountIdInt, ProjectIdInt, RoleNameInt};
use crate::intern::{ProjectIdInt, RoleNameInt};
use crate::metrics::{Metrics, RedisErrors, RedisEventsCount};
const CPLANE_CHANNEL_NAME: &str = "neondb-proxy-ws-updates";
@@ -86,7 +86,9 @@ pub(crate) struct BlockPublicOrVpcAccessUpdated {
#[derive(Clone, Debug, Serialize, Deserialize, Eq, PartialEq)]
pub(crate) struct AllowedVpcEndpointsUpdatedForOrg {
account_id: AccountIdInt,
// TODO: change type once the implementation is more fully fledged.
// See e.g. https://github.com/neondatabase/neon/pull/10073.
account_id: ProjectIdInt,
}
#[derive(Clone, Debug, Serialize, Deserialize, Eq, PartialEq)]
@@ -203,24 +205,6 @@ impl<C: ProjectInfoCache + Send + Sync + 'static> MessageHandler<C> {
.proxy
.redis_events_count
.inc(RedisEventsCount::PasswordUpdate);
} else if matches!(
msg,
Notification::AllowedVpcEndpointsUpdatedForProjects { .. }
) {
Metrics::get()
.proxy
.redis_events_count
.inc(RedisEventsCount::AllowedVpcEndpointIdsUpdateForProjects);
} else if matches!(msg, Notification::AllowedVpcEndpointsUpdatedForOrg { .. }) {
Metrics::get()
.proxy
.redis_events_count
.inc(RedisEventsCount::AllowedVpcEndpointIdsUpdateForAllProjectsInOrg);
} else if matches!(msg, Notification::BlockPublicOrVpcAccessUpdated { .. }) {
Metrics::get()
.proxy
.redis_events_count
.inc(RedisEventsCount::BlockPublicOrVpcAccessUpdate);
}
// TODO: add additional metrics for the other event types.
@@ -246,26 +230,20 @@ fn invalidate_cache<C: ProjectInfoCache>(cache: Arc<C>, msg: Notification) {
Notification::AllowedIpsUpdate { allowed_ips_update } => {
cache.invalidate_allowed_ips_for_project(allowed_ips_update.project_id);
}
Notification::BlockPublicOrVpcAccessUpdated {
block_public_or_vpc_access_updated,
} => cache.invalidate_block_public_or_vpc_access_for_project(
block_public_or_vpc_access_updated.project_id,
),
Notification::AllowedVpcEndpointsUpdatedForOrg {
allowed_vpc_endpoints_updated_for_org,
} => cache.invalidate_allowed_vpc_endpoint_ids_for_org(
allowed_vpc_endpoints_updated_for_org.account_id,
),
Notification::AllowedVpcEndpointsUpdatedForProjects {
allowed_vpc_endpoints_updated_for_projects,
} => cache.invalidate_allowed_vpc_endpoint_ids_for_projects(
allowed_vpc_endpoints_updated_for_projects.project_ids,
),
Notification::PasswordUpdate { password_update } => cache
.invalidate_role_secret_for_project(
password_update.project_id,
password_update.role_name,
),
Notification::BlockPublicOrVpcAccessUpdated { .. } => {
// https://github.com/neondatabase/neon/pull/10073
}
Notification::AllowedVpcEndpointsUpdatedForOrg { .. } => {
// https://github.com/neondatabase/neon/pull/10073
}
Notification::AllowedVpcEndpointsUpdatedForProjects { .. } => {
// https://github.com/neondatabase/neon/pull/10073
}
Notification::UnknownTopic => unreachable!(),
}
}

View File

@@ -30,7 +30,6 @@ use crate::control_plane::locks::ApiLocks;
use crate::control_plane::CachedNodeInfo;
use crate::error::{ErrorKind, ReportableError, UserFacingError};
use crate::intern::EndpointIdInt;
use crate::protocol2::ConnectionInfoExtra;
use crate::proxy::connect_compute::ConnectMechanism;
use crate::proxy::retry::{CouldRetry, ShouldRetryWakeCompute};
use crate::rate_limiter::EndpointRateLimiter;
@@ -58,52 +57,23 @@ impl PoolingBackend {
let user_info = user_info.clone();
let backend = self.auth_backend.as_ref().map(|()| user_info.clone());
let allowed_ips = backend.get_allowed_ips(ctx).await?;
let (allowed_ips, maybe_secret) = backend.get_allowed_ips_and_secret(ctx).await?;
if self.config.authentication_config.ip_allowlist_check_enabled
&& !check_peer_addr_is_in_list(&ctx.peer_addr(), &allowed_ips)
{
return Err(AuthError::ip_address_not_allowed(ctx.peer_addr()));
}
let access_blocker_flags = backend.get_block_public_or_vpc_access(ctx).await?;
if self.config.authentication_config.is_vpc_acccess_proxy {
if access_blocker_flags.vpc_access_blocked {
return Err(AuthError::NetworkNotAllowed);
}
let extra = ctx.extra();
let incoming_endpoint_id = match extra {
None => String::new(),
Some(ConnectionInfoExtra::Aws { vpce_id }) => {
// Convert the vcpe_id to a string
String::from_utf8(vpce_id.to_vec()).unwrap_or_default()
}
Some(ConnectionInfoExtra::Azure { link_id }) => link_id.to_string(),
};
if incoming_endpoint_id.is_empty() {
return Err(AuthError::MissingVPCEndpointId);
}
let allowed_vpc_endpoint_ids = backend.get_allowed_vpc_endpoint_ids(ctx).await?;
// TODO: For now an empty VPC endpoint ID list means all are allowed. We should replace that.
if !allowed_vpc_endpoint_ids.is_empty()
&& !allowed_vpc_endpoint_ids.contains(&incoming_endpoint_id)
{
return Err(AuthError::vpc_endpoint_id_not_allowed(incoming_endpoint_id));
}
} else if access_blocker_flags.public_access_blocked {
return Err(AuthError::NetworkNotAllowed);
}
if !self
.endpoint_rate_limiter
.check(user_info.endpoint.clone().into(), 1)
{
return Err(AuthError::too_many_connections());
}
let cached_secret = backend.get_role_secret(ctx).await?;
let cached_secret = match maybe_secret {
Some(secret) => secret,
None => backend.get_role_secret(ctx).await?,
};
let secret = match cached_secret.value.clone() {
Some(secret) => self.config.authentication_config.check_rate_limit(
ctx,

View File

@@ -5,7 +5,7 @@ use std::sync::atomic::{self, AtomicUsize};
use std::sync::{Arc, Weak};
use std::time::Duration;
use clashmap::ClashMap;
use dashmap::DashMap;
use parking_lot::RwLock;
use postgres_client::ReadyForQueryStatus;
use rand::Rng;
@@ -351,11 +351,11 @@ where
//
// That should be a fairly conteded map, so return reference to the per-endpoint
// pool as early as possible and release the lock.
pub(crate) global_pool: ClashMap<EndpointCacheKey, Arc<RwLock<P>>>,
pub(crate) global_pool: DashMap<EndpointCacheKey, Arc<RwLock<P>>>,
/// Number of endpoint-connection pools
///
/// [`ClashMap::len`] iterates over all inner pools and acquires a read lock on each.
/// [`DashMap::len`] iterates over all inner pools and acquires a read lock on each.
/// That seems like far too much effort, so we're using a relaxed increment counter instead.
/// It's only used for diagnostics.
pub(crate) global_pool_size: AtomicUsize,
@@ -396,7 +396,7 @@ where
pub(crate) fn new(config: &'static crate::config::HttpConfig) -> Arc<Self> {
let shards = config.pool_options.pool_shards;
Arc::new(Self {
global_pool: ClashMap::with_shard_amount(shards),
global_pool: DashMap::with_shard_amount(shards),
global_pool_size: AtomicUsize::new(0),
config,
global_connections_count: Arc::new(AtomicUsize::new(0)),
@@ -442,10 +442,10 @@ where
.start_timer();
let current_len = shard.len();
let mut clients_removed = 0;
shard.retain(|(endpoint, x)| {
shard.retain(|endpoint, x| {
// if the current endpoint pool is unique (no other strong or weak references)
// then it is currently not in use by any connections.
if let Some(pool) = Arc::get_mut(x) {
if let Some(pool) = Arc::get_mut(x.get_mut()) {
let endpoints = pool.get_mut();
clients_removed = endpoints.clear_closed();

View File

@@ -97,8 +97,6 @@ smol_str_wrapper!(EndpointId);
smol_str_wrapper!(BranchId);
// 90% of project strings are 23 characters or less.
smol_str_wrapper!(ProjectId);
// 90% of account strings are 23 characters or less.
smol_str_wrapper!(AccountId);
// will usually equal endpoint ID
smol_str_wrapper!(EndpointCacheKey);

View File

@@ -10,9 +10,9 @@ use anyhow::{bail, Context};
use async_compression::tokio::write::GzipEncoder;
use bytes::Bytes;
use chrono::{DateTime, Datelike, Timelike, Utc};
use clashmap::mapref::entry::Entry;
use clashmap::ClashMap;
use consumption_metrics::{idempotency_key, Event, EventChunk, EventType, CHUNK_SIZE};
use dashmap::mapref::entry::Entry;
use dashmap::DashMap;
use once_cell::sync::Lazy;
use remote_storage::{GenericRemoteStorage, RemotePath, TimeoutOrCancel};
use serde::{Deserialize, Serialize};
@@ -137,7 +137,7 @@ type FastHasher = std::hash::BuildHasherDefault<rustc_hash::FxHasher>;
#[derive(Default)]
pub(crate) struct Metrics {
endpoints: ClashMap<Ids, Arc<MetricCounter>, FastHasher>,
endpoints: DashMap<Ids, Arc<MetricCounter>, FastHasher>,
}
impl Metrics {
@@ -213,7 +213,7 @@ pub async fn task_main(config: &MetricCollectionConfig) -> anyhow::Result<Infall
}
fn collect_and_clear_metrics<C: Clearable>(
endpoints: &ClashMap<Ids, Arc<C>, FastHasher>,
endpoints: &DashMap<Ids, Arc<C>, FastHasher>,
) -> Vec<(Ids, u64)> {
let mut metrics_to_clear = Vec::new();
@@ -271,7 +271,7 @@ fn create_event_chunks<'a>(
#[expect(clippy::too_many_arguments)]
#[instrument(skip_all)]
async fn collect_metrics_iteration(
endpoints: &ClashMap<Ids, Arc<MetricCounter>, FastHasher>,
endpoints: &DashMap<Ids, Arc<MetricCounter>, FastHasher>,
client: &http::ClientWithMiddleware,
metric_collection_endpoint: &reqwest::Url,
storage: Option<&GenericRemoteStorage>,

View File

@@ -1,5 +1,5 @@
[toolchain]
channel = "1.84.1"
channel = "1.84.0"
profile = "default"
# The default profile includes rustc, rust-std, cargo, rust-docs, rustfmt and clippy.
# https://rust-lang.github.io/rustup/concepts/profiles.html

View File

@@ -535,10 +535,6 @@ pub async fn main_task(
// limit concurrent uploads
let _upload_permit = tokio::select! {
acq = limiter.acquire_partial_backup() => acq,
_ = backup.tli.cancel.cancelled() => {
info!("timeline canceled");
return None;
}
_ = cancel.cancelled() => {
info!("task canceled");
return None;

View File

@@ -32,7 +32,6 @@ postgres_connection.workspace = true
rand.workspace = true
reqwest = { workspace = true, features = ["stream"] }
routerify.workspace = true
rustls-native-certs.workspace = true
serde.workspace = true
serde_json.workspace = true
thiserror.workspace = true
@@ -40,12 +39,9 @@ tokio.workspace = true
tokio-util.workspace = true
tracing.workspace = true
measured.workspace = true
rustls.workspace = true
scopeguard.workspace = true
strum.workspace = true
strum_macros.workspace = true
tokio-postgres.workspace = true
tokio-postgres-rustls.workspace = true
diesel = { version = "2.2.6", features = [
"serde_json",

View File

@@ -1,7 +1,6 @@
pub(crate) mod split_state;
use std::collections::HashMap;
use std::str::FromStr;
use std::sync::Arc;
use std::time::Duration;
use std::time::Instant;
@@ -10,11 +9,8 @@ use diesel::prelude::*;
use diesel_async::async_connection_wrapper::AsyncConnectionWrapper;
use diesel_async::pooled_connection::bb8::Pool;
use diesel_async::pooled_connection::AsyncDieselConnectionManager;
use diesel_async::pooled_connection::ManagerConfig;
use diesel_async::AsyncPgConnection;
use diesel_async::RunQueryDsl;
use futures::future::BoxFuture;
use futures::FutureExt;
use diesel_async::{AsyncConnection, AsyncPgConnection};
use itertools::Itertools;
use pageserver_api::controller_api::AvailabilityZone;
use pageserver_api::controller_api::MetadataHealthRecord;
@@ -27,9 +23,6 @@ use pageserver_api::shard::ShardConfigError;
use pageserver_api::shard::ShardIdentity;
use pageserver_api::shard::ShardStripeSize;
use pageserver_api::shard::{ShardCount, ShardNumber, TenantShardId};
use rustls::client::danger::ServerCertVerifier;
use rustls::client::WebPkiServerVerifier;
use rustls::crypto::ring;
use scoped_futures::ScopedBoxFuture;
use serde::{Deserialize, Serialize};
use utils::generation::Generation;
@@ -163,13 +156,7 @@ impl Persistence {
const MAX_CONNECTION_LIFETIME: Duration = Duration::from_secs(60);
pub async fn new(database_url: String) -> Self {
let mut mgr_config = ManagerConfig::default();
mgr_config.custom_setup = Box::new(establish_connection_rustls);
let manager = AsyncDieselConnectionManager::<AsyncPgConnection>::new_with_config(
database_url,
mgr_config,
);
let manager = AsyncDieselConnectionManager::<AsyncPgConnection>::new(database_url);
// We will use a connection pool: this is primarily to _limit_ our connection count, rather than to optimize time
// to execute queries (database queries are not generally on latency-sensitive paths).
@@ -195,7 +182,7 @@ impl Persistence {
) -> Result<(), diesel::ConnectionError> {
let started_at = Instant::now();
loop {
match establish_connection_rustls(database_url).await {
match AsyncPgConnection::establish(database_url).await {
Ok(_) => {
tracing::info!("Connected to database.");
return Ok(());
@@ -1269,126 +1256,6 @@ impl Persistence {
}
}
pub(crate) fn load_certs() -> anyhow::Result<Arc<rustls::RootCertStore>> {
let der_certs = rustls_native_certs::load_native_certs();
if !der_certs.errors.is_empty() {
anyhow::bail!("could not parse certificates: {:?}", der_certs.errors);
}
let mut store = rustls::RootCertStore::empty();
store.add_parsable_certificates(der_certs.certs);
Ok(Arc::new(store))
}
/// Loads the root certificates and constructs a client config suitable for connecting.
/// This function is blocking.
fn client_config_with_root_certs() -> anyhow::Result<rustls::ClientConfig> {
let client_config =
rustls::ClientConfig::builder_with_provider(Arc::new(ring::default_provider()))
.with_safe_default_protocol_versions()
.expect("ring should support the default protocol versions");
static DO_CERT_CHECKS: std::sync::OnceLock<bool> = std::sync::OnceLock::new();
let do_cert_checks =
DO_CERT_CHECKS.get_or_init(|| std::env::var("STORCON_CERT_CHECKS").is_ok());
Ok(if *do_cert_checks {
client_config
.with_root_certificates(load_certs()?)
.with_no_client_auth()
} else {
use rustls::client::danger::{HandshakeSignatureValid, ServerCertVerified};
#[derive(Debug)]
struct AcceptAll(Arc<WebPkiServerVerifier>);
impl ServerCertVerifier for AcceptAll {
fn verify_server_cert(
&self,
end_entity: &rustls::pki_types::CertificateDer<'_>,
intermediates: &[rustls::pki_types::CertificateDer<'_>],
server_name: &rustls::pki_types::ServerName<'_>,
ocsp_response: &[u8],
now: rustls::pki_types::UnixTime,
) -> Result<ServerCertVerified, rustls::Error> {
let r = self.0.verify_server_cert(
end_entity,
intermediates,
server_name,
ocsp_response,
now,
);
if let Err(err) = r {
tracing::info!(
?server_name,
"ignoring db connection TLS validation error: {err:?}"
);
return Ok(ServerCertVerified::assertion());
}
r
}
fn verify_tls12_signature(
&self,
message: &[u8],
cert: &rustls::pki_types::CertificateDer<'_>,
dss: &rustls::DigitallySignedStruct,
) -> Result<rustls::client::danger::HandshakeSignatureValid, rustls::Error>
{
let r = self.0.verify_tls12_signature(message, cert, dss);
if let Err(err) = r {
tracing::info!(
"ignoring db connection 1.2 signature TLS validation error: {err:?}"
);
return Ok(HandshakeSignatureValid::assertion());
}
r
}
fn verify_tls13_signature(
&self,
message: &[u8],
cert: &rustls::pki_types::CertificateDer<'_>,
dss: &rustls::DigitallySignedStruct,
) -> Result<rustls::client::danger::HandshakeSignatureValid, rustls::Error>
{
let r = self.0.verify_tls13_signature(message, cert, dss);
if let Err(err) = r {
tracing::info!(
"ignoring db connection 1.3 signature TLS validation error: {err:?}"
);
return Ok(HandshakeSignatureValid::assertion());
}
r
}
fn supported_verify_schemes(&self) -> Vec<rustls::SignatureScheme> {
self.0.supported_verify_schemes()
}
}
let verifier = AcceptAll(
WebPkiServerVerifier::builder_with_provider(
load_certs()?,
Arc::new(ring::default_provider()),
)
.build()?,
);
client_config
.dangerous()
.with_custom_certificate_verifier(Arc::new(verifier))
.with_no_client_auth()
})
}
fn establish_connection_rustls(config: &str) -> BoxFuture<ConnectionResult<AsyncPgConnection>> {
let fut = async {
// We first set up the way we want rustls to work.
let rustls_config = client_config_with_root_certs()
.map_err(|err| ConnectionError::BadConnection(format!("{err:?}")))?;
let tls = tokio_postgres_rustls::MakeRustlsConnect::new(rustls_config);
let (client, conn) = tokio_postgres::connect(config, tls)
.await
.map_err(|e| ConnectionError::BadConnection(e.to_string()))?;
AsyncPgConnection::try_from_client_and_connection(client, conn).await
};
fut.boxed()
}
/// Parts of [`crate::tenant_shard::TenantShard`] that are stored durably
#[derive(
QueryableByName, Queryable, Selectable, Insertable, Serialize, Deserialize, Clone, Eq, PartialEq,

View File

@@ -774,9 +774,8 @@ impl Scheduler {
if !matches!(context.mode, ScheduleMode::Speculative) {
tracing::info!(
"scheduler selected node {node_id} (elegible nodes {:?}, hard exclude: {hard_exclude:?}, soft exclude: {context:?}, preferred_az: {:?})",
scores.iter().map(|i| i.node_id().0).collect::<Vec<_>>(),
preferred_az,
"scheduler selected node {node_id} (elegible nodes {:?}, hard exclude: {hard_exclude:?}, soft exclude: {context:?})",
scores.iter().map(|i| i.node_id().0).collect::<Vec<_>>()
);
}

View File

@@ -96,38 +96,29 @@ impl ChaosInjector {
let batch_size = 128;
let mut inner = self.service.inner.write().unwrap();
let (nodes, tenants, scheduler) = inner.parts_mut();
let tenant_ids = tenants.keys().cloned().collect::<Vec<_>>();
// Prefer to migrate tenants that are currently outside their home AZ. This avoids the chaos injector
// continuously pushing tenants outside their home AZ: instead, we'll tend to cycle between picking some
// random tenants to move, and then on next chaos iteration moving them back, then picking some new
// random tenants on the next iteration.
let (out_of_home_az, in_home_az): (Vec<_>, Vec<_>) = tenants
.values()
.map(|shard| {
(
shard.tenant_shard_id,
shard.is_attached_outside_preferred_az(nodes),
)
})
.partition(|(_id, is_outside)| *is_outside);
let mut out_of_home_az: Vec<_> = out_of_home_az.into_iter().map(|(id, _)| id).collect();
let mut in_home_az: Vec<_> = in_home_az.into_iter().map(|(id, _)| id).collect();
let mut victims = Vec::with_capacity(batch_size);
if out_of_home_az.len() >= batch_size {
tracing::info!("Injecting chaos: found {batch_size} shards to migrate back to home AZ (total {} out of home AZ)", out_of_home_az.len());
for shard in tenants.values() {
if shard.is_attached_outside_preferred_az(nodes) {
victims.push(shard.tenant_shard_id);
}
out_of_home_az.shuffle(&mut thread_rng());
victims.extend(out_of_home_az.into_iter().take(batch_size));
} else {
tracing::info!("Injecting chaos: found {} shards to migrate back to home AZ, picking {} random shards to migrate", out_of_home_az.len(), std::cmp::min(batch_size - out_of_home_az.len(), in_home_az.len()));
victims.extend(out_of_home_az);
in_home_az.shuffle(&mut thread_rng());
victims.extend(in_home_az.into_iter().take(batch_size - victims.len()));
if victims.len() >= batch_size {
break;
}
}
let choose_random = batch_size.saturating_sub(victims.len());
tracing::info!("Injecting chaos: found {} shards to migrate back to home AZ, picking {choose_random} random shards to migrate", victims.len());
let random_victims = tenant_ids.choose_multiple(&mut thread_rng(), choose_random);
victims.extend(random_victims);
for victim in victims {
self.maybe_migrate_to_secondary(victim, nodes, tenants, scheduler);
}

View File

@@ -710,11 +710,7 @@ impl TenantShard {
modified = true;
} else if self.intent.secondary.is_empty() {
// Populate secondary by scheduling a fresh node
//
// We use [`AttachedShardTag`] because when a secondary location is the only one
// a shard has, we expect that its next use will be as an attached location: we want
// the tenant to be ready to warm up and run fast in their preferred AZ.
let node_id = scheduler.schedule_shard::<AttachedShardTag>(
let node_id = scheduler.schedule_shard::<SecondaryShardTag>(
&[],
&self.intent.preferred_az_id,
context,
@@ -723,17 +719,9 @@ impl TenantShard {
modified = true;
}
while self.intent.secondary.len() > 1 {
// If we have multiple secondaries (e.g. when transitioning from Attached to Secondary and
// having just demoted our attached location), then we should prefer to keep the location
// in our preferred AZ. Tenants in Secondary mode want to be in the preferred AZ so that
// they have a warm location to become attached when transitioning back into Attached.
let mut candidates = self.intent.get_secondary().clone();
// Sort to get secondaries outside preferred AZ last
candidates
.sort_by_key(|n| scheduler.get_node_az(n).as_ref() != self.preferred_az());
let secondary_to_remove = candidates.pop().unwrap();
self.intent.remove_secondary(scheduler, secondary_to_remove);
// We have no particular preference for one secondary location over another: just
// arbitrarily drop from the end
self.intent.pop_secondary(scheduler);
modified = true;
}
}
@@ -1091,31 +1079,12 @@ impl TenantShard {
None => vec![],
};
let replacement = match &self.policy {
PlacementPolicy::Attached(_) => {
// Secondaries for an attached shard should be scheduled using `SecondaryShardTag`
// to avoid placing them in the preferred AZ.
self.find_better_location::<SecondaryShardTag>(
scheduler,
&schedule_context,
*secondary,
&exclude,
)
}
PlacementPolicy::Secondary => {
// In secondary-only mode, we want our secondary locations in the preferred AZ,
// so that they're ready to take over as an attached location when we transition
// into PlacementPolicy::Attached.
self.find_better_location::<AttachedShardTag>(
scheduler,
&schedule_context,
*secondary,
&exclude,
)
}
PlacementPolicy::Detached => None,
};
let replacement = self.find_better_location::<SecondaryShardTag>(
scheduler,
&schedule_context,
*secondary,
&exclude,
);
assert!(replacement != Some(*secondary));
if let Some(replacement) = replacement {
// We have found a candidate and confirmed that its score is preferable
@@ -1837,7 +1806,7 @@ impl TenantShard {
.get(&node_id)
.expect("referenced node exists")
.get_availability_zone_id(),
) != self.intent.preferred_az_id.as_ref()
) == self.intent.preferred_az_id.as_ref()
})
.unwrap_or(false)
}
@@ -2718,108 +2687,4 @@ pub(crate) mod tests {
}
Ok(())
}
/// Check how the shard's scheduling behaves when in PlacementPolicy::Secondary mode.
#[test]
fn tenant_secondary_scheduling() -> anyhow::Result<()> {
let az_a = AvailabilityZone("az-a".to_string());
let nodes = make_test_nodes(
3,
&[
az_a.clone(),
AvailabilityZone("az-b".to_string()),
AvailabilityZone("az-c".to_string()),
],
);
let mut scheduler = Scheduler::new(nodes.values());
let mut context = ScheduleContext::default();
let mut tenant_shard = make_test_tenant_shard(PlacementPolicy::Secondary);
tenant_shard.intent.preferred_az_id = Some(az_a.clone());
tenant_shard
.schedule(&mut scheduler, &mut context)
.expect("we have enough nodes, scheduling should work");
assert_eq!(tenant_shard.intent.secondary.len(), 1);
assert!(tenant_shard.intent.attached.is_none());
// Should have scheduled into the preferred AZ
assert_eq!(
scheduler
.get_node_az(&tenant_shard.intent.secondary[0])
.as_ref(),
tenant_shard.preferred_az()
);
// Optimizer should agree
assert_eq!(
tenant_shard.optimize_attachment(&mut scheduler, &context),
None
);
assert_eq!(
tenant_shard.optimize_secondary(&mut scheduler, &context),
None
);
// Switch to PlacementPolicy::Attached
tenant_shard.policy = PlacementPolicy::Attached(1);
tenant_shard
.schedule(&mut scheduler, &mut context)
.expect("we have enough nodes, scheduling should work");
assert_eq!(tenant_shard.intent.secondary.len(), 1);
assert!(tenant_shard.intent.attached.is_some());
// Secondary should now be in non-preferred AZ
assert_ne!(
scheduler
.get_node_az(&tenant_shard.intent.secondary[0])
.as_ref(),
tenant_shard.preferred_az()
);
// Attached should be in preferred AZ
assert_eq!(
scheduler
.get_node_az(&tenant_shard.intent.attached.unwrap())
.as_ref(),
tenant_shard.preferred_az()
);
// Optimizer should agree
assert_eq!(
tenant_shard.optimize_attachment(&mut scheduler, &context),
None
);
assert_eq!(
tenant_shard.optimize_secondary(&mut scheduler, &context),
None
);
// Switch back to PlacementPolicy::Secondary
tenant_shard.policy = PlacementPolicy::Secondary;
tenant_shard
.schedule(&mut scheduler, &mut context)
.expect("we have enough nodes, scheduling should work");
assert_eq!(tenant_shard.intent.secondary.len(), 1);
assert!(tenant_shard.intent.attached.is_none());
// When we picked a location to keep, we should have kept the one in the preferred AZ
assert_eq!(
scheduler
.get_node_az(&tenant_shard.intent.secondary[0])
.as_ref(),
tenant_shard.preferred_az()
);
// Optimizer should agree
assert_eq!(
tenant_shard.optimize_attachment(&mut scheduler, &context),
None
);
assert_eq!(
tenant_shard.optimize_secondary(&mut scheduler, &context),
None
);
tenant_shard.intent.clear(&mut scheduler);
Ok(())
}
}

View File

@@ -158,9 +158,6 @@ PAGESERVER_PER_TENANT_METRICS: tuple[str, ...] = (
"pageserver_pitr_history_size",
"pageserver_layer_bytes",
"pageserver_layer_count",
"pageserver_layers_per_read_bucket",
"pageserver_layers_per_read_count",
"pageserver_layers_per_read_sum",
"pageserver_visible_physical_size",
"pageserver_storage_operations_seconds_count_total",
"pageserver_storage_operations_seconds_sum_total",

View File

@@ -2105,7 +2105,7 @@ class NeonStorageController(MetricsGetter, LogUtils):
log.info(f"reconcile_all waited for {n} shards")
return n
def reconcile_until_idle(self, timeout_secs=30, max_interval=1):
def reconcile_until_idle(self, timeout_secs=30, max_interval=5):
start_at = time.time()
n = 1
delay_sec = 0.1
@@ -4996,35 +4996,13 @@ def check_restored_datadir_content(
assert (mismatch, error) == ([], [])
# wait for subscriber to catch up with publisher
def logical_replication_sync(
subscriber: PgProtocol,
publisher: PgProtocol,
# pass subname explicitly to avoid confusion
# when multiple subscriptions are present
subname: str,
sub_dbname: str | None = None,
pub_dbname: str | None = None,
):
) -> Lsn:
"""Wait logical replication subscriber to sync with publisher."""
def initial_sync():
# first check if the subscription is active `s`=`synchronized`, `r` = `ready`
query = f"""SELECT 1 FROM pg_subscription_rel join pg_catalog.pg_subscription
on pg_subscription_rel.srsubid = pg_subscription.oid
WHERE srsubstate NOT IN ('r', 's') and subname='{subname}'"""
if sub_dbname is not None:
res = subscriber.safe_psql(query, dbname=sub_dbname)
else:
res = subscriber.safe_psql(query)
assert (res is None) or (len(res) == 0)
wait_until(initial_sync)
# wait for the subscription to catch up with current state of publisher
# caller is responsible to call checkpoint before calling this function
if pub_dbname is not None:
publisher_lsn = Lsn(
publisher.safe_psql("SELECT pg_current_wal_flush_lsn()", dbname=pub_dbname)[0][0]
@@ -5032,23 +5010,23 @@ def logical_replication_sync(
else:
publisher_lsn = Lsn(publisher.safe_psql("SELECT pg_current_wal_flush_lsn()")[0][0])
def subscriber_catch_up():
query = f"select latest_end_lsn from pg_catalog.pg_stat_subscription where latest_end_lsn is NOT NULL and subname='{subname}'"
while True:
if sub_dbname is not None:
res = subscriber.safe_psql(query, dbname=sub_dbname)
res = subscriber.safe_psql(
"select latest_end_lsn from pg_catalog.pg_stat_subscription", dbname=sub_dbname
)[0][0]
else:
res = subscriber.safe_psql(query)
res = subscriber.safe_psql(
"select latest_end_lsn from pg_catalog.pg_stat_subscription"
)[0][0]
assert res is not None
res_lsn = res[0][0]
log.info(f"subscriber_lsn={res_lsn}")
subscriber_lsn = Lsn(res_lsn)
log.info(f"Subscriber LSN={subscriber_lsn}, publisher LSN={publisher_lsn}")
assert subscriber_lsn >= publisher_lsn
wait_until(subscriber_catch_up)
if res:
log.info(f"subscriber_lsn={res}")
subscriber_lsn = Lsn(res)
log.info(f"Subscriber LSN={subscriber_lsn}, publisher LSN={publisher_lsn}")
if subscriber_lsn >= publisher_lsn:
return subscriber_lsn
time.sleep(0.5)
def tenant_get_shards(

View File

@@ -1,18 +1,13 @@
# Logical replication tests
> [!NOTE]
> Neon project should have logical replication enabled:
>
> https://neon.tech/docs/guides/logical-replication-postgres#enable-logical-replication-in-the-source-neon-project
## Clickhouse
```bash
export BENCHMARK_CONNSTR=postgres://user:pass@ep-abc-xyz-123.us-east-2.aws.neon.build/neondb
docker compose -f test_runner/logical_repl/clickhouse/docker-compose.yml up -d
./scripts/pytest -m remote_cluster -k test_clickhouse
docker compose -f test_runner/logical_repl/clickhouse/docker-compose.yml down
docker compose -f clickhouse/docker-compose.yml up -d
pytest -m remote_cluster -k test_clickhouse
docker compose -f clickhouse/docker-compose.yml down
```
## Debezium
@@ -20,7 +15,8 @@ docker compose -f test_runner/logical_repl/clickhouse/docker-compose.yml down
```bash
export BENCHMARK_CONNSTR=postgres://user:pass@ep-abc-xyz-123.us-east-2.aws.neon.build/neondb
docker compose -f test_runner/logical_repl/debezium/docker-compose.yml up -d
./scripts/pytest -m remote_cluster -k test_debezium
docker compose -f test_runner/logical_repl/debezium/docker-compose.yml down
```
docker compose -f debezium/docker-compose.yml up -d
pytest -m remote_cluster -k test_debezium
docker compose -f debezium/docker-compose.yml down
```

View File

@@ -44,13 +44,13 @@ def test_logical_replication(neon_simple_env: NeonEnv, pg_bin: PgBin, vanilla_pg
vanilla_pg.safe_psql(f"create subscription sub1 connection '{connstr}' publication pub1")
# Wait logical replication channel to be established
logical_replication_sync(vanilla_pg, endpoint, "sub1")
logical_replication_sync(vanilla_pg, endpoint)
pg_bin.run_capture(["pgbench", "-c10", "-T100", "-Mprepared", endpoint.connstr()])
# Wait logical replication to sync
start = time.time()
logical_replication_sync(vanilla_pg, endpoint, "sub1")
logical_replication_sync(vanilla_pg, endpoint)
log.info(f"Sync with master took {time.time() - start} seconds")
sum_master = cast("int", endpoint.safe_psql("select sum(abalance) from pgbench_accounts")[0][0])

View File

@@ -247,7 +247,7 @@ def test_sharding_autosplit(neon_env_builder: NeonEnvBuilder, pg_bin: PgBin):
log.info(f"{shard_zero_id} timeline: {timeline_info}")
# Run compaction for all tenants, restart endpoint so that on subsequent reads we will
# definitely hit pageserver for reads. This compaction pass is expected to drop unwanted
# definitely hit pageserver for reads. This compaction passis expected to drop unwanted
# layers but not do any rewrites (we're still in the same generation)
for tenant_id, tenant_state in tenants.items():
tenant_state.endpoint.stop()
@@ -296,16 +296,6 @@ def test_sharding_autosplit(neon_env_builder: NeonEnvBuilder, pg_bin: PgBin):
for fut in pgbench_futs:
fut.result()
# Run a full forced compaction, to detect any data corruption.
for tenant_id, tenant_state in tenants.items():
for shard_id, shard_ps in tenant_get_shards(env, tenant_id):
shard_ps.http_client().timeline_compact(
shard_id,
tenant_state.timeline_id,
force_image_layer_creation=True,
force_l0_compaction=True,
)
# Assert that some rewrites happened
# TODO: uncomment this after https://github.com/neondatabase/neon/pull/7531 is merged
# assert any(ps.log_contains(".*Rewriting layer after shard split.*") for ps in env.pageservers)

View File

@@ -421,9 +421,9 @@ checksum = "3fdb12b2476b595f9358c5161aa467c2438859caa136dec86c26fdd2efe17b92"
[[package]]
name = "openssl"
version = "0.10.70"
version = "0.10.66"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "61cfb4e166a8bb8c9b55c500bc2308550148ece889be90f609377e58140f42c6"
checksum = "9529f4786b70a3e8c61e11179af17ab6188ad8d0ded78c5529441ed39d4bd9c1"
dependencies = [
"bitflags 2.6.0",
"cfg-if",
@@ -453,9 +453,9 @@ checksum = "ff011a302c396a5197692431fc1948019154afc178baf7d8e37367442a4601cf"
[[package]]
name = "openssl-sys"
version = "0.9.105"
version = "0.9.103"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8b22d5b84be05a8d6947c7cb71f7c849aa0f112acd4bf51c2a7c1c988ac0a9dc"
checksum = "7f9e8deee91df40a943c71b917e5874b951d32a802526c85721ce3b776c929d6"
dependencies = [
"cc",
"libc",

View File

@@ -184,7 +184,6 @@ def test_fully_custom_config(positive_env: NeonEnv):
"gc_compaction_enabled": True,
"gc_compaction_initial_threshold_kb": 1024000,
"gc_compaction_ratio_percent": 200,
"image_creation_preempt_threshold": 5,
}
vps_http = env.storage_controller.pageserver_api()

View File

@@ -86,9 +86,9 @@ page_cache_size=10
log.info("Checking layer access metrics ...")
layer_access_metric_names = [
"pageserver_layers_per_read_global_sum",
"pageserver_layers_per_read_global_count",
"pageserver_layers_per_read_global_bucket",
"pageserver_layers_visited_per_vectored_read_global_sum",
"pageserver_layers_visited_per_vectored_read_global_count",
"pageserver_layers_visited_per_vectored_read_global_bucket",
]
metrics = env.pageserver.http_client().get_metrics()
@@ -96,8 +96,8 @@ page_cache_size=10
layer_access_metrics = metrics.query_all(name)
log.info(f"Got metrics: {layer_access_metrics}")
vectored_sum = metrics.query_one("pageserver_layers_per_read_global_sum")
vectored_count = metrics.query_one("pageserver_layers_per_read_global_count")
vectored_sum = metrics.query_one("pageserver_layers_visited_per_vectored_read_global_sum")
vectored_count = metrics.query_one("pageserver_layers_visited_per_vectored_read_global_count")
if vectored_count.value > 0:
assert vectored_sum.value > 0
vectored_average = vectored_sum.value / vectored_count.value

View File

@@ -183,7 +183,6 @@ def test_dropdb_with_subscription(neon_simple_env: NeonEnv):
cursor.execute("select pg_catalog.pg_create_logical_replication_slot('mysub', 'pgoutput');")
cursor.execute("CREATE TABLE t(a int)")
cursor.execute("INSERT INTO t VALUES (1)")
cursor.execute("CHECKPOINT")
# connect to the subscriber_db and create a subscription
# Note that we need to create subscription with
@@ -196,11 +195,7 @@ def test_dropdb_with_subscription(neon_simple_env: NeonEnv):
# wait for the subscription to be active
logical_replication_sync(
endpoint,
endpoint,
"mysub",
sub_dbname="subscriber_db",
pub_dbname="publisher_db",
endpoint, endpoint, sub_dbname="subscriber_db", pub_dbname="publisher_db"
)
# Check that replication is working

View File

@@ -59,9 +59,6 @@ def test_pgdata_import_smoke(
neon_env_builder.enable_pageserver_remote_storage(RemoteStorageKind.LOCAL_FS)
env = neon_env_builder.init_start()
# The test needs LocalFs support, which is only built in testing mode.
env.pageserver.is_testing_enabled_or_skip()
env.pageserver.patch_config_toml_nonrecursive(
{
"import_pgdata_upcall_api": f"http://{cplane_mgmt_api_server.host}:{cplane_mgmt_api_server.port}/path/to/mgmt/api"
@@ -70,12 +67,6 @@ def test_pgdata_import_smoke(
env.pageserver.stop()
env.pageserver.start()
# By default our tests run with a tiny shared_buffers=1MB setting. That
# doesn't allow any prefetching on v17 and above, where the new streaming
# read machinery keeps buffers pinned while prefetching them. Use a higher
# setting to enable prefetching and speed up the tests
ep_config = ["shared_buffers=64MB"]
#
# Put data in vanilla pg
#
@@ -252,11 +243,7 @@ def test_pgdata_import_smoke(
#
ro_endpoint = env.endpoints.create_start(
branch_name=import_branch_name,
endpoint_id="ro",
tenant_id=tenant_id,
lsn=last_record_lsn,
config_lines=ep_config,
branch_name=import_branch_name, endpoint_id="ro", tenant_id=tenant_id, lsn=last_record_lsn
)
validate_vanilla_equivalence(ro_endpoint)
@@ -286,10 +273,7 @@ def test_pgdata_import_smoke(
# validate that we can write
#
rw_endpoint = env.endpoints.create_start(
branch_name=import_branch_name,
endpoint_id="rw",
tenant_id=tenant_id,
config_lines=ep_config,
branch_name=import_branch_name, endpoint_id="rw", tenant_id=tenant_id
)
rw_endpoint.safe_psql("create table othertable(values text)")
rw_lsn = Lsn(rw_endpoint.safe_psql_scalar("select pg_current_wal_flush_lsn()"))
@@ -309,7 +293,7 @@ def test_pgdata_import_smoke(
ancestor_start_lsn=rw_lsn,
)
br_tip_endpoint = env.endpoints.create_start(
branch_name="br-tip", endpoint_id="br-tip-ro", tenant_id=tenant_id, config_lines=ep_config
branch_name="br-tip", endpoint_id="br-tip-ro", tenant_id=tenant_id
)
validate_vanilla_equivalence(br_tip_endpoint)
br_tip_endpoint.safe_psql("select * from othertable")
@@ -322,10 +306,7 @@ def test_pgdata_import_smoke(
ancestor_start_lsn=initdb_lsn,
)
br_initdb_endpoint = env.endpoints.create_start(
branch_name="br-initdb",
endpoint_id="br-initdb-ro",
tenant_id=tenant_id,
config_lines=ep_config,
branch_name="br-initdb", endpoint_id="br-initdb-ro", tenant_id=tenant_id
)
validate_vanilla_equivalence(br_initdb_endpoint)
with pytest.raises(psycopg2.errors.UndefinedTable):

Some files were not shown because too many files have changed in this diff Show More