Merge pull request #10713 from neondatabase/rc/release-compute/2025-02-07
Compute release 2025-02-07
@@ -24,3 +24,4 @@
!storage_controller/
!vendor/postgres-*/
!workspace_hack/
!build_tools/patches
1 .github/actionlint.yml vendored
@@ -27,3 +27,4 @@ config-variables:
- SLACK_ON_CALL_QA_STAGING_STREAM
- DEV_AWS_OIDC_ROLE_MANAGE_BENCHMARK_EC2_VMS_ARN
- SLACK_ON_CALL_STORAGE_STAGING_STREAM
- SLACK_CICD_CHANNEL_ID
12 .github/actions/neon-project-create/action.yml vendored
@@ -41,7 +41,10 @@ inputs:
description: 'Path to directory containing libpq library - it is caller responsibility to provision the libpq library'
required: false
default: '/tmp/neon/pg_install/v16/lib'

project_settings:
description: 'A JSON object with project settings'
required: false
default: '{}'

outputs:
dsn:
@@ -73,7 +76,7 @@ runs:
\"provisioner\": \"k8s-neonvm\",
\"autoscaling_limit_min_cu\": ${MIN_CU},
\"autoscaling_limit_max_cu\": ${MAX_CU},
\"settings\": { }
\"settings\": ${PROJECT_SETTINGS}
}
}")

@@ -92,12 +95,12 @@ runs:
if [ "${SHARD_SPLIT_PROJECT}" = "true" ]; then
# determine tenant ID
TENANT_ID=`${PSQL} ${dsn} -t -A -c "SHOW neon.tenant_id"`

echo "Splitting project ${project_id} with tenant_id ${TENANT_ID} into $((SHARD_COUNT)) shards with stripe size $((STRIPE_SIZE))"

echo "Sending PUT request to https://${API_HOST}/regions/${REGION_ID}/api/v1/admin/storage/proxy/control/v1/tenant/${TENANT_ID}/shard_split"
echo "with body {\"new_shard_count\": $((SHARD_COUNT)), \"new_stripe_size\": $((STRIPE_SIZE))}"

# we need an ADMIN API KEY to invoke storage controller API for shard splitting (bash -u above checks that the variable is set)
curl -X PUT \
"https://${API_HOST}/regions/${REGION_ID}/api/v1/admin/storage/proxy/control/v1/tenant/${TENANT_ID}/shard_split" \
@@ -118,3 +121,4 @@ runs:
STRIPE_SIZE: ${{ inputs.stripe_size }}
PSQL: ${{ inputs.psql_path }}
LD_LIBRARY_PATH: ${{ inputs.libpq_lib_path }}
PROJECT_SETTINGS: ${{ inputs.project_settings }}
@@ -121,6 +121,8 @@ runs:
export DEFAULT_PG_VERSION=${PG_VERSION#v}
export LD_LIBRARY_PATH=${POSTGRES_DISTRIB_DIR}/v${DEFAULT_PG_VERSION}/lib
export BENCHMARK_CONNSTR=${BENCHMARK_CONNSTR:-}
export ASAN_OPTIONS=detect_leaks=0:detect_stack_use_after_return=0:abort_on_error=1:strict_string_checks=1:check_initialization_order=1:strict_init_order=1
export UBSAN_OPTIONS=abort_on_error=1:print_stacktrace=1

if [ "${BUILD_TYPE}" = "remote" ]; then
export REMOTE_ENV=1
1 .github/file-filters.yaml vendored
@@ -1,4 +1,5 @@
rust_code: ['**/*.rs', '**/Cargo.toml', '**/Cargo.lock']
rust_dependencies: ['**/Cargo.lock']

v14: ['vendor/postgres-v14/**', 'Makefile', 'pgxn/**']
v15: ['vendor/postgres-v15/**', 'Makefile', 'pgxn/**']
59 .github/workflows/_build-and-test-locally.yml vendored
@@ -20,7 +20,7 @@ on:
required: true
type: string
test-cfg:
description: 'a json object of postgres versions and lfc states to run regression tests on'
description: 'a json object of postgres versions and lfc/sanitizers states to build and run regression tests on'
required: true
type: string

@@ -48,6 +48,8 @@ jobs:
# io_uring will account the memory of the CQ and SQ as locked.
# More details: https://github.com/neondatabase/neon/issues/6373#issuecomment-1905814391
options: --init --shm-size=512mb --ulimit memlock=67108864:67108864
strategy:
matrix: ${{ fromJSON(format('{{"include":{0}}}', inputs.test-cfg)) }}
env:
BUILD_TYPE: ${{ inputs.build-type }}
GIT_VERSION: ${{ github.event.pull_request.head.sha || github.sha }}
@@ -87,6 +89,7 @@ jobs:
- name: Set env variables
env:
ARCH: ${{ inputs.arch }}
SANITIZERS: ${{ matrix.sanitizers }}
run: |
CARGO_FEATURES="--features testing"
if [[ $BUILD_TYPE == "debug" && $ARCH == 'x64' ]]; then
@@ -99,8 +102,14 @@ jobs:
cov_prefix=""
CARGO_FLAGS="--locked --release"
fi
if [[ $SANITIZERS == 'enabled' ]]; then
make_vars="WITH_SANITIZERS=yes"
else
make_vars=""
fi
{
echo "cov_prefix=${cov_prefix}"
echo "make_vars=${make_vars}"
echo "CARGO_FEATURES=${CARGO_FEATURES}"
echo "CARGO_FLAGS=${CARGO_FLAGS}"
echo "CARGO_HOME=${GITHUB_WORKSPACE}/.cargo"
@@ -136,37 +145,39 @@ jobs:

- name: Build postgres v14
if: steps.cache_pg_14.outputs.cache-hit != 'true'
run: mold -run make postgres-v14 -j$(nproc)
run: mold -run make ${make_vars} postgres-v14 -j$(nproc)

- name: Build postgres v15
if: steps.cache_pg_15.outputs.cache-hit != 'true'
run: mold -run make postgres-v15 -j$(nproc)
run: mold -run make ${make_vars} postgres-v15 -j$(nproc)

- name: Build postgres v16
if: steps.cache_pg_16.outputs.cache-hit != 'true'
run: mold -run make postgres-v16 -j$(nproc)
run: mold -run make ${make_vars} postgres-v16 -j$(nproc)

- name: Build postgres v17
if: steps.cache_pg_17.outputs.cache-hit != 'true'
run: mold -run make postgres-v17 -j$(nproc)
run: mold -run make ${make_vars} postgres-v17 -j$(nproc)

- name: Build neon extensions
run: mold -run make neon-pg-ext -j$(nproc)
run: mold -run make ${make_vars} neon-pg-ext -j$(nproc)

- name: Build walproposer-lib
run: mold -run make walproposer-lib -j$(nproc)
run: mold -run make ${make_vars} walproposer-lib -j$(nproc)

- name: Run cargo build
env:
WITH_TESTS: ${{ matrix.sanitizers != 'enabled' && '--tests' || '' }}
run: |
PQ_LIB_DIR=$(pwd)/pg_install/v16/lib
export PQ_LIB_DIR
${cov_prefix} mold -run cargo build $CARGO_FLAGS $CARGO_FEATURES --bins --tests
export ASAN_OPTIONS=detect_leaks=0
${cov_prefix} mold -run cargo build $CARGO_FLAGS $CARGO_FEATURES --bins ${WITH_TESTS}

# Do install *before* running rust tests because they might recompile the
# binaries with different features/flags.
- name: Install rust binaries
env:
ARCH: ${{ inputs.arch }}
SANITIZERS: ${{ matrix.sanitizers }}
run: |
# Install target binaries
mkdir -p /tmp/neon/bin/
@@ -181,7 +192,7 @@ jobs:
done

# Install test executables and write list of all binaries (for code coverage)
if [[ $BUILD_TYPE == "debug" && $ARCH == 'x64' ]]; then
if [[ $BUILD_TYPE == "debug" && $ARCH == 'x64' && $SANITIZERS != 'enabled' ]]; then
# Keep bloated coverage data files away from the rest of the artifact
mkdir -p /tmp/coverage/

@@ -214,11 +225,10 @@ jobs:
role-duration-seconds: 18000 # 5 hours

- name: Run rust tests
if: ${{ matrix.sanitizers != 'enabled' }}
env:
NEXTEST_RETRIES: 3
run: |
PQ_LIB_DIR=$(pwd)/pg_install/v16/lib
export PQ_LIB_DIR
LD_LIBRARY_PATH=$(pwd)/pg_install/v17/lib
export LD_LIBRARY_PATH

@@ -271,6 +281,26 @@ jobs:
path: /tmp/neon
aws-oicd-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}

- name: Check diesel schema
if: inputs.build-type == 'release' && inputs.arch == 'x64'
env:
DATABASE_URL: postgresql://localhost:1235/storage_controller
POSTGRES_DISTRIB_DIR: /tmp/neon/pg_install
run: |
/tmp/neon/bin/neon_local init
/tmp/neon/bin/neon_local storage_controller start

diesel print-schema > storage_controller/src/schema.rs

if [ -n "$(git diff storage_controller/src/schema.rs)" ]; then
echo >&2 "Uncommitted changes in diesel schema"

git diff .
exit 1
fi

/tmp/neon/bin/neon_local storage_controller stop

# XXX: keep this after the binaries.list is formed, so the coverage can properly work later
- name: Merge and upload coverage data
if: inputs.build-type == 'debug'
@@ -303,7 +333,7 @@ jobs:
- name: Pytest regression tests
continue-on-error: ${{ matrix.lfc_state == 'with-lfc' && inputs.build-type == 'debug' }}
uses: ./.github/actions/run-python-test-set
timeout-minutes: 60
timeout-minutes: ${{ matrix.sanitizers != 'enabled' && 60 || 180 }}
with:
build_type: ${{ inputs.build-type }}
test_selection: regress
@@ -321,6 +351,7 @@ jobs:
PAGESERVER_VIRTUAL_FILE_IO_ENGINE: tokio-epoll-uring
PAGESERVER_GET_VECTORED_CONCURRENT_IO: sidecar-task
USE_LFC: ${{ matrix.lfc_state == 'with-lfc' && 'true' || 'false' }}
SANITIZERS: ${{ matrix.sanitizers }}

# Temporary disable this step until we figure out why it's so flaky
# Ref https://github.com/neondatabase/neon/issues/4540
8 .github/workflows/_check-codestyle-rust.yml vendored
@@ -16,6 +16,9 @@ defaults:
run:
shell: bash -euxo pipefail {0}

# No permission for GITHUB_TOKEN by default; the **minimal required** set of permissions should be granted in each job.
permissions: {}

jobs:
check-codestyle-rust:
strategy:
@@ -84,8 +87,3 @@ jobs:
run: |
cargo hakari generate --diff # workspace-hack Cargo.toml is up-to-date
cargo hakari manage-deps --dry-run # all workspace crates depend on workspace-hack

# https://github.com/EmbarkStudios/cargo-deny
- name: Check rust licenses/bans/advisories/sources
if: ${{ !cancelled() }}
run: cargo deny check --hide-inclusion-graph
12 .github/workflows/approved-for-ci-run.yml vendored
@@ -67,9 +67,9 @@ jobs:

- uses: actions/checkout@v4
with:
ref: main
ref: ${{ github.event.pull_request.head.sha }}
token: ${{ secrets.CI_ACCESS_TOKEN }}

- name: Look for existing PR
id: get-pr
env:
@@ -77,7 +77,7 @@ jobs:
run: |
ALREADY_CREATED="$(gh pr --repo ${GITHUB_REPOSITORY} list --head ${BRANCH} --base main --json number --jq '.[].number')"
echo "ALREADY_CREATED=${ALREADY_CREATED}" >> ${GITHUB_OUTPUT}

- name: Get changed labels
id: get-labels
if: steps.get-pr.outputs.ALREADY_CREATED != ''
@@ -94,8 +94,6 @@ jobs:
echo "LABELS_TO_ADD=${LABELS_TO_ADD}" >> ${GITHUB_OUTPUT}
echo "LABELS_TO_REMOVE=${LABELS_TO_REMOVE}" >> ${GITHUB_OUTPUT}

- run: gh pr checkout "${PR_NUMBER}"

- run: git checkout -b "${BRANCH}"

- run: git push --force origin "${BRANCH}"
@@ -103,7 +101,7 @@ jobs:

- name: Create a Pull Request for CI run (if required)
if: steps.get-pr.outputs.ALREADY_CREATED == ''
env:
env:
GH_TOKEN: ${{ secrets.CI_ACCESS_TOKEN }}
run: |
cat << EOF > body.md
@@ -140,7 +138,7 @@ jobs:

- run: git push --force origin "${BRANCH}"
if: steps.get-pr.outputs.ALREADY_CREATED != ''

cleanup:
# Close PRs and delete branchs if the original PR is closed.
6 .github/workflows/benchmarking.yml vendored
@@ -340,7 +340,7 @@ jobs:
],
"pg_version" : [
16,17
],
]
}'

if [ "$(date +%A)" = "Saturday" ] || [ ${RUN_AWS_RDS_AND_AURORA} = "true" ]; then
@@ -810,7 +810,7 @@ jobs:
# We might change it after https://github.com/neondatabase/neon/issues/2900.
#
# *_TPCH_S10_CONNSTR: DB generated with scale factor 10 (~10 GB)
if: ${{ !cancelled() && (github.event.inputs.run_only_pgvector_tests == 'false' || github.event.inputs.run_only_pgvector_tests == null) }}
# if: ${{ !cancelled() && (github.event.inputs.run_only_pgvector_tests == 'false' || github.event.inputs.run_only_pgvector_tests == null) }}
permissions:
contents: write
statuses: write
@@ -929,7 +929,7 @@ jobs:
SLACK_BOT_TOKEN: ${{ secrets.SLACK_BOT_TOKEN }}

user-examples-compare:
if: ${{ !cancelled() && (github.event.inputs.run_only_pgvector_tests == 'false' || github.event.inputs.run_only_pgvector_tests == null) }}
# if: ${{ !cancelled() && (github.event.inputs.run_only_pgvector_tests == 'false' || github.event.inputs.run_only_pgvector_tests == null) }}
permissions:
contents: write
statuses: write
2 .github/workflows/build-macos.yml vendored
@@ -235,7 +235,7 @@ jobs:
echo 'CPPFLAGS=-I/usr/local/opt/openssl@3/include' >> $GITHUB_ENV

- name: Run cargo build (only for v17)
run: PQ_LIB_DIR=$(pwd)/pg_install/v17/lib cargo build --all --release -j$(sysctl -n hw.ncpu)
run: cargo build --all --release -j$(sysctl -n hw.ncpu)

- name: Check that no warnings are produced (only for v17)
run: ./run_clippy.sh
41 .github/workflows/build_and_test.yml vendored
@@ -45,6 +45,26 @@ jobs:
run cancel-previous-in-concurrency-group.yml \
--field concurrency_group="${{ env.E2E_CONCURRENCY_GROUP }}"

files-changed:
needs: [ check-permissions ]
runs-on: [ self-hosted, small ]
timeout-minutes: 3
outputs:
check-rust-dependencies: ${{ steps.files-changed.outputs.rust_dependencies }}

steps:
- name: Checkout
uses: actions/checkout@v4
with:
submodules: true

- name: Check for file changes
uses: dorny/paths-filter@de90cc6fb38fc0963ad72b210f1f284cd68cea36 # v3.0.2
id: files-changed
with:
token: ${{ secrets.GITHUB_TOKEN }}
filters: .github/file-filters.yaml

tag:
needs: [ check-permissions ]
runs-on: [ self-hosted, small ]
@@ -170,6 +190,14 @@ jobs:
archs: '["x64", "arm64"]'
secrets: inherit

check-dependencies-rust:
needs: [ files-changed, build-build-tools-image ]
if: ${{ needs.files-changed.outputs.check-rust-dependencies == 'true' }}
uses: ./.github/workflows/cargo-deny.yml
with:
build-tools-image: ${{ needs.build-build-tools-image.outputs.image }}-bookworm
secrets: inherit

build-and-test-locally:
needs: [ tag, build-build-tools-image ]
strategy:
@@ -654,7 +682,7 @@ jobs:
push: true
pull: true
file: compute/compute-node.Dockerfile
target: neon-pg-ext-test
target: extension-tests
cache-from: type=registry,ref=cache.neon.build/compute-node-${{ matrix.version.pg }}:cache-${{ matrix.version.debian }}-${{ matrix.arch }}
tags: |
neondatabase/neon-test-extensions-${{ matrix.version.pg }}:${{needs.tag.outputs.build-tag}}-${{ matrix.version.debian }}-${{ matrix.arch }}
@@ -1332,6 +1360,8 @@ jobs:
- build-and-test-locally
- check-codestyle-python
- check-codestyle-rust
- check-dependencies-rust
- files-changed
- promote-images-dev
- test-images
- trigger-custom-extensions-build-and-wait
@@ -1344,4 +1374,11 @@ jobs:
if: |
contains(needs.*.result, 'failure')
|| contains(needs.*.result, 'cancelled')
|| contains(needs.*.result, 'skipped')
|| (needs.check-dependencies-rust.result == 'skipped' && needs.files-changed.outputs.check-rust-dependencies == 'true')
|| needs.build-and-test-locally.result == 'skipped'
|| needs.check-codestyle-python.result == 'skipped'
|| needs.check-codestyle-rust.result == 'skipped'
|| needs.files-changed.result == 'skipped'
|| needs.promote-images-dev.result == 'skipped'
|| needs.test-images.result == 'skipped'
|| needs.trigger-custom-extensions-build-and-wait.result == 'skipped'
133 .github/workflows/build_and_test_with_sanitizers.yml vendored Normal file
@@ -0,0 +1,133 @@
name: Build and Test with Sanitizers

on:
schedule:
# * is a special character in YAML so you have to quote this string
# ┌───────────── minute (0 - 59)
# │ ┌───────────── hour (0 - 23)
# │ │ ┌───────────── day of the month (1 - 31)
# │ │ │ ┌───────────── month (1 - 12 or JAN-DEC)
# │ │ │ │ ┌───────────── day of the week (0 - 6 or SUN-SAT)
- cron: '0 1 * * *' # run once a day, timezone is utc
workflow_dispatch:

defaults:
run:
shell: bash -euxo pipefail {0}

concurrency:
# Allow only one workflow per any non-`main` branch.
group: ${{ github.workflow }}-${{ github.ref_name }}-${{ github.ref_name == 'main' && github.sha || 'anysha' }}
cancel-in-progress: true

env:
RUST_BACKTRACE: 1
COPT: '-Werror'

jobs:
tag:
runs-on: [ self-hosted, small ]
container: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/base:pinned
outputs:
build-tag: ${{steps.build-tag.outputs.tag}}

steps:
# Need `fetch-depth: 0` to count the number of commits in the branch
- uses: actions/checkout@v4
with:
fetch-depth: 0

- name: Get build tag
run: |
echo run:$GITHUB_RUN_ID
echo ref:$GITHUB_REF_NAME
echo rev:$(git rev-list --count HEAD)
if [[ "$GITHUB_REF_NAME" == "main" ]]; then
echo "tag=$(git rev-list --count HEAD)" >> $GITHUB_OUTPUT
elif [[ "$GITHUB_REF_NAME" == "release" ]]; then
echo "tag=release-$(git rev-list --count HEAD)" >> $GITHUB_OUTPUT
elif [[ "$GITHUB_REF_NAME" == "release-proxy" ]]; then
echo "tag=release-proxy-$(git rev-list --count HEAD)" >> $GITHUB_OUTPUT
elif [[ "$GITHUB_REF_NAME" == "release-compute" ]]; then
echo "tag=release-compute-$(git rev-list --count HEAD)" >> $GITHUB_OUTPUT
else
echo "GITHUB_REF_NAME (value '$GITHUB_REF_NAME') is not set to either 'main' or 'release', 'release-proxy', 'release-compute'"
echo "tag=$GITHUB_RUN_ID" >> $GITHUB_OUTPUT
fi
shell: bash
id: build-tag

build-build-tools-image:
uses: ./.github/workflows/build-build-tools-image.yml
secrets: inherit

build-and-test-locally:
needs: [ tag, build-build-tools-image ]
strategy:
fail-fast: false
matrix:
arch: [ x64, arm64 ]
build-type: [ release ]
uses: ./.github/workflows/_build-and-test-locally.yml
with:
arch: ${{ matrix.arch }}
build-tools-image: ${{ needs.build-build-tools-image.outputs.image }}-bookworm
build-tag: ${{ needs.tag.outputs.build-tag }}
build-type: ${{ matrix.build-type }}
test-cfg: '[{"pg_version":"v17", "sanitizers": "enabled"}]'
secrets: inherit

create-test-report:
needs: [ build-and-test-locally, build-build-tools-image ]
if: ${{ !cancelled() }}
permissions:
id-token: write # aws-actions/configure-aws-credentials
statuses: write
contents: write
pull-requests: write
outputs:
report-url: ${{ steps.create-allure-report.outputs.report-url }}

runs-on: [ self-hosted, small ]
container:
image: ${{ needs.build-build-tools-image.outputs.image }}-bookworm
credentials:
username: ${{ secrets.NEON_DOCKERHUB_USERNAME }}
password: ${{ secrets.NEON_DOCKERHUB_PASSWORD }}
options: --init

steps:
- uses: actions/checkout@v4

- name: Create Allure report
if: ${{ !cancelled() }}
id: create-allure-report
uses: ./.github/actions/allure-report-generate
with:
store-test-results-into-db: true
aws-oicd-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
env:
REGRESS_TEST_RESULT_CONNSTR_NEW: ${{ secrets.REGRESS_TEST_RESULT_CONNSTR_NEW }}

- uses: actions/github-script@v7
if: ${{ !cancelled() }}
with:
# Retry script for 5XX server errors: https://github.com/actions/github-script#retries
retries: 5
script: |
const report = {
reportUrl: "${{ steps.create-allure-report.outputs.report-url }}",
reportJsonUrl: "${{ steps.create-allure-report.outputs.report-json-url }}",
}

const coverage = {}

const script = require("./scripts/comment-test-report.js")
await script({
github,
context,
fetch,
report,
coverage,
})
57 .github/workflows/cargo-deny.yml vendored Normal file
@@ -0,0 +1,57 @@
name: cargo deny checks

on:
workflow_call:
inputs:
build-tools-image:
required: false
type: string
schedule:
- cron: '0 0 * * *'

jobs:
cargo-deny:
strategy:
matrix:
ref: >-
${{
fromJSON(
github.event_name == 'schedule'
&& '["main","release","release-proxy","release-compute"]'
|| format('["{0}"]', github.sha)
)
}}

runs-on: [self-hosted, small]

container:
image: ${{ inputs.build-tools-image || 'neondatabase/build-tools:pinned' }}
credentials:
username: ${{ secrets.NEON_DOCKERHUB_USERNAME }}
password: ${{ secrets.NEON_DOCKERHUB_PASSWORD }}
options: --init

steps:
- name: Checkout
uses: actions/checkout@v4
with:
ref: ${{ matrix.ref }}

- name: Check rust licenses/bans/advisories/sources
env:
CARGO_DENY_TARGET: >-
${{ github.event_name == 'schedule' && 'advisories' || 'all' }}
run: cargo deny check --hide-inclusion-graph $CARGO_DENY_TARGET

- name: Post to a Slack channel
if: ${{ github.event_name == 'schedule' && failure() }}
uses: slackapi/slack-github-action@v2
with:
method: chat.postMessage
token: ${{ secrets.SLACK_BOT_TOKEN }}
payload: |
channel: ${{ vars.SLACK_CICD_CHANNEL_ID }}
text: |
Periodic cargo-deny on ${{ matrix.ref }}: ${{ job.status }}
<${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}|GitHub Run>
Pinging @oncall-devprod.
2 .github/workflows/neon_extra_builds.yml vendored
@@ -114,7 +114,7 @@ jobs:
run: make walproposer-lib -j$(nproc)

- name: Produce the build stats
run: PQ_LIB_DIR=$(pwd)/pg_install/v17/lib cargo build --all --release --timings -j$(nproc)
run: cargo build --all --release --timings -j$(nproc)

- name: Configure AWS credentials
uses: aws-actions/configure-aws-credentials@v4
6 .github/workflows/pg-clients.yml vendored
@@ -12,8 +12,8 @@ on:
pull_request:
paths:
- '.github/workflows/pg-clients.yml'
- 'test_runner/pg_clients/**'
- 'test_runner/logical_repl/**'
- 'test_runner/pg_clients/**/*.py'
- 'test_runner/logical_repl/**/*.py'
- 'poetry.lock'
workflow_dispatch:

@@ -104,6 +104,8 @@ jobs:
with:
api_key: ${{ secrets.NEON_STAGING_API_KEY }}
postgres_version: ${{ env.DEFAULT_PG_VERSION }}
project_settings: >-
{"enable_logical_replication": true}

- name: Run tests
uses: ./.github/actions/run-python-test-set
12 .github/workflows/pre-merge-checks.yml vendored
@@ -59,7 +59,10 @@ jobs:
echo "${RUST_CHANGED_FILES}"

build-build-tools-image:
if: needs.get-changed-files.outputs.python-changed == 'true'
if: |
false
|| needs.get-changed-files.outputs.python-changed == 'true'
|| needs.get-changed-files.outputs.rust-changed == 'true'
needs: [ get-changed-files ]
uses: ./.github/workflows/build-build-tools-image.yml
with:
@@ -92,7 +95,8 @@ jobs:
# - conclusion
# - neon-cloud-e2e
conclusion:
if: always()
# Do not run job on Pull Requests as it interferes with the `conclusion` job from the `build_and_test` workflow
if: always() && github.event_name == 'merge_group'
permissions:
statuses: write # for `github.repos.createCommitStatus(...)`
contents: write
@@ -124,6 +128,8 @@ jobs:
- name: Fail the job if any of the dependencies do not succeed or skipped
run: exit 1
if: |
(contains(needs.check-codestyle-python.result, 'skipped') && needs.get-changed-files.outputs.python-changed == 'true')
false
|| (needs.check-codestyle-python.result == 'skipped' && needs.get-changed-files.outputs.python-changed == 'true')
|| (needs.check-codestyle-rust.result == 'skipped' && needs.get-changed-files.outputs.rust-changed == 'true')
|| contains(needs.*.result, 'failure')
|| contains(needs.*.result, 'cancelled')
365 Cargo.lock generated
@@ -206,6 +206,16 @@ dependencies = [
|
||||
"syn 2.0.90",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "assert-json-diff"
|
||||
version = "2.0.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "47e4f2b81832e72834d7518d8487a0396a28cc408186a2e8854c0f98011faf12"
|
||||
dependencies = [
|
||||
"serde",
|
||||
"serde_json",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "async-channel"
|
||||
version = "1.9.0"
|
||||
@@ -290,9 +300,9 @@ checksum = "d468802bab17cbc0cc575e9b053f41e72aa36bfa6b7f55e3529ffa43161b97fa"
|
||||
|
||||
[[package]]
|
||||
name = "aws-config"
|
||||
version = "1.5.10"
|
||||
version = "1.5.15"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "9b49afaa341e8dd8577e1a2200468f98956d6eda50bcf4a53246cc00174ba924"
|
||||
checksum = "dc47e70fc35d054c8fcd296d47a61711f043ac80534a10b4f741904f81e73a90"
|
||||
dependencies = [
|
||||
"aws-credential-types",
|
||||
"aws-runtime",
|
||||
@@ -301,7 +311,7 @@ dependencies = [
|
||||
"aws-sdk-sts",
|
||||
"aws-smithy-async",
|
||||
"aws-smithy-http",
|
||||
"aws-smithy-json 0.60.7",
|
||||
"aws-smithy-json",
|
||||
"aws-smithy-runtime",
|
||||
"aws-smithy-runtime-api",
|
||||
"aws-smithy-types",
|
||||
@@ -332,9 +342,9 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "aws-runtime"
|
||||
version = "1.4.4"
|
||||
version = "1.5.4"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "b5ac934720fbb46206292d2c75b57e67acfc56fe7dfd34fb9a02334af08409ea"
|
||||
checksum = "bee7643696e7fdd74c10f9eb42848a87fe469d35eae9c3323f80aa98f350baac"
|
||||
dependencies = [
|
||||
"aws-credential-types",
|
||||
"aws-sigv4",
|
||||
@@ -358,15 +368,15 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "aws-sdk-iam"
|
||||
version = "1.53.0"
|
||||
version = "1.60.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "fb8a6fea8d335cde419176b1f2c6d2d6e97997719e7df4b51e59064310f48e4a"
|
||||
checksum = "a43daa438f8e7e4ebbbcb5c712b3b85db50d62e637a7da4ba9da51095d327460"
|
||||
dependencies = [
|
||||
"aws-credential-types",
|
||||
"aws-runtime",
|
||||
"aws-smithy-async",
|
||||
"aws-smithy-http",
|
||||
"aws-smithy-json 0.61.1",
|
||||
"aws-smithy-json",
|
||||
"aws-smithy-query",
|
||||
"aws-smithy-runtime",
|
||||
"aws-smithy-runtime-api",
|
||||
@@ -381,15 +391,15 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "aws-sdk-kms"
|
||||
version = "1.51.0"
|
||||
version = "1.58.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "3c30f6fd5646b99d9b45ec3a0c22e67112c175b2383100c960d7ee39d96c8d96"
|
||||
checksum = "40b7a24700ac548025a47a5c579886f5198895bb1eccd8964dfd71cd66c16912"
|
||||
dependencies = [
|
||||
"aws-credential-types",
|
||||
"aws-runtime",
|
||||
"aws-smithy-async",
|
||||
"aws-smithy-http",
|
||||
"aws-smithy-json 0.61.1",
|
||||
"aws-smithy-json",
|
||||
"aws-smithy-runtime",
|
||||
"aws-smithy-runtime-api",
|
||||
"aws-smithy-types",
|
||||
@@ -403,9 +413,9 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "aws-sdk-s3"
|
||||
version = "1.65.0"
|
||||
version = "1.68.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "d3ba2c5c0f2618937ce3d4a5ad574b86775576fa24006bcb3128c6e2cbf3c34e"
|
||||
checksum = "bc5ddf1dc70287dc9a2f953766a1fe15e3e74aef02fd1335f2afa475c9b4f4fc"
|
||||
dependencies = [
|
||||
"aws-credential-types",
|
||||
"aws-runtime",
|
||||
@@ -414,7 +424,7 @@ dependencies = [
|
||||
"aws-smithy-checksums",
|
||||
"aws-smithy-eventstream",
|
||||
"aws-smithy-http",
|
||||
"aws-smithy-json 0.61.1",
|
||||
"aws-smithy-json",
|
||||
"aws-smithy-runtime",
|
||||
"aws-smithy-runtime-api",
|
||||
"aws-smithy-types",
|
||||
@@ -437,15 +447,15 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "aws-sdk-sso"
|
||||
version = "1.50.0"
|
||||
version = "1.57.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "05ca43a4ef210894f93096039ef1d6fa4ad3edfabb3be92b80908b9f2e4b4eab"
|
||||
checksum = "c54bab121fe1881a74c338c5f723d1592bf3b53167f80268a1274f404e1acc38"
|
||||
dependencies = [
|
||||
"aws-credential-types",
|
||||
"aws-runtime",
|
||||
"aws-smithy-async",
|
||||
"aws-smithy-http",
|
||||
"aws-smithy-json 0.61.1",
|
||||
"aws-smithy-json",
|
||||
"aws-smithy-runtime",
|
||||
"aws-smithy-runtime-api",
|
||||
"aws-smithy-types",
|
||||
@@ -459,15 +469,15 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "aws-sdk-ssooidc"
|
||||
version = "1.51.0"
|
||||
version = "1.58.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "abaf490c2e48eed0bb8e2da2fb08405647bd7f253996e0f93b981958ea0f73b0"
|
||||
checksum = "8c8234fd024f7ac61c4e44ea008029bde934250f371efe7d4a39708397b1080c"
|
||||
dependencies = [
|
||||
"aws-credential-types",
|
||||
"aws-runtime",
|
||||
"aws-smithy-async",
|
||||
"aws-smithy-http",
|
||||
"aws-smithy-json 0.61.1",
|
||||
"aws-smithy-json",
|
||||
"aws-smithy-runtime",
|
||||
"aws-smithy-runtime-api",
|
||||
"aws-smithy-types",
|
||||
@@ -481,15 +491,15 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "aws-sdk-sts"
|
||||
version = "1.51.0"
|
||||
version = "1.58.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "b68fde0d69c8bfdc1060ea7da21df3e39f6014da316783336deff0a9ec28f4bf"
|
||||
checksum = "ba60e1d519d6f23a9df712c04fdeadd7872ac911c84b2f62a8bda92e129b7962"
|
||||
dependencies = [
|
||||
"aws-credential-types",
|
||||
"aws-runtime",
|
||||
"aws-smithy-async",
|
||||
"aws-smithy-http",
|
||||
"aws-smithy-json 0.61.1",
|
||||
"aws-smithy-json",
|
||||
"aws-smithy-query",
|
||||
"aws-smithy-runtime",
|
||||
"aws-smithy-runtime-api",
|
||||
@@ -504,9 +514,9 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "aws-sigv4"
|
||||
version = "1.2.6"
|
||||
version = "1.2.8"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "7d3820e0c08d0737872ff3c7c1f21ebbb6693d832312d6152bf18ef50a5471c2"
|
||||
checksum = "0bc5bbd1e4a2648fd8c5982af03935972c24a2f9846b396de661d351ee3ce837"
|
||||
dependencies = [
|
||||
"aws-credential-types",
|
||||
"aws-smithy-eventstream",
|
||||
@@ -533,9 +543,9 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "aws-smithy-async"
|
||||
version = "1.2.1"
|
||||
version = "1.2.4"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "62220bc6e97f946ddd51b5f1361f78996e704677afc518a4ff66b7a72ea1378c"
|
||||
checksum = "fa59d1327d8b5053c54bf2eaae63bf629ba9e904434d0835a28ed3c0ed0a614e"
|
||||
dependencies = [
|
||||
"futures-util",
|
||||
"pin-project-lite",
|
||||
@@ -565,9 +575,9 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "aws-smithy-eventstream"
|
||||
version = "0.60.5"
|
||||
version = "0.60.6"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "cef7d0a272725f87e51ba2bf89f8c21e4df61b9e49ae1ac367a6d69916ef7c90"
|
||||
checksum = "8b18559a41e0c909b77625adf2b8c50de480a8041e5e4a3f5f7d177db70abc5a"
|
||||
dependencies = [
|
||||
"aws-smithy-types",
|
||||
"bytes",
|
||||
@@ -576,9 +586,9 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "aws-smithy-http"
|
||||
version = "0.60.11"
|
||||
version = "0.60.12"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "5c8bc3e8fdc6b8d07d976e301c02fe553f72a39b7a9fea820e023268467d7ab6"
|
||||
checksum = "7809c27ad8da6a6a68c454e651d4962479e81472aa19ae99e59f9aba1f9713cc"
|
||||
dependencies = [
|
||||
"aws-smithy-eventstream",
|
||||
"aws-smithy-runtime-api",
|
||||
@@ -597,18 +607,9 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "aws-smithy-json"
|
||||
version = "0.60.7"
|
||||
version = "0.61.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "4683df9469ef09468dad3473d129960119a0d3593617542b7d52086c8486f2d6"
|
||||
dependencies = [
|
||||
"aws-smithy-types",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "aws-smithy-json"
|
||||
version = "0.61.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "ee4e69cc50921eb913c6b662f8d909131bb3e6ad6cb6090d3a39b66fc5c52095"
|
||||
checksum = "623a51127f24c30776c8b374295f2df78d92517386f77ba30773f15a30ce1422"
|
||||
dependencies = [
|
||||
"aws-smithy-types",
|
||||
]
|
||||
@@ -625,9 +626,9 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "aws-smithy-runtime"
|
||||
version = "1.7.4"
|
||||
version = "1.7.7"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "9f20685047ca9d6f17b994a07f629c813f08b5bce65523e47124879e60103d45"
|
||||
checksum = "865f7050bbc7107a6c98a397a9fcd9413690c27fa718446967cf03b2d3ac517e"
|
||||
dependencies = [
|
||||
"aws-smithy-async",
|
||||
"aws-smithy-http",
|
||||
@@ -669,9 +670,9 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "aws-smithy-types"
|
||||
version = "1.2.9"
|
||||
version = "1.2.13"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "4fbd94a32b3a7d55d3806fe27d98d3ad393050439dd05eb53ece36ec5e3d3510"
|
||||
checksum = "c7b8a53819e42f10d0821f56da995e1470b199686a1809168db6ca485665f042"
|
||||
dependencies = [
|
||||
"base64-simd",
|
||||
"bytes",
|
||||
@@ -704,9 +705,9 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "aws-types"
|
||||
version = "1.3.3"
|
||||
version = "1.3.5"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "5221b91b3e441e6675310829fd8984801b772cb1546ef6c0e54dec9f1ac13fef"
|
||||
checksum = "dfbd0a668309ec1f66c0f6bda4840dd6d4796ae26d699ebc266d7cc95c6d040f"
|
||||
dependencies = [
|
||||
"aws-credential-types",
|
||||
"aws-smithy-async",
|
||||
@@ -941,6 +942,18 @@ version = "1.6.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "8c3c1a368f70d6cf7302d78f8f7093da241fb8e8807c05cc9e51a125895a6d5b"
|
||||
|
||||
[[package]]
|
||||
name = "bb8"
|
||||
version = "0.8.6"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "d89aabfae550a5c44b43ab941844ffcd2e993cb6900b342debf59e9ea74acdb8"
|
||||
dependencies = [
|
||||
"async-trait",
|
||||
"futures-util",
|
||||
"parking_lot 0.12.1",
|
||||
"tokio",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "bcder"
|
||||
version = "0.7.4"
|
||||
@@ -966,7 +979,7 @@ version = "0.70.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "f49d8fed880d473ea71efb9bf597651e77201bdd4893efe54c9e5d65ae04ce6f"
|
||||
dependencies = [
|
||||
"bitflags 2.4.1",
|
||||
"bitflags 2.8.0",
|
||||
"cexpr",
|
||||
"clang-sys",
|
||||
"itertools 0.12.1",
|
||||
@@ -994,9 +1007,9 @@ checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a"
|
||||
|
||||
[[package]]
|
||||
name = "bitflags"
|
||||
version = "2.4.1"
|
||||
version = "2.8.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "327762f6e5a765692301e5bb513e0d9fef63be86bbc14528052b1cd3e6f03e07"
|
||||
checksum = "8f68f53c83ab957f72c32642f3868eec03eb974d1fb82e453128456482613d36"
|
||||
|
||||
[[package]]
|
||||
name = "block-buffer"
|
||||
@@ -1007,6 +1020,12 @@ dependencies = [
|
||||
"generic-array",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "boxcar"
|
||||
version = "0.2.8"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "2721c3c5a6f0e7f7e607125d963fedeb765f545f67adc9d71ed934693881eb42"
|
||||
|
||||
[[package]]
|
||||
name = "bstr"
|
||||
version = "1.5.0"
|
||||
@@ -1213,6 +1232,20 @@ version = "0.7.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "afb84c814227b90d6895e01398aee0d8033c00e7466aca416fb6a8e0eb19d8a7"
|
||||
|
||||
[[package]]
|
||||
name = "clashmap"
|
||||
version = "1.0.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "93bd59c81e2bd87a775ae2de75f070f7e2bfe97363a6ad652f46824564c23e4d"
|
||||
dependencies = [
|
||||
"crossbeam-utils",
|
||||
"hashbrown 0.15.2",
|
||||
"lock_api",
|
||||
"parking_lot_core 0.9.8",
|
||||
"polonius-the-crab",
|
||||
"replace_with",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "colorchoice"
|
||||
version = "1.0.0"
|
||||
@@ -1549,7 +1582,7 @@ version = "0.27.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "f476fe445d41c9e991fd07515a6f463074b782242ccf4a5b7b1d1012e70824df"
|
||||
dependencies = [
|
||||
"bitflags 2.4.1",
|
||||
"bitflags 2.8.0",
|
||||
"crossterm_winapi",
|
||||
"libc",
|
||||
"parking_lot 0.12.1",
|
||||
@@ -1780,16 +1813,29 @@ version = "2.2.6"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "ccf1bedf64cdb9643204a36dd15b19a6ce8e7aa7f7b105868e9f1fad5ffa7d12"
|
||||
dependencies = [
|
||||
"bitflags 2.4.1",
|
||||
"bitflags 2.8.0",
|
||||
"byteorder",
|
||||
"chrono",
|
||||
"diesel_derives",
|
||||
"itoa",
|
||||
"pq-sys",
|
||||
"r2d2",
|
||||
"serde_json",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "diesel-async"
|
||||
version = "0.5.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "51a307ac00f7c23f526a04a77761a0519b9f0eb2838ebf5b905a58580095bdcb"
|
||||
dependencies = [
|
||||
"async-trait",
|
||||
"bb8",
|
||||
"diesel",
|
||||
"futures-util",
|
||||
"scoped-futures",
|
||||
"tokio",
|
||||
"tokio-postgres",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "diesel_derives"
|
||||
version = "2.2.1"
|
||||
@@ -2403,6 +2449,16 @@ dependencies = [
|
||||
"wasm-bindgen",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "gettid"
|
||||
version = "0.1.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "397256552fed4a9e577850498071831ec8f18ea83368aecc114cab469dcb43e5"
|
||||
dependencies = [
|
||||
"libc",
|
||||
"winapi",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "gimli"
|
||||
version = "0.31.1"
|
||||
@@ -2531,6 +2587,12 @@ dependencies = [
|
||||
"allocator-api2",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "hashbrown"
|
||||
version = "0.15.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "bf151400ff0baff5465007dd2f3e717f3fe502074ca563069ce3a6629d07b289"
|
||||
|
||||
[[package]]
|
||||
name = "hashlink"
|
||||
version = "0.9.1"
|
||||
@@ -2581,6 +2643,15 @@ version = "0.4.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "6fe2267d4ed49bc07b63801559be28c718ea06c4738b7a03c94df7386d2cde46"
|
||||
|
||||
[[package]]
|
||||
name = "higher-kinded-types"
|
||||
version = "0.1.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "561985554c8b8d4808605c90a5f1979cc6c31a5d20b78465cd59501233c6678e"
|
||||
dependencies = [
|
||||
"never-say-never",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "hmac"
|
||||
version = "0.12.1"
|
||||
@@ -3059,11 +3130,11 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "inotify"
|
||||
version = "0.9.6"
|
||||
version = "0.11.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "f8069d3ec154eb856955c1c0fbffefbf5f3c40a104ec912d4797314c1801abff"
|
||||
checksum = "f37dccff2791ab604f9babef0ba14fbe0be30bd368dc541e2b08d07c8aa908f3"
|
||||
dependencies = [
|
||||
"bitflags 1.3.2",
|
||||
"bitflags 2.8.0",
|
||||
"inotify-sys",
|
||||
"libc",
|
||||
]
|
||||
@@ -3240,9 +3311,9 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "kqueue"
|
||||
version = "1.0.7"
|
||||
version = "1.0.8"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "2c8fc60ba15bf51257aa9807a48a61013db043fcf3a78cb0d916e8e396dcad98"
|
||||
checksum = "7447f1ca1b7b563588a205fe93dea8df60fd981423a768bc1c0ded35ed147d0c"
|
||||
dependencies = [
|
||||
"kqueue-sys",
|
||||
"libc",
|
||||
@@ -3250,9 +3321,9 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "kqueue-sys"
|
||||
version = "1.0.3"
|
||||
version = "1.0.4"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "8367585489f01bc55dd27404dcf56b95e6da061a256a666ab23be9ba96a2e587"
|
||||
checksum = "ed9625ffda8729b85e45cf04090035ac368927b8cebc34898e7c120f52e4838b"
|
||||
dependencies = [
|
||||
"bitflags 1.3.2",
|
||||
"libc",
|
||||
@@ -3279,9 +3350,9 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "libc"
|
||||
version = "0.2.167"
|
||||
version = "0.2.169"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "09d6582e104315a817dff97f75133544b2e094ee22447d2acf4a74e189ba06fc"
|
||||
checksum = "b5aba8db14291edd000dfcc4d620c7ebfb122c613afb886ca8803fa4e128a20a"
|
||||
|
||||
[[package]]
|
||||
name = "libloading"
|
||||
@@ -3528,14 +3599,14 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "mio"
|
||||
version = "0.8.11"
|
||||
version = "1.0.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "a4a650543ca06a924e8b371db273b2756685faae30f8487da1b56505a8f78b0c"
|
||||
checksum = "2886843bf800fba2e3377cff24abf6379b4c4d5c6681eaf9ea5b0d15090450bd"
|
||||
dependencies = [
|
||||
"libc",
|
||||
"log",
|
||||
"wasi 0.11.0+wasi-snapshot-preview1",
|
||||
"windows-sys 0.48.0",
|
||||
"windows-sys 0.52.0",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@@ -3544,6 +3615,12 @@ version = "0.8.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "e5ce46fe64a9d73be07dcbe690a38ce1b293be448fd8ce1e6c1b8062c9f72c6a"
|
||||
|
||||
[[package]]
|
||||
name = "never-say-never"
|
||||
version = "6.6.666"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "cf5a574dadd7941adeaa71823ecba5e28331b8313fb2e1c6a5c7e5981ea53ad6"
|
||||
|
||||
[[package]]
|
||||
name = "nix"
|
||||
version = "0.25.1"
|
||||
@@ -3575,7 +3652,7 @@ version = "0.27.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "2eb04e9c688eff1c89d72b407f168cf79bb9e867a9d3323ed6c01519eb9cc053"
|
||||
dependencies = [
|
||||
"bitflags 2.4.1",
|
||||
"bitflags 2.8.0",
|
||||
"cfg-if",
|
||||
"libc",
|
||||
"memoffset 0.9.0",
|
||||
@@ -3593,12 +3670,11 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "notify"
|
||||
version = "6.1.1"
|
||||
version = "8.0.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "6205bd8bb1e454ad2e27422015fb5e4f2bcc7e08fa8f27058670d208324a4d2d"
|
||||
checksum = "2fee8403b3d66ac7b26aee6e40a897d85dc5ce26f44da36b8b73e987cc52e943"
|
||||
dependencies = [
|
||||
"bitflags 2.4.1",
|
||||
"crossbeam-channel",
|
||||
"bitflags 2.8.0",
|
||||
"filetime",
|
||||
"fsevent-sys",
|
||||
"inotify",
|
||||
@@ -3606,10 +3682,17 @@ dependencies = [
|
||||
"libc",
|
||||
"log",
|
||||
"mio",
|
||||
"notify-types",
|
||||
"walkdir",
|
||||
"windows-sys 0.48.0",
|
||||
"windows-sys 0.59.0",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "notify-types"
|
||||
version = "2.0.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "5e0826a989adedc2a244799e823aece04662b66609d96af8dff7ac6df9a8925d"
|
||||
|
||||
[[package]]
|
||||
name = "ntapi"
|
||||
version = "0.4.1"
|
||||
@@ -4155,6 +4238,16 @@ dependencies = [
|
||||
"workspace_hack",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "papaya"
|
||||
version = "0.1.8"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "dc7c76487f7eaa00a0fc1d7f88dc6b295aec478d11b0fc79f857b62c2874124c"
|
||||
dependencies = [
|
||||
"equivalent",
|
||||
"seize",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "parking"
|
||||
version = "2.1.1"
|
||||
@@ -4421,10 +4514,20 @@ dependencies = [
|
||||
"plotters-backend",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "polonius-the-crab"
|
||||
version = "0.4.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "e97ca2c89572ae41bbec1c99498251f87dd5a94e500c5ec19c382dd593dd5ce9"
|
||||
dependencies = [
|
||||
"higher-kinded-types",
|
||||
"never-say-never",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "postgres"
|
||||
version = "0.19.6"
|
||||
source = "git+https://github.com/neondatabase/rust-postgres.git?branch=neon#8b44892f7851e705810b2cb54504325699966070"
|
||||
version = "0.19.7"
|
||||
source = "git+https://github.com/neondatabase/rust-postgres.git?branch=neon#1f21e7959a96a34dcfbfce1b14b73286cdadffe9"
|
||||
dependencies = [
|
||||
"bytes",
|
||||
"fallible-iterator",
|
||||
@@ -4437,9 +4540,9 @@ dependencies = [
|
||||
[[package]]
|
||||
name = "postgres-protocol"
|
||||
version = "0.6.6"
|
||||
source = "git+https://github.com/neondatabase/rust-postgres.git?branch=neon#8b44892f7851e705810b2cb54504325699966070"
|
||||
source = "git+https://github.com/neondatabase/rust-postgres.git?branch=neon#1f21e7959a96a34dcfbfce1b14b73286cdadffe9"
|
||||
dependencies = [
|
||||
"base64 0.21.1",
|
||||
"base64 0.22.1",
|
||||
"byteorder",
|
||||
"bytes",
|
||||
"fallible-iterator",
|
||||
@@ -4471,7 +4574,7 @@ dependencies = [
|
||||
[[package]]
|
||||
name = "postgres-types"
|
||||
version = "0.2.6"
|
||||
source = "git+https://github.com/neondatabase/rust-postgres.git?branch=neon#8b44892f7851e705810b2cb54504325699966070"
|
||||
source = "git+https://github.com/neondatabase/rust-postgres.git?branch=neon#1f21e7959a96a34dcfbfce1b14b73286cdadffe9"
|
||||
dependencies = [
|
||||
"bytes",
|
||||
"chrono",
|
||||
@@ -4603,15 +4706,6 @@ version = "0.2.17"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "5b40af805b3121feab8a3c29f04d8ad262fa8e0561883e7653e024ae4479e6de"
|
||||
|
||||
[[package]]
|
||||
name = "pq-sys"
|
||||
version = "0.6.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "f6cc05d7ea95200187117196eee9edd0644424911821aeb28a18ce60ea0b8793"
|
||||
dependencies = [
|
||||
"vcpkg",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "pq_proto"
|
||||
version = "0.1.0"
|
||||
@@ -4660,7 +4754,7 @@ version = "0.16.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "731e0d9356b0c25f16f33b5be79b1c57b562f141ebfcdb0ad8ac2c13a24293b4"
|
||||
dependencies = [
|
||||
"bitflags 2.4.1",
|
||||
"bitflags 2.8.0",
|
||||
"chrono",
|
||||
"flate2",
|
||||
"hex",
|
||||
@@ -4675,7 +4769,7 @@ version = "0.16.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "2d3554923a69f4ce04c4a754260c338f505ce22642d3830e049a399fc2059a29"
|
||||
dependencies = [
|
||||
"bitflags 2.4.1",
|
||||
"bitflags 2.8.0",
|
||||
"chrono",
|
||||
"hex",
|
||||
]
|
||||
@@ -4781,6 +4875,7 @@ dependencies = [
|
||||
"ahash",
|
||||
"anyhow",
|
||||
"arc-swap",
|
||||
"assert-json-diff",
|
||||
"async-compression",
|
||||
"async-trait",
|
||||
"atomic-take",
|
||||
@@ -4788,15 +4883,16 @@ dependencies = [
|
||||
"aws-sdk-iam",
|
||||
"aws-sigv4",
|
||||
"base64 0.13.1",
|
||||
"boxcar",
|
||||
"bstr",
|
||||
"bytes",
|
||||
"camino",
|
||||
"camino-tempfile",
|
||||
"chrono",
|
||||
"clap",
|
||||
"clashmap",
|
||||
"compute_api",
|
||||
"consumption_metrics",
|
||||
"dashmap 5.5.0",
|
||||
"ecdsa 0.16.9",
|
||||
"ed25519-dalek",
|
||||
"env_logger 0.10.2",
|
||||
@@ -4804,6 +4900,7 @@ dependencies = [
|
||||
"flate2",
|
||||
"framed-websockets",
|
||||
"futures",
|
||||
"gettid",
|
||||
"hashbrown 0.14.5",
|
||||
"hashlink",
|
||||
"hex",
|
||||
@@ -4826,7 +4923,9 @@ dependencies = [
|
||||
"measured",
|
||||
"metrics",
|
||||
"once_cell",
|
||||
"opentelemetry",
|
||||
"p256 0.13.2",
|
||||
"papaya",
|
||||
"parking_lot 0.12.1",
|
||||
"parquet",
|
||||
"parquet_derive",
|
||||
@@ -4873,6 +4972,9 @@ dependencies = [
|
||||
"tokio-tungstenite 0.21.0",
|
||||
"tokio-util",
|
||||
"tracing",
|
||||
"tracing-log",
|
||||
"tracing-opentelemetry",
|
||||
"tracing-serde",
|
||||
"tracing-subscriber",
|
||||
"tracing-utils",
|
||||
"try-lock",
|
||||
@@ -4924,17 +5026,6 @@ dependencies = [
|
||||
"proc-macro2",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "r2d2"
|
||||
version = "0.8.10"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "51de85fb3fb6524929c8a2eb85e6b6d363de4e8c48f9e2c2eac4944abc181c93"
|
||||
dependencies = [
|
||||
"log",
|
||||
"parking_lot 0.12.1",
|
||||
"scheduled-thread-pool",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "rand"
|
||||
version = "0.7.3"
|
||||
@@ -5215,6 +5306,12 @@ dependencies = [
|
||||
"utils",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "replace_with"
|
||||
version = "0.1.7"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "e3a8614ee435691de62bcffcf4a66d91b3594bf1428a5722e79103249a095690"
|
||||
|
||||
[[package]]
|
||||
name = "reqwest"
|
||||
version = "0.12.4"
|
||||
@@ -5494,7 +5591,7 @@ version = "0.38.41"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "d7f649912bc1495e167a6edee79151c84b1bad49748cb4f1f1167f459f6224f6"
|
||||
dependencies = [
|
||||
"bitflags 2.4.1",
|
||||
"bitflags 2.8.0",
|
||||
"errno",
|
||||
"libc",
|
||||
"linux-raw-sys 0.4.14",
|
||||
@@ -5749,12 +5846,12 @@ dependencies = [
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "scheduled-thread-pool"
|
||||
version = "0.2.7"
|
||||
name = "scoped-futures"
|
||||
version = "0.1.4"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "3cbc66816425a074528352f5789333ecff06ca41b36b0b0efdfbb29edc391a19"
|
||||
checksum = "1b24aae2d0636530f359e9d5ef0c04669d11c5e756699b27a6a6d845d8329091"
|
||||
dependencies = [
|
||||
"parking_lot 0.12.1",
|
||||
"pin-project-lite",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@@ -5831,6 +5928,16 @@ dependencies = [
|
||||
"libc",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "seize"
|
||||
version = "0.4.9"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "d84b0c858bdd30cb56f5597f8b3bf702ec23829e652cc636a1e5a7b9de46ae93"
|
||||
dependencies = [
|
||||
"libc",
|
||||
"windows-sys 0.52.0",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "semver"
|
||||
version = "1.0.17"
|
||||
@@ -6289,6 +6396,7 @@ dependencies = [
|
||||
"clap",
|
||||
"control_plane",
|
||||
"diesel",
|
||||
"diesel-async",
|
||||
"diesel_migrations",
|
||||
"fail",
|
||||
"futures",
|
||||
@@ -6303,10 +6411,12 @@ dependencies = [
|
||||
"pageserver_api",
|
||||
"pageserver_client",
|
||||
"postgres_connection",
|
||||
"r2d2",
|
||||
"rand 0.8.5",
|
||||
"reqwest",
|
||||
"routerify",
|
||||
"rustls 0.23.18",
|
||||
"rustls-native-certs 0.8.0",
|
||||
"scoped-futures",
|
||||
"scopeguard",
|
||||
"serde",
|
||||
"serde_json",
|
||||
@@ -6314,6 +6424,8 @@ dependencies = [
|
||||
"strum_macros",
|
||||
"thiserror 1.0.69",
|
||||
"tokio",
|
||||
"tokio-postgres",
|
||||
"tokio-postgres-rustls",
|
||||
"tokio-util",
|
||||
"tracing",
|
||||
"utils",
|
||||
@@ -6556,7 +6668,7 @@ dependencies = [
|
||||
"fastrand 2.2.0",
|
||||
"once_cell",
|
||||
"rustix",
|
||||
"windows-sys 0.52.0",
|
||||
"windows-sys 0.59.0",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@@ -6768,21 +6880,20 @@ checksum = "1f3ccbac311fea05f86f61904b462b55fb3df8837a366dfc601a0161d0532f20"
|
||||
|
||||
[[package]]
|
||||
name = "tokio"
|
||||
version = "1.38.1"
|
||||
version = "1.43.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "eb2caba9f80616f438e09748d5acda951967e1ea58508ef53d9c6402485a46df"
|
||||
checksum = "3d61fa4ffa3de412bfea335c6ecff681de2b609ba3c77ef3e00e521813a9ed9e"
|
||||
dependencies = [
|
||||
"backtrace",
|
||||
"bytes",
|
||||
"libc",
|
||||
"mio",
|
||||
"num_cpus",
|
||||
"parking_lot 0.12.1",
|
||||
"pin-project-lite",
|
||||
"signal-hook-registry",
|
||||
"socket2",
|
||||
"tokio-macros",
|
||||
"windows-sys 0.48.0",
|
||||
"windows-sys 0.52.0",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@@ -6813,9 +6924,9 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "tokio-macros"
|
||||
version = "2.3.0"
|
||||
version = "2.5.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "5f5ae998a069d4b5aba8ee9dad856af7d520c3699e6159b185c2acd48155d39a"
|
||||
checksum = "6e06d43f1345a3bcd39f6a56dbb7dcab2ba47e68e8ac134855e7e2bdbaf8cab8"
|
||||
dependencies = [
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
@@ -6824,8 +6935,8 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "tokio-postgres"
|
||||
version = "0.7.9"
|
||||
source = "git+https://github.com/neondatabase/rust-postgres.git?branch=neon#8b44892f7851e705810b2cb54504325699966070"
|
||||
version = "0.7.10"
|
||||
source = "git+https://github.com/neondatabase/rust-postgres.git?branch=neon#1f21e7959a96a34dcfbfce1b14b73286cdadffe9"
|
||||
dependencies = [
|
||||
"async-trait",
|
||||
"byteorder",
|
||||
@@ -7100,7 +7211,7 @@ version = "0.6.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "403fa3b783d4b626a8ad51d766ab03cb6d2dbfc46b1c5d4448395e6628dc9697"
|
||||
dependencies = [
|
||||
"bitflags 2.4.1",
|
||||
"bitflags 2.8.0",
|
||||
"bytes",
|
||||
"http 1.1.0",
|
||||
"http-body 1.0.0",
|
||||
@@ -7515,12 +7626,6 @@ version = "0.1.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "830b7e5d4d90034032940e4ace0d9a9a057e7a45cd94e6c007832e39edb82f6d"
|
||||
|
||||
[[package]]
|
||||
name = "vcpkg"
|
||||
version = "0.2.15"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "accd4ea62f7bb7a82fe23066fb0957d48ef677f6eeb8215f372f52e48bb32426"
|
||||
|
||||
[[package]]
|
||||
name = "version_check"
|
||||
version = "0.9.4"
|
||||
@@ -7603,9 +7708,9 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "walkdir"
|
||||
version = "2.3.3"
|
||||
version = "2.5.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "36df944cda56c7d8d8b7496af378e6b16de9284591917d307c9b4d313c44e698"
|
||||
checksum = "29790946404f91d9c5d06f9874efddea1dc06c5efe94541a7d6863108e3a5e4b"
|
||||
dependencies = [
|
||||
"same-file",
|
||||
"winapi-util",
|
||||
@@ -7857,6 +7962,15 @@ dependencies = [
|
||||
"windows-targets 0.52.6",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "windows-sys"
|
||||
version = "0.59.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "1e38bc4d79ed67fd075bcc251a1c39b32a1776bbe92e5bef1f0bf1f8c531853b"
|
||||
dependencies = [
|
||||
"windows-targets 0.52.6",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "windows-targets"
|
||||
version = "0.48.0"
|
||||
@@ -8085,6 +8199,7 @@ dependencies = [
|
||||
"tower 0.4.13",
|
||||
"tracing",
|
||||
"tracing-core",
|
||||
"tracing-log",
|
||||
"url",
|
||||
"zerocopy",
|
||||
"zeroize",
|
||||
|
||||
@@ -54,6 +54,7 @@ async-compression = { version = "0.4.0", features = ["tokio", "gzip", "zstd"] }
|
||||
atomic-take = "1.1.0"
|
||||
backtrace = "0.3.74"
|
||||
flate2 = "1.0.26"
|
||||
assert-json-diff = "2"
|
||||
async-stream = "0.3"
|
||||
async-trait = "0.1"
|
||||
aws-config = { version = "1.5", default-features = false, features=["rustls", "sso"] }
|
||||
@@ -77,10 +78,10 @@ camino = "1.1.6"
|
||||
cfg-if = "1.0.0"
|
||||
chrono = { version = "0.4", default-features = false, features = ["clock"] }
|
||||
clap = { version = "4.0", features = ["derive", "env"] }
|
||||
clashmap = { version = "1.0", features = ["raw-api"] }
|
||||
comfy-table = "7.1"
|
||||
const_format = "0.2"
|
||||
crc32c = "0.6"
|
||||
dashmap = { version = "5.5.0", features = ["raw-api"] }
|
||||
diatomic-waker = { version = "0.2.3" }
|
||||
either = "1.8"
|
||||
enum-map = "2.4.2"
|
||||
@@ -123,7 +124,7 @@ measured = { version = "0.0.22", features=["lasso"] }
|
||||
measured-process = { version = "0.0.22" }
|
||||
memoffset = "0.9"
|
||||
nix = { version = "0.27", features = ["dir", "fs", "process", "socket", "signal", "poll"] }
|
||||
notify = "6.0.0"
|
||||
notify = "8.0.0"
|
||||
num_cpus = "1.15"
|
||||
num-traits = "0.2.15"
|
||||
once_cell = "1.13"
|
||||
@@ -177,7 +178,7 @@ test-context = "0.3"
|
||||
thiserror = "1.0"
|
||||
tikv-jemallocator = { version = "0.6", features = ["profiling", "stats", "unprefixed_malloc_on_supported_platforms"] }
|
||||
tikv-jemalloc-ctl = { version = "0.6", features = ["stats"] }
|
||||
tokio = { version = "1.17", features = ["macros"] }
|
||||
tokio = { version = "1.41", features = ["macros"] }
|
||||
tokio-epoll-uring = { git = "https://github.com/neondatabase/tokio-epoll-uring.git" , branch = "main" }
|
||||
tokio-io-timeout = "1.2.0"
|
||||
tokio-postgres-rustls = "0.12.0"
|
||||
@@ -193,7 +194,9 @@ tower-http = { version = "0.6.2", features = ["request-id", "trace"] }
|
||||
tower-service = "0.3.3"
|
||||
tracing = "0.1"
|
||||
tracing-error = "0.2"
|
||||
tracing-log = "0.2"
|
||||
tracing-opentelemetry = "0.28"
|
||||
tracing-serde = "0.2.0"
|
||||
tracing-subscriber = { version = "0.3", default-features = false, features = ["smallvec", "fmt", "tracing-log", "std", "env-filter", "json"] }
|
||||
try-lock = "0.2.5"
|
||||
twox-hash = { version = "1.6.3", default-features = false }
|
||||
|
||||
@@ -45,7 +45,7 @@ COPY --chown=nonroot . .
|
||||
|
||||
ARG ADDITIONAL_RUSTFLAGS
|
||||
RUN set -e \
|
||||
&& PQ_LIB_DIR=$(pwd)/pg_install/v${STABLE_PG_VERSION}/lib RUSTFLAGS="-Clinker=clang -Clink-arg=-fuse-ld=mold -Clink-arg=-Wl,--no-rosegment -Cforce-frame-pointers=yes ${ADDITIONAL_RUSTFLAGS}" cargo build \
|
||||
&& RUSTFLAGS="-Clinker=clang -Clink-arg=-fuse-ld=mold -Clink-arg=-Wl,--no-rosegment -Cforce-frame-pointers=yes ${ADDITIONAL_RUSTFLAGS}" cargo build \
|
||||
--bin pg_sni_router \
|
||||
--bin pageserver \
|
||||
--bin pagectl \
|
||||
|
||||
19
Makefile
@@ -10,18 +10,29 @@ ICU_PREFIX_DIR := /usr/local/icu
|
||||
# environment variable.
|
||||
#
|
||||
BUILD_TYPE ?= debug
|
||||
WITH_SANITIZERS ?= no
|
||||
ifeq ($(BUILD_TYPE),release)
|
||||
PG_CONFIGURE_OPTS = --enable-debug --with-openssl
|
||||
PG_CFLAGS = -O2 -g3 $(CFLAGS)
|
||||
PG_LDFLAGS = $(LDFLAGS)
|
||||
# Unfortunately, `--profile=...` is a nightly feature
|
||||
CARGO_BUILD_FLAGS += --release
|
||||
else ifeq ($(BUILD_TYPE),debug)
|
||||
PG_CONFIGURE_OPTS = --enable-debug --with-openssl --enable-cassert --enable-depend
|
||||
PG_CFLAGS = -O0 -g3 $(CFLAGS)
|
||||
PG_LDFLAGS = $(LDFLAGS)
|
||||
else
|
||||
$(error Bad build type '$(BUILD_TYPE)', see Makefile for options)
|
||||
endif
|
||||
|
||||
ifeq ($(WITH_SANITIZERS),yes)
|
||||
PG_CFLAGS += -fsanitize=address -fsanitize=undefined -fno-sanitize-recover
|
||||
COPT += -Wno-error # to avoid failing on warnings induced by sanitizers
|
||||
PG_LDFLAGS = -fsanitize=address -fsanitize=undefined -static-libasan -static-libubsan $(LDFLAGS)
|
||||
export CC := gcc
|
||||
export ASAN_OPTIONS := detect_leaks=0
|
||||
endif
|
||||
|
||||
ifeq ($(shell test -e /home/nonroot/.docker_build && echo -n yes),yes)
|
||||
# Exclude static build openssl, icu for local build (MacOS, Linux)
|
||||
# Only keep for build type release and debug
|
||||
@@ -33,7 +44,9 @@ endif
|
||||
UNAME_S := $(shell uname -s)
|
||||
ifeq ($(UNAME_S),Linux)
|
||||
# Seccomp BPF is only available for Linux
|
||||
PG_CONFIGURE_OPTS += --with-libseccomp
|
||||
ifneq ($(WITH_SANITIZERS),yes)
|
||||
PG_CONFIGURE_OPTS += --with-libseccomp
|
||||
endif
|
||||
else ifeq ($(UNAME_S),Darwin)
|
||||
PG_CFLAGS += -DUSE_PREFETCH
|
||||
ifndef DISABLE_HOMEBREW
|
||||
@@ -64,8 +77,6 @@ CARGO_BUILD_FLAGS += $(filter -j1,$(MAKEFLAGS))
|
||||
CARGO_CMD_PREFIX += $(if $(filter n,$(MAKEFLAGS)),,+)
|
||||
# Force cargo not to print progress bar
|
||||
CARGO_CMD_PREFIX += CARGO_TERM_PROGRESS_WHEN=never CI=1
|
||||
# Set PQ_LIB_DIR to make sure `storage_controller` get linked with bundled libpq (through diesel)
|
||||
CARGO_CMD_PREFIX += PQ_LIB_DIR=$(POSTGRES_INSTALL_DIR)/v16/lib
|
||||
|
||||
CACHEDIR_TAG_CONTENTS := "Signature: 8a477f597d28d172789f06886806bc55"
|
||||
|
||||
@@ -108,7 +119,7 @@ $(POSTGRES_INSTALL_DIR)/build/%/config.status:
|
||||
EXTRA_VERSION=$$(cd $(ROOT_PROJECT_DIR)/vendor/postgres-$$VERSION && git rev-parse HEAD); \
|
||||
(cd $(POSTGRES_INSTALL_DIR)/build/$$VERSION && \
|
||||
env PATH="$(EXTRA_PATH_OVERRIDES):$$PATH" $(ROOT_PROJECT_DIR)/vendor/postgres-$$VERSION/configure \
|
||||
CFLAGS='$(PG_CFLAGS)' \
|
||||
CFLAGS='$(PG_CFLAGS)' LDFLAGS='$(PG_LDFLAGS)' \
|
||||
$(PG_CONFIGURE_OPTS) --with-extra-version=" ($$EXTRA_VERSION)" \
|
||||
--prefix=$(abspath $(POSTGRES_INSTALL_DIR))/$$VERSION > configure.log)
|
||||
|
||||
|
||||
@@ -3,10 +3,17 @@ ARG DEBIAN_VERSION=bookworm
|
||||
FROM debian:bookworm-slim AS pgcopydb_builder
|
||||
ARG DEBIAN_VERSION
|
||||
|
||||
# Use strict mode for bash to catch errors early
|
||||
SHELL ["/bin/bash", "-euo", "pipefail", "-c"]
|
||||
|
||||
# By default, /bin/sh used in debian images will treat '\n' as eol,
|
||||
# but as we use bash as SHELL, the built-in echo in bash requires the '-e' flag for that.
|
||||
RUN echo 'Acquire::Retries "5";' > /etc/apt/apt.conf.d/80-retries && \
|
||||
echo -e "retry_connrefused = on\ntimeout=15\ntries=5\n" > /root/.wgetrc \
|
||||
echo -e "retry_connrefused = on\ntimeout=15\ntries=5\n" > /root/.wgetrc && \
|
||||
echo -e "--retry-connrefused\n--connect-timeout 15\n--retry 5\n--max-time 300\n" > /root/.curlrc
|
||||
|
||||
COPY build_tools/patches/pgcopydbv017.patch /pgcopydbv017.patch
|
||||
|
||||
RUN if [ "${DEBIAN_VERSION}" = "bookworm" ]; then \
|
||||
set -e && \
|
||||
apt update && \
|
||||
@@ -39,6 +46,7 @@ RUN if [ "${DEBIAN_VERSION}" = "bookworm" ]; then \
|
||||
mkdir /tmp/pgcopydb && \
|
||||
tar -xzf /tmp/pgcopydb.tar.gz -C /tmp/pgcopydb --strip-components=1 && \
|
||||
cd /tmp/pgcopydb && \
|
||||
patch -p1 < /pgcopydbv017.patch && \
|
||||
make -s clean && \
|
||||
make -s -j12 install && \
|
||||
libpq_path=$(find /lib /usr/lib -name "libpq.so.5" | head -n 1) && \
|
||||
@@ -55,7 +63,8 @@ ARG DEBIAN_VERSION
|
||||
|
||||
# Add nonroot user
|
||||
RUN useradd -ms /bin/bash nonroot -b /home
|
||||
SHELL ["/bin/bash", "-c"]
|
||||
# Use strict mode for bash to catch errors early
|
||||
SHELL ["/bin/bash", "-euo", "pipefail", "-c"]
|
||||
|
||||
RUN mkdir -p /pgcopydb/bin && \
|
||||
mkdir -p /pgcopydb/lib && \
|
||||
@@ -66,7 +75,7 @@ COPY --from=pgcopydb_builder /usr/lib/postgresql/16/bin/pgcopydb /pgcopydb/bin/p
|
||||
COPY --from=pgcopydb_builder /pgcopydb/lib/libpq.so.5 /pgcopydb/lib/libpq.so.5
|
||||
|
||||
RUN echo 'Acquire::Retries "5";' > /etc/apt/apt.conf.d/80-retries && \
|
||||
echo -e "retry_connrefused = on\ntimeout=15\ntries=5\n" > /root/.wgetrc \
|
||||
echo -e "retry_connrefused = on\ntimeout=15\ntries=5\n" > /root/.wgetrc && \
|
||||
echo -e "--retry-connrefused\n--connect-timeout 15\n--retry 5\n--max-time 300\n" > /root/.curlrc
|
||||
|
||||
# System deps
|
||||
@@ -190,8 +199,14 @@ RUN set -e \
|
||||
# It includes several bug fixes on top of the v2.0 release (https://github.com/neondatabase/lcov/compare/v2.0...master)
|
||||
# And patches from us:
|
||||
# - Generates json file with code coverage summary (https://github.com/neondatabase/lcov/commit/426e7e7a22f669da54278e9b55e6d8caabd00af0.tar.gz)
|
||||
RUN for package in Capture::Tiny DateTime Devel::Cover Digest::MD5 File::Spec JSON::XS Memory::Process Time::HiRes JSON; do yes | perl -MCPAN -e "CPAN::Shell->notest('install', '$package')"; done \
|
||||
&& wget https://github.com/neondatabase/lcov/archive/426e7e7a22f669da54278e9b55e6d8caabd00af0.tar.gz -O lcov.tar.gz \
|
||||
RUN set +o pipefail && \
|
||||
for package in Capture::Tiny DateTime Devel::Cover Digest::MD5 File::Spec JSON::XS Memory::Process Time::HiRes JSON; do \
|
||||
yes | perl -MCPAN -e "CPAN::Shell->notest('install', '$package')";\
|
||||
done && \
|
||||
set -o pipefail
|
||||
# Split into separate step to debug flaky failures here
|
||||
RUN wget https://github.com/neondatabase/lcov/archive/426e7e7a22f669da54278e9b55e6d8caabd00af0.tar.gz -O lcov.tar.gz \
|
||||
&& ls -laht lcov.tar.gz && sha256sum lcov.tar.gz \
|
||||
&& echo "61a22a62e20908b8b9e27d890bd0ea31f567a7b9668065589266371dcbca0992 lcov.tar.gz" | sha256sum --check \
|
||||
&& mkdir -p lcov && tar -xzf lcov.tar.gz -C lcov --strip-components=1 \
|
||||
&& cd lcov \
|
||||
@@ -253,7 +268,7 @@ WORKDIR /home/nonroot
|
||||
|
||||
# Rust
|
||||
# Please keep the version of llvm (installed above) in sync with rust llvm (`rustc --version --verbose | grep LLVM`)
|
||||
ENV RUSTC_VERSION=1.84.0
|
||||
ENV RUSTC_VERSION=1.84.1
|
||||
ENV RUSTUP_HOME="/home/nonroot/.rustup"
|
||||
ENV PATH="/home/nonroot/.cargo/bin:${PATH}"
|
||||
ARG RUSTFILT_VERSION=0.2.1
|
||||
@@ -261,6 +276,7 @@ ARG CARGO_HAKARI_VERSION=0.9.33
|
||||
ARG CARGO_DENY_VERSION=0.16.2
|
||||
ARG CARGO_HACK_VERSION=0.6.33
|
||||
ARG CARGO_NEXTEST_VERSION=0.9.85
|
||||
ARG CARGO_DIESEL_CLI_VERSION=2.2.6
|
||||
RUN curl -sSO https://static.rust-lang.org/rustup/dist/$(uname -m)-unknown-linux-gnu/rustup-init && whoami && \
|
||||
chmod +x rustup-init && \
|
||||
./rustup-init -y --default-toolchain ${RUSTC_VERSION} && \
|
||||
@@ -274,6 +290,8 @@ RUN curl -sSO https://static.rust-lang.org/rustup/dist/$(uname -m)-unknown-linux
|
||||
cargo install cargo-deny --locked --version ${CARGO_DENY_VERSION} && \
|
||||
cargo install cargo-hack --version ${CARGO_HACK_VERSION} && \
|
||||
cargo install cargo-nextest --version ${CARGO_NEXTEST_VERSION} && \
|
||||
cargo install diesel_cli --version ${CARGO_DIESEL_CLI_VERSION} \
|
||||
--features postgres-bundled --no-default-features && \
|
||||
rm -rf /home/nonroot/.cargo/registry && \
|
||||
rm -rf /home/nonroot/.cargo/git
|
||||
|
||||
|
||||
57
build_tools/patches/pgcopydbv017.patch
Normal file
@@ -0,0 +1,57 @@
|
||||
diff --git a/src/bin/pgcopydb/copydb.c b/src/bin/pgcopydb/copydb.c
|
||||
index d730b03..69a9be9 100644
|
||||
--- a/src/bin/pgcopydb/copydb.c
|
||||
+++ b/src/bin/pgcopydb/copydb.c
|
||||
@@ -44,6 +44,7 @@ GUC dstSettings[] = {
|
||||
{ "synchronous_commit", "'off'" },
|
||||
{ "statement_timeout", "0" },
|
||||
{ "lock_timeout", "0" },
|
||||
+ { "idle_in_transaction_session_timeout", "0" },
|
||||
{ NULL, NULL },
|
||||
};
|
||||
|
||||
diff --git a/src/bin/pgcopydb/pgsql.c b/src/bin/pgcopydb/pgsql.c
|
||||
index 94f2f46..e051ba8 100644
|
||||
--- a/src/bin/pgcopydb/pgsql.c
|
||||
+++ b/src/bin/pgcopydb/pgsql.c
|
||||
@@ -2319,6 +2319,11 @@ pgsql_execute_log_error(PGSQL *pgsql,
|
||||
|
||||
LinesBuffer lbuf = { 0 };
|
||||
|
||||
+ if (message != NULL){
|
||||
+ // make sure message is writable by splitLines
|
||||
+ message = strdup(message);
|
||||
+ }
|
||||
+
|
||||
if (!splitLines(&lbuf, message))
|
||||
{
|
||||
/* errors have already been logged */
|
||||
@@ -2332,6 +2337,7 @@ pgsql_execute_log_error(PGSQL *pgsql,
|
||||
PQbackendPID(pgsql->connection),
|
||||
lbuf.lines[lineNumber]);
|
||||
}
|
||||
+ free(message); // free copy of message we created above
|
||||
|
||||
if (pgsql->logSQL)
|
||||
{
|
||||
@@ -3174,11 +3180,18 @@ pgcopy_log_error(PGSQL *pgsql, PGresult *res, const char *context)
|
||||
/* errors have already been logged */
|
||||
return;
|
||||
}
|
||||
-
|
||||
if (res != NULL)
|
||||
{
|
||||
char *sqlstate = PQresultErrorField(res, PG_DIAG_SQLSTATE);
|
||||
- strlcpy(pgsql->sqlstate, sqlstate, sizeof(pgsql->sqlstate));
|
||||
+ if (sqlstate == NULL)
|
||||
+ {
|
||||
+ // PQresultErrorField returned NULL!
|
||||
+ pgsql->sqlstate[0] = '\0'; // Set to an empty string to avoid segfault
|
||||
+ }
|
||||
+ else
|
||||
+ {
|
||||
+ strlcpy(pgsql->sqlstate, sqlstate, sizeof(pgsql->sqlstate));
|
||||
+ }
|
||||
}
|
||||
|
||||
char *endpoint =
|
||||
File diff suppressed because it is too large
@@ -34,6 +34,7 @@
|
||||
//! -r http://pg-ext-s3-gateway \
|
||||
//! ```
|
||||
use std::collections::HashMap;
|
||||
use std::ffi::OsString;
|
||||
use std::fs::File;
|
||||
use std::path::Path;
|
||||
use std::process::exit;
|
||||
@@ -44,7 +45,7 @@ use std::{thread, time::Duration};
|
||||
|
||||
use anyhow::{Context, Result};
|
||||
use chrono::Utc;
|
||||
use clap::Arg;
|
||||
use clap::Parser;
|
||||
use compute_tools::disk_quota::set_disk_quota;
|
||||
use compute_tools::lsn_lease::launch_lsn_lease_bg_task_for_static;
|
||||
use signal_hook::consts::{SIGQUIT, SIGTERM};
|
||||
@@ -73,10 +74,75 @@ use utils::failpoint_support;
|
||||
// in-case of not-set environment var
|
||||
const BUILD_TAG_DEFAULT: &str = "latest";
|
||||
|
||||
fn main() -> Result<()> {
|
||||
let scenario = failpoint_support::init();
|
||||
// Compatibility hack: if the control plane specified any remote-ext-config
|
||||
// use the default value for extension storage proxy gateway.
|
||||
// Remove this once the control plane is updated to pass the gateway URL
|
||||
fn parse_remote_ext_config(arg: &str) -> Result<String> {
|
||||
if arg.starts_with("http") {
|
||||
Ok(arg.trim_end_matches('/').to_string())
|
||||
} else {
|
||||
Ok("http://pg-ext-s3-gateway".to_string())
|
||||
}
|
||||
}
|
||||
|
||||
let (build_tag, clap_args) = init()?;
|
||||
#[derive(Parser)]
|
||||
#[command(rename_all = "kebab-case")]
|
||||
struct Cli {
|
||||
#[arg(short = 'b', long, default_value = "postgres", env = "POSTGRES_PATH")]
|
||||
pub pgbin: String,
|
||||
|
||||
#[arg(short = 'r', long, value_parser = parse_remote_ext_config)]
|
||||
pub remote_ext_config: Option<String>,
|
||||
|
||||
#[arg(long, default_value_t = 3080)]
|
||||
pub http_port: u16,
|
||||
|
||||
#[arg(short = 'D', long, value_name = "DATADIR")]
|
||||
pub pgdata: String,
|
||||
|
||||
#[arg(short = 'C', long, value_name = "DATABASE_URL")]
|
||||
pub connstr: String,
|
||||
|
||||
#[cfg(target_os = "linux")]
|
||||
#[arg(long, default_value = "neon-postgres")]
|
||||
pub cgroup: String,
|
||||
|
||||
#[cfg(target_os = "linux")]
|
||||
#[arg(
|
||||
long,
|
||||
default_value = "host=localhost port=5432 dbname=postgres user=cloud_admin sslmode=disable application_name=vm-monitor"
|
||||
)]
|
||||
pub filecache_connstr: String,
|
||||
|
||||
#[cfg(target_os = "linux")]
|
||||
#[arg(long, default_value = "0.0.0.0:10301")]
|
||||
pub vm_monitor_addr: String,
|
||||
|
||||
#[arg(long, action = clap::ArgAction::SetTrue)]
|
||||
pub resize_swap_on_bind: bool,
|
||||
|
||||
#[arg(long)]
|
||||
pub set_disk_quota_for_fs: Option<String>,
|
||||
|
||||
#[arg(short = 's', long = "spec", group = "spec")]
|
||||
pub spec_json: Option<String>,
|
||||
|
||||
#[arg(short = 'S', long, group = "spec-path")]
|
||||
pub spec_path: Option<OsString>,
|
||||
|
||||
#[arg(short = 'i', long, group = "compute-id", conflicts_with_all = ["spec", "spec-path"])]
|
||||
pub compute_id: Option<String>,
|
||||
|
||||
#[arg(short = 'p', long, conflicts_with_all = ["spec", "spec-path"], requires = "compute-id", value_name = "CONTROL_PLANE_API_BASE_URL")]
|
||||
pub control_plane_uri: Option<String>,
|
||||
}
|
||||
|
||||
fn main() -> Result<()> {
|
||||
let cli = Cli::parse();
|
||||
|
||||
let build_tag = init()?;
|
||||
|
||||
let scenario = failpoint_support::init();
|
||||
|
||||
// enable core dumping for all child processes
|
||||
setrlimit(Resource::CORE, rlimit::INFINITY, rlimit::INFINITY)?;
|
||||
@@ -85,13 +151,11 @@ fn main() -> Result<()> {
|
||||
// Enter startup tracing context
|
||||
let _startup_context_guard = startup_context_from_env();
|
||||
|
||||
let cli_args = process_cli(&clap_args)?;
|
||||
let cli_spec = try_spec_from_cli(&cli)?;
|
||||
|
||||
let cli_spec = try_spec_from_cli(&clap_args, &cli_args)?;
|
||||
let compute = wait_spec(build_tag, &cli, cli_spec)?;
|
||||
|
||||
let wait_spec_result = wait_spec(build_tag, cli_args, cli_spec)?;
|
||||
|
||||
start_postgres(&clap_args, wait_spec_result)?
|
||||
start_postgres(&cli, compute)?
|
||||
|
||||
// Startup is finished, exit the startup tracing span
|
||||
};
|
||||
@@ -108,7 +172,7 @@ fn main() -> Result<()> {
|
||||
deinit_and_exit(wait_pg_result);
|
||||
}
|
||||
|
||||
fn init() -> Result<(String, clap::ArgMatches)> {
|
||||
fn init() -> Result<String> {
|
||||
init_tracing_and_logging(DEFAULT_LOG_LEVEL)?;
|
||||
|
||||
let mut signals = Signals::new([SIGINT, SIGTERM, SIGQUIT])?;
|
||||
@@ -123,66 +187,7 @@ fn init() -> Result<(String, clap::ArgMatches)> {
|
||||
.to_string();
|
||||
info!("build_tag: {build_tag}");
|
||||
|
||||
Ok((build_tag, cli().get_matches()))
|
||||
}
|
||||
|
||||
fn process_cli(matches: &clap::ArgMatches) -> Result<ProcessCliResult> {
|
||||
let pgbin_default = "postgres";
|
||||
let pgbin = matches
|
||||
.get_one::<String>("pgbin")
|
||||
.map(|s| s.as_str())
|
||||
.unwrap_or(pgbin_default);
|
||||
|
||||
let ext_remote_storage = matches
|
||||
.get_one::<String>("remote-ext-config")
|
||||
// Compatibility hack: if the control plane specified any remote-ext-config
|
||||
// use the default value for extension storage proxy gateway.
|
||||
// Remove this once the control plane is updated to pass the gateway URL
|
||||
.map(|conf| {
|
||||
if conf.starts_with("http") {
|
||||
conf.trim_end_matches('/')
|
||||
} else {
|
||||
"http://pg-ext-s3-gateway"
|
||||
}
|
||||
});
|
||||
|
||||
let http_port = *matches
|
||||
.get_one::<u16>("http-port")
|
||||
.expect("http-port is required");
|
||||
let pgdata = matches
|
||||
.get_one::<String>("pgdata")
|
||||
.expect("PGDATA path is required");
|
||||
let connstr = matches
|
||||
.get_one::<String>("connstr")
|
||||
.expect("Postgres connection string is required");
|
||||
let spec_json = matches.get_one::<String>("spec");
|
||||
let spec_path = matches.get_one::<String>("spec-path");
|
||||
let resize_swap_on_bind = matches.get_flag("resize-swap-on-bind");
|
||||
let set_disk_quota_for_fs = matches.get_one::<String>("set-disk-quota-for-fs");
|
||||
|
||||
Ok(ProcessCliResult {
|
||||
connstr,
|
||||
pgdata,
|
||||
pgbin,
|
||||
ext_remote_storage,
|
||||
http_port,
|
||||
spec_json,
|
||||
spec_path,
|
||||
resize_swap_on_bind,
|
||||
set_disk_quota_for_fs,
|
||||
})
|
||||
}
|
||||
|
||||
struct ProcessCliResult<'clap> {
|
||||
connstr: &'clap str,
|
||||
pgdata: &'clap str,
|
||||
pgbin: &'clap str,
|
||||
ext_remote_storage: Option<&'clap str>,
|
||||
http_port: u16,
|
||||
spec_json: Option<&'clap String>,
|
||||
spec_path: Option<&'clap String>,
|
||||
resize_swap_on_bind: bool,
|
||||
set_disk_quota_for_fs: Option<&'clap String>,
|
||||
Ok(build_tag)
|
||||
}
|
||||
|
||||
fn startup_context_from_env() -> Option<opentelemetry::ContextGuard> {
|
||||
@@ -235,19 +240,9 @@ fn startup_context_from_env() -> Option<opentelemetry::ContextGuard> {
|
||||
}
|
||||
}
|
||||
|
||||
fn try_spec_from_cli(
|
||||
matches: &clap::ArgMatches,
|
||||
ProcessCliResult {
|
||||
spec_json,
|
||||
spec_path,
|
||||
..
|
||||
}: &ProcessCliResult,
|
||||
) -> Result<CliSpecParams> {
|
||||
let compute_id = matches.get_one::<String>("compute-id");
|
||||
let control_plane_uri = matches.get_one::<String>("control-plane-uri");
|
||||
|
||||
fn try_spec_from_cli(cli: &Cli) -> Result<CliSpecParams> {
|
||||
// First, try to get cluster spec from the cli argument
|
||||
if let Some(spec_json) = spec_json {
|
||||
if let Some(ref spec_json) = cli.spec_json {
|
||||
info!("got spec from cli argument {}", spec_json);
|
||||
return Ok(CliSpecParams {
|
||||
spec: Some(serde_json::from_str(spec_json)?),
|
||||
@@ -256,7 +251,7 @@ fn try_spec_from_cli(
|
||||
}
|
||||
|
||||
// Second, try to read it from the file if path is provided
|
||||
if let Some(spec_path) = spec_path {
|
||||
if let Some(ref spec_path) = cli.spec_path {
|
||||
let file = File::open(Path::new(spec_path))?;
|
||||
return Ok(CliSpecParams {
|
||||
spec: Some(serde_json::from_reader(file)?),
|
||||
@@ -264,17 +259,20 @@ fn try_spec_from_cli(
|
||||
});
|
||||
}
|
||||
|
||||
let Some(compute_id) = compute_id else {
|
||||
if cli.compute_id.is_none() {
|
||||
panic!(
|
||||
"compute spec should be provided by one of the following ways: \
|
||||
--spec OR --spec-path OR --control-plane-uri and --compute-id"
|
||||
);
|
||||
};
|
||||
let Some(control_plane_uri) = control_plane_uri else {
|
||||
if cli.control_plane_uri.is_none() {
|
||||
panic!("must specify both --control-plane-uri and --compute-id or none");
|
||||
};
|
||||
|
||||
match get_spec_from_control_plane(control_plane_uri, compute_id) {
|
||||
match get_spec_from_control_plane(
|
||||
cli.control_plane_uri.as_ref().unwrap(),
|
||||
cli.compute_id.as_ref().unwrap(),
|
||||
) {
|
||||
Ok(spec) => Ok(CliSpecParams {
|
||||
spec,
|
||||
live_config_allowed: true,
|
||||
@@ -298,21 +296,12 @@ struct CliSpecParams {
|
||||
|
||||
fn wait_spec(
|
||||
build_tag: String,
|
||||
ProcessCliResult {
|
||||
connstr,
|
||||
pgdata,
|
||||
pgbin,
|
||||
ext_remote_storage,
|
||||
resize_swap_on_bind,
|
||||
set_disk_quota_for_fs,
|
||||
http_port,
|
||||
..
|
||||
}: ProcessCliResult,
|
||||
cli: &Cli,
|
||||
CliSpecParams {
|
||||
spec,
|
||||
live_config_allowed,
|
||||
}: CliSpecParams,
|
||||
) -> Result<WaitSpecResult> {
|
||||
) -> Result<Arc<ComputeNode>> {
|
||||
let mut new_state = ComputeState::new();
|
||||
let spec_set;
|
||||
|
||||
@@ -324,7 +313,7 @@ fn wait_spec(
|
||||
} else {
|
||||
spec_set = false;
|
||||
}
|
||||
let connstr = Url::parse(connstr).context("cannot parse connstr as a URL")?;
|
||||
let connstr = Url::parse(&cli.connstr).context("cannot parse connstr as a URL")?;
|
||||
let conn_conf = postgres::config::Config::from_str(connstr.as_str())
|
||||
.context("cannot build postgres config from connstr")?;
|
||||
let tokio_conn_conf = tokio_postgres::config::Config::from_str(connstr.as_str())
|
||||
@@ -333,14 +322,14 @@ fn wait_spec(
|
||||
connstr,
|
||||
conn_conf,
|
||||
tokio_conn_conf,
|
||||
pgdata: pgdata.to_string(),
|
||||
pgbin: pgbin.to_string(),
|
||||
pgversion: get_pg_version_string(pgbin),
|
||||
http_port,
|
||||
pgdata: cli.pgdata.clone(),
|
||||
pgbin: cli.pgbin.clone(),
|
||||
pgversion: get_pg_version_string(&cli.pgbin),
|
||||
http_port: cli.http_port,
|
||||
live_config_allowed,
|
||||
state: Mutex::new(new_state),
|
||||
state_changed: Condvar::new(),
|
||||
ext_remote_storage: ext_remote_storage.map(|s| s.to_string()),
|
||||
ext_remote_storage: cli.remote_ext_config.clone(),
|
||||
ext_download_progress: RwLock::new(HashMap::new()),
|
||||
build_tag,
|
||||
};
|
||||
@@ -357,7 +346,7 @@ fn wait_spec(
|
||||
// Launch http service first, so that we can serve control-plane requests
|
||||
// while configuration is still in progress.
|
||||
let _http_handle =
|
||||
launch_http_server(http_port, &compute).expect("cannot launch http endpoint thread");
|
||||
launch_http_server(cli.http_port, &compute).expect("cannot launch http endpoint thread");
|
||||
|
||||
if !spec_set {
|
||||
// No spec provided, hang waiting for it.
|
||||
@@ -389,27 +378,12 @@ fn wait_spec(
|
||||
|
||||
launch_lsn_lease_bg_task_for_static(&compute);
|
||||
|
||||
Ok(WaitSpecResult {
|
||||
compute,
|
||||
resize_swap_on_bind,
|
||||
set_disk_quota_for_fs: set_disk_quota_for_fs.cloned(),
|
||||
})
|
||||
}
|
||||
|
||||
struct WaitSpecResult {
|
||||
compute: Arc<ComputeNode>,
|
||||
resize_swap_on_bind: bool,
|
||||
set_disk_quota_for_fs: Option<String>,
|
||||
Ok(compute)
|
||||
}
|
||||
|
||||
fn start_postgres(
|
||||
// need to allow unused because `matches` is only used if target_os = "linux"
|
||||
#[allow(unused_variables)] matches: &clap::ArgMatches,
|
||||
WaitSpecResult {
|
||||
compute,
|
||||
resize_swap_on_bind,
|
||||
set_disk_quota_for_fs,
|
||||
}: WaitSpecResult,
|
||||
cli: &Cli,
|
||||
compute: Arc<ComputeNode>,
|
||||
) -> Result<(Option<PostgresHandle>, StartPostgresResult)> {
|
||||
// We got all we need, update the state.
|
||||
let mut state = compute.state.lock().unwrap();
|
||||
@@ -437,7 +411,7 @@ fn start_postgres(
|
||||
let mut delay_exit = false;
|
||||
|
||||
// Resize swap to the desired size if the compute spec says so
|
||||
if let (Some(size_bytes), true) = (swap_size_bytes, resize_swap_on_bind) {
|
||||
if let (Some(size_bytes), true) = (swap_size_bytes, cli.resize_swap_on_bind) {
|
||||
// To avoid 'swapoff' hitting postgres startup, we need to run resize-swap to completion
|
||||
// *before* starting postgres.
|
||||
//
|
||||
@@ -464,9 +438,9 @@ fn start_postgres(
|
||||
|
||||
// Set disk quota if the compute spec says so
|
||||
if let (Some(disk_quota_bytes), Some(disk_quota_fs_mountpoint)) =
|
||||
(disk_quota_bytes, set_disk_quota_for_fs)
|
||||
(disk_quota_bytes, cli.set_disk_quota_for_fs.as_ref())
|
||||
{
|
||||
match set_disk_quota(disk_quota_bytes, &disk_quota_fs_mountpoint) {
|
||||
match set_disk_quota(disk_quota_bytes, disk_quota_fs_mountpoint) {
|
||||
Ok(()) => {
|
||||
let size_mib = disk_quota_bytes as f32 / (1 << 20) as f32; // just for more coherent display.
|
||||
info!(%disk_quota_bytes, %size_mib, "set disk quota");
|
||||
@@ -509,13 +483,7 @@ fn start_postgres(
|
||||
if #[cfg(target_os = "linux")] {
|
||||
use std::env;
|
||||
use tokio_util::sync::CancellationToken;
|
||||
let vm_monitor_addr = matches
|
||||
.get_one::<String>("vm-monitor-addr")
|
||||
.expect("--vm-monitor-addr should always be set because it has a default arg");
|
||||
let file_cache_connstr = matches.get_one::<String>("filecache-connstr");
|
||||
let cgroup = matches.get_one::<String>("cgroup");
|
||||
|
||||
// Only make a runtime if we need to.
|
||||
// Note: it seems like you can make a runtime in an inner scope and
|
||||
// if you start a task in it it won't be dropped. However, make it
|
||||
// in the outermost scope just to be safe.
|
||||
@@ -538,15 +506,15 @@ fn start_postgres(
|
||||
let pgconnstr = if disable_lfc_resizing.unwrap_or(false) {
|
||||
None
|
||||
} else {
|
||||
file_cache_connstr.cloned()
|
||||
Some(cli.filecache_connstr.clone())
|
||||
};
|
||||
|
||||
let vm_monitor = rt.as_ref().map(|rt| {
|
||||
rt.spawn(vm_monitor::start(
|
||||
Box::leak(Box::new(vm_monitor::Args {
|
||||
cgroup: cgroup.cloned(),
|
||||
cgroup: Some(cli.cgroup.clone()),
|
||||
pgconnstr,
|
||||
addr: vm_monitor_addr.clone(),
|
||||
addr: cli.vm_monitor_addr.clone(),
|
||||
})),
|
||||
token.clone(),
|
||||
))
|
||||
@@ -702,105 +670,6 @@ fn deinit_and_exit(WaitPostgresResult { exit_code }: WaitPostgresResult) -> ! {
|
||||
exit(exit_code.unwrap_or(1))
|
||||
}
|
||||
|
||||
fn cli() -> clap::Command {
|
||||
// Env variable is set by `cargo`
|
||||
let version = option_env!("CARGO_PKG_VERSION").unwrap_or("unknown");
|
||||
clap::Command::new("compute_ctl")
|
||||
.version(version)
|
||||
.arg(
|
||||
Arg::new("http-port")
|
||||
.long("http-port")
|
||||
.value_name("HTTP_PORT")
|
||||
.default_value("3080")
|
||||
.value_parser(clap::value_parser!(u16))
|
||||
.required(false),
|
||||
)
|
||||
.arg(
|
||||
Arg::new("connstr")
|
||||
.short('C')
|
||||
.long("connstr")
|
||||
.value_name("DATABASE_URL")
|
||||
.required(true),
|
||||
)
|
||||
.arg(
|
||||
Arg::new("pgdata")
|
||||
.short('D')
|
||||
.long("pgdata")
|
||||
.value_name("DATADIR")
|
||||
.required(true),
|
||||
)
|
||||
.arg(
|
||||
Arg::new("pgbin")
|
||||
.short('b')
|
||||
.long("pgbin")
|
||||
.default_value("postgres")
|
||||
.value_name("POSTGRES_PATH"),
|
||||
)
|
||||
.arg(
|
||||
Arg::new("spec")
|
||||
.short('s')
|
||||
.long("spec")
|
||||
.value_name("SPEC_JSON"),
|
||||
)
|
||||
.arg(
|
||||
Arg::new("spec-path")
|
||||
.short('S')
|
||||
.long("spec-path")
|
||||
.value_name("SPEC_PATH"),
|
||||
)
|
||||
.arg(
|
||||
Arg::new("compute-id")
|
||||
.short('i')
|
||||
.long("compute-id")
|
||||
.value_name("COMPUTE_ID"),
|
||||
)
|
||||
.arg(
|
||||
Arg::new("control-plane-uri")
|
||||
.short('p')
|
||||
.long("control-plane-uri")
|
||||
.value_name("CONTROL_PLANE_API_BASE_URI"),
|
||||
)
|
||||
.arg(
|
||||
Arg::new("remote-ext-config")
|
||||
.short('r')
|
||||
.long("remote-ext-config")
|
||||
.value_name("REMOTE_EXT_CONFIG"),
|
||||
)
|
||||
// TODO(fprasx): we currently have default arguments because the cloud PR
|
||||
// to pass them in hasn't been merged yet. We should get rid of them once
|
||||
// the PR is merged.
|
||||
.arg(
|
||||
Arg::new("vm-monitor-addr")
|
||||
.long("vm-monitor-addr")
|
||||
.default_value("0.0.0.0:10301")
|
||||
.value_name("VM_MONITOR_ADDR"),
|
||||
)
|
||||
.arg(
|
||||
Arg::new("cgroup")
|
||||
.long("cgroup")
|
||||
.default_value("neon-postgres")
|
||||
.value_name("CGROUP"),
|
||||
)
|
||||
.arg(
|
||||
Arg::new("filecache-connstr")
|
||||
.long("filecache-connstr")
|
||||
.default_value(
|
||||
"host=localhost port=5432 dbname=postgres user=cloud_admin sslmode=disable application_name=vm-monitor",
|
||||
)
|
||||
.value_name("FILECACHE_CONNSTR"),
|
||||
)
|
||||
.arg(
|
||||
Arg::new("resize-swap-on-bind")
|
||||
.long("resize-swap-on-bind")
|
||||
.action(clap::ArgAction::SetTrue),
|
||||
)
|
||||
.arg(
|
||||
Arg::new("set-disk-quota-for-fs")
|
||||
.long("set-disk-quota-for-fs")
|
||||
.value_name("SET_DISK_QUOTA_FOR_FS")
|
||||
)
|
||||
}
|
||||
|
||||
/// When compute_ctl is killed, send also termination signal to sync-safekeepers
|
||||
/// to prevent leakage. TODO: it is better to convert compute_ctl to async and
|
||||
/// wait for termination which would be easy then.
|
||||
@@ -810,7 +679,14 @@ fn handle_exit_signal(sig: i32) {
|
||||
exit(1);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn verify_cli() {
|
||||
cli().debug_assert()
|
||||
#[cfg(test)]
|
||||
mod test {
|
||||
use clap::CommandFactory;
|
||||
|
||||
use super::Cli;
|
||||
|
||||
#[test]
|
||||
fn verify_cli() {
|
||||
Cli::command().debug_assert()
|
||||
}
|
||||
}
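The hunk above replaces the hand-built clap::Command definition with a derive-based Cli struct, including a custom value_parser for --remote-ext-config and a Cli::command().debug_assert() test. A minimal standalone sketch of that pattern, with illustrative names rather than the real compute_ctl arguments:

use clap::Parser;

// Hypothetical parser mirroring the normalization above: keep http(s) URLs
// (minus a trailing slash), otherwise fall back to a placeholder gateway URL.
fn parse_gateway(arg: &str) -> Result<String, String> {
    if arg.starts_with("http") {
        Ok(arg.trim_end_matches('/').to_string())
    } else {
        Ok("http://example-gateway".to_string())
    }
}

#[derive(Parser)]
#[command(rename_all = "kebab-case")]
struct ExampleCli {
    #[arg(short = 'r', long, value_parser = parse_gateway)]
    remote_ext_config: Option<String>,

    #[arg(long, default_value_t = 3080)]
    http_port: u16,
}

#[cfg(test)]
mod example_test {
    use clap::CommandFactory;

    #[test]
    fn verify_example_cli() {
        // Catches conflicting ids, bad defaults, etc. at test time.
        super::ExampleCli::command().debug_assert();
    }
}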
|
||||
|
||||
@@ -231,6 +231,14 @@ pub(crate) async fn main() -> anyhow::Result<()> {
|
||||
])
|
||||
.env_clear()
|
||||
.env("LD_LIBRARY_PATH", &pg_lib_dir)
|
||||
.env(
|
||||
"ASAN_OPTIONS",
|
||||
std::env::var("ASAN_OPTIONS").unwrap_or_default(),
|
||||
)
|
||||
.env(
|
||||
"UBSAN_OPTIONS",
|
||||
std::env::var("UBSAN_OPTIONS").unwrap_or_default(),
|
||||
)
|
||||
.stdout(std::process::Stdio::piped())
|
||||
.stderr(std::process::Stdio::piped())
|
||||
.spawn()
|
||||
|
||||
@@ -261,7 +261,13 @@ fn fill_rust_env_vars(cmd: &mut Command) -> &mut Command {
|
||||
let mut filled_cmd = cmd.env_clear().env("RUST_BACKTRACE", backtrace_setting);
|
||||
|
||||
// Pass through these environment variables to the command
|
||||
for var in ["LLVM_PROFILE_FILE", "FAILPOINTS", "RUST_LOG"] {
|
||||
for var in [
|
||||
"LLVM_PROFILE_FILE",
|
||||
"FAILPOINTS",
|
||||
"RUST_LOG",
|
||||
"ASAN_OPTIONS",
|
||||
"UBSAN_OPTIONS",
|
||||
] {
|
||||
if let Some(val) = std::env::var_os(var) {
|
||||
filled_cmd = filled_cmd.env(var, val);
|
||||
}
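The change above widens the allow-list of environment variables forwarded to child binaries after env_clear(), so sanitizer settings reach them. A small sketch of the same pattern; the program name and variable list here are illustrative:

use std::process::Command;

fn spawn_with_sanitizer_env(program: &str) -> std::io::Result<std::process::Child> {
    let mut cmd = Command::new(program);
    // Start from a clean environment, then forward only an explicit allow-list.
    cmd.env_clear();
    for var in ["ASAN_OPTIONS", "UBSAN_OPTIONS", "RUST_LOG"] {
        if let Some(val) = std::env::var_os(var) {
            // Only set variables that are present, so unset stays unset.
            cmd.env(var, val);
        }
    }
    cmd.spawn()
}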
|
||||
|
||||
@@ -388,6 +388,11 @@ impl PageServerNode {
|
||||
.map(|x| x.parse::<u8>())
|
||||
.transpose()
|
||||
.context("Failed to parse 'image_creation_check_threshold' as integer")?,
|
||||
image_creation_preempt_threshold: settings
|
||||
.remove("image_creation_preempt_threshold")
|
||||
.map(|x| x.parse::<usize>())
|
||||
.transpose()
|
||||
.context("Failed to parse 'image_creation_preempt_threshold' as integer")?,
|
||||
pitr_interval: settings.remove("pitr_interval").map(|x| x.to_string()),
|
||||
walreceiver_connect_timeout: settings
|
||||
.remove("walreceiver_connect_timeout")
|
||||
|
||||
@@ -221,7 +221,17 @@ impl StorageController {
|
||||
"-p",
|
||||
&format!("{}", postgres_port),
|
||||
];
|
||||
let exitcode = Command::new(bin_path).args(args).spawn()?.wait().await?;
|
||||
let pg_lib_dir = self.get_pg_lib_dir().await.unwrap();
|
||||
let envs = [
|
||||
("LD_LIBRARY_PATH".to_owned(), pg_lib_dir.to_string()),
|
||||
("DYLD_LIBRARY_PATH".to_owned(), pg_lib_dir.to_string()),
|
||||
];
|
||||
let exitcode = Command::new(bin_path)
|
||||
.args(args)
|
||||
.envs(envs)
|
||||
.spawn()?
|
||||
.wait()
|
||||
.await?;
|
||||
|
||||
Ok(exitcode.success())
|
||||
}
|
||||
@@ -242,6 +252,11 @@ impl StorageController {
|
||||
|
||||
let pg_bin_dir = self.get_pg_bin_dir().await?;
|
||||
let createdb_path = pg_bin_dir.join("createdb");
|
||||
let pg_lib_dir = self.get_pg_lib_dir().await.unwrap();
|
||||
let envs = [
|
||||
("LD_LIBRARY_PATH".to_owned(), pg_lib_dir.to_string()),
|
||||
("DYLD_LIBRARY_PATH".to_owned(), pg_lib_dir.to_string()),
|
||||
];
|
||||
let output = Command::new(&createdb_path)
|
||||
.args([
|
||||
"-h",
|
||||
@@ -254,6 +269,7 @@ impl StorageController {
|
||||
&username(),
|
||||
DB_NAME,
|
||||
])
|
||||
.envs(envs)
|
||||
.output()
|
||||
.await
|
||||
.expect("Failed to spawn createdb");
|
||||
|
||||
@@ -32,6 +32,7 @@ reason = "the marvin attack only affects private key decryption, not public key
|
||||
# https://embarkstudios.github.io/cargo-deny/checks/licenses/cfg.html
|
||||
[licenses]
|
||||
allow = [
|
||||
"0BSD",
|
||||
"Apache-2.0",
|
||||
"BSD-2-Clause",
|
||||
"BSD-3-Clause",
|
||||
|
||||
@@ -52,6 +52,7 @@ for pg_version in ${TEST_VERSION_ONLY-14 15 16 17}; do
|
||||
|
||||
if [ $pg_version -ge 16 ]; then
|
||||
docker cp ext-src $TEST_CONTAINER_NAME:/
|
||||
docker exec $TEST_CONTAINER_NAME bash -c "apt update && apt install -y libtap-parser-sourcehandler-pgtap-perl"
|
||||
# This is required for the pg_hint_plan test, to prevent a flaky log message from causing the test to fail
|
||||
# It cannot be moved to Dockerfile now because the database directory is created after the start of the container
|
||||
echo Adding dummy config
|
||||
|
||||
4
docker-compose/ext-src/pgjwt-src/neon-test.sh
Executable file
@@ -0,0 +1,4 @@
|
||||
#!/bin/bash
|
||||
set -ex
|
||||
cd "$(dirname "${0}")"
|
||||
pg_prove test.sql
|
||||
15
docker-compose/ext-src/pgjwt-src/test-upgrade.patch
Normal file
@@ -0,0 +1,15 @@
|
||||
diff --git a/test.sql b/test.sql
|
||||
index d7a0ca8..f15bc76 100644
|
||||
--- a/test.sql
|
||||
+++ b/test.sql
|
||||
@@ -9,9 +9,7 @@
|
||||
\set ON_ERROR_STOP true
|
||||
\set QUIET 1
|
||||
|
||||
-CREATE EXTENSION pgcrypto;
|
||||
-CREATE EXTENSION pgtap;
|
||||
-CREATE EXTENSION pgjwt;
|
||||
+CREATE EXTENSION IF NOT EXISTS pgtap;
|
||||
|
||||
BEGIN;
|
||||
SELECT plan(23);
|
||||
5
docker-compose/ext-src/pgjwt-src/test-upgrade.sh
Executable file
@@ -0,0 +1,5 @@
|
||||
#!/bin/sh
|
||||
set -ex
|
||||
cd "$(dirname ${0})"
|
||||
patch -p1 <test-upgrade.patch
|
||||
pg_prove test.sql
|
||||
@@ -24,7 +24,7 @@ function wait_for_ready {
|
||||
}
|
||||
function create_extensions() {
|
||||
for ext in ${1}; do
|
||||
docker compose exec neon-test-extensions psql -X -v ON_ERROR_STOP=1 -d contrib_regression -c "CREATE EXTENSION IF NOT EXISTS ${ext}"
|
||||
docker compose exec neon-test-extensions psql -X -v ON_ERROR_STOP=1 -d contrib_regression -c "CREATE EXTENSION IF NOT EXISTS ${ext} CASCADE"
|
||||
done
|
||||
}
|
||||
EXTENSIONS='[
|
||||
@@ -40,7 +40,8 @@ EXTENSIONS='[
|
||||
{"extname": "pg_uuidv7", "extdir": "pg_uuidv7-src"},
|
||||
{"extname": "roaringbitmap", "extdir": "pg_roaringbitmap-src"},
|
||||
{"extname": "semver", "extdir": "pg_semver-src"},
|
||||
{"extname": "pg_ivm", "extdir": "pg_ivm-src"}
|
||||
{"extname": "pg_ivm", "extdir": "pg_ivm-src"},
|
||||
{"extname": "pgjwt", "extdir": "pgjwt-src"}
|
||||
]'
|
||||
EXTNAMES=$(echo ${EXTENSIONS} | jq -r '.[].extname' | paste -sd ' ' -)
|
||||
TAG=${NEWTAG} docker compose --profile test-extensions up --quiet-pull --build -d
|
||||
|
||||
@@ -204,14 +204,16 @@ impl RemoteExtSpec {
|
||||
|
||||
// Check if extension is present in public or custom.
|
||||
// If not, then it is not allowed to be used by this compute.
|
||||
if let Some(public_extensions) = &self.public_extensions {
|
||||
if !public_extensions.contains(&real_ext_name.to_string()) {
|
||||
if let Some(custom_extensions) = &self.custom_extensions {
|
||||
if !custom_extensions.contains(&real_ext_name.to_string()) {
|
||||
return Err(anyhow::anyhow!("extension {} is not found", real_ext_name));
|
||||
}
|
||||
}
|
||||
}
|
||||
if !self
|
||||
.public_extensions
|
||||
.as_ref()
|
||||
.is_some_and(|exts| exts.iter().any(|e| e == real_ext_name))
|
||||
&& !self
|
||||
.custom_extensions
|
||||
.as_ref()
|
||||
.is_some_and(|exts| exts.iter().any(|e| e == real_ext_name))
|
||||
{
|
||||
return Err(anyhow::anyhow!("extension {} is not found", real_ext_name));
|
||||
}
|
||||
|
||||
match self.extension_data.get(real_ext_name) {
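This hunk flattens the nested `if let` allow-list check into two Option::is_some_and calls. A self-contained sketch of the combinator form, with illustrative extension lists:

fn is_ext_allowed(
    public_extensions: &Option<Vec<String>>,
    custom_extensions: &Option<Vec<String>>,
    name: &str,
) -> bool {
    // None means "nothing allowed"; a list means "allowed iff listed".
    public_extensions
        .as_ref()
        .is_some_and(|exts| exts.iter().any(|e| e == name))
        || custom_extensions
            .as_ref()
            .is_some_and(|exts| exts.iter().any(|e| e == name))
}

fn main() {
    let public = Some(vec!["pg_ivm".to_string()]);
    let custom: Option<Vec<String>> = None;
    assert!(is_ext_allowed(&public, &custom, "pg_ivm"));
    assert!(!is_ext_allowed(&public, &custom, "pgjwt"));
}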
|
||||
@@ -340,6 +342,102 @@ mod tests {
|
||||
use super::*;
|
||||
use std::fs::File;
|
||||
|
||||
#[test]
|
||||
fn allow_installing_remote_extensions() {
|
||||
let rspec: RemoteExtSpec = serde_json::from_value(serde_json::json!({
|
||||
"public_extensions": null,
|
||||
"custom_extensions": null,
|
||||
"library_index": {},
|
||||
"extension_data": {},
|
||||
}))
|
||||
.unwrap();
|
||||
|
||||
rspec
|
||||
.get_ext("ext", false, "latest", "v17")
|
||||
.expect_err("Extension should not be found");
|
||||
|
||||
let rspec: RemoteExtSpec = serde_json::from_value(serde_json::json!({
|
||||
"public_extensions": [],
|
||||
"custom_extensions": null,
|
||||
"library_index": {},
|
||||
"extension_data": {},
|
||||
}))
|
||||
.unwrap();
|
||||
|
||||
rspec
|
||||
.get_ext("ext", false, "latest", "v17")
|
||||
.expect_err("Extension should not be found");
|
||||
|
||||
let rspec: RemoteExtSpec = serde_json::from_value(serde_json::json!({
|
||||
"public_extensions": [],
|
||||
"custom_extensions": [],
|
||||
"library_index": {
|
||||
"ext": "ext"
|
||||
},
|
||||
"extension_data": {
|
||||
"ext": {
|
||||
"control_data": {
|
||||
"ext.control": ""
|
||||
},
|
||||
"archive_path": ""
|
||||
}
|
||||
},
|
||||
}))
|
||||
.unwrap();
|
||||
|
||||
rspec
|
||||
.get_ext("ext", false, "latest", "v17")
|
||||
.expect_err("Extension should not be found");
|
||||
|
||||
let rspec: RemoteExtSpec = serde_json::from_value(serde_json::json!({
|
||||
"public_extensions": [],
|
||||
"custom_extensions": ["ext"],
|
||||
"library_index": {
|
||||
"ext": "ext"
|
||||
},
|
||||
"extension_data": {
|
||||
"ext": {
|
||||
"control_data": {
|
||||
"ext.control": ""
|
||||
},
|
||||
"archive_path": ""
|
||||
}
|
||||
},
|
||||
}))
|
||||
.unwrap();
|
||||
|
||||
rspec
|
||||
.get_ext("ext", false, "latest", "v17")
|
||||
.expect("Extension should be found");
|
||||
|
||||
let rspec: RemoteExtSpec = serde_json::from_value(serde_json::json!({
|
||||
"public_extensions": ["ext"],
|
||||
"custom_extensions": [],
|
||||
"library_index": {
|
||||
"extlib": "ext",
|
||||
},
|
||||
"extension_data": {
|
||||
"ext": {
|
||||
"control_data": {
|
||||
"ext.control": ""
|
||||
},
|
||||
"archive_path": ""
|
||||
}
|
||||
},
|
||||
}))
|
||||
.unwrap();
|
||||
|
||||
rspec
|
||||
.get_ext("ext", false, "latest", "v17")
|
||||
.expect("Extension should be found");
|
||||
|
||||
// test library index for the case when library name
|
||||
// doesn't match the extension name
|
||||
rspec
|
||||
.get_ext("extlib", true, "latest", "v17")
|
||||
.expect("Library should be found");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn parse_spec_file() {
|
||||
let file = File::open("tests/cluster_spec.json").unwrap();
|
||||
|
||||
@@ -323,6 +323,10 @@ pub struct TenantConfigToml {
|
||||
// Expressed in multiples of checkpoint distance.
|
||||
pub image_layer_creation_check_threshold: u8,
|
||||
|
||||
// How many multiples of L0 `compaction_threshold` will preempt image layer creation and do L0 compaction.
|
||||
// Set to 0 to disable preemption.
|
||||
pub image_creation_preempt_threshold: usize,
|
||||
|
||||
/// The length for an explicit LSN lease request.
|
||||
/// Layers needed to reconstruct pages at LSN will not be GC-ed during this interval.
|
||||
#[serde(with = "humantime_serde")]
|
||||
@@ -547,6 +551,10 @@ pub mod tenant_conf_defaults {
|
||||
// Relevant: https://github.com/neondatabase/neon/issues/3394
|
||||
pub const DEFAULT_GC_PERIOD: &str = "1 hr";
|
||||
pub const DEFAULT_IMAGE_CREATION_THRESHOLD: usize = 3;
|
||||
// If there are more than threshold * compaction_threshold (that is 3 * 10 in the default config) L0 layers, image
|
||||
// layer creation will end immediately. Set to 0 to disable. The target default will be 3 once we
|
||||
// want to enable this feature.
|
||||
pub const DEFAULT_IMAGE_CREATION_PREEMPT_THRESHOLD: usize = 0;
|
||||
pub const DEFAULT_PITR_INTERVAL: &str = "7 days";
|
||||
pub const DEFAULT_WALRECEIVER_CONNECT_TIMEOUT: &str = "10 seconds";
|
||||
pub const DEFAULT_WALRECEIVER_LAGGING_WAL_TIMEOUT: &str = "10 seconds";
|
||||
@@ -605,6 +613,7 @@ impl Default for TenantConfigToml {
|
||||
lazy_slru_download: false,
|
||||
timeline_get_throttle: crate::models::ThrottleConfig::disabled(),
|
||||
image_layer_creation_check_threshold: DEFAULT_IMAGE_LAYER_CREATION_CHECK_THRESHOLD,
|
||||
image_creation_preempt_threshold: DEFAULT_IMAGE_CREATION_PREEMPT_THRESHOLD,
|
||||
lsn_lease_length: LsnLease::DEFAULT_LENGTH,
|
||||
lsn_lease_length_for_ts: LsnLease::DEFAULT_LENGTH_FOR_TS,
|
||||
timeline_offloading: false,
|
||||
|
||||
@@ -498,6 +498,8 @@ pub struct TenantConfigPatch {
|
||||
#[serde(skip_serializing_if = "FieldPatch::is_noop")]
|
||||
pub image_layer_creation_check_threshold: FieldPatch<u8>,
|
||||
#[serde(skip_serializing_if = "FieldPatch::is_noop")]
|
||||
pub image_creation_preempt_threshold: FieldPatch<usize>,
|
||||
#[serde(skip_serializing_if = "FieldPatch::is_noop")]
|
||||
pub lsn_lease_length: FieldPatch<String>,
|
||||
#[serde(skip_serializing_if = "FieldPatch::is_noop")]
|
||||
pub lsn_lease_length_for_ts: FieldPatch<String>,
|
||||
@@ -544,6 +546,7 @@ pub struct TenantConfig {
|
||||
pub lazy_slru_download: Option<bool>,
|
||||
pub timeline_get_throttle: Option<ThrottleConfig>,
|
||||
pub image_layer_creation_check_threshold: Option<u8>,
|
||||
pub image_creation_preempt_threshold: Option<usize>,
|
||||
pub lsn_lease_length: Option<String>,
|
||||
pub lsn_lease_length_for_ts: Option<String>,
|
||||
pub timeline_offloading: Option<bool>,
|
||||
@@ -581,6 +584,7 @@ impl TenantConfig {
|
||||
mut lazy_slru_download,
|
||||
mut timeline_get_throttle,
|
||||
mut image_layer_creation_check_threshold,
|
||||
mut image_creation_preempt_threshold,
|
||||
mut lsn_lease_length,
|
||||
mut lsn_lease_length_for_ts,
|
||||
mut timeline_offloading,
|
||||
@@ -635,6 +639,9 @@ impl TenantConfig {
|
||||
patch
|
||||
.image_layer_creation_check_threshold
|
||||
.apply(&mut image_layer_creation_check_threshold);
|
||||
patch
|
||||
.image_creation_preempt_threshold
|
||||
.apply(&mut image_creation_preempt_threshold);
|
||||
patch.lsn_lease_length.apply(&mut lsn_lease_length);
|
||||
patch
|
||||
.lsn_lease_length_for_ts
|
||||
@@ -679,6 +686,7 @@ impl TenantConfig {
|
||||
lazy_slru_download,
|
||||
timeline_get_throttle,
|
||||
image_layer_creation_check_threshold,
|
||||
image_creation_preempt_threshold,
|
||||
lsn_lease_length,
|
||||
lsn_lease_length_for_ts,
|
||||
timeline_offloading,
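The new image_creation_preempt_threshold field is threaded through TenantConfigPatch with the same apply pattern as the existing fields. The sketch below uses a simplified stand-in enum for FieldPatch (the real pageserver_api type differs); it only illustrates how a patch leaves, overwrites, or clears an optional setting:

enum FieldPatch<T> {
    Noop,    // field absent from the patch: keep the current value
    Set(T),  // field present: overwrite
    Remove,  // field explicitly nulled: reset to default
}

impl<T> FieldPatch<T> {
    fn apply(self, target: &mut Option<T>) {
        match self {
            FieldPatch::Noop => {}
            FieldPatch::Set(v) => *target = Some(v),
            FieldPatch::Remove => *target = None,
        }
    }
}

fn main() {
    let mut image_creation_preempt_threshold: Option<usize> = None;
    FieldPatch::Set(3).apply(&mut image_creation_preempt_threshold);
    assert_eq!(image_creation_preempt_threshold, Some(3));
}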
|
||||
|
||||
@@ -76,7 +76,15 @@ impl Conf {
|
||||
let mut cmd = Command::new(path);
|
||||
cmd.env_clear()
|
||||
.env("LD_LIBRARY_PATH", self.pg_lib_dir()?)
|
||||
.env("DYLD_LIBRARY_PATH", self.pg_lib_dir()?);
|
||||
.env("DYLD_LIBRARY_PATH", self.pg_lib_dir()?)
|
||||
.env(
|
||||
"ASAN_OPTIONS",
|
||||
std::env::var("ASAN_OPTIONS").unwrap_or_default(),
|
||||
)
|
||||
.env(
|
||||
"UBSAN_OPTIONS",
|
||||
std::env::var("UBSAN_OPTIONS").unwrap_or_default(),
|
||||
);
|
||||
Ok(cmd)
|
||||
}
|
||||
|
||||
|
||||
@@ -64,6 +64,14 @@ pub async fn do_run_initdb(args: RunInitdbArgs<'_>) -> Result<(), Error> {
|
||||
.env_clear()
|
||||
.env("LD_LIBRARY_PATH", library_search_path)
|
||||
.env("DYLD_LIBRARY_PATH", library_search_path)
|
||||
.env(
|
||||
"ASAN_OPTIONS",
|
||||
std::env::var("ASAN_OPTIONS").unwrap_or_default(),
|
||||
)
|
||||
.env(
|
||||
"UBSAN_OPTIONS",
|
||||
std::env::var("UBSAN_OPTIONS").unwrap_or_default(),
|
||||
)
|
||||
.stdin(std::process::Stdio::null())
|
||||
// stdout invocation produces the same output every time, we don't need it
|
||||
.stdout(std::process::Stdio::null())
|
||||
|
||||
@@ -7,7 +7,7 @@ use serde::{Deserialize, Serialize};
|
||||
|
||||
use crate::{
|
||||
DEFAULT_MAX_KEYS_PER_LIST_RESPONSE, DEFAULT_REMOTE_STORAGE_AZURE_CONCURRENCY_LIMIT,
|
||||
DEFAULT_REMOTE_STORAGE_S3_CONCURRENCY_LIMIT,
|
||||
DEFAULT_REMOTE_STORAGE_LOCALFS_CONCURRENCY_LIMIT, DEFAULT_REMOTE_STORAGE_S3_CONCURRENCY_LIMIT,
|
||||
};
|
||||
|
||||
/// External backup storage configuration, enough for creating a client for that storage.
|
||||
@@ -45,11 +45,11 @@ impl RemoteStorageKind {
|
||||
|
||||
impl RemoteStorageConfig {
|
||||
/// Helper to fetch the configured concurrency limit.
|
||||
pub fn concurrency_limit(&self) -> Option<usize> {
|
||||
pub fn concurrency_limit(&self) -> usize {
|
||||
match &self.storage {
|
||||
RemoteStorageKind::LocalFs { .. } => None,
|
||||
RemoteStorageKind::AwsS3(c) => Some(c.concurrency_limit.into()),
|
||||
RemoteStorageKind::AzureContainer(c) => Some(c.concurrency_limit.into()),
|
||||
RemoteStorageKind::LocalFs { .. } => DEFAULT_REMOTE_STORAGE_LOCALFS_CONCURRENCY_LIMIT,
|
||||
RemoteStorageKind::AwsS3(c) => c.concurrency_limit.into(),
|
||||
RemoteStorageKind::AzureContainer(c) => c.concurrency_limit.into(),
|
||||
}
|
||||
}
|
||||
}
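concurrency_limit() now returns a plain usize, giving the local-filesystem backend a concrete default instead of None that every caller had to handle. A sketch of the shape of that change, with stand-in types rather than the real remote_storage ones:

const DEFAULT_LOCALFS_CONCURRENCY_LIMIT: usize = 100;

enum StorageKind {
    LocalFs,
    AwsS3 { concurrency_limit: usize },
    AzureContainer { concurrency_limit: usize },
}

fn concurrency_limit(kind: &StorageKind) -> usize {
    match kind {
        // Previously None; now a default that also bounds upload-queue concurrency.
        StorageKind::LocalFs => DEFAULT_LOCALFS_CONCURRENCY_LIMIT,
        StorageKind::AwsS3 { concurrency_limit } => *concurrency_limit,
        StorageKind::AzureContainer { concurrency_limit } => *concurrency_limit,
    }
}

fn main() {
    assert_eq!(concurrency_limit(&StorageKind::LocalFs), 100);
}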
|
||||
|
||||
@@ -65,6 +65,12 @@ pub const DEFAULT_REMOTE_STORAGE_S3_CONCURRENCY_LIMIT: usize = 100;
|
||||
/// Here, a limit of max 20k concurrent connections was noted.
|
||||
/// <https://learn.microsoft.com/en-us/answers/questions/1301863/is-there-any-limitation-to-concurrent-connections>
|
||||
pub const DEFAULT_REMOTE_STORAGE_AZURE_CONCURRENCY_LIMIT: usize = 100;
|
||||
/// Set this limit analogously to the S3 limit.
|
||||
///
|
||||
/// The local filesystem backend doesn't enforce a concurrency limit itself, but this also bounds
|
||||
/// the upload queue concurrency. Some tests create thousands of uploads, which slows down the
|
||||
/// quadratic scheduling of the upload queue, and there is no point spawning so many Tokio tasks.
|
||||
pub const DEFAULT_REMOTE_STORAGE_LOCALFS_CONCURRENCY_LIMIT: usize = 100;
|
||||
/// No limits on the client side, which currently means 1000 for AWS S3.
|
||||
/// <https://docs.aws.amazon.com/AmazonS3/latest/API/API_ListObjectsV2.html#API_ListObjectsV2_RequestSyntax>
|
||||
pub const DEFAULT_MAX_KEYS_PER_LIST_RESPONSE: Option<i32> = None;
|
||||
|
||||
@@ -39,7 +39,7 @@ function initdb_with_args {
|
||||
;;
|
||||
esac
|
||||
|
||||
eval env -i LD_LIBRARY_PATH="$PG_BIN"/../lib "${cmd[*]}"
|
||||
eval env -i LD_LIBRARY_PATH="$PG_BIN"/../lib ASAN_OPTIONS="${ASAN_OPTIONS-}" UBSAN_OPTIONS="${UBSAN_OPTIONS-}" "${cmd[*]}"
|
||||
}
|
||||
|
||||
rm -fr "$DATA_DIR"
|
||||
|
||||
@@ -5,6 +5,27 @@ use metrics::{IntCounter, IntCounterVec};
|
||||
use once_cell::sync::Lazy;
|
||||
use strum_macros::{EnumString, VariantNames};
|
||||
|
||||
/// Logs a critical error, similarly to `tracing::error!`. This will:
|
||||
///
|
||||
/// * Emit an ERROR log message with prefix "CRITICAL:" and a backtrace.
|
||||
/// * Trigger a pageable alert (via the metric below).
|
||||
/// * Increment libmetrics_tracing_event_count{level="critical"}, and indirectly level="error".
|
||||
/// * In debug builds, panic the process.
|
||||
///
|
||||
/// When including errors in the message, please use {err:?} to include the error cause and original
|
||||
/// backtrace.
|
||||
#[macro_export]
|
||||
macro_rules! critical {
|
||||
($($arg:tt)*) => {{
|
||||
if cfg!(debug_assertions) {
|
||||
panic!($($arg)*);
|
||||
}
|
||||
$crate::logging::TRACING_EVENT_COUNT_METRIC.inc_critical();
|
||||
let backtrace = std::backtrace::Backtrace::capture();
|
||||
tracing::error!("CRITICAL: {}\n{backtrace}", format!($($arg)*));
|
||||
}};
|
||||
}
|
||||
|
||||
#[derive(EnumString, strum_macros::Display, VariantNames, Eq, PartialEq, Debug, Clone, Copy)]
|
||||
#[strum(serialize_all = "snake_case")]
|
||||
pub enum LogFormat {
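The critical! macro above panics in debug builds and, in release builds, bumps a counter and logs an error with a backtrace. A self-contained sketch of that split, using eprintln! and an atomic counter in place of the repo's tracing and metrics plumbing:

use std::sync::atomic::{AtomicU64, Ordering};

static CRITICAL_EVENTS: AtomicU64 = AtomicU64::new(0);

macro_rules! critical_sketch {
    ($($arg:tt)*) => {{
        if cfg!(debug_assertions) {
            // Fail fast during development and in tests.
            panic!($($arg)*);
        }
        // In release builds: count the event and log it with a backtrace.
        CRITICAL_EVENTS.fetch_add(1, Ordering::Relaxed);
        let backtrace = std::backtrace::Backtrace::capture();
        eprintln!("CRITICAL: {}\n{backtrace}", format!($($arg)*));
    }};
}

fn main() {
    // In a release build this would log and increment the counter instead of panicking.
    if false {
        critical_sketch!("unexpected state: {}", 42);
    }
}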
|
||||
@@ -25,7 +46,10 @@ impl LogFormat {
|
||||
}
|
||||
}
|
||||
|
||||
struct TracingEventCountMetric {
|
||||
pub struct TracingEventCountMetric {
|
||||
/// CRITICAL is not a `tracing` log level. Instead, we increment it in the `critical!` macro,
|
||||
/// and also emit it as a regular error. These are thus double-counted, but that seems fine.
|
||||
critical: IntCounter,
|
||||
error: IntCounter,
|
||||
warn: IntCounter,
|
||||
info: IntCounter,
|
||||
@@ -33,7 +57,7 @@ struct TracingEventCountMetric {
|
||||
trace: IntCounter,
|
||||
}
|
||||
|
||||
static TRACING_EVENT_COUNT_METRIC: Lazy<TracingEventCountMetric> = Lazy::new(|| {
|
||||
pub static TRACING_EVENT_COUNT_METRIC: Lazy<TracingEventCountMetric> = Lazy::new(|| {
|
||||
let vec = metrics::register_int_counter_vec!(
|
||||
"libmetrics_tracing_event_count",
|
||||
"Number of tracing events, by level",
|
||||
@@ -46,6 +70,7 @@ static TRACING_EVENT_COUNT_METRIC: Lazy<TracingEventCountMetric> = Lazy::new(||
|
||||
impl TracingEventCountMetric {
|
||||
fn new(vec: IntCounterVec) -> Self {
|
||||
Self {
|
||||
critical: vec.with_label_values(&["critical"]),
|
||||
error: vec.with_label_values(&["error"]),
|
||||
warn: vec.with_label_values(&["warn"]),
|
||||
info: vec.with_label_values(&["info"]),
|
||||
@@ -54,6 +79,11 @@ impl TracingEventCountMetric {
|
||||
}
|
||||
}
|
||||
|
||||
// Allow public access from `critical!` macro.
|
||||
pub fn inc_critical(&self) {
|
||||
self.critical.inc();
|
||||
}
|
||||
|
||||
fn inc_for_level(&self, level: tracing::Level) {
|
||||
let counter = match level {
|
||||
tracing::Level::ERROR => &self.error,
|
||||
|
||||
@@ -177,8 +177,8 @@ impl FileCacheState {
|
||||
crate::spawn_with_cancel(
|
||||
token,
|
||||
|res| {
|
||||
if let Err(error) = res {
|
||||
error!(%error, "postgres error")
|
||||
if let Err(e) = res {
|
||||
error!(error = format_args!("{e:#}"), "postgres error");
|
||||
}
|
||||
},
|
||||
conn,
|
||||
@@ -205,7 +205,7 @@ impl FileCacheState {
|
||||
{
|
||||
Ok(rows) => Ok(rows),
|
||||
Err(e) => {
|
||||
error!(error = ?e, "postgres error: {e} -> retrying");
|
||||
error!(error = format_args!("{e:#}"), "postgres error -> retrying");
|
||||
|
||||
let client = FileCacheState::connect(&self.conn_str, self.token.clone())
|
||||
.await
|
||||
|
||||
@@ -191,15 +191,12 @@ async fn start_monitor(
|
||||
.await;
|
||||
let mut monitor = match monitor {
|
||||
Ok(Ok(monitor)) => monitor,
|
||||
Ok(Err(error)) => {
|
||||
error!(?error, "failed to create monitor");
|
||||
Ok(Err(e)) => {
|
||||
error!(error = format_args!("{e:#}"), "failed to create monitor");
|
||||
return;
|
||||
}
|
||||
Err(_) => {
|
||||
error!(
|
||||
?timeout,
|
||||
"creating monitor timed out (probably waiting to receive protocol range)"
|
||||
);
|
||||
error!(?timeout, "creating monitor timed out");
|
||||
return;
|
||||
}
|
||||
};
|
||||
@@ -207,6 +204,9 @@ async fn start_monitor(
|
||||
|
||||
match monitor.run().await {
|
||||
Ok(()) => info!("monitor was killed due to new connection"),
|
||||
Err(e) => error!(error = ?e, "monitor terminated unexpectedly"),
|
||||
Err(e) => error!(
|
||||
error = format_args!("{e:#}"),
|
||||
"monitor terminated unexpectedly"
|
||||
),
|
||||
}
|
||||
}
|
||||
|
||||
@@ -370,12 +370,16 @@ impl Runner {
|
||||
}),
|
||||
InboundMsgKind::InvalidMessage { error } => {
|
||||
warn!(
|
||||
%error, id, "received notification of an invalid message we sent"
|
||||
error = format_args!("{error:#}"),
|
||||
id, "received notification of an invalid message we sent"
|
||||
);
|
||||
Ok(None)
|
||||
}
|
||||
InboundMsgKind::InternalError { error } => {
|
||||
warn!(error, id, "agent experienced an internal error");
|
||||
warn!(
|
||||
error = format_args!("{error:#}"),
|
||||
id, "agent experienced an internal error"
|
||||
);
|
||||
Ok(None)
|
||||
}
|
||||
InboundMsgKind::HealthCheck {} => {
|
||||
@@ -476,7 +480,7 @@ impl Runner {
|
||||
// gives the outermost cause, and the debug impl
|
||||
// pretty-prints the error, whereas {:#} contains all the
|
||||
// causes, but is compact (no newlines).
|
||||
warn!(error = format!("{e:#}"), "error handling message");
|
||||
warn!(error = format_args!("{e:#}"), "error handling message");
|
||||
OutboundMsg::new(
|
||||
OutboundMsgKind::InternalError {
|
||||
error: e.to_string(),
|
||||
@@ -492,7 +496,7 @@ impl Runner {
|
||||
.context("failed to send message")?;
|
||||
}
|
||||
Err(e) => warn!(
|
||||
error = format!("{e}"),
|
||||
error = format_args!("{e:#}"),
|
||||
msg = ?msg,
|
||||
"received error message"
|
||||
),
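Several call sites above switch from ?error / %error to error = format_args!("{e:#}"), so the whole anyhow cause chain lands on one compact line. A tiny sketch of what the different format specifiers print for a chained error:

use anyhow::anyhow;

fn main() {
    let e = anyhow!("connection refused").context("failed to create monitor");
    println!("{e}");   // outermost context only: "failed to create monitor"
    println!("{e:#}"); // full chain, one line: "failed to create monitor: connection refused"
    println!("{e:?}"); // multi-line report with the cause list (and backtrace if captured)
}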
|
||||
|
||||
@@ -3393,7 +3393,17 @@ where
|
||||
let status = response.status();
|
||||
info!(%status, "Cancelled request finished successfully")
|
||||
}
|
||||
Err(e) => error!("Cancelled request finished with an error: {e:?}"),
|
||||
Err(e) => match e {
|
||||
ApiError::ShuttingDown | ApiError::ResourceUnavailable(_) => {
|
||||
// Don't log this at error severity: they are normal during lifecycle of tenants/process
|
||||
info!("Cancelled request aborted for shutdown")
|
||||
}
|
||||
_ => {
|
||||
// Log these in a highly visible way, because we have no client to send the response to, but
|
||||
// would like to know that something went wrong.
|
||||
error!("Cancelled request finished with an error: {e:?}")
|
||||
}
|
||||
},
|
||||
}
|
||||
}
|
||||
// only logging for cancelled panicked request handlers is the tracing_panic_hook,
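The match above stops logging expected shutdown/availability errors for cancelled requests at error severity. A standalone sketch of the downgrade pattern, with a stand-in ApiError enum and println! in place of tracing:

enum ApiError {
    ShuttingDown,
    ResourceUnavailable(String),
    InternalServerError(String),
}

fn log_cancelled_result(result: Result<(), ApiError>) {
    match result {
        Ok(()) => println!("INFO: cancelled request finished successfully"),
        Err(ApiError::ShuttingDown) | Err(ApiError::ResourceUnavailable(_)) => {
            // Normal during tenant/process lifecycle: keep it quiet, don't page anyone.
            println!("INFO: cancelled request aborted for shutdown");
        }
        Err(ApiError::InternalServerError(msg)) => {
            // No client to return this to, so make it loud in the logs.
            println!("ERROR: cancelled request finished with an error: {msg}");
        }
    }
}

fn main() {
    log_cancelled_result(Err(ApiError::ShuttingDown));
}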
|
||||
|
||||
@@ -263,14 +263,6 @@ pub(crate) const TENANT_HEATMAP_BASENAME: &str = "heatmap-v1.json";
|
||||
/// data directory at pageserver startup can be automatically removed.
|
||||
pub(crate) const TEMP_FILE_SUFFIX: &str = "___temp";
|
||||
|
||||
/// A marker file to mark that a timeline directory was not fully initialized.
|
||||
/// If a timeline directory with this marker is encountered at pageserver startup,
|
||||
/// the timeline directory and the marker file are both removed.
|
||||
/// Full path: `tenants/<tenant_id>/timelines/<timeline_id>___uninit`.
|
||||
pub(crate) const TIMELINE_UNINIT_MARK_SUFFIX: &str = "___uninit";
|
||||
|
||||
pub(crate) const TIMELINE_DELETE_MARK_SUFFIX: &str = "___delete";
|
||||
|
||||
pub fn is_temporary(path: &Utf8Path) -> bool {
|
||||
match path.file_name() {
|
||||
Some(name) => name.ends_with(TEMP_FILE_SUFFIX),
|
||||
@@ -278,25 +270,6 @@ pub fn is_temporary(path: &Utf8Path) -> bool {
|
||||
}
|
||||
}
|
||||
|
||||
fn ends_with_suffix(path: &Utf8Path, suffix: &str) -> bool {
|
||||
match path.file_name() {
|
||||
Some(name) => name.ends_with(suffix),
|
||||
None => false,
|
||||
}
|
||||
}
|
||||
|
||||
// FIXME: DO NOT ADD new query methods like this, which will have a next step of parsing timelineid
|
||||
// from the directory name. Instead create type "UninitMark(TimelineId)" and only parse it once
|
||||
// from the name.
|
||||
|
||||
pub(crate) fn is_uninit_mark(path: &Utf8Path) -> bool {
|
||||
ends_with_suffix(path, TIMELINE_UNINIT_MARK_SUFFIX)
|
||||
}
|
||||
|
||||
pub(crate) fn is_delete_mark(path: &Utf8Path) -> bool {
|
||||
ends_with_suffix(path, TIMELINE_DELETE_MARK_SUFFIX)
|
||||
}
|
||||
|
||||
/// During pageserver startup, we need to order operations not to exhaust tokio worker threads by
|
||||
/// blocking.
|
||||
///
|
||||
|
||||
@@ -6,7 +6,7 @@ use std::sync::{Arc, Mutex};
|
||||
use std::task::{Context, Poll};
|
||||
use std::time::{Duration, Instant};
|
||||
|
||||
use enum_map::EnumMap;
|
||||
use enum_map::{Enum as _, EnumMap};
|
||||
use futures::Future;
|
||||
use metrics::{
|
||||
register_counter_vec, register_gauge_vec, register_histogram, register_histogram_vec,
|
||||
@@ -32,6 +32,7 @@ use utils::id::TimelineId;
|
||||
|
||||
use crate::config::PageServerConf;
|
||||
use crate::context::{PageContentKind, RequestContext};
|
||||
use crate::pgdatadir_mapping::DatadirModificationStats;
|
||||
use crate::task_mgr::TaskKind;
|
||||
use crate::tenant::layer_map::LayerMap;
|
||||
use crate::tenant::mgr::TenantSlot;
|
||||
@@ -103,7 +104,7 @@ pub(crate) static STORAGE_TIME_COUNT_PER_TIMELINE: Lazy<IntCounterVec> = Lazy::n
|
||||
.expect("failed to define a metric")
|
||||
});
|
||||
|
||||
// Buckets for background operations like compaction, GC, size calculation
|
||||
// Buckets for background operation duration in seconds, like compaction, GC, size calculation.
|
||||
const STORAGE_OP_BUCKETS: &[f64] = &[0.010, 0.100, 1.0, 10.0, 100.0, 1000.0];
|
||||
|
||||
pub(crate) static STORAGE_TIME_GLOBAL: Lazy<HistogramVec> = Lazy::new(|| {
|
||||
@@ -235,7 +236,7 @@ pub(crate) static GET_VECTORED_LATENCY: Lazy<GetVectoredLatency> = Lazy::new(||
|
||||
|
||||
GetVectoredLatency {
|
||||
map: EnumMap::from_array(std::array::from_fn(|task_kind_idx| {
|
||||
let task_kind = <TaskKind as enum_map::Enum>::from_usize(task_kind_idx);
|
||||
let task_kind = TaskKind::from_usize(task_kind_idx);
|
||||
|
||||
if GetVectoredLatency::TRACKED_TASK_KINDS.contains(&task_kind) {
|
||||
let task_kind = task_kind.into();
|
||||
@@ -258,7 +259,7 @@ pub(crate) static SCAN_LATENCY: Lazy<ScanLatency> = Lazy::new(|| {
|
||||
|
||||
ScanLatency {
|
||||
map: EnumMap::from_array(std::array::from_fn(|task_kind_idx| {
|
||||
let task_kind = <TaskKind as enum_map::Enum>::from_usize(task_kind_idx);
|
||||
let task_kind = TaskKind::from_usize(task_kind_idx);
|
||||
|
||||
if ScanLatency::TRACKED_TASK_KINDS.contains(&task_kind) {
|
||||
let task_kind = task_kind.into();
|
||||
@@ -299,10 +300,10 @@ static PAGE_CACHE_READ_ACCESSES: Lazy<IntCounterVec> = Lazy::new(|| {
|
||||
|
||||
pub(crate) static PAGE_CACHE: Lazy<PageCacheMetrics> = Lazy::new(|| PageCacheMetrics {
|
||||
map: EnumMap::from_array(std::array::from_fn(|task_kind| {
|
||||
let task_kind = <TaskKind as enum_map::Enum>::from_usize(task_kind);
|
||||
let task_kind = TaskKind::from_usize(task_kind);
|
||||
let task_kind: &'static str = task_kind.into();
|
||||
EnumMap::from_array(std::array::from_fn(|content_kind| {
|
||||
let content_kind = <PageContentKind as enum_map::Enum>::from_usize(content_kind);
|
||||
let content_kind = PageContentKind::from_usize(content_kind);
|
||||
let content_kind: &'static str = content_kind.into();
|
||||
PageCacheMetricsForTaskKind {
|
||||
read_accesses_immutable: {
|
||||
@@ -1912,7 +1913,7 @@ pub(crate) static COMPUTE_COMMANDS_COUNTERS: Lazy<ComputeCommandCounters> = Lazy
|
||||
|
||||
ComputeCommandCounters {
|
||||
map: EnumMap::from_array(std::array::from_fn(|i| {
|
||||
let command = <ComputeCommandKind as enum_map::Enum>::from_usize(i);
|
||||
let command = ComputeCommandKind::from_usize(i);
|
||||
let command_str: &'static str = command.into();
|
||||
inner.with_label_values(&[command_str])
|
||||
})),
|
||||
@@ -2212,11 +2213,13 @@ pub(crate) static TENANT_TASK_EVENTS: Lazy<IntCounterVec> = Lazy::new(|| {
|
||||
|
||||
pub struct BackgroundLoopSemaphoreMetrics {
|
||||
counters: EnumMap<BackgroundLoopKind, IntCounterPair>,
|
||||
durations: EnumMap<BackgroundLoopKind, Counter>,
|
||||
durations: EnumMap<BackgroundLoopKind, Histogram>,
|
||||
waiting_tasks: EnumMap<BackgroundLoopKind, IntGauge>,
|
||||
running_tasks: EnumMap<BackgroundLoopKind, IntGauge>,
|
||||
}
|
||||
|
||||
pub(crate) static BACKGROUND_LOOP_SEMAPHORE: Lazy<BackgroundLoopSemaphoreMetrics> = Lazy::new(
|
||||
|| {
|
||||
pub(crate) static BACKGROUND_LOOP_SEMAPHORE: Lazy<BackgroundLoopSemaphoreMetrics> =
|
||||
Lazy::new(|| {
|
||||
let counters = register_int_counter_pair_vec!(
|
||||
"pageserver_background_loop_semaphore_wait_start_count",
|
||||
"Counter for background loop concurrency-limiting semaphore acquire calls started",
|
||||
@@ -2226,45 +2229,101 @@ pub(crate) static BACKGROUND_LOOP_SEMAPHORE: Lazy<BackgroundLoopSemaphoreMetrics
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
let durations = register_counter_vec!(
|
||||
"pageserver_background_loop_semaphore_wait_duration_seconds",
|
||||
"Sum of wall clock time spent waiting on the background loop concurrency-limiting semaphore acquire calls",
|
||||
let durations = register_histogram_vec!(
|
||||
"pageserver_background_loop_semaphore_wait_seconds",
|
||||
"Seconds spent waiting on background loop semaphore acquisition",
|
||||
&["task"],
|
||||
vec![0.01, 1.0, 5.0, 10.0, 30.0, 60.0, 180.0, 300.0, 600.0],
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
let waiting_tasks = register_int_gauge_vec!(
|
||||
"pageserver_background_loop_semaphore_waiting_tasks",
|
||||
"Number of background loop tasks waiting for semaphore",
|
||||
&["task"],
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
let running_tasks = register_int_gauge_vec!(
|
||||
"pageserver_background_loop_semaphore_running_tasks",
|
||||
"Number of background loop tasks running concurrently",
|
||||
&["task"],
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
BackgroundLoopSemaphoreMetrics {
|
||||
counters: enum_map::EnumMap::from_array(std::array::from_fn(|i| {
|
||||
let kind = <BackgroundLoopKind as enum_map::Enum>::from_usize(i);
|
||||
counters: EnumMap::from_array(std::array::from_fn(|i| {
|
||||
let kind = BackgroundLoopKind::from_usize(i);
|
||||
counters.with_label_values(&[kind.into()])
|
||||
})),
|
||||
durations: enum_map::EnumMap::from_array(std::array::from_fn(|i| {
|
||||
let kind = <BackgroundLoopKind as enum_map::Enum>::from_usize(i);
|
||||
durations: EnumMap::from_array(std::array::from_fn(|i| {
|
||||
let kind = BackgroundLoopKind::from_usize(i);
|
||||
durations.with_label_values(&[kind.into()])
|
||||
})),
|
||||
waiting_tasks: EnumMap::from_array(std::array::from_fn(|i| {
|
||||
let kind = BackgroundLoopKind::from_usize(i);
|
||||
waiting_tasks.with_label_values(&[kind.into()])
|
||||
})),
|
||||
running_tasks: EnumMap::from_array(std::array::from_fn(|i| {
|
||||
let kind = BackgroundLoopKind::from_usize(i);
|
||||
running_tasks.with_label_values(&[kind.into()])
|
||||
})),
|
||||
}
|
||||
},
|
||||
);
|
||||
});
|
||||
|
||||
impl BackgroundLoopSemaphoreMetrics {
|
||||
pub(crate) fn measure_acquisition(&self, task: BackgroundLoopKind) -> impl Drop + '_ {
|
||||
struct Record<'a> {
|
||||
metrics: &'a BackgroundLoopSemaphoreMetrics,
|
||||
task: BackgroundLoopKind,
|
||||
_counter_guard: metrics::IntCounterPairGuard,
|
||||
start: Instant,
|
||||
}
|
||||
impl Drop for Record<'_> {
|
||||
fn drop(&mut self) {
|
||||
let elapsed = self.start.elapsed().as_secs_f64();
|
||||
self.metrics.durations[self.task].inc_by(elapsed);
|
||||
}
|
||||
}
|
||||
Record {
|
||||
metrics: self,
|
||||
/// Starts recording semaphore metrics. Call `acquired()` on the returned recorder when the
|
||||
/// semaphore is acquired, and drop it when the task completes or is cancelled.
|
||||
pub(crate) fn record(
|
||||
&self,
|
||||
task: BackgroundLoopKind,
|
||||
) -> BackgroundLoopSemaphoreMetricsRecorder {
|
||||
BackgroundLoopSemaphoreMetricsRecorder::start(self, task)
|
||||
}
|
||||
}
|
||||
|
||||
/// Records metrics for a background task.
|
||||
pub struct BackgroundLoopSemaphoreMetricsRecorder<'a> {
|
||||
metrics: &'a BackgroundLoopSemaphoreMetrics,
|
||||
task: BackgroundLoopKind,
|
||||
start: Instant,
|
||||
wait_counter_guard: Option<metrics::IntCounterPairGuard>,
|
||||
}
|
||||
|
||||
impl<'a> BackgroundLoopSemaphoreMetricsRecorder<'a> {
|
||||
/// Starts recording semaphore metrics, by recording wait time and incrementing
|
||||
/// `wait_start_count` and `waiting_tasks`.
|
||||
fn start(metrics: &'a BackgroundLoopSemaphoreMetrics, task: BackgroundLoopKind) -> Self {
|
||||
metrics.waiting_tasks[task].inc();
|
||||
Self {
|
||||
metrics,
|
||||
task,
|
||||
_counter_guard: self.counters[task].guard(),
|
||||
start: Instant::now(),
|
||||
wait_counter_guard: Some(metrics.counters[task].guard()),
|
||||
}
|
||||
}
|
||||
|
||||
/// Signals that the semaphore has been acquired, and updates relevant metrics.
|
||||
pub fn acquired(&mut self) -> Duration {
|
||||
let waited = self.start.elapsed();
|
||||
self.wait_counter_guard.take().expect("already acquired");
|
||||
self.metrics.durations[self.task].observe(waited.as_secs_f64());
|
||||
self.metrics.waiting_tasks[self.task].dec();
|
||||
self.metrics.running_tasks[self.task].inc();
|
||||
waited
|
||||
}
|
||||
}
|
||||
|
||||
impl Drop for BackgroundLoopSemaphoreMetricsRecorder<'_> {
|
||||
/// The task either completed or was cancelled.
|
||||
fn drop(&mut self) {
|
||||
if self.wait_counter_guard.take().is_some() {
|
||||
// Waiting.
|
||||
self.metrics.durations[self.task].observe(self.start.elapsed().as_secs_f64());
|
||||
self.metrics.waiting_tasks[self.task].dec();
|
||||
} else {
|
||||
// Running.
|
||||
self.metrics.running_tasks[self.task].dec();
|
||||
}
|
||||
}
|
||||
}
|
||||
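An illustrative call sequence for the recorder API introduced above, sketched as comments (the statics and types are the ones defined in this diff; error handling is elided):

// let mut recorder = BACKGROUND_LOOP_SEMAPHORE.record(BackgroundLoopKind::Compaction); // waiting_tasks += 1
// let permit = CONCURRENT_BACKGROUND_TASKS.acquire().await.expect("never closed");     // wait for a slot
// let waited = recorder.acquired(); // waiting_tasks -= 1, running_tasks += 1, wait duration observed
// /* ... run the background task while holding `permit` ... */
// drop(recorder);                   // running_tasks -= 1 (or waiting_tasks -= 1 if never acquired)
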
@@ -2378,11 +2437,40 @@ pub(crate) struct WalIngestMetrics {
|
||||
pub(crate) records_observed: IntCounter,
|
||||
pub(crate) records_committed: IntCounter,
|
||||
pub(crate) records_filtered: IntCounter,
|
||||
pub(crate) values_committed_metadata_images: IntCounter,
|
||||
pub(crate) values_committed_metadata_deltas: IntCounter,
|
||||
pub(crate) values_committed_data_images: IntCounter,
|
||||
pub(crate) values_committed_data_deltas: IntCounter,
|
||||
pub(crate) gap_blocks_zeroed_on_rel_extend: IntCounter,
|
||||
pub(crate) clear_vm_bits_unknown: IntCounterVec,
|
||||
}
|
||||
|
||||
impl WalIngestMetrics {
|
||||
pub(crate) fn inc_values_committed(&self, stats: &DatadirModificationStats) {
|
||||
if stats.metadata_images > 0 {
|
||||
self.values_committed_metadata_images
|
||||
.inc_by(stats.metadata_images);
|
||||
}
|
||||
if stats.metadata_deltas > 0 {
|
||||
self.values_committed_metadata_deltas
|
||||
.inc_by(stats.metadata_deltas);
|
||||
}
|
||||
if stats.data_images > 0 {
|
||||
self.values_committed_data_images.inc_by(stats.data_images);
|
||||
}
|
||||
if stats.data_deltas > 0 {
|
||||
self.values_committed_data_deltas.inc_by(stats.data_deltas);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) static WAL_INGEST: Lazy<WalIngestMetrics> = Lazy::new(|| {
|
||||
let values_committed = register_int_counter_vec!(
|
||||
"pageserver_wal_ingest_values_committed",
|
||||
"Number of values committed to pageserver storage from WAL records",
|
||||
&["class", "kind"],
|
||||
)
|
||||
.expect("failed to define a metric");
|
||||
|
||||
WalIngestMetrics {
|
||||
bytes_received: register_int_counter!(
|
||||
"pageserver_wal_ingest_bytes_received",
|
||||
@@ -2409,17 +2497,15 @@ pub(crate) static WAL_INGEST: Lazy<WalIngestMetrics> = Lazy::new(|| {
|
||||
"Number of WAL records filtered out due to sharding"
|
||||
)
|
||||
.expect("failed to define a metric"),
|
||||
values_committed_metadata_images: values_committed.with_label_values(&["metadata", "image"]),
|
||||
values_committed_metadata_deltas: values_committed.with_label_values(&["metadata", "delta"]),
|
||||
values_committed_data_images: values_committed.with_label_values(&["data", "image"]),
|
||||
values_committed_data_deltas: values_committed.with_label_values(&["data", "delta"]),
|
||||
gap_blocks_zeroed_on_rel_extend: register_int_counter!(
|
||||
"pageserver_gap_blocks_zeroed_on_rel_extend",
|
||||
"Total number of zero gap blocks written on relation extends"
|
||||
)
|
||||
.expect("failed to define a metric"),
|
||||
clear_vm_bits_unknown: register_int_counter_vec!(
|
||||
"pageserver_wal_ingest_clear_vm_bits_unknown",
|
||||
"Number of ignored ClearVmBits operations due to unknown pages/relations",
|
||||
&["entity"],
|
||||
)
|
||||
.expect("failed to define a metric"),
|
||||
}
|
||||
});
|
||||
|
||||
@@ -2486,7 +2572,7 @@ pub(crate) static WAL_REDO_PROCESS_LAUNCH_DURATION_HISTOGRAM: Lazy<Histogram> =
|
||||
|
||||
pub(crate) struct WalRedoProcessCounters {
|
||||
pub(crate) started: IntCounter,
|
||||
pub(crate) killed_by_cause: enum_map::EnumMap<WalRedoKillCause, IntCounter>,
|
||||
pub(crate) killed_by_cause: EnumMap<WalRedoKillCause, IntCounter>,
|
||||
pub(crate) active_stderr_logger_tasks_started: IntCounter,
|
||||
pub(crate) active_stderr_logger_tasks_finished: IntCounter,
|
||||
}
|
||||
@@ -2528,7 +2614,7 @@ impl Default for WalRedoProcessCounters {
|
||||
Self {
|
||||
started,
|
||||
killed_by_cause: EnumMap::from_array(std::array::from_fn(|i| {
|
||||
let cause = <WalRedoKillCause as enum_map::Enum>::from_usize(i);
|
||||
let cause = WalRedoKillCause::from_usize(i);
|
||||
let cause_str: &'static str = cause.into();
|
||||
killed.with_label_values(&[cause_str])
|
||||
})),
|
||||
|
||||
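The metrics changes above repeatedly use the same construction pattern: pre-build one labelled metric per enum variant so later lookups are a plain index. A minimal standalone sketch (hypothetical enum, assuming the `enum_map` crate):

use enum_map::{Enum, EnumMap};

#[derive(Debug, Clone, Copy, Enum)]
enum LoopKind {
    Compaction,
    Gc,
    Eviction,
}

fn main() {
    // One entry per variant, built eagerly via from_usize, like the metrics maps above.
    let names: EnumMap<LoopKind, String> = EnumMap::from_array(std::array::from_fn(|i| {
        let kind = LoopKind::from_usize(i);
        format!("{kind:?}").to_lowercase()
    }));
    assert_eq!(names[LoopKind::Gc], "gc");
}
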
@@ -1280,8 +1280,6 @@ impl PageServerHandler {
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
// and log the info! line inside the request span
|
||||
.instrument(span.clone())
|
||||
.await?;
|
||||
}
|
||||
Ok(())
|
||||
@@ -1692,7 +1690,7 @@ impl PageServerHandler {
|
||||
// to distinguish a misbehaving client (asking for old LSN) from a storage issue (data missing at a legitimate LSN).
|
||||
if request_lsn < **latest_gc_cutoff_lsn && !timeline.is_gc_blocked_by_lsn_lease_deadline() {
|
||||
let gc_info = &timeline.gc_info.read().unwrap();
|
||||
if !gc_info.leases.contains_key(&request_lsn) {
|
||||
if !gc_info.lsn_covered_by_lease(request_lsn) {
|
||||
return Err(
|
||||
PageStreamError::BadRequest(format!(
|
||||
"tried to request a page version that was garbage collected. requested at {} gc cutoff {}",
|
||||
@@ -2037,6 +2035,12 @@ impl PageServerHandler {
|
||||
.get(tenant_id, timeline_id, ShardSelector::Zero)
|
||||
.await?;
|
||||
|
||||
if timeline.is_archived() == Some(true) {
|
||||
// TODO after a grace period, turn this log line into a hard error
|
||||
tracing::warn!("timeline {tenant_id}/{timeline_id} is archived, but got basebackup request for it.");
|
||||
//return Err(QueryError::NotFound("timeline is archived".into()))
|
||||
}
|
||||
|
||||
let latest_gc_cutoff_lsn = timeline.get_latest_gc_cutoff_lsn();
|
||||
if let Some(lsn) = lsn {
|
||||
// Backup was requested at a particular LSN. Wait for it to arrive.
|
||||
|
||||
@@ -48,7 +48,7 @@ use tracing::{debug, trace, warn};
|
||||
use utils::bin_ser::DeserializeError;
|
||||
use utils::pausable_failpoint;
|
||||
use utils::{bin_ser::BeSer, lsn::Lsn};
|
||||
use wal_decoder::serialized_batch::SerializedValueBatch;
|
||||
use wal_decoder::serialized_batch::{SerializedValueBatch, ValueMeta};
|
||||
|
||||
/// Max delta records appended to the AUX_FILES_KEY (for aux v1). The write path will write a full image once this threshold is reached.
|
||||
pub const MAX_AUX_FILE_DELTAS: usize = 1024;
|
||||
@@ -612,11 +612,18 @@ impl Timeline {
|
||||
pausable_failpoint!("find-lsn-for-timestamp-pausable");
|
||||
|
||||
let gc_cutoff_lsn_guard = self.get_latest_gc_cutoff_lsn();
|
||||
let gc_cutoff_planned = {
|
||||
let gc_info = self.gc_info.read().unwrap();
|
||||
gc_info.min_cutoff()
|
||||
};
|
||||
// Usually the planned cutoff is newer than the cutoff of the last gc run,
|
||||
// but let's be defensive.
|
||||
let gc_cutoff = gc_cutoff_planned.max(*gc_cutoff_lsn_guard);
|
||||
// We use this method to figure out the branching LSN for the new branch, but the
|
||||
// GC cutoff could be before the branching point and we cannot create a new branch
|
||||
// with LSN < `ancestor_lsn`. Thus, pick the maximum of these two to be
|
||||
// on the safe side.
|
||||
let min_lsn = std::cmp::max(*gc_cutoff_lsn_guard, self.get_ancestor_lsn());
|
||||
let min_lsn = std::cmp::max(gc_cutoff, self.get_ancestor_lsn());
|
||||
let max_lsn = self.get_last_record_lsn();
|
||||
|
||||
// LSNs are always 8-byte aligned. low/mid/high represent the
|
||||
@@ -1297,6 +1304,26 @@ impl DatadirModification<'_> {
|
||||
.is_some_and(|b| b.has_data())
|
||||
}
|
||||
|
||||
/// Returns statistics about the currently pending modifications.
|
||||
pub(crate) fn stats(&self) -> DatadirModificationStats {
|
||||
let mut stats = DatadirModificationStats::default();
|
||||
for (_, _, value) in self.pending_metadata_pages.values().flatten() {
|
||||
match value {
|
||||
Value::Image(_) => stats.metadata_images += 1,
|
||||
Value::WalRecord(r) if r.will_init() => stats.metadata_images += 1,
|
||||
Value::WalRecord(_) => stats.metadata_deltas += 1,
|
||||
}
|
||||
}
|
||||
for valuemeta in self.pending_data_batch.iter().flat_map(|b| &b.metadata) {
|
||||
match valuemeta {
|
||||
ValueMeta::Serialized(s) if s.will_init => stats.data_images += 1,
|
||||
ValueMeta::Serialized(_) => stats.data_deltas += 1,
|
||||
ValueMeta::Observed(_) => {}
|
||||
}
|
||||
}
|
||||
stats
|
||||
}
|
||||
|
||||
/// Set the current lsn
|
||||
pub(crate) fn set_lsn(&mut self, lsn: Lsn) -> anyhow::Result<()> {
|
||||
ensure!(
|
||||
@@ -2317,6 +2344,15 @@ impl DatadirModification<'_> {
|
||||
}
|
||||
}
|
||||
|
||||
/// Statistics for a DatadirModification.
|
||||
#[derive(Default)]
|
||||
pub struct DatadirModificationStats {
|
||||
pub metadata_images: u64,
|
||||
pub metadata_deltas: u64,
|
||||
pub data_images: u64,
|
||||
pub data_deltas: u64,
|
||||
}
|
||||
|
||||
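A standalone sketch (simplified stand-in types, not the pageserver ones) of the classification rule `stats()` applies above: full images and WAL records that initialize a page count as images, everything else counts as deltas.

#[derive(Debug, Default, PartialEq)]
struct Stats {
    images: u64,
    deltas: u64,
}

enum PendingValue {
    Image,
    WalRecord { will_init: bool },
}

fn classify(values: &[PendingValue]) -> Stats {
    let mut stats = Stats::default();
    for value in values {
        match value {
            PendingValue::Image | PendingValue::WalRecord { will_init: true } => stats.images += 1,
            PendingValue::WalRecord { will_init: false } => stats.deltas += 1,
        }
    }
    stats
}

fn main() {
    let pending = [
        PendingValue::Image,
        PendingValue::WalRecord { will_init: true },
        PendingValue::WalRecord { will_init: false },
    ];
    assert_eq!(classify(&pending), Stats { images: 2, deltas: 1 });
}
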
/// This struct facilitates accessing either a committed key from the timeline at a
|
||||
/// specific LSN, or the latest uncommitted key from a pending modification.
|
||||
///
|
||||
|
||||
@@ -46,6 +46,7 @@ use std::sync::atomic::AtomicBool;
|
||||
use std::sync::Weak;
|
||||
use std::time::SystemTime;
|
||||
use storage_broker::BrokerClientChannel;
|
||||
use timeline::compaction::CompactionOutcome;
|
||||
use timeline::compaction::GcCompactionQueue;
|
||||
use timeline::import_pgdata;
|
||||
use timeline::offload::offload_timeline;
|
||||
@@ -95,7 +96,6 @@ use crate::context::{DownloadBehavior, RequestContext};
|
||||
use crate::deletion_queue::DeletionQueueClient;
|
||||
use crate::deletion_queue::DeletionQueueError;
|
||||
use crate::import_datadir;
|
||||
use crate::is_uninit_mark;
|
||||
use crate::l0_flush::L0FlushGlobalState;
|
||||
use crate::metrics::CONCURRENT_INITDBS;
|
||||
use crate::metrics::INITDB_RUN_TIME;
|
||||
@@ -1793,11 +1793,7 @@ impl Tenant {
|
||||
let entry = entry.context("read timeline dir entry")?;
|
||||
let entry_path = entry.path();
|
||||
|
||||
let purge = if crate::is_temporary(entry_path)
|
||||
// TODO: remove uninit mark code (https://github.com/neondatabase/neon/issues/5718)
|
||||
|| is_uninit_mark(entry_path)
|
||||
|| crate::is_delete_mark(entry_path)
|
||||
{
|
||||
let purge = if crate::is_temporary(entry_path) {
|
||||
true
|
||||
} else {
|
||||
match TimelineId::try_from(entry_path.file_name()) {
|
||||
@@ -2912,10 +2908,10 @@ impl Tenant {
|
||||
self: &Arc<Self>,
|
||||
cancel: &CancellationToken,
|
||||
ctx: &RequestContext,
|
||||
) -> Result<bool, timeline::CompactionError> {
|
||||
) -> Result<CompactionOutcome, timeline::CompactionError> {
|
||||
// Don't start doing work during shutdown, or when broken, we do not need those in the logs
|
||||
if !self.is_active() {
|
||||
return Ok(false);
|
||||
return Ok(CompactionOutcome::Done);
|
||||
}
|
||||
|
||||
{
|
||||
@@ -2929,7 +2925,7 @@ impl Tenant {
|
||||
// to AttachedSingle state.
|
||||
if !conf.location.may_upload_layers_hint() {
|
||||
info!("Skipping compaction in location state {:?}", conf.location);
|
||||
return Ok(false);
|
||||
return Ok(CompactionOutcome::Done);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -2972,7 +2968,7 @@ impl Tenant {
|
||||
// Before doing any I/O work, check our circuit breaker
|
||||
if self.compaction_circuit_breaker.lock().unwrap().is_broken() {
|
||||
info!("Skipping compaction due to previous failures");
|
||||
return Ok(false);
|
||||
return Ok(CompactionOutcome::Done);
|
||||
}
|
||||
|
||||
let mut has_pending_task = false;
|
||||
@@ -2980,10 +2976,10 @@ impl Tenant {
|
||||
for (timeline_id, timeline, (can_compact, can_offload)) in &timelines_to_compact_or_offload
|
||||
{
|
||||
// pending_task_left == None: cannot compact, maybe still pending tasks
|
||||
// pending_task_left == Some(true): compaction task left
|
||||
// pending_task_left == Some(false): no compaction task left
|
||||
// pending_task_left == Some(Pending): compaction task left
|
||||
// pending_task_left == Some(Done): no compaction task left
|
||||
let pending_task_left = if *can_compact {
|
||||
let has_pending_l0_compaction_task = timeline
|
||||
let compaction_outcome = timeline
|
||||
.compact(cancel, EnumSet::empty(), ctx)
|
||||
.instrument(info_span!("compact_timeline", %timeline_id))
|
||||
.await
|
||||
@@ -3001,27 +2997,27 @@ impl Tenant {
|
||||
.fail(&CIRCUIT_BREAKERS_BROKEN, e);
|
||||
}
|
||||
})?;
|
||||
if has_pending_l0_compaction_task {
|
||||
Some(true)
|
||||
if let CompactionOutcome::Pending = compaction_outcome {
|
||||
Some(CompactionOutcome::Pending)
|
||||
} else {
|
||||
let queue = {
|
||||
let guard = self.scheduled_compaction_tasks.lock().unwrap();
|
||||
guard.get(timeline_id).cloned()
|
||||
};
|
||||
if let Some(queue) = queue {
|
||||
let has_pending_tasks = queue
|
||||
let outcome = queue
|
||||
.iteration(cancel, ctx, &self.gc_block, timeline)
|
||||
.await?;
|
||||
Some(has_pending_tasks)
|
||||
Some(outcome)
|
||||
} else {
|
||||
Some(false)
|
||||
Some(CompactionOutcome::Done)
|
||||
}
|
||||
}
|
||||
} else {
|
||||
None
|
||||
};
|
||||
has_pending_task |= pending_task_left.unwrap_or(false);
|
||||
if pending_task_left == Some(false) && *can_offload {
|
||||
has_pending_task |= pending_task_left == Some(CompactionOutcome::Pending);
|
||||
if pending_task_left == Some(CompactionOutcome::Done) && *can_offload {
|
||||
pausable_failpoint!("before-timeline-auto-offload");
|
||||
match offload_timeline(self, timeline)
|
||||
.instrument(info_span!("offload_timeline", %timeline_id))
|
||||
@@ -3041,7 +3037,11 @@ impl Tenant {
|
||||
.unwrap()
|
||||
.success(&CIRCUIT_BREAKERS_UNBROKEN);
|
||||
|
||||
Ok(has_pending_task)
|
||||
Ok(if has_pending_task {
|
||||
CompactionOutcome::Pending
|
||||
} else {
|
||||
CompactionOutcome::Done
|
||||
})
|
||||
}
|
||||
|
||||
/// Cancel scheduled compaction tasks
|
||||
@@ -4642,22 +4642,26 @@ impl Tenant {
|
||||
|
||||
// check against last actual 'latest_gc_cutoff' first
|
||||
let latest_gc_cutoff_lsn = src_timeline.get_latest_gc_cutoff_lsn();
|
||||
src_timeline
|
||||
.check_lsn_is_in_scope(start_lsn, &latest_gc_cutoff_lsn)
|
||||
.context(format!(
|
||||
"invalid branch start lsn: less than latest GC cutoff {}",
|
||||
*latest_gc_cutoff_lsn,
|
||||
))
|
||||
.map_err(CreateTimelineError::AncestorLsn)?;
|
||||
|
||||
// and then the planned GC cutoff
|
||||
{
|
||||
let gc_info = src_timeline.gc_info.read().unwrap();
|
||||
let cutoff = gc_info.min_cutoff();
|
||||
if start_lsn < cutoff {
|
||||
return Err(CreateTimelineError::AncestorLsn(anyhow::anyhow!(
|
||||
"invalid branch start lsn: less than planned GC cutoff {cutoff}"
|
||||
)));
|
||||
let planned_cutoff = gc_info.min_cutoff();
|
||||
if gc_info.lsn_covered_by_lease(start_lsn) {
|
||||
tracing::info!("skipping comparison of {start_lsn} with gc cutoff {} and planned gc cutoff {planned_cutoff} due to lsn lease", *latest_gc_cutoff_lsn);
|
||||
} else {
|
||||
src_timeline
|
||||
.check_lsn_is_in_scope(start_lsn, &latest_gc_cutoff_lsn)
|
||||
.context(format!(
|
||||
"invalid branch start lsn: less than latest GC cutoff {}",
|
||||
*latest_gc_cutoff_lsn,
|
||||
))
|
||||
.map_err(CreateTimelineError::AncestorLsn)?;
|
||||
|
||||
// and then the planned GC cutoff
|
||||
if start_lsn < planned_cutoff {
|
||||
return Err(CreateTimelineError::AncestorLsn(anyhow::anyhow!(
|
||||
"invalid branch start lsn: less than planned GC cutoff {planned_cutoff}"
|
||||
)));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
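The branch-creation change above can be summarized as: a requested start LSN must be at or above both the applied and the planned GC cutoff, unless an LSN lease covers it. A minimal standalone sketch (plain integers standing in for LSNs):

fn branch_start_allowed(start: u64, latest_gc_cutoff: u64, planned_gc_cutoff: u64, leased: bool) -> bool {
    // A lease on `start` skips both cutoff checks entirely.
    leased || (start >= latest_gc_cutoff && start >= planned_gc_cutoff)
}

fn main() {
    assert!(!branch_start_allowed(5500, 5000, 6000, false)); // below the planned cutoff
    assert!(branch_start_allowed(5500, 5000, 6000, true));   // lease overrides the cutoffs
    assert!(branch_start_allowed(6500, 5000, 6000, false));  // above both cutoffs
}
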
@@ -5491,6 +5495,9 @@ pub(crate) mod harness {
|
||||
image_layer_creation_check_threshold: Some(
|
||||
tenant_conf.image_layer_creation_check_threshold,
|
||||
),
|
||||
image_creation_preempt_threshold: Some(
|
||||
tenant_conf.image_creation_preempt_threshold,
|
||||
),
|
||||
lsn_lease_length: Some(tenant_conf.lsn_lease_length),
|
||||
lsn_lease_length_for_ts: Some(tenant_conf.lsn_lease_length_for_ts),
|
||||
timeline_offloading: Some(tenant_conf.timeline_offloading),
|
||||
|
||||
@@ -357,6 +357,9 @@ pub struct TenantConfOpt {
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub image_layer_creation_check_threshold: Option<u8>,
|
||||
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub image_creation_preempt_threshold: Option<usize>,
|
||||
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
#[serde(with = "humantime_serde")]
|
||||
#[serde(default)]
|
||||
@@ -453,6 +456,9 @@ impl TenantConfOpt {
|
||||
image_layer_creation_check_threshold: self
|
||||
.image_layer_creation_check_threshold
|
||||
.unwrap_or(global_conf.image_layer_creation_check_threshold),
|
||||
image_creation_preempt_threshold: self
|
||||
.image_creation_preempt_threshold
|
||||
.unwrap_or(global_conf.image_creation_preempt_threshold),
|
||||
lsn_lease_length: self
|
||||
.lsn_lease_length
|
||||
.unwrap_or(global_conf.lsn_lease_length),
|
||||
@@ -504,6 +510,7 @@ impl TenantConfOpt {
|
||||
mut lazy_slru_download,
|
||||
mut timeline_get_throttle,
|
||||
mut image_layer_creation_check_threshold,
|
||||
mut image_creation_preempt_threshold,
|
||||
mut lsn_lease_length,
|
||||
mut lsn_lease_length_for_ts,
|
||||
mut timeline_offloading,
|
||||
@@ -578,6 +585,9 @@ impl TenantConfOpt {
|
||||
patch
|
||||
.image_layer_creation_check_threshold
|
||||
.apply(&mut image_layer_creation_check_threshold);
|
||||
patch
|
||||
.image_creation_preempt_threshold
|
||||
.apply(&mut image_creation_preempt_threshold);
|
||||
patch
|
||||
.lsn_lease_length
|
||||
.map(|v| humantime::parse_duration(&v))?
|
||||
@@ -626,6 +636,7 @@ impl TenantConfOpt {
|
||||
lazy_slru_download,
|
||||
timeline_get_throttle,
|
||||
image_layer_creation_check_threshold,
|
||||
image_creation_preempt_threshold,
|
||||
lsn_lease_length,
|
||||
lsn_lease_length_for_ts,
|
||||
timeline_offloading,
|
||||
@@ -689,6 +700,7 @@ impl From<TenantConfOpt> for models::TenantConfig {
|
||||
lazy_slru_download: value.lazy_slru_download,
|
||||
timeline_get_throttle: value.timeline_get_throttle,
|
||||
image_layer_creation_check_threshold: value.image_layer_creation_check_threshold,
|
||||
image_creation_preempt_threshold: value.image_creation_preempt_threshold,
|
||||
lsn_lease_length: value.lsn_lease_length.map(humantime),
|
||||
lsn_lease_length_for_ts: value.lsn_lease_length_for_ts.map(humantime),
|
||||
timeline_offloading: value.timeline_offloading,
|
||||
|
||||
@@ -437,8 +437,7 @@ impl RemoteTimelineClient {
|
||||
.conf
|
||||
.remote_storage_config
|
||||
.as_ref()
|
||||
.and_then(|r| r.concurrency_limit())
|
||||
.unwrap_or(0);
|
||||
.map_or(0, |r| r.concurrency_limit());
|
||||
let mut upload_queue = self.upload_queue.lock().unwrap();
|
||||
upload_queue.initialize_with_current_remote_index_part(index_part, inprogress_limit)?;
|
||||
self.update_remote_physical_size_gauge(Some(index_part));
|
||||
@@ -461,8 +460,7 @@ impl RemoteTimelineClient {
|
||||
.conf
|
||||
.remote_storage_config
|
||||
.as_ref()
|
||||
.and_then(|r| r.concurrency_limit())
|
||||
.unwrap_or(0);
|
||||
.map_or(0, |r| r.concurrency_limit());
|
||||
let mut upload_queue = self.upload_queue.lock().unwrap();
|
||||
upload_queue.initialize_empty_remote(local_metadata, inprogress_limit)?;
|
||||
self.update_remote_physical_size_gauge(None);
|
||||
@@ -484,8 +482,7 @@ impl RemoteTimelineClient {
|
||||
.conf
|
||||
.remote_storage_config
|
||||
.as_ref()
|
||||
.and_then(|r| r.concurrency_limit())
|
||||
.unwrap_or(0);
|
||||
.map_or(0, |r| r.concurrency_limit());
|
||||
|
||||
let mut upload_queue = self.upload_queue.lock().unwrap();
|
||||
upload_queue.initialize_with_current_remote_index_part(index_part, inprogress_limit)?;
|
||||
|
||||
@@ -9,13 +9,14 @@ use crate::{
|
||||
metrics::SECONDARY_MODE,
|
||||
tenant::{
|
||||
config::AttachmentMode,
|
||||
mgr::GetTenantError,
|
||||
mgr::TenantManager,
|
||||
mgr::{GetTenantError, TenantManager},
|
||||
remote_timeline_client::remote_heatmap_path,
|
||||
span::debug_assert_current_span_has_tenant_id,
|
||||
tasks::{warn_when_period_overrun, BackgroundLoopKind},
|
||||
Tenant,
|
||||
},
|
||||
virtual_file::VirtualFile,
|
||||
TEMP_FILE_SUFFIX,
|
||||
};
|
||||
|
||||
use futures::Future;
|
||||
@@ -32,7 +33,10 @@ use super::{
|
||||
};
|
||||
use tokio_util::sync::CancellationToken;
|
||||
use tracing::{info_span, instrument, Instrument};
|
||||
use utils::{backoff, completion::Barrier, yielding_loop::yielding_loop};
|
||||
use utils::{
|
||||
backoff, completion::Barrier, crashsafe::path_with_suffix_extension,
|
||||
yielding_loop::yielding_loop,
|
||||
};
|
||||
|
||||
pub(super) async fn heatmap_uploader_task(
|
||||
tenant_manager: Arc<TenantManager>,
|
||||
@@ -461,6 +465,18 @@ async fn upload_tenant_heatmap(
|
||||
}
|
||||
}
|
||||
|
||||
// After a successful upload, persist the fresh heatmap to disk.
|
||||
// When restarting, the tenant will read the heatmap from disk
|
||||
// and additively generate a new heatmap (see [`Timeline::generate_heatmap`]).
|
||||
// If the heatmap is stale, the additive generation can lead to keeping previously
|
||||
// evicted timelines on the secondary's disk.
|
||||
let tenant_shard_id = tenant.get_tenant_shard_id();
|
||||
let heatmap_path = tenant.conf.tenant_heatmap_path(tenant_shard_id);
|
||||
let temp_path = path_with_suffix_extension(&heatmap_path, TEMP_FILE_SUFFIX);
|
||||
if let Err(err) = VirtualFile::crashsafe_overwrite(heatmap_path, temp_path, bytes).await {
|
||||
tracing::warn!("Non fatal IO error writing to disk after heatmap upload: {err}");
|
||||
}
|
||||
|
||||
tracing::info!("Successfully uploaded {size} byte heatmap to {path}");
|
||||
|
||||
Ok(UploadHeatmapOutcome::Uploaded(LastUploadState {
|
||||
|
||||
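The heatmap persistence above relies on a crash-safe overwrite. A generic std-only stand-in for that pattern (write to a temporary file, flush, then rename over the target; the real `VirtualFile::crashsafe_overwrite` may differ, and the parent-directory fsync is omitted here for brevity):

use std::fs;
use std::io::Write;
use std::path::Path;

fn crashsafe_overwrite(target: &Path, temp: &Path, bytes: &[u8]) -> std::io::Result<()> {
    let mut file = fs::File::create(temp)?;
    file.write_all(bytes)?;
    file.sync_all()?;          // make sure the new contents hit disk before the rename
    fs::rename(temp, target)?; // atomic replacement on POSIX filesystems
    Ok(())
}
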
@@ -166,6 +166,10 @@ impl BatchLayerWriter {
|
||||
// END: catch every error and do the recovery in the above section
|
||||
Ok(generated_layers)
|
||||
}
|
||||
|
||||
pub fn pending_layer_num(&self) -> usize {
|
||||
self.generated_layer_writers.len()
|
||||
}
|
||||
}
|
||||
|
||||
/// An image writer that takes images and produces multiple image layers.
|
||||
|
||||
@@ -3,19 +3,22 @@
|
||||
|
||||
use std::ops::ControlFlow;
|
||||
use std::str::FromStr;
|
||||
use std::sync::Arc;
|
||||
use std::sync::{Arc, Mutex};
|
||||
use std::time::{Duration, Instant};
|
||||
|
||||
use crate::context::{DownloadBehavior, RequestContext};
|
||||
use crate::metrics::TENANT_TASK_EVENTS;
|
||||
use crate::metrics::{BackgroundLoopSemaphoreMetricsRecorder, TENANT_TASK_EVENTS};
|
||||
use crate::task_mgr;
|
||||
use crate::task_mgr::{TaskKind, BACKGROUND_RUNTIME};
|
||||
use crate::tenant::throttle::Stats;
|
||||
use crate::tenant::timeline::compaction::CompactionOutcome;
|
||||
use crate::tenant::timeline::CompactionError;
|
||||
use crate::tenant::{Tenant, TenantState};
|
||||
use once_cell::sync::Lazy;
|
||||
use rand::Rng;
|
||||
use tokio_util::sync::CancellationToken;
|
||||
use tracing::*;
|
||||
use utils::rate_limit::RateLimit;
|
||||
use utils::{backoff, completion, pausable_failpoint};
|
||||
|
||||
static CONCURRENT_BACKGROUND_TASKS: once_cell::sync::Lazy<tokio::sync::Semaphore> =
|
||||
@@ -40,7 +43,16 @@ static CONCURRENT_BACKGROUND_TASKS: once_cell::sync::Lazy<tokio::sync::Semaphore
|
||||
tokio::sync::Semaphore::new(permits)
|
||||
});
|
||||
|
||||
#[derive(Debug, PartialEq, Eq, Clone, Copy, strum_macros::IntoStaticStr, enum_map::Enum)]
|
||||
#[derive(
|
||||
Debug,
|
||||
PartialEq,
|
||||
Eq,
|
||||
Clone,
|
||||
Copy,
|
||||
strum_macros::IntoStaticStr,
|
||||
strum_macros::Display,
|
||||
enum_map::Enum,
|
||||
)]
|
||||
#[strum(serialize_all = "snake_case")]
|
||||
pub(crate) enum BackgroundLoopKind {
|
||||
Compaction,
|
||||
@@ -54,27 +66,45 @@ pub(crate) enum BackgroundLoopKind {
|
||||
SecondaryDownload,
|
||||
}
|
||||
|
||||
impl BackgroundLoopKind {
|
||||
fn as_static_str(&self) -> &'static str {
|
||||
self.into()
|
||||
}
|
||||
pub struct BackgroundLoopSemaphorePermit<'a> {
|
||||
_permit: tokio::sync::SemaphorePermit<'static>,
|
||||
_recorder: BackgroundLoopSemaphoreMetricsRecorder<'a>,
|
||||
}
|
||||
|
||||
/// Cancellation safe.
|
||||
pub(crate) async fn concurrent_background_tasks_rate_limit_permit(
|
||||
loop_kind: BackgroundLoopKind,
|
||||
_ctx: &RequestContext,
|
||||
) -> tokio::sync::SemaphorePermit<'static> {
|
||||
let _guard = crate::metrics::BACKGROUND_LOOP_SEMAPHORE.measure_acquisition(loop_kind);
|
||||
) -> BackgroundLoopSemaphorePermit<'static> {
|
||||
// TODO: use a lower threshold and remove the pacer once we resolve some blockage.
|
||||
const WARN_THRESHOLD: Duration = Duration::from_secs(600);
|
||||
static WARN_PACER: Lazy<Mutex<RateLimit>> =
|
||||
Lazy::new(|| Mutex::new(RateLimit::new(Duration::from_secs(10))));
|
||||
|
||||
let mut recorder = crate::metrics::BACKGROUND_LOOP_SEMAPHORE.record(loop_kind);
|
||||
|
||||
if loop_kind == BackgroundLoopKind::InitialLogicalSizeCalculation {
|
||||
pausable_failpoint!("initial-size-calculation-permit-pause");
|
||||
}
|
||||
|
||||
// TODO: assert that we run on BACKGROUND_RUNTIME; requires tokio_unstable Handle::id();
|
||||
match CONCURRENT_BACKGROUND_TASKS.acquire().await {
|
||||
Ok(permit) => permit,
|
||||
Err(_closed) => unreachable!("we never close the semaphore"),
|
||||
let permit = CONCURRENT_BACKGROUND_TASKS
|
||||
.acquire()
|
||||
.await
|
||||
.expect("should never close");
|
||||
|
||||
let waited = recorder.acquired();
|
||||
if waited >= WARN_THRESHOLD {
|
||||
let waited = waited.as_secs_f64();
|
||||
WARN_PACER
|
||||
.lock()
|
||||
.unwrap()
|
||||
.call(|| warn!("{loop_kind} task waited {waited:.3}s for semaphore permit"));
|
||||
}
|
||||
|
||||
BackgroundLoopSemaphorePermit {
|
||||
_permit: permit,
|
||||
_recorder: recorder,
|
||||
}
|
||||
}
|
||||
|
||||
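The function above rate-limits its warning through `utils::rate_limit::RateLimit`. A minimal stand-in (not the real type) showing the pacer idea it is used for: run the closure at most once per interval.

use std::time::{Duration, Instant};

struct RateLimit {
    interval: Duration,
    last: Option<Instant>,
}

impl RateLimit {
    fn new(interval: Duration) -> Self {
        Self { interval, last: None }
    }

    fn call(&mut self, f: impl FnOnce()) {
        let now = Instant::now();
        if self.last.map_or(true, |t| now.duration_since(t) >= self.interval) {
            self.last = Some(now);
            f();
        }
    }
}

fn main() {
    let mut pacer = RateLimit::new(Duration::from_secs(10));
    pacer.call(|| println!("slow semaphore acquisition")); // runs
    pacer.call(|| println!("suppressed"));                 // skipped: still within the interval
}
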
@@ -206,11 +236,11 @@ async fn compaction_loop(tenant: Arc<Tenant>, cancel: CancellationToken) {
|
||||
.run(tenant.compaction_iteration(&cancel, &ctx))
|
||||
.await;
|
||||
match output {
|
||||
Ok(has_pending_task) => {
|
||||
Ok(outcome) => {
|
||||
error_run_count = 0;
|
||||
// schedule the next compaction immediately in case there is a pending compaction task
|
||||
sleep_duration = if has_pending_task {
|
||||
Duration::ZERO
|
||||
sleep_duration = if let CompactionOutcome::Pending = outcome {
|
||||
Duration::from_secs(1)
|
||||
} else {
|
||||
period
|
||||
};
|
||||
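The compaction loop change above swaps a boolean for `CompactionOutcome` and, when work is still pending, re-runs after a short fixed delay instead of immediately. A standalone sketch (stand-in enum) of that scheduling decision:

use std::time::Duration;

#[derive(Debug, PartialEq)]
enum CompactionOutcome {
    Done,
    Pending,
}

fn next_sleep(outcome: &CompactionOutcome, period: Duration) -> Duration {
    match outcome {
        CompactionOutcome::Pending => Duration::from_secs(1), // more work queued: come back soon
        CompactionOutcome::Done => period,                    // nothing pending: wait a full period
    }
}

fn main() {
    let period = Duration::from_secs(20);
    assert_eq!(next_sleep(&CompactionOutcome::Pending, period), Duration::from_secs(1));
    assert_eq!(next_sleep(&CompactionOutcome::Done, period), period);
}
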
@@ -616,7 +646,7 @@ pub(crate) fn warn_when_period_overrun(
|
||||
"task iteration took longer than the configured period"
|
||||
);
|
||||
crate::metrics::BACKGROUND_LOOP_PERIOD_OVERRUN_COUNT
|
||||
.with_label_values(&[task.as_static_str(), &format!("{}", period.as_secs())])
|
||||
.with_label_values(&[task.into(), &format!("{}", period.as_secs())])
|
||||
.inc();
|
||||
}
|
||||
}
|
||||
|
||||
@@ -18,6 +18,7 @@ use arc_swap::{ArcSwap, ArcSwapOption};
|
||||
use bytes::Bytes;
|
||||
use camino::Utf8Path;
|
||||
use chrono::{DateTime, Utc};
|
||||
use compaction::CompactionOutcome;
|
||||
use enumset::EnumSet;
|
||||
use fail::fail_point;
|
||||
use futures::{stream::FuturesUnordered, StreamExt};
|
||||
@@ -51,6 +52,7 @@ use tokio::{
|
||||
};
|
||||
use tokio_util::sync::CancellationToken;
|
||||
use tracing::*;
|
||||
use utils::critical;
|
||||
use utils::rate_limit::RateLimit;
|
||||
use utils::{
|
||||
fs_ext,
|
||||
@@ -189,6 +191,19 @@ pub enum ImageLayerCreationMode {
|
||||
Initial,
|
||||
}
|
||||
|
||||
#[derive(Clone, Debug, Default)]
|
||||
pub enum LastImageLayerCreationStatus {
|
||||
Incomplete {
|
||||
/// The last key of the partition (exclusive) that was processed in the last
|
||||
/// image layer creation attempt. We will continue from this key in the next
|
||||
/// attempt.
|
||||
last_key: Key,
|
||||
},
|
||||
Complete,
|
||||
#[default]
|
||||
Initial,
|
||||
}
|
||||
|
||||
impl std::fmt::Display for ImageLayerCreationMode {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
write!(f, "{:?}", self)
|
||||
@@ -347,6 +362,8 @@ pub struct Timeline {
|
||||
// garbage collecting data that is still needed by the child timelines.
|
||||
pub(crate) gc_info: std::sync::RwLock<GcInfo>,
|
||||
|
||||
pub(crate) last_image_layer_creation_status: ArcSwap<LastImageLayerCreationStatus>,
|
||||
|
||||
// It may change across major versions so for simplicity
|
||||
// keep it after running initdb for a timeline.
|
||||
// It is needed in checks when we want to error on some operations
|
||||
@@ -515,6 +532,9 @@ impl GcInfo {
|
||||
pub(super) fn remove_child_offloaded(&mut self, child_id: TimelineId) -> bool {
|
||||
self.remove_child_maybe_offloaded(child_id, MaybeOffloaded::Yes)
|
||||
}
|
||||
pub(crate) fn lsn_covered_by_lease(&self, lsn: Lsn) -> bool {
|
||||
self.leases.contains_key(&lsn)
|
||||
}
|
||||
}
|
||||
|
||||
/// The `GcInfo` component describing which Lsns need to be retained. Functionally, this
|
||||
@@ -936,9 +956,16 @@ pub(crate) enum ShutdownMode {
|
||||
Hard,
|
||||
}
|
||||
|
||||
struct ImageLayerCreationOutcome {
|
||||
unfinished_image_layer: Option<ImageLayerWriter>,
|
||||
next_start_key: Key,
|
||||
enum ImageLayerCreationOutcome {
|
||||
/// We generated an image layer
|
||||
Generated {
|
||||
unfinished_image_layer: ImageLayerWriter,
|
||||
},
|
||||
/// The key range is empty
|
||||
Empty,
|
||||
/// (Only used in metadata image layer creation.) After reading the metadata keys, we decide to skip
|
||||
/// the image layer creation.
|
||||
Skip,
|
||||
}
|
||||
|
||||
/// Public interface functions
|
||||
@@ -1662,7 +1689,7 @@ impl Timeline {
|
||||
cancel: &CancellationToken,
|
||||
flags: EnumSet<CompactFlags>,
|
||||
ctx: &RequestContext,
|
||||
) -> Result<bool, CompactionError> {
|
||||
) -> Result<CompactionOutcome, CompactionError> {
|
||||
self.compact_with_options(
|
||||
cancel,
|
||||
CompactOptions {
|
||||
@@ -1684,7 +1711,7 @@ impl Timeline {
|
||||
cancel: &CancellationToken,
|
||||
options: CompactOptions,
|
||||
ctx: &RequestContext,
|
||||
) -> Result<bool, CompactionError> {
|
||||
) -> Result<CompactionOutcome, CompactionError> {
|
||||
// most likely the cancellation token is from background task, but in tests it could be the
|
||||
// request task as well.
|
||||
|
||||
@@ -1704,8 +1731,8 @@ impl Timeline {
|
||||
// compaction task goes over its period (20s), which is quite often in production.
|
||||
let (_guard, _permit) = tokio::select! {
|
||||
tuple = prepare => { tuple },
|
||||
_ = self.cancel.cancelled() => return Ok(false),
|
||||
_ = cancel.cancelled() => return Ok(false),
|
||||
_ = self.cancel.cancelled() => return Ok(CompactionOutcome::Done),
|
||||
_ = cancel.cancelled() => return Ok(CompactionOutcome::Done),
|
||||
};
|
||||
|
||||
let last_record_lsn = self.get_last_record_lsn();
|
||||
@@ -1713,13 +1740,13 @@ impl Timeline {
|
||||
// Last record Lsn could be zero in case the timeline was just created
|
||||
if !last_record_lsn.is_valid() {
|
||||
warn!("Skipping compaction for potentially just initialized timeline, it has invalid last record lsn: {last_record_lsn}");
|
||||
return Ok(false);
|
||||
return Ok(CompactionOutcome::Done);
|
||||
}
|
||||
|
||||
let result = match self.get_compaction_algorithm_settings().kind {
|
||||
CompactionAlgorithm::Tiered => {
|
||||
self.compact_tiered(cancel, ctx).await?;
|
||||
Ok(false)
|
||||
Ok(CompactionOutcome::Done)
|
||||
}
|
||||
CompactionAlgorithm::Legacy => self.compact_legacy(cancel, options, ctx).await,
|
||||
};
|
||||
@@ -1818,7 +1845,7 @@ impl Timeline {
|
||||
self.last_record_lsn.shutdown();
|
||||
|
||||
if let ShutdownMode::FreezeAndFlush = mode {
|
||||
if let Some((open, frozen)) = self
|
||||
let do_flush = if let Some((open, frozen)) = self
|
||||
.layers
|
||||
.read()
|
||||
.await
|
||||
@@ -1827,43 +1854,54 @@ impl Timeline {
|
||||
.ok()
|
||||
.filter(|(open, frozen)| *open || *frozen > 0)
|
||||
{
|
||||
tracing::info!(?open, frozen, "flushing and freezing on shutdown");
|
||||
if self.remote_client.is_archived() == Some(true) {
|
||||
// No point flushing on shutdown for an archived timeline: it is not important
|
||||
// to have it nice and fresh after our restart, and trying to flush here might
|
||||
// race with trying to offload it (which also stops the flush loop)
|
||||
false
|
||||
} else {
|
||||
tracing::info!(?open, frozen, "flushing and freezing on shutdown");
|
||||
true
|
||||
}
|
||||
} else {
|
||||
// this is double-shutdown, ignore it
|
||||
}
|
||||
// this is double-shutdown, it'll be a no-op
|
||||
true
|
||||
};
|
||||
|
||||
// we shut down walreceiver above, so, we won't add anything more
|
||||
// to the InMemoryLayer; freeze it and wait for all frozen layers
|
||||
// to reach the disk & upload queue, then shut the upload queue and
|
||||
// wait for it to drain.
|
||||
match self.freeze_and_flush().await {
|
||||
Ok(_) => {
|
||||
// drain the upload queue
|
||||
// if we did not wait for completion here, it might be our shutdown process
|
||||
// didn't wait for remote uploads to complete at all, as new tasks can forever
|
||||
// be spawned.
|
||||
//
|
||||
// what is problematic is the shutting down of RemoteTimelineClient, because
|
||||
// obviously it does not make sense to stop while we wait for it, but what
|
||||
// about corner cases like s3 suddenly hanging up?
|
||||
self.remote_client.shutdown().await;
|
||||
if do_flush {
|
||||
match self.freeze_and_flush().await {
|
||||
Ok(_) => {
|
||||
// drain the upload queue
|
||||
// if we did not wait for completion here, it might be our shutdown process
|
||||
// didn't wait for remote uploads to complete at all, as new tasks can forever
|
||||
// be spawned.
|
||||
//
|
||||
// what is problematic is the shutting down of RemoteTimelineClient, because
|
||||
// obviously it does not make sense to stop while we wait for it, but what
|
||||
// about corner cases like s3 suddenly hanging up?
|
||||
self.remote_client.shutdown().await;
|
||||
}
|
||||
Err(FlushLayerError::Cancelled) => {
|
||||
// this is likely the second shutdown, ignore silently.
|
||||
// TODO: this can be removed once https://github.com/neondatabase/neon/issues/5080
|
||||
debug_assert!(self.cancel.is_cancelled());
|
||||
}
|
||||
Err(e) => {
|
||||
// Non-fatal. Shutdown is infallible. Failures to flush just mean that
|
||||
// we have some extra WAL replay to do next time the timeline starts.
|
||||
warn!("failed to freeze and flush: {e:#}");
|
||||
}
|
||||
}
|
||||
Err(FlushLayerError::Cancelled) => {
|
||||
// this is likely the second shutdown, ignore silently.
|
||||
// TODO: this can be removed once https://github.com/neondatabase/neon/issues/5080
|
||||
debug_assert!(self.cancel.is_cancelled());
|
||||
}
|
||||
Err(e) => {
|
||||
// Non-fatal. Shutdown is infallible. Failures to flush just mean that
|
||||
// we have some extra WAL replay to do next time the timeline starts.
|
||||
warn!("failed to freeze and flush: {e:#}");
|
||||
}
|
||||
}
|
||||
|
||||
// `self.remote_client.shutdown().await` above should have already flushed everything from the queue, but
|
||||
// we also do a final check here to ensure that the queue is empty.
|
||||
if !self.remote_client.no_pending_work() {
|
||||
warn!("still have pending work in remote upload queue, but continuing shutting down anyways");
|
||||
// `self.remote_client.shutdown().await` above should have already flushed everything from the queue, but
|
||||
// we also do a final check here to ensure that the queue is empty.
|
||||
if !self.remote_client.no_pending_work() {
|
||||
warn!("still have pending work in remote upload queue, but continuing shutting down anyways");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -2338,6 +2376,18 @@ impl Timeline {
|
||||
)
|
||||
}
|
||||
|
||||
fn get_image_creation_preempt_threshold(&self) -> usize {
|
||||
let tenant_conf = self.tenant_conf.load();
|
||||
tenant_conf
|
||||
.tenant_conf
|
||||
.image_creation_preempt_threshold
|
||||
.unwrap_or(
|
||||
self.conf
|
||||
.default_tenant_conf
|
||||
.image_creation_preempt_threshold,
|
||||
)
|
||||
}
|
||||
|
||||
/// Resolve the effective WAL receiver protocol to use for this tenant.
|
||||
///
|
||||
/// Priority order is:
|
||||
@@ -2488,6 +2538,10 @@ impl Timeline {
|
||||
|
||||
gc_info: std::sync::RwLock::new(GcInfo::default()),
|
||||
|
||||
last_image_layer_creation_status: ArcSwap::new(Arc::new(
|
||||
LastImageLayerCreationStatus::default(),
|
||||
)),
|
||||
|
||||
latest_gc_cutoff_lsn: Rcu::new(metadata.latest_gc_cutoff_lsn()),
|
||||
initdb_lsn: metadata.initdb_lsn(),
|
||||
|
||||
@@ -4031,15 +4085,20 @@ impl Timeline {
|
||||
}
|
||||
|
||||
let mut layers_to_upload = Vec::new();
|
||||
layers_to_upload.extend(
|
||||
self.create_image_layers(
|
||||
let (generated_image_layers, is_complete) = self
|
||||
.create_image_layers(
|
||||
&partitions,
|
||||
self.initdb_lsn,
|
||||
ImageLayerCreationMode::Initial,
|
||||
ctx,
|
||||
LastImageLayerCreationStatus::Initial,
|
||||
)
|
||||
.await?,
|
||||
.await?;
|
||||
debug_assert!(
|
||||
matches!(is_complete, LastImageLayerCreationStatus::Complete),
|
||||
"init image generation mode must fully cover the keyspace"
|
||||
);
|
||||
layers_to_upload.extend(generated_image_layers);
|
||||
|
||||
(layers_to_upload, None)
|
||||
} else {
|
||||
@@ -4296,7 +4355,7 @@ impl Timeline {
|
||||
Ok(result)
|
||||
}
|
||||
|
||||
// Is it time to create a new image layer for the given partition?
|
||||
// Is it time to create a new image layer for the given partition? True if we want to generate.
|
||||
async fn time_for_new_image_layer(&self, partition: &KeySpace, lsn: Lsn) -> bool {
|
||||
let threshold = self.get_image_creation_threshold();
|
||||
|
||||
@@ -4359,7 +4418,6 @@ impl Timeline {
|
||||
lsn: Lsn,
|
||||
ctx: &RequestContext,
|
||||
img_range: Range<Key>,
|
||||
start: Key,
|
||||
io_concurrency: IoConcurrency,
|
||||
) -> Result<ImageLayerCreationOutcome, CreateImageLayersError> {
|
||||
let mut wrote_keys = false;
|
||||
@@ -4447,26 +4505,23 @@ impl Timeline {
|
||||
lsn
|
||||
},
|
||||
);
|
||||
Ok(ImageLayerCreationOutcome {
|
||||
unfinished_image_layer: Some(image_layer_writer),
|
||||
next_start_key: img_range.end,
|
||||
Ok(ImageLayerCreationOutcome::Generated {
|
||||
unfinished_image_layer: image_layer_writer,
|
||||
})
|
||||
} else {
|
||||
// Special case: the image layer may be empty if this is a sharded tenant and the
|
||||
// partition does not cover any keys owned by this shard. In this case, to ensure
|
||||
// we don't leave gaps between image layers, leave `start` where it is, so that the next
|
||||
// layer we write will cover the key range that we just scanned.
|
||||
tracing::debug!("no data in range {}-{}", img_range.start, img_range.end);
|
||||
Ok(ImageLayerCreationOutcome {
|
||||
unfinished_image_layer: None,
|
||||
next_start_key: start,
|
||||
})
|
||||
Ok(ImageLayerCreationOutcome::Empty)
|
||||
}
|
||||
}
|
||||
|
||||
/// Create an image layer for metadata keys. This function produces one image layer for all metadata
|
||||
/// keys for now. Because metadata keys cannot exceed basebackup size limit, the image layer for it
|
||||
/// would not be too large to fit in a single image layer.
|
||||
///
|
||||
/// Creating image layers for metadata keys is different from relational keys. Firstly, instead of
|
||||
/// iterating over each key and getting an image for each of them, we do a `vectored_get` scan over the sparse
|
||||
/// keyspace to get all images in one run. Secondly, we use a different image layer generation metric
|
||||
/// for metadata keys than relational keys, which is the number of delta files visited during the scan.
|
||||
#[allow(clippy::too_many_arguments)]
|
||||
async fn create_image_layer_for_metadata_keys(
|
||||
self: &Arc<Self>,
|
||||
@@ -4476,12 +4531,13 @@ impl Timeline {
|
||||
ctx: &RequestContext,
|
||||
img_range: Range<Key>,
|
||||
mode: ImageLayerCreationMode,
|
||||
start: Key,
|
||||
io_concurrency: IoConcurrency,
|
||||
) -> Result<ImageLayerCreationOutcome, CreateImageLayersError> {
|
||||
// Metadata keys image layer creation.
|
||||
let mut reconstruct_state = ValuesReconstructState::new(io_concurrency);
|
||||
let begin = Instant::now();
|
||||
// Directly use `get_vectored_impl` to skip the max_vectored_read_key limit check. Note that the keyspace should
|
||||
// not contain too many keys, otherwise this takes a lot of memory.
|
||||
let data = self
|
||||
.get_vectored_impl(partition.clone(), lsn, &mut reconstruct_state, ctx)
|
||||
.await?;
|
||||
@@ -4506,10 +4562,7 @@ impl Timeline {
|
||||
);
|
||||
|
||||
if !trigger_generation && mode == ImageLayerCreationMode::Try {
|
||||
return Ok(ImageLayerCreationOutcome {
|
||||
unfinished_image_layer: None,
|
||||
next_start_key: img_range.end,
|
||||
});
|
||||
return Ok(ImageLayerCreationOutcome::Skip);
|
||||
}
|
||||
if self.cancel.is_cancelled() {
|
||||
return Err(CreateImageLayersError::Cancelled);
|
||||
@@ -4540,20 +4593,12 @@ impl Timeline {
|
||||
lsn
|
||||
}
|
||||
);
|
||||
Ok(ImageLayerCreationOutcome {
|
||||
unfinished_image_layer: Some(image_layer_writer),
|
||||
next_start_key: img_range.end,
|
||||
Ok(ImageLayerCreationOutcome::Generated {
|
||||
unfinished_image_layer: image_layer_writer,
|
||||
})
|
||||
} else {
|
||||
// Special case: the image layer may be empty if this is a sharded tenant and the
|
||||
// partition does not cover any keys owned by this shard. In this case, to ensure
|
||||
// we don't leave gaps between image layers, leave `start` where it is, so that the next
|
||||
// layer we write will cover the key range that we just scanned.
|
||||
tracing::debug!("no data in range {}-{}", img_range.start, img_range.end);
|
||||
Ok(ImageLayerCreationOutcome {
|
||||
unfinished_image_layer: None,
|
||||
next_start_key: start,
|
||||
})
|
||||
Ok(ImageLayerCreationOutcome::Empty)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -4609,6 +4654,8 @@ impl Timeline {
|
||||
decision
|
||||
}
|
||||
|
||||
/// Returns the image layers generated and an enum indicating whether the process is fully completed.
|
||||
/// true = we have generated all image layers, false = we preempted the process for L0 compaction.
|
||||
#[tracing::instrument(skip_all, fields(%lsn, %mode))]
|
||||
async fn create_image_layers(
|
||||
self: &Arc<Timeline>,
|
||||
@@ -4616,9 +4663,15 @@ impl Timeline {
|
||||
lsn: Lsn,
|
||||
mode: ImageLayerCreationMode,
|
||||
ctx: &RequestContext,
|
||||
) -> Result<Vec<ResidentLayer>, CreateImageLayersError> {
|
||||
last_status: LastImageLayerCreationStatus,
|
||||
) -> Result<(Vec<ResidentLayer>, LastImageLayerCreationStatus), CreateImageLayersError> {
|
||||
let timer = self.metrics.create_images_time_histo.start_timer();
|
||||
|
||||
if partitioning.parts.is_empty() {
|
||||
warn!("no partitions to create image layers for");
|
||||
return Ok((vec![], LastImageLayerCreationStatus::Complete));
|
||||
}
|
||||
|
||||
// We need to avoid holes between generated image layers.
|
||||
// Otherwise LayerMap::image_layer_exists will return false if key range of some layer is covered by more than one
|
||||
// image layer with hole between them. In this case such layer can not be utilized by GC.
|
||||
@@ -4630,15 +4683,65 @@ impl Timeline {
|
||||
// image layers <100000000..100000099> and <200000000..200000199> are not completely covering it.
|
||||
let mut start = Key::MIN;
|
||||
|
||||
let check_for_image_layers = self.should_check_if_image_layers_required(lsn);
|
||||
let check_for_image_layers =
|
||||
if let LastImageLayerCreationStatus::Incomplete { last_key } = last_status {
|
||||
info!(
|
||||
"resuming image layer creation: last_status=incomplete, continue from {}",
|
||||
last_key
|
||||
);
|
||||
true
|
||||
} else {
|
||||
self.should_check_if_image_layers_required(lsn)
|
||||
};
|
||||
|
||||
let mut batch_image_writer = BatchLayerWriter::new(self.conf).await?;
|
||||
|
||||
for partition in partitioning.parts.iter() {
|
||||
let mut all_generated = true;
|
||||
|
||||
let mut partition_processed = 0;
|
||||
let mut total_partitions = partitioning.parts.len();
|
||||
let mut last_partition_processed = None;
|
||||
let mut partition_parts = partitioning.parts.clone();
|
||||
|
||||
if let LastImageLayerCreationStatus::Incomplete { last_key } = last_status {
|
||||
// We need to skip the partitions that have already been processed.
|
||||
let mut found = false;
|
||||
for (i, partition) in partition_parts.iter().enumerate() {
|
||||
if last_key <= partition.end().unwrap() {
|
||||
// ```plain
|
||||
// |------|--------|----------|------|
|
||||
// ^last_key
|
||||
// ^start from this partition
|
||||
// ```
|
||||
// Why `i+1` instead of `i`?
|
||||
// It is possible that the user did some writes after the previous image layer creation attempt so that
|
||||
// a relation grows in size, and the last_key is now in the middle of the partition. In this case, we
|
||||
// still want to skip this partition, so that we can make progress and avoid generating image layers over
|
||||
// the same partition. Doing a mod to ensure we don't end up with an empty vec.
|
||||
if i + 1 >= total_partitions {
|
||||
// In general, this case should not happen -- if last_key is on the last partition, the previous
|
||||
// iteration of image layer creation should return a complete status.
|
||||
break; // with found=false
|
||||
}
|
||||
partition_parts = partition_parts.split_off(i + 1); // Remove the first i + 1 elements
|
||||
total_partitions = partition_parts.len();
|
||||
// Update the start key to the partition start.
|
||||
start = partition_parts[0].start().unwrap();
|
||||
found = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if !found {
|
||||
// Last key is within the last partition, or larger than all partitions.
|
||||
return Ok((vec![], LastImageLayerCreationStatus::Complete));
|
||||
}
|
||||
}
|
||||
|
||||
for partition in partition_parts.iter() {
|
||||
if self.cancel.is_cancelled() {
|
||||
return Err(CreateImageLayersError::Cancelled);
|
||||
}
|
||||
|
||||
partition_processed += 1;
|
||||
let img_range = start..partition.ranges.last().unwrap().end;
|
||||
let compact_metadata = partition.overlaps(&Key::metadata_key_range());
|
||||
if compact_metadata {
|
||||
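The resume logic above skips all partitions already covered by `last_key` and restarts from the one after it. A standalone sketch (plain integers standing in for partition end keys) of the same rule:

/// Returns the index of the partition to resume from, or None if nothing is left to do.
fn resume_index(partition_ends: &[u64], last_key: u64) -> Option<usize> {
    for (i, &end) in partition_ends.iter().enumerate() {
        if last_key <= end {
            // Skip this partition too (it may have been partially redone after new writes),
            // unless it is already the last one, in which case the work is complete.
            return if i + 1 >= partition_ends.len() { None } else { Some(i + 1) };
        }
    }
    None // last_key is past every partition: nothing left to do
}

fn main() {
    assert_eq!(resume_index(&[10, 20, 30, 40], 15), Some(2)); // resume from the third partition
    assert_eq!(resume_index(&[10, 20, 30, 40], 40), None);    // last partition reached: complete
}
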
@@ -4673,6 +4776,8 @@ impl Timeline {
|
||||
lsn_range: PersistentLayerDesc::image_layer_lsn_range(lsn),
|
||||
is_delta: false,
|
||||
}) {
|
||||
// TODO: this can be processed with the BatchLayerWriter::finish_with_discard
|
||||
// in the future.
|
||||
tracing::info!(
|
||||
"Skipping image layer at {lsn} {}..{}, already exists",
|
||||
img_range.start,
|
||||
@@ -4706,17 +4811,13 @@ impl Timeline {
|
||||
.map_err(|_| CreateImageLayersError::Cancelled)?,
|
||||
);
|
||||
|
||||
let ImageLayerCreationOutcome {
|
||||
unfinished_image_layer,
|
||||
next_start_key,
|
||||
} = if !compact_metadata {
|
||||
let outcome = if !compact_metadata {
|
||||
self.create_image_layer_for_rel_blocks(
|
||||
partition,
|
||||
image_layer_writer,
|
||||
lsn,
|
||||
ctx,
|
||||
img_range.clone(),
|
||||
start,
|
||||
io_concurrency,
|
||||
)
|
||||
.await?
|
||||
@@ -4728,18 +4829,58 @@ impl Timeline {
|
||||
ctx,
|
||||
img_range.clone(),
|
||||
mode,
|
||||
start,
|
||||
io_concurrency,
|
||||
)
|
||||
.await?
|
||||
};
|
||||
start = next_start_key;
|
||||
if let Some(unfinished_image_layer) = unfinished_image_layer {
|
||||
batch_image_writer.add_unfinished_image_writer(
|
||||
match outcome {
|
||||
ImageLayerCreationOutcome::Empty => {
|
||||
// No data in this partition, so we don't need to create an image layer (for now).
|
||||
// The next image layer should cover this key range, so we don't advance the `start`
|
||||
// key.
|
||||
}
|
||||
ImageLayerCreationOutcome::Generated {
|
||||
unfinished_image_layer,
|
||||
img_range,
|
||||
lsn,
|
||||
);
|
||||
} => {
|
||||
batch_image_writer.add_unfinished_image_writer(
|
||||
unfinished_image_layer,
|
||||
img_range.clone(),
|
||||
lsn,
|
||||
);
|
||||
// The next image layer should be generated right after this one.
|
||||
start = img_range.end;
|
||||
}
|
||||
ImageLayerCreationOutcome::Skip => {
|
||||
// We don't need to create an image layer for this partition.
|
||||
// The next image layer should NOT cover this range, otherwise
|
||||
// the keyspace becomes empty (reads don't go past image layers).
|
||||
start = img_range.end;
|
||||
}
|
||||
}
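The three match arms above reduce to one rule for how the next start key advances; the sketch below restates it with illustrative names (not the real `ImageLayerCreationOutcome` type) purely to summarize the Empty/Generated/Skip semantics.

```rust
/// Hedged summary of how `start` advances per outcome; names are illustrative.
enum OutcomeKind { Empty, Generated, Skip }

fn next_start_key(outcome: OutcomeKind, current_start: u64, img_range_end: u64) -> u64 {
    match outcome {
        // Nothing was written: let the next image layer cover this range too.
        OutcomeKind::Empty => current_start,
        // A layer was produced, or the range was deliberately skipped: continue
        // right after it so the covered/uncovered boundary stays consistent for reads.
        OutcomeKind::Generated | OutcomeKind::Skip => img_range_end,
    }
}
```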
|
||||
|
||||
if let ImageLayerCreationMode::Try = mode {
|
||||
// We have at least made some progress
|
||||
if batch_image_writer.pending_layer_num() >= 1 {
|
||||
// The `Try` mode is currently only used on the compaction path. We want to avoid
|
||||
// image layer generation taking too long time and blocking L0 compaction. So in this
|
||||
// mode, we also inspect the current number of L0 layers and skip image layer generation
|
||||
// if there are too many of them.
|
||||
let num_of_l0_layers = {
|
||||
let layers = self.layers.read().await;
|
||||
layers.layer_map()?.level0_deltas().len()
|
||||
};
|
||||
let image_preempt_threshold = self.get_image_creation_preempt_threshold()
|
||||
* self.get_compaction_threshold();
|
||||
if image_preempt_threshold != 0 && num_of_l0_layers >= image_preempt_threshold {
|
||||
tracing::info!(
|
||||
"preempt image layer generation at {lsn} when processing partition {}..{}: too many L0 layers {}",
|
||||
partition.start().unwrap(), partition.end().unwrap(), num_of_l0_layers
|
||||
);
|
||||
last_partition_processed = Some(partition.clone());
|
||||
all_generated = false;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
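The preemption condition used in `Try` mode boils down to a single comparison; the sketch below restates it with illustrative parameter names (standing in for the real config accessors), assuming a threshold of zero disables preemption.

```rust
/// Hedged sketch of the L0-backlog preemption check described above.
/// `preempt_factor` and `compaction_threshold` stand in for the real config getters.
fn should_preempt_image_creation(
    num_l0_layers: usize,
    preempt_factor: usize,
    compaction_threshold: usize,
) -> bool {
    let threshold = preempt_factor * compaction_threshold;
    // A zero threshold disables preemption; otherwise yield once the L0 backlog
    // reaches the threshold so L0 compaction is not starved.
    threshold != 0 && num_l0_layers >= threshold
}
```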
|
||||
|
||||
@@ -4754,14 +4895,42 @@ impl Timeline {
|
||||
.open_mut()?
|
||||
.track_new_image_layers(&image_layers, &self.metrics);
|
||||
drop_wlock(guard);
|
||||
timer.stop_and_record();
|
||||
let duration = timer.stop_and_record();
|
||||
|
||||
// Creating image layers may have caused some previously visible layers to be covered
|
||||
if !image_layers.is_empty() {
|
||||
self.update_layer_visibility().await?;
|
||||
}
|
||||
|
||||
Ok(image_layers)
|
||||
let total_layer_size = image_layers
|
||||
.iter()
|
||||
.map(|l| l.metadata().file_size)
|
||||
.sum::<u64>();
|
||||
|
||||
info!(
|
||||
"created {} image layers ({} bytes) in {}s, processed {} out of {} partitions",
|
||||
image_layers.len(),
|
||||
total_layer_size,
|
||||
duration.as_secs_f64(),
|
||||
partition_processed,
|
||||
total_partitions
|
||||
);
|
||||
|
||||
Ok((
|
||||
image_layers,
|
||||
if all_generated {
|
||||
LastImageLayerCreationStatus::Complete
|
||||
} else {
|
||||
LastImageLayerCreationStatus::Incomplete {
|
||||
last_key: if let Some(last_partition_processed) = last_partition_processed {
|
||||
last_partition_processed.end().unwrap_or(Key::MIN)
|
||||
} else {
|
||||
// This branch should be unreachable, but in case it happens, we can just return the start key.
|
||||
Key::MIN
|
||||
},
|
||||
}
|
||||
},
|
||||
))
|
||||
}
|
||||
|
||||
/// Wait until the background initial logical size calculation is complete, or
|
||||
@@ -5639,10 +5808,11 @@ impl Timeline {
|
||||
let img = match res {
|
||||
Ok(img) => img,
|
||||
Err(walredo::Error::Cancelled) => return Err(PageReconstructError::Cancelled),
|
||||
Err(walredo::Error::Other(e)) => {
|
||||
Err(walredo::Error::Other(err)) => {
|
||||
critical!("walredo failure during page reconstruction: {err:?}");
|
||||
return Err(PageReconstructError::WalRedo(
|
||||
e.context("reconstruct a page image"),
|
||||
))
|
||||
err.context("reconstruct a page image"),
|
||||
));
|
||||
}
|
||||
};
|
||||
Ok(img)
|
||||
|
||||
@@ -10,8 +10,8 @@ use std::sync::Arc;
|
||||
|
||||
use super::layer_manager::LayerManager;
|
||||
use super::{
|
||||
CompactFlags, CompactOptions, CreateImageLayersError, DurationRecorder, ImageLayerCreationMode,
|
||||
RecordedDuration, Timeline,
|
||||
CompactFlags, CompactOptions, CreateImageLayersError, DurationRecorder, GetVectoredError,
|
||||
ImageLayerCreationMode, LastImageLayerCreationStatus, RecordedDuration, Timeline,
|
||||
};
|
||||
|
||||
use anyhow::{anyhow, bail, Context};
|
||||
@@ -26,6 +26,7 @@ use pageserver_api::shard::{ShardCount, ShardIdentity, TenantShardId};
|
||||
use serde::Serialize;
|
||||
use tokio_util::sync::CancellationToken;
|
||||
use tracing::{debug, info, info_span, trace, warn, Instrument};
|
||||
use utils::critical;
|
||||
use utils::id::TimelineId;
|
||||
|
||||
use crate::context::{AccessStatsBehavior, RequestContext, RequestContextBuilder};
|
||||
@@ -33,6 +34,7 @@ use crate::page_cache;
|
||||
use crate::statvfs::Statvfs;
|
||||
use crate::tenant::checks::check_valid_layermap;
|
||||
use crate::tenant::gc_block::GcBlock;
|
||||
use crate::tenant::layer_map::LayerMap;
|
||||
use crate::tenant::remote_timeline_client::WaitCompletionError;
|
||||
use crate::tenant::storage_layer::batch_split_writer::{
|
||||
BatchWriterResult, SplitDeltaLayerWriter, SplitImageLayerWriter,
|
||||
@@ -262,13 +264,13 @@ impl GcCompactionQueue {
|
||||
ctx: &RequestContext,
|
||||
gc_block: &GcBlock,
|
||||
timeline: &Arc<Timeline>,
|
||||
) -> Result<bool, CompactionError> {
|
||||
) -> Result<CompactionOutcome, CompactionError> {
|
||||
let _one_op_at_a_time_guard = self.consumer_lock.lock().await;
|
||||
let has_pending_tasks;
|
||||
let (id, item) = {
|
||||
let mut guard = self.inner.lock().unwrap();
|
||||
let Some((id, item)) = guard.queued.pop_front() else {
|
||||
return Ok(false);
|
||||
return Ok(CompactionOutcome::Done);
|
||||
};
|
||||
guard.running = Some((id, item.clone()));
|
||||
has_pending_tasks = !guard.queued.is_empty();
|
||||
@@ -323,7 +325,11 @@ impl GcCompactionQueue {
|
||||
let mut guard = self.inner.lock().unwrap();
|
||||
guard.running = None;
|
||||
}
|
||||
Ok(has_pending_tasks)
|
||||
Ok(if has_pending_tasks {
|
||||
CompactionOutcome::Pending
|
||||
} else {
|
||||
CompactionOutcome::Done
|
||||
})
|
||||
}
|
||||
|
||||
#[allow(clippy::type_complexity)]
|
||||
@@ -434,6 +440,11 @@ impl KeyHistoryRetention {
|
||||
if dry_run {
|
||||
return true;
|
||||
}
|
||||
if LayerMap::is_l0(&key.key_range, key.is_delta) {
|
||||
// gc-compaction should not produce L0 deltas, otherwise it will break the layer order.
|
||||
// We should ignore such layers.
|
||||
return true;
|
||||
}
|
||||
let layer_generation;
|
||||
{
|
||||
let guard = tline.layers.read().await;
|
||||
@@ -589,6 +600,17 @@ impl CompactionStatistics {
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Default, Debug, Clone, Copy, PartialEq, Eq)]
|
||||
pub enum CompactionOutcome {
|
||||
#[default]
|
||||
/// No layers need to be compacted after this round. Compaction doesn't need
|
||||
/// to be immediately scheduled.
|
||||
Done,
|
||||
/// Still has pending layers to be compacted after this round. Ideally, the scheduler
|
||||
/// should immediately schedule another compaction.
|
||||
Pending,
|
||||
}
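As a rough illustration of how a caller might react to this enum (the actual scheduler is not part of this diff), a driver loop could simply re-run compaction while the outcome stays `Pending`; `run_compaction_pass` below is a hypothetical callback standing in for `Timeline::compact`.

```rust
/// Illustrative only: the real scheduling logic is more involved.
fn drive_compaction<F>(mut run_compaction_pass: F, max_rounds: usize)
where
    F: FnMut() -> CompactionOutcome,
{
    for _ in 0..max_rounds {
        match run_compaction_pass() {
            // Nothing left to do; wait for the next periodic trigger.
            CompactionOutcome::Done => break,
            // Work remains; schedule another round right away.
            CompactionOutcome::Pending => continue,
        }
    }
}
```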
|
||||
|
||||
impl Timeline {
|
||||
/// TODO: cancellation
|
||||
///
|
||||
@@ -598,7 +620,7 @@ impl Timeline {
|
||||
cancel: &CancellationToken,
|
||||
options: CompactOptions,
|
||||
ctx: &RequestContext,
|
||||
) -> Result<bool, CompactionError> {
|
||||
) -> Result<CompactionOutcome, CompactionError> {
|
||||
if options
|
||||
.flags
|
||||
.contains(CompactFlags::EnhancedGcBottomMostCompaction)
|
||||
@@ -606,7 +628,7 @@ impl Timeline {
|
||||
self.compact_with_gc(cancel, options, ctx)
|
||||
.await
|
||||
.map_err(CompactionError::Other)?;
|
||||
return Ok(false);
|
||||
return Ok(CompactionOutcome::Done);
|
||||
}
|
||||
|
||||
if options.flags.contains(CompactFlags::DryRun) {
|
||||
@@ -666,9 +688,9 @@ impl Timeline {
|
||||
// Define partitioning schema if needed
|
||||
|
||||
// 1. L0 Compact
|
||||
let fully_compacted = {
|
||||
let l0_compaction_outcome = {
|
||||
let timer = self.metrics.compact_time_histo.start_timer();
|
||||
let fully_compacted = self
|
||||
let l0_compaction_outcome = self
|
||||
.compact_level0(
|
||||
target_file_size,
|
||||
options.flags.contains(CompactFlags::ForceL0Compaction),
|
||||
@@ -676,15 +698,15 @@ impl Timeline {
|
||||
)
|
||||
.await?;
|
||||
timer.stop_and_record();
|
||||
fully_compacted
|
||||
l0_compaction_outcome
|
||||
};
|
||||
|
||||
if !fully_compacted {
|
||||
if let CompactionOutcome::Pending = l0_compaction_outcome {
|
||||
// Yield and do not do any other kind of compaction. A `Pending` outcome means
|
||||
// that we have pending L0 compaction tasks and the compaction scheduler
|
||||
// will prioritize compacting this tenant/timeline again.
|
||||
info!("skipping image layer generation and shard ancestor compaction due to L0 compaction did not include all layers.");
|
||||
return Ok(true);
|
||||
return Ok(CompactionOutcome::Pending);
|
||||
}
|
||||
|
||||
// 2. Repartition and create image layers if necessary
|
||||
@@ -709,7 +731,7 @@ impl Timeline {
|
||||
.extend(sparse_partitioning.into_dense().parts);
|
||||
|
||||
// 3. Create new image layers for partitions that have been modified "enough".
|
||||
let image_layers = self
|
||||
let (image_layers, outcome) = self
|
||||
.create_image_layers(
|
||||
&partitioning,
|
||||
lsn,
|
||||
@@ -722,10 +744,30 @@ impl Timeline {
|
||||
ImageLayerCreationMode::Try
|
||||
},
|
||||
&image_ctx,
|
||||
self.last_image_layer_creation_status
|
||||
.load()
|
||||
.as_ref()
|
||||
.clone(),
|
||||
)
|
||||
.await?;
|
||||
.await
|
||||
.inspect_err(|err| {
|
||||
if let CreateImageLayersError::GetVectoredError(
|
||||
GetVectoredError::MissingKey(_),
|
||||
) = err
|
||||
{
|
||||
critical!("missing key during compaction: {err:?}");
|
||||
}
|
||||
})?;
|
||||
|
||||
self.last_image_layer_creation_status
|
||||
.store(Arc::new(outcome.clone()));
|
||||
|
||||
self.upload_new_image_layers(image_layers)?;
|
||||
if let LastImageLayerCreationStatus::Incomplete { .. } = outcome {
|
||||
// Yield and do not do any other kind of compaction.
|
||||
info!("skipping shard ancestor compaction due to pending image layer generation tasks (preempted by L0 compaction).");
|
||||
return Ok(CompactionOutcome::Pending);
|
||||
}
|
||||
partitioning.parts.len()
|
||||
}
|
||||
Err(err) => {
|
||||
@@ -753,7 +795,7 @@ impl Timeline {
|
||||
self.compact_shard_ancestors(rewrite_max, ctx).await?;
|
||||
}
|
||||
|
||||
Ok(false)
|
||||
Ok(CompactionOutcome::Done)
|
||||
}
|
||||
|
||||
/// Check for layers that are elegible to be rewritten:
|
||||
@@ -1010,11 +1052,11 @@ impl Timeline {
|
||||
target_file_size: u64,
|
||||
force_compaction_ignore_threshold: bool,
|
||||
ctx: &RequestContext,
|
||||
) -> Result<bool, CompactionError> {
|
||||
) -> Result<CompactionOutcome, CompactionError> {
|
||||
let CompactLevel0Phase1Result {
|
||||
new_layers,
|
||||
deltas_to_compact,
|
||||
fully_compacted,
|
||||
outcome,
|
||||
} = {
|
||||
let phase1_span = info_span!("compact_level0_phase1");
|
||||
let ctx = ctx.attached_child();
|
||||
@@ -1043,12 +1085,12 @@ impl Timeline {
|
||||
|
||||
if new_layers.is_empty() && deltas_to_compact.is_empty() {
|
||||
// nothing to do
|
||||
return Ok(true);
|
||||
return Ok(CompactionOutcome::Done);
|
||||
}
|
||||
|
||||
self.finish_compact_batch(&new_layers, &Vec::new(), &deltas_to_compact)
|
||||
.await?;
|
||||
Ok(fully_compacted)
|
||||
Ok(outcome)
|
||||
}
|
||||
|
||||
/// Level0 files first phase of compaction, explained in the [`Self::compact_legacy`] comment.
|
||||
@@ -1503,11 +1545,9 @@ impl Timeline {
|
||||
.await
|
||||
.map_err(CompactionError::Other)?;
|
||||
} else {
|
||||
let shard = self.shard_identity.shard_index();
|
||||
let owner = self.shard_identity.get_shard_number(&key);
|
||||
if cfg!(debug_assertions) {
|
||||
panic!("key {key} does not belong on shard {shard}, owned by {owner}");
|
||||
}
|
||||
|
||||
// This happens after a shard split, when we're compacting an L0 created by our parent shard
|
||||
debug!("dropping key {key} during compaction (it belongs on shard {owner})");
|
||||
}
|
||||
|
||||
@@ -1592,7 +1632,11 @@ impl Timeline {
|
||||
.into_iter()
|
||||
.map(|x| x.drop_eviction_guard())
|
||||
.collect::<Vec<_>>(),
|
||||
fully_compacted,
|
||||
outcome: if fully_compacted {
|
||||
CompactionOutcome::Done
|
||||
} else {
|
||||
CompactionOutcome::Pending
|
||||
},
|
||||
})
|
||||
}
|
||||
}
|
||||
@@ -1603,7 +1647,7 @@ struct CompactLevel0Phase1Result {
|
||||
deltas_to_compact: Vec<Layer>,
|
||||
// Whether we have included all L0 layers, or selected only part of them due to the
|
||||
// L0 compaction size limit.
|
||||
fully_compacted: bool,
|
||||
outcome: CompactionOutcome,
|
||||
}
|
||||
|
||||
#[derive(Default)]
|
||||
@@ -3234,11 +3278,7 @@ impl TimelineAdaptor {
|
||||
ranges: self.get_keyspace(key_range, lsn, ctx).await?,
|
||||
};
|
||||
// TODO set proper (stateful) start. The create_image_layer_for_rel_blocks function mostly
|
||||
let start = Key::MIN;
|
||||
let ImageLayerCreationOutcome {
|
||||
unfinished_image_layer,
|
||||
next_start_key: _,
|
||||
} = self
|
||||
let outcome = self
|
||||
.timeline
|
||||
.create_image_layer_for_rel_blocks(
|
||||
&keyspace,
|
||||
@@ -3246,13 +3286,15 @@ impl TimelineAdaptor {
|
||||
lsn,
|
||||
ctx,
|
||||
key_range.clone(),
|
||||
start,
|
||||
IoConcurrency::sequential(),
|
||||
)
|
||||
.await?;
|
||||
|
||||
if let Some(image_layer_writer) = unfinished_image_layer {
|
||||
let (desc, path) = image_layer_writer.finish(ctx).await?;
|
||||
if let ImageLayerCreationOutcome::Generated {
|
||||
unfinished_image_layer,
|
||||
} = outcome
|
||||
{
|
||||
let (desc, path) = unfinished_image_layer.finish(ctx).await?;
|
||||
let image_layer =
|
||||
Layer::finish_creating(self.timeline.conf, &self.timeline, desc, &path)?;
|
||||
self.new_images.push(image_layer);
|
||||
|
||||
@@ -30,8 +30,11 @@ use crate::{
|
||||
pgdatadir_mapping::CollectKeySpaceError,
|
||||
task_mgr::{self, TaskKind, BACKGROUND_RUNTIME},
|
||||
tenant::{
|
||||
size::CalculateSyntheticSizeError, storage_layer::LayerVisibilityHint,
|
||||
tasks::BackgroundLoopKind, timeline::EvictionError, LogicalSizeCalculationCause, Tenant,
|
||||
size::CalculateSyntheticSizeError,
|
||||
storage_layer::LayerVisibilityHint,
|
||||
tasks::{BackgroundLoopKind, BackgroundLoopSemaphorePermit},
|
||||
timeline::EvictionError,
|
||||
LogicalSizeCalculationCause, Tenant,
|
||||
},
|
||||
};
|
||||
|
||||
@@ -330,7 +333,7 @@ impl Timeline {
|
||||
&self,
|
||||
cancel: &CancellationToken,
|
||||
ctx: &RequestContext,
|
||||
) -> ControlFlow<(), tokio::sync::SemaphorePermit<'static>> {
|
||||
) -> ControlFlow<(), BackgroundLoopSemaphorePermit<'static>> {
|
||||
let acquire_permit = crate::tenant::tasks::concurrent_background_tasks_rate_limit_permit(
|
||||
BackgroundLoopKind::Eviction,
|
||||
ctx,
|
||||
@@ -374,7 +377,7 @@ impl Timeline {
|
||||
p: &EvictionPolicyLayerAccessThreshold,
|
||||
cancel: &CancellationToken,
|
||||
gate: &GateGuard,
|
||||
permit: tokio::sync::SemaphorePermit<'static>,
|
||||
permit: BackgroundLoopSemaphorePermit<'static>,
|
||||
ctx: &RequestContext,
|
||||
) -> ControlFlow<()> {
|
||||
if !self.tenant_shard_id.is_shard_zero() {
|
||||
|
||||
@@ -39,7 +39,7 @@ use crate::{
|
||||
use postgres_backend::is_expected_io_error;
|
||||
use postgres_connection::PgConnectionConfig;
|
||||
use postgres_ffi::waldecoder::WalStreamDecoder;
|
||||
use utils::{id::NodeId, lsn::Lsn, postgres_client::PostgresClientProtocol};
|
||||
use utils::{critical, id::NodeId, lsn::Lsn, postgres_client::PostgresClientProtocol};
|
||||
use utils::{pageserver_feedback::PageserverFeedback, sync::gate::GateError};
|
||||
|
||||
/// Status of the connection.
|
||||
@@ -355,6 +355,19 @@ pub(super) async fn handle_walreceiver_connection(
|
||||
// advances it to its end LSN. 0 is just an initialization placeholder.
|
||||
let mut modification = timeline.begin_modification(Lsn(0));
|
||||
|
||||
async fn commit(
|
||||
modification: &mut DatadirModification<'_>,
|
||||
ctx: &RequestContext,
|
||||
uncommitted: &mut u64,
|
||||
) -> anyhow::Result<()> {
|
||||
let stats = modification.stats();
|
||||
modification.commit(ctx).await?;
|
||||
WAL_INGEST.records_committed.inc_by(*uncommitted);
|
||||
WAL_INGEST.inc_values_committed(&stats);
|
||||
*uncommitted = 0;
|
||||
Ok(())
|
||||
}
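Around this helper, the surrounding loop decides when to flush uncommitted records; a hedged restatement of that batching policy is sketched below (the limits are illustrative, not the real constants such as `DatadirModification::MAX_PENDING_BYTES`).

```rust
/// Hedged sketch of the commit batching policy used around the helper above:
/// flush when the WAL stream asks for it or when the batch grows too large.
fn should_commit(flush_requested: bool, uncommitted: u64, pending_bytes: usize) -> bool {
    const MAX_RECORDS: u64 = 100; // illustrative limit
    const MAX_BYTES: usize = 128 * 1024; // illustrative limit
    flush_requested || uncommitted >= MAX_RECORDS || pending_bytes > MAX_BYTES
}
```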
|
||||
|
||||
if !records.is_empty() {
|
||||
timeline
|
||||
.metrics
|
||||
@@ -366,8 +379,7 @@ pub(super) async fn handle_walreceiver_connection(
|
||||
if matches!(interpreted.flush_uncommitted, FlushUncommittedRecords::Yes)
|
||||
&& uncommitted_records > 0
|
||||
{
|
||||
modification.commit(&ctx).await?;
|
||||
uncommitted_records = 0;
|
||||
commit(&mut modification, &ctx, &mut uncommitted_records).await?;
|
||||
}
|
||||
|
||||
let local_next_record_lsn = interpreted.next_record_lsn;
|
||||
@@ -381,6 +393,13 @@ pub(super) async fn handle_walreceiver_connection(
|
||||
.await
|
||||
.with_context(|| {
|
||||
format!("could not ingest record at {local_next_record_lsn}")
|
||||
})
|
||||
.inspect_err(|err| {
|
||||
// TODO: we can't differentiate cancellation errors with
|
||||
// anyhow::Error, so just ignore it if we're cancelled.
|
||||
if !cancellation.is_cancelled() {
|
||||
critical!("{err:?}")
|
||||
}
|
||||
})?;
|
||||
|
||||
uncommitted_records += 1;
|
||||
@@ -396,8 +415,7 @@ pub(super) async fn handle_walreceiver_connection(
|
||||
|| modification.approx_pending_bytes()
|
||||
> DatadirModification::MAX_PENDING_BYTES
|
||||
{
|
||||
modification.commit(&ctx).await?;
|
||||
uncommitted_records = 0;
|
||||
commit(&mut modification, &ctx, &mut uncommitted_records).await?;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -415,7 +433,7 @@ pub(super) async fn handle_walreceiver_connection(
|
||||
|
||||
if uncommitted_records > 0 || needs_last_record_lsn_advance {
|
||||
// Commit any uncommitted records
|
||||
modification.commit(&ctx).await?;
|
||||
commit(&mut modification, &ctx, &mut uncommitted_records).await?;
|
||||
}
|
||||
|
||||
if !caught_up && streaming_lsn >= end_of_wal {
|
||||
@@ -442,10 +460,12 @@ pub(super) async fn handle_walreceiver_connection(
|
||||
filtered: &mut u64,
|
||||
ctx: &RequestContext,
|
||||
) -> anyhow::Result<()> {
|
||||
let stats = modification.stats();
|
||||
modification.commit(ctx).await?;
|
||||
WAL_INGEST
|
||||
.records_committed
|
||||
.inc_by(*uncommitted - *filtered);
|
||||
modification.commit(ctx).await?;
|
||||
WAL_INGEST.inc_values_committed(&stats);
|
||||
*uncommitted = 0;
|
||||
*filtered = 0;
|
||||
Ok(())
|
||||
@@ -507,6 +527,13 @@ pub(super) async fn handle_walreceiver_connection(
|
||||
.await
|
||||
.with_context(|| {
|
||||
format!("could not ingest record at {next_record_lsn}")
|
||||
})
|
||||
.inspect_err(|err| {
|
||||
// TODO: we can't differentiate cancellation errors with
|
||||
// anyhow::Error, so just ignore it if we're cancelled.
|
||||
if !cancellation.is_cancelled() {
|
||||
critical!("{err:?}")
|
||||
}
|
||||
})?;
|
||||
if !ingested {
|
||||
tracing::debug!("ingest: filtered out record @ LSN {next_record_lsn}");
|
||||
|
||||
@@ -28,17 +28,9 @@ use std::time::Duration;
|
||||
use std::time::Instant;
|
||||
use std::time::SystemTime;
|
||||
|
||||
use pageserver_api::shard::ShardIdentity;
|
||||
use postgres_ffi::fsm_logical_to_physical;
|
||||
use postgres_ffi::walrecord::*;
|
||||
use postgres_ffi::{dispatch_pgversion, enum_pgversion, enum_pgversion_dispatch, TimestampTz};
|
||||
use wal_decoder::models::*;
|
||||
|
||||
use anyhow::{bail, Result};
|
||||
use bytes::{Buf, Bytes};
|
||||
use tracing::*;
|
||||
use utils::failpoint_support;
|
||||
use utils::rate_limit::RateLimit;
|
||||
|
||||
use crate::context::RequestContext;
|
||||
use crate::metrics::WAL_INGEST;
|
||||
@@ -50,11 +42,18 @@ use crate::ZERO_PAGE;
|
||||
use pageserver_api::key::rel_block_to_key;
|
||||
use pageserver_api::record::NeonWalRecord;
|
||||
use pageserver_api::reltag::{BlockNumber, RelTag, SlruKind};
|
||||
use pageserver_api::shard::ShardIdentity;
|
||||
use postgres_ffi::fsm_logical_to_physical;
|
||||
use postgres_ffi::pg_constants;
|
||||
use postgres_ffi::relfile_utils::{FSM_FORKNUM, INIT_FORKNUM, MAIN_FORKNUM, VISIBILITYMAP_FORKNUM};
|
||||
use postgres_ffi::walrecord::*;
|
||||
use postgres_ffi::TransactionId;
|
||||
use postgres_ffi::{dispatch_pgversion, enum_pgversion, enum_pgversion_dispatch, TimestampTz};
|
||||
use utils::bin_ser::SerializeError;
|
||||
use utils::lsn::Lsn;
|
||||
use utils::rate_limit::RateLimit;
|
||||
use utils::{critical, failpoint_support};
|
||||
use wal_decoder::models::*;
|
||||
|
||||
enum_pgversion! {CheckPoint, pgv::CheckPoint}
|
||||
|
||||
@@ -327,93 +326,75 @@ impl WalIngest {
|
||||
let mut new_vm_blk = new_heap_blkno.map(pg_constants::HEAPBLK_TO_MAPBLOCK);
|
||||
let mut old_vm_blk = old_heap_blkno.map(pg_constants::HEAPBLK_TO_MAPBLOCK);
|
||||
|
||||
// Sometimes, Postgres seems to create heap WAL records with the
|
||||
// ALL_VISIBLE_CLEARED flag set, even though the bit in the VM page is
|
||||
// not set. In fact, it's possible that the VM page does not exist at all.
|
||||
// In that case, we don't want to store a record to clear the VM bit;
|
||||
// replaying it would fail to find the previous image of the page, because
|
||||
// it doesn't exist. So check if the VM page(s) exist, and skip the WAL
|
||||
// record if it doesn't.
|
||||
//
|
||||
// TODO: analyze the metrics and tighten this up accordingly. This logic
|
||||
// implicitly assumes that VM pages see explicit WAL writes before
|
||||
// implicit ClearVmBits, and will otherwise silently drop updates.
|
||||
// VM bits can only be cleared on the shard(s) owning the VM relation, and must be within
|
||||
// its view of the VM relation size. Out of caution, error instead of failing WAL ingestion,
|
||||
// as there have historically been cases where PostgreSQL has cleared spurious VM pages. See:
|
||||
// https://github.com/neondatabase/neon/pull/10634.
|
||||
let Some(vm_size) = get_relsize(modification, vm_rel, ctx).await? else {
|
||||
WAL_INGEST
|
||||
.clear_vm_bits_unknown
|
||||
.with_label_values(&["relation"])
|
||||
.inc();
|
||||
critical!("clear_vm_bits for unknown VM relation {vm_rel}");
|
||||
return Ok(());
|
||||
};
|
||||
if let Some(blknum) = new_vm_blk {
|
||||
if blknum >= vm_size {
|
||||
WAL_INGEST
|
||||
.clear_vm_bits_unknown
|
||||
.with_label_values(&["new_page"])
|
||||
.inc();
|
||||
critical!("new_vm_blk {blknum} not in {vm_rel} of size {vm_size}");
|
||||
new_vm_blk = None;
|
||||
}
|
||||
}
|
||||
if let Some(blknum) = old_vm_blk {
|
||||
if blknum >= vm_size {
|
||||
WAL_INGEST
|
||||
.clear_vm_bits_unknown
|
||||
.with_label_values(&["old_page"])
|
||||
.inc();
|
||||
critical!("old_vm_blk {blknum} not in {vm_rel} of size {vm_size}");
|
||||
old_vm_blk = None;
|
||||
}
|
||||
}
|
||||
|
||||
if new_vm_blk.is_some() || old_vm_blk.is_some() {
|
||||
if new_vm_blk == old_vm_blk {
|
||||
// An UPDATE record that needs to clear the bits for both old and the
|
||||
// new page, both of which reside on the same VM page.
|
||||
if new_vm_blk.is_none() && old_vm_blk.is_none() {
|
||||
return Ok(());
|
||||
} else if new_vm_blk == old_vm_blk {
|
||||
// An UPDATE record that needs to clear the bits for both old and the new page, both of
|
||||
// which reside on the same VM page.
|
||||
self.put_rel_wal_record(
|
||||
modification,
|
||||
vm_rel,
|
||||
new_vm_blk.unwrap(),
|
||||
NeonWalRecord::ClearVisibilityMapFlags {
|
||||
new_heap_blkno,
|
||||
old_heap_blkno,
|
||||
flags,
|
||||
},
|
||||
ctx,
|
||||
)
|
||||
.await?;
|
||||
} else {
|
||||
// Clear VM bits for one heap page, or for two pages that reside on different VM pages.
|
||||
if let Some(new_vm_blk) = new_vm_blk {
|
||||
self.put_rel_wal_record(
|
||||
modification,
|
||||
vm_rel,
|
||||
new_vm_blk.unwrap(),
|
||||
new_vm_blk,
|
||||
NeonWalRecord::ClearVisibilityMapFlags {
|
||||
new_heap_blkno,
|
||||
old_heap_blkno: None,
|
||||
flags,
|
||||
},
|
||||
ctx,
|
||||
)
|
||||
.await?;
|
||||
}
|
||||
if let Some(old_vm_blk) = old_vm_blk {
|
||||
self.put_rel_wal_record(
|
||||
modification,
|
||||
vm_rel,
|
||||
old_vm_blk,
|
||||
NeonWalRecord::ClearVisibilityMapFlags {
|
||||
new_heap_blkno: None,
|
||||
old_heap_blkno,
|
||||
flags,
|
||||
},
|
||||
ctx,
|
||||
)
|
||||
.await?;
|
||||
} else {
|
||||
// Clear VM bits for one heap page, or for two pages that reside on
|
||||
// different VM pages.
|
||||
if let Some(new_vm_blk) = new_vm_blk {
|
||||
self.put_rel_wal_record(
|
||||
modification,
|
||||
vm_rel,
|
||||
new_vm_blk,
|
||||
NeonWalRecord::ClearVisibilityMapFlags {
|
||||
new_heap_blkno,
|
||||
old_heap_blkno: None,
|
||||
flags,
|
||||
},
|
||||
ctx,
|
||||
)
|
||||
.await?;
|
||||
}
|
||||
if let Some(old_vm_blk) = old_vm_blk {
|
||||
self.put_rel_wal_record(
|
||||
modification,
|
||||
vm_rel,
|
||||
old_vm_blk,
|
||||
NeonWalRecord::ClearVisibilityMapFlags {
|
||||
new_heap_blkno: None,
|
||||
old_heap_blkno,
|
||||
flags,
|
||||
},
|
||||
ctx,
|
||||
)
|
||||
.await?;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
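A compact way to picture the bounds check described in the comments above: each referenced VM block is kept only if it falls inside the relation's known size, and is otherwise dropped (and reported) rather than aborting WAL ingestion. The sketch below uses a hypothetical free function instead of the real `WalIngest` method, with `report` standing in for the `critical!` reporting.

```rust
/// Hedged sketch of the VM-block bounds check; not the real API.
fn clamp_vm_block(vm_blk: Option<u32>, vm_size: u32, report: &mut impl FnMut(String)) -> Option<u32> {
    match vm_blk {
        Some(blk) if blk >= vm_size => {
            // Out of range: historically caused by spurious ALL_VISIBLE_CLEARED
            // records, so drop the reference instead of failing ingestion.
            report(format!("vm block {blk} not within relation of size {vm_size}"));
            None
        }
        other => other,
    }
}
```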
|
||||
|
||||
|
||||
@@ -79,6 +79,14 @@ impl WalRedoProcess {
|
||||
.env_clear()
|
||||
.env("LD_LIBRARY_PATH", &pg_lib_dir_path)
|
||||
.env("DYLD_LIBRARY_PATH", &pg_lib_dir_path)
|
||||
.env(
|
||||
"ASAN_OPTIONS",
|
||||
std::env::var("ASAN_OPTIONS").unwrap_or_default(),
|
||||
)
|
||||
.env(
|
||||
"UBSAN_OPTIONS",
|
||||
std::env::var("UBSAN_OPTIONS").unwrap_or_default(),
|
||||
)
|
||||
// NB: The redo process is not trusted after we sent it the first
|
||||
// walredo work. Before that, it is trusted. Specifically, we trust
|
||||
// it to
|
||||
|
||||
@@ -509,47 +509,44 @@ lfc_cache_containsv(NRelFileInfo rinfo, ForkNumber forkNum, BlockNumber blkno,
|
||||
|
||||
CriticalAssert(BufTagGetRelNumber(&tag) != InvalidRelFileNumber);
|
||||
|
||||
tag.blockNum = (blkno + i) & ~(BLOCKS_PER_CHUNK - 1);
|
||||
tag.blockNum = blkno & ~(BLOCKS_PER_CHUNK - 1);
|
||||
hash = get_hash_value(lfc_hash, &tag);
|
||||
chunk_offs = (blkno + i) & (BLOCKS_PER_CHUNK - 1);
|
||||
chunk_offs = blkno & (BLOCKS_PER_CHUNK - 1);
|
||||
|
||||
LWLockAcquire(lfc_lock, LW_SHARED);
|
||||
|
||||
if (!LFC_ENABLED())
|
||||
{
|
||||
LWLockRelease(lfc_lock);
|
||||
return 0;
|
||||
}
|
||||
while (true)
|
||||
{
|
||||
int this_chunk = Min(nblocks, BLOCKS_PER_CHUNK - chunk_offs);
|
||||
if (LFC_ENABLED())
|
||||
{
|
||||
entry = hash_search_with_hash_value(lfc_hash, &tag, hash, HASH_FIND, NULL);
|
||||
int this_chunk = Min(nblocks - i, BLOCKS_PER_CHUNK - chunk_offs);
|
||||
entry = hash_search_with_hash_value(lfc_hash, &tag, hash, HASH_FIND, NULL);
|
||||
|
||||
if (entry != NULL)
|
||||
if (entry != NULL)
|
||||
{
|
||||
for (; chunk_offs < BLOCKS_PER_CHUNK && i < nblocks; chunk_offs++, i++)
|
||||
{
|
||||
for (; chunk_offs < BLOCKS_PER_CHUNK && i < nblocks; chunk_offs++, i++)
|
||||
if ((entry->bitmap[chunk_offs >> 5] &
|
||||
((uint32)1 << (chunk_offs & 31))) != 0)
|
||||
{
|
||||
if ((entry->bitmap[chunk_offs >> 5] &
|
||||
((uint32)1 << (chunk_offs & 31))) != 0)
|
||||
{
|
||||
BITMAP_SET(bitmap, i);
|
||||
found++;
|
||||
}
|
||||
BITMAP_SET(bitmap, i);
|
||||
found++;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
i += this_chunk;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
LWLockRelease(lfc_lock);
|
||||
return found;
|
||||
i += this_chunk;
|
||||
}
|
||||
|
||||
/*
|
||||
* Break out of the iteration before doing expensive stuff for
|
||||
* a next iteration
|
||||
*/
|
||||
if (i + 1 >= nblocks)
|
||||
if (i >= nblocks)
|
||||
break;
|
||||
|
||||
/*
|
||||
@@ -563,8 +560,8 @@ lfc_cache_containsv(NRelFileInfo rinfo, ForkNumber forkNum, BlockNumber blkno,
|
||||
|
||||
LWLockRelease(lfc_lock);
|
||||
|
||||
#if USE_ASSERT_CHECKING
|
||||
do {
|
||||
#ifdef USE_ASSERT_CHECKING
|
||||
{
|
||||
int count = 0;
|
||||
|
||||
for (int j = 0; j < nblocks; j++)
|
||||
@@ -574,7 +571,7 @@ lfc_cache_containsv(NRelFileInfo rinfo, ForkNumber forkNum, BlockNumber blkno,
|
||||
}
|
||||
|
||||
Assert(count == found);
|
||||
} while (false);
|
||||
}
|
||||
#endif
|
||||
|
||||
return found;
|
||||
|
||||
@@ -36,6 +36,11 @@
|
||||
#include "pagestore_client.h"
|
||||
#include "walproposer.h"
|
||||
|
||||
#ifdef __linux__
|
||||
#include <sys/ioctl.h>
|
||||
#include <linux/sockios.h>
|
||||
#endif
|
||||
|
||||
#define PageStoreTrace DEBUG5
|
||||
|
||||
#define MIN_RECONNECT_INTERVAL_USEC 1000
|
||||
@@ -728,11 +733,36 @@ retry:
|
||||
INSTR_TIME_SUBTRACT(since_last_log, last_log_ts);
|
||||
if (INSTR_TIME_GET_MILLISEC(since_last_log) >= LOG_INTERVAL_MS)
|
||||
{
|
||||
int sndbuf = -1;
|
||||
int recvbuf = -1;
|
||||
#ifdef __linux__
|
||||
int socketfd;
|
||||
#endif
|
||||
|
||||
since_start = now;
|
||||
INSTR_TIME_SUBTRACT(since_start, start_ts);
|
||||
neon_shard_log(shard_no, LOG, "no response received from pageserver for %0.3f s, still waiting (sent " UINT64_FORMAT " requests, received " UINT64_FORMAT " responses)",
|
||||
|
||||
#ifdef __linux__
|
||||
/*
|
||||
* get kernel's send and recv queue size via ioctl
|
||||
* https://elixir.bootlin.com/linux/v6.1.128/source/include/uapi/linux/sockios.h#L25-L27
|
||||
*/
|
||||
socketfd = PQsocket(pageserver_conn);
|
||||
if (socketfd != -1) {
|
||||
int ioctl_err;
|
||||
ioctl_err = ioctl(socketfd, SIOCOUTQ, &sndbuf);
|
||||
if (ioctl_err != 0) {
|
||||
sndbuf = -errno;
|
||||
}
|
||||
ioctl_err = ioctl(socketfd, FIONREAD, &recvbuf);
|
||||
if (ioctl_err != 0) {
|
||||
recvbuf = -errno;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
neon_shard_log(shard_no, LOG, "no response received from pageserver for %0.3f s, still waiting (sent " UINT64_FORMAT " requests, received " UINT64_FORMAT " responses) (socket sndbuf=%d recvbuf=%d)",
|
||||
INSTR_TIME_GET_DOUBLE(since_start),
|
||||
shard->nrequests_sent, shard->nresponses_received);
|
||||
shard->nrequests_sent, shard->nresponses_received, sndbuf, recvbuf);
|
||||
last_log_ts = now;
|
||||
logged = true;
|
||||
}
|
||||
|
||||
@@ -916,7 +916,7 @@ prefetch_register_bufferv(BufferTag tag, neon_request_lsns *frlsns,
|
||||
{
|
||||
uint64 min_ring_index;
|
||||
PrefetchRequest hashkey;
|
||||
#if USE_ASSERT_CHECKING
|
||||
#ifdef USE_ASSERT_CHECKING
|
||||
bool any_hits = false;
|
||||
#endif
|
||||
/* We will never read further ahead than our buffer can store. */
|
||||
@@ -955,7 +955,7 @@ Retry:
|
||||
else
|
||||
lsns = NULL;
|
||||
|
||||
#if USE_ASSERT_CHECKING
|
||||
#ifdef USE_ASSERT_CHECKING
|
||||
any_hits = true;
|
||||
#endif
|
||||
|
||||
@@ -3011,7 +3011,7 @@ neon_read_at_lsnv(NRelFileInfo rinfo, ForkNumber forkNum, BlockNumber base_block
|
||||
start_ts = GetCurrentTimestamp();
|
||||
|
||||
if (RecoveryInProgress() && MyBackendType != B_STARTUP)
|
||||
XLogWaitForReplayOf(reqlsns[0].request_lsn);
|
||||
XLogWaitForReplayOf(reqlsns->request_lsn);
|
||||
|
||||
/*
|
||||
* Try to find prefetched page in the list of received pages.
|
||||
|
||||
@@ -19,14 +19,15 @@ aws-config.workspace = true
|
||||
aws-sdk-iam.workspace = true
|
||||
aws-sigv4.workspace = true
|
||||
base64.workspace = true
|
||||
boxcar = "0.2.8"
|
||||
bstr.workspace = true
|
||||
bytes = { workspace = true, features = ["serde"] }
|
||||
camino.workspace = true
|
||||
chrono.workspace = true
|
||||
clap = { workspace = true, features = ["derive", "env"] }
|
||||
clashmap.workspace = true
|
||||
compute_api.workspace = true
|
||||
consumption_metrics.workspace = true
|
||||
dashmap.workspace = true
|
||||
env_logger.workspace = true
|
||||
framed-websockets.workspace = true
|
||||
futures.workspace = true
|
||||
@@ -42,6 +43,7 @@ hyper0.workspace = true
|
||||
hyper = { workspace = true, features = ["server", "http1", "http2"] }
|
||||
hyper-util = { version = "0.1", features = ["server", "http1", "http2", "tokio"] }
|
||||
http-body-util = { version = "0.1" }
|
||||
gettid = "0.1.3"
|
||||
indexmap = { workspace = true, features = ["serde"] }
|
||||
ipnet.workspace = true
|
||||
itertools.workspace = true
|
||||
@@ -50,6 +52,8 @@ lasso = { workspace = true, features = ["multi-threaded"] }
|
||||
measured = { workspace = true, features = ["lasso"] }
|
||||
metrics.workspace = true
|
||||
once_cell.workspace = true
|
||||
opentelemetry = { workspace = true, features = ["trace"] }
|
||||
papaya = "0.1.8"
|
||||
parking_lot.workspace = true
|
||||
parquet.workspace = true
|
||||
parquet_derive.workspace = true
|
||||
@@ -89,6 +93,9 @@ tokio = { workspace = true, features = ["signal"] }
|
||||
tracing-subscriber.workspace = true
|
||||
tracing-utils.workspace = true
|
||||
tracing.workspace = true
|
||||
tracing-log.workspace = true
|
||||
tracing-serde.workspace = true
|
||||
tracing-opentelemetry.workspace = true
|
||||
try-lock.workspace = true
|
||||
typed-json.workspace = true
|
||||
url.workspace = true
|
||||
@@ -112,6 +119,7 @@ rsa = "0.9"
|
||||
workspace_hack.workspace = true
|
||||
|
||||
[dev-dependencies]
|
||||
assert-json-diff.workspace = true
|
||||
camino-tempfile.workspace = true
|
||||
fallible-iterator.workspace = true
|
||||
flate2.workspace = true
|
||||
|
||||
@@ -106,17 +106,7 @@ cases where it is hard to use rows represented as objects (e.g. when several fie
|
||||
|
||||
Proxy determines the project name from the subdomain; a request to `round-rice-566201.somedomain.tld` will be routed to the project named `round-rice-566201`. Unfortunately, `/etc/hosts` does not support domain wildcards, so we can use `*.localtest.me`, which resolves to `127.0.0.1`.
|
||||
|
||||
Let's create self-signed certificate by running:
|
||||
```sh
|
||||
openssl req -new -x509 -days 365 -nodes -text -out server.crt -keyout server.key -subj "/CN=*.localtest.me"
|
||||
```
|
||||
|
||||
Then we need to build proxy with 'testing' feature and run, e.g.:
|
||||
```sh
|
||||
RUST_LOG=proxy cargo run -p proxy --bin proxy --features testing -- --auth-backend postgres --auth-endpoint 'postgresql://proxy:password@endpoint.localtest.me:5432/postgres' --is-private-access-proxy true -c server.crt -k server.key
|
||||
```
|
||||
|
||||
We will also need to have a postgres instance. Assuming that we have setted up docker we can set it up as follows:
|
||||
We will need to have a postgres instance. Assuming that we have set up docker, we can set it up as follows:
|
||||
```sh
|
||||
docker run \
|
||||
--detach \
|
||||
@@ -133,8 +123,18 @@ docker exec -it proxy-postgres psql -U postgres -c "CREATE TABLE neon_control_pl
|
||||
docker exec -it proxy-postgres psql -U postgres -c "CREATE ROLE proxy WITH SUPERUSER LOGIN PASSWORD 'password';"
|
||||
```
|
||||
|
||||
Let's create a self-signed certificate by running:
|
||||
```sh
|
||||
openssl req -new -x509 -days 365 -nodes -text -out server.crt -keyout server.key -subj "/CN=*.localtest.me"
|
||||
```
|
||||
|
||||
Then we need to build the proxy with the 'testing' feature and run it, e.g.:
|
||||
```sh
|
||||
RUST_LOG=proxy cargo run -p proxy --bin proxy --features testing -- --auth-backend postgres --auth-endpoint 'postgresql://postgres:proxy-postgres@127.0.0.1:5432/postgres' -c server.crt -k server.key
|
||||
```
|
||||
|
||||
Now, from the client, you can start a new session:
|
||||
|
||||
```sh
|
||||
PGSSLROOTCERT=./server.crt psql "postgresql://proxy:password@endpoint.localtest.me:4432/postgres?sslmode=verify-full"
|
||||
```
|
||||
```
|
||||
|
||||
@@ -7,8 +7,8 @@ use thiserror::Error;
|
||||
use tokio::io::{AsyncRead, AsyncWrite};
|
||||
use tracing::{info, info_span};
|
||||
|
||||
use super::{ComputeCredentialKeys, ControlPlaneApi};
|
||||
use crate::auth::backend::{BackendIpAllowlist, ComputeUserInfo};
|
||||
use super::ComputeCredentialKeys;
|
||||
use crate::auth::backend::ComputeUserInfo;
|
||||
use crate::auth::IpPattern;
|
||||
use crate::cache::Cached;
|
||||
use crate::config::AuthenticationConfig;
|
||||
@@ -84,26 +84,15 @@ pub(crate) fn new_psql_session_id() -> String {
|
||||
hex::encode(rand::random::<[u8; 8]>())
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
impl BackendIpAllowlist for ConsoleRedirectBackend {
|
||||
async fn get_allowed_ips(
|
||||
&self,
|
||||
ctx: &RequestContext,
|
||||
user_info: &ComputeUserInfo,
|
||||
) -> auth::Result<Vec<auth::IpPattern>> {
|
||||
self.api
|
||||
.get_allowed_ips_and_secret(ctx, user_info)
|
||||
.await
|
||||
.map(|(ips, _)| ips.as_ref().clone())
|
||||
.map_err(|e| e.into())
|
||||
}
|
||||
}
|
||||
|
||||
impl ConsoleRedirectBackend {
|
||||
pub fn new(console_uri: reqwest::Url, api: cplane_proxy_v1::NeonControlPlaneClient) -> Self {
|
||||
Self { console_uri, api }
|
||||
}
|
||||
|
||||
pub(crate) fn get_api(&self) -> &cplane_proxy_v1::NeonControlPlaneClient {
|
||||
&self.api
|
||||
}
|
||||
|
||||
pub(crate) async fn authenticate(
|
||||
&self,
|
||||
ctx: &RequestContext,
|
||||
@@ -191,6 +180,15 @@ async fn authenticate(
|
||||
}
|
||||
}
|
||||
|
||||
// Check if the access over the public internet is allowed, otherwise block. Note that
|
||||
// the console redirect is not behind the VPC service endpoint, so we don't need to check
|
||||
// the VPC endpoint ID.
|
||||
if let Some(public_access_allowed) = db_info.public_access_allowed {
|
||||
if !public_access_allowed {
|
||||
return Err(auth::AuthError::NetworkNotAllowed);
|
||||
}
|
||||
}
|
||||
|
||||
client.write_message_noflush(&Be::NoticeResponse("Connecting to database."))?;
|
||||
|
||||
// This config should be self-contained, because we won't
|
||||
|
||||
@@ -4,7 +4,7 @@ use std::sync::Arc;
|
||||
use std::time::{Duration, SystemTime};
|
||||
|
||||
use arc_swap::ArcSwapOption;
|
||||
use dashmap::DashMap;
|
||||
use clashmap::ClashMap;
|
||||
use jose_jwk::crypto::KeyInfo;
|
||||
use reqwest::{redirect, Client};
|
||||
use reqwest_retry::policies::ExponentialBackoff;
|
||||
@@ -64,7 +64,7 @@ pub(crate) struct AuthRule {
|
||||
pub struct JwkCache {
|
||||
client: reqwest_middleware::ClientWithMiddleware,
|
||||
|
||||
map: DashMap<(EndpointId, RoleName), Arc<JwkCacheEntryLock>>,
|
||||
map: ClashMap<(EndpointId, RoleName), Arc<JwkCacheEntryLock>>,
|
||||
}
|
||||
|
||||
pub(crate) struct JwkCacheEntry {
|
||||
@@ -469,7 +469,7 @@ impl Default for JwkCache {
|
||||
|
||||
JwkCache {
|
||||
client,
|
||||
map: DashMap::default(),
|
||||
map: ClashMap::default(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -26,10 +26,12 @@ use crate::context::RequestContext;
|
||||
use crate::control_plane::client::ControlPlaneClient;
|
||||
use crate::control_plane::errors::GetAuthInfoError;
|
||||
use crate::control_plane::{
|
||||
self, AuthSecret, CachedAllowedIps, CachedNodeInfo, CachedRoleSecret, ControlPlaneApi,
|
||||
self, AccessBlockerFlags, AuthSecret, CachedAccessBlockerFlags, CachedAllowedIps,
|
||||
CachedAllowedVpcEndpointIds, CachedNodeInfo, CachedRoleSecret, ControlPlaneApi,
|
||||
};
|
||||
use crate::intern::EndpointIdInt;
|
||||
use crate::metrics::Metrics;
|
||||
use crate::protocol2::ConnectionInfoExtra;
|
||||
use crate::proxy::connect_compute::ComputeConnectBackend;
|
||||
use crate::proxy::NeonOptions;
|
||||
use crate::rate_limiter::{BucketRateLimiter, EndpointRateLimiter};
|
||||
@@ -99,6 +101,13 @@ impl<T> Backend<'_, T> {
|
||||
Self::Local(l) => Backend::Local(MaybeOwned::Borrowed(l)),
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) fn get_api(&self) -> &ControlPlaneClient {
|
||||
match self {
|
||||
Self::ControlPlane(api, _) => api,
|
||||
Self::Local(_) => panic!("Local backend has no API"),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a, T> Backend<'a, T> {
|
||||
@@ -247,15 +256,6 @@ impl AuthenticationConfig {
|
||||
}
|
||||
}
|
||||
|
||||
#[async_trait::async_trait]
|
||||
pub(crate) trait BackendIpAllowlist {
|
||||
async fn get_allowed_ips(
|
||||
&self,
|
||||
ctx: &RequestContext,
|
||||
user_info: &ComputeUserInfo,
|
||||
) -> auth::Result<Vec<auth::IpPattern>>;
|
||||
}
|
||||
|
||||
/// True to its name, this function encapsulates our current auth trade-offs.
|
||||
/// Here, we choose the appropriate auth flow based on circumstances.
|
||||
///
|
||||
@@ -282,23 +282,51 @@ async fn auth_quirks(
|
||||
Ok(info) => (info, None),
|
||||
};
|
||||
|
||||
debug!("fetching user's authentication info");
|
||||
let (allowed_ips, maybe_secret) = api.get_allowed_ips_and_secret(ctx, &info).await?;
|
||||
debug!("fetching authentication info and allowlists");
|
||||
|
||||
// check allowed list
|
||||
if config.ip_allowlist_check_enabled
|
||||
&& !check_peer_addr_is_in_list(&ctx.peer_addr(), &allowed_ips)
|
||||
{
|
||||
return Err(auth::AuthError::ip_address_not_allowed(ctx.peer_addr()));
|
||||
let allowed_ips = if config.ip_allowlist_check_enabled {
|
||||
let allowed_ips = api.get_allowed_ips(ctx, &info).await?;
|
||||
if !check_peer_addr_is_in_list(&ctx.peer_addr(), &allowed_ips) {
|
||||
return Err(auth::AuthError::ip_address_not_allowed(ctx.peer_addr()));
|
||||
}
|
||||
allowed_ips
|
||||
} else {
|
||||
Cached::new_uncached(Arc::new(vec![]))
|
||||
};
|
||||
|
||||
// check if a VPC endpoint ID is coming in and if yes, if it's allowed
|
||||
let access_blocks = api.get_block_public_or_vpc_access(ctx, &info).await?;
|
||||
if config.is_vpc_acccess_proxy {
|
||||
if access_blocks.vpc_access_blocked {
|
||||
return Err(AuthError::NetworkNotAllowed);
|
||||
}
|
||||
|
||||
let incoming_vpc_endpoint_id = match ctx.extra() {
|
||||
None => return Err(AuthError::MissingEndpointName),
|
||||
Some(ConnectionInfoExtra::Aws { vpce_id }) => {
|
||||
// Convert the vcpe_id to a string
|
||||
String::from_utf8(vpce_id.to_vec()).unwrap_or_default()
|
||||
}
|
||||
Some(ConnectionInfoExtra::Azure { link_id }) => link_id.to_string(),
|
||||
};
|
||||
let allowed_vpc_endpoint_ids = api.get_allowed_vpc_endpoint_ids(ctx, &info).await?;
|
||||
// TODO: For now an empty VPC endpoint ID list means all are allowed. We should replace that.
|
||||
if !allowed_vpc_endpoint_ids.is_empty()
|
||||
&& !allowed_vpc_endpoint_ids.contains(&incoming_vpc_endpoint_id)
|
||||
{
|
||||
return Err(AuthError::vpc_endpoint_id_not_allowed(
|
||||
incoming_vpc_endpoint_id,
|
||||
));
|
||||
}
|
||||
} else if access_blocks.public_access_blocked {
|
||||
return Err(AuthError::NetworkNotAllowed);
|
||||
}
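The two branches above amount to a small decision table; the following sketch restates it with plain types (error values and flag names are illustrative, not the real `AuthError` variants) and keeps the current "empty allowlist means allow all" behaviour noted in the TODO.

```rust
/// Hedged sketch of the network-access decision made above; not the real API.
fn check_network_access(
    is_vpc_access_proxy: bool,
    vpc_access_blocked: bool,
    public_access_blocked: bool,
    incoming_vpce_id: Option<&str>,
    allowed_vpce_ids: &[String],
) -> Result<(), &'static str> {
    if is_vpc_access_proxy {
        // Traffic arriving over a VPC endpoint.
        if vpc_access_blocked {
            return Err("network not allowed");
        }
        let id = incoming_vpce_id.ok_or("missing VPC endpoint id")?;
        // An empty allowlist currently means "allow all" (see the TODO above).
        if !allowed_vpce_ids.is_empty() && !allowed_vpce_ids.iter().any(|a| a.as_str() == id) {
            return Err("vpc endpoint id not allowed");
        }
    } else if public_access_blocked {
        // Public-internet traffic while public access is blocked.
        return Err("network not allowed");
    }
    Ok(())
}
```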
|
||||
|
||||
if !endpoint_rate_limiter.check(info.endpoint.clone().into(), 1) {
|
||||
return Err(AuthError::too_many_connections());
|
||||
}
|
||||
let cached_secret = match maybe_secret {
|
||||
Some(secret) => secret,
|
||||
None => api.get_role_secret(ctx, &info).await?,
|
||||
};
|
||||
let cached_secret = api.get_role_secret(ctx, &info).await?;
|
||||
let (cached_entry, secret) = cached_secret.take_value();
|
||||
|
||||
let secret = if let Some(secret) = secret {
|
||||
@@ -440,34 +468,38 @@ impl Backend<'_, ComputeUserInfo> {
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) async fn get_allowed_ips_and_secret(
|
||||
pub(crate) async fn get_allowed_ips(
|
||||
&self,
|
||||
ctx: &RequestContext,
|
||||
) -> Result<(CachedAllowedIps, Option<CachedRoleSecret>), GetAuthInfoError> {
|
||||
) -> Result<CachedAllowedIps, GetAuthInfoError> {
|
||||
match self {
|
||||
Self::ControlPlane(api, user_info) => {
|
||||
api.get_allowed_ips_and_secret(ctx, user_info).await
|
||||
}
|
||||
Self::Local(_) => Ok((Cached::new_uncached(Arc::new(vec![])), None)),
|
||||
Self::ControlPlane(api, user_info) => api.get_allowed_ips(ctx, user_info).await,
|
||||
Self::Local(_) => Ok(Cached::new_uncached(Arc::new(vec![]))),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[async_trait::async_trait]
|
||||
impl BackendIpAllowlist for Backend<'_, ()> {
|
||||
async fn get_allowed_ips(
|
||||
pub(crate) async fn get_allowed_vpc_endpoint_ids(
|
||||
&self,
|
||||
ctx: &RequestContext,
|
||||
user_info: &ComputeUserInfo,
|
||||
) -> auth::Result<Vec<auth::IpPattern>> {
|
||||
let auth_data = match self {
|
||||
Self::ControlPlane(api, ()) => api.get_allowed_ips_and_secret(ctx, user_info).await,
|
||||
Self::Local(_) => Ok((Cached::new_uncached(Arc::new(vec![])), None)),
|
||||
};
|
||||
) -> Result<CachedAllowedVpcEndpointIds, GetAuthInfoError> {
|
||||
match self {
|
||||
Self::ControlPlane(api, user_info) => {
|
||||
api.get_allowed_vpc_endpoint_ids(ctx, user_info).await
|
||||
}
|
||||
Self::Local(_) => Ok(Cached::new_uncached(Arc::new(vec![]))),
|
||||
}
|
||||
}
|
||||
|
||||
auth_data
|
||||
.map(|(ips, _)| ips.as_ref().clone())
|
||||
.map_err(|e| e.into())
|
||||
pub(crate) async fn get_block_public_or_vpc_access(
|
||||
&self,
|
||||
ctx: &RequestContext,
|
||||
) -> Result<CachedAccessBlockerFlags, GetAuthInfoError> {
|
||||
match self {
|
||||
Self::ControlPlane(api, user_info) => {
|
||||
api.get_block_public_or_vpc_access(ctx, user_info).await
|
||||
}
|
||||
Self::Local(_) => Ok(Cached::new_uncached(AccessBlockerFlags::default())),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -514,7 +546,10 @@ mod tests {
|
||||
use crate::auth::{ComputeUserInfoMaybeEndpoint, IpPattern};
|
||||
use crate::config::AuthenticationConfig;
|
||||
use crate::context::RequestContext;
|
||||
use crate::control_plane::{self, CachedAllowedIps, CachedNodeInfo, CachedRoleSecret};
|
||||
use crate::control_plane::{
|
||||
self, AccessBlockerFlags, CachedAccessBlockerFlags, CachedAllowedIps,
|
||||
CachedAllowedVpcEndpointIds, CachedNodeInfo, CachedRoleSecret,
|
||||
};
|
||||
use crate::proxy::NeonOptions;
|
||||
use crate::rate_limiter::{EndpointRateLimiter, RateBucketInfo};
|
||||
use crate::scram::threadpool::ThreadPool;
|
||||
@@ -523,6 +558,8 @@ mod tests {
|
||||
|
||||
struct Auth {
|
||||
ips: Vec<IpPattern>,
|
||||
vpc_endpoint_ids: Vec<String>,
|
||||
access_blocker_flags: AccessBlockerFlags,
|
||||
secret: AuthSecret,
|
||||
}
|
||||
|
||||
@@ -535,17 +572,31 @@ mod tests {
|
||||
Ok(CachedRoleSecret::new_uncached(Some(self.secret.clone())))
|
||||
}
|
||||
|
||||
async fn get_allowed_ips_and_secret(
|
||||
async fn get_allowed_ips(
|
||||
&self,
|
||||
_ctx: &RequestContext,
|
||||
_user_info: &super::ComputeUserInfo,
|
||||
) -> Result<
|
||||
(CachedAllowedIps, Option<CachedRoleSecret>),
|
||||
control_plane::errors::GetAuthInfoError,
|
||||
> {
|
||||
Ok((
|
||||
CachedAllowedIps::new_uncached(Arc::new(self.ips.clone())),
|
||||
Some(CachedRoleSecret::new_uncached(Some(self.secret.clone()))),
|
||||
) -> Result<CachedAllowedIps, control_plane::errors::GetAuthInfoError> {
|
||||
Ok(CachedAllowedIps::new_uncached(Arc::new(self.ips.clone())))
|
||||
}
|
||||
|
||||
async fn get_allowed_vpc_endpoint_ids(
|
||||
&self,
|
||||
_ctx: &RequestContext,
|
||||
_user_info: &super::ComputeUserInfo,
|
||||
) -> Result<CachedAllowedVpcEndpointIds, control_plane::errors::GetAuthInfoError> {
|
||||
Ok(CachedAllowedVpcEndpointIds::new_uncached(Arc::new(
|
||||
self.vpc_endpoint_ids.clone(),
|
||||
)))
|
||||
}
|
||||
|
||||
async fn get_block_public_or_vpc_access(
|
||||
&self,
|
||||
_ctx: &RequestContext,
|
||||
_user_info: &super::ComputeUserInfo,
|
||||
) -> Result<CachedAccessBlockerFlags, control_plane::errors::GetAuthInfoError> {
|
||||
Ok(CachedAccessBlockerFlags::new_uncached(
|
||||
self.access_blocker_flags.clone(),
|
||||
))
|
||||
}
|
||||
|
||||
@@ -575,6 +626,7 @@ mod tests {
|
||||
rate_limiter: AuthRateLimiter::new(&RateBucketInfo::DEFAULT_AUTH_SET),
|
||||
rate_limit_ip_subnet: 64,
|
||||
ip_allowlist_check_enabled: true,
|
||||
is_vpc_acccess_proxy: false,
|
||||
is_auth_broker: false,
|
||||
accept_jwts: false,
|
||||
console_redirect_confirmation_timeout: std::time::Duration::from_secs(5),
|
||||
@@ -642,6 +694,8 @@ mod tests {
|
||||
let ctx = RequestContext::test();
|
||||
let api = Auth {
|
||||
ips: vec![],
|
||||
vpc_endpoint_ids: vec![],
|
||||
access_blocker_flags: AccessBlockerFlags::default(),
|
||||
secret: AuthSecret::Scram(ServerSecret::build("my-secret-password").await.unwrap()),
|
||||
};
|
||||
|
||||
@@ -722,6 +776,8 @@ mod tests {
|
||||
let ctx = RequestContext::test();
|
||||
let api = Auth {
|
||||
ips: vec![],
|
||||
vpc_endpoint_ids: vec![],
|
||||
access_blocker_flags: AccessBlockerFlags::default(),
|
||||
secret: AuthSecret::Scram(ServerSecret::build("my-secret-password").await.unwrap()),
|
||||
};
|
||||
|
||||
@@ -774,6 +830,8 @@ mod tests {
|
||||
let ctx = RequestContext::test();
|
||||
let api = Auth {
|
||||
ips: vec![],
|
||||
vpc_endpoint_ids: vec![],
|
||||
access_blocker_flags: AccessBlockerFlags::default(),
|
||||
secret: AuthSecret::Scram(ServerSecret::build("my-secret-password").await.unwrap()),
|
||||
};
|
||||
|
||||
|
||||
@@ -55,6 +55,12 @@ pub(crate) enum AuthError {
|
||||
)]
|
||||
MissingEndpointName,
|
||||
|
||||
#[error(
|
||||
"VPC endpoint ID is not specified. \
|
||||
This endpoint requires a VPC endpoint ID to connect."
|
||||
)]
|
||||
MissingVPCEndpointId,
|
||||
|
||||
#[error("password authentication failed for user '{0}'")]
|
||||
PasswordFailed(Box<str>),
|
||||
|
||||
@@ -69,6 +75,15 @@ pub(crate) enum AuthError {
|
||||
)]
|
||||
IpAddressNotAllowed(IpAddr),
|
||||
|
||||
#[error("This connection is trying to access this endpoint from a blocked network.")]
|
||||
NetworkNotAllowed,
|
||||
|
||||
#[error(
|
||||
"This VPC endpoint id {0} is not allowed to connect to this endpoint. \
|
||||
Please add it to the allowed list in the Neon console."
|
||||
)]
|
||||
VpcEndpointIdNotAllowed(String),
|
||||
|
||||
#[error("Too many connections to this endpoint. Please try again later.")]
|
||||
TooManyConnections,
|
||||
|
||||
@@ -95,6 +110,10 @@ impl AuthError {
|
||||
AuthError::IpAddressNotAllowed(ip)
|
||||
}
|
||||
|
||||
pub(crate) fn vpc_endpoint_id_not_allowed(id: String) -> Self {
|
||||
AuthError::VpcEndpointIdNotAllowed(id)
|
||||
}
|
||||
|
||||
pub(crate) fn too_many_connections() -> Self {
|
||||
AuthError::TooManyConnections
|
||||
}
|
||||
@@ -122,8 +141,11 @@ impl UserFacingError for AuthError {
|
||||
Self::BadAuthMethod(_) => self.to_string(),
|
||||
Self::MalformedPassword(_) => self.to_string(),
|
||||
Self::MissingEndpointName => self.to_string(),
|
||||
Self::MissingVPCEndpointId => self.to_string(),
|
||||
Self::Io(_) => "Internal error".to_string(),
|
||||
Self::IpAddressNotAllowed(_) => self.to_string(),
|
||||
Self::NetworkNotAllowed => self.to_string(),
|
||||
Self::VpcEndpointIdNotAllowed(_) => self.to_string(),
|
||||
Self::TooManyConnections => self.to_string(),
|
||||
Self::UserTimeout(_) => self.to_string(),
|
||||
Self::ConfirmationTimeout(_) => self.to_string(),
|
||||
@@ -142,8 +164,11 @@ impl ReportableError for AuthError {
|
||||
Self::BadAuthMethod(_) => crate::error::ErrorKind::User,
|
||||
Self::MalformedPassword(_) => crate::error::ErrorKind::User,
|
||||
Self::MissingEndpointName => crate::error::ErrorKind::User,
|
||||
Self::MissingVPCEndpointId => crate::error::ErrorKind::User,
|
||||
Self::Io(_) => crate::error::ErrorKind::ClientDisconnect,
|
||||
Self::IpAddressNotAllowed(_) => crate::error::ErrorKind::User,
|
||||
Self::NetworkNotAllowed => crate::error::ErrorKind::User,
|
||||
Self::VpcEndpointIdNotAllowed(_) => crate::error::ErrorKind::User,
|
||||
Self::TooManyConnections => crate::error::ErrorKind::RateLimit,
|
||||
Self::UserTimeout(_) => crate::error::ErrorKind::User,
|
||||
Self::ConfirmationTimeout(_) => crate::error::ErrorKind::User,
|
||||
|
||||
@@ -284,6 +284,7 @@ fn build_config(args: &LocalProxyCliArgs) -> anyhow::Result<&'static ProxyConfig
|
||||
rate_limiter: BucketRateLimiter::new(vec![]),
|
||||
rate_limit_ip_subnet: 64,
|
||||
ip_allowlist_check_enabled: true,
|
||||
is_vpc_acccess_proxy: false,
|
||||
is_auth_broker: false,
|
||||
accept_jwts: true,
|
||||
console_redirect_confirmation_timeout: Duration::ZERO,
|
||||
|
||||
@@ -630,6 +630,7 @@ fn build_config(args: &ProxyCliArgs) -> anyhow::Result<&'static ProxyConfig> {
|
||||
rate_limiter: AuthRateLimiter::new(args.auth_rate_limit.clone()),
|
||||
rate_limit_ip_subnet: args.auth_rate_limit_ip_subnet,
|
||||
ip_allowlist_check_enabled: !args.is_private_access_proxy,
|
||||
is_vpc_acccess_proxy: args.is_private_access_proxy,
|
||||
is_auth_broker: args.is_auth_broker,
|
||||
accept_jwts: args.is_auth_broker,
|
||||
console_redirect_confirmation_timeout: args.webauth_confirmation_timeout,
|
||||
|
||||
14
proxy/src/cache/endpoints.rs
vendored
@@ -3,7 +3,7 @@ use std::future::pending;
|
||||
use std::sync::atomic::{AtomicBool, Ordering};
|
||||
use std::sync::{Arc, Mutex};
|
||||
|
||||
use dashmap::DashSet;
|
||||
use clashmap::ClashSet;
|
||||
use redis::streams::{StreamReadOptions, StreamReadReply};
|
||||
use redis::{AsyncCommands, FromRedisValue, Value};
|
||||
use serde::Deserialize;
|
||||
@@ -55,9 +55,9 @@ impl TryFrom<&Value> for ControlPlaneEvent {
|
||||
|
||||
pub struct EndpointsCache {
|
||||
config: EndpointCacheConfig,
|
||||
endpoints: DashSet<EndpointIdInt>,
|
||||
branches: DashSet<BranchIdInt>,
|
||||
projects: DashSet<ProjectIdInt>,
|
||||
endpoints: ClashSet<EndpointIdInt>,
|
||||
branches: ClashSet<BranchIdInt>,
|
||||
projects: ClashSet<ProjectIdInt>,
|
||||
ready: AtomicBool,
|
||||
limiter: Arc<Mutex<GlobalRateLimiter>>,
|
||||
}
|
||||
@@ -69,9 +69,9 @@ impl EndpointsCache {
|
||||
config.limiter_info.clone(),
|
||||
))),
|
||||
config,
|
||||
endpoints: DashSet::new(),
|
||||
branches: DashSet::new(),
|
||||
projects: DashSet::new(),
|
||||
endpoints: ClashSet::new(),
|
||||
branches: ClashSet::new(),
|
||||
projects: ClashSet::new(),
|
||||
ready: AtomicBool::new(false),
|
||||
}
|
||||
}
|
||||
|
||||
236
proxy/src/cache/project_info.rs
vendored
@@ -5,7 +5,7 @@ use std::sync::Arc;
|
||||
use std::time::Duration;
|
||||
|
||||
use async_trait::async_trait;
|
||||
use dashmap::DashMap;
|
||||
use clashmap::ClashMap;
|
||||
use rand::{thread_rng, Rng};
|
||||
use smol_str::SmolStr;
|
||||
use tokio::sync::Mutex;
|
||||
@@ -15,13 +15,16 @@ use tracing::{debug, info};
|
||||
use super::{Cache, Cached};
|
||||
use crate::auth::IpPattern;
|
||||
use crate::config::ProjectInfoCacheOptions;
|
||||
use crate::control_plane::AuthSecret;
|
||||
use crate::intern::{EndpointIdInt, ProjectIdInt, RoleNameInt};
|
||||
use crate::control_plane::{AccessBlockerFlags, AuthSecret};
|
||||
use crate::intern::{AccountIdInt, EndpointIdInt, ProjectIdInt, RoleNameInt};
|
||||
use crate::types::{EndpointId, RoleName};
|
||||
|
||||
#[async_trait]
|
||||
pub(crate) trait ProjectInfoCache {
|
||||
fn invalidate_allowed_ips_for_project(&self, project_id: ProjectIdInt);
|
||||
fn invalidate_allowed_vpc_endpoint_ids_for_projects(&self, project_ids: Vec<ProjectIdInt>);
|
||||
fn invalidate_allowed_vpc_endpoint_ids_for_org(&self, account_id: AccountIdInt);
|
||||
fn invalidate_block_public_or_vpc_access_for_project(&self, project_id: ProjectIdInt);
|
||||
fn invalidate_role_secret_for_project(&self, project_id: ProjectIdInt, role_name: RoleNameInt);
|
||||
async fn decrement_active_listeners(&self);
|
||||
async fn increment_active_listeners(&self);
|
||||
@@ -51,6 +54,8 @@ impl<T> From<T> for Entry<T> {
|
||||
struct EndpointInfo {
|
||||
secret: std::collections::HashMap<RoleNameInt, Entry<Option<AuthSecret>>>,
|
||||
allowed_ips: Option<Entry<Arc<Vec<IpPattern>>>>,
|
||||
block_public_or_vpc_access: Option<Entry<AccessBlockerFlags>>,
|
||||
allowed_vpc_endpoint_ids: Option<Entry<Arc<Vec<String>>>>,
|
||||
}
|
||||
|
||||
impl EndpointInfo {
|
||||
@@ -92,9 +97,52 @@ impl EndpointInfo {
|
||||
}
|
||||
None
|
||||
}
|
||||
pub(crate) fn get_allowed_vpc_endpoint_ids(
|
||||
&self,
|
||||
valid_since: Instant,
|
||||
ignore_cache_since: Option<Instant>,
|
||||
) -> Option<(Arc<Vec<String>>, bool)> {
|
||||
if let Some(allowed_vpc_endpoint_ids) = &self.allowed_vpc_endpoint_ids {
|
||||
if valid_since < allowed_vpc_endpoint_ids.created_at {
|
||||
return Some((
|
||||
allowed_vpc_endpoint_ids.value.clone(),
|
||||
Self::check_ignore_cache(
|
||||
ignore_cache_since,
|
||||
allowed_vpc_endpoint_ids.created_at,
|
||||
),
|
||||
));
|
||||
}
|
||||
}
|
||||
None
|
||||
}
|
||||
pub(crate) fn get_block_public_or_vpc_access(
|
||||
&self,
|
||||
valid_since: Instant,
|
||||
ignore_cache_since: Option<Instant>,
|
||||
) -> Option<(AccessBlockerFlags, bool)> {
|
||||
if let Some(block_public_or_vpc_access) = &self.block_public_or_vpc_access {
|
||||
if valid_since < block_public_or_vpc_access.created_at {
|
||||
return Some((
|
||||
block_public_or_vpc_access.value.clone(),
|
||||
Self::check_ignore_cache(
|
||||
ignore_cache_since,
|
||||
block_public_or_vpc_access.created_at,
|
||||
),
|
||||
));
|
||||
}
|
||||
}
|
||||
None
|
||||
}
|
||||
|
||||
pub(crate) fn invalidate_allowed_ips(&mut self) {
|
||||
self.allowed_ips = None;
|
||||
}
|
||||
pub(crate) fn invalidate_allowed_vpc_endpoint_ids(&mut self) {
|
||||
self.allowed_vpc_endpoint_ids = None;
|
||||
}
|
||||
pub(crate) fn invalidate_block_public_or_vpc_access(&mut self) {
|
||||
self.block_public_or_vpc_access = None;
|
||||
}
|
||||
pub(crate) fn invalidate_role_secret(&mut self, role_name: RoleNameInt) {
|
||||
self.secret.remove(&role_name);
|
||||
}
|
||||
@@ -108,9 +156,11 @@ impl EndpointInfo {
|
||||
/// One may ask why the data is stored per project when, on a user request, only data about the endpoint is available.
|
||||
/// On the cplane side updates are done per project (or per branch), so it's easier to invalidate the whole project cache.
|
||||
pub struct ProjectInfoCacheImpl {
|
||||
cache: DashMap<EndpointIdInt, EndpointInfo>,
|
||||
cache: ClashMap<EndpointIdInt, EndpointInfo>,
|
||||
|
||||
project2ep: DashMap<ProjectIdInt, HashSet<EndpointIdInt>>,
|
||||
project2ep: ClashMap<ProjectIdInt, HashSet<EndpointIdInt>>,
|
||||
// FIXME(stefan): we need a way to GC the account2ep map.
|
||||
account2ep: ClashMap<AccountIdInt, HashSet<EndpointIdInt>>,
|
||||
config: ProjectInfoCacheOptions,
|
||||
|
||||
start_time: Instant,
|
||||
@@ -120,6 +170,63 @@ pub struct ProjectInfoCacheImpl {
|
||||
|
||||
#[async_trait]
|
||||
impl ProjectInfoCache for ProjectInfoCacheImpl {
|
||||
fn invalidate_allowed_vpc_endpoint_ids_for_projects(&self, project_ids: Vec<ProjectIdInt>) {
|
||||
info!(
|
||||
"invalidating allowed vpc endpoint ids for projects `{}`",
|
||||
project_ids
|
||||
.iter()
|
||||
.map(|id| id.to_string())
|
||||
.collect::<Vec<_>>()
|
||||
.join(", ")
|
||||
);
|
||||
for project_id in project_ids {
|
||||
let endpoints = self
|
||||
.project2ep
|
||||
.get(&project_id)
|
||||
.map(|kv| kv.value().clone())
|
||||
.unwrap_or_default();
|
||||
for endpoint_id in endpoints {
|
||||
if let Some(mut endpoint_info) = self.cache.get_mut(&endpoint_id) {
|
||||
endpoint_info.invalidate_allowed_vpc_endpoint_ids();
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn invalidate_allowed_vpc_endpoint_ids_for_org(&self, account_id: AccountIdInt) {
|
||||
info!(
|
||||
"invalidating allowed vpc endpoint ids for org `{}`",
|
||||
account_id
|
||||
);
|
||||
let endpoints = self
|
||||
.account2ep
|
||||
.get(&account_id)
|
||||
.map(|kv| kv.value().clone())
|
||||
.unwrap_or_default();
|
||||
for endpoint_id in endpoints {
|
||||
if let Some(mut endpoint_info) = self.cache.get_mut(&endpoint_id) {
|
||||
endpoint_info.invalidate_allowed_vpc_endpoint_ids();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn invalidate_block_public_or_vpc_access_for_project(&self, project_id: ProjectIdInt) {
|
||||
info!(
|
||||
"invalidating block public or vpc access for project `{}`",
|
||||
project_id
|
||||
);
|
||||
let endpoints = self
|
||||
.project2ep
|
||||
.get(&project_id)
|
||||
.map(|kv| kv.value().clone())
|
||||
.unwrap_or_default();
|
||||
for endpoint_id in endpoints {
|
||||
if let Some(mut endpoint_info) = self.cache.get_mut(&endpoint_id) {
|
||||
endpoint_info.invalidate_block_public_or_vpc_access();
|
||||
}
|
||||
}
|
||||
}
|
||||
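// Sketch of the fan-out these invalidation methods share: cplane keys invalidations by
// project (or account), so the cache keeps side maps from project/account to the
// endpoints it has seen and walks them to clear the per-endpoint entries. Plain std
// maps stand in for ClashMap here purely for illustration; the names are made up.
use std::collections::{HashMap, HashSet};

struct FanOutCache {
    project2ep: HashMap<u64, HashSet<u64>>,          // project id -> endpoint ids
    per_endpoint: HashMap<u64, Option<Vec<String>>>, // endpoint id -> cached allow-list
}

impl FanOutCache {
    fn invalidate_project(&mut self, project_id: u64) {
        let endpoints = self.project2ep.get(&project_id).cloned().unwrap_or_default();
        for endpoint_id in endpoints {
            if let Some(entry) = self.per_endpoint.get_mut(&endpoint_id) {
                *entry = None; // drop the cached value; the next lookup refetches it
            }
        }
    }
}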
|
||||
fn invalidate_allowed_ips_for_project(&self, project_id: ProjectIdInt) {
|
||||
info!("invalidating allowed ips for project `{}`", project_id);
|
||||
let endpoints = self
|
||||
@@ -176,8 +283,9 @@ impl ProjectInfoCache for ProjectInfoCacheImpl {
|
||||
impl ProjectInfoCacheImpl {
|
||||
pub(crate) fn new(config: ProjectInfoCacheOptions) -> Self {
|
||||
Self {
|
||||
cache: DashMap::new(),
|
||||
project2ep: DashMap::new(),
|
||||
cache: ClashMap::new(),
|
||||
project2ep: ClashMap::new(),
|
||||
account2ep: ClashMap::new(),
|
||||
config,
|
||||
ttl_disabled_since_us: AtomicU64::new(u64::MAX),
|
||||
start_time: Instant::now(),
|
||||
@@ -226,6 +334,49 @@ impl ProjectInfoCacheImpl {
|
||||
}
|
||||
Some(Cached::new_uncached(value))
|
||||
}
|
||||
pub(crate) fn get_allowed_vpc_endpoint_ids(
|
||||
&self,
|
||||
endpoint_id: &EndpointId,
|
||||
) -> Option<Cached<&Self, Arc<Vec<String>>>> {
|
||||
let endpoint_id = EndpointIdInt::get(endpoint_id)?;
|
||||
let (valid_since, ignore_cache_since) = self.get_cache_times();
|
||||
let endpoint_info = self.cache.get(&endpoint_id)?;
|
||||
let value = endpoint_info.get_allowed_vpc_endpoint_ids(valid_since, ignore_cache_since);
|
||||
let (value, ignore_cache) = value?;
|
||||
if !ignore_cache {
|
||||
let cached = Cached {
|
||||
token: Some((
|
||||
self,
|
||||
CachedLookupInfo::new_allowed_vpc_endpoint_ids(endpoint_id),
|
||||
)),
|
||||
value,
|
||||
};
|
||||
return Some(cached);
|
||||
}
|
||||
Some(Cached::new_uncached(value))
|
||||
}
|
||||
pub(crate) fn get_block_public_or_vpc_access(
|
||||
&self,
|
||||
endpoint_id: &EndpointId,
|
||||
) -> Option<Cached<&Self, AccessBlockerFlags>> {
|
||||
let endpoint_id = EndpointIdInt::get(endpoint_id)?;
|
||||
let (valid_since, ignore_cache_since) = self.get_cache_times();
|
||||
let endpoint_info = self.cache.get(&endpoint_id)?;
|
||||
let value = endpoint_info.get_block_public_or_vpc_access(valid_since, ignore_cache_since);
|
||||
let (value, ignore_cache) = value?;
|
||||
if !ignore_cache {
|
||||
let cached = Cached {
|
||||
token: Some((
|
||||
self,
|
||||
CachedLookupInfo::new_block_public_or_vpc_access(endpoint_id),
|
||||
)),
|
||||
value,
|
||||
};
|
||||
return Some(cached);
|
||||
}
|
||||
Some(Cached::new_uncached(value))
|
||||
}
|
||||
|
||||
pub(crate) fn insert_role_secret(
|
||||
&self,
|
||||
project_id: ProjectIdInt,
|
||||
@@ -256,6 +407,43 @@ impl ProjectInfoCacheImpl {
|
||||
self.insert_project2endpoint(project_id, endpoint_id);
|
||||
self.cache.entry(endpoint_id).or_default().allowed_ips = Some(allowed_ips.into());
|
||||
}
|
||||
pub(crate) fn insert_allowed_vpc_endpoint_ids(
|
||||
&self,
|
||||
account_id: Option<AccountIdInt>,
|
||||
project_id: ProjectIdInt,
|
||||
endpoint_id: EndpointIdInt,
|
||||
allowed_vpc_endpoint_ids: Arc<Vec<String>>,
|
||||
) {
|
||||
if self.cache.len() >= self.config.size {
|
||||
// If there are too many entries, wait until the next gc cycle.
|
||||
return;
|
||||
}
|
||||
if let Some(account_id) = account_id {
|
||||
self.insert_account2endpoint(account_id, endpoint_id);
|
||||
}
|
||||
self.insert_project2endpoint(project_id, endpoint_id);
|
||||
self.cache
|
||||
.entry(endpoint_id)
|
||||
.or_default()
|
||||
.allowed_vpc_endpoint_ids = Some(allowed_vpc_endpoint_ids.into());
|
||||
}
|
||||
pub(crate) fn insert_block_public_or_vpc_access(
|
||||
&self,
|
||||
project_id: ProjectIdInt,
|
||||
endpoint_id: EndpointIdInt,
|
||||
access_blockers: AccessBlockerFlags,
|
||||
) {
|
||||
if self.cache.len() >= self.config.size {
|
||||
// If there are too many entries, wait until the next gc cycle.
|
||||
return;
|
||||
}
|
||||
self.insert_project2endpoint(project_id, endpoint_id);
|
||||
self.cache
|
||||
.entry(endpoint_id)
|
||||
.or_default()
|
||||
.block_public_or_vpc_access = Some(access_blockers.into());
|
||||
}
|
||||
|
||||
fn insert_project2endpoint(&self, project_id: ProjectIdInt, endpoint_id: EndpointIdInt) {
|
||||
if let Some(mut endpoints) = self.project2ep.get_mut(&project_id) {
|
||||
endpoints.insert(endpoint_id);
|
||||
@@ -264,6 +452,14 @@ impl ProjectInfoCacheImpl {
|
||||
.insert(project_id, HashSet::from([endpoint_id]));
|
||||
}
|
||||
}
|
||||
fn insert_account2endpoint(&self, account_id: AccountIdInt, endpoint_id: EndpointIdInt) {
|
||||
if let Some(mut endpoints) = self.account2ep.get_mut(&account_id) {
|
||||
endpoints.insert(endpoint_id);
|
||||
} else {
|
||||
self.account2ep
|
||||
.insert(account_id, HashSet::from([endpoint_id]));
|
||||
}
|
||||
}
|
||||
fn get_cache_times(&self) -> (Instant, Option<Instant>) {
|
||||
let mut valid_since = Instant::now() - self.config.ttl;
|
||||
// Only ignore cache if ttl is disabled.
|
||||
@@ -302,7 +498,7 @@ impl ProjectInfoCacheImpl {
|
||||
let mut removed = 0;
|
||||
let shard = self.project2ep.shards()[shard].write();
|
||||
for (_, endpoints) in shard.iter() {
|
||||
for endpoint in endpoints.get() {
|
||||
for endpoint in endpoints {
|
||||
self.cache.remove(endpoint);
|
||||
removed += 1;
|
||||
}
|
||||
@@ -334,11 +530,25 @@ impl CachedLookupInfo {
|
||||
lookup_type: LookupType::AllowedIps,
|
||||
}
|
||||
}
|
||||
pub(self) fn new_allowed_vpc_endpoint_ids(endpoint_id: EndpointIdInt) -> Self {
|
||||
Self {
|
||||
endpoint_id,
|
||||
lookup_type: LookupType::AllowedVpcEndpointIds,
|
||||
}
|
||||
}
|
||||
pub(self) fn new_block_public_or_vpc_access(endpoint_id: EndpointIdInt) -> Self {
|
||||
Self {
|
||||
endpoint_id,
|
||||
lookup_type: LookupType::BlockPublicOrVpcAccess,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
enum LookupType {
|
||||
RoleSecret(RoleNameInt),
|
||||
AllowedIps,
|
||||
AllowedVpcEndpointIds,
|
||||
BlockPublicOrVpcAccess,
|
||||
}
|
||||
|
||||
impl Cache for ProjectInfoCacheImpl {
|
||||
@@ -360,6 +570,16 @@ impl Cache for ProjectInfoCacheImpl {
|
||||
endpoint_info.invalidate_allowed_ips();
|
||||
}
|
||||
}
|
||||
LookupType::AllowedVpcEndpointIds => {
|
||||
if let Some(mut endpoint_info) = self.cache.get_mut(&key.endpoint_id) {
|
||||
endpoint_info.invalidate_allowed_vpc_endpoint_ids();
|
||||
}
|
||||
}
|
||||
LookupType::BlockPublicOrVpcAccess => {
|
||||
if let Some(mut endpoint_info) = self.cache.get_mut(&key.endpoint_id) {
|
||||
endpoint_info.invalidate_block_public_or_vpc_access();
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,3 +1,4 @@
|
||||
use std::convert::Infallible;
|
||||
use std::net::{IpAddr, SocketAddr};
|
||||
use std::sync::Arc;
|
||||
|
||||
@@ -8,23 +9,22 @@ use pq_proto::CancelKeyData;
|
||||
use serde::{Deserialize, Serialize};
|
||||
use thiserror::Error;
|
||||
use tokio::net::TcpStream;
|
||||
use tokio::sync::mpsc;
|
||||
use tokio::sync::{mpsc, oneshot};
|
||||
use tracing::{debug, info};
|
||||
|
||||
use crate::auth::backend::{BackendIpAllowlist, ComputeUserInfo};
|
||||
use crate::auth::backend::ComputeUserInfo;
|
||||
use crate::auth::{check_peer_addr_is_in_list, AuthError};
|
||||
use crate::config::ComputeConfig;
|
||||
use crate::context::RequestContext;
|
||||
use crate::control_plane::ControlPlaneApi;
|
||||
use crate::error::ReportableError;
|
||||
use crate::ext::LockExt;
|
||||
use crate::metrics::CancelChannelSizeGuard;
|
||||
use crate::metrics::{CancellationRequest, Metrics, RedisMsgKind};
|
||||
use crate::metrics::{CancelChannelSizeGuard, CancellationRequest, Metrics, RedisMsgKind};
|
||||
use crate::protocol2::ConnectionInfoExtra;
|
||||
use crate::rate_limiter::LeakyBucketRateLimiter;
|
||||
use crate::redis::keys::KeyPrefix;
|
||||
use crate::redis::kv_ops::RedisKVClient;
|
||||
use crate::tls::postgres_rustls::MakeRustlsConnect;
|
||||
use std::convert::Infallible;
|
||||
use tokio::sync::oneshot;
|
||||
|
||||
type IpSubnetKey = IpNet;
|
||||
|
||||
@@ -135,6 +135,9 @@ pub(crate) enum CancelError {
|
||||
#[error("IP is not allowed")]
|
||||
IpNotAllowed,
|
||||
|
||||
#[error("VPC endpoint id is not allowed to connect")]
|
||||
VpcEndpointIdNotAllowed,
|
||||
|
||||
#[error("Authentication backend error")]
|
||||
AuthError(#[from] AuthError),
|
||||
|
||||
@@ -154,8 +157,9 @@ impl ReportableError for CancelError {
|
||||
}
|
||||
CancelError::Postgres(_) => crate::error::ErrorKind::Compute,
|
||||
CancelError::RateLimit => crate::error::ErrorKind::RateLimit,
|
||||
CancelError::IpNotAllowed => crate::error::ErrorKind::User,
|
||||
CancelError::NotFound => crate::error::ErrorKind::User,
|
||||
CancelError::IpNotAllowed
|
||||
| CancelError::VpcEndpointIdNotAllowed
|
||||
| CancelError::NotFound => crate::error::ErrorKind::User,
|
||||
CancelError::AuthError(_) => crate::error::ErrorKind::ControlPlane,
|
||||
CancelError::InternalError => crate::error::ErrorKind::Service,
|
||||
}
|
||||
@@ -267,11 +271,12 @@ impl CancellationHandler {
|
||||
/// Will fetch IP allowlist internally.
|
||||
///
|
||||
/// Returns a Result primarily for tests.
|
||||
pub(crate) async fn cancel_session<T: BackendIpAllowlist>(
|
||||
pub(crate) async fn cancel_session<T: ControlPlaneApi>(
|
||||
&self,
|
||||
key: CancelKeyData,
|
||||
ctx: RequestContext,
|
||||
check_allowed: bool,
|
||||
check_ip_allowed: bool,
|
||||
check_vpc_allowed: bool,
|
||||
auth_backend: &T,
|
||||
) -> Result<(), CancelError> {
|
||||
let subnet_key = match ctx.peer_addr() {
|
||||
@@ -306,11 +311,11 @@ impl CancellationHandler {
|
||||
return Err(CancelError::NotFound);
|
||||
};
|
||||
|
||||
if check_allowed {
|
||||
if check_ip_allowed {
|
||||
let ip_allowlist = auth_backend
|
||||
.get_allowed_ips(&ctx, &cancel_closure.user_info)
|
||||
.await
|
||||
.map_err(CancelError::AuthError)?;
|
||||
.map_err(|e| CancelError::AuthError(e.into()))?;
|
||||
|
||||
if !check_peer_addr_is_in_list(&ctx.peer_addr(), &ip_allowlist) {
|
||||
// log it here since cancel_session could be spawned in a task
|
||||
@@ -322,6 +327,40 @@ impl CancellationHandler {
|
||||
}
|
||||
}
|
||||
|
||||
// check if a VPC endpoint ID is coming in and if yes, if it's allowed
|
||||
let access_blocks = auth_backend
|
||||
.get_block_public_or_vpc_access(&ctx, &cancel_closure.user_info)
|
||||
.await
|
||||
.map_err(|e| CancelError::AuthError(e.into()))?;
|
||||
|
||||
if check_vpc_allowed {
|
||||
if access_blocks.vpc_access_blocked {
|
||||
return Err(CancelError::AuthError(AuthError::NetworkNotAllowed));
|
||||
}
|
||||
|
||||
let incoming_vpc_endpoint_id = match ctx.extra() {
|
||||
None => return Err(CancelError::AuthError(AuthError::MissingVPCEndpointId)),
|
||||
Some(ConnectionInfoExtra::Aws { vpce_id }) => {
|
||||
// Convert the vpce_id to a string
|
||||
String::from_utf8(vpce_id.to_vec()).unwrap_or_default()
|
||||
}
|
||||
Some(ConnectionInfoExtra::Azure { link_id }) => link_id.to_string(),
|
||||
};
|
||||
|
||||
let allowed_vpc_endpoint_ids = auth_backend
|
||||
.get_allowed_vpc_endpoint_ids(&ctx, &cancel_closure.user_info)
|
||||
.await
|
||||
.map_err(|e| CancelError::AuthError(e.into()))?;
|
||||
// TODO: For now an empty VPC endpoint ID list means all are allowed. We should replace that.
|
||||
if !allowed_vpc_endpoint_ids.is_empty()
|
||||
&& !allowed_vpc_endpoint_ids.contains(&incoming_vpc_endpoint_id)
|
||||
{
|
||||
return Err(CancelError::VpcEndpointIdNotAllowed);
|
||||
}
|
||||
} else if access_blocks.public_access_blocked {
|
||||
return Err(CancelError::VpcEndpointIdNotAllowed);
|
||||
}
|
||||
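// Standalone sketch of the VPC gate added above, with the crate's types replaced by
// simple stand-ins: VPC traffic is rejected when the project blocks VPC access, the
// connection must carry a VPC endpoint id, and a non-empty allow-list must contain
// that id (an empty list currently means "allow all", as the TODO above notes).
// The error variants and function name here are illustrative only.
#[derive(Debug)]
enum VpcCheckError {
    VpcAccessBlocked,
    MissingVpcEndpointId,
    VpcEndpointIdNotAllowed,
}

fn check_vpc(
    vpc_access_blocked: bool,
    incoming_vpce_id: Option<&str>,
    allowed_vpce_ids: &[String],
) -> Result<(), VpcCheckError> {
    if vpc_access_blocked {
        return Err(VpcCheckError::VpcAccessBlocked);
    }
    let id = incoming_vpce_id.ok_or(VpcCheckError::MissingVpcEndpointId)?;
    if !allowed_vpce_ids.is_empty() && !allowed_vpce_ids.iter().any(|a| a.as_str() == id) {
        return Err(VpcCheckError::VpcEndpointIdNotAllowed);
    }
    Ok(())
}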
|
||||
Metrics::get()
|
||||
.proxy
|
||||
.cancellation_requests_total
|
||||
|
||||
@@ -68,6 +68,7 @@ pub struct AuthenticationConfig {
|
||||
pub rate_limiter: AuthRateLimiter,
|
||||
pub rate_limit_ip_subnet: u8,
|
||||
pub ip_allowlist_check_enabled: bool,
|
||||
pub is_vpc_acccess_proxy: bool,
|
||||
pub jwks_cache: JwkCache,
|
||||
pub is_auth_broker: bool,
|
||||
pub accept_jwts: bool,
|
||||
|
||||
@@ -182,7 +182,8 @@ pub(crate) async fn handle_client<S: AsyncRead + AsyncWrite + Unpin>(
|
||||
cancel_key_data,
|
||||
ctx,
|
||||
config.authentication_config.ip_allowlist_check_enabled,
|
||||
backend,
|
||||
config.authentication_config.is_vpc_acccess_proxy,
|
||||
backend.get_api(),
|
||||
)
|
||||
.await
|
||||
.inspect_err(|e | debug!(error = ?e, "cancel_session failed")).ok();
|
||||
|
||||
@@ -19,7 +19,7 @@ use crate::intern::{BranchIdInt, ProjectIdInt};
|
||||
use crate::metrics::{
|
||||
ConnectOutcome, InvalidEndpointsGroup, LatencyTimer, Metrics, Protocol, Waiting,
|
||||
};
|
||||
use crate::protocol2::ConnectionInfo;
|
||||
use crate::protocol2::{ConnectionInfo, ConnectionInfoExtra};
|
||||
use crate::types::{DbName, EndpointId, RoleName};
|
||||
|
||||
pub mod parquet;
|
||||
@@ -312,6 +312,15 @@ impl RequestContext {
|
||||
.ip()
|
||||
}
|
||||
|
||||
pub(crate) fn extra(&self) -> Option<ConnectionInfoExtra> {
|
||||
self.0
|
||||
.try_lock()
|
||||
.expect("should not deadlock")
|
||||
.conn_info
|
||||
.extra
|
||||
.clone()
|
||||
}
|
||||
|
||||
pub(crate) fn cold_start_info(&self) -> ColdStartInfo {
|
||||
self.0
|
||||
.try_lock()
|
||||
|
||||
@@ -22,7 +22,8 @@ use crate::control_plane::errors::{
|
||||
use crate::control_plane::locks::ApiLocks;
|
||||
use crate::control_plane::messages::{ColdStartInfo, EndpointJwksResponse, Reason};
|
||||
use crate::control_plane::{
|
||||
AuthInfo, AuthSecret, CachedAllowedIps, CachedNodeInfo, CachedRoleSecret, NodeInfo,
|
||||
AccessBlockerFlags, AuthInfo, AuthSecret, CachedAccessBlockerFlags, CachedAllowedIps,
|
||||
CachedAllowedVpcEndpointIds, CachedNodeInfo, CachedRoleSecret, NodeInfo,
|
||||
};
|
||||
use crate::metrics::{CacheOutcome, Metrics};
|
||||
use crate::rate_limiter::WakeComputeRateLimiter;
|
||||
@@ -137,9 +138,6 @@ impl NeonControlPlaneClient {
|
||||
}
|
||||
};
|
||||
|
||||
// Ivan: don't know where it will be used, so I leave it here
|
||||
let _endpoint_vpc_ids = body.allowed_vpc_endpoint_ids.unwrap_or_default();
|
||||
|
||||
let secret = if body.role_secret.is_empty() {
|
||||
None
|
||||
} else {
|
||||
@@ -153,10 +151,23 @@ impl NeonControlPlaneClient {
|
||||
.proxy
|
||||
.allowed_ips_number
|
||||
.observe(allowed_ips.len() as f64);
|
||||
let allowed_vpc_endpoint_ids = body.allowed_vpc_endpoint_ids.unwrap_or_default();
|
||||
Metrics::get()
|
||||
.proxy
|
||||
.allowed_vpc_endpoint_ids
|
||||
.observe(allowed_vpc_endpoint_ids.len() as f64);
|
||||
let block_public_connections = body.block_public_connections.unwrap_or_default();
|
||||
let block_vpc_connections = body.block_vpc_connections.unwrap_or_default();
|
||||
Ok(AuthInfo {
|
||||
secret,
|
||||
allowed_ips,
|
||||
allowed_vpc_endpoint_ids,
|
||||
project_id: body.project_id,
|
||||
account_id: body.account_id,
|
||||
access_blocker_flags: AccessBlockerFlags {
|
||||
public_access_blocked: block_public_connections,
|
||||
vpc_access_blocked: block_vpc_connections,
|
||||
},
|
||||
})
|
||||
}
|
||||
.inspect_err(|e| tracing::debug!(error = ?e))
|
||||
@@ -299,6 +310,7 @@ impl super::ControlPlaneApi for NeonControlPlaneClient {
|
||||
return Ok(role_secret);
|
||||
}
|
||||
let auth_info = self.do_get_auth_info(ctx, user_info).await?;
|
||||
let account_id = auth_info.account_id;
|
||||
if let Some(project_id) = auth_info.project_id {
|
||||
let normalized_ep_int = normalized_ep.into();
|
||||
self.caches.project_info.insert_role_secret(
|
||||
@@ -312,24 +324,35 @@ impl super::ControlPlaneApi for NeonControlPlaneClient {
|
||||
normalized_ep_int,
|
||||
Arc::new(auth_info.allowed_ips),
|
||||
);
|
||||
self.caches.project_info.insert_allowed_vpc_endpoint_ids(
|
||||
account_id,
|
||||
project_id,
|
||||
normalized_ep_int,
|
||||
Arc::new(auth_info.allowed_vpc_endpoint_ids),
|
||||
);
|
||||
self.caches.project_info.insert_block_public_or_vpc_access(
|
||||
project_id,
|
||||
normalized_ep_int,
|
||||
auth_info.access_blocker_flags,
|
||||
);
|
||||
ctx.set_project_id(project_id);
|
||||
}
|
||||
// When we just got a secret, we don't need to invalidate it.
|
||||
Ok(Cached::new_uncached(auth_info.secret))
|
||||
}
|
||||
|
||||
async fn get_allowed_ips_and_secret(
|
||||
async fn get_allowed_ips(
|
||||
&self,
|
||||
ctx: &RequestContext,
|
||||
user_info: &ComputeUserInfo,
|
||||
) -> Result<(CachedAllowedIps, Option<CachedRoleSecret>), GetAuthInfoError> {
|
||||
) -> Result<CachedAllowedIps, GetAuthInfoError> {
|
||||
let normalized_ep = &user_info.endpoint.normalize();
|
||||
if let Some(allowed_ips) = self.caches.project_info.get_allowed_ips(normalized_ep) {
|
||||
Metrics::get()
|
||||
.proxy
|
||||
.allowed_ips_cache_misses
|
||||
.allowed_ips_cache_misses // TODO SR: Should we rename this variable to something like allowed_ip_cache_stats?
|
||||
.inc(CacheOutcome::Hit);
|
||||
return Ok((allowed_ips, None));
|
||||
return Ok(allowed_ips);
|
||||
}
|
||||
Metrics::get()
|
||||
.proxy
|
||||
@@ -337,7 +360,10 @@ impl super::ControlPlaneApi for NeonControlPlaneClient {
|
||||
.inc(CacheOutcome::Miss);
|
||||
let auth_info = self.do_get_auth_info(ctx, user_info).await?;
|
||||
let allowed_ips = Arc::new(auth_info.allowed_ips);
|
||||
let allowed_vpc_endpoint_ids = Arc::new(auth_info.allowed_vpc_endpoint_ids);
|
||||
let access_blocker_flags = auth_info.access_blocker_flags;
|
||||
let user = &user_info.user;
|
||||
let account_id = auth_info.account_id;
|
||||
if let Some(project_id) = auth_info.project_id {
|
||||
let normalized_ep_int = normalized_ep.into();
|
||||
self.caches.project_info.insert_role_secret(
|
||||
@@ -351,12 +377,136 @@ impl super::ControlPlaneApi for NeonControlPlaneClient {
|
||||
normalized_ep_int,
|
||||
allowed_ips.clone(),
|
||||
);
|
||||
self.caches.project_info.insert_allowed_vpc_endpoint_ids(
|
||||
account_id,
|
||||
project_id,
|
||||
normalized_ep_int,
|
||||
allowed_vpc_endpoint_ids.clone(),
|
||||
);
|
||||
self.caches.project_info.insert_block_public_or_vpc_access(
|
||||
project_id,
|
||||
normalized_ep_int,
|
||||
access_blocker_flags,
|
||||
);
|
||||
ctx.set_project_id(project_id);
|
||||
}
|
||||
Ok((
|
||||
Cached::new_uncached(allowed_ips),
|
||||
Some(Cached::new_uncached(auth_info.secret)),
|
||||
))
|
||||
Ok(Cached::new_uncached(allowed_ips))
|
||||
}
|
||||
|
||||
async fn get_allowed_vpc_endpoint_ids(
|
||||
&self,
|
||||
ctx: &RequestContext,
|
||||
user_info: &ComputeUserInfo,
|
||||
) -> Result<CachedAllowedVpcEndpointIds, GetAuthInfoError> {
|
||||
let normalized_ep = &user_info.endpoint.normalize();
|
||||
if let Some(allowed_vpc_endpoint_ids) = self
|
||||
.caches
|
||||
.project_info
|
||||
.get_allowed_vpc_endpoint_ids(normalized_ep)
|
||||
{
|
||||
Metrics::get()
|
||||
.proxy
|
||||
.vpc_endpoint_id_cache_stats
|
||||
.inc(CacheOutcome::Hit);
|
||||
return Ok(allowed_vpc_endpoint_ids);
|
||||
}
|
||||
|
||||
Metrics::get()
|
||||
.proxy
|
||||
.vpc_endpoint_id_cache_stats
|
||||
.inc(CacheOutcome::Miss);
|
||||
|
||||
let auth_info = self.do_get_auth_info(ctx, user_info).await?;
|
||||
let allowed_ips = Arc::new(auth_info.allowed_ips);
|
||||
let allowed_vpc_endpoint_ids = Arc::new(auth_info.allowed_vpc_endpoint_ids);
|
||||
let access_blocker_flags = auth_info.access_blocker_flags;
|
||||
let user = &user_info.user;
|
||||
let account_id = auth_info.account_id;
|
||||
if let Some(project_id) = auth_info.project_id {
|
||||
let normalized_ep_int = normalized_ep.into();
|
||||
self.caches.project_info.insert_role_secret(
|
||||
project_id,
|
||||
normalized_ep_int,
|
||||
user.into(),
|
||||
auth_info.secret.clone(),
|
||||
);
|
||||
self.caches.project_info.insert_allowed_ips(
|
||||
project_id,
|
||||
normalized_ep_int,
|
||||
allowed_ips.clone(),
|
||||
);
|
||||
self.caches.project_info.insert_allowed_vpc_endpoint_ids(
|
||||
account_id,
|
||||
project_id,
|
||||
normalized_ep_int,
|
||||
allowed_vpc_endpoint_ids.clone(),
|
||||
);
|
||||
self.caches.project_info.insert_block_public_or_vpc_access(
|
||||
project_id,
|
||||
normalized_ep_int,
|
||||
access_blocker_flags,
|
||||
);
|
||||
ctx.set_project_id(project_id);
|
||||
}
|
||||
Ok(Cached::new_uncached(allowed_vpc_endpoint_ids))
|
||||
}
|
||||
|
||||
async fn get_block_public_or_vpc_access(
|
||||
&self,
|
||||
ctx: &RequestContext,
|
||||
user_info: &ComputeUserInfo,
|
||||
) -> Result<CachedAccessBlockerFlags, GetAuthInfoError> {
|
||||
let normalized_ep = &user_info.endpoint.normalize();
|
||||
if let Some(access_blocker_flags) = self
|
||||
.caches
|
||||
.project_info
|
||||
.get_block_public_or_vpc_access(normalized_ep)
|
||||
{
|
||||
Metrics::get()
|
||||
.proxy
|
||||
.access_blocker_flags_cache_stats
|
||||
.inc(CacheOutcome::Hit);
|
||||
return Ok(access_blocker_flags);
|
||||
}
|
||||
|
||||
Metrics::get()
|
||||
.proxy
|
||||
.access_blocker_flags_cache_stats
|
||||
.inc(CacheOutcome::Miss);
|
||||
|
||||
let auth_info = self.do_get_auth_info(ctx, user_info).await?;
|
||||
let allowed_ips = Arc::new(auth_info.allowed_ips);
|
||||
let allowed_vpc_endpoint_ids = Arc::new(auth_info.allowed_vpc_endpoint_ids);
|
||||
let access_blocker_flags = auth_info.access_blocker_flags;
|
||||
let user = &user_info.user;
|
||||
let account_id = auth_info.account_id;
|
||||
if let Some(project_id) = auth_info.project_id {
|
||||
let normalized_ep_int = normalized_ep.into();
|
||||
self.caches.project_info.insert_role_secret(
|
||||
project_id,
|
||||
normalized_ep_int,
|
||||
user.into(),
|
||||
auth_info.secret.clone(),
|
||||
);
|
||||
self.caches.project_info.insert_allowed_ips(
|
||||
project_id,
|
||||
normalized_ep_int,
|
||||
allowed_ips.clone(),
|
||||
);
|
||||
self.caches.project_info.insert_allowed_vpc_endpoint_ids(
|
||||
account_id,
|
||||
project_id,
|
||||
normalized_ep_int,
|
||||
allowed_vpc_endpoint_ids.clone(),
|
||||
);
|
||||
self.caches.project_info.insert_block_public_or_vpc_access(
|
||||
project_id,
|
||||
normalized_ep_int,
|
||||
access_blocker_flags.clone(),
|
||||
);
|
||||
ctx.set_project_id(project_id);
|
||||
}
|
||||
Ok(Cached::new_uncached(access_blocker_flags))
|
||||
}
|
||||
|
||||
#[tracing::instrument(skip_all)]
|
||||
|
||||
@@ -13,12 +13,14 @@ use crate::auth::backend::ComputeUserInfo;
|
||||
use crate::auth::IpPattern;
|
||||
use crate::cache::Cached;
|
||||
use crate::context::RequestContext;
|
||||
use crate::control_plane::client::{CachedAllowedIps, CachedRoleSecret};
|
||||
use crate::control_plane::client::{
|
||||
CachedAllowedIps, CachedAllowedVpcEndpointIds, CachedRoleSecret,
|
||||
};
|
||||
use crate::control_plane::errors::{
|
||||
ControlPlaneError, GetAuthInfoError, GetEndpointJwksError, WakeComputeError,
|
||||
};
|
||||
use crate::control_plane::messages::MetricsAuxInfo;
|
||||
use crate::control_plane::{AuthInfo, AuthSecret, CachedNodeInfo, NodeInfo};
|
||||
use crate::control_plane::{AccessBlockerFlags, AuthInfo, AuthSecret, CachedNodeInfo, NodeInfo};
|
||||
use crate::error::io_error;
|
||||
use crate::intern::RoleNameInt;
|
||||
use crate::types::{BranchId, EndpointId, ProjectId, RoleName};
|
||||
@@ -121,7 +123,10 @@ impl MockControlPlane {
|
||||
Ok(AuthInfo {
|
||||
secret,
|
||||
allowed_ips,
|
||||
allowed_vpc_endpoint_ids: vec![],
|
||||
project_id: None,
|
||||
account_id: None,
|
||||
access_blocker_flags: AccessBlockerFlags::default(),
|
||||
})
|
||||
}
|
||||
|
||||
@@ -214,16 +219,35 @@ impl super::ControlPlaneApi for MockControlPlane {
|
||||
))
|
||||
}
|
||||
|
||||
async fn get_allowed_ips_and_secret(
|
||||
async fn get_allowed_ips(
|
||||
&self,
|
||||
_ctx: &RequestContext,
|
||||
user_info: &ComputeUserInfo,
|
||||
) -> Result<(CachedAllowedIps, Option<CachedRoleSecret>), GetAuthInfoError> {
|
||||
Ok((
|
||||
Cached::new_uncached(Arc::new(
|
||||
self.do_get_auth_info(user_info).await?.allowed_ips,
|
||||
)),
|
||||
None,
|
||||
) -> Result<CachedAllowedIps, GetAuthInfoError> {
|
||||
Ok(Cached::new_uncached(Arc::new(
|
||||
self.do_get_auth_info(user_info).await?.allowed_ips,
|
||||
)))
|
||||
}
|
||||
|
||||
async fn get_allowed_vpc_endpoint_ids(
|
||||
&self,
|
||||
_ctx: &RequestContext,
|
||||
user_info: &ComputeUserInfo,
|
||||
) -> Result<CachedAllowedVpcEndpointIds, super::errors::GetAuthInfoError> {
|
||||
Ok(Cached::new_uncached(Arc::new(
|
||||
self.do_get_auth_info(user_info)
|
||||
.await?
|
||||
.allowed_vpc_endpoint_ids,
|
||||
)))
|
||||
}
|
||||
|
||||
async fn get_block_public_or_vpc_access(
|
||||
&self,
|
||||
_ctx: &RequestContext,
|
||||
user_info: &ComputeUserInfo,
|
||||
) -> Result<super::CachedAccessBlockerFlags, super::errors::GetAuthInfoError> {
|
||||
Ok(Cached::new_uncached(
|
||||
self.do_get_auth_info(user_info).await?.access_blocker_flags,
|
||||
))
|
||||
}
|
||||
|
||||
|
||||
@@ -6,7 +6,7 @@ use std::hash::Hash;
|
||||
use std::sync::Arc;
|
||||
use std::time::Duration;
|
||||
|
||||
use dashmap::DashMap;
|
||||
use clashmap::ClashMap;
|
||||
use tokio::time::Instant;
|
||||
use tracing::{debug, info};
|
||||
|
||||
@@ -17,7 +17,8 @@ use crate::cache::project_info::ProjectInfoCacheImpl;
|
||||
use crate::config::{CacheOptions, EndpointCacheConfig, ProjectInfoCacheOptions};
|
||||
use crate::context::RequestContext;
|
||||
use crate::control_plane::{
|
||||
errors, CachedAllowedIps, CachedNodeInfo, CachedRoleSecret, ControlPlaneApi, NodeInfoCache,
|
||||
errors, CachedAccessBlockerFlags, CachedAllowedIps, CachedAllowedVpcEndpointIds,
|
||||
CachedNodeInfo, CachedRoleSecret, ControlPlaneApi, NodeInfoCache,
|
||||
};
|
||||
use crate::error::ReportableError;
|
||||
use crate::metrics::ApiLockMetrics;
|
||||
@@ -55,17 +56,45 @@ impl ControlPlaneApi for ControlPlaneClient {
|
||||
}
|
||||
}
|
||||
|
||||
async fn get_allowed_ips_and_secret(
|
||||
async fn get_allowed_ips(
|
||||
&self,
|
||||
ctx: &RequestContext,
|
||||
user_info: &ComputeUserInfo,
|
||||
) -> Result<(CachedAllowedIps, Option<CachedRoleSecret>), errors::GetAuthInfoError> {
|
||||
) -> Result<CachedAllowedIps, errors::GetAuthInfoError> {
|
||||
match self {
|
||||
Self::ProxyV1(api) => api.get_allowed_ips_and_secret(ctx, user_info).await,
|
||||
Self::ProxyV1(api) => api.get_allowed_ips(ctx, user_info).await,
|
||||
#[cfg(any(test, feature = "testing"))]
|
||||
Self::PostgresMock(api) => api.get_allowed_ips_and_secret(ctx, user_info).await,
|
||||
Self::PostgresMock(api) => api.get_allowed_ips(ctx, user_info).await,
|
||||
#[cfg(test)]
|
||||
Self::Test(api) => api.get_allowed_ips_and_secret(),
|
||||
Self::Test(api) => api.get_allowed_ips(),
|
||||
}
|
||||
}
|
||||
|
||||
async fn get_allowed_vpc_endpoint_ids(
|
||||
&self,
|
||||
ctx: &RequestContext,
|
||||
user_info: &ComputeUserInfo,
|
||||
) -> Result<CachedAllowedVpcEndpointIds, errors::GetAuthInfoError> {
|
||||
match self {
|
||||
Self::ProxyV1(api) => api.get_allowed_vpc_endpoint_ids(ctx, user_info).await,
|
||||
#[cfg(any(test, feature = "testing"))]
|
||||
Self::PostgresMock(api) => api.get_allowed_vpc_endpoint_ids(ctx, user_info).await,
|
||||
#[cfg(test)]
|
||||
Self::Test(api) => api.get_allowed_vpc_endpoint_ids(),
|
||||
}
|
||||
}
|
||||
|
||||
async fn get_block_public_or_vpc_access(
|
||||
&self,
|
||||
ctx: &RequestContext,
|
||||
user_info: &ComputeUserInfo,
|
||||
) -> Result<CachedAccessBlockerFlags, errors::GetAuthInfoError> {
|
||||
match self {
|
||||
Self::ProxyV1(api) => api.get_block_public_or_vpc_access(ctx, user_info).await,
|
||||
#[cfg(any(test, feature = "testing"))]
|
||||
Self::PostgresMock(api) => api.get_block_public_or_vpc_access(ctx, user_info).await,
|
||||
#[cfg(test)]
|
||||
Self::Test(api) => api.get_block_public_or_vpc_access(),
|
||||
}
|
||||
}
|
||||
|
||||
@@ -102,9 +131,15 @@ impl ControlPlaneApi for ControlPlaneClient {
|
||||
pub(crate) trait TestControlPlaneClient: Send + Sync + 'static {
|
||||
fn wake_compute(&self) -> Result<CachedNodeInfo, errors::WakeComputeError>;
|
||||
|
||||
fn get_allowed_ips_and_secret(
|
||||
fn get_allowed_ips(&self) -> Result<CachedAllowedIps, errors::GetAuthInfoError>;
|
||||
|
||||
fn get_allowed_vpc_endpoint_ids(
|
||||
&self,
|
||||
) -> Result<(CachedAllowedIps, Option<CachedRoleSecret>), errors::GetAuthInfoError>;
|
||||
) -> Result<CachedAllowedVpcEndpointIds, errors::GetAuthInfoError>;
|
||||
|
||||
fn get_block_public_or_vpc_access(
|
||||
&self,
|
||||
) -> Result<CachedAccessBlockerFlags, errors::GetAuthInfoError>;
|
||||
|
||||
fn dyn_clone(&self) -> Box<dyn TestControlPlaneClient>;
|
||||
}
|
||||
@@ -148,7 +183,7 @@ impl ApiCaches {
|
||||
/// Various caches for [`control_plane`](super).
|
||||
pub struct ApiLocks<K> {
|
||||
name: &'static str,
|
||||
node_locks: DashMap<K, Arc<DynamicLimiter>>,
|
||||
node_locks: ClashMap<K, Arc<DynamicLimiter>>,
|
||||
config: RateLimiterConfig,
|
||||
timeout: Duration,
|
||||
epoch: std::time::Duration,
|
||||
@@ -180,7 +215,7 @@ impl<K: Hash + Eq + Clone> ApiLocks<K> {
|
||||
) -> prometheus::Result<Self> {
|
||||
Ok(Self {
|
||||
name,
|
||||
node_locks: DashMap::with_shard_amount(shards),
|
||||
node_locks: ClashMap::with_shard_amount(shards),
|
||||
config,
|
||||
timeout,
|
||||
epoch,
|
||||
@@ -238,7 +273,7 @@ impl<K: Hash + Eq + Clone> ApiLocks<K> {
|
||||
let mut lock = shard.write();
|
||||
let timer = self.metrics.reclamation_lag_seconds.start_timer();
|
||||
let count = lock
|
||||
.extract_if(|_, semaphore| Arc::strong_count(semaphore.get_mut()) == 1)
|
||||
.extract_if(|(_, semaphore)| Arc::strong_count(semaphore) == 1)
|
||||
.count();
|
||||
drop(lock);
|
||||
self.metrics.semaphores_unregistered.inc_by(count as u64);
|
||||
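// Sketch of the reclamation above: a lock entry can be dropped once its Arc has a
// strong count of 1, meaning no in-flight request still holds the semaphore. The new
// closure takes the `(key, value)` pair as a single pattern and no longer needs
// `get_mut`. This standalone version uses a Vec and `retain` to show the same
// ownership test; it is illustrative, not the map's actual API.
use std::sync::Arc;

fn reclaim_unused(locks: &mut Vec<(String, Arc<()>)>) -> usize {
    let before = locks.len();
    // Keep only entries that are still referenced somewhere else.
    locks.retain(|(_, semaphore)| Arc::strong_count(semaphore) != 1);
    before - locks.len()
}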
|
||||
@@ -4,7 +4,7 @@ use measured::FixedCardinalityLabel;
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
use crate::auth::IpPattern;
|
||||
use crate::intern::{BranchIdInt, EndpointIdInt, ProjectIdInt, RoleNameInt};
|
||||
use crate::intern::{AccountIdInt, BranchIdInt, EndpointIdInt, ProjectIdInt, RoleNameInt};
|
||||
use crate::proxy::retry::CouldRetry;
|
||||
|
||||
/// Generic error response with human-readable description.
|
||||
@@ -227,8 +227,11 @@ pub(crate) struct UserFacingMessage {
|
||||
pub(crate) struct GetEndpointAccessControl {
|
||||
pub(crate) role_secret: Box<str>,
|
||||
pub(crate) allowed_ips: Option<Vec<IpPattern>>,
|
||||
pub(crate) allowed_vpc_endpoint_ids: Option<Vec<String>>,
|
||||
pub(crate) project_id: Option<ProjectIdInt>,
|
||||
pub(crate) allowed_vpc_endpoint_ids: Option<Vec<EndpointIdInt>>,
|
||||
pub(crate) account_id: Option<AccountIdInt>,
|
||||
pub(crate) block_public_connections: Option<bool>,
|
||||
pub(crate) block_vpc_connections: Option<bool>,
|
||||
}
|
||||
|
||||
/// Response which holds compute node's `host:port` pair.
|
||||
@@ -282,6 +285,10 @@ pub(crate) struct DatabaseInfo {
|
||||
pub(crate) aux: MetricsAuxInfo,
|
||||
#[serde(default)]
|
||||
pub(crate) allowed_ips: Option<Vec<IpPattern>>,
|
||||
#[serde(default)]
|
||||
pub(crate) allowed_vpc_endpoint_ids: Option<Vec<String>>,
|
||||
#[serde(default)]
|
||||
pub(crate) public_access_allowed: Option<bool>,
|
||||
}
|
||||
|
||||
// Manually implement debug to omit sensitive info.
|
||||
@@ -293,6 +300,7 @@ impl fmt::Debug for DatabaseInfo {
|
||||
.field("dbname", &self.dbname)
|
||||
.field("user", &self.user)
|
||||
.field("allowed_ips", &self.allowed_ips)
|
||||
.field("allowed_vpc_endpoint_ids", &self.allowed_vpc_endpoint_ids)
|
||||
.finish_non_exhaustive()
|
||||
}
|
||||
}
|
||||
@@ -457,7 +465,7 @@ mod tests {
|
||||
|
||||
#[test]
|
||||
fn parse_get_role_secret() -> anyhow::Result<()> {
|
||||
// Empty `allowed_ips` field.
|
||||
// Empty `allowed_ips` and `allowed_vpc_endpoint_ids` fields.
|
||||
let json = json!({
|
||||
"role_secret": "secret",
|
||||
});
|
||||
@@ -467,9 +475,21 @@ mod tests {
|
||||
"allowed_ips": ["8.8.8.8"],
|
||||
});
|
||||
serde_json::from_str::<GetEndpointAccessControl>(&json.to_string())?;
|
||||
let json = json!({
|
||||
"role_secret": "secret",
|
||||
"allowed_vpc_endpoint_ids": ["vpce-0abcd1234567890ef"],
|
||||
});
|
||||
serde_json::from_str::<GetEndpointAccessControl>(&json.to_string())?;
|
||||
let json = json!({
|
||||
"role_secret": "secret",
|
||||
"allowed_ips": ["8.8.8.8"],
|
||||
"allowed_vpc_endpoint_ids": ["vpce-0abcd1234567890ef"],
|
||||
});
|
||||
serde_json::from_str::<GetEndpointAccessControl>(&json.to_string())?;
|
||||
let json = json!({
|
||||
"role_secret": "secret",
|
||||
"allowed_ips": ["8.8.8.8"],
|
||||
"allowed_vpc_endpoint_ids": ["vpce-0abcd1234567890ef"],
|
||||
"project_id": "project",
|
||||
});
|
||||
serde_json::from_str::<GetEndpointAccessControl>(&json.to_string())?;
|
||||
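// A further hedged example payload exercising the new optional block flags from
// GetEndpointAccessControl; the values are invented for illustration and mirror the
// existing test style rather than any documented cplane response.
let json = json!({
    "role_secret": "secret",
    "allowed_ips": ["8.8.8.8"],
    "allowed_vpc_endpoint_ids": ["vpce-0abcd1234567890ef"],
    "project_id": "project",
    "block_public_connections": false,
    "block_vpc_connections": true,
});
serde_json::from_str::<GetEndpointAccessControl>(&json.to_string())?;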
|
||||
@@ -19,6 +19,7 @@ use crate::cache::{Cached, TimedLru};
|
||||
use crate::config::ComputeConfig;
|
||||
use crate::context::RequestContext;
|
||||
use crate::control_plane::messages::{ControlPlaneErrorMessage, MetricsAuxInfo};
|
||||
use crate::intern::AccountIdInt;
|
||||
use crate::intern::ProjectIdInt;
|
||||
use crate::types::{EndpointCacheKey, EndpointId};
|
||||
use crate::{compute, scram};
|
||||
@@ -52,8 +53,14 @@ pub(crate) struct AuthInfo {
|
||||
pub(crate) secret: Option<AuthSecret>,
|
||||
/// List of IP addresses allowed for the authorization.
|
||||
pub(crate) allowed_ips: Vec<IpPattern>,
|
||||
/// List of VPC endpoints allowed for the authorization.
|
||||
pub(crate) allowed_vpc_endpoint_ids: Vec<String>,
|
||||
/// Project ID. This is used for cache invalidation.
|
||||
pub(crate) project_id: Option<ProjectIdInt>,
|
||||
/// Account ID. This is used for cache invalidation.
|
||||
pub(crate) account_id: Option<AccountIdInt>,
|
||||
/// Are public connections or VPC connections blocked?
|
||||
pub(crate) access_blocker_flags: AccessBlockerFlags,
|
||||
}
|
||||
|
||||
/// Info for establishing a connection to a compute node.
|
||||
@@ -95,11 +102,21 @@ impl NodeInfo {
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Clone, Default, Eq, PartialEq, Debug)]
|
||||
pub(crate) struct AccessBlockerFlags {
|
||||
pub public_access_blocked: bool,
|
||||
pub vpc_access_blocked: bool,
|
||||
}
|
||||
|
||||
pub(crate) type NodeInfoCache =
|
||||
TimedLru<EndpointCacheKey, Result<NodeInfo, Box<ControlPlaneErrorMessage>>>;
|
||||
pub(crate) type CachedNodeInfo = Cached<&'static NodeInfoCache, NodeInfo>;
|
||||
pub(crate) type CachedRoleSecret = Cached<&'static ProjectInfoCacheImpl, Option<AuthSecret>>;
|
||||
pub(crate) type CachedAllowedIps = Cached<&'static ProjectInfoCacheImpl, Arc<Vec<IpPattern>>>;
|
||||
pub(crate) type CachedAllowedVpcEndpointIds =
|
||||
Cached<&'static ProjectInfoCacheImpl, Arc<Vec<String>>>;
|
||||
pub(crate) type CachedAccessBlockerFlags =
|
||||
Cached<&'static ProjectInfoCacheImpl, AccessBlockerFlags>;
|
||||
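// Minimal sketch of the idea behind these Cached aliases: a value looked up from the
// cache carries a token pointing back at the owning cache plus enough lookup info to
// invalidate exactly that entry later, while values that bypassed or just refilled the
// cache are wrapped with new_uncached and carry no token. CachedSketch is a simplified
// stand-in, not the crate's real Cached type.
struct CachedSketch<'c, C, L, V> {
    token: Option<(&'c C, L)>, // owning cache + lookup info used for invalidation
    value: V,
}

impl<'c, C, L, V> CachedSketch<'c, C, L, V> {
    fn new_uncached(value: V) -> Self {
        CachedSketch { token: None, value }
    }
}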
|
||||
/// This will allocate on each call, but the http requests alone
|
||||
/// already require a few allocations, so it should be fine.
|
||||
@@ -113,11 +130,23 @@ pub(crate) trait ControlPlaneApi {
|
||||
user_info: &ComputeUserInfo,
|
||||
) -> Result<CachedRoleSecret, errors::GetAuthInfoError>;
|
||||
|
||||
async fn get_allowed_ips_and_secret(
|
||||
async fn get_allowed_ips(
|
||||
&self,
|
||||
ctx: &RequestContext,
|
||||
user_info: &ComputeUserInfo,
|
||||
) -> Result<(CachedAllowedIps, Option<CachedRoleSecret>), errors::GetAuthInfoError>;
|
||||
) -> Result<CachedAllowedIps, errors::GetAuthInfoError>;
|
||||
|
||||
async fn get_allowed_vpc_endpoint_ids(
|
||||
&self,
|
||||
ctx: &RequestContext,
|
||||
user_info: &ComputeUserInfo,
|
||||
) -> Result<CachedAllowedVpcEndpointIds, errors::GetAuthInfoError>;
|
||||
|
||||
async fn get_block_public_or_vpc_access(
|
||||
&self,
|
||||
ctx: &RequestContext,
|
||||
user_info: &ComputeUserInfo,
|
||||
) -> Result<CachedAccessBlockerFlags, errors::GetAuthInfoError>;
|
||||
|
||||
async fn get_endpoint_jwks(
|
||||
&self,
|
||||
|
||||
@@ -7,7 +7,7 @@ use std::sync::OnceLock;
|
||||
use lasso::{Capacity, MemoryLimits, Spur, ThreadedRodeo};
|
||||
use rustc_hash::FxHasher;
|
||||
|
||||
use crate::types::{BranchId, EndpointId, ProjectId, RoleName};
|
||||
use crate::types::{AccountId, BranchId, EndpointId, ProjectId, RoleName};
|
||||
|
||||
pub trait InternId: Sized + 'static {
|
||||
fn get_interner() -> &'static StringInterner<Self>;
|
||||
@@ -206,6 +206,26 @@ impl From<ProjectId> for ProjectIdInt {
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)]
|
||||
pub struct AccountIdTag;
|
||||
impl InternId for AccountIdTag {
|
||||
fn get_interner() -> &'static StringInterner<Self> {
|
||||
static ACCOUNT_IDS: OnceLock<StringInterner<AccountIdTag>> = OnceLock::new();
|
||||
ACCOUNT_IDS.get_or_init(Default::default)
|
||||
}
|
||||
}
|
||||
pub type AccountIdInt = InternedString<AccountIdTag>;
|
||||
impl From<&AccountId> for AccountIdInt {
|
||||
fn from(value: &AccountId) -> Self {
|
||||
AccountIdTag::get_interner().get_or_intern(value)
|
||||
}
|
||||
}
|
||||
impl From<AccountId> for AccountIdInt {
|
||||
fn from(value: AccountId) -> Self {
|
||||
AccountIdTag::get_interner().get_or_intern(&value)
|
||||
}
|
||||
}
|
||||
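// Sketch of the interning pattern the AccountIdTag block above follows: each tag type
// owns one process-wide interner behind a OnceLock, and the interned handle is a small
// Copy key that is cheap to hash and compare. The standalone version below swaps
// lasso for a plain HashMap to stay self-contained; it is illustrative only.
use std::collections::HashMap;
use std::sync::{Mutex, OnceLock};

fn intern(s: &str) -> u32 {
    static INTERNER: OnceLock<Mutex<HashMap<String, u32>>> = OnceLock::new();
    let mut map = INTERNER.get_or_init(|| Mutex::new(HashMap::new())).lock().unwrap();
    if let Some(&id) = map.get(s) {
        return id;
    }
    let id = map.len() as u32;
    map.insert(s.to_owned(), id);
    id
}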
|
||||
#[cfg(test)]
|
||||
#[expect(clippy::unwrap_used)]
|
||||
mod tests {
|
||||
|
||||
@@ -1,10 +1,23 @@
|
||||
use tracing::Subscriber;
|
||||
use std::cell::{Cell, RefCell};
|
||||
use std::collections::HashMap;
|
||||
use std::hash::BuildHasher;
|
||||
use std::{env, io};
|
||||
|
||||
use chrono::{DateTime, Utc};
|
||||
use opentelemetry::trace::TraceContextExt;
|
||||
use scopeguard::defer;
|
||||
use serde::ser::{SerializeMap, Serializer};
|
||||
use tracing::span;
|
||||
use tracing::subscriber::Interest;
|
||||
use tracing::{callsite, Event, Metadata, Span, Subscriber};
|
||||
use tracing_opentelemetry::OpenTelemetrySpanExt;
|
||||
use tracing_subscriber::filter::{EnvFilter, LevelFilter};
|
||||
use tracing_subscriber::fmt::format::{Format, Full};
|
||||
use tracing_subscriber::fmt::time::SystemTime;
|
||||
use tracing_subscriber::fmt::{FormatEvent, FormatFields};
|
||||
use tracing_subscriber::layer::{Context, Layer};
|
||||
use tracing_subscriber::prelude::*;
|
||||
use tracing_subscriber::registry::LookupSpan;
|
||||
use tracing_subscriber::registry::{LookupSpan, SpanRef};
|
||||
|
||||
/// Initialize logging and OpenTelemetry tracing and exporter.
|
||||
///
|
||||
@@ -15,6 +28,8 @@ use tracing_subscriber::registry::LookupSpan;
|
||||
/// destination, set `OTEL_EXPORTER_OTLP_ENDPOINT=http://jaeger:4318`.
|
||||
/// See <https://opentelemetry.io/docs/reference/specification/sdk-environment-variables>
|
||||
pub async fn init() -> anyhow::Result<LoggingGuard> {
|
||||
let logfmt = LogFormat::from_env()?;
|
||||
|
||||
let env_filter = EnvFilter::builder()
|
||||
.with_default_directive(LevelFilter::INFO.into())
|
||||
.from_env_lossy()
|
||||
@@ -29,17 +44,36 @@ pub async fn init() -> anyhow::Result<LoggingGuard> {
|
||||
.expect("this should be a valid filter directive"),
|
||||
);
|
||||
|
||||
let fmt_layer = tracing_subscriber::fmt::layer()
|
||||
.with_ansi(false)
|
||||
.with_writer(std::io::stderr)
|
||||
.with_target(false);
|
||||
|
||||
let otlp_layer = tracing_utils::init_tracing("proxy").await;
|
||||
|
||||
let json_log_layer = if logfmt == LogFormat::Json {
|
||||
Some(JsonLoggingLayer {
|
||||
clock: RealClock,
|
||||
skipped_field_indices: papaya::HashMap::default(),
|
||||
writer: StderrWriter {
|
||||
stderr: std::io::stderr(),
|
||||
},
|
||||
})
|
||||
} else {
|
||||
None
|
||||
};
|
||||
|
||||
let text_log_layer = if logfmt == LogFormat::Text {
|
||||
Some(
|
||||
tracing_subscriber::fmt::layer()
|
||||
.with_ansi(false)
|
||||
.with_writer(std::io::stderr)
|
||||
.with_target(false),
|
||||
)
|
||||
} else {
|
||||
None
|
||||
};
|
||||
|
||||
tracing_subscriber::registry()
|
||||
.with(env_filter)
|
||||
.with(otlp_layer)
|
||||
.with(fmt_layer)
|
||||
.with(json_log_layer)
|
||||
.with(text_log_layer)
|
||||
.try_init()?;
|
||||
|
||||
Ok(LoggingGuard)
|
||||
@@ -94,3 +128,857 @@ impl Drop for LoggingGuard {
|
||||
tracing_utils::shutdown_tracing();
|
||||
}
|
||||
}
|
||||
|
||||
// TODO: make JSON the default
|
||||
#[derive(Copy, Clone, PartialEq, Eq, Default, Debug)]
|
||||
enum LogFormat {
|
||||
#[default]
|
||||
Text = 1,
|
||||
Json,
|
||||
}
|
||||
|
||||
impl LogFormat {
|
||||
fn from_env() -> anyhow::Result<Self> {
|
||||
let logfmt = env::var("LOGFMT");
|
||||
Ok(match logfmt.as_deref() {
|
||||
Err(_) => LogFormat::default(),
|
||||
Ok("text") => LogFormat::Text,
|
||||
Ok("json") => LogFormat::Json,
|
||||
Ok(logfmt) => anyhow::bail!("unknown log format: {logfmt}"),
|
||||
})
|
||||
}
|
||||
}
|
||||
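// Usage note (inferred from the parsing above, not from separate documentation): the
// format is selected with the LOGFMT environment variable, e.g. `LOGFMT=json`; an
// unset variable falls back to the text format, and any value other than "text" or
// "json" makes initialization fail.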
|
||||
trait MakeWriter {
|
||||
fn make_writer(&self) -> impl io::Write;
|
||||
}
|
||||
|
||||
struct StderrWriter {
|
||||
stderr: io::Stderr,
|
||||
}
|
||||
|
||||
impl MakeWriter for StderrWriter {
|
||||
#[inline]
|
||||
fn make_writer(&self) -> impl io::Write {
|
||||
self.stderr.lock()
|
||||
}
|
||||
}
|
||||
|
||||
// TODO: move into separate module or even separate crate.
|
||||
trait Clock {
|
||||
fn now(&self) -> DateTime<Utc>;
|
||||
}
|
||||
|
||||
struct RealClock;
|
||||
|
||||
impl Clock for RealClock {
|
||||
#[inline]
|
||||
fn now(&self) -> DateTime<Utc> {
|
||||
Utc::now()
|
||||
}
|
||||
}
|
||||
|
||||
/// Name of the field used by the tracing crate to store the event message.
|
||||
const MESSAGE_FIELD: &str = "message";
|
||||
|
||||
thread_local! {
|
||||
/// Protects against deadlocks and double panics during log writing.
|
||||
/// The current panic handler will use tracing to log panic information.
|
||||
static REENTRANCY_GUARD: Cell<bool> = const { Cell::new(false) };
|
||||
/// Thread-local instance with per-thread buffer for log writing.
|
||||
static EVENT_FORMATTER: RefCell<EventFormatter> = RefCell::new(EventFormatter::new());
|
||||
/// Cached OS thread ID.
|
||||
static THREAD_ID: u64 = gettid::gettid();
|
||||
}
|
||||
|
||||
/// Implements tracing layer to handle events specific to logging.
|
||||
struct JsonLoggingLayer<C: Clock, W: MakeWriter> {
|
||||
clock: C,
|
||||
skipped_field_indices: papaya::HashMap<callsite::Identifier, SkippedFieldIndices>,
|
||||
writer: W,
|
||||
}
|
||||
|
||||
impl<S, C: Clock + 'static, W: MakeWriter + 'static> Layer<S> for JsonLoggingLayer<C, W>
|
||||
where
|
||||
S: Subscriber + for<'a> LookupSpan<'a>,
|
||||
{
|
||||
fn on_event(&self, event: &Event<'_>, ctx: Context<'_, S>) {
|
||||
use std::io::Write;
|
||||
|
||||
// TODO: consider special tracing subscriber to grab timestamp very
|
||||
// early, before OTel machinery, and add as event extension.
|
||||
let now = self.clock.now();
|
||||
|
||||
let res: io::Result<()> = REENTRANCY_GUARD.with(move |entered| {
|
||||
if entered.get() {
|
||||
let mut formatter = EventFormatter::new();
|
||||
formatter.format(now, event, &ctx, &self.skipped_field_indices)?;
|
||||
self.writer.make_writer().write_all(formatter.buffer())
|
||||
} else {
|
||||
entered.set(true);
|
||||
defer!(entered.set(false););
|
||||
|
||||
EVENT_FORMATTER.with_borrow_mut(move |formatter| {
|
||||
formatter.reset();
|
||||
formatter.format(now, event, &ctx, &self.skipped_field_indices)?;
|
||||
self.writer.make_writer().write_all(formatter.buffer())
|
||||
})
|
||||
}
|
||||
});
|
||||
|
||||
// In case logging fails we generate a simpler JSON object.
|
||||
if let Err(err) = res {
|
||||
if let Ok(mut line) = serde_json::to_vec(&serde_json::json!( {
|
||||
"timestamp": now.to_rfc3339_opts(chrono::SecondsFormat::Micros, true),
|
||||
"level": "ERROR",
|
||||
"message": format_args!("cannot log event: {err:?}"),
|
||||
"fields": {
|
||||
"event": format_args!("{event:?}"),
|
||||
},
|
||||
})) {
|
||||
line.push(b'\n');
|
||||
self.writer.make_writer().write_all(&line).ok();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Registers a SpanFields instance as a span extension.
|
||||
fn on_new_span(&self, attrs: &span::Attributes<'_>, id: &span::Id, ctx: Context<'_, S>) {
|
||||
let span = ctx.span(id).expect("span must exist");
|
||||
let fields = SpanFields::default();
|
||||
fields.record_fields(attrs);
|
||||
// This could deadlock when there's a panic somewhere in the tracing
|
||||
// event handling and a read or write guard is still held. This includes
|
||||
// the OTel subscriber.
|
||||
span.extensions_mut().insert(fields);
|
||||
}
|
||||
|
||||
fn on_record(&self, id: &span::Id, values: &span::Record<'_>, ctx: Context<'_, S>) {
|
||||
let span = ctx.span(id).expect("span must exist");
|
||||
let ext = span.extensions();
|
||||
if let Some(data) = ext.get::<SpanFields>() {
|
||||
data.record_fields(values);
|
||||
}
|
||||
}
|
||||
|
||||
/// Called (lazily) whenever a new log call is executed. We quickly check
|
||||
/// for duplicate field names and record duplicates as skippable. Last one
|
||||
/// wins.
|
||||
fn register_callsite(&self, metadata: &'static Metadata<'static>) -> Interest {
|
||||
if !metadata.is_event() {
|
||||
// Must not return Interest::never(), because then we wouldn't get trace and span data.
|
||||
return Interest::always();
|
||||
}
|
||||
|
||||
let mut field_indices = SkippedFieldIndices::default();
|
||||
let mut seen_fields = HashMap::<&'static str, usize>::new();
|
||||
for field in metadata.fields() {
|
||||
use std::collections::hash_map::Entry;
|
||||
match seen_fields.entry(field.name()) {
|
||||
Entry::Vacant(entry) => {
|
||||
// field not seen yet
|
||||
entry.insert(field.index());
|
||||
}
|
||||
Entry::Occupied(mut entry) => {
|
||||
// replace currently stored index
|
||||
let old_index = entry.insert(field.index());
|
||||
// ... and append it to list of skippable indices
|
||||
field_indices.push(old_index);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if !field_indices.is_empty() {
|
||||
self.skipped_field_indices
|
||||
.pin()
|
||||
.insert(metadata.callsite(), field_indices);
|
||||
}
|
||||
|
||||
Interest::always()
|
||||
}
|
||||
}
|
||||
|
||||
/// Stores span field values recorded during the span's lifetime.
|
||||
#[derive(Default)]
|
||||
struct SpanFields {
|
||||
// TODO: Switch to custom enum with lasso::Spur for Strings?
|
||||
fields: papaya::HashMap<&'static str, serde_json::Value>,
|
||||
}
|
||||
|
||||
impl SpanFields {
|
||||
#[inline]
|
||||
fn record_fields<R: tracing_subscriber::field::RecordFields>(&self, fields: R) {
|
||||
fields.record(&mut SpanFieldsRecorder {
|
||||
fields: self.fields.pin(),
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
/// Implements a tracing field visitor to convert and store values.
|
||||
struct SpanFieldsRecorder<'m, S, G> {
|
||||
fields: papaya::HashMapRef<'m, &'static str, serde_json::Value, S, G>,
|
||||
}
|
||||
|
||||
impl<S: BuildHasher, G: papaya::Guard> tracing::field::Visit for SpanFieldsRecorder<'_, S, G> {
|
||||
#[inline]
|
||||
fn record_f64(&mut self, field: &tracing::field::Field, value: f64) {
|
||||
self.fields
|
||||
.insert(field.name(), serde_json::Value::from(value));
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn record_i64(&mut self, field: &tracing::field::Field, value: i64) {
|
||||
self.fields
|
||||
.insert(field.name(), serde_json::Value::from(value));
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn record_u64(&mut self, field: &tracing::field::Field, value: u64) {
|
||||
self.fields
|
||||
.insert(field.name(), serde_json::Value::from(value));
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn record_i128(&mut self, field: &tracing::field::Field, value: i128) {
|
||||
if let Ok(value) = i64::try_from(value) {
|
||||
self.fields
|
||||
.insert(field.name(), serde_json::Value::from(value));
|
||||
} else {
|
||||
self.fields
|
||||
.insert(field.name(), serde_json::Value::from(format!("{value}")));
|
||||
}
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn record_u128(&mut self, field: &tracing::field::Field, value: u128) {
|
||||
if let Ok(value) = u64::try_from(value) {
|
||||
self.fields
|
||||
.insert(field.name(), serde_json::Value::from(value));
|
||||
} else {
|
||||
self.fields
|
||||
.insert(field.name(), serde_json::Value::from(format!("{value}")));
|
||||
}
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn record_bool(&mut self, field: &tracing::field::Field, value: bool) {
|
||||
self.fields
|
||||
.insert(field.name(), serde_json::Value::from(value));
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn record_bytes(&mut self, field: &tracing::field::Field, value: &[u8]) {
|
||||
self.fields
|
||||
.insert(field.name(), serde_json::Value::from(value));
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn record_str(&mut self, field: &tracing::field::Field, value: &str) {
|
||||
self.fields
|
||||
.insert(field.name(), serde_json::Value::from(value));
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn record_debug(&mut self, field: &tracing::field::Field, value: &dyn std::fmt::Debug) {
|
||||
self.fields
|
||||
.insert(field.name(), serde_json::Value::from(format!("{value:?}")));
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn record_error(
|
||||
&mut self,
|
||||
field: &tracing::field::Field,
|
||||
value: &(dyn std::error::Error + 'static),
|
||||
) {
|
||||
self.fields
|
||||
.insert(field.name(), serde_json::Value::from(format!("{value}")));
|
||||
}
|
||||
}
|
||||
|
||||
/// List of field indices skipped during logging. Can list duplicate fields or
|
||||
/// metafields not meant to be logged.
|
||||
#[derive(Clone, Default)]
|
||||
struct SkippedFieldIndices {
|
||||
bits: u64,
|
||||
}
|
||||
|
||||
impl SkippedFieldIndices {
|
||||
#[inline]
|
||||
fn is_empty(&self) -> bool {
|
||||
self.bits == 0
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn push(&mut self, index: usize) {
|
||||
self.bits |= 1u64
|
||||
.checked_shl(index as u32)
|
||||
.expect("field index too large");
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn contains(&self, index: usize) -> bool {
|
||||
self.bits
|
||||
& 1u64
|
||||
.checked_shl(index as u32)
|
||||
.expect("field index too large")
|
||||
!= 0
|
||||
}
|
||||
}
|
||||
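// Small self-contained check of the bitset semantics above: pushing an index sets its
// bit and `contains` reports it back; indices above 63 would panic via `checked_shl`,
// which is fine since tracing callsites have far fewer fields. Illustrative test only.
#[cfg(test)]
mod skipped_field_indices_sketch {
    use super::SkippedFieldIndices;

    #[test]
    fn push_and_contains() {
        let mut indices = SkippedFieldIndices::default();
        assert!(indices.is_empty());
        indices.push(3);
        indices.push(17);
        assert!(indices.contains(3));
        assert!(indices.contains(17));
        assert!(!indices.contains(4));
        assert!(!indices.is_empty());
    }
}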
|
||||
/// Formats a tracing event and writes JSON to its internal buffer including a newline.
|
||||
// TODO: buffer capacity management, truncate if too large
|
||||
struct EventFormatter {
|
||||
logline_buffer: Vec<u8>,
|
||||
}
|
||||
|
||||
impl EventFormatter {
|
||||
#[inline]
|
||||
fn new() -> Self {
|
||||
EventFormatter {
|
||||
logline_buffer: Vec::new(),
|
||||
}
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn buffer(&self) -> &[u8] {
|
||||
&self.logline_buffer
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn reset(&mut self) {
|
||||
self.logline_buffer.clear();
|
||||
}
|
||||
|
||||
fn format<S>(
|
||||
&mut self,
|
||||
now: DateTime<Utc>,
|
||||
event: &Event<'_>,
|
||||
ctx: &Context<'_, S>,
|
||||
skipped_field_indices: &papaya::HashMap<callsite::Identifier, SkippedFieldIndices>,
|
||||
) -> io::Result<()>
|
||||
where
|
||||
S: Subscriber + for<'a> LookupSpan<'a>,
|
||||
{
|
||||
let timestamp = now.to_rfc3339_opts(chrono::SecondsFormat::Micros, true);
|
||||
|
||||
use tracing_log::NormalizeEvent;
|
||||
let normalized_meta = event.normalized_metadata();
|
||||
let meta = normalized_meta.as_ref().unwrap_or_else(|| event.metadata());
|
||||
|
||||
let skipped_field_indices = skipped_field_indices.pin();
|
||||
let skipped_field_indices = skipped_field_indices.get(&meta.callsite());
|
||||
|
||||
let mut serialize = || {
|
||||
let mut serializer = serde_json::Serializer::new(&mut self.logline_buffer);
|
||||
|
||||
let mut serializer = serializer.serialize_map(None)?;
|
||||
|
||||
// Timestamp comes first, so raw lines can be sorted by timestamp.
|
||||
serializer.serialize_entry("timestamp", ×tamp)?;
|
||||
|
||||
// Level next.
|
||||
serializer.serialize_entry("level", &meta.level().as_str())?;
|
||||
|
||||
// Message next.
|
||||
serializer.serialize_key("message")?;
|
||||
let mut message_extractor =
|
||||
MessageFieldExtractor::new(serializer, skipped_field_indices);
|
||||
event.record(&mut message_extractor);
|
||||
let mut serializer = message_extractor.into_serializer()?;
|
||||
|
||||
let mut fields_present = FieldsPresent(false, skipped_field_indices);
|
||||
event.record(&mut fields_present);
|
||||
if fields_present.0 {
|
||||
serializer.serialize_entry(
|
||||
"fields",
|
||||
&SerializableEventFields(event, skipped_field_indices),
|
||||
)?;
|
||||
}
|
||||
|
||||
let pid = std::process::id();
|
||||
if pid != 1 {
|
||||
serializer.serialize_entry("process_id", &pid)?;
|
||||
}
|
||||
|
||||
THREAD_ID.with(|tid| serializer.serialize_entry("thread_id", tid))?;
|
||||
|
||||
// TODO: tls cache? name could change
|
||||
if let Some(thread_name) = std::thread::current().name() {
|
||||
if !thread_name.is_empty() && thread_name != "tokio-runtime-worker" {
|
||||
serializer.serialize_entry("thread_name", thread_name)?;
|
||||
}
|
||||
}
|
||||
|
||||
if let Some(task_id) = tokio::task::try_id() {
|
||||
serializer.serialize_entry("task_id", &format_args!("{task_id}"))?;
|
||||
}
|
||||
|
||||
serializer.serialize_entry("target", meta.target())?;
|
||||
|
||||
if let Some(module) = meta.module_path() {
|
||||
if module != meta.target() {
|
||||
serializer.serialize_entry("module", module)?;
|
||||
}
|
||||
}
|
||||
|
||||
if let Some(file) = meta.file() {
|
||||
if let Some(line) = meta.line() {
|
||||
serializer.serialize_entry("src", &format_args!("{file}:{line}"))?;
|
||||
} else {
|
||||
serializer.serialize_entry("src", file)?;
|
||||
}
|
||||
}
|
||||
|
||||
{
|
||||
let otel_context = Span::current().context();
|
||||
let otel_spanref = otel_context.span();
|
||||
let span_context = otel_spanref.span_context();
|
||||
if span_context.is_valid() {
|
||||
serializer.serialize_entry(
|
||||
"trace_id",
|
||||
&format_args!("{}", span_context.trace_id()),
|
||||
)?;
|
||||
}
|
||||
}
|
||||
|
||||
serializer.serialize_entry("spans", &SerializableSpanStack(ctx))?;
|
||||
|
||||
serializer.end()
|
||||
};
|
||||
|
||||
serialize().map_err(io::Error::other)?;
|
||||
self.logline_buffer.push(b'\n');
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
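// Rough shape of one emitted line, inferred from the serialization order above; the
// values are invented and optional keys (fields, process_id, thread_name, module,
// trace_id, ...) only appear when the corresponding condition holds:
//
// {"timestamp":"2025-02-07T12:34:56.789012Z","level":"INFO","message":"connected",
//  "fields":{"peer_addr":"10.0.0.1"},"thread_id":12345,"task_id":"42",
//  "target":"proxy::proxy","src":"proxy/src/proxy.rs:123","spans":{...}}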
|
||||
/// Extracts the message field that's mixed in with other fields.
|
||||
struct MessageFieldExtractor<'a, S: serde::ser::SerializeMap> {
|
||||
serializer: S,
|
||||
skipped_field_indices: Option<&'a SkippedFieldIndices>,
|
||||
state: Option<Result<(), S::Error>>,
|
||||
}
|
||||
|
||||
impl<'a, S: serde::ser::SerializeMap> MessageFieldExtractor<'a, S> {
|
||||
#[inline]
|
||||
fn new(serializer: S, skipped_field_indices: Option<&'a SkippedFieldIndices>) -> Self {
|
||||
Self {
|
||||
serializer,
|
||||
skipped_field_indices,
|
||||
state: None,
|
||||
}
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn into_serializer(mut self) -> Result<S, S::Error> {
|
||||
match self.state {
|
||||
Some(Ok(())) => {}
|
||||
Some(Err(err)) => return Err(err),
|
||||
None => self.serializer.serialize_value("")?,
|
||||
}
|
||||
Ok(self.serializer)
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn accept_field(&self, field: &tracing::field::Field) -> bool {
|
||||
self.state.is_none()
|
||||
&& field.name() == MESSAGE_FIELD
|
||||
&& !self
|
||||
.skipped_field_indices
|
||||
.is_some_and(|i| i.contains(field.index()))
|
||||
}
|
||||
}
|
||||
|
||||
impl<S: serde::ser::SerializeMap> tracing::field::Visit for MessageFieldExtractor<'_, S> {
|
||||
#[inline]
|
||||
fn record_f64(&mut self, field: &tracing::field::Field, value: f64) {
|
||||
if self.accept_field(field) {
|
||||
self.state = Some(self.serializer.serialize_value(&value));
|
||||
}
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn record_i64(&mut self, field: &tracing::field::Field, value: i64) {
|
||||
if self.accept_field(field) {
|
||||
self.state = Some(self.serializer.serialize_value(&value));
|
||||
}
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn record_u64(&mut self, field: &tracing::field::Field, value: u64) {
|
||||
if self.accept_field(field) {
|
||||
self.state = Some(self.serializer.serialize_value(&value));
|
||||
}
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn record_i128(&mut self, field: &tracing::field::Field, value: i128) {
|
||||
if self.accept_field(field) {
|
||||
self.state = Some(self.serializer.serialize_value(&value));
|
||||
}
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn record_u128(&mut self, field: &tracing::field::Field, value: u128) {
|
||||
if self.accept_field(field) {
|
||||
self.state = Some(self.serializer.serialize_value(&value));
|
||||
}
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn record_bool(&mut self, field: &tracing::field::Field, value: bool) {
|
||||
if self.accept_field(field) {
|
||||
self.state = Some(self.serializer.serialize_value(&value));
|
||||
}
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn record_bytes(&mut self, field: &tracing::field::Field, value: &[u8]) {
|
||||
if self.accept_field(field) {
|
||||
self.state = Some(self.serializer.serialize_value(&format_args!("{value:x?}")));
|
||||
}
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn record_str(&mut self, field: &tracing::field::Field, value: &str) {
|
||||
if self.accept_field(field) {
|
||||
self.state = Some(self.serializer.serialize_value(&value));
|
||||
}
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn record_debug(&mut self, field: &tracing::field::Field, value: &dyn std::fmt::Debug) {
|
||||
if self.accept_field(field) {
|
||||
self.state = Some(self.serializer.serialize_value(&format_args!("{value:?}")));
|
||||
}
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn record_error(
|
||||
&mut self,
|
||||
field: &tracing::field::Field,
|
||||
value: &(dyn std::error::Error + 'static),
|
||||
) {
|
||||
if self.accept_field(field) {
|
||||
self.state = Some(self.serializer.serialize_value(&format_args!("{value}")));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Checks if there's any fields and field values present. If not, the JSON subobject
|
||||
/// can be skipped.
|
||||
// This is entirely optional and only cosmetic, though maybe helps a
|
||||
// bit during log parsing in dashboards when there's no field with empty object.
|
||||
struct FieldsPresent<'a>(pub bool, Option<&'a SkippedFieldIndices>);
|
||||
|
||||
// Even though some methods have an overhead (error, bytes) it is assumed the
|
||||
// compiler won't include this since we ignore the value entirely.
|
||||
impl tracing::field::Visit for FieldsPresent<'_> {
|
||||
#[inline]
|
||||
fn record_debug(&mut self, field: &tracing::field::Field, _: &dyn std::fmt::Debug) {
|
||||
if !self.1.is_some_and(|i| i.contains(field.index()))
|
||||
&& field.name() != MESSAGE_FIELD
|
||||
&& !field.name().starts_with("log.")
|
||||
{
|
||||
self.0 |= true;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Serializes the fields directly supplied with a log event.
|
||||
struct SerializableEventFields<'a, 'event>(
|
||||
&'a tracing::Event<'event>,
|
||||
Option<&'a SkippedFieldIndices>,
|
||||
);
|
||||
|
||||
impl serde::ser::Serialize for SerializableEventFields<'_, '_> {
|
||||
fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
|
||||
where
|
||||
S: Serializer,
|
||||
{
|
||||
use serde::ser::SerializeMap;
|
||||
let serializer = serializer.serialize_map(None)?;
|
||||
let mut message_skipper = MessageFieldSkipper::new(serializer, self.1);
|
||||
self.0.record(&mut message_skipper);
|
||||
let serializer = message_skipper.into_serializer()?;
|
||||
serializer.end()
|
||||
}
|
||||
}
|
||||
|
||||
/// A tracing field visitor that skips the message field.
|
||||
struct MessageFieldSkipper<'a, S: serde::ser::SerializeMap> {
|
||||
serializer: S,
|
||||
skipped_field_indices: Option<&'a SkippedFieldIndices>,
|
||||
state: Result<(), S::Error>,
|
||||
}
|
||||
|
||||
impl<'a, S: serde::ser::SerializeMap> MessageFieldSkipper<'a, S> {
|
||||
#[inline]
|
||||
fn new(serializer: S, skipped_field_indices: Option<&'a SkippedFieldIndices>) -> Self {
|
||||
Self {
|
||||
serializer,
|
||||
skipped_field_indices,
|
||||
state: Ok(()),
|
||||
}
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn accept_field(&self, field: &tracing::field::Field) -> bool {
|
||||
self.state.is_ok()
|
||||
&& field.name() != MESSAGE_FIELD
|
||||
&& !field.name().starts_with("log.")
|
||||
&& !self
|
||||
.skipped_field_indices
|
||||
.is_some_and(|i| i.contains(field.index()))
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn into_serializer(self) -> Result<S, S::Error> {
|
||||
self.state?;
|
||||
Ok(self.serializer)
|
||||
}
|
||||
}
|
||||
|
||||
impl<S: serde::ser::SerializeMap> tracing::field::Visit for MessageFieldSkipper<'_, S> {
|
||||
#[inline]
|
||||
fn record_f64(&mut self, field: &tracing::field::Field, value: f64) {
|
||||
if self.accept_field(field) {
|
||||
self.state = self.serializer.serialize_entry(field.name(), &value);
|
||||
}
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn record_i64(&mut self, field: &tracing::field::Field, value: i64) {
|
||||
if self.accept_field(field) {
|
||||
self.state = self.serializer.serialize_entry(field.name(), &value);
|
||||
}
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn record_u64(&mut self, field: &tracing::field::Field, value: u64) {
|
||||
if self.accept_field(field) {
|
||||
self.state = self.serializer.serialize_entry(field.name(), &value);
|
||||
}
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn record_i128(&mut self, field: &tracing::field::Field, value: i128) {
|
||||
if self.accept_field(field) {
|
||||
self.state = self.serializer.serialize_entry(field.name(), &value);
|
||||
}
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn record_u128(&mut self, field: &tracing::field::Field, value: u128) {
|
||||
if self.accept_field(field) {
|
||||
self.state = self.serializer.serialize_entry(field.name(), &value);
|
||||
}
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn record_bool(&mut self, field: &tracing::field::Field, value: bool) {
|
||||
if self.accept_field(field) {
|
||||
self.state = self.serializer.serialize_entry(field.name(), &value);
|
||||
}
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn record_bytes(&mut self, field: &tracing::field::Field, value: &[u8]) {
|
||||
if self.accept_field(field) {
|
||||
self.state = self
|
||||
.serializer
|
||||
.serialize_entry(field.name(), &format_args!("{value:x?}"));
|
||||
}
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn record_str(&mut self, field: &tracing::field::Field, value: &str) {
|
||||
if self.accept_field(field) {
|
||||
self.state = self.serializer.serialize_entry(field.name(), &value);
|
||||
}
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn record_debug(&mut self, field: &tracing::field::Field, value: &dyn std::fmt::Debug) {
|
||||
if self.accept_field(field) {
|
||||
self.state = self
|
||||
.serializer
|
||||
.serialize_entry(field.name(), &format_args!("{value:?}"));
|
||||
}
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn record_error(
|
||||
&mut self,
|
||||
field: &tracing::field::Field,
|
||||
value: &(dyn std::error::Error + 'static),
|
||||
) {
|
||||
if self.accept_field(field) {
|
||||
self.state = self.serializer.serialize_value(&format_args!("{value}"));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Serializes the span stack from root to leaf (parent of event) enumerated
|
||||
/// inside an object where the keys are just the number padded with zeroes
|
||||
/// to retain sorting order.
|
||||
// The object is necessary because Loki cannot flatten arrays.
|
||||
struct SerializableSpanStack<'a, 'b, Span>(&'b Context<'a, Span>)
|
||||
where
|
||||
Span: Subscriber + for<'lookup> LookupSpan<'lookup>;
|
||||
|
||||
impl<Span> serde::ser::Serialize for SerializableSpanStack<'_, '_, Span>
|
||||
where
|
||||
Span: Subscriber + for<'lookup> LookupSpan<'lookup>,
|
||||
{
|
||||
fn serialize<Ser>(&self, serializer: Ser) -> Result<Ser::Ok, Ser::Error>
|
||||
where
|
||||
Ser: serde::ser::Serializer,
|
||||
{
|
||||
let mut serializer = serializer.serialize_map(None)?;
|
||||
|
||||
if let Some(leaf_span) = self.0.lookup_current() {
|
||||
for (i, span) in leaf_span.scope().from_root().enumerate() {
|
||||
serializer.serialize_entry(&format_args!("{i:02}"), &SerializableSpan(&span))?;
|
||||
}
|
||||
}
|
||||
|
||||
serializer.end()
|
||||
}
|
||||
}
|
||||
|
||||
/// Serializes a single span. Include the span ID, name and its fields as
|
||||
/// recorded up to this point.
|
||||
struct SerializableSpan<'a, 'b, Span>(&'b SpanRef<'a, Span>)
|
||||
where
|
||||
Span: for<'lookup> LookupSpan<'lookup>;
|
||||
|
||||
impl<Span> serde::ser::Serialize for SerializableSpan<'_, '_, Span>
|
||||
where
|
||||
Span: for<'lookup> LookupSpan<'lookup>,
|
||||
{
|
||||
fn serialize<Ser>(&self, serializer: Ser) -> Result<Ser::Ok, Ser::Error>
|
||||
where
|
||||
Ser: serde::ser::Serializer,
|
||||
{
|
||||
let mut serializer = serializer.serialize_map(None)?;
|
||||
// TODO: the span ID is probably only useful for debugging tracing.
|
||||
serializer.serialize_entry("span_id", &format_args!("{:016x}", self.0.id().into_u64()))?;
|
||||
serializer.serialize_entry("span_name", self.0.metadata().name())?;
|
||||
|
||||
let ext = self.0.extensions();
|
||||
if let Some(data) = ext.get::<SpanFields>() {
|
||||
for (key, value) in &data.fields.pin() {
|
||||
serializer.serialize_entry(key, value)?;
|
||||
}
|
||||
}
|
||||
|
||||
serializer.end()
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
#[allow(clippy::unwrap_used)]
|
||||
mod tests {
|
||||
use std::sync::{Arc, Mutex, MutexGuard};
|
||||
|
||||
use assert_json_diff::assert_json_eq;
|
||||
use tracing::info_span;
|
||||
|
||||
use super::*;
|
||||
|
||||
struct TestClock {
|
||||
current_time: Mutex<DateTime<Utc>>,
|
||||
}
|
||||
|
||||
impl Clock for Arc<TestClock> {
|
||||
fn now(&self) -> DateTime<Utc> {
|
||||
*self.current_time.lock().expect("poisoned")
|
||||
}
|
||||
}
|
||||
|
||||
struct VecWriter<'a> {
|
||||
buffer: MutexGuard<'a, Vec<u8>>,
|
||||
}
|
||||
|
||||
impl MakeWriter for Arc<Mutex<Vec<u8>>> {
|
||||
fn make_writer(&self) -> impl io::Write {
|
||||
VecWriter {
|
||||
buffer: self.lock().expect("poisoned"),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl io::Write for VecWriter<'_> {
|
||||
fn write(&mut self, buf: &[u8]) -> io::Result<usize> {
|
||||
self.buffer.write(buf)
|
||||
}
|
||||
|
||||
fn flush(&mut self) -> io::Result<()> {
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_field_collection() {
|
||||
let clock = Arc::new(TestClock {
|
||||
current_time: Mutex::new(Utc::now()),
|
||||
});
|
||||
let buffer = Arc::new(Mutex::new(Vec::new()));
|
||||
let log_layer = JsonLoggingLayer {
|
||||
clock: clock.clone(),
|
||||
skipped_field_indices: papaya::HashMap::default(),
|
||||
writer: buffer.clone(),
|
||||
};
|
||||
|
||||
let registry = tracing_subscriber::Registry::default().with(log_layer);
|
||||
|
||||
tracing::subscriber::with_default(registry, || {
|
||||
info_span!("span1", x = 40, x = 41, x = 42).in_scope(|| {
|
||||
info_span!("span2").in_scope(|| {
|
||||
tracing::error!(
|
||||
a = 1,
|
||||
a = 2,
|
||||
a = 3,
|
||||
message = "explicit message field",
|
||||
"implicit message field"
|
||||
);
|
||||
});
|
||||
});
|
||||
});
|
||||
|
||||
let buffer = Arc::try_unwrap(buffer)
|
||||
.expect("no other reference")
|
||||
.into_inner()
|
||||
.expect("poisoned");
|
||||
let actual: serde_json::Value = serde_json::from_slice(&buffer).expect("valid JSON");
|
||||
let expected: serde_json::Value = serde_json::json!(
|
||||
{
|
||||
"timestamp": clock.now().to_rfc3339_opts(chrono::SecondsFormat::Micros, true),
|
||||
"level": "ERROR",
|
||||
"message": "explicit message field",
|
||||
"fields": {
|
||||
"a": 3,
|
||||
},
|
||||
"spans": {
|
||||
"00":{
|
||||
"span_id": "0000000000000001",
|
||||
"span_name": "span1",
|
||||
"x": 42,
|
||||
},
|
||||
"01": {
|
||||
"span_id": "0000000000000002",
|
||||
"span_name": "span2",
|
||||
}
|
||||
},
|
||||
"src": actual.as_object().unwrap().get("src").unwrap().as_str().unwrap(),
|
||||
"target": "proxy::logging::tests",
|
||||
"process_id": actual.as_object().unwrap().get("process_id").unwrap().as_number().unwrap(),
|
||||
"thread_id": actual.as_object().unwrap().get("thread_id").unwrap().as_number().unwrap(),
|
||||
"thread_name": "logging::tests::test_field_collection",
|
||||
}
|
||||
);
|
||||
|
||||
assert_json_eq!(actual, expected);
|
||||
}
|
||||
}
|
||||
|
||||
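A hedged illustration of the log line this layer emits, derived from the expectations in test_field_collection above; every concrete value here is a placeholder, and fields such as timestamp, span_id, process_id, thread_id and thread_name vary per run:

    // Illustration only: approximate shape of one emitted log line for the
    // tracing::error! call in the test above. Placeholder values, not captured output.
    let _example: serde_json::Value = serde_json::json!({
        "timestamp": "2025-02-07T00:00:00.000000Z",
        "level": "ERROR",
        "message": "explicit message field",
        "fields": { "a": 3 },
        "spans": {
            "00": { "span_id": "0000000000000001", "span_name": "span1", "x": 42 },
            "01": { "span_id": "0000000000000002", "span_name": "span2" }
        },
        "target": "proxy::logging::tests",
        "process_id": 1234,
        "thread_id": 1,
        "thread_name": "logging::tests::test_field_collection"
    });
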
@@ -96,6 +96,16 @@ pub struct ProxyMetrics {
#[metric(metadata = Thresholds::with_buckets([0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 10.0, 20.0, 50.0, 100.0]))]
pub allowed_ips_number: Histogram<10>,

/// Number of cache hits/misses for VPC endpoint IDs.
pub vpc_endpoint_id_cache_stats: CounterVec<StaticLabelSet<CacheOutcome>>,

/// Number of cache hits/misses for access blocker flags.
pub access_blocker_flags_cache_stats: CounterVec<StaticLabelSet<CacheOutcome>>,

/// Number of allowed VPC endpoint IDs
#[metric(metadata = Thresholds::with_buckets([0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 10.0, 20.0, 50.0, 100.0]))]
pub allowed_vpc_endpoint_ids: Histogram<10>,

/// Number of connections (per sni).
pub accepted_connections_by_sni: CounterVec<StaticLabelSet<SniKind>>,

@@ -570,6 +580,9 @@ pub enum RedisEventsCount {
CancelSession,
PasswordUpdate,
AllowedIpsUpdate,
AllowedVpcEndpointIdsUpdateForProjects,
AllowedVpcEndpointIdsUpdateForAllProjectsInOrg,
BlockPublicOrVpcAccessUpdate,
}

pub struct ThreadPoolWorkers(usize);

@@ -201,25 +201,26 @@ impl CopyBuffer {
W: AsyncWrite + ?Sized,
{
loop {
// If our buffer is empty, then we need to read some data to
// continue.
if self.pos == self.cap && !self.read_done {
self.pos = 0;
self.cap = 0;

// If there is some space left in our buffer, then we try to read some
// data to continue, thus maximizing the chances of a large write.
if self.cap < self.buf.len() && !self.read_done {
match self.poll_fill_buf(cx, reader.as_mut()) {
Poll::Ready(Ok(())) => (),
Poll::Ready(Err(err)) => return Poll::Ready(Err(ErrorDirection::Read(err))),
Poll::Pending => {
// Try flushing when the reader has no progress to avoid deadlock
// when the reader depends on buffered writer.
if self.need_flush {
ready!(writer.as_mut().poll_flush(cx))
.map_err(ErrorDirection::Write)?;
self.need_flush = false;
}
// Ignore pending reads when our buffer is not empty, because
// we can try to write data immediately.
if self.pos == self.cap {
// Try flushing when the reader has no progress to avoid deadlock
// when the reader depends on buffered writer.
if self.need_flush {
ready!(writer.as_mut().poll_flush(cx))
.map_err(ErrorDirection::Write)?;
self.need_flush = false;
}

return Poll::Pending;
return Poll::Pending;
}
}
}
}
@@ -246,9 +247,13 @@ impl CopyBuffer {
"writer returned length larger than input slice"
);

// All data has been written, the buffer can be considered empty again
self.pos = 0;
self.cap = 0;

// If we've written all the data and we've seen EOF, flush out the
// data and finish the transfer.
if self.pos == self.cap && self.read_done {
if self.read_done {
ready!(writer.as_mut().poll_flush(cx)).map_err(ErrorDirection::Write)?;
return Poll::Ready(Ok(self.amt));
}

@@ -283,7 +283,8 @@ pub(crate) async fn handle_client<S: AsyncRead + AsyncWrite + Unpin>(
cancel_key_data,
ctx,
config.authentication_config.ip_allowlist_check_enabled,
auth_backend,
config.authentication_config.is_vpc_acccess_proxy,
auth_backend.get_api(),
)
.await
.inspect_err(|e| debug!(error = ?e, "cancel_session failed")).ok();

@@ -26,7 +26,7 @@ use crate::config::{ComputeConfig, RetryConfig};
use crate::control_plane::client::{ControlPlaneClient, TestControlPlaneClient};
use crate::control_plane::messages::{ControlPlaneErrorMessage, Details, MetricsAuxInfo, Status};
use crate::control_plane::{
self, CachedAllowedIps, CachedNodeInfo, CachedRoleSecret, NodeInfo, NodeInfoCache,
self, CachedAllowedIps, CachedAllowedVpcEndpointIds, CachedNodeInfo, NodeInfo, NodeInfoCache,
};
use crate::error::ErrorKind;
use crate::tls::client_config::compute_client_config_with_certs;
@@ -526,9 +526,19 @@ impl TestControlPlaneClient for TestConnectMechanism {
}
}

fn get_allowed_ips_and_secret(
fn get_allowed_ips(&self) -> Result<CachedAllowedIps, control_plane::errors::GetAuthInfoError> {
unimplemented!("not used in tests")
}

fn get_allowed_vpc_endpoint_ids(
&self,
) -> Result<(CachedAllowedIps, Option<CachedRoleSecret>), control_plane::errors::GetAuthInfoError>
) -> Result<CachedAllowedVpcEndpointIds, control_plane::errors::GetAuthInfoError> {
unimplemented!("not used in tests")
}

fn get_block_public_or_vpc_access(
&self,
) -> Result<control_plane::CachedAccessBlockerFlags, control_plane::errors::GetAuthInfoError>
{
unimplemented!("not used in tests")
}

@@ -2,7 +2,7 @@ use std::hash::Hash;
use std::sync::atomic::{AtomicUsize, Ordering};

use ahash::RandomState;
use dashmap::DashMap;
use clashmap::ClashMap;
use rand::{thread_rng, Rng};
use tokio::time::Instant;
use tracing::info;
@@ -14,7 +14,7 @@ use crate::intern::EndpointIdInt;
pub type EndpointRateLimiter = LeakyBucketRateLimiter<EndpointIdInt>;

pub struct LeakyBucketRateLimiter<Key> {
map: DashMap<Key, LeakyBucketState, RandomState>,
map: ClashMap<Key, LeakyBucketState, RandomState>,
config: utils::leaky_bucket::LeakyBucketConfig,
access_count: AtomicUsize,
}
@@ -27,7 +27,7 @@ impl<K: Hash + Eq> LeakyBucketRateLimiter<K> {

pub fn new_with_shards(config: LeakyBucketConfig, shards: usize) -> Self {
Self {
map: DashMap::with_hasher_and_shard_amount(RandomState::new(), shards),
map: ClashMap::with_hasher_and_shard_amount(RandomState::new(), shards),
config: config.into(),
access_count: AtomicUsize::new(0),
}
@@ -58,7 +58,7 @@ impl<K: Hash + Eq> LeakyBucketRateLimiter<K> {
let shard = thread_rng().gen_range(0..n);
self.map.shards()[shard]
.write()
.retain(|_, value| !value.get().bucket_is_empty(now));
.retain(|(_, value)| !value.bucket_is_empty(now));
}
}

@@ -5,7 +5,7 @@ use std::sync::atomic::{self, AtomicUsize};
use std::sync::Mutex;

use anyhow::bail;
use dashmap::DashMap;
use clashmap::ClashMap;
use itertools::Itertools;
use rand::rngs::StdRng;
use rand::{Rng, SeedableRng};
@@ -62,7 +62,7 @@ impl GlobalRateLimiter {
pub type WakeComputeRateLimiter = BucketRateLimiter<EndpointIdInt, StdRng, RandomState>;

pub struct BucketRateLimiter<Key, Rand = StdRng, Hasher = RandomState> {
map: DashMap<Key, Vec<RateBucket>, Hasher>,
map: ClashMap<Key, Vec<RateBucket>, Hasher>,
info: Cow<'static, [RateBucketInfo]>,
access_count: AtomicUsize,
rand: Mutex<Rand>,
@@ -202,7 +202,7 @@ impl<K: Hash + Eq, R: Rng, S: BuildHasher + Clone> BucketRateLimiter<K, R, S> {
info!(buckets = ?info, "endpoint rate limiter");
Self {
info,
map: DashMap::with_hasher_and_shard_amount(hasher, 64),
map: ClashMap::with_hasher_and_shard_amount(hasher, 64),
access_count: AtomicUsize::new(1), // start from 1 to avoid GC on the first request
rand: Mutex::new(rand),
}

@@ -1,7 +1,8 @@
use std::io::ErrorKind;

use anyhow::Ok;
use pq_proto::{id_to_cancel_key, CancelKeyData};
use serde::{Deserialize, Serialize};
use std::io::ErrorKind;

pub mod keyspace {
pub const CANCEL_PREFIX: &str = "cancel";

@@ -1,7 +1,6 @@
use redis::{AsyncCommands, ToRedisArgs};

use super::connection_with_credentials_provider::ConnectionWithCredentialsProvider;

use crate::rate_limiter::{GlobalRateLimiter, RateBucketInfo};

pub struct RedisKVClient {

@@ -10,7 +10,7 @@ use uuid::Uuid;

use super::connection_with_credentials_provider::ConnectionWithCredentialsProvider;
use crate::cache::project_info::ProjectInfoCache;
use crate::intern::{ProjectIdInt, RoleNameInt};
use crate::intern::{AccountIdInt, ProjectIdInt, RoleNameInt};
use crate::metrics::{Metrics, RedisErrors, RedisEventsCount};

const CPLANE_CHANNEL_NAME: &str = "neondb-proxy-ws-updates";
@@ -86,9 +86,7 @@ pub(crate) struct BlockPublicOrVpcAccessUpdated {

#[derive(Clone, Debug, Serialize, Deserialize, Eq, PartialEq)]
pub(crate) struct AllowedVpcEndpointsUpdatedForOrg {
// TODO: change type once the implementation is more fully fledged.
// See e.g. https://github.com/neondatabase/neon/pull/10073.
account_id: ProjectIdInt,
account_id: AccountIdInt,
}

#[derive(Clone, Debug, Serialize, Deserialize, Eq, PartialEq)]
@@ -205,6 +203,24 @@ impl<C: ProjectInfoCache + Send + Sync + 'static> MessageHandler<C> {
.proxy
.redis_events_count
.inc(RedisEventsCount::PasswordUpdate);
} else if matches!(
msg,
Notification::AllowedVpcEndpointsUpdatedForProjects { .. }
) {
Metrics::get()
.proxy
.redis_events_count
.inc(RedisEventsCount::AllowedVpcEndpointIdsUpdateForProjects);
} else if matches!(msg, Notification::AllowedVpcEndpointsUpdatedForOrg { .. }) {
Metrics::get()
.proxy
.redis_events_count
.inc(RedisEventsCount::AllowedVpcEndpointIdsUpdateForAllProjectsInOrg);
} else if matches!(msg, Notification::BlockPublicOrVpcAccessUpdated { .. }) {
Metrics::get()
.proxy
.redis_events_count
.inc(RedisEventsCount::BlockPublicOrVpcAccessUpdate);
}
// TODO: add additional metrics for the other event types.

@@ -230,20 +246,26 @@ fn invalidate_cache<C: ProjectInfoCache>(cache: Arc<C>, msg: Notification) {
Notification::AllowedIpsUpdate { allowed_ips_update } => {
cache.invalidate_allowed_ips_for_project(allowed_ips_update.project_id);
}
Notification::BlockPublicOrVpcAccessUpdated {
block_public_or_vpc_access_updated,
} => cache.invalidate_block_public_or_vpc_access_for_project(
block_public_or_vpc_access_updated.project_id,
),
Notification::AllowedVpcEndpointsUpdatedForOrg {
allowed_vpc_endpoints_updated_for_org,
} => cache.invalidate_allowed_vpc_endpoint_ids_for_org(
allowed_vpc_endpoints_updated_for_org.account_id,
),
Notification::AllowedVpcEndpointsUpdatedForProjects {
allowed_vpc_endpoints_updated_for_projects,
} => cache.invalidate_allowed_vpc_endpoint_ids_for_projects(
allowed_vpc_endpoints_updated_for_projects.project_ids,
),
Notification::PasswordUpdate { password_update } => cache
.invalidate_role_secret_for_project(
password_update.project_id,
password_update.role_name,
),
Notification::BlockPublicOrVpcAccessUpdated { .. } => {
// https://github.com/neondatabase/neon/pull/10073
}
Notification::AllowedVpcEndpointsUpdatedForOrg { .. } => {
// https://github.com/neondatabase/neon/pull/10073
}
Notification::AllowedVpcEndpointsUpdatedForProjects { .. } => {
// https://github.com/neondatabase/neon/pull/10073
}
Notification::UnknownTopic => unreachable!(),
}
}

@@ -30,6 +30,7 @@ use crate::control_plane::locks::ApiLocks;
use crate::control_plane::CachedNodeInfo;
use crate::error::{ErrorKind, ReportableError, UserFacingError};
use crate::intern::EndpointIdInt;
use crate::protocol2::ConnectionInfoExtra;
use crate::proxy::connect_compute::ConnectMechanism;
use crate::proxy::retry::{CouldRetry, ShouldRetryWakeCompute};
use crate::rate_limiter::EndpointRateLimiter;
@@ -57,23 +58,52 @@ impl PoolingBackend {

let user_info = user_info.clone();
let backend = self.auth_backend.as_ref().map(|()| user_info.clone());
let (allowed_ips, maybe_secret) = backend.get_allowed_ips_and_secret(ctx).await?;
let allowed_ips = backend.get_allowed_ips(ctx).await?;

if self.config.authentication_config.ip_allowlist_check_enabled
&& !check_peer_addr_is_in_list(&ctx.peer_addr(), &allowed_ips)
{
return Err(AuthError::ip_address_not_allowed(ctx.peer_addr()));
}

let access_blocker_flags = backend.get_block_public_or_vpc_access(ctx).await?;
if self.config.authentication_config.is_vpc_acccess_proxy {
if access_blocker_flags.vpc_access_blocked {
return Err(AuthError::NetworkNotAllowed);
}

let extra = ctx.extra();
let incoming_endpoint_id = match extra {
None => String::new(),
Some(ConnectionInfoExtra::Aws { vpce_id }) => {
// Convert the vpce_id to a string
String::from_utf8(vpce_id.to_vec()).unwrap_or_default()
}
Some(ConnectionInfoExtra::Azure { link_id }) => link_id.to_string(),
};

if incoming_endpoint_id.is_empty() {
return Err(AuthError::MissingVPCEndpointId);
}

let allowed_vpc_endpoint_ids = backend.get_allowed_vpc_endpoint_ids(ctx).await?;
// TODO: For now an empty VPC endpoint ID list means all are allowed. We should replace that.
if !allowed_vpc_endpoint_ids.is_empty()
&& !allowed_vpc_endpoint_ids.contains(&incoming_endpoint_id)
{
return Err(AuthError::vpc_endpoint_id_not_allowed(incoming_endpoint_id));
}
} else if access_blocker_flags.public_access_blocked {
return Err(AuthError::NetworkNotAllowed);
}

if !self
.endpoint_rate_limiter
.check(user_info.endpoint.clone().into(), 1)
{
return Err(AuthError::too_many_connections());
}
let cached_secret = match maybe_secret {
Some(secret) => secret,
None => backend.get_role_secret(ctx).await?,
};

let cached_secret = backend.get_role_secret(ctx).await?;
let secret = match cached_secret.value.clone() {
Some(secret) => self.config.authentication_config.check_rate_limit(
ctx,

@@ -5,7 +5,7 @@ use std::sync::atomic::{self, AtomicUsize};
use std::sync::{Arc, Weak};
use std::time::Duration;

use dashmap::DashMap;
use clashmap::ClashMap;
use parking_lot::RwLock;
use postgres_client::ReadyForQueryStatus;
use rand::Rng;
@@ -351,11 +351,11 @@ where
//
// That should be a fairly contended map, so return reference to the per-endpoint
// pool as early as possible and release the lock.
pub(crate) global_pool: DashMap<EndpointCacheKey, Arc<RwLock<P>>>,
pub(crate) global_pool: ClashMap<EndpointCacheKey, Arc<RwLock<P>>>,

/// Number of endpoint-connection pools
///
/// [`DashMap::len`] iterates over all inner pools and acquires a read lock on each.
/// [`ClashMap::len`] iterates over all inner pools and acquires a read lock on each.
/// That seems like far too much effort, so we're using a relaxed increment counter instead.
/// It's only used for diagnostics.
pub(crate) global_pool_size: AtomicUsize,
@@ -396,7 +396,7 @@ where
pub(crate) fn new(config: &'static crate::config::HttpConfig) -> Arc<Self> {
let shards = config.pool_options.pool_shards;
Arc::new(Self {
global_pool: DashMap::with_shard_amount(shards),
global_pool: ClashMap::with_shard_amount(shards),
global_pool_size: AtomicUsize::new(0),
config,
global_connections_count: Arc::new(AtomicUsize::new(0)),
@@ -442,10 +442,10 @@ where
.start_timer();
let current_len = shard.len();
let mut clients_removed = 0;
shard.retain(|endpoint, x| {
shard.retain(|(endpoint, x)| {
// if the current endpoint pool is unique (no other strong or weak references)
// then it is currently not in use by any connections.
if let Some(pool) = Arc::get_mut(x.get_mut()) {
if let Some(pool) = Arc::get_mut(x) {
let endpoints = pool.get_mut();
clients_removed = endpoints.clear_closed();

@@ -97,6 +97,8 @@ smol_str_wrapper!(EndpointId);
smol_str_wrapper!(BranchId);
// 90% of project strings are 23 characters or less.
smol_str_wrapper!(ProjectId);
// 90% of account strings are 23 characters or less.
smol_str_wrapper!(AccountId);

// will usually equal endpoint ID
smol_str_wrapper!(EndpointCacheKey);

@@ -10,9 +10,9 @@ use anyhow::{bail, Context};
use async_compression::tokio::write::GzipEncoder;
use bytes::Bytes;
use chrono::{DateTime, Datelike, Timelike, Utc};
use clashmap::mapref::entry::Entry;
use clashmap::ClashMap;
use consumption_metrics::{idempotency_key, Event, EventChunk, EventType, CHUNK_SIZE};
use dashmap::mapref::entry::Entry;
use dashmap::DashMap;
use once_cell::sync::Lazy;
use remote_storage::{GenericRemoteStorage, RemotePath, TimeoutOrCancel};
use serde::{Deserialize, Serialize};
@@ -137,7 +137,7 @@ type FastHasher = std::hash::BuildHasherDefault<rustc_hash::FxHasher>;

#[derive(Default)]
pub(crate) struct Metrics {
endpoints: DashMap<Ids, Arc<MetricCounter>, FastHasher>,
endpoints: ClashMap<Ids, Arc<MetricCounter>, FastHasher>,
}

impl Metrics {
@@ -213,7 +213,7 @@ pub async fn task_main(config: &MetricCollectionConfig) -> anyhow::Result<Infall
}

fn collect_and_clear_metrics<C: Clearable>(
endpoints: &DashMap<Ids, Arc<C>, FastHasher>,
endpoints: &ClashMap<Ids, Arc<C>, FastHasher>,
) -> Vec<(Ids, u64)> {
let mut metrics_to_clear = Vec::new();

@@ -271,7 +271,7 @@ fn create_event_chunks<'a>(
#[expect(clippy::too_many_arguments)]
#[instrument(skip_all)]
async fn collect_metrics_iteration(
endpoints: &DashMap<Ids, Arc<MetricCounter>, FastHasher>,
endpoints: &ClashMap<Ids, Arc<MetricCounter>, FastHasher>,
client: &http::ClientWithMiddleware,
metric_collection_endpoint: &reqwest::Url,
storage: Option<&GenericRemoteStorage>,