mirror of
https://github.com/neondatabase/neon.git
synced 2026-05-17 21:20:37 +00:00
Compare commits
4 Commits
release-67
...
jcsp/layer
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
b58e9ef05b | ||
|
|
c2c9530ab7 | ||
|
|
c62f45fff4 | ||
|
|
6a9d51b41f |
46
.github/workflows/_benchmarking_preparation.yml
vendored
46
.github/workflows/_benchmarking_preparation.yml
vendored
@@ -3,23 +3,19 @@ name: Prepare benchmarking databases by restoring dumps
|
||||
on:
|
||||
workflow_call:
|
||||
# no inputs needed
|
||||
|
||||
|
||||
defaults:
|
||||
run:
|
||||
shell: bash -euxo pipefail {0}
|
||||
|
||||
jobs:
|
||||
setup-databases:
|
||||
permissions:
|
||||
contents: write
|
||||
statuses: write
|
||||
id-token: write # aws-actions/configure-aws-credentials
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
platform: [ aws-rds-postgres, aws-aurora-serverless-v2-postgres, neon ]
|
||||
platform: [ aws-rds-postgres, aws-aurora-serverless-v2-postgres, neon ]
|
||||
database: [ clickbench, tpch, userexample ]
|
||||
|
||||
|
||||
env:
|
||||
LD_LIBRARY_PATH: /tmp/neon/pg_install/v16/lib
|
||||
PLATFORM: ${{ matrix.platform }}
|
||||
@@ -27,10 +23,7 @@ jobs:
|
||||
|
||||
runs-on: [ self-hosted, us-east-2, x64 ]
|
||||
container:
|
||||
image: neondatabase/build-tools:pinned
|
||||
credentials:
|
||||
username: ${{ secrets.NEON_DOCKERHUB_USERNAME }}
|
||||
password: ${{ secrets.NEON_DOCKERHUB_PASSWORD }}
|
||||
image: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/build-tools:pinned
|
||||
options: --init
|
||||
|
||||
steps:
|
||||
@@ -39,13 +32,13 @@ jobs:
|
||||
run: |
|
||||
case "${PLATFORM}" in
|
||||
neon)
|
||||
CONNSTR=${{ secrets.BENCHMARK_CAPTEST_CONNSTR }}
|
||||
CONNSTR=${{ secrets.BENCHMARK_CAPTEST_CONNSTR }}
|
||||
;;
|
||||
aws-rds-postgres)
|
||||
CONNSTR=${{ secrets.BENCHMARK_RDS_POSTGRES_CONNSTR }}
|
||||
CONNSTR=${{ secrets.BENCHMARK_RDS_POSTGRES_CONNSTR }}
|
||||
;;
|
||||
aws-aurora-serverless-v2-postgres)
|
||||
CONNSTR=${{ secrets.BENCHMARK_RDS_AURORA_CONNSTR }}
|
||||
CONNSTR=${{ secrets.BENCHMARK_RDS_AURORA_CONNSTR }}
|
||||
;;
|
||||
*)
|
||||
echo >&2 "Unknown PLATFORM=${PLATFORM}"
|
||||
@@ -53,17 +46,10 @@ jobs:
|
||||
;;
|
||||
esac
|
||||
|
||||
echo "connstr=${CONNSTR}" >> $GITHUB_OUTPUT
|
||||
echo "connstr=${CONNSTR}" >> $GITHUB_OUTPUT
|
||||
|
||||
- uses: actions/checkout@v4
|
||||
|
||||
- name: Configure AWS credentials
|
||||
uses: aws-actions/configure-aws-credentials@v4
|
||||
with:
|
||||
aws-region: eu-central-1
|
||||
role-to-assume: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
|
||||
role-duration-seconds: 18000 # 5 hours
|
||||
|
||||
- name: Download Neon artifact
|
||||
uses: ./.github/actions/download
|
||||
with:
|
||||
@@ -71,23 +57,23 @@ jobs:
|
||||
path: /tmp/neon/
|
||||
prefix: latest
|
||||
|
||||
# we create a table that has one row for each database that we want to restore with the status whether the restore is done
|
||||
# we create a table that has one row for each database that we want to restore with the status whether the restore is done
|
||||
- name: Create benchmark_restore_status table if it does not exist
|
||||
env:
|
||||
BENCHMARK_CONNSTR: ${{ steps.set-up-prep-connstr.outputs.connstr }}
|
||||
DATABASE_NAME: ${{ matrix.database }}
|
||||
# to avoid a race condition of multiple jobs trying to create the table at the same time,
|
||||
# to avoid a race condition of multiple jobs trying to create the table at the same time,
|
||||
# we use an advisory lock
|
||||
run: |
|
||||
${PG_BINARIES}/psql "${{ env.BENCHMARK_CONNSTR }}" -c "
|
||||
SELECT pg_advisory_lock(4711);
|
||||
SELECT pg_advisory_lock(4711);
|
||||
CREATE TABLE IF NOT EXISTS benchmark_restore_status (
|
||||
databasename text primary key,
|
||||
restore_done boolean
|
||||
);
|
||||
SELECT pg_advisory_unlock(4711);
|
||||
"
|
||||
|
||||
|
||||
- name: Check if restore is already done
|
||||
id: check-restore-done
|
||||
env:
|
||||
@@ -121,7 +107,7 @@ jobs:
|
||||
DATABASE_NAME: ${{ matrix.database }}
|
||||
run: |
|
||||
mkdir -p /tmp/dumps
|
||||
aws s3 cp s3://neon-github-dev/performance/pgdumps/$DATABASE_NAME/$DATABASE_NAME.pg_dump /tmp/dumps/
|
||||
aws s3 cp s3://neon-github-dev/performance/pgdumps/$DATABASE_NAME/$DATABASE_NAME.pg_dump /tmp/dumps/
|
||||
|
||||
- name: Replace database name in connection string
|
||||
if: steps.check-restore-done.outputs.skip != 'true'
|
||||
@@ -140,17 +126,17 @@ jobs:
|
||||
else
|
||||
new_connstr="${base_connstr}/${DATABASE_NAME}"
|
||||
fi
|
||||
echo "database_connstr=${new_connstr}" >> $GITHUB_OUTPUT
|
||||
echo "database_connstr=${new_connstr}" >> $GITHUB_OUTPUT
|
||||
|
||||
- name: Restore dump
|
||||
if: steps.check-restore-done.outputs.skip != 'true'
|
||||
env:
|
||||
DATABASE_NAME: ${{ matrix.database }}
|
||||
DATABASE_CONNSTR: ${{ steps.replace-dbname.outputs.database_connstr }}
|
||||
# the following works only with larger computes:
|
||||
# the following works only with larger computes:
|
||||
# PGOPTIONS: "-c maintenance_work_mem=8388608 -c max_parallel_maintenance_workers=7"
|
||||
# we add the || true because:
|
||||
# the dumps were created with Neon and contain neon extensions that are not
|
||||
# the dumps were created with Neon and contain neon extensions that are not
|
||||
# available in RDS, so we will always report an error, but we can ignore it
|
||||
run: |
|
||||
${PG_BINARIES}/pg_restore --clean --if-exists --no-owner --jobs=4 \
|
||||
|
||||
@@ -236,7 +236,9 @@ jobs:
|
||||
|
||||
# run pageserver tests with different settings
|
||||
for io_engine in std-fs tokio-epoll-uring ; do
|
||||
NEON_PAGESERVER_UNIT_TEST_VIRTUAL_FILE_IOENGINE=$io_engine ${cov_prefix} cargo nextest run $CARGO_FLAGS $CARGO_FEATURES -E 'package(pageserver)'
|
||||
for io_buffer_alignment in 0 1 512 ; do
|
||||
NEON_PAGESERVER_UNIT_TEST_VIRTUAL_FILE_IOENGINE=$io_engine NEON_PAGESERVER_UNIT_TEST_IO_BUFFER_ALIGNMENT=$io_buffer_alignment ${cov_prefix} cargo nextest run $CARGO_FLAGS $CARGO_FEATURES -E 'package(pageserver)'
|
||||
done
|
||||
done
|
||||
|
||||
# Run separate tests for real S3
|
||||
|
||||
119
.github/workflows/benchmarking.yml
vendored
119
.github/workflows/benchmarking.yml
vendored
@@ -12,6 +12,7 @@ on:
|
||||
# │ │ │ ┌───────────── month (1 - 12 or JAN-DEC)
|
||||
# │ │ │ │ ┌───────────── day of the week (0 - 6 or SUN-SAT)
|
||||
- cron: '0 3 * * *' # run once a day, timezone is utc
|
||||
|
||||
workflow_dispatch: # adds ability to run this manually
|
||||
inputs:
|
||||
region_id:
|
||||
@@ -58,7 +59,7 @@ jobs:
|
||||
permissions:
|
||||
contents: write
|
||||
statuses: write
|
||||
id-token: write # aws-actions/configure-aws-credentials
|
||||
id-token: write # Required for OIDC authentication in azure runners
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
@@ -67,10 +68,12 @@ jobs:
|
||||
PLATFORM: "neon-staging"
|
||||
region_id: ${{ github.event.inputs.region_id || 'aws-us-east-2' }}
|
||||
RUNNER: [ self-hosted, us-east-2, x64 ]
|
||||
IMAGE: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/build-tools:pinned
|
||||
- DEFAULT_PG_VERSION: 16
|
||||
PLATFORM: "azure-staging"
|
||||
region_id: 'azure-eastus2'
|
||||
RUNNER: [ self-hosted, eastus2, x64 ]
|
||||
IMAGE: neondatabase/build-tools:pinned
|
||||
env:
|
||||
TEST_PG_BENCH_DURATIONS_MATRIX: "300"
|
||||
TEST_PG_BENCH_SCALES_MATRIX: "10,100"
|
||||
@@ -83,10 +86,7 @@ jobs:
|
||||
|
||||
runs-on: ${{ matrix.RUNNER }}
|
||||
container:
|
||||
image: neondatabase/build-tools:pinned
|
||||
credentials:
|
||||
username: ${{ secrets.NEON_DOCKERHUB_USERNAME }}
|
||||
password: ${{ secrets.NEON_DOCKERHUB_PASSWORD }}
|
||||
image: ${{ matrix.IMAGE }}
|
||||
options: --init
|
||||
|
||||
steps:
|
||||
@@ -164,10 +164,6 @@ jobs:
|
||||
|
||||
replication-tests:
|
||||
if: ${{ github.event.inputs.run_only_pgvector_tests == 'false' || github.event.inputs.run_only_pgvector_tests == null }}
|
||||
permissions:
|
||||
contents: write
|
||||
statuses: write
|
||||
id-token: write # aws-actions/configure-aws-credentials
|
||||
env:
|
||||
POSTGRES_DISTRIB_DIR: /tmp/neon/pg_install
|
||||
DEFAULT_PG_VERSION: 16
|
||||
@@ -178,21 +174,12 @@ jobs:
|
||||
|
||||
runs-on: [ self-hosted, us-east-2, x64 ]
|
||||
container:
|
||||
image: neondatabase/build-tools:pinned
|
||||
credentials:
|
||||
username: ${{ secrets.NEON_DOCKERHUB_USERNAME }}
|
||||
password: ${{ secrets.NEON_DOCKERHUB_PASSWORD }}
|
||||
image: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/build-tools:pinned
|
||||
options: --init
|
||||
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
|
||||
- name: Configure AWS credentials
|
||||
uses: aws-actions/configure-aws-credentials@v4
|
||||
with:
|
||||
aws-region: eu-central-1
|
||||
role-to-assume: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
|
||||
role-duration-seconds: 18000 # 5 hours
|
||||
|
||||
- name: Download Neon artifact
|
||||
uses: ./.github/actions/download
|
||||
@@ -280,7 +267,7 @@ jobs:
|
||||
region_id_default=${{ env.DEFAULT_REGION_ID }}
|
||||
runner_default='["self-hosted", "us-east-2", "x64"]'
|
||||
runner_azure='["self-hosted", "eastus2", "x64"]'
|
||||
image_default="neondatabase/build-tools:pinned"
|
||||
image_default="369495373322.dkr.ecr.eu-central-1.amazonaws.com/build-tools:pinned"
|
||||
matrix='{
|
||||
"pg_version" : [
|
||||
16
|
||||
@@ -357,7 +344,7 @@ jobs:
|
||||
permissions:
|
||||
contents: write
|
||||
statuses: write
|
||||
id-token: write # aws-actions/configure-aws-credentials
|
||||
id-token: write # Required for OIDC authentication in azure runners
|
||||
|
||||
strategy:
|
||||
fail-fast: false
|
||||
@@ -384,7 +371,7 @@ jobs:
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
|
||||
- name: Configure AWS credentials
|
||||
- name: Configure AWS credentials # necessary on Azure runners
|
||||
uses: aws-actions/configure-aws-credentials@v4
|
||||
with:
|
||||
aws-region: eu-central-1
|
||||
@@ -505,15 +492,17 @@ jobs:
|
||||
permissions:
|
||||
contents: write
|
||||
statuses: write
|
||||
id-token: write # aws-actions/configure-aws-credentials
|
||||
id-token: write # Required for OIDC authentication in azure runners
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
include:
|
||||
- PLATFORM: "neonvm-captest-pgvector"
|
||||
RUNNER: [ self-hosted, us-east-2, x64 ]
|
||||
IMAGE: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/build-tools:pinned
|
||||
- PLATFORM: "azure-captest-pgvector"
|
||||
RUNNER: [ self-hosted, eastus2, x64 ]
|
||||
IMAGE: neondatabase/build-tools:pinned
|
||||
|
||||
env:
|
||||
TEST_PG_BENCH_DURATIONS_MATRIX: "15m"
|
||||
@@ -522,16 +511,13 @@ jobs:
|
||||
DEFAULT_PG_VERSION: 16
|
||||
TEST_OUTPUT: /tmp/test_output
|
||||
BUILD_TYPE: remote
|
||||
|
||||
LD_LIBRARY_PATH: /home/nonroot/pg/usr/lib/x86_64-linux-gnu
|
||||
SAVE_PERF_REPORT: ${{ github.event.inputs.save_perf_report || ( github.ref_name == 'main' ) }}
|
||||
PLATFORM: ${{ matrix.PLATFORM }}
|
||||
|
||||
runs-on: ${{ matrix.RUNNER }}
|
||||
container:
|
||||
image: neondatabase/build-tools:pinned
|
||||
credentials:
|
||||
username: ${{ secrets.NEON_DOCKERHUB_USERNAME }}
|
||||
password: ${{ secrets.NEON_DOCKERHUB_PASSWORD }}
|
||||
image: ${{ matrix.IMAGE }}
|
||||
options: --init
|
||||
|
||||
steps:
|
||||
@@ -541,26 +527,17 @@ jobs:
|
||||
# instead of using Neon artifacts containing pgbench
|
||||
- name: Install postgresql-16 where pytest expects it
|
||||
run: |
|
||||
# Just to make it easier to test things locally on macOS (with arm64)
|
||||
arch=$(uname -m | sed 's/x86_64/amd64/g' | sed 's/aarch64/arm64/g')
|
||||
|
||||
cd /home/nonroot
|
||||
wget -q "https://apt.postgresql.org/pub/repos/apt/pool/main/p/postgresql-17/libpq5_17.0-1.pgdg110+1_${arch}.deb"
|
||||
wget -q "https://apt.postgresql.org/pub/repos/apt/pool/main/p/postgresql-16/postgresql-client-16_16.4-1.pgdg110+2_${arch}.deb"
|
||||
wget -q "https://apt.postgresql.org/pub/repos/apt/pool/main/p/postgresql-16/postgresql-16_16.4-1.pgdg110+2_${arch}.deb"
|
||||
dpkg -x libpq5_17.0-1.pgdg110+1_${arch}.deb pg
|
||||
dpkg -x postgresql-16_16.4-1.pgdg110+2_${arch}.deb pg
|
||||
dpkg -x postgresql-client-16_16.4-1.pgdg110+2_${arch}.deb pg
|
||||
|
||||
wget -q https://apt.postgresql.org/pub/repos/apt/pool/main/p/postgresql-16/libpq5_16.4-1.pgdg110%2B1_amd64.deb
|
||||
wget -q https://apt.postgresql.org/pub/repos/apt/pool/main/p/postgresql-16/postgresql-client-16_16.4-1.pgdg110%2B1_amd64.deb
|
||||
wget -q https://apt.postgresql.org/pub/repos/apt/pool/main/p/postgresql-16/postgresql-16_16.4-1.pgdg110%2B1_amd64.deb
|
||||
dpkg -x libpq5_16.4-1.pgdg110+1_amd64.deb pg
|
||||
dpkg -x postgresql-client-16_16.4-1.pgdg110+1_amd64.deb pg
|
||||
dpkg -x postgresql-16_16.4-1.pgdg110+1_amd64.deb pg
|
||||
mkdir -p /tmp/neon/pg_install/v16/bin
|
||||
ln -s /home/nonroot/pg/usr/lib/postgresql/16/bin/pgbench /tmp/neon/pg_install/v16/bin/pgbench
|
||||
ln -s /home/nonroot/pg/usr/lib/postgresql/16/bin/psql /tmp/neon/pg_install/v16/bin/psql
|
||||
ln -s /home/nonroot/pg/usr/lib/$(uname -m)-linux-gnu /tmp/neon/pg_install/v16/lib
|
||||
|
||||
LD_LIBRARY_PATH="/home/nonroot/pg/usr/lib/$(uname -m)-linux-gnu:${LD_LIBRARY_PATH:-}"
|
||||
export LD_LIBRARY_PATH
|
||||
echo "LD_LIBRARY_PATH=${LD_LIBRARY_PATH}" >> ${GITHUB_ENV}
|
||||
|
||||
ln -s /home/nonroot/pg/usr/lib/postgresql/16/bin/pgbench /tmp/neon/pg_install/v16/bin/pgbench
|
||||
ln -s /home/nonroot/pg/usr/lib/postgresql/16/bin/psql /tmp/neon/pg_install/v16/bin/psql
|
||||
ln -s /home/nonroot/pg/usr/lib/x86_64-linux-gnu /tmp/neon/pg_install/v16/lib
|
||||
/tmp/neon/pg_install/v16/bin/pgbench --version
|
||||
/tmp/neon/pg_install/v16/bin/psql --version
|
||||
|
||||
@@ -582,7 +559,7 @@ jobs:
|
||||
|
||||
echo "connstr=${CONNSTR}" >> $GITHUB_OUTPUT
|
||||
|
||||
- name: Configure AWS credentials
|
||||
- name: Configure AWS credentials # necessary on Azure runners to read/write from/to S3
|
||||
uses: aws-actions/configure-aws-credentials@v4
|
||||
with:
|
||||
aws-region: eu-central-1
|
||||
@@ -643,10 +620,6 @@ jobs:
|
||||
# *_CLICKBENCH_CONNSTR: Genuine ClickBench DB with ~100M rows
|
||||
# *_CLICKBENCH_10M_CONNSTR: DB with the first 10M rows of ClickBench DB
|
||||
if: ${{ !cancelled() && (github.event.inputs.run_only_pgvector_tests == 'false' || github.event.inputs.run_only_pgvector_tests == null) }}
|
||||
permissions:
|
||||
contents: write
|
||||
statuses: write
|
||||
id-token: write # aws-actions/configure-aws-credentials
|
||||
needs: [ generate-matrices, pgbench-compare, prepare_AWS_RDS_databases ]
|
||||
|
||||
strategy:
|
||||
@@ -665,22 +638,12 @@ jobs:
|
||||
|
||||
runs-on: [ self-hosted, us-east-2, x64 ]
|
||||
container:
|
||||
image: neondatabase/build-tools:pinned
|
||||
credentials:
|
||||
username: ${{ secrets.NEON_DOCKERHUB_USERNAME }}
|
||||
password: ${{ secrets.NEON_DOCKERHUB_PASSWORD }}
|
||||
image: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/build-tools:pinned
|
||||
options: --init
|
||||
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
|
||||
- name: Configure AWS credentials
|
||||
uses: aws-actions/configure-aws-credentials@v4
|
||||
with:
|
||||
aws-region: eu-central-1
|
||||
role-to-assume: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
|
||||
role-duration-seconds: 18000 # 5 hours
|
||||
|
||||
- name: Download Neon artifact
|
||||
uses: ./.github/actions/download
|
||||
with:
|
||||
@@ -751,10 +714,6 @@ jobs:
|
||||
#
|
||||
# *_TPCH_S10_CONNSTR: DB generated with scale factor 10 (~10 GB)
|
||||
if: ${{ !cancelled() && (github.event.inputs.run_only_pgvector_tests == 'false' || github.event.inputs.run_only_pgvector_tests == null) }}
|
||||
permissions:
|
||||
contents: write
|
||||
statuses: write
|
||||
id-token: write # aws-actions/configure-aws-credentials
|
||||
needs: [ generate-matrices, clickbench-compare, prepare_AWS_RDS_databases ]
|
||||
|
||||
strategy:
|
||||
@@ -772,22 +731,12 @@ jobs:
|
||||
|
||||
runs-on: [ self-hosted, us-east-2, x64 ]
|
||||
container:
|
||||
image: neondatabase/build-tools:pinned
|
||||
credentials:
|
||||
username: ${{ secrets.NEON_DOCKERHUB_USERNAME }}
|
||||
password: ${{ secrets.NEON_DOCKERHUB_PASSWORD }}
|
||||
image: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/build-tools:pinned
|
||||
options: --init
|
||||
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
|
||||
- name: Configure AWS credentials
|
||||
uses: aws-actions/configure-aws-credentials@v4
|
||||
with:
|
||||
aws-region: eu-central-1
|
||||
role-to-assume: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
|
||||
role-duration-seconds: 18000 # 5 hours
|
||||
|
||||
- name: Download Neon artifact
|
||||
uses: ./.github/actions/download
|
||||
with:
|
||||
@@ -857,10 +806,6 @@ jobs:
|
||||
|
||||
user-examples-compare:
|
||||
if: ${{ !cancelled() && (github.event.inputs.run_only_pgvector_tests == 'false' || github.event.inputs.run_only_pgvector_tests == null) }}
|
||||
permissions:
|
||||
contents: write
|
||||
statuses: write
|
||||
id-token: write # aws-actions/configure-aws-credentials
|
||||
needs: [ generate-matrices, tpch-compare, prepare_AWS_RDS_databases ]
|
||||
|
||||
strategy:
|
||||
@@ -877,22 +822,12 @@ jobs:
|
||||
|
||||
runs-on: [ self-hosted, us-east-2, x64 ]
|
||||
container:
|
||||
image: neondatabase/build-tools:pinned
|
||||
credentials:
|
||||
username: ${{ secrets.NEON_DOCKERHUB_USERNAME }}
|
||||
password: ${{ secrets.NEON_DOCKERHUB_PASSWORD }}
|
||||
image: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/build-tools:pinned
|
||||
options: --init
|
||||
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
|
||||
- name: Configure AWS credentials
|
||||
uses: aws-actions/configure-aws-credentials@v4
|
||||
with:
|
||||
aws-region: eu-central-1
|
||||
role-to-assume: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
|
||||
role-duration-seconds: 18000 # 5 hours
|
||||
|
||||
- name: Download Neon artifact
|
||||
uses: ./.github/actions/download
|
||||
with:
|
||||
|
||||
5
.github/workflows/build_and_test.yml
vendored
5
.github/workflows/build_and_test.yml
vendored
@@ -1190,9 +1190,10 @@ jobs:
|
||||
|
||||
files_to_promote+=("s3://${BUCKET}/${s3_key}")
|
||||
|
||||
for pg_version in v14 v15 v16 v17; do
|
||||
# TODO Add v17
|
||||
for pg_version in v14 v15 v16; do
|
||||
# We run less tests for debug builds, so we don't need to promote them
|
||||
if [ "${build_type}" == "debug" ] && { [ "${arch}" == "ARM64" ] || [ "${pg_version}" != "v17" ] ; }; then
|
||||
if [ "${build_type}" == "debug" ] && { [ "${arch}" == "ARM64" ] || [ "${pg_version}" != "v16" ] ; }; then
|
||||
continue
|
||||
fi
|
||||
|
||||
|
||||
2
Cargo.lock
generated
2
Cargo.lock
generated
@@ -1321,7 +1321,6 @@ dependencies = [
|
||||
"clap",
|
||||
"comfy-table",
|
||||
"compute_api",
|
||||
"futures",
|
||||
"humantime",
|
||||
"humantime-serde",
|
||||
"hyper 0.14.30",
|
||||
@@ -4297,7 +4296,6 @@ dependencies = [
|
||||
"camino-tempfile",
|
||||
"chrono",
|
||||
"clap",
|
||||
"compute_api",
|
||||
"consumption_metrics",
|
||||
"dashmap",
|
||||
"ecdsa 0.16.9",
|
||||
|
||||
@@ -13,9 +13,6 @@ RUN useradd -ms /bin/bash nonroot -b /home
|
||||
SHELL ["/bin/bash", "-c"]
|
||||
|
||||
# System deps
|
||||
#
|
||||
# 'gdb' is included so that we get backtraces of core dumps produced in
|
||||
# regression tests
|
||||
RUN set -e \
|
||||
&& apt update \
|
||||
&& apt install -y \
|
||||
@@ -27,7 +24,6 @@ RUN set -e \
|
||||
cmake \
|
||||
curl \
|
||||
flex \
|
||||
gdb \
|
||||
git \
|
||||
gnupg \
|
||||
gzip \
|
||||
|
||||
@@ -11,10 +11,6 @@ commands:
|
||||
user: root
|
||||
sysvInitAction: sysinit
|
||||
shell: 'chmod 711 /neonvm/bin/resize-swap'
|
||||
- name: chmod-set-disk-quota
|
||||
user: root
|
||||
sysvInitAction: sysinit
|
||||
shell: 'chmod 711 /neonvm/bin/set-disk-quota'
|
||||
- name: pgbouncer
|
||||
user: postgres
|
||||
sysvInitAction: respawn
|
||||
@@ -34,12 +30,11 @@ commands:
|
||||
shutdownHook: |
|
||||
su -p postgres --session-command '/usr/local/bin/pg_ctl stop -D /var/db/postgres/compute/pgdata -m fast --wait -t 10'
|
||||
files:
|
||||
- filename: compute_ctl-sudoers
|
||||
- filename: compute_ctl-resize-swap
|
||||
content: |
|
||||
# Allow postgres user (which is what compute_ctl runs as) to run /neonvm/bin/resize-swap
|
||||
# and /neonvm/bin/set-disk-quota as root without requiring entering a password (NOPASSWD),
|
||||
# regardless of hostname (ALL)
|
||||
postgres ALL=(root) NOPASSWD: /neonvm/bin/resize-swap, /neonvm/bin/set-disk-quota
|
||||
# as root without requiring entering a password (NOPASSWD), regardless of hostname (ALL)
|
||||
postgres ALL=(root) NOPASSWD: /neonvm/bin/resize-swap
|
||||
- filename: cgconfig.conf
|
||||
content: |
|
||||
# Configuration for cgroups in VM compute nodes
|
||||
@@ -105,7 +100,7 @@ merge: |
|
||||
&& apt install --no-install-recommends -y \
|
||||
sudo \
|
||||
&& rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/*
|
||||
COPY compute_ctl-sudoers /etc/sudoers.d/compute_ctl-sudoers
|
||||
COPY compute_ctl-resize-swap /etc/sudoers.d/compute_ctl-resize-swap
|
||||
|
||||
COPY cgconfig.conf /etc/cgconfig.conf
|
||||
|
||||
|
||||
@@ -44,7 +44,6 @@ use std::{thread, time::Duration};
|
||||
use anyhow::{Context, Result};
|
||||
use chrono::Utc;
|
||||
use clap::Arg;
|
||||
use compute_tools::disk_quota::set_disk_quota;
|
||||
use compute_tools::lsn_lease::launch_lsn_lease_bg_task_for_static;
|
||||
use signal_hook::consts::{SIGQUIT, SIGTERM};
|
||||
use signal_hook::{consts::SIGINT, iterator::Signals};
|
||||
@@ -152,7 +151,6 @@ fn process_cli(matches: &clap::ArgMatches) -> Result<ProcessCliResult> {
|
||||
let spec_json = matches.get_one::<String>("spec");
|
||||
let spec_path = matches.get_one::<String>("spec-path");
|
||||
let resize_swap_on_bind = matches.get_flag("resize-swap-on-bind");
|
||||
let set_disk_quota_for_fs = matches.get_one::<String>("set-disk-quota-for-fs");
|
||||
|
||||
Ok(ProcessCliResult {
|
||||
connstr,
|
||||
@@ -163,7 +161,6 @@ fn process_cli(matches: &clap::ArgMatches) -> Result<ProcessCliResult> {
|
||||
spec_json,
|
||||
spec_path,
|
||||
resize_swap_on_bind,
|
||||
set_disk_quota_for_fs,
|
||||
})
|
||||
}
|
||||
|
||||
@@ -176,7 +173,6 @@ struct ProcessCliResult<'clap> {
|
||||
spec_json: Option<&'clap String>,
|
||||
spec_path: Option<&'clap String>,
|
||||
resize_swap_on_bind: bool,
|
||||
set_disk_quota_for_fs: Option<&'clap String>,
|
||||
}
|
||||
|
||||
fn startup_context_from_env() -> Option<opentelemetry::ContextGuard> {
|
||||
@@ -297,7 +293,6 @@ fn wait_spec(
|
||||
pgbin,
|
||||
ext_remote_storage,
|
||||
resize_swap_on_bind,
|
||||
set_disk_quota_for_fs,
|
||||
http_port,
|
||||
..
|
||||
}: ProcessCliResult,
|
||||
@@ -378,7 +373,6 @@ fn wait_spec(
|
||||
compute,
|
||||
http_port,
|
||||
resize_swap_on_bind,
|
||||
set_disk_quota_for_fs: set_disk_quota_for_fs.cloned(),
|
||||
})
|
||||
}
|
||||
|
||||
@@ -387,7 +381,6 @@ struct WaitSpecResult {
|
||||
// passed through from ProcessCliResult
|
||||
http_port: u16,
|
||||
resize_swap_on_bind: bool,
|
||||
set_disk_quota_for_fs: Option<String>,
|
||||
}
|
||||
|
||||
fn start_postgres(
|
||||
@@ -397,7 +390,6 @@ fn start_postgres(
|
||||
compute,
|
||||
http_port,
|
||||
resize_swap_on_bind,
|
||||
set_disk_quota_for_fs,
|
||||
}: WaitSpecResult,
|
||||
) -> Result<(Option<PostgresHandle>, StartPostgresResult)> {
|
||||
// We got all we need, update the state.
|
||||
@@ -411,7 +403,6 @@ fn start_postgres(
|
||||
);
|
||||
// before we release the mutex, fetch the swap size (if any) for later.
|
||||
let swap_size_bytes = state.pspec.as_ref().unwrap().spec.swap_size_bytes;
|
||||
let disk_quota_bytes = state.pspec.as_ref().unwrap().spec.disk_quota_bytes;
|
||||
drop(state);
|
||||
|
||||
// Launch remaining service threads
|
||||
@@ -431,8 +422,8 @@ fn start_postgres(
|
||||
// OOM-killed during startup because swap wasn't available yet.
|
||||
match resize_swap(size_bytes) {
|
||||
Ok(()) => {
|
||||
let size_mib = size_bytes as f32 / (1 << 20) as f32; // just for more coherent display.
|
||||
info!(%size_bytes, %size_mib, "resized swap");
|
||||
let size_gib = size_bytes as f32 / (1 << 20) as f32; // just for more coherent display.
|
||||
info!(%size_bytes, %size_gib, "resized swap");
|
||||
}
|
||||
Err(err) => {
|
||||
let err = err.context("failed to resize swap");
|
||||
@@ -441,29 +432,10 @@ fn start_postgres(
|
||||
// Mark compute startup as failed; don't try to start postgres, and report this
|
||||
// error to the control plane when it next asks.
|
||||
prestartup_failed = true;
|
||||
compute.set_failed_status(err);
|
||||
delay_exit = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Set disk quota if the compute spec says so
|
||||
if let (Some(disk_quota_bytes), Some(disk_quota_fs_mountpoint)) =
|
||||
(disk_quota_bytes, set_disk_quota_for_fs)
|
||||
{
|
||||
match set_disk_quota(disk_quota_bytes, &disk_quota_fs_mountpoint) {
|
||||
Ok(()) => {
|
||||
let size_mib = disk_quota_bytes as f32 / (1 << 20) as f32; // just for more coherent display.
|
||||
info!(%disk_quota_bytes, %size_mib, "set disk quota");
|
||||
}
|
||||
Err(err) => {
|
||||
let err = err.context("failed to set disk quota");
|
||||
error!("{err:#}");
|
||||
|
||||
// Mark compute startup as failed; don't try to start postgres, and report this
|
||||
// error to the control plane when it next asks.
|
||||
prestartup_failed = true;
|
||||
compute.set_failed_status(err);
|
||||
let mut state = compute.state.lock().unwrap();
|
||||
state.error = Some(format!("{err:?}"));
|
||||
state.status = ComputeStatus::Failed;
|
||||
compute.state_changed.notify_all();
|
||||
delay_exit = true;
|
||||
}
|
||||
}
|
||||
@@ -478,7 +450,16 @@ fn start_postgres(
|
||||
Ok(pg) => Some(pg),
|
||||
Err(err) => {
|
||||
error!("could not start the compute node: {:#}", err);
|
||||
compute.set_failed_status(err);
|
||||
let mut state = compute.state.lock().unwrap();
|
||||
state.error = Some(format!("{:?}", err));
|
||||
state.status = ComputeStatus::Failed;
|
||||
// Notify others that Postgres failed to start. In case of configuring the
|
||||
// empty compute, it's likely that API handler is still waiting for compute
|
||||
// state change. With this we will notify it that compute is in Failed state,
|
||||
// so control plane will know about it earlier and record proper error instead
|
||||
// of timeout.
|
||||
compute.state_changed.notify_all();
|
||||
drop(state); // unlock
|
||||
delay_exit = true;
|
||||
None
|
||||
}
|
||||
@@ -769,11 +750,6 @@ fn cli() -> clap::Command {
|
||||
.long("resize-swap-on-bind")
|
||||
.action(clap::ArgAction::SetTrue),
|
||||
)
|
||||
.arg(
|
||||
Arg::new("set-disk-quota-for-fs")
|
||||
.long("set-disk-quota-for-fs")
|
||||
.value_name("SET_DISK_QUOTA_FOR_FS")
|
||||
)
|
||||
}
|
||||
|
||||
/// When compute_ctl is killed, send also termination signal to sync-safekeepers
|
||||
|
||||
@@ -10,7 +10,6 @@ use std::sync::atomic::AtomicU32;
|
||||
use std::sync::atomic::Ordering;
|
||||
use std::sync::{Condvar, Mutex, RwLock};
|
||||
use std::thread;
|
||||
use std::time::Duration;
|
||||
use std::time::Instant;
|
||||
|
||||
use anyhow::{Context, Result};
|
||||
@@ -306,13 +305,6 @@ impl ComputeNode {
|
||||
self.state_changed.notify_all();
|
||||
}
|
||||
|
||||
pub fn set_failed_status(&self, err: anyhow::Error) {
|
||||
let mut state = self.state.lock().unwrap();
|
||||
state.error = Some(format!("{err:?}"));
|
||||
state.status = ComputeStatus::Failed;
|
||||
self.state_changed.notify_all();
|
||||
}
|
||||
|
||||
pub fn get_status(&self) -> ComputeStatus {
|
||||
self.state.lock().unwrap().status
|
||||
}
|
||||
@@ -718,7 +710,7 @@ impl ComputeNode {
|
||||
info!("running initdb");
|
||||
let initdb_bin = Path::new(&self.pgbin).parent().unwrap().join("initdb");
|
||||
Command::new(initdb_bin)
|
||||
.args(["--pgdata", pgdata])
|
||||
.args(["-D", pgdata])
|
||||
.output()
|
||||
.expect("cannot start initdb process");
|
||||
|
||||
@@ -1131,9 +1123,6 @@ impl ComputeNode {
|
||||
//
|
||||
// Use that as a default location and pattern, except macos where core dumps are written
|
||||
// to /cores/ directory by default.
|
||||
//
|
||||
// With default Linux settings, the core dump file is called just "core", so check for
|
||||
// that too.
|
||||
pub fn check_for_core_dumps(&self) -> Result<()> {
|
||||
let core_dump_dir = match std::env::consts::OS {
|
||||
"macos" => Path::new("/cores/"),
|
||||
@@ -1145,17 +1134,8 @@ impl ComputeNode {
|
||||
let files = fs::read_dir(core_dump_dir)?;
|
||||
let cores = files.filter_map(|entry| {
|
||||
let entry = entry.ok()?;
|
||||
|
||||
let is_core_dump = match entry.file_name().to_str()? {
|
||||
n if n.starts_with("core.") => true,
|
||||
"core" => true,
|
||||
_ => false,
|
||||
};
|
||||
if is_core_dump {
|
||||
Some(entry.path())
|
||||
} else {
|
||||
None
|
||||
}
|
||||
let _ = entry.file_name().to_str()?.strip_prefix("core.")?;
|
||||
Some(entry.path())
|
||||
});
|
||||
|
||||
// Print backtrace for each core dump
|
||||
@@ -1406,36 +1386,6 @@ LIMIT 100",
|
||||
}
|
||||
Ok(remote_ext_metrics)
|
||||
}
|
||||
|
||||
/// Waits until current thread receives a state changed notification and
|
||||
/// the pageserver connection strings has changed.
|
||||
///
|
||||
/// The operation will time out after a specified duration.
|
||||
pub fn wait_timeout_while_pageserver_connstr_unchanged(&self, duration: Duration) {
|
||||
let state = self.state.lock().unwrap();
|
||||
let old_pageserver_connstr = state
|
||||
.pspec
|
||||
.as_ref()
|
||||
.expect("spec must be set")
|
||||
.pageserver_connstr
|
||||
.clone();
|
||||
let mut unchanged = true;
|
||||
let _ = self
|
||||
.state_changed
|
||||
.wait_timeout_while(state, duration, |s| {
|
||||
let pageserver_connstr = &s
|
||||
.pspec
|
||||
.as_ref()
|
||||
.expect("spec must be set")
|
||||
.pageserver_connstr;
|
||||
unchanged = pageserver_connstr == &old_pageserver_connstr;
|
||||
unchanged
|
||||
})
|
||||
.unwrap();
|
||||
if !unchanged {
|
||||
info!("Pageserver config changed");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub fn forward_termination_signal() {
|
||||
|
||||
@@ -11,17 +11,9 @@ use crate::compute::ComputeNode;
|
||||
fn configurator_main_loop(compute: &Arc<ComputeNode>) {
|
||||
info!("waiting for reconfiguration requests");
|
||||
loop {
|
||||
let mut state = compute.state.lock().unwrap();
|
||||
let state = compute.state.lock().unwrap();
|
||||
let mut state = compute.state_changed.wait(state).unwrap();
|
||||
|
||||
// We have to re-check the status after re-acquiring the lock because it could be that
|
||||
// the status has changed while we were waiting for the lock, and we might not need to
|
||||
// wait on the condition variable. Otherwise, we might end up in some soft-/deadlock, i.e.
|
||||
// we are waiting for a condition variable that will never be signaled.
|
||||
if state.status != ComputeStatus::ConfigurationPending {
|
||||
state = compute.state_changed.wait(state).unwrap();
|
||||
}
|
||||
|
||||
// Re-check the status after waking up
|
||||
if state.status == ComputeStatus::ConfigurationPending {
|
||||
info!("got configuration request");
|
||||
state.status = ComputeStatus::Configuration;
|
||||
|
||||
@@ -1,25 +0,0 @@
|
||||
use anyhow::Context;
|
||||
|
||||
pub const DISK_QUOTA_BIN: &str = "/neonvm/bin/set-disk-quota";
|
||||
|
||||
/// If size_bytes is 0, it disables the quota. Otherwise, it sets filesystem quota to size_bytes.
|
||||
/// `fs_mountpoint` should point to the mountpoint of the filesystem where the quota should be set.
|
||||
pub fn set_disk_quota(size_bytes: u64, fs_mountpoint: &str) -> anyhow::Result<()> {
|
||||
let size_kb = size_bytes / 1024;
|
||||
// run `/neonvm/bin/set-disk-quota {size_kb} {mountpoint}`
|
||||
let child_result = std::process::Command::new("/usr/bin/sudo")
|
||||
.arg(DISK_QUOTA_BIN)
|
||||
.arg(size_kb.to_string())
|
||||
.arg(fs_mountpoint)
|
||||
.spawn();
|
||||
|
||||
child_result
|
||||
.context("spawn() failed")
|
||||
.and_then(|mut child| child.wait().context("wait() failed"))
|
||||
.and_then(|status| match status.success() {
|
||||
true => Ok(()),
|
||||
false => Err(anyhow::anyhow!("process exited with {status}")),
|
||||
})
|
||||
// wrap any prior error with the overall context that we couldn't run the command
|
||||
.with_context(|| format!("could not run `/usr/bin/sudo {DISK_QUOTA_BIN}`"))
|
||||
}
|
||||
@@ -10,7 +10,6 @@ pub mod http;
|
||||
pub mod logger;
|
||||
pub mod catalog;
|
||||
pub mod compute;
|
||||
pub mod disk_quota;
|
||||
pub mod extension_server;
|
||||
pub mod lsn_lease;
|
||||
mod migration;
|
||||
|
||||
@@ -57,10 +57,10 @@ fn lsn_lease_bg_task(
|
||||
.max(valid_duration / 2);
|
||||
|
||||
info!(
|
||||
"Request succeeded, sleeping for {} seconds",
|
||||
"Succeeded, sleeping for {} seconds",
|
||||
sleep_duration.as_secs()
|
||||
);
|
||||
compute.wait_timeout_while_pageserver_connstr_unchanged(sleep_duration);
|
||||
thread::sleep(sleep_duration);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -89,7 +89,10 @@ fn acquire_lsn_lease_with_retry(
|
||||
.map(|connstr| {
|
||||
let mut config = postgres::Config::from_str(connstr).expect("Invalid connstr");
|
||||
if let Some(storage_auth_token) = &spec.storage_auth_token {
|
||||
info!("Got storage auth token from spec file");
|
||||
config.password(storage_auth_token.clone());
|
||||
} else {
|
||||
info!("Storage auth token not set");
|
||||
}
|
||||
config
|
||||
})
|
||||
@@ -105,11 +108,9 @@ fn acquire_lsn_lease_with_retry(
|
||||
bail!("Permanent error: lease could not be obtained, LSN is behind the GC cutoff");
|
||||
}
|
||||
Err(e) => {
|
||||
warn!("Failed to acquire lsn lease: {e} (attempt {attempts})");
|
||||
warn!("Failed to acquire lsn lease: {e} (attempt {attempts}");
|
||||
|
||||
compute.wait_timeout_while_pageserver_connstr_unchanged(Duration::from_millis(
|
||||
retry_period_ms as u64,
|
||||
));
|
||||
thread::sleep(Duration::from_millis(retry_period_ms as u64));
|
||||
retry_period_ms *= 1.5;
|
||||
retry_period_ms = retry_period_ms.min(MAX_RETRY_PERIOD_MS);
|
||||
}
|
||||
|
||||
@@ -9,7 +9,6 @@ anyhow.workspace = true
|
||||
camino.workspace = true
|
||||
clap.workspace = true
|
||||
comfy-table.workspace = true
|
||||
futures.workspace = true
|
||||
humantime.workspace = true
|
||||
nix.workspace = true
|
||||
once_cell.workspace = true
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -1,94 +0,0 @@
|
||||
//! Branch mappings for convenience
|
||||
|
||||
use std::collections::HashMap;
|
||||
use std::fs;
|
||||
use std::path::Path;
|
||||
|
||||
use anyhow::{bail, Context};
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
use utils::id::{TenantId, TenantTimelineId, TimelineId};
|
||||
|
||||
/// Keep human-readable aliases in memory (and persist them to config XXX), to hide tenant/timeline hex strings from the user.
|
||||
#[derive(PartialEq, Eq, Clone, Debug, Default, Serialize, Deserialize)]
|
||||
#[serde(default, deny_unknown_fields)]
|
||||
pub struct BranchMappings {
|
||||
/// Default tenant ID to use with the 'neon_local' command line utility, when
|
||||
/// --tenant_id is not explicitly specified. This comes from the branches.
|
||||
pub default_tenant_id: Option<TenantId>,
|
||||
|
||||
// A `HashMap<String, HashMap<TenantId, TimelineId>>` would be more appropriate here,
|
||||
// but deserialization into a generic toml object as `toml::Value::try_from` fails with an error.
|
||||
// https://toml.io/en/v1.0.0 does not contain a concept of "a table inside another table".
|
||||
pub mappings: HashMap<String, Vec<(TenantId, TimelineId)>>,
|
||||
}
|
||||
|
||||
impl BranchMappings {
|
||||
pub fn register_branch_mapping(
|
||||
&mut self,
|
||||
branch_name: String,
|
||||
tenant_id: TenantId,
|
||||
timeline_id: TimelineId,
|
||||
) -> anyhow::Result<()> {
|
||||
let existing_values = self.mappings.entry(branch_name.clone()).or_default();
|
||||
|
||||
let existing_ids = existing_values
|
||||
.iter()
|
||||
.find(|(existing_tenant_id, _)| existing_tenant_id == &tenant_id);
|
||||
|
||||
if let Some((_, old_timeline_id)) = existing_ids {
|
||||
if old_timeline_id == &timeline_id {
|
||||
Ok(())
|
||||
} else {
|
||||
bail!("branch '{branch_name}' is already mapped to timeline {old_timeline_id}, cannot map to another timeline {timeline_id}");
|
||||
}
|
||||
} else {
|
||||
existing_values.push((tenant_id, timeline_id));
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
pub fn get_branch_timeline_id(
|
||||
&self,
|
||||
branch_name: &str,
|
||||
tenant_id: TenantId,
|
||||
) -> Option<TimelineId> {
|
||||
// If it looks like a timeline ID, return it as it is
|
||||
if let Ok(timeline_id) = branch_name.parse::<TimelineId>() {
|
||||
return Some(timeline_id);
|
||||
}
|
||||
|
||||
self.mappings
|
||||
.get(branch_name)?
|
||||
.iter()
|
||||
.find(|(mapped_tenant_id, _)| mapped_tenant_id == &tenant_id)
|
||||
.map(|&(_, timeline_id)| timeline_id)
|
||||
.map(TimelineId::from)
|
||||
}
|
||||
|
||||
pub fn timeline_name_mappings(&self) -> HashMap<TenantTimelineId, String> {
|
||||
self.mappings
|
||||
.iter()
|
||||
.flat_map(|(name, tenant_timelines)| {
|
||||
tenant_timelines.iter().map(|&(tenant_id, timeline_id)| {
|
||||
(TenantTimelineId::new(tenant_id, timeline_id), name.clone())
|
||||
})
|
||||
})
|
||||
.collect()
|
||||
}
|
||||
|
||||
pub fn persist(&self, path: &Path) -> anyhow::Result<()> {
|
||||
let content = &toml::to_string_pretty(self)?;
|
||||
fs::write(path, content).with_context(|| {
|
||||
format!(
|
||||
"Failed to write branch information into path '{}'",
|
||||
path.display()
|
||||
)
|
||||
})
|
||||
}
|
||||
|
||||
pub fn load(path: &Path) -> anyhow::Result<BranchMappings> {
|
||||
let branches_file_contents = fs::read_to_string(path)?;
|
||||
Ok(toml::from_str(branches_file_contents.as_str())?)
|
||||
}
|
||||
}
|
||||
@@ -561,7 +561,6 @@ impl Endpoint {
|
||||
operation_uuid: None,
|
||||
features: self.features.clone(),
|
||||
swap_size_bytes: None,
|
||||
disk_quota_bytes: None,
|
||||
cluster: Cluster {
|
||||
cluster_id: None, // project ID: not used
|
||||
name: None, // project name: not used
|
||||
|
||||
@@ -113,7 +113,7 @@ impl SafekeeperNode {
|
||||
|
||||
pub async fn start(
|
||||
&self,
|
||||
extra_opts: &[String],
|
||||
extra_opts: Vec<String>,
|
||||
retry_timeout: &Duration,
|
||||
) -> anyhow::Result<()> {
|
||||
print!(
|
||||
@@ -196,7 +196,7 @@ impl SafekeeperNode {
|
||||
]);
|
||||
}
|
||||
|
||||
args.extend_from_slice(extra_opts);
|
||||
args.extend(extra_opts);
|
||||
|
||||
background_process::start_process(
|
||||
&format!("safekeeper-{id}"),
|
||||
|
||||
@@ -347,7 +347,7 @@ impl StorageController {
|
||||
|
||||
if !tokio::fs::try_exists(&pg_data_path).await? {
|
||||
let initdb_args = [
|
||||
"--pgdata",
|
||||
"-D",
|
||||
pg_data_path.as_ref(),
|
||||
"--username",
|
||||
&username(),
|
||||
|
||||
@@ -50,16 +50,6 @@ pub struct ComputeSpec {
|
||||
#[serde(default)]
|
||||
pub swap_size_bytes: Option<u64>,
|
||||
|
||||
/// If compute_ctl was passed `--set-disk-quota-for-fs`, a value of `Some(_)` instructs
|
||||
/// compute_ctl to run `/neonvm/bin/set-disk-quota` with the given size and fs, when the
|
||||
/// spec is first received.
|
||||
///
|
||||
/// Both this field and `--set-disk-quota-for-fs` are required, so that the control plane's
|
||||
/// spec generation doesn't need to be aware of the actual compute it's running on, while
|
||||
/// guaranteeing gradual rollout of disk quota.
|
||||
#[serde(default)]
|
||||
pub disk_quota_bytes: Option<u64>,
|
||||
|
||||
/// Expected cluster state at the end of transition process.
|
||||
pub cluster: Cluster,
|
||||
pub delta_operations: Option<Vec<DeltaOp>>,
|
||||
@@ -278,22 +268,6 @@ pub struct GenericOption {
|
||||
/// declare a `trait` on it.
|
||||
pub type GenericOptions = Option<Vec<GenericOption>>;
|
||||
|
||||
/// Configured the local-proxy application with the relevant JWKS and roles it should
|
||||
/// use for authorizing connect requests using JWT.
|
||||
#[derive(Clone, Debug, Deserialize, Serialize)]
|
||||
pub struct LocalProxySpec {
|
||||
pub jwks: Vec<JwksSettings>,
|
||||
}
|
||||
|
||||
#[derive(Clone, Debug, Deserialize, Serialize)]
|
||||
pub struct JwksSettings {
|
||||
pub id: String,
|
||||
pub role_names: Vec<String>,
|
||||
pub jwks_url: String,
|
||||
pub provider_name: String,
|
||||
pub jwt_audience: Option<String>,
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
|
||||
@@ -984,7 +984,6 @@ pub fn short_error(e: &QueryError) -> String {
|
||||
}
|
||||
|
||||
fn log_query_error(query: &str, e: &QueryError) {
|
||||
// If you want to change the log level of a specific error, also re-categorize it in `BasebackupQueryTimeOngoingRecording`.
|
||||
match e {
|
||||
QueryError::Disconnected(ConnectionError::Io(io_error)) => {
|
||||
if is_expected_io_error(io_error) {
|
||||
|
||||
@@ -93,9 +93,9 @@ impl Conf {
|
||||
);
|
||||
let output = self
|
||||
.new_pg_command("initdb")?
|
||||
.arg("--pgdata")
|
||||
.arg("-D")
|
||||
.arg(&self.datadir)
|
||||
.args(["--username", "postgres", "--no-instructions", "--no-sync"])
|
||||
.args(["-U", "postgres", "--no-instructions", "--no-sync"])
|
||||
.output()?;
|
||||
debug!("initdb output: {:?}", output);
|
||||
ensure!(
|
||||
|
||||
@@ -736,22 +736,4 @@ impl Client {
|
||||
.await
|
||||
.map_err(Error::ReceiveBody)
|
||||
}
|
||||
|
||||
pub async fn timeline_init_lsn_lease(
|
||||
&self,
|
||||
tenant_shard_id: TenantShardId,
|
||||
timeline_id: TimelineId,
|
||||
lsn: Lsn,
|
||||
) -> Result<LsnLease> {
|
||||
let uri = format!(
|
||||
"{}/v1/tenant/{tenant_shard_id}/timeline/{timeline_id}/lsn_lease",
|
||||
self.mgmt_api_endpoint,
|
||||
);
|
||||
|
||||
self.request(Method::POST, &uri, LsnLeaseRequest { lsn })
|
||||
.await?
|
||||
.json()
|
||||
.await
|
||||
.map_err(Error::ReceiveBody)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -15,7 +15,7 @@ use clap::{Arg, ArgAction, Command};
|
||||
|
||||
use metrics::launch_timestamp::{set_launch_timestamp_metric, LaunchTimestamp};
|
||||
use pageserver::config::PageserverIdentity;
|
||||
use pageserver::controller_upcall_client::ControllerUpcallClient;
|
||||
use pageserver::control_plane_client::ControlPlaneClient;
|
||||
use pageserver::disk_usage_eviction_task::{self, launch_disk_usage_global_eviction_task};
|
||||
use pageserver::metrics::{STARTUP_DURATION, STARTUP_IS_LOADING};
|
||||
use pageserver::task_mgr::{COMPUTE_REQUEST_RUNTIME, WALRECEIVER_RUNTIME};
|
||||
@@ -396,7 +396,7 @@ fn start_pageserver(
|
||||
// Set up deletion queue
|
||||
let (deletion_queue, deletion_workers) = DeletionQueue::new(
|
||||
remote_storage.clone(),
|
||||
ControllerUpcallClient::new(conf, &shutdown_pageserver),
|
||||
ControlPlaneClient::new(conf, &shutdown_pageserver),
|
||||
conf,
|
||||
);
|
||||
if let Some(deletion_workers) = deletion_workers {
|
||||
|
||||
@@ -17,12 +17,9 @@ use utils::{backoff, failpoint_support, generation::Generation, id::NodeId};
|
||||
use crate::{config::PageServerConf, virtual_file::on_fatal_io_error};
|
||||
use pageserver_api::config::NodeMetadata;
|
||||
|
||||
/// The Pageserver's client for using the storage controller upcall API: this is a small API
|
||||
/// for dealing with generations (see docs/rfcs/025-generation-numbers.md).
|
||||
///
|
||||
/// The server presenting this API may either be the storage controller or some other
|
||||
/// service (such as the Neon control plane) providing a store of generation numbers.
|
||||
pub struct ControllerUpcallClient {
|
||||
/// The Pageserver's client for using the control plane API: this is a small subset
|
||||
/// of the overall control plane API, for dealing with generations (see docs/rfcs/025-generation-numbers.md)
|
||||
pub struct ControlPlaneClient {
|
||||
http_client: reqwest::Client,
|
||||
base_url: Url,
|
||||
node_id: NodeId,
|
||||
@@ -48,7 +45,7 @@ pub trait ControlPlaneGenerationsApi {
|
||||
) -> impl Future<Output = Result<HashMap<TenantShardId, bool>, RetryForeverError>> + Send;
|
||||
}
|
||||
|
||||
impl ControllerUpcallClient {
|
||||
impl ControlPlaneClient {
|
||||
/// A None return value indicates that the input `conf` object does not have control
|
||||
/// plane API enabled.
|
||||
pub fn new(conf: &'static PageServerConf, cancel: &CancellationToken) -> Option<Self> {
|
||||
@@ -117,7 +114,7 @@ impl ControllerUpcallClient {
|
||||
}
|
||||
}
|
||||
|
||||
impl ControlPlaneGenerationsApi for ControllerUpcallClient {
|
||||
impl ControlPlaneGenerationsApi for ControlPlaneClient {
|
||||
/// Block until we get a successful response, or error out if we are shut down
|
||||
async fn re_attach(
|
||||
&self,
|
||||
@@ -219,38 +216,29 @@ impl ControlPlaneGenerationsApi for ControllerUpcallClient {
|
||||
.join("validate")
|
||||
.expect("Failed to build validate path");
|
||||
|
||||
// When sending validate requests, break them up into chunks so that we
|
||||
// avoid possible edge cases of generating any HTTP requests that
|
||||
// require database I/O across many thousands of tenants.
|
||||
let mut result: HashMap<TenantShardId, bool> = HashMap::with_capacity(tenants.len());
|
||||
for tenant_chunk in (tenants).chunks(128) {
|
||||
let request = ValidateRequest {
|
||||
tenants: tenant_chunk
|
||||
.iter()
|
||||
.map(|(id, generation)| ValidateRequestTenant {
|
||||
id: *id,
|
||||
gen: (*generation).into().expect(
|
||||
"Generation should always be valid for a Tenant doing deletions",
|
||||
),
|
||||
})
|
||||
.collect(),
|
||||
};
|
||||
let request = ValidateRequest {
|
||||
tenants: tenants
|
||||
.into_iter()
|
||||
.map(|(id, gen)| ValidateRequestTenant {
|
||||
id,
|
||||
gen: gen
|
||||
.into()
|
||||
.expect("Generation should always be valid for a Tenant doing deletions"),
|
||||
})
|
||||
.collect(),
|
||||
};
|
||||
|
||||
failpoint_support::sleep_millis_async!(
|
||||
"control-plane-client-validate-sleep",
|
||||
&self.cancel
|
||||
);
|
||||
if self.cancel.is_cancelled() {
|
||||
return Err(RetryForeverError::ShuttingDown);
|
||||
}
|
||||
|
||||
let response: ValidateResponse =
|
||||
self.retry_http_forever(&re_attach_path, request).await?;
|
||||
for rt in response.tenants {
|
||||
result.insert(rt.id, rt.valid);
|
||||
}
|
||||
failpoint_support::sleep_millis_async!("control-plane-client-validate-sleep", &self.cancel);
|
||||
if self.cancel.is_cancelled() {
|
||||
return Err(RetryForeverError::ShuttingDown);
|
||||
}
|
||||
|
||||
Ok(result.into_iter().collect())
|
||||
let response: ValidateResponse = self.retry_http_forever(&re_attach_path, request).await?;
|
||||
|
||||
Ok(response
|
||||
.tenants
|
||||
.into_iter()
|
||||
.map(|rt| (rt.id, rt.valid))
|
||||
.collect())
|
||||
}
|
||||
}
|
||||
@@ -6,7 +6,7 @@ use std::collections::HashMap;
|
||||
use std::sync::Arc;
|
||||
use std::time::Duration;
|
||||
|
||||
use crate::controller_upcall_client::ControlPlaneGenerationsApi;
|
||||
use crate::control_plane_client::ControlPlaneGenerationsApi;
|
||||
use crate::metrics;
|
||||
use crate::tenant::remote_timeline_client::remote_layer_path;
|
||||
use crate::tenant::remote_timeline_client::remote_timeline_path;
|
||||
@@ -622,7 +622,7 @@ impl DeletionQueue {
|
||||
/// If remote_storage is None, then the returned workers will also be None.
|
||||
pub fn new<C>(
|
||||
remote_storage: GenericRemoteStorage,
|
||||
controller_upcall_client: Option<C>,
|
||||
control_plane_client: Option<C>,
|
||||
conf: &'static PageServerConf,
|
||||
) -> (Self, Option<DeletionQueueWorkers<C>>)
|
||||
where
|
||||
@@ -662,7 +662,7 @@ impl DeletionQueue {
|
||||
conf,
|
||||
backend_rx,
|
||||
executor_tx,
|
||||
controller_upcall_client,
|
||||
control_plane_client,
|
||||
lsn_table.clone(),
|
||||
cancel.clone(),
|
||||
),
|
||||
@@ -704,7 +704,7 @@ mod test {
|
||||
use tokio::task::JoinHandle;
|
||||
|
||||
use crate::{
|
||||
controller_upcall_client::RetryForeverError,
|
||||
control_plane_client::RetryForeverError,
|
||||
repository::Key,
|
||||
tenant::{harness::TenantHarness, storage_layer::DeltaLayerName},
|
||||
};
|
||||
|
||||
@@ -25,8 +25,8 @@ use tracing::info;
|
||||
use tracing::warn;
|
||||
|
||||
use crate::config::PageServerConf;
|
||||
use crate::controller_upcall_client::ControlPlaneGenerationsApi;
|
||||
use crate::controller_upcall_client::RetryForeverError;
|
||||
use crate::control_plane_client::ControlPlaneGenerationsApi;
|
||||
use crate::control_plane_client::RetryForeverError;
|
||||
use crate::metrics;
|
||||
use crate::virtual_file::MaybeFatalIo;
|
||||
|
||||
@@ -61,7 +61,7 @@ where
|
||||
tx: tokio::sync::mpsc::Sender<DeleterMessage>,
|
||||
|
||||
// Client for calling into control plane API for validation of deletes
|
||||
controller_upcall_client: Option<C>,
|
||||
control_plane_client: Option<C>,
|
||||
|
||||
// DeletionLists which are waiting generation validation. Not safe to
|
||||
// execute until [`validate`] has processed them.
|
||||
@@ -94,7 +94,7 @@ where
|
||||
conf: &'static PageServerConf,
|
||||
rx: tokio::sync::mpsc::Receiver<ValidatorQueueMessage>,
|
||||
tx: tokio::sync::mpsc::Sender<DeleterMessage>,
|
||||
controller_upcall_client: Option<C>,
|
||||
control_plane_client: Option<C>,
|
||||
lsn_table: Arc<std::sync::RwLock<VisibleLsnUpdates>>,
|
||||
cancel: CancellationToken,
|
||||
) -> Self {
|
||||
@@ -102,7 +102,7 @@ where
|
||||
conf,
|
||||
rx,
|
||||
tx,
|
||||
controller_upcall_client,
|
||||
control_plane_client,
|
||||
lsn_table,
|
||||
pending_lists: Vec::new(),
|
||||
validated_lists: Vec::new(),
|
||||
@@ -145,8 +145,8 @@ where
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
let tenants_valid = if let Some(controller_upcall_client) = &self.controller_upcall_client {
|
||||
match controller_upcall_client
|
||||
let tenants_valid = if let Some(control_plane_client) = &self.control_plane_client {
|
||||
match control_plane_client
|
||||
.validate(tenant_generations.iter().map(|(k, v)| (*k, *v)).collect())
|
||||
.await
|
||||
{
|
||||
|
||||
@@ -824,7 +824,7 @@ async fn get_lsn_by_timestamp_handler(
|
||||
|
||||
let lease = if with_lease {
|
||||
timeline
|
||||
.init_lsn_lease(lsn, timeline.get_lsn_lease_length_for_ts(), &ctx)
|
||||
.make_lsn_lease(lsn, timeline.get_lsn_lease_length_for_ts(), &ctx)
|
||||
.inspect_err(|_| {
|
||||
warn!("fail to grant a lease to {}", lsn);
|
||||
})
|
||||
@@ -1692,18 +1692,9 @@ async fn lsn_lease_handler(
|
||||
let timeline =
|
||||
active_timeline_of_active_tenant(&state.tenant_manager, tenant_shard_id, timeline_id)
|
||||
.await?;
|
||||
|
||||
let result = async {
|
||||
timeline
|
||||
.init_lsn_lease(lsn, timeline.get_lsn_lease_length(), &ctx)
|
||||
.map_err(|e| {
|
||||
ApiError::InternalServerError(
|
||||
e.context(format!("invalid lsn lease request at {lsn}")),
|
||||
)
|
||||
})
|
||||
}
|
||||
.instrument(info_span!("init_lsn_lease", tenant_id = %tenant_shard_id.tenant_id, shard_id = %tenant_shard_id.shard_slug(), %timeline_id))
|
||||
.await?;
|
||||
let result = timeline
|
||||
.make_lsn_lease(lsn, timeline.get_lsn_lease_length(), &ctx)
|
||||
.map_err(|e| ApiError::InternalServerError(e.context("lsn lease http handler")))?;
|
||||
|
||||
json_response(StatusCode::OK, result)
|
||||
}
|
||||
|
||||
@@ -6,7 +6,7 @@ pub mod basebackup;
|
||||
pub mod config;
|
||||
pub mod consumption_metrics;
|
||||
pub mod context;
|
||||
pub mod controller_upcall_client;
|
||||
pub mod control_plane_client;
|
||||
pub mod deletion_queue;
|
||||
pub mod disk_usage_eviction_task;
|
||||
pub mod http;
|
||||
|
||||
@@ -8,8 +8,6 @@ use metrics::{
|
||||
};
|
||||
use once_cell::sync::Lazy;
|
||||
use pageserver_api::shard::TenantShardId;
|
||||
use postgres_backend::{is_expected_io_error, QueryError};
|
||||
use pq_proto::framed::ConnectionError;
|
||||
use strum::{EnumCount, VariantNames};
|
||||
use strum_macros::{IntoStaticStr, VariantNames};
|
||||
use tracing::warn;
|
||||
@@ -1510,7 +1508,6 @@ static COMPUTE_STARTUP_BUCKETS: Lazy<[f64; 28]> = Lazy::new(|| {
|
||||
pub(crate) struct BasebackupQueryTime {
|
||||
ok: Histogram,
|
||||
error: Histogram,
|
||||
client_error: Histogram,
|
||||
}
|
||||
|
||||
pub(crate) static BASEBACKUP_QUERY_TIME: Lazy<BasebackupQueryTime> = Lazy::new(|| {
|
||||
@@ -1524,7 +1521,6 @@ pub(crate) static BASEBACKUP_QUERY_TIME: Lazy<BasebackupQueryTime> = Lazy::new(|
|
||||
BasebackupQueryTime {
|
||||
ok: vec.get_metric_with_label_values(&["ok"]).unwrap(),
|
||||
error: vec.get_metric_with_label_values(&["error"]).unwrap(),
|
||||
client_error: vec.get_metric_with_label_values(&["client_error"]).unwrap(),
|
||||
}
|
||||
});
|
||||
|
||||
@@ -1561,7 +1557,7 @@ impl BasebackupQueryTime {
|
||||
}
|
||||
|
||||
impl<'a, 'c> BasebackupQueryTimeOngoingRecording<'a, 'c> {
|
||||
pub(crate) fn observe<T>(self, res: &Result<T, QueryError>) {
|
||||
pub(crate) fn observe<T, E>(self, res: &Result<T, E>) {
|
||||
let elapsed = self.start.elapsed();
|
||||
let ex_throttled = self
|
||||
.ctx
|
||||
@@ -1580,15 +1576,10 @@ impl<'a, 'c> BasebackupQueryTimeOngoingRecording<'a, 'c> {
|
||||
elapsed
|
||||
}
|
||||
};
|
||||
// If you want to change categorize of a specific error, also change it in `log_query_error`.
|
||||
let metric = match res {
|
||||
Ok(_) => &self.parent.ok,
|
||||
Err(QueryError::Disconnected(ConnectionError::Io(io_error)))
|
||||
if is_expected_io_error(io_error) =>
|
||||
{
|
||||
&self.parent.client_error
|
||||
}
|
||||
Err(_) => &self.parent.error,
|
||||
let metric = if res.is_ok() {
|
||||
&self.parent.ok
|
||||
} else {
|
||||
&self.parent.error
|
||||
};
|
||||
metric.observe(ex_throttled.as_secs_f64());
|
||||
}
|
||||
|
||||
@@ -273,20 +273,10 @@ async fn page_service_conn_main(
|
||||
info!("Postgres client disconnected ({io_error})");
|
||||
Ok(())
|
||||
} else {
|
||||
let tenant_id = conn_handler.timeline_handles.tenant_id();
|
||||
Err(io_error).context(format!(
|
||||
"Postgres connection error for tenant_id={:?} client at peer_addr={}",
|
||||
tenant_id, peer_addr
|
||||
))
|
||||
Err(io_error).context("Postgres connection error")
|
||||
}
|
||||
}
|
||||
other => {
|
||||
let tenant_id = conn_handler.timeline_handles.tenant_id();
|
||||
other.context(format!(
|
||||
"Postgres query error for tenant_id={:?} client peer_addr={}",
|
||||
tenant_id, peer_addr
|
||||
))
|
||||
}
|
||||
other => other.context("Postgres query error"),
|
||||
}
|
||||
}
|
||||
|
||||
@@ -350,10 +340,6 @@ impl TimelineHandles {
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
fn tenant_id(&self) -> Option<TenantId> {
|
||||
self.wrapper.tenant_id.get().copied()
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) struct TenantManagerWrapper {
|
||||
@@ -833,7 +819,7 @@ impl PageServerHandler {
|
||||
set_tracing_field_shard_id(&timeline);
|
||||
|
||||
let lease = timeline
|
||||
.renew_lsn_lease(lsn, timeline.get_lsn_lease_length(), ctx)
|
||||
.make_lsn_lease(lsn, timeline.get_lsn_lease_length(), ctx)
|
||||
.inspect_err(|e| {
|
||||
warn!("{e}");
|
||||
})
|
||||
@@ -1011,6 +997,7 @@ impl PageServerHandler {
|
||||
)
|
||||
.await?;
|
||||
|
||||
tracing::info!("get_rel_page_at_lsn: {lsn}");
|
||||
let page = timeline
|
||||
.get_rel_page_at_lsn(req.rel, req.blkno, Version::Lsn(lsn), ctx)
|
||||
.await?;
|
||||
|
||||
@@ -21,7 +21,6 @@ use futures::stream::FuturesUnordered;
|
||||
use futures::StreamExt;
|
||||
use pageserver_api::models;
|
||||
use pageserver_api::models::AuxFilePolicy;
|
||||
use pageserver_api::models::LsnLease;
|
||||
use pageserver_api::models::TimelineArchivalState;
|
||||
use pageserver_api::models::TimelineState;
|
||||
use pageserver_api::models::TopTenantShardItem;
|
||||
@@ -183,54 +182,27 @@ pub struct TenantSharedResources {
|
||||
pub(super) struct AttachedTenantConf {
|
||||
tenant_conf: TenantConfOpt,
|
||||
location: AttachedLocationConfig,
|
||||
/// The deadline before which we are blocked from GC so that
|
||||
/// leases have a chance to be renewed.
|
||||
lsn_lease_deadline: Option<tokio::time::Instant>,
|
||||
}
|
||||
|
||||
impl AttachedTenantConf {
|
||||
fn new(tenant_conf: TenantConfOpt, location: AttachedLocationConfig) -> Self {
|
||||
// Sets a deadline before which we cannot proceed to GC due to lsn lease.
|
||||
//
|
||||
// We do this as the leases mapping are not persisted to disk. By delaying GC by lease
|
||||
// length, we guarantee that all the leases we granted before will have a chance to renew
|
||||
// when we run GC for the first time after restart / transition from AttachedMulti to AttachedSingle.
|
||||
let lsn_lease_deadline = if location.attach_mode == AttachmentMode::Single {
|
||||
Some(
|
||||
tokio::time::Instant::now()
|
||||
+ tenant_conf
|
||||
.lsn_lease_length
|
||||
.unwrap_or(LsnLease::DEFAULT_LENGTH),
|
||||
)
|
||||
} else {
|
||||
// We don't use `lsn_lease_deadline` to delay GC in AttachedMulti and AttachedStale
|
||||
// because we don't do GC in these modes.
|
||||
None
|
||||
};
|
||||
|
||||
Self {
|
||||
tenant_conf,
|
||||
location,
|
||||
lsn_lease_deadline,
|
||||
}
|
||||
}
|
||||
|
||||
fn try_from(location_conf: LocationConf) -> anyhow::Result<Self> {
|
||||
match &location_conf.mode {
|
||||
LocationMode::Attached(attach_conf) => {
|
||||
Ok(Self::new(location_conf.tenant_conf, *attach_conf))
|
||||
}
|
||||
LocationMode::Attached(attach_conf) => Ok(Self {
|
||||
tenant_conf: location_conf.tenant_conf,
|
||||
location: *attach_conf,
|
||||
}),
|
||||
LocationMode::Secondary(_) => {
|
||||
anyhow::bail!("Attempted to construct AttachedTenantConf from a LocationConf in secondary mode")
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn is_gc_blocked_by_lsn_lease_deadline(&self) -> bool {
|
||||
self.lsn_lease_deadline
|
||||
.map(|d| tokio::time::Instant::now() < d)
|
||||
.unwrap_or(false)
|
||||
}
|
||||
}
|
||||
struct TimelinePreload {
|
||||
timeline_id: TimelineId,
|
||||
@@ -1850,11 +1822,6 @@ impl Tenant {
|
||||
info!("Skipping GC in location state {:?}", conf.location);
|
||||
return Ok(GcResult::default());
|
||||
}
|
||||
|
||||
if conf.is_gc_blocked_by_lsn_lease_deadline() {
|
||||
info!("Skipping GC because lsn lease deadline is not reached");
|
||||
return Ok(GcResult::default());
|
||||
}
|
||||
}
|
||||
|
||||
let _guard = match self.gc_block.start().await {
|
||||
@@ -2663,8 +2630,6 @@ impl Tenant {
|
||||
Arc::new(AttachedTenantConf {
|
||||
tenant_conf: new_tenant_conf.clone(),
|
||||
location: inner.location,
|
||||
// Attached location is not changed, no need to update lsn lease deadline.
|
||||
lsn_lease_deadline: inner.lsn_lease_deadline,
|
||||
})
|
||||
});
|
||||
|
||||
@@ -3922,9 +3887,9 @@ async fn run_initdb(
|
||||
let _permit = INIT_DB_SEMAPHORE.acquire().await;
|
||||
|
||||
let initdb_command = tokio::process::Command::new(&initdb_bin_path)
|
||||
.args(["--pgdata", initdb_target_dir.as_ref()])
|
||||
.args(["--username", &conf.superuser])
|
||||
.args(["--encoding", "utf8"])
|
||||
.args(["-D", initdb_target_dir.as_ref()])
|
||||
.args(["-U", &conf.superuser])
|
||||
.args(["-E", "utf8"])
|
||||
.arg("--no-instructions")
|
||||
.arg("--no-sync")
|
||||
.env_clear()
|
||||
@@ -4496,17 +4461,13 @@ mod tests {
|
||||
tline.freeze_and_flush().await.map_err(|e| e.into())
|
||||
}
|
||||
|
||||
#[tokio::test(start_paused = true)]
|
||||
#[tokio::test]
|
||||
async fn test_prohibit_branch_creation_on_garbage_collected_data() -> anyhow::Result<()> {
|
||||
let (tenant, ctx) =
|
||||
TenantHarness::create("test_prohibit_branch_creation_on_garbage_collected_data")
|
||||
.await?
|
||||
.load()
|
||||
.await;
|
||||
// Advance to the lsn lease deadline so that GC is not blocked by
|
||||
// initial transition into AttachedSingle.
|
||||
tokio::time::advance(tenant.get_lsn_lease_length()).await;
|
||||
tokio::time::resume();
|
||||
let tline = tenant
|
||||
.create_test_timeline(TIMELINE_ID, Lsn(0x10), DEFAULT_PG_VERSION, &ctx)
|
||||
.await?;
|
||||
@@ -7283,17 +7244,9 @@ mod tests {
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[tokio::test(start_paused = true)]
|
||||
#[tokio::test]
|
||||
async fn test_lsn_lease() -> anyhow::Result<()> {
|
||||
let (tenant, ctx) = TenantHarness::create("test_lsn_lease")
|
||||
.await
|
||||
.unwrap()
|
||||
.load()
|
||||
.await;
|
||||
// Advance to the lsn lease deadline so that GC is not blocked by
|
||||
// initial transition into AttachedSingle.
|
||||
tokio::time::advance(tenant.get_lsn_lease_length()).await;
|
||||
tokio::time::resume();
|
||||
let (tenant, ctx) = TenantHarness::create("test_lsn_lease").await?.load().await;
|
||||
let key = Key::from_hex("010000000033333333444444445500000000").unwrap();
|
||||
|
||||
let end_lsn = Lsn(0x100);
|
||||
@@ -7321,33 +7274,24 @@ mod tests {
|
||||
|
||||
let leased_lsns = [0x30, 0x50, 0x70];
|
||||
let mut leases = Vec::new();
|
||||
leased_lsns.iter().for_each(|n| {
|
||||
leases.push(
|
||||
timeline
|
||||
.init_lsn_lease(Lsn(*n), timeline.get_lsn_lease_length(), &ctx)
|
||||
.expect("lease request should succeed"),
|
||||
);
|
||||
let _: anyhow::Result<_> = leased_lsns.iter().try_for_each(|n| {
|
||||
leases.push(timeline.make_lsn_lease(Lsn(*n), timeline.get_lsn_lease_length(), &ctx)?);
|
||||
Ok(())
|
||||
});
|
||||
|
||||
let updated_lease_0 = timeline
|
||||
.renew_lsn_lease(Lsn(leased_lsns[0]), Duration::from_secs(0), &ctx)
|
||||
.expect("lease renewal should succeed");
|
||||
assert_eq!(
|
||||
updated_lease_0.valid_until, leases[0].valid_until,
|
||||
" Renewing with shorter lease should not change the lease."
|
||||
);
|
||||
// Renewing with shorter lease should not change the lease.
|
||||
let updated_lease_0 =
|
||||
timeline.make_lsn_lease(Lsn(leased_lsns[0]), Duration::from_secs(0), &ctx)?;
|
||||
assert_eq!(updated_lease_0.valid_until, leases[0].valid_until);
|
||||
|
||||
let updated_lease_1 = timeline
|
||||
.renew_lsn_lease(
|
||||
Lsn(leased_lsns[1]),
|
||||
timeline.get_lsn_lease_length() * 2,
|
||||
&ctx,
|
||||
)
|
||||
.expect("lease renewal should succeed");
|
||||
assert!(
|
||||
updated_lease_1.valid_until > leases[1].valid_until,
|
||||
"Renewing with a long lease should renew lease with later expiration time."
|
||||
);
|
||||
// Renewing with a long lease should renew lease with later expiration time.
|
||||
let updated_lease_1 = timeline.make_lsn_lease(
|
||||
Lsn(leased_lsns[1]),
|
||||
timeline.get_lsn_lease_length() * 2,
|
||||
&ctx,
|
||||
)?;
|
||||
|
||||
assert!(updated_lease_1.valid_until > leases[1].valid_until);
|
||||
|
||||
// Force set disk consistent lsn so we can get the cutoff at `end_lsn`.
|
||||
info!(
|
||||
@@ -7364,8 +7308,7 @@ mod tests {
|
||||
&CancellationToken::new(),
|
||||
&ctx,
|
||||
)
|
||||
.await
|
||||
.unwrap();
|
||||
.await?;
|
||||
|
||||
// Keeping everything <= Lsn(0x80) b/c leases:
|
||||
// 0/10: initdb layer
|
||||
@@ -7379,16 +7322,13 @@ mod tests {
|
||||
// Make lease on a already GC-ed LSN.
|
||||
// 0/80 does not have a valid lease + is below latest_gc_cutoff
|
||||
assert!(Lsn(0x80) < *timeline.get_latest_gc_cutoff_lsn());
|
||||
timeline
|
||||
.init_lsn_lease(Lsn(0x80), timeline.get_lsn_lease_length(), &ctx)
|
||||
.expect_err("lease request on GC-ed LSN should fail");
|
||||
let res = timeline.make_lsn_lease(Lsn(0x80), timeline.get_lsn_lease_length(), &ctx);
|
||||
assert!(res.is_err());
|
||||
|
||||
// Should still be able to renew a currently valid lease
|
||||
// Assumption: original lease to is still valid for 0/50.
|
||||
// (use `Timeline::init_lsn_lease` for testing so it always does validation)
|
||||
timeline
|
||||
.init_lsn_lease(Lsn(leased_lsns[1]), timeline.get_lsn_lease_length(), &ctx)
|
||||
.expect("lease renewal with validation should succeed");
|
||||
let _ =
|
||||
timeline.make_lsn_lease(Lsn(leased_lsns[1]), timeline.get_lsn_lease_length(), &ctx)?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
@@ -1,12 +1,29 @@
|
||||
use std::collections::HashMap;
|
||||
|
||||
use utils::id::TimelineId;
|
||||
use std::{collections::HashMap, time::Duration};
|
||||
|
||||
use super::remote_timeline_client::index::GcBlockingReason;
|
||||
use tokio::time::Instant;
|
||||
use utils::id::TimelineId;
|
||||
|
||||
type Storage = HashMap<TimelineId, enumset::EnumSet<GcBlockingReason>>;
|
||||
type TimelinesBlocked = HashMap<TimelineId, enumset::EnumSet<GcBlockingReason>>;
|
||||
|
||||
/// GcBlock provides persistent (per-timeline) gc blocking.
|
||||
#[derive(Default)]
|
||||
struct Storage {
|
||||
timelines_blocked: TimelinesBlocked,
|
||||
/// The deadline before which we are blocked from GC so that
|
||||
/// leases have a chance to be renewed.
|
||||
lsn_lease_deadline: Option<Instant>,
|
||||
}
|
||||
|
||||
impl Storage {
|
||||
fn is_blocked_by_lsn_lease_deadline(&self) -> bool {
|
||||
self.lsn_lease_deadline
|
||||
.map(|d| Instant::now() < d)
|
||||
.unwrap_or(false)
|
||||
}
|
||||
}
|
||||
|
||||
/// GcBlock provides persistent (per-timeline) gc blocking and facilitates transient time based gc
|
||||
/// blocking.
|
||||
#[derive(Default)]
|
||||
pub(crate) struct GcBlock {
|
||||
/// The timelines which have current reasons to block gc.
|
||||
@@ -49,6 +66,17 @@ impl GcBlock {
|
||||
}
|
||||
}
|
||||
|
||||
/// Sets a deadline before which we cannot proceed to GC due to lsn lease.
|
||||
///
|
||||
/// We do this as the leases mapping are not persisted to disk. By delaying GC by lease
|
||||
/// length, we guarantee that all the leases we granted before will have a chance to renew
|
||||
/// when we run GC for the first time after restart / transition from AttachedMulti to AttachedSingle.
|
||||
pub(super) fn set_lsn_lease_deadline(&self, lsn_lease_length: Duration) {
|
||||
let deadline = Instant::now() + lsn_lease_length;
|
||||
let mut g = self.reasons.lock().unwrap();
|
||||
g.lsn_lease_deadline = Some(deadline);
|
||||
}
|
||||
|
||||
/// Describe the current gc blocking reasons.
|
||||
///
|
||||
/// TODO: make this json serializable.
|
||||
@@ -74,7 +102,7 @@ impl GcBlock {
|
||||
) -> anyhow::Result<bool> {
|
||||
let (added, uploaded) = {
|
||||
let mut g = self.reasons.lock().unwrap();
|
||||
let set = g.entry(timeline.timeline_id).or_default();
|
||||
let set = g.timelines_blocked.entry(timeline.timeline_id).or_default();
|
||||
let added = set.insert(reason);
|
||||
|
||||
// LOCK ORDER: intentionally hold the lock, see self.reasons.
|
||||
@@ -105,7 +133,7 @@ impl GcBlock {
|
||||
|
||||
let (remaining_blocks, uploaded) = {
|
||||
let mut g = self.reasons.lock().unwrap();
|
||||
match g.entry(timeline.timeline_id) {
|
||||
match g.timelines_blocked.entry(timeline.timeline_id) {
|
||||
Entry::Occupied(mut oe) => {
|
||||
let set = oe.get_mut();
|
||||
set.remove(reason);
|
||||
@@ -119,7 +147,7 @@ impl GcBlock {
|
||||
}
|
||||
}
|
||||
|
||||
let remaining_blocks = g.len();
|
||||
let remaining_blocks = g.timelines_blocked.len();
|
||||
|
||||
// LOCK ORDER: intentionally hold the lock while scheduling; see self.reasons
|
||||
let uploaded = timeline
|
||||
@@ -144,11 +172,11 @@ impl GcBlock {
|
||||
pub(crate) fn before_delete(&self, timeline: &super::Timeline) {
|
||||
let unblocked = {
|
||||
let mut g = self.reasons.lock().unwrap();
|
||||
if g.is_empty() {
|
||||
if g.timelines_blocked.is_empty() {
|
||||
return;
|
||||
}
|
||||
|
||||
g.remove(&timeline.timeline_id);
|
||||
g.timelines_blocked.remove(&timeline.timeline_id);
|
||||
|
||||
BlockingReasons::clean_and_summarize(g).is_none()
|
||||
};
|
||||
@@ -159,10 +187,11 @@ impl GcBlock {
|
||||
}
|
||||
|
||||
/// Initialize with the non-deleted timelines of this tenant.
|
||||
pub(crate) fn set_scanned(&self, scanned: Storage) {
|
||||
pub(crate) fn set_scanned(&self, scanned: TimelinesBlocked) {
|
||||
let mut g = self.reasons.lock().unwrap();
|
||||
assert!(g.is_empty());
|
||||
g.extend(scanned.into_iter().filter(|(_, v)| !v.is_empty()));
|
||||
assert!(g.timelines_blocked.is_empty());
|
||||
g.timelines_blocked
|
||||
.extend(scanned.into_iter().filter(|(_, v)| !v.is_empty()));
|
||||
|
||||
if let Some(reasons) = BlockingReasons::clean_and_summarize(g) {
|
||||
tracing::info!(summary=?reasons, "initialized with gc blocked");
|
||||
@@ -176,6 +205,7 @@ pub(super) struct Guard<'a> {
|
||||
|
||||
#[derive(Debug)]
|
||||
pub(crate) struct BlockingReasons {
|
||||
tenant_blocked_by_lsn_lease_deadline: bool,
|
||||
timelines: usize,
|
||||
reasons: enumset::EnumSet<GcBlockingReason>,
|
||||
}
|
||||
@@ -184,8 +214,8 @@ impl std::fmt::Display for BlockingReasons {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
write!(
|
||||
f,
|
||||
"{} timelines block for {:?}",
|
||||
self.timelines, self.reasons
|
||||
"tenant_blocked_by_lsn_lease_deadline: {}, {} timelines block for {:?}",
|
||||
self.tenant_blocked_by_lsn_lease_deadline, self.timelines, self.reasons
|
||||
)
|
||||
}
|
||||
}
|
||||
@@ -193,13 +223,15 @@ impl std::fmt::Display for BlockingReasons {
|
||||
impl BlockingReasons {
|
||||
fn clean_and_summarize(mut g: std::sync::MutexGuard<'_, Storage>) -> Option<Self> {
|
||||
let mut reasons = enumset::EnumSet::empty();
|
||||
g.retain(|_key, value| {
|
||||
g.timelines_blocked.retain(|_key, value| {
|
||||
reasons = reasons.union(*value);
|
||||
!value.is_empty()
|
||||
});
|
||||
if !g.is_empty() {
|
||||
let blocked_by_lsn_lease_deadline = g.is_blocked_by_lsn_lease_deadline();
|
||||
if !g.timelines_blocked.is_empty() || blocked_by_lsn_lease_deadline {
|
||||
Some(BlockingReasons {
|
||||
timelines: g.len(),
|
||||
tenant_blocked_by_lsn_lease_deadline: blocked_by_lsn_lease_deadline,
|
||||
timelines: g.timelines_blocked.len(),
|
||||
reasons,
|
||||
})
|
||||
} else {
|
||||
@@ -208,14 +240,17 @@ impl BlockingReasons {
|
||||
}
|
||||
|
||||
fn summarize(g: &std::sync::MutexGuard<'_, Storage>) -> Option<Self> {
|
||||
if g.is_empty() {
|
||||
let blocked_by_lsn_lease_deadline = g.is_blocked_by_lsn_lease_deadline();
|
||||
if g.timelines_blocked.is_empty() && !blocked_by_lsn_lease_deadline {
|
||||
None
|
||||
} else {
|
||||
let reasons = g
|
||||
.timelines_blocked
|
||||
.values()
|
||||
.fold(enumset::EnumSet::empty(), |acc, next| acc.union(*next));
|
||||
Some(BlockingReasons {
|
||||
timelines: g.len(),
|
||||
tenant_blocked_by_lsn_lease_deadline: blocked_by_lsn_lease_deadline,
|
||||
timelines: g.timelines_blocked.len(),
|
||||
reasons,
|
||||
})
|
||||
}
|
||||
|
||||
@@ -1470,4 +1470,52 @@ mod tests {
|
||||
LayerVisibilityHint::Visible
|
||||
));
|
||||
}
|
||||
|
||||
/// Exercise edge case of querying at exactly the LSN of an image layer
|
||||
#[test]
|
||||
fn layer_search_at_image_lsn() {
|
||||
let tenant_id = TenantId::generate();
|
||||
let tenant_shard_id = TenantShardId::unsharded(tenant_id);
|
||||
let timeline_id = TimelineId::generate();
|
||||
|
||||
let last_record_lsn = Lsn::from_hex("00000000DEADBEEF").unwrap();
|
||||
|
||||
let mut layer_map = LayerMap::default();
|
||||
let mut updates = layer_map.batch_update();
|
||||
|
||||
let image_layer = PersistentLayerDesc {
|
||||
key_range: Key::from_i128(0)..Key::from_i128(i128::MAX),
|
||||
lsn_range: PersistentLayerDesc::image_layer_lsn_range(last_record_lsn),
|
||||
tenant_shard_id,
|
||||
timeline_id,
|
||||
is_delta: false,
|
||||
file_size: 123,
|
||||
};
|
||||
|
||||
let delta_layer = PersistentLayerDesc {
|
||||
key_range: Key::from_i128(0)..Key::from_i128(i128::MAX),
|
||||
lsn_range: Lsn(0)..Lsn(0xdead0000),
|
||||
tenant_shard_id,
|
||||
timeline_id,
|
||||
is_delta: true,
|
||||
file_size: 123,
|
||||
};
|
||||
|
||||
updates.insert_historic(image_layer.clone());
|
||||
updates.insert_historic(delta_layer);
|
||||
|
||||
updates.flush();
|
||||
|
||||
// FIXME: according to the search() docstring, it searches for layers with start LSNs _less then_
|
||||
// `end_lsn` -- i.e. it's correct that if you ask for exactly the LSN of an image layer, it shouldn't hit
|
||||
// it. However, the way that page_service calls it is to take the last_record_lsn of a Timeline
|
||||
// and pass that directly into LayerMap::search().
|
||||
|
||||
let searched = layer_map
|
||||
.search(Key::from_i128(12345), last_record_lsn)
|
||||
.unwrap();
|
||||
|
||||
// We searched at the LSN of the image layer: we should hit it
|
||||
assert_eq!(searched.layer.as_ref(), &image_layer);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -30,8 +30,8 @@ use utils::{backoff, completion, crashsafe};
|
||||
|
||||
use crate::config::PageServerConf;
|
||||
use crate::context::{DownloadBehavior, RequestContext};
|
||||
use crate::controller_upcall_client::{
|
||||
ControlPlaneGenerationsApi, ControllerUpcallClient, RetryForeverError,
|
||||
use crate::control_plane_client::{
|
||||
ControlPlaneClient, ControlPlaneGenerationsApi, RetryForeverError,
|
||||
};
|
||||
use crate::deletion_queue::DeletionQueueClient;
|
||||
use crate::http::routes::ACTIVE_TENANT_TIMEOUT;
|
||||
@@ -122,7 +122,7 @@ pub(crate) enum ShardSelector {
|
||||
Known(ShardIndex),
|
||||
}
|
||||
|
||||
/// A convenience for use with the re_attach ControllerUpcallClient function: rather
|
||||
/// A convenience for use with the re_attach ControlPlaneClient function: rather
|
||||
/// than the serializable struct, we build this enum that encapsulates
|
||||
/// the invariant that attached tenants always have generations.
|
||||
///
|
||||
@@ -219,11 +219,7 @@ async fn safe_rename_tenant_dir(path: impl AsRef<Utf8Path>) -> std::io::Result<U
|
||||
+ TEMP_FILE_SUFFIX;
|
||||
let tmp_path = path_with_suffix_extension(&path, &rand_suffix);
|
||||
fs::rename(path.as_ref(), &tmp_path).await?;
|
||||
fs::File::open(parent)
|
||||
.await?
|
||||
.sync_all()
|
||||
.await
|
||||
.maybe_fatal_err("safe_rename_tenant_dir")?;
|
||||
fs::File::open(parent).await?.sync_all().await?;
|
||||
Ok(tmp_path)
|
||||
}
|
||||
|
||||
@@ -345,7 +341,7 @@ async fn init_load_generations(
|
||||
"Emergency mode! Tenants will be attached unsafely using their last known generation"
|
||||
);
|
||||
emergency_generations(tenant_confs)
|
||||
} else if let Some(client) = ControllerUpcallClient::new(conf, cancel) {
|
||||
} else if let Some(client) = ControlPlaneClient::new(conf, cancel) {
|
||||
info!("Calling control plane API to re-attach tenants");
|
||||
// If we are configured to use the control plane API, then it is the source of truth for what tenants to load.
|
||||
match client.re_attach(conf).await {
|
||||
@@ -953,6 +949,12 @@ impl TenantManager {
|
||||
(LocationMode::Attached(attach_conf), Some(TenantSlot::Attached(tenant))) => {
|
||||
match attach_conf.generation.cmp(&tenant.generation) {
|
||||
Ordering::Equal => {
|
||||
if attach_conf.attach_mode == AttachmentMode::Single {
|
||||
tenant
|
||||
.gc_block
|
||||
.set_lsn_lease_deadline(tenant.get_lsn_lease_length());
|
||||
}
|
||||
|
||||
// A transition from Attached to Attached in the same generation, we may
|
||||
// take our fast path and just provide the updated configuration
|
||||
// to the tenant.
|
||||
|
||||
@@ -178,7 +178,6 @@ async fn download_object<'a>(
|
||||
destination_file
|
||||
.flush()
|
||||
.await
|
||||
.maybe_fatal_err("download_object sync_all")
|
||||
.with_context(|| format!("flush source file at {dst_path}"))
|
||||
.map_err(DownloadError::Other)?;
|
||||
|
||||
@@ -186,7 +185,6 @@ async fn download_object<'a>(
|
||||
destination_file
|
||||
.sync_all()
|
||||
.await
|
||||
.maybe_fatal_err("download_object sync_all")
|
||||
.with_context(|| format!("failed to fsync source file at {dst_path}"))
|
||||
.map_err(DownloadError::Other)?;
|
||||
|
||||
@@ -234,7 +232,6 @@ async fn download_object<'a>(
|
||||
destination_file
|
||||
.sync_all()
|
||||
.await
|
||||
.maybe_fatal_err("download_object sync_all")
|
||||
.with_context(|| format!("failed to fsync source file at {dst_path}"))
|
||||
.map_err(DownloadError::Other)?;
|
||||
|
||||
|
||||
@@ -433,6 +433,7 @@ impl ReadableLayer {
|
||||
reconstruct_state: &mut ValuesReconstructState,
|
||||
ctx: &RequestContext,
|
||||
) -> Result<(), GetVectoredError> {
|
||||
tracing::info!("get_values_reconstruct_data: {:?}", self.id());
|
||||
match self {
|
||||
ReadableLayer::PersistentLayer(layer) => {
|
||||
layer
|
||||
|
||||
@@ -40,11 +40,11 @@ use crate::tenant::storage_layer::layer::S3_UPLOAD_LIMIT;
|
||||
use crate::tenant::timeline::GetVectoredError;
|
||||
use crate::tenant::vectored_blob_io::{
|
||||
BlobFlag, BufView, StreamingVectoredReadPlanner, VectoredBlobReader, VectoredRead,
|
||||
VectoredReadPlanner,
|
||||
VectoredReadCoalesceMode, VectoredReadPlanner,
|
||||
};
|
||||
use crate::tenant::PageReconstructError;
|
||||
use crate::virtual_file::owned_buffers_io::io_buf_ext::{FullSlice, IoBufExt};
|
||||
use crate::virtual_file::{self, MaybeFatalIo, VirtualFile};
|
||||
use crate::virtual_file::{self, VirtualFile};
|
||||
use crate::{walrecord, TEMP_FILE_SUFFIX};
|
||||
use crate::{DELTA_FILE_MAGIC, STORAGE_FORMAT_VERSION};
|
||||
use anyhow::{anyhow, bail, ensure, Context, Result};
|
||||
@@ -589,9 +589,7 @@ impl DeltaLayerWriterInner {
|
||||
);
|
||||
|
||||
// fsync the file
|
||||
file.sync_all()
|
||||
.await
|
||||
.maybe_fatal_err("delta_layer sync_all")?;
|
||||
file.sync_all().await?;
|
||||
|
||||
trace!("created delta layer {}", self.path);
|
||||
|
||||
@@ -1135,7 +1133,7 @@ impl DeltaLayerInner {
|
||||
ctx: &RequestContext,
|
||||
) -> anyhow::Result<usize> {
|
||||
use crate::tenant::vectored_blob_io::{
|
||||
BlobMeta, ChunkedVectoredReadBuilder, VectoredReadExtended,
|
||||
BlobMeta, VectoredReadBuilder, VectoredReadExtended,
|
||||
};
|
||||
use futures::stream::TryStreamExt;
|
||||
|
||||
@@ -1185,8 +1183,8 @@ impl DeltaLayerInner {
|
||||
|
||||
let mut prev: Option<(Key, Lsn, BlobRef)> = None;
|
||||
|
||||
let mut read_builder: Option<ChunkedVectoredReadBuilder> = None;
|
||||
let align = virtual_file::get_io_buffer_alignment();
|
||||
let mut read_builder: Option<VectoredReadBuilder> = None;
|
||||
let read_mode = VectoredReadCoalesceMode::get();
|
||||
|
||||
let max_read_size = self
|
||||
.max_vectored_read_bytes
|
||||
@@ -1230,12 +1228,12 @@ impl DeltaLayerInner {
|
||||
{
|
||||
None
|
||||
} else {
|
||||
read_builder.replace(ChunkedVectoredReadBuilder::new(
|
||||
read_builder.replace(VectoredReadBuilder::new(
|
||||
offsets.start.pos(),
|
||||
offsets.end.pos(),
|
||||
meta,
|
||||
max_read_size,
|
||||
align,
|
||||
read_mode,
|
||||
))
|
||||
}
|
||||
} else {
|
||||
|
||||
@@ -41,7 +41,7 @@ use crate::tenant::vectored_blob_io::{
|
||||
};
|
||||
use crate::tenant::PageReconstructError;
|
||||
use crate::virtual_file::owned_buffers_io::io_buf_ext::IoBufExt;
|
||||
use crate::virtual_file::{self, MaybeFatalIo, VirtualFile};
|
||||
use crate::virtual_file::{self, VirtualFile};
|
||||
use crate::{IMAGE_FILE_MAGIC, STORAGE_FORMAT_VERSION, TEMP_FILE_SUFFIX};
|
||||
use anyhow::{anyhow, bail, ensure, Context, Result};
|
||||
use bytes::{Bytes, BytesMut};
|
||||
@@ -889,9 +889,7 @@ impl ImageLayerWriterInner {
|
||||
// set inner.file here. The first read will have to re-open it.
|
||||
|
||||
// fsync the file
|
||||
file.sync_all()
|
||||
.await
|
||||
.maybe_fatal_err("image_layer sync_all")?;
|
||||
file.sync_all().await?;
|
||||
|
||||
trace!("created image layer {}", self.path);
|
||||
|
||||
|
||||
@@ -330,6 +330,7 @@ async fn gc_loop(tenant: Arc<Tenant>, cancel: CancellationToken) {
|
||||
RequestContext::todo_child(TaskKind::GarbageCollector, DownloadBehavior::Download);
|
||||
|
||||
let mut first = true;
|
||||
tenant.gc_block.set_lsn_lease_deadline(tenant.get_lsn_lease_length());
|
||||
loop {
|
||||
tokio::select! {
|
||||
_ = cancel.cancelled() => {
|
||||
|
||||
@@ -66,7 +66,6 @@ use std::{
|
||||
use crate::{
|
||||
aux_file::AuxFileSizeEstimator,
|
||||
tenant::{
|
||||
config::AttachmentMode,
|
||||
layer_map::{LayerMap, SearchResult},
|
||||
metadata::TimelineMetadata,
|
||||
storage_layer::{inmemory_layer::IndexEntry, PersistentLayerDesc},
|
||||
@@ -1325,38 +1324,16 @@ impl Timeline {
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Initializes an LSN lease. The function will return an error if the requested LSN is less than the `latest_gc_cutoff_lsn`.
|
||||
pub(crate) fn init_lsn_lease(
|
||||
&self,
|
||||
lsn: Lsn,
|
||||
length: Duration,
|
||||
ctx: &RequestContext,
|
||||
) -> anyhow::Result<LsnLease> {
|
||||
self.make_lsn_lease(lsn, length, true, ctx)
|
||||
}
|
||||
|
||||
/// Renews a lease at a particular LSN. The requested LSN is not validated against the `latest_gc_cutoff_lsn` when we are in the grace period.
|
||||
pub(crate) fn renew_lsn_lease(
|
||||
&self,
|
||||
lsn: Lsn,
|
||||
length: Duration,
|
||||
ctx: &RequestContext,
|
||||
) -> anyhow::Result<LsnLease> {
|
||||
self.make_lsn_lease(lsn, length, false, ctx)
|
||||
}
|
||||
|
||||
/// Obtains a temporary lease blocking garbage collection for the given LSN.
|
||||
///
|
||||
/// If we are in `AttachedSingle` mode and is not blocked by the lsn lease deadline, this function will error
|
||||
/// if the requesting LSN is less than the `latest_gc_cutoff_lsn` and there is no existing request present.
|
||||
///
|
||||
/// If there is an existing lease in the map, the lease will be renewed only if the request extends the lease.
|
||||
/// The returned lease is therefore the maximum between the existing lease and the requesting lease.
|
||||
fn make_lsn_lease(
|
||||
/// This function will error if the requesting LSN is less than the `latest_gc_cutoff_lsn` and there is also
|
||||
/// no existing lease to renew. If there is an existing lease in the map, the lease will be renewed only if
|
||||
/// the request extends the lease. The returned lease is therefore the maximum between the existing lease and
|
||||
/// the requesting lease.
|
||||
pub(crate) fn make_lsn_lease(
|
||||
&self,
|
||||
lsn: Lsn,
|
||||
length: Duration,
|
||||
init: bool,
|
||||
_ctx: &RequestContext,
|
||||
) -> anyhow::Result<LsnLease> {
|
||||
let lease = {
|
||||
@@ -1370,8 +1347,8 @@ impl Timeline {
|
||||
|
||||
let entry = gc_info.leases.entry(lsn);
|
||||
|
||||
match entry {
|
||||
Entry::Occupied(mut occupied) => {
|
||||
let lease = {
|
||||
if let Entry::Occupied(mut occupied) = entry {
|
||||
let existing_lease = occupied.get_mut();
|
||||
if valid_until > existing_lease.valid_until {
|
||||
existing_lease.valid_until = valid_until;
|
||||
@@ -1383,28 +1360,20 @@ impl Timeline {
|
||||
}
|
||||
|
||||
existing_lease.clone()
|
||||
}
|
||||
Entry::Vacant(vacant) => {
|
||||
// Reject already GC-ed LSN (lsn < latest_gc_cutoff) if we are in AttachedSingle and
|
||||
// not blocked by the lsn lease deadline.
|
||||
let validate = {
|
||||
let conf = self.tenant_conf.load();
|
||||
conf.location.attach_mode == AttachmentMode::Single
|
||||
&& !conf.is_gc_blocked_by_lsn_lease_deadline()
|
||||
};
|
||||
|
||||
if init || validate {
|
||||
let latest_gc_cutoff_lsn = self.get_latest_gc_cutoff_lsn();
|
||||
if lsn < *latest_gc_cutoff_lsn {
|
||||
bail!("tried to request a page version that was garbage collected. requested at {} gc cutoff {}", lsn, *latest_gc_cutoff_lsn);
|
||||
}
|
||||
} else {
|
||||
// Reject already GC-ed LSN (lsn < latest_gc_cutoff)
|
||||
let latest_gc_cutoff_lsn = self.get_latest_gc_cutoff_lsn();
|
||||
if lsn < *latest_gc_cutoff_lsn {
|
||||
bail!("tried to request a page version that was garbage collected. requested at {} gc cutoff {}", lsn, *latest_gc_cutoff_lsn);
|
||||
}
|
||||
|
||||
let dt: DateTime<Utc> = valid_until.into();
|
||||
info!("lease created, valid until {}", dt);
|
||||
vacant.insert(LsnLease { valid_until }).clone()
|
||||
entry.or_insert(LsnLease { valid_until }).clone()
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
lease
|
||||
};
|
||||
|
||||
Ok(lease)
|
||||
@@ -1981,6 +1950,8 @@ impl Timeline {
|
||||
.unwrap_or(self.conf.default_tenant_conf.lsn_lease_length)
|
||||
}
|
||||
|
||||
// TODO(yuchen): remove unused flag after implementing https://github.com/neondatabase/neon/issues/8072
|
||||
#[allow(unused)]
|
||||
pub(crate) fn get_lsn_lease_length_for_ts(&self) -> Duration {
|
||||
let tenant_conf = self.tenant_conf.load();
|
||||
tenant_conf
|
||||
@@ -3885,21 +3856,21 @@ impl Timeline {
|
||||
)));
|
||||
}
|
||||
|
||||
let distance = lsn.0 - partition_lsn.0;
|
||||
if *partition_lsn != Lsn(0)
|
||||
&& distance <= self.repartition_threshold
|
||||
&& !flags.contains(CompactFlags::ForceRepartition)
|
||||
{
|
||||
debug!(
|
||||
distance,
|
||||
threshold = self.repartition_threshold,
|
||||
"no repartitioning needed"
|
||||
);
|
||||
return Ok((
|
||||
(dense_partition.clone(), sparse_partition.clone()),
|
||||
*partition_lsn,
|
||||
));
|
||||
}
|
||||
// let distance = lsn.0 - partition_lsn.0;
|
||||
// if *partition_lsn != Lsn(0)
|
||||
// && distance <= self.repartition_threshold
|
||||
// && !flags.contains(CompactFlags::ForceRepartition)
|
||||
// {
|
||||
// debug!(
|
||||
// distance,
|
||||
// threshold = self.repartition_threshold,
|
||||
// "no repartitioning needed"
|
||||
// );
|
||||
// return Ok((
|
||||
// (dense_partition.clone(), sparse_partition.clone()),
|
||||
// *partition_lsn,
|
||||
// ));
|
||||
// }
|
||||
|
||||
let (dense_ks, sparse_ks) = self.collect_keyspace(lsn, ctx).await?;
|
||||
let dense_partitioning = dense_ks.partition(&self.shard_identity, partition_size);
|
||||
@@ -5808,6 +5779,7 @@ impl<'a> TimelineWriter<'a> {
|
||||
/// the 'lsn' or anything older. The previous last record LSN is stored alongside
|
||||
/// the latest and can be read.
|
||||
pub(crate) fn finish_write(&self, new_lsn: Lsn) {
|
||||
tracing::info!("finish_write @ {new_lsn}");
|
||||
self.tl.finish_write(new_lsn);
|
||||
}
|
||||
|
||||
|
||||
@@ -364,6 +364,10 @@ impl Timeline {
|
||||
// 3. Create new image layers for partitions that have been modified
|
||||
// "enough". Skip image layer creation if L0 compaction cannot keep up.
|
||||
if fully_compacted {
|
||||
tracing::info!(
|
||||
"create_image_layers @ {lsn} (latest {})",
|
||||
self.get_last_record_lsn()
|
||||
);
|
||||
let image_layers = self
|
||||
.create_image_layers(
|
||||
&partitioning,
|
||||
|
||||
@@ -185,7 +185,171 @@ pub(crate) enum VectoredReadExtended {
|
||||
No,
|
||||
}
|
||||
|
||||
/// A vectored read builder that tries to coalesce all reads that fits in a chunk.
|
||||
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
|
||||
pub enum VectoredReadCoalesceMode {
|
||||
/// Only coalesce exactly adjacent reads.
|
||||
AdjacentOnly,
|
||||
/// In addition to adjacent reads, also consider reads whose corresponding
|
||||
/// `end` and `start` offsets reside at the same chunk.
|
||||
Chunked(usize),
|
||||
}
|
||||
|
||||
impl VectoredReadCoalesceMode {
|
||||
/// [`AdjacentVectoredReadBuilder`] is used if alignment requirement is 0,
|
||||
/// whereas [`ChunkedVectoredReadBuilder`] is used for alignment requirement 1 and higher.
|
||||
pub(crate) fn get() -> Self {
|
||||
let align = virtual_file::get_io_buffer_alignment_raw();
|
||||
if align == 0 {
|
||||
VectoredReadCoalesceMode::AdjacentOnly
|
||||
} else {
|
||||
VectoredReadCoalesceMode::Chunked(align)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) enum VectoredReadBuilder {
|
||||
Adjacent(AdjacentVectoredReadBuilder),
|
||||
Chunked(ChunkedVectoredReadBuilder),
|
||||
}
|
||||
|
||||
impl VectoredReadBuilder {
|
||||
fn new_impl(
|
||||
start_offset: u64,
|
||||
end_offset: u64,
|
||||
meta: BlobMeta,
|
||||
max_read_size: Option<usize>,
|
||||
mode: VectoredReadCoalesceMode,
|
||||
) -> Self {
|
||||
match mode {
|
||||
VectoredReadCoalesceMode::AdjacentOnly => Self::Adjacent(
|
||||
AdjacentVectoredReadBuilder::new(start_offset, end_offset, meta, max_read_size),
|
||||
),
|
||||
VectoredReadCoalesceMode::Chunked(chunk_size) => {
|
||||
Self::Chunked(ChunkedVectoredReadBuilder::new(
|
||||
start_offset,
|
||||
end_offset,
|
||||
meta,
|
||||
max_read_size,
|
||||
chunk_size,
|
||||
))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) fn new(
|
||||
start_offset: u64,
|
||||
end_offset: u64,
|
||||
meta: BlobMeta,
|
||||
max_read_size: usize,
|
||||
mode: VectoredReadCoalesceMode,
|
||||
) -> Self {
|
||||
Self::new_impl(start_offset, end_offset, meta, Some(max_read_size), mode)
|
||||
}
|
||||
|
||||
pub(crate) fn new_streaming(
|
||||
start_offset: u64,
|
||||
end_offset: u64,
|
||||
meta: BlobMeta,
|
||||
mode: VectoredReadCoalesceMode,
|
||||
) -> Self {
|
||||
Self::new_impl(start_offset, end_offset, meta, None, mode)
|
||||
}
|
||||
|
||||
pub(crate) fn extend(&mut self, start: u64, end: u64, meta: BlobMeta) -> VectoredReadExtended {
|
||||
match self {
|
||||
VectoredReadBuilder::Adjacent(builder) => builder.extend(start, end, meta),
|
||||
VectoredReadBuilder::Chunked(builder) => builder.extend(start, end, meta),
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) fn build(self) -> VectoredRead {
|
||||
match self {
|
||||
VectoredReadBuilder::Adjacent(builder) => builder.build(),
|
||||
VectoredReadBuilder::Chunked(builder) => builder.build(),
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) fn size(&self) -> usize {
|
||||
match self {
|
||||
VectoredReadBuilder::Adjacent(builder) => builder.size(),
|
||||
VectoredReadBuilder::Chunked(builder) => builder.size(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) struct AdjacentVectoredReadBuilder {
|
||||
/// Start offset of the read.
|
||||
start: u64,
|
||||
// End offset of the read.
|
||||
end: u64,
|
||||
/// Start offset and metadata for each blob in this read
|
||||
blobs_at: VecMap<u64, BlobMeta>,
|
||||
max_read_size: Option<usize>,
|
||||
}
|
||||
|
||||
impl AdjacentVectoredReadBuilder {
|
||||
/// Start building a new vectored read.
|
||||
///
|
||||
/// Note that by design, this does not check against reading more than `max_read_size` to
|
||||
/// support reading larger blobs than the configuration value. The builder will be single use
|
||||
/// however after that.
|
||||
pub(crate) fn new(
|
||||
start_offset: u64,
|
||||
end_offset: u64,
|
||||
meta: BlobMeta,
|
||||
max_read_size: Option<usize>,
|
||||
) -> Self {
|
||||
let mut blobs_at = VecMap::default();
|
||||
blobs_at
|
||||
.append(start_offset, meta)
|
||||
.expect("First insertion always succeeds");
|
||||
|
||||
Self {
|
||||
start: start_offset,
|
||||
end: end_offset,
|
||||
blobs_at,
|
||||
max_read_size,
|
||||
}
|
||||
}
|
||||
/// Attempt to extend the current read with a new blob if the start
|
||||
/// offset matches with the current end of the vectored read
|
||||
/// and the resuting size is below the max read size
|
||||
pub(crate) fn extend(&mut self, start: u64, end: u64, meta: BlobMeta) -> VectoredReadExtended {
|
||||
tracing::trace!(start, end, "trying to extend");
|
||||
let size = (end - start) as usize;
|
||||
let not_limited_by_max_read_size = {
|
||||
if let Some(max_read_size) = self.max_read_size {
|
||||
self.size() + size <= max_read_size
|
||||
} else {
|
||||
true
|
||||
}
|
||||
};
|
||||
|
||||
if self.end == start && not_limited_by_max_read_size {
|
||||
self.end = end;
|
||||
self.blobs_at
|
||||
.append(start, meta)
|
||||
.expect("LSNs are ordered within vectored reads");
|
||||
|
||||
return VectoredReadExtended::Yes;
|
||||
}
|
||||
|
||||
VectoredReadExtended::No
|
||||
}
|
||||
|
||||
pub(crate) fn size(&self) -> usize {
|
||||
(self.end - self.start) as usize
|
||||
}
|
||||
|
||||
pub(crate) fn build(self) -> VectoredRead {
|
||||
VectoredRead {
|
||||
start: self.start,
|
||||
end: self.end,
|
||||
blobs_at: self.blobs_at,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) struct ChunkedVectoredReadBuilder {
|
||||
/// Start block number
|
||||
start_blk_no: usize,
|
||||
@@ -209,7 +373,7 @@ impl ChunkedVectoredReadBuilder {
|
||||
/// Note that by design, this does not check against reading more than `max_read_size` to
|
||||
/// support reading larger blobs than the configuration value. The builder will be single use
|
||||
/// however after that.
|
||||
fn new_impl(
|
||||
pub(crate) fn new(
|
||||
start_offset: u64,
|
||||
end_offset: u64,
|
||||
meta: BlobMeta,
|
||||
@@ -232,25 +396,6 @@ impl ChunkedVectoredReadBuilder {
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) fn new(
|
||||
start_offset: u64,
|
||||
end_offset: u64,
|
||||
meta: BlobMeta,
|
||||
max_read_size: usize,
|
||||
align: usize,
|
||||
) -> Self {
|
||||
Self::new_impl(start_offset, end_offset, meta, Some(max_read_size), align)
|
||||
}
|
||||
|
||||
pub(crate) fn new_streaming(
|
||||
start_offset: u64,
|
||||
end_offset: u64,
|
||||
meta: BlobMeta,
|
||||
align: usize,
|
||||
) -> Self {
|
||||
Self::new_impl(start_offset, end_offset, meta, None, align)
|
||||
}
|
||||
|
||||
/// Attempts to extend the current read with a new blob if the new blob resides in the same or the immediate next chunk.
|
||||
///
|
||||
/// The resulting size also must be below the max read size.
|
||||
@@ -329,17 +474,17 @@ pub struct VectoredReadPlanner {
|
||||
|
||||
max_read_size: usize,
|
||||
|
||||
align: usize,
|
||||
mode: VectoredReadCoalesceMode,
|
||||
}
|
||||
|
||||
impl VectoredReadPlanner {
|
||||
pub fn new(max_read_size: usize) -> Self {
|
||||
let align = virtual_file::get_io_buffer_alignment();
|
||||
let mode = VectoredReadCoalesceMode::get();
|
||||
Self {
|
||||
blobs: BTreeMap::new(),
|
||||
prev: None,
|
||||
max_read_size,
|
||||
align,
|
||||
mode,
|
||||
}
|
||||
}
|
||||
|
||||
@@ -400,7 +545,7 @@ impl VectoredReadPlanner {
|
||||
}
|
||||
|
||||
pub fn finish(self) -> Vec<VectoredRead> {
|
||||
let mut current_read_builder: Option<ChunkedVectoredReadBuilder> = None;
|
||||
let mut current_read_builder: Option<VectoredReadBuilder> = None;
|
||||
let mut reads = Vec::new();
|
||||
|
||||
for (key, blobs_for_key) in self.blobs {
|
||||
@@ -413,12 +558,12 @@ impl VectoredReadPlanner {
|
||||
};
|
||||
|
||||
if extended == VectoredReadExtended::No {
|
||||
let next_read_builder = ChunkedVectoredReadBuilder::new(
|
||||
let next_read_builder = VectoredReadBuilder::new(
|
||||
start_offset,
|
||||
end_offset,
|
||||
BlobMeta { key, lsn },
|
||||
self.max_read_size,
|
||||
self.align,
|
||||
self.mode,
|
||||
);
|
||||
|
||||
let prev_read_builder = current_read_builder.replace(next_read_builder);
|
||||
@@ -543,7 +688,7 @@ impl<'a> VectoredBlobReader<'a> {
|
||||
/// `handle` gets called and when the current key would just exceed the read_size and
|
||||
/// max_cnt constraints.
|
||||
pub struct StreamingVectoredReadPlanner {
|
||||
read_builder: Option<ChunkedVectoredReadBuilder>,
|
||||
read_builder: Option<VectoredReadBuilder>,
|
||||
// Arguments for previous blob passed into [`StreamingVectoredReadPlanner::handle`]
|
||||
prev: Option<(Key, Lsn, u64)>,
|
||||
/// Max read size per batch. This is not a strict limit. If there are [0, 100) and [100, 200), while the `max_read_size` is 150,
|
||||
@@ -554,21 +699,21 @@ pub struct StreamingVectoredReadPlanner {
|
||||
/// Size of the current batch
|
||||
cnt: usize,
|
||||
|
||||
align: usize,
|
||||
mode: VectoredReadCoalesceMode,
|
||||
}
|
||||
|
||||
impl StreamingVectoredReadPlanner {
|
||||
pub fn new(max_read_size: u64, max_cnt: usize) -> Self {
|
||||
assert!(max_cnt > 0);
|
||||
assert!(max_read_size > 0);
|
||||
let align = virtual_file::get_io_buffer_alignment();
|
||||
let mode = VectoredReadCoalesceMode::get();
|
||||
Self {
|
||||
read_builder: None,
|
||||
prev: None,
|
||||
max_cnt,
|
||||
max_read_size,
|
||||
cnt: 0,
|
||||
align,
|
||||
mode,
|
||||
}
|
||||
}
|
||||
|
||||
@@ -617,11 +762,11 @@ impl StreamingVectoredReadPlanner {
|
||||
}
|
||||
None => {
|
||||
self.read_builder = {
|
||||
Some(ChunkedVectoredReadBuilder::new_streaming(
|
||||
Some(VectoredReadBuilder::new_streaming(
|
||||
start_offset,
|
||||
end_offset,
|
||||
BlobMeta { key, lsn },
|
||||
self.align,
|
||||
self.mode,
|
||||
))
|
||||
};
|
||||
}
|
||||
@@ -947,7 +1092,7 @@ mod tests {
|
||||
let reserved_bytes = blobs.iter().map(|bl| bl.len()).max().unwrap() * 2 + 16;
|
||||
let mut buf = BytesMut::with_capacity(reserved_bytes);
|
||||
|
||||
let align = virtual_file::get_io_buffer_alignment();
|
||||
let mode = VectoredReadCoalesceMode::get();
|
||||
let vectored_blob_reader = VectoredBlobReader::new(&file);
|
||||
let meta = BlobMeta {
|
||||
key: Key::MIN,
|
||||
@@ -959,8 +1104,7 @@ mod tests {
|
||||
if idx + 1 == offsets.len() {
|
||||
continue;
|
||||
}
|
||||
let read_builder =
|
||||
ChunkedVectoredReadBuilder::new(*offset, *end, meta, 16 * 4096, align);
|
||||
let read_builder = VectoredReadBuilder::new(*offset, *end, meta, 16 * 4096, mode);
|
||||
let read = read_builder.build();
|
||||
let result = vectored_blob_reader.read_blobs(&read, buf, &ctx).await?;
|
||||
assert_eq!(result.blobs.len(), 1);
|
||||
|
||||
@@ -466,7 +466,6 @@ impl VirtualFile {
|
||||
&[]
|
||||
};
|
||||
utils::crashsafe::overwrite(&final_path, &tmp_path, content)
|
||||
.maybe_fatal_err("crashsafe_overwrite")
|
||||
})
|
||||
.await
|
||||
.expect("blocking task is never aborted")
|
||||
@@ -476,7 +475,7 @@ impl VirtualFile {
|
||||
pub async fn sync_all(&self) -> Result<(), Error> {
|
||||
with_file!(self, StorageIoOperation::Fsync, |file_guard| {
|
||||
let (_file_guard, res) = io_engine::get().sync_all(file_guard).await;
|
||||
res.maybe_fatal_err("sync_all")
|
||||
res
|
||||
})
|
||||
}
|
||||
|
||||
@@ -484,7 +483,7 @@ impl VirtualFile {
|
||||
pub async fn sync_data(&self) -> Result<(), Error> {
|
||||
with_file!(self, StorageIoOperation::Fsync, |file_guard| {
|
||||
let (_file_guard, res) = io_engine::get().sync_data(file_guard).await;
|
||||
res.maybe_fatal_err("sync_data")
|
||||
res
|
||||
})
|
||||
}
|
||||
|
||||
@@ -1148,9 +1147,7 @@ pub fn init(num_slots: usize, engine: IoEngineKind, io_buffer_alignment: usize)
|
||||
panic!("virtual_file::init called twice");
|
||||
}
|
||||
if set_io_buffer_alignment(io_buffer_alignment).is_err() {
|
||||
panic!(
|
||||
"IO buffer alignment needs to be a power of two and greater than 512, got {io_buffer_alignment}"
|
||||
);
|
||||
panic!("IO buffer alignment ({io_buffer_alignment}) is not a power of two");
|
||||
}
|
||||
io_engine::init(engine);
|
||||
crate::metrics::virtual_file_descriptor_cache::SIZE_MAX.set(num_slots as u64);
|
||||
@@ -1177,16 +1174,14 @@ fn get_open_files() -> &'static OpenFiles {
|
||||
|
||||
static IO_BUFFER_ALIGNMENT: AtomicUsize = AtomicUsize::new(DEFAULT_IO_BUFFER_ALIGNMENT);
|
||||
|
||||
/// Returns true if the alignment is a power of two and is greater or equal to 512.
|
||||
fn is_valid_io_buffer_alignment(align: usize) -> bool {
|
||||
align.is_power_of_two() && align >= 512
|
||||
/// Returns true if `x` is zero or a power of two.
|
||||
fn is_zero_or_power_of_two(x: usize) -> bool {
|
||||
(x == 0) || ((x & (x - 1)) == 0)
|
||||
}
|
||||
|
||||
/// Sets IO buffer alignment requirement. Returns error if the alignment requirement is
|
||||
/// not a power of two or less than 512 bytes.
|
||||
#[allow(unused)]
|
||||
pub(crate) fn set_io_buffer_alignment(align: usize) -> Result<(), usize> {
|
||||
if is_valid_io_buffer_alignment(align) {
|
||||
if is_zero_or_power_of_two(align) {
|
||||
IO_BUFFER_ALIGNMENT.store(align, std::sync::atomic::Ordering::Relaxed);
|
||||
Ok(())
|
||||
} else {
|
||||
@@ -1194,19 +1189,19 @@ pub(crate) fn set_io_buffer_alignment(align: usize) -> Result<(), usize> {
|
||||
}
|
||||
}
|
||||
|
||||
/// Gets the io buffer alignment.
|
||||
/// Gets the io buffer alignment requirement. Returns 0 if there is no requirement specified.
|
||||
///
|
||||
/// This function should be used for getting the actual alignment value to use.
|
||||
pub(crate) fn get_io_buffer_alignment() -> usize {
|
||||
/// This function should be used to check the raw config value.
|
||||
pub(crate) fn get_io_buffer_alignment_raw() -> usize {
|
||||
let align = IO_BUFFER_ALIGNMENT.load(std::sync::atomic::Ordering::Relaxed);
|
||||
|
||||
if cfg!(test) {
|
||||
let env_var_name = "NEON_PAGESERVER_UNIT_TEST_IO_BUFFER_ALIGNMENT";
|
||||
if let Some(test_align) = utils::env::var(env_var_name) {
|
||||
if is_valid_io_buffer_alignment(test_align) {
|
||||
if is_zero_or_power_of_two(test_align) {
|
||||
test_align
|
||||
} else {
|
||||
panic!("IO buffer alignment needs to be a power of two and greater than 512, got {test_align}");
|
||||
panic!("IO buffer alignment ({test_align}) is not a power of two");
|
||||
}
|
||||
} else {
|
||||
align
|
||||
@@ -1216,6 +1211,14 @@ pub(crate) fn get_io_buffer_alignment() -> usize {
|
||||
}
|
||||
}
|
||||
|
||||
/// Gets the io buffer alignment requirement. Returns 1 if the alignment config is set to zero.
|
||||
///
|
||||
/// This function should be used for getting the actual alignment value to use.
|
||||
pub(crate) fn get_io_buffer_alignment() -> usize {
|
||||
let align = get_io_buffer_alignment_raw();
|
||||
align.max(1)
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use crate::context::DownloadBehavior;
|
||||
|
||||
@@ -1473,33 +1473,11 @@ walprop_pg_wal_read(Safekeeper *sk, char *buf, XLogRecPtr startptr, Size count,
|
||||
{
|
||||
NeonWALReadResult res;
|
||||
|
||||
#if PG_MAJORVERSION_NUM >= 17
|
||||
if (!sk->wp->config->syncSafekeepers)
|
||||
{
|
||||
Size rbytes;
|
||||
rbytes = WALReadFromBuffers(buf, startptr, count,
|
||||
walprop_pg_get_timeline_id());
|
||||
|
||||
startptr += rbytes;
|
||||
count -= rbytes;
|
||||
}
|
||||
#endif
|
||||
|
||||
if (count == 0)
|
||||
{
|
||||
res = NEON_WALREAD_SUCCESS;
|
||||
}
|
||||
else
|
||||
{
|
||||
Assert(count > 0);
|
||||
|
||||
/* Now read the remaining WAL from the WAL file */
|
||||
res = NeonWALRead(sk->xlogreader,
|
||||
buf,
|
||||
startptr,
|
||||
count,
|
||||
walprop_pg_get_timeline_id());
|
||||
}
|
||||
res = NeonWALRead(sk->xlogreader,
|
||||
buf,
|
||||
startptr,
|
||||
count,
|
||||
walprop_pg_get_timeline_id());
|
||||
|
||||
if (res == NEON_WALREAD_SUCCESS)
|
||||
{
|
||||
|
||||
@@ -24,7 +24,6 @@ bytes = { workspace = true, features = ["serde"] }
|
||||
camino.workspace = true
|
||||
chrono.workspace = true
|
||||
clap.workspace = true
|
||||
compute_api.workspace = true
|
||||
consumption_metrics.workspace = true
|
||||
dashmap.workspace = true
|
||||
env_logger.workspace = true
|
||||
|
||||
@@ -80,14 +80,6 @@ pub(crate) trait TestBackend: Send + Sync + 'static {
|
||||
fn get_allowed_ips_and_secret(
|
||||
&self,
|
||||
) -> Result<(CachedAllowedIps, Option<CachedRoleSecret>), console::errors::GetAuthInfoError>;
|
||||
fn dyn_clone(&self) -> Box<dyn TestBackend>;
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
impl Clone for Box<dyn TestBackend> {
|
||||
fn clone(&self) -> Self {
|
||||
TestBackend::dyn_clone(&**self)
|
||||
}
|
||||
}
|
||||
|
||||
impl std::fmt::Display for Backend<'_, (), ()> {
|
||||
@@ -593,14 +585,6 @@ mod tests {
|
||||
))
|
||||
}
|
||||
|
||||
async fn get_endpoint_jwks(
|
||||
&self,
|
||||
_ctx: &RequestMonitoring,
|
||||
_endpoint: crate::EndpointId,
|
||||
) -> anyhow::Result<Vec<super::jwt::AuthRule>> {
|
||||
unimplemented!()
|
||||
}
|
||||
|
||||
async fn wake_compute(
|
||||
&self,
|
||||
_ctx: &RequestMonitoring,
|
||||
|
||||
@@ -12,10 +12,7 @@ use serde::{Deserialize, Deserializer};
|
||||
use signature::Verifier;
|
||||
use tokio::time::Instant;
|
||||
|
||||
use crate::{
|
||||
context::RequestMonitoring, http::parse_json_body_with_limit, intern::RoleNameInt, EndpointId,
|
||||
RoleName,
|
||||
};
|
||||
use crate::{context::RequestMonitoring, http::parse_json_body_with_limit, EndpointId, RoleName};
|
||||
|
||||
// TODO(conrad): make these configurable.
|
||||
const CLOCK_SKEW_LEEWAY: Duration = Duration::from_secs(30);
|
||||
@@ -30,6 +27,7 @@ pub(crate) trait FetchAuthRules: Clone + Send + Sync + 'static {
|
||||
&self,
|
||||
ctx: &RequestMonitoring,
|
||||
endpoint: EndpointId,
|
||||
role_name: RoleName,
|
||||
) -> impl Future<Output = anyhow::Result<Vec<AuthRule>>> + Send;
|
||||
}
|
||||
|
||||
@@ -37,11 +35,10 @@ pub(crate) struct AuthRule {
|
||||
pub(crate) id: String,
|
||||
pub(crate) jwks_url: url::Url,
|
||||
pub(crate) audience: Option<String>,
|
||||
pub(crate) role_names: Vec<RoleNameInt>,
|
||||
}
|
||||
|
||||
#[derive(Default)]
|
||||
pub struct JwkCache {
|
||||
pub(crate) struct JwkCache {
|
||||
client: reqwest::Client,
|
||||
|
||||
map: DashMap<(EndpointId, RoleName), Arc<JwkCacheEntryLock>>,
|
||||
@@ -57,28 +54,18 @@ pub(crate) struct JwkCacheEntry {
|
||||
}
|
||||
|
||||
impl JwkCacheEntry {
|
||||
fn find_jwk_and_audience(
|
||||
&self,
|
||||
key_id: &str,
|
||||
role_name: &RoleName,
|
||||
) -> Option<(&jose_jwk::Jwk, Option<&str>)> {
|
||||
self.key_sets
|
||||
.values()
|
||||
// make sure our requested role has access to the key set
|
||||
.filter(|key_set| key_set.role_names.iter().any(|role| **role == **role_name))
|
||||
// try and find the requested key-id in the key set
|
||||
.find_map(|key_set| {
|
||||
key_set
|
||||
.find_key(key_id)
|
||||
.map(|jwk| (jwk, key_set.audience.as_deref()))
|
||||
})
|
||||
fn find_jwk_and_audience(&self, key_id: &str) -> Option<(&jose_jwk::Jwk, Option<&str>)> {
|
||||
self.key_sets.values().find_map(|key_set| {
|
||||
key_set
|
||||
.find_key(key_id)
|
||||
.map(|jwk| (jwk, key_set.audience.as_deref()))
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
struct KeySet {
|
||||
jwks: jose_jwk::JwkSet,
|
||||
audience: Option<String>,
|
||||
role_names: Vec<RoleNameInt>,
|
||||
}
|
||||
|
||||
impl KeySet {
|
||||
@@ -119,6 +106,7 @@ impl JwkCacheEntryLock {
|
||||
ctx: &RequestMonitoring,
|
||||
client: &reqwest::Client,
|
||||
endpoint: EndpointId,
|
||||
role_name: RoleName,
|
||||
auth_rules: &F,
|
||||
) -> anyhow::Result<Arc<JwkCacheEntry>> {
|
||||
// double check that no one beat us to updating the cache.
|
||||
@@ -131,10 +119,11 @@ impl JwkCacheEntryLock {
|
||||
}
|
||||
}
|
||||
|
||||
let rules = auth_rules.fetch_auth_rules(ctx, endpoint).await?;
|
||||
let rules = auth_rules
|
||||
.fetch_auth_rules(ctx, endpoint, role_name)
|
||||
.await?;
|
||||
let mut key_sets =
|
||||
ahash::HashMap::with_capacity_and_hasher(rules.len(), ahash::RandomState::new());
|
||||
|
||||
// TODO(conrad): run concurrently
|
||||
// TODO(conrad): strip the JWKs urls (should be checked by cplane as well - cloud#16284)
|
||||
for rule in rules {
|
||||
@@ -162,7 +151,6 @@ impl JwkCacheEntryLock {
|
||||
KeySet {
|
||||
jwks,
|
||||
audience: rule.audience,
|
||||
role_names: rule.role_names,
|
||||
},
|
||||
);
|
||||
}
|
||||
@@ -185,6 +173,7 @@ impl JwkCacheEntryLock {
|
||||
ctx: &RequestMonitoring,
|
||||
client: &reqwest::Client,
|
||||
endpoint: EndpointId,
|
||||
role_name: RoleName,
|
||||
fetch: &F,
|
||||
) -> Result<Arc<JwkCacheEntry>, anyhow::Error> {
|
||||
let now = Instant::now();
|
||||
@@ -194,7 +183,9 @@ impl JwkCacheEntryLock {
|
||||
let Some(cached) = guard else {
|
||||
let _paused = ctx.latency_timer_pause(crate::metrics::Waiting::Compute);
|
||||
let permit = self.acquire_permit().await;
|
||||
return self.renew_jwks(permit, ctx, client, endpoint, fetch).await;
|
||||
return self
|
||||
.renew_jwks(permit, ctx, client, endpoint, role_name, fetch)
|
||||
.await;
|
||||
};
|
||||
|
||||
let last_update = now.duration_since(cached.last_retrieved);
|
||||
@@ -205,7 +196,9 @@ impl JwkCacheEntryLock {
|
||||
let permit = self.acquire_permit().await;
|
||||
|
||||
// it's been too long since we checked the keys. wait for them to update.
|
||||
return self.renew_jwks(permit, ctx, client, endpoint, fetch).await;
|
||||
return self
|
||||
.renew_jwks(permit, ctx, client, endpoint, role_name, fetch)
|
||||
.await;
|
||||
}
|
||||
|
||||
// every 5 minutes we should spawn a job to eagerly update the token.
|
||||
@@ -219,7 +212,7 @@ impl JwkCacheEntryLock {
|
||||
let ctx = ctx.clone();
|
||||
tokio::spawn(async move {
|
||||
if let Err(e) = entry
|
||||
.renew_jwks(permit, &ctx, &client, endpoint, &fetch)
|
||||
.renew_jwks(permit, &ctx, &client, endpoint, role_name, &fetch)
|
||||
.await
|
||||
{
|
||||
tracing::warn!(error=?e, "could not fetch JWKs in background job");
|
||||
@@ -239,7 +232,7 @@ impl JwkCacheEntryLock {
|
||||
jwt: &str,
|
||||
client: &reqwest::Client,
|
||||
endpoint: EndpointId,
|
||||
role_name: &RoleName,
|
||||
role_name: RoleName,
|
||||
fetch: &F,
|
||||
) -> Result<(), anyhow::Error> {
|
||||
// JWT compact form is defined to be
|
||||
@@ -261,26 +254,30 @@ impl JwkCacheEntryLock {
|
||||
let sig = base64::decode_config(signature, base64::URL_SAFE_NO_PAD)
|
||||
.context("Provided authentication token is not a valid JWT encoding")?;
|
||||
|
||||
ensure!(
|
||||
header.typ == "JWT",
|
||||
"Provided authentication token is not a valid JWT encoding"
|
||||
);
|
||||
ensure!(header.typ == "JWT");
|
||||
let kid = header.key_id.context("missing key id")?;
|
||||
|
||||
let mut guard = self
|
||||
.get_or_update_jwk_cache(ctx, client, endpoint.clone(), fetch)
|
||||
.get_or_update_jwk_cache(ctx, client, endpoint.clone(), role_name.clone(), fetch)
|
||||
.await?;
|
||||
|
||||
// get the key from the JWKs if possible. If not, wait for the keys to update.
|
||||
let (jwk, expected_audience) = loop {
|
||||
match guard.find_jwk_and_audience(kid, role_name) {
|
||||
match guard.find_jwk_and_audience(kid) {
|
||||
Some(jwk) => break jwk,
|
||||
None if guard.last_retrieved.elapsed() > MIN_RENEW => {
|
||||
let _paused = ctx.latency_timer_pause(crate::metrics::Waiting::Compute);
|
||||
|
||||
let permit = self.acquire_permit().await;
|
||||
guard = self
|
||||
.renew_jwks(permit, ctx, client, endpoint.clone(), fetch)
|
||||
.renew_jwks(
|
||||
permit,
|
||||
ctx,
|
||||
client,
|
||||
endpoint.clone(),
|
||||
role_name.clone(),
|
||||
fetch,
|
||||
)
|
||||
.await?;
|
||||
}
|
||||
_ => {
|
||||
@@ -323,14 +320,11 @@ impl JwkCacheEntryLock {
|
||||
let now = SystemTime::now();
|
||||
|
||||
if let Some(exp) = payload.expiration {
|
||||
ensure!(now < exp + CLOCK_SKEW_LEEWAY, "JWT token has expired");
|
||||
ensure!(now < exp + CLOCK_SKEW_LEEWAY);
|
||||
}
|
||||
|
||||
if let Some(nbf) = payload.not_before {
|
||||
ensure!(
|
||||
nbf < now + CLOCK_SKEW_LEEWAY,
|
||||
"JWT token is not yet ready to use"
|
||||
);
|
||||
ensure!(nbf < now + CLOCK_SKEW_LEEWAY);
|
||||
}
|
||||
|
||||
Ok(())
|
||||
@@ -342,7 +336,7 @@ impl JwkCache {
|
||||
&self,
|
||||
ctx: &RequestMonitoring,
|
||||
endpoint: EndpointId,
|
||||
role_name: &RoleName,
|
||||
role_name: RoleName,
|
||||
fetch: &F,
|
||||
jwt: &str,
|
||||
) -> Result<(), anyhow::Error> {
|
||||
@@ -578,7 +572,7 @@ mod tests {
|
||||
format!("{header}.{body}")
|
||||
}
|
||||
|
||||
fn new_ec_jwt(kid: String, key: &p256::SecretKey) -> String {
|
||||
fn new_ec_jwt(kid: String, key: p256::SecretKey) -> String {
|
||||
use p256::ecdsa::{Signature, SigningKey};
|
||||
|
||||
let payload = build_jwt_payload(kid, jose_jwa::Signing::Es256);
|
||||
@@ -666,6 +660,11 @@ X0n5X2/pBLJzxZc62ccvZYVnctBiFs6HbSnxpuMQCfkt/BcR/ttIepBQQIW86wHL
|
||||
let (ec1, jwk3) = new_ec_jwk("3".into());
|
||||
let (ec2, jwk4) = new_ec_jwk("4".into());
|
||||
|
||||
let jwt1 = new_rsa_jwt("1".into(), rs1);
|
||||
let jwt2 = new_rsa_jwt("2".into(), rs2);
|
||||
let jwt3 = new_ec_jwt("3".into(), ec1);
|
||||
let jwt4 = new_ec_jwt("4".into(), ec2);
|
||||
|
||||
let foo_jwks = jose_jwk::JwkSet {
|
||||
keys: vec![jwk1, jwk3],
|
||||
};
|
||||
@@ -707,98 +706,47 @@ X0n5X2/pBLJzxZc62ccvZYVnctBiFs6HbSnxpuMQCfkt/BcR/ttIepBQQIW86wHL
|
||||
let client = reqwest::Client::new();
|
||||
|
||||
#[derive(Clone)]
|
||||
struct Fetch(SocketAddr, Vec<RoleNameInt>);
|
||||
struct Fetch(SocketAddr);
|
||||
|
||||
impl FetchAuthRules for Fetch {
|
||||
async fn fetch_auth_rules(
|
||||
&self,
|
||||
_ctx: &RequestMonitoring,
|
||||
_endpoint: EndpointId,
|
||||
_role_name: RoleName,
|
||||
) -> anyhow::Result<Vec<AuthRule>> {
|
||||
Ok(vec![
|
||||
AuthRule {
|
||||
id: "foo".to_owned(),
|
||||
jwks_url: format!("http://{}/foo", self.0).parse().unwrap(),
|
||||
audience: None,
|
||||
role_names: self.1.clone(),
|
||||
},
|
||||
AuthRule {
|
||||
id: "bar".to_owned(),
|
||||
jwks_url: format!("http://{}/bar", self.0).parse().unwrap(),
|
||||
audience: None,
|
||||
role_names: self.1.clone(),
|
||||
},
|
||||
])
|
||||
}
|
||||
}
|
||||
|
||||
let role_name1 = RoleName::from("anonymous");
|
||||
let role_name2 = RoleName::from("authenticated");
|
||||
|
||||
let fetch = Fetch(
|
||||
addr,
|
||||
vec![
|
||||
RoleNameInt::from(&role_name1),
|
||||
RoleNameInt::from(&role_name2),
|
||||
],
|
||||
);
|
||||
|
||||
let role_name = RoleName::from("user");
|
||||
let endpoint = EndpointId::from("ep");
|
||||
|
||||
let jwk_cache = Arc::new(JwkCacheEntryLock::default());
|
||||
|
||||
let jwt1 = new_rsa_jwt("1".into(), rs1);
|
||||
let jwt2 = new_rsa_jwt("2".into(), rs2);
|
||||
let jwt3 = new_ec_jwt("3".into(), &ec1);
|
||||
let jwt4 = new_ec_jwt("4".into(), &ec2);
|
||||
|
||||
// had the wrong kid, therefore will have the wrong ecdsa signature
|
||||
let bad_jwt = new_ec_jwt("3".into(), &ec2);
|
||||
// this role_name is not accepted
|
||||
let bad_role_name = RoleName::from("cloud_admin");
|
||||
|
||||
let err = jwk_cache
|
||||
.check_jwt(
|
||||
&RequestMonitoring::test(),
|
||||
&bad_jwt,
|
||||
&client,
|
||||
endpoint.clone(),
|
||||
&role_name1,
|
||||
&fetch,
|
||||
)
|
||||
.await
|
||||
.unwrap_err();
|
||||
assert!(err.to_string().contains("signature error"));
|
||||
|
||||
let err = jwk_cache
|
||||
.check_jwt(
|
||||
&RequestMonitoring::test(),
|
||||
&jwt1,
|
||||
&client,
|
||||
endpoint.clone(),
|
||||
&bad_role_name,
|
||||
&fetch,
|
||||
)
|
||||
.await
|
||||
.unwrap_err();
|
||||
assert!(err.to_string().contains("jwk not found"));
|
||||
|
||||
let tokens = [jwt1, jwt2, jwt3, jwt4];
|
||||
let role_names = [role_name1, role_name2];
|
||||
for role in &role_names {
|
||||
for token in &tokens {
|
||||
jwk_cache
|
||||
.check_jwt(
|
||||
&RequestMonitoring::test(),
|
||||
token,
|
||||
&client,
|
||||
endpoint.clone(),
|
||||
role,
|
||||
&fetch,
|
||||
)
|
||||
.await
|
||||
.unwrap();
|
||||
}
|
||||
for token in [jwt1, jwt2, jwt3, jwt4] {
|
||||
jwk_cache
|
||||
.check_jwt(
|
||||
&RequestMonitoring::test(),
|
||||
&token,
|
||||
&client,
|
||||
endpoint.clone(),
|
||||
role_name.clone(),
|
||||
&Fetch(addr),
|
||||
)
|
||||
.await
|
||||
.unwrap();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
use std::net::SocketAddr;
|
||||
use std::{collections::HashMap, net::SocketAddr};
|
||||
|
||||
use anyhow::Context;
|
||||
use arc_swap::ArcSwapOption;
|
||||
@@ -10,8 +10,8 @@ use crate::{
|
||||
NodeInfo,
|
||||
},
|
||||
context::RequestMonitoring,
|
||||
intern::{BranchIdTag, EndpointIdTag, InternId, ProjectIdTag},
|
||||
EndpointId,
|
||||
intern::{BranchIdInt, BranchIdTag, EndpointIdTag, InternId, ProjectIdInt, ProjectIdTag},
|
||||
EndpointId, RoleName,
|
||||
};
|
||||
|
||||
use super::jwt::{AuthRule, FetchAuthRules, JwkCache};
|
||||
@@ -48,17 +48,26 @@ impl LocalBackend {
|
||||
#[derive(Clone, Copy)]
|
||||
pub(crate) struct StaticAuthRules;
|
||||
|
||||
pub static JWKS_ROLE_MAP: ArcSwapOption<EndpointJwksResponse> = ArcSwapOption::const_empty();
|
||||
pub static JWKS_ROLE_MAP: ArcSwapOption<JwksRoleSettings> = ArcSwapOption::const_empty();
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct JwksRoleSettings {
|
||||
pub roles: HashMap<RoleName, EndpointJwksResponse>,
|
||||
pub project_id: ProjectIdInt,
|
||||
pub branch_id: BranchIdInt,
|
||||
}
|
||||
|
||||
impl FetchAuthRules for StaticAuthRules {
|
||||
async fn fetch_auth_rules(
|
||||
&self,
|
||||
_ctx: &RequestMonitoring,
|
||||
_endpoint: EndpointId,
|
||||
role_name: RoleName,
|
||||
) -> anyhow::Result<Vec<AuthRule>> {
|
||||
let mappings = JWKS_ROLE_MAP.load();
|
||||
let role_mappings = mappings
|
||||
.as_deref()
|
||||
.and_then(|m| m.roles.get(&role_name))
|
||||
.context("JWKs settings for this role were not configured")?;
|
||||
let mut rules = vec![];
|
||||
for setting in &role_mappings.jwks {
|
||||
@@ -66,7 +75,6 @@ impl FetchAuthRules for StaticAuthRules {
|
||||
id: setting.id.clone(),
|
||||
jwks_url: setting.jwks_url.clone(),
|
||||
audience: setting.jwt_audience.clone(),
|
||||
role_names: setting.role_names.clone(),
|
||||
});
|
||||
}
|
||||
|
||||
|
||||
@@ -1,35 +1,34 @@
|
||||
use std::{net::SocketAddr, pin::pin, str::FromStr, sync::Arc, time::Duration};
|
||||
use std::{
|
||||
net::SocketAddr,
|
||||
path::{Path, PathBuf},
|
||||
pin::pin,
|
||||
sync::Arc,
|
||||
time::Duration,
|
||||
};
|
||||
|
||||
use anyhow::{bail, ensure, Context};
|
||||
use camino::{Utf8Path, Utf8PathBuf};
|
||||
use compute_api::spec::LocalProxySpec;
|
||||
use anyhow::{bail, ensure};
|
||||
use dashmap::DashMap;
|
||||
use futures::future::Either;
|
||||
use futures::{future::Either, FutureExt};
|
||||
use proxy::{
|
||||
auth::backend::local::{LocalBackend, JWKS_ROLE_MAP},
|
||||
auth::backend::local::{JwksRoleSettings, LocalBackend, JWKS_ROLE_MAP},
|
||||
cancellation::CancellationHandlerMain,
|
||||
config::{self, AuthenticationConfig, HttpConfig, ProxyConfig, RetryConfig},
|
||||
console::{
|
||||
locks::ApiLocks,
|
||||
messages::{EndpointJwksResponse, JwksSettings},
|
||||
},
|
||||
console::{locks::ApiLocks, messages::JwksRoleMapping},
|
||||
http::health_server::AppMetrics,
|
||||
intern::RoleNameInt,
|
||||
metrics::{Metrics, ThreadPoolMetrics},
|
||||
rate_limiter::{BucketRateLimiter, EndpointRateLimiter, LeakyBucketConfig, RateBucketInfo},
|
||||
scram::threadpool::ThreadPool,
|
||||
serverless::{self, cancel_set::CancelSet, GlobalConnPoolOptions},
|
||||
RoleName,
|
||||
};
|
||||
|
||||
project_git_version!(GIT_VERSION);
|
||||
project_build_tag!(BUILD_TAG);
|
||||
|
||||
use clap::Parser;
|
||||
use tokio::{net::TcpListener, sync::Notify, task::JoinSet};
|
||||
use tokio::{net::TcpListener, task::JoinSet};
|
||||
use tokio_util::sync::CancellationToken;
|
||||
use tracing::{error, info, warn};
|
||||
use utils::{pid_file, project_build_tag, project_git_version, sentry_init::init_sentry};
|
||||
use utils::{project_build_tag, project_git_version, sentry_init::init_sentry};
|
||||
|
||||
#[global_allocator]
|
||||
static GLOBAL: tikv_jemallocator::Jemalloc = tikv_jemallocator::Jemalloc;
|
||||
@@ -73,12 +72,9 @@ struct LocalProxyCliArgs {
|
||||
/// Address of the postgres server
|
||||
#[clap(long, default_value = "127.0.0.1:5432")]
|
||||
compute: SocketAddr,
|
||||
/// Path of the local proxy config file
|
||||
/// File address of the local proxy config file
|
||||
#[clap(long, default_value = "./localproxy.json")]
|
||||
config_path: Utf8PathBuf,
|
||||
/// Path of the local proxy PID file
|
||||
#[clap(long, default_value = "./localproxy.pid")]
|
||||
pid_path: Utf8PathBuf,
|
||||
config_path: PathBuf,
|
||||
}
|
||||
|
||||
#[derive(clap::Args, Clone, Copy, Debug)]
|
||||
@@ -130,24 +126,6 @@ async fn main() -> anyhow::Result<()> {
|
||||
let args = LocalProxyCliArgs::parse();
|
||||
let config = build_config(&args)?;
|
||||
|
||||
// before we bind to any ports, write the process ID to a file
|
||||
// so that compute-ctl can find our process later
|
||||
// in order to trigger the appropriate SIGHUP on config change.
|
||||
//
|
||||
// This also claims a "lock" that makes sure only one instance
|
||||
// of local-proxy runs at a time.
|
||||
let _process_guard = loop {
|
||||
match pid_file::claim_for_current_process(&args.pid_path) {
|
||||
Ok(guard) => break guard,
|
||||
Err(e) => {
|
||||
// compute-ctl might have tried to read the pid-file to let us
|
||||
// know about some config change. We should try again.
|
||||
error!(path=?args.pid_path, "could not claim PID file guard: {e:?}");
|
||||
tokio::time::sleep(Duration::from_secs(1)).await;
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
let metrics_listener = TcpListener::bind(args.metrics).await?.into_std()?;
|
||||
let http_listener = TcpListener::bind(args.http).await?;
|
||||
let shutdown = CancellationToken::new();
|
||||
@@ -161,30 +139,12 @@ async fn main() -> anyhow::Result<()> {
|
||||
16,
|
||||
));
|
||||
|
||||
// write the process ID to a file so that compute-ctl can find our process later
|
||||
// in order to trigger the appropriate SIGHUP on config change.
|
||||
let pid = std::process::id();
|
||||
info!("process running in PID {pid}");
|
||||
std::fs::write(args.pid_path, format!("{pid}\n")).context("writing PID to file")?;
|
||||
refresh_config(args.config_path.clone()).await;
|
||||
|
||||
let mut maintenance_tasks = JoinSet::new();
|
||||
|
||||
let refresh_config_notify = Arc::new(Notify::new());
|
||||
maintenance_tasks.spawn(proxy::handle_signals(shutdown.clone(), {
|
||||
let refresh_config_notify = Arc::clone(&refresh_config_notify);
|
||||
move || {
|
||||
refresh_config_notify.notify_one();
|
||||
}
|
||||
maintenance_tasks.spawn(proxy::handle_signals(shutdown.clone(), move || {
|
||||
refresh_config(args.config_path.clone()).map(Ok)
|
||||
}));
|
||||
|
||||
// trigger the first config load **after** setting up the signal hook
|
||||
// to avoid the race condition where:
|
||||
// 1. No config file registered when local-proxy starts up
|
||||
// 2. The config file is written but the signal hook is not yet received
|
||||
// 3. local-proxy completes startup but has no config loaded, despite there being a registerd config.
|
||||
refresh_config_notify.notify_one();
|
||||
tokio::spawn(refresh_config_loop(args.config_path, refresh_config_notify));
|
||||
|
||||
maintenance_tasks.spawn(proxy::http::health_server::task_main(
|
||||
metrics_listener,
|
||||
AppMetrics {
|
||||
@@ -285,84 +245,81 @@ fn build_config(args: &LocalProxyCliArgs) -> anyhow::Result<&'static ProxyConfig
|
||||
})))
|
||||
}
|
||||
|
||||
async fn refresh_config_loop(path: Utf8PathBuf, rx: Arc<Notify>) {
|
||||
loop {
|
||||
rx.notified().await;
|
||||
|
||||
match refresh_config_inner(&path).await {
|
||||
Ok(()) => {}
|
||||
Err(e) => {
|
||||
error!(error=?e, ?path, "could not read config file");
|
||||
}
|
||||
async fn refresh_config(path: PathBuf) {
|
||||
match refresh_config_inner(&path).await {
|
||||
Ok(()) => {}
|
||||
Err(e) => {
|
||||
error!(error=?e, ?path, "could not read config file");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
async fn refresh_config_inner(path: &Utf8Path) -> anyhow::Result<()> {
|
||||
async fn refresh_config_inner(path: &Path) -> anyhow::Result<()> {
|
||||
let bytes = tokio::fs::read(&path).await?;
|
||||
let data: LocalProxySpec = serde_json::from_slice(&bytes)?;
|
||||
let mut data: JwksRoleMapping = serde_json::from_slice(&bytes)?;
|
||||
|
||||
let mut jwks_set = vec![];
|
||||
let mut settings = None;
|
||||
|
||||
for jwks in data.jwks {
|
||||
let mut jwks_url = url::Url::from_str(&jwks.jwks_url).context("parsing JWKS url")?;
|
||||
for mapping in data.roles.values_mut() {
|
||||
for jwks in &mut mapping.jwks {
|
||||
ensure!(
|
||||
jwks.jwks_url.has_authority()
|
||||
&& (jwks.jwks_url.scheme() == "http" || jwks.jwks_url.scheme() == "https"),
|
||||
"Invalid JWKS url. Must be HTTP",
|
||||
);
|
||||
|
||||
ensure!(
|
||||
jwks_url.has_authority()
|
||||
&& (jwks_url.scheme() == "http" || jwks_url.scheme() == "https"),
|
||||
"Invalid JWKS url. Must be HTTP",
|
||||
);
|
||||
ensure!(
|
||||
jwks.jwks_url
|
||||
.host()
|
||||
.is_some_and(|h| h != url::Host::Domain("")),
|
||||
"Invalid JWKS url. No domain listed",
|
||||
);
|
||||
|
||||
ensure!(
|
||||
jwks_url.host().is_some_and(|h| h != url::Host::Domain("")),
|
||||
"Invalid JWKS url. No domain listed",
|
||||
);
|
||||
|
||||
// clear username, password and ports
|
||||
jwks_url
|
||||
.set_username("")
|
||||
.expect("url can be a base and has a valid host and is not a file. should not error");
|
||||
jwks_url
|
||||
.set_password(None)
|
||||
.expect("url can be a base and has a valid host and is not a file. should not error");
|
||||
// local testing is hard if we need to have a specific restricted port
|
||||
if cfg!(not(feature = "testing")) {
|
||||
jwks_url.set_port(None).expect(
|
||||
// clear username, password and ports
|
||||
jwks.jwks_url.set_username("").expect(
|
||||
"url can be a base and has a valid host and is not a file. should not error",
|
||||
);
|
||||
}
|
||||
|
||||
// clear query params
|
||||
jwks_url.set_fragment(None);
|
||||
jwks_url.query_pairs_mut().clear().finish();
|
||||
|
||||
if jwks_url.scheme() != "https" {
|
||||
// local testing is hard if we need to set up https support.
|
||||
jwks.jwks_url.set_password(None).expect(
|
||||
"url can be a base and has a valid host and is not a file. should not error",
|
||||
);
|
||||
// local testing is hard if we need to have a specific restricted port
|
||||
if cfg!(not(feature = "testing")) {
|
||||
jwks_url
|
||||
.set_scheme("https")
|
||||
.expect("should not error to set the scheme to https if it was http");
|
||||
} else {
|
||||
warn!(scheme = jwks_url.scheme(), "JWKS url is not HTTPS");
|
||||
jwks.jwks_url.set_port(None).expect(
|
||||
"url can be a base and has a valid host and is not a file. should not error",
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
jwks_set.push(JwksSettings {
|
||||
id: jwks.id,
|
||||
jwks_url,
|
||||
provider_name: jwks.provider_name,
|
||||
jwt_audience: jwks.jwt_audience,
|
||||
role_names: jwks
|
||||
.role_names
|
||||
.into_iter()
|
||||
.map(RoleName::from)
|
||||
.map(|s| RoleNameInt::from(&s))
|
||||
.collect(),
|
||||
})
|
||||
// clear query params
|
||||
jwks.jwks_url.set_fragment(None);
|
||||
jwks.jwks_url.query_pairs_mut().clear().finish();
|
||||
|
||||
if jwks.jwks_url.scheme() != "https" {
|
||||
// local testing is hard if we need to set up https support.
|
||||
if cfg!(not(feature = "testing")) {
|
||||
jwks.jwks_url
|
||||
.set_scheme("https")
|
||||
.expect("should not error to set the scheme to https if it was http");
|
||||
} else {
|
||||
warn!(scheme = jwks.jwks_url.scheme(), "JWKS url is not HTTPS");
|
||||
}
|
||||
}
|
||||
|
||||
let (pr, br) = settings.get_or_insert((jwks.project_id, jwks.branch_id));
|
||||
ensure!(
|
||||
*pr == jwks.project_id,
|
||||
"inconsistent project IDs configured"
|
||||
);
|
||||
ensure!(*br == jwks.branch_id, "inconsistent branch IDs configured");
|
||||
}
|
||||
}
|
||||
|
||||
info!("successfully loaded new config");
|
||||
JWKS_ROLE_MAP.store(Some(Arc::new(EndpointJwksResponse { jwks: jwks_set })));
|
||||
if let Some((project_id, branch_id)) = settings {
|
||||
JWKS_ROLE_MAP.store(Some(Arc::new(JwksRoleSettings {
|
||||
roles: data.roles,
|
||||
project_id,
|
||||
branch_id,
|
||||
})));
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
@@ -133,7 +133,9 @@ async fn main() -> anyhow::Result<()> {
|
||||
proxy_listener,
|
||||
cancellation_token.clone(),
|
||||
));
|
||||
let signals_task = tokio::spawn(proxy::handle_signals(cancellation_token, || {}));
|
||||
let signals_task = tokio::spawn(proxy::handle_signals(cancellation_token, || async {
|
||||
Ok(())
|
||||
}));
|
||||
|
||||
// the signal task cant ever succeed.
|
||||
// the main task can error, or can succeed on cancellation.
|
||||
|
||||
@@ -461,7 +461,10 @@ async fn main() -> anyhow::Result<()> {
|
||||
|
||||
// maintenance tasks. these never return unless there's an error
|
||||
let mut maintenance_tasks = JoinSet::new();
|
||||
maintenance_tasks.spawn(proxy::handle_signals(cancellation_token.clone(), || {}));
|
||||
maintenance_tasks.spawn(proxy::handle_signals(
|
||||
cancellation_token.clone(),
|
||||
|| async { Ok(()) },
|
||||
));
|
||||
maintenance_tasks.spawn(http::health_server::task_main(
|
||||
http_listener,
|
||||
AppMetrics {
|
||||
|
||||
@@ -1,11 +1,13 @@
|
||||
use measured::FixedCardinalityLabel;
|
||||
use serde::{Deserialize, Serialize};
|
||||
use std::collections::HashMap;
|
||||
use std::fmt::{self, Display};
|
||||
|
||||
use crate::auth::IpPattern;
|
||||
|
||||
use crate::intern::{BranchIdInt, EndpointIdInt, ProjectIdInt, RoleNameInt};
|
||||
use crate::intern::{BranchIdInt, EndpointIdInt, ProjectIdInt};
|
||||
use crate::proxy::retry::CouldRetry;
|
||||
use crate::RoleName;
|
||||
|
||||
/// Generic error response with human-readable description.
|
||||
/// Note that we can't always present it to user as is.
|
||||
@@ -346,6 +348,11 @@ impl ColdStartInfo {
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Deserialize, Clone)]
|
||||
pub struct JwksRoleMapping {
|
||||
pub roles: HashMap<RoleName, EndpointJwksResponse>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Deserialize, Clone)]
|
||||
pub struct EndpointJwksResponse {
|
||||
pub jwks: Vec<JwksSettings>,
|
||||
@@ -354,10 +361,11 @@ pub struct EndpointJwksResponse {
|
||||
#[derive(Debug, Deserialize, Clone)]
|
||||
pub struct JwksSettings {
|
||||
pub id: String,
|
||||
pub project_id: ProjectIdInt,
|
||||
pub branch_id: BranchIdInt,
|
||||
pub jwks_url: url::Url,
|
||||
pub provider_name: String,
|
||||
pub jwt_audience: Option<String>,
|
||||
pub role_names: Vec<RoleNameInt>,
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
|
||||
@@ -5,10 +5,7 @@ pub mod neon;
|
||||
use super::messages::{ConsoleError, MetricsAuxInfo};
|
||||
use crate::{
|
||||
auth::{
|
||||
backend::{
|
||||
jwt::{AuthRule, FetchAuthRules},
|
||||
ComputeCredentialKeys, ComputeUserInfo,
|
||||
},
|
||||
backend::{ComputeCredentialKeys, ComputeUserInfo},
|
||||
IpPattern,
|
||||
},
|
||||
cache::{endpoints::EndpointsCache, project_info::ProjectInfoCacheImpl, Cached, TimedLru},
|
||||
@@ -19,7 +16,7 @@ use crate::{
|
||||
intern::ProjectIdInt,
|
||||
metrics::ApiLockMetrics,
|
||||
rate_limiter::{DynamicLimiter, Outcome, RateLimiterConfig, Token},
|
||||
scram, EndpointCacheKey, EndpointId,
|
||||
scram, EndpointCacheKey,
|
||||
};
|
||||
use dashmap::DashMap;
|
||||
use std::{hash::Hash, sync::Arc, time::Duration};
|
||||
@@ -337,12 +334,6 @@ pub(crate) trait Api {
|
||||
user_info: &ComputeUserInfo,
|
||||
) -> Result<(CachedAllowedIps, Option<CachedRoleSecret>), errors::GetAuthInfoError>;
|
||||
|
||||
async fn get_endpoint_jwks(
|
||||
&self,
|
||||
ctx: &RequestMonitoring,
|
||||
endpoint: EndpointId,
|
||||
) -> anyhow::Result<Vec<AuthRule>>;
|
||||
|
||||
/// Wake up the compute node and return the corresponding connection info.
|
||||
async fn wake_compute(
|
||||
&self,
|
||||
@@ -352,7 +343,6 @@ pub(crate) trait Api {
|
||||
}
|
||||
|
||||
#[non_exhaustive]
|
||||
#[derive(Clone)]
|
||||
pub enum ConsoleBackend {
|
||||
/// Current Cloud API (V2).
|
||||
Console(neon::Api),
|
||||
@@ -396,20 +386,6 @@ impl Api for ConsoleBackend {
|
||||
}
|
||||
}
|
||||
|
||||
async fn get_endpoint_jwks(
|
||||
&self,
|
||||
ctx: &RequestMonitoring,
|
||||
endpoint: EndpointId,
|
||||
) -> anyhow::Result<Vec<AuthRule>> {
|
||||
match self {
|
||||
Self::Console(api) => api.get_endpoint_jwks(ctx, endpoint).await,
|
||||
#[cfg(any(test, feature = "testing"))]
|
||||
Self::Postgres(api) => api.get_endpoint_jwks(ctx, endpoint).await,
|
||||
#[cfg(test)]
|
||||
Self::Test(_api) => Ok(vec![]),
|
||||
}
|
||||
}
|
||||
|
||||
async fn wake_compute(
|
||||
&self,
|
||||
ctx: &RequestMonitoring,
|
||||
@@ -576,13 +552,3 @@ impl WakeComputePermit {
|
||||
res
|
||||
}
|
||||
}
|
||||
|
||||
impl FetchAuthRules for ConsoleBackend {
|
||||
async fn fetch_auth_rules(
|
||||
&self,
|
||||
ctx: &RequestMonitoring,
|
||||
endpoint: EndpointId,
|
||||
) -> anyhow::Result<Vec<AuthRule>> {
|
||||
self.get_endpoint_jwks(ctx, endpoint).await
|
||||
}
|
||||
}
|
||||
|
||||
@@ -4,9 +4,7 @@ use super::{
|
||||
errors::{ApiError, GetAuthInfoError, WakeComputeError},
|
||||
AuthInfo, AuthSecret, CachedNodeInfo, NodeInfo,
|
||||
};
|
||||
use crate::{
|
||||
auth::backend::jwt::AuthRule, context::RequestMonitoring, intern::RoleNameInt, RoleName,
|
||||
};
|
||||
use crate::context::RequestMonitoring;
|
||||
use crate::{auth::backend::ComputeUserInfo, compute, error::io_error, scram, url::ApiUrl};
|
||||
use crate::{auth::IpPattern, cache::Cached};
|
||||
use crate::{
|
||||
@@ -120,39 +118,6 @@ impl Api {
|
||||
})
|
||||
}
|
||||
|
||||
async fn do_get_endpoint_jwks(&self, endpoint: EndpointId) -> anyhow::Result<Vec<AuthRule>> {
|
||||
let (client, connection) =
|
||||
tokio_postgres::connect(self.endpoint.as_str(), tokio_postgres::NoTls).await?;
|
||||
|
||||
let connection = tokio::spawn(connection);
|
||||
|
||||
let res = client.query(
|
||||
"select id, jwks_url, audience, role_names from neon_control_plane.endpoint_jwks where endpoint_id = $1",
|
||||
&[&endpoint.as_str()],
|
||||
)
|
||||
.await?;
|
||||
|
||||
let mut rows = vec![];
|
||||
for row in res {
|
||||
rows.push(AuthRule {
|
||||
id: row.get("id"),
|
||||
jwks_url: url::Url::parse(row.get("jwks_url"))?,
|
||||
audience: row.get("audience"),
|
||||
role_names: row
|
||||
.get::<_, Vec<String>>("role_names")
|
||||
.into_iter()
|
||||
.map(RoleName::from)
|
||||
.map(|s| RoleNameInt::from(&s))
|
||||
.collect(),
|
||||
});
|
||||
}
|
||||
|
||||
drop(client);
|
||||
connection.await??;
|
||||
|
||||
Ok(rows)
|
||||
}
|
||||
|
||||
async fn do_wake_compute(&self) -> Result<NodeInfo, WakeComputeError> {
|
||||
let mut config = compute::ConnCfg::new();
|
||||
config
|
||||
@@ -220,14 +185,6 @@ impl super::Api for Api {
|
||||
))
|
||||
}
|
||||
|
||||
async fn get_endpoint_jwks(
|
||||
&self,
|
||||
_ctx: &RequestMonitoring,
|
||||
endpoint: EndpointId,
|
||||
) -> anyhow::Result<Vec<AuthRule>> {
|
||||
self.do_get_endpoint_jwks(endpoint).await
|
||||
}
|
||||
|
||||
#[tracing::instrument(skip_all)]
|
||||
async fn wake_compute(
|
||||
&self,
|
||||
|
||||
@@ -7,33 +7,27 @@ use super::{
|
||||
NodeInfo,
|
||||
};
|
||||
use crate::{
|
||||
auth::backend::{jwt::AuthRule, ComputeUserInfo},
|
||||
auth::backend::ComputeUserInfo,
|
||||
compute,
|
||||
console::messages::{ColdStartInfo, EndpointJwksResponse, Reason},
|
||||
console::messages::{ColdStartInfo, Reason},
|
||||
http,
|
||||
metrics::{CacheOutcome, Metrics},
|
||||
rate_limiter::WakeComputeRateLimiter,
|
||||
scram, EndpointCacheKey, EndpointId,
|
||||
scram, EndpointCacheKey,
|
||||
};
|
||||
use crate::{cache::Cached, context::RequestMonitoring};
|
||||
use ::http::{header::AUTHORIZATION, HeaderName};
|
||||
use anyhow::bail;
|
||||
use futures::TryFutureExt;
|
||||
use std::{sync::Arc, time::Duration};
|
||||
use tokio::time::Instant;
|
||||
use tokio_postgres::config::SslMode;
|
||||
use tracing::{debug, error, info, info_span, warn, Instrument};
|
||||
|
||||
const X_REQUEST_ID: HeaderName = HeaderName::from_static("x-request-id");
|
||||
|
||||
#[derive(Clone)]
|
||||
pub struct Api {
|
||||
endpoint: http::Endpoint,
|
||||
pub caches: &'static ApiCaches,
|
||||
pub(crate) locks: &'static ApiLocks<EndpointCacheKey>,
|
||||
pub(crate) wake_compute_endpoint_rate_limiter: Arc<WakeComputeRateLimiter>,
|
||||
// put in a shared ref so we don't copy secrets all over in memory
|
||||
jwt: Arc<str>,
|
||||
jwt: String,
|
||||
}
|
||||
|
||||
impl Api {
|
||||
@@ -44,9 +38,7 @@ impl Api {
|
||||
locks: &'static ApiLocks<EndpointCacheKey>,
|
||||
wake_compute_endpoint_rate_limiter: Arc<WakeComputeRateLimiter>,
|
||||
) -> Self {
|
||||
let jwt = std::env::var("NEON_PROXY_TO_CONTROLPLANE_TOKEN")
|
||||
.unwrap_or_default()
|
||||
.into();
|
||||
let jwt = std::env::var("NEON_PROXY_TO_CONTROLPLANE_TOKEN").unwrap_or_default();
|
||||
Self {
|
||||
endpoint,
|
||||
caches,
|
||||
@@ -79,9 +71,9 @@ impl Api {
|
||||
async {
|
||||
let request = self
|
||||
.endpoint
|
||||
.get_path("proxy_get_role_secret")
|
||||
.header(X_REQUEST_ID, &request_id)
|
||||
.header(AUTHORIZATION, format!("Bearer {}", &self.jwt))
|
||||
.get("proxy_get_role_secret")
|
||||
.header("X-Request-ID", &request_id)
|
||||
.header("Authorization", format!("Bearer {}", &self.jwt))
|
||||
.query(&[("session_id", ctx.session_id())])
|
||||
.query(&[
|
||||
("application_name", application_name.as_str()),
|
||||
@@ -133,61 +125,6 @@ impl Api {
|
||||
.await
|
||||
}
|
||||
|
||||
async fn do_get_endpoint_jwks(
|
||||
&self,
|
||||
ctx: &RequestMonitoring,
|
||||
endpoint: EndpointId,
|
||||
) -> anyhow::Result<Vec<AuthRule>> {
|
||||
if !self
|
||||
.caches
|
||||
.endpoints_cache
|
||||
.is_valid(ctx, &endpoint.normalize())
|
||||
.await
|
||||
{
|
||||
bail!("endpoint not found");
|
||||
}
|
||||
let request_id = ctx.session_id().to_string();
|
||||
async {
|
||||
let request = self
|
||||
.endpoint
|
||||
.get_with_url(|url| {
|
||||
url.path_segments_mut()
|
||||
.push("endpoints")
|
||||
.push(endpoint.as_str())
|
||||
.push("jwks");
|
||||
})
|
||||
.header(X_REQUEST_ID, &request_id)
|
||||
.header(AUTHORIZATION, format!("Bearer {}", &self.jwt))
|
||||
.query(&[("session_id", ctx.session_id())])
|
||||
.build()?;
|
||||
|
||||
info!(url = request.url().as_str(), "sending http request");
|
||||
let start = Instant::now();
|
||||
let pause = ctx.latency_timer_pause(crate::metrics::Waiting::Cplane);
|
||||
let response = self.endpoint.execute(request).await?;
|
||||
drop(pause);
|
||||
info!(duration = ?start.elapsed(), "received http response");
|
||||
|
||||
let body = parse_body::<EndpointJwksResponse>(response).await?;
|
||||
|
||||
let rules = body
|
||||
.jwks
|
||||
.into_iter()
|
||||
.map(|jwks| AuthRule {
|
||||
id: jwks.id,
|
||||
jwks_url: jwks.jwks_url,
|
||||
audience: jwks.jwt_audience,
|
||||
role_names: jwks.role_names,
|
||||
})
|
||||
.collect();
|
||||
|
||||
Ok(rules)
|
||||
}
|
||||
.map_err(crate::error::log_error)
|
||||
.instrument(info_span!("http", id = request_id))
|
||||
.await
|
||||
}
|
||||
|
||||
async fn do_wake_compute(
|
||||
&self,
|
||||
ctx: &RequestMonitoring,
|
||||
@@ -198,7 +135,7 @@ impl Api {
|
||||
async {
|
||||
let mut request_builder = self
|
||||
.endpoint
|
||||
.get_path("proxy_wake_compute")
|
||||
.get("proxy_wake_compute")
|
||||
.header("X-Request-ID", &request_id)
|
||||
.header("Authorization", format!("Bearer {}", &self.jwt))
|
||||
.query(&[("session_id", ctx.session_id())])
|
||||
@@ -325,15 +262,6 @@ impl super::Api for Api {
|
||||
))
|
||||
}
|
||||
|
||||
#[tracing::instrument(skip_all)]
|
||||
async fn get_endpoint_jwks(
|
||||
&self,
|
||||
ctx: &RequestMonitoring,
|
||||
endpoint: EndpointId,
|
||||
) -> anyhow::Result<Vec<AuthRule>> {
|
||||
self.do_get_endpoint_jwks(ctx, endpoint).await
|
||||
}
|
||||
|
||||
#[tracing::instrument(skip_all)]
|
||||
async fn wake_compute(
|
||||
&self,
|
||||
|
||||
@@ -86,17 +86,9 @@ impl Endpoint {
|
||||
|
||||
/// Return a [builder](RequestBuilder) for a `GET` request,
|
||||
/// appending a single `path` segment to the base endpoint URL.
|
||||
pub(crate) fn get_path(&self, path: &str) -> RequestBuilder {
|
||||
self.get_with_url(|u| {
|
||||
u.path_segments_mut().push(path);
|
||||
})
|
||||
}
|
||||
|
||||
/// Return a [builder](RequestBuilder) for a `GET` request,
|
||||
/// accepting a closure to modify the url path segments for more complex paths queries.
|
||||
pub(crate) fn get_with_url(&self, f: impl for<'a> FnOnce(&'a mut ApiUrl)) -> RequestBuilder {
|
||||
pub(crate) fn get(&self, path: &str) -> RequestBuilder {
|
||||
let mut url = self.endpoint.clone();
|
||||
f(&mut url);
|
||||
url.path_segments_mut().push(path);
|
||||
self.client.get(url.into_inner())
|
||||
}
|
||||
|
||||
@@ -152,7 +144,7 @@ mod tests {
|
||||
|
||||
// Validate that this pattern makes sense.
|
||||
let req = endpoint
|
||||
.get_path("frobnicate")
|
||||
.get("frobnicate")
|
||||
.query(&[
|
||||
("foo", Some("10")), // should be just `foo=10`
|
||||
("bar", None), // shouldn't be passed at all
|
||||
@@ -170,7 +162,7 @@ mod tests {
|
||||
let endpoint = Endpoint::new(url, Client::new());
|
||||
|
||||
let req = endpoint
|
||||
.get_path("frobnicate")
|
||||
.get("frobnicate")
|
||||
.query(&[("session_id", uuid::Uuid::nil())])
|
||||
.build()?;
|
||||
|
||||
|
||||
@@ -130,14 +130,14 @@ impl<Id: InternId> Default for StringInterner<Id> {
|
||||
}
|
||||
|
||||
#[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)]
|
||||
pub struct RoleNameTag;
|
||||
pub(crate) struct RoleNameTag;
|
||||
impl InternId for RoleNameTag {
|
||||
fn get_interner() -> &'static StringInterner<Self> {
|
||||
static ROLE_NAMES: OnceLock<StringInterner<RoleNameTag>> = OnceLock::new();
|
||||
ROLE_NAMES.get_or_init(Default::default)
|
||||
}
|
||||
}
|
||||
pub type RoleNameInt = InternedString<RoleNameTag>;
|
||||
pub(crate) type RoleNameInt = InternedString<RoleNameTag>;
|
||||
impl From<&RoleName> for RoleNameInt {
|
||||
fn from(value: &RoleName) -> Self {
|
||||
RoleNameTag::get_interner().get_or_intern(value)
|
||||
|
||||
@@ -82,7 +82,7 @@
|
||||
impl_trait_overcaptures,
|
||||
)]
|
||||
|
||||
use std::convert::Infallible;
|
||||
use std::{convert::Infallible, future::Future};
|
||||
|
||||
use anyhow::{bail, Context};
|
||||
use intern::{EndpointIdInt, EndpointIdTag, InternId};
|
||||
@@ -117,12 +117,13 @@ pub mod usage_metrics;
|
||||
pub mod waiters;
|
||||
|
||||
/// Handle unix signals appropriately.
|
||||
pub async fn handle_signals<F>(
|
||||
pub async fn handle_signals<F, Fut>(
|
||||
token: CancellationToken,
|
||||
mut refresh_config: F,
|
||||
) -> anyhow::Result<Infallible>
|
||||
where
|
||||
F: FnMut(),
|
||||
F: FnMut() -> Fut,
|
||||
Fut: Future<Output = anyhow::Result<()>>,
|
||||
{
|
||||
use tokio::signal::unix::{signal, SignalKind};
|
||||
|
||||
@@ -135,7 +136,7 @@ where
|
||||
// Hangup is commonly used for config reload.
|
||||
_ = hangup.recv() => {
|
||||
warn!("received SIGHUP");
|
||||
refresh_config();
|
||||
refresh_config().await?;
|
||||
}
|
||||
// Shut down the whole application.
|
||||
_ = interrupt.recv() => {
|
||||
|
||||
@@ -525,10 +525,6 @@ impl TestBackend for TestConnectMechanism {
|
||||
{
|
||||
unimplemented!("not used in tests")
|
||||
}
|
||||
|
||||
fn dyn_clone(&self) -> Box<dyn TestBackend> {
|
||||
Box::new(self.clone())
|
||||
}
|
||||
}
|
||||
|
||||
fn helper_create_cached_node_info(cache: &'static NodeInfoCache) -> CachedNodeInfo {
|
||||
|
||||
@@ -43,13 +43,6 @@ impl ThreadPool {
|
||||
pub fn new(n_workers: u8) -> Arc<Self> {
|
||||
// rayon would be nice here, but yielding in rayon does not work well afaict.
|
||||
|
||||
if n_workers == 0 {
|
||||
return Arc::new(Self {
|
||||
runtime: None,
|
||||
metrics: Arc::new(ThreadPoolMetrics::new(n_workers as usize)),
|
||||
});
|
||||
}
|
||||
|
||||
Arc::new_cyclic(|pool| {
|
||||
let pool = pool.clone();
|
||||
let worker_id = AtomicUsize::new(0);
|
||||
|
||||
@@ -119,7 +119,7 @@ impl PoolingBackend {
|
||||
.check_jwt(
|
||||
ctx,
|
||||
user_info.endpoint.clone(),
|
||||
&user_info.user,
|
||||
user_info.user.clone(),
|
||||
&StaticAuthRules,
|
||||
jwt,
|
||||
)
|
||||
|
||||
@@ -374,16 +374,14 @@ type JoinTaskRes = Result<anyhow::Result<()>, JoinError>;
|
||||
|
||||
async fn start_safekeeper(conf: SafeKeeperConf) -> Result<()> {
|
||||
// fsync the datadir to make sure we have a consistent state on disk.
|
||||
if !conf.no_sync {
|
||||
let dfd = File::open(&conf.workdir).context("open datadir for syncfs")?;
|
||||
let started = Instant::now();
|
||||
utils::crashsafe::syncfs(dfd)?;
|
||||
let elapsed = started.elapsed();
|
||||
info!(
|
||||
elapsed_ms = elapsed.as_millis(),
|
||||
"syncfs data directory done"
|
||||
);
|
||||
}
|
||||
let dfd = File::open(&conf.workdir).context("open datadir for syncfs")?;
|
||||
let started = Instant::now();
|
||||
utils::crashsafe::syncfs(dfd)?;
|
||||
let elapsed = started.elapsed();
|
||||
info!(
|
||||
elapsed_ms = elapsed.as_millis(),
|
||||
"syncfs data directory done"
|
||||
);
|
||||
|
||||
info!("starting safekeeper WAL service on {}", conf.listen_pg_addr);
|
||||
let pg_listener = tcp_listener::bind(conf.listen_pg_addr.clone()).map_err(|e| {
|
||||
|
||||
@@ -9,7 +9,7 @@ use crate::walproposer_sim::{
|
||||
|
||||
pub mod walproposer_sim;
|
||||
|
||||
// Generates 500 random seeds and runs a schedule for each of them.
|
||||
// Generates 2000 random seeds and runs a schedule for each of them.
|
||||
// If you see this test fail, please report the last seed to the
|
||||
// @safekeeper team.
|
||||
#[test]
|
||||
@@ -17,7 +17,7 @@ fn test_random_schedules() -> anyhow::Result<()> {
|
||||
let clock = init_logger();
|
||||
let mut config = TestConfig::new(Some(clock));
|
||||
|
||||
for _ in 0..500 {
|
||||
for _ in 0..2000 {
|
||||
let seed: u64 = rand::thread_rng().gen();
|
||||
config.network = generate_network_opts(seed);
|
||||
|
||||
|
||||
@@ -3311,7 +3311,7 @@ class VanillaPostgres(PgProtocol):
|
||||
self.pg_bin = pg_bin
|
||||
self.running = False
|
||||
if init:
|
||||
self.pg_bin.run_capture(["initdb", "--pgdata", str(pgdatadir)])
|
||||
self.pg_bin.run_capture(["initdb", "-D", str(pgdatadir)])
|
||||
self.configure([f"port = {port}\n"])
|
||||
|
||||
def enable_tls(self):
|
||||
|
||||
@@ -8,7 +8,6 @@ import requests
|
||||
from fixtures.common_types import Lsn, TenantId, TenantTimelineId, TimelineId
|
||||
from fixtures.log_helper import log
|
||||
from fixtures.metrics import Metrics, MetricsGetter, parse_metrics
|
||||
from fixtures.utils import wait_until
|
||||
|
||||
|
||||
# Walreceiver as returned by sk's timeline status endpoint.
|
||||
@@ -162,16 +161,6 @@ class SafekeeperHttpClient(requests.Session, MetricsGetter):
|
||||
walreceivers=walreceivers,
|
||||
)
|
||||
|
||||
# Get timeline_start_lsn, waiting until it's nonzero. It is a way to ensure
|
||||
# that the timeline is fully initialized at the safekeeper.
|
||||
def get_non_zero_timeline_start_lsn(self, tenant_id: TenantId, timeline_id: TimelineId) -> Lsn:
|
||||
def timeline_start_lsn_non_zero() -> Lsn:
|
||||
s = self.timeline_status(tenant_id, timeline_id).timeline_start_lsn
|
||||
assert s > Lsn(0)
|
||||
return s
|
||||
|
||||
return wait_until(30, 1, timeline_start_lsn_non_zero)
|
||||
|
||||
def get_commit_lsn(self, tenant_id: TenantId, timeline_id: TimelineId) -> Lsn:
|
||||
return self.timeline_status(tenant_id, timeline_id).commit_lsn
|
||||
|
||||
|
||||
@@ -56,20 +56,32 @@ class Workload:
|
||||
with ENDPOINT_LOCK:
|
||||
self._endpoint.reconfigure()
|
||||
|
||||
def endpoint(self, pageserver_id: Optional[int] = None) -> Endpoint:
|
||||
def go_readonly(self):
|
||||
self.stop()
|
||||
self._endpoint = self.make_endpoint(readonly=True, pageserver_id=None)
|
||||
self._endpoint.start(pageserver_id=None)
|
||||
|
||||
def make_endpoint(self, readonly: bool, pageserver_id: Optional[int] = None) -> Endpoint:
|
||||
# We may be running alongside other Workloads for different tenants. Full TTID is
|
||||
# obnoxiously long for use here, but a cut-down version is still unique enough for tests.
|
||||
endpoint_id = f"ep-workload-{str(self.tenant_id)[0:4]}-{str(self.timeline_id)[0:4]}"
|
||||
|
||||
if readonly:
|
||||
self._endpoint_opts["hot_standby"] = True
|
||||
|
||||
return self.env.endpoints.create(
|
||||
self.branch_name,
|
||||
tenant_id=self.tenant_id,
|
||||
pageserver_id=pageserver_id,
|
||||
endpoint_id=endpoint_id,
|
||||
**self._endpoint_opts,
|
||||
)
|
||||
|
||||
def endpoint(self, pageserver_id: Optional[int] = None) -> Endpoint:
|
||||
with ENDPOINT_LOCK:
|
||||
if self._endpoint is None:
|
||||
self._endpoint = self.env.endpoints.create(
|
||||
self.branch_name,
|
||||
tenant_id=self.tenant_id,
|
||||
pageserver_id=pageserver_id,
|
||||
endpoint_id=endpoint_id,
|
||||
**self._endpoint_opts,
|
||||
)
|
||||
self._endpoint = self.make_endpoint(pageserver_id=pageserver_id, readonly=False)
|
||||
|
||||
self._endpoint.start(pageserver_id=pageserver_id)
|
||||
else:
|
||||
self._endpoint.reconfigure(pageserver_id=pageserver_id)
|
||||
|
||||
@@ -11,6 +11,7 @@ from fixtures.neon_fixtures import (
|
||||
generate_uploads_and_deletions,
|
||||
)
|
||||
from fixtures.pageserver.http import PageserverApiException
|
||||
from fixtures.pageserver.utils import wait_for_last_record_lsn
|
||||
from fixtures.utils import wait_until
|
||||
from fixtures.workload import Workload
|
||||
|
||||
@@ -412,3 +413,42 @@ def test_image_layer_compression(neon_env_builder: NeonEnvBuilder, enabled: bool
|
||||
f"SELECT count(*) FROM foo WHERE id={v} and val=repeat('abcde{v:0>3}', 500)"
|
||||
)
|
||||
assert res[0][0] == 1
|
||||
|
||||
|
||||
def test_image_layer_reads(neon_env_builder: NeonEnvBuilder):
|
||||
env = neon_env_builder.init_start()
|
||||
tenant_id = env.initial_tenant
|
||||
timeline_id = env.initial_timeline
|
||||
|
||||
env.pageserver.http_client().set_tenant_config(
|
||||
tenant_id,
|
||||
{
|
||||
"compaction_period": "0s",
|
||||
},
|
||||
)
|
||||
|
||||
workload = Workload(env, tenant_id, timeline_id)
|
||||
workload.init()
|
||||
workload.write_rows(256)
|
||||
workload.validate()
|
||||
|
||||
workload.go_readonly()
|
||||
|
||||
commit_lsn = env.safekeepers[0].http_client().get_commit_lsn(tenant_id, timeline_id)
|
||||
wait_for_last_record_lsn(env.pageserver.http_client(), tenant_id, timeline_id, commit_lsn)
|
||||
log.info(f"Ingested up to commit_lsn {commit_lsn}")
|
||||
|
||||
env.pageserver.http_client().timeline_compact(
|
||||
tenant_id, timeline_id, force_image_layer_creation=True
|
||||
)
|
||||
|
||||
# Uncomment this checkpoint, and the logs will show getpage requests hitting the image layers we
|
||||
# just created. However, without the checkpoint, getpage requests will hit one InMemoryLayer and
|
||||
# one persistent delta layer.
|
||||
# env.pageserver.http_client().timeline_checkpoint(tenant_id, timeline_id, wait_until_uploaded=True)
|
||||
|
||||
# This should send getpage requests at the same LSN where we just created image layers
|
||||
workload.validate()
|
||||
|
||||
# Nothing should have written in the meantime
|
||||
assert commit_lsn == env.safekeepers[0].http_client().get_commit_lsn(tenant_id, timeline_id)
|
||||
|
||||
@@ -21,7 +21,7 @@ from fixtures.pageserver.http import PageserverApiException
|
||||
from fixtures.pageserver.utils import (
|
||||
timeline_delete_wait_completed,
|
||||
)
|
||||
from fixtures.pg_version import PgVersion
|
||||
from fixtures.pg_version import PgVersion, skip_on_postgres
|
||||
from fixtures.remote_storage import RemoteStorageKind, S3Storage, s3_storage
|
||||
from fixtures.workload import Workload
|
||||
|
||||
@@ -156,6 +156,9 @@ ingest_lag_log_line = ".*ingesting record with timestamp lagging more than wait_
|
||||
@check_ondisk_data_compatibility_if_enabled
|
||||
@pytest.mark.xdist_group("compatibility")
|
||||
@pytest.mark.order(after="test_create_snapshot")
|
||||
@skip_on_postgres(
|
||||
PgVersion.V17, "There are no snapshots yet"
|
||||
) # TODO: revert this once we have snapshots
|
||||
def test_backward_compatibility(
|
||||
neon_env_builder: NeonEnvBuilder,
|
||||
test_output_dir: Path,
|
||||
@@ -203,6 +206,9 @@ def test_backward_compatibility(
|
||||
@check_ondisk_data_compatibility_if_enabled
|
||||
@pytest.mark.xdist_group("compatibility")
|
||||
@pytest.mark.order(after="test_create_snapshot")
|
||||
@skip_on_postgres(
|
||||
PgVersion.V17, "There are no snapshots yet"
|
||||
) # TODO: revert this once we have snapshots
|
||||
def test_forward_compatibility(
|
||||
neon_env_builder: NeonEnvBuilder,
|
||||
test_output_dir: Path,
|
||||
|
||||
@@ -27,7 +27,7 @@ def test_readonly_node(neon_simple_env: NeonEnv):
|
||||
env.pageserver.allowed_errors.extend(
|
||||
[
|
||||
".*basebackup .* failed: invalid basebackup lsn.*",
|
||||
".*/lsn_lease.*invalid lsn lease request.*",
|
||||
".*page_service.*handle_make_lsn_lease.*.*tried to request a page version that was garbage collected",
|
||||
]
|
||||
)
|
||||
|
||||
@@ -108,7 +108,7 @@ def test_readonly_node(neon_simple_env: NeonEnv):
|
||||
assert cur.fetchone() == (1,)
|
||||
|
||||
# Create node at pre-initdb lsn
|
||||
with pytest.raises(Exception, match="invalid lsn lease request"):
|
||||
with pytest.raises(Exception, match="invalid basebackup lsn"):
|
||||
# compute node startup with invalid LSN should fail
|
||||
env.endpoints.create_start(
|
||||
branch_name="main",
|
||||
@@ -167,23 +167,6 @@ def test_readonly_node_gc(neon_env_builder: NeonEnvBuilder):
|
||||
)
|
||||
return last_flush_lsn
|
||||
|
||||
def trigger_gc_and_select(env: NeonEnv, ep_static: Endpoint):
|
||||
"""
|
||||
Trigger GC manually on all pageservers. Then run an `SELECT` query.
|
||||
"""
|
||||
for shard, ps in tenant_get_shards(env, env.initial_tenant):
|
||||
client = ps.http_client()
|
||||
gc_result = client.timeline_gc(shard, env.initial_timeline, 0)
|
||||
log.info(f"{gc_result=}")
|
||||
|
||||
assert (
|
||||
gc_result["layers_removed"] == 0
|
||||
), "No layers should be removed, old layers are guarded by leases."
|
||||
|
||||
with ep_static.cursor() as cur:
|
||||
cur.execute("SELECT count(*) FROM t0")
|
||||
assert cur.fetchone() == (ROW_COUNT,)
|
||||
|
||||
# Insert some records on main branch
|
||||
with env.endpoints.create_start("main") as ep_main:
|
||||
with ep_main.cursor() as cur:
|
||||
@@ -210,31 +193,25 @@ def test_readonly_node_gc(neon_env_builder: NeonEnvBuilder):
|
||||
|
||||
generate_updates_on_main(env, ep_main, i, end=100)
|
||||
|
||||
trigger_gc_and_select(env, ep_static)
|
||||
# Trigger GC
|
||||
for shard, ps in tenant_get_shards(env, env.initial_tenant):
|
||||
client = ps.http_client()
|
||||
gc_result = client.timeline_gc(shard, env.initial_timeline, 0)
|
||||
log.info(f"{gc_result=}")
|
||||
|
||||
# Trigger Pageserver restarts
|
||||
for ps in env.pageservers:
|
||||
ps.stop()
|
||||
# Static compute should have at least one lease request failure due to connection.
|
||||
time.sleep(LSN_LEASE_LENGTH / 2)
|
||||
ps.start()
|
||||
assert (
|
||||
gc_result["layers_removed"] == 0
|
||||
), "No layers should be removed, old layers are guarded by leases."
|
||||
|
||||
trigger_gc_and_select(env, ep_static)
|
||||
|
||||
# Reconfigure pageservers
|
||||
env.pageservers[0].stop()
|
||||
env.storage_controller.node_configure(
|
||||
env.pageservers[0].id, {"availability": "Offline"}
|
||||
)
|
||||
env.storage_controller.reconcile_until_idle()
|
||||
|
||||
trigger_gc_and_select(env, ep_static)
|
||||
with ep_static.cursor() as cur:
|
||||
cur.execute("SELECT count(*) FROM t0")
|
||||
assert cur.fetchone() == (ROW_COUNT,)
|
||||
|
||||
# Do some update so we can increment latest_gc_cutoff
|
||||
generate_updates_on_main(env, ep_main, i, end=100)
|
||||
|
||||
# Wait for the existing lease to expire.
|
||||
time.sleep(LSN_LEASE_LENGTH + 1)
|
||||
time.sleep(LSN_LEASE_LENGTH)
|
||||
# Now trigger GC again, layers should be removed.
|
||||
for shard, ps in tenant_get_shards(env, env.initial_tenant):
|
||||
client = ps.http_client()
|
||||
|
||||
@@ -45,7 +45,10 @@ def test_gc_blocking_by_timeline(neon_env_builder: NeonEnvBuilder, sharded: bool
|
||||
tenant_after = http.tenant_status(env.initial_tenant)
|
||||
assert tenant_before != tenant_after
|
||||
gc_blocking = tenant_after["gc_blocking"]
|
||||
assert gc_blocking == "BlockingReasons { timelines: 1, reasons: EnumSet(Manual) }"
|
||||
assert (
|
||||
gc_blocking
|
||||
== "BlockingReasons { tenant_blocked_by_lsn_lease_deadline: false, timelines: 1, reasons: EnumSet(Manual) }"
|
||||
)
|
||||
|
||||
wait_for_another_gc_round()
|
||||
pss.assert_log_contains(gc_skipped_line)
|
||||
|
||||
@@ -772,7 +772,7 @@ class ProposerPostgres(PgProtocol):
|
||||
def initdb(self):
|
||||
"""Run initdb"""
|
||||
|
||||
args = ["initdb", "--username", "cloud_admin", "--pgdata", self.pg_data_dir_path()]
|
||||
args = ["initdb", "-U", "cloud_admin", "-D", self.pg_data_dir_path()]
|
||||
self.pg_bin.run(args)
|
||||
|
||||
def start(self):
|
||||
@@ -2084,13 +2084,8 @@ def test_timeline_copy(neon_env_builder: NeonEnvBuilder, insert_rows: int):
|
||||
|
||||
endpoint.safe_psql("create table t(key int, value text)")
|
||||
|
||||
# Note: currently timelines on sks are created by compute and commit of
|
||||
# transaction above is finished when 2/3 sks received it, so there is a
|
||||
# small chance that timeline on this sk is not created/initialized yet,
|
||||
# hence the usage of waiting function to prevent flakiness.
|
||||
timeline_start_lsn = (
|
||||
env.safekeepers[0].http_client().get_non_zero_timeline_start_lsn(tenant_id, timeline_id)
|
||||
)
|
||||
timeline_status = env.safekeepers[0].http_client().timeline_status(tenant_id, timeline_id)
|
||||
timeline_start_lsn = timeline_status.timeline_start_lsn
|
||||
log.info(f"Timeline start LSN: {timeline_start_lsn}")
|
||||
|
||||
current_percent = 0.0
|
||||
|
||||
Reference in New Issue
Block a user