mirror of
https://github.com/neondatabase/neon.git
synced 2026-02-15 00:20:36 +00:00
Compare commits
29 Commits
arpad/endp
...
feat/ci_wo
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
723a79159c | ||
|
|
d133f831c0 | ||
|
|
aa19a412e2 | ||
|
|
8d81c836a2 | ||
|
|
3a3fcb3745 | ||
|
|
441e769d67 | ||
|
|
1bf8857962 | ||
|
|
eb8a87d2ec | ||
|
|
8c9bf3e8d4 | ||
|
|
c4ddac3fcc | ||
|
|
3bb61ce8fa | ||
|
|
73f494a0da | ||
|
|
7ee766c8b1 | ||
|
|
57f58801af | ||
|
|
22963c7531 | ||
|
|
fb05a2e549 | ||
|
|
3ab7297a51 | ||
|
|
b2cf8797b0 | ||
|
|
e86abd8916 | ||
|
|
9da0b4d228 | ||
|
|
350ae9a9fe | ||
|
|
44c1f52e24 | ||
|
|
f5e21b9dc2 | ||
|
|
e47c846821 | ||
|
|
fbc6b7fae8 | ||
|
|
bd517b1d60 | ||
|
|
10be4cbed8 | ||
|
|
f977a62727 | ||
|
|
9814b7cfaa |
@@ -19,8 +19,8 @@ on:
|
||||
description: 'debug or release'
|
||||
required: true
|
||||
type: string
|
||||
test-cfg:
|
||||
description: 'a json object of postgres versions and lfc states to run regression tests on'
|
||||
pg-versions:
|
||||
description: 'a json array of postgres versions to run regression tests on'
|
||||
required: true
|
||||
type: string
|
||||
|
||||
@@ -276,14 +276,14 @@ jobs:
|
||||
options: --init --shm-size=512mb --ulimit memlock=67108864:67108864
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix: ${{ fromJSON(format('{{"include":{0}}}', inputs.test-cfg)) }}
|
||||
matrix:
|
||||
pg_version: ${{ fromJson(inputs.pg-versions) }}
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
with:
|
||||
submodules: true
|
||||
|
||||
- name: Pytest regression tests
|
||||
continue-on-error: ${{ matrix.lfc_state == 'with-lfc' }}
|
||||
uses: ./.github/actions/run-python-test-set
|
||||
timeout-minutes: 60
|
||||
with:
|
||||
@@ -300,7 +300,6 @@ jobs:
|
||||
CHECK_ONDISK_DATA_COMPATIBILITY: nonempty
|
||||
BUILD_TAG: ${{ inputs.build-tag }}
|
||||
PAGESERVER_VIRTUAL_FILE_IO_ENGINE: tokio-epoll-uring
|
||||
USE_LFC: ${{ matrix.lfc_state == 'with-lfc' && 'true' || 'false' }}
|
||||
|
||||
# Temporary disable this step until we figure out why it's so flaky
|
||||
# Ref https://github.com/neondatabase/neon/issues/4540
|
||||
|
||||
12
.github/workflows/benchmarking.yml
vendored
12
.github/workflows/benchmarking.yml
vendored
@@ -558,12 +558,12 @@ jobs:
|
||||
arch=$(uname -m | sed 's/x86_64/amd64/g' | sed 's/aarch64/arm64/g')
|
||||
|
||||
cd /home/nonroot
|
||||
wget -q "https://apt.postgresql.org/pub/repos/apt/pool/main/p/postgresql-17/libpq5_17.2-1.pgdg110+1_${arch}.deb"
|
||||
wget -q "https://apt.postgresql.org/pub/repos/apt/pool/main/p/postgresql-16/postgresql-client-16_16.6-1.pgdg110+1_${arch}.deb"
|
||||
wget -q "https://apt.postgresql.org/pub/repos/apt/pool/main/p/postgresql-16/postgresql-16_16.6-1.pgdg110+1_${arch}.deb"
|
||||
dpkg -x libpq5_17.2-1.pgdg110+1_${arch}.deb pg
|
||||
dpkg -x postgresql-16_16.6-1.pgdg110+1_${arch}.deb pg
|
||||
dpkg -x postgresql-client-16_16.6-1.pgdg110+1_${arch}.deb pg
|
||||
wget -q "https://apt.postgresql.org/pub/repos/apt/pool/main/p/postgresql-17/libpq5_17.1-1.pgdg110+1_${arch}.deb"
|
||||
wget -q "https://apt.postgresql.org/pub/repos/apt/pool/main/p/postgresql-16/postgresql-client-16_16.5-1.pgdg110+1_${arch}.deb"
|
||||
wget -q "https://apt.postgresql.org/pub/repos/apt/pool/main/p/postgresql-16/postgresql-16_16.5-1.pgdg110+1_${arch}.deb"
|
||||
dpkg -x libpq5_17.1-1.pgdg110+1_${arch}.deb pg
|
||||
dpkg -x postgresql-16_16.5-1.pgdg110+1_${arch}.deb pg
|
||||
dpkg -x postgresql-client-16_16.5-1.pgdg110+1_${arch}.deb pg
|
||||
|
||||
mkdir -p /tmp/neon/pg_install/v16/bin
|
||||
ln -s /home/nonroot/pg/usr/lib/postgresql/16/bin/pgbench /tmp/neon/pg_install/v16/bin/pgbench
|
||||
|
||||
82
.github/workflows/build-macos.yml
vendored
Normal file
82
.github/workflows/build-macos.yml
vendored
Normal file
@@ -0,0 +1,82 @@
|
||||
name: Check neon with MacOS builds
|
||||
|
||||
on:
|
||||
workflow_call:
|
||||
|
||||
env:
|
||||
RUST_BACKTRACE: 1
|
||||
COPT: '-Werror'
|
||||
|
||||
# TODO: move `check-*` and `files-changed` jobs to the "Caller" Workflow
|
||||
# We should care about that as Github has limitations:
|
||||
# - You can connect up to four levels of workflows
|
||||
# - You can call a maximum of 20 unique reusable workflows from a single workflow file.
|
||||
# https://docs.github.com/en/actions/sharing-automations/reusing-workflows#limitations
|
||||
jobs:
|
||||
files-changed:
|
||||
name: Detect what files changed
|
||||
runs-on: ubuntu-22.04
|
||||
timeout-minutes: 3
|
||||
outputs:
|
||||
postgres_changes: ${{ steps.postgres_changes.outputs.changes }}
|
||||
steps:
|
||||
- name: Checkout
|
||||
uses: actions/checkout@6ccd57f4c5d15bdc2fef309bd9fb6cc9db2ef1c6 # v4.1.7
|
||||
with:
|
||||
submodules: true
|
||||
|
||||
- name: Check for Postgres changes
|
||||
uses: dorny/paths-filter@1441771bbfdd59dcd748680ee64ebd8faab1a242 #v3
|
||||
id: postgres_changes
|
||||
with:
|
||||
token: ${{ github.token }}
|
||||
filters: |
|
||||
v14: ['vendor/postgres-v14/**', 'Makefile', 'pgxn/**']
|
||||
v15: ['vendor/postgres-v15/**', 'Makefile', 'pgxn/**']
|
||||
v16: ['vendor/postgres-v16/**', 'Makefile', 'pgxn/**']
|
||||
v17: ['vendor/postgres-v17/**', 'Makefile', 'pgxn/**']
|
||||
base: ${{ github.event_name != 'pull_request' && (github.event.merge_group.base_ref || github.ref_name) || '' }}
|
||||
ref: ${{ github.event_name != 'pull_request' && (github.event.merge_group.head_ref || github.ref) || ''}}
|
||||
|
||||
check-macos-build:
|
||||
needs: [ files-changed ]
|
||||
if: |
|
||||
needs.files-changed.outputs.postgres_changes != '[]' && (
|
||||
contains(github.event.pull_request.labels.*.name, 'run-extra-build-macos') ||
|
||||
contains(github.event.pull_request.labels.*.name, 'run-extra-build-*') ||
|
||||
github.ref_name == 'main'
|
||||
)
|
||||
timeout-minutes: 30
|
||||
runs-on: ubuntu-22.04
|
||||
strategy:
|
||||
matrix:
|
||||
postgres-version: ${{ fromJSON(needs.files-changed.outputs.postgres_changes) }}
|
||||
env:
|
||||
# Use release build only, to have less debug info around
|
||||
# Hence keeping target/ (and general cache size) smaller
|
||||
BUILD_TYPE: release
|
||||
steps:
|
||||
- name: Checkout main repo
|
||||
uses: actions/checkout@v4
|
||||
|
||||
- name: Checkout submodule vendor/postgres-${{ matrix.postgres-version }}
|
||||
run: |
|
||||
git submodule init vendor/postgres-${{ matrix.postgres-version }}
|
||||
git submodule update --depth 1
|
||||
|
||||
- name: Install build dependencies
|
||||
run: |
|
||||
echo brew install flex bison openssl protobuf icu4c
|
||||
|
||||
- name: Build Postgres ${{ matrix.postgres-version }}
|
||||
run: |
|
||||
echo make postgres-${{ matrix.postgres-version }}
|
||||
|
||||
- name: Build Neon Pg Ext ${{ matrix.postgres-version }}
|
||||
run: |
|
||||
echo make "neon-pg-ext-${{ matrix.postgres-version }}"
|
||||
|
||||
- name: Build walproposer-lib (only for v17)
|
||||
if: matrix.postgres-version == 'v17'
|
||||
run:
|
||||
echo make walproposer-lib
|
||||
11
.github/workflows/build_and_test.yml
vendored
11
.github/workflows/build_and_test.yml
vendored
@@ -6,7 +6,7 @@ on:
|
||||
- main
|
||||
- release
|
||||
- release-proxy
|
||||
pull_request:
|
||||
# pull_request:
|
||||
|
||||
defaults:
|
||||
run:
|
||||
@@ -253,14 +253,7 @@ jobs:
|
||||
build-tag: ${{ needs.tag.outputs.build-tag }}
|
||||
build-type: ${{ matrix.build-type }}
|
||||
# Run tests on all Postgres versions in release builds and only on the latest version in debug builds
|
||||
# run without LFC on v17 release only
|
||||
test-cfg: |
|
||||
${{ matrix.build-type == 'release' && '[{"pg_version":"v14", "lfc_state": "without-lfc"},
|
||||
{"pg_version":"v15", "lfc_state": "without-lfc"},
|
||||
{"pg_version":"v16", "lfc_state": "without-lfc"},
|
||||
{"pg_version":"v17", "lfc_state": "without-lfc"},
|
||||
{"pg_version":"v17", "lfc_state": "with-lfc"}]'
|
||||
|| '[{"pg_version":"v17", "lfc_state": "without-lfc"}]' }}
|
||||
pg-versions: ${{ matrix.build-type == 'release' && '["v14", "v15", "v16", "v17"]' || '["v17"]' }}
|
||||
secrets: inherit
|
||||
|
||||
# Keep `benchmarks` job outside of `build-and-test-locally` workflow to make job failures non-blocking
|
||||
|
||||
174
.github/workflows/neon_extra_builds.yml
vendored
174
.github/workflows/neon_extra_builds.yml
vendored
@@ -31,180 +31,10 @@ jobs:
|
||||
uses: ./.github/workflows/build-build-tools-image.yml
|
||||
secrets: inherit
|
||||
|
||||
check-macos-build:
|
||||
run-macos-build:
|
||||
needs: [ check-permissions ]
|
||||
if: |
|
||||
contains(github.event.pull_request.labels.*.name, 'run-extra-build-macos') ||
|
||||
contains(github.event.pull_request.labels.*.name, 'run-extra-build-*') ||
|
||||
github.ref_name == 'main'
|
||||
timeout-minutes: 90
|
||||
runs-on: macos-15
|
||||
|
||||
env:
|
||||
# Use release build only, to have less debug info around
|
||||
# Hence keeping target/ (and general cache size) smaller
|
||||
BUILD_TYPE: release
|
||||
|
||||
steps:
|
||||
- name: Checkout
|
||||
uses: actions/checkout@v4
|
||||
with:
|
||||
submodules: true
|
||||
|
||||
- name: Install macOS postgres dependencies
|
||||
run: brew install flex bison openssl protobuf icu4c
|
||||
|
||||
- name: Set pg 14 revision for caching
|
||||
id: pg_v14_rev
|
||||
run: echo pg_rev=$(git rev-parse HEAD:vendor/postgres-v14) >> $GITHUB_OUTPUT
|
||||
|
||||
- name: Set pg 15 revision for caching
|
||||
id: pg_v15_rev
|
||||
run: echo pg_rev=$(git rev-parse HEAD:vendor/postgres-v15) >> $GITHUB_OUTPUT
|
||||
|
||||
- name: Set pg 16 revision for caching
|
||||
id: pg_v16_rev
|
||||
run: echo pg_rev=$(git rev-parse HEAD:vendor/postgres-v16) >> $GITHUB_OUTPUT
|
||||
|
||||
- name: Set pg 17 revision for caching
|
||||
id: pg_v17_rev
|
||||
run: echo pg_rev=$(git rev-parse HEAD:vendor/postgres-v17) >> $GITHUB_OUTPUT
|
||||
|
||||
- name: Cache postgres v14 build
|
||||
id: cache_pg_14
|
||||
uses: actions/cache@v4
|
||||
with:
|
||||
path: pg_install/v14
|
||||
key: v1-${{ runner.os }}-${{ runner.arch }}-${{ env.BUILD_TYPE }}-pg-${{ steps.pg_v14_rev.outputs.pg_rev }}-${{ hashFiles('Makefile') }}
|
||||
|
||||
- name: Cache postgres v15 build
|
||||
id: cache_pg_15
|
||||
uses: actions/cache@v4
|
||||
with:
|
||||
path: pg_install/v15
|
||||
key: v1-${{ runner.os }}-${{ runner.arch }}-${{ env.BUILD_TYPE }}-pg-${{ steps.pg_v15_rev.outputs.pg_rev }}-${{ hashFiles('Makefile') }}
|
||||
|
||||
- name: Cache postgres v16 build
|
||||
id: cache_pg_16
|
||||
uses: actions/cache@v4
|
||||
with:
|
||||
path: pg_install/v16
|
||||
key: v1-${{ runner.os }}-${{ runner.arch }}-${{ env.BUILD_TYPE }}-pg-${{ steps.pg_v16_rev.outputs.pg_rev }}-${{ hashFiles('Makefile') }}
|
||||
|
||||
- name: Cache postgres v17 build
|
||||
id: cache_pg_17
|
||||
uses: actions/cache@v4
|
||||
with:
|
||||
path: pg_install/v17
|
||||
key: v1-${{ runner.os }}-${{ runner.arch }}-${{ env.BUILD_TYPE }}-pg-${{ steps.pg_v17_rev.outputs.pg_rev }}-${{ hashFiles('Makefile') }}
|
||||
|
||||
- name: Set extra env for macOS
|
||||
run: |
|
||||
echo 'LDFLAGS=-L/usr/local/opt/openssl@3/lib' >> $GITHUB_ENV
|
||||
echo 'CPPFLAGS=-I/usr/local/opt/openssl@3/include' >> $GITHUB_ENV
|
||||
|
||||
- name: Cache cargo deps
|
||||
uses: actions/cache@v4
|
||||
with:
|
||||
path: |
|
||||
~/.cargo/registry
|
||||
!~/.cargo/registry/src
|
||||
~/.cargo/git
|
||||
target
|
||||
key: v1-${{ runner.os }}-${{ runner.arch }}-cargo-${{ hashFiles('./Cargo.lock') }}-${{ hashFiles('./rust-toolchain.toml') }}-rust
|
||||
|
||||
- name: Build postgres v14
|
||||
if: steps.cache_pg_14.outputs.cache-hit != 'true'
|
||||
run: make postgres-v14 -j$(sysctl -n hw.ncpu)
|
||||
|
||||
- name: Build postgres v15
|
||||
if: steps.cache_pg_15.outputs.cache-hit != 'true'
|
||||
run: make postgres-v15 -j$(sysctl -n hw.ncpu)
|
||||
|
||||
- name: Build postgres v16
|
||||
if: steps.cache_pg_16.outputs.cache-hit != 'true'
|
||||
run: make postgres-v16 -j$(sysctl -n hw.ncpu)
|
||||
|
||||
- name: Build postgres v17
|
||||
if: steps.cache_pg_17.outputs.cache-hit != 'true'
|
||||
run: make postgres-v17 -j$(sysctl -n hw.ncpu)
|
||||
|
||||
- name: Build neon extensions
|
||||
run: make neon-pg-ext -j$(sysctl -n hw.ncpu)
|
||||
|
||||
- name: Build walproposer-lib
|
||||
run: make walproposer-lib -j$(sysctl -n hw.ncpu)
|
||||
|
||||
- name: Run cargo build
|
||||
run: PQ_LIB_DIR=$(pwd)/pg_install/v16/lib cargo build --all --release
|
||||
|
||||
- name: Check that no warnings are produced
|
||||
run: ./run_clippy.sh
|
||||
|
||||
gather-rust-build-stats:
|
||||
needs: [ check-permissions, build-build-tools-image ]
|
||||
if: |
|
||||
contains(github.event.pull_request.labels.*.name, 'run-extra-build-stats') ||
|
||||
contains(github.event.pull_request.labels.*.name, 'run-extra-build-*') ||
|
||||
github.ref_name == 'main'
|
||||
runs-on: [ self-hosted, large ]
|
||||
container:
|
||||
image: ${{ needs.build-build-tools-image.outputs.image }}-bookworm
|
||||
credentials:
|
||||
username: ${{ secrets.NEON_DOCKERHUB_USERNAME }}
|
||||
password: ${{ secrets.NEON_DOCKERHUB_PASSWORD }}
|
||||
options: --init
|
||||
|
||||
env:
|
||||
BUILD_TYPE: release
|
||||
# build with incremental compilation produce partial results
|
||||
# so do not attempt to cache this build, also disable the incremental compilation
|
||||
CARGO_INCREMENTAL: 0
|
||||
|
||||
steps:
|
||||
- name: Checkout
|
||||
uses: actions/checkout@v4
|
||||
with:
|
||||
submodules: true
|
||||
|
||||
# Some of our rust modules use FFI and need those to be checked
|
||||
- name: Get postgres headers
|
||||
run: make postgres-headers -j$(nproc)
|
||||
|
||||
- name: Build walproposer-lib
|
||||
run: make walproposer-lib -j$(nproc)
|
||||
|
||||
- name: Produce the build stats
|
||||
run: PQ_LIB_DIR=$(pwd)/pg_install/v17/lib cargo build --all --release --timings -j$(nproc)
|
||||
|
||||
- name: Upload the build stats
|
||||
id: upload-stats
|
||||
env:
|
||||
BUCKET: neon-github-public-dev
|
||||
SHA: ${{ github.event.pull_request.head.sha || github.sha }}
|
||||
AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_DEV }}
|
||||
AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_KEY_DEV }}
|
||||
run: |
|
||||
REPORT_URL=https://${BUCKET}.s3.amazonaws.com/build-stats/${SHA}/${GITHUB_RUN_ID}/cargo-timing.html
|
||||
aws s3 cp --only-show-errors ./target/cargo-timings/cargo-timing.html "s3://${BUCKET}/build-stats/${SHA}/${GITHUB_RUN_ID}/"
|
||||
echo "report-url=${REPORT_URL}" >> $GITHUB_OUTPUT
|
||||
|
||||
- name: Publish build stats report
|
||||
uses: actions/github-script@v7
|
||||
env:
|
||||
REPORT_URL: ${{ steps.upload-stats.outputs.report-url }}
|
||||
SHA: ${{ github.event.pull_request.head.sha || github.sha }}
|
||||
with:
|
||||
# Retry script for 5XX server errors: https://github.com/actions/github-script#retries
|
||||
retries: 5
|
||||
script: |
|
||||
const { REPORT_URL, SHA } = process.env
|
||||
|
||||
await github.rest.repos.createCommitStatus({
|
||||
owner: context.repo.owner,
|
||||
repo: context.repo.repo,
|
||||
sha: `${SHA}`,
|
||||
state: 'success',
|
||||
target_url: `${REPORT_URL}`,
|
||||
context: `Build stats (release)`,
|
||||
})
|
||||
uses: ./.github/workflows/build-macos.yml
|
||||
@@ -4,12 +4,10 @@ on:
|
||||
schedule:
|
||||
- cron: '*/15 * * * *'
|
||||
- cron: '25 0 * * *'
|
||||
- cron: '25 1 * * 6'
|
||||
|
||||
jobs:
|
||||
gh-workflow-stats-batch-2h:
|
||||
name: GitHub Workflow Stats Batch 2 hours
|
||||
if: github.event.schedule == '*/15 * * * *'
|
||||
gh-workflow-stats-batch:
|
||||
name: GitHub Workflow Stats Batch
|
||||
runs-on: ubuntu-22.04
|
||||
permissions:
|
||||
actions: read
|
||||
@@ -18,36 +16,14 @@ jobs:
|
||||
uses: neondatabase/gh-workflow-stats-action@v0.2.1
|
||||
with:
|
||||
db_uri: ${{ secrets.GH_REPORT_STATS_DB_RW_CONNSTR }}
|
||||
db_table: "gh_workflow_stats_neon"
|
||||
db_table: "gh_workflow_stats_batch_neon"
|
||||
gh_token: ${{ secrets.GITHUB_TOKEN }}
|
||||
duration: '2h'
|
||||
|
||||
gh-workflow-stats-batch-48h:
|
||||
name: GitHub Workflow Stats Batch 48 hours
|
||||
if: github.event.schedule == '25 0 * * *'
|
||||
runs-on: ubuntu-22.04
|
||||
permissions:
|
||||
actions: read
|
||||
steps:
|
||||
- name: Export Workflow Run for the past 48 hours
|
||||
- name: Export Workflow Run for the past 24 hours
|
||||
if: github.event.schedule == '25 0 * * *'
|
||||
uses: neondatabase/gh-workflow-stats-action@v0.2.1
|
||||
with:
|
||||
db_uri: ${{ secrets.GH_REPORT_STATS_DB_RW_CONNSTR }}
|
||||
db_table: "gh_workflow_stats_neon"
|
||||
db_table: "gh_workflow_stats_batch_neon"
|
||||
gh_token: ${{ secrets.GITHUB_TOKEN }}
|
||||
duration: '48h'
|
||||
|
||||
gh-workflow-stats-batch-30d:
|
||||
name: GitHub Workflow Stats Batch 30 days
|
||||
if: github.event.schedule == '25 1 * * 6'
|
||||
runs-on: ubuntu-22.04
|
||||
permissions:
|
||||
actions: read
|
||||
steps:
|
||||
- name: Export Workflow Run for the past 30 days
|
||||
uses: neondatabase/gh-workflow-stats-action@v0.2.1
|
||||
with:
|
||||
db_uri: ${{ secrets.GH_REPORT_STATS_DB_RW_CONNSTR }}
|
||||
db_table: "gh_workflow_stats_neon"
|
||||
gh_token: ${{ secrets.GITHUB_TOKEN }}
|
||||
duration: '720h'
|
||||
duration: '24h'
|
||||
|
||||
41
.github/workflows/report-workflow-stats.yml
vendored
Normal file
41
.github/workflows/report-workflow-stats.yml
vendored
Normal file
@@ -0,0 +1,41 @@
|
||||
name: Report Workflow Stats
|
||||
|
||||
on:
|
||||
workflow_run:
|
||||
workflows:
|
||||
- Add `external` label to issues and PRs created by external users
|
||||
- Benchmarking
|
||||
- Build and Test
|
||||
- Build and Test Locally
|
||||
- Build build-tools image
|
||||
- Check Permissions
|
||||
- Check neon with extra platform builds
|
||||
- Cloud Regression Test
|
||||
- Create Release Branch
|
||||
- Handle `approved-for-ci-run` label
|
||||
- Lint GitHub Workflows
|
||||
- Notify Slack channel about upcoming release
|
||||
- Periodic pagebench performance test on dedicated EC2 machine in eu-central-1 region
|
||||
- Pin build-tools image
|
||||
- Prepare benchmarking databases by restoring dumps
|
||||
- Push images to ACR
|
||||
- Test Postgres client libraries
|
||||
- Trigger E2E Tests
|
||||
- cleanup caches by a branch
|
||||
- Pre-merge checks
|
||||
types: [completed]
|
||||
|
||||
jobs:
|
||||
gh-workflow-stats:
|
||||
name: Github Workflow Stats
|
||||
runs-on: ubuntu-22.04
|
||||
permissions:
|
||||
actions: read
|
||||
steps:
|
||||
- name: Export GH Workflow Stats
|
||||
uses: neondatabase/gh-workflow-stats-action@v0.1.4
|
||||
with:
|
||||
DB_URI: ${{ secrets.GH_REPORT_STATS_DB_RW_CONNSTR }}
|
||||
DB_TABLE: "gh_workflow_stats_neon"
|
||||
GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
||||
GH_RUN_ID: ${{ github.event.workflow_run.id }}
|
||||
300
Cargo.lock
generated
300
Cargo.lock
generated
@@ -46,15 +46,6 @@ dependencies = [
|
||||
"memchr",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "aligned-vec"
|
||||
version = "0.6.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "7e0966165eaf052580bd70eb1b32cb3d6245774c0104d1b2793e9650bf83b52a"
|
||||
dependencies = [
|
||||
"equator",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "allocator-api2"
|
||||
version = "0.2.16"
|
||||
@@ -155,12 +146,6 @@ dependencies = [
|
||||
"static_assertions",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "arrayvec"
|
||||
version = "0.7.6"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "7c02d123df017efcdfbd739ef81735b36c5ba83ec3c59c80a9d7ecc718f92e50"
|
||||
|
||||
[[package]]
|
||||
name = "asn1-rs"
|
||||
version = "0.6.2"
|
||||
@@ -374,28 +359,6 @@ dependencies = [
|
||||
"tracing",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "aws-sdk-kms"
|
||||
version = "1.47.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "564a597a3c71a957d60a2e4c62c93d78ee5a0d636531e15b760acad983a5c18e"
|
||||
dependencies = [
|
||||
"aws-credential-types",
|
||||
"aws-runtime",
|
||||
"aws-smithy-async",
|
||||
"aws-smithy-http",
|
||||
"aws-smithy-json",
|
||||
"aws-smithy-runtime",
|
||||
"aws-smithy-runtime-api",
|
||||
"aws-smithy-types",
|
||||
"aws-types",
|
||||
"bytes",
|
||||
"http 0.2.9",
|
||||
"once_cell",
|
||||
"regex-lite",
|
||||
"tracing",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "aws-sdk-s3"
|
||||
version = "1.52.0"
|
||||
@@ -612,9 +575,9 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "aws-smithy-runtime"
|
||||
version = "1.7.2"
|
||||
version = "1.7.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "a065c0fe6fdbdf9f11817eb68582b2ab4aff9e9c39e986ae48f7ec576c6322db"
|
||||
checksum = "d1ce695746394772e7000b39fe073095db6d45a862d0767dd5ad0ac0d7f8eb87"
|
||||
dependencies = [
|
||||
"aws-smithy-async",
|
||||
"aws-smithy-http",
|
||||
@@ -779,7 +742,7 @@ dependencies = [
|
||||
"once_cell",
|
||||
"paste",
|
||||
"pin-project",
|
||||
"quick-xml 0.31.0",
|
||||
"quick-xml",
|
||||
"rand 0.8.5",
|
||||
"reqwest 0.11.19",
|
||||
"rustc_version",
|
||||
@@ -1257,10 +1220,6 @@ name = "compute_tools"
|
||||
version = "0.1.0"
|
||||
dependencies = [
|
||||
"anyhow",
|
||||
"aws-config",
|
||||
"aws-sdk-kms",
|
||||
"aws-sdk-s3",
|
||||
"base64 0.13.1",
|
||||
"bytes",
|
||||
"camino",
|
||||
"cfg-if",
|
||||
@@ -1278,16 +1237,13 @@ dependencies = [
|
||||
"opentelemetry",
|
||||
"opentelemetry_sdk",
|
||||
"postgres",
|
||||
"postgres_initdb",
|
||||
"prometheus",
|
||||
"regex",
|
||||
"remote_storage",
|
||||
"reqwest 0.12.4",
|
||||
"rlimit",
|
||||
"rust-ini",
|
||||
"serde",
|
||||
"serde_json",
|
||||
"serde_with",
|
||||
"signal-hook",
|
||||
"tar",
|
||||
"thiserror",
|
||||
@@ -1425,15 +1381,6 @@ version = "0.8.4"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "e496a50fda8aacccc86d7529e2c1e0892dbd0f898a6b5645b5561b89c3210efa"
|
||||
|
||||
[[package]]
|
||||
name = "cpp_demangle"
|
||||
version = "0.4.4"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "96e58d342ad113c2b878f16d5d034c03be492ae460cdbc02b7f0f2284d310c7d"
|
||||
dependencies = [
|
||||
"cfg-if",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "cpufeatures"
|
||||
version = "0.2.9"
|
||||
@@ -1957,26 +1904,6 @@ dependencies = [
|
||||
"termcolor",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "equator"
|
||||
version = "0.2.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "c35da53b5a021d2484a7cc49b2ac7f2d840f8236a286f84202369bd338d761ea"
|
||||
dependencies = [
|
||||
"equator-macro",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "equator-macro"
|
||||
version = "0.2.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "3bf679796c0322556351f287a51b49e48f7c4986e727b5dd78c972d30e2e16cc"
|
||||
dependencies = [
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"syn 2.0.52",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "equivalent"
|
||||
version = "1.0.1"
|
||||
@@ -2084,18 +2011,6 @@ dependencies = [
|
||||
"windows-sys 0.48.0",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "findshlibs"
|
||||
version = "0.10.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "40b9e59cd0f7e0806cca4be089683ecb6434e602038df21fe6bf6711b2f07f64"
|
||||
dependencies = [
|
||||
"cc",
|
||||
"lazy_static",
|
||||
"libc",
|
||||
"winapi",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "fixedbitset"
|
||||
version = "0.4.2"
|
||||
@@ -2174,9 +2089,9 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "futures-channel"
|
||||
version = "0.3.31"
|
||||
version = "0.3.30"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "2dff15bf788c671c1934e366d07e30c1814a8ef514e1af724a602e8a2fbe1b10"
|
||||
checksum = "eac8f7d7865dcb88bd4373ab671c8cf4508703796caa2b1985a9ca867b3fcb78"
|
||||
dependencies = [
|
||||
"futures-core",
|
||||
"futures-sink",
|
||||
@@ -2184,9 +2099,9 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "futures-core"
|
||||
version = "0.3.31"
|
||||
version = "0.3.30"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "05f29059c0c2090612e8d742178b0580d2dc940c837851ad723096f87af6663e"
|
||||
checksum = "dfc6580bb841c5a68e9ef15c77ccc837b40a7504914d52e47b8b0e9bbda25a1d"
|
||||
|
||||
[[package]]
|
||||
name = "futures-executor"
|
||||
@@ -2201,9 +2116,9 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "futures-io"
|
||||
version = "0.3.31"
|
||||
version = "0.3.30"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "9e5c1b78ca4aae1ac06c48a526a655760685149f0d465d21f37abfe57ce075c6"
|
||||
checksum = "a44623e20b9681a318efdd71c299b6b222ed6f231972bfe2f224ebad6311f0c1"
|
||||
|
||||
[[package]]
|
||||
name = "futures-lite"
|
||||
@@ -2222,9 +2137,9 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "futures-macro"
|
||||
version = "0.3.31"
|
||||
version = "0.3.30"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "162ee34ebcb7c64a8abebc059ce0fee27c2262618d7b60ed8faf72fef13c3650"
|
||||
checksum = "87750cf4b7a4c0625b1529e4c543c2182106e4dedc60a2a6455e00d212c489ac"
|
||||
dependencies = [
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
@@ -2233,15 +2148,15 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "futures-sink"
|
||||
version = "0.3.31"
|
||||
version = "0.3.30"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "e575fab7d1e0dcb8d0c7bcf9a63ee213816ab51902e6d244a95819acacf1d4f7"
|
||||
checksum = "9fb8e00e87438d937621c1c6269e53f536c14d3fbd6a042bb24879e57d474fb5"
|
||||
|
||||
[[package]]
|
||||
name = "futures-task"
|
||||
version = "0.3.31"
|
||||
version = "0.3.30"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "f90f7dce0722e95104fcb095585910c0977252f286e354b5e3bd38902cd99988"
|
||||
checksum = "38d84fa142264698cdce1a9f9172cf383a0c82de1bddcf3092901442c4097004"
|
||||
|
||||
[[package]]
|
||||
name = "futures-timer"
|
||||
@@ -2251,9 +2166,9 @@ checksum = "e64b03909df88034c26dc1547e8970b91f98bdb65165d6a4e9110d94263dbb2c"
|
||||
|
||||
[[package]]
|
||||
name = "futures-util"
|
||||
version = "0.3.31"
|
||||
version = "0.3.30"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "9fa08315bb612088cc391249efdc3bc77536f16c91f6cf495e6fbe85b20a4a81"
|
||||
checksum = "3d6401deb83407ab3da39eba7e33987a73c3df0c82b4bb5813ee871c19c41d48"
|
||||
dependencies = [
|
||||
"futures-channel",
|
||||
"futures-core",
|
||||
@@ -2799,24 +2714,6 @@ version = "0.2.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "64e9829a50b42bb782c1df523f78d332fe371b10c661e78b7a3c34b0198e9fac"
|
||||
|
||||
[[package]]
|
||||
name = "inferno"
|
||||
version = "0.11.21"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "232929e1d75fe899576a3d5c7416ad0d88dbfbb3c3d6aa00873a7408a50ddb88"
|
||||
dependencies = [
|
||||
"ahash",
|
||||
"indexmap 2.0.1",
|
||||
"is-terminal",
|
||||
"itoa",
|
||||
"log",
|
||||
"num-format",
|
||||
"once_cell",
|
||||
"quick-xml 0.26.0",
|
||||
"rgb",
|
||||
"str_stack",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "inotify"
|
||||
version = "0.9.6"
|
||||
@@ -2867,9 +2764,9 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "ipnet"
|
||||
version = "2.10.1"
|
||||
version = "2.9.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "ddc24109865250148c2e0f3d25d4f0f479571723792d3802153c60922a4fb708"
|
||||
checksum = "8f518f335dce6725a761382244631d86cf0ccb2863413590b31338feb467f9c3"
|
||||
|
||||
[[package]]
|
||||
name = "is-terminal"
|
||||
@@ -3156,15 +3053,6 @@ version = "2.6.4"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "f665ee40bc4a3c5590afb1e9677db74a508659dfd71e126420da8274909a0167"
|
||||
|
||||
[[package]]
|
||||
name = "memmap2"
|
||||
version = "0.9.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "45fd3a57831bf88bc63f8cebc0cf956116276e97fef3966103e96416209f7c92"
|
||||
dependencies = [
|
||||
"libc",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "memoffset"
|
||||
version = "0.7.1"
|
||||
@@ -3390,16 +3278,6 @@ version = "0.1.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "51d515d32fb182ee37cda2ccdcb92950d6a3c2893aa280e540671c2cd0f3b1d9"
|
||||
|
||||
[[package]]
|
||||
name = "num-format"
|
||||
version = "0.4.4"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "a652d9771a63711fd3c3deb670acfbe5c30a4072e664d7a3bf5a9e1056ac72c3"
|
||||
dependencies = [
|
||||
"arrayvec",
|
||||
"itoa",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "num-integer"
|
||||
version = "0.1.45"
|
||||
@@ -3741,7 +3619,6 @@ dependencies = [
|
||||
"num_cpus",
|
||||
"once_cell",
|
||||
"pageserver_api",
|
||||
"pageserver_client",
|
||||
"pageserver_compaction",
|
||||
"pin-project-lite",
|
||||
"postgres",
|
||||
@@ -3750,7 +3627,6 @@ dependencies = [
|
||||
"postgres_backend",
|
||||
"postgres_connection",
|
||||
"postgres_ffi",
|
||||
"postgres_initdb",
|
||||
"pq_proto",
|
||||
"procfs",
|
||||
"rand 0.8.5",
|
||||
@@ -4182,7 +4058,7 @@ dependencies = [
|
||||
"bytes",
|
||||
"once_cell",
|
||||
"pq_proto",
|
||||
"rustls 0.23.18",
|
||||
"rustls 0.23.16",
|
||||
"rustls-pemfile 2.1.1",
|
||||
"serde",
|
||||
"thiserror",
|
||||
@@ -4226,48 +4102,12 @@ dependencies = [
|
||||
"utils",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "postgres_initdb"
|
||||
version = "0.1.0"
|
||||
dependencies = [
|
||||
"anyhow",
|
||||
"camino",
|
||||
"thiserror",
|
||||
"tokio",
|
||||
"workspace_hack",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "powerfmt"
|
||||
version = "0.2.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "439ee305def115ba05938db6eb1644ff94165c5ab5e9420d1c1bcedbba909391"
|
||||
|
||||
[[package]]
|
||||
name = "pprof"
|
||||
version = "0.14.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "ebbe2f8898beba44815fdc9e5a4ae9c929e21c5dc29b0c774a15555f7f58d6d0"
|
||||
dependencies = [
|
||||
"aligned-vec",
|
||||
"backtrace",
|
||||
"cfg-if",
|
||||
"criterion",
|
||||
"findshlibs",
|
||||
"inferno",
|
||||
"libc",
|
||||
"log",
|
||||
"nix 0.26.4",
|
||||
"once_cell",
|
||||
"parking_lot 0.12.1",
|
||||
"protobuf",
|
||||
"protobuf-codegen-pure",
|
||||
"smallvec",
|
||||
"symbolic-demangle",
|
||||
"tempfile",
|
||||
"thiserror",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "ppv-lite86"
|
||||
version = "0.2.17"
|
||||
@@ -4420,31 +4260,6 @@ dependencies = [
|
||||
"prost",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "protobuf"
|
||||
version = "2.28.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "106dd99e98437432fed6519dedecfade6a06a73bb7b2a1e019fdd2bee5778d94"
|
||||
|
||||
[[package]]
|
||||
name = "protobuf-codegen"
|
||||
version = "2.28.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "033460afb75cf755fcfc16dfaed20b86468082a2ea24e05ac35ab4a099a017d6"
|
||||
dependencies = [
|
||||
"protobuf",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "protobuf-codegen-pure"
|
||||
version = "2.28.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "95a29399fc94bcd3eeaa951c715f7bea69409b2445356b00519740bcd6ddd865"
|
||||
dependencies = [
|
||||
"protobuf",
|
||||
"protobuf-codegen",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "proxy"
|
||||
version = "0.1.0"
|
||||
@@ -4518,7 +4333,7 @@ dependencies = [
|
||||
"rsa",
|
||||
"rstest",
|
||||
"rustc-hash",
|
||||
"rustls 0.23.18",
|
||||
"rustls 0.23.16",
|
||||
"rustls-native-certs 0.8.0",
|
||||
"rustls-pemfile 2.1.1",
|
||||
"scopeguard",
|
||||
@@ -4556,15 +4371,6 @@ dependencies = [
|
||||
"zerocopy",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "quick-xml"
|
||||
version = "0.26.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "7f50b1c63b38611e7d4d7f68b82d3ad0cc71a2ad2e7f61fc10f1328d917c93cd"
|
||||
dependencies = [
|
||||
"memchr",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "quick-xml"
|
||||
version = "0.31.0"
|
||||
@@ -5047,15 +4853,6 @@ dependencies = [
|
||||
"subtle",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "rgb"
|
||||
version = "0.8.50"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "57397d16646700483b67d2dd6511d79318f9d057fdbd21a4066aeac8b41d310a"
|
||||
dependencies = [
|
||||
"bytemuck",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "ring"
|
||||
version = "0.17.6"
|
||||
@@ -5231,9 +5028,9 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "rustls"
|
||||
version = "0.23.18"
|
||||
version = "0.23.16"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "9c9cc1d47e243d655ace55ed38201c19ae02c148ae56412ab8750e8f0166ab7f"
|
||||
checksum = "eee87ff5d9b36712a58574e12e9f0ea80f915a5b0ac518d322b24a465617925e"
|
||||
dependencies = [
|
||||
"log",
|
||||
"once_cell",
|
||||
@@ -5369,7 +5166,6 @@ dependencies = [
|
||||
"postgres-protocol",
|
||||
"postgres_backend",
|
||||
"postgres_ffi",
|
||||
"pprof",
|
||||
"pq_proto",
|
||||
"rand 0.8.5",
|
||||
"regex",
|
||||
@@ -5916,12 +5712,6 @@ dependencies = [
|
||||
"der 0.7.8",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "stable_deref_trait"
|
||||
version = "1.2.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "a8f112729512f8e442d81f95a8a7ddf2b7c6b8a1a6f509a95864142b30cab2d3"
|
||||
|
||||
[[package]]
|
||||
name = "static_assertions"
|
||||
version = "1.1.0"
|
||||
@@ -5948,7 +5738,7 @@ dependencies = [
|
||||
"once_cell",
|
||||
"parking_lot 0.12.1",
|
||||
"prost",
|
||||
"rustls 0.23.18",
|
||||
"rustls 0.23.16",
|
||||
"tokio",
|
||||
"tonic",
|
||||
"tonic-build",
|
||||
@@ -6031,7 +5821,7 @@ dependencies = [
|
||||
"postgres_ffi",
|
||||
"remote_storage",
|
||||
"reqwest 0.12.4",
|
||||
"rustls 0.23.18",
|
||||
"rustls 0.23.16",
|
||||
"rustls-native-certs 0.8.0",
|
||||
"serde",
|
||||
"serde_json",
|
||||
@@ -6068,12 +5858,6 @@ dependencies = [
|
||||
"workspace_hack",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "str_stack"
|
||||
version = "0.1.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "9091b6114800a5f2141aee1d1b9d6ca3592ac062dc5decb3764ec5895a47b4eb"
|
||||
|
||||
[[package]]
|
||||
name = "stringprep"
|
||||
version = "0.1.2"
|
||||
@@ -6121,29 +5905,6 @@ version = "0.4.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "20e16a0f46cf5fd675563ef54f26e83e20f2366bcf027bcb3cc3ed2b98aaf2ca"
|
||||
|
||||
[[package]]
|
||||
name = "symbolic-common"
|
||||
version = "12.12.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "366f1b4c6baf6cfefc234bbd4899535fca0b06c74443039a73f6dfb2fad88d77"
|
||||
dependencies = [
|
||||
"debugid",
|
||||
"memmap2",
|
||||
"stable_deref_trait",
|
||||
"uuid",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "symbolic-demangle"
|
||||
version = "12.12.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "aba05ba5b9962ea5617baf556293720a8b2d0a282aa14ee4bf10e22efc7da8c8"
|
||||
dependencies = [
|
||||
"cpp_demangle",
|
||||
"rustc-demangle",
|
||||
"symbolic-common",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "syn"
|
||||
version = "1.0.109"
|
||||
@@ -6493,7 +6254,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "04fb792ccd6bbcd4bba408eb8a292f70fc4a3589e5d793626f45190e6454b6ab"
|
||||
dependencies = [
|
||||
"ring",
|
||||
"rustls 0.23.18",
|
||||
"rustls 0.23.16",
|
||||
"tokio",
|
||||
"tokio-postgres",
|
||||
"tokio-rustls 0.26.0",
|
||||
@@ -6527,7 +6288,7 @@ version = "0.26.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "0c7bc40d0e5a97695bb96e27995cd3a08538541b0a846f65bba7a359f36700d4"
|
||||
dependencies = [
|
||||
"rustls 0.23.18",
|
||||
"rustls 0.23.16",
|
||||
"rustls-pki-types",
|
||||
"tokio",
|
||||
]
|
||||
@@ -6936,7 +6697,7 @@ dependencies = [
|
||||
"base64 0.22.1",
|
||||
"log",
|
||||
"once_cell",
|
||||
"rustls 0.23.18",
|
||||
"rustls 0.23.16",
|
||||
"rustls-pki-types",
|
||||
"url",
|
||||
"webpki-roots 0.26.1",
|
||||
@@ -7011,7 +6772,6 @@ dependencies = [
|
||||
"once_cell",
|
||||
"pin-project-lite",
|
||||
"postgres_connection",
|
||||
"pprof",
|
||||
"pq_proto",
|
||||
"rand 0.8.5",
|
||||
"regex",
|
||||
@@ -7546,7 +7306,6 @@ dependencies = [
|
||||
"anyhow",
|
||||
"axum",
|
||||
"axum-core",
|
||||
"base64 0.13.1",
|
||||
"base64 0.21.1",
|
||||
"base64ct",
|
||||
"bytes",
|
||||
@@ -7581,7 +7340,6 @@ dependencies = [
|
||||
"libc",
|
||||
"log",
|
||||
"memchr",
|
||||
"nix 0.26.4",
|
||||
"nom",
|
||||
"num-bigint",
|
||||
"num-integer",
|
||||
@@ -7598,7 +7356,7 @@ dependencies = [
|
||||
"regex-automata 0.4.3",
|
||||
"regex-syntax 0.8.2",
|
||||
"reqwest 0.12.4",
|
||||
"rustls 0.23.18",
|
||||
"rustls 0.23.16",
|
||||
"scopeguard",
|
||||
"serde",
|
||||
"serde_json",
|
||||
|
||||
11
Cargo.toml
11
Cargo.toml
@@ -34,7 +34,6 @@ members = [
|
||||
"libs/vm_monitor",
|
||||
"libs/walproposer",
|
||||
"libs/wal_decoder",
|
||||
"libs/postgres_initdb",
|
||||
]
|
||||
|
||||
[workspace.package]
|
||||
@@ -58,7 +57,6 @@ async-trait = "0.1"
|
||||
aws-config = { version = "1.5", default-features = false, features=["rustls", "sso"] }
|
||||
aws-sdk-s3 = "1.52"
|
||||
aws-sdk-iam = "1.46.0"
|
||||
aws-sdk-kms = "1.47.0"
|
||||
aws-smithy-async = { version = "1.2.1", default-features = false, features=["rt-tokio"] }
|
||||
aws-smithy-types = "1.2"
|
||||
aws-credential-types = "1.2.0"
|
||||
@@ -75,7 +73,7 @@ bytes = "1.0"
|
||||
camino = "1.1.6"
|
||||
cfg-if = "1.0.0"
|
||||
chrono = { version = "0.4", default-features = false, features = ["clock"] }
|
||||
clap = { version = "4.0", features = ["derive", "env"] }
|
||||
clap = { version = "4.0", features = ["derive"] }
|
||||
comfy-table = "7.1"
|
||||
const_format = "0.2"
|
||||
crc32c = "0.6"
|
||||
@@ -108,7 +106,7 @@ hyper-util = "0.1"
|
||||
tokio-tungstenite = "0.21.0"
|
||||
indexmap = "2"
|
||||
indoc = "2"
|
||||
ipnet = "2.10.0"
|
||||
ipnet = "2.9.0"
|
||||
itertools = "0.10"
|
||||
itoa = "1.0.11"
|
||||
jsonwebtoken = "9"
|
||||
@@ -132,7 +130,6 @@ parquet = { version = "53", default-features = false, features = ["zstd"] }
|
||||
parquet_derive = "53"
|
||||
pbkdf2 = { version = "0.12.1", features = ["simple", "std"] }
|
||||
pin-project-lite = "0.2"
|
||||
pprof = { version = "0.14", features = ["criterion", "flamegraph", "protobuf", "protobuf-codec"] }
|
||||
procfs = "0.16"
|
||||
prometheus = {version = "0.13", default-features=false, features = ["process"]} # removes protobuf dependency
|
||||
prost = "0.13"
|
||||
@@ -156,7 +153,7 @@ sentry = { version = "0.32", default-features = false, features = ["backtrace",
|
||||
serde = { version = "1.0", features = ["derive"] }
|
||||
serde_json = "1"
|
||||
serde_path_to_error = "0.1"
|
||||
serde_with = { version = "2.0", features = [ "base64" ] }
|
||||
serde_with = "2.0"
|
||||
serde_assert = "0.5.0"
|
||||
sha2 = "0.10.2"
|
||||
signal-hook = "0.3"
|
||||
@@ -215,14 +212,12 @@ tokio-postgres = { git = "https://github.com/neondatabase/rust-postgres.git", br
|
||||
compute_api = { version = "0.1", path = "./libs/compute_api/" }
|
||||
consumption_metrics = { version = "0.1", path = "./libs/consumption_metrics/" }
|
||||
metrics = { version = "0.1", path = "./libs/metrics/" }
|
||||
pageserver = { path = "./pageserver" }
|
||||
pageserver_api = { version = "0.1", path = "./libs/pageserver_api/" }
|
||||
pageserver_client = { path = "./pageserver/client" }
|
||||
pageserver_compaction = { version = "0.1", path = "./pageserver/compaction/" }
|
||||
postgres_backend = { version = "0.1", path = "./libs/postgres_backend/" }
|
||||
postgres_connection = { version = "0.1", path = "./libs/postgres_connection/" }
|
||||
postgres_ffi = { version = "0.1", path = "./libs/postgres_ffi/" }
|
||||
postgres_initdb = { path = "./libs/postgres_initdb" }
|
||||
pq_proto = { version = "0.1", path = "./libs/pq_proto/" }
|
||||
remote_storage = { version = "0.1", path = "./libs/remote_storage/" }
|
||||
safekeeper_api = { version = "0.1", path = "./libs/safekeeper_api" }
|
||||
|
||||
@@ -132,7 +132,7 @@ make -j`sysctl -n hw.logicalcpu` -s
|
||||
To run the `psql` client, install the `postgresql-client` package or modify `PATH` and `LD_LIBRARY_PATH` to include `pg_install/bin` and `pg_install/lib`, respectively.
|
||||
|
||||
To run the integration tests or Python scripts (not required to use the code), install
|
||||
Python (3.11 or higher), and install the python3 packages using `./scripts/pysync` (requires [poetry>=1.8](https://python-poetry.org/)) in the project directory.
|
||||
Python (3.9 or higher), and install the python3 packages using `./scripts/pysync` (requires [poetry>=1.8](https://python-poetry.org/)) in the project directory.
|
||||
|
||||
|
||||
#### Running neon database
|
||||
|
||||
@@ -234,7 +234,7 @@ USER nonroot:nonroot
|
||||
WORKDIR /home/nonroot
|
||||
|
||||
# Python
|
||||
ENV PYTHON_VERSION=3.11.10 \
|
||||
ENV PYTHON_VERSION=3.9.19 \
|
||||
PYENV_ROOT=/home/nonroot/.pyenv \
|
||||
PATH=/home/nonroot/.pyenv/shims:/home/nonroot/.pyenv/bin:/home/nonroot/.poetry/bin:$PATH
|
||||
RUN set -e \
|
||||
|
||||
@@ -1243,7 +1243,7 @@ RUN make -j $(getconf _NPROCESSORS_ONLN) \
|
||||
|
||||
#########################################################################################
|
||||
#
|
||||
# Compile and run the Neon-specific `compute_ctl` and `fast_import` binaries
|
||||
# Compile and run the Neon-specific `compute_ctl` binary
|
||||
#
|
||||
#########################################################################################
|
||||
FROM $REPOSITORY/$IMAGE:$TAG AS compute-tools
|
||||
@@ -1264,7 +1264,6 @@ RUN cd compute_tools && mold -run cargo build --locked --profile release-line-de
|
||||
FROM debian:$DEBIAN_FLAVOR AS compute-tools-image
|
||||
|
||||
COPY --from=compute-tools /home/nonroot/target/release-line-debug-size-lto/compute_ctl /usr/local/bin/compute_ctl
|
||||
COPY --from=compute-tools /home/nonroot/target/release-line-debug-size-lto/fast_import /usr/local/bin/fast_import
|
||||
|
||||
#########################################################################################
|
||||
#
|
||||
@@ -1459,7 +1458,6 @@ RUN mkdir /var/db && useradd -m -d /var/db/postgres postgres && \
|
||||
|
||||
COPY --from=postgres-cleanup-layer --chown=postgres /usr/local/pgsql /usr/local
|
||||
COPY --from=compute-tools --chown=postgres /home/nonroot/target/release-line-debug-size-lto/compute_ctl /usr/local/bin/compute_ctl
|
||||
COPY --from=compute-tools --chown=postgres /home/nonroot/target/release-line-debug-size-lto/fast_import /usr/local/bin/fast_import
|
||||
|
||||
# pgbouncer and its config
|
||||
COPY --from=pgbouncer /usr/local/pgbouncer/bin/pgbouncer /usr/local/bin/pgbouncer
|
||||
@@ -1535,25 +1533,6 @@ RUN apt update && \
|
||||
rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/* && \
|
||||
localedef -i en_US -c -f UTF-8 -A /usr/share/locale/locale.alias en_US.UTF-8
|
||||
|
||||
# s5cmd 2.2.2 from https://github.com/peak/s5cmd/releases/tag/v2.2.2
|
||||
# used by fast_import
|
||||
ARG TARGETARCH
|
||||
ADD https://github.com/peak/s5cmd/releases/download/v2.2.2/s5cmd_2.2.2_linux_$TARGETARCH.deb /tmp/s5cmd.deb
|
||||
RUN set -ex; \
|
||||
\
|
||||
# Determine the expected checksum based on TARGETARCH
|
||||
if [ "${TARGETARCH}" = "amd64" ]; then \
|
||||
CHECKSUM="392c385320cd5ffa435759a95af77c215553d967e4b1c0fffe52e4f14c29cf85"; \
|
||||
elif [ "${TARGETARCH}" = "arm64" ]; then \
|
||||
CHECKSUM="939bee3cf4b5604ddb00e67f8c157b91d7c7a5b553d1fbb6890fad32894b7b46"; \
|
||||
else \
|
||||
echo "Unsupported architecture: ${TARGETARCH}"; exit 1; \
|
||||
fi; \
|
||||
\
|
||||
# Compute and validate the checksum
|
||||
echo "${CHECKSUM} /tmp/s5cmd.deb" | sha256sum -c -
|
||||
RUN dpkg -i /tmp/s5cmd.deb && rm /tmp/s5cmd.deb
|
||||
|
||||
ENV LANG=en_US.utf8
|
||||
USER postgres
|
||||
ENTRYPOINT ["/usr/local/bin/compute_ctl"]
|
||||
|
||||
@@ -10,10 +10,6 @@ default = []
|
||||
testing = []
|
||||
|
||||
[dependencies]
|
||||
base64.workspace = true
|
||||
aws-config.workspace = true
|
||||
aws-sdk-s3.workspace = true
|
||||
aws-sdk-kms.workspace = true
|
||||
anyhow.workspace = true
|
||||
camino.workspace = true
|
||||
chrono.workspace = true
|
||||
@@ -31,8 +27,6 @@ opentelemetry.workspace = true
|
||||
opentelemetry_sdk.workspace = true
|
||||
postgres.workspace = true
|
||||
regex.workspace = true
|
||||
serde.workspace = true
|
||||
serde_with.workspace = true
|
||||
serde_json.workspace = true
|
||||
signal-hook.workspace = true
|
||||
tar.workspace = true
|
||||
@@ -49,7 +43,6 @@ thiserror.workspace = true
|
||||
url.workspace = true
|
||||
prometheus.workspace = true
|
||||
|
||||
postgres_initdb.workspace = true
|
||||
compute_api.workspace = true
|
||||
utils.workspace = true
|
||||
workspace_hack.workspace = true
|
||||
|
||||
@@ -105,11 +105,6 @@ fn main() -> Result<()> {
|
||||
fn init() -> Result<(String, clap::ArgMatches)> {
|
||||
init_tracing_and_logging(DEFAULT_LOG_LEVEL)?;
|
||||
|
||||
opentelemetry::global::set_error_handler(|err| {
|
||||
tracing::info!("OpenTelemetry error: {err}");
|
||||
})
|
||||
.expect("global error handler lock poisoned");
|
||||
|
||||
let mut signals = Signals::new([SIGINT, SIGTERM, SIGQUIT])?;
|
||||
thread::spawn(move || {
|
||||
for sig in signals.forever() {
|
||||
|
||||
@@ -1,338 +0,0 @@
|
||||
//! This program dumps a remote Postgres database into a local Postgres database
|
||||
//! and uploads the resulting PGDATA into object storage for import into a Timeline.
|
||||
//!
|
||||
//! # Context, Architecture, Design
|
||||
//!
|
||||
//! See cloud.git Fast Imports RFC (<https://github.com/neondatabase/cloud/pull/19799>)
|
||||
//! for the full picture.
|
||||
//! The RFC describing the storage pieces of importing the PGDATA dump into a Timeline
|
||||
//! is publicly accessible at <https://github.com/neondatabase/neon/pull/9538>.
|
||||
//!
|
||||
//! # This is a Prototype!
|
||||
//!
|
||||
//! This program is part of a prototype feature and not yet used in production.
|
||||
//!
|
||||
//! The cloud.git RFC contains lots of suggestions for improving e2e throughput
|
||||
//! of this step of the timeline import process.
|
||||
//!
|
||||
//! # Local Testing
|
||||
//!
|
||||
//! - Comment out most of the pgxns in the Dockerfile.compute-tools to speed up the build.
|
||||
//! - Build the image with the following command:
|
||||
//!
|
||||
//! ```bash
|
||||
//! docker buildx build --build-arg DEBIAN_FLAVOR=bullseye-slim --build-arg GIT_VERSION=local --build-arg PG_VERSION=v14 --build-arg BUILD_TAG="$(date --iso-8601=s -u)" -t localhost:3030/localregistry/compute-node-v14:latest -f compute/Dockerfile.com
|
||||
//! docker push localhost:3030/localregistry/compute-node-v14:latest
|
||||
//! ```
|
||||
|
||||
use anyhow::Context;
|
||||
use aws_config::BehaviorVersion;
|
||||
use camino::{Utf8Path, Utf8PathBuf};
|
||||
use clap::Parser;
|
||||
use nix::unistd::Pid;
|
||||
use tracing::{info, info_span, warn, Instrument};
|
||||
use utils::fs_ext::is_directory_empty;
|
||||
|
||||
#[path = "fast_import/child_stdio_to_log.rs"]
|
||||
mod child_stdio_to_log;
|
||||
#[path = "fast_import/s3_uri.rs"]
|
||||
mod s3_uri;
|
||||
#[path = "fast_import/s5cmd.rs"]
|
||||
mod s5cmd;
|
||||
|
||||
#[derive(clap::Parser)]
|
||||
struct Args {
|
||||
#[clap(long)]
|
||||
working_directory: Utf8PathBuf,
|
||||
#[clap(long, env = "NEON_IMPORTER_S3_PREFIX")]
|
||||
s3_prefix: s3_uri::S3Uri,
|
||||
#[clap(long)]
|
||||
pg_bin_dir: Utf8PathBuf,
|
||||
#[clap(long)]
|
||||
pg_lib_dir: Utf8PathBuf,
|
||||
}
|
||||
|
||||
#[serde_with::serde_as]
|
||||
#[derive(serde::Deserialize)]
|
||||
struct Spec {
|
||||
encryption_secret: EncryptionSecret,
|
||||
#[serde_as(as = "serde_with::base64::Base64")]
|
||||
source_connstring_ciphertext_base64: Vec<u8>,
|
||||
}
|
||||
|
||||
#[derive(serde::Deserialize)]
|
||||
enum EncryptionSecret {
|
||||
#[allow(clippy::upper_case_acronyms)]
|
||||
KMS { key_id: String },
|
||||
}
|
||||
|
||||
#[tokio::main]
|
||||
pub(crate) async fn main() -> anyhow::Result<()> {
|
||||
utils::logging::init(
|
||||
utils::logging::LogFormat::Plain,
|
||||
utils::logging::TracingErrorLayerEnablement::EnableWithRustLogFilter,
|
||||
utils::logging::Output::Stdout,
|
||||
)?;
|
||||
|
||||
info!("starting");
|
||||
|
||||
let Args {
|
||||
working_directory,
|
||||
s3_prefix,
|
||||
pg_bin_dir,
|
||||
pg_lib_dir,
|
||||
} = Args::parse();
|
||||
|
||||
let aws_config = aws_config::load_defaults(BehaviorVersion::v2024_03_28()).await;
|
||||
|
||||
let spec: Spec = {
|
||||
let spec_key = s3_prefix.append("/spec.json");
|
||||
let s3_client = aws_sdk_s3::Client::new(&aws_config);
|
||||
let object = s3_client
|
||||
.get_object()
|
||||
.bucket(&spec_key.bucket)
|
||||
.key(spec_key.key)
|
||||
.send()
|
||||
.await
|
||||
.context("get spec from s3")?
|
||||
.body
|
||||
.collect()
|
||||
.await
|
||||
.context("download spec body")?;
|
||||
serde_json::from_slice(&object.into_bytes()).context("parse spec as json")?
|
||||
};
|
||||
|
||||
match tokio::fs::create_dir(&working_directory).await {
|
||||
Ok(()) => {}
|
||||
Err(e) if e.kind() == std::io::ErrorKind::AlreadyExists => {
|
||||
if !is_directory_empty(&working_directory)
|
||||
.await
|
||||
.context("check if working directory is empty")?
|
||||
{
|
||||
anyhow::bail!("working directory is not empty");
|
||||
} else {
|
||||
// ok
|
||||
}
|
||||
}
|
||||
Err(e) => return Err(anyhow::Error::new(e).context("create working directory")),
|
||||
}
|
||||
|
||||
let pgdata_dir = working_directory.join("pgdata");
|
||||
tokio::fs::create_dir(&pgdata_dir)
|
||||
.await
|
||||
.context("create pgdata directory")?;
|
||||
|
||||
//
|
||||
// Setup clients
|
||||
//
|
||||
let aws_config = aws_config::load_defaults(BehaviorVersion::v2024_03_28()).await;
|
||||
let kms_client = aws_sdk_kms::Client::new(&aws_config);
|
||||
|
||||
//
|
||||
// Initialize pgdata
|
||||
//
|
||||
let superuser = "cloud_admin"; // XXX: this shouldn't be hard-coded
|
||||
postgres_initdb::do_run_initdb(postgres_initdb::RunInitdbArgs {
|
||||
superuser,
|
||||
locale: "en_US.UTF-8", // XXX: this shouldn't be hard-coded,
|
||||
pg_version: 140000, // XXX: this shouldn't be hard-coded but derived from which compute image we're running in
|
||||
initdb_bin: pg_bin_dir.join("initdb").as_ref(),
|
||||
library_search_path: &pg_lib_dir, // TODO: is this right? Prob works in compute image, not sure about neon_local.
|
||||
pgdata: &pgdata_dir,
|
||||
})
|
||||
.await
|
||||
.context("initdb")?;
|
||||
|
||||
let nproc = num_cpus::get();
|
||||
|
||||
//
|
||||
// Launch postgres process
|
||||
//
|
||||
let mut postgres_proc = tokio::process::Command::new(pg_bin_dir.join("postgres"))
|
||||
.arg("-D")
|
||||
.arg(&pgdata_dir)
|
||||
.args(["-c", "wal_level=minimal"])
|
||||
.args(["-c", "shared_buffers=10GB"])
|
||||
.args(["-c", "max_wal_senders=0"])
|
||||
.args(["-c", "fsync=off"])
|
||||
.args(["-c", "full_page_writes=off"])
|
||||
.args(["-c", "synchronous_commit=off"])
|
||||
.args(["-c", "maintenance_work_mem=8388608"])
|
||||
.args(["-c", &format!("max_parallel_maintenance_workers={nproc}")])
|
||||
.args(["-c", &format!("max_parallel_workers={nproc}")])
|
||||
.args(["-c", &format!("max_parallel_workers_per_gather={nproc}")])
|
||||
.args(["-c", &format!("max_worker_processes={nproc}")])
|
||||
.args(["-c", "effective_io_concurrency=100"])
|
||||
.env_clear()
|
||||
.stdout(std::process::Stdio::piped())
|
||||
.stderr(std::process::Stdio::piped())
|
||||
.spawn()
|
||||
.context("spawn postgres")?;
|
||||
|
||||
info!("spawned postgres, waiting for it to become ready");
|
||||
tokio::spawn(
|
||||
child_stdio_to_log::relay_process_output(
|
||||
postgres_proc.stdout.take(),
|
||||
postgres_proc.stderr.take(),
|
||||
)
|
||||
.instrument(info_span!("postgres")),
|
||||
);
|
||||
let restore_pg_connstring =
|
||||
format!("host=localhost port=5432 user={superuser} dbname=postgres");
|
||||
loop {
|
||||
let res = tokio_postgres::connect(&restore_pg_connstring, tokio_postgres::NoTls).await;
|
||||
if res.is_ok() {
|
||||
info!("postgres is ready, could connect to it");
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
//
|
||||
// Decrypt connection string
|
||||
//
|
||||
let source_connection_string = {
|
||||
match spec.encryption_secret {
|
||||
EncryptionSecret::KMS { key_id } => {
|
||||
let mut output = kms_client
|
||||
.decrypt()
|
||||
.key_id(key_id)
|
||||
.ciphertext_blob(aws_sdk_s3::primitives::Blob::new(
|
||||
spec.source_connstring_ciphertext_base64,
|
||||
))
|
||||
.send()
|
||||
.await
|
||||
.context("decrypt source connection string")?;
|
||||
let plaintext = output
|
||||
.plaintext
|
||||
.take()
|
||||
.context("get plaintext source connection string")?;
|
||||
String::from_utf8(plaintext.into_inner())
|
||||
.context("parse source connection string as utf8")?
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
//
|
||||
// Start the work
|
||||
//
|
||||
|
||||
let dumpdir = working_directory.join("dumpdir");
|
||||
|
||||
let common_args = [
|
||||
// schema mapping (prob suffices to specify them on one side)
|
||||
"--no-owner".to_string(),
|
||||
"--no-privileges".to_string(),
|
||||
"--no-publications".to_string(),
|
||||
"--no-security-labels".to_string(),
|
||||
"--no-subscriptions".to_string(),
|
||||
"--no-tablespaces".to_string(),
|
||||
// format
|
||||
"--format".to_string(),
|
||||
"directory".to_string(),
|
||||
// concurrency
|
||||
"--jobs".to_string(),
|
||||
num_cpus::get().to_string(),
|
||||
// progress updates
|
||||
"--verbose".to_string(),
|
||||
];
|
||||
|
||||
info!("dump into the working directory");
|
||||
{
|
||||
let mut pg_dump = tokio::process::Command::new(pg_bin_dir.join("pg_dump"))
|
||||
.args(&common_args)
|
||||
.arg("-f")
|
||||
.arg(&dumpdir)
|
||||
.arg("--no-sync")
|
||||
// POSITIONAL args
|
||||
// source db (db name included in connection string)
|
||||
.arg(&source_connection_string)
|
||||
// how we run it
|
||||
.env_clear()
|
||||
.kill_on_drop(true)
|
||||
.stdout(std::process::Stdio::piped())
|
||||
.stderr(std::process::Stdio::piped())
|
||||
.spawn()
|
||||
.context("spawn pg_dump")?;
|
||||
|
||||
info!(pid=%pg_dump.id().unwrap(), "spawned pg_dump");
|
||||
|
||||
tokio::spawn(
|
||||
child_stdio_to_log::relay_process_output(pg_dump.stdout.take(), pg_dump.stderr.take())
|
||||
.instrument(info_span!("pg_dump")),
|
||||
);
|
||||
|
||||
let st = pg_dump.wait().await.context("wait for pg_dump")?;
|
||||
info!(status=?st, "pg_dump exited");
|
||||
if !st.success() {
|
||||
warn!(status=%st, "pg_dump failed, restore will likely fail as well");
|
||||
}
|
||||
}
|
||||
|
||||
// TODO: do it in a streaming way, plenty of internal research done on this already
|
||||
// TODO: do the unlogged table trick
|
||||
|
||||
info!("restore from working directory into vanilla postgres");
|
||||
{
|
||||
let mut pg_restore = tokio::process::Command::new(pg_bin_dir.join("pg_restore"))
|
||||
.args(&common_args)
|
||||
.arg("-d")
|
||||
.arg(&restore_pg_connstring)
|
||||
// POSITIONAL args
|
||||
.arg(&dumpdir)
|
||||
// how we run it
|
||||
.env_clear()
|
||||
.kill_on_drop(true)
|
||||
.stdout(std::process::Stdio::piped())
|
||||
.stderr(std::process::Stdio::piped())
|
||||
.spawn()
|
||||
.context("spawn pg_restore")?;
|
||||
|
||||
info!(pid=%pg_restore.id().unwrap(), "spawned pg_restore");
|
||||
tokio::spawn(
|
||||
child_stdio_to_log::relay_process_output(
|
||||
pg_restore.stdout.take(),
|
||||
pg_restore.stderr.take(),
|
||||
)
|
||||
.instrument(info_span!("pg_restore")),
|
||||
);
|
||||
let st = pg_restore.wait().await.context("wait for pg_restore")?;
|
||||
info!(status=?st, "pg_restore exited");
|
||||
if !st.success() {
|
||||
warn!(status=%st, "pg_restore failed, restore will likely fail as well");
|
||||
}
|
||||
}
|
||||
|
||||
info!("shutdown postgres");
|
||||
{
|
||||
nix::sys::signal::kill(
|
||||
Pid::from_raw(
|
||||
i32::try_from(postgres_proc.id().unwrap()).expect("convert child pid to i32"),
|
||||
),
|
||||
nix::sys::signal::SIGTERM,
|
||||
)
|
||||
.context("signal postgres to shut down")?;
|
||||
postgres_proc
|
||||
.wait()
|
||||
.await
|
||||
.context("wait for postgres to shut down")?;
|
||||
}
|
||||
|
||||
info!("upload pgdata");
|
||||
s5cmd::sync(Utf8Path::new(&pgdata_dir), &s3_prefix.append("/"))
|
||||
.await
|
||||
.context("sync dump directory to destination")?;
|
||||
|
||||
info!("write status");
|
||||
{
|
||||
let status_dir = working_directory.join("status");
|
||||
std::fs::create_dir(&status_dir).context("create status directory")?;
|
||||
let status_file = status_dir.join("status");
|
||||
std::fs::write(&status_file, serde_json::json!({"done": true}).to_string())
|
||||
.context("write status file")?;
|
||||
s5cmd::sync(&status_file, &s3_prefix.append("/status/pgdata"))
|
||||
.await
|
||||
.context("sync status directory to destination")?;
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
@@ -1,35 +0,0 @@
|
||||
use tokio::io::{AsyncBufReadExt, BufReader};
|
||||
use tokio::process::{ChildStderr, ChildStdout};
|
||||
use tracing::info;
|
||||
|
||||
/// Asynchronously relays the output from a child process's `stdout` and `stderr` to the tracing log.
|
||||
/// Each line is read and logged individually, with lossy UTF-8 conversion.
|
||||
///
|
||||
/// # Arguments
|
||||
///
|
||||
/// * `stdout`: An `Option<ChildStdout>` from the child process.
|
||||
/// * `stderr`: An `Option<ChildStderr>` from the child process.
|
||||
///
|
||||
pub(crate) async fn relay_process_output(stdout: Option<ChildStdout>, stderr: Option<ChildStderr>) {
|
||||
let stdout_fut = async {
|
||||
if let Some(stdout) = stdout {
|
||||
let reader = BufReader::new(stdout);
|
||||
let mut lines = reader.lines();
|
||||
while let Ok(Some(line)) = lines.next_line().await {
|
||||
info!(fd = "stdout", "{}", line);
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
let stderr_fut = async {
|
||||
if let Some(stderr) = stderr {
|
||||
let reader = BufReader::new(stderr);
|
||||
let mut lines = reader.lines();
|
||||
while let Ok(Some(line)) = lines.next_line().await {
|
||||
info!(fd = "stderr", "{}", line);
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
tokio::join!(stdout_fut, stderr_fut);
|
||||
}
|
||||
@@ -1,75 +0,0 @@
|
||||
use anyhow::Result;
|
||||
use std::str::FromStr;
|
||||
|
||||
/// A parsed `s3://<bucket>/<key>` location.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct S3Uri {
    /// Bucket name: the part between `s3://` and the first following `/`.
    pub bucket: String,
    /// Object key (or key prefix): everything after that first `/`.
    pub key: String,
}
|
||||
|
||||
impl FromStr for S3Uri {
|
||||
type Err = anyhow::Error;
|
||||
|
||||
/// Parse an S3 URI into a bucket and key
|
||||
fn from_str(uri: &str) -> Result<Self> {
|
||||
// Ensure the URI starts with "s3://"
|
||||
if !uri.starts_with("s3://") {
|
||||
return Err(anyhow::anyhow!("Invalid S3 URI scheme"));
|
||||
}
|
||||
|
||||
// Remove the "s3://" prefix
|
||||
let stripped_uri = &uri[5..];
|
||||
|
||||
// Split the remaining string into bucket and key parts
|
||||
if let Some((bucket, key)) = stripped_uri.split_once('/') {
|
||||
Ok(S3Uri {
|
||||
bucket: bucket.to_string(),
|
||||
key: key.to_string(),
|
||||
})
|
||||
} else {
|
||||
Err(anyhow::anyhow!(
|
||||
"Invalid S3 URI format, missing bucket or key"
|
||||
))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl S3Uri {
|
||||
pub fn append(&self, suffix: &str) -> Self {
|
||||
Self {
|
||||
bucket: self.bucket.clone(),
|
||||
key: format!("{}{}", self.key, suffix),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl std::fmt::Display for S3Uri {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
|
||||
write!(f, "s3://{}/{}", self.bucket, self.key)
|
||||
}
|
||||
}
|
||||
|
||||
impl clap::builder::TypedValueParser for S3Uri {
|
||||
type Value = Self;
|
||||
|
||||
fn parse_ref(
|
||||
&self,
|
||||
_cmd: &clap::Command,
|
||||
_arg: Option<&clap::Arg>,
|
||||
value: &std::ffi::OsStr,
|
||||
) -> Result<Self::Value, clap::Error> {
|
||||
let value_str = value.to_str().ok_or_else(|| {
|
||||
clap::Error::raw(
|
||||
clap::error::ErrorKind::InvalidUtf8,
|
||||
"Invalid UTF-8 sequence",
|
||||
)
|
||||
})?;
|
||||
S3Uri::from_str(value_str).map_err(|e| {
|
||||
clap::Error::raw(
|
||||
clap::error::ErrorKind::InvalidValue,
|
||||
format!("Failed to parse S3 URI: {}", e),
|
||||
)
|
||||
})
|
||||
}
|
||||
}
|
||||
@@ -1,27 +0,0 @@
|
||||
use anyhow::Context;
|
||||
use camino::Utf8Path;
|
||||
|
||||
use super::s3_uri::S3Uri;
|
||||
|
||||
pub(crate) async fn sync(local: &Utf8Path, remote: &S3Uri) -> anyhow::Result<()> {
|
||||
let mut builder = tokio::process::Command::new("s5cmd");
|
||||
// s5cmd uses aws-sdk-go v1, hence doesn't support AWS_ENDPOINT_URL
|
||||
if let Some(val) = std::env::var_os("AWS_ENDPOINT_URL") {
|
||||
builder.arg("--endpoint-url").arg(val);
|
||||
}
|
||||
builder
|
||||
.arg("sync")
|
||||
.arg(local.as_str())
|
||||
.arg(remote.to_string());
|
||||
let st = builder
|
||||
.spawn()
|
||||
.context("spawn s5cmd")?
|
||||
.wait()
|
||||
.await
|
||||
.context("wait for s5cmd")?;
|
||||
if st.success() {
|
||||
Ok(())
|
||||
} else {
|
||||
Err(anyhow::anyhow!("s5cmd failed"))
|
||||
}
|
||||
}
|
||||
@@ -116,7 +116,7 @@ pub fn write_postgres_conf(
|
||||
vartype: "enum".to_owned(),
|
||||
};
|
||||
|
||||
writeln!(file, "{}", opt.to_pg_setting())?;
|
||||
write!(file, "{}", opt.to_pg_setting())?;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -20,7 +20,6 @@ use anyhow::Result;
|
||||
use hyper::header::CONTENT_TYPE;
|
||||
use hyper::service::{make_service_fn, service_fn};
|
||||
use hyper::{Body, Method, Request, Response, Server, StatusCode};
|
||||
use metrics::proto::MetricFamily;
|
||||
use metrics::Encoder;
|
||||
use metrics::TextEncoder;
|
||||
use tokio::task;
|
||||
@@ -73,22 +72,10 @@ async fn routes(req: Request<Body>, compute: &Arc<ComputeNode>) -> Response<Body
|
||||
(&Method::GET, "/metrics") => {
|
||||
debug!("serving /metrics GET request");
|
||||
|
||||
// When we call TextEncoder::encode() below, it will immediately
|
||||
// return an error if a metric family has no metrics, so we need to
|
||||
// preemptively filter out metric families with no metrics.
|
||||
let metrics = installed_extensions::collect()
|
||||
.into_iter()
|
||||
.filter(|m| !m.get_metric().is_empty())
|
||||
.collect::<Vec<MetricFamily>>();
|
||||
|
||||
let encoder = TextEncoder::new();
|
||||
let mut buffer = vec![];
|
||||
|
||||
if let Err(err) = encoder.encode(&metrics, &mut buffer) {
|
||||
let msg = format!("error handling /metrics request: {err}");
|
||||
error!(msg);
|
||||
return render_json_error(&msg, StatusCode::INTERNAL_SERVER_ERROR);
|
||||
}
|
||||
let metrics = installed_extensions::collect();
|
||||
let encoder = TextEncoder::new();
|
||||
encoder.encode(&metrics, &mut buffer).unwrap();
|
||||
|
||||
match Response::builder()
|
||||
.status(StatusCode::OK)
|
||||
|
||||
@@ -115,7 +115,7 @@ pub fn get_installed_extensions_sync(connstr: Url) -> Result<()> {
|
||||
|
||||
static INSTALLED_EXTENSIONS: Lazy<UIntGaugeVec> = Lazy::new(|| {
|
||||
register_uint_gauge_vec!(
|
||||
"compute_installed_extensions",
|
||||
"installed_extensions",
|
||||
"Number of databases where the version of extension is installed",
|
||||
&["extension_name", "version"]
|
||||
)
|
||||
|
||||
@@ -1153,7 +1153,6 @@ async fn handle_timeline(cmd: &TimelineCmd, env: &mut local_env::LocalEnv) -> Re
|
||||
timeline_info.timeline_id
|
||||
);
|
||||
}
|
||||
// TODO: rename to import-basebackup-plus-wal
|
||||
TimelineCmd::Import(args) => {
|
||||
let tenant_id = get_tenant_id(args.tenant_id, env)?;
|
||||
let timeline_id = args.timeline_id;
|
||||
|
||||
@@ -33,6 +33,7 @@ reason = "the marvin attack only affects private key decryption, not public key
|
||||
[licenses]
|
||||
allow = [
|
||||
"Apache-2.0",
|
||||
"Artistic-2.0",
|
||||
"BSD-2-Clause",
|
||||
"BSD-3-Clause",
|
||||
"CC0-1.0",
|
||||
@@ -66,7 +67,7 @@ registries = []
|
||||
# More documentation about the 'bans' section can be found here:
|
||||
# https://embarkstudios.github.io/cargo-deny/checks/bans/cfg.html
|
||||
[bans]
|
||||
multiple-versions = "allow"
|
||||
multiple-versions = "warn"
|
||||
wildcards = "allow"
|
||||
highlight = "all"
|
||||
workspace-default-features = "allow"
|
||||
|
||||
@@ -113,21 +113,21 @@ so manual installation of dependencies is not recommended.
|
||||
A single virtual environment with all dependencies is described in the single `Pipfile`.
|
||||
|
||||
### Prerequisites
|
||||
- Install Python 3.11 (the minimal supported version) or greater.
|
||||
- Install Python 3.9 (the minimal supported version) or greater.
|
||||
- Our setup with poetry should work with newer python versions too. So feel free to open an issue with a `c/test-runner` label if something doesn't work as expected.
|
||||
- If you have trouble with another version, you can resolve it by installing Python 3.11 separately, via [pyenv](https://github.com/pyenv/pyenv) or via the system package manager, e.g.:
|
||||
- If you have trouble with another version, you can resolve it by installing Python 3.9 separately, via [pyenv](https://github.com/pyenv/pyenv) or via the system package manager, e.g.:
|
||||
```bash
|
||||
# In Ubuntu
|
||||
sudo add-apt-repository ppa:deadsnakes/ppa
|
||||
sudo apt update
|
||||
sudo apt install python3.11
|
||||
sudo apt install python3.9
|
||||
```
|
||||
- Install `poetry`
|
||||
- Exact version of `poetry` is not important, see installation instructions available at poetry's [website](https://python-poetry.org/docs/#installation).
|
||||
- Install dependencies via `./scripts/pysync`.
|
||||
- Note that CI uses specific Python version (look for `PYTHON_VERSION` [here](https://github.com/neondatabase/docker-images/blob/main/rust/Dockerfile))
|
||||
so if you have different version some linting tools can yield different result locally vs in the CI.
|
||||
- You can explicitly specify which Python to use by running `poetry env use /path/to/python`, e.g. `poetry env use python3.11`.
|
||||
- You can explicitly specify which Python to use by running `poetry env use /path/to/python`, e.g. `poetry env use python3.9`.
|
||||
This may also disable the `The currently activated Python version X.Y.Z is not supported by the project` warning.
|
||||
|
||||
Run `poetry shell` to activate the virtual environment.
|
||||
|
||||
@@ -33,7 +33,6 @@ remote_storage.workspace = true
|
||||
postgres_backend.workspace = true
|
||||
nix = {workspace = true, optional = true}
|
||||
reqwest.workspace = true
|
||||
rand.workspace = true
|
||||
|
||||
[dev-dependencies]
|
||||
bincode.workspace = true
|
||||
|
||||
@@ -97,15 +97,6 @@ pub struct ConfigToml {
|
||||
pub control_plane_api: Option<reqwest::Url>,
|
||||
pub control_plane_api_token: Option<String>,
|
||||
pub control_plane_emergency_mode: bool,
|
||||
/// Unstable feature: subject to change or removal without notice.
|
||||
/// See <https://github.com/neondatabase/neon/pull/9218>.
|
||||
pub import_pgdata_upcall_api: Option<reqwest::Url>,
|
||||
/// Unstable feature: subject to change or removal without notice.
|
||||
/// See <https://github.com/neondatabase/neon/pull/9218>.
|
||||
pub import_pgdata_upcall_api_token: Option<String>,
|
||||
/// Unstable feature: subject to change or removal without notice.
|
||||
/// See <https://github.com/neondatabase/neon/pull/9218>.
|
||||
pub import_pgdata_aws_endpoint_url: Option<reqwest::Url>,
|
||||
pub heatmap_upload_concurrency: usize,
|
||||
pub secondary_download_concurrency: usize,
|
||||
pub virtual_file_io_engine: Option<crate::models::virtual_file::IoEngineKind>,
|
||||
@@ -395,10 +386,6 @@ impl Default for ConfigToml {
|
||||
control_plane_api_token: (None),
|
||||
control_plane_emergency_mode: (false),
|
||||
|
||||
import_pgdata_upcall_api: (None),
|
||||
import_pgdata_upcall_api_token: (None),
|
||||
import_pgdata_aws_endpoint_url: (None),
|
||||
|
||||
heatmap_upload_concurrency: (DEFAULT_HEATMAP_UPLOAD_CONCURRENCY),
|
||||
secondary_download_concurrency: (DEFAULT_SECONDARY_DOWNLOAD_CONCURRENCY),
|
||||
|
||||
|
||||
@@ -48,7 +48,7 @@ pub struct ShardedRange<'a> {
|
||||
|
||||
// Calculate the size of a range within the blocks of the same relation, or spanning only the
|
||||
// top page in the previous relation's space.
|
||||
pub fn contiguous_range_len(range: &Range<Key>) -> u32 {
|
||||
fn contiguous_range_len(range: &Range<Key>) -> u32 {
|
||||
debug_assert!(is_contiguous_range(range));
|
||||
if range.start.field6 == 0xffffffff {
|
||||
range.end.field6 + 1
|
||||
@@ -67,7 +67,7 @@ pub fn contiguous_range_len(range: &Range<Key>) -> u32 {
|
||||
/// This matters, because:
|
||||
/// - Within such ranges, keys are used contiguously. Outside such ranges it is sparse.
|
||||
/// - Within such ranges, we may calculate distances using simple subtraction of field6.
|
||||
pub fn is_contiguous_range(range: &Range<Key>) -> bool {
|
||||
fn is_contiguous_range(range: &Range<Key>) -> bool {
|
||||
range.start.field1 == range.end.field1
|
||||
&& range.start.field2 == range.end.field2
|
||||
&& range.start.field3 == range.end.field3
|
||||
|
||||
@@ -2,8 +2,6 @@ pub mod detach_ancestor;
|
||||
pub mod partitioning;
|
||||
pub mod utilization;
|
||||
|
||||
#[cfg(feature = "testing")]
|
||||
use camino::Utf8PathBuf;
|
||||
pub use utilization::PageserverUtilization;
|
||||
|
||||
use std::{
|
||||
@@ -229,9 +227,6 @@ pub enum TimelineCreateRequestMode {
|
||||
// we continue to accept it by having it here.
|
||||
pg_version: Option<u32>,
|
||||
},
|
||||
ImportPgdata {
|
||||
import_pgdata: TimelineCreateRequestModeImportPgdata,
|
||||
},
|
||||
// NB: Bootstrap is all-optional, and thus the serde(untagged) will cause serde to stop at Bootstrap.
|
||||
// (serde picks the first matching enum variant, in declaration order).
|
||||
Bootstrap {
|
||||
@@ -241,42 +236,6 @@ pub enum TimelineCreateRequestMode {
|
||||
},
|
||||
}
|
||||
|
||||
#[derive(Serialize, Deserialize, Clone)]
|
||||
pub struct TimelineCreateRequestModeImportPgdata {
|
||||
pub location: ImportPgdataLocation,
|
||||
pub idempotency_key: ImportPgdataIdempotencyKey,
|
||||
}
|
||||
|
||||
#[derive(Serialize, Deserialize, Clone, Debug)]
|
||||
pub enum ImportPgdataLocation {
|
||||
#[cfg(feature = "testing")]
|
||||
LocalFs { path: Utf8PathBuf },
|
||||
AwsS3 {
|
||||
region: String,
|
||||
bucket: String,
|
||||
/// A better name for this would be `prefix`; changing requires coordination with cplane.
|
||||
/// See <https://github.com/neondatabase/cloud/issues/20646>.
|
||||
key: String,
|
||||
},
|
||||
}
|
||||
|
||||
#[derive(Serialize, Deserialize, Clone)]
|
||||
#[serde(transparent)]
|
||||
pub struct ImportPgdataIdempotencyKey(pub String);
|
||||
|
||||
impl ImportPgdataIdempotencyKey {
|
||||
pub fn random() -> Self {
|
||||
use rand::{distributions::Alphanumeric, Rng};
|
||||
Self(
|
||||
rand::thread_rng()
|
||||
.sample_iter(&Alphanumeric)
|
||||
.take(20)
|
||||
.map(char::from)
|
||||
.collect(),
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Serialize, Deserialize, Clone)]
|
||||
pub struct LsnLeaseRequest {
|
||||
pub lsn: Lsn,
|
||||
|
||||
@@ -1,12 +0,0 @@
|
||||
[package]
|
||||
name = "postgres_initdb"
|
||||
version = "0.1.0"
|
||||
edition.workspace = true
|
||||
license.workspace = true
|
||||
|
||||
[dependencies]
|
||||
anyhow.workspace = true
|
||||
tokio.workspace = true
|
||||
camino.workspace = true
|
||||
thiserror.workspace = true
|
||||
workspace_hack = { version = "0.1", path = "../../workspace_hack" }
|
||||
@@ -1,103 +0,0 @@
|
||||
//! The canonical way we run `initdb` in Neon.
|
||||
//!
|
||||
//! initdb has implicit defaults that are dependent on the environment, e.g., locales & collations.
|
||||
//!
|
||||
//! This module's job is to eliminate the environment-dependence as much as possible.
|
||||
|
||||
use std::fmt;
|
||||
|
||||
use camino::Utf8Path;
|
||||
|
||||
pub struct RunInitdbArgs<'a> {
|
||||
pub superuser: &'a str,
|
||||
pub locale: &'a str,
|
||||
pub initdb_bin: &'a Utf8Path,
|
||||
pub pg_version: u32,
|
||||
pub library_search_path: &'a Utf8Path,
|
||||
pub pgdata: &'a Utf8Path,
|
||||
}
|
||||
|
||||
#[derive(thiserror::Error, Debug)]
|
||||
pub enum Error {
|
||||
Spawn(std::io::Error),
|
||||
Failed {
|
||||
status: std::process::ExitStatus,
|
||||
stderr: Vec<u8>,
|
||||
},
|
||||
WaitOutput(std::io::Error),
|
||||
Other(anyhow::Error),
|
||||
}
|
||||
|
||||
impl fmt::Display for Error {
|
||||
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
||||
match self {
|
||||
Error::Spawn(e) => write!(f, "Error spawning command: {:?}", e),
|
||||
Error::Failed { status, stderr } => write!(
|
||||
f,
|
||||
"Command failed with status {:?}: {}",
|
||||
status,
|
||||
String::from_utf8_lossy(stderr)
|
||||
),
|
||||
Error::WaitOutput(e) => write!(f, "Error waiting for command output: {:?}", e),
|
||||
Error::Other(e) => write!(f, "Error: {:?}", e),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub async fn do_run_initdb(args: RunInitdbArgs<'_>) -> Result<(), Error> {
|
||||
let RunInitdbArgs {
|
||||
superuser,
|
||||
locale,
|
||||
initdb_bin: initdb_bin_path,
|
||||
pg_version,
|
||||
library_search_path,
|
||||
pgdata,
|
||||
} = args;
|
||||
let mut initdb_command = tokio::process::Command::new(initdb_bin_path);
|
||||
initdb_command
|
||||
.args(["--pgdata", pgdata.as_ref()])
|
||||
.args(["--username", superuser])
|
||||
.args(["--encoding", "utf8"])
|
||||
.args(["--locale", locale])
|
||||
.arg("--no-instructions")
|
||||
.arg("--no-sync")
|
||||
.env_clear()
|
||||
.env("LD_LIBRARY_PATH", library_search_path)
|
||||
.env("DYLD_LIBRARY_PATH", library_search_path)
|
||||
.stdin(std::process::Stdio::null())
|
||||
// stdout invocation produces the same output every time, we don't need it
|
||||
.stdout(std::process::Stdio::null())
|
||||
// we would be interested in the stderr output, if there was any
|
||||
.stderr(std::process::Stdio::piped());
|
||||
|
||||
// Before version 14, only the libc provide was available.
|
||||
if pg_version > 14 {
|
||||
// Version 17 brought with it a builtin locale provider which only provides
|
||||
// C and C.UTF-8. While being safer for collation purposes since it is
|
||||
// guaranteed to be consistent throughout a major release, it is also more
|
||||
// performant.
|
||||
let locale_provider = if pg_version >= 17 { "builtin" } else { "libc" };
|
||||
|
||||
initdb_command.args(["--locale-provider", locale_provider]);
|
||||
}
|
||||
|
||||
let initdb_proc = initdb_command.spawn().map_err(Error::Spawn)?;
|
||||
|
||||
// Ideally we'd select here with the cancellation token, but the problem is that
|
||||
// we can't safely terminate initdb: it launches processes of its own, and killing
|
||||
// initdb doesn't kill them. After we return from this function, we want the target
|
||||
// directory to be able to be cleaned up.
|
||||
// See https://github.com/neondatabase/neon/issues/6385
|
||||
let initdb_output = initdb_proc
|
||||
.wait_with_output()
|
||||
.await
|
||||
.map_err(Error::WaitOutput)?;
|
||||
if !initdb_output.status.success() {
|
||||
return Err(Error::Failed {
|
||||
status: initdb_output.status,
|
||||
stderr: initdb_output.stderr,
|
||||
});
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
@@ -185,7 +185,7 @@ pub struct CancelKeyData {
|
||||
impl fmt::Display for CancelKeyData {
|
||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||
let hi = (self.backend_pid as u64) << 32;
|
||||
let lo = (self.cancel_key as u64) & 0xffffffff;
|
||||
let lo = self.cancel_key as u64;
|
||||
let id = hi | lo;
|
||||
|
||||
// This format is more compact and might work better for logs.
|
||||
@@ -1046,13 +1046,4 @@ mod tests {
|
||||
let data = [0, 0, 0, 7, 0, 0, 0, 0];
|
||||
FeStartupPacket::parse(&mut BytesMut::from_iter(data)).unwrap_err();
|
||||
}
|
||||
|
||||
/// Pins the `Display` output of `CancelKeyData` when both halves are negative.
#[test]
fn cancel_key_data() {
    let key = CancelKeyData {
        backend_pid: -1817212860,
        cancel_key: -1183897012,
    };
    let rendered = key.to_string();
    assert_eq!(rendered, "CancelKeyData(93af8844b96f2a4c)");
}
|
||||
}
|
||||
|
||||
@@ -17,7 +17,6 @@ use anyhow::Result;
|
||||
use azure_core::request_options::{IfMatchCondition, MaxResults, Metadata, Range};
|
||||
use azure_core::{Continuable, RetryOptions};
|
||||
use azure_identity::DefaultAzureCredential;
|
||||
use azure_storage::CloudLocation;
|
||||
use azure_storage::StorageCredentials;
|
||||
use azure_storage_blobs::blob::CopyStatus;
|
||||
use azure_storage_blobs::prelude::ClientBuilder;
|
||||
@@ -25,7 +24,6 @@ use azure_storage_blobs::{blob::operations::GetBlobBuilder, prelude::ContainerCl
|
||||
use bytes::Bytes;
|
||||
use futures::future::Either;
|
||||
use futures::stream::Stream;
|
||||
use futures::FutureExt;
|
||||
use futures_util::StreamExt;
|
||||
use futures_util::TryStreamExt;
|
||||
use http_types::{StatusCode, Url};
|
||||
@@ -33,7 +31,6 @@ use scopeguard::ScopeGuard;
|
||||
use tokio_util::sync::CancellationToken;
|
||||
use tracing::debug;
|
||||
use utils::backoff;
|
||||
use utils::backoff::exponential_backoff_duration_seconds;
|
||||
|
||||
use crate::metrics::{start_measuring_requests, AttemptOutcome, RequestKind};
|
||||
use crate::{
|
||||
@@ -73,16 +70,8 @@ impl AzureBlobStorage {
|
||||
StorageCredentials::token_credential(Arc::new(token_credential))
|
||||
};
|
||||
|
||||
let location = match &azure_config.endpoint {
|
||||
None => CloudLocation::Public { account },
|
||||
Some(endpoint) => CloudLocation::Custom {
|
||||
account,
|
||||
uri: endpoint.clone(),
|
||||
},
|
||||
};
|
||||
let builder = ClientBuilder::with_location(location, credentials)
|
||||
// we have an outer retry
|
||||
.retry(RetryOptions::none());
|
||||
// we have an outer retry
|
||||
let builder = ClientBuilder::new(account, credentials).retry(RetryOptions::none());
|
||||
|
||||
let client = builder.container_client(azure_config.container_name.to_owned());
|
||||
|
||||
@@ -108,7 +97,10 @@ impl AzureBlobStorage {
|
||||
|
||||
pub fn relative_path_to_name(&self, path: &RemotePath) -> String {
|
||||
assert_eq!(std::path::MAIN_SEPARATOR, REMOTE_STORAGE_PREFIX_SEPARATOR);
|
||||
let path_string = path.get_path().as_str();
|
||||
let path_string = path
|
||||
.get_path()
|
||||
.as_str()
|
||||
.trim_end_matches(REMOTE_STORAGE_PREFIX_SEPARATOR);
|
||||
match &self.prefix_in_container {
|
||||
Some(prefix) => {
|
||||
if prefix.ends_with(REMOTE_STORAGE_PREFIX_SEPARATOR) {
|
||||
@@ -285,14 +277,19 @@ impl RemoteStorage for AzureBlobStorage {
|
||||
cancel: &CancellationToken,
|
||||
) -> impl Stream<Item = Result<Listing, DownloadError>> {
|
||||
// get the passed prefix or if it is not set use prefix_in_bucket value
|
||||
let list_prefix = prefix.map(|p| self.relative_path_to_name(p)).or_else(|| {
|
||||
self.prefix_in_container.clone().map(|mut s| {
|
||||
if !s.ends_with(REMOTE_STORAGE_PREFIX_SEPARATOR) {
|
||||
s.push(REMOTE_STORAGE_PREFIX_SEPARATOR);
|
||||
let list_prefix = prefix
|
||||
.map(|p| self.relative_path_to_name(p))
|
||||
.or_else(|| self.prefix_in_container.clone())
|
||||
.map(|mut p| {
|
||||
// required to end with a separator
|
||||
// otherwise request will return only the entry of a prefix
|
||||
if matches!(mode, ListingMode::WithDelimiter)
|
||||
&& !p.ends_with(REMOTE_STORAGE_PREFIX_SEPARATOR)
|
||||
{
|
||||
p.push(REMOTE_STORAGE_PREFIX_SEPARATOR);
|
||||
}
|
||||
s
|
||||
})
|
||||
});
|
||||
p
|
||||
});
|
||||
|
||||
async_stream::stream! {
|
||||
let _permit = self.permit(RequestKind::List, cancel).await?;
|
||||
@@ -313,59 +310,40 @@ impl RemoteStorage for AzureBlobStorage {
|
||||
|
||||
let mut next_marker = None;
|
||||
|
||||
let mut timeout_try_cnt = 1;
|
||||
|
||||
'outer: loop {
|
||||
let mut builder = builder.clone();
|
||||
if let Some(marker) = next_marker.clone() {
|
||||
builder = builder.marker(marker);
|
||||
}
|
||||
// Azure Blob Rust SDK does not expose the list blob API directly. Users have to use
|
||||
// their pageable iterator wrapper that returns all keys as a stream. We want to have
|
||||
// full control of paging, and therefore we only take the first item from the stream.
|
||||
let mut response_stream = builder.into_stream();
|
||||
let response = response_stream.next();
|
||||
// Timeout mechanism: Azure client will sometimes stuck on a request, but retrying that request
|
||||
// would immediately succeed. Therefore, we use exponential backoff timeout to retry the request.
|
||||
// (Usually, exponential backoff is used to determine the sleep time between two retries.) We
|
||||
// start with 10.0 second timeout, and double the timeout for each failure, up to 5 failures.
|
||||
// timeout = min(5 * (1.0+1.0)^n, self.timeout).
|
||||
let this_timeout = (5.0 * exponential_backoff_duration_seconds(timeout_try_cnt, 1.0, self.timeout.as_secs_f64())).min(self.timeout.as_secs_f64());
|
||||
let response = tokio::time::timeout(Duration::from_secs_f64(this_timeout), response);
|
||||
let response = response.map(|res| {
|
||||
match res {
|
||||
Ok(Some(Ok(res))) => Ok(Some(res)),
|
||||
Ok(Some(Err(e))) => Err(to_download_error(e)),
|
||||
Ok(None) => Ok(None),
|
||||
Err(_elasped) => Err(DownloadError::Timeout),
|
||||
}
|
||||
let response = builder.into_stream();
|
||||
let response = response.into_stream().map_err(to_download_error);
|
||||
let response = tokio_stream::StreamExt::timeout(response, self.timeout);
|
||||
let response = response.map(|res| match res {
|
||||
Ok(res) => res,
|
||||
Err(_elapsed) => Err(DownloadError::Timeout),
|
||||
});
|
||||
|
||||
let mut response = std::pin::pin!(response);
|
||||
|
||||
let mut max_keys = max_keys.map(|mk| mk.get());
|
||||
let next_item = tokio::select! {
|
||||
op = response => op,
|
||||
op = response.next() => Ok(op),
|
||||
_ = cancel.cancelled() => Err(DownloadError::Cancelled),
|
||||
};
|
||||
|
||||
if let Err(DownloadError::Timeout) = &next_item {
|
||||
timeout_try_cnt += 1;
|
||||
if timeout_try_cnt <= 5 {
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
let next_item = next_item?;
|
||||
|
||||
if timeout_try_cnt >= 2 {
|
||||
tracing::warn!("Azure Blob Storage list timed out and succeeded after {} tries", timeout_try_cnt);
|
||||
}
|
||||
timeout_try_cnt = 1;
|
||||
|
||||
}?;
|
||||
let Some(entry) = next_item else {
|
||||
// The list is complete, so yield it.
|
||||
break;
|
||||
};
|
||||
|
||||
let mut res = Listing::default();
|
||||
let entry = match entry {
|
||||
Ok(entry) => entry,
|
||||
Err(e) => {
|
||||
// The error is potentially retryable, so we must rewind the loop after yielding.
|
||||
yield Err(e);
|
||||
continue;
|
||||
}
|
||||
};
|
||||
next_marker = entry.continuation();
|
||||
let prefix_iter = entry
|
||||
.blobs
|
||||
@@ -381,7 +359,7 @@ impl RemoteStorage for AzureBlobStorage {
|
||||
last_modified: k.properties.last_modified.into(),
|
||||
size: k.properties.content_length,
|
||||
}
|
||||
);
|
||||
);
|
||||
|
||||
for key in blob_iter {
|
||||
res.keys.push(key);
|
||||
|
||||
@@ -125,8 +125,6 @@ pub struct AzureConfig {
|
||||
pub container_region: String,
|
||||
/// A "subfolder" in the container, to use the same container separately by multiple remote storage users at once.
|
||||
pub prefix_in_container: Option<String>,
|
||||
/// The endpoint to use. Use the default if None.
|
||||
pub endpoint: Option<String>,
|
||||
/// Azure has various limits on its API calls, we need not to exceed those.
|
||||
/// See [`DEFAULT_REMOTE_STORAGE_AZURE_CONCURRENCY_LIMIT`] for more details.
|
||||
#[serde(default = "default_remote_storage_azure_concurrency_limit")]
|
||||
@@ -146,7 +144,6 @@ impl Debug for AzureConfig {
|
||||
.field("storage_account", &self.storage_account)
|
||||
.field("bucket_region", &self.container_region)
|
||||
.field("prefix_in_container", &self.prefix_in_container)
|
||||
.field("endpoint", &self.endpoint)
|
||||
.field("concurrency_limit", &self.concurrency_limit)
|
||||
.field(
|
||||
"max_keys_per_list_response",
|
||||
@@ -299,7 +296,6 @@ timeout = '5s'";
|
||||
storage_account: None,
|
||||
container_region: "westeurope".into(),
|
||||
prefix_in_container: None,
|
||||
endpoint: None,
|
||||
concurrency_limit: default_remote_storage_azure_concurrency_limit(),
|
||||
max_keys_per_list_response: DEFAULT_MAX_KEYS_PER_LIST_RESPONSE,
|
||||
}),
|
||||
|
||||
@@ -360,12 +360,7 @@ impl RemoteStorage for LocalFs {
|
||||
let mut objects = Vec::with_capacity(keys.len());
|
||||
for key in keys {
|
||||
let path = key.with_base(&self.storage_root);
|
||||
let metadata = file_metadata(&path).await;
|
||||
if let Err(DownloadError::NotFound) = metadata {
|
||||
// Race: if the file is deleted between listing and metadata check, ignore it.
|
||||
continue;
|
||||
}
|
||||
let metadata = metadata?;
|
||||
let metadata = file_metadata(&path).await?;
|
||||
if metadata.is_dir() {
|
||||
continue;
|
||||
}
|
||||
|
||||
@@ -29,7 +29,6 @@ jsonwebtoken.workspace = true
|
||||
nix.workspace = true
|
||||
once_cell.workspace = true
|
||||
pin-project-lite.workspace = true
|
||||
pprof.workspace = true
|
||||
regex.workspace = true
|
||||
routerify.workspace = true
|
||||
serde.workspace = true
|
||||
|
||||
@@ -1,8 +1,7 @@
|
||||
use crate::auth::{AuthError, Claims, SwappableJwtAuth};
|
||||
use crate::http::error::{api_error_handler, route_error_handler, ApiError};
|
||||
use crate::http::request::{get_query_param, parse_query_param};
|
||||
use anyhow::{anyhow, Context};
|
||||
use hyper::header::{HeaderName, AUTHORIZATION, CONTENT_DISPOSITION};
|
||||
use anyhow::Context;
|
||||
use hyper::header::{HeaderName, AUTHORIZATION};
|
||||
use hyper::http::HeaderValue;
|
||||
use hyper::Method;
|
||||
use hyper::{header::CONTENT_TYPE, Body, Request, Response};
|
||||
@@ -13,13 +12,11 @@ use routerify::{Middleware, RequestInfo, Router, RouterBuilder};
|
||||
use tracing::{debug, info, info_span, warn, Instrument};
|
||||
|
||||
use std::future::Future;
|
||||
use std::io::Write as _;
|
||||
use std::str::FromStr;
|
||||
use std::time::Duration;
|
||||
|
||||
use bytes::{Bytes, BytesMut};
|
||||
use pprof::protos::Message as _;
|
||||
use tokio::sync::{mpsc, Mutex};
|
||||
use std::io::Write as _;
|
||||
use tokio::sync::mpsc;
|
||||
use tokio_stream::wrappers::ReceiverStream;
|
||||
|
||||
static SERVE_METRICS_COUNT: Lazy<IntCounter> = Lazy::new(|| {
|
||||
@@ -331,82 +328,6 @@ pub async fn prometheus_metrics_handler(_req: Request<Body>) -> Result<Response<
|
||||
Ok(response)
|
||||
}
|
||||
|
||||
/// Generates CPU profiles.
|
||||
pub async fn profile_cpu_handler(req: Request<Body>) -> Result<Response<Body>, ApiError> {
|
||||
enum Format {
|
||||
Pprof,
|
||||
Svg,
|
||||
}
|
||||
|
||||
// Parameters.
|
||||
let format = match get_query_param(&req, "format")?.as_deref() {
|
||||
None => Format::Pprof,
|
||||
Some("pprof") => Format::Pprof,
|
||||
Some("svg") => Format::Svg,
|
||||
Some(format) => return Err(ApiError::BadRequest(anyhow!("invalid format {format}"))),
|
||||
};
|
||||
let seconds = match parse_query_param(&req, "seconds")? {
|
||||
None => 5,
|
||||
Some(seconds @ 1..=30) => seconds,
|
||||
Some(_) => return Err(ApiError::BadRequest(anyhow!("duration must be 1-30 secs"))),
|
||||
};
|
||||
let frequency_hz = match parse_query_param(&req, "frequency")? {
|
||||
None => 99,
|
||||
Some(1001..) => return Err(ApiError::BadRequest(anyhow!("frequency must be <=1000 Hz"))),
|
||||
Some(frequency) => frequency,
|
||||
};
|
||||
|
||||
// Only allow one profiler at a time.
|
||||
static PROFILE_LOCK: Lazy<Mutex<()>> = Lazy::new(|| Mutex::new(()));
|
||||
let _lock = PROFILE_LOCK
|
||||
.try_lock()
|
||||
.map_err(|_| ApiError::Conflict("profiler already running".into()))?;
|
||||
|
||||
// Take the profile.
|
||||
let report = tokio::task::spawn_blocking(move || {
|
||||
let guard = pprof::ProfilerGuardBuilder::default()
|
||||
.frequency(frequency_hz)
|
||||
.blocklist(&["libc", "libgcc", "pthread", "vdso"])
|
||||
.build()?;
|
||||
std::thread::sleep(Duration::from_secs(seconds));
|
||||
guard.report().build()
|
||||
})
|
||||
.await
|
||||
.map_err(|join_err| ApiError::InternalServerError(join_err.into()))?
|
||||
.map_err(|pprof_err| ApiError::InternalServerError(pprof_err.into()))?;
|
||||
|
||||
// Return the report in the requested format.
|
||||
match format {
|
||||
Format::Pprof => {
|
||||
let mut body = Vec::new();
|
||||
report
|
||||
.pprof()
|
||||
.map_err(|err| ApiError::InternalServerError(err.into()))?
|
||||
.write_to_vec(&mut body)
|
||||
.map_err(|err| ApiError::InternalServerError(err.into()))?;
|
||||
|
||||
Response::builder()
|
||||
.status(200)
|
||||
.header(CONTENT_TYPE, "application/octet-stream")
|
||||
.header(CONTENT_DISPOSITION, "attachment; filename=\"profile.pb\"")
|
||||
.body(Body::from(body))
|
||||
.map_err(|err| ApiError::InternalServerError(err.into()))
|
||||
}
|
||||
|
||||
Format::Svg => {
|
||||
let mut body = Vec::new();
|
||||
report
|
||||
.flamegraph(&mut body)
|
||||
.map_err(|err| ApiError::InternalServerError(err.into()))?;
|
||||
Response::builder()
|
||||
.status(200)
|
||||
.header(CONTENT_TYPE, "image/svg+xml")
|
||||
.body(Body::from(body))
|
||||
.map_err(|err| ApiError::InternalServerError(err.into()))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub fn add_request_id_middleware<B: hyper::body::HttpBody + Send + Sync + 'static>(
|
||||
) -> Middleware<B, ApiError> {
|
||||
Middleware::pre(move |req| async move {
|
||||
|
||||
@@ -30,7 +30,7 @@ pub fn parse_request_param<T: FromStr>(
|
||||
}
|
||||
}
|
||||
|
||||
pub fn get_query_param<'a>(
|
||||
fn get_query_param<'a>(
|
||||
request: &'a Request<Body>,
|
||||
param_name: &str,
|
||||
) -> Result<Option<Cow<'a, str>>, ApiError> {
|
||||
|
||||
@@ -83,9 +83,7 @@ where
|
||||
}
|
||||
wake_these.push(self.heap.pop().unwrap().wake_channel);
|
||||
}
|
||||
if !wake_these.is_empty() {
|
||||
self.update_status();
|
||||
}
|
||||
self.update_status();
|
||||
wake_these
|
||||
}
|
||||
|
||||
|
||||
@@ -43,7 +43,6 @@ postgres.workspace = true
|
||||
postgres_backend.workspace = true
|
||||
postgres-protocol.workspace = true
|
||||
postgres-types.workspace = true
|
||||
postgres_initdb.workspace = true
|
||||
rand.workspace = true
|
||||
range-set-blaze = { version = "0.1.16", features = ["alloc"] }
|
||||
regex.workspace = true
|
||||
@@ -69,7 +68,6 @@ url.workspace = true
|
||||
walkdir.workspace = true
|
||||
metrics.workspace = true
|
||||
pageserver_api.workspace = true
|
||||
pageserver_client.workspace = true # for ResponseErrorMessageExt TODO refactor that
|
||||
pageserver_compaction.workspace = true
|
||||
postgres_connection.workspace = true
|
||||
postgres_ffi.workspace = true
|
||||
|
||||
@@ -144,10 +144,6 @@ pub struct PageServerConf {
|
||||
/// JWT token for use with the control plane API.
|
||||
pub control_plane_api_token: Option<SecretString>,
|
||||
|
||||
pub import_pgdata_upcall_api: Option<Url>,
|
||||
pub import_pgdata_upcall_api_token: Option<SecretString>,
|
||||
pub import_pgdata_aws_endpoint_url: Option<Url>,
|
||||
|
||||
/// If true, pageserver will make best-effort to operate without a control plane: only
|
||||
/// for use in major incidents.
|
||||
pub control_plane_emergency_mode: bool,
|
||||
@@ -332,9 +328,6 @@ impl PageServerConf {
|
||||
control_plane_api,
|
||||
control_plane_api_token,
|
||||
control_plane_emergency_mode,
|
||||
import_pgdata_upcall_api,
|
||||
import_pgdata_upcall_api_token,
|
||||
import_pgdata_aws_endpoint_url,
|
||||
heatmap_upload_concurrency,
|
||||
secondary_download_concurrency,
|
||||
ingest_batch_size,
|
||||
@@ -390,9 +383,6 @@ impl PageServerConf {
|
||||
timeline_offloading,
|
||||
ephemeral_bytes_per_memory_kb,
|
||||
server_side_batch_timeout,
|
||||
import_pgdata_upcall_api,
|
||||
import_pgdata_upcall_api_token: import_pgdata_upcall_api_token.map(SecretString::from),
|
||||
import_pgdata_aws_endpoint_url,
|
||||
|
||||
// ------------------------------------------------------------
|
||||
// fields that require additional validation or custom handling
|
||||
|
||||
@@ -15,7 +15,6 @@ use tokio_util::sync::CancellationToken;
|
||||
use tracing::info;
|
||||
use tracing::warn;
|
||||
use utils::backoff;
|
||||
use utils::pausable_failpoint;
|
||||
|
||||
use crate::metrics;
|
||||
|
||||
@@ -91,7 +90,6 @@ impl Deleter {
|
||||
/// Block until everything in accumulator has been executed
|
||||
async fn flush(&mut self) -> Result<(), DeletionQueueError> {
|
||||
while !self.accumulator.is_empty() && !self.cancel.is_cancelled() {
|
||||
pausable_failpoint!("deletion-queue-before-execute-pause");
|
||||
match self.remote_delete().await {
|
||||
Ok(()) => {
|
||||
// Note: we assume that the remote storage layer returns Ok(()) if some
|
||||
|
||||
@@ -623,8 +623,6 @@ paths:
|
||||
existing_initdb_timeline_id:
|
||||
type: string
|
||||
format: hex
|
||||
import_pgdata:
|
||||
$ref: "#/components/schemas/TimelineCreateRequestImportPgdata"
|
||||
responses:
|
||||
"201":
|
||||
description: Timeline was created, or already existed with matching parameters
|
||||
@@ -981,34 +979,6 @@ components:
|
||||
$ref: "#/components/schemas/TenantConfig"
|
||||
effective_config:
|
||||
$ref: "#/components/schemas/TenantConfig"
|
||||
TimelineCreateRequestImportPgdata:
|
||||
type: object
|
||||
required:
|
||||
- location
|
||||
- idempotency_key
|
||||
properties:
|
||||
idempotency_key:
|
||||
type: string
|
||||
location:
|
||||
$ref: "#/components/schemas/TimelineCreateRequestImportPgdataLocation"
|
||||
TimelineCreateRequestImportPgdataLocation:
|
||||
type: object
|
||||
properties:
|
||||
AwsS3:
|
||||
$ref: "#/components/schemas/TimelineCreateRequestImportPgdataLocationAwsS3"
|
||||
TimelineCreateRequestImportPgdataLocationAwsS3:
|
||||
type: object
|
||||
properties:
|
||||
region:
|
||||
type: string
|
||||
bucket:
|
||||
type: string
|
||||
key:
|
||||
type: string
|
||||
required:
|
||||
- region
|
||||
- bucket
|
||||
- key
|
||||
TimelineInfo:
|
||||
type: object
|
||||
required:
|
||||
|
||||
@@ -40,7 +40,6 @@ use pageserver_api::models::TenantSorting;
|
||||
use pageserver_api::models::TenantState;
|
||||
use pageserver_api::models::TimelineArchivalConfigRequest;
|
||||
use pageserver_api::models::TimelineCreateRequestMode;
|
||||
use pageserver_api::models::TimelineCreateRequestModeImportPgdata;
|
||||
use pageserver_api::models::TimelinesInfoAndOffloaded;
|
||||
use pageserver_api::models::TopTenantShardItem;
|
||||
use pageserver_api::models::TopTenantShardsRequest;
|
||||
@@ -56,7 +55,6 @@ use tokio_util::sync::CancellationToken;
|
||||
use tracing::*;
|
||||
use utils::auth::JwtAuth;
|
||||
use utils::failpoint_support::failpoints_handler;
|
||||
use utils::http::endpoint::profile_cpu_handler;
|
||||
use utils::http::endpoint::prometheus_metrics_handler;
|
||||
use utils::http::endpoint::request_span;
|
||||
use utils::http::request::must_parse_query_param;
|
||||
@@ -82,7 +80,6 @@ use crate::tenant::secondary::SecondaryController;
|
||||
use crate::tenant::size::ModelInputs;
|
||||
use crate::tenant::storage_layer::LayerAccessStatsReset;
|
||||
use crate::tenant::storage_layer::LayerName;
|
||||
use crate::tenant::timeline::import_pgdata;
|
||||
use crate::tenant::timeline::offload::offload_timeline;
|
||||
use crate::tenant::timeline::offload::OffloadError;
|
||||
use crate::tenant::timeline::CompactFlags;
|
||||
@@ -128,7 +125,7 @@ pub struct State {
|
||||
conf: &'static PageServerConf,
|
||||
tenant_manager: Arc<TenantManager>,
|
||||
auth: Option<Arc<SwappableJwtAuth>>,
|
||||
allowlist_routes: &'static [&'static str],
|
||||
allowlist_routes: Vec<Uri>,
|
||||
remote_storage: GenericRemoteStorage,
|
||||
broker_client: storage_broker::BrokerClientChannel,
|
||||
disk_usage_eviction_state: Arc<disk_usage_eviction_task::State>,
|
||||
@@ -149,13 +146,10 @@ impl State {
|
||||
deletion_queue_client: DeletionQueueClient,
|
||||
secondary_controller: SecondaryController,
|
||||
) -> anyhow::Result<Self> {
|
||||
let allowlist_routes = &[
|
||||
"/v1/status",
|
||||
"/v1/doc",
|
||||
"/swagger.yml",
|
||||
"/metrics",
|
||||
"/profile/cpu",
|
||||
];
|
||||
let allowlist_routes = ["/v1/status", "/v1/doc", "/swagger.yml", "/metrics"]
|
||||
.iter()
|
||||
.map(|v| v.parse().unwrap())
|
||||
.collect::<Vec<_>>();
|
||||
Ok(Self {
|
||||
conf,
|
||||
tenant_manager,
|
||||
@@ -582,35 +576,6 @@ async fn timeline_create_handler(
|
||||
ancestor_timeline_id,
|
||||
ancestor_start_lsn,
|
||||
}),
|
||||
TimelineCreateRequestMode::ImportPgdata {
|
||||
import_pgdata:
|
||||
TimelineCreateRequestModeImportPgdata {
|
||||
location,
|
||||
idempotency_key,
|
||||
},
|
||||
} => tenant::CreateTimelineParams::ImportPgdata(tenant::CreateTimelineParamsImportPgdata {
|
||||
idempotency_key: import_pgdata::index_part_format::IdempotencyKey::new(
|
||||
idempotency_key.0,
|
||||
),
|
||||
new_timeline_id,
|
||||
location: {
|
||||
use import_pgdata::index_part_format::Location;
|
||||
use pageserver_api::models::ImportPgdataLocation;
|
||||
match location {
|
||||
#[cfg(feature = "testing")]
|
||||
ImportPgdataLocation::LocalFs { path } => Location::LocalFs { path },
|
||||
ImportPgdataLocation::AwsS3 {
|
||||
region,
|
||||
bucket,
|
||||
key,
|
||||
} => Location::AwsS3 {
|
||||
region,
|
||||
bucket,
|
||||
key,
|
||||
},
|
||||
}
|
||||
},
|
||||
}),
|
||||
};
|
||||
|
||||
let ctx = RequestContext::new(TaskKind::MgmtRequest, DownloadBehavior::Error);
|
||||
@@ -3183,7 +3148,7 @@ pub fn make_router(
|
||||
if auth.is_some() {
|
||||
router = router.middleware(auth_middleware(|request| {
|
||||
let state = get_state(request);
|
||||
if state.allowlist_routes.contains(&request.uri().path()) {
|
||||
if state.allowlist_routes.contains(request.uri()) {
|
||||
None
|
||||
} else {
|
||||
state.auth.as_deref()
|
||||
@@ -3202,7 +3167,6 @@ pub fn make_router(
|
||||
Ok(router
|
||||
.data(state)
|
||||
.get("/metrics", |r| request_span(r, prometheus_metrics_handler))
|
||||
.get("/profile/cpu", |r| request_span(r, profile_cpu_handler))
|
||||
.get("/v1/status", |r| api_handler(r, status_handler))
|
||||
.put("/v1/failpoints", |r| {
|
||||
testing_api_handler("manage failpoints", r, failpoints_handler)
|
||||
|
||||
@@ -1068,26 +1068,21 @@ impl PageServerHandler {
|
||||
));
|
||||
}
|
||||
|
||||
// Check explicitly for INVALID just to get a less scary error message if the request is obviously bogus
|
||||
if request_lsn == Lsn::INVALID {
|
||||
return Err(PageStreamError::BadRequest(
|
||||
"invalid LSN(0) in request".into(),
|
||||
));
|
||||
}
|
||||
|
||||
// Clients should only read from recent LSNs on their timeline, or from locations holding an LSN lease.
|
||||
//
|
||||
// We may have older data available, but we make a best effort to detect this case and return an error,
|
||||
// to distinguish a misbehaving client (asking for old LSN) from a storage issue (data missing at a legitimate LSN).
|
||||
if request_lsn < **latest_gc_cutoff_lsn && !timeline.is_gc_blocked_by_lsn_lease_deadline() {
|
||||
if request_lsn < **latest_gc_cutoff_lsn {
|
||||
let gc_info = &timeline.gc_info.read().unwrap();
|
||||
if !gc_info.leases.contains_key(&request_lsn) {
|
||||
return Err(
|
||||
// The requested LSN is below gc cutoff and is not guarded by a lease.
|
||||
|
||||
// Check explicitly for INVALID just to get a less scary error message if the
|
||||
// request is obviously bogus
|
||||
return Err(if request_lsn == Lsn::INVALID {
|
||||
PageStreamError::BadRequest("invalid LSN(0) in request".into())
|
||||
} else {
|
||||
PageStreamError::BadRequest(format!(
|
||||
"tried to request a page version that was garbage collected. requested at {} gc cutoff {}",
|
||||
request_lsn, **latest_gc_cutoff_lsn
|
||||
).into())
|
||||
);
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -2276,9 +2276,9 @@ impl<'a> Version<'a> {
|
||||
//--- Metadata structs stored in key-value pairs in the repository.
|
||||
|
||||
#[derive(Debug, Serialize, Deserialize)]
|
||||
pub(crate) struct DbDirectory {
|
||||
struct DbDirectory {
|
||||
// (spcnode, dbnode) -> (do relmapper and PG_VERSION files exist)
|
||||
pub(crate) dbdirs: HashMap<(Oid, Oid), bool>,
|
||||
dbdirs: HashMap<(Oid, Oid), bool>,
|
||||
}
|
||||
|
||||
// The format of TwoPhaseDirectory changed in PostgreSQL v17, because the filenames of
|
||||
@@ -2287,8 +2287,8 @@ pub(crate) struct DbDirectory {
|
||||
// "pg_twophase/0000000A000002E4".
|
||||
|
||||
#[derive(Debug, Serialize, Deserialize)]
|
||||
pub(crate) struct TwoPhaseDirectory {
|
||||
pub(crate) xids: HashSet<TransactionId>,
|
||||
struct TwoPhaseDirectory {
|
||||
xids: HashSet<TransactionId>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Serialize, Deserialize)]
|
||||
@@ -2297,12 +2297,12 @@ struct TwoPhaseDirectoryV17 {
|
||||
}
|
||||
|
||||
#[derive(Debug, Serialize, Deserialize, Default)]
|
||||
pub(crate) struct RelDirectory {
|
||||
struct RelDirectory {
|
||||
// Set of relations that exist. (relfilenode, forknum)
|
||||
//
|
||||
// TODO: Store it as a btree or radix tree or something else that spans multiple
|
||||
// key-value pairs, if you have a lot of relations
|
||||
pub(crate) rels: HashSet<(Oid, u8)>,
|
||||
rels: HashSet<(Oid, u8)>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Serialize, Deserialize)]
|
||||
@@ -2311,9 +2311,9 @@ struct RelSizeEntry {
|
||||
}
|
||||
|
||||
#[derive(Debug, Serialize, Deserialize, Default)]
|
||||
pub(crate) struct SlruSegmentDirectory {
|
||||
struct SlruSegmentDirectory {
|
||||
// Set of SLRU segments that exist.
|
||||
pub(crate) segments: HashSet<u32>,
|
||||
segments: HashSet<u32>,
|
||||
}
|
||||
|
||||
#[derive(Copy, Clone, PartialEq, Eq, Debug, enum_map::Enum)]
|
||||
|
||||
@@ -381,8 +381,6 @@ pub enum TaskKind {
|
||||
UnitTest,
|
||||
|
||||
DetachAncestor,
|
||||
|
||||
ImportPgdata,
|
||||
}
|
||||
|
||||
#[derive(Default)]
|
||||
|
||||
@@ -43,9 +43,7 @@ use std::sync::atomic::AtomicBool;
|
||||
use std::sync::Weak;
|
||||
use std::time::SystemTime;
|
||||
use storage_broker::BrokerClientChannel;
|
||||
use timeline::import_pgdata;
|
||||
use timeline::offload::offload_timeline;
|
||||
use timeline::ShutdownMode;
|
||||
use tokio::io::BufReader;
|
||||
use tokio::sync::watch;
|
||||
use tokio::task::JoinSet;
|
||||
@@ -191,7 +189,6 @@ pub struct TenantSharedResources {
|
||||
/// A [`Tenant`] is really an _attached_ tenant. The configuration
|
||||
/// for an attached tenant is a subset of the [`LocationConf`], represented
|
||||
/// in this struct.
|
||||
#[derive(Clone)]
|
||||
pub(super) struct AttachedTenantConf {
|
||||
tenant_conf: TenantConfOpt,
|
||||
location: AttachedLocationConfig,
|
||||
@@ -375,6 +372,7 @@ pub struct Tenant {
|
||||
|
||||
l0_flush_global_state: L0FlushGlobalState,
|
||||
}
|
||||
|
||||
impl std::fmt::Debug for Tenant {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
write!(f, "{} ({})", self.tenant_shard_id, self.current_state())
|
||||
@@ -861,7 +859,6 @@ impl Debug for SetStoppingError {
|
||||
pub(crate) enum CreateTimelineParams {
|
||||
Bootstrap(CreateTimelineParamsBootstrap),
|
||||
Branch(CreateTimelineParamsBranch),
|
||||
ImportPgdata(CreateTimelineParamsImportPgdata),
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
@@ -879,14 +876,7 @@ pub(crate) struct CreateTimelineParamsBranch {
|
||||
pub(crate) ancestor_start_lsn: Option<Lsn>,
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
pub(crate) struct CreateTimelineParamsImportPgdata {
|
||||
pub(crate) new_timeline_id: TimelineId,
|
||||
pub(crate) location: import_pgdata::index_part_format::Location,
|
||||
pub(crate) idempotency_key: import_pgdata::index_part_format::IdempotencyKey,
|
||||
}
|
||||
|
||||
/// What is used to determine idempotency of a [`Tenant::create_timeline`] call in [`Tenant::start_creating_timeline`] in [`Tenant::start_creating_timeline`].
|
||||
/// What is used to determine idempotency of a [`Tenant::create_timeline`] call in [`Tenant::start_creating_timeline`].
|
||||
///
|
||||
/// Each [`Timeline`] object holds [`Self`] as an immutable property in [`Timeline::create_idempotency`].
|
||||
///
|
||||
@@ -916,50 +906,19 @@ pub(crate) enum CreateTimelineIdempotency {
|
||||
ancestor_timeline_id: TimelineId,
|
||||
ancestor_start_lsn: Lsn,
|
||||
},
|
||||
ImportPgdata(CreatingTimelineIdempotencyImportPgdata),
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, PartialEq, Eq)]
|
||||
pub(crate) struct CreatingTimelineIdempotencyImportPgdata {
|
||||
idempotency_key: import_pgdata::index_part_format::IdempotencyKey,
|
||||
}
|
||||
|
||||
/// What is returned by [`Tenant::start_creating_timeline`].
|
||||
#[must_use]
|
||||
enum StartCreatingTimelineResult {
|
||||
CreateGuard(TimelineCreateGuard),
|
||||
enum StartCreatingTimelineResult<'t> {
|
||||
CreateGuard(TimelineCreateGuard<'t>),
|
||||
Idempotent(Arc<Timeline>),
|
||||
}
|
||||
|
||||
enum TimelineInitAndSyncResult {
|
||||
ReadyToActivate(Arc<Timeline>),
|
||||
NeedsSpawnImportPgdata(TimelineInitAndSyncNeedsSpawnImportPgdata),
|
||||
}
|
||||
|
||||
impl TimelineInitAndSyncResult {
|
||||
fn ready_to_activate(self) -> Option<Arc<Timeline>> {
|
||||
match self {
|
||||
Self::ReadyToActivate(timeline) => Some(timeline),
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[must_use]
|
||||
struct TimelineInitAndSyncNeedsSpawnImportPgdata {
|
||||
timeline: Arc<Timeline>,
|
||||
import_pgdata: import_pgdata::index_part_format::Root,
|
||||
guard: TimelineCreateGuard,
|
||||
}
|
||||
|
||||
/// What is returned by [`Tenant::create_timeline`].
|
||||
enum CreateTimelineResult {
|
||||
Created(Arc<Timeline>),
|
||||
Idempotent(Arc<Timeline>),
|
||||
/// IMPORTANT: This [`Arc<Timeline>`] object is not in [`Tenant::timelines`] when
|
||||
/// we return this result, nor will this concrete object ever be added there.
|
||||
/// Cf method comment on [`Tenant::create_timeline_import_pgdata`].
|
||||
ImportSpawned(Arc<Timeline>),
|
||||
}
|
||||
|
||||
impl CreateTimelineResult {
|
||||
@@ -967,19 +926,18 @@ impl CreateTimelineResult {
|
||||
match self {
|
||||
Self::Created(_) => "Created",
|
||||
Self::Idempotent(_) => "Idempotent",
|
||||
Self::ImportSpawned(_) => "ImportSpawned",
|
||||
}
|
||||
}
|
||||
fn timeline(&self) -> &Arc<Timeline> {
|
||||
match self {
|
||||
Self::Created(t) | Self::Idempotent(t) | Self::ImportSpawned(t) => t,
|
||||
Self::Created(t) | Self::Idempotent(t) => t,
|
||||
}
|
||||
}
|
||||
/// Unit test timelines aren't activated, test has to do it if it needs to.
|
||||
#[cfg(test)]
|
||||
fn into_timeline_for_test(self) -> Arc<Timeline> {
|
||||
match self {
|
||||
Self::Created(t) | Self::Idempotent(t) | Self::ImportSpawned(t) => t,
|
||||
Self::Created(t) | Self::Idempotent(t) => t,
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -1003,13 +961,33 @@ pub enum CreateTimelineError {
|
||||
}
|
||||
|
||||
#[derive(thiserror::Error, Debug)]
|
||||
pub enum InitdbError {
|
||||
#[error("Operation was cancelled")]
|
||||
Cancelled,
|
||||
#[error(transparent)]
|
||||
enum InitdbError {
|
||||
Other(anyhow::Error),
|
||||
#[error(transparent)]
|
||||
Inner(postgres_initdb::Error),
|
||||
Cancelled,
|
||||
Spawn(std::io::Result<()>),
|
||||
Failed(std::process::ExitStatus, Vec<u8>),
|
||||
}
|
||||
|
||||
impl fmt::Display for InitdbError {
|
||||
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
||||
match self {
|
||||
InitdbError::Cancelled => write!(f, "Operation was cancelled"),
|
||||
InitdbError::Spawn(e) => write!(f, "Spawn error: {:?}", e),
|
||||
InitdbError::Failed(status, stderr) => write!(
|
||||
f,
|
||||
"Command failed with status {:?}: {}",
|
||||
status,
|
||||
String::from_utf8_lossy(stderr)
|
||||
),
|
||||
InitdbError::Other(e) => write!(f, "Error: {:?}", e),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl From<std::io::Error> for InitdbError {
|
||||
fn from(error: std::io::Error) -> Self {
|
||||
InitdbError::Spawn(Err(error))
|
||||
}
|
||||
}
|
||||
|
||||
enum CreateTimelineCause {
|
||||
@@ -1017,15 +995,6 @@ enum CreateTimelineCause {
|
||||
Delete,
|
||||
}
|
||||
|
||||
enum LoadTimelineCause {
|
||||
Attach,
|
||||
Unoffload,
|
||||
ImportPgdata {
|
||||
create_guard: TimelineCreateGuard,
|
||||
activate: ActivateTimelineArgs,
|
||||
},
|
||||
}
|
||||
|
||||
#[derive(thiserror::Error, Debug)]
|
||||
pub(crate) enum GcError {
|
||||
// The tenant is shutting down
|
||||
@@ -1102,35 +1071,24 @@ impl Tenant {
|
||||
/// it is marked as Active.
|
||||
#[allow(clippy::too_many_arguments)]
|
||||
async fn timeline_init_and_sync(
|
||||
self: &Arc<Self>,
|
||||
&self,
|
||||
timeline_id: TimelineId,
|
||||
resources: TimelineResources,
|
||||
mut index_part: IndexPart,
|
||||
index_part: IndexPart,
|
||||
metadata: TimelineMetadata,
|
||||
ancestor: Option<Arc<Timeline>>,
|
||||
cause: LoadTimelineCause,
|
||||
ctx: &RequestContext,
|
||||
) -> anyhow::Result<TimelineInitAndSyncResult> {
|
||||
_ctx: &RequestContext,
|
||||
) -> anyhow::Result<()> {
|
||||
let tenant_id = self.tenant_shard_id;
|
||||
|
||||
let import_pgdata = index_part.import_pgdata.take();
|
||||
let idempotency = match &import_pgdata {
|
||||
Some(import_pgdata) => {
|
||||
CreateTimelineIdempotency::ImportPgdata(CreatingTimelineIdempotencyImportPgdata {
|
||||
idempotency_key: import_pgdata.idempotency_key().clone(),
|
||||
})
|
||||
let idempotency = if metadata.ancestor_timeline().is_none() {
|
||||
CreateTimelineIdempotency::Bootstrap {
|
||||
pg_version: metadata.pg_version(),
|
||||
}
|
||||
None => {
|
||||
if metadata.ancestor_timeline().is_none() {
|
||||
CreateTimelineIdempotency::Bootstrap {
|
||||
pg_version: metadata.pg_version(),
|
||||
}
|
||||
} else {
|
||||
CreateTimelineIdempotency::Branch {
|
||||
ancestor_timeline_id: metadata.ancestor_timeline().unwrap(),
|
||||
ancestor_start_lsn: metadata.ancestor_lsn(),
|
||||
}
|
||||
}
|
||||
} else {
|
||||
CreateTimelineIdempotency::Branch {
|
||||
ancestor_timeline_id: metadata.ancestor_timeline().unwrap(),
|
||||
ancestor_start_lsn: metadata.ancestor_lsn(),
|
||||
}
|
||||
};
|
||||
|
||||
@@ -1162,91 +1120,39 @@ impl Tenant {
|
||||
format!("Failed to load layermap for timeline {tenant_id}/{timeline_id}")
|
||||
})?;
|
||||
|
||||
match import_pgdata {
|
||||
Some(import_pgdata) if !import_pgdata.is_done() => {
|
||||
match cause {
|
||||
LoadTimelineCause::Attach | LoadTimelineCause::Unoffload => (),
|
||||
LoadTimelineCause::ImportPgdata { .. } => {
|
||||
unreachable!("ImportPgdata should not be reloading timeline import is done and persisted as such in s3")
|
||||
}
|
||||
{
|
||||
// avoiding holding it across awaits
|
||||
let mut timelines_accessor = self.timelines.lock().unwrap();
|
||||
match timelines_accessor.entry(timeline_id) {
|
||||
// We should never try and load the same timeline twice during startup
|
||||
Entry::Occupied(_) => {
|
||||
unreachable!(
|
||||
"Timeline {tenant_id}/{timeline_id} already exists in the tenant map"
|
||||
);
|
||||
}
|
||||
let mut guard = self.timelines_creating.lock().unwrap();
|
||||
if !guard.insert(timeline_id) {
|
||||
// We should never try and load the same timeline twice during startup
|
||||
unreachable!("Timeline {tenant_id}/{timeline_id} is already being created")
|
||||
Entry::Vacant(v) => {
|
||||
v.insert(Arc::clone(&timeline));
|
||||
timeline.maybe_spawn_flush_loop();
|
||||
}
|
||||
let timeline_create_guard = TimelineCreateGuard {
|
||||
_tenant_gate_guard: self.gate.enter()?,
|
||||
owning_tenant: self.clone(),
|
||||
timeline_id,
|
||||
idempotency,
|
||||
// The users of this specific return value don't need the timeline_path in there.
|
||||
timeline_path: timeline
|
||||
.conf
|
||||
.timeline_path(&timeline.tenant_shard_id, &timeline.timeline_id),
|
||||
};
|
||||
Ok(TimelineInitAndSyncResult::NeedsSpawnImportPgdata(
|
||||
TimelineInitAndSyncNeedsSpawnImportPgdata {
|
||||
timeline,
|
||||
import_pgdata,
|
||||
guard: timeline_create_guard,
|
||||
},
|
||||
))
|
||||
}
|
||||
Some(_) | None => {
|
||||
{
|
||||
let mut timelines_accessor = self.timelines.lock().unwrap();
|
||||
match timelines_accessor.entry(timeline_id) {
|
||||
// We should never try and load the same timeline twice during startup
|
||||
Entry::Occupied(_) => {
|
||||
unreachable!(
|
||||
"Timeline {tenant_id}/{timeline_id} already exists in the tenant map"
|
||||
);
|
||||
}
|
||||
Entry::Vacant(v) => {
|
||||
v.insert(Arc::clone(&timeline));
|
||||
timeline.maybe_spawn_flush_loop();
|
||||
}
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
// Sanity check: a timeline should have some content.
|
||||
anyhow::ensure!(
|
||||
ancestor.is_some()
|
||||
|| timeline
|
||||
.layers
|
||||
.read()
|
||||
.await
|
||||
.layer_map()
|
||||
.expect("currently loading, layer manager cannot be shutdown already")
|
||||
.iter_historic_layers()
|
||||
.next()
|
||||
.is_some(),
|
||||
"Timeline has no ancestor and no layer files"
|
||||
);
|
||||
// Sanity check: a timeline should have some content.
|
||||
anyhow::ensure!(
|
||||
ancestor.is_some()
|
||||
|| timeline
|
||||
.layers
|
||||
.read()
|
||||
.await
|
||||
.layer_map()
|
||||
.expect("currently loading, layer manager cannot be shutdown already")
|
||||
.iter_historic_layers()
|
||||
.next()
|
||||
.is_some(),
|
||||
"Timeline has no ancestor and no layer files"
|
||||
);
|
||||
|
||||
match cause {
|
||||
LoadTimelineCause::Attach | LoadTimelineCause::Unoffload => (),
|
||||
LoadTimelineCause::ImportPgdata {
|
||||
create_guard,
|
||||
activate,
|
||||
} => {
|
||||
// TODO: see the comment in the task code above how I'm not so certain
|
||||
// it is safe to activate here because of concurrent shutdowns.
|
||||
match activate {
|
||||
ActivateTimelineArgs::Yes { broker_client } => {
|
||||
info!("activating timeline after reload from pgdata import task");
|
||||
timeline.activate(self.clone(), broker_client, None, ctx);
|
||||
}
|
||||
ActivateTimelineArgs::No => (),
|
||||
}
|
||||
drop(create_guard);
|
||||
}
|
||||
}
|
||||
|
||||
Ok(TimelineInitAndSyncResult::ReadyToActivate(timeline))
|
||||
}
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Attach a tenant that's available in cloud storage.
|
||||
@@ -1671,46 +1577,24 @@ impl Tenant {
|
||||
}
|
||||
|
||||
// TODO again handle early failure
|
||||
let effect = self
|
||||
.load_remote_timeline(
|
||||
timeline_id,
|
||||
index_part,
|
||||
remote_metadata,
|
||||
TimelineResources {
|
||||
remote_client,
|
||||
timeline_get_throttle: self.timeline_get_throttle.clone(),
|
||||
l0_flush_global_state: self.l0_flush_global_state.clone(),
|
||||
},
|
||||
LoadTimelineCause::Attach,
|
||||
ctx,
|
||||
self.load_remote_timeline(
|
||||
timeline_id,
|
||||
index_part,
|
||||
remote_metadata,
|
||||
TimelineResources {
|
||||
remote_client,
|
||||
timeline_get_throttle: self.timeline_get_throttle.clone(),
|
||||
l0_flush_global_state: self.l0_flush_global_state.clone(),
|
||||
},
|
||||
ctx,
|
||||
)
|
||||
.await
|
||||
.with_context(|| {
|
||||
format!(
|
||||
"failed to load remote timeline {} for tenant {}",
|
||||
timeline_id, self.tenant_shard_id
|
||||
)
|
||||
.await
|
||||
.with_context(|| {
|
||||
format!(
|
||||
"failed to load remote timeline {} for tenant {}",
|
||||
timeline_id, self.tenant_shard_id
|
||||
)
|
||||
})?;
|
||||
|
||||
match effect {
|
||||
TimelineInitAndSyncResult::ReadyToActivate(_) => {
|
||||
// activation happens later, on Tenant::activate
|
||||
}
|
||||
TimelineInitAndSyncResult::NeedsSpawnImportPgdata(
|
||||
TimelineInitAndSyncNeedsSpawnImportPgdata {
|
||||
timeline,
|
||||
import_pgdata,
|
||||
guard,
|
||||
},
|
||||
) => {
|
||||
tokio::task::spawn(self.clone().create_timeline_import_pgdata_task(
|
||||
timeline,
|
||||
import_pgdata,
|
||||
ActivateTimelineArgs::No,
|
||||
guard,
|
||||
));
|
||||
}
|
||||
}
|
||||
})?;
|
||||
}
|
||||
|
||||
// Walk through deleted timelines, resume deletion
|
||||
@@ -1834,14 +1718,13 @@ impl Tenant {
|
||||
|
||||
#[instrument(skip_all, fields(timeline_id=%timeline_id))]
|
||||
async fn load_remote_timeline(
|
||||
self: &Arc<Self>,
|
||||
&self,
|
||||
timeline_id: TimelineId,
|
||||
index_part: IndexPart,
|
||||
remote_metadata: TimelineMetadata,
|
||||
resources: TimelineResources,
|
||||
cause: LoadTimelineCause,
|
||||
ctx: &RequestContext,
|
||||
) -> anyhow::Result<TimelineInitAndSyncResult> {
|
||||
) -> anyhow::Result<()> {
|
||||
span::debug_assert_current_span_has_tenant_id();
|
||||
|
||||
info!("downloading index file for timeline {}", timeline_id);
|
||||
@@ -1868,7 +1751,6 @@ impl Tenant {
|
||||
index_part,
|
||||
remote_metadata,
|
||||
ancestor,
|
||||
cause,
|
||||
ctx,
|
||||
)
|
||||
.await
|
||||
@@ -1925,7 +1807,6 @@ impl Tenant {
|
||||
self.tenant_shard_id,
|
||||
timeline_id,
|
||||
self.generation,
|
||||
&self.tenant_conf.load().location,
|
||||
)
|
||||
}
|
||||
|
||||
@@ -2055,7 +1936,6 @@ impl Tenant {
|
||||
TimelineArchivalError::Other(anyhow::anyhow!("Timeline already exists"))
|
||||
}
|
||||
TimelineExclusionError::Other(e) => TimelineArchivalError::Other(e),
|
||||
TimelineExclusionError::ShuttingDown => TimelineArchivalError::Cancelled,
|
||||
})?;
|
||||
|
||||
let timeline_preload = self
|
||||
@@ -2094,7 +1974,6 @@ impl Tenant {
|
||||
index_part,
|
||||
remote_metadata,
|
||||
timeline_resources,
|
||||
LoadTimelineCause::Unoffload,
|
||||
&ctx,
|
||||
)
|
||||
.await
|
||||
@@ -2332,7 +2211,7 @@ impl Tenant {
|
||||
///
|
||||
/// Tests should use `Tenant::create_test_timeline` to set up the minimum required metadata keys.
|
||||
pub(crate) async fn create_empty_timeline(
|
||||
self: &Arc<Self>,
|
||||
&self,
|
||||
new_timeline_id: TimelineId,
|
||||
initdb_lsn: Lsn,
|
||||
pg_version: u32,
|
||||
@@ -2382,7 +2261,7 @@ impl Tenant {
|
||||
// Our current tests don't need the background loops.
|
||||
#[cfg(test)]
|
||||
pub async fn create_test_timeline(
|
||||
self: &Arc<Self>,
|
||||
&self,
|
||||
new_timeline_id: TimelineId,
|
||||
initdb_lsn: Lsn,
|
||||
pg_version: u32,
|
||||
@@ -2421,7 +2300,7 @@ impl Tenant {
|
||||
#[cfg(test)]
|
||||
#[allow(clippy::too_many_arguments)]
|
||||
pub async fn create_test_timeline_with_layers(
|
||||
self: &Arc<Self>,
|
||||
&self,
|
||||
new_timeline_id: TimelineId,
|
||||
initdb_lsn: Lsn,
|
||||
pg_version: u32,
|
||||
@@ -2558,16 +2437,6 @@ impl Tenant {
|
||||
self.branch_timeline(&ancestor_timeline, new_timeline_id, ancestor_start_lsn, ctx)
|
||||
.await?
|
||||
}
|
||||
CreateTimelineParams::ImportPgdata(params) => {
|
||||
self.create_timeline_import_pgdata(
|
||||
params,
|
||||
ActivateTimelineArgs::Yes {
|
||||
broker_client: broker_client.clone(),
|
||||
},
|
||||
ctx,
|
||||
)
|
||||
.await?
|
||||
}
|
||||
};
|
||||
|
||||
// At this point we have dropped our guard on [`Self::timelines_creating`], and
|
||||
@@ -2610,202 +2479,11 @@ impl Tenant {
|
||||
);
|
||||
timeline
|
||||
}
|
||||
CreateTimelineResult::ImportSpawned(timeline) => {
|
||||
info!("import task spawned, timeline will become visible and activated once the import is done");
|
||||
timeline
|
||||
}
|
||||
};
|
||||
|
||||
Ok(activated_timeline)
|
||||
}
|
||||
|
||||
/// The returned [`Arc<Timeline>`] is NOT in the [`Tenant::timelines`] map until the import
|
||||
/// completes in the background. A DIFFERENT [`Arc<Timeline>`] will be inserted into the
|
||||
/// [`Tenant::timelines`] map when the import completes.
|
||||
/// We only return an [`Arc<Timeline>`] here so the API handler can create a [`pageserver_api::models::TimelineInfo`]
|
||||
/// for the response.
|
||||
async fn create_timeline_import_pgdata(
|
||||
self: &Arc<Tenant>,
|
||||
params: CreateTimelineParamsImportPgdata,
|
||||
activate: ActivateTimelineArgs,
|
||||
ctx: &RequestContext,
|
||||
) -> Result<CreateTimelineResult, CreateTimelineError> {
|
||||
let CreateTimelineParamsImportPgdata {
|
||||
new_timeline_id,
|
||||
location,
|
||||
idempotency_key,
|
||||
} = params;
|
||||
|
||||
let started_at = chrono::Utc::now().naive_utc();
|
||||
|
||||
//
|
||||
// There's probably a simpler way to upload an index part, but, remote_timeline_client
|
||||
// is the canonical way we do it.
|
||||
// - create an empty timeline in-memory
|
||||
// - use its remote_timeline_client to do the upload
|
||||
// - dispose of the uninit timeline
|
||||
// - keep the creation guard alive
|
||||
|
||||
let timeline_create_guard = match self
|
||||
.start_creating_timeline(
|
||||
new_timeline_id,
|
||||
CreateTimelineIdempotency::ImportPgdata(CreatingTimelineIdempotencyImportPgdata {
|
||||
idempotency_key: idempotency_key.clone(),
|
||||
}),
|
||||
)
|
||||
.await?
|
||||
{
|
||||
StartCreatingTimelineResult::CreateGuard(guard) => guard,
|
||||
StartCreatingTimelineResult::Idempotent(timeline) => {
|
||||
return Ok(CreateTimelineResult::Idempotent(timeline))
|
||||
}
|
||||
};
|
||||
|
||||
let mut uninit_timeline = {
|
||||
let this = &self;
|
||||
let initdb_lsn = Lsn(0);
|
||||
let _ctx = ctx;
|
||||
async move {
|
||||
let new_metadata = TimelineMetadata::new(
|
||||
// Initialize disk_consistent LSN to 0. The caller must import some data to
|
||||
// make it valid, before calling finish_creation()
|
||||
Lsn(0),
|
||||
None,
|
||||
None,
|
||||
Lsn(0),
|
||||
initdb_lsn,
|
||||
initdb_lsn,
|
||||
15,
|
||||
);
|
||||
this.prepare_new_timeline(
|
||||
new_timeline_id,
|
||||
&new_metadata,
|
||||
timeline_create_guard,
|
||||
initdb_lsn,
|
||||
None,
|
||||
)
|
||||
.await
|
||||
}
|
||||
}
|
||||
.await?;
|
||||
|
||||
let in_progress = import_pgdata::index_part_format::InProgress {
|
||||
idempotency_key,
|
||||
location,
|
||||
started_at,
|
||||
};
|
||||
let index_part = import_pgdata::index_part_format::Root::V1(
|
||||
import_pgdata::index_part_format::V1::InProgress(in_progress),
|
||||
);
|
||||
uninit_timeline
|
||||
.raw_timeline()
|
||||
.unwrap()
|
||||
.remote_client
|
||||
.schedule_index_upload_for_import_pgdata_state_update(Some(index_part.clone()))?;
|
||||
|
||||
// wait_completion happens in caller
|
||||
|
||||
let (timeline, timeline_create_guard) = uninit_timeline.finish_creation_myself();
|
||||
|
||||
tokio::spawn(self.clone().create_timeline_import_pgdata_task(
|
||||
timeline.clone(),
|
||||
index_part,
|
||||
activate,
|
||||
timeline_create_guard,
|
||||
));
|
||||
|
||||
// NB: the timeline doesn't exist in self.timelines at this point
|
||||
Ok(CreateTimelineResult::ImportSpawned(timeline))
|
||||
}
|
||||
|
||||
#[instrument(skip_all, fields(tenant_id=%self.tenant_shard_id.tenant_id, shard_id=%self.tenant_shard_id.shard_slug(), timeline_id=%timeline.timeline_id))]
|
||||
async fn create_timeline_import_pgdata_task(
|
||||
self: Arc<Tenant>,
|
||||
timeline: Arc<Timeline>,
|
||||
index_part: import_pgdata::index_part_format::Root,
|
||||
activate: ActivateTimelineArgs,
|
||||
timeline_create_guard: TimelineCreateGuard,
|
||||
) {
|
||||
debug_assert_current_span_has_tenant_and_timeline_id();
|
||||
info!("starting");
|
||||
scopeguard::defer! {info!("exiting")};
|
||||
|
||||
let res = self
|
||||
.create_timeline_import_pgdata_task_impl(
|
||||
timeline,
|
||||
index_part,
|
||||
activate,
|
||||
timeline_create_guard,
|
||||
)
|
||||
.await;
|
||||
if let Err(err) = &res {
|
||||
error!(?err, "task failed");
|
||||
// TODO sleep & retry, sensitive to tenant shutdown
|
||||
// TODO: allow timeline deletion requests => should cancel the task
|
||||
}
|
||||
}
|
||||
|
||||
async fn create_timeline_import_pgdata_task_impl(
|
||||
self: Arc<Tenant>,
|
||||
timeline: Arc<Timeline>,
|
||||
index_part: import_pgdata::index_part_format::Root,
|
||||
activate: ActivateTimelineArgs,
|
||||
timeline_create_guard: TimelineCreateGuard,
|
||||
) -> Result<(), anyhow::Error> {
|
||||
let ctx = RequestContext::new(TaskKind::ImportPgdata, DownloadBehavior::Warn);
|
||||
|
||||
info!("importing pgdata");
|
||||
import_pgdata::doit(&timeline, index_part, &ctx, self.cancel.clone())
|
||||
.await
|
||||
.context("import")?;
|
||||
info!("import done");
|
||||
|
||||
//
|
||||
// Reload timeline from remote.
|
||||
// This proves that the remote state is attachable, and it reuses the code.
|
||||
//
|
||||
// TODO: think about whether this is safe to do with concurrent Tenant::shutdown.
|
||||
// timeline_create_guard holds the tenant gate open, so, shutdown cannot _complete_ until we exit.
|
||||
// But our activate() call might launch new background tasks after Tenant::shutdown
|
||||
// already went past shutting down the Tenant::timelines, which this timeline here is no part of.
|
||||
// I think the same problem exists with the bootstrap & branch mgmt API tasks (tenant shutting
|
||||
// down while bootstrapping/branching + activating), but, the race condition is much more likely
|
||||
// to manifest because of the long runtime of this import task.
|
||||
|
||||
// in theory this shouldn't even .await anything except for coop yield
|
||||
info!("shutting down timeline");
|
||||
timeline.shutdown(ShutdownMode::Hard).await;
|
||||
info!("timeline shut down, reloading from remote");
|
||||
// TODO: we can't do the following check because create_timeline_import_pgdata must return an Arc<Timeline>
|
||||
// let Some(timeline) = Arc::into_inner(timeline) else {
|
||||
// anyhow::bail!("implementation error: timeline that we shut down was still referenced from somewhere");
|
||||
// };
|
||||
let timeline_id = timeline.timeline_id;
|
||||
|
||||
// load from object storage like Tenant::attach does
|
||||
let resources = self.build_timeline_resources(timeline_id);
|
||||
let index_part = resources
|
||||
.remote_client
|
||||
.download_index_file(&self.cancel)
|
||||
.await?;
|
||||
let index_part = match index_part {
|
||||
MaybeDeletedIndexPart::Deleted(_) => {
|
||||
// likely concurrent delete call, cplane should prevent this
|
||||
anyhow::bail!("index part says deleted but we are not done creating yet, this should not happen but")
|
||||
}
|
||||
MaybeDeletedIndexPart::IndexPart(p) => p,
|
||||
};
|
||||
let metadata = index_part.metadata.clone();
|
||||
self
|
||||
.load_remote_timeline(timeline_id, index_part, metadata, resources, LoadTimelineCause::ImportPgdata{
|
||||
create_guard: timeline_create_guard, activate, }, &ctx)
|
||||
.await?
|
||||
.ready_to_activate()
|
||||
.context("implementation error: reloaded timeline still needs import after import reported success")?;
|
||||
|
||||
anyhow::Ok(())
|
||||
}
|
||||
|
||||
pub(crate) async fn delete_timeline(
|
||||
self: Arc<Self>,
|
||||
timeline_id: TimelineId,
|
||||
@@ -2849,10 +2527,6 @@ impl Tenant {
|
||||
{
|
||||
let conf = self.tenant_conf.load();
|
||||
|
||||
// If we may not delete layers, then simply skip GC. Even though a tenant
|
||||
// in AttachedMulti state could do GC and just enqueue the blocked deletions,
|
||||
// the only advantage to doing it is to perhaps shrink the LayerMap metadata
|
||||
// a bit sooner than we would achieve by waiting for AttachedSingle status.
|
||||
if !conf.location.may_delete_layers_hint() {
|
||||
info!("Skipping GC in location state {:?}", conf.location);
|
||||
return Ok(GcResult::default());
|
||||
@@ -2894,14 +2568,7 @@ impl Tenant {
|
||||
|
||||
{
|
||||
let conf = self.tenant_conf.load();
|
||||
|
||||
// Note that compaction usually requires deletions, but we don't respect
|
||||
// may_delete_layers_hint here: that is because tenants in AttachedMulti
|
||||
// should proceed with compaction even if they can't do deletion, to avoid
|
||||
// accumulating dangerously deep stacks of L0 layers. Deletions will be
|
||||
// enqueued inside RemoteTimelineClient, and executed later if/when we transition
|
||||
// to AttachedSingle state.
|
||||
if !conf.location.may_upload_layers_hint() {
|
||||
if !conf.location.may_delete_layers_hint() || !conf.location.may_upload_layers_hint() {
|
||||
info!("Skipping compaction in location state {:?}", conf.location);
|
||||
return Ok(false);
|
||||
}
|
||||
@@ -3657,13 +3324,6 @@ where
|
||||
Ok(result)
|
||||
}
|
||||
|
||||
enum ActivateTimelineArgs {
|
||||
Yes {
|
||||
broker_client: storage_broker::BrokerClientChannel,
|
||||
},
|
||||
No,
|
||||
}
|
||||
|
||||
impl Tenant {
|
||||
pub fn tenant_specific_overrides(&self) -> TenantConfOpt {
|
||||
self.tenant_conf.load().tenant_conf.clone()
|
||||
@@ -3786,7 +3446,6 @@ impl Tenant {
|
||||
// this race is not possible if both request types come from the storage
|
||||
// controller (as they should!) because an exclusive op lock is required
|
||||
// on the storage controller side.
|
||||
|
||||
self.tenant_conf.rcu(|inner| {
|
||||
Arc::new(AttachedTenantConf {
|
||||
tenant_conf: new_tenant_conf.clone(),
|
||||
@@ -3796,22 +3455,20 @@ impl Tenant {
|
||||
})
|
||||
});
|
||||
|
||||
let updated = self.tenant_conf.load().clone();
|
||||
|
||||
self.tenant_conf_updated(&new_tenant_conf);
|
||||
// Don't hold self.timelines.lock() during the notifies.
|
||||
// There's no risk of deadlock right now, but there could be if we consolidate
|
||||
// mutexes in struct Timeline in the future.
|
||||
let timelines = self.list_timelines();
|
||||
for timeline in timelines {
|
||||
timeline.tenant_conf_updated(&updated);
|
||||
timeline.tenant_conf_updated(&new_tenant_conf);
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) fn set_new_location_config(&self, new_conf: AttachedTenantConf) {
|
||||
let new_tenant_conf = new_conf.tenant_conf.clone();
|
||||
|
||||
self.tenant_conf.store(Arc::new(new_conf.clone()));
|
||||
self.tenant_conf.store(Arc::new(new_conf));
|
||||
|
||||
self.tenant_conf_updated(&new_tenant_conf);
|
||||
// Don't hold self.timelines.lock() during the notifies.
|
||||
@@ -3819,7 +3476,7 @@ impl Tenant {
|
||||
// mutexes in struct Timeline in the future.
|
||||
let timelines = self.list_timelines();
|
||||
for timeline in timelines {
|
||||
timeline.tenant_conf_updated(&new_conf);
|
||||
timeline.tenant_conf_updated(&new_tenant_conf);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -3847,7 +3504,6 @@ impl Tenant {
|
||||
/// `validate_ancestor == false` is used when a timeline is created for deletion
|
||||
/// and we might not have the ancestor present anymore which is fine for to be
|
||||
/// deleted timelines.
|
||||
#[allow(clippy::too_many_arguments)]
|
||||
fn create_timeline_struct(
|
||||
&self,
|
||||
new_timeline_id: TimelineId,
|
||||
@@ -4611,17 +4267,16 @@ impl Tenant {
|
||||
/// If the timeline was already created in the meantime, we check whether this
|
||||
/// request conflicts or is idempotent, based on `state`.
|
||||
async fn start_creating_timeline(
|
||||
self: &Arc<Self>,
|
||||
&self,
|
||||
new_timeline_id: TimelineId,
|
||||
idempotency: CreateTimelineIdempotency,
|
||||
) -> Result<StartCreatingTimelineResult, CreateTimelineError> {
|
||||
) -> Result<StartCreatingTimelineResult<'_>, CreateTimelineError> {
|
||||
let allow_offloaded = false;
|
||||
match self.create_timeline_create_guard(new_timeline_id, idempotency, allow_offloaded) {
|
||||
Ok(create_guard) => {
|
||||
pausable_failpoint!("timeline-creation-after-uninit");
|
||||
Ok(StartCreatingTimelineResult::CreateGuard(create_guard))
|
||||
}
|
||||
Err(TimelineExclusionError::ShuttingDown) => Err(CreateTimelineError::ShuttingDown),
|
||||
Err(TimelineExclusionError::AlreadyCreating) => {
|
||||
// Creation is in progress, we cannot create it again, and we cannot
|
||||
// check if this request matches the existing one, so caller must try
|
||||
@@ -4889,7 +4544,6 @@ impl Tenant {
|
||||
self.tenant_shard_id,
|
||||
timeline_id,
|
||||
self.generation,
|
||||
&self.tenant_conf.load().location,
|
||||
)
|
||||
}
|
||||
|
||||
@@ -4911,7 +4565,7 @@ impl Tenant {
|
||||
&'a self,
|
||||
new_timeline_id: TimelineId,
|
||||
new_metadata: &TimelineMetadata,
|
||||
create_guard: TimelineCreateGuard,
|
||||
create_guard: TimelineCreateGuard<'a>,
|
||||
start_lsn: Lsn,
|
||||
ancestor: Option<Arc<Timeline>>,
|
||||
) -> anyhow::Result<UninitializedTimeline<'a>> {
|
||||
@@ -4971,7 +4625,7 @@ impl Tenant {
|
||||
/// The `allow_offloaded` parameter controls whether to tolerate the existence of
|
||||
/// offloaded timelines or not.
|
||||
fn create_timeline_create_guard(
|
||||
self: &Arc<Self>,
|
||||
&self,
|
||||
timeline_id: TimelineId,
|
||||
idempotency: CreateTimelineIdempotency,
|
||||
allow_offloaded: bool,
|
||||
@@ -5231,16 +4885,48 @@ async fn run_initdb(
|
||||
|
||||
let _permit = INIT_DB_SEMAPHORE.acquire().await;
|
||||
|
||||
let res = postgres_initdb::do_run_initdb(postgres_initdb::RunInitdbArgs {
|
||||
superuser: &conf.superuser,
|
||||
locale: &conf.locale,
|
||||
initdb_bin: &initdb_bin_path,
|
||||
pg_version,
|
||||
library_search_path: &initdb_lib_dir,
|
||||
pgdata: initdb_target_dir,
|
||||
})
|
||||
.await
|
||||
.map_err(InitdbError::Inner);
|
||||
let mut initdb_command = tokio::process::Command::new(&initdb_bin_path);
|
||||
initdb_command
|
||||
.args(["--pgdata", initdb_target_dir.as_ref()])
|
||||
.args(["--username", &conf.superuser])
|
||||
.args(["--encoding", "utf8"])
|
||||
.args(["--locale", &conf.locale])
|
||||
.arg("--no-instructions")
|
||||
.arg("--no-sync")
|
||||
.env_clear()
|
||||
.env("LD_LIBRARY_PATH", &initdb_lib_dir)
|
||||
.env("DYLD_LIBRARY_PATH", &initdb_lib_dir)
|
||||
.stdin(std::process::Stdio::null())
|
||||
// stdout invocation produces the same output every time, we don't need it
|
||||
.stdout(std::process::Stdio::null())
|
||||
// we would be interested in the stderr output, if there was any
|
||||
.stderr(std::process::Stdio::piped());
|
||||
|
||||
// Before version 14, only the libc provider was available.
|
||||
if pg_version > 14 {
|
||||
// Version 17 brought with it a builtin locale provider which only provides
|
||||
// C and C.UTF-8. While being safer for collation purposes since it is
|
||||
// guaranteed to be consistent throughout a major release, it is also more
|
||||
// performant.
|
||||
let locale_provider = if pg_version >= 17 { "builtin" } else { "libc" };
|
||||
|
||||
initdb_command.args(["--locale-provider", locale_provider]);
|
||||
}
|
||||
|
||||
let initdb_proc = initdb_command.spawn()?;
|
||||
|
||||
// Ideally we'd select here with the cancellation token, but the problem is that
|
||||
// we can't safely terminate initdb: it launches processes of its own, and killing
|
||||
// initdb doesn't kill them. After we return from this function, we want the target
|
||||
// directory to be able to be cleaned up.
|
||||
// See https://github.com/neondatabase/neon/issues/6385
|
||||
let initdb_output = initdb_proc.wait_with_output().await?;
|
||||
if !initdb_output.status.success() {
|
||||
return Err(InitdbError::Failed(
|
||||
initdb_output.status,
|
||||
initdb_output.stderr,
|
||||
));
|
||||
}
|
||||
|
||||
// This isn't true cancellation support, see above. Still return an error to
|
||||
// exercise the cancellation code path.
|
||||
@@ -5248,7 +4934,7 @@ async fn run_initdb(
|
||||
return Err(InitdbError::Cancelled);
|
||||
}
|
||||
|
||||
res
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Dump contents of a layer file to stdout.
|
||||
|
||||
@@ -1719,11 +1719,10 @@ impl TenantManager {
|
||||
parent_layers.push(relative_path.to_owned());
|
||||
}
|
||||
}
|
||||
|
||||
if parent_layers.is_empty() {
|
||||
tracing::info!("Ancestor shard has no resident layer to hard link");
|
||||
}
|
||||
|
||||
debug_assert!(
|
||||
!parent_layers.is_empty(),
|
||||
"shutdown cannot empty the layermap"
|
||||
);
|
||||
(parent_timelines, parent_layers)
|
||||
};
|
||||
|
||||
|
||||
@@ -197,9 +197,8 @@ use utils::backoff::{
|
||||
self, exponential_backoff, DEFAULT_BASE_BACKOFF_SECONDS, DEFAULT_MAX_BACKOFF_SECONDS,
|
||||
};
|
||||
use utils::pausable_failpoint;
|
||||
use utils::shard::ShardNumber;
|
||||
|
||||
use std::collections::{HashMap, HashSet, VecDeque};
|
||||
use std::collections::{HashMap, VecDeque};
|
||||
use std::sync::atomic::{AtomicU32, Ordering};
|
||||
use std::sync::{Arc, Mutex, OnceLock};
|
||||
use std::time::Duration;
|
||||
@@ -223,7 +222,7 @@ use crate::task_mgr::shutdown_token;
|
||||
use crate::tenant::debug_assert_current_span_has_tenant_and_timeline_id;
|
||||
use crate::tenant::remote_timeline_client::download::download_retry;
|
||||
use crate::tenant::storage_layer::AsLayerDesc;
|
||||
use crate::tenant::upload_queue::{Delete, OpType, UploadQueueStoppedDeletable};
|
||||
use crate::tenant::upload_queue::{Delete, UploadQueueStoppedDeletable};
|
||||
use crate::tenant::TIMELINES_SEGMENT_NAME;
|
||||
use crate::{
|
||||
config::PageServerConf,
|
||||
@@ -241,10 +240,8 @@ use utils::id::{TenantId, TimelineId};
|
||||
|
||||
use self::index::IndexPart;
|
||||
|
||||
use super::config::AttachedLocationConfig;
|
||||
use super::metadata::MetadataUpdate;
|
||||
use super::storage_layer::{Layer, LayerName, ResidentLayer};
|
||||
use super::timeline::import_pgdata;
|
||||
use super::upload_queue::{NotInitialized, SetDeletedFlagProgress};
|
||||
use super::{DeleteTimelineError, Generation};
|
||||
|
||||
@@ -304,36 +301,6 @@ pub enum WaitCompletionError {
|
||||
#[derive(Debug, thiserror::Error)]
|
||||
#[error("Upload queue either in unexpected state or hasn't downloaded manifest yet")]
|
||||
pub struct UploadQueueNotReadyError;
|
||||
/// Behavioral modes that enable seamless live migration.
|
||||
///
|
||||
/// See docs/rfcs/028-pageserver-migration.md to understand how these fit in.
|
||||
struct RemoteTimelineClientConfig {
|
||||
/// If this is false, then updates to remote_consistent_lsn are dropped rather
|
||||
/// than being submitted to DeletionQueue for validation. This behavior is
|
||||
/// used when a tenant attachment is known to have a stale generation number,
|
||||
/// such that validation attempts will always fail. This is not necessary
|
||||
/// for correctness, but avoids spamming error statistics with failed validations
|
||||
/// when doing migrations of tenants.
|
||||
process_remote_consistent_lsn_updates: bool,
|
||||
|
||||
/// If this is true, then object deletions are held in a buffer in RemoteTimelineClient
|
||||
/// rather than being submitted to the DeletionQueue. This behavior is used when a tenant
|
||||
/// is known to be multi-attached, in order to avoid disrupting other attached tenants
|
||||
/// whose generations' metadata refers to the deleted objects.
|
||||
block_deletions: bool,
|
||||
}
|
||||
|
||||
/// RemoteTimelineClientConfig's state is entirely driven by LocationConf, but we do
|
||||
/// not carry the entire LocationConf structure: it's much more than we need. The From
|
||||
/// impl extracts the subset of the LocationConf that is interesting to RemoteTimelineClient.
|
||||
impl From<&AttachedLocationConfig> for RemoteTimelineClientConfig {
|
||||
fn from(lc: &AttachedLocationConfig) -> Self {
|
||||
Self {
|
||||
block_deletions: !lc.may_delete_layers_hint(),
|
||||
process_remote_consistent_lsn_updates: lc.may_upload_layers_hint(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// A client for accessing a timeline's data in remote storage.
|
||||
///
|
||||
@@ -354,7 +321,7 @@ impl From<&AttachedLocationConfig> for RemoteTimelineClientConfig {
|
||||
/// in the index part file, whenever timeline metadata is uploaded.
|
||||
///
|
||||
/// Downloads are not queued, they are performed immediately.
|
||||
pub(crate) struct RemoteTimelineClient {
|
||||
pub struct RemoteTimelineClient {
|
||||
conf: &'static PageServerConf,
|
||||
|
||||
runtime: tokio::runtime::Handle,
|
||||
@@ -371,9 +338,6 @@ pub(crate) struct RemoteTimelineClient {
|
||||
|
||||
deletion_queue_client: DeletionQueueClient,
|
||||
|
||||
/// Subset of tenant configuration used to control upload behaviors during migrations
|
||||
config: std::sync::RwLock<RemoteTimelineClientConfig>,
|
||||
|
||||
cancel: CancellationToken,
|
||||
}
|
||||
|
||||
@@ -384,14 +348,13 @@ impl RemoteTimelineClient {
|
||||
/// Note: the caller must initialize the upload queue before any uploads can be scheduled,
|
||||
/// by calling init_upload_queue.
|
||||
///
|
||||
pub(crate) fn new(
|
||||
pub fn new(
|
||||
remote_storage: GenericRemoteStorage,
|
||||
deletion_queue_client: DeletionQueueClient,
|
||||
conf: &'static PageServerConf,
|
||||
tenant_shard_id: TenantShardId,
|
||||
timeline_id: TimelineId,
|
||||
generation: Generation,
|
||||
location_conf: &AttachedLocationConfig,
|
||||
) -> RemoteTimelineClient {
|
||||
RemoteTimelineClient {
|
||||
conf,
|
||||
@@ -411,7 +374,6 @@ impl RemoteTimelineClient {
|
||||
&tenant_shard_id,
|
||||
&timeline_id,
|
||||
)),
|
||||
config: std::sync::RwLock::new(RemoteTimelineClientConfig::from(location_conf)),
|
||||
cancel: CancellationToken::new(),
|
||||
}
|
||||
}
|
||||
@@ -467,43 +429,6 @@ impl RemoteTimelineClient {
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Notify this client of a change to its parent tenant's config, as this may cause us to
|
||||
/// take action (unblocking deletions when transitioning from AttachedMulti to AttachedSingle)
|
||||
pub(super) fn update_config(&self, location_conf: &AttachedLocationConfig) {
|
||||
let new_conf = RemoteTimelineClientConfig::from(location_conf);
|
||||
let unblocked = !new_conf.block_deletions;
|
||||
|
||||
// Update config before draining deletions, so that we don't race with more being
|
||||
// inserted. This can result in deletions happening out of order, but that does not
|
||||
// violate any invariants: deletions only need to be ordered relative to upload of the index
|
||||
// that dereferences the deleted objects, and we are not changing that order.
|
||||
*self.config.write().unwrap() = new_conf;
|
||||
|
||||
if unblocked {
|
||||
// If we may now delete layers, drain any that were blocked in our old
|
||||
// configuration state
|
||||
let mut queue_locked = self.upload_queue.lock().unwrap();
|
||||
|
||||
if let Ok(queue) = queue_locked.initialized_mut() {
|
||||
let blocked_deletions = std::mem::take(&mut queue.blocked_deletions);
|
||||
for d in blocked_deletions {
|
||||
if let Err(e) = self.deletion_queue_client.push_layers_sync(
|
||||
self.tenant_shard_id,
|
||||
self.timeline_id,
|
||||
self.generation,
|
||||
d.layers,
|
||||
) {
|
||||
// This could happen if the pageserver is shut down while a tenant
|
||||
// is transitioning from a deletion-blocked state: we will leak some
|
||||
// S3 objects in this case.
|
||||
warn!("Failed to drain blocked deletions: {}", e);
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns `None` if nothing is yet uploaded, `Some(disk_consistent_lsn)` otherwise.
|
||||
pub fn remote_consistent_lsn_projected(&self) -> Option<Lsn> {
|
||||
match &mut *self.upload_queue.lock().unwrap() {
|
||||
@@ -814,18 +739,6 @@ impl RemoteTimelineClient {
|
||||
Ok(need_wait)
|
||||
}
|
||||
|
||||
/// Launch an index-file upload operation in the background, setting `import_pgdata` field.
|
||||
pub(crate) fn schedule_index_upload_for_import_pgdata_state_update(
|
||||
self: &Arc<Self>,
|
||||
state: Option<import_pgdata::index_part_format::Root>,
|
||||
) -> anyhow::Result<()> {
|
||||
let mut guard = self.upload_queue.lock().unwrap();
|
||||
let upload_queue = guard.initialized_mut()?;
|
||||
upload_queue.dirty.import_pgdata = state;
|
||||
self.schedule_index_upload(upload_queue)?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
///
|
||||
/// Launch an index-file upload operation in the background, if necessary.
|
||||
///
|
||||
@@ -1103,7 +1016,7 @@ impl RemoteTimelineClient {
|
||||
"scheduled layer file upload {layer}",
|
||||
);
|
||||
|
||||
let op = UploadOp::UploadLayer(layer, metadata, None);
|
||||
let op = UploadOp::UploadLayer(layer, metadata);
|
||||
self.metric_begin(&op);
|
||||
upload_queue.queued_operations.push_back(op);
|
||||
}
|
||||
@@ -1818,7 +1731,7 @@ impl RemoteTimelineClient {
|
||||
// have finished.
|
||||
upload_queue.inprogress_tasks.is_empty()
|
||||
}
|
||||
UploadOp::Delete(..) => {
|
||||
UploadOp::Delete(_) => {
|
||||
// Wait for preceding uploads to finish. Concurrent deletions are OK, though.
|
||||
upload_queue.num_inprogress_deletions == upload_queue.inprogress_tasks.len()
|
||||
}
|
||||
@@ -1846,32 +1759,19 @@ impl RemoteTimelineClient {
|
||||
}
|
||||
|
||||
// We can launch this task. Remove it from the queue first.
|
||||
let mut next_op = upload_queue.queued_operations.pop_front().unwrap();
|
||||
let next_op = upload_queue.queued_operations.pop_front().unwrap();
|
||||
|
||||
debug!("starting op: {}", next_op);
|
||||
|
||||
// Update the counters and prepare
|
||||
match &mut next_op {
|
||||
UploadOp::UploadLayer(layer, meta, mode) => {
|
||||
if upload_queue
|
||||
.recently_deleted
|
||||
.remove(&(layer.layer_desc().layer_name().clone(), meta.generation))
|
||||
{
|
||||
*mode = Some(OpType::FlushDeletion);
|
||||
} else {
|
||||
*mode = Some(OpType::MayReorder)
|
||||
}
|
||||
// Update the counters
|
||||
match next_op {
|
||||
UploadOp::UploadLayer(_, _) => {
|
||||
upload_queue.num_inprogress_layer_uploads += 1;
|
||||
}
|
||||
UploadOp::UploadMetadata { .. } => {
|
||||
upload_queue.num_inprogress_metadata_uploads += 1;
|
||||
}
|
||||
UploadOp::Delete(Delete { layers }) => {
|
||||
for (name, meta) in layers {
|
||||
upload_queue
|
||||
.recently_deleted
|
||||
.insert((name.clone(), meta.generation));
|
||||
}
|
||||
UploadOp::Delete(_) => {
|
||||
upload_queue.num_inprogress_deletions += 1;
|
||||
}
|
||||
UploadOp::Barrier(sender) => {
|
||||
@@ -1947,66 +1847,7 @@ impl RemoteTimelineClient {
|
||||
}
|
||||
|
||||
let upload_result: anyhow::Result<()> = match &task.op {
|
||||
UploadOp::UploadLayer(ref layer, ref layer_metadata, mode) => {
|
||||
if let Some(OpType::FlushDeletion) = mode {
|
||||
if self.config.read().unwrap().block_deletions {
|
||||
// Of course, this is not efficient... but usually the queue should be empty.
|
||||
let mut queue_locked = self.upload_queue.lock().unwrap();
|
||||
let mut detected = false;
|
||||
if let Ok(queue) = queue_locked.initialized_mut() {
|
||||
for list in queue.blocked_deletions.iter_mut() {
|
||||
list.layers.retain(|(name, meta)| {
|
||||
if name == &layer.layer_desc().layer_name()
|
||||
&& meta.generation == layer_metadata.generation
|
||||
{
|
||||
detected = true;
|
||||
// remove the layer from deletion queue
|
||||
false
|
||||
} else {
|
||||
// keep the layer
|
||||
true
|
||||
}
|
||||
});
|
||||
}
|
||||
}
|
||||
if detected {
|
||||
info!(
|
||||
"cancelled blocked deletion of layer {} at gen {:?}",
|
||||
layer.layer_desc().layer_name(),
|
||||
layer_metadata.generation
|
||||
);
|
||||
}
|
||||
} else {
|
||||
// TODO: we did not guarantee that upload task starts after deletion task, so there could be possibly race conditions
|
||||
// that we still get the layer deleted. But this only happens if someone creates a layer immediately after it's deleted,
|
||||
// which is not possible in the current system.
|
||||
info!(
|
||||
"waiting for deletion queue flush to complete before uploading layer {} at gen {:?}",
|
||||
layer.layer_desc().layer_name(),
|
||||
layer_metadata.generation
|
||||
);
|
||||
{
|
||||
// We are going to flush, we can clean up the recently deleted list.
|
||||
let mut queue_locked = self.upload_queue.lock().unwrap();
|
||||
if let Ok(queue) = queue_locked.initialized_mut() {
|
||||
queue.recently_deleted.clear();
|
||||
}
|
||||
}
|
||||
if let Err(e) = self.deletion_queue_client.flush_execute().await {
|
||||
warn!(
|
||||
"failed to flush the deletion queue before uploading layer {} at gen {:?}, still proceeding to upload: {e:#} ",
|
||||
layer.layer_desc().layer_name(),
|
||||
layer_metadata.generation
|
||||
);
|
||||
} else {
|
||||
info!(
|
||||
"done flushing deletion queue before uploading layer {} at gen {:?}",
|
||||
layer.layer_desc().layer_name(),
|
||||
layer_metadata.generation
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
UploadOp::UploadLayer(ref layer, ref layer_metadata) => {
|
||||
let local_path = layer.local_path();
|
||||
|
||||
// We should only be uploading layers created by this `Tenant`'s lifetime, so
|
||||
@@ -2071,24 +1912,16 @@ impl RemoteTimelineClient {
|
||||
res
|
||||
}
|
||||
UploadOp::Delete(delete) => {
|
||||
if self.config.read().unwrap().block_deletions {
|
||||
let mut queue_locked = self.upload_queue.lock().unwrap();
|
||||
if let Ok(queue) = queue_locked.initialized_mut() {
|
||||
queue.blocked_deletions.push(delete.clone());
|
||||
}
|
||||
Ok(())
|
||||
} else {
|
||||
pausable_failpoint!("before-delete-layer-pausable");
|
||||
self.deletion_queue_client
|
||||
.push_layers(
|
||||
self.tenant_shard_id,
|
||||
self.timeline_id,
|
||||
self.generation,
|
||||
delete.layers.clone(),
|
||||
)
|
||||
.await
|
||||
.map_err(|e| anyhow::anyhow!(e))
|
||||
}
|
||||
pausable_failpoint!("before-delete-layer-pausable");
|
||||
self.deletion_queue_client
|
||||
.push_layers(
|
||||
self.tenant_shard_id,
|
||||
self.timeline_id,
|
||||
self.generation,
|
||||
delete.layers.clone(),
|
||||
)
|
||||
.await
|
||||
.map_err(|e| anyhow::anyhow!(e))
|
||||
}
|
||||
unexpected @ UploadOp::Barrier(_) | unexpected @ UploadOp::Shutdown => {
|
||||
// unreachable. Barrier operations are handled synchronously in
|
||||
@@ -2170,7 +2003,7 @@ impl RemoteTimelineClient {
|
||||
upload_queue.inprogress_tasks.remove(&task.task_id);
|
||||
|
||||
let lsn_update = match task.op {
|
||||
UploadOp::UploadLayer(_, _, _) => {
|
||||
UploadOp::UploadLayer(_, _) => {
|
||||
upload_queue.num_inprogress_layer_uploads -= 1;
|
||||
None
|
||||
}
|
||||
@@ -2195,16 +2028,8 @@ impl RemoteTimelineClient {
|
||||
// Legacy mode: skip validating generation
|
||||
upload_queue.visible_remote_consistent_lsn.store(lsn);
|
||||
None
|
||||
} else if self
|
||||
.config
|
||||
.read()
|
||||
.unwrap()
|
||||
.process_remote_consistent_lsn_updates
|
||||
{
|
||||
Some((lsn, upload_queue.visible_remote_consistent_lsn.clone()))
|
||||
} else {
|
||||
// Our config disables remote_consistent_lsn updates: drop it.
|
||||
None
|
||||
Some((lsn, upload_queue.visible_remote_consistent_lsn.clone()))
|
||||
}
|
||||
}
|
||||
UploadOp::Delete(_) => {
|
||||
@@ -2247,7 +2072,7 @@ impl RemoteTimelineClient {
|
||||
)> {
|
||||
use RemoteTimelineClientMetricsCallTrackSize::DontTrackSize;
|
||||
let res = match op {
|
||||
UploadOp::UploadLayer(_, m, _) => (
|
||||
UploadOp::UploadLayer(_, m) => (
|
||||
RemoteOpFileKind::Layer,
|
||||
RemoteOpKind::Upload,
|
||||
RemoteTimelineClientMetricsCallTrackSize::Bytes(m.file_size),
|
||||
@@ -2341,10 +2166,8 @@ impl RemoteTimelineClient {
|
||||
queued_operations: VecDeque::default(),
|
||||
#[cfg(feature = "testing")]
|
||||
dangling_files: HashMap::default(),
|
||||
blocked_deletions: Vec::new(),
|
||||
shutting_down: false,
|
||||
shutdown_ready: Arc::new(tokio::sync::Semaphore::new(0)),
|
||||
recently_deleted: HashSet::new(),
|
||||
};
|
||||
|
||||
let upload_queue = std::mem::replace(
|
||||
@@ -2408,28 +2231,6 @@ impl RemoteTimelineClient {
|
||||
UploadQueue::Initialized(x) => x.no_pending_work(),
|
||||
}
|
||||
}
|
||||
|
||||
/// 'foreign' in the sense that it does not belong to this tenant shard. This method
|
||||
/// is used during GC for other shards to get the index of shard zero.
|
||||
pub(crate) async fn download_foreign_index(
|
||||
&self,
|
||||
shard_number: ShardNumber,
|
||||
cancel: &CancellationToken,
|
||||
) -> Result<(IndexPart, Generation, std::time::SystemTime), DownloadError> {
|
||||
let foreign_shard_id = TenantShardId {
|
||||
shard_number,
|
||||
shard_count: self.tenant_shard_id.shard_count,
|
||||
tenant_id: self.tenant_shard_id.tenant_id,
|
||||
};
|
||||
download_index_part(
|
||||
&self.storage_impl,
|
||||
&foreign_shard_id,
|
||||
&self.timeline_id,
|
||||
Generation::MAX,
|
||||
cancel,
|
||||
)
|
||||
.await
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) struct UploadQueueAccessor<'a> {
|
||||
@@ -2578,7 +2379,6 @@ mod tests {
|
||||
use crate::{
|
||||
context::RequestContext,
|
||||
tenant::{
|
||||
config::AttachmentMode,
|
||||
harness::{TenantHarness, TIMELINE_ID},
|
||||
storage_layer::layer::local_layer_path,
|
||||
Tenant, Timeline,
|
||||
@@ -2664,10 +2464,6 @@ mod tests {
|
||||
|
||||
/// Construct a RemoteTimelineClient in an arbitrary generation
|
||||
fn build_client(&self, generation: Generation) -> Arc<RemoteTimelineClient> {
|
||||
let location_conf = AttachedLocationConfig {
|
||||
generation,
|
||||
attach_mode: AttachmentMode::Single,
|
||||
};
|
||||
Arc::new(RemoteTimelineClient {
|
||||
conf: self.harness.conf,
|
||||
runtime: tokio::runtime::Handle::current(),
|
||||
@@ -2681,7 +2477,6 @@ mod tests {
|
||||
&self.harness.tenant_shard_id,
|
||||
&TIMELINE_ID,
|
||||
)),
|
||||
config: std::sync::RwLock::new(RemoteTimelineClientConfig::from(&location_conf)),
|
||||
cancel: CancellationToken::new(),
|
||||
})
|
||||
}
|
||||
|
||||
@@ -706,7 +706,7 @@ where
|
||||
.and_then(|x| x)
|
||||
}
|
||||
|
||||
pub(crate) async fn download_retry_forever<T, O, F>(
|
||||
async fn download_retry_forever<T, O, F>(
|
||||
op: O,
|
||||
description: &str,
|
||||
cancel: &CancellationToken,
|
||||
|
||||
@@ -12,7 +12,6 @@ use utils::id::TimelineId;
|
||||
|
||||
use crate::tenant::metadata::TimelineMetadata;
|
||||
use crate::tenant::storage_layer::LayerName;
|
||||
use crate::tenant::timeline::import_pgdata;
|
||||
use crate::tenant::Generation;
|
||||
use pageserver_api::shard::ShardIndex;
|
||||
|
||||
@@ -38,13 +37,6 @@ pub struct IndexPart {
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub archived_at: Option<NaiveDateTime>,
|
||||
|
||||
/// This field supports import-from-pgdata ("fast imports" platform feature).
|
||||
/// We don't currently use fast imports, so, this field is None for all production timelines.
|
||||
/// See <https://github.com/neondatabase/neon/pull/9218> for more information.
|
||||
#[serde(default)]
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub import_pgdata: Option<import_pgdata::index_part_format::Root>,
|
||||
|
||||
/// Per layer file name metadata, which can be present for a present or missing layer file.
|
||||
///
|
||||
/// Older versions of `IndexPart` will not have this property or have only a part of metadata
|
||||
@@ -98,11 +90,10 @@ impl IndexPart {
|
||||
/// - 7: metadata_bytes is no longer written, but still read
|
||||
/// - 8: added `archived_at`
|
||||
/// - 9: +gc_blocking
|
||||
/// - 10: +import_pgdata
|
||||
const LATEST_VERSION: usize = 10;
|
||||
const LATEST_VERSION: usize = 9;
|
||||
|
||||
// Versions we may see when reading from a bucket.
|
||||
pub const KNOWN_VERSIONS: &'static [usize] = &[1, 2, 3, 4, 5, 6, 7, 8, 9, 10];
|
||||
pub const KNOWN_VERSIONS: &'static [usize] = &[1, 2, 3, 4, 5, 6, 7, 8, 9];
|
||||
|
||||
pub const FILE_NAME: &'static str = "index_part.json";
|
||||
|
||||
@@ -117,7 +108,6 @@ impl IndexPart {
|
||||
lineage: Default::default(),
|
||||
gc_blocking: None,
|
||||
last_aux_file_policy: None,
|
||||
import_pgdata: None,
|
||||
}
|
||||
}
|
||||
|
||||
@@ -391,7 +381,6 @@ mod tests {
|
||||
lineage: Lineage::default(),
|
||||
gc_blocking: None,
|
||||
last_aux_file_policy: None,
|
||||
import_pgdata: None,
|
||||
};
|
||||
|
||||
let part = IndexPart::from_json_bytes(example.as_bytes()).unwrap();
|
||||
@@ -436,7 +425,6 @@ mod tests {
|
||||
lineage: Lineage::default(),
|
||||
gc_blocking: None,
|
||||
last_aux_file_policy: None,
|
||||
import_pgdata: None,
|
||||
};
|
||||
|
||||
let part = IndexPart::from_json_bytes(example.as_bytes()).unwrap();
|
||||
@@ -482,7 +470,6 @@ mod tests {
|
||||
lineage: Lineage::default(),
|
||||
gc_blocking: None,
|
||||
last_aux_file_policy: None,
|
||||
import_pgdata: None,
|
||||
};
|
||||
|
||||
let part = IndexPart::from_json_bytes(example.as_bytes()).unwrap();
|
||||
@@ -531,7 +518,6 @@ mod tests {
|
||||
lineage: Lineage::default(),
|
||||
gc_blocking: None,
|
||||
last_aux_file_policy: None,
|
||||
import_pgdata: None,
|
||||
};
|
||||
|
||||
let empty_layers_parsed = IndexPart::from_json_bytes(empty_layers_json.as_bytes()).unwrap();
|
||||
@@ -575,7 +561,6 @@ mod tests {
|
||||
lineage: Lineage::default(),
|
||||
gc_blocking: None,
|
||||
last_aux_file_policy: None,
|
||||
import_pgdata: None,
|
||||
};
|
||||
|
||||
let part = IndexPart::from_json_bytes(example.as_bytes()).unwrap();
|
||||
@@ -622,7 +607,6 @@ mod tests {
|
||||
},
|
||||
gc_blocking: None,
|
||||
last_aux_file_policy: None,
|
||||
import_pgdata: None,
|
||||
};
|
||||
|
||||
let part = IndexPart::from_json_bytes(example.as_bytes()).unwrap();
|
||||
@@ -674,7 +658,6 @@ mod tests {
|
||||
},
|
||||
gc_blocking: None,
|
||||
last_aux_file_policy: Some(AuxFilePolicy::V2),
|
||||
import_pgdata: None,
|
||||
};
|
||||
|
||||
let part = IndexPart::from_json_bytes(example.as_bytes()).unwrap();
|
||||
@@ -731,7 +714,6 @@ mod tests {
|
||||
lineage: Default::default(),
|
||||
gc_blocking: None,
|
||||
last_aux_file_policy: Default::default(),
|
||||
import_pgdata: None,
|
||||
};
|
||||
|
||||
let part = IndexPart::from_json_bytes(example.as_bytes()).unwrap();
|
||||
@@ -789,7 +771,6 @@ mod tests {
|
||||
lineage: Default::default(),
|
||||
gc_blocking: None,
|
||||
last_aux_file_policy: Default::default(),
|
||||
import_pgdata: None,
|
||||
};
|
||||
|
||||
let part = IndexPart::from_json_bytes(example.as_bytes()).unwrap();
|
||||
@@ -852,83 +833,6 @@ mod tests {
|
||||
}),
|
||||
last_aux_file_policy: Default::default(),
|
||||
archived_at: None,
|
||||
import_pgdata: None,
|
||||
};
|
||||
|
||||
let part = IndexPart::from_json_bytes(example.as_bytes()).unwrap();
|
||||
assert_eq!(part, expected);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn v10_importpgdata_is_parsed() {
|
||||
let example = r#"{
|
||||
"version": 10,
|
||||
"layer_metadata":{
|
||||
"000000000000000000000000000000000000-FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF__0000000001696070-00000000016960E9": { "file_size": 25600000 },
|
||||
"000000000000000000000000000000000000-FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF__00000000016B59D8-00000000016B5A51": { "file_size": 9007199254741001 }
|
||||
},
|
||||
"disk_consistent_lsn":"0/16960E8",
|
||||
"metadata": {
|
||||
"disk_consistent_lsn": "0/16960E8",
|
||||
"prev_record_lsn": "0/1696070",
|
||||
"ancestor_timeline": "e45a7f37d3ee2ff17dc14bf4f4e3f52e",
|
||||
"ancestor_lsn": "0/0",
|
||||
"latest_gc_cutoff_lsn": "0/1696070",
|
||||
"initdb_lsn": "0/1696070",
|
||||
"pg_version": 14
|
||||
},
|
||||
"gc_blocking": {
|
||||
"started_at": "2024-07-19T09:00:00.123",
|
||||
"reasons": ["DetachAncestor"]
|
||||
},
|
||||
"import_pgdata": {
|
||||
"V1": {
|
||||
"Done": {
|
||||
"idempotency_key": "specified-by-client-218a5213-5044-4562-a28d-d024c5f057f5",
|
||||
"started_at": "2024-11-13T09:23:42.123",
|
||||
"finished_at": "2024-11-13T09:42:23.123"
|
||||
}
|
||||
}
|
||||
}
|
||||
}"#;
|
||||
|
||||
let expected = IndexPart {
|
||||
version: 10,
|
||||
layer_metadata: HashMap::from([
|
||||
("000000000000000000000000000000000000-FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF__0000000001696070-00000000016960E9".parse().unwrap(), LayerFileMetadata {
|
||||
file_size: 25600000,
|
||||
generation: Generation::none(),
|
||||
shard: ShardIndex::unsharded()
|
||||
}),
|
||||
("000000000000000000000000000000000000-FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF__00000000016B59D8-00000000016B5A51".parse().unwrap(), LayerFileMetadata {
|
||||
file_size: 9007199254741001,
|
||||
generation: Generation::none(),
|
||||
shard: ShardIndex::unsharded()
|
||||
})
|
||||
]),
|
||||
disk_consistent_lsn: "0/16960E8".parse::<Lsn>().unwrap(),
|
||||
metadata: TimelineMetadata::new(
|
||||
Lsn::from_str("0/16960E8").unwrap(),
|
||||
Some(Lsn::from_str("0/1696070").unwrap()),
|
||||
Some(TimelineId::from_str("e45a7f37d3ee2ff17dc14bf4f4e3f52e").unwrap()),
|
||||
Lsn::INVALID,
|
||||
Lsn::from_str("0/1696070").unwrap(),
|
||||
Lsn::from_str("0/1696070").unwrap(),
|
||||
14,
|
||||
).with_recalculated_checksum().unwrap(),
|
||||
deleted_at: None,
|
||||
lineage: Default::default(),
|
||||
gc_blocking: Some(GcBlocking {
|
||||
started_at: parse_naive_datetime("2024-07-19T09:00:00.123000000"),
|
||||
reasons: enumset::EnumSet::from_iter([GcBlockingReason::DetachAncestor]),
|
||||
}),
|
||||
last_aux_file_policy: Default::default(),
|
||||
archived_at: None,
|
||||
import_pgdata: Some(import_pgdata::index_part_format::Root::V1(import_pgdata::index_part_format::V1::Done(import_pgdata::index_part_format::Done{
|
||||
started_at: parse_naive_datetime("2024-11-13T09:23:42.123000000"),
|
||||
finished_at: parse_naive_datetime("2024-11-13T09:42:23.123000000"),
|
||||
idempotency_key: import_pgdata::index_part_format::IdempotencyKey::new("specified-by-client-218a5213-5044-4562-a28d-d024c5f057f5".to_string()),
|
||||
})))
|
||||
};
|
||||
|
||||
let part = IndexPart::from_json_bytes(example.as_bytes()).unwrap();
|
||||
|
||||
@@ -111,6 +111,15 @@ pub(crate) struct SecondaryTenant {
|
||||
pub(super) heatmap_total_size_metric: UIntGauge,
|
||||
}
|
||||
|
||||
impl Drop for SecondaryTenant {
|
||||
fn drop(&mut self) {
|
||||
let tenant_id = self.tenant_shard_id.tenant_id.to_string();
|
||||
let shard_id = format!("{}", self.tenant_shard_id.shard_slug());
|
||||
let _ = SECONDARY_RESIDENT_PHYSICAL_SIZE.remove_label_values(&[&tenant_id, &shard_id]);
|
||||
let _ = SECONDARY_HEATMAP_TOTAL_SIZE.remove_label_values(&[&tenant_id, &shard_id]);
|
||||
}
|
||||
}
|
||||
|
||||
impl SecondaryTenant {
|
||||
pub(crate) fn new(
|
||||
tenant_shard_id: TenantShardId,
|
||||
@@ -158,13 +167,6 @@ impl SecondaryTenant {
|
||||
|
||||
// Wait for any secondary downloader work to complete
|
||||
self.gate.close().await;
|
||||
|
||||
self.validate_metrics();
|
||||
|
||||
let tenant_id = self.tenant_shard_id.tenant_id.to_string();
|
||||
let shard_id = format!("{}", self.tenant_shard_id.shard_slug());
|
||||
let _ = SECONDARY_RESIDENT_PHYSICAL_SIZE.remove_label_values(&[&tenant_id, &shard_id]);
|
||||
let _ = SECONDARY_HEATMAP_TOTAL_SIZE.remove_label_values(&[&tenant_id, &shard_id]);
|
||||
}
|
||||
|
||||
pub(crate) fn set_config(&self, config: &SecondaryLocationConfig) {
|
||||
@@ -252,20 +254,6 @@ impl SecondaryTenant {
|
||||
.await
|
||||
.expect("secondary eviction should not have panicked");
|
||||
}
|
||||
|
||||
/// Exhaustive check that incrementally updated metrics match the actual state.
|
||||
#[cfg(feature = "testing")]
|
||||
fn validate_metrics(&self) {
|
||||
let detail = self.detail.lock().unwrap();
|
||||
let resident_size = detail.total_resident_size();
|
||||
|
||||
assert_eq!(resident_size, self.resident_size_metric.get());
|
||||
}
|
||||
|
||||
#[cfg(not(feature = "testing"))]
|
||||
fn validate_metrics(&self) {
|
||||
// No-op in non-testing builds
|
||||
}
|
||||
}
|
||||
|
||||
/// The SecondaryController is a pseudo-rpc client for administrative control of secondary mode downloads,
|
||||
|
||||
@@ -242,19 +242,6 @@ impl SecondaryDetail {
|
||||
}
|
||||
}
|
||||
|
||||
/// Adds up `metadata.file_size` over every on-disk layer of every timeline.
///
/// Built only with the `testing` feature.
#[cfg(feature = "testing")]
pub(crate) fn total_resident_size(&self) -> u64 {
    self.timelines
        .values()
        .flat_map(|timeline| timeline.on_disk_layers.values())
        .map(|layer| layer.metadata.file_size)
        .sum()
}
|
||||
|
||||
pub(super) fn evict_layer(
|
||||
&mut self,
|
||||
name: LayerName,
|
||||
@@ -776,7 +763,24 @@ impl<'a> TenantDownloader<'a> {
|
||||
}
|
||||
|
||||
// Metrics consistency check in testing builds
|
||||
self.secondary_state.validate_metrics();
|
||||
if cfg!(feature = "testing") {
|
||||
let detail = self.secondary_state.detail.lock().unwrap();
|
||||
let resident_size = detail
|
||||
.timelines
|
||||
.values()
|
||||
.map(|tl| {
|
||||
tl.on_disk_layers
|
||||
.values()
|
||||
.map(|v| v.metadata.file_size)
|
||||
.sum::<u64>()
|
||||
})
|
||||
.sum::<u64>();
|
||||
assert_eq!(
|
||||
resident_size,
|
||||
self.secondary_state.resident_size_metric.get()
|
||||
);
|
||||
}
|
||||
|
||||
// Only update last_etag after a full successful download: this way will not skip
|
||||
// the next download, even if the heatmap's actual etag is unchanged.
|
||||
self.secondary_state.detail.lock().unwrap().last_download = Some(DownloadSummary {
|
||||
|
||||
@@ -4,7 +4,6 @@ pub mod delete;
|
||||
pub(crate) mod detach_ancestor;
|
||||
mod eviction_task;
|
||||
pub(crate) mod handle;
|
||||
pub(crate) mod import_pgdata;
|
||||
mod init;
|
||||
pub mod layer_manager;
|
||||
pub(crate) mod logical_size;
|
||||
@@ -39,7 +38,6 @@ use pageserver_api::{
|
||||
shard::{ShardIdentity, ShardNumber, TenantShardId},
|
||||
};
|
||||
use rand::Rng;
|
||||
use remote_storage::DownloadError;
|
||||
use serde_with::serde_as;
|
||||
use storage_broker::BrokerClientChannel;
|
||||
use tokio::{
|
||||
@@ -274,7 +272,7 @@ pub struct Timeline {
|
||||
|
||||
/// Remote storage client.
|
||||
/// See [`remote_timeline_client`](super::remote_timeline_client) module comment for details.
|
||||
pub(crate) remote_client: Arc<RemoteTimelineClient>,
|
||||
pub remote_client: Arc<RemoteTimelineClient>,
|
||||
|
||||
// What page versions do we hold in the repository? If we get a
|
||||
// request > last_record_lsn, we need to wait until we receive all
|
||||
@@ -2086,11 +2084,6 @@ impl Timeline {
|
||||
.unwrap_or(self.conf.default_tenant_conf.lsn_lease_length_for_ts)
|
||||
}
|
||||
|
||||
pub(crate) fn is_gc_blocked_by_lsn_lease_deadline(&self) -> bool {
|
||||
let tenant_conf = self.tenant_conf.load();
|
||||
tenant_conf.is_gc_blocked_by_lsn_lease_deadline()
|
||||
}
|
||||
|
||||
pub(crate) fn get_lazy_slru_download(&self) -> bool {
|
||||
let tenant_conf = self.tenant_conf.load();
|
||||
tenant_conf
|
||||
@@ -2178,14 +2171,14 @@ impl Timeline {
|
||||
)
|
||||
}
|
||||
|
||||
pub(super) fn tenant_conf_updated(&self, new_conf: &AttachedTenantConf) {
|
||||
pub(super) fn tenant_conf_updated(&self, new_conf: &TenantConfOpt) {
|
||||
// NB: Most tenant conf options are read by background loops, so,
|
||||
// changes will automatically be picked up.
|
||||
|
||||
// The threshold is embedded in the metric. So, we need to update it.
|
||||
{
|
||||
let new_threshold = Self::get_evictions_low_residence_duration_metric_threshold(
|
||||
&new_conf.tenant_conf,
|
||||
new_conf,
|
||||
&self.conf.default_tenant_conf,
|
||||
);
|
||||
|
||||
@@ -2193,9 +2186,6 @@ impl Timeline {
|
||||
let shard_id_str = format!("{}", self.tenant_shard_id.shard_slug());
|
||||
|
||||
let timeline_id_str = self.timeline_id.to_string();
|
||||
|
||||
self.remote_client.update_config(&new_conf.location);
|
||||
|
||||
self.metrics
|
||||
.evictions_with_low_residence_duration
|
||||
.write()
|
||||
@@ -2653,7 +2643,6 @@ impl Timeline {
|
||||
//
|
||||
// NB: generation numbers naturally protect against this because they disambiguate
|
||||
// (1) and (4)
|
||||
// TODO: this is basically a no-op now, should we remove it?
|
||||
self.remote_client.schedule_barrier()?;
|
||||
// Tenant::create_timeline will wait for these uploads to happen before returning, or
|
||||
// on retry.
|
||||
@@ -2709,23 +2698,20 @@ impl Timeline {
|
||||
{
|
||||
Some(cancel) => cancel.cancel(),
|
||||
None => {
|
||||
match self.current_state() {
|
||||
TimelineState::Broken { .. } | TimelineState::Stopping => {
|
||||
// Can happen when timeline detail endpoint is used when deletion is ongoing (or its broken).
|
||||
// Don't make noise.
|
||||
}
|
||||
TimelineState::Loading => {
|
||||
// Import does not return an activated timeline.
|
||||
info!("discarding priority boost for logical size calculation because timeline is not yet active");
|
||||
}
|
||||
TimelineState::Active => {
|
||||
// activation should be setting the once cell
|
||||
warn!("unexpected: cancel_wait_for_background_loop_concurrency_limit_semaphore not set, priority-boosting of logical size calculation will not work");
|
||||
debug_assert!(false);
|
||||
}
|
||||
let state = self.current_state();
|
||||
if matches!(
|
||||
state,
|
||||
TimelineState::Broken { .. } | TimelineState::Stopping
|
||||
) {
|
||||
|
||||
// Can happen when timeline detail endpoint is used when deletion is ongoing (or its broken).
|
||||
// Don't make noise.
|
||||
} else {
|
||||
warn!("unexpected: cancel_wait_for_background_loop_concurrency_limit_semaphore not set, priority-boosting of logical size calculation will not work");
|
||||
debug_assert!(false);
|
||||
}
|
||||
}
|
||||
}
|
||||
};
|
||||
}
|
||||
}
|
||||
|
||||
@@ -4835,86 +4821,6 @@ impl Timeline {
|
||||
Ok(())
|
||||
}
|
||||
|
||||
async fn find_gc_time_cutoff(
|
||||
&self,
|
||||
pitr: Duration,
|
||||
cancel: &CancellationToken,
|
||||
ctx: &RequestContext,
|
||||
) -> Result<Option<Lsn>, PageReconstructError> {
|
||||
debug_assert_current_span_has_tenant_and_timeline_id();
|
||||
if self.shard_identity.is_shard_zero() {
|
||||
// Shard Zero has SLRU data and can calculate the PITR time -> LSN mapping itself
|
||||
let now = SystemTime::now();
|
||||
let time_range = if pitr == Duration::ZERO {
|
||||
humantime::parse_duration(DEFAULT_PITR_INTERVAL).expect("constant is invalid")
|
||||
} else {
|
||||
pitr
|
||||
};
|
||||
|
||||
// If PITR is so large or `now` is so small that this underflows, we will retain no history (highly unexpected case)
|
||||
let time_cutoff = now.checked_sub(time_range).unwrap_or(now);
|
||||
let timestamp = to_pg_timestamp(time_cutoff);
|
||||
|
||||
let time_cutoff = match self.find_lsn_for_timestamp(timestamp, cancel, ctx).await? {
|
||||
LsnForTimestamp::Present(lsn) => Some(lsn),
|
||||
LsnForTimestamp::Future(lsn) => {
|
||||
// The timestamp is in the future. That sounds impossible,
|
||||
// but what it really means is that there hasn't been
|
||||
// any commits since the cutoff timestamp.
|
||||
//
|
||||
// In this case we should use the LSN of the most recent commit,
|
||||
// which is implicitly the last LSN in the log.
|
||||
debug!("future({})", lsn);
|
||||
Some(self.get_last_record_lsn())
|
||||
}
|
||||
LsnForTimestamp::Past(lsn) => {
|
||||
debug!("past({})", lsn);
|
||||
None
|
||||
}
|
||||
LsnForTimestamp::NoData(lsn) => {
|
||||
debug!("nodata({})", lsn);
|
||||
None
|
||||
}
|
||||
};
|
||||
Ok(time_cutoff)
|
||||
} else {
|
||||
// Shards other than shard zero cannot do timestamp->lsn lookups, and must instead learn their GC cutoff
|
||||
// from shard zero's index. The index doesn't explicitly tell us the time cutoff, but we may assume that
|
||||
// the point up to which shard zero's last_gc_cutoff has advanced will either be the time cutoff, or a
|
||||
// space cutoff that we would also have respected ourselves.
|
||||
match self
|
||||
.remote_client
|
||||
.download_foreign_index(ShardNumber(0), cancel)
|
||||
.await
|
||||
{
|
||||
Ok((index_part, index_generation, _index_mtime)) => {
|
||||
tracing::info!("GC loaded shard zero metadata (gen {index_generation:?}): latest_gc_cutoff_lsn: {}",
|
||||
index_part.metadata.latest_gc_cutoff_lsn());
|
||||
Ok(Some(index_part.metadata.latest_gc_cutoff_lsn()))
|
||||
}
|
||||
Err(DownloadError::NotFound) => {
|
||||
// This is unexpected, because during timeline creations shard zero persists to remote
|
||||
// storage before other shards are called, and during timeline deletion non-zeroth shards are
|
||||
// deleted before the zeroth one. However, it should be harmless: if we somehow end up in this
|
||||
// state, then shard zero should _eventually_ write an index when it GCs.
|
||||
tracing::warn!("GC couldn't find shard zero's index for timeline");
|
||||
Ok(None)
|
||||
}
|
||||
Err(e) => {
|
||||
// TODO: this function should return a different error type than page reconstruct error
|
||||
Err(PageReconstructError::Other(anyhow::anyhow!(e)))
|
||||
}
|
||||
}
|
||||
|
||||
// TODO: after reading shard zero's GC cutoff, we should validate its generation with the storage
|
||||
// controller. Otherwise, it is possible that we see the GC cutoff go backwards while shard zero
|
||||
// is going through a migration if we read the old location's index and it has GC'd ahead of the
|
||||
// new location. This is legal in principle, but problematic in practice because it might result
|
||||
// in a timeline creation succeeding on shard zero ('s new location) but then failing on other shards
|
||||
// because they have GC'd past the branch point.
|
||||
}
|
||||
}
|
||||
|
||||
/// Find the Lsns above which layer files need to be retained on
|
||||
/// garbage collection.
|
||||
///
|
||||
@@ -4957,7 +4863,40 @@ impl Timeline {
|
||||
// - if PITR interval is set, then this is our cutoff.
|
||||
// - if PITR interval is not set, then we do a lookup
|
||||
// based on DEFAULT_PITR_INTERVAL, so that size-based retention does not result in keeping history around permanently on idle databases.
|
||||
let time_cutoff = self.find_gc_time_cutoff(pitr, cancel, ctx).await?;
|
||||
let time_cutoff = {
|
||||
let now = SystemTime::now();
|
||||
let time_range = if pitr == Duration::ZERO {
|
||||
humantime::parse_duration(DEFAULT_PITR_INTERVAL).expect("constant is invalid")
|
||||
} else {
|
||||
pitr
|
||||
};
|
||||
|
||||
// If PITR is so large or `now` is so small that this underflows, we will retain no history (highly unexpected case)
|
||||
let time_cutoff = now.checked_sub(time_range).unwrap_or(now);
|
||||
let timestamp = to_pg_timestamp(time_cutoff);
|
||||
|
||||
match self.find_lsn_for_timestamp(timestamp, cancel, ctx).await? {
|
||||
LsnForTimestamp::Present(lsn) => Some(lsn),
|
||||
LsnForTimestamp::Future(lsn) => {
|
||||
// The timestamp is in the future. That sounds impossible,
|
||||
// but what it really means is that there hasn't been
|
||||
// any commits since the cutoff timestamp.
|
||||
//
|
||||
// In this case we should use the LSN of the most recent commit,
|
||||
// which is implicitly the last LSN in the log.
|
||||
debug!("future({})", lsn);
|
||||
Some(self.get_last_record_lsn())
|
||||
}
|
||||
LsnForTimestamp::Past(lsn) => {
|
||||
debug!("past({})", lsn);
|
||||
None
|
||||
}
|
||||
LsnForTimestamp::NoData(lsn) => {
|
||||
debug!("nodata({})", lsn);
|
||||
None
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
Ok(match (pitr, time_cutoff) {
|
||||
(Duration::ZERO, Some(time_cutoff)) => {
|
||||
|
||||
@@ -283,7 +283,7 @@ impl DeleteTimelineFlow {
|
||||
|
||||
/// Shortcut to create Timeline in stopping state and spawn deletion task.
|
||||
#[instrument(skip_all, fields(%timeline_id))]
|
||||
pub(crate) async fn resume_deletion(
|
||||
pub async fn resume_deletion(
|
||||
tenant: Arc<Tenant>,
|
||||
timeline_id: TimelineId,
|
||||
local_metadata: &TimelineMetadata,
|
||||
|
||||
@@ -1,218 +0,0 @@
|
||||
use std::sync::Arc;
|
||||
|
||||
use anyhow::{bail, Context};
|
||||
use remote_storage::RemotePath;
|
||||
use tokio_util::sync::CancellationToken;
|
||||
use tracing::{info, info_span, Instrument};
|
||||
use utils::lsn::Lsn;
|
||||
|
||||
use crate::{context::RequestContext, tenant::metadata::TimelineMetadata};
|
||||
|
||||
use super::Timeline;
|
||||
|
||||
mod flow;
|
||||
mod importbucket_client;
|
||||
mod importbucket_format;
|
||||
pub(crate) mod index_part_format;
|
||||
pub(crate) mod upcall_api;
|
||||
|
||||
pub async fn doit(
|
||||
timeline: &Arc<Timeline>,
|
||||
index_part: index_part_format::Root,
|
||||
ctx: &RequestContext,
|
||||
cancel: CancellationToken,
|
||||
) -> anyhow::Result<()> {
|
||||
let index_part_format::Root::V1(v1) = index_part;
|
||||
let index_part_format::InProgress {
|
||||
location,
|
||||
idempotency_key,
|
||||
started_at,
|
||||
} = match v1 {
|
||||
index_part_format::V1::Done(_) => return Ok(()),
|
||||
index_part_format::V1::InProgress(in_progress) => in_progress,
|
||||
};
|
||||
|
||||
let storage = importbucket_client::new(timeline.conf, &location, cancel.clone()).await?;
|
||||
|
||||
info!("get spec early so we know we'll be able to upcall when done");
|
||||
let Some(spec) = storage.get_spec().await? else {
|
||||
bail!("spec not found")
|
||||
};
|
||||
|
||||
let upcall_client =
|
||||
upcall_api::Client::new(timeline.conf, cancel.clone()).context("create upcall client")?;
|
||||
|
||||
//
|
||||
// send an early progress update to clean up k8s job early and generate potentially useful logs
|
||||
//
|
||||
info!("send early progress update");
|
||||
upcall_client
|
||||
.send_progress_until_success(&spec)
|
||||
.instrument(info_span!("early_progress_update"))
|
||||
.await?;
|
||||
|
||||
let status_prefix = RemotePath::from_string("status").unwrap();
|
||||
|
||||
//
|
||||
// See if shard is done.
|
||||
// TODO: incorporate generations into status key for split brain safety. Figure out together with checkpointing.
|
||||
//
|
||||
let shard_status_key =
|
||||
status_prefix.join(format!("shard-{}", timeline.tenant_shard_id.shard_slug()));
|
||||
let shard_status: Option<importbucket_format::ShardStatus> =
|
||||
storage.get_json(&shard_status_key).await?;
|
||||
info!(?shard_status, "peeking shard status");
|
||||
if shard_status.map(|st| st.done).unwrap_or(false) {
|
||||
info!("shard status indicates that the shard is done, skipping import");
|
||||
} else {
|
||||
// TODO: checkpoint the progress into the IndexPart instead of restarting
|
||||
// from the beginning.
|
||||
|
||||
//
|
||||
// Wipe the slate clean - the flow does not allow resuming.
|
||||
// We can implement resuming in the future by checkpointing the progress into the IndexPart.
|
||||
//
|
||||
info!("wipe the slate clean");
|
||||
{
|
||||
// TODO: do we need to hold GC lock for this?
|
||||
let mut guard = timeline.layers.write().await;
|
||||
assert!(
|
||||
guard.layer_map()?.open_layer.is_none(),
|
||||
"while importing, there should be no in-memory layer" // this just seems like a good place to assert it
|
||||
);
|
||||
let all_layers_keys = guard.all_persistent_layers();
|
||||
let all_layers: Vec<_> = all_layers_keys
|
||||
.iter()
|
||||
.map(|key| guard.get_from_key(key))
|
||||
.collect();
|
||||
let open = guard.open_mut().context("open_mut")?;
|
||||
|
||||
timeline.remote_client.schedule_gc_update(&all_layers)?;
|
||||
open.finish_gc_timeline(&all_layers);
|
||||
}
|
||||
|
||||
//
|
||||
// Wait for pgdata to finish uploading
|
||||
//
|
||||
info!("wait for pgdata to reach status 'done'");
|
||||
let pgdata_status_key = status_prefix.join("pgdata");
|
||||
loop {
|
||||
let res = async {
|
||||
let pgdata_status: Option<importbucket_format::PgdataStatus> = storage
|
||||
.get_json(&pgdata_status_key)
|
||||
.await
|
||||
.context("get pgdata status")?;
|
||||
info!(?pgdata_status, "peeking pgdata status");
|
||||
if pgdata_status.map(|st| st.done).unwrap_or(false) {
|
||||
Ok(())
|
||||
} else {
|
||||
Err(anyhow::anyhow!("pgdata not done yet"))
|
||||
}
|
||||
}
|
||||
.await;
|
||||
match res {
|
||||
Ok(_) => break,
|
||||
Err(err) => {
|
||||
info!(?err, "indefintely waiting for pgdata to finish");
|
||||
if tokio::time::timeout(std::time::Duration::from_secs(10), cancel.cancelled())
|
||||
.await
|
||||
.is_ok()
|
||||
{
|
||||
bail!("cancelled while waiting for pgdata");
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
//
|
||||
// Do the import
|
||||
//
|
||||
info!("do the import");
|
||||
let control_file = storage.get_control_file().await?;
|
||||
let base_lsn = control_file.base_lsn();
|
||||
|
||||
info!("update TimelineMetadata based on LSNs from control file");
|
||||
{
|
||||
let pg_version = control_file.pg_version();
|
||||
let _ctx: &RequestContext = ctx;
|
||||
async move {
|
||||
// FIXME: The 'disk_consistent_lsn' should be the LSN at the *end* of the
|
||||
// checkpoint record, and prev_record_lsn should point to its beginning.
|
||||
// We should read the real end of the record from the WAL, but here we
|
||||
// just fake it.
|
||||
let disk_consistent_lsn = Lsn(base_lsn.0 + 8);
|
||||
let prev_record_lsn = base_lsn;
|
||||
let metadata = TimelineMetadata::new(
|
||||
disk_consistent_lsn,
|
||||
Some(prev_record_lsn),
|
||||
None, // no ancestor
|
||||
Lsn(0), // no ancestor lsn
|
||||
base_lsn, // latest_gc_cutoff_lsn
|
||||
base_lsn, // initdb_lsn
|
||||
pg_version,
|
||||
);
|
||||
|
||||
let _start_lsn = disk_consistent_lsn + 1;
|
||||
|
||||
timeline
|
||||
.remote_client
|
||||
.schedule_index_upload_for_full_metadata_update(&metadata)?;
|
||||
|
||||
timeline.remote_client.wait_completion().await?;
|
||||
|
||||
anyhow::Ok(())
|
||||
}
|
||||
}
|
||||
.await?;
|
||||
|
||||
flow::run(
|
||||
timeline.clone(),
|
||||
base_lsn,
|
||||
control_file,
|
||||
storage.clone(),
|
||||
ctx,
|
||||
)
|
||||
.await?;
|
||||
|
||||
//
|
||||
// Communicate that shard is done.
|
||||
//
|
||||
storage
|
||||
.put_json(
|
||||
&shard_status_key,
|
||||
&importbucket_format::ShardStatus { done: true },
|
||||
)
|
||||
.await
|
||||
.context("put shard status")?;
|
||||
}
|
||||
|
||||
//
|
||||
// Ensure at-least-once deliver of the upcall to cplane
|
||||
// before we mark the task as done and never come here again.
|
||||
//
|
||||
info!("send final progress update");
|
||||
upcall_client
|
||||
.send_progress_until_success(&spec)
|
||||
.instrument(info_span!("final_progress_update"))
|
||||
.await?;
|
||||
|
||||
//
|
||||
// Mark as done in index_part.
|
||||
// This makes subsequent timeline loads enter the normal load code path
|
||||
// instead of spawning the import task and calling this here function.
|
||||
//
|
||||
info!("mark import as complete in index part");
|
||||
timeline
|
||||
.remote_client
|
||||
.schedule_index_upload_for_import_pgdata_state_update(Some(index_part_format::Root::V1(
|
||||
index_part_format::V1::Done(index_part_format::Done {
|
||||
idempotency_key,
|
||||
started_at,
|
||||
finished_at: chrono::Utc::now().naive_utc(),
|
||||
}),
|
||||
)))?;
|
||||
|
||||
timeline.remote_client.wait_completion().await?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
@@ -1,798 +0,0 @@
|
||||
//! Import a PGDATA directory into an empty root timeline.
|
||||
//!
|
||||
//! This module is adapted hackathon code by Heikki and Stas.
|
||||
//! Other code in the parent module was written by Christian as part of a customer PoC.
|
||||
//!
|
||||
//! The hackathon code was producing image layer files as a free-standing program.
|
||||
//!
|
||||
//! It has been modified to
|
||||
//! - run inside a running Pageserver, within the proper lifecycles of Timeline -> Tenant(Shard)
|
||||
//! - => sharding-awareness: produce image layers with only the data relevant for this shard
|
||||
//! - => S3 as the source for the PGDATA instead of local filesystem
|
||||
//!
|
||||
//! TODOs before productionization:
|
||||
//! - ChunkProcessingJob size / ImportJob::total_size does not account for sharding.
|
||||
//! => produced image layers likely too small.
|
||||
//! - ChunkProcessingJob should cut up an ImportJob to hit exactly target image layer size.
|
||||
//! - asserts / unwraps need to be replaced with errors
|
||||
//! - don't trust remote objects will be small (=prevent OOMs in those cases)
|
||||
//! - limit all in-memory buffers in size, or download to disk and read from there
|
||||
//! - limit task concurrency
|
||||
//! - generally play nice with other tenants in the system
|
||||
//! - importbucket is different bucket than main pageserver storage, so, should be fine wrt S3 rate limits
|
||||
//! - but concerns like network bandwidth, local disk write bandwidth, local disk capacity, etc
|
||||
//! - integrate with layer eviction system
|
||||
//! - audit for Tenant::cancel and Timeline::cancel responsiveness
|
||||
//! - audit for Tenant/Timeline gate holding (we spawn tokio tasks during this flow!)
|
||||
//!
|
||||
//! An incomplete set of TODOs from the Hackathon:
|
||||
//! - version-specific CheckPointData (=> pgv abstraction, already exists for regular walingest)
|
||||
|
||||
use std::sync::Arc;
|
||||
|
||||
use anyhow::{bail, ensure};
|
||||
use bytes::Bytes;
|
||||
|
||||
use itertools::Itertools;
|
||||
use pageserver_api::{
|
||||
key::{rel_block_to_key, rel_dir_to_key, rel_size_to_key, relmap_file_key, DBDIR_KEY},
|
||||
reltag::RelTag,
|
||||
shard::ShardIdentity,
|
||||
};
|
||||
use postgres_ffi::{pg_constants, relfile_utils::parse_relfilename, BLCKSZ};
|
||||
use tokio::task::JoinSet;
|
||||
use tracing::{debug, info_span, instrument, Instrument};
|
||||
|
||||
use crate::{
|
||||
assert_u64_eq_usize::UsizeIsU64,
|
||||
pgdatadir_mapping::{SlruSegmentDirectory, TwoPhaseDirectory},
|
||||
};
|
||||
use crate::{
|
||||
context::{DownloadBehavior, RequestContext},
|
||||
pgdatadir_mapping::{DbDirectory, RelDirectory},
|
||||
task_mgr::TaskKind,
|
||||
tenant::storage_layer::{ImageLayerWriter, Layer},
|
||||
};
|
||||
|
||||
use pageserver_api::key::Key;
|
||||
use pageserver_api::key::{
|
||||
slru_block_to_key, slru_dir_to_key, slru_segment_size_to_key, CHECKPOINT_KEY, CONTROLFILE_KEY,
|
||||
TWOPHASEDIR_KEY,
|
||||
};
|
||||
use pageserver_api::keyspace::singleton_range;
|
||||
use pageserver_api::keyspace::{contiguous_range_len, is_contiguous_range};
|
||||
use pageserver_api::reltag::SlruKind;
|
||||
use utils::bin_ser::BeSer;
|
||||
use utils::lsn::Lsn;
|
||||
|
||||
use std::collections::HashSet;
|
||||
use std::ops::Range;
|
||||
|
||||
use super::{
|
||||
importbucket_client::{ControlFile, RemoteStorageWrapper},
|
||||
Timeline,
|
||||
};
|
||||
|
||||
use remote_storage::RemotePath;
|
||||
|
||||
pub async fn run(
|
||||
timeline: Arc<Timeline>,
|
||||
pgdata_lsn: Lsn,
|
||||
control_file: ControlFile,
|
||||
storage: RemoteStorageWrapper,
|
||||
ctx: &RequestContext,
|
||||
) -> anyhow::Result<()> {
|
||||
Flow {
|
||||
timeline,
|
||||
pgdata_lsn,
|
||||
control_file,
|
||||
tasks: Vec::new(),
|
||||
storage,
|
||||
}
|
||||
.run(ctx)
|
||||
.await
|
||||
}
|
||||
|
||||
/// State carried through one PGDATA import: the inputs handed to [`run`] plus the list
/// of import tasks planned so far.
struct Flow {
|
||||
/// Timeline that the import ingests into.
timeline: Arc<Timeline>,
|
||||
/// LSN supplied by the caller; `Flow::run` overwrites it with the aligned checkpoint
/// LSN taken from `control_file`.
pgdata_lsn: Lsn,
|
||||
/// Control file of the imported cluster; its raw buffer and checkpoint copy are also
/// queued as import tasks.
control_file: ControlFile,
|
||||
/// Import tasks accumulated during planning, before they are grouped into jobs.
tasks: Vec<AnyImportTask>,
|
||||
/// Handle used to read PGDATA objects from the import bucket.
storage: RemoteStorageWrapper,
|
||||
}
|
||||
|
||||
impl Flow {
|
||||
/// Perform the ingestion into [`Self::timeline`].
|
||||
/// Assumes the timeline is empty (= no layers).
|
||||
pub async fn run(mut self, ctx: &RequestContext) -> anyhow::Result<()> {
|
||||
let pgdata_lsn = Lsn(self.control_file.control_file_data().checkPoint).align();
|
||||
|
||||
self.pgdata_lsn = pgdata_lsn;
|
||||
|
||||
let datadir = PgDataDir::new(&self.storage).await?;
|
||||
|
||||
// Import dbdir (00:00:00 keyspace)
|
||||
// This is just constructed here, but will be written to the image layer in the first call to import_db()
|
||||
let dbdir_buf = Bytes::from(DbDirectory::ser(&DbDirectory {
|
||||
dbdirs: datadir
|
||||
.dbs
|
||||
.iter()
|
||||
.map(|db| ((db.spcnode, db.dboid), true))
|
||||
.collect(),
|
||||
})?);
|
||||
self.tasks
|
||||
.push(ImportSingleKeyTask::new(DBDIR_KEY, dbdir_buf).into());
|
||||
|
||||
// Import databases (00:spcnode:dbnode keyspace for each db)
|
||||
for db in datadir.dbs {
|
||||
self.import_db(&db).await?;
|
||||
}
|
||||
|
||||
// Import SLRUs
|
||||
|
||||
// pg_xact (01:00 keyspace)
|
||||
self.import_slru(SlruKind::Clog, &self.storage.pgdata().join("pg_xact"))
|
||||
.await?;
|
||||
// pg_multixact/members (01:01 keyspace)
|
||||
self.import_slru(
|
||||
SlruKind::MultiXactMembers,
|
||||
&self.storage.pgdata().join("pg_multixact/members"),
|
||||
)
|
||||
.await?;
|
||||
// pg_multixact/offsets (01:02 keyspace)
|
||||
self.import_slru(
|
||||
SlruKind::MultiXactOffsets,
|
||||
&self.storage.pgdata().join("pg_multixact/offsets"),
|
||||
)
|
||||
.await?;
|
||||
|
||||
// Import pg_twophase.
|
||||
// TODO: as empty
|
||||
let twophasedir_buf = TwoPhaseDirectory::ser(&TwoPhaseDirectory {
|
||||
xids: HashSet::new(),
|
||||
})?;
|
||||
self.tasks
|
||||
.push(AnyImportTask::SingleKey(ImportSingleKeyTask::new(
|
||||
TWOPHASEDIR_KEY,
|
||||
Bytes::from(twophasedir_buf),
|
||||
)));
|
||||
|
||||
// Controlfile, checkpoint
|
||||
self.tasks
|
||||
.push(AnyImportTask::SingleKey(ImportSingleKeyTask::new(
|
||||
CONTROLFILE_KEY,
|
||||
self.control_file.control_file_buf().clone(),
|
||||
)));
|
||||
|
||||
let checkpoint_buf = self
|
||||
.control_file
|
||||
.control_file_data()
|
||||
.checkPointCopy
|
||||
.encode()?;
|
||||
self.tasks
|
||||
.push(AnyImportTask::SingleKey(ImportSingleKeyTask::new(
|
||||
CHECKPOINT_KEY,
|
||||
checkpoint_buf,
|
||||
)));
|
||||
|
||||
// Assigns parts of key space to later parallel jobs
|
||||
let mut last_end_key = Key::MIN;
|
||||
let mut current_chunk = Vec::new();
|
||||
let mut current_chunk_size: usize = 0;
|
||||
let mut parallel_jobs = Vec::new();
|
||||
for task in std::mem::take(&mut self.tasks).into_iter() {
|
||||
if current_chunk_size + task.total_size() > 1024 * 1024 * 1024 {
|
||||
let key_range = last_end_key..task.key_range().start;
|
||||
parallel_jobs.push(ChunkProcessingJob::new(
|
||||
key_range.clone(),
|
||||
std::mem::take(&mut current_chunk),
|
||||
&self,
|
||||
));
|
||||
last_end_key = key_range.end;
|
||||
current_chunk_size = 0;
|
||||
}
|
||||
current_chunk_size += task.total_size();
|
||||
current_chunk.push(task);
|
||||
}
|
||||
parallel_jobs.push(ChunkProcessingJob::new(
|
||||
last_end_key..Key::MAX,
|
||||
current_chunk,
|
||||
&self,
|
||||
));
|
||||
|
||||
// Start all jobs simultaneosly
|
||||
let mut work = JoinSet::new();
|
||||
// TODO: semaphore?
|
||||
for job in parallel_jobs {
|
||||
let ctx: RequestContext =
|
||||
ctx.detached_child(TaskKind::ImportPgdata, DownloadBehavior::Error);
|
||||
work.spawn(async move { job.run(&ctx).await }.instrument(info_span!("parallel_job")));
|
||||
}
|
||||
let mut results = Vec::new();
|
||||
while let Some(result) = work.join_next().await {
|
||||
match result {
|
||||
Ok(res) => {
|
||||
results.push(res);
|
||||
}
|
||||
Err(_joinset_err) => {
|
||||
results.push(Err(anyhow::anyhow!(
|
||||
"parallel job panicked or cancelled, check pageserver logs"
|
||||
)));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if results.iter().all(|r| r.is_ok()) {
|
||||
Ok(())
|
||||
} else {
|
||||
let mut msg = String::new();
|
||||
for result in results {
|
||||
if let Err(err) = result {
|
||||
msg.push_str(&format!("{err:?}\n\n"));
|
||||
}
|
||||
}
|
||||
bail!("Some parallel jobs failed:\n\n{msg}");
|
||||
}
|
||||
}
|
||||
|
||||
#[instrument(level = tracing::Level::DEBUG, skip_all, fields(dboid=%db.dboid, tablespace=%db.spcnode, path=%db.path))]
|
||||
async fn import_db(&mut self, db: &PgDataDirDb) -> anyhow::Result<()> {
|
||||
debug!("start");
|
||||
scopeguard::defer! {
|
||||
debug!("return");
|
||||
}
|
||||
|
||||
// Import relmap (00:spcnode:dbnode:00:*:00)
|
||||
let relmap_key = relmap_file_key(db.spcnode, db.dboid);
|
||||
debug!("Constructing relmap entry, key {relmap_key}");
|
||||
let relmap_path = db.path.join("pg_filenode.map");
|
||||
let relmap_buf = self.storage.get(&relmap_path).await?;
|
||||
self.tasks
|
||||
.push(AnyImportTask::SingleKey(ImportSingleKeyTask::new(
|
||||
relmap_key, relmap_buf,
|
||||
)));
|
||||
|
||||
// Import reldir (00:spcnode:dbnode:00:*:01)
|
||||
let reldir_key = rel_dir_to_key(db.spcnode, db.dboid);
|
||||
debug!("Constructing reldirs entry, key {reldir_key}");
|
||||
let reldir_buf = RelDirectory::ser(&RelDirectory {
|
||||
rels: db
|
||||
.files
|
||||
.iter()
|
||||
.map(|f| (f.rel_tag.relnode, f.rel_tag.forknum))
|
||||
.collect(),
|
||||
})?;
|
||||
self.tasks
|
||||
.push(AnyImportTask::SingleKey(ImportSingleKeyTask::new(
|
||||
reldir_key,
|
||||
Bytes::from(reldir_buf),
|
||||
)));
|
||||
|
||||
// Import data (00:spcnode:dbnode:reloid:fork:blk) and set sizes for each last
|
||||
// segment in a given relation (00:spcnode:dbnode:reloid:fork:ff)
|
||||
for file in &db.files {
|
||||
debug!(%file.path, %file.filesize, "importing file");
|
||||
let len = file.filesize;
|
||||
ensure!(len % 8192 == 0);
|
||||
let start_blk: u32 = file.segno * (1024 * 1024 * 1024 / 8192);
|
||||
let start_key = rel_block_to_key(file.rel_tag, start_blk);
|
||||
let end_key = rel_block_to_key(file.rel_tag, start_blk + (len / 8192) as u32);
|
||||
self.tasks
|
||||
.push(AnyImportTask::RelBlocks(ImportRelBlocksTask::new(
|
||||
*self.timeline.get_shard_identity(),
|
||||
start_key..end_key,
|
||||
&file.path,
|
||||
self.storage.clone(),
|
||||
)));
|
||||
|
||||
// Set relsize for the last segment (00:spcnode:dbnode:reloid:fork:ff)
|
||||
if let Some(nblocks) = file.nblocks {
|
||||
let size_key = rel_size_to_key(file.rel_tag);
|
||||
//debug!("Setting relation size (path={path}, rel_tag={rel_tag}, segno={segno}) to {nblocks}, key {size_key}");
|
||||
let buf = nblocks.to_le_bytes();
|
||||
self.tasks
|
||||
.push(AnyImportTask::SingleKey(ImportSingleKeyTask::new(
|
||||
size_key,
|
||||
Bytes::from(buf.to_vec()),
|
||||
)));
|
||||
}
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
async fn import_slru(&mut self, kind: SlruKind, path: &RemotePath) -> anyhow::Result<()> {
|
||||
let segments = self.storage.listfilesindir(path).await?;
|
||||
let segments: Vec<(String, u32, usize)> = segments
|
||||
.into_iter()
|
||||
.filter_map(|(path, size)| {
|
||||
let filename = path.object_name()?;
|
||||
let segno = u32::from_str_radix(filename, 16).ok()?;
|
||||
Some((filename.to_string(), segno, size))
|
||||
})
|
||||
.collect();
|
||||
|
||||
// Write SlruDir
|
||||
let slrudir_key = slru_dir_to_key(kind);
|
||||
let segnos: HashSet<u32> = segments
|
||||
.iter()
|
||||
.map(|(_path, segno, _size)| *segno)
|
||||
.collect();
|
||||
let slrudir = SlruSegmentDirectory { segments: segnos };
|
||||
let slrudir_buf = SlruSegmentDirectory::ser(&slrudir)?;
|
||||
self.tasks
|
||||
.push(AnyImportTask::SingleKey(ImportSingleKeyTask::new(
|
||||
slrudir_key,
|
||||
Bytes::from(slrudir_buf),
|
||||
)));
|
||||
|
||||
for (segpath, segno, size) in segments {
|
||||
// SlruSegBlocks for each segment
|
||||
let p = path.join(&segpath);
|
||||
let file_size = size;
|
||||
ensure!(file_size % 8192 == 0);
|
||||
let nblocks = u32::try_from(file_size / 8192)?;
|
||||
let start_key = slru_block_to_key(kind, segno, 0);
|
||||
let end_key = slru_block_to_key(kind, segno, nblocks);
|
||||
debug!(%p, segno=%segno, %size, %start_key, %end_key, "scheduling SLRU segment");
|
||||
self.tasks
|
||||
.push(AnyImportTask::SlruBlocks(ImportSlruBlocksTask::new(
|
||||
*self.timeline.get_shard_identity(),
|
||||
start_key..end_key,
|
||||
&p,
|
||||
self.storage.clone(),
|
||||
)));
|
||||
|
||||
// Followed by SlruSegSize
|
||||
let segsize_key = slru_segment_size_to_key(kind, segno);
|
||||
let segsize_buf = nblocks.to_le_bytes();
|
||||
self.tasks
|
||||
.push(AnyImportTask::SingleKey(ImportSingleKeyTask::new(
|
||||
segsize_key,
|
||||
Bytes::copy_from_slice(&segsize_buf),
|
||||
)));
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
//
|
||||
// dbdir iteration tools
|
||||
//
|
||||
|
||||
struct PgDataDir {
|
||||
pub dbs: Vec<PgDataDirDb>, // spcnode, dboid, path
|
||||
}
|
||||
|
||||
struct PgDataDirDb {
|
||||
pub spcnode: u32,
|
||||
pub dboid: u32,
|
||||
pub path: RemotePath,
|
||||
pub files: Vec<PgDataDirDbFile>,
|
||||
}
|
||||
|
||||
struct PgDataDirDbFile {
|
||||
pub path: RemotePath,
|
||||
pub rel_tag: RelTag,
|
||||
pub segno: u32,
|
||||
pub filesize: usize,
|
||||
// Cummulative size of the given fork, set only for the last segment of that fork
|
||||
pub nblocks: Option<usize>,
|
||||
}
|
||||
|
||||
impl PgDataDir {
|
||||
async fn new(storage: &RemoteStorageWrapper) -> anyhow::Result<Self> {
|
||||
let datadir_path = storage.pgdata();
|
||||
// Import ordinary databases, DEFAULTTABLESPACE_OID is smaller than GLOBALTABLESPACE_OID, so import them first
|
||||
// Traverse database in increasing oid order
|
||||
|
||||
let basedir = &datadir_path.join("base");
|
||||
let db_oids: Vec<_> = storage
|
||||
.listdir(basedir)
|
||||
.await?
|
||||
.into_iter()
|
||||
.filter_map(|path| path.object_name().and_then(|name| name.parse::<u32>().ok()))
|
||||
.sorted()
|
||||
.collect();
|
||||
debug!(?db_oids, "found databases");
|
||||
let mut databases = Vec::new();
|
||||
for dboid in db_oids {
|
||||
databases.push(
|
||||
PgDataDirDb::new(
|
||||
storage,
|
||||
&basedir.join(dboid.to_string()),
|
||||
pg_constants::DEFAULTTABLESPACE_OID,
|
||||
dboid,
|
||||
&datadir_path,
|
||||
)
|
||||
.await?,
|
||||
);
|
||||
}
|
||||
|
||||
// special case for global catalogs
|
||||
databases.push(
|
||||
PgDataDirDb::new(
|
||||
storage,
|
||||
&datadir_path.join("global"),
|
||||
postgres_ffi::pg_constants::GLOBALTABLESPACE_OID,
|
||||
0,
|
||||
&datadir_path,
|
||||
)
|
||||
.await?,
|
||||
);
|
||||
|
||||
databases.sort_by_key(|db| (db.spcnode, db.dboid));
|
||||
|
||||
Ok(Self { dbs: databases })
|
||||
}
|
||||
}
|
||||
|
||||
impl PgDataDirDb {
|
||||
#[instrument(level = tracing::Level::DEBUG, skip_all, fields(%dboid, %db_path))]
|
||||
async fn new(
|
||||
storage: &RemoteStorageWrapper,
|
||||
db_path: &RemotePath,
|
||||
spcnode: u32,
|
||||
dboid: u32,
|
||||
datadir_path: &RemotePath,
|
||||
) -> anyhow::Result<Self> {
|
||||
let mut files: Vec<PgDataDirDbFile> = storage
|
||||
.listfilesindir(db_path)
|
||||
.await?
|
||||
.into_iter()
|
||||
.filter_map(|(path, size)| {
|
||||
debug!(%path, %size, "found file in dbdir");
|
||||
path.object_name().and_then(|name| {
|
||||
// returns (relnode, forknum, segno)
|
||||
parse_relfilename(name).ok().map(|x| (size, x))
|
||||
})
|
||||
})
|
||||
.sorted_by_key(|(_, relfilename)| *relfilename)
|
||||
.map(|(filesize, (relnode, forknum, segno))| {
|
||||
let rel_tag = RelTag {
|
||||
spcnode,
|
||||
dbnode: dboid,
|
||||
relnode,
|
||||
forknum,
|
||||
};
|
||||
|
||||
let path = datadir_path.join(rel_tag.to_segfile_name(segno));
|
||||
assert!(filesize % BLCKSZ as usize == 0); // TODO: this should result in an error
|
||||
let nblocks = filesize / BLCKSZ as usize;
|
||||
|
||||
PgDataDirDbFile {
|
||||
path,
|
||||
filesize,
|
||||
rel_tag,
|
||||
segno,
|
||||
nblocks: Some(nblocks), // first non-cummulative sizes
|
||||
}
|
||||
})
|
||||
.collect();
|
||||
|
||||
// Set cummulative sizes. Do all of that math here, so that later we could easier
|
||||
// parallelize over segments and know with which segments we need to write relsize
|
||||
// entry.
|
||||
let mut cumulative_nblocks: usize = 0;
|
||||
let mut prev_rel_tag: Option<RelTag> = None;
|
||||
for i in 0..files.len() {
|
||||
if prev_rel_tag == Some(files[i].rel_tag) {
|
||||
cumulative_nblocks += files[i].nblocks.unwrap();
|
||||
} else {
|
||||
cumulative_nblocks = files[i].nblocks.unwrap();
|
||||
}
|
||||
|
||||
files[i].nblocks = if i == files.len() - 1 || files[i + 1].rel_tag != files[i].rel_tag {
|
||||
Some(cumulative_nblocks)
|
||||
} else {
|
||||
None
|
||||
};
|
||||
|
||||
prev_rel_tag = Some(files[i].rel_tag);
|
||||
}
|
||||
|
||||
Ok(PgDataDirDb {
|
||||
files,
|
||||
path: db_path.clone(),
|
||||
spcnode,
|
||||
dboid,
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
trait ImportTask {
|
||||
fn key_range(&self) -> Range<Key>;
|
||||
|
||||
fn total_size(&self) -> usize {
|
||||
// TODO: revisit this
|
||||
if is_contiguous_range(&self.key_range()) {
|
||||
contiguous_range_len(&self.key_range()) as usize * 8192
|
||||
} else {
|
||||
u32::MAX as usize
|
||||
}
|
||||
}
|
||||
|
||||
async fn doit(
|
||||
self,
|
||||
layer_writer: &mut ImageLayerWriter,
|
||||
ctx: &RequestContext,
|
||||
) -> anyhow::Result<usize>;
|
||||
}
|
||||
|
||||
struct ImportSingleKeyTask {
|
||||
key: Key,
|
||||
buf: Bytes,
|
||||
}
|
||||
|
||||
impl ImportSingleKeyTask {
|
||||
fn new(key: Key, buf: Bytes) -> Self {
|
||||
ImportSingleKeyTask { key, buf }
|
||||
}
|
||||
}
|
||||
|
||||
impl ImportTask for ImportSingleKeyTask {
|
||||
fn key_range(&self) -> Range<Key> {
|
||||
singleton_range(self.key)
|
||||
}
|
||||
|
||||
async fn doit(
|
||||
self,
|
||||
layer_writer: &mut ImageLayerWriter,
|
||||
ctx: &RequestContext,
|
||||
) -> anyhow::Result<usize> {
|
||||
layer_writer.put_image(self.key, self.buf, ctx).await?;
|
||||
Ok(1)
|
||||
}
|
||||
}
|
||||
|
||||
struct ImportRelBlocksTask {
|
||||
shard_identity: ShardIdentity,
|
||||
key_range: Range<Key>,
|
||||
path: RemotePath,
|
||||
storage: RemoteStorageWrapper,
|
||||
}
|
||||
|
||||
impl ImportRelBlocksTask {
|
||||
fn new(
|
||||
shard_identity: ShardIdentity,
|
||||
key_range: Range<Key>,
|
||||
path: &RemotePath,
|
||||
storage: RemoteStorageWrapper,
|
||||
) -> Self {
|
||||
ImportRelBlocksTask {
|
||||
shard_identity,
|
||||
key_range,
|
||||
path: path.clone(),
|
||||
storage,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl ImportTask for ImportRelBlocksTask {
|
||||
fn key_range(&self) -> Range<Key> {
|
||||
self.key_range.clone()
|
||||
}
|
||||
|
||||
#[instrument(level = tracing::Level::DEBUG, skip_all, fields(%self.path))]
|
||||
async fn doit(
|
||||
self,
|
||||
layer_writer: &mut ImageLayerWriter,
|
||||
ctx: &RequestContext,
|
||||
) -> anyhow::Result<usize> {
|
||||
debug!("Importing relation file");
|
||||
|
||||
let (rel_tag, start_blk) = self.key_range.start.to_rel_block()?;
|
||||
let (rel_tag_end, end_blk) = self.key_range.end.to_rel_block()?;
|
||||
assert_eq!(rel_tag, rel_tag_end);
|
||||
|
||||
let ranges = (start_blk..end_blk)
|
||||
.enumerate()
|
||||
.filter_map(|(i, blknum)| {
|
||||
let key = rel_block_to_key(rel_tag, blknum);
|
||||
if self.shard_identity.is_key_disposable(&key) {
|
||||
return None;
|
||||
}
|
||||
let file_offset = i.checked_mul(8192).unwrap();
|
||||
Some((
|
||||
vec![key],
|
||||
file_offset,
|
||||
file_offset.checked_add(8192).unwrap(),
|
||||
))
|
||||
})
|
||||
.coalesce(|(mut acc, acc_start, acc_end), (mut key, start, end)| {
|
||||
assert_eq!(key.len(), 1);
|
||||
assert!(!acc.is_empty());
|
||||
assert!(acc_end > acc_start);
|
||||
if acc_end == start /* TODO additional max range check here, to limit memory consumption per task to X */ {
|
||||
acc.push(key.pop().unwrap());
|
||||
Ok((acc, acc_start, end))
|
||||
} else {
|
||||
Err(((acc, acc_start, acc_end), (key, start, end)))
|
||||
}
|
||||
});
|
||||
|
||||
let mut nimages = 0;
|
||||
for (keys, range_start, range_end) in ranges {
|
||||
let range_buf = self
|
||||
.storage
|
||||
.get_range(&self.path, range_start.into_u64(), range_end.into_u64())
|
||||
.await?;
|
||||
let mut buf = Bytes::from(range_buf);
|
||||
// TODO: batched writes
|
||||
for key in keys {
|
||||
let image = buf.split_to(8192);
|
||||
layer_writer.put_image(key, image, ctx).await?;
|
||||
nimages += 1;
|
||||
}
|
||||
}
|
||||
|
||||
Ok(nimages)
|
||||
}
|
||||
}
|
||||
|
||||
struct ImportSlruBlocksTask {
|
||||
shard_identity: ShardIdentity,
|
||||
key_range: Range<Key>,
|
||||
path: RemotePath,
|
||||
storage: RemoteStorageWrapper,
|
||||
}
|
||||
|
||||
impl ImportSlruBlocksTask {
|
||||
fn new(
|
||||
shard_identity: ShardIdentity,
|
||||
key_range: Range<Key>,
|
||||
path: &RemotePath,
|
||||
storage: RemoteStorageWrapper,
|
||||
) -> Self {
|
||||
ImportSlruBlocksTask {
|
||||
shard_identity,
|
||||
key_range,
|
||||
path: path.clone(),
|
||||
storage,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl ImportTask for ImportSlruBlocksTask {
|
||||
fn key_range(&self) -> Range<Key> {
|
||||
self.key_range.clone()
|
||||
}
|
||||
|
||||
async fn doit(
|
||||
self,
|
||||
layer_writer: &mut ImageLayerWriter,
|
||||
ctx: &RequestContext,
|
||||
) -> anyhow::Result<usize> {
|
||||
debug!("Importing SLRU segment file {}", self.path);
|
||||
let buf = self.storage.get(&self.path).await?;
|
||||
|
||||
let (kind, segno, start_blk) = self.key_range.start.to_slru_block()?;
|
||||
let (_kind, _segno, end_blk) = self.key_range.end.to_slru_block()?;
|
||||
let mut blknum = start_blk;
|
||||
let mut nimages = 0;
|
||||
let mut file_offset = 0;
|
||||
while blknum < end_blk {
|
||||
let key = slru_block_to_key(kind, segno, blknum);
|
||||
assert!(
|
||||
!self.shard_identity.is_key_disposable(&key),
|
||||
"SLRU keys need to go into every shard"
|
||||
);
|
||||
let buf = &buf[file_offset..(file_offset + 8192)];
|
||||
file_offset += 8192;
|
||||
layer_writer
|
||||
.put_image(key, Bytes::copy_from_slice(buf), ctx)
|
||||
.await?;
|
||||
blknum += 1;
|
||||
nimages += 1;
|
||||
}
|
||||
Ok(nimages)
|
||||
}
|
||||
}
|
||||
|
||||
enum AnyImportTask {
|
||||
SingleKey(ImportSingleKeyTask),
|
||||
RelBlocks(ImportRelBlocksTask),
|
||||
SlruBlocks(ImportSlruBlocksTask),
|
||||
}
|
||||
|
||||
impl ImportTask for AnyImportTask {
|
||||
fn key_range(&self) -> Range<Key> {
|
||||
match self {
|
||||
Self::SingleKey(t) => t.key_range(),
|
||||
Self::RelBlocks(t) => t.key_range(),
|
||||
Self::SlruBlocks(t) => t.key_range(),
|
||||
}
|
||||
}
|
||||
/// returns the number of images put into the `layer_writer`
|
||||
async fn doit(
|
||||
self,
|
||||
layer_writer: &mut ImageLayerWriter,
|
||||
ctx: &RequestContext,
|
||||
) -> anyhow::Result<usize> {
|
||||
match self {
|
||||
Self::SingleKey(t) => t.doit(layer_writer, ctx).await,
|
||||
Self::RelBlocks(t) => t.doit(layer_writer, ctx).await,
|
||||
Self::SlruBlocks(t) => t.doit(layer_writer, ctx).await,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl From<ImportSingleKeyTask> for AnyImportTask {
|
||||
fn from(t: ImportSingleKeyTask) -> Self {
|
||||
Self::SingleKey(t)
|
||||
}
|
||||
}
|
||||
|
||||
impl From<ImportRelBlocksTask> for AnyImportTask {
|
||||
fn from(t: ImportRelBlocksTask) -> Self {
|
||||
Self::RelBlocks(t)
|
||||
}
|
||||
}
|
||||
|
||||
impl From<ImportSlruBlocksTask> for AnyImportTask {
|
||||
fn from(t: ImportSlruBlocksTask) -> Self {
|
||||
Self::SlruBlocks(t)
|
||||
}
|
||||
}
|
||||
|
||||
struct ChunkProcessingJob {
|
||||
timeline: Arc<Timeline>,
|
||||
range: Range<Key>,
|
||||
tasks: Vec<AnyImportTask>,
|
||||
|
||||
pgdata_lsn: Lsn,
|
||||
}
|
||||
|
||||
impl ChunkProcessingJob {
|
||||
fn new(range: Range<Key>, tasks: Vec<AnyImportTask>, env: &Flow) -> Self {
|
||||
assert!(env.pgdata_lsn.is_valid());
|
||||
Self {
|
||||
timeline: env.timeline.clone(),
|
||||
range,
|
||||
tasks,
|
||||
pgdata_lsn: env.pgdata_lsn,
|
||||
}
|
||||
}
|
||||
|
||||
async fn run(self, ctx: &RequestContext) -> anyhow::Result<()> {
|
||||
let mut writer = ImageLayerWriter::new(
|
||||
self.timeline.conf,
|
||||
self.timeline.timeline_id,
|
||||
self.timeline.tenant_shard_id,
|
||||
&self.range,
|
||||
self.pgdata_lsn,
|
||||
ctx,
|
||||
)
|
||||
.await?;
|
||||
|
||||
let mut nimages = 0;
|
||||
for task in self.tasks {
|
||||
nimages += task.doit(&mut writer, ctx).await?;
|
||||
}
|
||||
|
||||
let resident_layer = if nimages > 0 {
|
||||
let (desc, path) = writer.finish(ctx).await?;
|
||||
Layer::finish_creating(self.timeline.conf, &self.timeline, desc, &path)?
|
||||
} else {
|
||||
// dropping the writer cleans up
|
||||
return Ok(());
|
||||
};
|
||||
|
||||
// this is sharing the same code as create_image_layers
|
||||
let mut guard = self.timeline.layers.write().await;
|
||||
guard
|
||||
.open_mut()?
|
||||
.track_new_image_layers(&[resident_layer.clone()], &self.timeline.metrics);
|
||||
crate::tenant::timeline::drop_wlock(guard);
|
||||
|
||||
// Schedule the layer for upload but don't add barriers such as
|
||||
// wait for completion or index upload, so we don't inhibit upload parallelism.
|
||||
// TODO: limit upload parallelism somehow (e.g. by limiting concurrency of jobs?)
|
||||
// TODO: or regulate parallelism by upload queue depth? Prob should happen at a higher level.
|
||||
self.timeline
|
||||
.remote_client
|
||||
.schedule_layer_file_upload(resident_layer)?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
@@ -1,315 +0,0 @@
|
||||
use std::{ops::Bound, sync::Arc};
|
||||
|
||||
use anyhow::Context;
|
||||
use bytes::Bytes;
|
||||
use postgres_ffi::ControlFileData;
|
||||
use remote_storage::{
|
||||
Download, DownloadError, DownloadOpts, GenericRemoteStorage, Listing, ListingObject, RemotePath,
|
||||
};
|
||||
use serde::de::DeserializeOwned;
|
||||
use tokio_util::sync::CancellationToken;
|
||||
use tracing::{debug, info, instrument};
|
||||
use utils::lsn::Lsn;
|
||||
|
||||
use crate::{assert_u64_eq_usize::U64IsUsize, config::PageServerConf};
|
||||
|
||||
use super::{importbucket_format, index_part_format};
|
||||
|
||||
pub async fn new(
|
||||
conf: &'static PageServerConf,
|
||||
location: &index_part_format::Location,
|
||||
cancel: CancellationToken,
|
||||
) -> Result<RemoteStorageWrapper, anyhow::Error> {
|
||||
// FIXME: we probably want some timeout, and we might be able to assume the max file
|
||||
// size on S3 is 1GiB (postgres segment size). But the problem is that the individual
|
||||
// downloaders don't know enough about concurrent downloads to make a guess on the
|
||||
// expected bandwidth and resulting best timeout.
|
||||
let timeout = std::time::Duration::from_secs(24 * 60 * 60);
|
||||
let location_storage = match location {
|
||||
#[cfg(feature = "testing")]
|
||||
index_part_format::Location::LocalFs { path } => {
|
||||
GenericRemoteStorage::LocalFs(remote_storage::LocalFs::new(path.clone(), timeout)?)
|
||||
}
|
||||
index_part_format::Location::AwsS3 {
|
||||
region,
|
||||
bucket,
|
||||
key,
|
||||
} => {
|
||||
// TODO: think about security implications of letting the client specify the bucket & prefix.
|
||||
// It's the most flexible right now, but, possibly we want to move bucket name into PS conf
|
||||
// and force the timeline_id into the prefix?
|
||||
GenericRemoteStorage::AwsS3(Arc::new(
|
||||
remote_storage::S3Bucket::new(
|
||||
&remote_storage::S3Config {
|
||||
bucket_name: bucket.clone(),
|
||||
prefix_in_bucket: Some(key.clone()),
|
||||
bucket_region: region.clone(),
|
||||
endpoint: conf
|
||||
.import_pgdata_aws_endpoint_url
|
||||
.clone()
|
||||
.map(|url| url.to_string()), // by specifying None here, remote_storage/aws-sdk-rust will infer from env
|
||||
concurrency_limit: 100.try_into().unwrap(), // TODO: think about this
|
||||
max_keys_per_list_response: Some(1000), // TODO: think about this
|
||||
upload_storage_class: None, // irrelevant
|
||||
},
|
||||
timeout,
|
||||
)
|
||||
.await
|
||||
.context("setup s3 bucket")?,
|
||||
))
|
||||
}
|
||||
};
|
||||
let storage_wrapper = RemoteStorageWrapper::new(location_storage, cancel);
|
||||
Ok(storage_wrapper)
|
||||
}
|
||||
|
||||
/// Wrap [`remote_storage`] APIs to make it look a bit more like a filesystem API
|
||||
/// such as [`tokio::fs`], which was used in the original implementation of the import code.
|
||||
#[derive(Clone)]
|
||||
pub struct RemoteStorageWrapper {
|
||||
storage: GenericRemoteStorage,
|
||||
cancel: CancellationToken,
|
||||
}
|
||||
|
||||
impl RemoteStorageWrapper {
|
||||
pub fn new(storage: GenericRemoteStorage, cancel: CancellationToken) -> Self {
|
||||
Self { storage, cancel }
|
||||
}
|
||||
|
||||
#[instrument(level = tracing::Level::DEBUG, skip_all, fields(%path))]
|
||||
pub async fn listfilesindir(
|
||||
&self,
|
||||
path: &RemotePath,
|
||||
) -> Result<Vec<(RemotePath, usize)>, DownloadError> {
|
||||
assert!(
|
||||
path.object_name().is_some(),
|
||||
"must specify dirname, without trailing slash"
|
||||
);
|
||||
let path = path.add_trailing_slash();
|
||||
|
||||
let res = crate::tenant::remote_timeline_client::download::download_retry_forever(
|
||||
|| async {
|
||||
let Listing { keys, prefixes: _ } = self
|
||||
.storage
|
||||
.list(
|
||||
Some(&path),
|
||||
remote_storage::ListingMode::WithDelimiter,
|
||||
None,
|
||||
&self.cancel,
|
||||
)
|
||||
.await?;
|
||||
let res = keys
|
||||
.into_iter()
|
||||
.map(|ListingObject { key, size, .. }| (key, size.into_usize()))
|
||||
.collect();
|
||||
Ok(res)
|
||||
},
|
||||
&format!("listfilesindir {path:?}"),
|
||||
&self.cancel,
|
||||
)
|
||||
.await;
|
||||
debug!(?res, "returning");
|
||||
res
|
||||
}
|
||||
|
||||
#[instrument(level = tracing::Level::DEBUG, skip_all, fields(%path))]
|
||||
pub async fn listdir(&self, path: &RemotePath) -> Result<Vec<RemotePath>, DownloadError> {
|
||||
assert!(
|
||||
path.object_name().is_some(),
|
||||
"must specify dirname, without trailing slash"
|
||||
);
|
||||
let path = path.add_trailing_slash();
|
||||
|
||||
let res = crate::tenant::remote_timeline_client::download::download_retry_forever(
|
||||
|| async {
|
||||
let Listing { keys, prefixes } = self
|
||||
.storage
|
||||
.list(
|
||||
Some(&path),
|
||||
remote_storage::ListingMode::WithDelimiter,
|
||||
None,
|
||||
&self.cancel,
|
||||
)
|
||||
.await?;
|
||||
let res = keys
|
||||
.into_iter()
|
||||
.map(|ListingObject { key, .. }| key)
|
||||
.chain(prefixes.into_iter())
|
||||
.collect();
|
||||
Ok(res)
|
||||
},
|
||||
&format!("listdir {path:?}"),
|
||||
&self.cancel,
|
||||
)
|
||||
.await;
|
||||
debug!(?res, "returning");
|
||||
res
|
||||
}
|
||||
|
||||
#[instrument(level = tracing::Level::DEBUG, skip_all, fields(%path))]
|
||||
pub async fn get(&self, path: &RemotePath) -> Result<Bytes, DownloadError> {
|
||||
let res = crate::tenant::remote_timeline_client::download::download_retry_forever(
|
||||
|| async {
|
||||
let Download {
|
||||
download_stream, ..
|
||||
} = self
|
||||
.storage
|
||||
.download(path, &DownloadOpts::default(), &self.cancel)
|
||||
.await?;
|
||||
let mut reader = tokio_util::io::StreamReader::new(download_stream);
|
||||
|
||||
// XXX optimize this, can we get the capacity hint from somewhere?
|
||||
let mut buf = Vec::new();
|
||||
tokio::io::copy_buf(&mut reader, &mut buf).await?;
|
||||
Ok(Bytes::from(buf))
|
||||
},
|
||||
&format!("download {path:?}"),
|
||||
&self.cancel,
|
||||
)
|
||||
.await;
|
||||
debug!(len = res.as_ref().ok().map(|buf| buf.len()), "done");
|
||||
res
|
||||
}
|
||||
|
||||
pub async fn get_spec(&self) -> Result<Option<importbucket_format::Spec>, anyhow::Error> {
|
||||
self.get_json(&RemotePath::from_string("spec.json").unwrap())
|
||||
.await
|
||||
.context("get spec")
|
||||
}
|
||||
|
||||
#[instrument(level = tracing::Level::DEBUG, skip_all, fields(%path))]
|
||||
pub async fn get_json<T: DeserializeOwned>(
|
||||
&self,
|
||||
path: &RemotePath,
|
||||
) -> Result<Option<T>, DownloadError> {
|
||||
let buf = match self.get(path).await {
|
||||
Ok(buf) => buf,
|
||||
Err(DownloadError::NotFound) => return Ok(None),
|
||||
Err(err) => return Err(err),
|
||||
};
|
||||
let res = serde_json::from_slice(&buf)
|
||||
.context("serialize")
|
||||
// TODO: own error type
|
||||
.map_err(DownloadError::Other)?;
|
||||
Ok(Some(res))
|
||||
}
|
||||
|
||||
#[instrument(level = tracing::Level::DEBUG, skip_all, fields(%path))]
|
||||
pub async fn put_json<T>(&self, path: &RemotePath, value: &T) -> anyhow::Result<()>
|
||||
where
|
||||
T: serde::Serialize,
|
||||
{
|
||||
let buf = serde_json::to_vec(value)?;
|
||||
let bytes = Bytes::from(buf);
|
||||
utils::backoff::retry(
|
||||
|| async {
|
||||
let size = bytes.len();
|
||||
let bytes = futures::stream::once(futures::future::ready(Ok(bytes.clone())));
|
||||
self.storage
|
||||
.upload_storage_object(bytes, size, path, &self.cancel)
|
||||
.await
|
||||
},
|
||||
remote_storage::TimeoutOrCancel::caused_by_cancel,
|
||||
1,
|
||||
u32::MAX,
|
||||
&format!("put json {path}"),
|
||||
&self.cancel,
|
||||
)
|
||||
.await
|
||||
.expect("practically infinite retries")
|
||||
}
|
||||
|
||||
#[instrument(level = tracing::Level::DEBUG, skip_all, fields(%path))]
|
||||
pub async fn get_range(
|
||||
&self,
|
||||
path: &RemotePath,
|
||||
start_inclusive: u64,
|
||||
end_exclusive: u64,
|
||||
) -> Result<Vec<u8>, DownloadError> {
|
||||
let len = end_exclusive
|
||||
.checked_sub(start_inclusive)
|
||||
.unwrap()
|
||||
.into_usize();
|
||||
let res = crate::tenant::remote_timeline_client::download::download_retry_forever(
|
||||
|| async {
|
||||
let Download {
|
||||
download_stream, ..
|
||||
} = self
|
||||
.storage
|
||||
.download(
|
||||
path,
|
||||
&DownloadOpts {
|
||||
etag: None,
|
||||
byte_start: Bound::Included(start_inclusive),
|
||||
byte_end: Bound::Excluded(end_exclusive)
|
||||
},
|
||||
&self.cancel)
|
||||
.await?;
|
||||
let mut reader = tokio_util::io::StreamReader::new(download_stream);
|
||||
|
||||
let mut buf = Vec::with_capacity(len);
|
||||
tokio::io::copy_buf(&mut reader, &mut buf).await?;
|
||||
Ok(buf)
|
||||
},
|
||||
&format!("download range len=0x{len:x} [0x{start_inclusive:x},0x{end_exclusive:x}) from {path:?}"),
|
||||
&self.cancel,
|
||||
)
|
||||
.await;
|
||||
debug!(len = res.as_ref().ok().map(|buf| buf.len()), "done");
|
||||
res
|
||||
}
|
||||
|
||||
pub fn pgdata(&self) -> RemotePath {
|
||||
RemotePath::from_string("pgdata").unwrap()
|
||||
}
|
||||
|
||||
pub async fn get_control_file(&self) -> Result<ControlFile, anyhow::Error> {
|
||||
let control_file_path = self.pgdata().join("global/pg_control");
|
||||
info!("get control file from {control_file_path}");
|
||||
let control_file_buf = self.get(&control_file_path).await?;
|
||||
ControlFile::new(control_file_buf)
|
||||
}
|
||||
}
|
||||
|
||||
pub struct ControlFile {
|
||||
control_file_data: ControlFileData,
|
||||
control_file_buf: Bytes,
|
||||
}
|
||||
|
||||
impl ControlFile {
|
||||
pub(crate) fn new(control_file_buf: Bytes) -> Result<Self, anyhow::Error> {
|
||||
// XXX ControlFileData is version-specific, we're always using v14 here. v17 had changes.
|
||||
let control_file_data = ControlFileData::decode(&control_file_buf)?;
|
||||
let control_file = ControlFile {
|
||||
control_file_data,
|
||||
control_file_buf,
|
||||
};
|
||||
control_file.try_pg_version()?; // so that we can offer infallible pg_version()
|
||||
Ok(control_file)
|
||||
}
|
||||
pub(crate) fn base_lsn(&self) -> Lsn {
|
||||
Lsn(self.control_file_data.checkPoint).align()
|
||||
}
|
||||
pub(crate) fn pg_version(&self) -> u32 {
|
||||
self.try_pg_version()
|
||||
.expect("prepare() checks that try_pg_version doesn't error")
|
||||
}
|
||||
pub(crate) fn control_file_data(&self) -> &ControlFileData {
|
||||
&self.control_file_data
|
||||
}
|
||||
pub(crate) fn control_file_buf(&self) -> &Bytes {
|
||||
&self.control_file_buf
|
||||
}
|
||||
fn try_pg_version(&self) -> anyhow::Result<u32> {
|
||||
Ok(match self.control_file_data.catalog_version_no {
|
||||
// thesea are from catversion.h
|
||||
202107181 => 14,
|
||||
202209061 => 15,
|
||||
202307071 => 16,
|
||||
/* XXX pg17 */
|
||||
catversion => {
|
||||
anyhow::bail!("unrecognized catalog version {catversion}")
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
@@ -1,20 +0,0 @@
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
#[derive(Deserialize, Serialize, Debug, Clone, PartialEq, Eq)]
|
||||
pub struct PgdataStatus {
|
||||
pub done: bool,
|
||||
// TODO: remaining fields
|
||||
}
|
||||
|
||||
#[derive(Deserialize, Serialize, Debug, Clone, PartialEq, Eq)]
|
||||
pub struct ShardStatus {
|
||||
pub done: bool,
|
||||
// TODO: remaining fields
|
||||
}
|
||||
|
||||
// TODO: dedupe with fast_import code
|
||||
#[derive(Deserialize, Serialize, Debug, Clone, PartialEq, Eq)]
|
||||
pub struct Spec {
|
||||
pub project_id: String,
|
||||
pub branch_id: String,
|
||||
}
|
||||
@@ -1,68 +0,0 @@
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
#[cfg(feature = "testing")]
|
||||
use camino::Utf8PathBuf;
|
||||
|
||||
#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq)]
|
||||
pub enum Root {
|
||||
V1(V1),
|
||||
}
|
||||
#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq)]
|
||||
pub enum V1 {
|
||||
InProgress(InProgress),
|
||||
Done(Done),
|
||||
}
|
||||
|
||||
#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq)]
|
||||
#[serde(transparent)]
|
||||
pub struct IdempotencyKey(String);
|
||||
|
||||
impl IdempotencyKey {
|
||||
pub fn new(s: String) -> Self {
|
||||
Self(s)
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq)]
|
||||
pub struct InProgress {
|
||||
pub idempotency_key: IdempotencyKey,
|
||||
pub location: Location,
|
||||
pub started_at: chrono::NaiveDateTime,
|
||||
}
|
||||
|
||||
#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq)]
|
||||
pub struct Done {
|
||||
pub idempotency_key: IdempotencyKey,
|
||||
pub started_at: chrono::NaiveDateTime,
|
||||
pub finished_at: chrono::NaiveDateTime,
|
||||
}
|
||||
|
||||
#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq)]
|
||||
pub enum Location {
|
||||
#[cfg(feature = "testing")]
|
||||
LocalFs { path: Utf8PathBuf },
|
||||
AwsS3 {
|
||||
region: String,
|
||||
bucket: String,
|
||||
key: String,
|
||||
},
|
||||
}
|
||||
|
||||
impl Root {
|
||||
pub fn is_done(&self) -> bool {
|
||||
match self {
|
||||
Root::V1(v1) => match v1 {
|
||||
V1::Done(_) => true,
|
||||
V1::InProgress(_) => false,
|
||||
},
|
||||
}
|
||||
}
|
||||
pub fn idempotency_key(&self) -> &IdempotencyKey {
|
||||
match self {
|
||||
Root::V1(v1) => match v1 {
|
||||
V1::InProgress(in_progress) => &in_progress.idempotency_key,
|
||||
V1::Done(done) => &done.idempotency_key,
|
||||
},
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -1,119 +0,0 @@
|
||||
//! FIXME: most of this is copy-paste from mgmt_api.rs ; dedupe into a `reqwest_utils::Client` crate.
|
||||
use pageserver_client::mgmt_api::{Error, ResponseErrorMessageExt};
|
||||
use serde::{Deserialize, Serialize};
|
||||
use tokio_util::sync::CancellationToken;
|
||||
use tracing::error;
|
||||
|
||||
use crate::config::PageServerConf;
|
||||
use reqwest::Method;
|
||||
|
||||
use super::importbucket_format::Spec;
|
||||
|
||||
pub struct Client {
|
||||
base_url: String,
|
||||
authorization_header: Option<String>,
|
||||
client: reqwest::Client,
|
||||
cancel: CancellationToken,
|
||||
}
|
||||
|
||||
pub type Result<T> = std::result::Result<T, Error>;
|
||||
|
||||
#[derive(Serialize, Deserialize, Debug)]
|
||||
struct ImportProgressRequest {
|
||||
// no fields yet, not sure if there every will be any
|
||||
}
|
||||
|
||||
#[derive(Serialize, Deserialize, Debug)]
|
||||
struct ImportProgressResponse {
|
||||
// we don't care
|
||||
}
|
||||
|
||||
impl Client {
|
||||
pub fn new(conf: &PageServerConf, cancel: CancellationToken) -> anyhow::Result<Self> {
|
||||
let Some(ref base_url) = conf.import_pgdata_upcall_api else {
|
||||
anyhow::bail!("import_pgdata_upcall_api is not configured")
|
||||
};
|
||||
Ok(Self {
|
||||
base_url: base_url.to_string(),
|
||||
client: reqwest::Client::new(),
|
||||
cancel,
|
||||
authorization_header: conf
|
||||
.import_pgdata_upcall_api_token
|
||||
.as_ref()
|
||||
.map(|secret_string| secret_string.get_contents())
|
||||
.map(|jwt| format!("Bearer {jwt}")),
|
||||
})
|
||||
}
|
||||
|
||||
fn start_request<U: reqwest::IntoUrl>(
|
||||
&self,
|
||||
method: Method,
|
||||
uri: U,
|
||||
) -> reqwest::RequestBuilder {
|
||||
let req = self.client.request(method, uri);
|
||||
if let Some(value) = &self.authorization_header {
|
||||
req.header(reqwest::header::AUTHORIZATION, value)
|
||||
} else {
|
||||
req
|
||||
}
|
||||
}
|
||||
|
||||
async fn request_noerror<B: serde::Serialize, U: reqwest::IntoUrl>(
|
||||
&self,
|
||||
method: Method,
|
||||
uri: U,
|
||||
body: B,
|
||||
) -> Result<reqwest::Response> {
|
||||
self.start_request(method, uri)
|
||||
.json(&body)
|
||||
.send()
|
||||
.await
|
||||
.map_err(Error::ReceiveBody)
|
||||
}
|
||||
|
||||
async fn request<B: serde::Serialize, U: reqwest::IntoUrl>(
|
||||
&self,
|
||||
method: Method,
|
||||
uri: U,
|
||||
body: B,
|
||||
) -> Result<reqwest::Response> {
|
||||
let res = self.request_noerror(method, uri, body).await?;
|
||||
let response = res.error_from_body().await?;
|
||||
Ok(response)
|
||||
}
|
||||
|
||||
pub async fn send_progress_once(&self, spec: &Spec) -> Result<()> {
|
||||
let url = format!(
|
||||
"{}/projects/{}/branches/{}/import_progress",
|
||||
self.base_url, spec.project_id, spec.branch_id
|
||||
);
|
||||
let ImportProgressResponse {} = self
|
||||
.request(Method::POST, url, &ImportProgressRequest {})
|
||||
.await?
|
||||
.json()
|
||||
.await
|
||||
.map_err(Error::ReceiveBody)?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub async fn send_progress_until_success(&self, spec: &Spec) -> anyhow::Result<()> {
|
||||
loop {
|
||||
match self.send_progress_once(spec).await {
|
||||
Ok(()) => return Ok(()),
|
||||
Err(Error::Cancelled) => return Err(anyhow::anyhow!("cancelled")),
|
||||
Err(err) => {
|
||||
error!(?err, "error sending progress, retrying");
|
||||
if tokio::time::timeout(
|
||||
std::time::Duration::from_secs(10),
|
||||
self.cancel.cancelled(),
|
||||
)
|
||||
.await
|
||||
.is_ok()
|
||||
{
|
||||
anyhow::bail!("cancelled while sending early progress update");
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -3,7 +3,7 @@ use std::{collections::hash_map::Entry, fs, sync::Arc};
|
||||
use anyhow::Context;
|
||||
use camino::Utf8PathBuf;
|
||||
use tracing::{error, info, info_span};
|
||||
use utils::{fs_ext, id::TimelineId, lsn::Lsn, sync::gate::GateGuard};
|
||||
use utils::{fs_ext, id::TimelineId, lsn::Lsn};
|
||||
|
||||
use crate::{
|
||||
context::RequestContext,
|
||||
@@ -23,14 +23,14 @@ use super::Timeline;
|
||||
pub struct UninitializedTimeline<'t> {
|
||||
pub(crate) owning_tenant: &'t Tenant,
|
||||
timeline_id: TimelineId,
|
||||
raw_timeline: Option<(Arc<Timeline>, TimelineCreateGuard)>,
|
||||
raw_timeline: Option<(Arc<Timeline>, TimelineCreateGuard<'t>)>,
|
||||
}
|
||||
|
||||
impl<'t> UninitializedTimeline<'t> {
|
||||
pub(crate) fn new(
|
||||
owning_tenant: &'t Tenant,
|
||||
timeline_id: TimelineId,
|
||||
raw_timeline: Option<(Arc<Timeline>, TimelineCreateGuard)>,
|
||||
raw_timeline: Option<(Arc<Timeline>, TimelineCreateGuard<'t>)>,
|
||||
) -> Self {
|
||||
Self {
|
||||
owning_tenant,
|
||||
@@ -87,10 +87,6 @@ impl<'t> UninitializedTimeline<'t> {
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) fn finish_creation_myself(&mut self) -> (Arc<Timeline>, TimelineCreateGuard) {
|
||||
self.raw_timeline.take().expect("already checked")
|
||||
}
|
||||
|
||||
/// Prepares timeline data by loading it from the basebackup archive.
|
||||
pub(crate) async fn import_basebackup_from_tar(
|
||||
self,
|
||||
@@ -171,10 +167,9 @@ pub(crate) fn cleanup_timeline_directory(create_guard: TimelineCreateGuard) {
|
||||
/// A guard for timeline creations in process: as long as this object exists, the timeline ID
|
||||
/// is kept in `[Tenant::timelines_creating]` to exclude concurrent attempts to create the same timeline.
|
||||
#[must_use]
|
||||
pub(crate) struct TimelineCreateGuard {
|
||||
pub(crate) _tenant_gate_guard: GateGuard,
|
||||
pub(crate) owning_tenant: Arc<Tenant>,
|
||||
pub(crate) timeline_id: TimelineId,
|
||||
pub(crate) struct TimelineCreateGuard<'t> {
|
||||
owning_tenant: &'t Tenant,
|
||||
timeline_id: TimelineId,
|
||||
pub(crate) timeline_path: Utf8PathBuf,
|
||||
pub(crate) idempotency: CreateTimelineIdempotency,
|
||||
}
|
||||
@@ -189,27 +184,20 @@ pub(crate) enum TimelineExclusionError {
|
||||
},
|
||||
#[error("Already creating")]
|
||||
AlreadyCreating,
|
||||
#[error("Shutting down")]
|
||||
ShuttingDown,
|
||||
|
||||
// e.g. I/O errors, or some failure deep in postgres initdb
|
||||
#[error(transparent)]
|
||||
Other(#[from] anyhow::Error),
|
||||
}
|
||||
|
||||
impl TimelineCreateGuard {
|
||||
impl<'t> TimelineCreateGuard<'t> {
|
||||
pub(crate) fn new(
|
||||
owning_tenant: &Arc<Tenant>,
|
||||
owning_tenant: &'t Tenant,
|
||||
timeline_id: TimelineId,
|
||||
timeline_path: Utf8PathBuf,
|
||||
idempotency: CreateTimelineIdempotency,
|
||||
allow_offloaded: bool,
|
||||
) -> Result<Self, TimelineExclusionError> {
|
||||
let _tenant_gate_guard = owning_tenant
|
||||
.gate
|
||||
.enter()
|
||||
.map_err(|_| TimelineExclusionError::ShuttingDown)?;
|
||||
|
||||
// Lock order: this is the only place we take both locks. During drop() we only
|
||||
// lock creating_timelines
|
||||
let timelines = owning_tenant.timelines.lock().unwrap();
|
||||
@@ -237,12 +225,8 @@ impl TimelineCreateGuard {
|
||||
return Err(TimelineExclusionError::AlreadyCreating);
|
||||
}
|
||||
creating_timelines.insert(timeline_id);
|
||||
drop(creating_timelines);
|
||||
drop(timelines_offloaded);
|
||||
drop(timelines);
|
||||
Ok(Self {
|
||||
_tenant_gate_guard,
|
||||
owning_tenant: Arc::clone(owning_tenant),
|
||||
owning_tenant,
|
||||
timeline_id,
|
||||
timeline_path,
|
||||
idempotency,
|
||||
@@ -250,7 +234,7 @@ impl TimelineCreateGuard {
|
||||
}
|
||||
}
|
||||
|
||||
impl Drop for TimelineCreateGuard {
|
||||
impl Drop for TimelineCreateGuard<'_> {
|
||||
fn drop(&mut self) {
|
||||
self.owning_tenant
|
||||
.timelines_creating
|
||||
|
||||
@@ -3,7 +3,6 @@ use super::storage_layer::ResidentLayer;
|
||||
use crate::tenant::metadata::TimelineMetadata;
|
||||
use crate::tenant::remote_timeline_client::index::IndexPart;
|
||||
use crate::tenant::remote_timeline_client::index::LayerFileMetadata;
|
||||
use std::collections::HashSet;
|
||||
use std::collections::{HashMap, VecDeque};
|
||||
use std::fmt::Debug;
|
||||
|
||||
@@ -15,6 +14,7 @@ use utils::lsn::AtomicLsn;
|
||||
use std::sync::atomic::AtomicU32;
|
||||
use utils::lsn::Lsn;
|
||||
|
||||
#[cfg(feature = "testing")]
|
||||
use utils::generation::Generation;
|
||||
|
||||
// clippy warns that Uninitialized is much smaller than Initialized, which wastes
|
||||
@@ -38,12 +38,6 @@ impl UploadQueue {
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Copy, Clone, PartialEq, Eq, Hash, Debug)]
|
||||
pub(crate) enum OpType {
|
||||
MayReorder,
|
||||
FlushDeletion,
|
||||
}
|
||||
|
||||
/// This keeps track of queued and in-progress tasks.
|
||||
pub(crate) struct UploadQueueInitialized {
|
||||
/// Counter to assign task IDs
|
||||
@@ -94,12 +88,6 @@ pub(crate) struct UploadQueueInitialized {
|
||||
#[cfg(feature = "testing")]
|
||||
pub(crate) dangling_files: HashMap<LayerName, Generation>,
|
||||
|
||||
/// Ensure we order file operations correctly.
|
||||
pub(crate) recently_deleted: HashSet<(LayerName, Generation)>,
|
||||
|
||||
/// Deletions that are blocked by the tenant configuration
|
||||
pub(crate) blocked_deletions: Vec<Delete>,
|
||||
|
||||
/// Set to true when we have inserted the `UploadOp::Shutdown` into the `inprogress_tasks`.
|
||||
pub(crate) shutting_down: bool,
|
||||
|
||||
@@ -192,8 +180,6 @@ impl UploadQueue {
|
||||
queued_operations: VecDeque::new(),
|
||||
#[cfg(feature = "testing")]
|
||||
dangling_files: HashMap::new(),
|
||||
recently_deleted: HashSet::new(),
|
||||
blocked_deletions: Vec::new(),
|
||||
shutting_down: false,
|
||||
shutdown_ready: Arc::new(tokio::sync::Semaphore::new(0)),
|
||||
};
|
||||
@@ -234,8 +220,6 @@ impl UploadQueue {
|
||||
queued_operations: VecDeque::new(),
|
||||
#[cfg(feature = "testing")]
|
||||
dangling_files: HashMap::new(),
|
||||
recently_deleted: HashSet::new(),
|
||||
blocked_deletions: Vec::new(),
|
||||
shutting_down: false,
|
||||
shutdown_ready: Arc::new(tokio::sync::Semaphore::new(0)),
|
||||
};
|
||||
@@ -286,15 +270,15 @@ pub(crate) struct UploadTask {
|
||||
|
||||
/// A deletion of some layers within the lifetime of a timeline. This is not used
|
||||
/// for timeline deletion, which skips this queue and goes directly to DeletionQueue.
|
||||
#[derive(Debug, Clone)]
|
||||
#[derive(Debug)]
|
||||
pub(crate) struct Delete {
|
||||
pub(crate) layers: Vec<(LayerName, LayerFileMetadata)>,
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
pub(crate) enum UploadOp {
|
||||
/// Upload a layer file. The last field indicates the last operation for thie file.
|
||||
UploadLayer(ResidentLayer, LayerFileMetadata, Option<OpType>),
|
||||
/// Upload a layer file
|
||||
UploadLayer(ResidentLayer, LayerFileMetadata),
|
||||
|
||||
/// Upload a index_part.json file
|
||||
UploadMetadata {
|
||||
@@ -316,11 +300,11 @@ pub(crate) enum UploadOp {
|
||||
impl std::fmt::Display for UploadOp {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
|
||||
match self {
|
||||
UploadOp::UploadLayer(layer, metadata, mode) => {
|
||||
UploadOp::UploadLayer(layer, metadata) => {
|
||||
write!(
|
||||
f,
|
||||
"UploadLayer({}, size={:?}, gen={:?}, mode={:?})",
|
||||
layer, metadata.file_size, metadata.generation, mode
|
||||
"UploadLayer({}, size={:?}, gen={:?})",
|
||||
layer, metadata.file_size, metadata.generation
|
||||
)
|
||||
}
|
||||
UploadOp::UploadMetadata { uploaded, .. } => {
|
||||
|
||||
159
poetry.lock
generated
159
poetry.lock
generated
@@ -1,4 +1,4 @@
|
||||
# This file is automatically @generated by Poetry 1.8.4 and should not be changed by hand.
|
||||
# This file is automatically @generated by Poetry 1.8.3 and should not be changed by hand.
|
||||
|
||||
[[package]]
|
||||
name = "aiohappyeyeballs"
|
||||
@@ -114,6 +114,7 @@ files = [
|
||||
[package.dependencies]
|
||||
aiohappyeyeballs = ">=2.3.0"
|
||||
aiosignal = ">=1.1.2"
|
||||
async-timeout = {version = ">=4.0,<6.0", markers = "python_version < \"3.11\""}
|
||||
attrs = ">=17.3.0"
|
||||
frozenlist = ">=1.1.1"
|
||||
multidict = ">=4.5,<7.0"
|
||||
@@ -218,8 +219,10 @@ files = [
|
||||
]
|
||||
|
||||
[package.dependencies]
|
||||
exceptiongroup = {version = ">=1.0.2", markers = "python_version < \"3.11\""}
|
||||
idna = ">=2.8"
|
||||
sniffio = ">=1.1"
|
||||
typing-extensions = {version = ">=4.1", markers = "python_version < \"3.11\""}
|
||||
|
||||
[package.extras]
|
||||
doc = ["Sphinx (>=7)", "packaging", "sphinx-autodoc-typehints (>=1.2.0)", "sphinx-rtd-theme"]
|
||||
@@ -734,7 +737,10 @@ files = [
|
||||
[package.dependencies]
|
||||
jmespath = ">=0.7.1,<2.0.0"
|
||||
python-dateutil = ">=2.1,<3.0.0"
|
||||
urllib3 = {version = ">=1.25.4,<2.1", markers = "python_version >= \"3.10\""}
|
||||
urllib3 = [
|
||||
{version = ">=1.25.4,<1.27", markers = "python_version < \"3.10\""},
|
||||
{version = ">=1.25.4,<2.1", markers = "python_version >= \"3.10\""},
|
||||
]
|
||||
|
||||
[package.extras]
|
||||
crt = ["awscrt (==0.19.19)"]
|
||||
@@ -1063,6 +1069,20 @@ docs = ["myst-parser (==0.18.0)", "sphinx (==5.1.1)"]
|
||||
ssh = ["paramiko (>=2.4.3)"]
|
||||
websockets = ["websocket-client (>=1.3.0)"]
|
||||
|
||||
[[package]]
|
||||
name = "exceptiongroup"
|
||||
version = "1.1.1"
|
||||
description = "Backport of PEP 654 (exception groups)"
|
||||
optional = false
|
||||
python-versions = ">=3.7"
|
||||
files = [
|
||||
{file = "exceptiongroup-1.1.1-py3-none-any.whl", hash = "sha256:232c37c63e4f682982c8b6459f33a8981039e5fb8756b2074364e5055c498c9e"},
|
||||
{file = "exceptiongroup-1.1.1.tar.gz", hash = "sha256:d484c3090ba2889ae2928419117447a14daf3c1231d5e30d0aae34f354f01785"},
|
||||
]
|
||||
|
||||
[package.extras]
|
||||
test = ["pytest (>=6)"]
|
||||
|
||||
[[package]]
|
||||
name = "execnet"
|
||||
version = "1.9.0"
|
||||
@@ -1090,6 +1110,7 @@ files = [
|
||||
|
||||
[package.dependencies]
|
||||
click = ">=8.0"
|
||||
importlib-metadata = {version = ">=3.6.0", markers = "python_version < \"3.10\""}
|
||||
itsdangerous = ">=2.0"
|
||||
Jinja2 = ">=3.0"
|
||||
Werkzeug = ">=2.2.2"
|
||||
@@ -1298,6 +1319,25 @@ files = [
|
||||
{file = "idna-3.7.tar.gz", hash = "sha256:028ff3aadf0609c1fd278d8ea3089299412a7a8b9bd005dd08b9f8285bcb5cfc"},
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "importlib-metadata"
|
||||
version = "4.12.0"
|
||||
description = "Read metadata from Python packages"
|
||||
optional = false
|
||||
python-versions = ">=3.7"
|
||||
files = [
|
||||
{file = "importlib_metadata-4.12.0-py3-none-any.whl", hash = "sha256:7401a975809ea1fdc658c3aa4f78cc2195a0e019c5cbc4c06122884e9ae80c23"},
|
||||
{file = "importlib_metadata-4.12.0.tar.gz", hash = "sha256:637245b8bab2b6502fcbc752cc4b7a6f6243bb02b31c5c26156ad103d3d45670"},
|
||||
]
|
||||
|
||||
[package.dependencies]
|
||||
zipp = ">=0.5"
|
||||
|
||||
[package.extras]
|
||||
docs = ["jaraco.packaging (>=9)", "rst.linker (>=1.9)", "sphinx"]
|
||||
perf = ["ipython"]
|
||||
testing = ["flufl.flake8", "importlib-resources (>=1.3)", "packaging", "pyfakefs", "pytest (>=6)", "pytest-black (>=0.3.7)", "pytest-checkdocs (>=2.4)", "pytest-cov", "pytest-enabler (>=1.3)", "pytest-flake8", "pytest-mypy (>=0.9.1)", "pytest-perf (>=0.9.2)"]
|
||||
|
||||
[[package]]
|
||||
name = "iniconfig"
|
||||
version = "1.1.1"
|
||||
@@ -1858,54 +1898,48 @@ files = [
|
||||
|
||||
[[package]]
|
||||
name = "mypy"
|
||||
version = "1.13.0"
|
||||
version = "1.3.0"
|
||||
description = "Optional static typing for Python"
|
||||
optional = false
|
||||
python-versions = ">=3.8"
|
||||
python-versions = ">=3.7"
|
||||
files = [
|
||||
{file = "mypy-1.13.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:6607e0f1dd1fb7f0aca14d936d13fd19eba5e17e1cd2a14f808fa5f8f6d8f60a"},
|
||||
{file = "mypy-1.13.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:8a21be69bd26fa81b1f80a61ee7ab05b076c674d9b18fb56239d72e21d9f4c80"},
|
||||
{file = "mypy-1.13.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:7b2353a44d2179846a096e25691d54d59904559f4232519d420d64da6828a3a7"},
|
||||
{file = "mypy-1.13.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:0730d1c6a2739d4511dc4253f8274cdd140c55c32dfb0a4cf8b7a43f40abfa6f"},
|
||||
{file = "mypy-1.13.0-cp310-cp310-win_amd64.whl", hash = "sha256:c5fc54dbb712ff5e5a0fca797e6e0aa25726c7e72c6a5850cfd2adbc1eb0a372"},
|
||||
{file = "mypy-1.13.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:581665e6f3a8a9078f28d5502f4c334c0c8d802ef55ea0e7276a6e409bc0d82d"},
|
||||
{file = "mypy-1.13.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:3ddb5b9bf82e05cc9a627e84707b528e5c7caaa1c55c69e175abb15a761cec2d"},
|
||||
{file = "mypy-1.13.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:20c7ee0bc0d5a9595c46f38beb04201f2620065a93755704e141fcac9f59db2b"},
|
||||
{file = "mypy-1.13.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:3790ded76f0b34bc9c8ba4def8f919dd6a46db0f5a6610fb994fe8efdd447f73"},
|
||||
{file = "mypy-1.13.0-cp311-cp311-win_amd64.whl", hash = "sha256:51f869f4b6b538229c1d1bcc1dd7d119817206e2bc54e8e374b3dfa202defcca"},
|
||||
{file = "mypy-1.13.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:5c7051a3461ae84dfb5dd15eff5094640c61c5f22257c8b766794e6dd85e72d5"},
|
||||
{file = "mypy-1.13.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:39bb21c69a5d6342f4ce526e4584bc5c197fd20a60d14a8624d8743fffb9472e"},
|
||||
{file = "mypy-1.13.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:164f28cb9d6367439031f4c81e84d3ccaa1e19232d9d05d37cb0bd880d3f93c2"},
|
||||
{file = "mypy-1.13.0-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:a4c1bfcdbce96ff5d96fc9b08e3831acb30dc44ab02671eca5953eadad07d6d0"},
|
||||
{file = "mypy-1.13.0-cp312-cp312-win_amd64.whl", hash = "sha256:a0affb3a79a256b4183ba09811e3577c5163ed06685e4d4b46429a271ba174d2"},
|
||||
{file = "mypy-1.13.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:a7b44178c9760ce1a43f544e595d35ed61ac2c3de306599fa59b38a6048e1aa7"},
|
||||
{file = "mypy-1.13.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:5d5092efb8516d08440e36626f0153b5006d4088c1d663d88bf79625af3d1d62"},
|
||||
{file = "mypy-1.13.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:de2904956dac40ced10931ac967ae63c5089bd498542194b436eb097a9f77bc8"},
|
||||
{file = "mypy-1.13.0-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:7bfd8836970d33c2105562650656b6846149374dc8ed77d98424b40b09340ba7"},
|
||||
{file = "mypy-1.13.0-cp313-cp313-win_amd64.whl", hash = "sha256:9f73dba9ec77acb86457a8fc04b5239822df0c14a082564737833d2963677dbc"},
|
||||
{file = "mypy-1.13.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:100fac22ce82925f676a734af0db922ecfea991e1d7ec0ceb1e115ebe501301a"},
|
||||
{file = "mypy-1.13.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:7bcb0bb7f42a978bb323a7c88f1081d1b5dee77ca86f4100735a6f541299d8fb"},
|
||||
{file = "mypy-1.13.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:bde31fc887c213e223bbfc34328070996061b0833b0a4cfec53745ed61f3519b"},
|
||||
{file = "mypy-1.13.0-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:07de989f89786f62b937851295ed62e51774722e5444a27cecca993fc3f9cd74"},
|
||||
{file = "mypy-1.13.0-cp38-cp38-win_amd64.whl", hash = "sha256:4bde84334fbe19bad704b3f5b78c4abd35ff1026f8ba72b29de70dda0916beb6"},
|
||||
{file = "mypy-1.13.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:0246bcb1b5de7f08f2826451abd947bf656945209b140d16ed317f65a17dc7dc"},
|
||||
{file = "mypy-1.13.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:7f5b7deae912cf8b77e990b9280f170381fdfbddf61b4ef80927edd813163732"},
|
||||
{file = "mypy-1.13.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:7029881ec6ffb8bc233a4fa364736789582c738217b133f1b55967115288a2bc"},
|
||||
{file = "mypy-1.13.0-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:3e38b980e5681f28f033f3be86b099a247b13c491f14bb8b1e1e134d23bb599d"},
|
||||
{file = "mypy-1.13.0-cp39-cp39-win_amd64.whl", hash = "sha256:a6789be98a2017c912ae6ccb77ea553bbaf13d27605d2ca20a76dfbced631b24"},
|
||||
{file = "mypy-1.13.0-py3-none-any.whl", hash = "sha256:9c250883f9fd81d212e0952c92dbfcc96fc237f4b7c92f56ac81fd48460b3e5a"},
|
||||
{file = "mypy-1.13.0.tar.gz", hash = "sha256:0291a61b6fbf3e6673e3405cfcc0e7650bebc7939659fdca2702958038bd835e"},
|
||||
{file = "mypy-1.3.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:c1eb485cea53f4f5284e5baf92902cd0088b24984f4209e25981cc359d64448d"},
|
||||
{file = "mypy-1.3.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:4c99c3ecf223cf2952638da9cd82793d8f3c0c5fa8b6ae2b2d9ed1e1ff51ba85"},
|
||||
{file = "mypy-1.3.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:550a8b3a19bb6589679a7c3c31f64312e7ff482a816c96e0cecec9ad3a7564dd"},
|
||||
{file = "mypy-1.3.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:cbc07246253b9e3d7d74c9ff948cd0fd7a71afcc2b77c7f0a59c26e9395cb152"},
|
||||
{file = "mypy-1.3.0-cp310-cp310-win_amd64.whl", hash = "sha256:a22435632710a4fcf8acf86cbd0d69f68ac389a3892cb23fbad176d1cddaf228"},
|
||||
{file = "mypy-1.3.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:6e33bb8b2613614a33dff70565f4c803f889ebd2f859466e42b46e1df76018dd"},
|
||||
{file = "mypy-1.3.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:7d23370d2a6b7a71dc65d1266f9a34e4cde9e8e21511322415db4b26f46f6b8c"},
|
||||
{file = "mypy-1.3.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:658fe7b674769a0770d4b26cb4d6f005e88a442fe82446f020be8e5f5efb2fae"},
|
||||
{file = "mypy-1.3.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:6e42d29e324cdda61daaec2336c42512e59c7c375340bd202efa1fe0f7b8f8ca"},
|
||||
{file = "mypy-1.3.0-cp311-cp311-win_amd64.whl", hash = "sha256:d0b6c62206e04061e27009481cb0ec966f7d6172b5b936f3ead3d74f29fe3dcf"},
|
||||
{file = "mypy-1.3.0-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:76ec771e2342f1b558c36d49900dfe81d140361dd0d2df6cd71b3db1be155409"},
|
||||
{file = "mypy-1.3.0-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ebc95f8386314272bbc817026f8ce8f4f0d2ef7ae44f947c4664efac9adec929"},
|
||||
{file = "mypy-1.3.0-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:faff86aa10c1aa4a10e1a301de160f3d8fc8703b88c7e98de46b531ff1276a9a"},
|
||||
{file = "mypy-1.3.0-cp37-cp37m-win_amd64.whl", hash = "sha256:8c5979d0deb27e0f4479bee18ea0f83732a893e81b78e62e2dda3e7e518c92ee"},
|
||||
{file = "mypy-1.3.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:c5d2cc54175bab47011b09688b418db71403aefad07cbcd62d44010543fc143f"},
|
||||
{file = "mypy-1.3.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:87df44954c31d86df96c8bd6e80dfcd773473e877ac6176a8e29898bfb3501cb"},
|
||||
{file = "mypy-1.3.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:473117e310febe632ddf10e745a355714e771ffe534f06db40702775056614c4"},
|
||||
{file = "mypy-1.3.0-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:74bc9b6e0e79808bf8678d7678b2ae3736ea72d56eede3820bd3849823e7f305"},
|
||||
{file = "mypy-1.3.0-cp38-cp38-win_amd64.whl", hash = "sha256:44797d031a41516fcf5cbfa652265bb994e53e51994c1bd649ffcd0c3a7eccbf"},
|
||||
{file = "mypy-1.3.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:ddae0f39ca146972ff6bb4399f3b2943884a774b8771ea0a8f50e971f5ea5ba8"},
|
||||
{file = "mypy-1.3.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:1c4c42c60a8103ead4c1c060ac3cdd3ff01e18fddce6f1016e08939647a0e703"},
|
||||
{file = "mypy-1.3.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e86c2c6852f62f8f2b24cb7a613ebe8e0c7dc1402c61d36a609174f63e0ff017"},
|
||||
{file = "mypy-1.3.0-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:f9dca1e257d4cc129517779226753dbefb4f2266c4eaad610fc15c6a7e14283e"},
|
||||
{file = "mypy-1.3.0-cp39-cp39-win_amd64.whl", hash = "sha256:95d8d31a7713510685b05fbb18d6ac287a56c8f6554d88c19e73f724a445448a"},
|
||||
{file = "mypy-1.3.0-py3-none-any.whl", hash = "sha256:a8763e72d5d9574d45ce5881962bc8e9046bf7b375b0abf031f3e6811732a897"},
|
||||
{file = "mypy-1.3.0.tar.gz", hash = "sha256:e1f4d16e296f5135624b34e8fb741eb0eadedca90862405b1f1fde2040b9bd11"},
|
||||
]
|
||||
|
||||
[package.dependencies]
|
||||
mypy-extensions = ">=1.0.0"
|
||||
typing-extensions = ">=4.6.0"
|
||||
tomli = {version = ">=1.1.0", markers = "python_version < \"3.11\""}
|
||||
typing-extensions = ">=3.10"
|
||||
|
||||
[package.extras]
|
||||
dmypy = ["psutil (>=4.0)"]
|
||||
faster-cache = ["orjson"]
|
||||
install-types = ["pip"]
|
||||
mypyc = ["setuptools (>=50)"]
|
||||
python2 = ["typed-ast (>=1.4.0,<2)"]
|
||||
reports = ["lxml"]
|
||||
|
||||
[[package]]
|
||||
@@ -2480,9 +2514,11 @@ files = [
|
||||
|
||||
[package.dependencies]
|
||||
colorama = {version = "*", markers = "sys_platform == \"win32\""}
|
||||
exceptiongroup = {version = ">=1.0.0rc8", markers = "python_version < \"3.11\""}
|
||||
iniconfig = "*"
|
||||
packaging = "*"
|
||||
pluggy = ">=0.12,<2.0"
|
||||
tomli = {version = ">=1.0.0", markers = "python_version < \"3.11\""}
|
||||
|
||||
[package.extras]
|
||||
testing = ["argcomplete", "attrs (>=19.2.0)", "hypothesis (>=3.56)", "mock", "nose", "pygments (>=2.7.2)", "requests", "setuptools", "xmlschema"]
|
||||
@@ -2545,7 +2581,10 @@ files = [
|
||||
]
|
||||
|
||||
[package.dependencies]
|
||||
pytest = {version = ">=6.2.4", markers = "python_version >= \"3.10\""}
|
||||
pytest = [
|
||||
{version = ">=5.0", markers = "python_version < \"3.10\""},
|
||||
{version = ">=6.2.4", markers = "python_version >= \"3.10\""},
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "pytest-repeat"
|
||||
@@ -3053,6 +3092,17 @@ files = [
|
||||
{file = "toml-0.10.2.tar.gz", hash = "sha256:b3bda1d108d5dd99f4a20d24d9c348e91c4db7ab1b749200bded2f839ccbe68f"},
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "tomli"
|
||||
version = "2.0.1"
|
||||
description = "A lil' TOML parser"
|
||||
optional = false
|
||||
python-versions = ">=3.7"
|
||||
files = [
|
||||
{file = "tomli-2.0.1-py3-none-any.whl", hash = "sha256:939de3e7a6161af0c887ef91b7d41a53e7c5a1ca976325f429cb46ea9bc30ecc"},
|
||||
{file = "tomli-2.0.1.tar.gz", hash = "sha256:de526c12914f0c550d15924c62d72abc48d6fe7364aa87328337a31007fe8a4f"},
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "types-jwcrypto"
|
||||
version = "1.5.0.20240925"
|
||||
@@ -3309,6 +3359,16 @@ files = [
|
||||
{file = "wrapt-1.14.1-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:8ad85f7f4e20964db4daadcab70b47ab05c7c1cf2a7c1e51087bfaa83831854c"},
|
||||
{file = "wrapt-1.14.1-cp310-cp310-win32.whl", hash = "sha256:a9a52172be0b5aae932bef82a79ec0a0ce87288c7d132946d645eba03f0ad8a8"},
|
||||
{file = "wrapt-1.14.1-cp310-cp310-win_amd64.whl", hash = "sha256:6d323e1554b3d22cfc03cd3243b5bb815a51f5249fdcbb86fda4bf62bab9e164"},
|
||||
{file = "wrapt-1.14.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:ecee4132c6cd2ce5308e21672015ddfed1ff975ad0ac8d27168ea82e71413f55"},
|
||||
{file = "wrapt-1.14.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:2020f391008ef874c6d9e208b24f28e31bcb85ccff4f335f15a3251d222b92d9"},
|
||||
{file = "wrapt-1.14.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2feecf86e1f7a86517cab34ae6c2f081fd2d0dac860cb0c0ded96d799d20b335"},
|
||||
{file = "wrapt-1.14.1-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:240b1686f38ae665d1b15475966fe0472f78e71b1b4903c143a842659c8e4cb9"},
|
||||
{file = "wrapt-1.14.1-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a9008dad07d71f68487c91e96579c8567c98ca4c3881b9b113bc7b33e9fd78b8"},
|
||||
{file = "wrapt-1.14.1-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:6447e9f3ba72f8e2b985a1da758767698efa72723d5b59accefd716e9e8272bf"},
|
||||
{file = "wrapt-1.14.1-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:acae32e13a4153809db37405f5eba5bac5fbe2e2ba61ab227926a22901051c0a"},
|
||||
{file = "wrapt-1.14.1-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:49ef582b7a1152ae2766557f0550a9fcbf7bbd76f43fbdc94dd3bf07cc7168be"},
|
||||
{file = "wrapt-1.14.1-cp311-cp311-win32.whl", hash = "sha256:358fe87cc899c6bb0ddc185bf3dbfa4ba646f05b1b0b9b5a27c2cb92c2cea204"},
|
||||
{file = "wrapt-1.14.1-cp311-cp311-win_amd64.whl", hash = "sha256:26046cd03936ae745a502abf44dac702a5e6880b2b01c29aea8ddf3353b68224"},
|
||||
{file = "wrapt-1.14.1-cp35-cp35m-manylinux1_i686.whl", hash = "sha256:43ca3bbbe97af00f49efb06e352eae40434ca9d915906f77def219b88e85d907"},
|
||||
{file = "wrapt-1.14.1-cp35-cp35m-manylinux1_x86_64.whl", hash = "sha256:6b1a564e6cb69922c7fe3a678b9f9a3c54e72b469875aa8018f18b4d1dd1adf3"},
|
||||
{file = "wrapt-1.14.1-cp35-cp35m-manylinux2010_i686.whl", hash = "sha256:00b6d4ea20a906c0ca56d84f93065b398ab74b927a7a3dbd470f6fc503f95dc3"},
|
||||
@@ -3463,6 +3523,21 @@ idna = ">=2.0"
|
||||
multidict = ">=4.0"
|
||||
propcache = ">=0.2.0"
|
||||
|
||||
[[package]]
|
||||
name = "zipp"
|
||||
version = "3.19.1"
|
||||
description = "Backport of pathlib-compatible object wrapper for zip files"
|
||||
optional = false
|
||||
python-versions = ">=3.8"
|
||||
files = [
|
||||
{file = "zipp-3.19.1-py3-none-any.whl", hash = "sha256:2828e64edb5386ea6a52e7ba7cdb17bb30a73a858f5eb6eb93d8d36f5ea26091"},
|
||||
{file = "zipp-3.19.1.tar.gz", hash = "sha256:35427f6d5594f4acf82d25541438348c26736fa9b3afa2754bcd63cdb99d8e8f"},
|
||||
]
|
||||
|
||||
[package.extras]
|
||||
doc = ["furo", "jaraco.packaging (>=9.3)", "jaraco.tidelift (>=1.4)", "rst.linker (>=1.9)", "sphinx (>=3.5)", "sphinx-lint"]
|
||||
test = ["big-O", "jaraco.functools", "jaraco.itertools", "jaraco.test", "more-itertools", "pytest (>=6,!=8.1.*)", "pytest-checkdocs (>=2.4)", "pytest-cov", "pytest-enabler (>=2.2)", "pytest-ignore-flaky", "pytest-mypy", "pytest-ruff (>=0.2.1)"]
|
||||
|
||||
[[package]]
|
||||
name = "zstandard"
|
||||
version = "0.21.0"
|
||||
@@ -3523,5 +3598,5 @@ cffi = ["cffi (>=1.11)"]
|
||||
|
||||
[metadata]
|
||||
lock-version = "2.0"
|
||||
python-versions = "^3.11"
|
||||
content-hash = "21debe1116843e5d14bdf37d6e265c68c63a98a64ba04ec8b8a02af2e8d9f486"
|
||||
python-versions = "^3.9"
|
||||
content-hash = "8cb9c38d83eec441391c0528ac2fbefde18c734373b2399e07c69382044e8ced"
|
||||
|
||||
@@ -6,7 +6,6 @@ use tokio_postgres::config::SslMode;
|
||||
use tracing::{info, info_span};
|
||||
|
||||
use super::ComputeCredentialKeys;
|
||||
use crate::auth::IpPattern;
|
||||
use crate::cache::Cached;
|
||||
use crate::config::AuthenticationConfig;
|
||||
use crate::context::RequestContext;
|
||||
@@ -75,10 +74,10 @@ impl ConsoleRedirectBackend {
|
||||
ctx: &RequestContext,
|
||||
auth_config: &'static AuthenticationConfig,
|
||||
client: &mut PqStream<impl AsyncRead + AsyncWrite + Unpin>,
|
||||
) -> auth::Result<(ConsoleRedirectNodeInfo, Option<Vec<IpPattern>>)> {
|
||||
) -> auth::Result<ConsoleRedirectNodeInfo> {
|
||||
authenticate(ctx, auth_config, &self.console_uri, client)
|
||||
.await
|
||||
.map(|(node_info, ip_allowlist)| (ConsoleRedirectNodeInfo(node_info), ip_allowlist))
|
||||
.map(ConsoleRedirectNodeInfo)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -103,7 +102,7 @@ async fn authenticate(
|
||||
auth_config: &'static AuthenticationConfig,
|
||||
link_uri: &reqwest::Url,
|
||||
client: &mut PqStream<impl AsyncRead + AsyncWrite + Unpin>,
|
||||
) -> auth::Result<(NodeInfo, Option<Vec<IpPattern>>)> {
|
||||
) -> auth::Result<NodeInfo> {
|
||||
ctx.set_auth_method(crate::context::AuthMethod::ConsoleRedirect);
|
||||
|
||||
// registering waiter can fail if we get unlucky with rng.
|
||||
@@ -177,12 +176,9 @@ async fn authenticate(
|
||||
config.password(password.as_ref());
|
||||
}
|
||||
|
||||
Ok((
|
||||
NodeInfo {
|
||||
config,
|
||||
aux: db_info.aux,
|
||||
allow_self_signed_compute: false, // caller may override
|
||||
},
|
||||
db_info.allowed_ips,
|
||||
))
|
||||
Ok(NodeInfo {
|
||||
config,
|
||||
aux: db_info.aux,
|
||||
allow_self_signed_compute: false, // caller may override
|
||||
})
|
||||
}
|
||||
|
||||
@@ -132,93 +132,6 @@ struct JwkSet<'a> {
|
||||
keys: Vec<&'a RawValue>,
|
||||
}
|
||||
|
||||
/// Given a jwks_url, fetch the JWKS and parse out all the signing JWKs.
|
||||
/// Returns `None` and log a warning if there are any errors.
|
||||
async fn fetch_jwks(
|
||||
client: &reqwest_middleware::ClientWithMiddleware,
|
||||
jwks_url: url::Url,
|
||||
) -> Option<jose_jwk::JwkSet> {
|
||||
let req = client.get(jwks_url.clone());
|
||||
// TODO(conrad): We need to filter out URLs that point to local resources. Public internet only.
|
||||
let resp = req.send().await.and_then(|r| {
|
||||
r.error_for_status()
|
||||
.map_err(reqwest_middleware::Error::Reqwest)
|
||||
});
|
||||
|
||||
let resp = match resp {
|
||||
Ok(r) => r,
|
||||
// TODO: should we re-insert JWKs if we want to keep this JWKs URL?
|
||||
// I expect these failures would be quite sparse.
|
||||
Err(e) => {
|
||||
tracing::warn!(url=?jwks_url, error=?e, "could not fetch JWKs");
|
||||
return None;
|
||||
}
|
||||
};
|
||||
|
||||
let resp: http::Response<reqwest::Body> = resp.into();
|
||||
|
||||
let bytes = match read_body_with_limit(resp.into_body(), MAX_JWK_BODY_SIZE).await {
|
||||
Ok(bytes) => bytes,
|
||||
Err(e) => {
|
||||
tracing::warn!(url=?jwks_url, error=?e, "could not decode JWKs");
|
||||
return None;
|
||||
}
|
||||
};
|
||||
|
||||
let jwks = match serde_json::from_slice::<JwkSet>(&bytes) {
|
||||
Ok(jwks) => jwks,
|
||||
Err(e) => {
|
||||
tracing::warn!(url=?jwks_url, error=?e, "could not decode JWKs");
|
||||
return None;
|
||||
}
|
||||
};
|
||||
|
||||
// `jose_jwk::Jwk` is quite large (288 bytes). Let's not pre-allocate for what we don't need.
|
||||
//
|
||||
// Even though we limit our responses to 64KiB, we could still receive a payload like
|
||||
// `{"keys":[` + repeat(`0`).take(30000).join(`,`) + `]}`. Parsing this as `RawValue` uses 468KiB.
|
||||
// Pre-allocating the corresponding `Vec::<jose_jwk::Jwk>::with_capacity(30000)` uses 8.2MiB.
|
||||
let mut keys = vec![];
|
||||
|
||||
let mut failed = 0;
|
||||
for key in jwks.keys {
|
||||
let key = match serde_json::from_str::<jose_jwk::Jwk>(key.get()) {
|
||||
Ok(key) => key,
|
||||
Err(e) => {
|
||||
tracing::debug!(url=?jwks_url, failed=?e, "could not decode JWK");
|
||||
failed += 1;
|
||||
continue;
|
||||
}
|
||||
};
|
||||
|
||||
// if `use` (called `cls` in rust) is specified to be something other than signing,
|
||||
// we can skip storing it.
|
||||
if key
|
||||
.prm
|
||||
.cls
|
||||
.as_ref()
|
||||
.is_some_and(|c| *c != jose_jwk::Class::Signing)
|
||||
{
|
||||
continue;
|
||||
}
|
||||
|
||||
keys.push(key);
|
||||
}
|
||||
|
||||
keys.shrink_to_fit();
|
||||
|
||||
if failed > 0 {
|
||||
tracing::warn!(url=?jwks_url, failed, "could not decode JWKs");
|
||||
}
|
||||
|
||||
if keys.is_empty() {
|
||||
tracing::warn!(url=?jwks_url, "no valid JWKs found inside the response body");
|
||||
return None;
|
||||
}
|
||||
|
||||
Some(jose_jwk::JwkSet { keys })
|
||||
}
|
||||
|
||||
impl JwkCacheEntryLock {
|
||||
async fn acquire_permit<'a>(self: &'a Arc<Self>) -> JwkRenewalPermit<'a> {
|
||||
JwkRenewalPermit::acquire_permit(self).await
|
||||
@@ -253,15 +166,87 @@ impl JwkCacheEntryLock {
|
||||
// TODO(conrad): run concurrently
|
||||
// TODO(conrad): strip the JWKs urls (should be checked by cplane as well - cloud#16284)
|
||||
for rule in rules {
|
||||
if let Some(jwks) = fetch_jwks(client, rule.jwks_url).await {
|
||||
key_sets.insert(
|
||||
rule.id,
|
||||
KeySet {
|
||||
jwks,
|
||||
audience: rule.audience,
|
||||
role_names: rule.role_names,
|
||||
},
|
||||
);
|
||||
let req = client.get(rule.jwks_url.clone());
|
||||
// TODO(conrad): eventually switch to using reqwest_middleware/`new_client_with_timeout`.
|
||||
// TODO(conrad): We need to filter out URLs that point to local resources. Public internet only.
|
||||
match req.send().await.and_then(|r| {
|
||||
r.error_for_status()
|
||||
.map_err(reqwest_middleware::Error::Reqwest)
|
||||
}) {
|
||||
// todo: should we re-insert JWKs if we want to keep this JWKs URL?
|
||||
// I expect these failures would be quite sparse.
|
||||
Err(e) => tracing::warn!(url=?rule.jwks_url, error=?e, "could not fetch JWKs"),
|
||||
Ok(r) => {
|
||||
let resp: http::Response<reqwest::Body> = r.into();
|
||||
|
||||
let bytes = match read_body_with_limit(resp.into_body(), MAX_JWK_BODY_SIZE)
|
||||
.await
|
||||
{
|
||||
Ok(bytes) => bytes,
|
||||
Err(e) => {
|
||||
tracing::warn!(url=?rule.jwks_url, error=?e, "could not decode JWKs");
|
||||
continue;
|
||||
}
|
||||
};
|
||||
|
||||
match serde_json::from_slice::<JwkSet>(&bytes) {
|
||||
Err(e) => {
|
||||
tracing::warn!(url=?rule.jwks_url, error=?e, "could not decode JWKs");
|
||||
}
|
||||
Ok(jwks) => {
|
||||
// size_of::<&RawValue>() == 16
|
||||
// size_of::<jose_jwk::Jwk>() == 288
|
||||
// better to not pre-allocate this as it might be pretty large - especially if it has many
|
||||
// keys we don't want or need.
|
||||
// trivial 'attack': `{"keys":[` + repeat(`0`).take(30000).join(`,`) + `]}`
|
||||
// this would consume 8MiB just like that!
|
||||
let mut keys = vec![];
|
||||
let mut failed = 0;
|
||||
for key in jwks.keys {
|
||||
match serde_json::from_str::<jose_jwk::Jwk>(key.get()) {
|
||||
Ok(key) => {
|
||||
// if `use` (called `cls` in rust) is specified to be something other than signing,
|
||||
// we can skip storing it.
|
||||
if key
|
||||
.prm
|
||||
.cls
|
||||
.as_ref()
|
||||
.is_some_and(|c| *c != jose_jwk::Class::Signing)
|
||||
{
|
||||
continue;
|
||||
}
|
||||
|
||||
keys.push(key);
|
||||
}
|
||||
Err(e) => {
|
||||
tracing::debug!(url=?rule.jwks_url, failed=?e, "could not decode JWK");
|
||||
failed += 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
keys.shrink_to_fit();
|
||||
|
||||
if failed > 0 {
|
||||
tracing::warn!(url=?rule.jwks_url, failed, "could not decode JWKs");
|
||||
}
|
||||
|
||||
if keys.is_empty() {
|
||||
tracing::warn!(url=?rule.jwks_url, "no valid JWKs found inside the response body");
|
||||
continue;
|
||||
}
|
||||
|
||||
let jwks = jose_jwk::JwkSet { keys };
|
||||
key_sets.insert(
|
||||
rule.id,
|
||||
KeySet {
|
||||
jwks,
|
||||
audience: rule.audience,
|
||||
role_names: rule.role_names,
|
||||
},
|
||||
);
|
||||
}
|
||||
};
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -6,6 +6,7 @@ pub mod local;
|
||||
|
||||
use std::net::IpAddr;
|
||||
use std::sync::Arc;
|
||||
use std::time::Duration;
|
||||
|
||||
pub use console_redirect::ConsoleRedirectBackend;
|
||||
pub(crate) use console_redirect::ConsoleRedirectError;
|
||||
@@ -29,7 +30,7 @@ use crate::intern::EndpointIdInt;
|
||||
use crate::metrics::Metrics;
|
||||
use crate::proxy::connect_compute::ComputeConnectBackend;
|
||||
use crate::proxy::NeonOptions;
|
||||
use crate::rate_limiter::{BucketRateLimiter, EndpointRateLimiter};
|
||||
use crate::rate_limiter::{BucketRateLimiter, EndpointRateLimiter, RateBucketInfo};
|
||||
use crate::stream::Stream;
|
||||
use crate::types::{EndpointCacheKey, EndpointId, RoleName};
|
||||
use crate::{scram, stream};
|
||||
@@ -191,6 +192,21 @@ impl MaskedIp {
|
||||
// This can't be just per IP because that would limit some PaaS that share IP addresses
|
||||
pub type AuthRateLimiter = BucketRateLimiter<(EndpointIdInt, MaskedIp)>;
|
||||
|
||||
impl RateBucketInfo {
|
||||
/// All of these are per endpoint-maskedip pair.
|
||||
/// Context: 4096 rounds of pbkdf2 take about 1ms of cpu time to execute (1 milli-cpu-second or 1mcpus).
|
||||
///
|
||||
/// First bucket: 1000mcpus total per endpoint-ip pair
|
||||
/// * 4096000 requests per second with 1 hash rounds.
|
||||
/// * 1000 requests per second with 4096 hash rounds.
|
||||
/// * 6.8 requests per second with 600000 hash rounds.
|
||||
pub const DEFAULT_AUTH_SET: [Self; 3] = [
|
||||
Self::new(1000 * 4096, Duration::from_secs(1)),
|
||||
Self::new(600 * 4096, Duration::from_secs(60)),
|
||||
Self::new(300 * 4096, Duration::from_secs(600)),
|
||||
];
|
||||
}
|
||||
|
||||
impl AuthenticationConfig {
|
||||
pub(crate) fn check_rate_limit(
|
||||
&self,
|
||||
|
||||
@@ -111,7 +111,7 @@ struct SqlOverHttpArgs {
|
||||
sql_over_http_cancel_set_shards: usize,
|
||||
|
||||
#[clap(long, default_value_t = 10 * 1024 * 1024)] // 10 MiB
|
||||
sql_over_http_max_request_size_bytes: usize,
|
||||
sql_over_http_max_request_size_bytes: u64,
|
||||
|
||||
#[clap(long, default_value_t = 10 * 1024 * 1024)] // 10 MiB
|
||||
sql_over_http_max_response_size_bytes: usize,
|
||||
|
||||
@@ -276,7 +276,7 @@ struct SqlOverHttpArgs {
|
||||
sql_over_http_cancel_set_shards: usize,
|
||||
|
||||
#[clap(long, default_value_t = 10 * 1024 * 1024)] // 10 MiB
|
||||
sql_over_http_max_request_size_bytes: usize,
|
||||
sql_over_http_max_request_size_bytes: u64,
|
||||
|
||||
#[clap(long, default_value_t = 10 * 1024 * 1024)] // 10 MiB
|
||||
sql_over_http_max_response_size_bytes: usize,
|
||||
@@ -428,9 +428,8 @@ async fn main() -> anyhow::Result<()> {
|
||||
)?))),
|
||||
None => None,
|
||||
};
|
||||
|
||||
let cancellation_handler = Arc::new(CancellationHandler::<
|
||||
Option<Arc<Mutex<RedisPublisherClient>>>,
|
||||
Option<Arc<tokio::sync::Mutex<RedisPublisherClient>>>,
|
||||
>::new(
|
||||
cancel_map.clone(),
|
||||
redis_publisher,
|
||||
|
||||
@@ -10,23 +10,16 @@ use tokio_postgres::{CancelToken, NoTls};
|
||||
use tracing::{debug, info};
|
||||
use uuid::Uuid;
|
||||
|
||||
use crate::auth::{check_peer_addr_is_in_list, IpPattern};
|
||||
use crate::error::ReportableError;
|
||||
use crate::metrics::{CancellationRequest, CancellationSource, Metrics};
|
||||
use crate::rate_limiter::LeakyBucketRateLimiter;
|
||||
use crate::redis::cancellation_publisher::{
|
||||
CancellationPublisher, CancellationPublisherMut, RedisPublisherClient,
|
||||
};
|
||||
use std::net::IpAddr;
|
||||
|
||||
use ipnet::{IpNet, Ipv4Net, Ipv6Net};
|
||||
|
||||
pub type CancelMap = Arc<DashMap<CancelKeyData, Option<CancelClosure>>>;
|
||||
pub type CancellationHandlerMain = CancellationHandler<Option<Arc<Mutex<RedisPublisherClient>>>>;
|
||||
pub(crate) type CancellationHandlerMainInternal = Option<Arc<Mutex<RedisPublisherClient>>>;
|
||||
|
||||
type IpSubnetKey = IpNet;
|
||||
|
||||
/// Enables serving `CancelRequest`s.
|
||||
///
|
||||
/// If `CancellationPublisher` is available, cancel request will be used to publish the cancellation key to other proxy instances.
|
||||
@@ -36,23 +29,14 @@ pub struct CancellationHandler<P> {
|
||||
/// This field used for the monitoring purposes.
|
||||
/// Represents the source of the cancellation request.
|
||||
from: CancellationSource,
|
||||
// rate limiter of cancellation requests
|
||||
limiter: Arc<std::sync::Mutex<LeakyBucketRateLimiter<IpSubnetKey>>>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Error)]
|
||||
pub(crate) enum CancelError {
|
||||
#[error("{0}")]
|
||||
IO(#[from] std::io::Error),
|
||||
|
||||
#[error("{0}")]
|
||||
Postgres(#[from] tokio_postgres::Error),
|
||||
|
||||
#[error("rate limit exceeded")]
|
||||
RateLimit,
|
||||
|
||||
#[error("IP is not allowed")]
|
||||
IpNotAllowed,
|
||||
}
|
||||
|
||||
impl ReportableError for CancelError {
|
||||
@@ -63,8 +47,6 @@ impl ReportableError for CancelError {
|
||||
crate::error::ErrorKind::Postgres
|
||||
}
|
||||
CancelError::Postgres(_) => crate::error::ErrorKind::Compute,
|
||||
CancelError::RateLimit => crate::error::ErrorKind::RateLimit,
|
||||
CancelError::IpNotAllowed => crate::error::ErrorKind::User,
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -97,36 +79,13 @@ impl<P: CancellationPublisher> CancellationHandler<P> {
|
||||
cancellation_handler: self,
|
||||
}
|
||||
}
|
||||
|
||||
/// Try to cancel a running query for the corresponding connection.
|
||||
/// If the cancellation key is not found, it will be published to Redis.
|
||||
/// check_allowed - if true, check if the IP is allowed to cancel the query
|
||||
pub(crate) async fn cancel_session(
|
||||
&self,
|
||||
key: CancelKeyData,
|
||||
session_id: Uuid,
|
||||
peer_addr: &IpAddr,
|
||||
check_allowed: bool,
|
||||
) -> Result<(), CancelError> {
|
||||
// TODO: check for unspecified address is only for backward compatibility, should be removed
|
||||
if !peer_addr.is_unspecified() {
|
||||
let subnet_key = match *peer_addr {
|
||||
IpAddr::V4(ip) => IpNet::V4(Ipv4Net::new_assert(ip, 24).trunc()), // use defaut mask here
|
||||
IpAddr::V6(ip) => IpNet::V6(Ipv6Net::new_assert(ip, 64).trunc()),
|
||||
};
|
||||
if !self.limiter.lock().unwrap().check(subnet_key, 1) {
|
||||
tracing::debug!("Rate limit exceeded. Skipping cancellation message");
|
||||
Metrics::get()
|
||||
.proxy
|
||||
.cancellation_requests_total
|
||||
.inc(CancellationRequest {
|
||||
source: self.from,
|
||||
kind: crate::metrics::CancellationOutcome::RateLimitExceeded,
|
||||
});
|
||||
return Err(CancelError::RateLimit);
|
||||
}
|
||||
}
|
||||
|
||||
// NB: we should immediately release the lock after cloning the token.
|
||||
let Some(cancel_closure) = self.map.get(&key).and_then(|x| x.clone()) else {
|
||||
tracing::warn!("query cancellation key not found: {key}");
|
||||
@@ -137,13 +96,7 @@ impl<P: CancellationPublisher> CancellationHandler<P> {
|
||||
source: self.from,
|
||||
kind: crate::metrics::CancellationOutcome::NotFound,
|
||||
});
|
||||
|
||||
if session_id == Uuid::nil() {
|
||||
// was already published, do not publish it again
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
match self.client.try_publish(key, session_id, *peer_addr).await {
|
||||
match self.client.try_publish(key, session_id).await {
|
||||
Ok(()) => {} // do nothing
|
||||
Err(e) => {
|
||||
return Err(CancelError::IO(std::io::Error::new(
|
||||
@@ -154,13 +107,6 @@ impl<P: CancellationPublisher> CancellationHandler<P> {
|
||||
}
|
||||
return Ok(());
|
||||
};
|
||||
|
||||
if check_allowed
|
||||
&& !check_peer_addr_is_in_list(peer_addr, cancel_closure.ip_allowlist.as_slice())
|
||||
{
|
||||
return Err(CancelError::IpNotAllowed);
|
||||
}
|
||||
|
||||
Metrics::get()
|
||||
.proxy
|
||||
.cancellation_requests_total
|
||||
@@ -189,29 +135,13 @@ impl CancellationHandler<()> {
|
||||
map,
|
||||
client: (),
|
||||
from,
|
||||
limiter: Arc::new(std::sync::Mutex::new(
|
||||
LeakyBucketRateLimiter::<IpSubnetKey>::new_with_shards(
|
||||
LeakyBucketRateLimiter::<IpSubnetKey>::DEFAULT,
|
||||
64,
|
||||
),
|
||||
)),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<P: CancellationPublisherMut> CancellationHandler<Option<Arc<Mutex<P>>>> {
|
||||
pub fn new(map: CancelMap, client: Option<Arc<Mutex<P>>>, from: CancellationSource) -> Self {
|
||||
Self {
|
||||
map,
|
||||
client,
|
||||
from,
|
||||
limiter: Arc::new(std::sync::Mutex::new(
|
||||
LeakyBucketRateLimiter::<IpSubnetKey>::new_with_shards(
|
||||
LeakyBucketRateLimiter::<IpSubnetKey>::DEFAULT,
|
||||
64,
|
||||
),
|
||||
)),
|
||||
}
|
||||
Self { map, client, from }
|
||||
}
|
||||
}
|
||||
|
||||
@@ -222,19 +152,13 @@ impl<P: CancellationPublisherMut> CancellationHandler<Option<Arc<Mutex<P>>>> {
|
||||
pub struct CancelClosure {
|
||||
socket_addr: SocketAddr,
|
||||
cancel_token: CancelToken,
|
||||
ip_allowlist: Vec<IpPattern>,
|
||||
}
|
||||
|
||||
impl CancelClosure {
|
||||
pub(crate) fn new(
|
||||
socket_addr: SocketAddr,
|
||||
cancel_token: CancelToken,
|
||||
ip_allowlist: Vec<IpPattern>,
|
||||
) -> Self {
|
||||
pub(crate) fn new(socket_addr: SocketAddr, cancel_token: CancelToken) -> Self {
|
||||
Self {
|
||||
socket_addr,
|
||||
cancel_token,
|
||||
ip_allowlist,
|
||||
}
|
||||
}
|
||||
/// Cancels the query running on user's compute node.
|
||||
@@ -244,9 +168,6 @@ impl CancelClosure {
|
||||
debug!("query was cancelled");
|
||||
Ok(())
|
||||
}
|
||||
pub(crate) fn set_ip_allowlist(&mut self, ip_allowlist: Vec<IpPattern>) {
|
||||
self.ip_allowlist = ip_allowlist;
|
||||
}
|
||||
}
|
||||
|
||||
/// Helper for registering query cancellation tokens.
|
||||
@@ -308,8 +229,6 @@ mod tests {
|
||||
cancel_key: 0,
|
||||
},
|
||||
Uuid::new_v4(),
|
||||
&("127.0.0.1".parse().unwrap()),
|
||||
true,
|
||||
)
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
@@ -342,7 +342,7 @@ impl ConnCfg {
|
||||
|
||||
// NB: CancelToken is supposed to hold socket_addr, but we use connect_raw.
|
||||
// Yet another reason to rework the connection establishing code.
|
||||
let cancel_closure = CancelClosure::new(socket_addr, client.cancel_token(), vec![]);
|
||||
let cancel_closure = CancelClosure::new(socket_addr, client.cancel_token());
|
||||
|
||||
let connection = PostgresConnection {
|
||||
stream,
|
||||
|
||||
@@ -64,7 +64,7 @@ pub struct HttpConfig {
|
||||
pub pool_options: GlobalConnPoolOptions,
|
||||
pub cancel_set: CancelSet,
|
||||
pub client_conn_threshold: u64,
|
||||
pub max_request_size_bytes: usize,
|
||||
pub max_request_size_bytes: u64,
|
||||
pub max_response_size_bytes: usize,
|
||||
}
|
||||
|
||||
|
||||
@@ -156,21 +156,16 @@ pub(crate) async fn handle_client<S: AsyncRead + AsyncWrite + Unpin>(
|
||||
let request_gauge = metrics.connection_requests.guard(proto);
|
||||
|
||||
let tls = config.tls_config.as_ref();
|
||||
|
||||
let record_handshake_error = !ctx.has_private_peer_addr();
|
||||
let pause = ctx.latency_timer_pause(crate::metrics::Waiting::Client);
|
||||
let do_handshake = handshake(ctx, stream, tls, record_handshake_error);
|
||||
|
||||
let (mut stream, params) =
|
||||
match tokio::time::timeout(config.handshake_timeout, do_handshake).await?? {
|
||||
HandshakeData::Startup(stream, params) => (stream, params),
|
||||
HandshakeData::Cancel(cancel_key_data) => {
|
||||
return Ok(cancellation_handler
|
||||
.cancel_session(
|
||||
cancel_key_data,
|
||||
ctx.session_id(),
|
||||
&ctx.peer_addr(),
|
||||
config.authentication_config.ip_allowlist_check_enabled,
|
||||
)
|
||||
.cancel_session(cancel_key_data, ctx.session_id())
|
||||
.await
|
||||
.map(|()| None)?)
|
||||
}
|
||||
@@ -179,7 +174,7 @@ pub(crate) async fn handle_client<S: AsyncRead + AsyncWrite + Unpin>(
|
||||
|
||||
ctx.set_db_options(params.clone());
|
||||
|
||||
let (user_info, ip_allowlist) = match backend
|
||||
let user_info = match backend
|
||||
.authenticate(ctx, &config.authentication_config, &mut stream)
|
||||
.await
|
||||
{
|
||||
@@ -203,8 +198,6 @@ pub(crate) async fn handle_client<S: AsyncRead + AsyncWrite + Unpin>(
|
||||
.or_else(|e| stream.throw_error(e))
|
||||
.await?;
|
||||
|
||||
node.cancel_closure
|
||||
.set_ip_allowlist(ip_allowlist.unwrap_or_default());
|
||||
let session = cancellation_handler.get_session();
|
||||
prepare_client_connection(&node, &session, &mut stream).await?;
|
||||
|
||||
|
||||
@@ -380,7 +380,6 @@ impl super::ControlPlaneApi for NeonControlPlaneClient {
|
||||
// after getting back a permit - it's possible the cache was filled
|
||||
// double check
|
||||
if permit.should_check_cache() {
|
||||
// TODO: if there is something in the cache, mark the permit as success.
|
||||
check_cache!();
|
||||
}
|
||||
|
||||
|
||||
@@ -122,18 +122,18 @@ impl Endpoint {
|
||||
}
|
||||
|
||||
#[derive(Error, Debug)]
|
||||
pub(crate) enum ReadBodyError<E> {
|
||||
pub(crate) enum ReadBodyError {
|
||||
#[error("Content length exceeds limit of {limit} bytes")]
|
||||
BodyTooLarge { limit: usize },
|
||||
|
||||
#[error(transparent)]
|
||||
Read(#[from] E),
|
||||
Read(#[from] reqwest::Error),
|
||||
}
|
||||
|
||||
pub(crate) async fn read_body_with_limit<E>(
|
||||
mut b: impl Body<Data = Bytes, Error = E> + Unpin,
|
||||
pub(crate) async fn read_body_with_limit(
|
||||
mut b: impl Body<Data = Bytes, Error = reqwest::Error> + Unpin,
|
||||
limit: usize,
|
||||
) -> Result<Vec<u8>, ReadBodyError<E>> {
|
||||
) -> Result<Vec<u8>, ReadBodyError> {
|
||||
// We could use `b.limited().collect().await.to_bytes()` here
|
||||
// but this ends up being slightly more efficient as far as I can tell.
|
||||
|
||||
|
||||
@@ -351,7 +351,6 @@ pub enum CancellationSource {
|
||||
pub enum CancellationOutcome {
|
||||
NotFound,
|
||||
Found,
|
||||
RateLimitExceeded,
|
||||
}
|
||||
|
||||
#[derive(LabelGroup)]
|
||||
|
||||
@@ -117,6 +117,7 @@ where
|
||||
node_info.set_keys(user_info.get_keys());
|
||||
node_info.allow_self_signed_compute = allow_self_signed_compute;
|
||||
mechanism.update_connect_config(&mut node_info.config);
|
||||
let retry_type = RetryType::ConnectToCompute;
|
||||
|
||||
// try once
|
||||
let err = match mechanism
|
||||
@@ -128,7 +129,7 @@ where
|
||||
Metrics::get().proxy.retries_metric.observe(
|
||||
RetriesMetricGroup {
|
||||
outcome: ConnectOutcome::Success,
|
||||
retry_type: RetryType::ConnectToCompute,
|
||||
retry_type,
|
||||
},
|
||||
num_retries.into(),
|
||||
);
|
||||
@@ -146,7 +147,7 @@ where
|
||||
Metrics::get().proxy.retries_metric.observe(
|
||||
RetriesMetricGroup {
|
||||
outcome: ConnectOutcome::Failed,
|
||||
retry_type: RetryType::ConnectToCompute,
|
||||
retry_type,
|
||||
},
|
||||
num_retries.into(),
|
||||
);
|
||||
@@ -155,9 +156,8 @@ where
|
||||
node_info
|
||||
} else {
|
||||
// if we failed to connect, it's likely that the compute node was suspended, wake a new compute node
|
||||
debug!("compute node's state has likely changed; requesting a wake-up");
|
||||
info!("compute node's state has likely changed; requesting a wake-up");
|
||||
let old_node_info = invalidate_cache(node_info);
|
||||
// TODO: increment num_retries?
|
||||
let mut node_info =
|
||||
wake_compute(&mut num_retries, ctx, user_info, wake_compute_retry_config).await?;
|
||||
node_info.reuse_settings(old_node_info);
|
||||
@@ -169,7 +169,7 @@ where
|
||||
// now that we have a new node, try connect to it repeatedly.
|
||||
// this can error for a few reasons, for instance:
|
||||
// * DNS connection settings haven't quite propagated yet
|
||||
debug!("wake_compute success. attempting to connect");
|
||||
info!("wake_compute success. attempting to connect");
|
||||
num_retries = 1;
|
||||
loop {
|
||||
match mechanism
|
||||
@@ -181,11 +181,10 @@ where
|
||||
Metrics::get().proxy.retries_metric.observe(
|
||||
RetriesMetricGroup {
|
||||
outcome: ConnectOutcome::Success,
|
||||
retry_type: RetryType::ConnectToCompute,
|
||||
retry_type,
|
||||
},
|
||||
num_retries.into(),
|
||||
);
|
||||
// TODO: is this necessary? We have a metric.
|
||||
info!(?num_retries, "connected to compute node after");
|
||||
return Ok(res);
|
||||
}
|
||||
@@ -195,7 +194,7 @@ where
|
||||
Metrics::get().proxy.retries_metric.observe(
|
||||
RetriesMetricGroup {
|
||||
outcome: ConnectOutcome::Failed,
|
||||
retry_type: RetryType::ConnectToCompute,
|
||||
retry_type,
|
||||
},
|
||||
num_retries.into(),
|
||||
);
|
||||
|
||||
@@ -87,8 +87,6 @@ where
|
||||
transfer_one_direction(cx, &mut compute_to_client, compute, client)
|
||||
.map_err(ErrorSource::from_compute)?;
|
||||
|
||||
// TODO: 1 info log, with a enum label for close direction.
|
||||
|
||||
// Early termination checks from compute to client.
|
||||
if let TransferState::Done(_) = compute_to_client {
|
||||
if let TransferState::Running(buf) = &client_to_compute {
|
||||
|
||||
@@ -5,7 +5,7 @@ use pq_proto::{
|
||||
};
|
||||
use thiserror::Error;
|
||||
use tokio::io::{AsyncRead, AsyncWrite};
|
||||
use tracing::{debug, info, warn};
|
||||
use tracing::{info, warn};
|
||||
|
||||
use crate::auth::endpoint_sni;
|
||||
use crate::config::{TlsConfig, PG_ALPN_PROTOCOL};
|
||||
@@ -199,8 +199,6 @@ pub(crate) async fn handshake<S: AsyncRead + AsyncWrite + Unpin>(
|
||||
.await?;
|
||||
}
|
||||
|
||||
// This log highlights the start of the connection.
|
||||
// This contains useful information for debugging, not logged elsewhere, like role name and endpoint id.
|
||||
info!(
|
||||
?version,
|
||||
?params,
|
||||
@@ -213,7 +211,7 @@ pub(crate) async fn handshake<S: AsyncRead + AsyncWrite + Unpin>(
|
||||
FeStartupPacket::StartupMessage { params, version }
|
||||
if version.major() == 3 && version > PG_PROTOCOL_LATEST =>
|
||||
{
|
||||
debug!(?version, "unsupported minor version");
|
||||
warn!(?version, "unsupported minor version");
|
||||
|
||||
// no protocol extensions are supported.
|
||||
// <https://github.com/postgres/postgres/blob/ca481d3c9ab7bf69ff0c8d71ad3951d407f6a33c/src/backend/tcop/backend_startup.c#L744-L753>
|
||||
@@ -235,16 +233,14 @@ pub(crate) async fn handshake<S: AsyncRead + AsyncWrite + Unpin>(
|
||||
|
||||
info!(
|
||||
?version,
|
||||
?params,
|
||||
session_type = "normal",
|
||||
"successful handshake; unsupported minor version requested"
|
||||
);
|
||||
break Ok(HandshakeData::Startup(stream, params));
|
||||
}
|
||||
FeStartupPacket::StartupMessage { version, params } => {
|
||||
FeStartupPacket::StartupMessage { version, .. } => {
|
||||
warn!(
|
||||
?version,
|
||||
?params,
|
||||
session_type = "normal",
|
||||
"unsuccessful handshake; unsupported version"
|
||||
);
|
||||
|
||||
@@ -254,7 +254,7 @@ pub(crate) async fn handle_client<S: AsyncRead + AsyncWrite + Unpin>(
|
||||
endpoint_rate_limiter: Arc<EndpointRateLimiter>,
|
||||
conn_gauge: NumClientConnectionsGuard<'static>,
|
||||
) -> Result<Option<ProxyPassthrough<CancellationHandlerMainInternal, S>>, ClientRequestError> {
|
||||
debug!(
|
||||
info!(
|
||||
protocol = %ctx.protocol(),
|
||||
"handling interactive connection from client"
|
||||
);
|
||||
@@ -268,18 +268,12 @@ pub(crate) async fn handle_client<S: AsyncRead + AsyncWrite + Unpin>(
|
||||
let record_handshake_error = !ctx.has_private_peer_addr();
|
||||
let pause = ctx.latency_timer_pause(crate::metrics::Waiting::Client);
|
||||
let do_handshake = handshake(ctx, stream, mode.handshake_tls(tls), record_handshake_error);
|
||||
|
||||
let (mut stream, params) =
|
||||
match tokio::time::timeout(config.handshake_timeout, do_handshake).await?? {
|
||||
HandshakeData::Startup(stream, params) => (stream, params),
|
||||
HandshakeData::Cancel(cancel_key_data) => {
|
||||
return Ok(cancellation_handler
|
||||
.cancel_session(
|
||||
cancel_key_data,
|
||||
ctx.session_id(),
|
||||
&ctx.peer_addr(),
|
||||
config.authentication_config.ip_allowlist_check_enabled,
|
||||
)
|
||||
.cancel_session(cancel_key_data, ctx.session_id())
|
||||
.await
|
||||
.map(|()| None)?)
|
||||
}
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
use tokio::io::{AsyncRead, AsyncWrite};
|
||||
use tracing::debug;
|
||||
use tracing::info;
|
||||
use utils::measured_stream::MeasuredStream;
|
||||
|
||||
use super::copy_bidirectional::ErrorSource;
|
||||
@@ -45,7 +45,7 @@ pub(crate) async fn proxy_pass(
|
||||
);
|
||||
|
||||
// Starting from here we only proxy the client's traffic.
|
||||
debug!("performing the proxy pass...");
|
||||
info!("performing the proxy pass...");
|
||||
let _ = crate::proxy::copy_bidirectional::copy_bidirectional_client_compute(
|
||||
&mut client,
|
||||
&mut compute,
|
||||
|
||||
@@ -17,6 +17,7 @@ pub(crate) async fn wake_compute<B: ComputeConnectBackend>(
|
||||
api: &B,
|
||||
config: RetryConfig,
|
||||
) -> Result<CachedNodeInfo, WakeComputeError> {
|
||||
let retry_type = RetryType::WakeCompute;
|
||||
loop {
|
||||
match api.wake_compute(ctx).await {
|
||||
Err(e) if !should_retry(&e, *num_retries, config) => {
|
||||
@@ -25,7 +26,7 @@ pub(crate) async fn wake_compute<B: ComputeConnectBackend>(
|
||||
Metrics::get().proxy.retries_metric.observe(
|
||||
RetriesMetricGroup {
|
||||
outcome: ConnectOutcome::Failed,
|
||||
retry_type: RetryType::WakeCompute,
|
||||
retry_type,
|
||||
},
|
||||
(*num_retries).into(),
|
||||
);
|
||||
@@ -39,12 +40,10 @@ pub(crate) async fn wake_compute<B: ComputeConnectBackend>(
|
||||
Metrics::get().proxy.retries_metric.observe(
|
||||
RetriesMetricGroup {
|
||||
outcome: ConnectOutcome::Success,
|
||||
retry_type: RetryType::WakeCompute,
|
||||
retry_type,
|
||||
},
|
||||
(*num_retries).into(),
|
||||
);
|
||||
// TODO: is this necessary? We have a metric.
|
||||
// TODO: this log line is misleading as "wake_compute" might return cached (and stale) info.
|
||||
info!(?num_retries, "compute node woken up after");
|
||||
return Ok(n);
|
||||
}
|
||||
|
||||
@@ -195,11 +195,7 @@ impl DynamicLimiter {
|
||||
///
|
||||
/// Set the outcome to `None` to ignore the job.
|
||||
fn release_inner(&self, start: Instant, outcome: Option<Outcome>) {
|
||||
if outcome.is_none() {
|
||||
tracing::warn!("outcome is {:?}", outcome);
|
||||
} else {
|
||||
tracing::debug!("outcome is {:?}", outcome);
|
||||
}
|
||||
tracing::info!("outcome is {:?}", outcome);
|
||||
if self.config.initial_limit == 0 {
|
||||
return;
|
||||
}
|
||||
|
||||
@@ -31,32 +31,26 @@ impl LimitAlgorithm for Aimd {
|
||||
|
||||
if utilisation > self.utilisation {
|
||||
let limit = old_limit + self.inc;
|
||||
let new_limit = limit.clamp(self.min, self.max);
|
||||
if new_limit > old_limit {
|
||||
tracing::info!(old_limit, new_limit, "limit increased");
|
||||
} else {
|
||||
tracing::debug!(old_limit, new_limit, "limit clamped at max");
|
||||
let increased_limit = limit.clamp(self.min, self.max);
|
||||
if increased_limit > old_limit {
|
||||
tracing::info!(increased_limit, "limit increased");
|
||||
}
|
||||
|
||||
new_limit
|
||||
increased_limit
|
||||
} else {
|
||||
old_limit
|
||||
}
|
||||
}
|
||||
Outcome::Overload => {
|
||||
let new_limit = old_limit as f32 * self.dec;
|
||||
let limit = old_limit as f32 * self.dec;
|
||||
|
||||
// Floor instead of round, so the limit reduces even with small numbers.
|
||||
// E.g. round(2 * 0.9) = 2, but floor(2 * 0.9) = 1
|
||||
let new_limit = new_limit.floor() as usize;
|
||||
let limit = limit.floor() as usize;
|
||||
|
||||
let new_limit = new_limit.clamp(self.min, self.max);
|
||||
if new_limit < old_limit {
|
||||
tracing::info!(old_limit, new_limit, "limit decreased");
|
||||
} else {
|
||||
tracing::debug!(old_limit, new_limit, "limit clamped at min");
|
||||
}
|
||||
new_limit
|
||||
let limit = limit.clamp(self.min, self.max);
|
||||
tracing::info!(limit, "limit decreased");
|
||||
limit
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -14,13 +14,13 @@ use tracing::info;
|
||||
|
||||
use crate::intern::EndpointIdInt;
|
||||
|
||||
pub struct GlobalRateLimiter {
|
||||
pub(crate) struct GlobalRateLimiter {
|
||||
data: Vec<RateBucket>,
|
||||
info: Vec<RateBucketInfo>,
|
||||
}
|
||||
|
||||
impl GlobalRateLimiter {
|
||||
pub fn new(info: Vec<RateBucketInfo>) -> Self {
|
||||
pub(crate) fn new(info: Vec<RateBucketInfo>) -> Self {
|
||||
Self {
|
||||
data: vec![
|
||||
RateBucket {
|
||||
@@ -34,7 +34,7 @@ impl GlobalRateLimiter {
|
||||
}
|
||||
|
||||
/// Check that number of connections is below `max_rps` rps.
|
||||
pub fn check(&mut self) -> bool {
|
||||
pub(crate) fn check(&mut self) -> bool {
|
||||
let now = Instant::now();
|
||||
|
||||
let should_allow_request = self
|
||||
@@ -137,19 +137,6 @@ impl RateBucketInfo {
|
||||
Self::new(200, Duration::from_secs(600)),
|
||||
];
|
||||
|
||||
/// All of these are per endpoint-maskedip pair.
|
||||
/// Context: 4096 rounds of pbkdf2 take about 1ms of cpu time to execute (1 milli-cpu-second or 1mcpus).
|
||||
///
|
||||
/// First bucket: 1000mcpus total per endpoint-ip pair
|
||||
/// * 4096000 requests per second with 1 hash rounds.
|
||||
/// * 1000 requests per second with 4096 hash rounds.
|
||||
/// * 6.8 requests per second with 600000 hash rounds.
|
||||
pub const DEFAULT_AUTH_SET: [Self; 3] = [
|
||||
Self::new(1000 * 4096, Duration::from_secs(1)),
|
||||
Self::new(600 * 4096, Duration::from_secs(60)),
|
||||
Self::new(300 * 4096, Duration::from_secs(600)),
|
||||
];
|
||||
|
||||
pub fn rps(&self) -> f64 {
|
||||
(self.max_rpi as f64) / self.interval.as_secs_f64()
|
||||
}
|
||||
|
||||
@@ -8,4 +8,5 @@ pub(crate) use limit_algorithm::aimd::Aimd;
|
||||
pub(crate) use limit_algorithm::{
|
||||
DynamicLimiter, Outcome, RateLimitAlgorithm, RateLimiterConfig, Token,
|
||||
};
|
||||
pub use limiter::{BucketRateLimiter, GlobalRateLimiter, RateBucketInfo, WakeComputeRateLimiter};
|
||||
pub(crate) use limiter::GlobalRateLimiter;
|
||||
pub use limiter::{BucketRateLimiter, RateBucketInfo, WakeComputeRateLimiter};
|
||||
|
||||
@@ -1,6 +1,5 @@
|
||||
use std::sync::Arc;
|
||||
|
||||
use core::net::IpAddr;
|
||||
use pq_proto::CancelKeyData;
|
||||
use redis::AsyncCommands;
|
||||
use tokio::sync::Mutex;
|
||||
@@ -16,7 +15,6 @@ pub trait CancellationPublisherMut: Send + Sync + 'static {
|
||||
&mut self,
|
||||
cancel_key_data: CancelKeyData,
|
||||
session_id: Uuid,
|
||||
peer_addr: IpAddr,
|
||||
) -> anyhow::Result<()>;
|
||||
}
|
||||
|
||||
@@ -26,7 +24,6 @@ pub trait CancellationPublisher: Send + Sync + 'static {
|
||||
&self,
|
||||
cancel_key_data: CancelKeyData,
|
||||
session_id: Uuid,
|
||||
peer_addr: IpAddr,
|
||||
) -> anyhow::Result<()>;
|
||||
}
|
||||
|
||||
@@ -35,7 +32,6 @@ impl CancellationPublisher for () {
|
||||
&self,
|
||||
_cancel_key_data: CancelKeyData,
|
||||
_session_id: Uuid,
|
||||
_peer_addr: IpAddr,
|
||||
) -> anyhow::Result<()> {
|
||||
Ok(())
|
||||
}
|
||||
@@ -46,10 +42,8 @@ impl<P: CancellationPublisher> CancellationPublisherMut for P {
|
||||
&mut self,
|
||||
cancel_key_data: CancelKeyData,
|
||||
session_id: Uuid,
|
||||
peer_addr: IpAddr,
|
||||
) -> anyhow::Result<()> {
|
||||
<P as CancellationPublisher>::try_publish(self, cancel_key_data, session_id, peer_addr)
|
||||
.await
|
||||
<P as CancellationPublisher>::try_publish(self, cancel_key_data, session_id).await
|
||||
}
|
||||
}
|
||||
|
||||
@@ -58,10 +52,9 @@ impl<P: CancellationPublisher> CancellationPublisher for Option<P> {
|
||||
&self,
|
||||
cancel_key_data: CancelKeyData,
|
||||
session_id: Uuid,
|
||||
peer_addr: IpAddr,
|
||||
) -> anyhow::Result<()> {
|
||||
if let Some(p) = self {
|
||||
p.try_publish(cancel_key_data, session_id, peer_addr).await
|
||||
p.try_publish(cancel_key_data, session_id).await
|
||||
} else {
|
||||
Ok(())
|
||||
}
|
||||
@@ -73,11 +66,10 @@ impl<P: CancellationPublisherMut> CancellationPublisher for Arc<Mutex<P>> {
|
||||
&self,
|
||||
cancel_key_data: CancelKeyData,
|
||||
session_id: Uuid,
|
||||
peer_addr: IpAddr,
|
||||
) -> anyhow::Result<()> {
|
||||
self.lock()
|
||||
.await
|
||||
.try_publish(cancel_key_data, session_id, peer_addr)
|
||||
.try_publish(cancel_key_data, session_id)
|
||||
.await
|
||||
}
|
||||
}
|
||||
@@ -105,13 +97,11 @@ impl RedisPublisherClient {
|
||||
&mut self,
|
||||
cancel_key_data: CancelKeyData,
|
||||
session_id: Uuid,
|
||||
peer_addr: IpAddr,
|
||||
) -> anyhow::Result<()> {
|
||||
let payload = serde_json::to_string(&Notification::Cancel(CancelSession {
|
||||
region_id: Some(self.region_id.clone()),
|
||||
cancel_key_data,
|
||||
session_id,
|
||||
peer_addr: Some(peer_addr),
|
||||
}))?;
|
||||
let _: () = self.client.publish(PROXY_CHANNEL_NAME, payload).await?;
|
||||
Ok(())
|
||||
@@ -130,14 +120,12 @@ impl RedisPublisherClient {
|
||||
&mut self,
|
||||
cancel_key_data: CancelKeyData,
|
||||
session_id: Uuid,
|
||||
peer_addr: IpAddr,
|
||||
) -> anyhow::Result<()> {
|
||||
// TODO: review redundant error duplication logs.
|
||||
if !self.limiter.check() {
|
||||
tracing::info!("Rate limit exceeded. Skipping cancellation message");
|
||||
return Err(anyhow::anyhow!("Rate limit exceeded"));
|
||||
}
|
||||
match self.publish(cancel_key_data, session_id, peer_addr).await {
|
||||
match self.publish(cancel_key_data, session_id).await {
|
||||
Ok(()) => return Ok(()),
|
||||
Err(e) => {
|
||||
tracing::error!("failed to publish a message: {e}");
|
||||
@@ -145,7 +133,7 @@ impl RedisPublisherClient {
|
||||
}
|
||||
tracing::info!("Publisher is disconnected. Reconnectiong...");
|
||||
self.try_connect().await?;
|
||||
self.publish(cancel_key_data, session_id, peer_addr).await
|
||||
self.publish(cancel_key_data, session_id).await
|
||||
}
|
||||
}
|
||||
|
||||
@@ -154,15 +142,11 @@ impl CancellationPublisherMut for RedisPublisherClient {
|
||||
&mut self,
|
||||
cancel_key_data: CancelKeyData,
|
||||
session_id: Uuid,
|
||||
peer_addr: IpAddr,
|
||||
) -> anyhow::Result<()> {
|
||||
tracing::info!("publishing cancellation key to Redis");
|
||||
match self
|
||||
.try_publish_internal(cancel_key_data, session_id, peer_addr)
|
||||
.await
|
||||
{
|
||||
match self.try_publish_internal(cancel_key_data, session_id).await {
|
||||
Ok(()) => {
|
||||
tracing::debug!("cancellation key successfuly published to Redis");
|
||||
tracing::info!("cancellation key successfuly published to Redis");
|
||||
Ok(())
|
||||
}
|
||||
Err(e) => {
|
||||
|
||||
@@ -60,7 +60,6 @@ pub(crate) struct CancelSession {
|
||||
pub(crate) region_id: Option<String>,
|
||||
pub(crate) cancel_key_data: CancelKeyData,
|
||||
pub(crate) session_id: Uuid,
|
||||
pub(crate) peer_addr: Option<std::net::IpAddr>,
|
||||
}
|
||||
|
||||
fn deserialize_json_string<'de, D, T>(deserializer: D) -> Result<T, D::Error>
|
||||
@@ -138,20 +137,10 @@ impl<C: ProjectInfoCache + Send + Sync + 'static> MessageHandler<C> {
|
||||
return Ok(());
|
||||
}
|
||||
}
|
||||
|
||||
// TODO: Remove unspecified peer_addr after the complete migration to the new format
|
||||
let peer_addr = cancel_session
|
||||
.peer_addr
|
||||
.unwrap_or(std::net::IpAddr::V4(std::net::Ipv4Addr::UNSPECIFIED));
|
||||
// This instance of cancellation_handler doesn't have a RedisPublisherClient so it can't publish the message.
|
||||
match self
|
||||
.cancellation_handler
|
||||
.cancel_session(
|
||||
cancel_session.cancel_key_data,
|
||||
uuid::Uuid::nil(),
|
||||
&peer_addr,
|
||||
cancel_session.peer_addr.is_some(),
|
||||
)
|
||||
.cancel_session(cancel_session.cancel_key_data, uuid::Uuid::nil())
|
||||
.await
|
||||
{
|
||||
Ok(()) => {}
|
||||
@@ -346,7 +335,6 @@ mod tests {
|
||||
cancel_key_data,
|
||||
region_id: None,
|
||||
session_id: uuid,
|
||||
peer_addr: None,
|
||||
});
|
||||
let text = serde_json::to_string(&msg)?;
|
||||
let result: Notification = serde_json::from_str(&text)?;
|
||||
@@ -356,7 +344,6 @@ mod tests {
|
||||
cancel_key_data,
|
||||
region_id: Some("region".to_string()),
|
||||
session_id: uuid,
|
||||
peer_addr: None,
|
||||
});
|
||||
let text = serde_json::to_string(&msg)?;
|
||||
let result: Notification = serde_json::from_str(&text)?;
|
||||
|
||||
@@ -12,8 +12,8 @@ use tracing::field::display;
|
||||
use tracing::{debug, info};
|
||||
|
||||
use super::conn_pool::poll_client;
|
||||
use super::conn_pool_lib::{Client, ConnInfo, EndpointConnPool, GlobalConnPool};
|
||||
use super::http_conn_pool::{self, poll_http2_client, HttpConnPool, Send};
|
||||
use super::conn_pool_lib::{Client, ConnInfo, GlobalConnPool};
|
||||
use super::http_conn_pool::{self, poll_http2_client, Send};
|
||||
use super::local_conn_pool::{self, LocalConnPool, EXT_NAME, EXT_SCHEMA, EXT_VERSION};
|
||||
use crate::auth::backend::local::StaticAuthRules;
|
||||
use crate::auth::backend::{ComputeCredentials, ComputeUserInfo};
|
||||
@@ -36,10 +36,9 @@ use crate::rate_limiter::EndpointRateLimiter;
|
||||
use crate::types::{EndpointId, Host, LOCAL_PROXY_SUFFIX};
|
||||
|
||||
pub(crate) struct PoolingBackend {
|
||||
pub(crate) http_conn_pool: Arc<GlobalConnPool<Send, HttpConnPool<Send>>>,
|
||||
pub(crate) http_conn_pool: Arc<super::http_conn_pool::GlobalConnPool<Send>>,
|
||||
pub(crate) local_pool: Arc<LocalConnPool<tokio_postgres::Client>>,
|
||||
pub(crate) pool:
|
||||
Arc<GlobalConnPool<tokio_postgres::Client, EndpointConnPool<tokio_postgres::Client>>>,
|
||||
pub(crate) pool: Arc<GlobalConnPool<tokio_postgres::Client>>,
|
||||
|
||||
pub(crate) config: &'static ProxyConfig,
|
||||
pub(crate) auth_backend: &'static crate::auth::Backend<'static, ()>,
|
||||
@@ -168,10 +167,10 @@ impl PoolingBackend {
|
||||
force_new: bool,
|
||||
) -> Result<Client<tokio_postgres::Client>, HttpConnError> {
|
||||
let maybe_client = if force_new {
|
||||
debug!("pool: pool is disabled");
|
||||
info!("pool: pool is disabled");
|
||||
None
|
||||
} else {
|
||||
debug!("pool: looking for an existing connection");
|
||||
info!("pool: looking for an existing connection");
|
||||
self.pool.get(ctx, &conn_info)?
|
||||
};
|
||||
|
||||
@@ -205,14 +204,14 @@ impl PoolingBackend {
|
||||
ctx: &RequestContext,
|
||||
conn_info: ConnInfo,
|
||||
) -> Result<http_conn_pool::Client<Send>, HttpConnError> {
|
||||
debug!("pool: looking for an existing connection");
|
||||
info!("pool: looking for an existing connection");
|
||||
if let Ok(Some(client)) = self.http_conn_pool.get(ctx, &conn_info) {
|
||||
return Ok(client);
|
||||
}
|
||||
|
||||
let conn_id = uuid::Uuid::new_v4();
|
||||
tracing::Span::current().record("conn_id", display(conn_id));
|
||||
debug!(%conn_id, "pool: opening a new connection '{conn_info}'");
|
||||
info!(%conn_id, "pool: opening a new connection '{conn_info}'");
|
||||
let backend = self.auth_backend.as_ref().map(|()| ComputeCredentials {
|
||||
info: ComputeUserInfo {
|
||||
user: conn_info.user_info.user.clone(),
|
||||
@@ -475,7 +474,7 @@ impl ShouldRetryWakeCompute for LocalProxyConnError {
|
||||
}
|
||||
|
||||
struct TokioMechanism {
|
||||
pool: Arc<GlobalConnPool<tokio_postgres::Client, EndpointConnPool<tokio_postgres::Client>>>,
|
||||
pool: Arc<GlobalConnPool<tokio_postgres::Client>>,
|
||||
conn_info: ConnInfo,
|
||||
conn_id: uuid::Uuid,
|
||||
|
||||
@@ -525,7 +524,7 @@ impl ConnectMechanism for TokioMechanism {
|
||||
}
|
||||
|
||||
struct HyperMechanism {
|
||||
pool: Arc<GlobalConnPool<Send, HttpConnPool<Send>>>,
|
||||
pool: Arc<http_conn_pool::GlobalConnPool<Send>>,
|
||||
conn_info: ConnInfo,
|
||||
conn_id: uuid::Uuid,
|
||||
|
||||
|
||||
@@ -19,8 +19,7 @@ use {
|
||||
};
|
||||
|
||||
use super::conn_pool_lib::{
|
||||
Client, ClientDataEnum, ClientInnerCommon, ClientInnerExt, ConnInfo, EndpointConnPool,
|
||||
GlobalConnPool,
|
||||
Client, ClientDataEnum, ClientInnerCommon, ClientInnerExt, ConnInfo, GlobalConnPool,
|
||||
};
|
||||
use crate::context::RequestContext;
|
||||
use crate::control_plane::messages::MetricsAuxInfo;
|
||||
@@ -53,7 +52,7 @@ impl fmt::Display for ConnInfo {
|
||||
}
|
||||
|
||||
pub(crate) fn poll_client<C: ClientInnerExt>(
|
||||
global_pool: Arc<GlobalConnPool<C, EndpointConnPool<C>>>,
|
||||
global_pool: Arc<GlobalConnPool<C>>,
|
||||
ctx: &RequestContext,
|
||||
conn_info: ConnInfo,
|
||||
client: C,
|
||||
@@ -168,7 +167,6 @@ pub(crate) fn poll_client<C: ClientInnerExt>(
|
||||
Client::new(inner, conn_info, pool_clone)
|
||||
}
|
||||
|
||||
#[derive(Clone)]
|
||||
pub(crate) struct ClientDataRemote {
|
||||
session: tokio::sync::watch::Sender<uuid::Uuid>,
|
||||
cancel: CancellationToken,
|
||||
@@ -245,7 +243,7 @@ mod tests {
|
||||
},
|
||||
cancel_set: CancelSet::new(0),
|
||||
client_conn_threshold: u64::MAX,
|
||||
max_request_size_bytes: usize::MAX,
|
||||
max_request_size_bytes: u64::MAX,
|
||||
max_response_size_bytes: usize::MAX,
|
||||
}));
|
||||
let pool = GlobalConnPool::new(config);
|
||||
|
||||
@@ -1,5 +1,4 @@
|
||||
use std::collections::HashMap;
|
||||
use std::marker::PhantomData;
|
||||
use std::ops::Deref;
|
||||
use std::sync::atomic::{self, AtomicUsize};
|
||||
use std::sync::{Arc, Weak};
|
||||
@@ -44,14 +43,13 @@ impl ConnInfo {
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Clone)]
|
||||
pub(crate) enum ClientDataEnum {
|
||||
Remote(ClientDataRemote),
|
||||
Local(ClientDataLocal),
|
||||
#[allow(dead_code)]
|
||||
Http(ClientDataHttp),
|
||||
}
|
||||
|
||||
#[derive(Clone)]
|
||||
pub(crate) struct ClientInnerCommon<C: ClientInnerExt> {
|
||||
pub(crate) inner: C,
|
||||
pub(crate) aux: MetricsAuxInfo,
|
||||
@@ -93,7 +91,6 @@ pub(crate) struct ConnPoolEntry<C: ClientInnerExt> {
|
||||
pub(crate) struct EndpointConnPool<C: ClientInnerExt> {
|
||||
pools: HashMap<(DbName, RoleName), DbUserConnPool<C>>,
|
||||
total_conns: usize,
|
||||
/// max # connections per endpoint
|
||||
max_conns: usize,
|
||||
_guard: HttpEndpointPoolsGuard<'static>,
|
||||
global_connections_count: Arc<AtomicUsize>,
|
||||
@@ -235,7 +232,7 @@ impl<C: ClientInnerExt> EndpointConnPool<C> {
|
||||
|
||||
// do logging outside of the mutex
|
||||
if returned {
|
||||
debug!(%conn_id, "{pool_name}: returning connection '{conn_info}' back to the pool, total_conns={total_conns}, for this (db, user)={per_db_size}");
|
||||
info!(%conn_id, "{pool_name}: returning connection '{conn_info}' back to the pool, total_conns={total_conns}, for this (db, user)={per_db_size}");
|
||||
} else {
|
||||
info!(%conn_id, "{pool_name}: throwing away connection '{conn_info}' because pool is full, total_conns={total_conns}");
|
||||
}
|
||||
@@ -320,49 +317,24 @@ impl<C: ClientInnerExt> DbUserConn<C> for DbUserConnPool<C> {
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) trait EndpointConnPoolExt<C: ClientInnerExt> {
|
||||
fn clear_closed(&mut self) -> usize;
|
||||
fn total_conns(&self) -> usize;
|
||||
}
|
||||
|
||||
impl<C: ClientInnerExt> EndpointConnPoolExt<C> for EndpointConnPool<C> {
|
||||
fn clear_closed(&mut self) -> usize {
|
||||
let mut clients_removed: usize = 0;
|
||||
for db_pool in self.pools.values_mut() {
|
||||
clients_removed += db_pool.clear_closed_clients(&mut self.total_conns);
|
||||
}
|
||||
clients_removed
|
||||
}
|
||||
|
||||
fn total_conns(&self) -> usize {
|
||||
self.total_conns
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) struct GlobalConnPool<C, P>
|
||||
where
|
||||
C: ClientInnerExt,
|
||||
P: EndpointConnPoolExt<C>,
|
||||
{
|
||||
pub(crate) struct GlobalConnPool<C: ClientInnerExt> {
|
||||
// endpoint -> per-endpoint connection pool
|
||||
//
|
||||
// That should be a fairly conteded map, so return reference to the per-endpoint
|
||||
// pool as early as possible and release the lock.
|
||||
pub(crate) global_pool: DashMap<EndpointCacheKey, Arc<RwLock<P>>>,
|
||||
global_pool: DashMap<EndpointCacheKey, Arc<RwLock<EndpointConnPool<C>>>>,
|
||||
|
||||
/// Number of endpoint-connection pools
|
||||
///
|
||||
/// [`DashMap::len`] iterates over all inner pools and acquires a read lock on each.
|
||||
/// That seems like far too much effort, so we're using a relaxed increment counter instead.
|
||||
/// It's only used for diagnostics.
|
||||
pub(crate) global_pool_size: AtomicUsize,
|
||||
global_pool_size: AtomicUsize,
|
||||
|
||||
/// Total number of connections in the pool
|
||||
pub(crate) global_connections_count: Arc<AtomicUsize>,
|
||||
global_connections_count: Arc<AtomicUsize>,
|
||||
|
||||
pub(crate) config: &'static crate::config::HttpConfig,
|
||||
|
||||
_marker: PhantomData<C>,
|
||||
config: &'static crate::config::HttpConfig,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Copy)]
|
||||
@@ -385,11 +357,7 @@ pub struct GlobalConnPoolOptions {
|
||||
pub max_total_conns: usize,
|
||||
}
|
||||
|
||||
impl<C, P> GlobalConnPool<C, P>
|
||||
where
|
||||
C: ClientInnerExt,
|
||||
P: EndpointConnPoolExt<C>,
|
||||
{
|
||||
impl<C: ClientInnerExt> GlobalConnPool<C> {
|
||||
pub(crate) fn new(config: &'static crate::config::HttpConfig) -> Arc<Self> {
|
||||
let shards = config.pool_options.pool_shards;
|
||||
Arc::new(Self {
|
||||
@@ -397,7 +365,6 @@ where
|
||||
global_pool_size: AtomicUsize::new(0),
|
||||
config,
|
||||
global_connections_count: Arc::new(AtomicUsize::new(0)),
|
||||
_marker: PhantomData,
|
||||
})
|
||||
}
|
||||
|
||||
@@ -411,6 +378,60 @@ where
|
||||
self.config.pool_options.idle_timeout
|
||||
}
|
||||
|
||||
pub(crate) fn get(
|
||||
self: &Arc<Self>,
|
||||
ctx: &RequestContext,
|
||||
conn_info: &ConnInfo,
|
||||
) -> Result<Option<Client<C>>, HttpConnError> {
|
||||
let mut client: Option<ClientInnerCommon<C>> = None;
|
||||
let Some(endpoint) = conn_info.endpoint_cache_key() else {
|
||||
return Ok(None);
|
||||
};
|
||||
|
||||
let endpoint_pool = self.get_or_create_endpoint_pool(&endpoint);
|
||||
if let Some(entry) = endpoint_pool
|
||||
.write()
|
||||
.get_conn_entry(conn_info.db_and_user())
|
||||
{
|
||||
client = Some(entry.conn);
|
||||
}
|
||||
let endpoint_pool = Arc::downgrade(&endpoint_pool);
|
||||
|
||||
// ok return cached connection if found and establish a new one otherwise
|
||||
if let Some(mut client) = client {
|
||||
if client.inner.is_closed() {
|
||||
info!("pool: cached connection '{conn_info}' is closed, opening a new one");
|
||||
return Ok(None);
|
||||
}
|
||||
tracing::Span::current()
|
||||
.record("conn_id", tracing::field::display(client.get_conn_id()));
|
||||
tracing::Span::current().record(
|
||||
"pid",
|
||||
tracing::field::display(client.inner.get_process_id()),
|
||||
);
|
||||
info!(
|
||||
cold_start_info = ColdStartInfo::HttpPoolHit.as_str(),
|
||||
"pool: reusing connection '{conn_info}'"
|
||||
);
|
||||
|
||||
match client.get_data() {
|
||||
ClientDataEnum::Local(data) => {
|
||||
data.session().send(ctx.session_id())?;
|
||||
}
|
||||
|
||||
ClientDataEnum::Remote(data) => {
|
||||
data.session().send(ctx.session_id())?;
|
||||
}
|
||||
ClientDataEnum::Http(_) => (),
|
||||
}
|
||||
|
||||
ctx.set_cold_start_info(ColdStartInfo::HttpPoolHit);
|
||||
ctx.success();
|
||||
return Ok(Some(Client::new(client, conn_info.clone(), endpoint_pool)));
|
||||
}
|
||||
Ok(None)
|
||||
}
|
||||
|
||||
pub(crate) fn shutdown(&self) {
|
||||
// drops all strong references to endpoint-pools
|
||||
self.global_pool.clear();
|
||||
@@ -443,10 +464,17 @@ where
|
||||
// if the current endpoint pool is unique (no other strong or weak references)
|
||||
// then it is currently not in use by any connections.
|
||||
if let Some(pool) = Arc::get_mut(x.get_mut()) {
|
||||
let endpoints = pool.get_mut();
|
||||
clients_removed = endpoints.clear_closed();
|
||||
let EndpointConnPool {
|
||||
pools, total_conns, ..
|
||||
} = pool.get_mut();
|
||||
|
||||
if endpoints.total_conns() == 0 {
|
||||
// ensure that closed clients are removed
|
||||
for db_pool in pools.values_mut() {
|
||||
clients_removed += db_pool.clear_closed_clients(total_conns);
|
||||
}
|
||||
|
||||
// we only remove this pool if it has no active connections
|
||||
if *total_conns == 0 {
|
||||
info!("pool: discarding pool for endpoint {endpoint}");
|
||||
return false;
|
||||
}
|
||||
@@ -482,62 +510,6 @@ where
|
||||
info!("pool: performed global pool gc. size now {global_pool_size}");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<C: ClientInnerExt> GlobalConnPool<C, EndpointConnPool<C>> {
|
||||
pub(crate) fn get(
|
||||
self: &Arc<Self>,
|
||||
ctx: &RequestContext,
|
||||
conn_info: &ConnInfo,
|
||||
) -> Result<Option<Client<C>>, HttpConnError> {
|
||||
let mut client: Option<ClientInnerCommon<C>> = None;
|
||||
let Some(endpoint) = conn_info.endpoint_cache_key() else {
|
||||
return Ok(None);
|
||||
};
|
||||
|
||||
let endpoint_pool = self.get_or_create_endpoint_pool(&endpoint);
|
||||
if let Some(entry) = endpoint_pool
|
||||
.write()
|
||||
.get_conn_entry(conn_info.db_and_user())
|
||||
{
|
||||
client = Some(entry.conn);
|
||||
}
|
||||
let endpoint_pool = Arc::downgrade(&endpoint_pool);
|
||||
|
||||
// ok return cached connection if found and establish a new one otherwise
|
||||
if let Some(mut client) = client {
|
||||
if client.inner.is_closed() {
|
||||
info!("pool: cached connection '{conn_info}' is closed, opening a new one");
|
||||
return Ok(None);
|
||||
}
|
||||
tracing::Span::current()
|
||||
.record("conn_id", tracing::field::display(client.get_conn_id()));
|
||||
tracing::Span::current().record(
|
||||
"pid",
|
||||
tracing::field::display(client.inner.get_process_id()),
|
||||
);
|
||||
debug!(
|
||||
cold_start_info = ColdStartInfo::HttpPoolHit.as_str(),
|
||||
"pool: reusing connection '{conn_info}'"
|
||||
);
|
||||
|
||||
match client.get_data() {
|
||||
ClientDataEnum::Local(data) => {
|
||||
data.session().send(ctx.session_id())?;
|
||||
}
|
||||
|
||||
ClientDataEnum::Remote(data) => {
|
||||
data.session().send(ctx.session_id())?;
|
||||
}
|
||||
ClientDataEnum::Http(_) => (),
|
||||
}
|
||||
|
||||
ctx.set_cold_start_info(ColdStartInfo::HttpPoolHit);
|
||||
ctx.success();
|
||||
return Ok(Some(Client::new(client, conn_info.clone(), endpoint_pool)));
|
||||
}
|
||||
Ok(None)
|
||||
}
|
||||
|
||||
pub(crate) fn get_or_create_endpoint_pool(
|
||||
self: &Arc<Self>,
|
||||
@@ -584,6 +556,7 @@ impl<C: ClientInnerExt> GlobalConnPool<C, EndpointConnPool<C>> {
|
||||
pool
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) struct Client<C: ClientInnerExt> {
|
||||
span: Span,
|
||||
inner: Option<ClientInnerCommon<C>>,
|
||||
|
||||
@@ -2,17 +2,16 @@ use std::collections::VecDeque;
|
||||
use std::sync::atomic::{self, AtomicUsize};
|
||||
use std::sync::{Arc, Weak};
|
||||
|
||||
use dashmap::DashMap;
|
||||
use hyper::client::conn::http2;
|
||||
use hyper_util::rt::{TokioExecutor, TokioIo};
|
||||
use parking_lot::RwLock;
|
||||
use rand::Rng;
|
||||
use tokio::net::TcpStream;
|
||||
use tracing::{debug, error, info, info_span, Instrument};
|
||||
|
||||
use super::backend::HttpConnError;
|
||||
use super::conn_pool_lib::{
|
||||
ClientDataEnum, ClientInnerCommon, ClientInnerExt, ConnInfo, ConnPoolEntry,
|
||||
EndpointConnPoolExt, GlobalConnPool,
|
||||
};
|
||||
use super::conn_pool_lib::{ClientInnerExt, ConnInfo};
|
||||
use crate::context::RequestContext;
|
||||
use crate::control_plane::messages::{ColdStartInfo, MetricsAuxInfo};
|
||||
use crate::metrics::{HttpEndpointPoolsGuard, Metrics};
|
||||
@@ -24,11 +23,17 @@ pub(crate) type Connect =
|
||||
http2::Connection<TokioIo<TcpStream>, hyper::body::Incoming, TokioExecutor>;
|
||||
|
||||
#[derive(Clone)]
|
||||
pub(crate) struct ConnPoolEntry<C: ClientInnerExt + Clone> {
|
||||
conn: C,
|
||||
conn_id: uuid::Uuid,
|
||||
aux: MetricsAuxInfo,
|
||||
}
|
||||
|
||||
pub(crate) struct ClientDataHttp();
|
||||
|
||||
// Per-endpoint connection pool
|
||||
// Number of open connections is limited by the `max_conns_per_endpoint`.
|
||||
pub(crate) struct HttpConnPool<C: ClientInnerExt + Clone> {
|
||||
pub(crate) struct EndpointConnPool<C: ClientInnerExt + Clone> {
|
||||
// TODO(conrad):
|
||||
// either we should open more connections depending on stream count
|
||||
// (not exposed by hyper, need our own counter)
|
||||
@@ -43,19 +48,14 @@ pub(crate) struct HttpConnPool<C: ClientInnerExt + Clone> {
|
||||
global_connections_count: Arc<AtomicUsize>,
|
||||
}
|
||||
|
||||
impl<C: ClientInnerExt + Clone> HttpConnPool<C> {
|
||||
impl<C: ClientInnerExt + Clone> EndpointConnPool<C> {
|
||||
fn get_conn_entry(&mut self) -> Option<ConnPoolEntry<C>> {
|
||||
let Self { conns, .. } = self;
|
||||
|
||||
loop {
|
||||
let conn = conns.pop_front()?;
|
||||
if !conn.conn.inner.is_closed() {
|
||||
let new_conn = ConnPoolEntry {
|
||||
conn: conn.conn.clone(),
|
||||
_last_access: std::time::Instant::now(),
|
||||
};
|
||||
|
||||
conns.push_back(new_conn);
|
||||
if !conn.conn.is_closed() {
|
||||
conns.push_back(conn.clone());
|
||||
return Some(conn);
|
||||
}
|
||||
}
|
||||
@@ -69,7 +69,7 @@ impl<C: ClientInnerExt + Clone> HttpConnPool<C> {
|
||||
} = self;
|
||||
|
||||
let old_len = conns.len();
|
||||
conns.retain(|entry| entry.conn.conn_id != conn_id);
|
||||
conns.retain(|conn| conn.conn_id != conn_id);
|
||||
let new_len = conns.len();
|
||||
let removed = old_len - new_len;
|
||||
if removed > 0 {
|
||||
@@ -84,22 +84,7 @@ impl<C: ClientInnerExt + Clone> HttpConnPool<C> {
|
||||
}
|
||||
}
|
||||
|
||||
impl<C: ClientInnerExt + Clone> EndpointConnPoolExt<C> for HttpConnPool<C> {
|
||||
fn clear_closed(&mut self) -> usize {
|
||||
let Self { conns, .. } = self;
|
||||
let old_len = conns.len();
|
||||
conns.retain(|entry| !entry.conn.inner.is_closed());
|
||||
|
||||
let new_len = conns.len();
|
||||
old_len - new_len
|
||||
}
|
||||
|
||||
fn total_conns(&self) -> usize {
|
||||
self.conns.len()
|
||||
}
|
||||
}
|
||||
|
||||
impl<C: ClientInnerExt + Clone> Drop for HttpConnPool<C> {
|
||||
impl<C: ClientInnerExt + Clone> Drop for EndpointConnPool<C> {
|
||||
fn drop(&mut self) {
|
||||
if !self.conns.is_empty() {
|
||||
self.global_connections_count
|
||||
@@ -113,7 +98,117 @@ impl<C: ClientInnerExt + Clone> Drop for HttpConnPool<C> {
|
||||
}
|
||||
}
|
||||
|
||||
impl<C: ClientInnerExt + Clone> GlobalConnPool<C, HttpConnPool<C>> {
|
||||
pub(crate) struct GlobalConnPool<C: ClientInnerExt + Clone> {
|
||||
// endpoint -> per-endpoint connection pool
|
||||
//
|
||||
// That should be a fairly conteded map, so return reference to the per-endpoint
|
||||
// pool as early as possible and release the lock.
|
||||
global_pool: DashMap<EndpointCacheKey, Arc<RwLock<EndpointConnPool<C>>>>,
|
||||
|
||||
/// Number of endpoint-connection pools
|
||||
///
|
||||
/// [`DashMap::len`] iterates over all inner pools and acquires a read lock on each.
|
||||
/// That seems like far too much effort, so we're using a relaxed increment counter instead.
|
||||
/// It's only used for diagnostics.
|
||||
global_pool_size: AtomicUsize,
|
||||
|
||||
/// Total number of connections in the pool
|
||||
global_connections_count: Arc<AtomicUsize>,
|
||||
|
||||
config: &'static crate::config::HttpConfig,
|
||||
}
|
||||
|
||||
impl<C: ClientInnerExt + Clone> GlobalConnPool<C> {
|
||||
pub(crate) fn new(config: &'static crate::config::HttpConfig) -> Arc<Self> {
|
||||
let shards = config.pool_options.pool_shards;
|
||||
Arc::new(Self {
|
||||
global_pool: DashMap::with_shard_amount(shards),
|
||||
global_pool_size: AtomicUsize::new(0),
|
||||
config,
|
||||
global_connections_count: Arc::new(AtomicUsize::new(0)),
|
||||
})
|
||||
}
|
||||
|
||||
pub(crate) fn shutdown(&self) {
|
||||
// drops all strong references to endpoint-pools
|
||||
self.global_pool.clear();
|
||||
}
|
||||
|
||||
pub(crate) async fn gc_worker(&self, mut rng: impl Rng) {
|
||||
let epoch = self.config.pool_options.gc_epoch;
|
||||
let mut interval = tokio::time::interval(epoch / (self.global_pool.shards().len()) as u32);
|
||||
loop {
|
||||
interval.tick().await;
|
||||
|
||||
let shard = rng.gen_range(0..self.global_pool.shards().len());
|
||||
self.gc(shard);
|
||||
}
|
||||
}
|
||||
|
||||
fn gc(&self, shard: usize) {
|
||||
debug!(shard, "pool: performing epoch reclamation");
|
||||
|
||||
// acquire a random shard lock
|
||||
let mut shard = self.global_pool.shards()[shard].write();
|
||||
|
||||
let timer = Metrics::get()
|
||||
.proxy
|
||||
.http_pool_reclaimation_lag_seconds
|
||||
.start_timer();
|
||||
let current_len = shard.len();
|
||||
let mut clients_removed = 0;
|
||||
shard.retain(|endpoint, x| {
|
||||
// if the current endpoint pool is unique (no other strong or weak references)
|
||||
// then it is currently not in use by any connections.
|
||||
if let Some(pool) = Arc::get_mut(x.get_mut()) {
|
||||
let EndpointConnPool { conns, .. } = pool.get_mut();
|
||||
|
||||
let old_len = conns.len();
|
||||
|
||||
conns.retain(|conn| !conn.conn.is_closed());
|
||||
|
||||
let new_len = conns.len();
|
||||
let removed = old_len - new_len;
|
||||
clients_removed += removed;
|
||||
|
||||
// we only remove this pool if it has no active connections
|
||||
if conns.is_empty() {
|
||||
info!("pool: discarding pool for endpoint {endpoint}");
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
true
|
||||
});
|
||||
|
||||
let new_len = shard.len();
|
||||
drop(shard);
|
||||
timer.observe();
|
||||
|
||||
// Do logging outside of the lock.
|
||||
if clients_removed > 0 {
|
||||
let size = self
|
||||
.global_connections_count
|
||||
.fetch_sub(clients_removed, atomic::Ordering::Relaxed)
|
||||
- clients_removed;
|
||||
Metrics::get()
|
||||
.proxy
|
||||
.http_pool_opened_connections
|
||||
.get_metric()
|
||||
.dec_by(clients_removed as i64);
|
||||
info!("pool: performed global pool gc. removed {clients_removed} clients, total number of clients in pool is {size}");
|
||||
}
|
||||
let removed = current_len - new_len;
|
||||
|
||||
if removed > 0 {
|
||||
let global_pool_size = self
|
||||
.global_pool_size
|
||||
.fetch_sub(removed, atomic::Ordering::Relaxed)
|
||||
- removed;
|
||||
info!("pool: performed global pool gc. size now {global_pool_size}");
|
||||
}
|
||||
}
|
||||
|
||||
#[expect(unused_results)]
|
||||
pub(crate) fn get(
|
||||
self: &Arc<Self>,
|
||||
@@ -131,28 +226,27 @@ impl<C: ClientInnerExt + Clone> GlobalConnPool<C, HttpConnPool<C>> {
|
||||
return result;
|
||||
};
|
||||
|
||||
tracing::Span::current().record("conn_id", tracing::field::display(client.conn.conn_id));
|
||||
debug!(
|
||||
tracing::Span::current().record("conn_id", tracing::field::display(client.conn_id));
|
||||
info!(
|
||||
cold_start_info = ColdStartInfo::HttpPoolHit.as_str(),
|
||||
"pool: reusing connection '{conn_info}'"
|
||||
);
|
||||
ctx.set_cold_start_info(ColdStartInfo::HttpPoolHit);
|
||||
ctx.success();
|
||||
|
||||
Ok(Some(Client::new(client.conn.clone())))
|
||||
Ok(Some(Client::new(client.conn, client.aux)))
|
||||
}
|
||||
|
||||
fn get_or_create_endpoint_pool(
|
||||
self: &Arc<Self>,
|
||||
endpoint: &EndpointCacheKey,
|
||||
) -> Arc<RwLock<HttpConnPool<C>>> {
|
||||
) -> Arc<RwLock<EndpointConnPool<C>>> {
|
||||
// fast path
|
||||
if let Some(pool) = self.global_pool.get(endpoint) {
|
||||
return pool.clone();
|
||||
}
|
||||
|
||||
// slow path
|
||||
let new_pool = Arc::new(RwLock::new(HttpConnPool {
|
||||
let new_pool = Arc::new(RwLock::new(EndpointConnPool {
|
||||
conns: VecDeque::new(),
|
||||
_guard: Metrics::get().proxy.http_endpoint_pools.guard(),
|
||||
global_connections_count: self.global_connections_count.clone(),
|
||||
@@ -185,7 +279,7 @@ impl<C: ClientInnerExt + Clone> GlobalConnPool<C, HttpConnPool<C>> {
|
||||
}
|
||||
|
||||
pub(crate) fn poll_http2_client(
|
||||
global_pool: Arc<GlobalConnPool<Send, HttpConnPool<Send>>>,
|
||||
global_pool: Arc<GlobalConnPool<Send>>,
|
||||
ctx: &RequestContext,
|
||||
conn_info: &ConnInfo,
|
||||
client: Send,
|
||||
@@ -205,15 +299,11 @@ pub(crate) fn poll_http2_client(
|
||||
let pool = match conn_info.endpoint_cache_key() {
|
||||
Some(endpoint) => {
|
||||
let pool = global_pool.get_or_create_endpoint_pool(&endpoint);
|
||||
let client = ClientInnerCommon {
|
||||
inner: client.clone(),
|
||||
aux: aux.clone(),
|
||||
conn_id,
|
||||
data: ClientDataEnum::Http(ClientDataHttp()),
|
||||
};
|
||||
|
||||
pool.write().conns.push_back(ConnPoolEntry {
|
||||
conn: client,
|
||||
_last_access: std::time::Instant::now(),
|
||||
conn: client.clone(),
|
||||
conn_id,
|
||||
aux: aux.clone(),
|
||||
});
|
||||
Metrics::get()
|
||||
.proxy
|
||||
@@ -245,30 +335,23 @@ pub(crate) fn poll_http2_client(
|
||||
.instrument(span),
|
||||
);
|
||||
|
||||
let client = ClientInnerCommon {
|
||||
inner: client,
|
||||
aux,
|
||||
conn_id,
|
||||
data: ClientDataEnum::Http(ClientDataHttp()),
|
||||
};
|
||||
|
||||
Client::new(client)
|
||||
Client::new(client, aux)
|
||||
}
|
||||
|
||||
pub(crate) struct Client<C: ClientInnerExt + Clone> {
|
||||
pub(crate) inner: ClientInnerCommon<C>,
|
||||
pub(crate) inner: C,
|
||||
aux: MetricsAuxInfo,
|
||||
}
|
||||
|
||||
impl<C: ClientInnerExt + Clone> Client<C> {
|
||||
pub(self) fn new(inner: ClientInnerCommon<C>) -> Self {
|
||||
Self { inner }
|
||||
pub(self) fn new(inner: C, aux: MetricsAuxInfo) -> Self {
|
||||
Self { inner, aux }
|
||||
}
|
||||
|
||||
pub(crate) fn metrics(&self) -> Arc<MetricCounter> {
|
||||
let aux = &self.inner.aux;
|
||||
USAGE_METRICS.register(Ids {
|
||||
endpoint_id: aux.endpoint_id,
|
||||
branch_id: aux.branch_id,
|
||||
endpoint_id: self.aux.endpoint_id,
|
||||
branch_id: self.aux.branch_id,
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
@@ -29,7 +29,7 @@ use tokio_postgres::tls::NoTlsStream;
|
||||
use tokio_postgres::types::ToSql;
|
||||
use tokio_postgres::{AsyncMessage, Socket};
|
||||
use tokio_util::sync::CancellationToken;
|
||||
use tracing::{debug, error, info, info_span, warn, Instrument};
|
||||
use tracing::{error, info, info_span, warn, Instrument};
|
||||
|
||||
use super::backend::HttpConnError;
|
||||
use super::conn_pool_lib::{
|
||||
@@ -44,7 +44,6 @@ pub(crate) const EXT_NAME: &str = "pg_session_jwt";
|
||||
pub(crate) const EXT_VERSION: &str = "0.1.2";
|
||||
pub(crate) const EXT_SCHEMA: &str = "auth";
|
||||
|
||||
#[derive(Clone)]
|
||||
pub(crate) struct ClientDataLocal {
|
||||
session: tokio::sync::watch::Sender<uuid::Uuid>,
|
||||
cancel: CancellationToken,
|
||||
@@ -111,7 +110,7 @@ impl<C: ClientInnerExt> LocalConnPool<C> {
|
||||
"pid",
|
||||
tracing::field::display(client.inner.get_process_id()),
|
||||
);
|
||||
debug!(
|
||||
info!(
|
||||
cold_start_info = ColdStartInfo::HttpPoolHit.as_str(),
|
||||
"local_pool: reusing connection '{conn_info}'"
|
||||
);
|
||||
|
||||
@@ -88,7 +88,7 @@ pub async fn task_main(
|
||||
}
|
||||
});
|
||||
|
||||
let http_conn_pool = conn_pool_lib::GlobalConnPool::new(&config.http_config);
|
||||
let http_conn_pool = http_conn_pool::GlobalConnPool::new(&config.http_config);
|
||||
{
|
||||
let http_conn_pool = Arc::clone(&http_conn_pool);
|
||||
tokio::spawn(async move {
|
||||
|
||||
@@ -8,17 +8,17 @@ use http::header::AUTHORIZATION;
|
||||
use http::Method;
|
||||
use http_body_util::combinators::BoxBody;
|
||||
use http_body_util::{BodyExt, Full};
|
||||
use hyper::body::Incoming;
|
||||
use hyper::body::{Body, Incoming};
|
||||
use hyper::http::{HeaderName, HeaderValue};
|
||||
use hyper::{header, HeaderMap, Request, Response, StatusCode};
|
||||
use pq_proto::StartupMessageParamsBuilder;
|
||||
use serde::Serialize;
|
||||
use serde_json::Value;
|
||||
use tokio::time::{self, Instant};
|
||||
use tokio::time;
|
||||
use tokio_postgres::error::{DbError, ErrorPosition, SqlState};
|
||||
use tokio_postgres::{GenericClient, IsolationLevel, NoTls, ReadyForQueryStatus, Transaction};
|
||||
use tokio_util::sync::CancellationToken;
|
||||
use tracing::{debug, error, info};
|
||||
use tracing::{error, info};
|
||||
use typed_json::json;
|
||||
use url::Url;
|
||||
use urlencoding;
|
||||
@@ -36,7 +36,6 @@ use crate::auth::{endpoint_sni, ComputeUserInfoParseError};
|
||||
use crate::config::{AuthenticationConfig, HttpConfig, ProxyConfig, TlsConfig};
|
||||
use crate::context::RequestContext;
|
||||
use crate::error::{ErrorKind, ReportableError, UserFacingError};
|
||||
use crate::http::{read_body_with_limit, ReadBodyError};
|
||||
use crate::metrics::{HttpDirection, Metrics};
|
||||
use crate::proxy::{run_until_cancelled, NeonOptions};
|
||||
use crate::serverless::backend::HttpConnError;
|
||||
@@ -48,7 +47,6 @@ use crate::usage_metrics::{MetricCounter, MetricCounterRecorder};
|
||||
struct QueryData {
|
||||
query: String,
|
||||
#[serde(deserialize_with = "bytes_to_pg_text")]
|
||||
#[serde(default)]
|
||||
params: Vec<Option<String>>,
|
||||
#[serde(default)]
|
||||
array_mode: Option<bool>,
|
||||
@@ -359,6 +357,8 @@ pub(crate) enum SqlOverHttpError {
|
||||
ConnectCompute(#[from] HttpConnError),
|
||||
#[error("{0}")]
|
||||
ConnInfo(#[from] ConnInfoError),
|
||||
#[error("request is too large (max is {0} bytes)")]
|
||||
RequestTooLarge(u64),
|
||||
#[error("response is too large (max is {0} bytes)")]
|
||||
ResponseTooLarge(usize),
|
||||
#[error("invalid isolation level")]
|
||||
@@ -377,6 +377,7 @@ impl ReportableError for SqlOverHttpError {
|
||||
SqlOverHttpError::ReadPayload(e) => e.get_error_kind(),
|
||||
SqlOverHttpError::ConnectCompute(e) => e.get_error_kind(),
|
||||
SqlOverHttpError::ConnInfo(e) => e.get_error_kind(),
|
||||
SqlOverHttpError::RequestTooLarge(_) => ErrorKind::User,
|
||||
SqlOverHttpError::ResponseTooLarge(_) => ErrorKind::User,
|
||||
SqlOverHttpError::InvalidIsolationLevel => ErrorKind::User,
|
||||
SqlOverHttpError::Postgres(p) => p.get_error_kind(),
|
||||
@@ -392,6 +393,7 @@ impl UserFacingError for SqlOverHttpError {
|
||||
SqlOverHttpError::ReadPayload(p) => p.to_string(),
|
||||
SqlOverHttpError::ConnectCompute(c) => c.to_string_client(),
|
||||
SqlOverHttpError::ConnInfo(c) => c.to_string_client(),
|
||||
SqlOverHttpError::RequestTooLarge(_) => self.to_string(),
|
||||
SqlOverHttpError::ResponseTooLarge(_) => self.to_string(),
|
||||
SqlOverHttpError::InvalidIsolationLevel => self.to_string(),
|
||||
SqlOverHttpError::Postgres(p) => p.to_string(),
|
||||
@@ -404,12 +406,13 @@ impl UserFacingError for SqlOverHttpError {
|
||||
impl HttpCodeError for SqlOverHttpError {
|
||||
fn get_http_status_code(&self) -> StatusCode {
|
||||
match self {
|
||||
SqlOverHttpError::ReadPayload(e) => e.get_http_status_code(),
|
||||
SqlOverHttpError::ReadPayload(_) => StatusCode::BAD_REQUEST,
|
||||
SqlOverHttpError::ConnectCompute(h) => match h.get_error_kind() {
|
||||
ErrorKind::User => StatusCode::BAD_REQUEST,
|
||||
_ => StatusCode::INTERNAL_SERVER_ERROR,
|
||||
},
|
||||
SqlOverHttpError::ConnInfo(_) => StatusCode::BAD_REQUEST,
|
||||
SqlOverHttpError::RequestTooLarge(_) => StatusCode::PAYLOAD_TOO_LARGE,
|
||||
SqlOverHttpError::ResponseTooLarge(_) => StatusCode::INSUFFICIENT_STORAGE,
|
||||
SqlOverHttpError::InvalidIsolationLevel => StatusCode::BAD_REQUEST,
|
||||
SqlOverHttpError::Postgres(_) => StatusCode::BAD_REQUEST,
|
||||
@@ -423,41 +426,19 @@ impl HttpCodeError for SqlOverHttpError {
|
||||
pub(crate) enum ReadPayloadError {
|
||||
#[error("could not read the HTTP request body: {0}")]
|
||||
Read(#[from] hyper::Error),
|
||||
#[error("request is too large (max is {limit} bytes)")]
|
||||
BodyTooLarge { limit: usize },
|
||||
#[error("could not parse the HTTP request body: {0}")]
|
||||
Parse(#[from] serde_json::Error),
|
||||
}
|
||||
|
||||
impl From<ReadBodyError<hyper::Error>> for ReadPayloadError {
|
||||
fn from(value: ReadBodyError<hyper::Error>) -> Self {
|
||||
match value {
|
||||
ReadBodyError::BodyTooLarge { limit } => Self::BodyTooLarge { limit },
|
||||
ReadBodyError::Read(e) => Self::Read(e),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl ReportableError for ReadPayloadError {
|
||||
fn get_error_kind(&self) -> ErrorKind {
|
||||
match self {
|
||||
ReadPayloadError::Read(_) => ErrorKind::ClientDisconnect,
|
||||
ReadPayloadError::BodyTooLarge { .. } => ErrorKind::User,
|
||||
ReadPayloadError::Parse(_) => ErrorKind::User,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl HttpCodeError for ReadPayloadError {
|
||||
fn get_http_status_code(&self) -> StatusCode {
|
||||
match self {
|
||||
ReadPayloadError::Read(_) => StatusCode::BAD_REQUEST,
|
||||
ReadPayloadError::BodyTooLarge { .. } => StatusCode::PAYLOAD_TOO_LARGE,
|
||||
ReadPayloadError::Parse(_) => StatusCode::BAD_REQUEST,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, thiserror::Error)]
|
||||
pub(crate) enum SqlOverHttpCancel {
|
||||
#[error("query was cancelled")]
|
||||
@@ -599,20 +580,28 @@ async fn handle_db_inner(
|
||||
|
||||
let parsed_headers = HttpHeaders::try_parse(headers)?;
|
||||
|
||||
let request_content_length = match request.body().size_hint().upper() {
|
||||
Some(v) => v,
|
||||
None => config.http_config.max_request_size_bytes + 1,
|
||||
};
|
||||
info!(request_content_length, "request size in bytes");
|
||||
Metrics::get()
|
||||
.proxy
|
||||
.http_conn_content_length_bytes
|
||||
.observe(HttpDirection::Request, request_content_length as f64);
|
||||
|
||||
// we don't have a streaming request support yet so this is to prevent OOM
|
||||
// from a malicious user sending an extremely large request body
|
||||
if request_content_length > config.http_config.max_request_size_bytes {
|
||||
return Err(SqlOverHttpError::RequestTooLarge(
|
||||
config.http_config.max_request_size_bytes,
|
||||
));
|
||||
}
|
||||
|
||||
let fetch_and_process_request = Box::pin(
|
||||
async {
|
||||
let body = read_body_with_limit(
|
||||
request.into_body(),
|
||||
config.http_config.max_request_size_bytes,
|
||||
)
|
||||
.await?;
|
||||
|
||||
Metrics::get()
|
||||
.proxy
|
||||
.http_conn_content_length_bytes
|
||||
.observe(HttpDirection::Request, body.len() as f64);
|
||||
|
||||
debug!(length = body.len(), "request payload read");
|
||||
let body = request.into_body().collect().await?.to_bytes();
|
||||
info!(length = body.len(), "request payload read");
|
||||
let payload: Payload = serde_json::from_slice(&body)?;
|
||||
Ok::<Payload, ReadPayloadError>(payload) // Adjust error type accordingly
|
||||
}
|
||||
@@ -779,7 +768,6 @@ async fn handle_auth_broker_inner(
|
||||
let _metrics = client.metrics();
|
||||
|
||||
Ok(client
|
||||
.inner
|
||||
.inner
|
||||
.send_request(req)
|
||||
.await
|
||||
@@ -980,11 +968,10 @@ async fn query_to_json<T: GenericClient>(
|
||||
current_size: &mut usize,
|
||||
parsed_headers: HttpHeaders,
|
||||
) -> Result<(ReadyForQueryStatus, impl Serialize), SqlOverHttpError> {
|
||||
let query_start = Instant::now();
|
||||
|
||||
info!("executing query");
|
||||
let query_params = data.params;
|
||||
let mut row_stream = std::pin::pin!(client.query_raw_txt(&data.query, query_params).await?);
|
||||
let query_acknowledged = Instant::now();
|
||||
info!("finished executing query");
|
||||
|
||||
// Manually drain the stream into a vector to leave row_stream hanging
|
||||
// around to get a command tag. Also check that the response is not too
|
||||
@@ -1003,7 +990,6 @@ async fn query_to_json<T: GenericClient>(
|
||||
}
|
||||
}
|
||||
|
||||
let query_resp_end = Instant::now();
|
||||
let ready = row_stream.ready_status();
|
||||
|
||||
// grab the command tag and number of rows affected
|
||||
@@ -1023,9 +1009,7 @@ async fn query_to_json<T: GenericClient>(
|
||||
rows = rows.len(),
|
||||
?ready,
|
||||
command_tag,
|
||||
acknowledgement = ?(query_acknowledged - query_start),
|
||||
response = ?(query_resp_end - query_start),
|
||||
"finished executing query"
|
||||
"finished reading rows"
|
||||
);
|
||||
|
||||
let columns_len = row_stream.columns().len();
|
||||
@@ -1111,63 +1095,3 @@ impl Discard<'_> {
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
|
||||
#[test]
|
||||
fn test_payload() {
|
||||
let payload = "{\"query\":\"SELECT * FROM users WHERE name = ?\",\"params\":[\"test\"],\"arrayMode\":true}";
|
||||
let deserialized_payload: Payload = serde_json::from_str(payload).unwrap();
|
||||
|
||||
match deserialized_payload {
|
||||
Payload::Single(QueryData {
|
||||
query,
|
||||
params,
|
||||
array_mode,
|
||||
}) => {
|
||||
assert_eq!(query, "SELECT * FROM users WHERE name = ?");
|
||||
assert_eq!(params, vec![Some(String::from("test"))]);
|
||||
assert!(array_mode.unwrap());
|
||||
}
|
||||
Payload::Batch(_) => {
|
||||
panic!("deserialization failed: case with single query, one param, and array mode")
|
||||
}
|
||||
}
|
||||
|
||||
let payload = "{\"queries\":[{\"query\":\"SELECT * FROM users0 WHERE name = ?\",\"params\":[\"test0\"], \"arrayMode\":false},{\"query\":\"SELECT * FROM users1 WHERE name = ?\",\"params\":[\"test1\"],\"arrayMode\":true}]}";
|
||||
let deserialized_payload: Payload = serde_json::from_str(payload).unwrap();
|
||||
|
||||
match deserialized_payload {
|
||||
Payload::Batch(BatchQueryData { queries }) => {
|
||||
assert_eq!(queries.len(), 2);
|
||||
for (i, query) in queries.into_iter().enumerate() {
|
||||
assert_eq!(
|
||||
query.query,
|
||||
format!("SELECT * FROM users{i} WHERE name = ?")
|
||||
);
|
||||
assert_eq!(query.params, vec![Some(format!("test{i}"))]);
|
||||
assert_eq!(query.array_mode.unwrap(), i > 0);
|
||||
}
|
||||
}
|
||||
Payload::Single(_) => panic!("deserialization failed: case with multiple queries"),
|
||||
}
|
||||
|
||||
let payload = "{\"query\":\"SELECT 1\"}";
|
||||
let deserialized_payload: Payload = serde_json::from_str(payload).unwrap();
|
||||
|
||||
match deserialized_payload {
|
||||
Payload::Single(QueryData {
|
||||
query,
|
||||
params,
|
||||
array_mode,
|
||||
}) => {
|
||||
assert_eq!(query, "SELECT 1");
|
||||
assert_eq!(params, vec![]);
|
||||
assert!(array_mode.is_none());
|
||||
}
|
||||
Payload::Batch(_) => panic!("deserialization failed: case with only one query"),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -4,7 +4,7 @@ authors = []
|
||||
package-mode = false
|
||||
|
||||
[tool.poetry.dependencies]
|
||||
python = "^3.11"
|
||||
python = "^3.9"
|
||||
pytest = "^7.4.4"
|
||||
psycopg2-binary = "^2.9.10"
|
||||
typing-extensions = "^4.6.1"
|
||||
@@ -51,7 +51,7 @@ testcontainers = "^4.8.1"
|
||||
jsonnet = "^0.20.0"
|
||||
|
||||
[tool.poetry.group.dev.dependencies]
|
||||
mypy = "==1.13.0"
|
||||
mypy = "==1.3.0"
|
||||
ruff = "^0.7.0"
|
||||
|
||||
[build-system]
|
||||
@@ -89,7 +89,7 @@ module = [
|
||||
ignore_missing_imports = true
|
||||
|
||||
[tool.ruff]
|
||||
target-version = "py311"
|
||||
target-version = "py39"
|
||||
extend-exclude = [
|
||||
"vendor/",
|
||||
"target/",
|
||||
@@ -108,3 +108,6 @@ select = [
|
||||
"B", # bugbear
|
||||
"UP", # pyupgrade
|
||||
]
|
||||
|
||||
[tool.ruff.lint.pyupgrade]
|
||||
keep-runtime-typing = true # Remove this stanza when we require Python 3.10
|
||||
|
||||
@@ -30,7 +30,6 @@ once_cell.workspace = true
|
||||
parking_lot.workspace = true
|
||||
postgres.workspace = true
|
||||
postgres-protocol.workspace = true
|
||||
pprof.workspace = true
|
||||
rand.workspace = true
|
||||
regex.workspace = true
|
||||
scopeguard.workspace = true
|
||||
|
||||
@@ -14,10 +14,6 @@ cargo bench --package safekeeper --bench receive_wal process_msg/fsync=false
|
||||
|
||||
# List available benchmarks.
|
||||
cargo bench --package safekeeper --benches -- --list
|
||||
|
||||
# Generate flamegraph profiles using pprof-rs, profiling for 10 seconds.
|
||||
# Output in target/criterion/*/profile/flamegraph.svg.
|
||||
cargo bench --package safekeeper --bench receive_wal process_msg/fsync=false --profile-time 10
|
||||
```
|
||||
|
||||
Additional charts and statistics are available in `target/criterion/report/index.html`.
|
||||
|
||||
@@ -10,7 +10,6 @@ use camino_tempfile::tempfile;
|
||||
use criterion::{criterion_group, criterion_main, BatchSize, Bencher, Criterion};
|
||||
use itertools::Itertools as _;
|
||||
use postgres_ffi::v17::wal_generator::{LogicalMessageGenerator, WalGenerator};
|
||||
use pprof::criterion::{Output, PProfProfiler};
|
||||
use safekeeper::receive_wal::{self, WalAcceptor};
|
||||
use safekeeper::safekeeper::{
|
||||
AcceptorProposerMessage, AppendRequest, AppendRequestHeader, ProposerAcceptorMessage,
|
||||
@@ -25,9 +24,8 @@ const GB: usize = 1024 * MB;
|
||||
|
||||
// Register benchmarks with Criterion.
|
||||
criterion_group!(
|
||||
name = benches;
|
||||
config = Criterion::default().with_profiler(PProfProfiler::new(100, Output::Flamegraph(None)));
|
||||
targets = bench_process_msg,
|
||||
benches,
|
||||
bench_process_msg,
|
||||
bench_wal_acceptor,
|
||||
bench_wal_acceptor_throughput,
|
||||
bench_file_write
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
use hyper::{Body, Request, Response, StatusCode};
|
||||
use hyper::{Body, Request, Response, StatusCode, Uri};
|
||||
use once_cell::sync::Lazy;
|
||||
use serde::{Deserialize, Serialize};
|
||||
use std::collections::HashMap;
|
||||
use std::collections::{HashMap, HashSet};
|
||||
use std::fmt;
|
||||
use std::io::Write as _;
|
||||
use std::str::FromStr;
|
||||
@@ -13,9 +14,7 @@ use tokio_stream::wrappers::ReceiverStream;
|
||||
use tokio_util::sync::CancellationToken;
|
||||
use tracing::{info_span, Instrument};
|
||||
use utils::failpoint_support::failpoints_handler;
|
||||
use utils::http::endpoint::{
|
||||
profile_cpu_handler, prometheus_metrics_handler, request_span, ChannelWriter,
|
||||
};
|
||||
use utils::http::endpoint::{prometheus_metrics_handler, request_span, ChannelWriter};
|
||||
use utils::http::request::parse_query_param;
|
||||
|
||||
use postgres_ffi::WAL_SEGMENT_SIZE;
|
||||
@@ -573,8 +572,14 @@ pub fn make_router(conf: SafeKeeperConf) -> RouterBuilder<hyper::Body, ApiError>
|
||||
let mut router = endpoint::make_router();
|
||||
if conf.http_auth.is_some() {
|
||||
router = router.middleware(auth_middleware(|request| {
|
||||
const ALLOWLIST_ROUTES: &[&str] = &["/v1/status", "/metrics", "/profile/cpu"];
|
||||
if ALLOWLIST_ROUTES.contains(&request.uri().path()) {
|
||||
#[allow(clippy::mutable_key_type)]
|
||||
static ALLOWLIST_ROUTES: Lazy<HashSet<Uri>> = Lazy::new(|| {
|
||||
["/v1/status", "/metrics"]
|
||||
.iter()
|
||||
.map(|v| v.parse().unwrap())
|
||||
.collect()
|
||||
});
|
||||
if ALLOWLIST_ROUTES.contains(request.uri()) {
|
||||
None
|
||||
} else {
|
||||
// Option<Arc<SwappableJwtAuth>> is always provided as data below, hence unwrap().
|
||||
@@ -593,7 +598,6 @@ pub fn make_router(conf: SafeKeeperConf) -> RouterBuilder<hyper::Body, ApiError>
|
||||
.data(Arc::new(conf))
|
||||
.data(auth)
|
||||
.get("/metrics", |r| request_span(r, prometheus_metrics_handler))
|
||||
.get("/profile/cpu", |r| request_span(r, profile_cpu_handler))
|
||||
.get("/v1/status", |r| request_span(r, status_handler))
|
||||
.put("/v1/failpoints", |r| {
|
||||
request_span(r, move |r| async {
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user