mirror of https://github.com/neondatabase/neon.git (synced 2026-01-16 18:02:56 +00:00)
Merge pull request #10354 from neondatabase/rc/release-compute/2025-01-10

.github/file-filters.yaml (vendored, new file, 12 lines)
@@ -0,0 +1,12 @@
rust_code: ['**/*.rs', '**/Cargo.toml', '**/Cargo.lock']

v14: ['vendor/postgres-v14/**', 'Makefile', 'pgxn/**']
v15: ['vendor/postgres-v15/**', 'Makefile', 'pgxn/**']
v16: ['vendor/postgres-v16/**', 'Makefile', 'pgxn/**']
v17: ['vendor/postgres-v17/**', 'Makefile', 'pgxn/**']

rebuild_neon_extra:
  - .github/workflows/neon_extra_builds.yml

rebuild_macos:
  - .github/workflows/build-macos.yml
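
For context: these named filter groups are the input format of the dorny/paths-filter action, which this commit wires up in neon_extra_builds.yml further down. The action exposes one 'true'/'false' output per filter name. A minimal sketch of a consumer, with hypothetical job and step names:

    jobs:
      example:
        runs-on: ubuntu-22.04
        steps:
          - uses: actions/checkout@v4
          - uses: dorny/paths-filter@v3
            id: files_changed
            with:
              filters: .github/file-filters.yaml
          # Each filter name becomes an output: 'true' if any changed path matched it.
          - if: steps.files_changed.outputs.v17 == 'true'
            run: echo "postgres v17 sources changed"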

.github/workflows/build-macos.yml (vendored, new file, 241 lines)
@@ -0,0 +1,241 @@
name: Check neon with MacOS builds

on:
  workflow_call:
    inputs:
      pg_versions:
        description: "Array of the pg versions to build for, for example: ['v14', 'v17']"
        type: string
        default: '[]'
        required: false
      rebuild_rust_code:
        description: "Rebuild Rust code"
        type: boolean
        default: false
        required: false
      rebuild_everything:
        description: "If true, rebuild for all versions"
        type: boolean
        default: false
        required: false

env:
  RUST_BACKTRACE: 1
  COPT: '-Werror'

# TODO: move `check-*` and `files-changed` jobs to the "Caller" Workflow
# We should care about that as Github has limitations:
# - You can connect up to four levels of workflows
# - You can call a maximum of 20 unique reusable workflows from a single workflow file.
# https://docs.github.com/en/actions/sharing-automations/reusing-workflows#limitations
jobs:
  build-pgxn:
    if: |
      (inputs.pg_versions != '[]' || inputs.rebuild_everything) && (
        contains(github.event.pull_request.labels.*.name, 'run-extra-build-macos') ||
        contains(github.event.pull_request.labels.*.name, 'run-extra-build-*') ||
        github.ref_name == 'main'
      )
    timeout-minutes: 30
    runs-on: macos-15
    strategy:
      matrix:
        postgres-version: ${{ inputs.rebuild_everything && fromJson('["v14", "v15", "v16", "v17"]') || fromJSON(inputs.pg_versions) }}
    env:
      # Use release build only, to have less debug info around
      # Hence keeping target/ (and general cache size) smaller
      BUILD_TYPE: release
    steps:
      - name: Checkout main repo
        uses: actions/checkout@v4

      - name: Set pg ${{ matrix.postgres-version }} for caching
        id: pg_rev
        run: echo pg_rev=$(git rev-parse HEAD:vendor/postgres-${{ matrix.postgres-version }}) | tee -a "${GITHUB_OUTPUT}"

      - name: Cache postgres ${{ matrix.postgres-version }} build
        id: cache_pg
        uses: actions/cache@v4
        with:
          path: pg_install/${{ matrix.postgres-version }}
          key: v1-${{ runner.os }}-${{ runner.arch }}-${{ env.BUILD_TYPE }}-pg-${{ matrix.postgres-version }}-${{ steps.pg_rev.outputs.pg_rev }}-${{ hashFiles('Makefile') }}

      - name: Checkout submodule vendor/postgres-${{ matrix.postgres-version }}
        if: steps.cache_pg.outputs.cache-hit != 'true'
        run: |
          git submodule init vendor/postgres-${{ matrix.postgres-version }}
          git submodule update --depth 1 --recursive

      - name: Install build dependencies
        if: steps.cache_pg.outputs.cache-hit != 'true'
        run: |
          brew install flex bison openssl protobuf icu4c

      - name: Set extra env for macOS
        if: steps.cache_pg.outputs.cache-hit != 'true'
        run: |
          echo 'LDFLAGS=-L/usr/local/opt/openssl@3/lib' >> $GITHUB_ENV
          echo 'CPPFLAGS=-I/usr/local/opt/openssl@3/include' >> $GITHUB_ENV

      - name: Build Postgres ${{ matrix.postgres-version }}
        if: steps.cache_pg.outputs.cache-hit != 'true'
        run: |
          make postgres-${{ matrix.postgres-version }} -j$(sysctl -n hw.ncpu)

      - name: Build Neon Pg Ext ${{ matrix.postgres-version }}
        if: steps.cache_pg.outputs.cache-hit != 'true'
        run: |
          make "neon-pg-ext-${{ matrix.postgres-version }}" -j$(sysctl -n hw.ncpu)

      - name: Get postgres headers ${{ matrix.postgres-version }}
        if: steps.cache_pg.outputs.cache-hit != 'true'
        run: |
          make postgres-headers-${{ matrix.postgres-version }} -j$(sysctl -n hw.ncpu)

  build-walproposer-lib:
    if: |
      (inputs.pg_versions != '[]' || inputs.rebuild_everything) && (
        contains(github.event.pull_request.labels.*.name, 'run-extra-build-macos') ||
        contains(github.event.pull_request.labels.*.name, 'run-extra-build-*') ||
        github.ref_name == 'main'
      )
    timeout-minutes: 30
    runs-on: macos-15
    needs: [build-pgxn]
    env:
      # Use release build only, to have less debug info around
      # Hence keeping target/ (and general cache size) smaller
      BUILD_TYPE: release
    steps:
      - name: Checkout main repo
        uses: actions/checkout@v4

      - name: Set pg v17 for caching
        id: pg_rev
        run: echo pg_rev=$(git rev-parse HEAD:vendor/postgres-v17) | tee -a "${GITHUB_OUTPUT}"

      - name: Cache postgres v17 build
        id: cache_pg
        uses: actions/cache@v4
        with:
          path: pg_install/v17
          key: v1-${{ runner.os }}-${{ runner.arch }}-${{ env.BUILD_TYPE }}-pg-v17-${{ steps.pg_rev.outputs.pg_rev }}-${{ hashFiles('Makefile') }}

      - name: Cache walproposer-lib
        id: cache_walproposer_lib
        uses: actions/cache@v4
        with:
          path: pg_install/build/walproposer-lib
          key: v1-${{ runner.os }}-${{ runner.arch }}-${{ env.BUILD_TYPE }}-walproposer_lib-v17-${{ steps.pg_rev.outputs.pg_rev }}-${{ hashFiles('Makefile') }}

      - name: Checkout submodule vendor/postgres-v17
        if: steps.cache_walproposer_lib.outputs.cache-hit != 'true'
        run: |
          git submodule init vendor/postgres-v17
          git submodule update --depth 1 --recursive

      - name: Install build dependencies
        if: steps.cache_walproposer_lib.outputs.cache-hit != 'true'
        run: |
          brew install flex bison openssl protobuf icu4c

      - name: Set extra env for macOS
        if: steps.cache_walproposer_lib.outputs.cache-hit != 'true'
        run: |
          echo 'LDFLAGS=-L/usr/local/opt/openssl@3/lib' >> $GITHUB_ENV
          echo 'CPPFLAGS=-I/usr/local/opt/openssl@3/include' >> $GITHUB_ENV

      - name: Build walproposer-lib (only for v17)
        if: steps.cache_walproposer_lib.outputs.cache-hit != 'true'
        run:
          make walproposer-lib -j$(sysctl -n hw.ncpu)

  cargo-build:
    if: |
      (inputs.pg_versions != '[]' || inputs.rebuild_rust_code || inputs.rebuild_everything) && (
        contains(github.event.pull_request.labels.*.name, 'run-extra-build-macos') ||
        contains(github.event.pull_request.labels.*.name, 'run-extra-build-*') ||
        github.ref_name == 'main'
      )
    timeout-minutes: 30
    runs-on: macos-15
    needs: [build-pgxn, build-walproposer-lib]
    env:
      # Use release build only, to have less debug info around
      # Hence keeping target/ (and general cache size) smaller
      BUILD_TYPE: release
    steps:
      - name: Checkout main repo
        uses: actions/checkout@v4
        with:
          submodules: true

      - name: Set pg v14 for caching
        id: pg_rev_v14
        run: echo pg_rev=$(git rev-parse HEAD:vendor/postgres-v14) | tee -a "${GITHUB_OUTPUT}"
      - name: Set pg v15 for caching
        id: pg_rev_v15
        run: echo pg_rev=$(git rev-parse HEAD:vendor/postgres-v15) | tee -a "${GITHUB_OUTPUT}"
      - name: Set pg v16 for caching
        id: pg_rev_v16
        run: echo pg_rev=$(git rev-parse HEAD:vendor/postgres-v16) | tee -a "${GITHUB_OUTPUT}"
      - name: Set pg v17 for caching
        id: pg_rev_v17
        run: echo pg_rev=$(git rev-parse HEAD:vendor/postgres-v17) | tee -a "${GITHUB_OUTPUT}"

      - name: Cache postgres v14 build
        id: cache_pg
        uses: actions/cache@v4
        with:
          path: pg_install/v14
          key: v1-${{ runner.os }}-${{ runner.arch }}-${{ env.BUILD_TYPE }}-pg-v14-${{ steps.pg_rev_v14.outputs.pg_rev }}-${{ hashFiles('Makefile') }}
      - name: Cache postgres v15 build
        id: cache_pg_v15
        uses: actions/cache@v4
        with:
          path: pg_install/v15
          key: v1-${{ runner.os }}-${{ runner.arch }}-${{ env.BUILD_TYPE }}-pg-v15-${{ steps.pg_rev_v15.outputs.pg_rev }}-${{ hashFiles('Makefile') }}
      - name: Cache postgres v16 build
        id: cache_pg_v16
        uses: actions/cache@v4
        with:
          path: pg_install/v16
          key: v1-${{ runner.os }}-${{ runner.arch }}-${{ env.BUILD_TYPE }}-pg-v16-${{ steps.pg_rev_v16.outputs.pg_rev }}-${{ hashFiles('Makefile') }}
      - name: Cache postgres v17 build
        id: cache_pg_v17
        uses: actions/cache@v4
        with:
          path: pg_install/v17
          key: v1-${{ runner.os }}-${{ runner.arch }}-${{ env.BUILD_TYPE }}-pg-v17-${{ steps.pg_rev_v17.outputs.pg_rev }}-${{ hashFiles('Makefile') }}

      - name: Cache cargo deps (only for v17)
        uses: actions/cache@v4
        with:
          path: |
            ~/.cargo/registry
            !~/.cargo/registry/src
            ~/.cargo/git
            target
          key: v1-${{ runner.os }}-${{ runner.arch }}-cargo-${{ hashFiles('./Cargo.lock') }}-${{ hashFiles('./rust-toolchain.toml') }}-rust

      - name: Cache walproposer-lib
        id: cache_walproposer_lib
        uses: actions/cache@v4
        with:
          path: pg_install/build/walproposer-lib
          key: v1-${{ runner.os }}-${{ runner.arch }}-${{ env.BUILD_TYPE }}-walproposer_lib-v17-${{ steps.pg_rev_v17.outputs.pg_rev }}-${{ hashFiles('Makefile') }}

      - name: Install build dependencies
        run: |
          brew install flex bison openssl protobuf icu4c

      - name: Set extra env for macOS
        run: |
          echo 'LDFLAGS=-L/usr/local/opt/openssl@3/lib' >> $GITHUB_ENV
          echo 'CPPFLAGS=-I/usr/local/opt/openssl@3/include' >> $GITHUB_ENV

      - name: Run cargo build (only for v17)
        run: PQ_LIB_DIR=$(pwd)/pg_install/v17/lib cargo build --all --release -j$(sysctl -n hw.ncpu)

      - name: Check that no warnings are produced (only for v17)
        run: ./run_clippy.sh
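
Since the file above is triggered only by workflow_call, it runs as a reusable workflow invoked from another workflow. A minimal hypothetical caller, showing how the string-typed pg_versions input is passed (the real call site is in the neon_extra_builds.yml diff below):

    jobs:
      macos:
        uses: ./.github/workflows/build-macos.yml
        with:
          pg_versions: '["v17"]'  # a JSON array, passed as a string
          rebuild_rust_code: true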

.github/workflows/neon_extra_builds.yml (vendored, 139 lines changed)
@@ -31,19 +31,15 @@ jobs:
     uses: ./.github/workflows/build-build-tools-image.yml
     secrets: inherit
 
-  check-macos-build:
-    needs: [ check-permissions ]
-    if: |
-      contains(github.event.pull_request.labels.*.name, 'run-extra-build-macos') ||
-      contains(github.event.pull_request.labels.*.name, 'run-extra-build-*') ||
-      github.ref_name == 'main'
-    timeout-minutes: 90
-    runs-on: macos-15
-
-    env:
-      # Use release build only, to have less debug info around
-      # Hence keeping target/ (and general cache size) smaller
-      BUILD_TYPE: release
+  files-changed:
+    name: Detect what files changed
+    runs-on: ubuntu-22.04
+    timeout-minutes: 3
+    outputs:
+      v17: ${{ steps.files_changed.outputs.v17 }}
+      postgres_changes: ${{ steps.postgres_changes.outputs.changes }}
+      rebuild_rust_code: ${{ steps.files_changed.outputs.rust_code }}
+      rebuild_everything: ${{ steps.files_changed.outputs.rebuild_neon_extra || steps.files_changed.outputs.rebuild_macos }}
 
     steps:
       - name: Checkout
@@ -51,106 +47,45 @@ jobs:
         with:
           submodules: true
 
-      - name: Install macOS postgres dependencies
-        run: brew install flex bison openssl protobuf icu4c
-
-      - name: Set pg 14 revision for caching
-        id: pg_v14_rev
-        run: echo pg_rev=$(git rev-parse HEAD:vendor/postgres-v14) >> $GITHUB_OUTPUT
-
-      - name: Set pg 15 revision for caching
-        id: pg_v15_rev
-        run: echo pg_rev=$(git rev-parse HEAD:vendor/postgres-v15) >> $GITHUB_OUTPUT
-
-      - name: Set pg 16 revision for caching
-        id: pg_v16_rev
-        run: echo pg_rev=$(git rev-parse HEAD:vendor/postgres-v16) >> $GITHUB_OUTPUT
-
-      - name: Set pg 17 revision for caching
-        id: pg_v17_rev
-        run: echo pg_rev=$(git rev-parse HEAD:vendor/postgres-v17) >> $GITHUB_OUTPUT
-
-      - name: Cache postgres v14 build
-        id: cache_pg_14
-        uses: actions/cache@v4
-        with:
-          path: pg_install/v14
-          key: v1-${{ runner.os }}-${{ runner.arch }}-${{ env.BUILD_TYPE }}-pg-${{ steps.pg_v14_rev.outputs.pg_rev }}-${{ hashFiles('Makefile') }}
-
-      - name: Cache postgres v15 build
-        id: cache_pg_15
-        uses: actions/cache@v4
-        with:
-          path: pg_install/v15
-          key: v1-${{ runner.os }}-${{ runner.arch }}-${{ env.BUILD_TYPE }}-pg-${{ steps.pg_v15_rev.outputs.pg_rev }}-${{ hashFiles('Makefile') }}
-
-      - name: Cache postgres v16 build
-        id: cache_pg_16
-        uses: actions/cache@v4
-        with:
-          path: pg_install/v16
-          key: v1-${{ runner.os }}-${{ runner.arch }}-${{ env.BUILD_TYPE }}-pg-${{ steps.pg_v16_rev.outputs.pg_rev }}-${{ hashFiles('Makefile') }}
-
-      - name: Cache postgres v17 build
-        id: cache_pg_17
-        uses: actions/cache@v4
-        with:
-          path: pg_install/v17
-          key: v1-${{ runner.os }}-${{ runner.arch }}-${{ env.BUILD_TYPE }}-pg-${{ steps.pg_v17_rev.outputs.pg_rev }}-${{ hashFiles('Makefile') }}
-
-      - name: Set extra env for macOS
-        run: |
-          echo 'LDFLAGS=-L/usr/local/opt/openssl@3/lib' >> $GITHUB_ENV
-          echo 'CPPFLAGS=-I/usr/local/opt/openssl@3/include' >> $GITHUB_ENV
+      - name: Check for Postgres changes
+        uses: dorny/paths-filter@1441771bbfdd59dcd748680ee64ebd8faab1a242 #v3
+        id: files_changed
+        with:
+          token: ${{ github.token }}
+          filters: .github/file-filters.yaml
+          base: ${{ github.event_name != 'pull_request' && (github.event.merge_group.base_ref || github.ref_name) || '' }}
+          ref: ${{ github.event_name != 'pull_request' && (github.event.merge_group.head_ref || github.ref) || '' }}
+
+      - name: Filter out only v-string for build matrix
+        id: postgres_changes
+        run: |
+          v_strings_only_as_json_array=$(echo ${{ steps.files_changed.outputs.changes }} | jq '.[]|select(test("v\\d+"))' | jq --slurp -c)
+          echo "changes=${v_strings_only_as_json_array}" | tee -a "${GITHUB_OUTPUT}"
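
For clarity, the jq pipeline in the step above can be exercised standalone; given a hypothetical paths-filter output, it keeps only the entries matching v<digits> and re-packs them into a compact JSON array:

    $ echo '["rust_code","v15","v17"]' | jq '.[]|select(test("v\\d+"))' | jq --slurp -c
    ["v15","v17"]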
-      - name: Cache cargo deps
-        uses: actions/cache@v4
-        with:
-          path: |
-            ~/.cargo/registry
-            !~/.cargo/registry/src
-            ~/.cargo/git
-            target
-          key: v1-${{ runner.os }}-${{ runner.arch }}-cargo-${{ hashFiles('./Cargo.lock') }}-${{ hashFiles('./rust-toolchain.toml') }}-rust
-
-      - name: Build postgres v14
-        if: steps.cache_pg_14.outputs.cache-hit != 'true'
-        run: make postgres-v14 -j$(sysctl -n hw.ncpu)
-
-      - name: Build postgres v15
-        if: steps.cache_pg_15.outputs.cache-hit != 'true'
-        run: make postgres-v15 -j$(sysctl -n hw.ncpu)
-
-      - name: Build postgres v16
-        if: steps.cache_pg_16.outputs.cache-hit != 'true'
-        run: make postgres-v16 -j$(sysctl -n hw.ncpu)
-
-      - name: Build postgres v17
-        if: steps.cache_pg_17.outputs.cache-hit != 'true'
-        run: make postgres-v17 -j$(sysctl -n hw.ncpu)
-
-      - name: Build neon extensions
-        run: make neon-pg-ext -j$(sysctl -n hw.ncpu)
-
-      - name: Build walproposer-lib
-        run: make walproposer-lib -j$(sysctl -n hw.ncpu)
-
-      - name: Run cargo build
-        run: PQ_LIB_DIR=$(pwd)/pg_install/v16/lib cargo build --all --release
-
-      - name: Check that no warnings are produced
-        run: ./run_clippy.sh
+  check-macos-build:
+    needs: [ check-permissions, files-changed ]
+    if: |
+      contains(github.event.pull_request.labels.*.name, 'run-extra-build-macos') ||
+      contains(github.event.pull_request.labels.*.name, 'run-extra-build-*') ||
+      github.ref_name == 'main'
+    uses: ./.github/workflows/build-macos.yml
+    with:
+      pg_versions: ${{ needs.files-changed.outputs.postgres_changes }}
+      rebuild_rust_code: ${{ needs.files-changed.outputs.rebuild_rust_code }}
+      rebuild_everything: ${{ fromJson(needs.files-changed.outputs.rebuild_everything) }}
 
   gather-rust-build-stats:
-    needs: [ check-permissions, build-build-tools-image ]
+    needs: [ check-permissions, build-build-tools-image, files-changed ]
     permissions:
       id-token: write # aws-actions/configure-aws-credentials
       statuses: write
       contents: write
     if: |
-      contains(github.event.pull_request.labels.*.name, 'run-extra-build-stats') ||
-      contains(github.event.pull_request.labels.*.name, 'run-extra-build-*') ||
-      github.ref_name == 'main'
+      (needs.files-changed.outputs.v17 == 'true' || needs.files-changed.outputs.rebuild_everything == 'true') && (
+        contains(github.event.pull_request.labels.*.name, 'run-extra-build-stats') ||
+        contains(github.event.pull_request.labels.*.name, 'run-extra-build-*') ||
+        github.ref_name == 'main'
+      )
     runs-on: [ self-hosted, large ]
     container:
       image: ${{ needs.build-build-tools-image.outputs.image }}-bookworm
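
One subtlety in the check-macos-build call above: job outputs are always strings, but build-macos.yml declares rebuild_everything as type: boolean, so the caller converts it with fromJson before passing it; pg_versions stays a plain string because the reusable workflow itself applies fromJSON when building its matrix.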

Cargo.lock (generated, 218 lines changed)
@@ -718,13 +718,13 @@ dependencies = [
 
 [[package]]
 name = "axum"
-version = "0.7.5"
+version = "0.7.9"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "3a6c9af12842a67734c9a2e355436e5d03b22383ed60cf13cd0c18fbfe3dcbcf"
+checksum = "edca88bc138befd0323b20752846e6587272d3b03b0343c8ea28a6f819e6e71f"
 dependencies = [
  "async-trait",
  "axum-core",
- "base64 0.21.1",
+ "base64 0.22.1",
  "bytes",
  "futures-util",
  "http 1.1.0",

@@ -746,8 +746,8 @@ dependencies = [
  "sha1",
  "sync_wrapper 1.0.1",
  "tokio",
- "tokio-tungstenite",
- "tower",
+ "tokio-tungstenite 0.24.0",
+ "tower 0.5.2",
  "tower-layer",
  "tower-service",
  "tracing",

@@ -1267,6 +1267,7 @@ dependencies = [
 "aws-config",
 "aws-sdk-kms",
 "aws-sdk-s3",
+ "axum",
 "base64 0.13.1",
 "bytes",
 "camino",

@@ -1277,7 +1278,7 @@ dependencies = [
 "fail",
 "flate2",
 "futures",
- "hyper 0.14.30",
+ "http 1.1.0",
 "metrics",
 "nix 0.27.1",
 "notify",

@@ -1303,6 +1304,8 @@ dependencies = [
 "tokio-postgres",
 "tokio-stream",
 "tokio-util",
+ "tower 0.5.2",
+ "tower-http",
 "tracing",
 "tracing-opentelemetry",
 "tracing-subscriber",

@@ -1650,6 +1653,20 @@ dependencies = [
 "parking_lot_core 0.9.8",
 ]
 
+[[package]]
+name = "dashmap"
+version = "6.1.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "5041cc499144891f3790297212f32a74fb938e5136a14943f338ef9e0ae276cf"
+dependencies = [
+ "cfg-if",
+ "crossbeam-utils",
+ "hashbrown 0.14.5",
+ "lock_api",
+ "once_cell",
+ "parking_lot_core 0.9.8",
+]
+
 [[package]]
 name = "data-encoding"
 version = "2.4.0"

@@ -1949,6 +1966,15 @@ dependencies = [
 "syn 2.0.90",
 ]
 
+[[package]]
+name = "env_filter"
+version = "0.1.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "186e05a59d4c50738528153b83b0b0194d3a29507dfec16eccd4b342903397d0"
+dependencies = [
+ "log",
+]
+
 [[package]]
 name = "env_logger"
 version = "0.10.2"

@@ -1962,6 +1988,16 @@ dependencies = [
 "termcolor",
 ]
 
+[[package]]
+name = "env_logger"
+version = "0.11.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "6c012a26a7f605efc424dd53697843a72be7dc86ad2d01f7814337794a12231d"
+dependencies = [
+ "env_filter",
+ "log",
+]
+
 [[package]]
 name = "equator"
 version = "0.2.2"

@@ -2720,7 +2756,7 @@ dependencies = [
 "pin-project-lite",
 "socket2",
 "tokio",
- "tower",
+ "tower 0.4.13",
 "tower-service",
 "tracing",
 ]

@@ -2945,6 +2981,28 @@ dependencies = [
 "str_stack",
 ]
 
+[[package]]
+name = "inferno"
+version = "0.12.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "75a5d75fee4d36809e6b021e4b96b686e763d365ffdb03af2bd00786353f84fe"
+dependencies = [
+ "ahash",
+ "clap",
+ "crossbeam-channel",
+ "crossbeam-utils",
+ "dashmap 6.1.0",
+ "env_logger 0.11.2",
+ "indexmap 2.0.1",
+ "itoa",
+ "log",
+ "num-format",
+ "once_cell",
+ "quick-xml 0.37.1",
+ "rgb",
+ "str_stack",
+]
+
 [[package]]
 name = "inotify"
 version = "0.9.6"

@@ -3152,7 +3210,7 @@ version = "0.7.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "4644821e1c3d7a560fe13d842d13f587c07348a1a05d3a797152d41c90c56df2"
 dependencies = [
- "dashmap",
+ "dashmap 5.5.0",
 "hashbrown 0.13.2",
 ]

@@ -3260,9 +3318,9 @@ checksum = "b87248edafb776e59e6ee64a79086f65890d3510f2c656c000bf2a7e8a0aea40"
 
 [[package]]
 name = "matchit"
-version = "0.8.2"
+version = "0.8.4"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "540f1c43aed89909c0cc0cc604e3bb2f7e7a341a3728a9e6cfe760e733cd11ed"
+checksum = "47e1ffaa40ddd1f3ed91f717a33c8c0ee23fff369e3aa8772b9605cc1d22f4c3"
 
 [[package]]
 name = "md-5"

@@ -3690,23 +3748,23 @@ checksum = "ff011a302c396a5197692431fc1948019154afc178baf7d8e37367442a4601cf"
 
 [[package]]
 name = "opentelemetry"
-version = "0.26.0"
+version = "0.27.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "570074cc999d1a58184080966e5bd3bf3a9a4af650c3b05047c2621e7405cd17"
+checksum = "ab70038c28ed37b97d8ed414b6429d343a8bbf44c9f79ec854f3a643029ba6d7"
 dependencies = [
 "futures-core",
 "futures-sink",
 "js-sys",
 "once_cell",
 "pin-project-lite",
 "thiserror",
 "tracing",
 ]
 
 [[package]]
 name = "opentelemetry-http"
-version = "0.26.0"
+version = "0.27.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "6351496aeaa49d7c267fb480678d85d1cd30c5edb20b497c48c56f62a8c14b99"
+checksum = "10a8a7f5f6ba7c1b286c2fbca0454eaba116f63bbe69ed250b642d36fbb04d80"
 dependencies = [
 "async-trait",
 "bytes",

@@ -3717,9 +3775,9 @@ dependencies = [
 
 [[package]]
 name = "opentelemetry-otlp"
-version = "0.26.0"
+version = "0.27.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "29e1f9c8b032d4f635c730c0efcf731d5e2530ea13fa8bef7939ddc8420696bd"
+checksum = "91cf61a1868dacc576bf2b2a1c3e9ab150af7272909e80085c3173384fe11f76"
 dependencies = [
 "async-trait",
 "futures-core",

@@ -3735,9 +3793,9 @@ dependencies = [
 
 [[package]]
 name = "opentelemetry-proto"
-version = "0.26.1"
+version = "0.27.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "c9d3968ce3aefdcca5c27e3c4ea4391b37547726a70893aab52d3de95d5f8b34"
+checksum = "a6e05acbfada5ec79023c85368af14abd0b307c015e9064d249b2a950ef459a6"
 dependencies = [
 "opentelemetry",
 "opentelemetry_sdk",

@@ -3747,22 +3805,21 @@ dependencies = [
 
 [[package]]
 name = "opentelemetry-semantic-conventions"
-version = "0.26.0"
+version = "0.27.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "db945c1eaea8ac6a9677185357480d215bb6999faa9f691d0c4d4d641eab7a09"
+checksum = "bc1b6902ff63b32ef6c489e8048c5e253e2e4a803ea3ea7e783914536eb15c52"
 
 [[package]]
 name = "opentelemetry_sdk"
-version = "0.26.0"
+version = "0.27.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "d2c627d9f4c9cdc1f21a29ee4bfbd6028fcb8bcf2a857b43f3abdf72c9c862f3"
+checksum = "231e9d6ceef9b0b2546ddf52335785ce41252bc7474ee8ba05bfad277be13ab8"
 dependencies = [
 "async-trait",
 "futures-channel",
 "futures-executor",
 "futures-util",
 "glob",
 "once_cell",
 "opentelemetry",
 "percent-encoding",
 "rand 0.8.5",

@@ -3770,6 +3827,7 @@ dependencies = [
 "thiserror",
 "tokio",
 "tokio-stream",
+ "tracing",
 ]
 
 [[package]]

@@ -4418,7 +4476,7 @@ dependencies = [
 "bytes",
 "crc32c",
 "criterion",
- "env_logger",
+ "env_logger 0.10.2",
 "log",
 "memoffset 0.9.0",
 "once_cell",

@@ -4459,7 +4517,7 @@ dependencies = [
 "cfg-if",
 "criterion",
 "findshlibs",
- "inferno",
+ "inferno 0.11.21",
 "libc",
 "log",
 "nix 0.26.4",

@@ -4685,9 +4743,9 @@ dependencies = [
 "clap",
 "compute_api",
 "consumption_metrics",
- "dashmap",
+ "dashmap 5.5.0",
 "ecdsa 0.16.9",
- "env_logger",
+ "env_logger 0.10.2",
 "fallible-iterator",
 "flate2",
 "framed-websockets",

@@ -4758,7 +4816,7 @@ dependencies = [
 "tokio-postgres",
 "tokio-postgres2",
 "tokio-rustls 0.26.0",
- "tokio-tungstenite",
+ "tokio-tungstenite 0.21.0",
 "tokio-util",
 "tracing",
 "tracing-subscriber",

@@ -4794,6 +4852,15 @@ dependencies = [
 "serde",
 ]
 
+[[package]]
+name = "quick-xml"
+version = "0.37.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "f22f29bdff3987b4d8632ef95fd6424ec7e4e0a57e2f4fc63e489e75357f6a03"
+dependencies = [
+ "memchr",
+]
+
 [[package]]
 name = "quote"
 version = "1.0.37"

@@ -5178,15 +5245,15 @@ dependencies = [
 
 [[package]]
 name = "reqwest-tracing"
-version = "0.5.4"
+version = "0.5.5"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "ff82cf5730a1311fb9413b0bc2b8e743e0157cd73f010ab4ec374a923873b6a2"
+checksum = "73e6153390585f6961341b50e5a1931d6be6dee4292283635903c26ef9d980d2"
 dependencies = [
 "anyhow",
 "async-trait",
 "getrandom 0.2.11",
 "http 1.1.0",
- "matchit 0.8.2",
+ "matchit 0.8.4",
 "opentelemetry",
 "reqwest",
 "reqwest-middleware",

@@ -6800,7 +6867,19 @@ dependencies = [
 "futures-util",
 "log",
 "tokio",
- "tungstenite",
+ "tungstenite 0.21.0",
 ]
 
+[[package]]
+name = "tokio-tungstenite"
+version = "0.24.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "edc5f74e248dc973e0dbb7b74c7e0d6fcc301c694ff50049504004ef4d0cdcd9"
+dependencies = [
+ "futures-util",
+ "log",
+ "tokio",
+ "tungstenite 0.24.0",
+]
+
 [[package]]

@@ -6881,7 +6960,7 @@ dependencies = [
 "tokio",
 "tokio-rustls 0.26.0",
 "tokio-stream",
- "tower",
+ "tower 0.4.13",
 "tower-layer",
 "tower-service",
 "tracing",

@@ -6922,16 +7001,49 @@ dependencies = [
 ]
 
 [[package]]
-name = "tower-layer"
-version = "0.3.2"
+name = "tower"
+version = "0.5.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "c20c8dbed6283a09604c3e69b4b7eeb54e298b8a600d4d5ecb5ad39de609f1d0"
+checksum = "d039ad9159c98b70ecfd540b2573b97f7f52c3e8d9f8ad57a24b916a536975f9"
+dependencies = [
+ "futures-core",
+ "futures-util",
+ "pin-project-lite",
+ "sync_wrapper 1.0.1",
+ "tokio",
+ "tower-layer",
+ "tower-service",
+ "tracing",
+]
+
+[[package]]
+name = "tower-http"
+version = "0.6.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "403fa3b783d4b626a8ad51d766ab03cb6d2dbfc46b1c5d4448395e6628dc9697"
+dependencies = [
+ "bitflags 2.4.1",
+ "bytes",
+ "http 1.1.0",
+ "http-body 1.0.0",
+ "pin-project-lite",
+ "tower-layer",
+ "tower-service",
+ "tracing",
+ "uuid",
+]
+
+[[package]]
+name = "tower-layer"
+version = "0.3.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "121c2a6cda46980bb0fcd1647ffaf6cd3fc79a013de288782836f6df9c48780e"
 
 [[package]]
 name = "tower-service"
-version = "0.3.2"
+version = "0.3.3"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "b6bc1c9ce2b5135ac7f93c72918fc37feb872bdc6a5533a8b85eb4b86bfdae52"
+checksum = "8df9b6e13f2d32c91b9bd719c00d1958837bc7dec474d94952798cc8e69eeec3"
 
 [[package]]
 name = "tracing"

@@ -7000,9 +7112,9 @@ dependencies = [
 
 [[package]]
 name = "tracing-opentelemetry"
-version = "0.27.0"
+version = "0.28.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "dc58af5d3f6c5811462cabb3289aec0093f7338e367e5a33d28c0433b3c7360b"
+checksum = "97a971f6058498b5c0f1affa23e7ea202057a7301dbff68e968b2d578bcbd053"
 dependencies = [
 "js-sys",
 "once_cell",

@@ -7086,6 +7198,24 @@ dependencies = [
 "utf-8",
 ]
 
+[[package]]
+name = "tungstenite"
+version = "0.24.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "18e5b8366ee7a95b16d32197d0b2604b43a0be89dc5fac9f8e96ccafbaedda8a"
+dependencies = [
+ "byteorder",
+ "bytes",
+ "data-encoding",
+ "http 1.1.0",
+ "httparse",
+ "log",
+ "rand 0.8.5",
+ "sha1",
+ "thiserror",
+ "utf-8",
+]
+
 [[package]]
 name = "twox-hash"
 version = "1.6.3"

@@ -7253,6 +7383,7 @@ dependencies = [
 "hex-literal",
 "humantime",
 "hyper 0.14.30",
+ "inferno 0.12.0",
 "itertools 0.10.5",
 "jemalloc_pprof",
 "jsonwebtoken",

@@ -7356,7 +7487,7 @@ dependencies = [
 "anyhow",
 "camino-tempfile",
 "clap",
- "env_logger",
+ "env_logger 0.10.2",
 "log",
 "postgres",
 "postgres_ffi",

@@ -7867,7 +7998,8 @@ dependencies = [
 "tokio-util",
 "toml_edit",
 "tonic",
- "tower",
+ "tower 0.4.13",
+ "tower 0.5.2",
 "tracing",
 "tracing-core",
 "url",
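
A note on the dependency renames above (e.g. "dashmap" becoming "dashmap 5.5.0"): once two semver-incompatible versions of a crate coexist in the graph, Cargo disambiguates references in lockfile dependency lists by appending the version, as this hypothetical entry illustrates:

    [[package]]
    name = "example-consumer"
    version = "0.1.0"
    dependencies = [
     "dashmap 5.5.0",  # pinned to the old major while inferno pulls in 6.1.0
    ]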

Cargo.toml (19 lines changed)
@@ -65,7 +65,7 @@ aws-smithy-types = "1.2"
 aws-credential-types = "1.2.0"
 aws-sigv4 = { version = "1.2", features = ["sign-http"] }
 aws-types = "1.3"
-axum = { version = "0.7.5", features = ["ws"] }
+axum = { version = "0.7.9", features = ["ws"] }
 base64 = "0.13.0"
 bincode = "1.3"
 bindgen = "0.70"

@@ -110,6 +110,7 @@ hyper-util = "0.1"
 tokio-tungstenite = "0.21.0"
 indexmap = "2"
 indoc = "2"
+inferno = "0.12.0"
 ipnet = "2.10.0"
 itertools = "0.10"
 itoa = "1.0.11"

@@ -126,10 +127,10 @@ notify = "6.0.0"
 num_cpus = "1.15"
 num-traits = "0.2.15"
 once_cell = "1.13"
-opentelemetry = "0.26"
-opentelemetry_sdk = "0.26"
-opentelemetry-otlp = { version = "0.26", default-features=false, features = ["http-proto", "trace", "http", "reqwest-client"] }
-opentelemetry-semantic-conventions = "0.26"
+opentelemetry = "0.27"
+opentelemetry_sdk = "0.27"
+opentelemetry-otlp = { version = "0.27", default-features = false, features = ["http-proto", "trace", "http", "reqwest-client"] }
+opentelemetry-semantic-conventions = "0.27"
 parking_lot = "0.12"
 parquet = { version = "53", default-features = false, features = ["zstd"] }
 parquet_derive = "53"

@@ -143,7 +144,7 @@ rand = "0.8"
 redis = { version = "0.25.2", features = ["tokio-rustls-comp", "keep-alive"] }
 regex = "1.10.2"
 reqwest = { version = "0.12", default-features = false, features = ["rustls-tls"] }
-reqwest-tracing = { version = "0.5", features = ["opentelemetry_0_26"] }
+reqwest-tracing = { version = "0.5", features = ["opentelemetry_0_27"] }
 reqwest-middleware = "0.4"
 reqwest-retry = "0.7"
 routerify = "3"

@@ -187,10 +188,12 @@ tokio-util = { version = "0.7.10", features = ["io", "rt"] }
 toml = "0.8"
 toml_edit = "0.22"
 tonic = {version = "0.12.3", features = ["tls", "tls-roots"]}
-tower-service = "0.3.2"
+tower = { version = "0.5.2", default-features = false }
+tower-http = { version = "0.6.2", features = ["request-id", "trace"] }
+tower-service = "0.3.3"
 tracing = "0.1"
 tracing-error = "0.2"
-tracing-opentelemetry = "0.27"
+tracing-opentelemetry = "0.28"
 tracing-subscriber = { version = "0.3", default-features = false, features = ["smallvec", "fmt", "tracing-log", "std", "env-filter", "json"] }
 try-lock = "0.2.5"
 twox-hash = { version = "1.6.3", default-features = false }
@@ -103,11 +103,6 @@ RUN mkdir -p /data/.neon/ && \
             > /data/.neon/pageserver.toml && \
     chown -R neon:neon /data/.neon
 
-# When running a binary that links with libpq, default to using our most recent postgres version. Binaries
-# that want a particular postgres version will select it explicitly: this is just a default.
-ENV LD_LIBRARY_PATH=/usr/local/v${DEFAULT_PG_VERSION}/lib
-
-
 VOLUME ["/data"]
 USER neon
 EXPOSE 6400
@@ -258,7 +258,7 @@ WORKDIR /home/nonroot
 
 # Rust
 # Please keep the version of llvm (installed above) in sync with rust llvm (`rustc --version --verbose | grep LLVM`)
-ENV RUSTC_VERSION=1.83.0
+ENV RUSTC_VERSION=1.84.0
 ENV RUSTUP_HOME="/home/nonroot/.rustup"
 ENV PATH="/home/nonroot/.cargo/bin:${PATH}"
 ARG RUSTFILT_VERSION=0.2.1
@@ -1167,22 +1167,13 @@ FROM rust-extensions-build AS pg-mooncake-build
 ARG PG_VERSION
 COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
 
-# The topmost commit in the `neon` branch at the time of writing this
-# https://github.com/Mooncake-Labs/pg_mooncake/commits/neon/
-# https://github.com/Mooncake-Labs/pg_mooncake/commit/077c92c452bb6896a7b7776ee95f039984f076af
-ENV PG_MOONCAKE_VERSION=077c92c452bb6896a7b7776ee95f039984f076af
 ENV PATH="/usr/local/pgsql/bin/:$PATH"
 
-RUN case "${PG_VERSION}" in \
-    'v14') \
-        echo "pg_mooncake is not supported on Postgres ${PG_VERSION}" && exit 0;; \
-    esac && \
-    git clone --depth 1 --branch neon https://github.com/Mooncake-Labs/pg_mooncake.git pg_mooncake-src && \
-    cd pg_mooncake-src && \
-    git checkout "${PG_MOONCAKE_VERSION}" && \
-    git submodule update --init --depth 1 --recursive && \
-    make BUILD_TYPE=release -j $(getconf _NPROCESSORS_ONLN) && \
-    make BUILD_TYPE=release -j $(getconf _NPROCESSORS_ONLN) install && \
+RUN wget https://github.com/Mooncake-Labs/pg_mooncake/releases/download/v0.1.0/pg_mooncake-0.1.0.tar.gz -O pg_mooncake.tar.gz && \
+    echo "eafd059b77f541f11525eb8affcd66a176968cbd8fe7c0d436e733f2aa4da59f pg_mooncake.tar.gz" | sha256sum --check && \
+    mkdir pg_mooncake-src && cd pg_mooncake-src && tar xzf ../pg_mooncake.tar.gz --strip-components=1 -C . && \
+    make release -j $(getconf _NPROCESSORS_ONLN) && \
+    make install -j $(getconf _NPROCESSORS_ONLN) && \
     echo 'trusted = true' >> /usr/local/pgsql/share/extension/pg_mooncake.control
 
 #########################################################################################
@@ -15,6 +15,7 @@ aws-config.workspace = true
 aws-sdk-s3.workspace = true
 aws-sdk-kms.workspace = true
 anyhow.workspace = true
+axum = { workspace = true, features = [] }
 camino.workspace = true
 chrono.workspace = true
 cfg-if.workspace = true

@@ -22,7 +23,7 @@ clap.workspace = true
 fail.workspace = true
 flate2.workspace = true
 futures.workspace = true
-hyper0 = { workspace = true, features = ["full"] }
+http.workspace = true
 metrics.workspace = true
 nix.workspace = true
 notify.workspace = true

@@ -37,6 +38,8 @@ serde_with.workspace = true
 serde_json.workspace = true
 signal-hook.workspace = true
 tar.workspace = true
+tower.workspace = true
+tower-http.workspace = true
 reqwest = { workspace = true, features = ["json"] }
 tokio = { workspace = true, features = ["rt", "rt-multi-thread"] }
 tokio-postgres.workspace = true
@@ -60,7 +60,7 @@ use compute_tools::compute::{
 };
 use compute_tools::configurator::launch_configurator;
 use compute_tools::extension_server::get_pg_version_string;
-use compute_tools::http::api::launch_http_server;
+use compute_tools::http::launch_http_server;
 use compute_tools::logger::*;
 use compute_tools::monitor::launch_monitor;
 use compute_tools::params::*;

@@ -111,11 +111,6 @@ fn main() -> Result<()> {
 fn init() -> Result<(String, clap::ArgMatches)> {
     init_tracing_and_logging(DEFAULT_LOG_LEVEL)?;
 
-    opentelemetry::global::set_error_handler(|err| {
-        tracing::info!("OpenTelemetry error: {err}");
-    })
-    .expect("global error handler lock poisoned");
-
     let mut signals = Signals::new([SIGINT, SIGTERM, SIGQUIT])?;
     thread::spawn(move || {
         for sig in signals.forever() {

@@ -493,7 +488,10 @@ fn start_postgres(
     let mut pg = None;
     if !prestartup_failed {
         pg = match compute.start_compute() {
-            Ok(pg) => Some(pg),
+            Ok(pg) => {
+                info!(postmaster_pid = %pg.0.id(), "Postgres was started");
+                Some(pg)
+            }
             Err(err) => {
                 error!("could not start the compute node: {:#}", err);
                 compute.set_failed_status(err);

@@ -591,6 +589,8 @@ fn wait_postgres(pg: Option<PostgresHandle>) -> Result<WaitPostgresResult> {
     // propagate to Postgres and it will be shut down as well.
     let mut exit_code = None;
     if let Some((mut pg, logs_handle)) = pg {
+        info!(postmaster_pid = %pg.id(), "Waiting for Postgres to exit");
+
         let ecode = pg
             .wait()
             .expect("failed to start waiting on Postgres process");
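
The set_error_handler block removed in the init() hunk above appears to track the opentelemetry 0.26 to 0.27 bump elsewhere in this commit: 0.27 dropped the global error handler API in favor of internal tracing-based logging, so the call would no longer compile.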
@@ -36,11 +36,11 @@ pub async fn get_dbs_and_roles(compute: &Arc<ComputeNode>) -> anyhow::Result<Cat
 
 #[derive(Debug, thiserror::Error)]
 pub enum SchemaDumpError {
-    #[error("Database does not exist.")]
+    #[error("database does not exist")]
     DatabaseDoesNotExist,
-    #[error("Failed to execute pg_dump.")]
+    #[error("failed to execute pg_dump")]
     IO(#[from] std::io::Error),
-    #[error("Unexpected error.")]
+    #[error("unexpected I/O error")]
     Unexpected,
 }
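
Context for the message rewording above: these #[error(...)] strings become the error's Display output, and the Rust convention is lower-case with no trailing period so that messages compose when callers wrap them. A small self-contained illustration, not from this codebase:

    use thiserror::Error;

    #[derive(Debug, Error)]
    enum SchemaDumpError {
        #[error("database does not exist")]
        DatabaseDoesNotExist,
    }

    fn main() {
        // anyhow's alternate formatting prints the whole chain on one line:
        // "failed to dump schema: database does not exist"
        let err = anyhow::Error::new(SchemaDumpError::DatabaseDoesNotExist)
            .context("failed to dump schema");
        println!("{:#}", err);
    }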
@@ -15,7 +15,7 @@ use std::time::Instant;
 
 use anyhow::{Context, Result};
 use chrono::{DateTime, Utc};
-use compute_api::spec::{PgIdent, Role};
+use compute_api::spec::{Database, PgIdent, Role};
 use futures::future::join_all;
 use futures::stream::FuturesUnordered;
 use futures::StreamExt;

@@ -45,8 +45,10 @@ use crate::spec_apply::ApplySpecPhase::{
     DropInvalidDatabases, DropRoles, HandleNeonExtension, HandleOtherExtensions,
     RenameAndDeleteDatabases, RenameRoles, RunInEachDatabase,
 };
+use crate::spec_apply::PerDatabasePhase;
 use crate::spec_apply::PerDatabasePhase::{
-    ChangeSchemaPerms, DeleteDBRoleReferences, HandleAnonExtension,
+    ChangeSchemaPerms, DeleteDBRoleReferences, DropSubscriptionsForDeletedDatabases,
+    HandleAnonExtension,
 };
 use crate::spec_apply::{apply_operations, MutableApplyContext, DB};
 use crate::sync_sk::{check_if_synced, ping_safekeeper};

@@ -834,7 +836,7 @@ impl ComputeNode {
         conf
     }
 
-    async fn get_maintenance_client(
+    pub async fn get_maintenance_client(
         conf: &tokio_postgres::Config,
     ) -> Result<tokio_postgres::Client> {
         let mut conf = conf.clone();

@@ -943,6 +945,78 @@ impl ComputeNode {
             dbs: databases,
         }));
 
+        // Apply special pre drop database phase.
+        // NOTE: we use the code of RunInEachDatabase phase for parallelism
+        // and connection management, but we don't really run it in *each* database,
+        // only in databases, we're about to drop.
+        info!("Applying PerDatabase (pre-dropdb) phase");
+        let concurrency_token = Arc::new(tokio::sync::Semaphore::new(concurrency));
+
+        // Run the phase for each database that we're about to drop.
+        let db_processes = spec
+            .delta_operations
+            .iter()
+            .flatten()
+            .filter_map(move |op| {
+                if op.action.as_str() == "delete_db" {
+                    Some(op.name.clone())
+                } else {
+                    None
+                }
+            })
+            .map(|dbname| {
+                let spec = spec.clone();
+                let ctx = ctx.clone();
+                let jwks_roles = jwks_roles.clone();
+                let mut conf = conf.as_ref().clone();
+                let concurrency_token = concurrency_token.clone();
+                // We only need dbname field for this phase, so set other fields to dummy values
+                let db = DB::UserDB(Database {
+                    name: dbname.clone(),
+                    owner: "cloud_admin".to_string(),
+                    options: None,
+                    restrict_conn: false,
+                    invalid: false,
+                });
+
+                debug!("Applying per-database phases for Database {:?}", &db);
+
+                match &db {
+                    DB::SystemDB => {}
+                    DB::UserDB(db) => {
+                        conf.dbname(db.name.as_str());
+                    }
+                }
+
+                let conf = Arc::new(conf);
+                let fut = Self::apply_spec_sql_db(
+                    spec.clone(),
+                    conf,
+                    ctx.clone(),
+                    jwks_roles.clone(),
+                    concurrency_token.clone(),
+                    db,
+                    [DropSubscriptionsForDeletedDatabases].to_vec(),
+                );
+
+                Ok(spawn(fut))
+            })
+            .collect::<Vec<Result<_, anyhow::Error>>>();
+
+        for process in db_processes.into_iter() {
+            let handle = process?;
+            if let Err(e) = handle.await? {
+                // Handle the error case where the database does not exist
+                // We do not check whether the DB exists or not in the deletion phase,
+                // so we shouldn't be strict about it in pre-deletion cleanup as well.
+                if e.to_string().contains("does not exist") {
+                    warn!("Error dropping subscription: {}", e);
+                } else {
+                    return Err(e);
+                }
+            };
+        }
+
         for phase in [
             CreateSuperUser,
             DropInvalidDatabases,

@@ -962,7 +1036,7 @@ impl ComputeNode {
             .await?;
         }
 
-        info!("Applying RunInEachDatabase phase");
+        info!("Applying RunInEachDatabase2 phase");
         let concurrency_token = Arc::new(tokio::sync::Semaphore::new(concurrency));
 
         let db_processes = spec

@@ -997,6 +1071,12 @@ impl ComputeNode {
                     jwks_roles.clone(),
                     concurrency_token.clone(),
                     db,
+                    [
+                        DeleteDBRoleReferences,
+                        ChangeSchemaPerms,
+                        HandleAnonExtension,
+                    ]
+                    .to_vec(),
                 );
 
                 Ok(spawn(fut))

@@ -1043,16 +1123,13 @@ impl ComputeNode {
         jwks_roles: Arc<HashSet<String>>,
         concurrency_token: Arc<tokio::sync::Semaphore>,
         db: DB,
+        subphases: Vec<PerDatabasePhase>,
     ) -> Result<()> {
         let _permit = concurrency_token.acquire().await?;
 
         let mut client_conn = None;
 
-        for subphase in [
-            DeleteDBRoleReferences,
-            ChangeSchemaPerms,
-            HandleAnonExtension,
-        ] {
+        for subphase in subphases {
             apply_operations(
                 spec.clone(),
                 ctx.clone(),
@@ -1,606 +0,0 @@
|
||||
use std::convert::Infallible;
|
||||
use std::net::IpAddr;
|
||||
use std::net::Ipv6Addr;
|
||||
use std::net::SocketAddr;
|
||||
use std::sync::Arc;
|
||||
use std::thread;
|
||||
|
||||
use crate::catalog::SchemaDumpError;
|
||||
use crate::catalog::{get_database_schema, get_dbs_and_roles};
|
||||
use crate::compute::forward_termination_signal;
|
||||
use crate::compute::{ComputeNode, ComputeState, ParsedSpec};
|
||||
use crate::installed_extensions;
|
||||
use compute_api::requests::{ConfigurationRequest, ExtensionInstallRequest, SetRoleGrantsRequest};
|
||||
use compute_api::responses::{
|
||||
ComputeStatus, ComputeStatusResponse, ExtensionInstallResult, GenericAPIError,
|
||||
SetRoleGrantsResponse,
|
||||
};
|
||||
|
||||
use anyhow::Result;
|
||||
use hyper::header::CONTENT_TYPE;
|
||||
use hyper::service::{make_service_fn, service_fn};
|
||||
use hyper::{Body, Method, Request, Response, Server, StatusCode};
|
||||
use metrics::proto::MetricFamily;
|
||||
use metrics::Encoder;
|
||||
use metrics::TextEncoder;
|
||||
use tokio::task;
|
||||
use tokio_util::sync::CancellationToken;
|
||||
use tracing::{debug, error, info, warn};
|
||||
use tracing_utils::http::OtelName;
|
||||
use utils::failpoint_support::failpoints_handler;
|
||||
use utils::http::error::ApiError;
|
||||
use utils::http::request::must_get_query_param;
|
||||
|
||||
fn status_response_from_state(state: &ComputeState) -> ComputeStatusResponse {
|
||||
ComputeStatusResponse {
|
||||
start_time: state.start_time,
|
||||
tenant: state
|
||||
.pspec
|
||||
.as_ref()
|
||||
.map(|pspec| pspec.tenant_id.to_string()),
|
||||
timeline: state
|
||||
.pspec
|
||||
.as_ref()
|
||||
.map(|pspec| pspec.timeline_id.to_string()),
|
||||
status: state.status,
|
||||
last_active: state.last_active,
|
||||
error: state.error.clone(),
|
||||
}
|
||||
}
|
||||
|
||||
// Service function to handle all available routes.
|
||||
async fn routes(req: Request<Body>, compute: &Arc<ComputeNode>) -> Response<Body> {
|
||||
//
|
||||
// NOTE: The URI path is currently included in traces. That's OK because
|
||||
// it doesn't contain any variable parts or sensitive information. But
|
||||
// please keep that in mind if you change the routing here.
|
||||
//
|
||||
match (req.method(), req.uri().path()) {
|
||||
// Serialized compute state.
|
||||
(&Method::GET, "/status") => {
|
||||
debug!("serving /status GET request");
|
||||
let state = compute.state.lock().unwrap();
|
||||
let status_response = status_response_from_state(&state);
|
||||
Response::new(Body::from(serde_json::to_string(&status_response).unwrap()))
|
||||
}
|
||||
|
||||
// Startup metrics in JSON format. Keep /metrics reserved for a possible
|
||||
// future use for Prometheus metrics format.
|
||||
(&Method::GET, "/metrics.json") => {
|
||||
info!("serving /metrics.json GET request");
|
||||
let metrics = compute.state.lock().unwrap().metrics.clone();
|
||||
Response::new(Body::from(serde_json::to_string(&metrics).unwrap()))
|
||||
}
|
||||
|
||||
// Prometheus metrics
|
||||
(&Method::GET, "/metrics") => {
|
||||
debug!("serving /metrics GET request");
|
||||
|
||||
// When we call TextEncoder::encode() below, it will immediately
|
||||
// return an error if a metric family has no metrics, so we need to
|
||||
// preemptively filter out metric families with no metrics.
|
||||
let metrics = installed_extensions::collect()
|
||||
.into_iter()
|
||||
.filter(|m| !m.get_metric().is_empty())
|
||||
.collect::<Vec<MetricFamily>>();
|
||||
|
||||
let encoder = TextEncoder::new();
|
||||
let mut buffer = vec![];
|
||||
|
||||
if let Err(err) = encoder.encode(&metrics, &mut buffer) {
|
||||
let msg = format!("error handling /metrics request: {err}");
|
||||
error!(msg);
|
||||
return render_json_error(&msg, StatusCode::INTERNAL_SERVER_ERROR);
|
||||
}
|
||||
|
||||
match Response::builder()
|
||||
.status(StatusCode::OK)
|
||||
.header(CONTENT_TYPE, encoder.format_type())
|
||||
.body(Body::from(buffer))
|
||||
{
|
||||
Ok(response) => response,
|
||||
Err(err) => {
|
||||
let msg = format!("error handling /metrics request: {err}");
|
||||
error!(msg);
|
||||
render_json_error(&msg, StatusCode::INTERNAL_SERVER_ERROR)
|
||||
}
|
||||
}
|
||||
}
|
||||
// Collect Postgres current usage insights
|
||||
(&Method::GET, "/insights") => {
|
||||
info!("serving /insights GET request");
|
||||
let status = compute.get_status();
|
||||
if status != ComputeStatus::Running {
|
||||
let msg = format!("compute is not running, current status: {:?}", status);
|
||||
error!(msg);
|
||||
return Response::new(Body::from(msg));
|
||||
}
|
||||
|
||||
let insights = compute.collect_insights().await;
|
||||
Response::new(Body::from(insights))
|
||||
}
|
||||
|
||||
(&Method::POST, "/check_writability") => {
|
||||
info!("serving /check_writability POST request");
|
||||
let status = compute.get_status();
|
||||
if status != ComputeStatus::Running {
|
||||
let msg = format!(
|
||||
"invalid compute status for check_writability request: {:?}",
|
||||
status
|
||||
);
|
||||
error!(msg);
|
||||
return Response::new(Body::from(msg));
|
||||
}
|
||||
|
||||
let res = crate::checker::check_writability(compute).await;
|
||||
match res {
|
||||
Ok(_) => Response::new(Body::from("true")),
|
||||
Err(e) => {
|
||||
error!("check_writability failed: {}", e);
|
||||
Response::new(Body::from(e.to_string()))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
(&Method::POST, "/extensions") => {
|
||||
info!("serving /extensions POST request");
|
||||
let status = compute.get_status();
|
||||
if status != ComputeStatus::Running {
|
||||
let msg = format!(
|
||||
"invalid compute status for extensions request: {:?}",
|
||||
status
|
||||
);
|
||||
error!(msg);
|
||||
return render_json_error(&msg, StatusCode::PRECONDITION_FAILED);
|
||||
}
|
||||
|
||||
let request = hyper::body::to_bytes(req.into_body()).await.unwrap();
|
||||
let request = serde_json::from_slice::<ExtensionInstallRequest>(&request).unwrap();
|
||||
let res = compute
|
||||
.install_extension(&request.extension, &request.database, request.version)
|
||||
.await;
|
||||
match res {
|
||||
Ok(version) => render_json(Body::from(
|
||||
serde_json::to_string(&ExtensionInstallResult {
|
||||
extension: request.extension,
|
||||
version,
|
||||
})
|
||||
.unwrap(),
|
||||
)),
|
||||
Err(e) => {
|
||||
error!("install_extension failed: {}", e);
|
||||
render_json_error(&e.to_string(), StatusCode::INTERNAL_SERVER_ERROR)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
(&Method::GET, "/info") => {
|
||||
let num_cpus = num_cpus::get_physical();
|
||||
info!("serving /info GET request. num_cpus: {}", num_cpus);
|
||||
Response::new(Body::from(
|
||||
serde_json::json!({
|
||||
"num_cpus": num_cpus,
|
||||
})
|
||||
.to_string(),
|
||||
))
|
||||
}
|
||||
|
||||
// Accept spec in JSON format and request compute configuration. If
|
||||
// anything goes wrong after we set the compute status to `ConfigurationPending`
|
||||
// and update compute state with new spec, we basically leave compute
|
||||
// in the potentially wrong state. That said, it's control-plane's
|
||||
// responsibility to watch compute state after reconfiguration request
|
||||
// and to clean restart in case of errors.
|
||||
(&Method::POST, "/configure") => {
|
||||
info!("serving /configure POST request");
|
||||
match handle_configure_request(req, compute).await {
|
||||
Ok(msg) => Response::new(Body::from(msg)),
|
||||
Err((msg, code)) => {
|
||||
error!("error handling /configure request: {msg}");
|
||||
render_json_error(&msg, code)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
(&Method::POST, "/terminate") => {
|
||||
info!("serving /terminate POST request");
|
||||
match handle_terminate_request(compute).await {
|
||||
Ok(()) => Response::new(Body::empty()),
|
||||
Err((msg, code)) => {
|
||||
error!("error handling /terminate request: {msg}");
|
||||
render_json_error(&msg, code)
|
||||
}
|
||||
}
|
||||
}
        (&Method::GET, "/dbs_and_roles") => {
            info!("serving /dbs_and_roles GET request",);
            match get_dbs_and_roles(compute).await {
                Ok(res) => render_json(Body::from(serde_json::to_string(&res).unwrap())),
                Err(_) => {
                    render_json_error("can't get dbs and roles", StatusCode::INTERNAL_SERVER_ERROR)
                }
            }
        }

        (&Method::GET, "/database_schema") => {
            let database = match must_get_query_param(&req, "database") {
                Err(e) => return e.into_response(),
                Ok(database) => database,
            };
            info!("serving /database_schema GET request with database: {database}",);
            match get_database_schema(compute, &database).await {
                Ok(res) => render_plain(Body::wrap_stream(res)),
                Err(SchemaDumpError::DatabaseDoesNotExist) => {
                    render_json_error("database does not exist", StatusCode::NOT_FOUND)
                }
                Err(e) => {
                    error!("can't get schema dump: {}", e);
                    render_json_error("can't get schema dump", StatusCode::INTERNAL_SERVER_ERROR)
                }
            }
        }

        (&Method::POST, "/grants") => {
            info!("serving /grants POST request");
            let status = compute.get_status();
            if status != ComputeStatus::Running {
                let msg = format!(
                    "invalid compute status for set_role_grants request: {:?}",
                    status
                );
                error!(msg);
                return render_json_error(&msg, StatusCode::PRECONDITION_FAILED);
            }

            let request = hyper::body::to_bytes(req.into_body()).await.unwrap();
            let request = serde_json::from_slice::<SetRoleGrantsRequest>(&request).unwrap();

            let res = compute
                .set_role_grants(
                    &request.database,
                    &request.schema,
                    &request.privileges,
                    &request.role,
                )
                .await;
            match res {
                Ok(()) => render_json(Body::from(
                    serde_json::to_string(&SetRoleGrantsResponse {
                        database: request.database,
                        schema: request.schema,
                        role: request.role,
                        privileges: request.privileges,
                    })
                    .unwrap(),
                )),
                Err(e) => render_json_error(
                    &format!("could not grant role privileges to the schema: {e}"),
                    // TODO: can we filter on role/schema not found errors
                    // and return appropriate error code?
                    StatusCode::INTERNAL_SERVER_ERROR,
                ),
            }
        }

        // get the list of installed extensions
        // currently only used in python tests
        // TODO: call it from cplane
        (&Method::GET, "/installed_extensions") => {
            info!("serving /installed_extensions GET request");
            let status = compute.get_status();
            if status != ComputeStatus::Running {
                let msg = format!(
                    "invalid compute status for extensions request: {:?}",
                    status
                );
                error!(msg);
                return Response::new(Body::from(msg));
            }

            let conf = compute.get_conn_conf(None);
            let res =
                task::spawn_blocking(move || installed_extensions::get_installed_extensions(conf))
                    .await
                    .unwrap();

            match res {
                Ok(res) => render_json(Body::from(serde_json::to_string(&res).unwrap())),
                Err(e) => render_json_error(
                    &format!("could not get list of installed extensions: {}", e),
                    StatusCode::INTERNAL_SERVER_ERROR,
                ),
            }
        }

        (&Method::POST, "/failpoints") if cfg!(feature = "testing") => {
            match failpoints_handler(req, CancellationToken::new()).await {
                Ok(r) => r,
                Err(ApiError::BadRequest(e)) => {
                    render_json_error(&e.to_string(), StatusCode::BAD_REQUEST)
                }
                Err(_) => {
                    render_json_error("Internal server error", StatusCode::INTERNAL_SERVER_ERROR)
                }
            }
        }

        // download extension files from remote extension storage on demand
        (&Method::POST, route) if route.starts_with("/extension_server/") => {
            info!("serving {:?} POST request", route);
            info!("req.uri {:?}", req.uri());

            // don't even try to download extensions
            // if no remote storage is configured
            if compute.ext_remote_storage.is_none() {
                info!("no extensions remote storage configured");
                let mut resp = Response::new(Body::from("no remote storage configured"));
                *resp.status_mut() = StatusCode::INTERNAL_SERVER_ERROR;
                return resp;
            }

            let mut is_library = false;
            if let Some(params) = req.uri().query() {
                info!("serving {:?} POST request with params: {}", route, params);
                if params == "is_library=true" {
                    is_library = true;
                } else {
                    let mut resp = Response::new(Body::from("Wrong request parameters"));
                    *resp.status_mut() = StatusCode::BAD_REQUEST;
                    return resp;
                }
            }
            let filename = route.split('/').last().unwrap().to_string();
            info!("serving /extension_server POST request, filename: {filename:?} is_library: {is_library}");

            // get ext_name and path from spec
            // don't lock compute_state for too long
            let ext = {
                let compute_state = compute.state.lock().unwrap();
                let pspec = compute_state.pspec.as_ref().expect("spec must be set");
                let spec = &pspec.spec;

                // debug only
                info!("spec: {:?}", spec);

                let remote_extensions = match spec.remote_extensions.as_ref() {
                    Some(r) => r,
                    None => {
                        info!("no remote extensions spec was provided");
                        let mut resp = Response::new(Body::from("no remote storage configured"));
                        *resp.status_mut() = StatusCode::INTERNAL_SERVER_ERROR;
                        return resp;
                    }
                };

                remote_extensions.get_ext(
                    &filename,
                    is_library,
                    &compute.build_tag,
                    &compute.pgversion,
                )
            };

            match ext {
                Ok((ext_name, ext_path)) => {
                    match compute.download_extension(ext_name, ext_path).await {
                        Ok(_) => Response::new(Body::from("OK")),
                        Err(e) => {
                            error!("extension download failed: {}", e);
                            let mut resp = Response::new(Body::from(e.to_string()));
                            *resp.status_mut() = StatusCode::INTERNAL_SERVER_ERROR;
                            resp
                        }
                    }
                }
                Err(e) => {
                    warn!("extension download failed to find extension: {}", e);
                    let mut resp = Response::new(Body::from("failed to find file"));
                    *resp.status_mut() = StatusCode::INTERNAL_SERVER_ERROR;
                    resp
                }
            }
        }

        // Return the `404 Not Found` for any other routes.
        _ => {
            let mut not_found = Response::new(Body::from("404 Not Found"));
            *not_found.status_mut() = StatusCode::NOT_FOUND;
            not_found
        }
    }
}

async fn handle_configure_request(
    req: Request<Body>,
    compute: &Arc<ComputeNode>,
) -> Result<String, (String, StatusCode)> {
    if !compute.live_config_allowed {
        return Err((
            "live configuration is not allowed for this compute node".to_string(),
            StatusCode::PRECONDITION_FAILED,
        ));
    }

    let body_bytes = hyper::body::to_bytes(req.into_body()).await.unwrap();
    let spec_raw = String::from_utf8(body_bytes.to_vec()).unwrap();
    if let Ok(request) = serde_json::from_str::<ConfigurationRequest>(&spec_raw) {
        let spec = request.spec;

        let parsed_spec = match ParsedSpec::try_from(spec) {
            Ok(ps) => ps,
            Err(msg) => return Err((msg, StatusCode::BAD_REQUEST)),
        };

        // XXX: wrap the state update under the lock in a code block. Otherwise,
        // we will try to `Send` `mut state` into the spawned thread
        // below, which will cause the error:
        // ```
        // error: future cannot be sent between threads safely
        // ```
        {
            let mut state = compute.state.lock().unwrap();
            if state.status != ComputeStatus::Empty && state.status != ComputeStatus::Running {
                let msg = format!(
                    "invalid compute status for configuration request: {:?}",
                    state.status.clone()
                );
                return Err((msg, StatusCode::PRECONDITION_FAILED));
            }
            state.pspec = Some(parsed_spec);
            state.set_status(ComputeStatus::ConfigurationPending, &compute.state_changed);
            drop(state);
            info!("set new spec and notified waiters");
        }

        // Spawn a blocking thread to wait for compute to become Running.
        // This is needed so as not to block the main pool of workers, and
        // to be able to serve other requests while some particular request
        // is waiting for compute to finish configuration.
        let c = compute.clone();
        task::spawn_blocking(move || {
            let mut state = c.state.lock().unwrap();
            while state.status != ComputeStatus::Running {
                state = c.state_changed.wait(state).unwrap();
                info!(
                    "waiting for compute to become Running, current status: {:?}",
                    state.status
                );

                if state.status == ComputeStatus::Failed {
                    let err = state.error.as_ref().map_or("unknown error", |x| x);
                    let msg = format!("compute configuration failed: {:?}", err);
                    return Err((msg, StatusCode::INTERNAL_SERVER_ERROR));
                }
            }

            Ok(())
        })
        .await
        .unwrap()?;

        // Return current compute state if everything went well.
        let state = compute.state.lock().unwrap().clone();
        let status_response = status_response_from_state(&state);
        Ok(serde_json::to_string(&status_response).unwrap())
    } else {
        Err(("invalid spec".to_string(), StatusCode::BAD_REQUEST))
    }
}

fn render_json_error(e: &str, status: StatusCode) -> Response<Body> {
    let error = GenericAPIError {
        error: e.to_string(),
    };
    Response::builder()
        .status(status)
        .header(CONTENT_TYPE, "application/json")
        .body(Body::from(serde_json::to_string(&error).unwrap()))
        .unwrap()
}

fn render_json(body: Body) -> Response<Body> {
    Response::builder()
        .header(CONTENT_TYPE, "application/json")
        .body(body)
        .unwrap()
}

fn render_plain(body: Body) -> Response<Body> {
    Response::builder()
        .header(CONTENT_TYPE, "text/plain")
        .body(body)
        .unwrap()
}

async fn handle_terminate_request(compute: &Arc<ComputeNode>) -> Result<(), (String, StatusCode)> {
    {
        let mut state = compute.state.lock().unwrap();
        if state.status == ComputeStatus::Terminated {
            return Ok(());
        }
        if state.status != ComputeStatus::Empty && state.status != ComputeStatus::Running {
            let msg = format!(
                "invalid compute status for termination request: {}",
                state.status
            );
            return Err((msg, StatusCode::PRECONDITION_FAILED));
        }
        state.set_status(ComputeStatus::TerminationPending, &compute.state_changed);
        drop(state);
    }

    forward_termination_signal();
    info!("sent signal and notified waiters");

    // Spawn a blocking thread to wait for compute to become Terminated.
    // This is needed so as not to block the main pool of workers, and
    // to be able to serve other requests while some particular request
    // is waiting for compute to finish termination.
    let c = compute.clone();
    task::spawn_blocking(move || {
        let mut state = c.state.lock().unwrap();
        while state.status != ComputeStatus::Terminated {
            state = c.state_changed.wait(state).unwrap();
            info!(
                "waiting for compute to become {}, current status: {:?}",
                ComputeStatus::Terminated,
                state.status
            );
        }

        Ok(())
    })
    .await
    .unwrap()?;
    info!("terminated Postgres");
    Ok(())
}

// Main Hyper HTTP server function: runs the server and blocks on it forever.
#[tokio::main]
async fn serve(port: u16, state: Arc<ComputeNode>) {
    // this usually binds to both IPv4 and IPv6 on Linux
    // see e.g. https://github.com/rust-lang/rust/pull/34440
    let addr = SocketAddr::new(IpAddr::from(Ipv6Addr::UNSPECIFIED), port);

    let make_service = make_service_fn(move |_conn| {
        let state = state.clone();
        async move {
            Ok::<_, Infallible>(service_fn(move |req: Request<Body>| {
                let state = state.clone();
                async move {
                    Ok::<_, Infallible>(
                        // NOTE: We include the URI path in the string. It
                        // doesn't contain any variable parts or sensitive
                        // information in this API.
                        tracing_utils::http::tracing_handler(
                            req,
                            |req| routes(req, &state),
                            OtelName::UriPath,
                        )
                        .await,
                    )
                }
            }))
        }
    });

    info!("starting HTTP server on {}", addr);

    let server = Server::bind(&addr).serve(make_service);

    // Run this server forever
    if let Err(e) = server.await {
        error!("server error: {}", e);
    }
}

/// Launch a separate Hyper HTTP API server thread and return its `JoinHandle`.
pub fn launch_http_server(port: u16, state: &Arc<ComputeNode>) -> Result<thread::JoinHandle<()>> {
    let state = Arc::clone(state);

    Ok(thread::Builder::new()
        .name("http-endpoint".into())
        .spawn(move || serve(port, state))?)
}
48
compute_tools/src/http/extract/json.rs
Normal file
@@ -0,0 +1,48 @@
use std::ops::{Deref, DerefMut};

use axum::{
    async_trait,
    extract::{rejection::JsonRejection, FromRequest, Request},
};
use compute_api::responses::GenericAPIError;
use http::StatusCode;

/// Custom `Json` extractor, so that we can format errors into
/// `JsonResponse<GenericAPIError>`.
#[derive(Debug, Clone, Copy, Default)]
pub(crate) struct Json<T>(pub T);

#[async_trait]
impl<S, T> FromRequest<S> for Json<T>
where
    axum::Json<T>: FromRequest<S, Rejection = JsonRejection>,
    S: Send + Sync,
{
    type Rejection = (StatusCode, axum::Json<GenericAPIError>);

    async fn from_request(req: Request, state: &S) -> Result<Self, Self::Rejection> {
        match axum::Json::<T>::from_request(req, state).await {
            Ok(value) => Ok(Self(value.0)),
            Err(rejection) => Err((
                rejection.status(),
                axum::Json(GenericAPIError {
                    error: rejection.body_text().to_lowercase(),
                }),
            )),
        }
    }
}

impl<T> Deref for Json<T> {
    type Target = T;

    fn deref(&self) -> &Self::Target {
        &self.0
    }
}

impl<T> DerefMut for Json<T> {
    fn deref_mut(&mut self) -> &mut Self::Target {
        &mut self.0
    }
}
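For orientation, a minimal sketch of how a handler consumes this wrapper (the `CreateThing` type and `create_thing` handler are hypothetical, not part of this commit): on malformed input the extractor rejects with a JSON `GenericAPIError` body instead of axum's default plain-text rejection.

use axum::response::IntoResponse;
use http::StatusCode;
use serde::Deserialize;

#[derive(Deserialize)]
struct CreateThing {
    name: String,
}

// `Json` is the crate-local wrapper defined above; destructuring works
// because it is a tuple struct with a public field.
async fn create_thing(Json(req): Json<CreateThing>) -> impl IntoResponse {
    (StatusCode::CREATED, req.name)
}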
7
compute_tools/src/http/extract/mod.rs
Normal file
@@ -0,0 +1,7 @@
pub(crate) mod json;
pub(crate) mod path;
pub(crate) mod query;

pub(crate) use json::Json;
pub(crate) use path::Path;
pub(crate) use query::Query;
48
compute_tools/src/http/extract/path.rs
Normal file
@@ -0,0 +1,48 @@
use std::ops::{Deref, DerefMut};

use axum::{
    async_trait,
    extract::{rejection::PathRejection, FromRequestParts},
};
use compute_api::responses::GenericAPIError;
use http::{request::Parts, StatusCode};

/// Custom `Path` extractor, so that we can format errors into
/// `JsonResponse<GenericAPIError>`.
#[derive(Debug, Clone, Copy, Default)]
pub(crate) struct Path<T>(pub T);

#[async_trait]
impl<S, T> FromRequestParts<S> for Path<T>
where
    axum::extract::Path<T>: FromRequestParts<S, Rejection = PathRejection>,
    S: Send + Sync,
{
    type Rejection = (StatusCode, axum::Json<GenericAPIError>);

    async fn from_request_parts(parts: &mut Parts, state: &S) -> Result<Self, Self::Rejection> {
        match axum::extract::Path::<T>::from_request_parts(parts, state).await {
            Ok(value) => Ok(Self(value.0)),
            Err(rejection) => Err((
                rejection.status(),
                axum::Json(GenericAPIError {
                    error: rejection.body_text().to_ascii_lowercase(),
                }),
            )),
        }
    }
}

impl<T> Deref for Path<T> {
    type Target = T;

    fn deref(&self) -> &Self::Target {
        &self.0
    }
}

impl<T> DerefMut for Path<T> {
    fn deref_mut(&mut self) -> &mut Self::Target {
        &mut self.0
    }
}
48
compute_tools/src/http/extract/query.rs
Normal file
@@ -0,0 +1,48 @@
use std::ops::{Deref, DerefMut};

use axum::{
    async_trait,
    extract::{rejection::QueryRejection, FromRequestParts},
};
use compute_api::responses::GenericAPIError;
use http::{request::Parts, StatusCode};

/// Custom `Query` extractor, so that we can format errors into
/// `JsonResponse<GenericAPIError>`.
#[derive(Debug, Clone, Copy, Default)]
pub(crate) struct Query<T>(pub T);

#[async_trait]
impl<S, T> FromRequestParts<S> for Query<T>
where
    axum::extract::Query<T>: FromRequestParts<S, Rejection = QueryRejection>,
    S: Send + Sync,
{
    type Rejection = (StatusCode, axum::Json<GenericAPIError>);

    async fn from_request_parts(parts: &mut Parts, state: &S) -> Result<Self, Self::Rejection> {
        match axum::extract::Query::<T>::from_request_parts(parts, state).await {
            Ok(value) => Ok(Self(value.0)),
            Err(rejection) => Err((
                rejection.status(),
                axum::Json(GenericAPIError {
                    error: rejection.body_text().to_ascii_lowercase(),
                }),
            )),
        }
    }
}

impl<T> Deref for Query<T> {
    type Target = T;

    fn deref(&self) -> &Self::Target {
        &self.0
    }
}

impl<T> DerefMut for Query<T> {
    fn deref_mut(&mut self) -> &mut Self::Target {
        &mut self.0
    }
}
@@ -1 +1,56 @@
pub mod api;
use axum::{body::Body, response::Response};
use compute_api::responses::{ComputeStatus, GenericAPIError};
use http::{header::CONTENT_TYPE, StatusCode};
use serde::Serialize;
use tracing::error;

pub use server::launch_http_server;

mod extract;
mod routes;
mod server;

/// Convenience response builder for JSON responses
struct JsonResponse;

impl JsonResponse {
    /// Helper for actually creating a response
    fn create_response(code: StatusCode, body: impl Serialize) -> Response {
        Response::builder()
            .status(code)
            .header(CONTENT_TYPE.as_str(), "application/json")
            .body(Body::from(serde_json::to_string(&body).unwrap()))
            .unwrap()
    }

    /// Create a successful response
    pub(self) fn success(code: StatusCode, body: impl Serialize) -> Response {
        assert!({
            let code = code.as_u16();

            (200..300).contains(&code)
        });

        Self::create_response(code, body)
    }

    /// Create an error response
    pub(self) fn error(code: StatusCode, error: impl ToString) -> Response {
        assert!(code.as_u16() >= 400);

        let message = error.to_string();
        error!(message);

        Self::create_response(code, &GenericAPIError { error: message })
    }

    /// Create an error response related to the compute being in an invalid state
    pub(self) fn invalid_status(status: ComputeStatus) -> Response {
        Self::create_response(
            StatusCode::PRECONDITION_FAILED,
            &GenericAPIError {
                error: format!("invalid compute status: {status}"),
            },
        )
    }
}

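The intended calling convention, shown as a small hypothetical handler (`do_work` is made up for illustration): `success` asserts a 2xx code and `error` asserts a code of 400 or above and logs the message, so a mismatched status fails loudly in tests rather than producing a misleading response.

async fn do_work() -> Result<u32, String> {
    Ok(42)
}

// Both arms funnel through JsonResponse, so every body is JSON with the
// right Content-Type, and errors are logged exactly once at the boundary.
async fn example_handler() -> axum::response::Response {
    match do_work().await {
        Ok(result) => JsonResponse::success(http::StatusCode::OK, result),
        Err(e) => JsonResponse::error(http::StatusCode::INTERNAL_SERVER_ERROR, e),
    }
}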
@@ -37,7 +37,7 @@ paths:
                schema:
                  $ref: "#/components/schemas/ComputeMetrics"

  /metrics
  /metrics:
    get:
      tags:
      - Info

20
compute_tools/src/http/routes/check_writability.rs
Normal file
@@ -0,0 +1,20 @@
use std::sync::Arc;

use axum::{extract::State, response::Response};
use compute_api::responses::ComputeStatus;
use http::StatusCode;

use crate::{checker::check_writability, compute::ComputeNode, http::JsonResponse};

/// Check that the compute is currently running.
pub(in crate::http) async fn is_writable(State(compute): State<Arc<ComputeNode>>) -> Response {
    let status = compute.get_status();
    if status != ComputeStatus::Running {
        return JsonResponse::invalid_status(status);
    }

    match check_writability(&compute).await {
        Ok(_) => JsonResponse::success(StatusCode::OK, true),
        Err(e) => JsonResponse::error(StatusCode::INTERNAL_SERVER_ERROR, e),
    }
}
91
compute_tools/src/http/routes/configure.rs
Normal file
@@ -0,0 +1,91 @@
use std::sync::Arc;

use axum::{extract::State, response::Response};
use compute_api::{
    requests::ConfigurationRequest,
    responses::{ComputeStatus, ComputeStatusResponse},
};
use http::StatusCode;
use tokio::task;
use tracing::info;

use crate::{
    compute::{ComputeNode, ParsedSpec},
    http::{extract::Json, JsonResponse},
};

// Accept spec in JSON format and request compute configuration. If anything
// goes wrong after we set the compute status to `ConfigurationPending` and
// update compute state with new spec, we basically leave compute in the
// potentially wrong state. That said, it's control-plane's responsibility to
// watch compute state after reconfiguration request and to clean restart in
// case of errors.
pub(in crate::http) async fn configure(
    State(compute): State<Arc<ComputeNode>>,
    request: Json<ConfigurationRequest>,
) -> Response {
    if !compute.live_config_allowed {
        return JsonResponse::error(
            StatusCode::PRECONDITION_FAILED,
            "live configuration is not allowed for this compute node".to_string(),
        );
    }

    let pspec = match ParsedSpec::try_from(request.spec.clone()) {
        Ok(p) => p,
        Err(e) => return JsonResponse::error(StatusCode::BAD_REQUEST, e),
    };

    // XXX: wrap the state update under the lock in a code block. Otherwise, we
    // will try to `Send` `mut state` into the spawned thread below, which will
    // cause the following rustc error:
    //
    // error: future cannot be sent between threads safely
    {
        let mut state = compute.state.lock().unwrap();
        if !matches!(state.status, ComputeStatus::Empty | ComputeStatus::Running) {
            return JsonResponse::invalid_status(state.status);
        }

        state.pspec = Some(pspec);
        state.set_status(ComputeStatus::ConfigurationPending, &compute.state_changed);
        drop(state);
    }

    // Spawn a blocking thread to wait for compute to become Running. This is
    // needed so as not to block the main pool of workers, and to be able to
    // serve other requests while some particular request is waiting for
    // compute to finish configuration.
    let c = compute.clone();
    let completed = task::spawn_blocking(move || {
        let mut state = c.state.lock().unwrap();
        while state.status != ComputeStatus::Running {
            state = c.state_changed.wait(state).unwrap();
            info!(
                "waiting for compute to become {}, current status: {}",
                ComputeStatus::Running,
                state.status
            );

            if state.status == ComputeStatus::Failed {
                let err = state.error.as_ref().map_or("unknown error", |x| x);
                let msg = format!("compute configuration failed: {:?}", err);
                return Err(msg);
            }
        }

        Ok(())
    })
    .await
    .unwrap();

    if let Err(e) = completed {
        return JsonResponse::error(StatusCode::INTERNAL_SERVER_ERROR, e);
    }

    // Return current compute state if everything went well.
    let state = compute.state.lock().unwrap().clone();
    let body = ComputeStatusResponse::from(&state);

    JsonResponse::success(StatusCode::OK, body)
}
34
compute_tools/src/http/routes/database_schema.rs
Normal file
@@ -0,0 +1,34 @@
use std::sync::Arc;

use axum::{body::Body, extract::State, response::Response};
use http::{header::CONTENT_TYPE, StatusCode};
use serde::Deserialize;

use crate::{
    catalog::{get_database_schema, SchemaDumpError},
    compute::ComputeNode,
    http::{extract::Query, JsonResponse},
};

#[derive(Debug, Clone, Deserialize)]
pub(in crate::http) struct DatabaseSchemaParams {
    database: String,
}

/// Get a schema dump of the requested database.
pub(in crate::http) async fn get_schema_dump(
    params: Query<DatabaseSchemaParams>,
    State(compute): State<Arc<ComputeNode>>,
) -> Response {
    match get_database_schema(&compute, &params.database).await {
        Ok(schema) => Response::builder()
            .status(StatusCode::OK)
            .header(CONTENT_TYPE.as_str(), "application/json")
            .body(Body::from_stream(schema))
            .unwrap(),
        Err(SchemaDumpError::DatabaseDoesNotExist) => {
            JsonResponse::error(StatusCode::NOT_FOUND, SchemaDumpError::DatabaseDoesNotExist)
        }
        Err(e) => JsonResponse::error(StatusCode::INTERNAL_SERVER_ERROR, e),
    }
}
16
compute_tools/src/http/routes/dbs_and_roles.rs
Normal file
@@ -0,0 +1,16 @@
use std::sync::Arc;

use axum::{extract::State, response::Response};
use http::StatusCode;

use crate::{catalog::get_dbs_and_roles, compute::ComputeNode, http::JsonResponse};

/// Get the databases and roles from the compute.
pub(in crate::http) async fn get_catalog_objects(
    State(compute): State<Arc<ComputeNode>>,
) -> Response {
    match get_dbs_and_roles(&compute).await {
        Ok(catalog_objects) => JsonResponse::success(StatusCode::OK, catalog_objects),
        Err(e) => JsonResponse::error(StatusCode::INTERNAL_SERVER_ERROR, e),
    }
}
67
compute_tools/src/http/routes/extension_server.rs
Normal file
@@ -0,0 +1,67 @@
use std::sync::Arc;

use axum::{
    extract::State,
    response::{IntoResponse, Response},
};
use http::StatusCode;
use serde::Deserialize;

use crate::{
    compute::ComputeNode,
    http::{
        extract::{Path, Query},
        JsonResponse,
    },
};

#[derive(Debug, Clone, Deserialize)]
pub(in crate::http) struct ExtensionServerParams {
    is_library: Option<bool>,
}

/// Download a remote extension.
pub(in crate::http) async fn download_extension(
    Path(filename): Path<String>,
    params: Query<ExtensionServerParams>,
    State(compute): State<Arc<ComputeNode>>,
) -> Response {
    // Don't even try to download extensions if no remote storage is configured
    if compute.ext_remote_storage.is_none() {
        return JsonResponse::error(
            StatusCode::PRECONDITION_FAILED,
            "remote storage is not configured",
        );
    }

    let ext = {
        let state = compute.state.lock().unwrap();
        let pspec = state.pspec.as_ref().unwrap();
        let spec = &pspec.spec;

        let remote_extensions = match spec.remote_extensions.as_ref() {
            Some(r) => r,
            None => {
                return JsonResponse::error(
                    StatusCode::CONFLICT,
                    "information about remote extensions is unavailable",
                );
            }
        };

        remote_extensions.get_ext(
            &filename,
            params.is_library.unwrap_or(false),
            &compute.build_tag,
            &compute.pgversion,
        )
    };

    match ext {
        Ok((ext_name, ext_path)) => match compute.download_extension(ext_name, ext_path).await {
            Ok(_) => StatusCode::OK.into_response(),
            Err(e) => JsonResponse::error(StatusCode::INTERNAL_SERVER_ERROR, e),
        },
        Err(e) => JsonResponse::error(StatusCode::NOT_FOUND, e),
    }
}
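A hypothetical client-side call for this route (a sketch using reqwest, not part of the commit): the filename travels in the path and `is_library=true` marks shared libraries rather than SQL extensions.

async fn fetch_extension(base: &str, filename: &str, is_library: bool) -> anyhow::Result<()> {
    // e.g. base = "http://localhost:3080"; both values here are illustrative.
    let url = format!("{base}/extension_server/{filename}?is_library={is_library}");
    let resp = reqwest::Client::new().post(url).send().await?;
    anyhow::ensure!(resp.status().is_success(), "download failed: {}", resp.status());
    Ok(())
}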
45
compute_tools/src/http/routes/extensions.rs
Normal file
@@ -0,0 +1,45 @@
use std::sync::Arc;

use axum::{extract::State, response::Response};
use compute_api::{
    requests::ExtensionInstallRequest,
    responses::{ComputeStatus, ExtensionInstallResponse},
};
use http::StatusCode;

use crate::{
    compute::ComputeNode,
    http::{extract::Json, JsonResponse},
};

/// Install an extension.
pub(in crate::http) async fn install_extension(
    State(compute): State<Arc<ComputeNode>>,
    request: Json<ExtensionInstallRequest>,
) -> Response {
    let status = compute.get_status();
    if status != ComputeStatus::Running {
        return JsonResponse::invalid_status(status);
    }

    match compute
        .install_extension(
            &request.extension,
            &request.database,
            request.version.to_string(),
        )
        .await
    {
        Ok(version) => JsonResponse::success(
            StatusCode::CREATED,
            Some(ExtensionInstallResponse {
                extension: request.extension.clone(),
                version,
            }),
        ),
        Err(e) => JsonResponse::error(
            StatusCode::INTERNAL_SERVER_ERROR,
            format!("failed to install extension: {e}"),
        ),
    }
}
35
compute_tools/src/http/routes/failpoints.rs
Normal file
@@ -0,0 +1,35 @@
use axum::response::{IntoResponse, Response};
use http::StatusCode;
use tracing::info;
use utils::failpoint_support::{apply_failpoint, ConfigureFailpointsRequest};

use crate::http::{extract::Json, JsonResponse};

/// Configure failpoints for testing purposes.
pub(in crate::http) async fn configure_failpoints(
    failpoints: Json<ConfigureFailpointsRequest>,
) -> Response {
    if !fail::has_failpoints() {
        return JsonResponse::error(
            StatusCode::PRECONDITION_FAILED,
            "Cannot manage failpoints because neon was compiled without failpoints support",
        );
    }

    for fp in &*failpoints {
        info!("cfg failpoint: {} {}", fp.name, fp.actions);

        // We recognize one extra "action" that's not natively recognized
        // by the failpoints crate: exit, to immediately kill the process
        let cfg_result = apply_failpoint(&fp.name, &fp.actions);

        if let Err(e) = cfg_result {
            return JsonResponse::error(
                StatusCode::BAD_REQUEST,
                format!("failed to configure failpoints: {e}"),
            );
        }
    }

    StatusCode::OK.into_response()
}
48
compute_tools/src/http/routes/grants.rs
Normal file
@@ -0,0 +1,48 @@
use std::sync::Arc;

use axum::{extract::State, response::Response};
use compute_api::{
    requests::SetRoleGrantsRequest,
    responses::{ComputeStatus, SetRoleGrantsResponse},
};
use http::StatusCode;

use crate::{
    compute::ComputeNode,
    http::{extract::Json, JsonResponse},
};

/// Add grants for a role.
pub(in crate::http) async fn add_grant(
    State(compute): State<Arc<ComputeNode>>,
    request: Json<SetRoleGrantsRequest>,
) -> Response {
    let status = compute.get_status();
    if status != ComputeStatus::Running {
        return JsonResponse::invalid_status(status);
    }

    match compute
        .set_role_grants(
            &request.database,
            &request.schema,
            &request.privileges,
            &request.role,
        )
        .await
    {
        Ok(()) => JsonResponse::success(
            StatusCode::CREATED,
            Some(SetRoleGrantsResponse {
                database: request.database.clone(),
                schema: request.schema.clone(),
                role: request.role.clone(),
                privileges: request.privileges.clone(),
            }),
        ),
        Err(e) => JsonResponse::error(
            StatusCode::INTERNAL_SERVER_ERROR,
            format!("failed to grant role privileges to the schema: {e}"),
        ),
    }
}
11
compute_tools/src/http/routes/info.rs
Normal file
@@ -0,0 +1,11 @@
use axum::response::Response;
use compute_api::responses::InfoResponse;
use http::StatusCode;

use crate::http::JsonResponse;

/// Get information about the physical characteristics of the compute.
pub(in crate::http) async fn get_info() -> Response {
    let num_cpus = num_cpus::get_physical();
    JsonResponse::success(StatusCode::OK, &InfoResponse { num_cpus })
}
18
compute_tools/src/http/routes/insights.rs
Normal file
@@ -0,0 +1,18 @@
use std::sync::Arc;

use axum::{extract::State, response::Response};
use compute_api::responses::ComputeStatus;
use http::StatusCode;

use crate::{compute::ComputeNode, http::JsonResponse};

/// Collect current Postgres usage insights.
pub(in crate::http) async fn get_insights(State(compute): State<Arc<ComputeNode>>) -> Response {
    let status = compute.get_status();
    if status != ComputeStatus::Running {
        return JsonResponse::invalid_status(status);
    }

    let insights = compute.collect_insights().await;
    JsonResponse::success(StatusCode::OK, insights)
}
33
compute_tools/src/http/routes/installed_extensions.rs
Normal file
@@ -0,0 +1,33 @@
use std::sync::Arc;

use axum::{extract::State, response::Response};
use compute_api::responses::ComputeStatus;
use http::StatusCode;
use tokio::task;

use crate::{compute::ComputeNode, http::JsonResponse, installed_extensions};

/// Get a list of installed extensions.
pub(in crate::http) async fn get_installed_extensions(
    State(compute): State<Arc<ComputeNode>>,
) -> Response {
    let status = compute.get_status();
    if status != ComputeStatus::Running {
        return JsonResponse::invalid_status(status);
    }

    let conf = compute.get_conn_conf(None);
    let res = task::spawn_blocking(move || installed_extensions::get_installed_extensions(conf))
        .await
        .unwrap();

    match res {
        Ok(installed_extensions) => {
            JsonResponse::success(StatusCode::OK, Some(installed_extensions))
        }
        Err(e) => JsonResponse::error(
            StatusCode::INTERNAL_SERVER_ERROR,
            format!("failed to get list of installed extensions: {e}"),
        ),
    }
}
32
compute_tools/src/http/routes/metrics.rs
Normal file
@@ -0,0 +1,32 @@
use axum::{body::Body, response::Response};
use http::header::CONTENT_TYPE;
use http::StatusCode;
use metrics::proto::MetricFamily;
use metrics::Encoder;
use metrics::TextEncoder;

use crate::{http::JsonResponse, installed_extensions};

/// Expose Prometheus metrics.
pub(in crate::http) async fn get_metrics() -> Response {
    // When we call TextEncoder::encode() below, it will immediately return an
    // error if a metric family has no metrics, so we need to preemptively
    // filter out metric families with no metrics.
    let metrics = installed_extensions::collect()
        .into_iter()
        .filter(|m| !m.get_metric().is_empty())
        .collect::<Vec<MetricFamily>>();

    let encoder = TextEncoder::new();
    let mut buffer = vec![];

    if let Err(e) = encoder.encode(&metrics, &mut buffer) {
        return JsonResponse::error(StatusCode::INTERNAL_SERVER_ERROR, e);
    }

    Response::builder()
        .status(StatusCode::OK)
        .header(CONTENT_TYPE, encoder.format_type())
        .body(Body::from(buffer))
        .unwrap()
}
12
compute_tools/src/http/routes/metrics_json.rs
Normal file
@@ -0,0 +1,12 @@
use std::sync::Arc;

use axum::{extract::State, response::Response};
use http::StatusCode;

use crate::{compute::ComputeNode, http::JsonResponse};

/// Get startup metrics.
pub(in crate::http) async fn get_metrics(State(compute): State<Arc<ComputeNode>>) -> Response {
    let metrics = compute.state.lock().unwrap().metrics.clone();
    JsonResponse::success(StatusCode::OK, metrics)
}
38
compute_tools/src/http/routes/mod.rs
Normal file
@@ -0,0 +1,38 @@
use compute_api::responses::ComputeStatusResponse;

use crate::compute::ComputeState;

pub(in crate::http) mod check_writability;
pub(in crate::http) mod configure;
pub(in crate::http) mod database_schema;
pub(in crate::http) mod dbs_and_roles;
pub(in crate::http) mod extension_server;
pub(in crate::http) mod extensions;
pub(in crate::http) mod failpoints;
pub(in crate::http) mod grants;
pub(in crate::http) mod info;
pub(in crate::http) mod insights;
pub(in crate::http) mod installed_extensions;
pub(in crate::http) mod metrics;
pub(in crate::http) mod metrics_json;
pub(in crate::http) mod status;
pub(in crate::http) mod terminate;

impl From<&ComputeState> for ComputeStatusResponse {
    fn from(state: &ComputeState) -> Self {
        ComputeStatusResponse {
            start_time: state.start_time,
            tenant: state
                .pspec
                .as_ref()
                .map(|pspec| pspec.tenant_id.to_string()),
            timeline: state
                .pspec
                .as_ref()
                .map(|pspec| pspec.timeline_id.to_string()),
            status: state.status,
            last_active: state.last_active,
            error: state.error.clone(),
        }
    }
}
14
compute_tools/src/http/routes/status.rs
Normal file
@@ -0,0 +1,14 @@
use std::{ops::Deref, sync::Arc};

use axum::{extract::State, http::StatusCode, response::Response};
use compute_api::responses::ComputeStatusResponse;

use crate::{compute::ComputeNode, http::JsonResponse};

/// Retrieve the state of the compute.
pub(in crate::http) async fn get_status(State(compute): State<Arc<ComputeNode>>) -> Response {
    let state = compute.state.lock().unwrap();
    let body = ComputeStatusResponse::from(state.deref());

    JsonResponse::success(StatusCode::OK, body)
}
58
compute_tools/src/http/routes/terminate.rs
Normal file
@@ -0,0 +1,58 @@
use std::sync::Arc;

use axum::{
    extract::State,
    response::{IntoResponse, Response},
};
use compute_api::responses::ComputeStatus;
use http::StatusCode;
use tokio::task;
use tracing::info;

use crate::{
    compute::{forward_termination_signal, ComputeNode},
    http::JsonResponse,
};

/// Terminate the compute.
pub(in crate::http) async fn terminate(State(compute): State<Arc<ComputeNode>>) -> Response {
    {
        let mut state = compute.state.lock().unwrap();
        if state.status == ComputeStatus::Terminated {
            return StatusCode::CREATED.into_response();
        }

        if !matches!(state.status, ComputeStatus::Empty | ComputeStatus::Running) {
            return JsonResponse::invalid_status(state.status);
        }

        state.set_status(ComputeStatus::TerminationPending, &compute.state_changed);
        drop(state);
    }

    forward_termination_signal();
    info!("sent signal and notified waiters");

    // Spawn a blocking thread to wait for compute to become Terminated.
    // This is needed so as not to block the main pool of workers, and
    // to be able to serve other requests while some particular request
    // is waiting for compute to finish termination.
    let c = compute.clone();
    task::spawn_blocking(move || {
        let mut state = c.state.lock().unwrap();
        while state.status != ComputeStatus::Terminated {
            state = c.state_changed.wait(state).unwrap();
            info!(
                "waiting for compute to become {}, current status: {:?}",
                ComputeStatus::Terminated,
                state.status
            );
        }
    })
    .await
    .unwrap();

    info!("terminated Postgres");

    StatusCode::OK.into_response()
}
165
compute_tools/src/http/server.rs
Normal file
@@ -0,0 +1,165 @@
use std::{
    net::{IpAddr, Ipv6Addr, SocketAddr},
    sync::{
        atomic::{AtomicU64, Ordering},
        Arc,
    },
    thread,
    time::Duration,
};

use anyhow::Result;
use axum::{
    response::{IntoResponse, Response},
    routing::{get, post},
    Router,
};
use http::StatusCode;
use tokio::net::TcpListener;
use tower::ServiceBuilder;
use tower_http::{
    request_id::{MakeRequestId, PropagateRequestIdLayer, RequestId, SetRequestIdLayer},
    trace::TraceLayer,
};
use tracing::{debug, error, info, Span};

use super::routes::{
    check_writability, configure, database_schema, dbs_and_roles, extension_server, extensions,
    grants, info as info_route, insights, installed_extensions, metrics, metrics_json, status,
    terminate,
};
use crate::compute::ComputeNode;

async fn handle_404() -> Response {
    StatusCode::NOT_FOUND.into_response()
}

#[derive(Clone, Default)]
struct ComputeMakeRequestId(Arc<AtomicU64>);

impl MakeRequestId for ComputeMakeRequestId {
    fn make_request_id<B>(
        &mut self,
        _request: &http::Request<B>,
    ) -> Option<tower_http::request_id::RequestId> {
        let request_id = self
            .0
            .fetch_add(1, Ordering::SeqCst)
            .to_string()
            .parse()
            .unwrap();

        Some(RequestId::new(request_id))
    }
}

/// Run the HTTP server and wait on it forever.
#[tokio::main]
async fn serve(port: u16, compute: Arc<ComputeNode>) {
    const X_REQUEST_ID: &str = "x-request-id";

    let mut app = Router::new()
        .route("/check_writability", post(check_writability::is_writable))
        .route("/configure", post(configure::configure))
        .route("/database_schema", get(database_schema::get_schema_dump))
        .route("/dbs_and_roles", get(dbs_and_roles::get_catalog_objects))
        .route(
            "/extension_server/*filename",
            post(extension_server::download_extension),
        )
        .route("/extensions", post(extensions::install_extension))
        .route("/grants", post(grants::add_grant))
        .route("/info", get(info_route::get_info))
        .route("/insights", get(insights::get_insights))
        .route(
            "/installed_extensions",
            get(installed_extensions::get_installed_extensions),
        )
        .route("/metrics", get(metrics::get_metrics))
        .route("/metrics.json", get(metrics_json::get_metrics))
        .route("/status", get(status::get_status))
        .route("/terminate", post(terminate::terminate))
        .fallback(handle_404)
        .layer(
            ServiceBuilder::new()
                .layer(SetRequestIdLayer::x_request_id(
                    ComputeMakeRequestId::default(),
                ))
                .layer(
                    TraceLayer::new_for_http()
                        .on_request(|request: &http::Request<_>, _span: &Span| {
                            let request_id = request
                                .headers()
                                .get(X_REQUEST_ID)
                                .unwrap()
                                .to_str()
                                .unwrap();

                            match request.uri().path() {
                                "/metrics" => {
                                    debug!(%request_id, "{} {}", request.method(), request.uri())
                                }
                                _ => info!(%request_id, "{} {}", request.method(), request.uri()),
                            };
                        })
                        .on_response(
                            |response: &http::Response<_>, latency: Duration, _span: &Span| {
                                let request_id = response
                                    .headers()
                                    .get(X_REQUEST_ID)
                                    .unwrap()
                                    .to_str()
                                    .unwrap();

                                info!(
                                    %request_id,
                                    code = response.status().as_u16(),
                                    latency = latency.as_millis()
                                )
                            },
                        ),
                )
                .layer(PropagateRequestIdLayer::x_request_id()),
        )
        .with_state(compute);

    // Add in any testing support
    if cfg!(feature = "testing") {
        use super::routes::failpoints;

        app = app.route("/failpoints", post(failpoints::configure_failpoints))
    }

    // This usually binds to both IPv4 and IPv6 on Linux, see
    // https://github.com/rust-lang/rust/pull/34440 for more information
    let addr = SocketAddr::new(IpAddr::from(Ipv6Addr::UNSPECIFIED), port);
    let listener = match TcpListener::bind(&addr).await {
        Ok(listener) => listener,
        Err(e) => {
            error!(
                "failed to bind the compute_ctl HTTP server to port {}: {}",
                port, e
            );
            return;
        }
    };

    if let Ok(local_addr) = listener.local_addr() {
        info!("compute_ctl HTTP server listening on {}", local_addr);
    } else {
        info!("compute_ctl HTTP server listening on port {}", port);
    }

    if let Err(e) = axum::serve(listener, app).await {
        error!("compute_ctl HTTP server error: {}", e);
    }
}

/// Launch a separate HTTP server thread and return its `JoinHandle`.
pub fn launch_http_server(port: u16, state: &Arc<ComputeNode>) -> Result<thread::JoinHandle<()>> {
    let state = Arc::clone(state);

    Ok(thread::Builder::new()
        .name("http-server".into())
        .spawn(move || serve(port, state))?)
}
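Usage stays the same as with the old hyper server: the caller spawns the thread and keeps the JoinHandle. A minimal sketch, assuming an already constructed ComputeNode (the port is illustrative):

use std::sync::Arc;

fn start_http(compute: &Arc<ComputeNode>) -> anyhow::Result<std::thread::JoinHandle<()>> {
    // `serve` carries its own #[tokio::main] runtime, so the server does not
    // depend on the caller's async context.
    launch_http_server(3080, compute)
}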
@@ -3,8 +3,6 @@
#![deny(unsafe_code)]
#![deny(clippy::undocumented_unsafe_blocks)]

extern crate hyper0 as hyper;

pub mod checker;
pub mod config;
pub mod configurator;

@@ -1,6 +1,6 @@
use anyhow::{Context, Result};
use fail::fail_point;
use postgres::Client;
use postgres::{Client, Transaction};
use tracing::info;

/// Runs a series of migrations on a target database
@@ -20,11 +20,9 @@ impl<'m> MigrationRunner<'m> {
    /// Get the current value of neon_migration.migration_id
    fn get_migration_id(&mut self) -> Result<i64> {
        let query = "SELECT id FROM neon_migration.migration_id";
        let row = self
            .client
            .query_one(query, &[])
            .context("run_migrations get migration_id")?;
            .query_one("SELECT id FROM neon_migration.migration_id", &[])?;

        Ok(row.get::<&str, i64>("id"))
    }
@@ -34,7 +32,7 @@ impl<'m> MigrationRunner<'m> {
    /// This function has a fail point called compute-migration, which can be
    /// used if you would like to fail the application of a series of migrations
    /// at some point.
    fn update_migration_id(&mut self, migration_id: i64) -> Result<()> {
    fn update_migration_id(txn: &mut Transaction, migration_id: i64) -> Result<()> {
        // We use this fail point in order to check that failing in the
        // middle of applying a series of migrations fails in an expected
        // manner
@@ -55,12 +53,11 @@ impl<'m> MigrationRunner<'m> {
            }
        }

        self.client
            .query(
                "UPDATE neon_migration.migration_id SET id = $1",
                &[&migration_id],
            )
            .context("run_migrations update id")?;
        txn.query(
            "UPDATE neon_migration.migration_id SET id = $1",
            &[&migration_id],
        )
        .with_context(|| format!("update neon_migration.migration_id to {migration_id}"))?;

        Ok(())
    }
@@ -81,53 +78,50 @@ impl<'m> MigrationRunner<'m> {
        Ok(())
    }

    /// Run the configrured set of migrations
    /// Run an individual migration
    fn run_migration(txn: &mut Transaction, migration_id: i64, migration: &str) -> Result<()> {
        if migration.starts_with("-- SKIP") {
            info!("Skipping migration id={}", migration_id);

            // Even though we are skipping the migration, updating the
            // migration ID should help keep logic easy to understand when
            // trying to understand the state of a cluster.
            Self::update_migration_id(txn, migration_id)?;
        } else {
            info!("Running migration id={}:\n{}\n", migration_id, migration);

            txn.simple_query(migration)
                .with_context(|| format!("apply migration {migration_id}"))?;

            Self::update_migration_id(txn, migration_id)?;
        }

        Ok(())
    }

    /// Run the configured set of migrations
    pub fn run_migrations(mut self) -> Result<()> {
        self.prepare_database()?;
        self.prepare_database()
            .context("prepare database to handle migrations")?;

        let mut current_migration = self.get_migration_id()? as usize;
        while current_migration < self.migrations.len() {
            macro_rules! migration_id {
                ($cm:expr) => {
                    ($cm + 1) as i64
                };
            }
            // The index lags the migration ID by 1, so the current migration
            // ID is also the next index
            let migration_id = (current_migration + 1) as i64;

            let migration = self.migrations[current_migration];
            let mut txn = self
                .client
                .transaction()
                .with_context(|| format!("begin transaction for migration {migration_id}"))?;

            if migration.starts_with("-- SKIP") {
                info!("Skipping migration id={}", migration_id!(current_migration));
            Self::run_migration(&mut txn, migration_id, self.migrations[current_migration])
                .with_context(|| format!("running migration {migration_id}"))?;

                // Even though we are skipping the migration, updating the
                // migration ID should help keep logic easy to understand when
                // trying to understand the state of a cluster.
                self.update_migration_id(migration_id!(current_migration))?;
            } else {
                info!(
                    "Running migration id={}:\n{}\n",
                    migration_id!(current_migration),
                    migration
                );
            txn.commit()
                .with_context(|| format!("commit transaction for migration {migration_id}"))?;

                self.client
                    .simple_query("BEGIN")
                    .context("begin migration")?;

                self.client.simple_query(migration).with_context(|| {
                    format!(
                        "run_migrations migration id={}",
                        migration_id!(current_migration)
                    )
                })?;

                self.update_migration_id(migration_id!(current_migration))?;

                self.client
                    .simple_query("COMMIT")
                    .context("commit migration")?;

                info!("Finished migration id={}", migration_id!(current_migration));
            }
            info!("Finished migration id={}", migration_id);

            current_migration += 1;
        }

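The essence of the new control flow, reduced to a sketch (not the literal code above): each migration and its ID bump now commit in one transaction, so a crash in between can no longer record a migration as applied without its effects, or vice versa.

use anyhow::Context;

fn apply_all(client: &mut postgres::Client, migrations: &[&str]) -> anyhow::Result<()> {
    let mut current: usize = 0; // in the real code this comes from neon_migration.migration_id
    while current < migrations.len() {
        let id = (current + 1) as i64; // the index lags the migration ID by 1
        let mut txn = client.transaction()?;
        txn.simple_query(migrations[current])
            .with_context(|| format!("apply migration {id}"))?;
        txn.query("UPDATE neon_migration.migration_id SET id = $1", &[&id])?;
        txn.commit()?; // migration and ID update land together
        current += 1;
    }
    Ok(())
}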
@@ -47,6 +47,7 @@ pub enum PerDatabasePhase {
    DeleteDBRoleReferences,
    ChangeSchemaPerms,
    HandleAnonExtension,
    DropSubscriptionsForDeletedDatabases,
}

#[derive(Clone, Debug)]
@@ -74,7 +75,7 @@ pub struct MutableApplyContext {
    pub dbs: HashMap<String, Database>,
}

/// Appply the operations that belong to the given spec apply phase.
/// Apply the operations that belong to the given spec apply phase.
///
/// Commands within a single phase are executed in order of Iterator yield.
/// Commands of ApplySpecPhase::RunInEachDatabase will execute in the database
@@ -326,13 +327,12 @@ async fn get_operations<'a>(
            // Use FORCE to drop database even if there are active connections.
            // We run this from `cloud_admin`, so it should have enough privileges.
            //
            // NB: there could be other db states, which prevent us from dropping
            // the database. For example, if db is used by any active subscription
            // or replication slot.
            // TODO: deal with it once we allow logical replication. Proper fix should
            // involve returning an error code to the control plane, so it could
            // figure out that this is a non-retryable error, return it to the user
            // and fail operation permanently.
            // Such cases are handled in the DropSubscriptionsForDeletedDatabases
            // phase. We do all the cleanup before actually dropping the database.
            let drop_db_query: String = format!(
                "DROP DATABASE IF EXISTS {} WITH (FORCE)",
                &op.name.pg_quote()
@@ -444,6 +444,30 @@ async fn get_operations<'a>(
        }
        ApplySpecPhase::RunInEachDatabase { db, subphase } => {
            match subphase {
                PerDatabasePhase::DropSubscriptionsForDeletedDatabases => {
                    match &db {
                        DB::UserDB(db) => {
                            let drop_subscription_query: String = format!(
                                include_str!("sql/drop_subscription_for_drop_dbs.sql"),
                                datname_str = escape_literal(&db.name),
                            );

                            let operations = vec![Operation {
                                query: drop_subscription_query,
                                comment: Some(format!(
                                    "optionally dropping subscriptions for DB {}",
                                    db.name,
                                )),
                            }]
                            .into_iter();

                            Ok(Box::new(operations))
                        }
                        // skip this cleanup for the system databases
                        // because users can't drop them
                        DB::SystemDB => Ok(Box::new(empty())),
                    }
                }
                PerDatabasePhase::DeleteDBRoleReferences => {
                    let ctx = ctx.read().await;

@@ -474,7 +498,19 @@ async fn get_operations<'a>(
                            ),
                            comment: None,
                        },
                        // Revoke some potentially blocking privileges (Neon-specific currently)
                        Operation {
                            query: format!(
                                include_str!("sql/pre_drop_role_revoke_privileges.sql"),
                                role_name = quoted,
                            ),
                            comment: None,
                        },
                        // This will now only drop privileges of the role
                        // TODO: this is obviously not 100% true because of the above case;
                        // there could still be some privileges that are not revoked. Maybe this
                        // only drops privileges that were granted *by this* role, not *to this* role,
                        // but this has to be checked.
                        Operation {
                            query: format!("DROP OWNED BY {}", quoted),
                            comment: None,
11
compute_tools/src/sql/drop_subscription_for_drop_dbs.sql
Normal file
@@ -0,0 +1,11 @@
DO $$
DECLARE
    subname TEXT;
BEGIN
    FOR subname IN SELECT pg_subscription.subname FROM pg_subscription WHERE subdbid = (SELECT oid FROM pg_database WHERE datname = {datname_str}) LOOP
        EXECUTE format('ALTER SUBSCRIPTION %I DISABLE;', subname);
        EXECUTE format('ALTER SUBSCRIPTION %I SET (slot_name = NONE);', subname);
        EXECUTE format('DROP SUBSCRIPTION %I;', subname);
    END LOOP;
END;
$$;
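For context, this is how the {datname_str} placeholder above gets filled, matching the spec_apply hunk earlier in this diff: the file is pulled in at compile time with include_str! (which expands to a string literal, so it can serve as a format! template), and the database name is passed through escape_literal before being spliced into the DO block.

// From the spec_apply hunk above; `db` is the user database being dropped.
let drop_subscription_query: String = format!(
    include_str!("sql/drop_subscription_for_drop_dbs.sql"),
    datname_str = escape_literal(&db.name),
);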
28
compute_tools/src/sql/pre_drop_role_revoke_privileges.sql
Normal file
@@ -0,0 +1,28 @@
SET SESSION ROLE neon_superuser;

DO $$
DECLARE
    schema TEXT;
    revoke_query TEXT;
BEGIN
    FOR schema IN
        SELECT schema_name
        FROM information_schema.schemata
        -- So far, we have only had issues with the 'public' schema, probably because we do some additional grants,
        -- e.g., make the DB owner the owner of the 'public' schema automatically (when created via API).
        -- See https://github.com/neondatabase/cloud/issues/13582 for the context.
        -- Still, keep the loop because i) it efficiently handles the case when there is no 'public' schema,
        -- ii) it's easy to add more schemas to the list if needed.
        WHERE schema_name IN ('public')
    LOOP
        revoke_query := format(
            'REVOKE ALL PRIVILEGES ON ALL TABLES IN SCHEMA %I FROM {role_name} GRANTED BY neon_superuser;',
            schema
        );

        EXECUTE revoke_query;
    END LOOP;
END;
$$;

RESET ROLE;
@@ -62,7 +62,7 @@ use crate::local_env::LocalEnv;
use crate::postgresql_conf::PostgresConf;
use crate::storage_controller::StorageController;

use compute_api::responses::{ComputeState, ComputeStatus};
use compute_api::responses::{ComputeStatus, ComputeStatusResponse};
use compute_api::spec::{Cluster, ComputeFeature, ComputeMode, ComputeSpec};

// contents of an endpoint.json file
@@ -739,7 +739,7 @@ impl Endpoint {
    }

    // Call the /status HTTP API
    pub async fn get_status(&self) -> Result<ComputeState> {
    pub async fn get_status(&self) -> Result<ComputeStatusResponse> {
        let client = reqwest::Client::new();

        let response = client

@@ -1035,7 +1035,15 @@ async fn main() -> anyhow::Result<()> {
            resp.sort_by(|a, b| a.id.cmp(&b.id));

            let mut table = comfy_table::Table::new();
            table.set_header(["Id", "Version", "Host", "Port", "Http Port", "AZ Id"]);
            table.set_header([
                "Id",
                "Version",
                "Host",
                "Port",
                "Http Port",
                "AZ Id",
                "Scheduling",
            ]);
            for sk in resp {
                table.add_row([
                    format!("{}", sk.id),
@@ -1043,7 +1051,8 @@ async fn main() -> anyhow::Result<()> {
                    sk.host,
                    format!("{}", sk.port),
                    format!("{}", sk.http_port),
                    sk.availability_zone_id.to_string(),
                    sk.availability_zone_id.clone(),
                    String::from(sk.scheduling_policy),
                ]);
            }
            println!("{table}");

@@ -15,6 +15,17 @@ pub struct GenericAPIError {
    pub error: String,
}

#[derive(Debug, Clone, Serialize)]
pub struct InfoResponse {
    pub num_cpus: usize,
}

#[derive(Debug, Clone, Serialize)]
pub struct ExtensionInstallResponse {
    pub extension: PgIdent,
    pub version: ExtVersion,
}

/// Response of the /status API
#[derive(Serialize, Debug, Deserialize)]
#[serde(rename_all = "snake_case")]
@@ -28,16 +39,6 @@ pub struct ComputeStatusResponse {
    pub error: Option<String>,
}

#[derive(Deserialize, Serialize)]
#[serde(rename_all = "snake_case")]
pub struct ComputeState {
    pub status: ComputeStatus,
    /// Timestamp of the last Postgres activity
    #[serde(serialize_with = "rfc3339_serialize")]
    pub last_active: Option<DateTime<Utc>>,
    pub error: Option<String>,
}

#[derive(Serialize, Clone, Copy, Debug, Deserialize, PartialEq, Eq)]
#[serde(rename_all = "snake_case")]
pub enum ComputeStatus {
@@ -78,7 +79,7 @@ impl Display for ComputeStatus {
|
||||
}
|
||||
}
|
||||
|
||||
fn rfc3339_serialize<S>(x: &Option<DateTime<Utc>>, s: S) -> Result<S::Ok, S::Error>
|
||||
pub fn rfc3339_serialize<S>(x: &Option<DateTime<Utc>>, s: S) -> Result<S::Ok, S::Error>
|
||||
where
|
||||
S: Serializer,
|
||||
{
|
||||
|
||||
@@ -320,6 +320,38 @@ impl From<NodeSchedulingPolicy> for String {
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Serialize, Deserialize, Clone, Copy, Eq, PartialEq, Debug)]
|
||||
pub enum SkSchedulingPolicy {
|
||||
Active,
|
||||
Disabled,
|
||||
Decomissioned,
|
||||
}
|
||||
|
||||
impl FromStr for SkSchedulingPolicy {
|
||||
type Err = anyhow::Error;
|
||||
|
||||
fn from_str(s: &str) -> Result<Self, Self::Err> {
|
||||
Ok(match s {
|
||||
"active" => Self::Active,
|
||||
"disabled" => Self::Disabled,
|
||||
"decomissioned" => Self::Decomissioned,
|
||||
_ => return Err(anyhow::anyhow!("Unknown scheduling state '{s}'")),
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
impl From<SkSchedulingPolicy> for String {
|
||||
fn from(value: SkSchedulingPolicy) -> String {
|
||||
use SkSchedulingPolicy::*;
|
||||
match value {
|
||||
Active => "active",
|
||||
Disabled => "disabled",
|
||||
Decomissioned => "decomissioned",
|
||||
}
|
||||
.to_string()
|
||||
}
|
||||
}
|
||||
|
||||
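A quick round-trip check of the conversions this hunk introduces (a sketch; note that "decomissioned" is the crate's own spelling, preserved here verbatim):

    #[test]
    fn sk_scheduling_policy_round_trip() {
        use std::str::FromStr;
        // Parse the stored string form back into the enum...
        let policy = SkSchedulingPolicy::from_str("decomissioned").unwrap();
        assert!(matches!(policy, SkSchedulingPolicy::Decomissioned));
        // ...and convert back without losing the spelling.
        assert_eq!(String::from(policy), "decomissioned");
        // Unknown states surface as errors rather than panicking.
        assert!(SkSchedulingPolicy::from_str("draining").is_err());
    }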
/// Controls how tenant shards are mapped to locations on pageservers, e.g. whether
/// to create secondary locations.
#[derive(Clone, Serialize, Deserialize, Debug, PartialEq, Eq)]
@@ -387,6 +419,7 @@ pub struct SafekeeperDescribeResponse {
    pub port: i32,
    pub http_port: i32,
    pub availability_zone_id: String,
    pub scheduling_policy: SkSchedulingPolicy,
}

#[cfg(test)]

@@ -1460,75 +1460,91 @@ impl TryFrom<u8> for PagestreamBeMessageTag {
// interface allows sending both LSNs, and let the pageserver do the right thing. There was no
// difference in the responses between V1 and V2.
//
#[derive(Clone, Copy)]
// V3 version of protocol adds request ID to all requests. This request ID is also included in response
// as well as other fields from requests, which allows to verify that we receive response for our request.
// We copy fields from request to response to make checking more reliable: request ID is formed from process ID
// and local counter, so in principle there can be duplicated requests IDs if process PID is reused.
//
#[derive(Debug, PartialEq, Eq, Clone, Copy)]
pub enum PagestreamProtocolVersion {
    V2,
    V3,
}

#[derive(Debug, PartialEq, Eq)]
pub type RequestId = u64;

#[derive(Debug, PartialEq, Eq, Clone, Copy)]
pub struct PagestreamRequest {
    pub reqid: RequestId,
    pub request_lsn: Lsn,
    pub not_modified_since: Lsn,
}

#[derive(Debug, PartialEq, Eq, Clone, Copy)]
pub struct PagestreamExistsRequest {
    pub request_lsn: Lsn,
    pub not_modified_since: Lsn,
    pub hdr: PagestreamRequest,
    pub rel: RelTag,
}

#[derive(Debug, PartialEq, Eq)]
#[derive(Debug, PartialEq, Eq, Clone, Copy)]
pub struct PagestreamNblocksRequest {
    pub request_lsn: Lsn,
    pub not_modified_since: Lsn,
    pub hdr: PagestreamRequest,
    pub rel: RelTag,
}

#[derive(Debug, PartialEq, Eq)]
#[derive(Debug, PartialEq, Eq, Clone, Copy)]
pub struct PagestreamGetPageRequest {
    pub request_lsn: Lsn,
    pub not_modified_since: Lsn,
    pub hdr: PagestreamRequest,
    pub rel: RelTag,
    pub blkno: u32,
}

#[derive(Debug, PartialEq, Eq)]
#[derive(Debug, PartialEq, Eq, Clone, Copy)]
pub struct PagestreamDbSizeRequest {
    pub request_lsn: Lsn,
    pub not_modified_since: Lsn,
    pub hdr: PagestreamRequest,
    pub dbnode: u32,
}

#[derive(Debug, PartialEq, Eq)]
#[derive(Debug, PartialEq, Eq, Clone, Copy)]
pub struct PagestreamGetSlruSegmentRequest {
    pub request_lsn: Lsn,
    pub not_modified_since: Lsn,
    pub hdr: PagestreamRequest,
    pub kind: u8,
    pub segno: u32,
}

#[derive(Debug)]
pub struct PagestreamExistsResponse {
    pub req: PagestreamExistsRequest,
    pub exists: bool,
}

#[derive(Debug)]
pub struct PagestreamNblocksResponse {
    pub req: PagestreamNblocksRequest,
    pub n_blocks: u32,
}

#[derive(Debug)]
pub struct PagestreamGetPageResponse {
    pub req: PagestreamGetPageRequest,
    pub page: Bytes,
}

#[derive(Debug)]
pub struct PagestreamGetSlruSegmentResponse {
    pub req: PagestreamGetSlruSegmentRequest,
    pub segment: Bytes,
}

#[derive(Debug)]
pub struct PagestreamErrorResponse {
    pub req: PagestreamRequest,
    pub message: String,
}

#[derive(Debug)]
pub struct PagestreamDbSizeResponse {
    pub req: PagestreamDbSizeRequest,
    pub db_size: i64,
}
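The comment above states that a request ID is formed from a process ID plus a local counter, but the compute-side scheme itself lives in the C client and is not part of this diff. A minimal sketch of one plausible encoding (hypothetical helper, not the actual implementation):

    use std::sync::atomic::{AtomicU32, Ordering};

    // Hypothetical: pack the PID into the high 32 bits and a per-process
    // counter into the low 32 bits, so ids collide only if a PID is reused.
    static COUNTER: AtomicU32 = AtomicU32::new(0);

    fn next_request_id() -> RequestId {
        let pid = std::process::id() as u64;
        let n = COUNTER.fetch_add(1, Ordering::Relaxed) as u64;
        (pid << 32) | n
    }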
@@ -1545,15 +1561,16 @@ pub struct TenantHistorySize {

impl PagestreamFeMessage {
    /// Serialize a compute -> pageserver message. This is currently only used in testing
    /// tools. Always uses protocol version 2.
    /// tools. Always uses protocol version 3.
    pub fn serialize(&self) -> Bytes {
        let mut bytes = BytesMut::new();

        match self {
            Self::Exists(req) => {
                bytes.put_u8(0);
                bytes.put_u64(req.request_lsn.0);
                bytes.put_u64(req.not_modified_since.0);
                bytes.put_u64(req.hdr.reqid);
                bytes.put_u64(req.hdr.request_lsn.0);
                bytes.put_u64(req.hdr.not_modified_since.0);
                bytes.put_u32(req.rel.spcnode);
                bytes.put_u32(req.rel.dbnode);
                bytes.put_u32(req.rel.relnode);
@@ -1562,8 +1579,9 @@ impl PagestreamFeMessage {

            Self::Nblocks(req) => {
                bytes.put_u8(1);
                bytes.put_u64(req.request_lsn.0);
                bytes.put_u64(req.not_modified_since.0);
                bytes.put_u64(req.hdr.reqid);
                bytes.put_u64(req.hdr.request_lsn.0);
                bytes.put_u64(req.hdr.not_modified_since.0);
                bytes.put_u32(req.rel.spcnode);
                bytes.put_u32(req.rel.dbnode);
                bytes.put_u32(req.rel.relnode);
@@ -1572,8 +1590,9 @@ impl PagestreamFeMessage {

            Self::GetPage(req) => {
                bytes.put_u8(2);
                bytes.put_u64(req.request_lsn.0);
                bytes.put_u64(req.not_modified_since.0);
                bytes.put_u64(req.hdr.reqid);
                bytes.put_u64(req.hdr.request_lsn.0);
                bytes.put_u64(req.hdr.not_modified_since.0);
                bytes.put_u32(req.rel.spcnode);
                bytes.put_u32(req.rel.dbnode);
                bytes.put_u32(req.rel.relnode);
@@ -1583,15 +1602,17 @@ impl PagestreamFeMessage {

            Self::DbSize(req) => {
                bytes.put_u8(3);
                bytes.put_u64(req.request_lsn.0);
                bytes.put_u64(req.not_modified_since.0);
                bytes.put_u64(req.hdr.reqid);
                bytes.put_u64(req.hdr.request_lsn.0);
                bytes.put_u64(req.hdr.not_modified_since.0);
                bytes.put_u32(req.dbnode);
            }

            Self::GetSlruSegment(req) => {
                bytes.put_u8(4);
                bytes.put_u64(req.request_lsn.0);
                bytes.put_u64(req.not_modified_since.0);
                bytes.put_u64(req.hdr.reqid);
                bytes.put_u64(req.hdr.request_lsn.0);
                bytes.put_u64(req.hdr.not_modified_since.0);
                bytes.put_u8(req.kind);
                bytes.put_u32(req.segno);
            }
@@ -1600,21 +1621,35 @@ impl PagestreamFeMessage {
        bytes.into()
    }

    pub fn parse<R: std::io::Read>(body: &mut R) -> anyhow::Result<PagestreamFeMessage> {
    pub fn parse<R: std::io::Read>(
        body: &mut R,
        protocol_version: PagestreamProtocolVersion,
    ) -> anyhow::Result<PagestreamFeMessage> {
        // these correspond to the NeonMessageTag enum in pagestore_client.h
        //
        // TODO: consider using protobuf or serde bincode for less error prone
        // serialization.
        let msg_tag = body.read_u8()?;

        // these two fields are the same for every request type
        let request_lsn = Lsn::from(body.read_u64::<BigEndian>()?);
        let not_modified_since = Lsn::from(body.read_u64::<BigEndian>()?);
        let (reqid, request_lsn, not_modified_since) = match protocol_version {
            PagestreamProtocolVersion::V2 => (
                0,
                Lsn::from(body.read_u64::<BigEndian>()?),
                Lsn::from(body.read_u64::<BigEndian>()?),
            ),
            PagestreamProtocolVersion::V3 => (
                body.read_u64::<BigEndian>()?,
                Lsn::from(body.read_u64::<BigEndian>()?),
                Lsn::from(body.read_u64::<BigEndian>()?),
            ),
        };

        match msg_tag {
            0 => Ok(PagestreamFeMessage::Exists(PagestreamExistsRequest {
                request_lsn,
                not_modified_since,
                hdr: PagestreamRequest {
                    reqid,
                    request_lsn,
                    not_modified_since,
                },
                rel: RelTag {
                    spcnode: body.read_u32::<BigEndian>()?,
                    dbnode: body.read_u32::<BigEndian>()?,
@@ -1623,8 +1658,11 @@ impl PagestreamFeMessage {
                },
            })),
            1 => Ok(PagestreamFeMessage::Nblocks(PagestreamNblocksRequest {
                request_lsn,
                not_modified_since,
                hdr: PagestreamRequest {
                    reqid,
                    request_lsn,
                    not_modified_since,
                },
                rel: RelTag {
                    spcnode: body.read_u32::<BigEndian>()?,
                    dbnode: body.read_u32::<BigEndian>()?,
@@ -1633,8 +1671,11 @@ impl PagestreamFeMessage {
                },
            })),
            2 => Ok(PagestreamFeMessage::GetPage(PagestreamGetPageRequest {
                request_lsn,
                not_modified_since,
                hdr: PagestreamRequest {
                    reqid,
                    request_lsn,
                    not_modified_since,
                },
                rel: RelTag {
                    spcnode: body.read_u32::<BigEndian>()?,
                    dbnode: body.read_u32::<BigEndian>()?,
@@ -1644,14 +1685,20 @@ impl PagestreamFeMessage {
                blkno: body.read_u32::<BigEndian>()?,
            })),
            3 => Ok(PagestreamFeMessage::DbSize(PagestreamDbSizeRequest {
                request_lsn,
                not_modified_since,
                hdr: PagestreamRequest {
                    reqid,
                    request_lsn,
                    not_modified_since,
                },
                dbnode: body.read_u32::<BigEndian>()?,
            })),
            4 => Ok(PagestreamFeMessage::GetSlruSegment(
                PagestreamGetSlruSegmentRequest {
                    request_lsn,
                    not_modified_since,
                    hdr: PagestreamRequest {
                        reqid,
                        request_lsn,
                        not_modified_since,
                    },
                    kind: body.read_u8()?,
                    segno: body.read_u32::<BigEndian>()?,
                },
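To make the V3 framing concrete, here is a sketch using only the types and `serialize` method from the hunks above. The LSN and RelTag values are arbitrary, and the layout of the tail elided by the hunk (forknum, blkno) is assumed to mirror the parse path shown above:

    #[test]
    fn v3_getpage_framing_sketch() {
        let msg = PagestreamFeMessage::GetPage(PagestreamGetPageRequest {
            hdr: PagestreamRequest {
                reqid: 1,
                request_lsn: Lsn(0x1000),
                not_modified_since: Lsn(0x800),
            },
            rel: RelTag { spcnode: 1663, dbnode: 5, relnode: 16384, forknum: 0 },
            blkno: 7,
        });
        // V3 framing: 1 tag byte, then reqid / request_lsn / not_modified_since
        // as big-endian u64s (V2 omits the 8 reqid bytes), then the
        // per-message fields.
        let bytes = msg.serialize();
        assert_eq!(bytes[0], 2); // GetPage tag
    }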
@@ -1662,43 +1709,114 @@ impl PagestreamFeMessage {
}

impl PagestreamBeMessage {
    pub fn serialize(&self) -> Bytes {
    pub fn serialize(&self, protocol_version: PagestreamProtocolVersion) -> Bytes {
        let mut bytes = BytesMut::new();

        use PagestreamBeMessageTag as Tag;
        match self {
            Self::Exists(resp) => {
                bytes.put_u8(Tag::Exists as u8);
                bytes.put_u8(resp.exists as u8);
            }
        match protocol_version {
            PagestreamProtocolVersion::V2 => {
                match self {
                    Self::Exists(resp) => {
                        bytes.put_u8(Tag::Exists as u8);
                        bytes.put_u8(resp.exists as u8);
                    }

            Self::Nblocks(resp) => {
                bytes.put_u8(Tag::Nblocks as u8);
                bytes.put_u32(resp.n_blocks);
            }
                    Self::Nblocks(resp) => {
                        bytes.put_u8(Tag::Nblocks as u8);
                        bytes.put_u32(resp.n_blocks);
                    }

            Self::GetPage(resp) => {
                bytes.put_u8(Tag::GetPage as u8);
                bytes.put(&resp.page[..]);
            }
                    Self::GetPage(resp) => {
                        bytes.put_u8(Tag::GetPage as u8);
                        bytes.put(&resp.page[..])
                    }

            Self::Error(resp) => {
                bytes.put_u8(Tag::Error as u8);
                bytes.put(resp.message.as_bytes());
                bytes.put_u8(0); // null terminator
            }
            Self::DbSize(resp) => {
                bytes.put_u8(Tag::DbSize as u8);
                bytes.put_i64(resp.db_size);
            }
                    Self::Error(resp) => {
                        bytes.put_u8(Tag::Error as u8);
                        bytes.put(resp.message.as_bytes());
                        bytes.put_u8(0); // null terminator
                    }
                    Self::DbSize(resp) => {
                        bytes.put_u8(Tag::DbSize as u8);
                        bytes.put_i64(resp.db_size);
                    }

            Self::GetSlruSegment(resp) => {
                bytes.put_u8(Tag::GetSlruSegment as u8);
                bytes.put_u32((resp.segment.len() / BLCKSZ as usize) as u32);
                bytes.put(&resp.segment[..]);
                    Self::GetSlruSegment(resp) => {
                        bytes.put_u8(Tag::GetSlruSegment as u8);
                        bytes.put_u32((resp.segment.len() / BLCKSZ as usize) as u32);
                        bytes.put(&resp.segment[..]);
                    }
                }
            }
            PagestreamProtocolVersion::V3 => {
                match self {
                    Self::Exists(resp) => {
                        bytes.put_u8(Tag::Exists as u8);
                        bytes.put_u64(resp.req.hdr.reqid);
                        bytes.put_u64(resp.req.hdr.request_lsn.0);
                        bytes.put_u64(resp.req.hdr.not_modified_since.0);
                        bytes.put_u32(resp.req.rel.spcnode);
                        bytes.put_u32(resp.req.rel.dbnode);
                        bytes.put_u32(resp.req.rel.relnode);
                        bytes.put_u8(resp.req.rel.forknum);
                        bytes.put_u8(resp.exists as u8);
                    }

                    Self::Nblocks(resp) => {
                        bytes.put_u8(Tag::Nblocks as u8);
                        bytes.put_u64(resp.req.hdr.reqid);
                        bytes.put_u64(resp.req.hdr.request_lsn.0);
                        bytes.put_u64(resp.req.hdr.not_modified_since.0);
                        bytes.put_u32(resp.req.rel.spcnode);
                        bytes.put_u32(resp.req.rel.dbnode);
                        bytes.put_u32(resp.req.rel.relnode);
                        bytes.put_u8(resp.req.rel.forknum);
                        bytes.put_u32(resp.n_blocks);
                    }

                    Self::GetPage(resp) => {
                        bytes.put_u8(Tag::GetPage as u8);
                        bytes.put_u64(resp.req.hdr.reqid);
                        bytes.put_u64(resp.req.hdr.request_lsn.0);
                        bytes.put_u64(resp.req.hdr.not_modified_since.0);
                        bytes.put_u32(resp.req.rel.spcnode);
                        bytes.put_u32(resp.req.rel.dbnode);
                        bytes.put_u32(resp.req.rel.relnode);
                        bytes.put_u8(resp.req.rel.forknum);
                        bytes.put_u32(resp.req.blkno);
                        bytes.put(&resp.page[..])
                    }

                    Self::Error(resp) => {
                        bytes.put_u8(Tag::Error as u8);
                        bytes.put_u64(resp.req.reqid);
                        bytes.put_u64(resp.req.request_lsn.0);
                        bytes.put_u64(resp.req.not_modified_since.0);
                        bytes.put(resp.message.as_bytes());
                        bytes.put_u8(0); // null terminator
                    }
                    Self::DbSize(resp) => {
                        bytes.put_u8(Tag::DbSize as u8);
                        bytes.put_u64(resp.req.hdr.reqid);
                        bytes.put_u64(resp.req.hdr.request_lsn.0);
                        bytes.put_u64(resp.req.hdr.not_modified_since.0);
                        bytes.put_u32(resp.req.dbnode);
                        bytes.put_i64(resp.db_size);
                    }

                    Self::GetSlruSegment(resp) => {
                        bytes.put_u8(Tag::GetSlruSegment as u8);
                        bytes.put_u64(resp.req.hdr.reqid);
                        bytes.put_u64(resp.req.hdr.request_lsn.0);
                        bytes.put_u64(resp.req.hdr.not_modified_since.0);
                        bytes.put_u8(resp.req.kind);
                        bytes.put_u32(resp.req.segno);
                        bytes.put_u32((resp.segment.len() / BLCKSZ as usize) as u32);
                        bytes.put(&resp.segment[..]);
                    }
                }
            }
        }

        bytes.into()
    }
@@ -1710,38 +1828,131 @@ impl PagestreamBeMessage {
        let ok =
            match Tag::try_from(msg_tag).map_err(|tag: u8| anyhow::anyhow!("invalid tag {tag}"))? {
                Tag::Exists => {
                    let exists = buf.read_u8()?;
                    let reqid = buf.read_u64::<BigEndian>()?;
                    let request_lsn = Lsn(buf.read_u64::<BigEndian>()?);
                    let not_modified_since = Lsn(buf.read_u64::<BigEndian>()?);
                    let rel = RelTag {
                        spcnode: buf.read_u32::<BigEndian>()?,
                        dbnode: buf.read_u32::<BigEndian>()?,
                        relnode: buf.read_u32::<BigEndian>()?,
                        forknum: buf.read_u8()?,
                    };
                    let exists = buf.read_u8()? != 0;
                    Self::Exists(PagestreamExistsResponse {
                        exists: exists != 0,
                        req: PagestreamExistsRequest {
                            hdr: PagestreamRequest {
                                reqid,
                                request_lsn,
                                not_modified_since,
                            },
                            rel,
                        },
                        exists,
                    })
                }
                Tag::Nblocks => {
                    let reqid = buf.read_u64::<BigEndian>()?;
                    let request_lsn = Lsn(buf.read_u64::<BigEndian>()?);
                    let not_modified_since = Lsn(buf.read_u64::<BigEndian>()?);
                    let rel = RelTag {
                        spcnode: buf.read_u32::<BigEndian>()?,
                        dbnode: buf.read_u32::<BigEndian>()?,
                        relnode: buf.read_u32::<BigEndian>()?,
                        forknum: buf.read_u8()?,
                    };
                    let n_blocks = buf.read_u32::<BigEndian>()?;
                    Self::Nblocks(PagestreamNblocksResponse { n_blocks })
                    Self::Nblocks(PagestreamNblocksResponse {
                        req: PagestreamNblocksRequest {
                            hdr: PagestreamRequest {
                                reqid,
                                request_lsn,
                                not_modified_since,
                            },
                            rel,
                        },
                        n_blocks,
                    })
                }
                Tag::GetPage => {
                    let reqid = buf.read_u64::<BigEndian>()?;
                    let request_lsn = Lsn(buf.read_u64::<BigEndian>()?);
                    let not_modified_since = Lsn(buf.read_u64::<BigEndian>()?);
                    let rel = RelTag {
                        spcnode: buf.read_u32::<BigEndian>()?,
                        dbnode: buf.read_u32::<BigEndian>()?,
                        relnode: buf.read_u32::<BigEndian>()?,
                        forknum: buf.read_u8()?,
                    };
                    let blkno = buf.read_u32::<BigEndian>()?;
                    let mut page = vec![0; 8192]; // TODO: use MaybeUninit
                    buf.read_exact(&mut page)?;
                    PagestreamBeMessage::GetPage(PagestreamGetPageResponse { page: page.into() })
                    Self::GetPage(PagestreamGetPageResponse {
                        req: PagestreamGetPageRequest {
                            hdr: PagestreamRequest {
                                reqid,
                                request_lsn,
                                not_modified_since,
                            },
                            rel,
                            blkno,
                        },
                        page: page.into(),
                    })
                }
                Tag::Error => {
                    let reqid = buf.read_u64::<BigEndian>()?;
                    let request_lsn = Lsn(buf.read_u64::<BigEndian>()?);
                    let not_modified_since = Lsn(buf.read_u64::<BigEndian>()?);
                    let mut msg = Vec::new();
                    buf.read_until(0, &mut msg)?;
                    let cstring = std::ffi::CString::from_vec_with_nul(msg)?;
                    let rust_str = cstring.to_str()?;
                    PagestreamBeMessage::Error(PagestreamErrorResponse {
                    Self::Error(PagestreamErrorResponse {
                        req: PagestreamRequest {
                            reqid,
                            request_lsn,
                            not_modified_since,
                        },
                        message: rust_str.to_owned(),
                    })
                }
                Tag::DbSize => {
                    let reqid = buf.read_u64::<BigEndian>()?;
                    let request_lsn = Lsn(buf.read_u64::<BigEndian>()?);
                    let not_modified_since = Lsn(buf.read_u64::<BigEndian>()?);
                    let dbnode = buf.read_u32::<BigEndian>()?;
                    let db_size = buf.read_i64::<BigEndian>()?;
                    Self::DbSize(PagestreamDbSizeResponse { db_size })
                    Self::DbSize(PagestreamDbSizeResponse {
                        req: PagestreamDbSizeRequest {
                            hdr: PagestreamRequest {
                                reqid,
                                request_lsn,
                                not_modified_since,
                            },
                            dbnode,
                        },
                        db_size,
                    })
                }
                Tag::GetSlruSegment => {
                    let reqid = buf.read_u64::<BigEndian>()?;
                    let request_lsn = Lsn(buf.read_u64::<BigEndian>()?);
                    let not_modified_since = Lsn(buf.read_u64::<BigEndian>()?);
                    let kind = buf.read_u8()?;
                    let segno = buf.read_u32::<BigEndian>()?;
                    let n_blocks = buf.read_u32::<BigEndian>()?;
                    let mut segment = vec![0; n_blocks as usize * BLCKSZ as usize];
                    buf.read_exact(&mut segment)?;
                    Self::GetSlruSegment(PagestreamGetSlruSegmentResponse {
                        req: PagestreamGetSlruSegmentRequest {
                            hdr: PagestreamRequest {
                                reqid,
                                request_lsn,
                                not_modified_since,
                            },
                            kind,
                            segno,
                        },
                        segment: segment.into(),
                    })
                }
@@ -1780,8 +1991,11 @@ mod tests {
        // Test serialization/deserialization of PagestreamFeMessage
        let messages = vec![
            PagestreamFeMessage::Exists(PagestreamExistsRequest {
                request_lsn: Lsn(4),
                not_modified_since: Lsn(3),
                hdr: PagestreamRequest {
                    reqid: 0,
                    request_lsn: Lsn(4),
                    not_modified_since: Lsn(3),
                },
                rel: RelTag {
                    forknum: 1,
                    spcnode: 2,
@@ -1790,8 +2004,11 @@ mod tests {
                },
            }),
            PagestreamFeMessage::Nblocks(PagestreamNblocksRequest {
                request_lsn: Lsn(4),
                not_modified_since: Lsn(4),
                hdr: PagestreamRequest {
                    reqid: 0,
                    request_lsn: Lsn(4),
                    not_modified_since: Lsn(4),
                },
                rel: RelTag {
                    forknum: 1,
                    spcnode: 2,
@@ -1800,8 +2017,11 @@ mod tests {
                },
            }),
            PagestreamFeMessage::GetPage(PagestreamGetPageRequest {
                request_lsn: Lsn(4),
                not_modified_since: Lsn(3),
                hdr: PagestreamRequest {
                    reqid: 0,
                    request_lsn: Lsn(4),
                    not_modified_since: Lsn(3),
                },
                rel: RelTag {
                    forknum: 1,
                    spcnode: 2,
@@ -1811,14 +2031,19 @@ mod tests {
                blkno: 7,
            }),
            PagestreamFeMessage::DbSize(PagestreamDbSizeRequest {
                request_lsn: Lsn(4),
                not_modified_since: Lsn(3),
                hdr: PagestreamRequest {
                    reqid: 0,
                    request_lsn: Lsn(4),
                    not_modified_since: Lsn(3),
                },
                dbnode: 7,
            }),
        ];
        for msg in messages {
            let bytes = msg.serialize();
            let reconstructed = PagestreamFeMessage::parse(&mut bytes.reader()).unwrap();
            let reconstructed =
                PagestreamFeMessage::parse(&mut bytes.reader(), PagestreamProtocolVersion::V3)
                    .unwrap();
            assert!(msg == reconstructed);
        }
    }
@@ -115,13 +115,15 @@ fn default_max_keys_per_list_response() -> Option<i32> {
}

fn default_azure_conn_pool_size() -> usize {
    // Conservative default: no connection pooling. At time of writing this is the Azure
    // SDK's default as well, due to historic reports of hard-to-reproduce issues
    // By default, the Azure SDK does no connection pooling, due to historic reports of hard-to-reproduce issues
    // (https://github.com/hyperium/hyper/issues/2312)
    //
    // However, using connection pooling is important to avoid exhausting client ports when
    // doing huge numbers of requests (https://github.com/neondatabase/cloud/issues/20971)
    0
    //
    // We therefore enable a modest pool size by default: this may be configured to zero if
    // issues like the alleged upstream hyper issue appear.
    8
}

impl Debug for S3Config {
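For context, a default function like this is typically wired into the config struct via serde's `default` attribute, so the pool size can still be set to zero if the upstream issue resurfaces. A minimal sketch with an illustrative struct name (not the actual remote_storage config type):

    use serde::Deserialize;

    fn default_azure_conn_pool_size() -> usize {
        8
    }

    #[derive(Deserialize)]
    struct AzureConfigSketch {
        // Falls back to default_azure_conn_pool_size() when the key is absent;
        // an explicit 0 opts out of pooling entirely.
        #[serde(default = "default_azure_conn_pool_size")]
        conn_pool_size: usize,
    }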
@@ -38,7 +38,6 @@ pub mod http;

use opentelemetry::trace::TracerProvider;
use opentelemetry::KeyValue;
use opentelemetry_sdk::Resource;
use tracing::Subscriber;
use tracing_subscriber::registry::LookupSpan;
use tracing_subscriber::Layer;
@@ -121,7 +120,10 @@ where
    S: Subscriber + for<'span> LookupSpan<'span>,
{
    // Sets up exporter from the OTEL_EXPORTER_* environment variables.
    let exporter = opentelemetry_otlp::new_exporter().http();
    let exporter = opentelemetry_otlp::SpanExporter::builder()
        .with_http()
        .build()
        .expect("could not initialize opentelemetry exporter");

    // TODO: opentelemetry::global::set_error_handler() with custom handler that
    // bypasses default tracing layers, but logs regular looking log
@@ -132,17 +134,13 @@ where
        opentelemetry_sdk::propagation::TraceContextPropagator::new(),
    );

    let tracer = opentelemetry_otlp::new_pipeline()
        .tracing()
        .with_exporter(exporter)
        .with_trace_config(opentelemetry_sdk::trace::Config::default().with_resource(
            Resource::new(vec![KeyValue::new(
                opentelemetry_semantic_conventions::resource::SERVICE_NAME,
                service_name,
            )]),
        ))
        .install_batch(opentelemetry_sdk::runtime::Tokio)
        .expect("could not initialize opentelemetry exporter")
    let tracer = opentelemetry_sdk::trace::TracerProvider::builder()
        .with_batch_exporter(exporter, opentelemetry_sdk::runtime::Tokio)
        .with_resource(opentelemetry_sdk::Resource::new(vec![KeyValue::new(
            opentelemetry_semantic_conventions::resource::SERVICE_NAME,
            service_name,
        )]))
        .build()
        .tracer("global");

    tracing_opentelemetry::layer().with_tracer(tracer)
@@ -26,6 +26,7 @@ git-version.workspace = true
hex = { workspace = true, features = ["serde"] }
humantime.workspace = true
hyper0 = { workspace = true, features = ["full"] }
inferno.workspace = true
itertools.workspace = true
fail.workspace = true
futures = { workspace = true }
@@ -417,6 +417,7 @@ pub async fn profile_heap_handler(req: Request<Body>) -> Result<Response<Body>,
    enum Format {
        Jemalloc,
        Pprof,
        Svg,
    }

    // Parameters.
@@ -424,9 +425,24 @@ pub async fn profile_heap_handler(req: Request<Body>) -> Result<Response<Body>,
        None => Format::Pprof,
        Some("jemalloc") => Format::Jemalloc,
        Some("pprof") => Format::Pprof,
        Some("svg") => Format::Svg,
        Some(format) => return Err(ApiError::BadRequest(anyhow!("invalid format {format}"))),
    };

    // Functions and mappings to strip when symbolizing pprof profiles. If true,
    // also remove child frames.
    static STRIP_FUNCTIONS: Lazy<Vec<(Regex, bool)>> = Lazy::new(|| {
        vec![
            (Regex::new("^__rust").unwrap(), false),
            (Regex::new("^_start$").unwrap(), false),
            (Regex::new("^irallocx_prof").unwrap(), true),
            (Regex::new("^prof_alloc_prep").unwrap(), true),
            (Regex::new("^std::rt::lang_start").unwrap(), false),
            (Regex::new("^std::sys::backtrace::__rust").unwrap(), false),
        ]
    });
    const STRIP_MAPPINGS: &[&str] = &["libc", "libgcc", "pthread", "vdso"];

    // Obtain profiler handle.
    let mut prof_ctl = jemalloc_pprof::PROF_CTL
        .as_ref()
@@ -464,24 +480,9 @@ pub async fn profile_heap_handler(req: Request<Body>) -> Result<Response<Body>,
                // Symbolize the profile.
                // TODO: consider moving this upstream to jemalloc_pprof and avoiding the
                // serialization roundtrip.
                static STRIP_FUNCTIONS: Lazy<Vec<(Regex, bool)>> = Lazy::new(|| {
                    // Functions to strip from profiles. If true, also remove child frames.
                    vec![
                        (Regex::new("^__rust").unwrap(), false),
                        (Regex::new("^_start$").unwrap(), false),
                        (Regex::new("^irallocx_prof").unwrap(), true),
                        (Regex::new("^prof_alloc_prep").unwrap(), true),
                        (Regex::new("^std::rt::lang_start").unwrap(), false),
                        (Regex::new("^std::sys::backtrace::__rust").unwrap(), false),
                    ]
                });
                let profile = pprof::decode(&bytes)?;
                let profile = pprof::symbolize(profile)?;
                let profile = pprof::strip_locations(
                    profile,
                    &["libc", "libgcc", "pthread", "vdso"],
                    &STRIP_FUNCTIONS,
                );
                let profile = pprof::strip_locations(profile, STRIP_MAPPINGS, &STRIP_FUNCTIONS);
                pprof::encode(&profile)
            })
            .await
@@ -494,6 +495,27 @@ pub async fn profile_heap_handler(req: Request<Body>) -> Result<Response<Body>,
                .body(Body::from(data))
                .map_err(|err| ApiError::InternalServerError(err.into()))
        }

        Format::Svg => {
            let body = tokio::task::spawn_blocking(move || {
                let bytes = prof_ctl.dump_pprof()?;
                let profile = pprof::decode(&bytes)?;
                let profile = pprof::symbolize(profile)?;
                let profile = pprof::strip_locations(profile, STRIP_MAPPINGS, &STRIP_FUNCTIONS);
                let mut opts = inferno::flamegraph::Options::default();
                opts.title = "Heap inuse".to_string();
                opts.count_name = "bytes".to_string();
                pprof::flamegraph(profile, &mut opts)
            })
            .await
            .map_err(|join_err| ApiError::InternalServerError(join_err.into()))?
            .map_err(ApiError::InternalServerError)?;
            Response::builder()
                .status(200)
                .header(CONTENT_TYPE, "image/svg+xml")
                .body(Body::from(body))
                .map_err(|err| ApiError::InternalServerError(err.into()))
        }
    }
}
@@ -1,8 +1,9 @@
use anyhow::bail;
use flate2::write::{GzDecoder, GzEncoder};
use flate2::Compression;
use itertools::Itertools as _;
use once_cell::sync::Lazy;
use pprof::protos::{Function, Line, Message as _, Profile};
use pprof::protos::{Function, Line, Location, Message as _, Profile};
use regex::Regex;

use std::borrow::Cow;
@@ -188,3 +189,59 @@ pub fn strip_locations(

    profile
}

/// Generates an SVG flamegraph from a symbolized pprof profile.
pub fn flamegraph(
    profile: Profile,
    opts: &mut inferno::flamegraph::Options,
) -> anyhow::Result<Vec<u8>> {
    if profile.mapping.iter().any(|m| !m.has_functions) {
        bail!("profile not symbolized");
    }

    // Index locations, functions, and strings.
    let locations: HashMap<u64, Location> =
        profile.location.into_iter().map(|l| (l.id, l)).collect();
    let functions: HashMap<u64, Function> =
        profile.function.into_iter().map(|f| (f.id, f)).collect();
    let strings = profile.string_table;

    // Resolve stacks as function names, and sum sample values per stack. Also reverse the stack,
    // since inferno expects it bottom-up.
    let mut stacks: HashMap<Vec<&str>, i64> = HashMap::new();
    for sample in profile.sample {
        let mut stack = Vec::with_capacity(sample.location_id.len());
        for location in sample.location_id.into_iter().rev() {
            let Some(location) = locations.get(&location) else {
                bail!("missing location {location}");
            };
            for line in location.line.iter().rev() {
                let Some(function) = functions.get(&line.function_id) else {
                    bail!("missing function {}", line.function_id);
                };
                let Some(name) = strings.get(function.name as usize) else {
                    bail!("missing string {}", function.name);
                };
                stack.push(name.as_str());
            }
        }
        let Some(&value) = sample.value.first() else {
            bail!("missing value");
        };
        *stacks.entry(stack).or_default() += value;
    }

    // Construct stack lines for inferno.
    let lines = stacks
        .into_iter()
        .map(|(stack, value)| (stack.into_iter().join(";"), value))
        .map(|(stack, value)| format!("{stack} {value}"))
        .sorted()
        .collect_vec();

    // Construct the flamegraph.
    let mut bytes = Vec::new();
    let lines = lines.iter().map(|line| line.as_str());
    inferno::flamegraph::from_lines(opts, lines, &mut bytes)?;
    Ok(bytes)
}
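For reference, the collapsed-stack lines this new `flamegraph` helper hands to inferno have the form `bottom;middle;leaf <value>`: a semicolon-joined bottom-up stack, a space, then the summed sample value. A minimal standalone sketch of that contract, assuming only the public `inferno::flamegraph` API (the function names and byte counts are made up):

    fn flamegraph_contract_sketch() -> anyhow::Result<()> {
        let mut opts = inferno::flamegraph::Options::default();
        let mut svg = Vec::new();
        // Two hypothetical stacks with their accumulated byte values.
        let lines = ["main;alloc_buffers 1048576", "main;read_page 8192"];
        inferno::flamegraph::from_lines(&mut opts, lines.iter().copied(), &mut svg)?;
        Ok(())
    }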
@@ -96,7 +96,11 @@ impl<T: Send> Sender<T> {
                }
            }
            State::SenderWaitsForReceiverToConsume(_data) => {
                // Really, we shouldn't be polled until receiver has consumed and wakes us.
                // SAFETY: send is single threaded due to `&mut self` requirement,
                // therefore register is not concurrent.
                unsafe {
                    self.state.wake_sender.register(cx.waker());
                }
                Poll::Pending
            }
            State::ReceiverGone => Poll::Ready(Err(SendError::ReceiverGone)),
@@ -449,4 +453,38 @@ mod tests {
        let err = recv_task.await.unwrap().expect_err("should error");
        assert!(matches!(err, RecvError::SenderGone));
    }

    #[tokio::test(start_paused = true)]
    async fn test_receiver_drop_while_waiting_for_receiver_to_consume_unblocks_sender() {
        let (mut sender, receiver) = channel();

        let state = receiver.state.clone();

        sender.send((), |_, _| unreachable!()).await.unwrap();

        assert!(matches!(&*state.value.lock().unwrap(), &State::HasData(_)));

        let unmergeable = sender.send((), |_, _| Err(()));
        let mut unmergeable = std::pin::pin!(unmergeable);
        tokio::select! {
            _ = tokio::time::sleep(FOREVER) => {},
            _ = &mut unmergeable => {
                panic!("unmergeable should not complete");
            },
        }

        assert!(matches!(
            &*state.value.lock().unwrap(),
            &State::SenderWaitsForReceiverToConsume(_)
        ));

        drop(receiver);

        assert!(matches!(
            &*state.value.lock().unwrap(),
            &State::ReceiverGone
        ));

        unmergeable.await.unwrap_err();
    }
}
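The SAFETY comment above hinges on registering the waker before returning `Poll::Pending`, so a wake racing with the poll is never lost. A minimal sketch of the same register-then-recheck pattern with the standard `futures::task::AtomicWaker` (this is not the channel's actual `wake_sender` type, which the hunk does not show):

    use std::future::Future;
    use std::pin::Pin;
    use std::sync::atomic::{AtomicBool, Ordering};
    use std::sync::Arc;
    use std::task::{Context, Poll};

    use futures::task::AtomicWaker;

    #[derive(Default)]
    struct Shared {
        flag: AtomicBool,
        waker: AtomicWaker,
    }

    struct WaitForFlag(Arc<Shared>);

    impl Future for WaitForFlag {
        type Output = ();
        fn poll(self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll<()> {
            // Register before re-checking the condition, so a concurrent
            // wake() between the check and the return cannot be lost.
            self.0.waker.register(cx.waker());
            if self.0.flag.load(Ordering::Acquire) {
                Poll::Ready(())
            } else {
                Poll::Pending
            }
        }
    }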
@@ -95,6 +95,14 @@ impl InterpretedWalRecord {
            && self.metadata_record.is_none()
            && matches!(self.flush_uncommitted, FlushUncommittedRecords::No)
    }

    /// Checks if the WAL record is observed (i.e. contains only metadata
    /// for observed values)
    pub fn is_observed(&self) -> bool {
        self.batch.is_observed()
            && self.metadata_record.is_none()
            && matches!(self.flush_uncommitted, FlushUncommittedRecords::No)
    }
}

/// The interpreted part of the Postgres WAL record which requires metadata

@@ -501,6 +501,11 @@ impl SerializedValueBatch {
        !self.has_data() && self.metadata.is_empty()
    }

    /// Checks if the batch contains only observed values
    pub fn is_observed(&self) -> bool {
        !self.has_data() && !self.metadata.is_empty()
    }

    /// Checks if the batch contains data
    ///
    /// Note that if this returns false, it may still contain observed values or

@@ -60,7 +60,7 @@ impl Client {
    ) -> anyhow::Result<PagestreamClient> {
        let copy_both: tokio_postgres::CopyBothDuplex<bytes::Bytes> = self
            .client
            .copy_both_simple(&format!("pagestream_v2 {tenant_id} {timeline_id}"))
            .copy_both_simple(&format!("pagestream_v3 {tenant_id} {timeline_id}"))
            .await?;
        let Client {
            cancel_on_client_drop,

@@ -2,7 +2,7 @@ use anyhow::Context;
use camino::Utf8PathBuf;
use pageserver_api::key::Key;
use pageserver_api::keyspace::KeySpaceAccum;
use pageserver_api::models::PagestreamGetPageRequest;
use pageserver_api::models::{PagestreamGetPageRequest, PagestreamRequest};

use pageserver_api::shard::TenantShardId;
use tokio_util::sync::CancellationToken;
@@ -322,12 +322,15 @@ async fn main_impl(
                .to_rel_block()
                .expect("we filter non-rel-block keys out above");
            PagestreamGetPageRequest {
                request_lsn: if rng.gen_bool(args.req_latest_probability) {
                    Lsn::MAX
                } else {
                    r.timeline_lsn
                hdr: PagestreamRequest {
                    reqid: 0,
                    request_lsn: if rng.gen_bool(args.req_latest_probability) {
                        Lsn::MAX
                    } else {
                        r.timeline_lsn
                    },
                    not_modified_since: r.timeline_lsn,
                },
                not_modified_since: r.timeline_lsn,
                rel: rel_tag,
                blkno: block_no,
            }

@@ -53,12 +53,10 @@ project_build_tag!(BUILD_TAG);
#[global_allocator]
static GLOBAL: tikv_jemallocator::Jemalloc = tikv_jemallocator::Jemalloc;

// Configure jemalloc to sample allocations for profiles every 1 MB (1 << 20).
// TODO: disabled because concurrent CPU profiles cause seg faults. See:
// https://github.com/neondatabase/neon/issues/10225.
//#[allow(non_upper_case_globals)]
//#[export_name = "malloc_conf"]
//pub static malloc_conf: &[u8] = b"prof:true,prof_active:true,lg_prof_sample:20\0";
/// Configure jemalloc to sample allocations for profiles every 1 MB (1 << 20).
#[allow(non_upper_case_globals)]
#[export_name = "malloc_conf"]
pub static malloc_conf: &[u8] = b"prof:true,prof_active:true,lg_prof_sample:20\0";

const PID_FILE_NAME: &str = "pageserver.pid";
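As a sanity check on the comment above: `lg_prof_sample` is the log2 of the mean number of allocated bytes between heap-profile samples, so `lg_prof_sample:20` means roughly one sample per 2^20 bytes. A trivial sketch of the arithmetic:

    // lg_prof_sample gives log2 of the mean bytes between sampled allocations:
    // lg_prof_sample:20  =>  2^20 bytes  =>  ~1 MiB between samples.
    const LG_PROF_SAMPLE: u32 = 20;
    const SAMPLE_INTERVAL_BYTES: usize = 1 << LG_PROF_SAMPLE;

    fn main() {
        assert_eq!(SAMPLE_INTERVAL_BYTES, 1_048_576);
        println!("jemalloc samples roughly every {SAMPLE_INTERVAL_BYTES} bytes allocated");
    }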
@@ -1854,6 +1854,7 @@ pub(crate) static LIVE_CONNECTIONS: Lazy<IntCounterPairVec> = Lazy::new(|| {

#[derive(Clone, Copy, enum_map::Enum, IntoStaticStr)]
pub(crate) enum ComputeCommandKind {
    PageStreamV3,
    PageStreamV2,
    Basebackup,
    Fullbackup,
@@ -2337,13 +2338,15 @@ macro_rules! redo_bytes_histogram_count_buckets {
pub(crate) struct WalIngestMetrics {
    pub(crate) bytes_received: IntCounter,
    pub(crate) records_received: IntCounter,
    pub(crate) records_observed: IntCounter,
    pub(crate) records_committed: IntCounter,
    pub(crate) records_filtered: IntCounter,
    pub(crate) gap_blocks_zeroed_on_rel_extend: IntCounter,
    pub(crate) clear_vm_bits_unknown: IntCounterVec,
}

pub(crate) static WAL_INGEST: Lazy<WalIngestMetrics> = Lazy::new(|| WalIngestMetrics {
pub(crate) static WAL_INGEST: Lazy<WalIngestMetrics> = Lazy::new(|| {
    WalIngestMetrics {
        bytes_received: register_int_counter!(
            "pageserver_wal_ingest_bytes_received",
            "Bytes of WAL ingested from safekeepers",
@@ -2354,6 +2357,11 @@ pub(crate) static WAL_INGEST: Lazy<WalIngestMetrics> = Lazy::new(|| WalIngestMet
            "Number of WAL records received from safekeepers"
        )
        .expect("failed to define a metric"),
        records_observed: register_int_counter!(
            "pageserver_wal_ingest_records_observed",
            "Number of WAL records observed from safekeepers. These are metadata only records for shard 0."
        )
        .expect("failed to define a metric"),
        records_committed: register_int_counter!(
            "pageserver_wal_ingest_records_committed",
            "Number of WAL records which resulted in writes to pageserver storage"
@@ -2375,6 +2383,7 @@ pub(crate) static WAL_INGEST: Lazy<WalIngestMetrics> = Lazy::new(|| WalIngestMet
            &["entity"],
        )
        .expect("failed to define a metric"),
    }
});

pub(crate) static PAGESERVER_TIMELINE_WAL_RECORDS_RECEIVED: Lazy<IntCounterVec> = Lazy::new(|| {
@@ -17,7 +17,7 @@ use pageserver_api::models::{
    PagestreamErrorResponse, PagestreamExistsRequest, PagestreamExistsResponse,
    PagestreamFeMessage, PagestreamGetPageRequest, PagestreamGetSlruSegmentRequest,
    PagestreamGetSlruSegmentResponse, PagestreamNblocksRequest, PagestreamNblocksResponse,
    PagestreamProtocolVersion,
    PagestreamProtocolVersion, PagestreamRequest,
};
use pageserver_api::shard::TenantShardId;
use postgres_backend::{
@@ -67,7 +67,7 @@ use crate::tenant::PageReconstructError;
use crate::tenant::Timeline;
use crate::{basebackup, timed_after_cancellation};
use pageserver_api::key::rel_block_to_key;
use pageserver_api::reltag::{BlockNumber, RelTag, SlruKind};
use pageserver_api::reltag::SlruKind;
use postgres_ffi::pg_constants::DEFAULTTABLESPACE_OID;
use postgres_ffi::BLCKSZ;

@@ -537,6 +537,23 @@ impl From<WaitLsnError> for QueryError {
    }
}

#[derive(thiserror::Error, Debug)]
struct BatchedPageStreamError {
    req: PagestreamRequest,
    err: PageStreamError,
}

impl std::fmt::Display for BatchedPageStreamError {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        self.err.fmt(f)
    }
}

struct BatchedGetPageRequest {
    req: PagestreamGetPageRequest,
    timer: SmgrOpTimer,
}

enum BatchedFeMessage {
    Exists {
        span: Span,
@@ -554,7 +571,7 @@ enum BatchedFeMessage {
        span: Span,
        shard: timeline::handle::Handle<TenantManagerTypes>,
        effective_request_lsn: Lsn,
        pages: smallvec::SmallVec<[(RelTag, BlockNumber, SmgrOpTimer); 1]>,
        pages: smallvec::SmallVec<[BatchedGetPageRequest; 1]>,
    },
    DbSize {
        span: Span,
@@ -570,7 +587,7 @@ enum BatchedFeMessage {
    },
    RespondError {
        span: Span,
        error: PageStreamError,
        error: BatchedPageStreamError,
    },
}

@@ -595,12 +612,15 @@ impl BatchedFeMessage {
            BatchedFeMessage::GetPage { shard, pages, .. } => (
                shard,
                pages.len(),
                itertools::Either::Right(pages.iter_mut().map(|(_, _, timer)| timer)),
                itertools::Either::Right(pages.iter_mut().map(|p| &mut p.timer)),
            ),
            BatchedFeMessage::RespondError { .. } => return Ok(()),
        };
        let throttled = tokio::select! {
            throttled = shard.pagestream_throttle.throttle(tokens) => { throttled }
            _ = shard.cancel.cancelled() => {
                return Err(QueryError::Shutdown);
            }
            _ = cancel.cancelled() => {
                return Err(QueryError::Shutdown);
            }
@@ -654,6 +674,7 @@ impl PageServerHandler {
        )
    }

    #[allow(clippy::too_many_arguments)]
    async fn pagestream_read_message<IO>(
        pgb: &mut PostgresBackendReader<IO>,
        tenant_id: TenantId,
@@ -661,6 +682,7 @@ impl PageServerHandler {
        timeline_handles: &mut TimelineHandles,
        cancel: &CancellationToken,
        ctx: &RequestContext,
        protocol_version: PagestreamProtocolVersion,
        parent_span: Span,
    ) -> Result<Option<BatchedFeMessage>, QueryError>
    where
@@ -695,11 +717,12 @@ impl PageServerHandler {
        fail::fail_point!("ps::handle-pagerequest-message");

        // parse request
        let neon_fe_msg = PagestreamFeMessage::parse(&mut copy_data_bytes.reader())?;
        let neon_fe_msg =
            PagestreamFeMessage::parse(&mut copy_data_bytes.reader(), protocol_version)?;

        let batched_msg = match neon_fe_msg {
            PagestreamFeMessage::Exists(req) => {
                let span = tracing::info_span!(parent: parent_span, "handle_get_rel_exists_request", rel = %req.rel, req_lsn = %req.request_lsn);
                let span = tracing::info_span!(parent: parent_span, "handle_get_rel_exists_request", rel = %req.rel, req_lsn = %req.hdr.request_lsn);
                let shard = timeline_handles
                    .get(tenant_id, timeline_id, ShardSelector::Zero)
                    .instrument(span.clone()) // sets `shard_id` field
@@ -715,7 +738,7 @@ impl PageServerHandler {
                }
            }
            PagestreamFeMessage::Nblocks(req) => {
                let span = tracing::info_span!(parent: parent_span, "handle_get_nblocks_request", rel = %req.rel, req_lsn = %req.request_lsn);
                let span = tracing::info_span!(parent: parent_span, "handle_get_nblocks_request", rel = %req.rel, req_lsn = %req.hdr.request_lsn);
                let shard = timeline_handles
                    .get(tenant_id, timeline_id, ShardSelector::Zero)
                    .instrument(span.clone()) // sets `shard_id` field
@@ -731,7 +754,7 @@ impl PageServerHandler {
                }
            }
            PagestreamFeMessage::DbSize(req) => {
                let span = tracing::info_span!(parent: parent_span, "handle_db_size_request", dbnode = %req.dbnode, req_lsn = %req.request_lsn);
                let span = tracing::info_span!(parent: parent_span, "handle_db_size_request", dbnode = %req.dbnode, req_lsn = %req.hdr.request_lsn);
                let shard = timeline_handles
                    .get(tenant_id, timeline_id, ShardSelector::Zero)
                    .instrument(span.clone()) // sets `shard_id` field
@@ -747,7 +770,7 @@ impl PageServerHandler {
                }
            }
            PagestreamFeMessage::GetSlruSegment(req) => {
                let span = tracing::info_span!(parent: parent_span, "handle_get_slru_segment_request", kind = %req.kind, segno = %req.segno, req_lsn = %req.request_lsn);
                let span = tracing::info_span!(parent: parent_span, "handle_get_slru_segment_request", kind = %req.kind, segno = %req.segno, req_lsn = %req.hdr.request_lsn);
                let shard = timeline_handles
                    .get(tenant_id, timeline_id, ShardSelector::Zero)
                    .instrument(span.clone()) // sets `shard_id` field
@@ -762,25 +785,23 @@ impl PageServerHandler {
                    req,
                }
            }
            PagestreamFeMessage::GetPage(PagestreamGetPageRequest {
                request_lsn,
                not_modified_since,
                rel,
                blkno,
            }) => {
                let span = tracing::info_span!(parent: parent_span, "handle_get_page_at_lsn_request_batched", req_lsn = %request_lsn);
            PagestreamFeMessage::GetPage(req) => {
                let span = tracing::info_span!(parent: parent_span, "handle_get_page_at_lsn_request_batched", req_lsn = %req.hdr.request_lsn);

                macro_rules! respond_error {
                    ($error:expr) => {{
                        let error = BatchedFeMessage::RespondError {
                            span,
                            error: $error,
                            error: BatchedPageStreamError {
                                req: req.hdr,
                                err: $error,
                            },
                        };
                        Ok(Some(error))
                    }};
                }

                let key = rel_block_to_key(rel, blkno);
                let key = rel_block_to_key(req.rel, req.blkno);
                let shard = match timeline_handles
                    .get(tenant_id, timeline_id, ShardSelector::Page(key))
                    .instrument(span.clone()) // sets `shard_id` field
@@ -814,8 +835,8 @@ impl PageServerHandler {

                let effective_request_lsn = match Self::wait_or_get_last_lsn(
                    &shard,
                    request_lsn,
                    not_modified_since,
                    req.hdr.request_lsn,
                    req.hdr.not_modified_since,
                    &shard.get_latest_gc_cutoff_lsn(),
                    ctx,
                )
@@ -831,7 +852,7 @@ impl PageServerHandler {
                    span,
                    shard,
                    effective_request_lsn,
                    pages: smallvec::smallvec![(rel, blkno, timer)],
                    pages: smallvec::smallvec![BatchedGetPageRequest { req, timer }],
                }
            }
        };
@@ -910,6 +931,7 @@ impl PageServerHandler {
        pgb_writer: &mut PostgresBackend<IO>,
        batch: BatchedFeMessage,
        cancel: &CancellationToken,
        protocol_version: PagestreamProtocolVersion,
        ctx: &RequestContext,
    ) -> Result<(), QueryError>
    where
@@ -917,7 +939,7 @@ impl PageServerHandler {
    {
        // invoke handler function
        let (handler_results, span): (
            Vec<Result<(PagestreamBeMessage, SmgrOpTimer), PageStreamError>>,
            Vec<Result<(PagestreamBeMessage, SmgrOpTimer), BatchedPageStreamError>>,
            _,
        ) = match batch {
            BatchedFeMessage::Exists {
@@ -932,7 +954,8 @@ impl PageServerHandler {
                        .handle_get_rel_exists_request(&shard, &req, ctx)
                        .instrument(span.clone())
                        .await
                        .map(|msg| (msg, timer))],
                        .map(|msg| (msg, timer))
                        .map_err(|err| BatchedPageStreamError { err, req: req.hdr })],
                    span,
                )
            }
@@ -948,7 +971,8 @@ impl PageServerHandler {
                        .handle_get_nblocks_request(&shard, &req, ctx)
                        .instrument(span.clone())
                        .await
                        .map(|msg| (msg, timer))],
                        .map(|msg| (msg, timer))
                        .map_err(|err| BatchedPageStreamError { err, req: req.hdr })],
                    span,
                )
            }
@@ -990,7 +1014,8 @@ impl PageServerHandler {
                        .handle_db_size_request(&shard, &req, ctx)
                        .instrument(span.clone())
                        .await
                        .map(|msg| (msg, timer))],
                        .map(|msg| (msg, timer))
                        .map_err(|err| BatchedPageStreamError { err, req: req.hdr })],
                    span,
                )
            }
@@ -1006,7 +1031,8 @@ impl PageServerHandler {
                        .handle_get_slru_segment_request(&shard, &req, ctx)
                        .instrument(span.clone())
                        .await
                        .map(|msg| (msg, timer))],
                        .map(|msg| (msg, timer))
                        .map_err(|err| BatchedPageStreamError { err, req: req.hdr })],
                    span,
                )
            }
@@ -1022,7 +1048,7 @@ impl PageServerHandler {
        // Other handler errors are sent back as an error message and we stay in pagestream protocol.
        for handler_result in handler_results {
            let (response_msg, timer) = match handler_result {
                Err(e) => match &e {
                Err(e) => match &e.err {
                    PageStreamError::Shutdown => {
                        // If we fail to fulfil a request during shutdown, which may be _because_ of
                        // shutdown, then do not send the error to the client. Instead just drop the
@@ -1041,13 +1067,14 @@ impl PageServerHandler {
                        // print the all details to the log with {:#}, but for the client the
                        // error message is enough. Do not log if shutting down, as the anyhow::Error
                        // here includes cancellation which is not an error.
                        let full = utils::error::report_compact_sources(&e);
                        let full = utils::error::report_compact_sources(&e.err);
                        span.in_scope(|| {
                            error!("error reading relation or page version: {full:#}")
                        });
                        (
                            PagestreamBeMessage::Error(PagestreamErrorResponse {
                                message: e.to_string(),
                                req: e.req,
                                message: e.err.to_string(),
                            }),
                            None, // TODO: measure errors
                        )
@@ -1060,7 +1087,9 @@ impl PageServerHandler {
            // marshal & transmit response message
            //

            pgb_writer.write_message_noflush(&BeMessage::CopyData(&response_msg.serialize()))?;
            pgb_writer.write_message_noflush(&BeMessage::CopyData(
                &response_msg.serialize(protocol_version),
            ))?;

            // We purposefully don't count flush time into the timer.
            //
@@ -1123,7 +1152,7 @@ impl PageServerHandler {
        pgb: &mut PostgresBackend<IO>,
        tenant_id: TenantId,
        timeline_id: TimelineId,
        _protocol_version: PagestreamProtocolVersion,
        protocol_version: PagestreamProtocolVersion,
        ctx: RequestContext,
    ) -> Result<(), QueryError>
    where
@@ -1163,6 +1192,7 @@ impl PageServerHandler {
                timeline_handles,
                request_span,
                pipelining_config,
                protocol_version,
                &ctx,
            )
            .await
@@ -1175,6 +1205,7 @@ impl PageServerHandler {
                timeline_id,
                timeline_handles,
                request_span,
                protocol_version,
                &ctx,
            )
            .await
@@ -1201,6 +1232,7 @@ impl PageServerHandler {
        timeline_id: TimelineId,
        mut timeline_handles: TimelineHandles,
        request_span: Span,
        protocol_version: PagestreamProtocolVersion,
        ctx: &RequestContext,
    ) -> (
        (PostgresBackendReader<IO>, TimelineHandles),
@@ -1218,6 +1250,7 @@ impl PageServerHandler {
                &mut timeline_handles,
                &cancel,
                ctx,
                protocol_version,
                request_span.clone(),
            )
            .await;
@@ -1238,7 +1271,7 @@ impl PageServerHandler {
            }

            let err = self
                .pagesteam_handle_batched_message(pgb_writer, msg, &cancel, ctx)
                .pagesteam_handle_batched_message(pgb_writer, msg, &cancel, protocol_version, ctx)
                .await;
            match err {
                Ok(()) => {}
@@ -1261,6 +1294,7 @@ impl PageServerHandler {
        mut timeline_handles: TimelineHandles,
        request_span: Span,
        pipelining_config: PageServicePipeliningConfigPipelined,
        protocol_version: PagestreamProtocolVersion,
        ctx: &RequestContext,
    ) -> (
        (PostgresBackendReader<IO>, TimelineHandles),
@@ -1358,6 +1392,7 @@ impl PageServerHandler {
                    &mut timeline_handles,
                    &cancel_batcher,
                    &ctx,
                    protocol_version,
                    request_span.clone(),
                )
                .await;
@@ -1403,8 +1438,14 @@ impl PageServerHandler {
                    batch
                        .throttle_and_record_start_processing(&self.cancel)
                        .await?;
                    self.pagesteam_handle_batched_message(pgb_writer, batch, &cancel, &ctx)
                        .await?;
                    self.pagesteam_handle_batched_message(
                        pgb_writer,
                        batch,
                        &cancel,
                        protocol_version,
                        &ctx,
                    )
                    .await?;
                }
            }
        });
@@ -1578,8 +1619,8 @@ impl PageServerHandler {
        let latest_gc_cutoff_lsn = timeline.get_latest_gc_cutoff_lsn();
        let lsn = Self::wait_or_get_last_lsn(
            timeline,
            req.request_lsn,
            req.not_modified_since,
            req.hdr.request_lsn,
            req.hdr.not_modified_since,
            &latest_gc_cutoff_lsn,
            ctx,
        )
@@ -1590,6 +1631,7 @@ impl PageServerHandler {
            .await?;

        Ok(PagestreamBeMessage::Exists(PagestreamExistsResponse {
            req: *req,
            exists,
        }))
    }
@@ -1604,8 +1646,8 @@ impl PageServerHandler {
        let latest_gc_cutoff_lsn = timeline.get_latest_gc_cutoff_lsn();
        let lsn = Self::wait_or_get_last_lsn(
            timeline,
            req.request_lsn,
            req.not_modified_since,
            req.hdr.request_lsn,
            req.hdr.not_modified_since,
            &latest_gc_cutoff_lsn,
            ctx,
        )
@@ -1616,6 +1658,7 @@ impl PageServerHandler {
            .await?;

        Ok(PagestreamBeMessage::Nblocks(PagestreamNblocksResponse {
            req: *req,
            n_blocks,
        }))
    }
@@ -1630,8 +1673,8 @@ impl PageServerHandler {
        let latest_gc_cutoff_lsn = timeline.get_latest_gc_cutoff_lsn();
        let lsn = Self::wait_or_get_last_lsn(
            timeline,
            req.request_lsn,
            req.not_modified_since,
            req.hdr.request_lsn,
            req.hdr.not_modified_since,
            &latest_gc_cutoff_lsn,
            ctx,
        )
@@ -1643,6 +1686,7 @@ impl PageServerHandler {
        let db_size = total_blocks as i64 * BLCKSZ as i64;

        Ok(PagestreamBeMessage::DbSize(PagestreamDbSizeResponse {
            req: *req,
            db_size,
        }))
    }
@@ -1652,9 +1696,9 @@ impl PageServerHandler {
        &mut self,
        timeline: &Timeline,
        effective_lsn: Lsn,
        requests: smallvec::SmallVec<[(RelTag, BlockNumber, SmgrOpTimer); 1]>,
        requests: smallvec::SmallVec<[BatchedGetPageRequest; 1]>,
        ctx: &RequestContext,
    ) -> Vec<Result<(PagestreamBeMessage, SmgrOpTimer), PageStreamError>> {
    ) -> Vec<Result<(PagestreamBeMessage, SmgrOpTimer), BatchedPageStreamError>> {
        debug_assert_current_span_has_tenant_and_timeline_id();

        timeline
@@ -1663,7 +1707,7 @@ impl PageServerHandler {

        let results = timeline
            .get_rel_page_at_lsn_batched(
                requests.iter().map(|(reltag, blkno, _)| (reltag, blkno)),
                requests.iter().map(|p| (&p.req.rel, &p.req.blkno)),
                effective_lsn,
                ctx,
            )
@@ -1675,16 +1719,20 @@ impl PageServerHandler {
            requests
                .into_iter()
                .zip(results.into_iter())
                .map(|((_, _, timer), res)| {
                .map(|(req, res)| {
                    res.map(|page| {
                        (
                            PagestreamBeMessage::GetPage(models::PagestreamGetPageResponse {
                                req: req.req,
                                page,
                            }),
                            timer,
                            req.timer,
                        )
                    })
                    .map_err(PageStreamError::from)
                    .map_err(|e| BatchedPageStreamError {
                        err: PageStreamError::from(e),
                        req: req.req.hdr,
                    })
                }),
        )
    }
@@ -1699,8 +1747,8 @@ impl PageServerHandler {
        let latest_gc_cutoff_lsn = timeline.get_latest_gc_cutoff_lsn();
        let lsn = Self::wait_or_get_last_lsn(
            timeline,
            req.request_lsn,
            req.not_modified_since,
            req.hdr.request_lsn,
            req.hdr.not_modified_since,
            &latest_gc_cutoff_lsn,
            ctx,
        )
@@ -1711,7 +1759,7 @@ impl PageServerHandler {
        let segment = timeline.get_slru_segment(kind, req.segno, lsn, ctx).await?;

        Ok(PagestreamBeMessage::GetSlruSegment(
            PagestreamGetSlruSegmentResponse { segment },
            PagestreamGetSlruSegmentResponse { req: *req, segment },
        ))
    }
@@ -1906,6 +1954,7 @@ struct FullBackupCmd {
struct PageStreamCmd {
    tenant_id: TenantId,
    timeline_id: TimelineId,
    protocol_version: PagestreamProtocolVersion,
}

/// `lease lsn tenant timeline lsn`
@@ -1926,7 +1975,7 @@ enum PageServiceCmd {
}

impl PageStreamCmd {
    fn parse(query: &str) -> anyhow::Result<Self> {
    fn parse(query: &str, protocol_version: PagestreamProtocolVersion) -> anyhow::Result<Self> {
        let parameters = query.split_whitespace().collect_vec();
        if parameters.len() != 2 {
            bail!(
@@ -1941,6 +1990,7 @@ impl PageStreamCmd {
        Ok(Self {
            tenant_id,
            timeline_id,
            protocol_version,
        })
    }
}
@@ -2078,7 +2128,14 @@ impl PageServiceCmd {
            bail!("cannot parse query: {query}")
        };
        match cmd.to_ascii_lowercase().as_str() {
            "pagestream_v2" => Ok(Self::PageStream(PageStreamCmd::parse(other)?)),
            "pagestream_v2" => Ok(Self::PageStream(PageStreamCmd::parse(
                other,
                PagestreamProtocolVersion::V2,
            )?)),
            "pagestream_v3" => Ok(Self::PageStream(PageStreamCmd::parse(
                other,
                PagestreamProtocolVersion::V3,
            )?)),
            "basebackup" => Ok(Self::BaseBackup(BaseBackupCmd::parse(other)?)),
            "fullbackup" => Ok(Self::FullBackup(FullBackupCmd::parse(other)?)),
            "lease" => {
@@ -2160,25 +2217,21 @@ where
            PageServiceCmd::PageStream(PageStreamCmd {
                tenant_id,
                timeline_id,
                protocol_version,
            }) => {
                tracing::Span::current()
                    .record("tenant_id", field::display(tenant_id))
                    .record("timeline_id", field::display(timeline_id));

                self.check_permission(Some(tenant_id))?;
                let command_kind = match protocol_version {
                    PagestreamProtocolVersion::V2 => ComputeCommandKind::PageStreamV2,
                    PagestreamProtocolVersion::V3 => ComputeCommandKind::PageStreamV3,
                };
                COMPUTE_COMMANDS_COUNTERS.for_command(command_kind).inc();

                COMPUTE_COMMANDS_COUNTERS
                    .for_command(ComputeCommandKind::PageStreamV2)
                    .inc();

                self.handle_pagerequests(
                    pgb,
                    tenant_id,
                    timeline_id,
                    PagestreamProtocolVersion::V2,
                    ctx,
                )
                .await?;
                self.handle_pagerequests(pgb, tenant_id, timeline_id, protocol_version, ctx)
                    .await?;
            }
            PageServiceCmd::BaseBackup(BaseBackupCmd {
                tenant_id,
@@ -2357,7 +2410,8 @@ mod tests {
            cmd,
            PageServiceCmd::PageStream(PageStreamCmd {
                tenant_id,
                timeline_id
                timeline_id,
                protocol_version: PagestreamProtocolVersion::V2,
            })
        );
        let cmd = PageServiceCmd::parse(&format!("basebackup {tenant_id} {timeline_id}")).unwrap();
@@ -627,7 +627,7 @@ impl Timeline {
     // cannot overflow, high and low are both smaller than u64::MAX / 2
     let mid = (high + low) / 2;

-    let cmp = self
+    let cmp = match self
         .is_latest_commit_timestamp_ge_than(
             search_timestamp,
             Lsn(mid * 8),
@@ -635,7 +635,16 @@
             &mut found_larger,
             ctx,
         )
-        .await?;
+        .await
+    {
+        Ok(res) => res,
+        Err(PageReconstructError::MissingKey(e)) => {
+            warn!("Missing key while find_lsn_for_timestamp. Either we might have already garbage-collected that data or the key is really missing. Last error: {:#}", e);
+            // Report that we found no commits smaller than the LSN, and log the error.
+            return Ok(LsnForTimestamp::Past(min_lsn));
+        }
+        Err(e) => return Err(e),
+    };

     if cmp {
         high = mid;
@@ -643,6 +652,7 @@
         low = mid + 1;
     }
 }

 // If `found_smaller == true`, `low = t + 1`, where `t` is the target LSN,
 // i.e. the LSN of the last commit record before or at `search_timestamp`.
 // Remove one from `low` to get `t`.

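The loop above is a standard lower-bound bisection: the search runs over `lsn / 8` values (hence the `Lsn(mid * 8)` reconstruction), and the hunk only changes how a missing-key error is absorbed. A minimal self-contained sketch of the invariant the loop maintains, with `is_ge` standing in for `is_latest_commit_timestamp_ge_than` (names here are illustrative, not pageserver code):

// Sketch only: mirrors the invariant of the search above.
fn lower_bound(mut low: u64, mut high: u64, is_ge: impl Fn(u64) -> bool) -> u64 {
    while low < high {
        // cannot overflow: both bounds stay below u64::MAX / 2
        let mid = (high + low) / 2;
        if is_ge(mid) {
            high = mid; // mid already satisfies the predicate
        } else {
            low = mid + 1; // the target lies strictly above mid
        }
    }
    low // first value satisfying the predicate; low - 1 is the last one before it
}
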
@@ -48,6 +48,7 @@ use timeline::compaction::GcCompactJob;
 use timeline::compaction::ScheduledCompactionTask;
 use timeline::import_pgdata;
 use timeline::offload::offload_timeline;
+use timeline::offload::OffloadError;
 use timeline::CompactFlags;
 use timeline::CompactOptions;
 use timeline::CompactionError;
@@ -2039,7 +2040,7 @@ impl Tenant {
 ) -> Result<Arc<Timeline>, TimelineArchivalError> {
     info!("unoffloading timeline");

-    // We activate the timeline below manually, so this must be called on an active timeline.
+    // We activate the timeline below manually, so this must be called on an active tenant.
     // We expect callers of this function to ensure this.
     match self.current_state() {
         TenantState::Activating { .. }
@@ -3100,9 +3101,17 @@ impl Tenant {
         };
         has_pending_task |= pending_task_left.unwrap_or(false);
         if pending_task_left == Some(false) && *can_offload {
-            offload_timeline(self, timeline)
+            pausable_failpoint!("before-timeline-auto-offload");
+            match offload_timeline(self, timeline)
                 .instrument(info_span!("offload_timeline", %timeline_id))
-                .await?;
+                .await
+            {
+                Err(OffloadError::NotArchived) => {
+                    // Ignore this, we likely raced with unarchival
+                    Ok(())
+                }
+                other => other,
+            }?;
         }
     }

@@ -304,6 +304,15 @@ pub enum WaitCompletionError {
 #[derive(Debug, thiserror::Error)]
 #[error("Upload queue either in unexpected state or hasn't downloaded manifest yet")]
 pub struct UploadQueueNotReadyError;

+#[derive(Debug, thiserror::Error)]
+pub enum ShutdownIfArchivedError {
+    #[error(transparent)]
+    NotInitialized(NotInitialized),
+    #[error("timeline is not archived")]
+    NotArchived,
+}
+
 /// Behavioral modes that enable seamless live migration.
 ///
 /// See docs/rfcs/028-pageserver-migration.md to understand how these fit in.
@@ -816,6 +825,55 @@ impl RemoteTimelineClient {
         Ok(need_wait)
     }

+    /// Shuts the timeline client down, but only if the timeline is archived.
+    ///
+    /// This function and [`Self::schedule_index_upload_for_timeline_archival_state`] use the
+    /// same lock to prevent races between unarchival and offloading: unarchival requires the
+    /// upload queue to be initialized, and leaves behind an upload queue where either dirty
+    /// or clean has an `archived_at` of `None`. Offloading leaves behind an uninitialized
+    /// upload queue.
+    pub(crate) async fn shutdown_if_archived(
+        self: &Arc<Self>,
+    ) -> Result<(), ShutdownIfArchivedError> {
+        {
+            let mut guard = self.upload_queue.lock().unwrap();
+            let upload_queue = guard
+                .initialized_mut()
+                .map_err(ShutdownIfArchivedError::NotInitialized)?;
+
+            match (
+                upload_queue.dirty.archived_at.is_none(),
+                upload_queue.clean.0.archived_at.is_none(),
+            ) {
+                // The expected case: the timeline is archived and we don't want to unarchive
+                (false, false) => {}
+                (true, false) => {
+                    tracing::info!("can't shut down timeline: timeline slated for unarchival");
+                    return Err(ShutdownIfArchivedError::NotArchived);
+                }
+                (dirty_archived, true) => {
+                    tracing::info!(%dirty_archived, "can't shut down timeline: timeline not archived in remote storage");
+                    return Err(ShutdownIfArchivedError::NotArchived);
+                }
+            }
+
+            // Set the shutting_down flag while the guard from the archival check is held.
+            // This prevents a race with unarchival, as initialized_mut will not return
+            // an upload queue from this point.
+            // Also launch the queued tasks like shutdown() does.
+            if !upload_queue.shutting_down {
+                upload_queue.shutting_down = true;
+                upload_queue.queued_operations.push_back(UploadOp::Shutdown);
+                // this operation is not counted similar to Barrier
+                self.launch_queued_tasks(upload_queue);
+            }
+        }
+
+        self.shutdown().await;
+
+        Ok(())
+    }
+
     /// Launch an index-file upload operation in the background, setting `import_pgdata` field.
     pub(crate) fn schedule_index_upload_for_import_pgdata_state_update(
         self: &Arc<Self>,

@@ -194,7 +194,9 @@ impl DeleteTimelineFlow {
     super::debug_assert_current_span_has_tenant_and_timeline_id();

     let allow_offloaded_children = false;
-    let (timeline, mut guard) = Self::prepare(tenant, timeline_id, allow_offloaded_children)?;
+    let set_stopping = true;
+    let (timeline, mut guard) =
+        Self::prepare(tenant, timeline_id, allow_offloaded_children, set_stopping)?;

     guard.mark_in_progress()?;

@@ -334,6 +336,7 @@ impl DeleteTimelineFlow {
     tenant: &Tenant,
     timeline_id: TimelineId,
     allow_offloaded_children: bool,
+    set_stopping: bool,
 ) -> Result<(TimelineOrOffloaded, DeletionGuard), DeleteTimelineError> {
     // Note the interaction between this guard and deletion guard.
     // Here we attempt to lock deletion guard when we're holding a lock on timelines.
@@ -389,8 +392,10 @@ impl DeleteTimelineFlow {
         }
     };

-    if let TimelineOrOffloaded::Timeline(timeline) = &timeline {
-        timeline.set_state(TimelineState::Stopping);
+    if set_stopping {
+        if let TimelineOrOffloaded::Timeline(timeline) = &timeline {
+            timeline.set_state(TimelineState::Stopping);
+        }
     }

     Ok((timeline, delete_lock_guard))

@@ -1,10 +1,11 @@
 use std::sync::Arc;

-use pageserver_api::models::TenantState;
+use pageserver_api::models::{TenantState, TimelineState};

 use super::delete::{delete_local_timeline_directory, DeleteTimelineFlow, DeletionGuard};
 use super::Timeline;
 use crate::span::debug_assert_current_span_has_tenant_and_timeline_id;
+use crate::tenant::remote_timeline_client::ShutdownIfArchivedError;
 use crate::tenant::{OffloadedTimeline, Tenant, TenantManifestError, TimelineOrOffloaded};

 #[derive(thiserror::Error, Debug)]
@@ -36,28 +37,29 @@ pub(crate) async fn offload_timeline(
     tracing::info!("offloading archived timeline");

     let allow_offloaded_children = true;
-    let (timeline, guard) =
-        DeleteTimelineFlow::prepare(tenant, timeline.timeline_id, allow_offloaded_children)
-            .map_err(|e| OffloadError::Other(anyhow::anyhow!(e)))?;
+    let set_stopping = false;
+    let (timeline, guard) = DeleteTimelineFlow::prepare(
+        tenant,
+        timeline.timeline_id,
+        allow_offloaded_children,
+        set_stopping,
+    )
+    .map_err(|e| OffloadError::Other(anyhow::anyhow!(e)))?;

     let TimelineOrOffloaded::Timeline(timeline) = timeline else {
         tracing::error!("timeline already offloaded, but given timeline object");
         return Ok(());
     };

-    let is_archived = timeline.is_archived();
-    match is_archived {
-        Some(true) => (),
-        Some(false) => {
-            tracing::warn!("tried offloading a non-archived timeline");
-            return Err(OffloadError::NotArchived);
-        }
-        None => {
-            // This is legal: calls to this function can race with the timeline shutting down
-            tracing::info!("tried offloading a timeline whose remote storage is not initialized");
-            return Err(OffloadError::Cancelled);
+    match timeline.remote_client.shutdown_if_archived().await {
+        Ok(()) => {}
+        Err(ShutdownIfArchivedError::NotInitialized(_)) => {
+            // Either the timeline is being deleted, the operation is being retried, or we are shutting down.
+            // Don't return cancelled here to keep it idempotent.
         }
+        Err(ShutdownIfArchivedError::NotArchived) => return Err(OffloadError::NotArchived),
     }
+    timeline.set_state(TimelineState::Stopping);

     // Now that the Timeline is in Stopping state, request all the related tasks to shut down.
     timeline.shutdown(super::ShutdownMode::Reload).await;

@@ -319,27 +319,11 @@ pub(super) async fn handle_walreceiver_connection(
         return Ok(());
     }

-    async fn commit(
-        modification: &mut DatadirModification<'_>,
-        uncommitted: &mut u64,
-        filtered: &mut u64,
-        ctx: &RequestContext,
-    ) -> anyhow::Result<()> {
-        WAL_INGEST
-            .records_committed
-            .inc_by(*uncommitted - *filtered);
-        modification.commit(ctx).await?;
-        *uncommitted = 0;
-        *filtered = 0;
-        Ok(())
-    }
-
     let status_update = match replication_message {
         ReplicationMessage::RawInterpretedWalRecords(raw) => {
             WAL_INGEST.bytes_received.inc_by(raw.data().len() as u64);

             let mut uncommitted_records = 0;
             let mut filtered_records = 0;

             // This is the end LSN of the raw WAL from which the records
             // were interpreted.
@@ -380,31 +364,23 @@ pub(super) async fn handle_walreceiver_connection(
             if matches!(interpreted.flush_uncommitted, FlushUncommittedRecords::Yes)
                 && uncommitted_records > 0
             {
-                commit(
-                    &mut modification,
-                    &mut uncommitted_records,
-                    &mut filtered_records,
-                    &ctx,
-                )
-                .await?;
+                modification.commit(&ctx).await?;
+                uncommitted_records = 0;
             }

             let local_next_record_lsn = interpreted.next_record_lsn;
-            let ingested = walingest
+
+            if interpreted.is_observed() {
+                WAL_INGEST.records_observed.inc();
+            }
+
+            walingest
                 .ingest_record(interpreted, &mut modification, &ctx)
                 .await
                 .with_context(|| {
                     format!("could not ingest record at {local_next_record_lsn}")
                 })?;

-            if !ingested {
-                tracing::debug!(
-                    "ingest: filtered out record @ LSN {local_next_record_lsn}"
-                );
-                WAL_INGEST.records_filtered.inc();
-                filtered_records += 1;
-            }
-
             uncommitted_records += 1;

             // FIXME: this cannot be made pausable_failpoint without fixing the
@@ -418,13 +394,8 @@ pub(super) async fn handle_walreceiver_connection(
                 || modification.approx_pending_bytes()
                     > DatadirModification::MAX_PENDING_BYTES
             {
-                commit(
-                    &mut modification,
-                    &mut uncommitted_records,
-                    &mut filtered_records,
-                    &ctx,
-                )
-                .await?;
+                modification.commit(&ctx).await?;
+                uncommitted_records = 0;
             }
         }
@@ -442,13 +413,7 @@ pub(super) async fn handle_walreceiver_connection(

         if uncommitted_records > 0 || needs_last_record_lsn_advance {
             // Commit any uncommitted records
-            commit(
-                &mut modification,
-                &mut uncommitted_records,
-                &mut filtered_records,
-                &ctx,
-            )
-            .await?;
+            modification.commit(&ctx).await?;
         }

         if !caught_up && streaming_lsn >= end_of_wal {
@@ -469,6 +434,21 @@ pub(super) async fn handle_walreceiver_connection(
         }

         ReplicationMessage::XLogData(xlog_data) => {
+            async fn commit(
+                modification: &mut DatadirModification<'_>,
+                uncommitted: &mut u64,
+                filtered: &mut u64,
+                ctx: &RequestContext,
+            ) -> anyhow::Result<()> {
+                WAL_INGEST
+                    .records_committed
+                    .inc_by(*uncommitted - *filtered);
+                modification.commit(ctx).await?;
+                *uncommitted = 0;
+                *filtered = 0;
+                Ok(())
+            }
+
             // Pass the WAL data to the decoder, and see if we can decode
             // more records as a result.
             let data = xlog_data.data();

@@ -556,6 +556,9 @@ pageserver_connect(shardno_t shard_no, int elevel)

     switch (neon_protocol_version)
     {
+        case 3:
+            pagestream_query = psprintf("pagestream_v3 %s %s", neon_tenant, neon_timeline);
+            break;
         case 2:
             pagestream_query = psprintf("pagestream_v2 %s %s", neon_tenant, neon_timeline);
             break;
@@ -1135,9 +1138,9 @@ pg_init_libpagestore(void)
                  "Version of compute<->page server protocol",
                  NULL,
                  &neon_protocol_version,
-                 2, /* use protocol version 2 */
-                 2, /* min */
-                 2, /* max */
+                 2, /* use protocol version 2 */
+                 2, /* min */
+                 3, /* max */
                  PGC_SU_BACKEND,
                  0, /* no flags required */
                  NULL, NULL, NULL);

@@ -44,10 +44,15 @@ typedef enum
     T_NeonGetSlruSegmentResponse,
 } NeonMessageTag;

+typedef uint64 NeonRequestId;
+
+/* base struct for C-style inheritance */
+typedef struct
+{
+    NeonMessageTag tag;
+    NeonRequestId reqid;
+    XLogRecPtr lsn;
+    XLogRecPtr not_modified_since;
+} NeonMessage;

 #define messageTag(m) (((const NeonMessage *)(m))->tag)
@@ -67,6 +72,7 @@ typedef enum {
     SLRU_MULTIXACT_OFFSETS
 } SlruKind;

+
 /*--
  * supertype of all the Neon*Request structs below.
  *
@@ -87,37 +93,37 @@ typedef enum {
  *
  * These structs describe the V2 of these requests. (The old now-defunct V1
  * protocol contained just one LSN and a boolean 'latest' flag.)
  *
+ * The V3 version of the protocol adds a request ID to all requests. This request ID is also included
+ * in the response, together with other fields copied from the request, which allows us to verify that
+ * a response really answers our request. We copy the request fields into the response to make the check
+ * more reliable: the request ID is formed from the process ID and a local counter, so in principle
+ * there can be duplicate request IDs if a PID is reused.
  */
-typedef struct
-{
-    NeonMessageTag tag;
-    XLogRecPtr lsn;
-    XLogRecPtr not_modified_since;
-} NeonRequest;
+typedef NeonMessage NeonRequest;

 typedef struct
 {
-    NeonRequest req;
+    NeonRequest hdr;
     NRelFileInfo rinfo;
     ForkNumber forknum;
 } NeonExistsRequest;

 typedef struct
 {
-    NeonRequest req;
+    NeonRequest hdr;
     NRelFileInfo rinfo;
     ForkNumber forknum;
 } NeonNblocksRequest;

 typedef struct
 {
-    NeonRequest req;
+    NeonRequest hdr;
     Oid dbNode;
 } NeonDbSizeRequest;

 typedef struct
 {
-    NeonRequest req;
+    NeonRequest hdr;
     NRelFileInfo rinfo;
     ForkNumber forknum;
     BlockNumber blkno;
@@ -125,32 +131,29 @@ typedef struct

 typedef struct
 {
-    NeonRequest req;
-    SlruKind kind;
-    int segno;
+    NeonRequest hdr;
+    SlruKind kind;
+    int segno;
 } NeonGetSlruSegmentRequest;

 /* supertype of all the Neon*Response structs below */
-typedef struct
-{
-    NeonMessageTag tag;
-} NeonResponse;
+typedef NeonMessage NeonResponse;

 typedef struct
 {
-    NeonMessageTag tag;
+    NeonExistsRequest req;
     bool exists;
 } NeonExistsResponse;

 typedef struct
 {
-    NeonMessageTag tag;
+    NeonNblocksRequest req;
     uint32 n_blocks;
 } NeonNblocksResponse;

 typedef struct
 {
-    NeonMessageTag tag;
+    NeonGetPageRequest req;
     char page[FLEXIBLE_ARRAY_MEMBER];
 } NeonGetPageResponse;

@@ -158,21 +161,21 @@ typedef struct

 typedef struct
 {
-    NeonMessageTag tag;
+    NeonDbSizeRequest req;
     int64 db_size;
 } NeonDbSizeResponse;

 typedef struct
 {
-    NeonMessageTag tag;
+    NeonResponse req;
     char message[FLEXIBLE_ARRAY_MEMBER]; /* null-terminated error
                                           * message */
 } NeonErrorResponse;

 typedef struct
 {
-    NeonMessageTag tag;
-    int n_blocks;
+    NeonGetSlruSegmentRequest req;
+    int n_blocks;
     char data[BLCKSZ * SLRU_PAGES_PER_SEGMENT];
 } NeonGetSlruSegmentResponse;

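The C structs above use embedding for inheritance: in V3 every response begins with a copy of the request it answers, whose first member is the common NeonMessage header. A rough Rust rendering of the same layout, for one message pair (illustrative only; field types are simplified, e.g. rinfo collapsed to three OIDs):

// Sketch of the V3 layout in Rust terms; names mirror the C structs above.
struct NeonMessage {
    tag: u8,
    reqid: u64,
    lsn: u64,
    not_modified_since: u64,
}

struct NeonNblocksRequest {
    hdr: NeonMessage,       // common header, first member as in C
    rinfo: (u32, u32, u32), // spcOid / dbOid / relNumber
    forknum: i8,
}

struct NeonNblocksResponse {
    req: NeonNblocksRequest, // echoed request, lets the client match replies
    n_blocks: u32,
}
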
@@ -120,6 +120,9 @@ static bool (*old_redo_read_buffer_filter) (XLogReaderState *record, uint8 block

 static BlockNumber neon_nblocks(SMgrRelation reln, ForkNumber forknum);

+static uint32 local_request_counter;
+#define GENERATE_REQUEST_ID() (((NeonRequestId)MyProcPid << 32) | ++local_request_counter)
+
 /*
  * Prefetch implementation:
  *

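GENERATE_REQUEST_ID composes a 64-bit id from the backend's PID and a per-backend counter. The same composition as a Rust sketch (names are illustrative, not from the source):

// Sketch of the id layout: pid in the high 32 bits, counter in the low 32.
// wrapping_add mirrors the unsigned overflow of ++local_request_counter in C.
fn generate_request_id(my_proc_pid: u32, counter: &mut u32) -> u64 {
    *counter = counter.wrapping_add(1);
    ((my_proc_pid as u64) << 32) | (*counter as u64)
}

As the header comment notes, ids can repeat if a PID is reused, which is why V3 responses echo the full request header rather than relying on the id alone.
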
@@ -188,15 +191,11 @@ typedef struct PrefetchRequest
     uint8 status;  /* see PrefetchStatus for valid values */
     uint8 flags;   /* see PrefetchRequestFlags */
     neon_request_lsns request_lsns;
+    NeonRequestId reqid;
     NeonResponse *response; /* may be null */
     uint64 my_ring_index;
 } PrefetchRequest;

-StaticAssertDecl(sizeof(PrefetchRequest) == 64,
-                 "We prefer to have a power-of-2 size for this struct. Please"
-                 " try to find an alternative solution before reaching to"
-                 " increase the expected size here");
-
 /* prefetch buffer lookup hash table */

 typedef struct PrfHashEntry

@@ -365,6 +364,7 @@ compact_prefetch_buffers(void)
     target_slot->shard_no = source_slot->shard_no;
     target_slot->status = source_slot->status;
     target_slot->response = source_slot->response;
+    target_slot->reqid = source_slot->reqid;
     target_slot->request_lsns = source_slot->request_lsns;
     target_slot->my_ring_index = empty_ring_index;

@@ -798,7 +798,8 @@ prefetch_do_request(PrefetchRequest *slot, neon_request_lsns *force_request_lsns
     uint64 mySlotNo PG_USED_FOR_ASSERTS_ONLY = slot->my_ring_index;

     NeonGetPageRequest request = {
-        .req.tag = T_NeonGetPageRequest,
+        .hdr.tag = T_NeonGetPageRequest,
+        .hdr.reqid = GENERATE_REQUEST_ID(),
         /* lsn and not_modified_since are filled in below */
         .rinfo = BufTagGetNRelFileInfo(slot->buftag),
         .forknum = slot->buftag.forkNum,
@@ -807,14 +808,16 @@ prefetch_do_request(PrefetchRequest *slot, neon_request_lsns *force_request_lsns

     Assert(mySlotNo == MyPState->ring_unused);

+    slot->reqid = request.hdr.reqid;
+
     if (force_request_lsns)
         slot->request_lsns = *force_request_lsns;
     else
         neon_get_request_lsns(BufTagGetNRelFileInfo(slot->buftag),
                               slot->buftag.forkNum, slot->buftag.blockNum,
                               &slot->request_lsns, 1, NULL);
-    request.req.lsn = slot->request_lsns.request_lsn;
-    request.req.not_modified_since = slot->request_lsns.not_modified_since;
+    request.hdr.lsn = slot->request_lsns.request_lsn;
+    request.hdr.not_modified_since = slot->request_lsns.not_modified_since;

     Assert(slot->response == NULL);
     Assert(slot->my_ring_index == MyPState->ring_unused);

@@ -1102,6 +1105,12 @@ Retry:
     return min_ring_index;
 }

+static bool
+equal_requests(NeonRequest* a, NeonRequest* b)
+{
+    return a->reqid == b->reqid && a->lsn == b->lsn && a->not_modified_since == b->not_modified_since;
+}
+

 /*
  * Note: this function can get canceled and use a long jump to the next catch

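equal_requests is the core of the V3 echo check used by every caller below: a response is accepted only if it repeats the request id and both LSNs of the request it answers. The same predicate in Rust terms (a sketch; the header type is a simplified assumption, not the repo's type):

struct RequestHdr {
    reqid: u64,
    lsn: u64,
    not_modified_since: u64,
}

// Mirrors the C predicate above: all three echoed header fields must match.
fn equal_requests(a: &RequestHdr, b: &RequestHdr) -> bool {
    a.reqid == b.reqid && a.lsn == b.lsn && a.not_modified_since == b.not_modified_since
}
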
@@ -1184,6 +1193,10 @@ nm_pack_request(NeonRequest *msg)
     initStringInfo(&s);

     pq_sendbyte(&s, msg->tag);
+    if (neon_protocol_version >= 3)
+    {
+        pq_sendint64(&s, msg->reqid);
+    }
     pq_sendint64(&s, msg->lsn);
     pq_sendint64(&s, msg->not_modified_since);

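nm_pack_request frames every request header the same way: a tag byte, then (V3 only) the 64-bit request id, then the request LSN and the not_modified_since horizon, each big-endian per the libpq pq_sendint64 convention. A self-contained Rust sketch of the same framing (names assumed, not from the source):

// Sketch of the V2/V3 request header framing (big-endian, as in libpq).
fn pack_request_header(
    buf: &mut Vec<u8>,
    tag: u8,
    reqid: u64,
    lsn: u64,
    not_modified_since: u64,
    protocol_version: u32,
) {
    buf.push(tag);
    if protocol_version >= 3 {
        buf.extend_from_slice(&reqid.to_be_bytes());
    }
    buf.extend_from_slice(&lsn.to_be_bytes());
    buf.extend_from_slice(&not_modified_since.to_be_bytes());
}

The unpacking side in nm_unpack_response, shown next, reads the same three V3 fields back before dispatching on the tag.
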
@@ -1261,8 +1274,16 @@ NeonResponse *
 nm_unpack_response(StringInfo s)
 {
     NeonMessageTag tag = pq_getmsgbyte(s);
+    NeonResponse resp_hdr = {0}; /* make valgrind happy */
     NeonResponse *resp = NULL;

+    resp_hdr.tag = tag;
+    if (neon_protocol_version >= 3)
+    {
+        resp_hdr.reqid = pq_getmsgint64(s);
+        resp_hdr.lsn = pq_getmsgint64(s);
+        resp_hdr.not_modified_since = pq_getmsgint64(s);
+    }
     switch (tag)
     {
         /* pagestore -> pagestore_client */
@@ -1270,7 +1291,14 @@ nm_unpack_response(StringInfo s)
         {
             NeonExistsResponse *msg_resp = palloc0(sizeof(NeonExistsResponse));

-            msg_resp->tag = tag;
+            if (neon_protocol_version >= 3)
+            {
+                NInfoGetSpcOid(msg_resp->req.rinfo) = pq_getmsgint(s, 4);
+                NInfoGetDbOid(msg_resp->req.rinfo) = pq_getmsgint(s, 4);
+                NInfoGetRelNumber(msg_resp->req.rinfo) = pq_getmsgint(s, 4);
+                msg_resp->req.forknum = pq_getmsgbyte(s);
+            }
+            msg_resp->req.hdr = resp_hdr;
             msg_resp->exists = pq_getmsgbyte(s);
             pq_getmsgend(s);

@@ -1282,7 +1310,14 @@ nm_unpack_response(StringInfo s)
         {
             NeonNblocksResponse *msg_resp = palloc0(sizeof(NeonNblocksResponse));

-            msg_resp->tag = tag;
+            if (neon_protocol_version >= 3)
+            {
+                NInfoGetSpcOid(msg_resp->req.rinfo) = pq_getmsgint(s, 4);
+                NInfoGetDbOid(msg_resp->req.rinfo) = pq_getmsgint(s, 4);
+                NInfoGetRelNumber(msg_resp->req.rinfo) = pq_getmsgint(s, 4);
+                msg_resp->req.forknum = pq_getmsgbyte(s);
+            }
+            msg_resp->req.hdr = resp_hdr;
             msg_resp->n_blocks = pq_getmsgint(s, 4);
             pq_getmsgend(s);

@@ -1295,12 +1330,20 @@ nm_unpack_response(StringInfo s)
             NeonGetPageResponse *msg_resp;

             msg_resp = MemoryContextAllocZero(MyPState->bufctx, PS_GETPAGERESPONSE_SIZE);
-            msg_resp->tag = tag;
+            if (neon_protocol_version >= 3)
+            {
+                NInfoGetSpcOid(msg_resp->req.rinfo) = pq_getmsgint(s, 4);
+                NInfoGetDbOid(msg_resp->req.rinfo) = pq_getmsgint(s, 4);
+                NInfoGetRelNumber(msg_resp->req.rinfo) = pq_getmsgint(s, 4);
+                msg_resp->req.forknum = pq_getmsgbyte(s);
+                msg_resp->req.blkno = pq_getmsgint(s, 4);
+            }
+            msg_resp->req.hdr = resp_hdr;
             /* XXX: should be varlena */
             memcpy(msg_resp->page, pq_getmsgbytes(s, BLCKSZ), BLCKSZ);
             pq_getmsgend(s);

-            Assert(msg_resp->tag == T_NeonGetPageResponse);
+            Assert(msg_resp->req.hdr.tag == T_NeonGetPageResponse);

             resp = (NeonResponse *) msg_resp;
             break;
@@ -1310,7 +1353,11 @@ nm_unpack_response(StringInfo s)
         {
             NeonDbSizeResponse *msg_resp = palloc0(sizeof(NeonDbSizeResponse));

-            msg_resp->tag = tag;
+            if (neon_protocol_version >= 3)
+            {
+                msg_resp->req.dbNode = pq_getmsgint(s, 4);
+            }
+            msg_resp->req.hdr = resp_hdr;
             msg_resp->db_size = pq_getmsgint64(s);
             pq_getmsgend(s);

@@ -1328,7 +1375,7 @@ nm_unpack_response(StringInfo s)
             msglen = strlen(msgtext);

             msg_resp = palloc0(sizeof(NeonErrorResponse) + msglen + 1);
-            msg_resp->tag = tag;
+            msg_resp->req = resp_hdr;
             memcpy(msg_resp->message, msgtext, msglen + 1);
             pq_getmsgend(s);

@@ -1339,9 +1386,17 @@ nm_unpack_response(StringInfo s)
         case T_NeonGetSlruSegmentResponse:
         {
             NeonGetSlruSegmentResponse *msg_resp;
-            int n_blocks = pq_getmsgint(s, 4);
-            msg_resp = palloc(sizeof(NeonGetSlruSegmentResponse));
-            msg_resp->tag = tag;
+            int n_blocks;
+            msg_resp = palloc0(sizeof(NeonGetSlruSegmentResponse));
+
+            if (neon_protocol_version >= 3)
+            {
+                msg_resp->req.kind = pq_getmsgbyte(s);
+                msg_resp->req.segno = pq_getmsgint(s, 4);
+            }
+            msg_resp->req.hdr = resp_hdr;
+
+            n_blocks = pq_getmsgint(s, 4);
             msg_resp->n_blocks = n_blocks;
             memcpy(msg_resp->data, pq_getmsgbytes(s, n_blocks * BLCKSZ), n_blocks * BLCKSZ);
             pq_getmsgend(s);

@@ -1386,8 +1441,8 @@ nm_to_string(NeonMessage *msg)
             appendStringInfoString(&s, "{\"type\": \"NeonExistsRequest\"");
             appendStringInfo(&s, ", \"rinfo\": \"%u/%u/%u\"", RelFileInfoFmt(msg_req->rinfo));
             appendStringInfo(&s, ", \"forknum\": %d", msg_req->forknum);
-            appendStringInfo(&s, ", \"lsn\": \"%X/%X\"", LSN_FORMAT_ARGS(msg_req->req.lsn));
-            appendStringInfo(&s, ", \"not_modified_since\": \"%X/%X\"", LSN_FORMAT_ARGS(msg_req->req.not_modified_since));
+            appendStringInfo(&s, ", \"lsn\": \"%X/%X\"", LSN_FORMAT_ARGS(msg_req->hdr.lsn));
+            appendStringInfo(&s, ", \"not_modified_since\": \"%X/%X\"", LSN_FORMAT_ARGS(msg_req->hdr.not_modified_since));
             appendStringInfoChar(&s, '}');
             break;
         }
@@ -1399,8 +1454,8 @@ nm_to_string(NeonMessage *msg)
             appendStringInfoString(&s, "{\"type\": \"NeonNblocksRequest\"");
             appendStringInfo(&s, ", \"rinfo\": \"%u/%u/%u\"", RelFileInfoFmt(msg_req->rinfo));
             appendStringInfo(&s, ", \"forknum\": %d", msg_req->forknum);
-            appendStringInfo(&s, ", \"lsn\": \"%X/%X\"", LSN_FORMAT_ARGS(msg_req->req.lsn));
-            appendStringInfo(&s, ", \"not_modified_since\": \"%X/%X\"", LSN_FORMAT_ARGS(msg_req->req.not_modified_since));
+            appendStringInfo(&s, ", \"lsn\": \"%X/%X\"", LSN_FORMAT_ARGS(msg_req->hdr.lsn));
+            appendStringInfo(&s, ", \"not_modified_since\": \"%X/%X\"", LSN_FORMAT_ARGS(msg_req->hdr.not_modified_since));
             appendStringInfoChar(&s, '}');
             break;
         }
@@ -1413,8 +1468,8 @@ nm_to_string(NeonMessage *msg)
             appendStringInfo(&s, ", \"rinfo\": \"%u/%u/%u\"", RelFileInfoFmt(msg_req->rinfo));
             appendStringInfo(&s, ", \"forknum\": %d", msg_req->forknum);
             appendStringInfo(&s, ", \"blkno\": %u", msg_req->blkno);
-            appendStringInfo(&s, ", \"lsn\": \"%X/%X\"", LSN_FORMAT_ARGS(msg_req->req.lsn));
-            appendStringInfo(&s, ", \"not_modified_since\": \"%X/%X\"", LSN_FORMAT_ARGS(msg_req->req.not_modified_since));
+            appendStringInfo(&s, ", \"lsn\": \"%X/%X\"", LSN_FORMAT_ARGS(msg_req->hdr.lsn));
+            appendStringInfo(&s, ", \"not_modified_since\": \"%X/%X\"", LSN_FORMAT_ARGS(msg_req->hdr.not_modified_since));
             appendStringInfoChar(&s, '}');
             break;
         }
@@ -1424,8 +1479,8 @@ nm_to_string(NeonMessage *msg)

             appendStringInfoString(&s, "{\"type\": \"NeonDbSizeRequest\"");
             appendStringInfo(&s, ", \"dbnode\": \"%u\"", msg_req->dbNode);
-            appendStringInfo(&s, ", \"lsn\": \"%X/%X\"", LSN_FORMAT_ARGS(msg_req->req.lsn));
-            appendStringInfo(&s, ", \"not_modified_since\": \"%X/%X\"", LSN_FORMAT_ARGS(msg_req->req.not_modified_since));
+            appendStringInfo(&s, ", \"lsn\": \"%X/%X\"", LSN_FORMAT_ARGS(msg_req->hdr.lsn));
+            appendStringInfo(&s, ", \"not_modified_since\": \"%X/%X\"", LSN_FORMAT_ARGS(msg_req->hdr.not_modified_since));
             appendStringInfoChar(&s, '}');
             break;
         }
@@ -1436,8 +1491,8 @@ nm_to_string(NeonMessage *msg)
             appendStringInfoString(&s, "{\"type\": \"NeonGetSlruSegmentRequest\"");
             appendStringInfo(&s, ", \"kind\": %u", msg_req->kind);
             appendStringInfo(&s, ", \"segno\": %u", msg_req->segno);
-            appendStringInfo(&s, ", \"lsn\": \"%X/%X\"", LSN_FORMAT_ARGS(msg_req->req.lsn));
-            appendStringInfo(&s, ", \"not_modified_since\": \"%X/%X\"", LSN_FORMAT_ARGS(msg_req->req.not_modified_since));
+            appendStringInfo(&s, ", \"lsn\": \"%X/%X\"", LSN_FORMAT_ARGS(msg_req->hdr.lsn));
+            appendStringInfo(&s, ", \"not_modified_since\": \"%X/%X\"", LSN_FORMAT_ARGS(msg_req->hdr.not_modified_since));
             appendStringInfoChar(&s, '}');
             break;
         }

@@ -2312,39 +2367,64 @@ neon_exists(SMgrRelation reln, ForkNumber forkNum)
                           REL_METADATA_PSEUDO_BLOCKNO, &request_lsns, 1, NULL);
     {
         NeonExistsRequest request = {
-            .req.tag = T_NeonExistsRequest,
-            .req.lsn = request_lsns.request_lsn,
-            .req.not_modified_since = request_lsns.not_modified_since,
+            .hdr.tag = T_NeonExistsRequest,
+            .hdr.reqid = GENERATE_REQUEST_ID(),
+            .hdr.lsn = request_lsns.request_lsn,
+            .hdr.not_modified_since = request_lsns.not_modified_since,
             .rinfo = InfoFromSMgrRel(reln),
             .forknum = forkNum
         };

         resp = page_server_request(&request);

         switch (resp->tag)
         {
             case T_NeonExistsResponse:
-                exists = ((NeonExistsResponse *) resp)->exists;
-                break;
+            {
+                NeonExistsResponse *exists_resp = (NeonExistsResponse *) resp;
+
+                if (neon_protocol_version >= 3)
+                {
+                    if (!equal_requests(resp, &request.hdr) ||
+                        !RelFileInfoEquals(exists_resp->req.rinfo, request.rinfo) ||
+                        exists_resp->req.forknum != request.forknum)
+                    {
+                        NEON_PANIC_CONNECTION_STATE(-1, PANIC,
+                                                    "Unexpected response {reqid=%lx,lsn=%X/%08X, since=%X/%08X, rel=%u/%u/%u.%u} to exists request {reqid=%lx,lsn=%X/%08X, since=%X/%08X, rel=%u/%u/%u.%u}",
+                                                    resp->reqid, LSN_FORMAT_ARGS(resp->lsn), LSN_FORMAT_ARGS(resp->not_modified_since), RelFileInfoFmt(exists_resp->req.rinfo), exists_resp->req.forknum,
+                                                    request.hdr.reqid, LSN_FORMAT_ARGS(request.hdr.lsn), LSN_FORMAT_ARGS(request.hdr.not_modified_since), RelFileInfoFmt(request.rinfo), request.forknum);
+                    }
+                }
+                exists = exists_resp->exists;
+                break;
+            }
             case T_NeonErrorResponse:
+                if (neon_protocol_version >= 3)
+                {
+                    if (!equal_requests(resp, &request.hdr))
+                    {
+                        elog(WARNING, NEON_TAG "Error message {reqid=%lx,lsn=%X/%08X, since=%X/%08X} doesn't match exists request {reqid=%lx,lsn=%X/%08X, since=%X/%08X}",
+                             resp->reqid, LSN_FORMAT_ARGS(resp->lsn), LSN_FORMAT_ARGS(resp->not_modified_since),
+                             request.hdr.reqid, LSN_FORMAT_ARGS(request.hdr.lsn), LSN_FORMAT_ARGS(request.hdr.not_modified_since));
+                    }
+                }
                 ereport(ERROR,
                         (errcode(ERRCODE_IO_ERROR),
-                         errmsg(NEON_TAG "could not read relation existence of rel %u/%u/%u.%u from page server at lsn %X/%08X",
+                         errmsg(NEON_TAG "[reqid %lx] could not read relation existence of rel %u/%u/%u.%u from page server at lsn %X/%08X",
+                                resp->reqid,
                                 RelFileInfoFmt(InfoFromSMgrRel(reln)),
                                 forkNum,
                                 LSN_FORMAT_ARGS(request_lsns.effective_request_lsn)),
                          errdetail("page server returned error: %s",
                                    ((NeonErrorResponse *) resp)->message)));
                 break;

             default:
                 NEON_PANIC_CONNECTION_STATE(-1, PANIC,
                                             "Expected Exists (0x%02x) or Error (0x%02x) response to ExistsRequest, but got 0x%02x",
                                             T_NeonExistsResponse, T_NeonErrorResponse, resp->tag);
         }
         pfree(resp);
     }

     return exists;
 }

@@ -2952,15 +3032,43 @@ Retry:
     switch (resp->tag)
     {
         case T_NeonGetPageResponse:
-            memcpy(buffer, ((NeonGetPageResponse *) resp)->page, BLCKSZ);
+        {
+            NeonGetPageResponse *getpage_resp = (NeonGetPageResponse *) resp;
+
+            if (neon_protocol_version >= 3)
+            {
+                if (resp->reqid != slot->reqid ||
+                    resp->lsn != slot->request_lsns.request_lsn ||
+                    resp->not_modified_since != slot->request_lsns.not_modified_since ||
+                    !RelFileInfoEquals(getpage_resp->req.rinfo, rinfo) ||
+                    getpage_resp->req.forknum != forkNum ||
+                    getpage_resp->req.blkno != base_blockno + i)
+                {
+                    NEON_PANIC_CONNECTION_STATE(-1, PANIC,
+                                                "Unexpected response {reqid=%lx,lsn=%X/%08X, since=%X/%08X, rel=%u/%u/%u.%u, block=%u} to get page request {reqid=%lx,lsn=%X/%08X, since=%X/%08X, rel=%u/%u/%u.%u, block=%u}",
+                                                resp->reqid, LSN_FORMAT_ARGS(resp->lsn), LSN_FORMAT_ARGS(resp->not_modified_since), RelFileInfoFmt(getpage_resp->req.rinfo), getpage_resp->req.forknum, getpage_resp->req.blkno,
+                                                slot->reqid, LSN_FORMAT_ARGS(slot->request_lsns.request_lsn), LSN_FORMAT_ARGS(slot->request_lsns.not_modified_since), RelFileInfoFmt(rinfo), forkNum, base_blockno + i);
+                }
+            }
+            memcpy(buffer, getpage_resp->page, BLCKSZ);
             lfc_write(rinfo, forkNum, blockno, buffer);
             break;
+        }
         case T_NeonErrorResponse:
+            if (neon_protocol_version >= 3)
+            {
+                if (resp->reqid != slot->reqid ||
+                    resp->lsn != slot->request_lsns.request_lsn ||
+                    resp->not_modified_since != slot->request_lsns.not_modified_since)
+                {
+                    elog(WARNING, NEON_TAG "Error message {reqid=%lx,lsn=%X/%08X, since=%X/%08X} doesn't match get page request {reqid=%lx,lsn=%X/%08X, since=%X/%08X}",
+                         resp->reqid, LSN_FORMAT_ARGS(resp->lsn), LSN_FORMAT_ARGS(resp->not_modified_since),
+                         slot->reqid, LSN_FORMAT_ARGS(slot->request_lsns.request_lsn), LSN_FORMAT_ARGS(slot->request_lsns.not_modified_since));
+                }
+            }
             ereport(ERROR,
                     (errcode(ERRCODE_IO_ERROR),
-                     errmsg(NEON_TAG "[shard %d] could not read block %u in rel %u/%u/%u.%u from page server at lsn %X/%08X",
-                            slot->shard_no, blockno, RelFileInfoFmt(rinfo),
+                     errmsg(NEON_TAG "[shard %d, reqid %lx] could not read block %u in rel %u/%u/%u.%u from page server at lsn %X/%08X",
+                            slot->shard_no, resp->reqid, blockno, RelFileInfoFmt(rinfo),
                             forkNum, LSN_FORMAT_ARGS(reqlsns->effective_request_lsn)),
                      errdetail("page server returned error: %s",
                                ((NeonErrorResponse *) resp)->message)));

@@ -3443,47 +3551,72 @@ neon_nblocks(SMgrRelation reln, ForkNumber forknum)

     {
         NeonNblocksRequest request = {
-            .req.tag = T_NeonNblocksRequest,
-            .req.lsn = request_lsns.request_lsn,
-            .req.not_modified_since = request_lsns.not_modified_since,
+            .hdr.tag = T_NeonNblocksRequest,
+            .hdr.reqid = GENERATE_REQUEST_ID(),
+            .hdr.lsn = request_lsns.request_lsn,
+            .hdr.not_modified_since = request_lsns.not_modified_since,
             .rinfo = InfoFromSMgrRel(reln),
             .forknum = forknum,
         };

         resp = page_server_request(&request);

         switch (resp->tag)
         {
             case T_NeonNblocksResponse:
-                n_blocks = ((NeonNblocksResponse *) resp)->n_blocks;
-                break;
+            {
+                NeonNblocksResponse *relsize_resp = (NeonNblocksResponse *) resp;
+
+                if (neon_protocol_version >= 3)
+                {
+                    if (!equal_requests(resp, &request.hdr) ||
+                        !RelFileInfoEquals(relsize_resp->req.rinfo, request.rinfo) ||
+                        relsize_resp->req.forknum != forknum)
+                    {
+                        NEON_PANIC_CONNECTION_STATE(-1, PANIC,
+                                                    "Unexpected response {reqid=%lx,lsn=%X/%08X, since=%X/%08X, rel=%u/%u/%u.%u} to get relsize request {reqid=%lx,lsn=%X/%08X, since=%X/%08X, rel=%u/%u/%u.%u}",
+                                                    resp->reqid, LSN_FORMAT_ARGS(resp->lsn), LSN_FORMAT_ARGS(resp->not_modified_since), RelFileInfoFmt(relsize_resp->req.rinfo), relsize_resp->req.forknum,
+                                                    request.hdr.reqid, LSN_FORMAT_ARGS(request.hdr.lsn), LSN_FORMAT_ARGS(request.hdr.not_modified_since), RelFileInfoFmt(request.rinfo), forknum);
+                    }
+                }
+                n_blocks = relsize_resp->n_blocks;
+                break;
+            }
             case T_NeonErrorResponse:
+                if (neon_protocol_version >= 3)
+                {
+                    if (!equal_requests(resp, &request.hdr))
+                    {
+                        elog(WARNING, NEON_TAG "Error message {reqid=%lx,lsn=%X/%08X, since=%X/%08X} doesn't match get relsize request {reqid=%lx,lsn=%X/%08X, since=%X/%08X}",
+                             resp->reqid, LSN_FORMAT_ARGS(resp->lsn), LSN_FORMAT_ARGS(resp->not_modified_since),
+                             request.hdr.reqid, LSN_FORMAT_ARGS(request.hdr.lsn), LSN_FORMAT_ARGS(request.hdr.not_modified_since));
+                    }
+                }
                 ereport(ERROR,
                         (errcode(ERRCODE_IO_ERROR),
-                         errmsg(NEON_TAG "could not read relation size of rel %u/%u/%u.%u from page server at lsn %X/%08X",
+                         errmsg(NEON_TAG "[reqid %lx] could not read relation size of rel %u/%u/%u.%u from page server at lsn %X/%08X",
+                                resp->reqid,
                                 RelFileInfoFmt(InfoFromSMgrRel(reln)),
                                 forknum,
                                 LSN_FORMAT_ARGS(request_lsns.effective_request_lsn)),
                          errdetail("page server returned error: %s",
                                    ((NeonErrorResponse *) resp)->message)));
                 break;

             default:
                 NEON_PANIC_CONNECTION_STATE(-1, PANIC,
                                             "Expected Nblocks (0x%02x) or Error (0x%02x) response to NblocksRequest, but got 0x%02x",
                                             T_NeonNblocksResponse, T_NeonErrorResponse, resp->tag);
         }
         update_cached_relsize(InfoFromSMgrRel(reln), forknum, n_blocks);

         neon_log(SmgrTrace, "neon_nblocks: rel %u/%u/%u fork %u (request LSN %X/%08X): %u blocks",
                  RelFileInfoFmt(InfoFromSMgrRel(reln)),
                  forknum,
                  LSN_FORMAT_ARGS(request_lsns.effective_request_lsn),
                  n_blocks);

         pfree(resp);
     }

     return n_blocks;
 }

@@ -3503,40 +3636,64 @@ neon_dbsize(Oid dbNode)

     {
         NeonDbSizeRequest request = {
-            .req.tag = T_NeonDbSizeRequest,
-            .req.lsn = request_lsns.request_lsn,
-            .req.not_modified_since = request_lsns.not_modified_since,
+            .hdr.tag = T_NeonDbSizeRequest,
+            .hdr.reqid = GENERATE_REQUEST_ID(),
+            .hdr.lsn = request_lsns.request_lsn,
+            .hdr.not_modified_since = request_lsns.not_modified_since,
             .dbNode = dbNode,
         };

         resp = page_server_request(&request);

         switch (resp->tag)
         {
             case T_NeonDbSizeResponse:
-                db_size = ((NeonDbSizeResponse *) resp)->db_size;
-                break;
+            {
+                NeonDbSizeResponse *dbsize_resp = (NeonDbSizeResponse *) resp;
+
+                if (neon_protocol_version >= 3)
+                {
+                    if (!equal_requests(resp, &request.hdr) ||
+                        dbsize_resp->req.dbNode != dbNode)
+                    {
+                        NEON_PANIC_CONNECTION_STATE(-1, PANIC,
+                                                    "Unexpected response {reqid=%lx,lsn=%X/%08X, since=%X/%08X, dbNode=%u} to get DB size request {reqid=%lx,lsn=%X/%08X, since=%X/%08X, dbNode=%u}",
+                                                    resp->reqid, LSN_FORMAT_ARGS(resp->lsn), LSN_FORMAT_ARGS(resp->not_modified_since), dbsize_resp->req.dbNode,
+                                                    request.hdr.reqid, LSN_FORMAT_ARGS(request.hdr.lsn), LSN_FORMAT_ARGS(request.hdr.not_modified_since), dbNode);
+                    }
+                }
+                db_size = dbsize_resp->db_size;
+                break;
+            }
             case T_NeonErrorResponse:
+                if (neon_protocol_version >= 3)
+                {
+                    if (!equal_requests(resp, &request.hdr))
+                    {
+                        elog(WARNING, NEON_TAG "Error message {reqid=%lx,lsn=%X/%08X, since=%X/%08X} doesn't match get DB size request {reqid=%lx,lsn=%X/%08X, since=%X/%08X}",
+                             resp->reqid, LSN_FORMAT_ARGS(resp->lsn), LSN_FORMAT_ARGS(resp->not_modified_since),
+                             request.hdr.reqid, LSN_FORMAT_ARGS(request.hdr.lsn), LSN_FORMAT_ARGS(request.hdr.not_modified_since));
+                    }
+                }
                 ereport(ERROR,
                         (errcode(ERRCODE_IO_ERROR),
-                         errmsg(NEON_TAG "could not read db size of db %u from page server at lsn %X/%08X",
+                         errmsg(NEON_TAG "[reqid %lx] could not read db size of db %u from page server at lsn %X/%08X",
+                                resp->reqid,
                                 dbNode, LSN_FORMAT_ARGS(request_lsns.effective_request_lsn)),
                          errdetail("page server returned error: %s",
                                    ((NeonErrorResponse *) resp)->message)));
                 break;

             default:
                 NEON_PANIC_CONNECTION_STATE(-1, PANIC,
                                             "Expected DbSize (0x%02x) or Error (0x%02x) response to DbSizeRequest, but got 0x%02x",
                                             T_NeonDbSizeResponse, T_NeonErrorResponse, resp->tag);
         }

         neon_log(SmgrTrace, "neon_dbsize: db %u (request LSN %X/%08X): %ld bytes",
                  dbNode, LSN_FORMAT_ARGS(request_lsns.effective_request_lsn), db_size);

         pfree(resp);
     }

     return db_size;
 }

@@ -3868,16 +4025,17 @@ neon_read_slru_segment(SMgrRelation reln, const char* path, int segno, void* buf
         return -1;

     request = (NeonGetSlruSegmentRequest) {
-        .req.tag = T_NeonGetSlruSegmentRequest,
-        .req.lsn = request_lsn,
-        .req.not_modified_since = not_modified_since,
+        .hdr.tag = T_NeonGetSlruSegmentRequest,
+        .hdr.reqid = GENERATE_REQUEST_ID(),
+        .hdr.lsn = request_lsn,
+        .hdr.not_modified_since = not_modified_since,
         .kind = kind,
         .segno = segno
     };

     do
     {
-        while (!page_server->send(shard_no, &request.req) || !page_server->flush(shard_no));
+        while (!page_server->send(shard_no, &request.hdr) || !page_server->flush(shard_no));

         consume_prefetch_responses();

@@ -3887,14 +4045,38 @@ neon_read_slru_segment(SMgrRelation reln, const char* path, int segno, void* buf
     switch (resp->tag)
     {
         case T_NeonGetSlruSegmentResponse:
-            n_blocks = ((NeonGetSlruSegmentResponse *) resp)->n_blocks;
-            memcpy(buffer, ((NeonGetSlruSegmentResponse *) resp)->data, n_blocks*BLCKSZ);
+        {
+            NeonGetSlruSegmentResponse *slru_resp = (NeonGetSlruSegmentResponse *) resp;
+
+            if (neon_protocol_version >= 3)
+            {
+                if (!equal_requests(resp, &request.hdr) ||
+                    slru_resp->req.kind != kind ||
+                    slru_resp->req.segno != segno)
+                {
+                    NEON_PANIC_CONNECTION_STATE(-1, PANIC,
+                                                "Unexpected response {reqid=%lx,lsn=%X/%08X, since=%X/%08X, kind=%u, segno=%u} to get SLRU segment request {reqid=%lx,lsn=%X/%08X, since=%X/%08X, kind=%u, segno=%u}",
+                                                resp->reqid, LSN_FORMAT_ARGS(resp->lsn), LSN_FORMAT_ARGS(resp->not_modified_since), slru_resp->req.kind, slru_resp->req.segno,
+                                                request.hdr.reqid, LSN_FORMAT_ARGS(request.hdr.lsn), LSN_FORMAT_ARGS(request.hdr.not_modified_since), kind, segno);
+                }
+            }
+            n_blocks = slru_resp->n_blocks;
+            memcpy(buffer, slru_resp->data, n_blocks*BLCKSZ);
             break;
+        }
         case T_NeonErrorResponse:
+            if (neon_protocol_version >= 3)
+            {
+                if (!equal_requests(resp, &request.hdr))
+                {
+                    elog(WARNING, NEON_TAG "Error message {reqid=%lx,lsn=%X/%08X, since=%X/%08X} doesn't match get SLRU segment request {reqid=%lx,lsn=%X/%08X, since=%X/%08X}",
+                         resp->reqid, LSN_FORMAT_ARGS(resp->lsn), LSN_FORMAT_ARGS(resp->not_modified_since),
+                         request.hdr.reqid, LSN_FORMAT_ARGS(request.hdr.lsn), LSN_FORMAT_ARGS(request.hdr.not_modified_since));
+                }
+            }
             ereport(ERROR,
                     (errcode(ERRCODE_IO_ERROR),
-                     errmsg(NEON_TAG "could not read SLRU %d segment %d at lsn %X/%08X",
+                     errmsg(NEON_TAG "[reqid %lx] could not read SLRU %d segment %d at lsn %X/%08X",
+                            resp->reqid,
                             kind,
                             segno,
                             LSN_FORMAT_ARGS(request_lsn)),

@@ -4033,8 +4215,9 @@ neon_extend_rel_size(NRelFileInfo rinfo, ForkNumber forknum, BlockNumber blkno,
     NeonResponse *response;
     NeonNblocksResponse *nbresponse;
     NeonNblocksRequest request = {
-        .req = (NeonRequest) {
+        .hdr = (NeonRequest) {
             .tag = T_NeonNblocksRequest,
+            .reqid = GENERATE_REQUEST_ID(),
             .lsn = end_recptr,
             .not_modified_since = end_recptr,
         },

8
poetry.lock
generated
@@ -2028,13 +2028,13 @@ openapi-schema-validator = ">=0.4.2,<0.5.0"

 [[package]]
 name = "packaging"
-version = "23.0"
+version = "24.2"
 description = "Core utilities for Python packages"
 optional = false
-python-versions = ">=3.7"
+python-versions = ">=3.8"
 files = [
-    {file = "packaging-23.0-py3-none-any.whl", hash = "sha256:714ac14496c3e68c99c29b00845f7a2b85f3bb6f1078fd9f72fd20f0570002b2"},
-    {file = "packaging-23.0.tar.gz", hash = "sha256:b6ad297f8907de0fa2fe1ccbd26fdaf387f5f47c7275fedf8cce89f99446cf97"},
+    {file = "packaging-24.2-py3-none-any.whl", hash = "sha256:09abb1bccd265c01f4a3aa3f7a7db064b36514d2cba19a2f694fe6150451a759"},
+    {file = "packaging-24.2.tar.gz", hash = "sha256:c228a6dc5e932d346bc5739379109d49e8853dd8223571c7c5b55260edc0b97f"},
 ]

 [[package]]

@@ -1,3 +1,5 @@
+use std::fmt;
+
 use async_trait::async_trait;
 use postgres_client::config::SslMode;
 use pq_proto::BeMessage as Be;
@@ -5,15 +7,19 @@ use thiserror::Error;
 use tokio::io::{AsyncRead, AsyncWrite};
 use tracing::{info, info_span};

-use super::ComputeCredentialKeys;
+use super::{ComputeCredentialKeys, ControlPlaneApi};
+use crate::auth::backend::{BackendIpAllowlist, ComputeUserInfo};
 use crate::auth::IpPattern;
 use crate::cache::Cached;
 use crate::config::AuthenticationConfig;
 use crate::context::RequestContext;
+use crate::control_plane::client::cplane_proxy_v1;
 use crate::control_plane::{self, CachedNodeInfo, NodeInfo};
 use crate::error::{ReportableError, UserFacingError};
 use crate::proxy::connect_compute::ComputeConnectBackend;
+use crate::proxy::NeonOptions;
 use crate::stream::PqStream;
+use crate::types::RoleName;
 use crate::{auth, compute, waiters};

 #[derive(Debug, Error)]
@@ -31,6 +37,13 @@ pub(crate) enum ConsoleRedirectError {
 #[derive(Debug)]
 pub struct ConsoleRedirectBackend {
     console_uri: reqwest::Url,
+    api: cplane_proxy_v1::NeonControlPlaneClient,
 }

+impl fmt::Debug for cplane_proxy_v1::NeonControlPlaneClient {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        write!(f, "NeonControlPlaneClient")
+    }
+}
+
 impl UserFacingError for ConsoleRedirectError {
@@ -71,9 +84,24 @@ pub(crate) fn new_psql_session_id() -> String {
     hex::encode(rand::random::<[u8; 8]>())
 }

+#[async_trait]
+impl BackendIpAllowlist for ConsoleRedirectBackend {
+    async fn get_allowed_ips(
+        &self,
+        ctx: &RequestContext,
+        user_info: &ComputeUserInfo,
+    ) -> auth::Result<Vec<auth::IpPattern>> {
+        self.api
+            .get_allowed_ips_and_secret(ctx, user_info)
+            .await
+            .map(|(ips, _)| ips.as_ref().clone())
+            .map_err(|e| e.into())
+    }
+}
+
 impl ConsoleRedirectBackend {
-    pub fn new(console_uri: reqwest::Url) -> Self {
-        Self { console_uri }
+    pub fn new(console_uri: reqwest::Url, api: cplane_proxy_v1::NeonControlPlaneClient) -> Self {
+        Self { console_uri, api }
     }

     pub(crate) async fn authenticate(
@@ -81,10 +109,16 @@ impl ConsoleRedirectBackend {
         ctx: &RequestContext,
         auth_config: &'static AuthenticationConfig,
         client: &mut PqStream<impl AsyncRead + AsyncWrite + Unpin>,
-    ) -> auth::Result<(ConsoleRedirectNodeInfo, Option<Vec<IpPattern>>)> {
+    ) -> auth::Result<(
+        ConsoleRedirectNodeInfo,
+        ComputeUserInfo,
+        Option<Vec<IpPattern>>,
+    )> {
         authenticate(ctx, auth_config, &self.console_uri, client)
             .await
-            .map(|(node_info, ip_allowlist)| (ConsoleRedirectNodeInfo(node_info), ip_allowlist))
+            .map(|(node_info, user_info, ip_allowlist)| {
+                (ConsoleRedirectNodeInfo(node_info), user_info, ip_allowlist)
+            })
     }
 }

@@ -109,7 +143,7 @@ async fn authenticate(
     auth_config: &'static AuthenticationConfig,
     link_uri: &reqwest::Url,
     client: &mut PqStream<impl AsyncRead + AsyncWrite + Unpin>,
-) -> auth::Result<(NodeInfo, Option<Vec<IpPattern>>)> {
+) -> auth::Result<(NodeInfo, ComputeUserInfo, Option<Vec<IpPattern>>)> {
     ctx.set_auth_method(crate::context::AuthMethod::ConsoleRedirect);

     // registering waiter can fail if we get unlucky with rng.
@@ -164,8 +198,15 @@ async fn authenticate(
     let mut config = compute::ConnCfg::new(db_info.host.to_string(), db_info.port);
     config.dbname(&db_info.dbname).user(&db_info.user);

+    let user: RoleName = db_info.user.into();
+    let user_info = ComputeUserInfo {
+        endpoint: db_info.aux.endpoint_id.as_str().into(),
+        user: user.clone(),
+        options: NeonOptions::default(),
+    };
+
     ctx.set_dbname(db_info.dbname.into());
-    ctx.set_user(db_info.user.into());
+    ctx.set_user(user);
     ctx.set_project(db_info.aux.clone());
     info!("woken up a compute node");

@@ -188,6 +229,7 @@ async fn authenticate(
             config,
             aux: db_info.aux,
         },
+        user_info,
         db_info.allowed_ips,
     ))
 }

@@ -16,7 +16,9 @@ use tokio::io::{AsyncRead, AsyncWrite};
 use tracing::{debug, info, warn};

 use crate::auth::credentials::check_peer_addr_is_in_list;
-use crate::auth::{self, validate_password_and_exchange, AuthError, ComputeUserInfoMaybeEndpoint};
+use crate::auth::{
+    self, validate_password_and_exchange, AuthError, ComputeUserInfoMaybeEndpoint, IpPattern,
+};
 use crate::cache::Cached;
 use crate::config::AuthenticationConfig;
 use crate::context::RequestContext;
@@ -131,7 +133,7 @@ pub(crate) struct ComputeUserInfoNoEndpoint {
     pub(crate) options: NeonOptions,
 }

-#[derive(Debug, Clone)]
+#[derive(Debug, Clone, Default)]
 pub(crate) struct ComputeUserInfo {
     pub(crate) endpoint: EndpointId,
     pub(crate) user: RoleName,
@@ -244,6 +246,15 @@ impl AuthenticationConfig {
     }
 }

+#[async_trait::async_trait]
+pub(crate) trait BackendIpAllowlist {
+    async fn get_allowed_ips(
+        &self,
+        ctx: &RequestContext,
+        user_info: &ComputeUserInfo,
+    ) -> auth::Result<Vec<auth::IpPattern>>;
+}
+
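The trait above abstracts "fetch the IP allowlist for this user" over the proxy's backends. A toy Rust sketch of the check the proxy then performs with the returned patterns (assuming the usual convention that an empty allowlist means no restriction; the real check_peer_addr_is_in_list matches IpPattern values, which can be single addresses, ranges, or subnets, not plain IpAddrs):

use std::net::IpAddr;

// Toy allowlist check; illustrative only.
fn peer_addr_is_allowed(peer: IpAddr, allowlist: &[IpAddr]) -> bool {
    allowlist.is_empty() || allowlist.iter().any(|allowed| *allowed == peer)
}
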
/// True to its name, this function encapsulates our current auth trade-offs.
|
||||
/// Here, we choose the appropriate auth flow based on circumstances.
|
||||
///
|
||||
@@ -256,7 +267,7 @@ async fn auth_quirks(
|
||||
allow_cleartext: bool,
|
||||
config: &'static AuthenticationConfig,
|
||||
endpoint_rate_limiter: Arc<EndpointRateLimiter>,
|
||||
) -> auth::Result<ComputeCredentials> {
|
||||
) -> auth::Result<(ComputeCredentials, Option<Vec<IpPattern>>)> {
|
||||
// If there's no project so far, that entails that client doesn't
|
||||
// support SNI or other means of passing the endpoint (project) name.
|
||||
// We now expect to see a very specific payload in the place of password.
|
||||
@@ -315,7 +326,7 @@ async fn auth_quirks(
|
||||
)
|
||||
.await
|
||||
{
|
||||
Ok(keys) => Ok(keys),
|
||||
Ok(keys) => Ok((keys, Some(allowed_ips.as_ref().clone()))),
|
||||
Err(e) => {
|
||||
if e.is_password_failed() {
|
||||
// The password could have been changed, so we invalidate the cache.
|
||||
@@ -385,7 +396,7 @@ impl<'a> Backend<'a, ComputeUserInfoMaybeEndpoint> {
|
||||
allow_cleartext: bool,
|
||||
config: &'static AuthenticationConfig,
|
||||
endpoint_rate_limiter: Arc<EndpointRateLimiter>,
|
||||
) -> auth::Result<Backend<'a, ComputeCredentials>> {
|
||||
) -> auth::Result<(Backend<'a, ComputeCredentials>, Option<Vec<IpPattern>>)> {
|
||||
let res = match self {
|
||||
Self::ControlPlane(api, user_info) => {
|
||||
debug!(
|
||||
@@ -394,7 +405,7 @@ impl<'a> Backend<'a, ComputeUserInfoMaybeEndpoint> {
|
||||
"performing authentication using the console"
|
||||
);
|
||||
|
||||
let credentials = auth_quirks(
|
||||
let (credentials, ip_allowlist) = auth_quirks(
|
||||
ctx,
|
||||
&*api,
|
||||
user_info,
|
||||
@@ -404,7 +415,7 @@ impl<'a> Backend<'a, ComputeUserInfoMaybeEndpoint> {
|
||||
endpoint_rate_limiter,
|
||||
)
|
||||
.await?;
|
||||
Backend::ControlPlane(api, credentials)
|
||||
Ok((Backend::ControlPlane(api, credentials), ip_allowlist))
|
||||
}
|
||||
Self::Local(_) => {
|
||||
return Err(auth::AuthError::bad_auth_method("invalid for local proxy"))
|
||||
@@ -413,7 +424,7 @@ impl<'a> Backend<'a, ComputeUserInfoMaybeEndpoint> {
|
||||
|
||||
// TODO: replace with some metric
|
||||
info!("user successfully authenticated");
|
||||
Ok(res)
|
||||
res
|
||||
}
|
||||
}
|
||||
|
||||
@@ -441,6 +452,24 @@ impl Backend<'_, ComputeUserInfo> {
|
||||
}
|
||||
}
|
||||
|
||||
#[async_trait::async_trait]
|
||||
impl BackendIpAllowlist for Backend<'_, ()> {
|
||||
async fn get_allowed_ips(
|
||||
&self,
|
||||
ctx: &RequestContext,
|
||||
user_info: &ComputeUserInfo,
|
||||
) -> auth::Result<Vec<auth::IpPattern>> {
|
||||
let auth_data = match self {
|
||||
Self::ControlPlane(api, ()) => api.get_allowed_ips_and_secret(ctx, user_info).await,
|
||||
Self::Local(_) => Ok((Cached::new_uncached(Arc::new(vec![])), None)),
|
||||
};
|
||||
|
||||
auth_data
|
||||
.map(|(ips, _)| ips.as_ref().clone())
|
||||
.map_err(|e| e.into())
|
||||
}
|
||||
}
|
||||
|
||||
#[async_trait::async_trait]
|
||||
impl ComputeConnectBackend for Backend<'_, ComputeCredentials> {
|
||||
async fn wake_compute(
|
||||
@@ -786,7 +815,7 @@ mod tests {
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
assert_eq!(creds.info.endpoint, "my-endpoint");
|
||||
assert_eq!(creds.0.info.endpoint, "my-endpoint");
|
||||
|
||||
handle.await.unwrap();
|
||||
}

@@ -744,9 +744,59 @@ fn build_auth_backend(
}

AuthBackendType::ConsoleRedirect => {
let url = args.uri.parse()?;
let backend = ConsoleRedirectBackend::new(url);
let wake_compute_cache_config: CacheOptions = args.wake_compute_cache.parse()?;
let project_info_cache_config: ProjectInfoCacheOptions =
args.project_info_cache.parse()?;
let endpoint_cache_config: config::EndpointCacheConfig =
args.endpoint_cache_config.parse()?;

info!("Using NodeInfoCache (wake_compute) with options={wake_compute_cache_config:?}");
info!(
"Using AllowedIpsCache (wake_compute) with options={project_info_cache_config:?}"
);
info!("Using EndpointCacheConfig with options={endpoint_cache_config:?}");
let caches = Box::leak(Box::new(control_plane::caches::ApiCaches::new(
wake_compute_cache_config,
project_info_cache_config,
endpoint_cache_config,
)));

let config::ConcurrencyLockOptions {
shards,
limiter,
epoch,
timeout,
} = args.wake_compute_lock.parse()?;
info!(?limiter, shards, ?epoch, "Using NodeLocks (wake_compute)");
let locks = Box::leak(Box::new(control_plane::locks::ApiLocks::new(
"wake_compute_lock",
limiter,
shards,
timeout,
epoch,
&Metrics::get().wake_compute_lock,
)?));

let url = args.uri.clone().parse()?;
let ep_url: proxy::url::ApiUrl = args.auth_endpoint.parse()?;
let endpoint = http::Endpoint::new(ep_url, http::new_client());
let mut wake_compute_rps_limit = args.wake_compute_limit.clone();
RateBucketInfo::validate(&mut wake_compute_rps_limit)?;
let wake_compute_endpoint_rate_limiter =
Arc::new(WakeComputeRateLimiter::new(wake_compute_rps_limit));

// Since we only use get_allowed_ips_and_secret(), the wake_compute_endpoint_rate_limiter
// and locks are not used in ConsoleRedirectBackend,
// but they are required by NeonControlPlaneClient.
let api = control_plane::client::cplane_proxy_v1::NeonControlPlaneClient::new(
endpoint,
args.control_plane_token.clone(),
caches,
locks,
wake_compute_endpoint_rate_limiter,
);

let backend = ConsoleRedirectBackend::new(url, api);
let config = Box::leak(Box::new(backend));

Ok(Either::Right(config))
@@ -12,8 +12,10 @@ use tokio::sync::Mutex;
use tracing::{debug, info};
use uuid::Uuid;

use crate::auth::{check_peer_addr_is_in_list, IpPattern};
use crate::auth::backend::{BackendIpAllowlist, ComputeUserInfo};
use crate::auth::{check_peer_addr_is_in_list, AuthError, IpPattern};
use crate::config::ComputeConfig;
use crate::context::RequestContext;
use crate::error::ReportableError;
use crate::ext::LockExt;
use crate::metrics::{CancellationRequest, CancellationSource, Metrics};
@@ -56,6 +58,9 @@ pub(crate) enum CancelError {

#[error("IP is not allowed")]
IpNotAllowed,

#[error("Authentication backend error")]
AuthError(#[from] AuthError),
}

impl ReportableError for CancelError {
@@ -68,6 +73,7 @@ impl ReportableError for CancelError {
CancelError::Postgres(_) => crate::error::ErrorKind::Compute,
CancelError::RateLimit => crate::error::ErrorKind::RateLimit,
CancelError::IpNotAllowed => crate::error::ErrorKind::User,
CancelError::AuthError(_) => crate::error::ErrorKind::ControlPlane,
}
}
}
@@ -102,10 +108,7 @@ impl<P: CancellationPublisher> CancellationHandler<P> {
}
}

/// Try to cancel a running query for the corresponding connection.
/// If the cancellation key is not found, it will be published to Redis.
/// check_allowed - if true, check if the IP is allowed to cancel the query
/// return Result primarily for tests
/// Cancelling only in notification, will be removed
pub(crate) async fn cancel_session(
&self,
key: CancelKeyData,
@@ -134,7 +137,8 @@ impl<P: CancellationPublisher> CancellationHandler<P> {
}

// NB: we should immediately release the lock after cloning the token.
let Some(cancel_closure) = self.map.get(&key).and_then(|x| x.clone()) else {
let cancel_state = self.map.get(&key).and_then(|x| x.clone());
let Some(cancel_closure) = cancel_state else {
tracing::warn!("query cancellation key not found: {key}");
Metrics::get()
.proxy
@@ -185,6 +189,96 @@ impl<P: CancellationPublisher> CancellationHandler<P> {
cancel_closure.try_cancel_query(self.compute_config).await
}

/// Try to cancel a running query for the corresponding connection.
/// If the cancellation key is not found, it will be published to Redis.
/// check_allowed - if true, check if the IP is allowed to cancel the query.
/// Will fetch IP allowlist internally.
///
/// return Result primarily for tests
pub(crate) async fn cancel_session_auth<T: BackendIpAllowlist>(
&self,
key: CancelKeyData,
ctx: RequestContext,
check_allowed: bool,
auth_backend: &T,
) -> Result<(), CancelError> {
// TODO: check for unspecified address is only for backward compatibility, should be removed
if !ctx.peer_addr().is_unspecified() {
let subnet_key = match ctx.peer_addr() {
IpAddr::V4(ip) => IpNet::V4(Ipv4Net::new_assert(ip, 24).trunc()), // use default mask here
IpAddr::V6(ip) => IpNet::V6(Ipv6Net::new_assert(ip, 64).trunc()),
};
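// Illustrative note (not part of the change): for a peer like 203.0.113.77 the
// V4 arm above truncates to the /24 network 203.0.113.0/24, so the limiter
// below throttles the whole subnet rather than a single address; V6 peers are
// bucketed per /64.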
if !self.limiter.lock_propagate_poison().check(subnet_key, 1) {
// log only the subnet part of the IP address to know which subnet is rate limited
tracing::warn!("Rate limit exceeded. Skipping cancellation message, {subnet_key}");
Metrics::get()
.proxy
.cancellation_requests_total
.inc(CancellationRequest {
source: self.from,
kind: crate::metrics::CancellationOutcome::RateLimitExceeded,
});
return Err(CancelError::RateLimit);
}
}

// NB: we should immediately release the lock after cloning the token.
let cancel_state = self.map.get(&key).and_then(|x| x.clone());
let Some(cancel_closure) = cancel_state else {
tracing::warn!("query cancellation key not found: {key}");
Metrics::get()
.proxy
.cancellation_requests_total
.inc(CancellationRequest {
source: self.from,
kind: crate::metrics::CancellationOutcome::NotFound,
});

if ctx.session_id() == Uuid::nil() {
// was already published, do not publish it again
return Ok(());
}

match self
.client
.try_publish(key, ctx.session_id(), ctx.peer_addr())
.await
{
Ok(()) => {} // do nothing
Err(e) => {
// log it here since cancel_session could be spawned in a task
tracing::error!("failed to publish cancellation key: {key}, error: {e}");
return Err(CancelError::IO(std::io::Error::new(
std::io::ErrorKind::Other,
e.to_string(),
)));
}
}
return Ok(());
};

let ip_allowlist = auth_backend
.get_allowed_ips(&ctx, &cancel_closure.user_info)
.await
.map_err(CancelError::AuthError)?;

if check_allowed && !check_peer_addr_is_in_list(&ctx.peer_addr(), &ip_allowlist) {
// log it here since cancel_session could be spawned in a task
tracing::warn!("IP is not allowed to cancel the query: {key}");
return Err(CancelError::IpNotAllowed);
}

Metrics::get()
.proxy
.cancellation_requests_total
.inc(CancellationRequest {
source: self.from,
kind: crate::metrics::CancellationOutcome::Found,
});
info!("cancelling query per user's request using key {key}");
cancel_closure.try_cancel_query(self.compute_config).await
}

#[cfg(test)]
fn contains(&self, session: &Session<P>) -> bool {
self.map.contains_key(&session.key)
@@ -248,6 +342,7 @@ pub struct CancelClosure {
cancel_token: CancelToken,
ip_allowlist: Vec<IpPattern>,
hostname: String, // for pg_sni router
user_info: ComputeUserInfo,
}

impl CancelClosure {
@@ -256,12 +351,14 @@ impl CancelClosure {
cancel_token: CancelToken,
ip_allowlist: Vec<IpPattern>,
hostname: String,
user_info: ComputeUserInfo,
) -> Self {
Self {
socket_addr,
cancel_token,
ip_allowlist,
hostname,
user_info,
}
}
/// Cancels the query running on user's compute node.
@@ -288,6 +385,8 @@ impl CancelClosure {
debug!("query was cancelled");
Ok(())
}

/// Obsolete (will be removed after moving CancelMap to Redis), only for notifications
pub(crate) fn set_ip_allowlist(&mut self, ip_allowlist: Vec<IpPattern>) {
self.ip_allowlist = ip_allowlist;
}

@@ -13,6 +13,7 @@ use thiserror::Error;
use tokio::net::TcpStream;
use tracing::{debug, error, info, warn};

use crate::auth::backend::ComputeUserInfo;
use crate::auth::parse_endpoint_param;
use crate::cancellation::CancelClosure;
use crate::config::ComputeConfig;
@@ -250,6 +251,7 @@ impl ConnCfg {
ctx: &RequestContext,
aux: MetricsAuxInfo,
config: &ComputeConfig,
user_info: ComputeUserInfo,
) -> Result<PostgresConnection, ConnectionError> {
let pause = ctx.latency_timer_pause(crate::metrics::Waiting::Compute);
let (socket_addr, stream, host) = self.connect_raw(config.timeout).await?;
@@ -294,8 +296,9 @@ impl ConnCfg {
process_id,
secret_key,
},
vec![],
vec![], // TODO: deprecated, will be removed
host.to_string(),
user_info,
);

let connection = PostgresConnection {

@@ -159,6 +159,7 @@ pub(crate) async fn handle_client<S: AsyncRead + AsyncWrite + Unpin>(
let request_gauge = metrics.connection_requests.guard(proto);

let tls = config.tls_config.as_ref();

let record_handshake_error = !ctx.has_private_peer_addr();
let pause = ctx.latency_timer_pause(crate::metrics::Waiting::Client);
let do_handshake = handshake(ctx, stream, tls, record_handshake_error);
@@ -171,23 +172,20 @@ pub(crate) async fn handle_client<S: AsyncRead + AsyncWrite + Unpin>(
// spawn a task to cancel the session, but don't wait for it
cancellations.spawn({
let cancellation_handler_clone = Arc::clone(&cancellation_handler);
let session_id = ctx.session_id();
let peer_ip = ctx.peer_addr();
let cancel_span = tracing::span!(parent: None, tracing::Level::INFO, "cancel_session", session_id = ?session_id);
let ctx = ctx.clone();
let cancel_span = tracing::span!(parent: None, tracing::Level::INFO, "cancel_session", session_id = ?ctx.session_id());
cancel_span.follows_from(tracing::Span::current());
async move {
drop(
cancellation_handler_clone
.cancel_session(
cancel_key_data,
session_id,
peer_ip,
config.authentication_config.ip_allowlist_check_enabled,
)
.instrument(cancel_span)
.await,
);
}
cancellation_handler_clone
.cancel_session_auth(
cancel_key_data,
ctx,
config.authentication_config.ip_allowlist_check_enabled,
backend,
)
.await
.inspect_err(|e| debug!(error = ?e, "cancel_session failed")).ok();
}.instrument(cancel_span)
});

return Ok(None);
@@ -197,7 +195,7 @@ pub(crate) async fn handle_client<S: AsyncRead + AsyncWrite + Unpin>(

ctx.set_db_options(params.clone());

let (user_info, ip_allowlist) = match backend
let (node_info, user_info, ip_allowlist) = match backend
.authenticate(ctx, &config.authentication_config, &mut stream)
.await
{
@@ -210,11 +208,12 @@ pub(crate) async fn handle_client<S: AsyncRead + AsyncWrite + Unpin>(
let mut node = connect_to_compute(
ctx,
&TcpMechanism {
user_info,
params_compat: true,
params: &params,
locks: &config.connect_compute_locks,
},
&user_info,
&node_info,
config.wake_compute_retry_config,
&config.connect_to_compute,
)

@@ -29,7 +29,7 @@ use crate::rate_limiter::WakeComputeRateLimiter;
use crate::types::{EndpointCacheKey, EndpointId};
use crate::{compute, http, scram};

const X_REQUEST_ID: HeaderName = HeaderName::from_static("x-request-id");
pub(crate) const X_REQUEST_ID: HeaderName = HeaderName::from_static("x-request-id");

#[derive(Clone)]
pub struct NeonControlPlaneClient {
@@ -78,15 +78,30 @@ impl NeonControlPlaneClient {
info!("endpoint is not valid, skipping the request");
return Ok(AuthInfo::default());
}
let request_id = ctx.session_id().to_string();
let application_name = ctx.console_application_name();
self.do_get_auth_req(user_info, &ctx.session_id(), Some(ctx))
.await
}

async fn do_get_auth_req(
&self,
user_info: &ComputeUserInfo,
session_id: &uuid::Uuid,
ctx: Option<&RequestContext>,
) -> Result<AuthInfo, GetAuthInfoError> {
let request_id: String = session_id.to_string();
let application_name = if let Some(ctx) = ctx {
ctx.console_application_name()
} else {
"auth_cancellation".to_string()
};

async {
let request = self
.endpoint
.get_path("get_endpoint_access_control")
.header(X_REQUEST_ID, &request_id)
.header(AUTHORIZATION, format!("Bearer {}", &self.jwt))
.query(&[("session_id", ctx.session_id())])
.query(&[("session_id", session_id)])
.query(&[
("application_name", application_name.as_str()),
("endpointish", user_info.endpoint.as_str()),
@@ -96,9 +111,16 @@ impl NeonControlPlaneClient {

debug!(url = request.url().as_str(), "sending http request");
let start = Instant::now();
let pause = ctx.latency_timer_pause(crate::metrics::Waiting::Cplane);
let response = self.endpoint.execute(request).await?;
drop(pause);
let response = match ctx {
Some(ctx) => {
let pause = ctx.latency_timer_pause(crate::metrics::Waiting::Cplane);
let rsp = self.endpoint.execute(request).await;
drop(pause);
rsp?
}
None => self.endpoint.execute(request).await?,
};

info!(duration = ?start.elapsed(), "received http response");
let body = match parse_body::<GetEndpointAccessControl>(response).await {
Ok(body) => body,

@@ -74,8 +74,11 @@ impl NodeInfo {
&self,
ctx: &RequestContext,
config: &ComputeConfig,
user_info: ComputeUserInfo,
) -> Result<compute::PostgresConnection, compute::ConnectionError> {
self.config.connect(ctx, self.aux.clone(), config).await
self.config
.connect(ctx, self.aux.clone(), config, user_info)
.await
}

pub(crate) fn reuse_settings(&mut self, other: Self) {

@@ -4,7 +4,7 @@ use tokio::time;
use tracing::{debug, info, warn};

use super::retry::ShouldRetryWakeCompute;
use crate::auth::backend::ComputeCredentialKeys;
use crate::auth::backend::{ComputeCredentialKeys, ComputeUserInfo};
use crate::compute::{self, PostgresConnection, COULD_NOT_CONNECT};
use crate::config::{ComputeConfig, RetryConfig};
use crate::context::RequestContext;
@@ -71,6 +71,8 @@ pub(crate) struct TcpMechanism<'a> {

/// connect_to_compute concurrency lock
pub(crate) locks: &'static ApiLocks<Host>,

pub(crate) user_info: ComputeUserInfo,
}

#[async_trait]
@@ -88,7 +90,7 @@ impl ConnectMechanism for TcpMechanism<'_> {
) -> Result<PostgresConnection, Self::Error> {
let host = node_info.config.get_host();
let permit = self.locks.get_permit(&host).await?;
permit.release_result(node_info.connect(ctx, config).await)
permit.release_result(node_info.connect(ctx, config, self.user_info.clone()).await)
}

fn update_connect_config(&self, config: &mut compute::ConnCfg) {

@@ -273,23 +273,20 @@ pub(crate) async fn handle_client<S: AsyncRead + AsyncWrite + Unpin>(
// spawn a task to cancel the session, but don't wait for it
cancellations.spawn({
let cancellation_handler_clone = Arc::clone(&cancellation_handler);
let session_id = ctx.session_id();
let peer_ip = ctx.peer_addr();
let cancel_span = tracing::span!(parent: None, tracing::Level::INFO, "cancel_session", session_id = ?session_id);
let ctx = ctx.clone();
let cancel_span = tracing::span!(parent: None, tracing::Level::INFO, "cancel_session", session_id = ?ctx.session_id());
cancel_span.follows_from(tracing::Span::current());
async move {
drop(
cancellation_handler_clone
.cancel_session(
cancel_key_data,
session_id,
peer_ip,
config.authentication_config.ip_allowlist_check_enabled,
)
.instrument(cancel_span)
.await,
);
}
cancellation_handler_clone
.cancel_session_auth(
cancel_key_data,
ctx,
config.authentication_config.ip_allowlist_check_enabled,
auth_backend,
)
.await
.inspect_err(|e| debug!(error = ?e, "cancel_session failed")).ok();
}.instrument(cancel_span)
});

return Ok(None);
@@ -315,7 +312,7 @@ pub(crate) async fn handle_client<S: AsyncRead + AsyncWrite + Unpin>(
};

let user = user_info.get_user().to_owned();
let user_info = match user_info
let (user_info, ip_allowlist) = match user_info
.authenticate(
ctx,
&mut stream,
@@ -335,16 +332,19 @@ pub(crate) async fn handle_client<S: AsyncRead + AsyncWrite + Unpin>(
}
};

let params_compat = match &user_info {
auth::Backend::ControlPlane(_, info) => {
info.info.options.get(NeonOptions::PARAMS_COMPAT).is_some()
}
auth::Backend::Local(_) => false,
let compute_user_info = match &user_info {
auth::Backend::ControlPlane(_, info) => &info.info,
auth::Backend::Local(_) => unreachable!("local proxy does not run tcp proxy service"),
};
let params_compat = compute_user_info
.options
.get(NeonOptions::PARAMS_COMPAT)
.is_some();

let mut node = connect_to_compute(
ctx,
&TcpMechanism {
user_info: compute_user_info.clone(),
params_compat,
params: &params,
locks: &config.connect_compute_locks,
@@ -356,6 +356,8 @@ pub(crate) async fn handle_client<S: AsyncRead + AsyncWrite + Unpin>(
.or_else(|e| stream.throw_error(e))
.await?;

node.cancel_closure
.set_ip_allowlist(ip_allowlist.unwrap_or_default());
let session = cancellation_handler.get_session();
prepare_client_connection(&node, &session, &mut stream).await?;

@@ -37,7 +37,6 @@ struct NotificationHeader<'a> {

#[derive(Clone, Debug, Serialize, Deserialize, Eq, PartialEq)]
#[serde(tag = "topic", content = "data")]
// Message to contributors: Make sure to align these topic names with the list below.
pub(crate) enum Notification {
#[serde(
rename = "/allowed_ips_updated",
@@ -74,21 +73,13 @@ pub(crate) enum Notification {
PasswordUpdate { password_update: PasswordUpdate },
#[serde(rename = "/cancel_session")]
Cancel(CancelSession),
}

/// Returns true if the topic name given is a known topic that we can deserialize and action on.
/// Returns false otherwise.
fn known_topic(s: &str) -> bool {
// Message to contributors: Make sure to align these topic names with the enum above.
matches!(
s,
"/allowed_ips_updated"
| "/block_public_or_vpc_access_updated"
| "/allowed_vpc_endpoints_updated_for_org"
| "/allowed_vpc_endpoints_updated_for_projects"
| "/password_updated"
| "/cancel_session"
)
#[serde(
other,
deserialize_with = "deserialize_unknown_topic",
skip_serializing
)]
UnknownTopic,
}

#[derive(Clone, Debug, Serialize, Deserialize, Eq, PartialEq)]
@@ -136,6 +127,15 @@
where
serde_json::from_str(&s).map_err(<D::Error as serde::de::Error>::custom)
}

// https://github.com/serde-rs/serde/issues/1714
fn deserialize_unknown_topic<'de, D>(deserializer: D) -> Result<(), D::Error>
where
D: serde::Deserializer<'de>,
{
deserializer.deserialize_any(serde::de::IgnoredAny)?;
Ok(())
}

struct MessageHandler<C: ProjectInfoCache + Send + Sync + 'static> {
cache: Arc<C>,
cancellation_handler: Arc<CancellationHandler<()>>,
@@ -178,32 +178,29 @@ impl<C: ProjectInfoCache + Send + Sync + 'static> MessageHandler<C> {
let payload: String = msg.get_payload()?;
tracing::debug!(?payload, "received a message payload");

// For better error handling, we first parse the payload to extract the topic.
// If there's a topic we don't support, we can handle that error more gracefully.
let header: NotificationHeader = match serde_json::from_str(&payload) {
Ok(msg) => msg,
Err(e) => {
Metrics::get().proxy.redis_errors_total.inc(RedisErrors {
channel: msg.get_channel_name(),
});
tracing::error!("broken message: {e}");
let msg: Notification = match serde_json::from_str(&payload) {
Ok(Notification::UnknownTopic) => {
match serde_json::from_str::<NotificationHeader>(&payload) {
// don't update the metric for redis errors if it's just a topic we don't know about.
Ok(header) => tracing::warn!(topic = header.topic, "unknown topic"),
Err(e) => {
Metrics::get().proxy.redis_errors_total.inc(RedisErrors {
channel: msg.get_channel_name(),
});
tracing::error!("broken message: {e}");
}
};
return Ok(());
}
};

if !known_topic(header.topic) {
// don't update the metric for redis errors if it's just a topic we don't know about.
tracing::warn!(topic = header.topic, "unknown topic");
return Ok(());
}

let msg: Notification = match serde_json::from_str(&payload) {
Ok(msg) => msg,
Err(e) => {
Metrics::get().proxy.redis_errors_total.inc(RedisErrors {
channel: msg.get_channel_name(),
});
tracing::error!(topic = header.topic, "broken message: {e}");
match serde_json::from_str::<NotificationHeader>(&payload) {
Ok(header) => tracing::error!(topic = header.topic, "broken message: {e}"),
Err(_) => tracing::error!("broken message: {e}"),
};
return Ok(());
}
};
@@ -278,6 +275,8 @@ impl<C: ProjectInfoCache + Send + Sync + 'static> MessageHandler<C> {
invalidate_cache(cache, msg);
});
}

Notification::UnknownTopic => unreachable!(),
}

Ok(())
@@ -304,6 +303,7 @@ fn invalidate_cache<C: ProjectInfoCache>(cache: Arc<C>, msg: Notification) {
Notification::AllowedVpcEndpointsUpdatedForProjects { .. } => {
// https://github.com/neondatabase/neon/pull/10073
}
Notification::UnknownTopic => unreachable!(),
}
}

@@ -471,4 +471,30 @@ mod tests {

Ok(())
}

#[test]
fn parse_unknown_topic() -> anyhow::Result<()> {
let with_data = json!({
"type": "message",
"topic": "/doesnotexist",
"data": {
"payload": "ignored"
},
"extra_fields": "something"
})
.to_string();
let result: Notification = serde_json::from_str(&with_data)?;
assert_eq!(result, Notification::UnknownTopic);

let without_data = json!({
"type": "message",
"topic": "/doesnotexist",
"extra_fields": "something"
})
.to_string();
let result: Notification = serde_json::from_str(&without_data)?;
assert_eq!(result, Notification::UnknownTopic);

Ok(())
}
}
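The `UnknownTopic` fallback above uses the attribute combination from serde issue #1714 that the diff itself cites. A self-contained sketch of the same pattern, with our own illustrative names (`Msg`, `ignore_contents`) and assuming `serde`/`serde_json` as dependencies:

```rust
use serde::{Deserialize, Deserializer};

#[derive(Debug, PartialEq, Deserialize)]
#[serde(tag = "topic", content = "data")]
enum Msg {
    #[serde(rename = "/known")]
    Known { value: u32 },
    // `other` catches any unmatched tag; `deserialize_with` discards the payload.
    #[serde(other, deserialize_with = "ignore_contents")]
    Unknown,
}

fn ignore_contents<'de, D: Deserializer<'de>>(d: D) -> Result<(), D::Error> {
    d.deserialize_any(serde::de::IgnoredAny)?;
    Ok(())
}

fn main() {
    let m: Msg = serde_json::from_str(r#"{"topic":"/nope","data":{"x":1}}"#).unwrap();
    assert_eq!(m, Msg::Unknown);
}
```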
@@ -1,5 +1,5 @@
[toolchain]
channel = "1.83.0"
channel = "1.84.0"
profile = "default"
# The default profile includes rustc, rust-std, cargo, rust-docs, rustfmt and clippy.
# https://rust-lang.github.io/rustup/concepts/profiles.html

41
safekeeper/spec/MCProposerAcceptorReconfig.tla
Normal file
@@ -0,0 +1,41 @@
---- MODULE MCProposerAcceptorReconfig ----
EXTENDS TLC, ProposerAcceptorReconfig

\* Augments the spec with model checking constraints.

\* It slightly duplicates MCProposerAcceptorStatic, but we can't EXTENDS it
\* because it EXTENDS ProposerAcceptorStatic in turn. The duplication isn't big
\* anyway.

\* For model checking.
CONSTANTS
max_entries, \* model constraint: max log entries acceptor/proposer can hold
max_term, \* model constraint: max allowed term
max_generation \* model constraint: max config generation

ASSUME max_entries \in Nat /\ max_term \in Nat /\ max_generation \in Nat

\* Model space constraint.
StateConstraint == /\ \A p \in proposers:
/\ prop_state[p].term <= max_term
/\ Len(prop_state[p].wal) <= max_entries
/\ conf_store.generation <= max_generation

\* Sets of proposers and acceptors are symmetric because we don't take any
\* actions depending on some concrete proposer/acceptor (like IF p = p1 THEN
\* ...)
ProposerAcceptorSymmetry == Permutations(proposers) \union Permutations(acceptors)

\* enforce order of the vars in the error trace with ALIAS
\* Note that ALIAS is supported only since version 1.8.0, which is a pre-release
\* as of this writing.
Alias == [
prop_state |-> prop_state,
prop_conf |-> prop_conf,
acc_state |-> acc_state,
acc_conf |-> acc_conf,
committed |-> committed,
conf_store |-> conf_store
]

====
@@ -3,6 +3,9 @@ EXTENDS TLC, ProposerAcceptorStatic

\* Augments the spec with model checking constraints.

\* Note that MCProposerAcceptorReconfig duplicates it and might need to
\* be updated as well.

\* For model checking.
CONSTANTS
max_entries, \* model constraint: max log entries acceptor/proposer can hold

350
safekeeper/spec/ProposerAcceptorReconfig.tla
Normal file
@@ -0,0 +1,350 @@
---- MODULE ProposerAcceptorReconfig ----

(*
Spec for https://github.com/neondatabase/neon/blob/538e2312a617c65d489d391892c70b2e4d7407b5/docs/rfcs/035-safekeeper-dynamic-membership-change.md

Simplifications:
- The ones inherited from ProposerAcceptorStatic.
- We don't model transient state of the configuration change driver process
(storage controller in the implementation). Its actions StartChange and FinishChange
are taken based on the persistent state of safekeepers and conf store. The
justification for that is the following: once new configuration n is
created (e.g. with StartChange or FinishChange), any old configuration
change driver working on older conf < n will never be able to commit
it to the conf store because it is protected by CAS. The
propagation of these older confs is still possible though, and the
spec allows doing it through acceptors.
Plus the model is already pretty huge.
- The previous point also means that the FinishChange action is
based only on the current state of safekeepers, not on state from
the past. That's ok because while an individual
acceptor's <last_log_term, flush_lsn> may go down,
the quorum one never does. So the FinishChange
condition, which collects the max over the quorum, may only get
more strict over time.

The invariants expectedly break if any of the FinishChange
required conditions are removed.
*)

EXTENDS Integers, Sequences, FiniteSets, TLC

VARIABLES
\* state which is the same in the static spec
prop_state,
acc_state,
committed,
elected_history,
\* reconfiguration only state
prop_conf, \* prop_conf[p] is current configuration of proposer p
acc_conf, \* acc_conf[a] is current configuration of acceptor a
conf_store \* configuration in the configuration store.

CONSTANT
acceptors,
proposers

CONSTANT NULL

\* Import ProposerAcceptorStatic under PAS.
\*
\* Note that all vars and consts are named the same and thus substituted
\* implicitly.
PAS == INSTANCE ProposerAcceptorStatic

\********************************************************************************
\* Helpers
\********************************************************************************

\********************************************************************************
\* Type assertion
\********************************************************************************

\* Is c a valid config?
IsConfig(c) ==
/\ DOMAIN c = {"generation", "members", "newMembers"}
\* Unique id of the configuration.
/\ c.generation \in Nat
/\ c.members \in SUBSET acceptors
\* newMembers is NULL when it is not a joint conf.
/\ \/ c.newMembers = NULL
\/ c.newMembers \in SUBSET acceptors

TypeOk ==
/\ PAS!TypeOk
/\ \A p \in proposers: IsConfig(prop_conf[p])
/\ \A a \in acceptors: IsConfig(acc_conf[a])
/\ IsConfig(conf_store)

\********************************************************************************
\* Initial
\********************************************************************************

Init ==
/\ PAS!Init
/\ \E init_members \in SUBSET acceptors:
LET init_conf == [generation |-> 1, members |-> init_members, newMembers |-> NULL] IN
\* refer to RestartProposer for why it is not NULL
/\ prop_conf = [p \in proposers |-> init_conf]
/\ acc_conf = [a \in acceptors |-> init_conf]
/\ conf_store = init_conf
\* We could start with anything, but to reduce the state space start with
\* the most reasonable conf size of total acceptors - 1, which e.g.
\* makes the basic {a1} -> {a2} change in {a1, a2} acceptors and {a1, a2,
\* a3} -> {a2, a3, a4} in {a1, a2, a3, a4} acceptors possible even in
\* the smallest models with a single change.
/\ Cardinality(init_members) = Cardinality(acceptors) - 1

\********************************************************************************
\* Actions
\********************************************************************************

\* Proposer p loses all state, restarting. In the static spec we bump restarted
\* proposer term to max of some quorum + 1, which is the minimal term that can win
\* election. With reconfigurations it's harder to calculate such a term, so keep
\* it simple and take a random acceptor's term + 1.
\*
\* Also make the proposer adopt the configuration of another random acceptor. In the
\* impl the proposer starts with a NULL configuration until handshake with the first
\* acceptor. Removing this NULL special case makes the spec a bit simpler.
RestartProposer(p) ==
/\ \E a \in acceptors: PAS!RestartProposerWithTerm(p, acc_state[a].term + 1)
/\ \E a \in acceptors: prop_conf' = [prop_conf EXCEPT ![p] = acc_conf[a]]
/\ UNCHANGED <<acc_conf, conf_store>>

\* Acceptor a immediately votes for proposer p.
Vote(p, a) ==
\* Configuration must be the same.
/\ prop_conf[p].generation = acc_conf[a].generation
\* And a is expected to be a member of it. This is likely redundant as long as
\* becoming leader checks membership (though vote also contributes to max
\* <term, lsn> calculation).
/\ \/ a \in prop_conf[p].members
\/ (prop_conf[p].newMembers /= NULL) /\ (a \in prop_conf[p].newMembers)
/\ PAS!Vote(p, a)
/\ UNCHANGED <<prop_conf, acc_conf, conf_store>>
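The membership disjunction in Vote (and again in AppendEntry below) is just "member of either set of the joint configuration"; an illustrative sketch with our own names:

```rust
use std::collections::HashSet;

// A node may participate if it belongs to the old member set, or, during a
// joint configuration, to the incoming one.
fn is_member(a: &str, members: &HashSet<String>, new_members: Option<&HashSet<String>>) -> bool {
    members.contains(a) || new_members.is_some_and(|nm| nm.contains(a))
}
```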
\* Proposer p gets elected.
BecomeLeader(p) ==
/\ prop_state[p].state = "campaign"
\* Votes must form a quorum in both sets (if newMembers exists).
/\ PAS!FormsQuorum(DOMAIN prop_state[p].votes, prop_conf[p].members)
/\ \/ prop_conf[p].newMembers = NULL
\* TLA+ disjunction evaluation doesn't short-circuit for a good reason:
\* https://groups.google.com/g/tlaplus/c/U6tOJ4dsjVM/m/UdOznPCVBwAJ
\* so repeat the null check.
\/ (prop_conf[p].newMembers /= NULL) /\ (PAS!FormsQuorum(DOMAIN prop_state[p].votes, prop_conf[p].newMembers))
\* DoBecomeLeader will copy WAL of the highest voter to proposer's WAL, so
\* ensure its conf is still the same. In the impl WAL fetching also has to
\* check the configuration.
/\ prop_conf[p].generation = acc_conf[PAS!MaxVoteAcc(p)].generation
/\ \A a \in DOMAIN prop_state[p].votes: prop_conf[p].generation = acc_conf[a].generation
/\ PAS!DoBecomeLeader(p)
/\ UNCHANGED <<prop_conf, acc_conf, conf_store>>

UpdateTerm(p, a) ==
/\ PAS!UpdateTerm(p, a)
/\ UNCHANGED <<prop_conf, acc_conf, conf_store>>

TruncateWal(p, a) ==
/\ prop_state[p].state = "leader"
\* Configuration must be the same.
/\ prop_conf[p].generation = acc_conf[a].generation
/\ PAS!TruncateWal(p, a)
/\ UNCHANGED <<prop_conf, acc_conf, conf_store>>

NewEntry(p) ==
/\ PAS!NewEntry(p)
/\ UNCHANGED <<prop_conf, acc_conf, conf_store>>

AppendEntry(p, a) ==
/\ prop_state[p].state = "leader"
\* Configuration must be the same.
/\ prop_conf[p].generation = acc_conf[a].generation
\* And a is a member of it. Ignoring this likely wouldn't hurt, but it's not
\* useful either.
/\ \/ a \in prop_conf[p].members
\/ (prop_conf[p].newMembers /= NULL) /\ (a \in prop_conf[p].newMembers)
/\ PAS!AppendEntry(p, a)
/\ UNCHANGED <<prop_conf, acc_conf, conf_store>>

\* see PAS!CommitEntries for comments.
CommitEntries(p) ==
/\ prop_state[p].state = "leader"
/\ \E q1 \in PAS!AllMinQuorums(prop_conf[p].members):
LET q1_commit_lsn == PAS!QuorumCommitLsn(p, q1) IN
\* Configuration must be the same.
/\ \A a \in q1: prop_conf[p].generation = acc_conf[a].generation
/\ q1_commit_lsn /= NULL
\* We must collect acks from both quorums, if newMembers is present.
/\ IF prop_conf[p].newMembers = NULL THEN
PAS!DoCommitEntries(p, q1_commit_lsn)
ELSE
\E q2 \in PAS!AllMinQuorums(prop_conf[p].newMembers):
LET q2_commit_lsn == PAS!QuorumCommitLsn(p, q2) IN
\* Configuration must be the same.
/\ \A a \in q2: prop_conf[p].generation = acc_conf[a].generation
/\ q2_commit_lsn /= NULL
/\ PAS!DoCommitEntries(p, PAS!Min(q1_commit_lsn, q2_commit_lsn))
/\ UNCHANGED <<prop_conf, acc_conf, conf_store>>
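CommitEntries above encodes the usual joint-consensus commit rule: during a joint configuration an LSN counts as committed only once quorums of both member sets acknowledge it, hence the Min of the two quorum LSNs. An illustrative sketch:

```rust
// Commit LSN under a (possibly joint) configuration: take the old quorum's
// LSN, and during a joint conf additionally cap it by the new quorum's LSN.
fn joint_commit_lsn(q1_lsn: u64, q2_lsn: Option<u64>) -> u64 {
    match q2_lsn {
        Some(q2) => q1_lsn.min(q2),
        None => q1_lsn,
    }
}
```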
\* Proposer p adopts higher conf c from the conf store or from some acceptor.
ProposerSwitchConf(p) ==
/\ \E c \in ({conf_store} \union {acc_conf[a]: a \in acceptors}):
\* p's conf is lower than c.
/\ (c.generation > prop_conf[p].generation)
\* We allow bumping the conf without restart only when wp is already elected.
\* If it isn't, the votes it has already collected are from the previous
\* configuration and can't be used.
\*
\* So if the proposer is in 'campaign', in the impl we would restart it, preserving
\* conf and increasing term. In the spec this transition is already covered
\* by the more generic RestartProposer, so we don't specify it here.
/\ prop_state[p].state = "leader"
/\ prop_conf' = [prop_conf EXCEPT ![p] = c]
/\ UNCHANGED <<prop_state, acc_state, committed, elected_history, acc_conf, conf_store>>

\* Do CAS on the conf store, starting change into the new_members conf.
StartChange(new_members) ==
\* Possible only if we don't already have a change in progress.
/\ conf_store.newMembers = NULL
\* Not necessary, but reduces space a bit.
/\ new_members /= conf_store.members
/\ conf_store' = [generation |-> conf_store.generation + 1, members |-> conf_store.members, newMembers |-> new_members]
/\ UNCHANGED <<prop_state, acc_state, committed, elected_history, prop_conf, acc_conf>>

\* Acceptor's last_log_term.
AccLastLogTerm(acc) ==
PAS!LastLogTerm(PAS!AcceptorTermHistory(acc))

\* Do CAS on the conf store, transferring the joint conf into the newMembers only.
FinishChange ==
\* have joint conf
/\ conf_store.newMembers /= NULL
\* The conditions for finishing the change are:
/\ \E qo \in PAS!AllMinQuorums(conf_store.members):
\* 1) Old majority must be aware of the joint conf.
\* Note: generally the driver can't know the current acceptor
\* generation, it can only know that it once had been the
\* expected one, but it might have advanced since then.
\* But as explained at the top of the file, if the acceptor gen
\* advanced, FinishChange will never be able to complete
\* due to CAS anyway. We use strict equality here because
\* that's what makes sense conceptually (an old driver should
\* abandon its attempt if it observes that the conf has advanced).
/\ \A a \in qo: conf_store.generation = acc_conf[a].generation
\* 2) The new member set must have the log synced, i.e. some majority of it needs
\* to have <last_log_term, lsn> at least as high as the max of some
\* old majority.
\* 3) Term must be synced, i.e. some majority of the new set must
\* have term >= the max term of some old majority.
\* This ensures that two leaders are never elected with the same
\* term even after config change (which would be bad unless we treat
\* generation as a part of the term, which we don't).
\* 4) A majority of the new set must be aware of the joint conf.
\* This allows safely destroying acceptor state if it is not a
\* member of its current conf (which is useful for cleanup after
\* migration as well as for aborts).
/\ LET sync_pos == PAS!MaxTermLsn({[term |-> AccLastLogTerm(a), lsn |-> PAS!FlushLsn(a)]: a \in qo})
sync_term == PAS!Maximum({acc_state[a].term: a \in qo})
IN
\E qn \in PAS!AllMinQuorums(conf_store.newMembers):
\A a \in qn:
/\ PAS!TermLsnGE([term |-> AccLastLogTerm(a), lsn |-> PAS!FlushLsn(a)], sync_pos)
/\ acc_state[a].term >= sync_term
\* The same note as above about strict equality applies here.
/\ conf_store.generation = acc_conf[a].generation
/\ conf_store' = [generation |-> conf_store.generation + 1, members |-> conf_store.newMembers, newMembers |-> NULL]
/\ UNCHANGED <<prop_state, acc_state, committed, elected_history, prop_conf, acc_conf>>

\* Do CAS on the conf store, aborting the change in progress.
AbortChange ==
\* have joint conf
/\ conf_store.newMembers /= NULL
/\ conf_store' = [generation |-> conf_store.generation + 1, members |-> conf_store.members, newMembers |-> NULL]
/\ UNCHANGED <<prop_state, acc_state, committed, elected_history, prop_conf, acc_conf>>

\* Acceptor a switches to a higher configuration from the conf store
\* or from some proposer.
AccSwitchConf(a) ==
/\ \E c \in ({conf_store} \union {prop_conf[p]: p \in proposers}):
/\ acc_conf[a].generation < c.generation
/\ acc_conf' = [acc_conf EXCEPT ![a] = c]
/\ UNCHANGED <<prop_state, acc_state, committed, elected_history, prop_conf, conf_store>>

\* Nuke all acceptor state if it is not a member of its current conf. Models
\* cleanup after migration/abort.
AccReset(a) ==
/\ \/ (acc_conf[a].newMembers = NULL) /\ (a \notin acc_conf[a].members)
\/ (acc_conf[a].newMembers /= NULL) /\ (a \notin (acc_conf[a].members \union acc_conf[a].newMembers))
/\ acc_state' = [acc_state EXCEPT ![a] = PAS!InitAcc]
\* Set nextSendLsn for `a` to NULL everywhere. nextSendLsn serves as a mark
\* that the elected proposer performed TruncateWal on the acceptor, which isn't
\* true anymore after state reset. In the impl local deletion is expected to
\* terminate all existing connections.
/\ prop_state' = [p \in proposers |-> [prop_state[p] EXCEPT !.nextSendLsn[a] = NULL]]
/\ UNCHANGED <<committed, elected_history, prop_conf, acc_conf, conf_store>>

\*******************************************************************************
\* Final spec
\*******************************************************************************

Next ==
\/ \E p \in proposers: RestartProposer(p)
\/ \E p \in proposers: \E a \in acceptors: Vote(p, a)
\/ \E p \in proposers: BecomeLeader(p)
\/ \E p \in proposers: \E a \in acceptors: UpdateTerm(p, a)
\/ \E p \in proposers: \E a \in acceptors: TruncateWal(p, a)
\/ \E p \in proposers: NewEntry(p)
\/ \E p \in proposers: \E a \in acceptors: AppendEntry(p, a)
\/ \E p \in proposers: CommitEntries(p)
\/ \E new_members \in SUBSET acceptors: StartChange(new_members)
\/ FinishChange
\/ AbortChange
\/ \E p \in proposers: ProposerSwitchConf(p)
\/ \E a \in acceptors: AccSwitchConf(a)
\/ \E a \in acceptors: AccReset(a)

Spec == Init /\ [][Next]_<<prop_state, acc_state, committed, elected_history, prop_conf, acc_conf, conf_store>>

\********************************************************************************
\* Invariants
\********************************************************************************

AllConfs ==
{conf_store} \union {prop_conf[p]: p \in proposers} \union {acc_conf[a]: a \in acceptors}

\* Fairly trivial (given the conf store) invariant that different configurations
\* with the same generation are never issued.
ConfigSafety ==
\A c1, c2 \in AllConfs:
(c1.generation = c2.generation) => (c1 = c2)

ElectionSafety == PAS!ElectionSafety

ElectionSafetyFull == PAS!ElectionSafetyFull

LogIsMonotonic == PAS!LogIsMonotonic

LogSafety == PAS!LogSafety

\********************************************************************************
\* Invariants which don't need to hold, but are useful for playing/debugging.
\********************************************************************************

\* Check that we ever switch into a non joint conf.
MaxAccConf == ~ \E a \in acceptors:
/\ acc_conf[a].generation = 3
/\ acc_conf[a].newMembers /= NULL

CommittedNotTruncated == PAS!CommittedNotTruncated

MaxTerm == PAS!MaxTerm

MaxStoreConf == conf_store.generation <= 1

MaxAccWalLen == PAS!MaxAccWalLen

MaxCommitLsn == PAS!MaxCommitLsn

====
@@ -18,7 +18,7 @@
\* - old WAL is immediately copied to proposer on its election, without on-demand fetch later.

\* Some ideas how to break it to play around to get a feeling:
\* - replace Quorums with BadQuorums.
\* - replace Quorum with BadQuorum.
\* - remove 'don't commit entries from previous terms separately' rule in
\* CommitEntries and observe figure 8 from the raft paper.
\* With p2a3t4l4, a 32-step error trace was found in 1h on 80 cores.
@@ -69,16 +69,26 @@ Upsert(f, k, v, l(_)) ==

\*****************

NumAccs == Cardinality(acceptors)
\* Does the set of acceptors `acc_set` form a quorum in the member set `members`?
\* Acceptors not from `members` are excluded (matters only for reconfig).
FormsQuorum(acc_set, members) ==
Cardinality(acc_set \intersect members) >= (Cardinality(members) \div 2 + 1)

\* does acc_set form the quorum?
Quorum(acc_set) == Cardinality(acc_set) >= (NumAccs \div 2 + 1)
\* all quorums of acceptors
Quorums == {subset \in SUBSET acceptors: Quorum(subset)}
\* Like FormsQuorum, but for minimal quorum.
FormsMinQuorum(acc_set, members) ==
Cardinality(acc_set \intersect members) = (Cardinality(members) \div 2 + 1)

\* For substituting Quorums and seeing what happens.
BadQuorum(acc_set) == Cardinality(acc_set) >= (NumAccs \div 2)
BadQuorums == {subset \in SUBSET acceptors: BadQuorum(subset)}
\* All sets of acceptors forming (minimal) quorums in the member set `members`.
AllQuorums(members) == {subset \in SUBSET members: FormsQuorum(subset, members)}
AllMinQuorums(members) == {subset \in SUBSET acceptors: FormsMinQuorum(subset, members)}

\* For substituting Quorum and seeing what happens.
FormsBadQuorum(acc_set, members) ==
Cardinality(acc_set \intersect members) >= (Cardinality(members) \div 2)
FormsMinBadQuorum(acc_set, members) ==
Cardinality(acc_set \intersect members) = (Cardinality(members) \div 2)
AllBadQuorums(members) == {subset \in SUBSET acceptors: FormsBadQuorum(subset, members)}
AllMinBadQuorums(members) == {subset \in SUBSET acceptors: FormsMinBadQuorum(subset, members)}

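The quorum predicates above translate almost one-to-one into ordinary code; a small illustrative sketch of FormsQuorum/FormsMinQuorum (our own names). For a member set of size 3 the threshold is 2:

```rust
use std::collections::HashSet;

// Mirrors FormsQuorum: acceptors outside `members` are ignored.
fn forms_quorum(acc_set: &HashSet<String>, members: &HashSet<String>) -> bool {
    acc_set.intersection(members).count() >= members.len() / 2 + 1
}

// Mirrors FormsMinQuorum: exactly the minimal majority, nothing more.
fn forms_min_quorum(acc_set: &HashSet<String>, members: &HashSet<String>) -> bool {
    acc_set.intersection(members).count() == members.len() / 2 + 1
}
```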
\* flushLsn (end of WAL, i.e. index of next entry) of acceptor a.
FlushLsn(a) == Len(acc_state[a].wal) + 1
@@ -135,10 +145,11 @@ TypeOk ==
/\ IsWal(prop_state[p].wal)
\* Map of acceptor -> next lsn to send. It is set when truncate_wal is
\* done so sending entries is allowed only after that. In the impl TCP
\* ensures this ordering.
\* ensures this ordering. We use NULL instead of a missing value so we can
\* use EXCEPT in AccReset.
/\ \A a \in DOMAIN prop_state[p].nextSendLsn:
/\ a \in acceptors
/\ prop_state[p].nextSendLsn[a] \in Lsns
/\ prop_state[p].nextSendLsn[a] \in Lsns \union {NULL}
/\ \A a \in acceptors:
/\ DOMAIN acc_state[a] = {"term", "termHistory", "wal"}
/\ acc_state[a].term \in Terms
@@ -167,6 +178,19 @@ TypeOk ==
\* Initial
\********************************************************************************

InitAcc ==
[
\* There will be no leader in zero term, 1 is the first
\* real one.
term |-> 0,
\* Again, leader in term 0 doesn't exist, but we initialize
\* term histories with it to always have a common point in
\* them. Lsn is 1 because TLA+ sequences are indexed from 1
\* (we don't want to truncate WAL out of range).
termHistory |-> << [term |-> 0, lsn |-> 1] >>,
wal |-> << >>
]

Init ==
/\ prop_state = [p \in proposers |-> [
state |-> "campaign",
@@ -174,19 +198,9 @@ Init ==
votes |-> EmptyF,
termHistory |-> << >>,
wal |-> << >>,
nextSendLsn |-> EmptyF
nextSendLsn |-> [a \in acceptors |-> NULL]
]]
/\ acc_state = [a \in acceptors |-> [
\* There will be no leader in zero term, 1 is the first
\* real one.
term |-> 0,
\* Again, leader in term 0 doesn't exist, but we initialize
\* term histories with it to always have a common point in
\* them. Lsn is 1 because TLA+ sequences are indexed from 1
\* (we don't want to truncate WAL out of range).
termHistory |-> << [term |-> 0, lsn |-> 1] >>,
wal |-> << >>
]]
/\ acc_state = [a \in acceptors |-> InitAcc]
/\ committed = {}
/\ elected_history = EmptyF

@@ -195,23 +209,35 @@ Init ==
\* Actions
\********************************************************************************

\* Proposer loses all state.
RestartProposerWithTerm(p, new_term) ==
/\ prop_state' = [prop_state EXCEPT ![p].state = "campaign",
![p].term = new_term,
![p].votes = EmptyF,
![p].termHistory = << >>,
![p].wal = << >>,
![p].nextSendLsn = [a \in acceptors |-> NULL]]
/\ UNCHANGED <<acc_state, committed, elected_history>>

\* Proposer p loses all state, restarting.
\* For simplicity (and to reduce state space), we assume it immediately gets
\* the current state from quorum q of acceptors, determining the term it will
\* request to vote for.
RestartProposer(p, q) ==
/\ Quorum(q)
/\ LET new_term == Maximum({acc_state[a].term : a \in q}) + 1 IN
/\ prop_state' = [prop_state EXCEPT ![p].state = "campaign",
![p].term = new_term,
![p].votes = EmptyF,
![p].termHistory = << >>,
![p].wal = << >>,
![p].nextSendLsn = EmptyF]
/\ UNCHANGED <<acc_state, committed, elected_history>>
RestartProposer(p) ==
\E q \in AllQuorums(acceptors):
LET new_term == Maximum({acc_state[a].term : a \in q}) + 1 IN
RestartProposerWithTerm(p, new_term)

\* Term history of acceptor a's WAL: the saved one truncated to contain only <=
\* local FlushLsn entries.
\* local FlushLsn entries. Note that FlushLsn is the end LSN of the last entry
\* (and begin LSN of the next). The mental model for the non strict comparison is
\* that once a proposer is elected it immediately writes a log record of zero
\* length. This allows the leader to commit the existing log without writing any
\* new entries. For example, assume an acceptor has WAL
\* 1.1, 1.2
\* written by a prop with term 1; its current <last_log_term, flush_lsn>
\* is <1, 3>. Now a prop with term 2 and max vote from this acc is elected.
\* Once TruncateWAL is done, <last_log_term, flush_lsn> becomes <2, 3>
\* without any new records explicitly written.
AcceptorTermHistory(a) ==
SelectSeq(acc_state[a].termHistory, LAMBDA th_entry: th_entry.lsn <= FlushLsn(a))

@@ -230,35 +256,52 @@ Vote(p, a) ==
\* Get lastLogTerm from term history th.
LastLogTerm(th) == th[Len(th)].term

\* Compares <term, lsn> pairs: returns true if tl1 >= tl2.
TermLsnGE(tl1, tl2) ==
/\ tl1.term >= tl2.term
/\ (tl1.term = tl2.term => tl1.lsn >= tl2.lsn)

\* Choose the max <term, lsn> pair in a non empty set of them.
MaxTermLsn(term_lsn_set) ==
CHOOSE max_tl \in term_lsn_set: \A tl \in term_lsn_set: TermLsnGE(max_tl, tl)

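TermLsnGE above is plain lexicographic order on <term, lsn>: a higher term always wins, and the lsn only breaks ties. In Rust it collapses to tuple comparison (illustrative):

```rust
// TermLsnGE(tl1, tl2) is exactly (term, lsn) tuple ordering.
fn term_lsn_ge(tl1: (u64, u64), tl2: (u64, u64)) -> bool {
    tl1 >= tl2
}

// e.g. (2, 1) >= (1, 9) and (2, 5) >= (2, 5) both hold.
```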
\* Find acceptor with the highest <last_log_term, lsn> vote in proposer p's votes.
|
||||
MaxVoteAcc(p) ==
|
||||
CHOOSE a \in DOMAIN prop_state[p].votes:
|
||||
LET a_vote == prop_state[p].votes[a]
|
||||
a_vote_term_lsn == [term |-> LastLogTerm(a_vote.termHistory), lsn |-> a_vote.flushLsn]
|
||||
vote_term_lsns == {[term |-> LastLogTerm(v.termHistory), lsn |-> v.flushLsn]: v \in Range(prop_state[p].votes)}
|
||||
IN
|
||||
a_vote_term_lsn = MaxTermLsn(vote_term_lsns)
|
||||
|
||||
\* Workhorse for BecomeLeader.
|
||||
\* Assumes the check prop_state[p] votes is quorum has been done *outside*.
|
||||
DoBecomeLeader(p) ==
|
||||
LET
|
||||
\* Find acceptor with the highest <last_log_term, lsn> vote.
|
||||
max_vote_acc == MaxVoteAcc(p)
|
||||
max_vote == prop_state[p].votes[max_vote_acc]
|
||||
prop_th == Append(max_vote.termHistory, [term |-> prop_state[p].term, lsn |-> max_vote.flushLsn])
|
||||
IN
|
||||
\* We copy all log preceding proposer's term from the max vote node so
|
||||
\* make sure it is still on one term with us. This is a model
|
||||
\* simplification which can be removed, in impl we fetch WAL on demand
|
||||
\* from safekeeper which has it later. Note though that in case of on
|
||||
\* demand fetch we must check on donor not only term match, but that
|
||||
\* truncate_wal had already been done (if it is not max_vote_acc).
|
||||
/\ acc_state[max_vote_acc].term = prop_state[p].term
|
||||
/\ prop_state' = [prop_state EXCEPT ![p].state = "leader",
|
||||
![p].termHistory = prop_th,
|
||||
![p].wal = acc_state[max_vote_acc].wal
|
||||
]
|
||||
/\ elected_history' = Upsert(elected_history, prop_state[p].term, 1, LAMBDA c: c + 1)
|
||||
/\ UNCHANGED <<acc_state, committed>>
|
||||

\* Proposer p gets elected.
BecomeLeader(p) ==
    /\ prop_state[p].state = "campaign"
    /\ Quorum(DOMAIN prop_state[p].votes)
    /\ LET
           \* Find acceptor with the highest <last_log_term, lsn> vote.
           max_vote_acc ==
               CHOOSE a \in DOMAIN prop_state[p].votes:
                   LET v == prop_state[p].votes[a]
                   IN \A v2 \in Range(prop_state[p].votes):
                       /\ LastLogTerm(v.termHistory) >= LastLogTerm(v2.termHistory)
                       /\ (LastLogTerm(v.termHistory) = LastLogTerm(v2.termHistory) => v.flushLsn >= v2.flushLsn)
           max_vote == prop_state[p].votes[max_vote_acc]
           prop_th == Append(max_vote.termHistory, [term |-> prop_state[p].term, lsn |-> max_vote.flushLsn])
       IN
           \* We copy all log preceding proposer's term from the max vote node so
           \* make sure it is still on one term with us. This is a model
           \* simplification which can be removed, in impl we fetch WAL on demand
           \* from safekeeper which has it later. Note though that in case of on
           \* demand fetch we must check on donor not only term match, but that
           \* truncate_wal had already been done (if it is not max_vote_acc).
           /\ acc_state[max_vote_acc].term = prop_state[p].term
           /\ prop_state' = [prop_state EXCEPT ![p].state = "leader",
                             ![p].termHistory = prop_th,
                             ![p].wal = acc_state[max_vote_acc].wal
                            ]
           /\ elected_history' = Upsert(elected_history, prop_state[p].term, 1, LAMBDA c: c + 1)
           /\ UNCHANGED <<acc_state, committed>>

    /\ FormsQuorum(DOMAIN prop_state[p].votes, acceptors)
    /\ DoBecomeLeader(p)

\* Acceptor a learns about elected proposer p's term. In impl it matches to
\* VoteRequest/VoteResponse exchange when leader is already elected and is not
@@ -287,10 +330,11 @@ FindHighestCommonPoint(prop_th, acc_th, acc_flush_lsn) ==
    IN
        [term |-> last_common_term, lsn |-> Min(acc_common_term_end, prop_common_term_end)]

\* Elected proposer p immediately truncates WAL (and term history) of acceptor a
\* before starting streaming. Establishes nextSendLsn for a.
\* Elected proposer p immediately truncates WAL (and sets term history) of
\* acceptor a before starting streaming. Establishes nextSendLsn for a.
\*
\* In impl this happens at each reconnection, here we also allow to do it multiple times.
\* In impl this happens at each reconnection, here we also allow to do it
\* multiple times.
TruncateWal(p, a) ==
    /\ prop_state[p].state = "leader"
    /\ acc_state[a].term = prop_state[p].term
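Only the tail of FindHighestCommonPoint is visible in this hunk. A hedged reconstruction of its likely shape (an assumption, not the spec's exact text): take the longest common prefix of the proposer's and acceptor's term histories, then cap the result at the end LSN of that term on whichever side ends it earlier.

def find_highest_common_point(prop_th, acc_th, acc_flush_lsn):
    # Term histories are (term, start_lsn) lists; a term's end LSN is the next
    # entry's start LSN, or a side-specific bound for the last entry.
    def term_end(th, i, last_end):
        return th[i + 1][1] if i + 1 < len(th) else last_end

    i = 0
    while i < len(prop_th) and i < len(acc_th) and prop_th[i] == acc_th[i]:
        i += 1
    last_common = i - 1  # assumes a non-empty common prefix
    acc_end = term_end(acc_th, last_common, acc_flush_lsn)
    prop_end = term_end(prop_th, last_common, float("inf"))  # open-ended
    return prop_th[last_common][0], min(acc_end, prop_end)

# The acceptor diverged in term 2 while the proposer went to term 3:
# everything past LSN 3 must be truncated.
assert find_highest_common_point([(1, 1), (3, 3)], [(1, 1), (2, 3)], 5) == (1, 3)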
@@ -321,8 +365,8 @@ NewEntry(p) ==
AppendEntry(p, a) ==
    /\ prop_state[p].state = "leader"
    /\ acc_state[a].term = prop_state[p].term
    /\ a \in DOMAIN prop_state[p].nextSendLsn \* did TruncateWal
    /\ prop_state[p].nextSendLsn[a] <= Len(prop_state[p].wal) \* have smth to send
    /\ prop_state[p].nextSendLsn[a] /= NULL \* did TruncateWal
    /\ prop_state[p].nextSendLsn[a] <= Len(prop_state[p].wal) \* have smth to send
    /\ LET
           send_lsn == prop_state[p].nextSendLsn[a]
           entry == prop_state[p].wal[send_lsn]
@@ -337,41 +381,65 @@ AppendEntry(p, a) ==
PropStartLsn(p) ==
    IF prop_state[p].state = "leader" THEN prop_state[p].termHistory[Len(prop_state[p].termHistory)].lsn ELSE NULL

\* Proposer p commits all entries it can using quorum q. Note that unlike
\* will62794/logless-reconfig this allows to commit entries from previous terms
\* (when conditions for that are met).
CommitEntries(p, q) ==
    /\ prop_state[p].state = "leader"
    /\ \A a \in q:
\* LSN which can be committed by proposer p using min quorum q (check that q
\* forms quorum must have been done outside). NULL if there is none.
QuorumCommitLsn(p, q) ==
    IF
        /\ prop_state[p].state = "leader"
        /\ \A a \in q:
            \* Without explicit responses to appends this ensures that append
            \* up to FlushLsn has been accepted.
            /\ acc_state[a].term = prop_state[p].term
            \* nextSendLsn existence means TruncateWal has happened, it ensures
            \* acceptor's WAL (and FlushLsn) are from proper proposer's history.
            \* Alternatively we could compare LastLogTerm here, but that's closer to
            \* what we do in the impl (we check flushLsn in AppendResponse, but
            \* AppendRequest is processed only if HandleElected handling was good).
            /\ a \in DOMAIN prop_state[p].nextSendLsn
    \* Now find the LSN present on all the quorum.
    /\ LET quorum_lsn == Minimum({FlushLsn(a): a \in q}) IN
        \* This is the basic Raft rule of not committing entries from previous
        \* terms except along with current term entry (commit them only when
        \* quorum recovers, i.e. last_log_term on it reaches leader's term).
        /\ quorum_lsn >= PropStartLsn(p)
        /\ committed' = committed \cup {[term |-> prop_state[p].wal[lsn], lsn |-> lsn]: lsn \in 1..(quorum_lsn - 1)}
        /\ UNCHANGED <<prop_state, acc_state, elected_history>>
            /\ prop_state[p].nextSendLsn[a] /= NULL
    THEN
        \* Now find the LSN present on all the quorum.
        LET quorum_lsn == Minimum({FlushLsn(a): a \in q}) IN
        \* This is the basic Raft rule of not committing entries from previous
        \* terms except along with current term entry (commit them only when
        \* quorum recovers, i.e. last_log_term on it reaches leader's term).
        IF quorum_lsn >= PropStartLsn(p) THEN
            quorum_lsn
        ELSE
            NULL
    ELSE
        NULL

\* Commit all entries on proposer p with record lsn < commit_lsn.
DoCommitEntries(p, commit_lsn) ==
    /\ committed' = committed \cup {[term |-> prop_state[p].wal[lsn], lsn |-> lsn]: lsn \in 1..(commit_lsn - 1)}
    /\ UNCHANGED <<prop_state, acc_state, elected_history>>

\* Proposer p commits all entries it can using some quorum. Note that unlike
\* will62794/logless-reconfig this allows to commit entries from previous terms
\* (when conditions for that are met).
CommitEntries(p) ==
    /\ prop_state[p].state = "leader"
    \* Using min quorums here is better because 1) QuorumCommitLsn for
    \* simplicity checks min across all accs in q. 2) it probably makes
    \* evaluation faster.
    /\ \E q \in AllMinQuorums(acceptors):
        LET commit_lsn == QuorumCommitLsn(p, q) IN
        /\ commit_lsn /= NULL
        /\ DoCommitEntries(p, commit_lsn)

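The refactored commit rule in QuorumCommitLsn, restated as a Python sketch (state shapes are illustrative): every acceptor in the min quorum must be on the leader's term and already truncated, and the minimum flushed LSN across the quorum must have reached the leader's own term start before anything is committed.

def quorum_commit_lsn(prop, quorum, acc_state, flush_lsn):
    if prop["state"] != "leader":
        return None
    for a in quorum:
        # Acceptor on our term: appends up to its FlushLsn were accepted.
        if acc_state[a]["term"] != prop["term"]:
            return None
        # next_send_lsn set means TruncateWal ran, so the acceptor's WAL
        # belongs to the proposer's history.
        if prop["next_send_lsn"].get(a) is None:
            return None
    q_lsn = min(flush_lsn(a) for a in quorum)
    # Raft rule: commit previous-term entries only once the quorum has
    # caught up to the leader's own term start LSN.
    return q_lsn if q_lsn >= prop["term_history"][-1][1] else None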
\*******************************************************************************
\* Final spec
\*******************************************************************************

Next ==
    \/ \E q \in Quorums: \E p \in proposers: RestartProposer(p, q)
    \/ \E p \in proposers: RestartProposer(p)
    \/ \E p \in proposers: \E a \in acceptors: Vote(p, a)
    \/ \E p \in proposers: BecomeLeader(p)
    \/ \E p \in proposers: \E a \in acceptors: UpdateTerm(p, a)
    \/ \E p \in proposers: \E a \in acceptors: TruncateWal(p, a)
    \/ \E p \in proposers: NewEntry(p)
    \/ \E p \in proposers: \E a \in acceptors: AppendEntry(p, a)
    \/ \E q \in Quorums: \E p \in proposers: CommitEntries(p, q)
    \/ \E p \in proposers: CommitEntries(p)

Spec == Init /\ [][Next]_<<prop_state, acc_state, committed, elected_history>>

@@ -2,6 +2,7 @@

# Usage: ./modelcheck.sh <config_file> <spec_file>, e.g.
# ./modelcheck.sh models/MCProposerAcceptorStatic_p2_a3_t3_l3.cfg MCProposerAcceptorStatic.tla
# ./modelcheck.sh models/MCProposerAcceptorReconfig_p2_a3_t3_l3_c3.cfg MCProposerAcceptorReconfig.tla
CONFIG=$1
SPEC=$2

@@ -12,6 +13,7 @@ mkdir -p "tlc-results"
CONFIG_FILE=$(basename -- "$CONFIG")
outfilename="$SPEC-${CONFIG_FILE}-$(date --utc +%Y-%m-%d--%H-%M-%S)".log
outfile="tlc-results/$outfilename"
echo "saving results to $outfile"
touch $outfile

# Save some info about the run.
@@ -45,5 +47,6 @@ echo "" >> $outfile
# https://docs.tlapl.us/codebase:architecture#fingerprint_sets_fpsets
#
# Add -simulate to run in infinite simulation mode.
# -coverage 1 is useful for profiling (check how many times actions are taken).
java -Xmx$MEM -XX:MaxDirectMemorySize=$MEM -XX:+UseParallelGC -Dtlc2.tool.fp.FPSet.impl=tlc2.tool.fp.OffHeapDiskFPSet \
  -cp "${TOOLSPATH}" tlc2.TLC $SPEC -config $CONFIG -workers auto -gzip | tee -a $outfile

@@ -0,0 +1,21 @@
CONSTANTS
NULL = NULL
proposers = {p1, p2}
acceptors = {a1, a2}
max_term = 2
max_entries = 2
max_generation = 3
SPECIFICATION Spec
CONSTRAINT StateConstraint
INVARIANT
TypeOk
ConfigSafety
ElectionSafetyFull
LogIsMonotonic
LogSafety
\* As its comment explains generally it is not expected to hold, but
\* in such small model it is true.
CommittedNotTruncated
SYMMETRY ProposerAcceptorSymmetry
CHECK_DEADLOCK FALSE
ALIAS Alias
@@ -0,0 +1,19 @@
CONSTANTS
NULL = NULL
proposers = {p1, p2}
acceptors = {a1, a2}
max_term = 2
max_entries = 2
max_generation = 5
SPECIFICATION Spec
CONSTRAINT StateConstraint
INVARIANT
TypeOk
ConfigSafety
ElectionSafetyFull
LogIsMonotonic
LogSafety
CommittedNotTruncated
SYMMETRY ProposerAcceptorSymmetry
CHECK_DEADLOCK FALSE
ALIAS Alias
@@ -0,0 +1,20 @@
CONSTANTS
NULL = NULL
proposers = {p1, p2}
acceptors = {a1, a2, a3}
max_term = 2
max_entries = 2
max_generation = 3
SPECIFICATION Spec
CONSTRAINT StateConstraint
INVARIANT
TypeOk
ConfigSafety
ElectionSafetyFull
LogIsMonotonic
LogSafety
CommittedNotTruncated
SYMMETRY ProposerAcceptorSymmetry
CHECK_DEADLOCK FALSE
ALIAS Alias

@@ -0,0 +1,19 @@
CONSTANTS
NULL = NULL
proposers = {p1, p2}
acceptors = {a1, a2, a3, a4}
max_term = 2
max_entries = 2
max_generation = 3
SPECIFICATION Spec
CONSTRAINT StateConstraint
INVARIANT
TypeOk
ElectionSafetyFull
LogIsMonotonic
LogSafety
CommittedNotTruncated
SYMMETRY ProposerAcceptorSymmetry
CHECK_DEADLOCK FALSE
ALIAS Alias

25
safekeeper/spec/remove_interm_progress.awk
Normal file
@@ -0,0 +1,25 @@
# Print all lines, but thin out lines starting with Progress:
# leave only first and last 5 ones in the beginning, and only 1 of 1440
# of others (once a day).
# Also remove checkpointing logs.
{
    lines[NR] = $0
}
$0 ~ /^Progress/ {
    ++pcount
}
END {
    progress_idx = 0
    for (i = 1; i <= NR; i++) {
        if (lines[i] ~ /^Progress/) {
            if (progress_idx < 5 || progress_idx >= pcount - 5 || progress_idx % 1440 == 0) {
                print lines[i]
            }
            progress_idx++
        }
        else if (lines[i] ~ /^Checkpointing/) {}
        else {
            print lines[i]
        }
    }
}
3
safekeeper/spec/remove_interm_progress.sh
Executable file
@@ -0,0 +1,3 @@
#!/bin/bash

awk -f remove_interm_progress.awk $1 > $1.thin
@@ -0,0 +1,65 @@
git revision: 9e386917a
Platform: Linux neon-dev-arm64-1 6.8.0-49-generic #49-Ubuntu SMP PREEMPT_DYNAMIC Sun Nov 3 21:21:58 UTC 2024 aarch64 aarch64 aarch64 GNU/Linux
CPU Info Linux: Neoverse-N1
CPU Cores Linux: 80
CPU Info Mac:
CPU Cores Mac:
Spec: MCProposerAcceptorReconfig.tla
Config: models/MCProposerAcceptorReconfig_p2_a2_t2_l2_c3.cfg
----
CONSTANTS
NULL = NULL
proposers = {p1, p2}
acceptors = {a1, a2}
max_term = 2
max_entries = 2
max_generation = 3
SPECIFICATION Spec
CONSTRAINT StateConstraint
INVARIANT
TypeOk
ElectionSafetyFull
LogIsMonotonic
LogSafety
\* CommittedNotTruncated
SYMMETRY ProposerAcceptorSymmetry
CHECK_DEADLOCK FALSE
ALIAS Alias

----

TLC2 Version 2.20 of Day Month 20?? (rev: f68cb71)
Running breadth-first search Model-Checking with fp 99 and seed -9189733667206762985 with 35 workers on 80 cores with 27307MB heap and 30720MB offheap memory [pid: 391272] (Linux 6.8.0-49-generic aarch64, Ubuntu 21.0.5 x86_64, OffHeapDiskFPSet, DiskStateQueue).
Parsing file /home/arseny/neon2/safekeeper/spec/MCProposerAcceptorReconfig.tla
Parsing file /tmp/tlc-3211535543066978921/TLC.tla (jar:file:/home/arseny/tla2tools.jar!/tla2sany/StandardModules/TLC.tla)
Parsing file /home/arseny/neon2/safekeeper/spec/ProposerAcceptorReconfig.tla
Parsing file /tmp/tlc-3211535543066978921/_TLCTrace.tla (jar:file:/home/arseny/tla2tools.jar!/tla2sany/StandardModules/_TLCTrace.tla)
Parsing file /tmp/tlc-3211535543066978921/Integers.tla (jar:file:/home/arseny/tla2tools.jar!/tla2sany/StandardModules/Integers.tla)
Parsing file /tmp/tlc-3211535543066978921/Sequences.tla (jar:file:/home/arseny/tla2tools.jar!/tla2sany/StandardModules/Sequences.tla)
Parsing file /tmp/tlc-3211535543066978921/FiniteSets.tla (jar:file:/home/arseny/tla2tools.jar!/tla2sany/StandardModules/FiniteSets.tla)
Parsing file /tmp/tlc-3211535543066978921/Naturals.tla (jar:file:/home/arseny/tla2tools.jar!/tla2sany/StandardModules/Naturals.tla)
Parsing file /home/arseny/neon2/safekeeper/spec/ProposerAcceptorStatic.tla
Parsing file /tmp/tlc-3211535543066978921/TLCExt.tla (jar:file:/home/arseny/tla2tools.jar!/tla2sany/StandardModules/TLCExt.tla)
Semantic processing of module Naturals
Semantic processing of module Sequences
Semantic processing of module FiniteSets
Semantic processing of module TLC
Semantic processing of module Integers
Semantic processing of module ProposerAcceptorStatic
Semantic processing of module ProposerAcceptorReconfig
Semantic processing of module TLCExt
Semantic processing of module _TLCTrace
Semantic processing of module MCProposerAcceptorReconfig
Starting... (2024-12-11 04:24:13)
Computing initial states...
Finished computing initial states: 2 states generated, with 1 of them distinct at 2024-12-11 04:24:15.
Progress(16) at 2024-12-11 04:24:18: 1,427,589 states generated (1,427,589 s/min), 142,472 distinct states found (142,472 ds/min), 47,162 states left on queue.
Model checking completed. No error has been found.
Estimates of the probability that TLC did not check all reachable states
because two distinct states had the same fingerprint:
calculated (optimistic): val = 1.0E-6
based on the actual fingerprints: val = 4.2E-8
17746857 states generated, 1121659 distinct states found, 0 states left on queue.
The depth of the complete state graph search is 37.
The average outdegree of the complete state graph is 1 (minimum is 0, the maximum 9 and the 95th percentile is 3).
Finished in 33s at (2024-12-11 04:24:46)
@@ -0,0 +1,64 @@
git revision: 9e386917a
Platform: Linux neon-dev-arm64-1 6.8.0-49-generic #49-Ubuntu SMP PREEMPT_DYNAMIC Sun Nov 3 21:21:58 UTC 2024 aarch64 aarch64 aarch64 GNU/Linux
CPU Info Linux: Neoverse-N1
CPU Cores Linux: 80
CPU Info Mac:
CPU Cores Mac:
Spec: MCProposerAcceptorReconfig.tla
Config: models/MCProposerAcceptorReconfig_p2_a2_t2_l2_c5.cfg
----
CONSTANTS
NULL = NULL
proposers = {p1, p2}
acceptors = {a1, a2}
max_term = 2
max_entries = 2
max_generation = 5
SPECIFICATION Spec
CONSTRAINT StateConstraint
INVARIANT
TypeOk
ElectionSafetyFull
LogIsMonotonic
LogSafety
\* CommittedNotTruncated
SYMMETRY ProposerAcceptorSymmetry
CHECK_DEADLOCK FALSE
ALIAS Alias

----

TLC2 Version 2.20 of Day Month 20?? (rev: f68cb71)
Running breadth-first search Model-Checking with fp 114 and seed -8099467489737745861 with 35 workers on 80 cores with 27307MB heap and 30720MB offheap memory [pid: 392020] (Linux 6.8.0-49-generic aarch64, Ubuntu 21.0.5 x86_64, OffHeapDiskFPSet, DiskStateQueue).
Parsing file /home/arseny/neon2/safekeeper/spec/MCProposerAcceptorReconfig.tla
Parsing file /tmp/tlc-11757875725969857497/TLC.tla (jar:file:/home/arseny/tla2tools.jar!/tla2sany/StandardModules/TLC.tla)
Parsing file /home/arseny/neon2/safekeeper/spec/ProposerAcceptorReconfig.tla
Parsing file /tmp/tlc-11757875725969857497/_TLCTrace.tla (jar:file:/home/arseny/tla2tools.jar!/tla2sany/StandardModules/_TLCTrace.tla)
Parsing file /tmp/tlc-11757875725969857497/Integers.tla (jar:file:/home/arseny/tla2tools.jar!/tla2sany/StandardModules/Integers.tla)
Parsing file /tmp/tlc-11757875725969857497/Sequences.tla (jar:file:/home/arseny/tla2tools.jar!/tla2sany/StandardModules/Sequences.tla)
Parsing file /tmp/tlc-11757875725969857497/FiniteSets.tla (jar:file:/home/arseny/tla2tools.jar!/tla2sany/StandardModules/FiniteSets.tla)
Parsing file /tmp/tlc-11757875725969857497/Naturals.tla (jar:file:/home/arseny/tla2tools.jar!/tla2sany/StandardModules/Naturals.tla)
Parsing file /home/arseny/neon2/safekeeper/spec/ProposerAcceptorStatic.tla
Parsing file /tmp/tlc-11757875725969857497/TLCExt.tla (jar:file:/home/arseny/tla2tools.jar!/tla2sany/StandardModules/TLCExt.tla)
Semantic processing of module Naturals
Semantic processing of module Sequences
Semantic processing of module FiniteSets
Semantic processing of module TLC
Semantic processing of module Integers
Semantic processing of module ProposerAcceptorStatic
Semantic processing of module ProposerAcceptorReconfig
Semantic processing of module TLCExt
Semantic processing of module _TLCTrace
Semantic processing of module MCProposerAcceptorReconfig
Starting... (2024-12-11 04:26:12)
Computing initial states...
Finished computing initial states: 2 states generated, with 1 of them distinct at 2024-12-11 04:26:14.
Progress(14) at 2024-12-11 04:26:17: 1,519,385 states generated (1,519,385 s/min), 231,263 distinct states found (231,263 ds/min), 121,410 states left on queue.
Progress(20) at 2024-12-11 04:27:17: 42,757,204 states generated (41,237,819 s/min), 4,198,386 distinct states found (3,967,123 ds/min), 1,308,109 states left on queue.
Progress(22) at 2024-12-11 04:28:17: 83,613,929 states generated (40,856,725 s/min), 7,499,873 distinct states found (3,301,487 ds/min), 1,929,464 states left on queue.
Progress(23) at 2024-12-11 04:29:17: 124,086,758 states generated (40,472,829 s/min), 10,569,712 distinct states found (3,069,839 ds/min), 2,386,988 states left on queue.
Progress(24) at 2024-12-11 04:30:17: 163,412,538 states generated (39,325,780 s/min), 13,314,303 distinct states found (2,744,591 ds/min), 2,610,637 states left on queue.
Progress(25) at 2024-12-11 04:31:17: 202,643,708 states generated (39,231,170 s/min), 15,960,583 distinct states found (2,646,280 ds/min), 2,759,681 states left on queue.
Progress(26) at 2024-12-11 04:32:17: 240,681,633 states generated (38,037,925 s/min), 18,443,440 distinct states found (2,482,857 ds/min), 2,852,177 states left on queue.
Progress(27) at 2024-12-11 04:33:17: 278,559,134 states generated (37,877,501 s/min), 20,878,067 distinct states found (2,434,627 ds/min), 2,904,400 states left on queue.
Progress(28) at 2024-12-11 04:34:17: 316,699,911 states generated (38,140,777 s/min), 23,212,229 distinct states found (2,334,162 ds/min), 2,864,969 states left on queue.
@@ -51,12 +51,10 @@ use utils::{
#[global_allocator]
static GLOBAL: tikv_jemallocator::Jemalloc = tikv_jemallocator::Jemalloc;

// Configure jemalloc to sample allocations for profiles every 1 MB (1 << 20).
// TODO: disabled because concurrent CPU profiles cause seg faults. See:
// https://github.com/neondatabase/neon/issues/10225.
//#[allow(non_upper_case_globals)]
//#[export_name = "malloc_conf"]
//pub static malloc_conf: &[u8] = b"prof:true,prof_active:true,lg_prof_sample:20\0";
/// Configure jemalloc to sample allocations for profiles every 1 MB (1 << 20).
#[allow(non_upper_case_globals)]
#[export_name = "malloc_conf"]
pub static malloc_conf: &[u8] = b"prof:true,prof_active:true,lg_prof_sample:20\0";

const PID_FILE_NAME: &str = "safekeeper.pid";
const ID_FILE_NAME: &str = "safekeeper.id";

@@ -1,7 +1,6 @@
use pageserver_client::mgmt_api::{self, ResponseErrorMessageExt};
use reqwest::{Method, Url};
use serde::{de::DeserializeOwned, Serialize};
use std::str::FromStr;

pub struct Client {
    base_url: Url,
@@ -31,16 +30,11 @@ impl Client {
        RQ: Serialize + Sized,
        RS: DeserializeOwned + Sized,
    {
        // The configured URL has the /upcall path prefix for pageservers to use: we will strip that out
        // for general purpose API access.
        let url = Url::from_str(&format!(
            "http://{}:{}/{path}",
            self.base_url.host_str().unwrap(),
            self.base_url.port().unwrap()
        ))
        .unwrap();

        let mut builder = self.client.request(method, url);
        let request_path = self
            .base_url
            .join(&path)
            .expect("Failed to build request path");
        let mut builder = self.client.request(method, request_path);
        if let Some(body) = body {
            builder = builder.json(&body)
        }

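The behavioral difference in the hunk above, illustrated with Python's urljoin (which resolves relative paths much like reqwest's Url::join; the base URL here is a made-up example): rebuilding the URL from host and port drops any path prefix on the configured base URL, while joining against the base preserves it.

from urllib.parse import urljoin, urlparse

base = "http://controller.example:1234/upcall/"  # hypothetical configured base
path = "v1/status"

rebuilt = f"http://{urlparse(base).hostname}:{urlparse(base).port}/{path}"
joined = urljoin(base, path)

print(rebuilt)  # http://controller.example:1234/v1/status  (prefix stripped)
print(joined)   # http://controller.example:1234/upcall/v1/status  (prefix kept)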
@@ -0,0 +1 @@
ALTER TABLE safekeepers DROP scheduling_policy;
@@ -0,0 +1 @@
ALTER TABLE safekeepers ADD scheduling_policy VARCHAR NOT NULL DEFAULT 'disabled';
@@ -3,7 +3,7 @@ use crate::metrics::{
    HttpRequestLatencyLabelGroup, HttpRequestStatusLabelGroup, PageserverRequestLabelGroup,
    METRICS_REGISTRY,
};
use crate::persistence::SafekeeperPersistence;
use crate::persistence::SafekeeperUpsert;
use crate::reconciler::ReconcileError;
use crate::service::{LeadershipStatus, Service, RECONCILE_TIMEOUT, STARTUP_RECONCILE_TIMEOUT};
use anyhow::Context;
@@ -1249,7 +1249,7 @@ async fn handle_get_safekeeper(req: Request<Body>) -> Result<Response<Body>, Api
async fn handle_upsert_safekeeper(mut req: Request<Body>) -> Result<Response<Body>, ApiError> {
    check_permissions(&req, Scope::Infra)?;

    let body = json_request::<SafekeeperPersistence>(&mut req).await?;
    let body = json_request::<SafekeeperUpsert>(&mut req).await?;
    let id = parse_request_param::<i64>(&req, "id")?;

    if id != body.id {

@@ -112,6 +112,14 @@ where
        }
    }

    pub(crate) fn try_exclusive(&self, key: T, operation: I) -> Option<TracingExclusiveGuard<I>> {
        let mut locked = self.entities.lock().unwrap();
        let entry = locked.entry(key).or_default().clone();
        let mut guard = TracingExclusiveGuard::new(entry.try_write_owned().ok()?);
        *guard.guard = Some(operation);
        Some(guard)
    }

    /// Rather than building a lock guard that re-takes the [`Self::entities`] lock, we just do
    /// periodic housekeeping to avoid the map growing indefinitely
    pub(crate) fn housekeeping(&self) {

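The new try_exclusive mirrors the blocking exclusive path but bails out instead of waiting when the per-entity lock is already held. The same pattern in a Python sketch (threading stand-ins; the per-key map and guard types are simplified approximations, not the crate's API):

import threading

class KeyedLocks:
    def __init__(self):
        self._map_lock = threading.Lock()
        self._locks = {}  # key -> per-entity lock

    def try_exclusive(self, key, operation):
        # Short critical section: only to look up or create the entry.
        with self._map_lock:
            entry = self._locks.setdefault(key, threading.Lock())
        if not entry.acquire(blocking=False):
            return None  # held by someone else; caller can retry or give up
        return (entry, operation)  # caller releases entry when done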
Some files were not shown because too many files have changed in this diff