mirror of
https://github.com/neondatabase/neon.git
synced 2026-01-17 10:22:56 +00:00
Compare commits
159 Commits
release-pr
...
vlad/warn-
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
4537184f42 | ||
|
|
68e7e0f0f6 | ||
|
|
02fc58b878 | ||
|
|
b312a3c320 | ||
|
|
7d761a9d22 | ||
|
|
8bdaee35f3 | ||
|
|
b0f34099f9 | ||
|
|
6975228a76 | ||
|
|
053abff71f | ||
|
|
871e8b325f | ||
|
|
c47c5f4ace | ||
|
|
b0838a68e5 | ||
|
|
8f2ebc0684 | ||
|
|
3a285a046b | ||
|
|
da13154791 | ||
|
|
2e13a3aa7a | ||
|
|
cccc196848 | ||
|
|
e436dcad57 | ||
|
|
21d7b6a258 | ||
|
|
86dbc44db1 | ||
|
|
58f6af6c9a | ||
|
|
7be971081a | ||
|
|
6fe4c6798f | ||
|
|
2eda484ef6 | ||
|
|
c7429af8a0 | ||
|
|
a753349cb0 | ||
|
|
55a68b28a2 | ||
|
|
fb0e2acb2f | ||
|
|
efaec6cdf8 | ||
|
|
3d41069dc4 | ||
|
|
dbebede7bf | ||
|
|
3e529f124f | ||
|
|
05a71c7d6a | ||
|
|
b9464865b6 | ||
|
|
1577430408 | ||
|
|
05d17a10ae | ||
|
|
2d0ea08524 | ||
|
|
c98cbbeac1 | ||
|
|
47c1640acc | ||
|
|
6debb49b87 | ||
|
|
e58e29e639 | ||
|
|
d36112d20f | ||
|
|
ffaa52ff5d | ||
|
|
aa7323a384 | ||
|
|
2466a2f977 | ||
|
|
9bdb14c1c0 | ||
|
|
df4abd8b14 | ||
|
|
a039f8381f | ||
|
|
430b556b34 | ||
|
|
1783501eaa | ||
|
|
fd1368d31e | ||
|
|
e9ed53b14f | ||
|
|
a338aee132 | ||
|
|
96243af651 | ||
|
|
ef8bfacd6b | ||
|
|
ceacc29609 | ||
|
|
b31ed0acd1 | ||
|
|
b2d0e1a519 | ||
|
|
d1bc36f536 | ||
|
|
0b9032065e | ||
|
|
09fe3b025c | ||
|
|
12053cf832 | ||
|
|
de199d71e1 | ||
|
|
22a6460010 | ||
|
|
cd982a82ec | ||
|
|
8327f68043 | ||
|
|
846e8fdce4 | ||
|
|
70a3bf37a0 | ||
|
|
23c0748cdd | ||
|
|
b5d54ba52a | ||
|
|
58332cb361 | ||
|
|
9b43204893 | ||
|
|
cdd34dfc12 | ||
|
|
3cd5034eac | ||
|
|
425b777840 | ||
|
|
4398051385 | ||
|
|
71bca6f580 | ||
|
|
105f66c4ce | ||
|
|
0d4fce2d35 | ||
|
|
2b8ea1e768 | ||
|
|
db00eb41a1 | ||
|
|
735c66dc65 | ||
|
|
77660f3d88 | ||
|
|
b6205af4a5 | ||
|
|
6149ac8834 | ||
|
|
49756a0d01 | ||
|
|
99b5a6705f | ||
|
|
f37eeb56ad | ||
|
|
bebc46e713 | ||
|
|
ad51622568 | ||
|
|
ac6cca17ac | ||
|
|
640ac4fc9e | ||
|
|
20c40eb733 | ||
|
|
f4739d49e3 | ||
|
|
030ab1c0e8 | ||
|
|
5baa4e7f0a | ||
|
|
5b2751397d | ||
|
|
fcfff72454 | ||
|
|
0ad0db6ff8 | ||
|
|
68d8acfd05 | ||
|
|
dc284247a5 | ||
|
|
5c76e2a983 | ||
|
|
237dae71a1 | ||
|
|
43a5e575d6 | ||
|
|
0a117fb1f1 | ||
|
|
4aa9786c6b | ||
|
|
be38123e62 | ||
|
|
ea84ec357f | ||
|
|
30863c0104 | ||
|
|
02f81b6469 | ||
|
|
ad7f14d526 | ||
|
|
b342a02b1c | ||
|
|
4a6556e269 | ||
|
|
95f1920231 | ||
|
|
fda52a0005 | ||
|
|
406cca643b | ||
|
|
b368e62cfc | ||
|
|
4b2f56862d | ||
|
|
a77e87a48a | ||
|
|
1393cc668b | ||
|
|
b33299dc37 | ||
|
|
e9d30edc7f | ||
|
|
1303cd5d05 | ||
|
|
c08759f367 | ||
|
|
ba9722a2fd | ||
|
|
2d4f267983 | ||
|
|
7a598b9842 | ||
|
|
eefad27538 | ||
|
|
cd10c719f9 | ||
|
|
363ea97f69 | ||
|
|
56e6ebfe17 | ||
|
|
1622fd8bda | ||
|
|
8c7dcd2598 | ||
|
|
ee22d4c9ef | ||
|
|
26600f2973 | ||
|
|
b3cd883f93 | ||
|
|
38c7a2abfc | ||
|
|
f94248a594 | ||
|
|
9c53b41245 | ||
|
|
197a89ab3d | ||
|
|
b89e02f3e8 | ||
|
|
04517c6ff3 | ||
|
|
628451d68e | ||
|
|
502d512fe2 | ||
|
|
afda6d4700 | ||
|
|
65042cbadd | ||
|
|
b135194090 | ||
|
|
43dc03459d | ||
|
|
a1b0558493 | ||
|
|
cc138b56f9 | ||
|
|
61fcf64c22 | ||
|
|
6d3e8096fc | ||
|
|
3d1c3a80ae | ||
|
|
835287ba3a | ||
|
|
d63602cc78 | ||
|
|
1668d39b7c | ||
|
|
1d12efc428 | ||
|
|
85696297c5 | ||
|
|
aaf980f70d |
@@ -3,6 +3,16 @@
|
||||
# by the RUSTDOCFLAGS env var in CI.
|
||||
rustdocflags = ["-Arustdoc::private_intra_doc_links"]
|
||||
|
||||
# Enable frame pointers. This may have a minor performance overhead, but makes it easier and more
|
||||
# efficient to obtain stack traces (and thus CPU/heap profiles). It may also avoid seg faults that
|
||||
# we've seen with libunwind-based profiling. See also:
|
||||
#
|
||||
# * <https://www.brendangregg.com/blog/2024-03-17/the-return-of-the-frame-pointers.html>
|
||||
# * <https://github.com/rust-lang/rust/pull/122646>
|
||||
#
|
||||
# NB: the RUSTFLAGS envvar will replace this. Make sure to update e.g. Dockerfile as well.
|
||||
rustflags = ["-Cforce-frame-pointers=yes"]
|
||||
|
||||
[alias]
|
||||
build_testing = ["build", "--features", "testing"]
|
||||
neon = ["run", "--bin", "neon_local"]
|
||||
|
||||
1
.github/actionlint.yml
vendored
1
.github/actionlint.yml
vendored
@@ -25,3 +25,4 @@ config-variables:
|
||||
- PGREGRESS_PG17_PROJECT_ID
|
||||
- SLACK_ON_CALL_QA_STAGING_STREAM
|
||||
- DEV_AWS_OIDC_ROLE_MANAGE_BENCHMARK_EC2_VMS_ARN
|
||||
- SLACK_ON_CALL_STORAGE_STAGING_STREAM
|
||||
|
||||
12
.github/file-filters.yaml
vendored
Normal file
12
.github/file-filters.yaml
vendored
Normal file
@@ -0,0 +1,12 @@
|
||||
rust_code: ['**/*.rs', '**/Cargo.toml', '**/Cargo.lock']
|
||||
|
||||
v14: ['vendor/postgres-v14/**', 'Makefile', 'pgxn/**']
|
||||
v15: ['vendor/postgres-v15/**', 'Makefile', 'pgxn/**']
|
||||
v16: ['vendor/postgres-v16/**', 'Makefile', 'pgxn/**']
|
||||
v17: ['vendor/postgres-v17/**', 'Makefile', 'pgxn/**']
|
||||
|
||||
rebuild_neon_extra:
|
||||
- .github/workflows/neon_extra_builds.yml
|
||||
|
||||
rebuild_macos:
|
||||
- .github/workflows/build-macos.yml
|
||||
91
.github/workflows/_check-codestyle-rust.yml
vendored
Normal file
91
.github/workflows/_check-codestyle-rust.yml
vendored
Normal file
@@ -0,0 +1,91 @@
|
||||
name: Check Codestyle Rust
|
||||
|
||||
on:
|
||||
workflow_call:
|
||||
inputs:
|
||||
build-tools-image:
|
||||
description: "build-tools image"
|
||||
required: true
|
||||
type: string
|
||||
archs:
|
||||
description: "Json array of architectures to run on"
|
||||
type: string
|
||||
|
||||
|
||||
defaults:
|
||||
run:
|
||||
shell: bash -euxo pipefail {0}
|
||||
|
||||
jobs:
|
||||
check-codestyle-rust:
|
||||
strategy:
|
||||
matrix:
|
||||
arch: ${{ fromJson(inputs.archs) }}
|
||||
runs-on: ${{ fromJson(format('["self-hosted", "{0}"]', matrix.arch == 'arm64' && 'small-arm64' || 'small')) }}
|
||||
|
||||
container:
|
||||
image: ${{ inputs.build-tools-image }}
|
||||
credentials:
|
||||
username: ${{ secrets.NEON_DOCKERHUB_USERNAME }}
|
||||
password: ${{ secrets.NEON_DOCKERHUB_PASSWORD }}
|
||||
options: --init
|
||||
|
||||
steps:
|
||||
- name: Checkout
|
||||
uses: actions/checkout@v4
|
||||
with:
|
||||
submodules: true
|
||||
|
||||
- name: Cache cargo deps
|
||||
uses: actions/cache@v4
|
||||
with:
|
||||
path: |
|
||||
~/.cargo/registry
|
||||
!~/.cargo/registry/src
|
||||
~/.cargo/git
|
||||
target
|
||||
key: v1-${{ runner.os }}-${{ runner.arch }}-cargo-${{ hashFiles('./Cargo.lock') }}-${{ hashFiles('./rust-toolchain.toml') }}-rust
|
||||
|
||||
# Some of our rust modules use FFI and need those to be checked
|
||||
- name: Get postgres headers
|
||||
run: make postgres-headers -j$(nproc)
|
||||
|
||||
# cargo hack runs the given cargo subcommand (clippy in this case) for all feature combinations.
|
||||
# This will catch compiler & clippy warnings in all feature combinations.
|
||||
# TODO: use cargo hack for build and test as well, but, that's quite expensive.
|
||||
# NB: keep clippy args in sync with ./run_clippy.sh
|
||||
#
|
||||
# The only difference between "clippy --debug" and "clippy --release" is that in --release mode,
|
||||
# #[cfg(debug_assertions)] blocks are not built. It's not worth building everything for second
|
||||
# time just for that, so skip "clippy --release".
|
||||
- run: |
|
||||
CLIPPY_COMMON_ARGS="$( source .neon_clippy_args; echo "$CLIPPY_COMMON_ARGS")"
|
||||
if [ "$CLIPPY_COMMON_ARGS" = "" ]; then
|
||||
echo "No clippy args found in .neon_clippy_args"
|
||||
exit 1
|
||||
fi
|
||||
echo "CLIPPY_COMMON_ARGS=${CLIPPY_COMMON_ARGS}" >> $GITHUB_ENV
|
||||
- name: Run cargo clippy (debug)
|
||||
run: cargo hack --features default --ignore-unknown-features --feature-powerset clippy $CLIPPY_COMMON_ARGS
|
||||
|
||||
- name: Check documentation generation
|
||||
run: cargo doc --workspace --no-deps --document-private-items
|
||||
env:
|
||||
RUSTDOCFLAGS: "-Dwarnings -Arustdoc::private_intra_doc_links"
|
||||
|
||||
# Use `${{ !cancelled() }}` to run quck tests after the longer clippy run
|
||||
- name: Check formatting
|
||||
if: ${{ !cancelled() }}
|
||||
run: cargo fmt --all -- --check
|
||||
|
||||
# https://github.com/facebookincubator/cargo-guppy/tree/bec4e0eb29dcd1faac70b1b5360267fc02bf830e/tools/cargo-hakari#2-keep-the-workspace-hack-up-to-date-in-ci
|
||||
- name: Check rust dependencies
|
||||
if: ${{ !cancelled() }}
|
||||
run: |
|
||||
cargo hakari generate --diff # workspace-hack Cargo.toml is up-to-date
|
||||
cargo hakari manage-deps --dry-run # all workspace crates depend on workspace-hack
|
||||
|
||||
# https://github.com/EmbarkStudios/cargo-deny
|
||||
- name: Check rust licenses/bans/advisories/sources
|
||||
if: ${{ !cancelled() }}
|
||||
run: cargo deny check --hide-inclusion-graph
|
||||
2
.github/workflows/actionlint.yml
vendored
2
.github/workflows/actionlint.yml
vendored
@@ -33,7 +33,7 @@ jobs:
|
||||
# SC2086 - Double quote to prevent globbing and word splitting. - https://www.shellcheck.net/wiki/SC2086
|
||||
SHELLCHECK_OPTS: --exclude=SC2046,SC2086
|
||||
with:
|
||||
fail_on_error: true
|
||||
fail_level: error
|
||||
filter_mode: nofilter
|
||||
level: error
|
||||
|
||||
|
||||
25
.github/workflows/benchmarking.yml
vendored
25
.github/workflows/benchmarking.yml
vendored
@@ -308,6 +308,7 @@ jobs:
|
||||
"image": [ "'"$image_default"'" ],
|
||||
"include": [{ "pg_version": 16, "region_id": "'"$region_id_default"'", "platform": "neonvm-captest-freetier", "db_size": "3gb" ,"runner": '"$runner_default"', "image": "'"$image_default"'" },
|
||||
{ "pg_version": 16, "region_id": "'"$region_id_default"'", "platform": "neonvm-captest-new", "db_size": "10gb","runner": '"$runner_default"', "image": "'"$image_default"'" },
|
||||
{ "pg_version": 16, "region_id": "'"$region_id_default"'", "platform": "neonvm-captest-new-many-tables","db_size": "10gb","runner": '"$runner_default"', "image": "'"$image_default"'" },
|
||||
{ "pg_version": 16, "region_id": "'"$region_id_default"'", "platform": "neonvm-captest-new", "db_size": "50gb","runner": '"$runner_default"', "image": "'"$image_default"'" },
|
||||
{ "pg_version": 16, "region_id": "azure-eastus2", "platform": "neonvm-azure-captest-freetier", "db_size": "3gb" ,"runner": '"$runner_azure"', "image": "neondatabase/build-tools:pinned-bookworm" },
|
||||
{ "pg_version": 16, "region_id": "azure-eastus2", "platform": "neonvm-azure-captest-new", "db_size": "10gb","runner": '"$runner_azure"', "image": "neondatabase/build-tools:pinned-bookworm" },
|
||||
@@ -410,7 +411,7 @@ jobs:
|
||||
aws-oicd-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
|
||||
|
||||
- name: Create Neon Project
|
||||
if: contains(fromJson('["neonvm-captest-new", "neonvm-captest-freetier", "neonvm-azure-captest-freetier", "neonvm-azure-captest-new"]'), matrix.platform)
|
||||
if: contains(fromJson('["neonvm-captest-new", "neonvm-captest-new-many-tables", "neonvm-captest-freetier", "neonvm-azure-captest-freetier", "neonvm-azure-captest-new"]'), matrix.platform)
|
||||
id: create-neon-project
|
||||
uses: ./.github/actions/neon-project-create
|
||||
with:
|
||||
@@ -429,7 +430,7 @@ jobs:
|
||||
neonvm-captest-sharding-reuse)
|
||||
CONNSTR=${{ secrets.BENCHMARK_CAPTEST_SHARDING_CONNSTR }}
|
||||
;;
|
||||
neonvm-captest-new | neonvm-captest-freetier | neonvm-azure-captest-new | neonvm-azure-captest-freetier)
|
||||
neonvm-captest-new | neonvm-captest-new-many-tables | neonvm-captest-freetier | neonvm-azure-captest-new | neonvm-azure-captest-freetier)
|
||||
CONNSTR=${{ steps.create-neon-project.outputs.dsn }}
|
||||
;;
|
||||
rds-aurora)
|
||||
@@ -446,6 +447,26 @@ jobs:
|
||||
|
||||
echo "connstr=${CONNSTR}" >> $GITHUB_OUTPUT
|
||||
|
||||
# we want to compare Neon project OLTP throughput and latency at scale factor 10 GB
|
||||
# without (neonvm-captest-new)
|
||||
# and with (neonvm-captest-new-many-tables) many relations in the database
|
||||
- name: Create many relations before the run
|
||||
if: contains(fromJson('["neonvm-captest-new-many-tables"]'), matrix.platform)
|
||||
uses: ./.github/actions/run-python-test-set
|
||||
with:
|
||||
build_type: ${{ env.BUILD_TYPE }}
|
||||
test_selection: performance
|
||||
run_in_parallel: false
|
||||
save_perf_report: ${{ env.SAVE_PERF_REPORT }}
|
||||
extra_params: -m remote_cluster --timeout 21600 -k test_perf_many_relations
|
||||
pg_version: ${{ env.DEFAULT_PG_VERSION }}
|
||||
aws-oicd-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
|
||||
env:
|
||||
BENCHMARK_CONNSTR: ${{ steps.set-up-connstr.outputs.connstr }}
|
||||
VIP_VAP_ACCESS_TOKEN: "${{ secrets.VIP_VAP_ACCESS_TOKEN }}"
|
||||
PERF_TEST_RESULT_CONNSTR: "${{ secrets.PERF_TEST_RESULT_CONNSTR }}"
|
||||
TEST_NUM_RELATIONS: 10000
|
||||
|
||||
- name: Benchmark init
|
||||
uses: ./.github/actions/run-python-test-set
|
||||
with:
|
||||
|
||||
241
.github/workflows/build-macos.yml
vendored
Normal file
241
.github/workflows/build-macos.yml
vendored
Normal file
@@ -0,0 +1,241 @@
|
||||
name: Check neon with MacOS builds
|
||||
|
||||
on:
|
||||
workflow_call:
|
||||
inputs:
|
||||
pg_versions:
|
||||
description: "Array of the pg versions to build for, for example: ['v14', 'v17']"
|
||||
type: string
|
||||
default: '[]'
|
||||
required: false
|
||||
rebuild_rust_code:
|
||||
description: "Rebuild Rust code"
|
||||
type: boolean
|
||||
default: false
|
||||
required: false
|
||||
rebuild_everything:
|
||||
description: "If true, rebuild for all versions"
|
||||
type: boolean
|
||||
default: false
|
||||
required: false
|
||||
|
||||
env:
|
||||
RUST_BACKTRACE: 1
|
||||
COPT: '-Werror'
|
||||
|
||||
# TODO: move `check-*` and `files-changed` jobs to the "Caller" Workflow
|
||||
# We should care about that as Github has limitations:
|
||||
# - You can connect up to four levels of workflows
|
||||
# - You can call a maximum of 20 unique reusable workflows from a single workflow file.
|
||||
# https://docs.github.com/en/actions/sharing-automations/reusing-workflows#limitations
|
||||
jobs:
|
||||
build-pgxn:
|
||||
if: |
|
||||
(inputs.pg_versions != '[]' || inputs.rebuild_everything) && (
|
||||
contains(github.event.pull_request.labels.*.name, 'run-extra-build-macos') ||
|
||||
contains(github.event.pull_request.labels.*.name, 'run-extra-build-*') ||
|
||||
github.ref_name == 'main'
|
||||
)
|
||||
timeout-minutes: 30
|
||||
runs-on: macos-15
|
||||
strategy:
|
||||
matrix:
|
||||
postgres-version: ${{ inputs.rebuild_everything && fromJson('["v14", "v15", "v16", "v17"]') || fromJSON(inputs.pg_versions) }}
|
||||
env:
|
||||
# Use release build only, to have less debug info around
|
||||
# Hence keeping target/ (and general cache size) smaller
|
||||
BUILD_TYPE: release
|
||||
steps:
|
||||
- name: Checkout main repo
|
||||
uses: actions/checkout@v4
|
||||
|
||||
- name: Set pg ${{ matrix.postgres-version }} for caching
|
||||
id: pg_rev
|
||||
run: echo pg_rev=$(git rev-parse HEAD:vendor/postgres-${{ matrix.postgres-version }}) | tee -a "${GITHUB_OUTPUT}"
|
||||
|
||||
- name: Cache postgres ${{ matrix.postgres-version }} build
|
||||
id: cache_pg
|
||||
uses: actions/cache@v4
|
||||
with:
|
||||
path: pg_install/${{ matrix.postgres-version }}
|
||||
key: v1-${{ runner.os }}-${{ runner.arch }}-${{ env.BUILD_TYPE }}-pg-${{ matrix.postgres-version }}-${{ steps.pg_rev.outputs.pg_rev }}-${{ hashFiles('Makefile') }}
|
||||
|
||||
- name: Checkout submodule vendor/postgres-${{ matrix.postgres-version }}
|
||||
if: steps.cache_pg.outputs.cache-hit != 'true'
|
||||
run: |
|
||||
git submodule init vendor/postgres-${{ matrix.postgres-version }}
|
||||
git submodule update --depth 1 --recursive
|
||||
|
||||
- name: Install build dependencies
|
||||
if: steps.cache_pg.outputs.cache-hit != 'true'
|
||||
run: |
|
||||
brew install flex bison openssl protobuf icu4c
|
||||
|
||||
- name: Set extra env for macOS
|
||||
if: steps.cache_pg.outputs.cache-hit != 'true'
|
||||
run: |
|
||||
echo 'LDFLAGS=-L/usr/local/opt/openssl@3/lib' >> $GITHUB_ENV
|
||||
echo 'CPPFLAGS=-I/usr/local/opt/openssl@3/include' >> $GITHUB_ENV
|
||||
|
||||
- name: Build Postgres ${{ matrix.postgres-version }}
|
||||
if: steps.cache_pg.outputs.cache-hit != 'true'
|
||||
run: |
|
||||
make postgres-${{ matrix.postgres-version }} -j$(sysctl -n hw.ncpu)
|
||||
|
||||
- name: Build Neon Pg Ext ${{ matrix.postgres-version }}
|
||||
if: steps.cache_pg.outputs.cache-hit != 'true'
|
||||
run: |
|
||||
make "neon-pg-ext-${{ matrix.postgres-version }}" -j$(sysctl -n hw.ncpu)
|
||||
|
||||
- name: Get postgres headers ${{ matrix.postgres-version }}
|
||||
if: steps.cache_pg.outputs.cache-hit != 'true'
|
||||
run: |
|
||||
make postgres-headers-${{ matrix.postgres-version }} -j$(sysctl -n hw.ncpu)
|
||||
|
||||
build-walproposer-lib:
|
||||
if: |
|
||||
(inputs.pg_versions != '[]' || inputs.rebuild_everything) && (
|
||||
contains(github.event.pull_request.labels.*.name, 'run-extra-build-macos') ||
|
||||
contains(github.event.pull_request.labels.*.name, 'run-extra-build-*') ||
|
||||
github.ref_name == 'main'
|
||||
)
|
||||
timeout-minutes: 30
|
||||
runs-on: macos-15
|
||||
needs: [build-pgxn]
|
||||
env:
|
||||
# Use release build only, to have less debug info around
|
||||
# Hence keeping target/ (and general cache size) smaller
|
||||
BUILD_TYPE: release
|
||||
steps:
|
||||
- name: Checkout main repo
|
||||
uses: actions/checkout@v4
|
||||
|
||||
- name: Set pg v17 for caching
|
||||
id: pg_rev
|
||||
run: echo pg_rev=$(git rev-parse HEAD:vendor/postgres-v17) | tee -a "${GITHUB_OUTPUT}"
|
||||
|
||||
- name: Cache postgres v17 build
|
||||
id: cache_pg
|
||||
uses: actions/cache@v4
|
||||
with:
|
||||
path: pg_install/v17
|
||||
key: v1-${{ runner.os }}-${{ runner.arch }}-${{ env.BUILD_TYPE }}-pg-v17-${{ steps.pg_rev.outputs.pg_rev }}-${{ hashFiles('Makefile') }}
|
||||
|
||||
- name: Cache walproposer-lib
|
||||
id: cache_walproposer_lib
|
||||
uses: actions/cache@v4
|
||||
with:
|
||||
path: pg_install/build/walproposer-lib
|
||||
key: v1-${{ runner.os }}-${{ runner.arch }}-${{ env.BUILD_TYPE }}-walproposer_lib-v17-${{ steps.pg_rev.outputs.pg_rev }}-${{ hashFiles('Makefile') }}
|
||||
|
||||
- name: Checkout submodule vendor/postgres-v17
|
||||
if: steps.cache_walproposer_lib.outputs.cache-hit != 'true'
|
||||
run: |
|
||||
git submodule init vendor/postgres-v17
|
||||
git submodule update --depth 1 --recursive
|
||||
|
||||
- name: Install build dependencies
|
||||
if: steps.cache_walproposer_lib.outputs.cache-hit != 'true'
|
||||
run: |
|
||||
brew install flex bison openssl protobuf icu4c
|
||||
|
||||
- name: Set extra env for macOS
|
||||
if: steps.cache_walproposer_lib.outputs.cache-hit != 'true'
|
||||
run: |
|
||||
echo 'LDFLAGS=-L/usr/local/opt/openssl@3/lib' >> $GITHUB_ENV
|
||||
echo 'CPPFLAGS=-I/usr/local/opt/openssl@3/include' >> $GITHUB_ENV
|
||||
|
||||
- name: Build walproposer-lib (only for v17)
|
||||
if: steps.cache_walproposer_lib.outputs.cache-hit != 'true'
|
||||
run:
|
||||
make walproposer-lib -j$(sysctl -n hw.ncpu)
|
||||
|
||||
cargo-build:
|
||||
if: |
|
||||
(inputs.pg_versions != '[]' || inputs.rebuild_rust_code || inputs.rebuild_everything) && (
|
||||
contains(github.event.pull_request.labels.*.name, 'run-extra-build-macos') ||
|
||||
contains(github.event.pull_request.labels.*.name, 'run-extra-build-*') ||
|
||||
github.ref_name == 'main'
|
||||
)
|
||||
timeout-minutes: 30
|
||||
runs-on: macos-15
|
||||
needs: [build-pgxn, build-walproposer-lib]
|
||||
env:
|
||||
# Use release build only, to have less debug info around
|
||||
# Hence keeping target/ (and general cache size) smaller
|
||||
BUILD_TYPE: release
|
||||
steps:
|
||||
- name: Checkout main repo
|
||||
uses: actions/checkout@v4
|
||||
with:
|
||||
submodules: true
|
||||
|
||||
- name: Set pg v14 for caching
|
||||
id: pg_rev_v14
|
||||
run: echo pg_rev=$(git rev-parse HEAD:vendor/postgres-v14) | tee -a "${GITHUB_OUTPUT}"
|
||||
- name: Set pg v15 for caching
|
||||
id: pg_rev_v15
|
||||
run: echo pg_rev=$(git rev-parse HEAD:vendor/postgres-v15) | tee -a "${GITHUB_OUTPUT}"
|
||||
- name: Set pg v16 for caching
|
||||
id: pg_rev_v16
|
||||
run: echo pg_rev=$(git rev-parse HEAD:vendor/postgres-v16) | tee -a "${GITHUB_OUTPUT}"
|
||||
- name: Set pg v17 for caching
|
||||
id: pg_rev_v17
|
||||
run: echo pg_rev=$(git rev-parse HEAD:vendor/postgres-v17) | tee -a "${GITHUB_OUTPUT}"
|
||||
|
||||
- name: Cache postgres v14 build
|
||||
id: cache_pg
|
||||
uses: actions/cache@v4
|
||||
with:
|
||||
path: pg_install/v14
|
||||
key: v1-${{ runner.os }}-${{ runner.arch }}-${{ env.BUILD_TYPE }}-pg-v14-${{ steps.pg_rev_v14.outputs.pg_rev }}-${{ hashFiles('Makefile') }}
|
||||
- name: Cache postgres v15 build
|
||||
id: cache_pg_v15
|
||||
uses: actions/cache@v4
|
||||
with:
|
||||
path: pg_install/v15
|
||||
key: v1-${{ runner.os }}-${{ runner.arch }}-${{ env.BUILD_TYPE }}-pg-v15-${{ steps.pg_rev_v15.outputs.pg_rev }}-${{ hashFiles('Makefile') }}
|
||||
- name: Cache postgres v16 build
|
||||
id: cache_pg_v16
|
||||
uses: actions/cache@v4
|
||||
with:
|
||||
path: pg_install/v16
|
||||
key: v1-${{ runner.os }}-${{ runner.arch }}-${{ env.BUILD_TYPE }}-pg-v16-${{ steps.pg_rev_v16.outputs.pg_rev }}-${{ hashFiles('Makefile') }}
|
||||
- name: Cache postgres v17 build
|
||||
id: cache_pg_v17
|
||||
uses: actions/cache@v4
|
||||
with:
|
||||
path: pg_install/v17
|
||||
key: v1-${{ runner.os }}-${{ runner.arch }}-${{ env.BUILD_TYPE }}-pg-v17-${{ steps.pg_rev_v17.outputs.pg_rev }}-${{ hashFiles('Makefile') }}
|
||||
|
||||
- name: Cache cargo deps (only for v17)
|
||||
uses: actions/cache@v4
|
||||
with:
|
||||
path: |
|
||||
~/.cargo/registry
|
||||
!~/.cargo/registry/src
|
||||
~/.cargo/git
|
||||
target
|
||||
key: v1-${{ runner.os }}-${{ runner.arch }}-cargo-${{ hashFiles('./Cargo.lock') }}-${{ hashFiles('./rust-toolchain.toml') }}-rust
|
||||
|
||||
- name: Cache walproposer-lib
|
||||
id: cache_walproposer_lib
|
||||
uses: actions/cache@v4
|
||||
with:
|
||||
path: pg_install/build/walproposer-lib
|
||||
key: v1-${{ runner.os }}-${{ runner.arch }}-${{ env.BUILD_TYPE }}-walproposer_lib-v17-${{ steps.pg_rev_v17.outputs.pg_rev }}-${{ hashFiles('Makefile') }}
|
||||
|
||||
- name: Install build dependencies
|
||||
run: |
|
||||
brew install flex bison openssl protobuf icu4c
|
||||
|
||||
- name: Set extra env for macOS
|
||||
run: |
|
||||
echo 'LDFLAGS=-L/usr/local/opt/openssl@3/lib' >> $GITHUB_ENV
|
||||
echo 'CPPFLAGS=-I/usr/local/opt/openssl@3/include' >> $GITHUB_ENV
|
||||
|
||||
- name: Run cargo build (only for v17)
|
||||
run: PQ_LIB_DIR=$(pwd)/pg_install/v17/lib cargo build --all --release -j$(sysctl -n hw.ncpu)
|
||||
|
||||
- name: Check that no warnings are produced (only for v17)
|
||||
run: ./run_clippy.sh
|
||||
193
.github/workflows/build_and_test.yml
vendored
193
.github/workflows/build_and_test.yml
vendored
@@ -164,77 +164,11 @@ jobs:
|
||||
|
||||
check-codestyle-rust:
|
||||
needs: [ check-permissions, build-build-tools-image ]
|
||||
strategy:
|
||||
matrix:
|
||||
arch: [ x64, arm64 ]
|
||||
runs-on: ${{ fromJson(format('["self-hosted", "{0}"]', matrix.arch == 'arm64' && 'small-arm64' || 'small')) }}
|
||||
|
||||
container:
|
||||
image: ${{ needs.build-build-tools-image.outputs.image }}-bookworm
|
||||
credentials:
|
||||
username: ${{ secrets.NEON_DOCKERHUB_USERNAME }}
|
||||
password: ${{ secrets.NEON_DOCKERHUB_PASSWORD }}
|
||||
options: --init
|
||||
|
||||
steps:
|
||||
- name: Checkout
|
||||
uses: actions/checkout@v4
|
||||
with:
|
||||
submodules: true
|
||||
|
||||
- name: Cache cargo deps
|
||||
uses: actions/cache@v4
|
||||
with:
|
||||
path: |
|
||||
~/.cargo/registry
|
||||
!~/.cargo/registry/src
|
||||
~/.cargo/git
|
||||
target
|
||||
key: v1-${{ runner.os }}-${{ runner.arch }}-cargo-${{ hashFiles('./Cargo.lock') }}-${{ hashFiles('./rust-toolchain.toml') }}-rust
|
||||
|
||||
# Some of our rust modules use FFI and need those to be checked
|
||||
- name: Get postgres headers
|
||||
run: make postgres-headers -j$(nproc)
|
||||
|
||||
# cargo hack runs the given cargo subcommand (clippy in this case) for all feature combinations.
|
||||
# This will catch compiler & clippy warnings in all feature combinations.
|
||||
# TODO: use cargo hack for build and test as well, but, that's quite expensive.
|
||||
# NB: keep clippy args in sync with ./run_clippy.sh
|
||||
#
|
||||
# The only difference between "clippy --debug" and "clippy --release" is that in --release mode,
|
||||
# #[cfg(debug_assertions)] blocks are not built. It's not worth building everything for second
|
||||
# time just for that, so skip "clippy --release".
|
||||
- run: |
|
||||
CLIPPY_COMMON_ARGS="$( source .neon_clippy_args; echo "$CLIPPY_COMMON_ARGS")"
|
||||
if [ "$CLIPPY_COMMON_ARGS" = "" ]; then
|
||||
echo "No clippy args found in .neon_clippy_args"
|
||||
exit 1
|
||||
fi
|
||||
echo "CLIPPY_COMMON_ARGS=${CLIPPY_COMMON_ARGS}" >> $GITHUB_ENV
|
||||
- name: Run cargo clippy (debug)
|
||||
run: cargo hack --feature-powerset clippy $CLIPPY_COMMON_ARGS
|
||||
|
||||
- name: Check documentation generation
|
||||
run: cargo doc --workspace --no-deps --document-private-items
|
||||
env:
|
||||
RUSTDOCFLAGS: "-Dwarnings -Arustdoc::private_intra_doc_links"
|
||||
|
||||
# Use `${{ !cancelled() }}` to run quck tests after the longer clippy run
|
||||
- name: Check formatting
|
||||
if: ${{ !cancelled() }}
|
||||
run: cargo fmt --all -- --check
|
||||
|
||||
# https://github.com/facebookincubator/cargo-guppy/tree/bec4e0eb29dcd1faac70b1b5360267fc02bf830e/tools/cargo-hakari#2-keep-the-workspace-hack-up-to-date-in-ci
|
||||
- name: Check rust dependencies
|
||||
if: ${{ !cancelled() }}
|
||||
run: |
|
||||
cargo hakari generate --diff # workspace-hack Cargo.toml is up-to-date
|
||||
cargo hakari manage-deps --dry-run # all workspace crates depend on workspace-hack
|
||||
|
||||
# https://github.com/EmbarkStudios/cargo-deny
|
||||
- name: Check rust licenses/bans/advisories/sources
|
||||
if: ${{ !cancelled() }}
|
||||
run: cargo deny check --hide-inclusion-graph
|
||||
uses: ./.github/workflows/_check-codestyle-rust.yml
|
||||
with:
|
||||
build-tools-image: ${{ needs.build-build-tools-image.outputs.image }}-bookworm
|
||||
archs: '["x64", "arm64"]'
|
||||
secrets: inherit
|
||||
|
||||
build-and-test-locally:
|
||||
needs: [ tag, build-build-tools-image ]
|
||||
@@ -346,25 +280,22 @@ jobs:
|
||||
# XXX: no coverage data handling here, since benchmarks are run on release builds,
|
||||
# while coverage is currently collected for the debug ones
|
||||
|
||||
report-benchmarks-failures:
|
||||
report-benchmarks-results-to-slack:
|
||||
needs: [ benchmarks, create-test-report ]
|
||||
if: github.ref_name == 'main' && failure() && needs.benchmarks.result == 'failure'
|
||||
permissions:
|
||||
id-token: write # aws-actions/configure-aws-credentials
|
||||
statuses: write
|
||||
contents: write
|
||||
pull-requests: write
|
||||
if: github.ref_name == 'main' && !cancelled() && contains(fromJSON('["success", "failure"]'), needs.benchmarks.result)
|
||||
runs-on: ubuntu-22.04
|
||||
|
||||
steps:
|
||||
- uses: slackapi/slack-github-action@v1
|
||||
- uses: slackapi/slack-github-action@v2
|
||||
with:
|
||||
channel-id: C060CNA47S9 # on-call-staging-storage-stream
|
||||
slack-message: |
|
||||
Benchmarks failed on main <${{ github.event.head_commit.url }}|${{ github.sha }}>
|
||||
<${{ needs.create-test-report.outputs.report-url }}|Allure report>
|
||||
env:
|
||||
SLACK_BOT_TOKEN: ${{ secrets.SLACK_BOT_TOKEN }}
|
||||
method: chat.postMessage
|
||||
token: ${{ secrets.SLACK_BOT_TOKEN }}
|
||||
payload: |
|
||||
channel: "${{ vars.SLACK_ON_CALL_STORAGE_STAGING_STREAM }}"
|
||||
text: |
|
||||
Benchmarks on main: *${{ needs.benchmarks.result }}*
|
||||
- <${{ needs.create-test-report.outputs.report-url }}|Allure report>
|
||||
- <${{ github.event.head_commit.url }}|${{ github.sha }}>
|
||||
|
||||
create-test-report:
|
||||
needs: [ check-permissions, build-and-test-locally, coverage-report, build-build-tools-image, benchmarks ]
|
||||
@@ -538,7 +469,7 @@ jobs:
|
||||
|
||||
trigger-e2e-tests:
|
||||
if: ${{ !github.event.pull_request.draft || contains( github.event.pull_request.labels.*.name, 'run-e2e-tests-in-draft') || github.ref_name == 'main' || github.ref_name == 'release' || github.ref_name == 'release-proxy' || github.ref_name == 'release-compute' }}
|
||||
needs: [ check-permissions, promote-images, tag ]
|
||||
needs: [ check-permissions, promote-images-dev, tag ]
|
||||
uses: ./.github/workflows/trigger-e2e-tests.yml
|
||||
secrets: inherit
|
||||
|
||||
@@ -728,30 +659,6 @@ jobs:
|
||||
tags: |
|
||||
neondatabase/neon-test-extensions-${{ matrix.version.pg }}:${{needs.tag.outputs.build-tag}}-${{ matrix.version.debian }}-${{ matrix.arch }}
|
||||
|
||||
- name: Build compute-tools image
|
||||
# compute-tools are Postgres independent, so build it only once
|
||||
# We pick 16, because that builds on debian 11 with older glibc (and is
|
||||
# thus compatible with newer glibc), rather than 17 on Debian 12, as
|
||||
# that isn't guaranteed to be compatible with Debian 11
|
||||
if: matrix.version.pg == 'v16'
|
||||
uses: docker/build-push-action@v6
|
||||
with:
|
||||
target: compute-tools-image
|
||||
context: .
|
||||
build-args: |
|
||||
GIT_VERSION=${{ github.event.pull_request.head.sha || github.sha }}
|
||||
BUILD_TAG=${{ needs.tag.outputs.build-tag }}
|
||||
TAG=${{ needs.build-build-tools-image.outputs.image-tag }}-${{ matrix.version.debian }}
|
||||
DEBIAN_VERSION=${{ matrix.version.debian }}
|
||||
provenance: false
|
||||
push: true
|
||||
pull: true
|
||||
file: compute/compute-node.Dockerfile
|
||||
cache-from: type=registry,ref=cache.neon.build/compute-node-${{ matrix.version.pg }}:cache-${{ matrix.version.debian }}-${{ matrix.arch }}
|
||||
cache-to: ${{ github.ref_name == 'main' && format('type=registry,ref=cache.neon.build/compute-tools-{0}:cache-{1}-{2},mode=max', matrix.version.pg, matrix.version.debian, matrix.arch) || '' }}
|
||||
tags: |
|
||||
neondatabase/compute-tools:${{ needs.tag.outputs.build-tag }}-${{ matrix.version.debian }}-${{ matrix.arch }}
|
||||
|
||||
compute-node-image:
|
||||
needs: [ compute-node-image-arch, tag ]
|
||||
permissions:
|
||||
@@ -794,14 +701,6 @@ jobs:
|
||||
neondatabase/neon-test-extensions-${{ matrix.version.pg }}:${{ needs.tag.outputs.build-tag }}-${{ matrix.version.debian }}-x64 \
|
||||
neondatabase/neon-test-extensions-${{ matrix.version.pg }}:${{ needs.tag.outputs.build-tag }}-${{ matrix.version.debian }}-arm64
|
||||
|
||||
- name: Create multi-arch compute-tools image
|
||||
if: matrix.version.pg == 'v16'
|
||||
run: |
|
||||
docker buildx imagetools create -t neondatabase/compute-tools:${{ needs.tag.outputs.build-tag }} \
|
||||
-t neondatabase/compute-tools:${{ needs.tag.outputs.build-tag }}-${{ matrix.version.debian }} \
|
||||
neondatabase/compute-tools:${{ needs.tag.outputs.build-tag }}-${{ matrix.version.debian }}-x64 \
|
||||
neondatabase/compute-tools:${{ needs.tag.outputs.build-tag }}-${{ matrix.version.debian }}-arm64
|
||||
|
||||
- name: Configure AWS credentials
|
||||
uses: aws-actions/configure-aws-credentials@v4
|
||||
with:
|
||||
@@ -817,12 +716,6 @@ jobs:
|
||||
docker buildx imagetools create -t 369495373322.dkr.ecr.eu-central-1.amazonaws.com/compute-node-${{ matrix.version.pg }}:${{ needs.tag.outputs.build-tag }} \
|
||||
neondatabase/compute-node-${{ matrix.version.pg }}:${{ needs.tag.outputs.build-tag }}
|
||||
|
||||
- name: Push multi-arch compute-tools image to ECR
|
||||
if: matrix.version.pg == 'v16'
|
||||
run: |
|
||||
docker buildx imagetools create -t 369495373322.dkr.ecr.eu-central-1.amazonaws.com/compute-tools:${{ needs.tag.outputs.build-tag }} \
|
||||
neondatabase/compute-tools:${{ needs.tag.outputs.build-tag }}
|
||||
|
||||
vm-compute-node-image:
|
||||
needs: [ check-permissions, tag, compute-node-image ]
|
||||
runs-on: [ self-hosted, large ]
|
||||
@@ -930,8 +823,8 @@ jobs:
|
||||
docker compose -f ./docker-compose/docker-compose.yml logs || 0
|
||||
docker compose -f ./docker-compose/docker-compose.yml down
|
||||
|
||||
promote-images:
|
||||
needs: [ check-permissions, tag, test-images, vm-compute-node-image ]
|
||||
promote-images-dev:
|
||||
needs: [ check-permissions, tag, vm-compute-node-image, neon-image ]
|
||||
runs-on: ubuntu-22.04
|
||||
|
||||
permissions:
|
||||
@@ -965,6 +858,35 @@ jobs:
|
||||
neondatabase/vm-compute-node-${version}:${{ needs.tag.outputs.build-tag }}
|
||||
done
|
||||
|
||||
promote-images-prod:
|
||||
needs: [ check-permissions, tag, test-images, vm-compute-node-image ]
|
||||
runs-on: ubuntu-22.04
|
||||
if: github.ref_name == 'main' || github.ref_name == 'release' || github.ref_name == 'release-proxy' || github.ref_name == 'release-compute'
|
||||
|
||||
permissions:
|
||||
id-token: write # aws-actions/configure-aws-credentials
|
||||
statuses: write
|
||||
contents: read
|
||||
|
||||
env:
|
||||
VERSIONS: v14 v15 v16 v17
|
||||
|
||||
steps:
|
||||
- name: Configure AWS credentials
|
||||
uses: aws-actions/configure-aws-credentials@v4
|
||||
with:
|
||||
aws-region: eu-central-1
|
||||
role-to-assume: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
|
||||
role-duration-seconds: 3600
|
||||
|
||||
- name: Login to Amazon Dev ECR
|
||||
uses: aws-actions/amazon-ecr-login@v2
|
||||
|
||||
- uses: docker/login-action@v3
|
||||
with:
|
||||
username: ${{ secrets.NEON_DOCKERHUB_USERNAME }}
|
||||
password: ${{ secrets.NEON_DOCKERHUB_PASSWORD }}
|
||||
|
||||
- name: Add latest tag to images
|
||||
if: github.ref_name == 'main'
|
||||
run: |
|
||||
@@ -972,9 +894,6 @@ jobs:
|
||||
docker buildx imagetools create -t $repo/neon:latest \
|
||||
$repo/neon:${{ needs.tag.outputs.build-tag }}
|
||||
|
||||
docker buildx imagetools create -t $repo/compute-tools:latest \
|
||||
$repo/compute-tools:${{ needs.tag.outputs.build-tag }}
|
||||
|
||||
for version in ${VERSIONS}; do
|
||||
docker buildx imagetools create -t $repo/compute-node-${version}:latest \
|
||||
$repo/compute-node-${version}:${{ needs.tag.outputs.build-tag }}
|
||||
@@ -1003,31 +922,31 @@ jobs:
|
||||
- name: Copy all images to prod ECR
|
||||
if: github.ref_name == 'release' || github.ref_name == 'release-proxy' || github.ref_name == 'release-compute'
|
||||
run: |
|
||||
for image in neon compute-tools {vm-,}compute-node-{v14,v15,v16,v17}; do
|
||||
for image in neon {vm-,}compute-node-{v14,v15,v16,v17}; do
|
||||
docker buildx imagetools create -t 093970136003.dkr.ecr.eu-central-1.amazonaws.com/${image}:${{ needs.tag.outputs.build-tag }} \
|
||||
369495373322.dkr.ecr.eu-central-1.amazonaws.com/${image}:${{ needs.tag.outputs.build-tag }}
|
||||
done
|
||||
|
||||
push-to-acr-dev:
|
||||
if: github.ref_name == 'main'
|
||||
needs: [ tag, promote-images ]
|
||||
needs: [ tag, promote-images-dev ]
|
||||
uses: ./.github/workflows/_push-to-acr.yml
|
||||
with:
|
||||
client_id: ${{ vars.AZURE_DEV_CLIENT_ID }}
|
||||
image_tag: ${{ needs.tag.outputs.build-tag }}
|
||||
images: neon compute-tools vm-compute-node-v14 vm-compute-node-v15 vm-compute-node-v16 vm-compute-node-v17 compute-node-v14 compute-node-v15 compute-node-v16 compute-node-v17
|
||||
images: neon vm-compute-node-v14 vm-compute-node-v15 vm-compute-node-v16 vm-compute-node-v17 compute-node-v14 compute-node-v15 compute-node-v16 compute-node-v17
|
||||
registry_name: ${{ vars.AZURE_DEV_REGISTRY_NAME }}
|
||||
subscription_id: ${{ vars.AZURE_DEV_SUBSCRIPTION_ID }}
|
||||
tenant_id: ${{ vars.AZURE_TENANT_ID }}
|
||||
|
||||
push-to-acr-prod:
|
||||
if: github.ref_name == 'release' || github.ref_name == 'release-proxy' || github.ref_name == 'release-compute'
|
||||
needs: [ tag, promote-images ]
|
||||
needs: [ tag, promote-images-prod ]
|
||||
uses: ./.github/workflows/_push-to-acr.yml
|
||||
with:
|
||||
client_id: ${{ vars.AZURE_PROD_CLIENT_ID }}
|
||||
image_tag: ${{ needs.tag.outputs.build-tag }}
|
||||
images: neon compute-tools vm-compute-node-v14 vm-compute-node-v15 vm-compute-node-v16 vm-compute-node-v17 compute-node-v14 compute-node-v15 compute-node-v16 compute-node-v17
|
||||
images: neon vm-compute-node-v14 vm-compute-node-v15 vm-compute-node-v16 vm-compute-node-v17 compute-node-v14 compute-node-v15 compute-node-v16 compute-node-v17
|
||||
registry_name: ${{ vars.AZURE_PROD_REGISTRY_NAME }}
|
||||
subscription_id: ${{ vars.AZURE_PROD_SUBSCRIPTION_ID }}
|
||||
tenant_id: ${{ vars.AZURE_TENANT_ID }}
|
||||
@@ -1112,7 +1031,7 @@ jobs:
|
||||
exit 1
|
||||
|
||||
deploy:
|
||||
needs: [ check-permissions, promote-images, tag, build-and-test-locally, trigger-custom-extensions-build-and-wait, push-to-acr-dev, push-to-acr-prod ]
|
||||
needs: [ check-permissions, promote-images-prod, tag, build-and-test-locally, trigger-custom-extensions-build-and-wait, push-to-acr-dev, push-to-acr-prod ]
|
||||
# `!failure() && !cancelled()` is required because the workflow depends on the job that can be skipped: `push-to-acr-dev` and `push-to-acr-prod`
|
||||
if: (github.ref_name == 'main' || github.ref_name == 'release' || github.ref_name == 'release-proxy' || github.ref_name == 'release-compute') && !failure() && !cancelled()
|
||||
permissions:
|
||||
@@ -1333,7 +1252,7 @@ jobs:
|
||||
done
|
||||
|
||||
pin-build-tools-image:
|
||||
needs: [ build-build-tools-image, promote-images, build-and-test-locally ]
|
||||
needs: [ build-build-tools-image, promote-images-prod, build-and-test-locally ]
|
||||
if: github.ref_name == 'main'
|
||||
uses: ./.github/workflows/pin-build-tools-image.yml
|
||||
with:
|
||||
@@ -1356,7 +1275,7 @@ jobs:
|
||||
- build-and-test-locally
|
||||
- check-codestyle-python
|
||||
- check-codestyle-rust
|
||||
- promote-images
|
||||
- promote-images-dev
|
||||
- test-images
|
||||
- trigger-custom-extensions-build-and-wait
|
||||
runs-on: ubuntu-22.04
|
||||
|
||||
139
.github/workflows/neon_extra_builds.yml
vendored
139
.github/workflows/neon_extra_builds.yml
vendored
@@ -31,19 +31,15 @@ jobs:
|
||||
uses: ./.github/workflows/build-build-tools-image.yml
|
||||
secrets: inherit
|
||||
|
||||
check-macos-build:
|
||||
needs: [ check-permissions ]
|
||||
if: |
|
||||
contains(github.event.pull_request.labels.*.name, 'run-extra-build-macos') ||
|
||||
contains(github.event.pull_request.labels.*.name, 'run-extra-build-*') ||
|
||||
github.ref_name == 'main'
|
||||
timeout-minutes: 90
|
||||
runs-on: macos-15
|
||||
|
||||
env:
|
||||
# Use release build only, to have less debug info around
|
||||
# Hence keeping target/ (and general cache size) smaller
|
||||
BUILD_TYPE: release
|
||||
files-changed:
|
||||
name: Detect what files changed
|
||||
runs-on: ubuntu-22.04
|
||||
timeout-minutes: 3
|
||||
outputs:
|
||||
v17: ${{ steps.files_changed.outputs.v17 }}
|
||||
postgres_changes: ${{ steps.postgres_changes.outputs.changes }}
|
||||
rebuild_rust_code: ${{ steps.files_changed.outputs.rust_code }}
|
||||
rebuild_everything: ${{ steps.files_changed.outputs.rebuild_neon_extra || steps.files_changed.outputs.rebuild_macos }}
|
||||
|
||||
steps:
|
||||
- name: Checkout
|
||||
@@ -51,106 +47,45 @@ jobs:
|
||||
with:
|
||||
submodules: true
|
||||
|
||||
- name: Install macOS postgres dependencies
|
||||
run: brew install flex bison openssl protobuf icu4c
|
||||
|
||||
- name: Set pg 14 revision for caching
|
||||
id: pg_v14_rev
|
||||
run: echo pg_rev=$(git rev-parse HEAD:vendor/postgres-v14) >> $GITHUB_OUTPUT
|
||||
|
||||
- name: Set pg 15 revision for caching
|
||||
id: pg_v15_rev
|
||||
run: echo pg_rev=$(git rev-parse HEAD:vendor/postgres-v15) >> $GITHUB_OUTPUT
|
||||
|
||||
- name: Set pg 16 revision for caching
|
||||
id: pg_v16_rev
|
||||
run: echo pg_rev=$(git rev-parse HEAD:vendor/postgres-v16) >> $GITHUB_OUTPUT
|
||||
|
||||
- name: Set pg 17 revision for caching
|
||||
id: pg_v17_rev
|
||||
run: echo pg_rev=$(git rev-parse HEAD:vendor/postgres-v17) >> $GITHUB_OUTPUT
|
||||
|
||||
- name: Cache postgres v14 build
|
||||
id: cache_pg_14
|
||||
uses: actions/cache@v4
|
||||
- name: Check for Postgres changes
|
||||
uses: dorny/paths-filter@1441771bbfdd59dcd748680ee64ebd8faab1a242 #v3
|
||||
id: files_changed
|
||||
with:
|
||||
path: pg_install/v14
|
||||
key: v1-${{ runner.os }}-${{ runner.arch }}-${{ env.BUILD_TYPE }}-pg-${{ steps.pg_v14_rev.outputs.pg_rev }}-${{ hashFiles('Makefile') }}
|
||||
token: ${{ github.token }}
|
||||
filters: .github/file-filters.yaml
|
||||
base: ${{ github.event_name != 'pull_request' && (github.event.merge_group.base_ref || github.ref_name) || '' }}
|
||||
ref: ${{ github.event_name != 'pull_request' && (github.event.merge_group.head_ref || github.ref) || '' }}
|
||||
|
||||
- name: Cache postgres v15 build
|
||||
id: cache_pg_15
|
||||
uses: actions/cache@v4
|
||||
with:
|
||||
path: pg_install/v15
|
||||
key: v1-${{ runner.os }}-${{ runner.arch }}-${{ env.BUILD_TYPE }}-pg-${{ steps.pg_v15_rev.outputs.pg_rev }}-${{ hashFiles('Makefile') }}
|
||||
|
||||
- name: Cache postgres v16 build
|
||||
id: cache_pg_16
|
||||
uses: actions/cache@v4
|
||||
with:
|
||||
path: pg_install/v16
|
||||
key: v1-${{ runner.os }}-${{ runner.arch }}-${{ env.BUILD_TYPE }}-pg-${{ steps.pg_v16_rev.outputs.pg_rev }}-${{ hashFiles('Makefile') }}
|
||||
|
||||
- name: Cache postgres v17 build
|
||||
id: cache_pg_17
|
||||
uses: actions/cache@v4
|
||||
with:
|
||||
path: pg_install/v17
|
||||
key: v1-${{ runner.os }}-${{ runner.arch }}-${{ env.BUILD_TYPE }}-pg-${{ steps.pg_v17_rev.outputs.pg_rev }}-${{ hashFiles('Makefile') }}
|
||||
|
||||
- name: Set extra env for macOS
|
||||
- name: Filter out only v-string for build matrix
|
||||
id: postgres_changes
|
||||
run: |
|
||||
echo 'LDFLAGS=-L/usr/local/opt/openssl@3/lib' >> $GITHUB_ENV
|
||||
echo 'CPPFLAGS=-I/usr/local/opt/openssl@3/include' >> $GITHUB_ENV
|
||||
v_strings_only_as_json_array=$(echo ${{ steps.files_changed.outputs.chnages }} | jq '.[]|select(test("v\\d+"))' | jq --slurp -c)
|
||||
echo "changes=${v_strings_only_as_json_array}" | tee -a "${GITHUB_OUTPUT}"
|
||||
|
||||
- name: Cache cargo deps
|
||||
uses: actions/cache@v4
|
||||
with:
|
||||
path: |
|
||||
~/.cargo/registry
|
||||
!~/.cargo/registry/src
|
||||
~/.cargo/git
|
||||
target
|
||||
key: v1-${{ runner.os }}-${{ runner.arch }}-cargo-${{ hashFiles('./Cargo.lock') }}-${{ hashFiles('./rust-toolchain.toml') }}-rust
|
||||
|
||||
- name: Build postgres v14
|
||||
if: steps.cache_pg_14.outputs.cache-hit != 'true'
|
||||
run: make postgres-v14 -j$(sysctl -n hw.ncpu)
|
||||
|
||||
- name: Build postgres v15
|
||||
if: steps.cache_pg_15.outputs.cache-hit != 'true'
|
||||
run: make postgres-v15 -j$(sysctl -n hw.ncpu)
|
||||
|
||||
- name: Build postgres v16
|
||||
if: steps.cache_pg_16.outputs.cache-hit != 'true'
|
||||
run: make postgres-v16 -j$(sysctl -n hw.ncpu)
|
||||
|
||||
- name: Build postgres v17
|
||||
if: steps.cache_pg_17.outputs.cache-hit != 'true'
|
||||
run: make postgres-v17 -j$(sysctl -n hw.ncpu)
|
||||
|
||||
- name: Build neon extensions
|
||||
run: make neon-pg-ext -j$(sysctl -n hw.ncpu)
|
||||
|
||||
- name: Build walproposer-lib
|
||||
run: make walproposer-lib -j$(sysctl -n hw.ncpu)
|
||||
|
||||
- name: Run cargo build
|
||||
run: PQ_LIB_DIR=$(pwd)/pg_install/v16/lib cargo build --all --release
|
||||
|
||||
- name: Check that no warnings are produced
|
||||
run: ./run_clippy.sh
|
||||
check-macos-build:
|
||||
needs: [ check-permissions, files-changed ]
|
||||
if: |
|
||||
contains(github.event.pull_request.labels.*.name, 'run-extra-build-macos') ||
|
||||
contains(github.event.pull_request.labels.*.name, 'run-extra-build-*') ||
|
||||
github.ref_name == 'main'
|
||||
uses: ./.github/workflows/build-macos.yml
|
||||
with:
|
||||
pg_versions: ${{ needs.files-changed.outputs.postgres_changes }}
|
||||
rebuild_rust_code: ${{ needs.files-changed.outputs.rebuild_rust_code }}
|
||||
rebuild_everything: ${{ fromJson(needs.files-changed.outputs.rebuild_everything) }}
|
||||
|
||||
gather-rust-build-stats:
|
||||
needs: [ check-permissions, build-build-tools-image ]
|
||||
needs: [ check-permissions, build-build-tools-image, files-changed ]
|
||||
permissions:
|
||||
id-token: write # aws-actions/configure-aws-credentials
|
||||
statuses: write
|
||||
contents: write
|
||||
if: |
|
||||
contains(github.event.pull_request.labels.*.name, 'run-extra-build-stats') ||
|
||||
contains(github.event.pull_request.labels.*.name, 'run-extra-build-*') ||
|
||||
github.ref_name == 'main'
|
||||
(needs.files-changed.outputs.v17 == 'true' || needs.files-changed.outputs.rebuild_everything == 'true') && (
|
||||
contains(github.event.pull_request.labels.*.name, 'run-extra-build-stats') ||
|
||||
contains(github.event.pull_request.labels.*.name, 'run-extra-build-*') ||
|
||||
github.ref_name == 'main'
|
||||
)
|
||||
runs-on: [ self-hosted, large ]
|
||||
container:
|
||||
image: ${{ needs.build-build-tools-image.outputs.image }}-bookworm
|
||||
|
||||
33
.github/workflows/pre-merge-checks.yml
vendored
33
.github/workflows/pre-merge-checks.yml
vendored
@@ -1,6 +1,12 @@
|
||||
name: Pre-merge checks
|
||||
|
||||
on:
|
||||
pull_request:
|
||||
paths:
|
||||
- .github/workflows/_check-codestyle-python.yml
|
||||
- .github/workflows/_check-codestyle-rust.yml
|
||||
- .github/workflows/build-build-tools-image.yml
|
||||
- .github/workflows/pre-merge-checks.yml
|
||||
merge_group:
|
||||
branches:
|
||||
- main
|
||||
@@ -17,8 +23,10 @@ jobs:
|
||||
runs-on: ubuntu-22.04
|
||||
outputs:
|
||||
python-changed: ${{ steps.python-src.outputs.any_changed }}
|
||||
rust-changed: ${{ steps.rust-src.outputs.any_changed }}
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
|
||||
- uses: tj-actions/changed-files@4edd678ac3f81e2dc578756871e4d00c19191daf # v45.0.4
|
||||
id: python-src
|
||||
with:
|
||||
@@ -30,11 +38,25 @@ jobs:
|
||||
poetry.lock
|
||||
pyproject.toml
|
||||
|
||||
- uses: tj-actions/changed-files@4edd678ac3f81e2dc578756871e4d00c19191daf # v45.0.4
|
||||
id: rust-src
|
||||
with:
|
||||
files: |
|
||||
.github/workflows/_check-codestyle-rust.yml
|
||||
.github/workflows/build-build-tools-image.yml
|
||||
.github/workflows/pre-merge-checks.yml
|
||||
**/**.rs
|
||||
**/Cargo.toml
|
||||
Cargo.toml
|
||||
Cargo.lock
|
||||
|
||||
- name: PRINT ALL CHANGED FILES FOR DEBUG PURPOSES
|
||||
env:
|
||||
PYTHON_CHANGED_FILES: ${{ steps.python-src.outputs.all_changed_files }}
|
||||
RUST_CHANGED_FILES: ${{ steps.rust-src.outputs.all_changed_files }}
|
||||
run: |
|
||||
echo "${PYTHON_CHANGED_FILES}"
|
||||
echo "${RUST_CHANGED_FILES}"
|
||||
|
||||
build-build-tools-image:
|
||||
if: needs.get-changed-files.outputs.python-changed == 'true'
|
||||
@@ -55,6 +77,16 @@ jobs:
|
||||
build-tools-image: ${{ needs.build-build-tools-image.outputs.image }}-bookworm-x64
|
||||
secrets: inherit
|
||||
|
||||
check-codestyle-rust:
|
||||
if: needs.get-changed-files.outputs.rust-changed == 'true'
|
||||
needs: [ get-changed-files, build-build-tools-image ]
|
||||
uses: ./.github/workflows/_check-codestyle-rust.yml
|
||||
with:
|
||||
# `-bookworm-x64` suffix should match the combination in `build-build-tools-image`
|
||||
build-tools-image: ${{ needs.build-build-tools-image.outputs.image }}-bookworm-x64
|
||||
archs: '["x64"]'
|
||||
secrets: inherit
|
||||
|
||||
# To get items from the merge queue merged into main we need to satisfy "Status checks that are required".
|
||||
# Currently we require 2 jobs (checks with exact name):
|
||||
# - conclusion
|
||||
@@ -67,6 +99,7 @@ jobs:
|
||||
needs:
|
||||
- get-changed-files
|
||||
- check-codestyle-python
|
||||
- check-codestyle-rust
|
||||
runs-on: ubuntu-22.04
|
||||
steps:
|
||||
- name: Create fake `neon-cloud-e2e` check
|
||||
|
||||
8
.github/workflows/trigger-e2e-tests.yml
vendored
8
.github/workflows/trigger-e2e-tests.yml
vendored
@@ -68,7 +68,7 @@ jobs:
|
||||
GH_TOKEN: ${{ secrets.CI_ACCESS_TOKEN }}
|
||||
TAG: ${{ needs.tag.outputs.build-tag }}
|
||||
steps:
|
||||
- name: Wait for `promote-images` job to finish
|
||||
- name: Wait for `promote-images-dev` job to finish
|
||||
# It's important to have a timeout here, the script in the step can run infinitely
|
||||
timeout-minutes: 60
|
||||
run: |
|
||||
@@ -79,17 +79,17 @@ jobs:
|
||||
# For PRs we use the run id as the tag
|
||||
BUILD_AND_TEST_RUN_ID=${TAG}
|
||||
while true; do
|
||||
conclusion=$(gh run --repo ${GITHUB_REPOSITORY} view ${BUILD_AND_TEST_RUN_ID} --json jobs --jq '.jobs[] | select(.name == "promote-images") | .conclusion')
|
||||
conclusion=$(gh run --repo ${GITHUB_REPOSITORY} view ${BUILD_AND_TEST_RUN_ID} --json jobs --jq '.jobs[] | select(.name == "promote-images-dev") | .conclusion')
|
||||
case "$conclusion" in
|
||||
success)
|
||||
break
|
||||
;;
|
||||
failure | cancelled | skipped)
|
||||
echo "The 'promote-images' job didn't succeed: '${conclusion}'. Exiting..."
|
||||
echo "The 'promote-images-dev' job didn't succeed: '${conclusion}'. Exiting..."
|
||||
exit 1
|
||||
;;
|
||||
*)
|
||||
echo "The 'promote-images' hasn't succeed yet. Waiting..."
|
||||
echo "The 'promote-images-dev' hasn't succeed yet. Waiting..."
|
||||
sleep 60
|
||||
;;
|
||||
esac
|
||||
|
||||
301
Cargo.lock
generated
301
Cargo.lock
generated
@@ -718,13 +718,13 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "axum"
|
||||
version = "0.7.5"
|
||||
version = "0.7.9"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "3a6c9af12842a67734c9a2e355436e5d03b22383ed60cf13cd0c18fbfe3dcbcf"
|
||||
checksum = "edca88bc138befd0323b20752846e6587272d3b03b0343c8ea28a6f819e6e71f"
|
||||
dependencies = [
|
||||
"async-trait",
|
||||
"axum-core",
|
||||
"base64 0.21.1",
|
||||
"base64 0.22.1",
|
||||
"bytes",
|
||||
"futures-util",
|
||||
"http 1.1.0",
|
||||
@@ -746,8 +746,8 @@ dependencies = [
|
||||
"sha1",
|
||||
"sync_wrapper 1.0.1",
|
||||
"tokio",
|
||||
"tokio-tungstenite",
|
||||
"tower",
|
||||
"tokio-tungstenite 0.24.0",
|
||||
"tower 0.5.2",
|
||||
"tower-layer",
|
||||
"tower-service",
|
||||
"tracing",
|
||||
@@ -1267,6 +1267,7 @@ dependencies = [
|
||||
"aws-config",
|
||||
"aws-sdk-kms",
|
||||
"aws-sdk-s3",
|
||||
"axum",
|
||||
"base64 0.13.1",
|
||||
"bytes",
|
||||
"camino",
|
||||
@@ -1274,9 +1275,10 @@ dependencies = [
|
||||
"chrono",
|
||||
"clap",
|
||||
"compute_api",
|
||||
"fail",
|
||||
"flate2",
|
||||
"futures",
|
||||
"hyper 0.14.30",
|
||||
"http 1.1.0",
|
||||
"metrics",
|
||||
"nix 0.27.1",
|
||||
"notify",
|
||||
@@ -1302,12 +1304,15 @@ dependencies = [
|
||||
"tokio-postgres",
|
||||
"tokio-stream",
|
||||
"tokio-util",
|
||||
"tower 0.5.2",
|
||||
"tower-http",
|
||||
"tracing",
|
||||
"tracing-opentelemetry",
|
||||
"tracing-subscriber",
|
||||
"tracing-utils",
|
||||
"url",
|
||||
"utils",
|
||||
"uuid",
|
||||
"vm_monitor",
|
||||
"workspace_hack",
|
||||
"zstd",
|
||||
@@ -1601,6 +1606,32 @@ dependencies = [
|
||||
"typenum",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "curve25519-dalek"
|
||||
version = "4.1.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "97fb8b7c4503de7d6ae7b42ab72a5a59857b4c937ec27a3d4539dba95b5ab2be"
|
||||
dependencies = [
|
||||
"cfg-if",
|
||||
"cpufeatures",
|
||||
"curve25519-dalek-derive",
|
||||
"digest",
|
||||
"fiat-crypto",
|
||||
"rustc_version",
|
||||
"subtle",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "curve25519-dalek-derive"
|
||||
version = "0.1.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "f46882e17999c6cc590af592290432be3bce0428cb0d5f8b6715e4dc7b383eb3"
|
||||
dependencies = [
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"syn 2.0.90",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "darling"
|
||||
version = "0.20.1"
|
||||
@@ -1649,6 +1680,20 @@ dependencies = [
|
||||
"parking_lot_core 0.9.8",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "dashmap"
|
||||
version = "6.1.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "5041cc499144891f3790297212f32a74fb938e5136a14943f338ef9e0ae276cf"
|
||||
dependencies = [
|
||||
"cfg-if",
|
||||
"crossbeam-utils",
|
||||
"hashbrown 0.14.5",
|
||||
"lock_api",
|
||||
"once_cell",
|
||||
"parking_lot_core 0.9.8",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "data-encoding"
|
||||
version = "2.4.0"
|
||||
@@ -1732,9 +1777,9 @@ checksum = "ab03c107fafeb3ee9f5925686dbb7a73bc76e3932abb0d2b365cb64b169cf04c"
|
||||
|
||||
[[package]]
|
||||
name = "diesel"
|
||||
version = "2.2.3"
|
||||
version = "2.2.6"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "65e13bab2796f412722112327f3e575601a3e9cdcbe426f0d30dbf43f3f5dc71"
|
||||
checksum = "ccf1bedf64cdb9643204a36dd15b19a6ce8e7aa7f7b105868e9f1fad5ffa7d12"
|
||||
dependencies = [
|
||||
"bitflags 2.4.1",
|
||||
"byteorder",
|
||||
@@ -1857,6 +1902,28 @@ dependencies = [
|
||||
"spki 0.7.3",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "ed25519"
|
||||
version = "2.2.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "115531babc129696a58c64a4fef0a8bf9e9698629fb97e9e40767d235cfbcd53"
|
||||
dependencies = [
|
||||
"signature 2.2.0",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "ed25519-dalek"
|
||||
version = "2.1.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "4a3daa8e81a3963a60642bcc1f90a670680bd4a77535faa384e9d1c79d620871"
|
||||
dependencies = [
|
||||
"curve25519-dalek",
|
||||
"ed25519",
|
||||
"rand_core 0.6.4",
|
||||
"sha2",
|
||||
"subtle",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "either"
|
||||
version = "1.8.1"
|
||||
@@ -1948,6 +2015,15 @@ dependencies = [
|
||||
"syn 2.0.90",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "env_filter"
|
||||
version = "0.1.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "186e05a59d4c50738528153b83b0b0194d3a29507dfec16eccd4b342903397d0"
|
||||
dependencies = [
|
||||
"log",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "env_logger"
|
||||
version = "0.10.2"
|
||||
@@ -1961,6 +2037,16 @@ dependencies = [
|
||||
"termcolor",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "env_logger"
|
||||
version = "0.11.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "6c012a26a7f605efc424dd53697843a72be7dc86ad2d01f7814337794a12231d"
|
||||
dependencies = [
|
||||
"env_filter",
|
||||
"log",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "equator"
|
||||
version = "0.2.2"
|
||||
@@ -2076,6 +2162,12 @@ dependencies = [
|
||||
"subtle",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "fiat-crypto"
|
||||
version = "0.2.9"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "28dea519a9695b9977216879a3ebfddf92f1c08c05d984f8996aecd6ecdc811d"
|
||||
|
||||
[[package]]
|
||||
name = "filetime"
|
||||
version = "0.2.22"
|
||||
@@ -2719,7 +2811,7 @@ dependencies = [
|
||||
"pin-project-lite",
|
||||
"socket2",
|
||||
"tokio",
|
||||
"tower",
|
||||
"tower 0.4.13",
|
||||
"tower-service",
|
||||
"tracing",
|
||||
]
|
||||
@@ -2944,6 +3036,28 @@ dependencies = [
|
||||
"str_stack",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "inferno"
|
||||
version = "0.12.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "75a5d75fee4d36809e6b021e4b96b686e763d365ffdb03af2bd00786353f84fe"
|
||||
dependencies = [
|
||||
"ahash",
|
||||
"clap",
|
||||
"crossbeam-channel",
|
||||
"crossbeam-utils",
|
||||
"dashmap 6.1.0",
|
||||
"env_logger 0.11.2",
|
||||
"indexmap 2.0.1",
|
||||
"itoa",
|
||||
"log",
|
||||
"num-format",
|
||||
"once_cell",
|
||||
"quick-xml 0.37.1",
|
||||
"rgb",
|
||||
"str_stack",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "inotify"
|
||||
version = "0.9.6"
|
||||
@@ -3151,7 +3265,7 @@ version = "0.7.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "4644821e1c3d7a560fe13d842d13f587c07348a1a05d3a797152d41c90c56df2"
|
||||
dependencies = [
|
||||
"dashmap",
|
||||
"dashmap 5.5.0",
|
||||
"hashbrown 0.13.2",
|
||||
]
|
||||
|
||||
@@ -3259,9 +3373,9 @@ checksum = "b87248edafb776e59e6ee64a79086f65890d3510f2c656c000bf2a7e8a0aea40"
|
||||
|
||||
[[package]]
|
||||
name = "matchit"
|
||||
version = "0.8.2"
|
||||
version = "0.8.4"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "540f1c43aed89909c0cc0cc604e3bb2f7e7a341a3728a9e6cfe760e733cd11ed"
|
||||
checksum = "47e1ffaa40ddd1f3ed91f717a33c8c0ee23fff369e3aa8772b9605cc1d22f4c3"
|
||||
|
||||
[[package]]
|
||||
name = "md-5"
|
||||
@@ -3689,23 +3803,23 @@ checksum = "ff011a302c396a5197692431fc1948019154afc178baf7d8e37367442a4601cf"
|
||||
|
||||
[[package]]
|
||||
name = "opentelemetry"
|
||||
version = "0.26.0"
|
||||
version = "0.27.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "570074cc999d1a58184080966e5bd3bf3a9a4af650c3b05047c2621e7405cd17"
|
||||
checksum = "ab70038c28ed37b97d8ed414b6429d343a8bbf44c9f79ec854f3a643029ba6d7"
|
||||
dependencies = [
|
||||
"futures-core",
|
||||
"futures-sink",
|
||||
"js-sys",
|
||||
"once_cell",
|
||||
"pin-project-lite",
|
||||
"thiserror",
|
||||
"tracing",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "opentelemetry-http"
|
||||
version = "0.26.0"
|
||||
version = "0.27.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "6351496aeaa49d7c267fb480678d85d1cd30c5edb20b497c48c56f62a8c14b99"
|
||||
checksum = "10a8a7f5f6ba7c1b286c2fbca0454eaba116f63bbe69ed250b642d36fbb04d80"
|
||||
dependencies = [
|
||||
"async-trait",
|
||||
"bytes",
|
||||
@@ -3716,9 +3830,9 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "opentelemetry-otlp"
|
||||
version = "0.26.0"
|
||||
version = "0.27.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "29e1f9c8b032d4f635c730c0efcf731d5e2530ea13fa8bef7939ddc8420696bd"
|
||||
checksum = "91cf61a1868dacc576bf2b2a1c3e9ab150af7272909e80085c3173384fe11f76"
|
||||
dependencies = [
|
||||
"async-trait",
|
||||
"futures-core",
|
||||
@@ -3734,9 +3848,9 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "opentelemetry-proto"
|
||||
version = "0.26.1"
|
||||
version = "0.27.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "c9d3968ce3aefdcca5c27e3c4ea4391b37547726a70893aab52d3de95d5f8b34"
|
||||
checksum = "a6e05acbfada5ec79023c85368af14abd0b307c015e9064d249b2a950ef459a6"
|
||||
dependencies = [
|
||||
"opentelemetry",
|
||||
"opentelemetry_sdk",
|
||||
@@ -3746,22 +3860,21 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "opentelemetry-semantic-conventions"
|
||||
version = "0.26.0"
|
||||
version = "0.27.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "db945c1eaea8ac6a9677185357480d215bb6999faa9f691d0c4d4d641eab7a09"
|
||||
checksum = "bc1b6902ff63b32ef6c489e8048c5e253e2e4a803ea3ea7e783914536eb15c52"
|
||||
|
||||
[[package]]
|
||||
name = "opentelemetry_sdk"
|
||||
version = "0.26.0"
|
||||
version = "0.27.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "d2c627d9f4c9cdc1f21a29ee4bfbd6028fcb8bcf2a857b43f3abdf72c9c862f3"
|
||||
checksum = "231e9d6ceef9b0b2546ddf52335785ce41252bc7474ee8ba05bfad277be13ab8"
|
||||
dependencies = [
|
||||
"async-trait",
|
||||
"futures-channel",
|
||||
"futures-executor",
|
||||
"futures-util",
|
||||
"glob",
|
||||
"once_cell",
|
||||
"opentelemetry",
|
||||
"percent-encoding",
|
||||
"rand 0.8.5",
|
||||
@@ -3769,6 +3882,7 @@ dependencies = [
|
||||
"thiserror",
|
||||
"tokio",
|
||||
"tokio-stream",
|
||||
"tracing",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@@ -3868,9 +3982,11 @@ name = "pagectl"
|
||||
version = "0.1.0"
|
||||
dependencies = [
|
||||
"anyhow",
|
||||
"bincode",
|
||||
"camino",
|
||||
"clap",
|
||||
"humantime",
|
||||
"itertools 0.10.5",
|
||||
"pageserver",
|
||||
"pageserver_api",
|
||||
"postgres_ffi",
|
||||
@@ -3892,6 +4008,7 @@ dependencies = [
|
||||
"arc-swap",
|
||||
"async-compression",
|
||||
"async-stream",
|
||||
"bincode",
|
||||
"bit_field",
|
||||
"byteorder",
|
||||
"bytes",
|
||||
@@ -3931,6 +4048,7 @@ dependencies = [
|
||||
"postgres_connection",
|
||||
"postgres_ffi",
|
||||
"postgres_initdb",
|
||||
"pprof",
|
||||
"pq_proto",
|
||||
"procfs",
|
||||
"rand 0.8.5",
|
||||
@@ -4417,7 +4535,7 @@ dependencies = [
|
||||
"bytes",
|
||||
"crc32c",
|
||||
"criterion",
|
||||
"env_logger",
|
||||
"env_logger 0.10.2",
|
||||
"log",
|
||||
"memoffset 0.9.0",
|
||||
"once_cell",
|
||||
@@ -4458,7 +4576,7 @@ dependencies = [
|
||||
"cfg-if",
|
||||
"criterion",
|
||||
"findshlibs",
|
||||
"inferno",
|
||||
"inferno 0.11.21",
|
||||
"libc",
|
||||
"log",
|
||||
"nix 0.26.4",
|
||||
@@ -4493,9 +4611,9 @@ checksum = "5b40af805b3121feab8a3c29f04d8ad262fa8e0561883e7653e024ae4479e6de"
|
||||
|
||||
[[package]]
|
||||
name = "pq-sys"
|
||||
version = "0.4.8"
|
||||
version = "0.6.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "31c0052426df997c0cbd30789eb44ca097e3541717a7b8fa36b1c464ee7edebd"
|
||||
checksum = "f6cc05d7ea95200187117196eee9edd0644424911821aeb28a18ce60ea0b8793"
|
||||
dependencies = [
|
||||
"vcpkg",
|
||||
]
|
||||
@@ -4684,9 +4802,10 @@ dependencies = [
|
||||
"clap",
|
||||
"compute_api",
|
||||
"consumption_metrics",
|
||||
"dashmap",
|
||||
"dashmap 5.5.0",
|
||||
"ecdsa 0.16.9",
|
||||
"env_logger",
|
||||
"ed25519-dalek",
|
||||
"env_logger 0.10.2",
|
||||
"fallible-iterator",
|
||||
"flate2",
|
||||
"framed-websockets",
|
||||
@@ -4757,7 +4876,7 @@ dependencies = [
|
||||
"tokio-postgres",
|
||||
"tokio-postgres2",
|
||||
"tokio-rustls 0.26.0",
|
||||
"tokio-tungstenite",
|
||||
"tokio-tungstenite 0.21.0",
|
||||
"tokio-util",
|
||||
"tracing",
|
||||
"tracing-subscriber",
|
||||
@@ -4793,6 +4912,15 @@ dependencies = [
|
||||
"serde",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "quick-xml"
|
||||
version = "0.37.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "f22f29bdff3987b4d8632ef95fd6424ec7e4e0a57e2f4fc63e489e75357f6a03"
|
||||
dependencies = [
|
||||
"memchr",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "quote"
|
||||
version = "1.0.37"
|
||||
@@ -5177,15 +5305,15 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "reqwest-tracing"
|
||||
version = "0.5.4"
|
||||
version = "0.5.5"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "ff82cf5730a1311fb9413b0bc2b8e743e0157cd73f010ab4ec374a923873b6a2"
|
||||
checksum = "73e6153390585f6961341b50e5a1931d6be6dee4292283635903c26ef9d980d2"
|
||||
dependencies = [
|
||||
"anyhow",
|
||||
"async-trait",
|
||||
"getrandom 0.2.11",
|
||||
"http 1.1.0",
|
||||
"matchit 0.8.2",
|
||||
"matchit 0.8.4",
|
||||
"opentelemetry",
|
||||
"reqwest",
|
||||
"reqwest-middleware",
|
||||
@@ -5531,6 +5659,7 @@ dependencies = [
|
||||
"crc32c",
|
||||
"criterion",
|
||||
"desim",
|
||||
"env_logger 0.10.2",
|
||||
"fail",
|
||||
"futures",
|
||||
"hex",
|
||||
@@ -5559,6 +5688,7 @@ dependencies = [
|
||||
"serde",
|
||||
"serde_json",
|
||||
"sha2",
|
||||
"smallvec",
|
||||
"storage_broker",
|
||||
"strum",
|
||||
"strum_macros",
|
||||
@@ -5583,10 +5713,13 @@ dependencies = [
|
||||
name = "safekeeper_api"
|
||||
version = "0.1.0"
|
||||
dependencies = [
|
||||
"anyhow",
|
||||
"const_format",
|
||||
"pageserver_api",
|
||||
"postgres_ffi",
|
||||
"pq_proto",
|
||||
"serde",
|
||||
"serde_json",
|
||||
"tokio",
|
||||
"utils",
|
||||
]
|
||||
@@ -6799,7 +6932,19 @@ dependencies = [
|
||||
"futures-util",
|
||||
"log",
|
||||
"tokio",
|
||||
"tungstenite",
|
||||
"tungstenite 0.21.0",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "tokio-tungstenite"
|
||||
version = "0.24.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "edc5f74e248dc973e0dbb7b74c7e0d6fcc301c694ff50049504004ef4d0cdcd9"
|
||||
dependencies = [
|
||||
"futures-util",
|
||||
"log",
|
||||
"tokio",
|
||||
"tungstenite 0.24.0",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@@ -6880,7 +7025,7 @@ dependencies = [
|
||||
"tokio",
|
||||
"tokio-rustls 0.26.0",
|
||||
"tokio-stream",
|
||||
"tower",
|
||||
"tower 0.4.13",
|
||||
"tower-layer",
|
||||
"tower-service",
|
||||
"tracing",
|
||||
@@ -6921,16 +7066,49 @@ dependencies = [
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "tower-layer"
|
||||
version = "0.3.2"
|
||||
name = "tower"
|
||||
version = "0.5.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "c20c8dbed6283a09604c3e69b4b7eeb54e298b8a600d4d5ecb5ad39de609f1d0"
|
||||
checksum = "d039ad9159c98b70ecfd540b2573b97f7f52c3e8d9f8ad57a24b916a536975f9"
|
||||
dependencies = [
|
||||
"futures-core",
|
||||
"futures-util",
|
||||
"pin-project-lite",
|
||||
"sync_wrapper 1.0.1",
|
||||
"tokio",
|
||||
"tower-layer",
|
||||
"tower-service",
|
||||
"tracing",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "tower-http"
|
||||
version = "0.6.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "403fa3b783d4b626a8ad51d766ab03cb6d2dbfc46b1c5d4448395e6628dc9697"
|
||||
dependencies = [
|
||||
"bitflags 2.4.1",
|
||||
"bytes",
|
||||
"http 1.1.0",
|
||||
"http-body 1.0.0",
|
||||
"pin-project-lite",
|
||||
"tower-layer",
|
||||
"tower-service",
|
||||
"tracing",
|
||||
"uuid",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "tower-layer"
|
||||
version = "0.3.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "121c2a6cda46980bb0fcd1647ffaf6cd3fc79a013de288782836f6df9c48780e"
|
||||
|
||||
[[package]]
|
||||
name = "tower-service"
|
||||
version = "0.3.2"
|
||||
version = "0.3.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "b6bc1c9ce2b5135ac7f93c72918fc37feb872bdc6a5533a8b85eb4b86bfdae52"
|
||||
checksum = "8df9b6e13f2d32c91b9bd719c00d1958837bc7dec474d94952798cc8e69eeec3"
|
||||
|
||||
[[package]]
|
||||
name = "tracing"
|
||||
@@ -6999,9 +7177,9 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "tracing-opentelemetry"
|
||||
version = "0.27.0"
|
||||
version = "0.28.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "dc58af5d3f6c5811462cabb3289aec0093f7338e367e5a33d28c0433b3c7360b"
|
||||
checksum = "97a971f6058498b5c0f1affa23e7ea202057a7301dbff68e968b2d578bcbd053"
|
||||
dependencies = [
|
||||
"js-sys",
|
||||
"once_cell",
|
||||
@@ -7085,6 +7263,24 @@ dependencies = [
|
||||
"utf-8",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "tungstenite"
|
||||
version = "0.24.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "18e5b8366ee7a95b16d32197d0b2604b43a0be89dc5fac9f8e96ccafbaedda8a"
|
||||
dependencies = [
|
||||
"byteorder",
|
||||
"bytes",
|
||||
"data-encoding",
|
||||
"http 1.1.0",
|
||||
"httparse",
|
||||
"log",
|
||||
"rand 0.8.5",
|
||||
"sha1",
|
||||
"thiserror",
|
||||
"utf-8",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "twox-hash"
|
||||
version = "1.6.3"
|
||||
@@ -7252,6 +7448,7 @@ dependencies = [
|
||||
"hex-literal",
|
||||
"humantime",
|
||||
"hyper 0.14.30",
|
||||
"inferno 0.12.0",
|
||||
"itertools 0.10.5",
|
||||
"jemalloc_pprof",
|
||||
"jsonwebtoken",
|
||||
@@ -7355,7 +7552,7 @@ dependencies = [
|
||||
"anyhow",
|
||||
"camino-tempfile",
|
||||
"clap",
|
||||
"env_logger",
|
||||
"env_logger 0.10.2",
|
||||
"log",
|
||||
"postgres",
|
||||
"postgres_ffi",
|
||||
@@ -7370,12 +7567,21 @@ dependencies = [
|
||||
"anyhow",
|
||||
"async-compression",
|
||||
"bytes",
|
||||
"camino",
|
||||
"camino-tempfile",
|
||||
"criterion",
|
||||
"futures",
|
||||
"pageserver_api",
|
||||
"postgres_ffi",
|
||||
"pprof",
|
||||
"prost",
|
||||
"remote_storage",
|
||||
"serde",
|
||||
"serde_json",
|
||||
"thiserror",
|
||||
"tikv-jemallocator",
|
||||
"tokio",
|
||||
"tokio-util",
|
||||
"tonic",
|
||||
"tonic-build",
|
||||
"tracing",
|
||||
@@ -7866,7 +8072,8 @@ dependencies = [
|
||||
"tokio-util",
|
||||
"toml_edit",
|
||||
"tonic",
|
||||
"tower",
|
||||
"tower 0.4.13",
|
||||
"tower 0.5.2",
|
||||
"tracing",
|
||||
"tracing-core",
|
||||
"url",
|
||||
|
||||
23
Cargo.toml
23
Cargo.toml
@@ -65,7 +65,7 @@ aws-smithy-types = "1.2"
|
||||
aws-credential-types = "1.2.0"
|
||||
aws-sigv4 = { version = "1.2", features = ["sign-http"] }
|
||||
aws-types = "1.3"
|
||||
axum = { version = "0.7.5", features = ["ws"] }
|
||||
axum = { version = "0.7.9", features = ["ws"] }
|
||||
base64 = "0.13.0"
|
||||
bincode = "1.3"
|
||||
bindgen = "0.70"
|
||||
@@ -110,6 +110,7 @@ hyper-util = "0.1"
|
||||
tokio-tungstenite = "0.21.0"
|
||||
indexmap = "2"
|
||||
indoc = "2"
|
||||
inferno = "0.12.0"
|
||||
ipnet = "2.10.0"
|
||||
itertools = "0.10"
|
||||
itoa = "1.0.11"
|
||||
@@ -126,16 +127,16 @@ notify = "6.0.0"
|
||||
num_cpus = "1.15"
|
||||
num-traits = "0.2.15"
|
||||
once_cell = "1.13"
|
||||
opentelemetry = "0.26"
|
||||
opentelemetry_sdk = "0.26"
|
||||
opentelemetry-otlp = { version = "0.26", default-features=false, features = ["http-proto", "trace", "http", "reqwest-client"] }
|
||||
opentelemetry-semantic-conventions = "0.26"
|
||||
opentelemetry = "0.27"
|
||||
opentelemetry_sdk = "0.27"
|
||||
opentelemetry-otlp = { version = "0.27", default-features = false, features = ["http-proto", "trace", "http", "reqwest-client"] }
|
||||
opentelemetry-semantic-conventions = "0.27"
|
||||
parking_lot = "0.12"
|
||||
parquet = { version = "53", default-features = false, features = ["zstd"] }
|
||||
parquet_derive = "53"
|
||||
pbkdf2 = { version = "0.12.1", features = ["simple", "std"] }
|
||||
pin-project-lite = "0.2"
|
||||
pprof = { version = "0.14", features = ["criterion", "flamegraph", "protobuf", "protobuf-codec"] }
|
||||
pprof = { version = "0.14", features = ["criterion", "flamegraph", "frame-pointer", "protobuf", "protobuf-codec"] }
|
||||
procfs = "0.16"
|
||||
prometheus = {version = "0.13", default-features=false, features = ["process"]} # removes protobuf dependency
|
||||
prost = "0.13"
|
||||
@@ -143,7 +144,7 @@ rand = "0.8"
|
||||
redis = { version = "0.25.2", features = ["tokio-rustls-comp", "keep-alive"] }
|
||||
regex = "1.10.2"
|
||||
reqwest = { version = "0.12", default-features = false, features = ["rustls-tls"] }
|
||||
reqwest-tracing = { version = "0.5", features = ["opentelemetry_0_26"] }
|
||||
reqwest-tracing = { version = "0.5", features = ["opentelemetry_0_27"] }
|
||||
reqwest-middleware = "0.4"
|
||||
reqwest-retry = "0.7"
|
||||
routerify = "3"
|
||||
@@ -187,10 +188,12 @@ tokio-util = { version = "0.7.10", features = ["io", "rt"] }
|
||||
toml = "0.8"
|
||||
toml_edit = "0.22"
|
||||
tonic = {version = "0.12.3", features = ["tls", "tls-roots"]}
|
||||
tower-service = "0.3.2"
|
||||
tower = { version = "0.5.2", default-features = false }
|
||||
tower-http = { version = "0.6.2", features = ["request-id", "trace"] }
|
||||
tower-service = "0.3.3"
|
||||
tracing = "0.1"
|
||||
tracing-error = "0.2"
|
||||
tracing-opentelemetry = "0.27"
|
||||
tracing-opentelemetry = "0.28"
|
||||
tracing-subscriber = { version = "0.3", default-features = false, features = ["smallvec", "fmt", "tracing-log", "std", "env-filter", "json"] }
|
||||
try-lock = "0.2.5"
|
||||
twox-hash = { version = "1.6.3", default-features = false }
|
||||
@@ -266,6 +269,8 @@ tokio-postgres = { git = "https://github.com/neondatabase/rust-postgres.git", br
|
||||
[profile.release]
|
||||
# This is useful for profiling and, to some extent, debug.
|
||||
# Besides, debug info should not affect the performance.
|
||||
#
|
||||
# NB: we also enable frame pointers for improved profiling, see .cargo/config.toml.
|
||||
debug = true
|
||||
|
||||
# disable debug symbols for all packages except this one to decrease binaries size
|
||||
|
||||
10
Dockerfile
10
Dockerfile
@@ -45,7 +45,7 @@ COPY --chown=nonroot . .
|
||||
|
||||
ARG ADDITIONAL_RUSTFLAGS
|
||||
RUN set -e \
|
||||
&& PQ_LIB_DIR=$(pwd)/pg_install/v${STABLE_PG_VERSION}/lib RUSTFLAGS="-Clinker=clang -Clink-arg=-fuse-ld=mold -Clink-arg=-Wl,--no-rosegment ${ADDITIONAL_RUSTFLAGS}" cargo build \
|
||||
&& PQ_LIB_DIR=$(pwd)/pg_install/v${STABLE_PG_VERSION}/lib RUSTFLAGS="-Clinker=clang -Clink-arg=-fuse-ld=mold -Clink-arg=-Wl,--no-rosegment -Cforce-frame-pointers=yes ${ADDITIONAL_RUSTFLAGS}" cargo build \
|
||||
--bin pg_sni_router \
|
||||
--bin pageserver \
|
||||
--bin pagectl \
|
||||
@@ -69,6 +69,9 @@ RUN set -e \
|
||||
libreadline-dev \
|
||||
libseccomp-dev \
|
||||
ca-certificates \
|
||||
# System postgres for use with client libraries (e.g. in storage controller)
|
||||
postgresql-15 \
|
||||
openssl \
|
||||
&& rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/* \
|
||||
&& useradd -d /data neon \
|
||||
&& chown -R neon:neon /data
|
||||
@@ -101,11 +104,6 @@ RUN mkdir -p /data/.neon/ && \
|
||||
> /data/.neon/pageserver.toml && \
|
||||
chown -R neon:neon /data/.neon
|
||||
|
||||
# When running a binary that links with libpq, default to using our most recent postgres version. Binaries
|
||||
# that want a particular postgres version will select it explicitly: this is just a default.
|
||||
ENV LD_LIBRARY_PATH=/usr/local/v${DEFAULT_PG_VERSION}/lib
|
||||
|
||||
|
||||
VOLUME ["/data"]
|
||||
USER neon
|
||||
EXPOSE 6400
|
||||
|
||||
3
Makefile
3
Makefile
@@ -3,7 +3,6 @@ ROOT_PROJECT_DIR := $(dir $(abspath $(lastword $(MAKEFILE_LIST))))
|
||||
# Where to install Postgres, default is ./pg_install, maybe useful for package managers
|
||||
POSTGRES_INSTALL_DIR ?= $(ROOT_PROJECT_DIR)/pg_install/
|
||||
|
||||
OPENSSL_PREFIX_DIR := /usr/local/openssl
|
||||
ICU_PREFIX_DIR := /usr/local/icu
|
||||
|
||||
#
|
||||
@@ -26,11 +25,9 @@ endif
|
||||
ifeq ($(shell test -e /home/nonroot/.docker_build && echo -n yes),yes)
|
||||
# Exclude static build openssl, icu for local build (MacOS, Linux)
|
||||
# Only keep for build type release and debug
|
||||
PG_CFLAGS += -I$(OPENSSL_PREFIX_DIR)/include
|
||||
PG_CONFIGURE_OPTS += --with-icu
|
||||
PG_CONFIGURE_OPTS += ICU_CFLAGS='-I/$(ICU_PREFIX_DIR)/include -DU_STATIC_IMPLEMENTATION'
|
||||
PG_CONFIGURE_OPTS += ICU_LIBS='-L$(ICU_PREFIX_DIR)/lib -L$(ICU_PREFIX_DIR)/lib64 -licui18n -licuuc -licudata -lstdc++ -Wl,-Bdynamic -lm'
|
||||
PG_CONFIGURE_OPTS += LDFLAGS='-L$(OPENSSL_PREFIX_DIR)/lib -L$(OPENSSL_PREFIX_DIR)/lib64 -L$(ICU_PREFIX_DIR)/lib -L$(ICU_PREFIX_DIR)/lib64 -Wl,-Bstatic -lssl -lcrypto -Wl,-Bdynamic -lrt -lm -ldl -lpthread'
|
||||
endif
|
||||
|
||||
UNAME_S := $(shell uname -s)
|
||||
|
||||
@@ -115,7 +115,7 @@ RUN set -e \
|
||||
|
||||
# Keep the version the same as in compute/compute-node.Dockerfile and
|
||||
# test_runner/regress/test_compute_metrics.py.
|
||||
ENV SQL_EXPORTER_VERSION=0.16.0
|
||||
ENV SQL_EXPORTER_VERSION=0.17.0
|
||||
RUN curl -fsSL \
|
||||
"https://github.com/burningalchemist/sql_exporter/releases/download/${SQL_EXPORTER_VERSION}/sql_exporter-${SQL_EXPORTER_VERSION}.linux-$(case "$(uname -m)" in x86_64) echo amd64;; aarch64) echo arm64;; esac).tar.gz" \
|
||||
--output sql_exporter.tar.gz \
|
||||
@@ -190,21 +190,6 @@ RUN for package in Capture::Tiny DateTime Devel::Cover Digest::MD5 File::Spec JS
|
||||
&& make install \
|
||||
&& rm -rf ../lcov.tar.gz
|
||||
|
||||
# Compile and install the static OpenSSL library
|
||||
ENV OPENSSL_VERSION=1.1.1w
|
||||
ENV OPENSSL_PREFIX=/usr/local/openssl
|
||||
RUN wget -O /tmp/openssl-${OPENSSL_VERSION}.tar.gz https://www.openssl.org/source/openssl-${OPENSSL_VERSION}.tar.gz && \
|
||||
echo "cf3098950cb4d853ad95c0841f1f9c6d3dc102dccfcacd521d93925208b76ac8 /tmp/openssl-${OPENSSL_VERSION}.tar.gz" | sha256sum --check && \
|
||||
cd /tmp && \
|
||||
tar xzvf /tmp/openssl-${OPENSSL_VERSION}.tar.gz && \
|
||||
rm /tmp/openssl-${OPENSSL_VERSION}.tar.gz && \
|
||||
cd /tmp/openssl-${OPENSSL_VERSION} && \
|
||||
./config --prefix=${OPENSSL_PREFIX} -static --static no-shared -fPIC && \
|
||||
make -j "$(nproc)" && \
|
||||
make install && \
|
||||
cd /tmp && \
|
||||
rm -rf /tmp/openssl-${OPENSSL_VERSION}
|
||||
|
||||
# Use the same version of libicu as the compute nodes so that
|
||||
# clusters created using inidb on pageserver can be used by computes.
|
||||
#
|
||||
@@ -258,7 +243,7 @@ WORKDIR /home/nonroot
|
||||
|
||||
# Rust
|
||||
# Please keep the version of llvm (installed above) in sync with rust llvm (`rustc --version --verbose | grep LLVM`)
|
||||
ENV RUSTC_VERSION=1.83.0
|
||||
ENV RUSTC_VERSION=1.84.0
|
||||
ENV RUSTUP_HOME="/home/nonroot/.rustup"
|
||||
ENV PATH="/home/nonroot/.cargo/bin:${PATH}"
|
||||
ARG RUSTFILT_VERSION=0.2.1
|
||||
|
||||
@@ -66,6 +66,7 @@ RUN cd postgres && \
|
||||
make MAKELEVEL=0 -j $(getconf _NPROCESSORS_ONLN) -s -C src/interfaces/libpq install && \
|
||||
# Enable some of contrib extensions
|
||||
echo 'trusted = true' >> /usr/local/pgsql/share/extension/autoinc.control && \
|
||||
echo 'trusted = true' >> /usr/local/pgsql/share/extension/dblink.control && \
|
||||
echo 'trusted = true' >> /usr/local/pgsql/share/extension/bloom.control && \
|
||||
echo 'trusted = true' >> /usr/local/pgsql/share/extension/earthdistance.control && \
|
||||
echo 'trusted = true' >> /usr/local/pgsql/share/extension/insert_username.control && \
|
||||
@@ -104,16 +105,18 @@ RUN cd postgres && \
|
||||
esac; \
|
||||
done;
|
||||
|
||||
# Set PATH for all the subsequent build steps
|
||||
ENV PATH="/usr/local/pgsql/bin:$PATH"
|
||||
|
||||
#########################################################################################
|
||||
#
|
||||
# Layer "postgis-build"
|
||||
# Build PostGIS from the upstream PostGIS mirror.
|
||||
#
|
||||
#########################################################################################
|
||||
FROM build-deps AS postgis-build
|
||||
FROM pg-build AS postgis-build
|
||||
ARG DEBIAN_VERSION
|
||||
ARG PG_VERSION
|
||||
COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
|
||||
RUN apt update && \
|
||||
apt install --no-install-recommends --no-install-suggests -y \
|
||||
gdal-bin libboost-dev libboost-thread-dev libboost-filesystem-dev \
|
||||
@@ -151,8 +154,6 @@ RUN case "${DEBIAN_VERSION}" in \
|
||||
DESTDIR=/sfcgal ninja install -j $(getconf _NPROCESSORS_ONLN) && \
|
||||
ninja clean && cp -R /sfcgal/* /
|
||||
|
||||
ENV PATH="/usr/local/pgsql/bin:$PATH"
|
||||
|
||||
# Postgis 3.5.0 supports v17
|
||||
RUN case "${PG_VERSION}" in \
|
||||
"v17") \
|
||||
@@ -170,7 +171,6 @@ RUN case "${PG_VERSION}" in \
|
||||
wget https://download.osgeo.org/postgis/source/postgis-${POSTGIS_VERSION}.tar.gz -O postgis.tar.gz && \
|
||||
echo "${POSTGIS_CHECKSUM} postgis.tar.gz" | sha256sum --check && \
|
||||
mkdir postgis-src && cd postgis-src && tar xzf ../postgis.tar.gz --strip-components=1 -C . && \
|
||||
find /usr/local/pgsql -type f | sed 's|^/usr/local/pgsql/||' > /before.txt &&\
|
||||
./autogen.sh && \
|
||||
./configure --with-sfcgal=/usr/local/bin/sfcgal-config && \
|
||||
make -j $(getconf _NPROCESSORS_ONLN) && \
|
||||
@@ -220,11 +220,7 @@ RUN case "${PG_VERSION}" in \
|
||||
cmake -GNinja -DCMAKE_BUILD_TYPE=Release .. && \
|
||||
ninja -j $(getconf _NPROCESSORS_ONLN) && \
|
||||
ninja -j $(getconf _NPROCESSORS_ONLN) install && \
|
||||
echo 'trusted = true' >> /usr/local/pgsql/share/extension/pgrouting.control && \
|
||||
find /usr/local/pgsql -type f | sed 's|^/usr/local/pgsql/||' > /after.txt &&\
|
||||
cp /usr/local/pgsql/share/extension/pgrouting.control /extensions/postgis && \
|
||||
sort -o /before.txt /before.txt && sort -o /after.txt /after.txt && \
|
||||
comm -13 /before.txt /after.txt | tar --directory=/usr/local/pgsql --zstd -cf /extensions/postgis.tar.zst -T -
|
||||
echo 'trusted = true' >> /usr/local/pgsql/share/extension/pgrouting.control
|
||||
|
||||
#########################################################################################
|
||||
#
|
||||
@@ -232,9 +228,8 @@ RUN case "${PG_VERSION}" in \
|
||||
# Build plv8
|
||||
#
|
||||
#########################################################################################
|
||||
FROM build-deps AS plv8-build
|
||||
FROM pg-build AS plv8-build
|
||||
ARG PG_VERSION
|
||||
COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
|
||||
|
||||
COPY compute/patches/plv8-3.1.10.patch /plv8-3.1.10.patch
|
||||
|
||||
@@ -269,7 +264,6 @@ RUN case "${PG_VERSION}" in \
|
||||
# generate and copy upgrade scripts
|
||||
mkdir -p upgrade && ./generate_upgrade.sh ${PLV8_TAG#v} && \
|
||||
cp upgrade/* /usr/local/pgsql/share/extension/ && \
|
||||
export PATH="/usr/local/pgsql/bin:$PATH" && \
|
||||
make DOCKER=1 -j $(getconf _NPROCESSORS_ONLN) install && \
|
||||
rm -rf /plv8-* && \
|
||||
find /usr/local/pgsql/ -name "plv8-*.so" | xargs strip && \
|
||||
@@ -296,9 +290,8 @@ RUN case "${PG_VERSION}" in \
|
||||
# Build h3_pg
|
||||
#
|
||||
#########################################################################################
|
||||
FROM build-deps AS h3-pg-build
|
||||
FROM pg-build AS h3-pg-build
|
||||
ARG PG_VERSION
|
||||
COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
|
||||
|
||||
# not version-specific
|
||||
# last release v4.1.0 - Jan 18, 2023
|
||||
@@ -319,7 +312,6 @@ RUN mkdir -p /h3/usr/ && \
|
||||
RUN wget https://github.com/zachasme/h3-pg/archive/refs/tags/v4.1.3.tar.gz -O h3-pg.tar.gz && \
|
||||
echo "5c17f09a820859ffe949f847bebf1be98511fb8f1bd86f94932512c00479e324 h3-pg.tar.gz" | sha256sum --check && \
|
||||
mkdir h3-pg-src && cd h3-pg-src && tar xzf ../h3-pg.tar.gz --strip-components=1 -C . && \
|
||||
export PATH="/usr/local/pgsql/bin:$PATH" && \
|
||||
make -j $(getconf _NPROCESSORS_ONLN) && \
|
||||
make -j $(getconf _NPROCESSORS_ONLN) install && \
|
||||
echo 'trusted = true' >> /usr/local/pgsql/share/extension/h3.control && \
|
||||
@@ -331,17 +323,16 @@ RUN wget https://github.com/zachasme/h3-pg/archive/refs/tags/v4.1.3.tar.gz -O h3
|
||||
# compile unit extension
|
||||
#
|
||||
#########################################################################################
|
||||
FROM build-deps AS unit-pg-build
|
||||
FROM pg-build AS unit-pg-build
|
||||
ARG PG_VERSION
|
||||
COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
|
||||
|
||||
# not version-specific
|
||||
# last release 7.9 - Sep 15, 2024
|
||||
RUN wget https://github.com/df7cb/postgresql-unit/archive/refs/tags/7.9.tar.gz -O postgresql-unit.tar.gz && \
|
||||
echo "e46de6245dcc8b2c2ecf29873dbd43b2b346773f31dd5ce4b8315895a052b456 postgresql-unit.tar.gz" | sha256sum --check && \
|
||||
mkdir postgresql-unit-src && cd postgresql-unit-src && tar xzf ../postgresql-unit.tar.gz --strip-components=1 -C . && \
|
||||
make -j $(getconf _NPROCESSORS_ONLN) PG_CONFIG=/usr/local/pgsql/bin/pg_config && \
|
||||
make -j $(getconf _NPROCESSORS_ONLN) install PG_CONFIG=/usr/local/pgsql/bin/pg_config && \
|
||||
make -j $(getconf _NPROCESSORS_ONLN) && \
|
||||
make -j $(getconf _NPROCESSORS_ONLN) install && \
|
||||
# unit extension's "create extension" script relies on absolute install path to fill some reference tables.
|
||||
# We move the extension from '/usr/local/pgsql/' to '/usr/local/' after it is build. So we need to adjust the path.
|
||||
# This one-liner removes pgsql/ part of the path.
|
||||
@@ -355,9 +346,8 @@ RUN wget https://github.com/df7cb/postgresql-unit/archive/refs/tags/7.9.tar.gz -
|
||||
# compile pgvector extension
|
||||
#
|
||||
#########################################################################################
|
||||
FROM build-deps AS vector-pg-build
|
||||
FROM pg-build AS vector-pg-build
|
||||
ARG PG_VERSION
|
||||
COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
|
||||
|
||||
COPY compute/patches/pgvector.patch /pgvector.patch
|
||||
|
||||
@@ -371,8 +361,8 @@ RUN wget https://github.com/pgvector/pgvector/archive/refs/tags/v0.8.0.tar.gz -O
|
||||
echo "867a2c328d4928a5a9d6f052cd3bc78c7d60228a9b914ad32aa3db88e9de27b0 pgvector.tar.gz" | sha256sum --check && \
|
||||
mkdir pgvector-src && cd pgvector-src && tar xzf ../pgvector.tar.gz --strip-components=1 -C . && \
|
||||
patch -p1 < /pgvector.patch && \
|
||||
make -j $(getconf _NPROCESSORS_ONLN) OPTFLAGS="" PG_CONFIG=/usr/local/pgsql/bin/pg_config && \
|
||||
make -j $(getconf _NPROCESSORS_ONLN) OPTFLAGS="" install PG_CONFIG=/usr/local/pgsql/bin/pg_config && \
|
||||
make -j $(getconf _NPROCESSORS_ONLN) OPTFLAGS="" && \
|
||||
make -j $(getconf _NPROCESSORS_ONLN) OPTFLAGS="" install && \
|
||||
echo 'trusted = true' >> /usr/local/pgsql/share/extension/vector.control
|
||||
|
||||
#########################################################################################
|
||||
@@ -381,16 +371,15 @@ RUN wget https://github.com/pgvector/pgvector/archive/refs/tags/v0.8.0.tar.gz -O
|
||||
# compile pgjwt extension
|
||||
#
|
||||
#########################################################################################
|
||||
FROM build-deps AS pgjwt-pg-build
|
||||
FROM pg-build AS pgjwt-pg-build
|
||||
ARG PG_VERSION
|
||||
COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
|
||||
|
||||
# not version-specific
|
||||
# doesn't use releases, last commit f3d82fd - Mar 2, 2023
|
||||
RUN wget https://github.com/michelp/pgjwt/archive/f3d82fd30151e754e19ce5d6a06c71c20689ce3d.tar.gz -O pgjwt.tar.gz && \
|
||||
echo "dae8ed99eebb7593b43013f6532d772b12dfecd55548d2673f2dfd0163f6d2b9 pgjwt.tar.gz" | sha256sum --check && \
|
||||
mkdir pgjwt-src && cd pgjwt-src && tar xzf ../pgjwt.tar.gz --strip-components=1 -C . && \
|
||||
make -j $(getconf _NPROCESSORS_ONLN) install PG_CONFIG=/usr/local/pgsql/bin/pg_config && \
|
||||
make -j $(getconf _NPROCESSORS_ONLN) install && \
|
||||
echo 'trusted = true' >> /usr/local/pgsql/share/extension/pgjwt.control
|
||||
|
||||
#########################################################################################
|
||||
@@ -399,17 +388,16 @@ RUN wget https://github.com/michelp/pgjwt/archive/f3d82fd30151e754e19ce5d6a06c71
|
||||
# compile hypopg extension
|
||||
#
|
||||
#########################################################################################
|
||||
FROM build-deps AS hypopg-pg-build
|
||||
FROM pg-build AS hypopg-pg-build
|
||||
ARG PG_VERSION
|
||||
COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
|
||||
|
||||
# HypoPG 1.4.1 supports v17
|
||||
# last release 1.4.1 - Apr 28, 2024
|
||||
RUN wget https://github.com/HypoPG/hypopg/archive/refs/tags/1.4.1.tar.gz -O hypopg.tar.gz && \
|
||||
echo "9afe6357fd389d8d33fad81703038ce520b09275ec00153c6c89282bcdedd6bc hypopg.tar.gz" | sha256sum --check && \
|
||||
mkdir hypopg-src && cd hypopg-src && tar xzf ../hypopg.tar.gz --strip-components=1 -C . && \
|
||||
make -j $(getconf _NPROCESSORS_ONLN) PG_CONFIG=/usr/local/pgsql/bin/pg_config && \
|
||||
make -j $(getconf _NPROCESSORS_ONLN) install PG_CONFIG=/usr/local/pgsql/bin/pg_config && \
|
||||
make -j $(getconf _NPROCESSORS_ONLN) && \
|
||||
make -j $(getconf _NPROCESSORS_ONLN) install && \
|
||||
echo 'trusted = true' >> /usr/local/pgsql/share/extension/hypopg.control
|
||||
|
||||
#########################################################################################
|
||||
@@ -418,17 +406,16 @@ RUN wget https://github.com/HypoPG/hypopg/archive/refs/tags/1.4.1.tar.gz -O hypo
|
||||
# compile pg_hashids extension
|
||||
#
|
||||
#########################################################################################
|
||||
FROM build-deps AS pg-hashids-pg-build
|
||||
FROM pg-build AS pg-hashids-pg-build
|
||||
ARG PG_VERSION
|
||||
COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
|
||||
|
||||
# not version-specific
|
||||
# last release v1.2.1 -Jan 12, 2018
|
||||
RUN wget https://github.com/iCyberon/pg_hashids/archive/refs/tags/v1.2.1.tar.gz -O pg_hashids.tar.gz && \
|
||||
echo "74576b992d9277c92196dd8d816baa2cc2d8046fe102f3dcd7f3c3febed6822a pg_hashids.tar.gz" | sha256sum --check && \
|
||||
mkdir pg_hashids-src && cd pg_hashids-src && tar xzf ../pg_hashids.tar.gz --strip-components=1 -C . && \
|
||||
make -j $(getconf _NPROCESSORS_ONLN) PG_CONFIG=/usr/local/pgsql/bin/pg_config USE_PGXS=1 && \
|
||||
make -j $(getconf _NPROCESSORS_ONLN) install PG_CONFIG=/usr/local/pgsql/bin/pg_config USE_PGXS=1 && \
|
||||
make -j $(getconf _NPROCESSORS_ONLN) USE_PGXS=1 && \
|
||||
make -j $(getconf _NPROCESSORS_ONLN) install USE_PGXS=1 && \
|
||||
echo 'trusted = true' >> /usr/local/pgsql/share/extension/pg_hashids.control
|
||||
|
||||
#########################################################################################
|
||||
@@ -437,9 +424,8 @@ RUN wget https://github.com/iCyberon/pg_hashids/archive/refs/tags/v1.2.1.tar.gz
|
||||
# compile rum extension
|
||||
#
|
||||
#########################################################################################
|
||||
FROM build-deps AS rum-pg-build
|
||||
FROM pg-build AS rum-pg-build
|
||||
ARG PG_VERSION
|
||||
COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
|
||||
|
||||
COPY compute/patches/rum.patch /rum.patch
|
||||
|
||||
@@ -450,8 +436,8 @@ RUN wget https://github.com/postgrespro/rum/archive/cb1edffc57736cd2a4455f8d0fea
|
||||
echo "65e0a752e99f4c3226400c9b899f997049e93503db8bf5c8072efa136d32fd83 rum.tar.gz" | sha256sum --check && \
|
||||
mkdir rum-src && cd rum-src && tar xzf ../rum.tar.gz --strip-components=1 -C . && \
|
||||
patch -p1 < /rum.patch && \
|
||||
make -j $(getconf _NPROCESSORS_ONLN) PG_CONFIG=/usr/local/pgsql/bin/pg_config USE_PGXS=1 && \
|
||||
make -j $(getconf _NPROCESSORS_ONLN) install PG_CONFIG=/usr/local/pgsql/bin/pg_config USE_PGXS=1 && \
|
||||
make -j $(getconf _NPROCESSORS_ONLN) USE_PGXS=1 && \
|
||||
make -j $(getconf _NPROCESSORS_ONLN) install USE_PGXS=1 && \
|
||||
echo 'trusted = true' >> /usr/local/pgsql/share/extension/rum.control
|
||||
|
||||
#########################################################################################
|
||||
@@ -460,17 +446,16 @@ RUN wget https://github.com/postgrespro/rum/archive/cb1edffc57736cd2a4455f8d0fea
|
||||
# compile pgTAP extension
|
||||
#
|
||||
#########################################################################################
|
||||
FROM build-deps AS pgtap-pg-build
|
||||
FROM pg-build AS pgtap-pg-build
|
||||
ARG PG_VERSION
|
||||
COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
|
||||
|
||||
# pgtap 1.3.3 supports v17
|
||||
# last release v1.3.3 - Apr 8, 2024
|
||||
RUN wget https://github.com/theory/pgtap/archive/refs/tags/v1.3.3.tar.gz -O pgtap.tar.gz && \
|
||||
echo "325ea79d0d2515bce96bce43f6823dcd3effbd6c54cb2a4d6c2384fffa3a14c7 pgtap.tar.gz" | sha256sum --check && \
|
||||
mkdir pgtap-src && cd pgtap-src && tar xzf ../pgtap.tar.gz --strip-components=1 -C . && \
|
||||
make -j $(getconf _NPROCESSORS_ONLN) PG_CONFIG=/usr/local/pgsql/bin/pg_config && \
|
||||
make -j $(getconf _NPROCESSORS_ONLN) install PG_CONFIG=/usr/local/pgsql/bin/pg_config && \
|
||||
make -j $(getconf _NPROCESSORS_ONLN) && \
|
||||
make -j $(getconf _NPROCESSORS_ONLN) install && \
|
||||
echo 'trusted = true' >> /usr/local/pgsql/share/extension/pgtap.control
|
||||
|
||||
#########################################################################################
|
||||
@@ -479,17 +464,16 @@ RUN wget https://github.com/theory/pgtap/archive/refs/tags/v1.3.3.tar.gz -O pgta
|
||||
# compile ip4r extension
|
||||
#
|
||||
#########################################################################################
|
||||
FROM build-deps AS ip4r-pg-build
|
||||
FROM pg-build AS ip4r-pg-build
|
||||
ARG PG_VERSION
|
||||
COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
|
||||
|
||||
# not version-specific
|
||||
# last release v2.4.2 - Jul 29, 2023
|
||||
RUN wget https://github.com/RhodiumToad/ip4r/archive/refs/tags/2.4.2.tar.gz -O ip4r.tar.gz && \
|
||||
echo "0f7b1f159974f49a47842a8ab6751aecca1ed1142b6d5e38d81b064b2ead1b4b ip4r.tar.gz" | sha256sum --check && \
|
||||
mkdir ip4r-src && cd ip4r-src && tar xzf ../ip4r.tar.gz --strip-components=1 -C . && \
|
||||
make -j $(getconf _NPROCESSORS_ONLN) PG_CONFIG=/usr/local/pgsql/bin/pg_config && \
|
||||
make -j $(getconf _NPROCESSORS_ONLN) install PG_CONFIG=/usr/local/pgsql/bin/pg_config && \
|
||||
make -j $(getconf _NPROCESSORS_ONLN) && \
|
||||
make -j $(getconf _NPROCESSORS_ONLN) install && \
|
||||
echo 'trusted = true' >> /usr/local/pgsql/share/extension/ip4r.control
|
||||
|
||||
#########################################################################################
|
||||
@@ -498,17 +482,16 @@ RUN wget https://github.com/RhodiumToad/ip4r/archive/refs/tags/2.4.2.tar.gz -O i
|
||||
# compile Prefix extension
|
||||
#
|
||||
#########################################################################################
|
||||
FROM build-deps AS prefix-pg-build
|
||||
FROM pg-build AS prefix-pg-build
|
||||
ARG PG_VERSION
|
||||
COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
|
||||
|
||||
# not version-specific
|
||||
# last release v1.2.10 - Jul 5, 2023
|
||||
RUN wget https://github.com/dimitri/prefix/archive/refs/tags/v1.2.10.tar.gz -O prefix.tar.gz && \
|
||||
echo "4342f251432a5f6fb05b8597139d3ccde8dcf87e8ca1498e7ee931ca057a8575 prefix.tar.gz" | sha256sum --check && \
|
||||
mkdir prefix-src && cd prefix-src && tar xzf ../prefix.tar.gz --strip-components=1 -C . && \
|
||||
make -j $(getconf _NPROCESSORS_ONLN) PG_CONFIG=/usr/local/pgsql/bin/pg_config && \
|
||||
make -j $(getconf _NPROCESSORS_ONLN) install PG_CONFIG=/usr/local/pgsql/bin/pg_config && \
|
||||
make -j $(getconf _NPROCESSORS_ONLN) && \
|
||||
make -j $(getconf _NPROCESSORS_ONLN) install && \
|
||||
echo 'trusted = true' >> /usr/local/pgsql/share/extension/prefix.control
|
||||
|
||||
#########################################################################################
|
||||
@@ -517,17 +500,16 @@ RUN wget https://github.com/dimitri/prefix/archive/refs/tags/v1.2.10.tar.gz -O p
|
||||
# compile hll extension
|
||||
#
|
||||
#########################################################################################
|
||||
FROM build-deps AS hll-pg-build
|
||||
FROM pg-build AS hll-pg-build
|
||||
ARG PG_VERSION
|
||||
COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
|
||||
|
||||
# not version-specific
|
||||
# last release v2.18 - Aug 29, 2023
|
||||
RUN wget https://github.com/citusdata/postgresql-hll/archive/refs/tags/v2.18.tar.gz -O hll.tar.gz && \
|
||||
echo "e2f55a6f4c4ab95ee4f1b4a2b73280258c5136b161fe9d059559556079694f0e hll.tar.gz" | sha256sum --check && \
|
||||
mkdir hll-src && cd hll-src && tar xzf ../hll.tar.gz --strip-components=1 -C . && \
|
||||
make -j $(getconf _NPROCESSORS_ONLN) PG_CONFIG=/usr/local/pgsql/bin/pg_config && \
|
||||
make -j $(getconf _NPROCESSORS_ONLN) install PG_CONFIG=/usr/local/pgsql/bin/pg_config && \
|
||||
make -j $(getconf _NPROCESSORS_ONLN) && \
|
||||
make -j $(getconf _NPROCESSORS_ONLN) install && \
|
||||
echo 'trusted = true' >> /usr/local/pgsql/share/extension/hll.control
|
||||
|
||||
#########################################################################################
|
||||
@@ -536,17 +518,16 @@ RUN wget https://github.com/citusdata/postgresql-hll/archive/refs/tags/v2.18.tar
|
||||
# compile plpgsql_check extension
|
||||
#
|
||||
#########################################################################################
|
||||
FROM build-deps AS plpgsql-check-pg-build
|
||||
FROM pg-build AS plpgsql-check-pg-build
|
||||
ARG PG_VERSION
|
||||
COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
|
||||
|
||||
# plpgsql_check v2.7.11 supports v17
|
||||
# last release v2.7.11 - Sep 16, 2024
|
||||
RUN wget https://github.com/okbob/plpgsql_check/archive/refs/tags/v2.7.11.tar.gz -O plpgsql_check.tar.gz && \
|
||||
echo "208933f8dbe8e0d2628eb3851e9f52e6892b8e280c63700c0f1ce7883625d172 plpgsql_check.tar.gz" | sha256sum --check && \
|
||||
mkdir plpgsql_check-src && cd plpgsql_check-src && tar xzf ../plpgsql_check.tar.gz --strip-components=1 -C . && \
|
||||
make -j $(getconf _NPROCESSORS_ONLN) PG_CONFIG=/usr/local/pgsql/bin/pg_config USE_PGXS=1 && \
|
||||
make -j $(getconf _NPROCESSORS_ONLN) install PG_CONFIG=/usr/local/pgsql/bin/pg_config USE_PGXS=1 && \
|
||||
make -j $(getconf _NPROCESSORS_ONLN) USE_PGXS=1 && \
|
||||
make -j $(getconf _NPROCESSORS_ONLN) install USE_PGXS=1 && \
|
||||
echo 'trusted = true' >> /usr/local/pgsql/share/extension/plpgsql_check.control
|
||||
|
||||
#########################################################################################
|
||||
@@ -555,11 +536,8 @@ RUN wget https://github.com/okbob/plpgsql_check/archive/refs/tags/v2.7.11.tar.gz
|
||||
# compile timescaledb extension
|
||||
#
|
||||
#########################################################################################
|
||||
FROM build-deps AS timescaledb-pg-build
|
||||
COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
|
||||
|
||||
FROM pg-build AS timescaledb-pg-build
|
||||
ARG PG_VERSION
|
||||
ENV PATH="/usr/local/pgsql/bin:$PATH"
|
||||
|
||||
RUN case "${PG_VERSION}" in \
|
||||
"v14" | "v15") \
|
||||
@@ -590,11 +568,8 @@ RUN case "${PG_VERSION}" in \
|
||||
# compile pg_hint_plan extension
|
||||
#
|
||||
#########################################################################################
|
||||
FROM build-deps AS pg-hint-plan-pg-build
|
||||
COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
|
||||
|
||||
FROM pg-build AS pg-hint-plan-pg-build
|
||||
ARG PG_VERSION
|
||||
ENV PATH="/usr/local/pgsql/bin:$PATH"
|
||||
|
||||
# version-specific, has separate releases for each version
|
||||
RUN case "${PG_VERSION}" in \
|
||||
@@ -632,14 +607,12 @@ RUN case "${PG_VERSION}" in \
|
||||
# compile pg_cron extension
|
||||
#
|
||||
#########################################################################################
|
||||
FROM build-deps AS pg-cron-pg-build
|
||||
FROM pg-build AS pg-cron-pg-build
|
||||
ARG PG_VERSION
|
||||
COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
|
||||
|
||||
# This is an experimental extension that we do not support on prod yet.
|
||||
# !Do not remove!
|
||||
# We set it in shared_preload_libraries and computes will fail to start if library is not found.
|
||||
ENV PATH="/usr/local/pgsql/bin/:$PATH"
|
||||
RUN wget https://github.com/citusdata/pg_cron/archive/refs/tags/v1.6.4.tar.gz -O pg_cron.tar.gz && \
|
||||
echo "52d1850ee7beb85a4cb7185731ef4e5a90d1de216709d8988324b0d02e76af61 pg_cron.tar.gz" | sha256sum --check && \
|
||||
mkdir pg_cron-src && cd pg_cron-src && tar xzf ../pg_cron.tar.gz --strip-components=1 -C . && \
|
||||
@@ -653,9 +626,8 @@ RUN wget https://github.com/citusdata/pg_cron/archive/refs/tags/v1.6.4.tar.gz -O
|
||||
# compile rdkit extension
|
||||
#
|
||||
#########################################################################################
|
||||
FROM build-deps AS rdkit-pg-build
|
||||
FROM pg-build AS rdkit-pg-build
|
||||
ARG PG_VERSION
|
||||
COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
|
||||
|
||||
RUN apt update && \
|
||||
apt install --no-install-recommends --no-install-suggests -y \
|
||||
@@ -673,7 +645,13 @@ RUN apt update && \
|
||||
# Use new version only for v17
|
||||
# because Release_2024_09_1 has some backward incompatible changes
|
||||
# https://github.com/rdkit/rdkit/releases/tag/Release_2024_09_1
|
||||
ENV PATH="/usr/local/pgsql/bin/:/usr/local/pgsql/:$PATH"
|
||||
|
||||
# XXX: /usr/local/pgsql/bin is already in PATH, and that should be enough to find
|
||||
# pg_config. For some reason the rdkit cmake script doesn't work with just that,
|
||||
# however. By also adding /usr/local/pgsql, it works, which is weird because there
|
||||
# are no executables in that directory.
|
||||
ENV PATH="/usr/local/pgsql:$PATH"
|
||||
|
||||
RUN case "${PG_VERSION}" in \
|
||||
"v17") \
|
||||
export RDKIT_VERSION=Release_2024_09_1 \
|
||||
@@ -726,13 +704,11 @@ RUN case "${PG_VERSION}" in \
|
||||
# compile pg_uuidv7 extension
|
||||
#
|
||||
#########################################################################################
|
||||
FROM build-deps AS pg-uuidv7-pg-build
|
||||
FROM pg-build AS pg-uuidv7-pg-build
|
||||
ARG PG_VERSION
|
||||
COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
|
||||
|
||||
# not version-specific
|
||||
# last release v1.6.0 - Oct 9, 2024
|
||||
ENV PATH="/usr/local/pgsql/bin/:$PATH"
|
||||
RUN wget https://github.com/fboulnois/pg_uuidv7/archive/refs/tags/v1.6.0.tar.gz -O pg_uuidv7.tar.gz && \
|
||||
echo "0fa6c710929d003f6ce276a7de7a864e9d1667b2d78be3dc2c07f2409eb55867 pg_uuidv7.tar.gz" | sha256sum --check && \
|
||||
mkdir pg_uuidv7-src && cd pg_uuidv7-src && tar xzf ../pg_uuidv7.tar.gz --strip-components=1 -C . && \
|
||||
@@ -746,13 +722,11 @@ RUN wget https://github.com/fboulnois/pg_uuidv7/archive/refs/tags/v1.6.0.tar.gz
|
||||
# compile pg_roaringbitmap extension
|
||||
#
|
||||
#########################################################################################
|
||||
FROM build-deps AS pg-roaringbitmap-pg-build
|
||||
FROM pg-build AS pg-roaringbitmap-pg-build
|
||||
ARG PG_VERSION
|
||||
COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
|
||||
|
||||
# not version-specific
|
||||
# last release v0.5.4 - Jun 28, 2022
|
||||
ENV PATH="/usr/local/pgsql/bin/:$PATH"
|
||||
RUN wget https://github.com/ChenHuajun/pg_roaringbitmap/archive/refs/tags/v0.5.4.tar.gz -O pg_roaringbitmap.tar.gz && \
|
||||
echo "b75201efcb1c2d1b014ec4ae6a22769cc7a224e6e406a587f5784a37b6b5a2aa pg_roaringbitmap.tar.gz" | sha256sum --check && \
|
||||
mkdir pg_roaringbitmap-src && cd pg_roaringbitmap-src && tar xzf ../pg_roaringbitmap.tar.gz --strip-components=1 -C . && \
|
||||
@@ -766,16 +740,14 @@ RUN wget https://github.com/ChenHuajun/pg_roaringbitmap/archive/refs/tags/v0.5.4
|
||||
# compile pg_semver extension
|
||||
#
|
||||
#########################################################################################
|
||||
FROM build-deps AS pg-semver-pg-build
|
||||
FROM pg-build AS pg-semver-pg-build
|
||||
ARG PG_VERSION
|
||||
COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
|
||||
|
||||
# Release 0.40.0 breaks backward compatibility with previous versions
|
||||
# see release note https://github.com/theory/pg-semver/releases/tag/v0.40.0
|
||||
# Use new version only for v17
|
||||
#
|
||||
# last release v0.40.0 - Jul 22, 2024
|
||||
ENV PATH="/usr/local/pgsql/bin/:$PATH"
|
||||
RUN case "${PG_VERSION}" in \
|
||||
"v17") \
|
||||
export SEMVER_VERSION=0.40.0 \
|
||||
@@ -802,13 +774,11 @@ RUN case "${PG_VERSION}" in \
|
||||
# compile pg_embedding extension
|
||||
#
|
||||
#########################################################################################
|
||||
FROM build-deps AS pg-embedding-pg-build
|
||||
COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
|
||||
FROM pg-build AS pg-embedding-pg-build
|
||||
|
||||
# This is our extension, support stopped in favor of pgvector
|
||||
# TODO: deprecate it
|
||||
ARG PG_VERSION
|
||||
ENV PATH="/usr/local/pgsql/bin/:$PATH"
|
||||
RUN case "${PG_VERSION}" in \
|
||||
"v14" | "v15") \
|
||||
export PG_EMBEDDING_VERSION=0.3.5 \
|
||||
@@ -829,26 +799,19 @@ RUN case "${PG_VERSION}" in \
|
||||
# compile anon extension
|
||||
#
|
||||
#########################################################################################
|
||||
FROM build-deps AS pg-anon-pg-build
|
||||
FROM pg-build AS pg-anon-pg-build
|
||||
ARG PG_VERSION
|
||||
COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
|
||||
|
||||
# This is an experimental extension, never got to real production.
|
||||
# !Do not remove! It can be present in shared_preload_libraries and compute will fail to start if library is not found.
|
||||
ENV PATH="/usr/local/pgsql/bin/:$PATH"
|
||||
RUN case "${PG_VERSION}" in "v17") \
|
||||
echo "postgresql_anonymizer does not yet support PG17" && exit 0;; \
|
||||
esac && \
|
||||
wget https://github.com/neondatabase/postgresql_anonymizer/archive/refs/tags/neon_1.1.1.tar.gz -O pg_anon.tar.gz && \
|
||||
echo "321ea8d5c1648880aafde850a2c576e4a9e7b9933a34ce272efc839328999fa9 pg_anon.tar.gz" | sha256sum --check && \
|
||||
mkdir pg_anon-src && cd pg_anon-src && tar xzf ../pg_anon.tar.gz --strip-components=1 -C . && \
|
||||
find /usr/local/pgsql -type f | sed 's|^/usr/local/pgsql/||' > /before.txt &&\
|
||||
make -j $(getconf _NPROCESSORS_ONLN) install PG_CONFIG=/usr/local/pgsql/bin/pg_config && \
|
||||
echo 'trusted = true' >> /usr/local/pgsql/share/extension/anon.control && \
|
||||
find /usr/local/pgsql -type f | sed 's|^/usr/local/pgsql/||' > /after.txt &&\
|
||||
mkdir -p /extensions/anon && cp /usr/local/pgsql/share/extension/anon.control /extensions/anon && \
|
||||
sort -o /before.txt /before.txt && sort -o /after.txt /after.txt && \
|
||||
comm -13 /before.txt /after.txt | tar --directory=/usr/local/pgsql --zstd -cf /extensions/anon.tar.zst -T -
|
||||
make -j $(getconf _NPROCESSORS_ONLN) install && \
|
||||
echo 'trusted = true' >> /usr/local/pgsql/share/extension/anon.control
|
||||
|
||||
#########################################################################################
|
||||
#
|
||||
@@ -856,9 +819,8 @@ RUN case "${PG_VERSION}" in "v17") \
|
||||
# This layer is used to build `pgrx` deps
|
||||
#
|
||||
#########################################################################################
|
||||
FROM build-deps AS rust-extensions-build
|
||||
FROM pg-build AS rust-extensions-build
|
||||
ARG PG_VERSION
|
||||
COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
|
||||
|
||||
RUN apt update && \
|
||||
apt install --no-install-recommends --no-install-suggests -y curl libclang-dev && \
|
||||
@@ -866,7 +828,7 @@ RUN apt update && \
|
||||
useradd -ms /bin/bash nonroot -b /home
|
||||
|
||||
ENV HOME=/home/nonroot
|
||||
ENV PATH="/home/nonroot/.cargo/bin:/usr/local/pgsql/bin/:$PATH"
|
||||
ENV PATH="/home/nonroot/.cargo/bin:$PATH"
|
||||
USER nonroot
|
||||
WORKDIR /home/nonroot
|
||||
|
||||
@@ -893,9 +855,8 @@ USER root
|
||||
# and eventually get merged with `rust-extensions-build`
|
||||
#
|
||||
#########################################################################################
|
||||
FROM build-deps AS rust-extensions-build-pgrx12
|
||||
FROM pg-build AS rust-extensions-build-pgrx12
|
||||
ARG PG_VERSION
|
||||
COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
|
||||
|
||||
RUN apt update && \
|
||||
apt install --no-install-recommends --no-install-suggests -y curl libclang-dev && \
|
||||
@@ -903,7 +864,7 @@ RUN apt update && \
|
||||
useradd -ms /bin/bash nonroot -b /home
|
||||
|
||||
ENV HOME=/home/nonroot
|
||||
ENV PATH="/home/nonroot/.cargo/bin:/usr/local/pgsql/bin/:$PATH"
|
||||
ENV PATH="/home/nonroot/.cargo/bin:$PATH"
|
||||
USER nonroot
|
||||
WORKDIR /home/nonroot
|
||||
|
||||
@@ -911,7 +872,7 @@ RUN curl -sSO https://static.rust-lang.org/rustup/dist/$(uname -m)-unknown-linux
|
||||
chmod +x rustup-init && \
|
||||
./rustup-init -y --no-modify-path --profile minimal --default-toolchain stable && \
|
||||
rm rustup-init && \
|
||||
cargo install --locked --version 0.12.6 cargo-pgrx && \
|
||||
cargo install --locked --version 0.12.9 cargo-pgrx && \
|
||||
/bin/bash -c 'cargo pgrx init --pg${PG_VERSION:1}=/usr/local/pgsql/bin/pg_config'
|
||||
|
||||
USER root
|
||||
@@ -948,19 +909,19 @@ RUN apt update && apt install --no-install-recommends --no-install-suggests -y p
|
||||
mkdir pgrag-src && cd pgrag-src && tar xzf ../pgrag.tar.gz --strip-components=1 -C . && \
|
||||
\
|
||||
cd exts/rag && \
|
||||
sed -i 's/pgrx = "0.12.6"/pgrx = { version = "0.12.6", features = [ "unsafe-postgres" ] }/g' Cargo.toml && \
|
||||
sed -i 's/pgrx = "0.12.6"/pgrx = { version = "0.12.9", features = [ "unsafe-postgres" ] }/g' Cargo.toml && \
|
||||
cargo pgrx install --release && \
|
||||
echo "trusted = true" >> /usr/local/pgsql/share/extension/rag.control && \
|
||||
\
|
||||
cd ../rag_bge_small_en_v15 && \
|
||||
sed -i 's/pgrx = "0.12.6"/pgrx = { version = "0.12.6", features = [ "unsafe-postgres" ] }/g' Cargo.toml && \
|
||||
sed -i 's/pgrx = "0.12.6"/pgrx = { version = "0.12.9", features = [ "unsafe-postgres" ] }/g' Cargo.toml && \
|
||||
ORT_LIB_LOCATION=/home/nonroot/onnxruntime-src/build/Linux \
|
||||
REMOTE_ONNX_URL=http://pg-ext-s3-gateway/pgrag-data/bge_small_en_v15.onnx \
|
||||
cargo pgrx install --release --features remote_onnx && \
|
||||
echo "trusted = true" >> /usr/local/pgsql/share/extension/rag_bge_small_en_v15.control && \
|
||||
\
|
||||
cd ../rag_jina_reranker_v1_tiny_en && \
|
||||
sed -i 's/pgrx = "0.12.6"/pgrx = { version = "0.12.6", features = [ "unsafe-postgres" ] }/g' Cargo.toml && \
|
||||
sed -i 's/pgrx = "0.12.6"/pgrx = { version = "0.12.9", features = [ "unsafe-postgres" ] }/g' Cargo.toml && \
|
||||
ORT_LIB_LOCATION=/home/nonroot/onnxruntime-src/build/Linux \
|
||||
REMOTE_ONNX_URL=http://pg-ext-s3-gateway/pgrag-data/jina_reranker_v1_tiny_en.onnx \
|
||||
cargo pgrx install --release --features remote_onnx && \
|
||||
@@ -976,29 +937,17 @@ RUN apt update && apt install --no-install-recommends --no-install-suggests -y p
|
||||
|
||||
FROM rust-extensions-build-pgrx12 AS pg-jsonschema-pg-build
|
||||
ARG PG_VERSION
|
||||
# version 0.3.3 supports v17
|
||||
# last release v0.3.3 - Oct 16, 2024
|
||||
#
|
||||
# there were no breaking changes
|
||||
# so we can use the same version for all postgres versions
|
||||
RUN case "${PG_VERSION}" in \
|
||||
"v14" | "v15" | "v16" | "v17") \
|
||||
export PG_JSONSCHEMA_VERSION=0.3.3 \
|
||||
export PG_JSONSCHEMA_CHECKSUM=40c2cffab4187e0233cb8c3bde013be92218c282f95f4469c5282f6b30d64eac \
|
||||
;; \
|
||||
*) \
|
||||
echo "unexpected PostgreSQL version" && exit 1 \
|
||||
;; \
|
||||
esac && \
|
||||
wget https://github.com/supabase/pg_jsonschema/archive/refs/tags/v${PG_JSONSCHEMA_VERSION}.tar.gz -O pg_jsonschema.tar.gz && \
|
||||
echo "${PG_JSONSCHEMA_CHECKSUM} pg_jsonschema.tar.gz" | sha256sum --check && \
|
||||
RUN wget https://github.com/supabase/pg_jsonschema/archive/refs/tags/v0.3.3.tar.gz -O pg_jsonschema.tar.gz && \
|
||||
echo "40c2cffab4187e0233cb8c3bde013be92218c282f95f4469c5282f6b30d64eac pg_jsonschema.tar.gz" | sha256sum --check && \
|
||||
mkdir pg_jsonschema-src && cd pg_jsonschema-src && tar xzf ../pg_jsonschema.tar.gz --strip-components=1 -C . && \
|
||||
# see commit 252b3685a27a0f4c31a0f91e983c6314838e89e8
|
||||
# `unsafe-postgres` feature allows to build pgx extensions
|
||||
# against postgres forks that decided to change their ABI name (like us).
|
||||
# With that we can build extensions without forking them and using stock
|
||||
# pgx. As this feature is new few manual version bumps were required.
|
||||
sed -i 's/pgrx = "0.12.6"/pgrx = { version = "0.12.6", features = [ "unsafe-postgres" ] }/g' Cargo.toml && \
|
||||
sed -i 's/pgrx = "0.12.6"/pgrx = { version = "0.12.9", features = [ "unsafe-postgres" ] }/g' Cargo.toml && \
|
||||
sed -i 's/pgrx-tests = "0.12.6"/pgrx-tests = "0.12.9"/g' Cargo.toml && \
|
||||
cargo pgrx install --release && \
|
||||
echo "trusted = true" >> /usr/local/pgsql/share/extension/pg_jsonschema.control
|
||||
|
||||
@@ -1012,24 +961,12 @@ RUN case "${PG_VERSION}" in \
|
||||
FROM rust-extensions-build-pgrx12 AS pg-graphql-pg-build
|
||||
ARG PG_VERSION
|
||||
|
||||
# version 1.5.9 supports v17
|
||||
# last release v1.5.9 - Oct 16, 2024
|
||||
#
|
||||
# there were no breaking changes
|
||||
# so we can use the same version for all postgres versions
|
||||
RUN case "${PG_VERSION}" in \
|
||||
"v14" | "v15" | "v16" | "v17") \
|
||||
export PG_GRAPHQL_VERSION=1.5.9 \
|
||||
export PG_GRAPHQL_CHECKSUM=cf768385a41278be1333472204fc0328118644ae443182cf52f7b9b23277e497 \
|
||||
;; \
|
||||
*) \
|
||||
echo "unexpected PostgreSQL version" && exit 1 \
|
||||
;; \
|
||||
esac && \
|
||||
wget https://github.com/supabase/pg_graphql/archive/refs/tags/v${PG_GRAPHQL_VERSION}.tar.gz -O pg_graphql.tar.gz && \
|
||||
echo "${PG_GRAPHQL_CHECKSUM} pg_graphql.tar.gz" | sha256sum --check && \
|
||||
RUN wget https://github.com/supabase/pg_graphql/archive/refs/tags/v1.5.9.tar.gz -O pg_graphql.tar.gz && \
|
||||
echo "cf768385a41278be1333472204fc0328118644ae443182cf52f7b9b23277e497 pg_graphql.tar.gz" | sha256sum --check && \
|
||||
mkdir pg_graphql-src && cd pg_graphql-src && tar xzf ../pg_graphql.tar.gz --strip-components=1 -C . && \
|
||||
sed -i 's/pgrx = "=0.12.6"/pgrx = { version = "0.12.6", features = [ "unsafe-postgres" ] }/g' Cargo.toml && \
|
||||
sed -i 's/pgrx = "=0.12.6"/pgrx = { version = "=0.12.9", features = [ "unsafe-postgres" ] }/g' Cargo.toml && \
|
||||
sed -i 's/pgrx-tests = "=0.12.6"/pgrx-tests = "=0.12.9"/g' Cargo.toml && \
|
||||
cargo pgrx install --release && \
|
||||
# it's needed to enable extension because it uses untrusted C language
|
||||
sed -i 's/superuser = false/superuser = true/g' /usr/local/pgsql/share/extension/pg_graphql.control && \
|
||||
@@ -1050,33 +987,58 @@ ARG PG_VERSION
|
||||
RUN wget https://github.com/kelvich/pg_tiktoken/archive/9118dd4549b7d8c0bbc98e04322499f7bf2fa6f7.tar.gz -O pg_tiktoken.tar.gz && \
|
||||
echo "a5bc447e7920ee149d3c064b8b9f0086c0e83939499753178f7d35788416f628 pg_tiktoken.tar.gz" | sha256sum --check && \
|
||||
mkdir pg_tiktoken-src && cd pg_tiktoken-src && tar xzf ../pg_tiktoken.tar.gz --strip-components=1 -C . && \
|
||||
# TODO update pgrx version in the pg_tiktoken repo and remove this line
|
||||
sed -i 's/pgrx = { version = "=0.10.2",/pgrx = { version = "0.11.3",/g' Cargo.toml && \
|
||||
sed -i 's/pgrx-tests = "=0.10.2"/pgrx-tests = "0.11.3"/g' Cargo.toml && \
|
||||
sed -i 's/pgrx = { version = "=0.12.6",/pgrx = { version = "0.12.9",/g' Cargo.toml && \
|
||||
sed -i 's/pgrx-tests = "=0.12.6"/pgrx-tests = "0.12.9"/g' Cargo.toml && \
|
||||
cargo pgrx install --release && \
|
||||
echo "trusted = true" >> /usr/local/pgsql/share/extension/pg_tiktoken.control
|
||||
|
||||
#########################################################################################
|
||||
#
|
||||
# Layer "pg-pgx-ulid-build"
|
||||
# Compile "pgx_ulid" extension
|
||||
# Compile "pgx_ulid" extension for v16 and below
|
||||
#
|
||||
#########################################################################################
|
||||
|
||||
FROM rust-extensions-build AS pg-pgx-ulid-build
|
||||
ARG PG_VERSION
|
||||
|
||||
# doesn't support v17 yet
|
||||
# https://github.com/pksunkara/pgx_ulid/pull/52
|
||||
RUN case "${PG_VERSION}" in "v17") \
|
||||
echo "pgx_ulid does not support pg17 as of the latest version (0.1.5)" && exit 0;; \
|
||||
RUN case "${PG_VERSION}" in \
|
||||
"v14" | "v15" | "v16") \
|
||||
;; \
|
||||
*) \
|
||||
echo "skipping the version of pgx_ulid for $PG_VERSION" && exit 0 \
|
||||
;; \
|
||||
esac && \
|
||||
wget https://github.com/pksunkara/pgx_ulid/archive/refs/tags/v0.1.5.tar.gz -O pgx_ulid.tar.gz && \
|
||||
echo "9d1659a2da65af0133d5451c454de31b37364e3502087dadf579f790bc8bef17 pgx_ulid.tar.gz" | sha256sum --check && \
|
||||
echo "9d1659a2da65af0133d5451c454de31b37364e3502087dadf579f790bc8bef17 pgx_ulid.tar.gz" | sha256sum --check && \
|
||||
mkdir pgx_ulid-src && cd pgx_ulid-src && tar xzf ../pgx_ulid.tar.gz --strip-components=1 -C . && \
|
||||
sed -i 's/pgrx = "^0.11.2"/pgrx = { version = "=0.11.3", features = [ "unsafe-postgres" ] }/g' Cargo.toml && \
|
||||
sed -i 's/pgrx = "^0.11.2"/pgrx = { version = "0.11.3", features = [ "unsafe-postgres" ] }/g' Cargo.toml && \
|
||||
cargo pgrx install --release && \
|
||||
echo "trusted = true" >> /usr/local/pgsql/share/extension/ulid.control
|
||||
echo 'trusted = true' >> /usr/local/pgsql/share/extension/ulid.control
|
||||
|
||||
#########################################################################################
|
||||
#
|
||||
# Layer "pg-pgx-ulid-pgrx12-build"
|
||||
# Compile "pgx_ulid" extension for v17 and up
|
||||
#
|
||||
#########################################################################################
|
||||
|
||||
FROM rust-extensions-build-pgrx12 AS pg-pgx-ulid-pgrx12-build
|
||||
ARG PG_VERSION
|
||||
|
||||
RUN case "${PG_VERSION}" in \
|
||||
"v17") \
|
||||
;; \
|
||||
*) \
|
||||
echo "skipping the version of pgx_ulid for $PG_VERSION" && exit 0 \
|
||||
;; \
|
||||
esac && \
|
||||
wget https://github.com/pksunkara/pgx_ulid/archive/refs/tags/v0.2.0.tar.gz -O pgx_ulid.tar.gz && \
|
||||
echo "cef6a9a2e5e7bd1a10a18989286586ee9e6c1c06005a4055cff190de41bf3e9f pgx_ulid.tar.gz" | sha256sum --check && \
|
||||
mkdir pgx_ulid-src && cd pgx_ulid-src && tar xzf ../pgx_ulid.tar.gz --strip-components=1 -C . && \
|
||||
sed -i 's/pgrx = "^0.12.7"/pgrx = { version = "0.12.9", features = [ "unsafe-postgres" ] }/g' Cargo.toml && \
|
||||
cargo pgrx install --release && \
|
||||
echo 'trusted = true' >> /usr/local/pgsql/share/extension/pgx_ulid.control
|
||||
|
||||
#########################################################################################
|
||||
#
|
||||
@@ -1091,10 +1053,14 @@ ARG PG_VERSION
|
||||
# NOTE: local_proxy depends on the version of pg_session_jwt
|
||||
# Do not update without approve from proxy team
|
||||
# Make sure the version is reflected in proxy/src/serverless/local_conn_pool.rs
|
||||
RUN wget https://github.com/neondatabase/pg_session_jwt/archive/refs/tags/v0.1.2-v17.tar.gz -O pg_session_jwt.tar.gz && \
|
||||
echo "c8ecbed9cb8c6441bce5134a176002b043018adf9d05a08e457dda233090a86e pg_session_jwt.tar.gz" | sha256sum --check && \
|
||||
RUN wget https://github.com/neondatabase/pg_session_jwt/archive/refs/tags/v0.2.0.tar.gz -O pg_session_jwt.tar.gz && \
|
||||
echo "5ace028e591f2e000ca10afa5b1ca62203ebff014c2907c0ec3b29c36f28a1bb pg_session_jwt.tar.gz" | sha256sum --check && \
|
||||
mkdir pg_session_jwt-src && cd pg_session_jwt-src && tar xzf ../pg_session_jwt.tar.gz --strip-components=1 -C . && \
|
||||
sed -i 's/pgrx = "0.12.6"/pgrx = { version = "=0.12.6", features = [ "unsafe-postgres" ] }/g' Cargo.toml && \
|
||||
sed -i 's/pgrx = "0.12.6"/pgrx = { version = "0.12.9", features = [ "unsafe-postgres" ] }/g' Cargo.toml && \
|
||||
sed -i 's/version = "0.12.6"/version = "0.12.9"/g' pgrx-tests/Cargo.toml && \
|
||||
sed -i 's/pgrx = "=0.12.6"/pgrx = { version = "=0.12.9", features = [ "unsafe-postgres" ] }/g' pgrx-tests/Cargo.toml && \
|
||||
sed -i 's/pgrx-macros = "=0.12.6"/pgrx-macros = "=0.12.9"/g' pgrx-tests/Cargo.toml && \
|
||||
sed -i 's/pgrx-pg-config = "=0.12.6"/pgrx-pg-config = "=0.12.9"/g' pgrx-tests/Cargo.toml && \
|
||||
cargo pgrx install --release
|
||||
|
||||
#########################################################################################
|
||||
@@ -1104,13 +1070,11 @@ RUN wget https://github.com/neondatabase/pg_session_jwt/archive/refs/tags/v0.1.2
|
||||
#
|
||||
#########################################################################################
|
||||
|
||||
FROM build-deps AS wal2json-pg-build
|
||||
FROM pg-build AS wal2json-pg-build
|
||||
ARG PG_VERSION
|
||||
COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
|
||||
|
||||
# wal2json wal2json_2_6 supports v17
|
||||
# last release wal2json_2_6 - Apr 25, 2024
|
||||
ENV PATH="/usr/local/pgsql/bin/:$PATH"
|
||||
RUN wget https://github.com/eulerto/wal2json/archive/refs/tags/wal2json_2_6.tar.gz -O wal2json.tar.gz && \
|
||||
echo "18b4bdec28c74a8fc98a11c72de38378a760327ef8e5e42e975b0029eb96ba0d wal2json.tar.gz" | sha256sum --check && \
|
||||
mkdir wal2json-src && cd wal2json-src && tar xzf ../wal2json.tar.gz --strip-components=1 -C . && \
|
||||
@@ -1123,13 +1087,11 @@ RUN wget https://github.com/eulerto/wal2json/archive/refs/tags/wal2json_2_6.tar.
|
||||
# compile pg_ivm extension
|
||||
#
|
||||
#########################################################################################
|
||||
FROM build-deps AS pg-ivm-build
|
||||
FROM pg-build AS pg-ivm-build
|
||||
ARG PG_VERSION
|
||||
COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
|
||||
|
||||
# pg_ivm v1.9 supports v17
|
||||
# last release v1.9 - Jul 31
|
||||
ENV PATH="/usr/local/pgsql/bin/:$PATH"
|
||||
RUN wget https://github.com/sraoss/pg_ivm/archive/refs/tags/v1.9.tar.gz -O pg_ivm.tar.gz && \
|
||||
echo "59e15722939f274650abf637f315dd723c87073496ca77236b044cb205270d8b pg_ivm.tar.gz" | sha256sum --check && \
|
||||
mkdir pg_ivm-src && cd pg_ivm-src && tar xzf ../pg_ivm.tar.gz --strip-components=1 -C . && \
|
||||
@@ -1143,13 +1105,11 @@ RUN wget https://github.com/sraoss/pg_ivm/archive/refs/tags/v1.9.tar.gz -O pg_iv
|
||||
# compile pg_partman extension
|
||||
#
|
||||
#########################################################################################
|
||||
FROM build-deps AS pg-partman-build
|
||||
FROM pg-build AS pg-partman-build
|
||||
ARG PG_VERSION
|
||||
COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
|
||||
|
||||
# should support v17 https://github.com/pgpartman/pg_partman/discussions/693
|
||||
# last release 5.1.0 Apr 2, 2024
|
||||
ENV PATH="/usr/local/pgsql/bin/:$PATH"
|
||||
RUN wget https://github.com/pgpartman/pg_partman/archive/refs/tags/v5.1.0.tar.gz -O pg_partman.tar.gz && \
|
||||
echo "3e3a27d7ff827295d5c55ef72f07a49062d6204b3cb0b9a048645d6db9f3cb9f pg_partman.tar.gz" | sha256sum --check && \
|
||||
mkdir pg_partman-src && cd pg_partman-src && tar xzf ../pg_partman.tar.gz --strip-components=1 -C . && \
|
||||
@@ -1165,24 +1125,12 @@ RUN wget https://github.com/pgpartman/pg_partman/archive/refs/tags/v5.1.0.tar.gz
|
||||
#########################################################################################
|
||||
FROM rust-extensions-build AS pg-mooncake-build
|
||||
ARG PG_VERSION
|
||||
COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
|
||||
|
||||
# The topmost commit in the `neon` branch at the time of writing this
|
||||
# https://github.com/Mooncake-Labs/pg_mooncake/commits/neon/
|
||||
# https://github.com/Mooncake-Labs/pg_mooncake/commit/077c92c452bb6896a7b7776ee95f039984f076af
|
||||
ENV PG_MOONCAKE_VERSION=077c92c452bb6896a7b7776ee95f039984f076af
|
||||
ENV PATH="/usr/local/pgsql/bin/:$PATH"
|
||||
|
||||
RUN case "${PG_VERSION}" in \
|
||||
'v14') \
|
||||
echo "pg_mooncake is not supported on Postgres ${PG_VERSION}" && exit 0;; \
|
||||
esac && \
|
||||
git clone --depth 1 --branch neon https://github.com/Mooncake-Labs/pg_mooncake.git pg_mooncake-src && \
|
||||
cd pg_mooncake-src && \
|
||||
git checkout "${PG_MOONCAKE_VERSION}" && \
|
||||
git submodule update --init --depth 1 --recursive && \
|
||||
make BUILD_TYPE=release -j $(getconf _NPROCESSORS_ONLN) && \
|
||||
make BUILD_TYPE=release -j $(getconf _NPROCESSORS_ONLN) install && \
|
||||
RUN wget https://github.com/Mooncake-Labs/pg_mooncake/releases/download/v0.1.0/pg_mooncake-0.1.0.tar.gz -O pg_mooncake.tar.gz && \
|
||||
echo "eafd059b77f541f11525eb8affcd66a176968cbd8fe7c0d436e733f2aa4da59f pg_mooncake.tar.gz" | sha256sum --check && \
|
||||
mkdir pg_mooncake-src && cd pg_mooncake-src && tar xzf ../pg_mooncake.tar.gz --strip-components=1 -C . && \
|
||||
make release -j $(getconf _NPROCESSORS_ONLN) && \
|
||||
make install -j $(getconf _NPROCESSORS_ONLN) && \
|
||||
echo 'trusted = true' >> /usr/local/pgsql/share/extension/pg_mooncake.control
|
||||
|
||||
#########################################################################################
|
||||
@@ -1192,11 +1140,8 @@ RUN case "${PG_VERSION}" in \
|
||||
#
|
||||
#########################################################################################
|
||||
|
||||
FROM build-deps AS pg-repack-build
|
||||
FROM pg-build AS pg-repack-build
|
||||
ARG PG_VERSION
|
||||
COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
|
||||
|
||||
ENV PATH="/usr/local/pgsql/bin/:$PATH"
|
||||
|
||||
RUN wget https://github.com/reorg/pg_repack/archive/refs/tags/ver_1.5.2.tar.gz -O pg_repack.tar.gz && \
|
||||
echo '4516cad42251ed3ad53ff619733004db47d5755acac83f75924cd94d1c4fb681 pg_repack.tar.gz' | sha256sum --check && \
|
||||
@@ -1238,6 +1183,7 @@ COPY --from=timescaledb-pg-build /usr/local/pgsql/ /usr/local/pgsql/
|
||||
COPY --from=pg-hint-plan-pg-build /usr/local/pgsql/ /usr/local/pgsql/
|
||||
COPY --from=pg-cron-pg-build /usr/local/pgsql/ /usr/local/pgsql/
|
||||
COPY --from=pg-pgx-ulid-build /usr/local/pgsql/ /usr/local/pgsql/
|
||||
COPY --from=pg-pgx-ulid-pgrx12-build /usr/local/pgsql/ /usr/local/pgsql/
|
||||
COPY --from=pg-session-jwt-build /usr/local/pgsql/ /usr/local/pgsql/
|
||||
COPY --from=rdkit-pg-build /usr/local/pgsql/ /usr/local/pgsql/
|
||||
COPY --from=pg-uuidv7-pg-build /usr/local/pgsql/ /usr/local/pgsql/
|
||||
@@ -1267,25 +1213,11 @@ RUN make -j $(getconf _NPROCESSORS_ONLN) \
|
||||
make -j $(getconf _NPROCESSORS_ONLN) \
|
||||
PG_CONFIG=/usr/local/pgsql/bin/pg_config \
|
||||
-C pgxn/neon_rmgr \
|
||||
-s install && \
|
||||
case "${PG_VERSION}" in \
|
||||
"v14" | "v15") \
|
||||
;; \
|
||||
"v16" | "v17") \
|
||||
echo "Skipping HNSW for PostgreSQL ${PG_VERSION}" && exit 0 \
|
||||
;; \
|
||||
*) \
|
||||
echo "unexpected PostgreSQL version" && exit 1 \
|
||||
;; \
|
||||
esac && \
|
||||
make -j $(getconf _NPROCESSORS_ONLN) \
|
||||
PG_CONFIG=/usr/local/pgsql/bin/pg_config \
|
||||
-C pgxn/hnsw \
|
||||
-s install
|
||||
|
||||
#########################################################################################
|
||||
#
|
||||
# Compile and run the Neon-specific `compute_ctl` and `fast_import` binaries
|
||||
# Compile the Neon-specific `compute_ctl`, `fast_import`, and `local_proxy` binaries
|
||||
#
|
||||
#########################################################################################
|
||||
FROM $REPOSITORY/$IMAGE:$TAG AS compute-tools
|
||||
@@ -1295,18 +1227,7 @@ ENV BUILD_TAG=$BUILD_TAG
|
||||
USER nonroot
|
||||
# Copy entire project to get Cargo.* files with proper dependencies for the whole project
|
||||
COPY --chown=nonroot . .
|
||||
RUN cd compute_tools && mold -run cargo build --locked --profile release-line-debug-size-lto
|
||||
|
||||
#########################################################################################
|
||||
#
|
||||
# Final compute-tools image
|
||||
#
|
||||
#########################################################################################
|
||||
|
||||
FROM debian:$DEBIAN_FLAVOR AS compute-tools-image
|
||||
|
||||
COPY --from=compute-tools /home/nonroot/target/release-line-debug-size-lto/compute_ctl /usr/local/bin/compute_ctl
|
||||
COPY --from=compute-tools /home/nonroot/target/release-line-debug-size-lto/fast_import /usr/local/bin/fast_import
|
||||
RUN mold -run cargo build --locked --profile release-line-debug-size-lto --bin compute_ctl --bin fast_import --bin local_proxy
|
||||
|
||||
#########################################################################################
|
||||
#
|
||||
@@ -1338,31 +1259,17 @@ RUN set -e \
|
||||
&& make -j $(nproc) dist_man_MANS= \
|
||||
&& make install dist_man_MANS=
|
||||
|
||||
#########################################################################################
|
||||
#
|
||||
# Compile the Neon-specific `local_proxy` binary
|
||||
#
|
||||
#########################################################################################
|
||||
FROM $REPOSITORY/$IMAGE:$TAG AS local_proxy
|
||||
ARG BUILD_TAG
|
||||
ENV BUILD_TAG=$BUILD_TAG
|
||||
|
||||
USER nonroot
|
||||
# Copy entire project to get Cargo.* files with proper dependencies for the whole project
|
||||
COPY --chown=nonroot . .
|
||||
RUN mold -run cargo build --locked --profile release-line-debug-size-lto --bin local_proxy
|
||||
|
||||
#########################################################################################
|
||||
#
|
||||
# Layers "postgres-exporter" and "sql-exporter"
|
||||
#
|
||||
#########################################################################################
|
||||
|
||||
FROM quay.io/prometheuscommunity/postgres-exporter:v0.12.1 AS postgres-exporter
|
||||
FROM quay.io/prometheuscommunity/postgres-exporter:v0.16.0 AS postgres-exporter
|
||||
|
||||
# Keep the version the same as in build-tools.Dockerfile and
|
||||
# test_runner/regress/test_compute_metrics.py.
|
||||
FROM burningalchemist/sql_exporter:0.16.0 AS sql-exporter
|
||||
FROM burningalchemist/sql_exporter:0.17.0 AS sql-exporter
|
||||
|
||||
#########################################################################################
|
||||
#
|
||||
@@ -1491,7 +1398,7 @@ COPY --from=pgbouncer /usr/local/pgbouncer/bin/pgbouncer /usr/local/bin/
|
||||
COPY --chmod=0666 --chown=postgres compute/etc/pgbouncer.ini /etc/pgbouncer.ini
|
||||
|
||||
# local_proxy and its config
|
||||
COPY --from=local_proxy --chown=postgres /home/nonroot/target/release-line-debug-size-lto/local_proxy /usr/local/bin/local_proxy
|
||||
COPY --from=compute-tools --chown=postgres /home/nonroot/target/release-line-debug-size-lto/local_proxy /usr/local/bin/local_proxy
|
||||
RUN mkdir -p /etc/local_proxy && chown postgres:postgres /etc/local_proxy
|
||||
|
||||
# Metrics exporter binaries and configuration files
|
||||
@@ -1556,28 +1463,30 @@ RUN apt update && \
|
||||
locales \
|
||||
procps \
|
||||
ca-certificates \
|
||||
curl \
|
||||
unzip \
|
||||
$VERSION_INSTALLS && \
|
||||
apt clean && rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/* && \
|
||||
localedef -i en_US -c -f UTF-8 -A /usr/share/locale/locale.alias en_US.UTF-8
|
||||
|
||||
# s5cmd 2.2.2 from https://github.com/peak/s5cmd/releases/tag/v2.2.2
|
||||
# used by fast_import
|
||||
# aws cli is used by fast_import (curl and unzip above are at this time only used for this installation step)
|
||||
ARG TARGETARCH
|
||||
ADD https://github.com/peak/s5cmd/releases/download/v2.2.2/s5cmd_2.2.2_linux_$TARGETARCH.deb /tmp/s5cmd.deb
|
||||
RUN set -ex; \
|
||||
\
|
||||
# Determine the expected checksum based on TARGETARCH
|
||||
if [ "${TARGETARCH}" = "amd64" ]; then \
|
||||
CHECKSUM="392c385320cd5ffa435759a95af77c215553d967e4b1c0fffe52e4f14c29cf85"; \
|
||||
TARGETARCH_ALT="x86_64"; \
|
||||
CHECKSUM="c9a9df3770a3ff9259cb469b6179e02829687a464e0824d5c32d378820b53a00"; \
|
||||
elif [ "${TARGETARCH}" = "arm64" ]; then \
|
||||
CHECKSUM="939bee3cf4b5604ddb00e67f8c157b91d7c7a5b553d1fbb6890fad32894b7b46"; \
|
||||
TARGETARCH_ALT="aarch64"; \
|
||||
CHECKSUM="8181730be7891582b38b028112e81b4899ca817e8c616aad807c9e9d1289223a"; \
|
||||
else \
|
||||
echo "Unsupported architecture: ${TARGETARCH}"; exit 1; \
|
||||
fi; \
|
||||
\
|
||||
# Compute and validate the checksum
|
||||
echo "${CHECKSUM} /tmp/s5cmd.deb" | sha256sum -c -
|
||||
RUN dpkg -i /tmp/s5cmd.deb && rm /tmp/s5cmd.deb
|
||||
curl -L "https://awscli.amazonaws.com/awscli-exe-linux-${TARGETARCH_ALT}-2.17.5.zip" -o /tmp/awscliv2.zip; \
|
||||
echo "${CHECKSUM} /tmp/awscliv2.zip" | sha256sum -c -; \
|
||||
unzip /tmp/awscliv2.zip -d /tmp/awscliv2; \
|
||||
/tmp/awscliv2/aws/install; \
|
||||
rm -rf /tmp/awscliv2.zip /tmp/awscliv2; \
|
||||
true
|
||||
|
||||
ENV LANG=en_US.utf8
|
||||
USER postgres
|
||||
|
||||
@@ -7,7 +7,7 @@ license.workspace = true
|
||||
[features]
|
||||
default = []
|
||||
# Enables test specific features.
|
||||
testing = []
|
||||
testing = ["fail/failpoints"]
|
||||
|
||||
[dependencies]
|
||||
base64.workspace = true
|
||||
@@ -15,13 +15,15 @@ aws-config.workspace = true
|
||||
aws-sdk-s3.workspace = true
|
||||
aws-sdk-kms.workspace = true
|
||||
anyhow.workspace = true
|
||||
axum = { workspace = true, features = [] }
|
||||
camino.workspace = true
|
||||
chrono.workspace = true
|
||||
cfg-if.workspace = true
|
||||
clap.workspace = true
|
||||
fail.workspace = true
|
||||
flate2.workspace = true
|
||||
futures.workspace = true
|
||||
hyper0 = { workspace = true, features = ["full"] }
|
||||
http.workspace = true
|
||||
metrics.workspace = true
|
||||
nix.workspace = true
|
||||
notify.workspace = true
|
||||
@@ -36,6 +38,8 @@ serde_with.workspace = true
|
||||
serde_json.workspace = true
|
||||
signal-hook.workspace = true
|
||||
tar.workspace = true
|
||||
tower.workspace = true
|
||||
tower-http.workspace = true
|
||||
reqwest = { workspace = true, features = ["json"] }
|
||||
tokio = { workspace = true, features = ["rt", "rt-multi-thread"] }
|
||||
tokio-postgres.workspace = true
|
||||
@@ -47,6 +51,7 @@ tracing-subscriber.workspace = true
|
||||
tracing-utils.workspace = true
|
||||
thiserror.workspace = true
|
||||
url.workspace = true
|
||||
uuid.workspace = true
|
||||
prometheus.workspace = true
|
||||
|
||||
postgres_initdb.workspace = true
|
||||
|
||||
@@ -60,19 +60,22 @@ use compute_tools::compute::{
|
||||
};
|
||||
use compute_tools::configurator::launch_configurator;
|
||||
use compute_tools::extension_server::get_pg_version_string;
|
||||
use compute_tools::http::api::launch_http_server;
|
||||
use compute_tools::http::launch_http_server;
|
||||
use compute_tools::logger::*;
|
||||
use compute_tools::monitor::launch_monitor;
|
||||
use compute_tools::params::*;
|
||||
use compute_tools::spec::*;
|
||||
use compute_tools::swap::resize_swap;
|
||||
use rlimit::{setrlimit, Resource};
|
||||
use utils::failpoint_support;
|
||||
|
||||
// this is an arbitrary build tag. Fine as a default / for testing purposes
|
||||
// in-case of not-set environment var
|
||||
const BUILD_TAG_DEFAULT: &str = "latest";
|
||||
|
||||
fn main() -> Result<()> {
|
||||
let scenario = failpoint_support::init();
|
||||
|
||||
let (build_tag, clap_args) = init()?;
|
||||
|
||||
// enable core dumping for all child processes
|
||||
@@ -100,17 +103,14 @@ fn main() -> Result<()> {
|
||||
|
||||
maybe_delay_exit(delay_exit);
|
||||
|
||||
scenario.teardown();
|
||||
|
||||
deinit_and_exit(wait_pg_result);
|
||||
}
|
||||
|
||||
fn init() -> Result<(String, clap::ArgMatches)> {
|
||||
init_tracing_and_logging(DEFAULT_LOG_LEVEL)?;
|
||||
|
||||
opentelemetry::global::set_error_handler(|err| {
|
||||
tracing::info!("OpenTelemetry error: {err}");
|
||||
})
|
||||
.expect("global error handler lock poisoned");
|
||||
|
||||
let mut signals = Signals::new([SIGINT, SIGTERM, SIGQUIT])?;
|
||||
thread::spawn(move || {
|
||||
for sig in signals.forever() {
|
||||
@@ -419,9 +419,14 @@ fn start_postgres(
|
||||
"running compute with features: {:?}",
|
||||
state.pspec.as_ref().unwrap().spec.features
|
||||
);
|
||||
// before we release the mutex, fetch the swap size (if any) for later.
|
||||
let swap_size_bytes = state.pspec.as_ref().unwrap().spec.swap_size_bytes;
|
||||
let disk_quota_bytes = state.pspec.as_ref().unwrap().spec.disk_quota_bytes;
|
||||
// before we release the mutex, fetch some parameters for later.
|
||||
let &ComputeSpec {
|
||||
swap_size_bytes,
|
||||
disk_quota_bytes,
|
||||
#[cfg(target_os = "linux")]
|
||||
disable_lfc_resizing,
|
||||
..
|
||||
} = &state.pspec.as_ref().unwrap().spec;
|
||||
drop(state);
|
||||
|
||||
// Launch remaining service threads
|
||||
@@ -483,7 +488,10 @@ fn start_postgres(
|
||||
let mut pg = None;
|
||||
if !prestartup_failed {
|
||||
pg = match compute.start_compute() {
|
||||
Ok(pg) => Some(pg),
|
||||
Ok(pg) => {
|
||||
info!(postmaster_pid = %pg.0.id(), "Postgres was started");
|
||||
Some(pg)
|
||||
}
|
||||
Err(err) => {
|
||||
error!("could not start the compute node: {:#}", err);
|
||||
compute.set_failed_status(err);
|
||||
@@ -526,11 +534,18 @@ fn start_postgres(
|
||||
// This token is used internally by the monitor to clean up all threads
|
||||
let token = CancellationToken::new();
|
||||
|
||||
// don't pass postgres connection string to vm-monitor if we don't want it to resize LFC
|
||||
let pgconnstr = if disable_lfc_resizing.unwrap_or(false) {
|
||||
None
|
||||
} else {
|
||||
file_cache_connstr.cloned()
|
||||
};
|
||||
|
||||
let vm_monitor = rt.as_ref().map(|rt| {
|
||||
rt.spawn(vm_monitor::start(
|
||||
Box::leak(Box::new(vm_monitor::Args {
|
||||
cgroup: cgroup.cloned(),
|
||||
pgconnstr: file_cache_connstr.cloned(),
|
||||
pgconnstr,
|
||||
addr: vm_monitor_addr.clone(),
|
||||
})),
|
||||
token.clone(),
|
||||
@@ -574,6 +589,8 @@ fn wait_postgres(pg: Option<PostgresHandle>) -> Result<WaitPostgresResult> {
|
||||
// propagate to Postgres and it will be shut down as well.
|
||||
let mut exit_code = None;
|
||||
if let Some((mut pg, logs_handle)) = pg {
|
||||
info!(postmaster_pid = %pg.id(), "Waiting for Postgres to exit");
|
||||
|
||||
let ecode = pg
|
||||
.wait()
|
||||
.expect("failed to start waiting on Postgres process");
|
||||
|
||||
@@ -17,7 +17,7 @@
|
||||
//!
|
||||
//! # Local Testing
|
||||
//!
|
||||
//! - Comment out most of the pgxns in The Dockerfile.compute-tools to speed up the build.
|
||||
//! - Comment out most of the pgxns in compute-node.Dockerfile to speed up the build.
|
||||
//! - Build the image with the following command:
|
||||
//!
|
||||
//! ```bash
|
||||
@@ -31,22 +31,29 @@ use camino::{Utf8Path, Utf8PathBuf};
|
||||
use clap::Parser;
|
||||
use compute_tools::extension_server::{get_pg_version, PostgresMajorVersion};
|
||||
use nix::unistd::Pid;
|
||||
use tracing::{info, info_span, warn, Instrument};
|
||||
use tracing::{error, info, info_span, warn, Instrument};
|
||||
use utils::fs_ext::is_directory_empty;
|
||||
|
||||
#[path = "fast_import/aws_s3_sync.rs"]
|
||||
mod aws_s3_sync;
|
||||
#[path = "fast_import/child_stdio_to_log.rs"]
|
||||
mod child_stdio_to_log;
|
||||
#[path = "fast_import/s3_uri.rs"]
|
||||
mod s3_uri;
|
||||
#[path = "fast_import/s5cmd.rs"]
|
||||
mod s5cmd;
|
||||
|
||||
const PG_WAIT_TIMEOUT: std::time::Duration = std::time::Duration::from_secs(600);
|
||||
const PG_WAIT_RETRY_INTERVAL: std::time::Duration = std::time::Duration::from_millis(300);
|
||||
|
||||
#[derive(clap::Parser)]
|
||||
struct Args {
|
||||
#[clap(long)]
|
||||
working_directory: Utf8PathBuf,
|
||||
#[clap(long, env = "NEON_IMPORTER_S3_PREFIX")]
|
||||
s3_prefix: s3_uri::S3Uri,
|
||||
s3_prefix: Option<s3_uri::S3Uri>,
|
||||
#[clap(long)]
|
||||
source_connection_string: Option<String>,
|
||||
#[clap(short, long)]
|
||||
interactive: bool,
|
||||
#[clap(long)]
|
||||
pg_bin_dir: Utf8PathBuf,
|
||||
#[clap(long)]
|
||||
@@ -77,30 +84,70 @@ pub(crate) async fn main() -> anyhow::Result<()> {
|
||||
|
||||
info!("starting");
|
||||
|
||||
let Args {
|
||||
working_directory,
|
||||
s3_prefix,
|
||||
pg_bin_dir,
|
||||
pg_lib_dir,
|
||||
} = Args::parse();
|
||||
let args = Args::parse();
|
||||
|
||||
let aws_config = aws_config::load_defaults(BehaviorVersion::v2024_03_28()).await;
|
||||
// Validate arguments
|
||||
if args.s3_prefix.is_none() && args.source_connection_string.is_none() {
|
||||
anyhow::bail!("either s3_prefix or source_connection_string must be specified");
|
||||
}
|
||||
if args.s3_prefix.is_some() && args.source_connection_string.is_some() {
|
||||
anyhow::bail!("only one of s3_prefix or source_connection_string can be specified");
|
||||
}
|
||||
|
||||
let spec: Spec = {
|
||||
let spec_key = s3_prefix.append("/spec.json");
|
||||
let s3_client = aws_sdk_s3::Client::new(&aws_config);
|
||||
let object = s3_client
|
||||
.get_object()
|
||||
.bucket(&spec_key.bucket)
|
||||
.key(spec_key.key)
|
||||
.send()
|
||||
.await
|
||||
.context("get spec from s3")?
|
||||
.body
|
||||
.collect()
|
||||
.await
|
||||
.context("download spec body")?;
|
||||
serde_json::from_slice(&object.into_bytes()).context("parse spec as json")?
|
||||
let working_directory = args.working_directory;
|
||||
let pg_bin_dir = args.pg_bin_dir;
|
||||
let pg_lib_dir = args.pg_lib_dir;
|
||||
|
||||
// Initialize AWS clients only if s3_prefix is specified
|
||||
let (aws_config, kms_client) = if args.s3_prefix.is_some() {
|
||||
let config = aws_config::load_defaults(BehaviorVersion::v2024_03_28()).await;
|
||||
let kms = aws_sdk_kms::Client::new(&config);
|
||||
(Some(config), Some(kms))
|
||||
} else {
|
||||
(None, None)
|
||||
};
|
||||
|
||||
// Get source connection string either from S3 spec or direct argument
|
||||
let source_connection_string = if let Some(s3_prefix) = &args.s3_prefix {
|
||||
let spec: Spec = {
|
||||
let spec_key = s3_prefix.append("/spec.json");
|
||||
let s3_client = aws_sdk_s3::Client::new(aws_config.as_ref().unwrap());
|
||||
let object = s3_client
|
||||
.get_object()
|
||||
.bucket(&spec_key.bucket)
|
||||
.key(spec_key.key)
|
||||
.send()
|
||||
.await
|
||||
.context("get spec from s3")?
|
||||
.body
|
||||
.collect()
|
||||
.await
|
||||
.context("download spec body")?;
|
||||
serde_json::from_slice(&object.into_bytes()).context("parse spec as json")?
|
||||
};
|
||||
|
||||
match spec.encryption_secret {
|
||||
EncryptionSecret::KMS { key_id } => {
|
||||
let mut output = kms_client
|
||||
.unwrap()
|
||||
.decrypt()
|
||||
.key_id(key_id)
|
||||
.ciphertext_blob(aws_sdk_s3::primitives::Blob::new(
|
||||
spec.source_connstring_ciphertext_base64,
|
||||
))
|
||||
.send()
|
||||
.await
|
||||
.context("decrypt source connection string")?;
|
||||
let plaintext = output
|
||||
.plaintext
|
||||
.take()
|
||||
.context("get plaintext source connection string")?;
|
||||
String::from_utf8(plaintext.into_inner())
|
||||
.context("parse source connection string as utf8")?
|
||||
}
|
||||
}
|
||||
} else {
|
||||
args.source_connection_string.unwrap()
|
||||
};
|
||||
|
||||
match tokio::fs::create_dir(&working_directory).await {
|
||||
@@ -123,15 +170,6 @@ pub(crate) async fn main() -> anyhow::Result<()> {
|
||||
.await
|
||||
.context("create pgdata directory")?;
|
||||
|
||||
//
|
||||
// Setup clients
|
||||
//
|
||||
let aws_config = aws_config::load_defaults(BehaviorVersion::v2024_03_28()).await;
|
||||
let kms_client = aws_sdk_kms::Client::new(&aws_config);
|
||||
|
||||
//
|
||||
// Initialize pgdata
|
||||
//
|
||||
let pgbin = pg_bin_dir.join("postgres");
|
||||
let pg_version = match get_pg_version(pgbin.as_ref()) {
|
||||
PostgresMajorVersion::V14 => 14,
|
||||
@@ -170,7 +208,13 @@ pub(crate) async fn main() -> anyhow::Result<()> {
|
||||
.args(["-c", &format!("max_parallel_workers={nproc}")])
|
||||
.args(["-c", &format!("max_parallel_workers_per_gather={nproc}")])
|
||||
.args(["-c", &format!("max_worker_processes={nproc}")])
|
||||
.args(["-c", "effective_io_concurrency=100"])
|
||||
.args([
|
||||
"-c",
|
||||
&format!(
|
||||
"effective_io_concurrency={}",
|
||||
if cfg!(target_os = "macos") { 0 } else { 100 }
|
||||
),
|
||||
])
|
||||
.env_clear()
|
||||
.stdout(std::process::Stdio::piped())
|
||||
.stderr(std::process::Stdio::piped())
|
||||
@@ -185,44 +229,58 @@ pub(crate) async fn main() -> anyhow::Result<()> {
|
||||
)
|
||||
.instrument(info_span!("postgres")),
|
||||
);
|
||||
|
||||
// Create neondb database in the running postgres
|
||||
let restore_pg_connstring =
|
||||
format!("host=localhost port=5432 user={superuser} dbname=postgres");
|
||||
|
||||
let start_time = std::time::Instant::now();
|
||||
|
||||
loop {
|
||||
let res = tokio_postgres::connect(&restore_pg_connstring, tokio_postgres::NoTls).await;
|
||||
if res.is_ok() {
|
||||
info!("postgres is ready, could connect to it");
|
||||
break;
|
||||
if start_time.elapsed() > PG_WAIT_TIMEOUT {
|
||||
error!(
|
||||
"timeout exceeded: failed to poll postgres and create database within 10 minutes"
|
||||
);
|
||||
std::process::exit(1);
|
||||
}
|
||||
|
||||
match tokio_postgres::connect(&restore_pg_connstring, tokio_postgres::NoTls).await {
|
||||
Ok((client, connection)) => {
|
||||
// Spawn the connection handling task to maintain the connection
|
||||
tokio::spawn(async move {
|
||||
if let Err(e) = connection.await {
|
||||
warn!("connection error: {}", e);
|
||||
}
|
||||
});
|
||||
|
||||
match client.simple_query("CREATE DATABASE neondb;").await {
|
||||
Ok(_) => {
|
||||
info!("created neondb database");
|
||||
break;
|
||||
}
|
||||
Err(e) => {
|
||||
warn!(
|
||||
"failed to create database: {}, retying in {}s",
|
||||
e,
|
||||
PG_WAIT_RETRY_INTERVAL.as_secs_f32()
|
||||
);
|
||||
tokio::time::sleep(PG_WAIT_RETRY_INTERVAL).await;
|
||||
continue;
|
||||
}
|
||||
}
|
||||
}
|
||||
Err(_) => {
|
||||
info!(
|
||||
"postgres not ready yet, retrying in {}s",
|
||||
PG_WAIT_RETRY_INTERVAL.as_secs_f32()
|
||||
);
|
||||
tokio::time::sleep(PG_WAIT_RETRY_INTERVAL).await;
|
||||
continue;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
//
|
||||
// Decrypt connection string
|
||||
//
|
||||
let source_connection_string = {
|
||||
match spec.encryption_secret {
|
||||
EncryptionSecret::KMS { key_id } => {
|
||||
let mut output = kms_client
|
||||
.decrypt()
|
||||
.key_id(key_id)
|
||||
.ciphertext_blob(aws_sdk_s3::primitives::Blob::new(
|
||||
spec.source_connstring_ciphertext_base64,
|
||||
))
|
||||
.send()
|
||||
.await
|
||||
.context("decrypt source connection string")?;
|
||||
let plaintext = output
|
||||
.plaintext
|
||||
.take()
|
||||
.context("get plaintext source connection string")?;
|
||||
String::from_utf8(plaintext.into_inner())
|
||||
.context("parse source connection string as utf8")?
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
//
|
||||
// Start the work
|
||||
//
|
||||
let restore_pg_connstring = restore_pg_connstring.replace("dbname=postgres", "dbname=neondb");
|
||||
|
||||
let dumpdir = working_directory.join("dumpdir");
|
||||
|
||||
@@ -310,6 +368,12 @@ pub(crate) async fn main() -> anyhow::Result<()> {
|
||||
}
|
||||
}
|
||||
|
||||
// If interactive mode, wait for Ctrl+C
|
||||
if args.interactive {
|
||||
info!("Running in interactive mode. Press Ctrl+C to shut down.");
|
||||
tokio::signal::ctrl_c().await.context("wait for ctrl-c")?;
|
||||
}
|
||||
|
||||
info!("shutdown postgres");
|
||||
{
|
||||
nix::sys::signal::kill(
|
||||
@@ -325,21 +389,24 @@ pub(crate) async fn main() -> anyhow::Result<()> {
|
||||
.context("wait for postgres to shut down")?;
|
||||
}
|
||||
|
||||
info!("upload pgdata");
|
||||
s5cmd::sync(Utf8Path::new(&pgdata_dir), &s3_prefix.append("/"))
|
||||
.await
|
||||
.context("sync dump directory to destination")?;
|
||||
|
||||
info!("write status");
|
||||
{
|
||||
let status_dir = working_directory.join("status");
|
||||
std::fs::create_dir(&status_dir).context("create status directory")?;
|
||||
let status_file = status_dir.join("status");
|
||||
std::fs::write(&status_file, serde_json::json!({"done": true}).to_string())
|
||||
.context("write status file")?;
|
||||
s5cmd::sync(&status_file, &s3_prefix.append("/status/pgdata"))
|
||||
// Only sync if s3_prefix was specified
|
||||
if let Some(s3_prefix) = args.s3_prefix {
|
||||
info!("upload pgdata");
|
||||
aws_s3_sync::sync(Utf8Path::new(&pgdata_dir), &s3_prefix.append("/pgdata/"))
|
||||
.await
|
||||
.context("sync status directory to destination")?;
|
||||
.context("sync dump directory to destination")?;
|
||||
|
||||
info!("write status");
|
||||
{
|
||||
let status_dir = working_directory.join("status");
|
||||
std::fs::create_dir(&status_dir).context("create status directory")?;
|
||||
let status_file = status_dir.join("pgdata");
|
||||
std::fs::write(&status_file, serde_json::json!({"done": true}).to_string())
|
||||
.context("write status file")?;
|
||||
aws_s3_sync::sync(&status_dir, &s3_prefix.append("/status/"))
|
||||
.await
|
||||
.context("sync status directory to destination")?;
|
||||
}
|
||||
}
|
||||
|
||||
Ok(())
|
||||
|
||||
@@ -4,24 +4,21 @@ use camino::Utf8Path;
|
||||
use super::s3_uri::S3Uri;
|
||||
|
||||
pub(crate) async fn sync(local: &Utf8Path, remote: &S3Uri) -> anyhow::Result<()> {
|
||||
let mut builder = tokio::process::Command::new("s5cmd");
|
||||
// s5cmd uses aws-sdk-go v1, hence doesn't support AWS_ENDPOINT_URL
|
||||
if let Some(val) = std::env::var_os("AWS_ENDPOINT_URL") {
|
||||
builder.arg("--endpoint-url").arg(val);
|
||||
}
|
||||
let mut builder = tokio::process::Command::new("aws");
|
||||
builder
|
||||
.arg("s3")
|
||||
.arg("sync")
|
||||
.arg(local.as_str())
|
||||
.arg(remote.to_string());
|
||||
let st = builder
|
||||
.spawn()
|
||||
.context("spawn s5cmd")?
|
||||
.context("spawn aws s3 sync")?
|
||||
.wait()
|
||||
.await
|
||||
.context("wait for s5cmd")?;
|
||||
.context("wait for aws s3 sync")?;
|
||||
if st.success() {
|
||||
Ok(())
|
||||
} else {
|
||||
Err(anyhow::anyhow!("s5cmd failed"))
|
||||
Err(anyhow::anyhow!("aws s3 sync failed"))
|
||||
}
|
||||
}
|
||||
@@ -36,11 +36,11 @@ pub async fn get_dbs_and_roles(compute: &Arc<ComputeNode>) -> anyhow::Result<Cat
|
||||
|
||||
#[derive(Debug, thiserror::Error)]
|
||||
pub enum SchemaDumpError {
|
||||
#[error("Database does not exist.")]
|
||||
#[error("database does not exist")]
|
||||
DatabaseDoesNotExist,
|
||||
#[error("Failed to execute pg_dump.")]
|
||||
#[error("failed to execute pg_dump")]
|
||||
IO(#[from] std::io::Error),
|
||||
#[error("Unexpected error.")]
|
||||
#[error("unexpected I/O error")]
|
||||
Unexpected,
|
||||
}
|
||||
|
||||
|
||||
@@ -15,7 +15,7 @@ use std::time::Instant;
|
||||
|
||||
use anyhow::{Context, Result};
|
||||
use chrono::{DateTime, Utc};
|
||||
use compute_api::spec::{PgIdent, Role};
|
||||
use compute_api::spec::{Database, PgIdent, Role};
|
||||
use futures::future::join_all;
|
||||
use futures::stream::FuturesUnordered;
|
||||
use futures::StreamExt;
|
||||
@@ -45,8 +45,10 @@ use crate::spec_apply::ApplySpecPhase::{
|
||||
DropInvalidDatabases, DropRoles, HandleNeonExtension, HandleOtherExtensions,
|
||||
RenameAndDeleteDatabases, RenameRoles, RunInEachDatabase,
|
||||
};
|
||||
use crate::spec_apply::PerDatabasePhase;
|
||||
use crate::spec_apply::PerDatabasePhase::{
|
||||
ChangeSchemaPerms, DeleteDBRoleReferences, HandleAnonExtension,
|
||||
ChangeSchemaPerms, DeleteDBRoleReferences, DropSubscriptionsForDeletedDatabases,
|
||||
HandleAnonExtension,
|
||||
};
|
||||
use crate::spec_apply::{apply_operations, MutableApplyContext, DB};
|
||||
use crate::sync_sk::{check_if_synced, ping_safekeeper};
|
||||
@@ -834,7 +836,7 @@ impl ComputeNode {
|
||||
conf
|
||||
}
|
||||
|
||||
async fn get_maintenance_client(
|
||||
pub async fn get_maintenance_client(
|
||||
conf: &tokio_postgres::Config,
|
||||
) -> Result<tokio_postgres::Client> {
|
||||
let mut conf = conf.clone();
|
||||
@@ -943,6 +945,78 @@ impl ComputeNode {
|
||||
dbs: databases,
|
||||
}));
|
||||
|
||||
// Apply special pre drop database phase.
|
||||
// NOTE: we use the code of RunInEachDatabase phase for parallelism
|
||||
// and connection management, but we don't really run it in *each* database,
|
||||
// only in databases, we're about to drop.
|
||||
info!("Applying PerDatabase (pre-dropdb) phase");
|
||||
let concurrency_token = Arc::new(tokio::sync::Semaphore::new(concurrency));
|
||||
|
||||
// Run the phase for each database that we're about to drop.
|
||||
let db_processes = spec
|
||||
.delta_operations
|
||||
.iter()
|
||||
.flatten()
|
||||
.filter_map(move |op| {
|
||||
if op.action.as_str() == "delete_db" {
|
||||
Some(op.name.clone())
|
||||
} else {
|
||||
None
|
||||
}
|
||||
})
|
||||
.map(|dbname| {
|
||||
let spec = spec.clone();
|
||||
let ctx = ctx.clone();
|
||||
let jwks_roles = jwks_roles.clone();
|
||||
let mut conf = conf.as_ref().clone();
|
||||
let concurrency_token = concurrency_token.clone();
|
||||
// We only need dbname field for this phase, so set other fields to dummy values
|
||||
let db = DB::UserDB(Database {
|
||||
name: dbname.clone(),
|
||||
owner: "cloud_admin".to_string(),
|
||||
options: None,
|
||||
restrict_conn: false,
|
||||
invalid: false,
|
||||
});
|
||||
|
||||
debug!("Applying per-database phases for Database {:?}", &db);
|
||||
|
||||
match &db {
|
||||
DB::SystemDB => {}
|
||||
DB::UserDB(db) => {
|
||||
conf.dbname(db.name.as_str());
|
||||
}
|
||||
}
|
||||
|
||||
let conf = Arc::new(conf);
|
||||
let fut = Self::apply_spec_sql_db(
|
||||
spec.clone(),
|
||||
conf,
|
||||
ctx.clone(),
|
||||
jwks_roles.clone(),
|
||||
concurrency_token.clone(),
|
||||
db,
|
||||
[DropSubscriptionsForDeletedDatabases].to_vec(),
|
||||
);
|
||||
|
||||
Ok(spawn(fut))
|
||||
})
|
||||
.collect::<Vec<Result<_, anyhow::Error>>>();
|
||||
|
||||
for process in db_processes.into_iter() {
|
||||
let handle = process?;
|
||||
if let Err(e) = handle.await? {
|
||||
// Handle the error case where the database does not exist
|
||||
// We do not check whether the DB exists or not in the deletion phase,
|
||||
// so we shouldn't be strict about it in pre-deletion cleanup as well.
|
||||
if e.to_string().contains("does not exist") {
|
||||
warn!("Error dropping subscription: {}", e);
|
||||
} else {
|
||||
return Err(e);
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
for phase in [
|
||||
CreateSuperUser,
|
||||
DropInvalidDatabases,
|
||||
@@ -962,7 +1036,7 @@ impl ComputeNode {
|
||||
.await?;
|
||||
}
|
||||
|
||||
info!("Applying RunInEachDatabase phase");
|
||||
info!("Applying RunInEachDatabase2 phase");
|
||||
let concurrency_token = Arc::new(tokio::sync::Semaphore::new(concurrency));
|
||||
|
||||
let db_processes = spec
|
||||
@@ -997,6 +1071,12 @@ impl ComputeNode {
|
||||
jwks_roles.clone(),
|
||||
concurrency_token.clone(),
|
||||
db,
|
||||
[
|
||||
DeleteDBRoleReferences,
|
||||
ChangeSchemaPerms,
|
||||
HandleAnonExtension,
|
||||
]
|
||||
.to_vec(),
|
||||
);
|
||||
|
||||
Ok(spawn(fut))
|
||||
@@ -1043,16 +1123,13 @@ impl ComputeNode {
|
||||
jwks_roles: Arc<HashSet<String>>,
|
||||
concurrency_token: Arc<tokio::sync::Semaphore>,
|
||||
db: DB,
|
||||
subphases: Vec<PerDatabasePhase>,
|
||||
) -> Result<()> {
|
||||
let _permit = concurrency_token.acquire().await?;
|
||||
|
||||
let mut client_conn = None;
|
||||
|
||||
for subphase in [
|
||||
DeleteDBRoleReferences,
|
||||
ChangeSchemaPerms,
|
||||
HandleAnonExtension,
|
||||
] {
|
||||
for subphase in subphases {
|
||||
apply_operations(
|
||||
spec.clone(),
|
||||
ctx.clone(),
|
||||
@@ -1181,8 +1258,19 @@ impl ComputeNode {
|
||||
let mut conf = postgres::config::Config::from(conf);
|
||||
conf.application_name("compute_ctl:migrations");
|
||||
|
||||
let mut client = conf.connect(NoTls)?;
|
||||
handle_migrations(&mut client).context("apply_config handle_migrations")
|
||||
match conf.connect(NoTls) {
|
||||
Ok(mut client) => {
|
||||
if let Err(e) = handle_migrations(&mut client) {
|
||||
error!("Failed to run migrations: {}", e);
|
||||
}
|
||||
}
|
||||
Err(e) => {
|
||||
error!(
|
||||
"Failed to connect to the compute for running migrations: {}",
|
||||
e
|
||||
);
|
||||
}
|
||||
};
|
||||
});
|
||||
|
||||
Ok::<(), anyhow::Error>(())
|
||||
|
||||
@@ -1,591 +0,0 @@
|
||||
use std::convert::Infallible;
|
||||
use std::net::IpAddr;
|
||||
use std::net::Ipv6Addr;
|
||||
use std::net::SocketAddr;
|
||||
use std::sync::Arc;
|
||||
use std::thread;
|
||||
|
||||
use crate::catalog::SchemaDumpError;
|
||||
use crate::catalog::{get_database_schema, get_dbs_and_roles};
|
||||
use crate::compute::forward_termination_signal;
|
||||
use crate::compute::{ComputeNode, ComputeState, ParsedSpec};
|
||||
use crate::installed_extensions;
|
||||
use compute_api::requests::{ConfigurationRequest, ExtensionInstallRequest, SetRoleGrantsRequest};
|
||||
use compute_api::responses::{
|
||||
ComputeStatus, ComputeStatusResponse, ExtensionInstallResult, GenericAPIError,
|
||||
SetRoleGrantsResponse,
|
||||
};
|
||||
|
||||
use anyhow::Result;
|
||||
use hyper::header::CONTENT_TYPE;
|
||||
use hyper::service::{make_service_fn, service_fn};
|
||||
use hyper::{Body, Method, Request, Response, Server, StatusCode};
|
||||
use metrics::proto::MetricFamily;
|
||||
use metrics::Encoder;
|
||||
use metrics::TextEncoder;
|
||||
use tokio::task;
|
||||
use tracing::{debug, error, info, warn};
|
||||
use tracing_utils::http::OtelName;
|
||||
use utils::http::request::must_get_query_param;
|
||||
|
||||
fn status_response_from_state(state: &ComputeState) -> ComputeStatusResponse {
|
||||
ComputeStatusResponse {
|
||||
start_time: state.start_time,
|
||||
tenant: state
|
||||
.pspec
|
||||
.as_ref()
|
||||
.map(|pspec| pspec.tenant_id.to_string()),
|
||||
timeline: state
|
||||
.pspec
|
||||
.as_ref()
|
||||
.map(|pspec| pspec.timeline_id.to_string()),
|
||||
status: state.status,
|
||||
last_active: state.last_active,
|
||||
error: state.error.clone(),
|
||||
}
|
||||
}
|
||||
|
||||
// Service function to handle all available routes.
|
||||
async fn routes(req: Request<Body>, compute: &Arc<ComputeNode>) -> Response<Body> {
|
||||
//
|
||||
// NOTE: The URI path is currently included in traces. That's OK because
|
||||
// it doesn't contain any variable parts or sensitive information. But
|
||||
// please keep that in mind if you change the routing here.
|
||||
//
|
||||
match (req.method(), req.uri().path()) {
|
||||
// Serialized compute state.
|
||||
(&Method::GET, "/status") => {
|
||||
debug!("serving /status GET request");
|
||||
let state = compute.state.lock().unwrap();
|
||||
let status_response = status_response_from_state(&state);
|
||||
Response::new(Body::from(serde_json::to_string(&status_response).unwrap()))
|
||||
}
|
||||
|
||||
// Startup metrics in JSON format. Keep /metrics reserved for a possible
|
||||
// future use for Prometheus metrics format.
|
||||
(&Method::GET, "/metrics.json") => {
|
||||
info!("serving /metrics.json GET request");
|
||||
let metrics = compute.state.lock().unwrap().metrics.clone();
|
||||
Response::new(Body::from(serde_json::to_string(&metrics).unwrap()))
|
||||
}
|
||||
|
||||
// Prometheus metrics
|
||||
(&Method::GET, "/metrics") => {
|
||||
debug!("serving /metrics GET request");
|
||||
|
||||
// When we call TextEncoder::encode() below, it will immediately
|
||||
// return an error if a metric family has no metrics, so we need to
|
||||
// preemptively filter out metric families with no metrics.
|
||||
let metrics = installed_extensions::collect()
|
||||
.into_iter()
|
||||
.filter(|m| !m.get_metric().is_empty())
|
||||
.collect::<Vec<MetricFamily>>();
|
||||
|
||||
let encoder = TextEncoder::new();
|
||||
let mut buffer = vec![];
|
||||
|
||||
if let Err(err) = encoder.encode(&metrics, &mut buffer) {
|
||||
let msg = format!("error handling /metrics request: {err}");
|
||||
error!(msg);
|
||||
return render_json_error(&msg, StatusCode::INTERNAL_SERVER_ERROR);
|
||||
}
|
||||
|
||||
match Response::builder()
|
||||
.status(StatusCode::OK)
|
||||
.header(CONTENT_TYPE, encoder.format_type())
|
||||
.body(Body::from(buffer))
|
||||
{
|
||||
Ok(response) => response,
|
||||
Err(err) => {
|
||||
let msg = format!("error handling /metrics request: {err}");
|
||||
error!(msg);
|
||||
render_json_error(&msg, StatusCode::INTERNAL_SERVER_ERROR)
|
||||
}
|
||||
}
|
||||
}
|
||||
// Collect Postgres current usage insights
|
||||
(&Method::GET, "/insights") => {
|
||||
info!("serving /insights GET request");
|
||||
let status = compute.get_status();
|
||||
if status != ComputeStatus::Running {
|
||||
let msg = format!("compute is not running, current status: {:?}", status);
|
||||
error!(msg);
|
||||
return Response::new(Body::from(msg));
|
||||
}
|
||||
|
||||
let insights = compute.collect_insights().await;
|
||||
Response::new(Body::from(insights))
|
||||
}
|
||||
|
||||
(&Method::POST, "/check_writability") => {
|
||||
info!("serving /check_writability POST request");
|
||||
let status = compute.get_status();
|
||||
if status != ComputeStatus::Running {
|
||||
let msg = format!(
|
||||
"invalid compute status for check_writability request: {:?}",
|
||||
status
|
||||
);
|
||||
error!(msg);
|
||||
return Response::new(Body::from(msg));
|
||||
}
|
||||
|
||||
let res = crate::checker::check_writability(compute).await;
|
||||
match res {
|
||||
Ok(_) => Response::new(Body::from("true")),
|
||||
Err(e) => {
|
||||
error!("check_writability failed: {}", e);
|
||||
Response::new(Body::from(e.to_string()))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
(&Method::POST, "/extensions") => {
|
||||
info!("serving /extensions POST request");
|
||||
let status = compute.get_status();
|
||||
if status != ComputeStatus::Running {
|
||||
let msg = format!(
|
||||
"invalid compute status for extensions request: {:?}",
|
||||
status
|
||||
);
|
||||
error!(msg);
|
||||
return render_json_error(&msg, StatusCode::PRECONDITION_FAILED);
|
||||
}
|
||||
|
||||
let request = hyper::body::to_bytes(req.into_body()).await.unwrap();
|
||||
let request = serde_json::from_slice::<ExtensionInstallRequest>(&request).unwrap();
|
||||
let res = compute
|
||||
.install_extension(&request.extension, &request.database, request.version)
|
||||
.await;
|
||||
match res {
|
||||
Ok(version) => render_json(Body::from(
|
||||
serde_json::to_string(&ExtensionInstallResult {
|
||||
extension: request.extension,
|
||||
version,
|
||||
})
|
||||
.unwrap(),
|
||||
)),
|
||||
Err(e) => {
|
||||
error!("install_extension failed: {}", e);
|
||||
render_json_error(&e.to_string(), StatusCode::INTERNAL_SERVER_ERROR)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
(&Method::GET, "/info") => {
|
||||
let num_cpus = num_cpus::get_physical();
|
||||
info!("serving /info GET request. num_cpus: {}", num_cpus);
|
||||
Response::new(Body::from(
|
||||
serde_json::json!({
|
||||
"num_cpus": num_cpus,
|
||||
})
|
||||
.to_string(),
|
||||
))
|
||||
}
|
||||
|
||||
// Accept spec in JSON format and request compute configuration. If
|
||||
// anything goes wrong after we set the compute status to `ConfigurationPending`
|
||||
// and update compute state with new spec, we basically leave compute
|
||||
// in the potentially wrong state. That said, it's control-plane's
|
||||
// responsibility to watch compute state after reconfiguration request
|
||||
// and to clean restart in case of errors.
|
||||
(&Method::POST, "/configure") => {
|
||||
info!("serving /configure POST request");
|
||||
match handle_configure_request(req, compute).await {
|
||||
Ok(msg) => Response::new(Body::from(msg)),
|
||||
Err((msg, code)) => {
|
||||
error!("error handling /configure request: {msg}");
|
||||
render_json_error(&msg, code)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
(&Method::POST, "/terminate") => {
|
||||
info!("serving /terminate POST request");
|
||||
match handle_terminate_request(compute).await {
|
||||
Ok(()) => Response::new(Body::empty()),
|
||||
Err((msg, code)) => {
|
||||
error!("error handling /terminate request: {msg}");
|
||||
render_json_error(&msg, code)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
(&Method::GET, "/dbs_and_roles") => {
|
||||
info!("serving /dbs_and_roles GET request",);
|
||||
match get_dbs_and_roles(compute).await {
|
||||
Ok(res) => render_json(Body::from(serde_json::to_string(&res).unwrap())),
|
||||
Err(_) => {
|
||||
render_json_error("can't get dbs and roles", StatusCode::INTERNAL_SERVER_ERROR)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
(&Method::GET, "/database_schema") => {
|
||||
let database = match must_get_query_param(&req, "database") {
|
||||
Err(e) => return e.into_response(),
|
||||
Ok(database) => database,
|
||||
};
|
||||
info!("serving /database_schema GET request with database: {database}",);
|
||||
match get_database_schema(compute, &database).await {
|
||||
Ok(res) => render_plain(Body::wrap_stream(res)),
|
||||
Err(SchemaDumpError::DatabaseDoesNotExist) => {
|
||||
render_json_error("database does not exist", StatusCode::NOT_FOUND)
|
||||
}
|
||||
Err(e) => {
|
||||
error!("can't get schema dump: {}", e);
|
||||
render_json_error("can't get schema dump", StatusCode::INTERNAL_SERVER_ERROR)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
(&Method::POST, "/grants") => {
|
||||
info!("serving /grants POST request");
|
||||
let status = compute.get_status();
|
||||
if status != ComputeStatus::Running {
|
||||
let msg = format!(
|
||||
"invalid compute status for set_role_grants request: {:?}",
|
||||
status
|
||||
);
|
||||
error!(msg);
|
||||
return render_json_error(&msg, StatusCode::PRECONDITION_FAILED);
|
||||
}
|
||||
|
||||
let request = hyper::body::to_bytes(req.into_body()).await.unwrap();
|
||||
let request = serde_json::from_slice::<SetRoleGrantsRequest>(&request).unwrap();
|
||||
|
||||
let res = compute
|
||||
.set_role_grants(
|
||||
&request.database,
|
||||
&request.schema,
|
||||
&request.privileges,
|
||||
&request.role,
|
||||
)
|
||||
.await;
|
||||
match res {
|
||||
Ok(()) => render_json(Body::from(
|
||||
serde_json::to_string(&SetRoleGrantsResponse {
|
||||
database: request.database,
|
||||
schema: request.schema,
|
||||
role: request.role,
|
||||
privileges: request.privileges,
|
||||
})
|
||||
.unwrap(),
|
||||
)),
|
||||
Err(e) => render_json_error(
|
||||
&format!("could not grant role privileges to the schema: {e}"),
|
||||
// TODO: can we filter on role/schema not found errors
|
||||
// and return appropriate error code?
|
||||
StatusCode::INTERNAL_SERVER_ERROR,
|
||||
),
|
||||
}
|
||||
}
|
||||
|
||||
// get the list of installed extensions
|
||||
// currently only used in python tests
|
||||
// TODO: call it from cplane
|
||||
(&Method::GET, "/installed_extensions") => {
|
||||
info!("serving /installed_extensions GET request");
|
||||
let status = compute.get_status();
|
||||
if status != ComputeStatus::Running {
|
||||
let msg = format!(
|
||||
"invalid compute status for extensions request: {:?}",
|
||||
status
|
||||
);
|
||||
error!(msg);
|
||||
return Response::new(Body::from(msg));
|
||||
}
|
||||
|
||||
let conf = compute.get_conn_conf(None);
|
||||
let res =
|
||||
task::spawn_blocking(move || installed_extensions::get_installed_extensions(conf))
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
match res {
|
||||
Ok(res) => render_json(Body::from(serde_json::to_string(&res).unwrap())),
|
||||
Err(e) => render_json_error(
|
||||
&format!("could not get list of installed extensions: {}", e),
|
||||
StatusCode::INTERNAL_SERVER_ERROR,
|
||||
),
|
||||
}
|
||||
}
|
||||
|
||||
// download extension files from remote extension storage on demand
|
||||
(&Method::POST, route) if route.starts_with("/extension_server/") => {
|
||||
info!("serving {:?} POST request", route);
|
||||
info!("req.uri {:?}", req.uri());
|
||||
|
||||
// don't even try to download extensions
|
||||
// if no remote storage is configured
|
||||
if compute.ext_remote_storage.is_none() {
|
||||
info!("no extensions remote storage configured");
|
||||
let mut resp = Response::new(Body::from("no remote storage configured"));
|
||||
*resp.status_mut() = StatusCode::INTERNAL_SERVER_ERROR;
|
||||
return resp;
|
||||
}
|
||||
|
||||
let mut is_library = false;
|
||||
if let Some(params) = req.uri().query() {
|
||||
info!("serving {:?} POST request with params: {}", route, params);
|
||||
if params == "is_library=true" {
|
||||
is_library = true;
|
||||
} else {
|
||||
let mut resp = Response::new(Body::from("Wrong request parameters"));
|
||||
*resp.status_mut() = StatusCode::BAD_REQUEST;
|
||||
return resp;
|
||||
}
|
||||
}
|
||||
let filename = route.split('/').last().unwrap().to_string();
|
||||
info!("serving /extension_server POST request, filename: {filename:?} is_library: {is_library}");
|
||||
|
||||
// get ext_name and path from spec
|
||||
// don't lock compute_state for too long
|
||||
let ext = {
|
||||
let compute_state = compute.state.lock().unwrap();
|
||||
let pspec = compute_state.pspec.as_ref().expect("spec must be set");
|
||||
let spec = &pspec.spec;
|
||||
|
||||
// debug only
|
||||
info!("spec: {:?}", spec);
|
||||
|
||||
let remote_extensions = match spec.remote_extensions.as_ref() {
|
||||
Some(r) => r,
|
||||
None => {
|
||||
info!("no remote extensions spec was provided");
|
||||
let mut resp = Response::new(Body::from("no remote storage configured"));
|
||||
*resp.status_mut() = StatusCode::INTERNAL_SERVER_ERROR;
|
||||
return resp;
|
||||
}
|
||||
};
|
||||
|
||||
remote_extensions.get_ext(
|
||||
&filename,
|
||||
is_library,
|
||||
&compute.build_tag,
|
||||
&compute.pgversion,
|
||||
)
|
||||
};
|
||||
|
||||
match ext {
|
||||
Ok((ext_name, ext_path)) => {
|
||||
match compute.download_extension(ext_name, ext_path).await {
|
||||
Ok(_) => Response::new(Body::from("OK")),
|
||||
Err(e) => {
|
||||
error!("extension download failed: {}", e);
|
||||
let mut resp = Response::new(Body::from(e.to_string()));
|
||||
*resp.status_mut() = StatusCode::INTERNAL_SERVER_ERROR;
|
||||
resp
|
||||
}
|
||||
}
|
||||
}
|
||||
Err(e) => {
|
||||
warn!("extension download failed to find extension: {}", e);
|
||||
let mut resp = Response::new(Body::from("failed to find file"));
|
||||
*resp.status_mut() = StatusCode::INTERNAL_SERVER_ERROR;
|
||||
resp
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Return the `404 Not Found` for any other routes.
|
||||
_ => {
|
||||
let mut not_found = Response::new(Body::from("404 Not Found"));
|
||||
*not_found.status_mut() = StatusCode::NOT_FOUND;
|
||||
not_found
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
async fn handle_configure_request(
|
||||
req: Request<Body>,
|
||||
compute: &Arc<ComputeNode>,
|
||||
) -> Result<String, (String, StatusCode)> {
|
||||
if !compute.live_config_allowed {
|
||||
return Err((
|
||||
"live configuration is not allowed for this compute node".to_string(),
|
||||
StatusCode::PRECONDITION_FAILED,
|
||||
));
|
||||
}
|
||||
|
||||
let body_bytes = hyper::body::to_bytes(req.into_body()).await.unwrap();
|
||||
let spec_raw = String::from_utf8(body_bytes.to_vec()).unwrap();
|
||||
if let Ok(request) = serde_json::from_str::<ConfigurationRequest>(&spec_raw) {
|
||||
let spec = request.spec;
|
||||
|
||||
let parsed_spec = match ParsedSpec::try_from(spec) {
|
||||
Ok(ps) => ps,
|
||||
Err(msg) => return Err((msg, StatusCode::BAD_REQUEST)),
|
||||
};
|
||||
|
||||
// XXX: wrap state update under lock in code blocks. Otherwise,
|
||||
// we will try to `Send` `mut state` into the spawned thread
|
||||
// bellow, which will cause error:
|
||||
// ```
|
||||
// error: future cannot be sent between threads safely
|
||||
// ```
|
||||
{
|
||||
let mut state = compute.state.lock().unwrap();
|
||||
if state.status != ComputeStatus::Empty && state.status != ComputeStatus::Running {
|
||||
let msg = format!(
|
||||
"invalid compute status for configuration request: {:?}",
|
||||
state.status.clone()
|
||||
);
|
||||
return Err((msg, StatusCode::PRECONDITION_FAILED));
|
||||
}
|
||||
state.pspec = Some(parsed_spec);
|
||||
state.set_status(ComputeStatus::ConfigurationPending, &compute.state_changed);
|
||||
drop(state);
|
||||
info!("set new spec and notified waiters");
|
||||
}
|
||||
|
||||
// Spawn a blocking thread to wait for compute to become Running.
|
||||
// This is needed to do not block the main pool of workers and
|
||||
// be able to serve other requests while some particular request
|
||||
// is waiting for compute to finish configuration.
|
||||
let c = compute.clone();
|
||||
task::spawn_blocking(move || {
|
||||
let mut state = c.state.lock().unwrap();
|
||||
while state.status != ComputeStatus::Running {
|
||||
state = c.state_changed.wait(state).unwrap();
|
||||
info!(
|
||||
"waiting for compute to become Running, current status: {:?}",
|
||||
state.status
|
||||
);
|
||||
|
||||
if state.status == ComputeStatus::Failed {
|
||||
let err = state.error.as_ref().map_or("unknown error", |x| x);
|
||||
let msg = format!("compute configuration failed: {:?}", err);
|
||||
return Err((msg, StatusCode::INTERNAL_SERVER_ERROR));
|
||||
}
|
||||
}
|
||||
|
||||
Ok(())
|
||||
})
|
||||
.await
|
||||
.unwrap()?;
|
||||
|
||||
// Return current compute state if everything went well.
|
||||
let state = compute.state.lock().unwrap().clone();
|
||||
let status_response = status_response_from_state(&state);
|
||||
Ok(serde_json::to_string(&status_response).unwrap())
|
||||
} else {
|
||||
Err(("invalid spec".to_string(), StatusCode::BAD_REQUEST))
|
||||
}
|
||||
}
|
||||
|
||||
fn render_json_error(e: &str, status: StatusCode) -> Response<Body> {
|
||||
let error = GenericAPIError {
|
||||
error: e.to_string(),
|
||||
};
|
||||
Response::builder()
|
||||
.status(status)
|
||||
.header(CONTENT_TYPE, "application/json")
|
||||
.body(Body::from(serde_json::to_string(&error).unwrap()))
|
||||
.unwrap()
|
||||
}
|
||||
|
||||
fn render_json(body: Body) -> Response<Body> {
|
||||
Response::builder()
|
||||
.header(CONTENT_TYPE, "application/json")
|
||||
.body(body)
|
||||
.unwrap()
|
||||
}
|
||||
|
||||
fn render_plain(body: Body) -> Response<Body> {
|
||||
Response::builder()
|
||||
.header(CONTENT_TYPE, "text/plain")
|
||||
.body(body)
|
||||
.unwrap()
|
||||
}
|
||||
|
||||
async fn handle_terminate_request(compute: &Arc<ComputeNode>) -> Result<(), (String, StatusCode)> {
|
||||
{
|
||||
let mut state = compute.state.lock().unwrap();
|
||||
if state.status == ComputeStatus::Terminated {
|
||||
return Ok(());
|
||||
}
|
||||
if state.status != ComputeStatus::Empty && state.status != ComputeStatus::Running {
|
||||
let msg = format!(
|
||||
"invalid compute status for termination request: {}",
|
||||
state.status
|
||||
);
|
||||
return Err((msg, StatusCode::PRECONDITION_FAILED));
|
||||
}
|
||||
state.set_status(ComputeStatus::TerminationPending, &compute.state_changed);
|
||||
drop(state);
|
||||
}
|
||||
|
||||
forward_termination_signal();
|
||||
info!("sent signal and notified waiters");
|
||||
|
||||
// Spawn a blocking thread to wait for compute to become Terminated.
|
||||
// This is needed to do not block the main pool of workers and
|
||||
// be able to serve other requests while some particular request
|
||||
// is waiting for compute to finish configuration.
|
||||
let c = compute.clone();
|
||||
task::spawn_blocking(move || {
|
||||
let mut state = c.state.lock().unwrap();
|
||||
while state.status != ComputeStatus::Terminated {
|
||||
state = c.state_changed.wait(state).unwrap();
|
||||
info!(
|
||||
"waiting for compute to become {}, current status: {:?}",
|
||||
ComputeStatus::Terminated,
|
||||
state.status
|
||||
);
|
||||
}
|
||||
|
||||
Ok(())
|
||||
})
|
||||
.await
|
||||
.unwrap()?;
|
||||
info!("terminated Postgres");
|
||||
Ok(())
|
||||
}
|
||||
|
||||
// Main Hyper HTTP server function that runs it and blocks waiting on it forever.
|
||||
#[tokio::main]
|
||||
async fn serve(port: u16, state: Arc<ComputeNode>) {
|
||||
// this usually binds to both IPv4 and IPv6 on linux
|
||||
// see e.g. https://github.com/rust-lang/rust/pull/34440
|
||||
let addr = SocketAddr::new(IpAddr::from(Ipv6Addr::UNSPECIFIED), port);
|
||||
|
||||
let make_service = make_service_fn(move |_conn| {
|
||||
let state = state.clone();
|
||||
async move {
|
||||
Ok::<_, Infallible>(service_fn(move |req: Request<Body>| {
|
||||
let state = state.clone();
|
||||
async move {
|
||||
Ok::<_, Infallible>(
|
||||
// NOTE: We include the URI path in the string. It
|
||||
// doesn't contain any variable parts or sensitive
|
||||
// information in this API.
|
||||
tracing_utils::http::tracing_handler(
|
||||
req,
|
||||
|req| routes(req, &state),
|
||||
OtelName::UriPath,
|
||||
)
|
||||
.await,
|
||||
)
|
||||
}
|
||||
}))
|
||||
}
|
||||
});
|
||||
|
||||
info!("starting HTTP server on {}", addr);
|
||||
|
||||
let server = Server::bind(&addr).serve(make_service);
|
||||
|
||||
// Run this server forever
|
||||
if let Err(e) = server.await {
|
||||
error!("server error: {}", e);
|
||||
}
|
||||
}
|
||||
|
||||
/// Launch a separate Hyper HTTP API server thread and return its `JoinHandle`.
|
||||
pub fn launch_http_server(port: u16, state: &Arc<ComputeNode>) -> Result<thread::JoinHandle<()>> {
|
||||
let state = Arc::clone(state);
|
||||
|
||||
Ok(thread::Builder::new()
|
||||
.name("http-endpoint".into())
|
||||
.spawn(move || serve(port, state))?)
|
||||
}
|
||||
48
compute_tools/src/http/extract/json.rs
Normal file
48
compute_tools/src/http/extract/json.rs
Normal file
@@ -0,0 +1,48 @@
|
||||
use std::ops::{Deref, DerefMut};
|
||||
|
||||
use axum::{
|
||||
async_trait,
|
||||
extract::{rejection::JsonRejection, FromRequest, Request},
|
||||
};
|
||||
use compute_api::responses::GenericAPIError;
|
||||
use http::StatusCode;
|
||||
|
||||
/// Custom `Json` extractor, so that we can format errors into
|
||||
/// `JsonResponse<GenericAPIError>`.
|
||||
#[derive(Debug, Clone, Copy, Default)]
|
||||
pub(crate) struct Json<T>(pub T);
|
||||
|
||||
#[async_trait]
|
||||
impl<S, T> FromRequest<S> for Json<T>
|
||||
where
|
||||
axum::Json<T>: FromRequest<S, Rejection = JsonRejection>,
|
||||
S: Send + Sync,
|
||||
{
|
||||
type Rejection = (StatusCode, axum::Json<GenericAPIError>);
|
||||
|
||||
async fn from_request(req: Request, state: &S) -> Result<Self, Self::Rejection> {
|
||||
match axum::Json::<T>::from_request(req, state).await {
|
||||
Ok(value) => Ok(Self(value.0)),
|
||||
Err(rejection) => Err((
|
||||
rejection.status(),
|
||||
axum::Json(GenericAPIError {
|
||||
error: rejection.body_text().to_lowercase(),
|
||||
}),
|
||||
)),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<T> Deref for Json<T> {
|
||||
type Target = T;
|
||||
|
||||
fn deref(&self) -> &Self::Target {
|
||||
&self.0
|
||||
}
|
||||
}
|
||||
|
||||
impl<T> DerefMut for Json<T> {
|
||||
fn deref_mut(&mut self) -> &mut Self::Target {
|
||||
&mut self.0
|
||||
}
|
||||
}
|
||||
7
compute_tools/src/http/extract/mod.rs
Normal file
7
compute_tools/src/http/extract/mod.rs
Normal file
@@ -0,0 +1,7 @@
|
||||
pub(crate) mod json;
|
||||
pub(crate) mod path;
|
||||
pub(crate) mod query;
|
||||
|
||||
pub(crate) use json::Json;
|
||||
pub(crate) use path::Path;
|
||||
pub(crate) use query::Query;
|
||||
48
compute_tools/src/http/extract/path.rs
Normal file
48
compute_tools/src/http/extract/path.rs
Normal file
@@ -0,0 +1,48 @@
|
||||
use std::ops::{Deref, DerefMut};
|
||||
|
||||
use axum::{
|
||||
async_trait,
|
||||
extract::{rejection::PathRejection, FromRequestParts},
|
||||
};
|
||||
use compute_api::responses::GenericAPIError;
|
||||
use http::{request::Parts, StatusCode};
|
||||
|
||||
/// Custom `Path` extractor, so that we can format errors into
|
||||
/// `JsonResponse<GenericAPIError>`.
|
||||
#[derive(Debug, Clone, Copy, Default)]
|
||||
pub(crate) struct Path<T>(pub T);
|
||||
|
||||
#[async_trait]
|
||||
impl<S, T> FromRequestParts<S> for Path<T>
|
||||
where
|
||||
axum::extract::Path<T>: FromRequestParts<S, Rejection = PathRejection>,
|
||||
S: Send + Sync,
|
||||
{
|
||||
type Rejection = (StatusCode, axum::Json<GenericAPIError>);
|
||||
|
||||
async fn from_request_parts(parts: &mut Parts, state: &S) -> Result<Self, Self::Rejection> {
|
||||
match axum::extract::Path::<T>::from_request_parts(parts, state).await {
|
||||
Ok(value) => Ok(Self(value.0)),
|
||||
Err(rejection) => Err((
|
||||
rejection.status(),
|
||||
axum::Json(GenericAPIError {
|
||||
error: rejection.body_text().to_ascii_lowercase(),
|
||||
}),
|
||||
)),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<T> Deref for Path<T> {
|
||||
type Target = T;
|
||||
|
||||
fn deref(&self) -> &Self::Target {
|
||||
&self.0
|
||||
}
|
||||
}
|
||||
|
||||
impl<T> DerefMut for Path<T> {
|
||||
fn deref_mut(&mut self) -> &mut Self::Target {
|
||||
&mut self.0
|
||||
}
|
||||
}
|
||||
48
compute_tools/src/http/extract/query.rs
Normal file
48
compute_tools/src/http/extract/query.rs
Normal file
@@ -0,0 +1,48 @@
|
||||
use std::ops::{Deref, DerefMut};
|
||||
|
||||
use axum::{
|
||||
async_trait,
|
||||
extract::{rejection::QueryRejection, FromRequestParts},
|
||||
};
|
||||
use compute_api::responses::GenericAPIError;
|
||||
use http::{request::Parts, StatusCode};
|
||||
|
||||
/// Custom `Query` extractor, so that we can format errors into
|
||||
/// `JsonResponse<GenericAPIError>`.
|
||||
#[derive(Debug, Clone, Copy, Default)]
|
||||
pub(crate) struct Query<T>(pub T);
|
||||
|
||||
#[async_trait]
|
||||
impl<S, T> FromRequestParts<S> for Query<T>
|
||||
where
|
||||
axum::extract::Query<T>: FromRequestParts<S, Rejection = QueryRejection>,
|
||||
S: Send + Sync,
|
||||
{
|
||||
type Rejection = (StatusCode, axum::Json<GenericAPIError>);
|
||||
|
||||
async fn from_request_parts(parts: &mut Parts, state: &S) -> Result<Self, Self::Rejection> {
|
||||
match axum::extract::Query::<T>::from_request_parts(parts, state).await {
|
||||
Ok(value) => Ok(Self(value.0)),
|
||||
Err(rejection) => Err((
|
||||
rejection.status(),
|
||||
axum::Json(GenericAPIError {
|
||||
error: rejection.body_text().to_ascii_lowercase(),
|
||||
}),
|
||||
)),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<T> Deref for Query<T> {
|
||||
type Target = T;
|
||||
|
||||
fn deref(&self) -> &Self::Target {
|
||||
&self.0
|
||||
}
|
||||
}
|
||||
|
||||
impl<T> DerefMut for Query<T> {
|
||||
fn deref_mut(&mut self) -> &mut Self::Target {
|
||||
&mut self.0
|
||||
}
|
||||
}
|
||||
@@ -1 +1,56 @@
|
||||
pub mod api;
|
||||
use axum::{body::Body, response::Response};
|
||||
use compute_api::responses::{ComputeStatus, GenericAPIError};
|
||||
use http::{header::CONTENT_TYPE, StatusCode};
|
||||
use serde::Serialize;
|
||||
use tracing::error;
|
||||
|
||||
pub use server::launch_http_server;
|
||||
|
||||
mod extract;
|
||||
mod routes;
|
||||
mod server;
|
||||
|
||||
/// Convenience response builder for JSON responses
|
||||
struct JsonResponse;
|
||||
|
||||
impl JsonResponse {
|
||||
/// Helper for actually creating a response
|
||||
fn create_response(code: StatusCode, body: impl Serialize) -> Response {
|
||||
Response::builder()
|
||||
.status(code)
|
||||
.header(CONTENT_TYPE.as_str(), "application/json")
|
||||
.body(Body::from(serde_json::to_string(&body).unwrap()))
|
||||
.unwrap()
|
||||
}
|
||||
|
||||
/// Create a successful error response
|
||||
pub(self) fn success(code: StatusCode, body: impl Serialize) -> Response {
|
||||
assert!({
|
||||
let code = code.as_u16();
|
||||
|
||||
(200..300).contains(&code)
|
||||
});
|
||||
|
||||
Self::create_response(code, body)
|
||||
}
|
||||
|
||||
/// Create an error response
|
||||
pub(self) fn error(code: StatusCode, error: impl ToString) -> Response {
|
||||
assert!(code.as_u16() >= 400);
|
||||
|
||||
let message = error.to_string();
|
||||
error!(message);
|
||||
|
||||
Self::create_response(code, &GenericAPIError { error: message })
|
||||
}
|
||||
|
||||
/// Create an error response related to the compute being in an invalid state
|
||||
pub(self) fn invalid_status(status: ComputeStatus) -> Response {
|
||||
Self::create_response(
|
||||
StatusCode::PRECONDITION_FAILED,
|
||||
&GenericAPIError {
|
||||
error: format!("invalid compute status: {status}"),
|
||||
},
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -37,7 +37,7 @@ paths:
|
||||
schema:
|
||||
$ref: "#/components/schemas/ComputeMetrics"
|
||||
|
||||
/metrics
|
||||
/metrics:
|
||||
get:
|
||||
tags:
|
||||
- Info
|
||||
|
||||
20
compute_tools/src/http/routes/check_writability.rs
Normal file
20
compute_tools/src/http/routes/check_writability.rs
Normal file
@@ -0,0 +1,20 @@
|
||||
use std::sync::Arc;
|
||||
|
||||
use axum::{extract::State, response::Response};
|
||||
use compute_api::responses::ComputeStatus;
|
||||
use http::StatusCode;
|
||||
|
||||
use crate::{checker::check_writability, compute::ComputeNode, http::JsonResponse};
|
||||
|
||||
/// Check that the compute is currently running.
|
||||
pub(in crate::http) async fn is_writable(State(compute): State<Arc<ComputeNode>>) -> Response {
|
||||
let status = compute.get_status();
|
||||
if status != ComputeStatus::Running {
|
||||
return JsonResponse::invalid_status(status);
|
||||
}
|
||||
|
||||
match check_writability(&compute).await {
|
||||
Ok(_) => JsonResponse::success(StatusCode::OK, true),
|
||||
Err(e) => JsonResponse::error(StatusCode::INTERNAL_SERVER_ERROR, e),
|
||||
}
|
||||
}
|
||||
91
compute_tools/src/http/routes/configure.rs
Normal file
91
compute_tools/src/http/routes/configure.rs
Normal file
@@ -0,0 +1,91 @@
|
||||
use std::sync::Arc;
|
||||
|
||||
use axum::{extract::State, response::Response};
|
||||
use compute_api::{
|
||||
requests::ConfigurationRequest,
|
||||
responses::{ComputeStatus, ComputeStatusResponse},
|
||||
};
|
||||
use http::StatusCode;
|
||||
use tokio::task;
|
||||
use tracing::info;
|
||||
|
||||
use crate::{
|
||||
compute::{ComputeNode, ParsedSpec},
|
||||
http::{extract::Json, JsonResponse},
|
||||
};
|
||||
|
||||
// Accept spec in JSON format and request compute configuration. If anything
|
||||
// goes wrong after we set the compute status to `ConfigurationPending` and
|
||||
// update compute state with new spec, we basically leave compute in the
|
||||
// potentially wrong state. That said, it's control-plane's responsibility to
|
||||
// watch compute state after reconfiguration request and to clean restart in
|
||||
// case of errors.
|
||||
pub(in crate::http) async fn configure(
|
||||
State(compute): State<Arc<ComputeNode>>,
|
||||
request: Json<ConfigurationRequest>,
|
||||
) -> Response {
|
||||
if !compute.live_config_allowed {
|
||||
return JsonResponse::error(
|
||||
StatusCode::PRECONDITION_FAILED,
|
||||
"live configuration is not allowed for this compute node".to_string(),
|
||||
);
|
||||
}
|
||||
|
||||
let pspec = match ParsedSpec::try_from(request.spec.clone()) {
|
||||
Ok(p) => p,
|
||||
Err(e) => return JsonResponse::error(StatusCode::BAD_REQUEST, e),
|
||||
};
|
||||
|
||||
// XXX: wrap state update under lock in a code block. Otherwise, we will try
|
||||
// to `Send` `mut state` into the spawned thread bellow, which will cause
|
||||
// the following rustc error:
|
||||
//
|
||||
// error: future cannot be sent between threads safely
|
||||
{
|
||||
let mut state = compute.state.lock().unwrap();
|
||||
if !matches!(state.status, ComputeStatus::Empty | ComputeStatus::Running) {
|
||||
return JsonResponse::invalid_status(state.status);
|
||||
}
|
||||
|
||||
state.pspec = Some(pspec);
|
||||
state.set_status(ComputeStatus::ConfigurationPending, &compute.state_changed);
|
||||
drop(state);
|
||||
}
|
||||
|
||||
// Spawn a blocking thread to wait for compute to become Running. This is
|
||||
// needed to do not block the main pool of workers and be able to serve
|
||||
// other requests while some particular request is waiting for compute to
|
||||
// finish configuration.
|
||||
let c = compute.clone();
|
||||
let completed = task::spawn_blocking(move || {
|
||||
let mut state = c.state.lock().unwrap();
|
||||
while state.status != ComputeStatus::Running {
|
||||
state = c.state_changed.wait(state).unwrap();
|
||||
info!(
|
||||
"waiting for compute to become {}, current status: {}",
|
||||
ComputeStatus::Running,
|
||||
state.status
|
||||
);
|
||||
|
||||
if state.status == ComputeStatus::Failed {
|
||||
let err = state.error.as_ref().map_or("unknown error", |x| x);
|
||||
let msg = format!("compute configuration failed: {:?}", err);
|
||||
return Err(msg);
|
||||
}
|
||||
}
|
||||
|
||||
Ok(())
|
||||
})
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
if let Err(e) = completed {
|
||||
return JsonResponse::error(StatusCode::INTERNAL_SERVER_ERROR, e);
|
||||
}
|
||||
|
||||
// Return current compute state if everything went well.
|
||||
let state = compute.state.lock().unwrap().clone();
|
||||
let body = ComputeStatusResponse::from(&state);
|
||||
|
||||
JsonResponse::success(StatusCode::OK, body)
|
||||
}
|
||||
34
compute_tools/src/http/routes/database_schema.rs
Normal file
34
compute_tools/src/http/routes/database_schema.rs
Normal file
@@ -0,0 +1,34 @@
|
||||
use std::sync::Arc;
|
||||
|
||||
use axum::{body::Body, extract::State, response::Response};
|
||||
use http::{header::CONTENT_TYPE, StatusCode};
|
||||
use serde::Deserialize;
|
||||
|
||||
use crate::{
|
||||
catalog::{get_database_schema, SchemaDumpError},
|
||||
compute::ComputeNode,
|
||||
http::{extract::Query, JsonResponse},
|
||||
};
|
||||
|
||||
#[derive(Debug, Clone, Deserialize)]
|
||||
pub(in crate::http) struct DatabaseSchemaParams {
|
||||
database: String,
|
||||
}
|
||||
|
||||
/// Get a schema dump of the requested database.
|
||||
pub(in crate::http) async fn get_schema_dump(
|
||||
params: Query<DatabaseSchemaParams>,
|
||||
State(compute): State<Arc<ComputeNode>>,
|
||||
) -> Response {
|
||||
match get_database_schema(&compute, ¶ms.database).await {
|
||||
Ok(schema) => Response::builder()
|
||||
.status(StatusCode::OK)
|
||||
.header(CONTENT_TYPE.as_str(), "application/json")
|
||||
.body(Body::from_stream(schema))
|
||||
.unwrap(),
|
||||
Err(SchemaDumpError::DatabaseDoesNotExist) => {
|
||||
JsonResponse::error(StatusCode::NOT_FOUND, SchemaDumpError::DatabaseDoesNotExist)
|
||||
}
|
||||
Err(e) => JsonResponse::error(StatusCode::INTERNAL_SERVER_ERROR, e),
|
||||
}
|
||||
}
|
||||
16
compute_tools/src/http/routes/dbs_and_roles.rs
Normal file
16
compute_tools/src/http/routes/dbs_and_roles.rs
Normal file
@@ -0,0 +1,16 @@
|
||||
use std::sync::Arc;
|
||||
|
||||
use axum::{extract::State, response::Response};
|
||||
use http::StatusCode;
|
||||
|
||||
use crate::{catalog::get_dbs_and_roles, compute::ComputeNode, http::JsonResponse};
|
||||
|
||||
/// Get the databases and roles from the compute.
|
||||
pub(in crate::http) async fn get_catalog_objects(
|
||||
State(compute): State<Arc<ComputeNode>>,
|
||||
) -> Response {
|
||||
match get_dbs_and_roles(&compute).await {
|
||||
Ok(catalog_objects) => JsonResponse::success(StatusCode::OK, catalog_objects),
|
||||
Err(e) => JsonResponse::error(StatusCode::INTERNAL_SERVER_ERROR, e),
|
||||
}
|
||||
}
|
||||
68
compute_tools/src/http/routes/extension_server.rs
Normal file
68
compute_tools/src/http/routes/extension_server.rs
Normal file
@@ -0,0 +1,68 @@
|
||||
use std::sync::Arc;
|
||||
|
||||
use axum::{
|
||||
extract::State,
|
||||
response::{IntoResponse, Response},
|
||||
};
|
||||
use http::StatusCode;
|
||||
use serde::Deserialize;
|
||||
|
||||
use crate::{
|
||||
compute::ComputeNode,
|
||||
http::{
|
||||
extract::{Path, Query},
|
||||
JsonResponse,
|
||||
},
|
||||
};
|
||||
|
||||
#[derive(Debug, Clone, Deserialize)]
|
||||
pub(in crate::http) struct ExtensionServerParams {
|
||||
#[serde(default)]
|
||||
is_library: bool,
|
||||
}
|
||||
|
||||
/// Download a remote extension.
|
||||
pub(in crate::http) async fn download_extension(
|
||||
Path(filename): Path<String>,
|
||||
params: Query<ExtensionServerParams>,
|
||||
State(compute): State<Arc<ComputeNode>>,
|
||||
) -> Response {
|
||||
// Don't even try to download extensions if no remote storage is configured
|
||||
if compute.ext_remote_storage.is_none() {
|
||||
return JsonResponse::error(
|
||||
StatusCode::PRECONDITION_FAILED,
|
||||
"remote storage is not configured",
|
||||
);
|
||||
}
|
||||
|
||||
let ext = {
|
||||
let state = compute.state.lock().unwrap();
|
||||
let pspec = state.pspec.as_ref().unwrap();
|
||||
let spec = &pspec.spec;
|
||||
|
||||
let remote_extensions = match spec.remote_extensions.as_ref() {
|
||||
Some(r) => r,
|
||||
None => {
|
||||
return JsonResponse::error(
|
||||
StatusCode::CONFLICT,
|
||||
"information about remote extensions is unavailable",
|
||||
);
|
||||
}
|
||||
};
|
||||
|
||||
remote_extensions.get_ext(
|
||||
&filename,
|
||||
params.is_library,
|
||||
&compute.build_tag,
|
||||
&compute.pgversion,
|
||||
)
|
||||
};
|
||||
|
||||
match ext {
|
||||
Ok((ext_name, ext_path)) => match compute.download_extension(ext_name, ext_path).await {
|
||||
Ok(_) => StatusCode::OK.into_response(),
|
||||
Err(e) => JsonResponse::error(StatusCode::INTERNAL_SERVER_ERROR, e),
|
||||
},
|
||||
Err(e) => JsonResponse::error(StatusCode::NOT_FOUND, e),
|
||||
}
|
||||
}
|
||||
45
compute_tools/src/http/routes/extensions.rs
Normal file
45
compute_tools/src/http/routes/extensions.rs
Normal file
@@ -0,0 +1,45 @@
|
||||
use std::sync::Arc;
|
||||
|
||||
use axum::{extract::State, response::Response};
|
||||
use compute_api::{
|
||||
requests::ExtensionInstallRequest,
|
||||
responses::{ComputeStatus, ExtensionInstallResponse},
|
||||
};
|
||||
use http::StatusCode;
|
||||
|
||||
use crate::{
|
||||
compute::ComputeNode,
|
||||
http::{extract::Json, JsonResponse},
|
||||
};
|
||||
|
||||
/// Install a extension.
|
||||
pub(in crate::http) async fn install_extension(
|
||||
State(compute): State<Arc<ComputeNode>>,
|
||||
request: Json<ExtensionInstallRequest>,
|
||||
) -> Response {
|
||||
let status = compute.get_status();
|
||||
if status != ComputeStatus::Running {
|
||||
return JsonResponse::invalid_status(status);
|
||||
}
|
||||
|
||||
match compute
|
||||
.install_extension(
|
||||
&request.extension,
|
||||
&request.database,
|
||||
request.version.to_string(),
|
||||
)
|
||||
.await
|
||||
{
|
||||
Ok(version) => JsonResponse::success(
|
||||
StatusCode::CREATED,
|
||||
Some(ExtensionInstallResponse {
|
||||
extension: request.extension.clone(),
|
||||
version,
|
||||
}),
|
||||
),
|
||||
Err(e) => JsonResponse::error(
|
||||
StatusCode::INTERNAL_SERVER_ERROR,
|
||||
format!("failed to install extension: {e}"),
|
||||
),
|
||||
}
|
||||
}
|
||||
35
compute_tools/src/http/routes/failpoints.rs
Normal file
35
compute_tools/src/http/routes/failpoints.rs
Normal file
@@ -0,0 +1,35 @@
|
||||
use axum::response::{IntoResponse, Response};
|
||||
use http::StatusCode;
|
||||
use tracing::info;
|
||||
use utils::failpoint_support::{apply_failpoint, ConfigureFailpointsRequest};
|
||||
|
||||
use crate::http::{extract::Json, JsonResponse};
|
||||
|
||||
/// Configure failpoints for testing purposes.
|
||||
pub(in crate::http) async fn configure_failpoints(
|
||||
failpoints: Json<ConfigureFailpointsRequest>,
|
||||
) -> Response {
|
||||
if !fail::has_failpoints() {
|
||||
return JsonResponse::error(
|
||||
StatusCode::PRECONDITION_FAILED,
|
||||
"Cannot manage failpoints because neon was compiled without failpoints support",
|
||||
);
|
||||
}
|
||||
|
||||
for fp in &*failpoints {
|
||||
info!("cfg failpoint: {} {}", fp.name, fp.actions);
|
||||
|
||||
// We recognize one extra "action" that's not natively recognized
|
||||
// by the failpoints crate: exit, to immediately kill the process
|
||||
let cfg_result = apply_failpoint(&fp.name, &fp.actions);
|
||||
|
||||
if let Err(e) = cfg_result {
|
||||
return JsonResponse::error(
|
||||
StatusCode::BAD_REQUEST,
|
||||
format!("failed to configure failpoints: {e}"),
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
StatusCode::OK.into_response()
|
||||
}
|
||||
48
compute_tools/src/http/routes/grants.rs
Normal file
48
compute_tools/src/http/routes/grants.rs
Normal file
@@ -0,0 +1,48 @@
|
||||
use std::sync::Arc;
|
||||
|
||||
use axum::{extract::State, response::Response};
|
||||
use compute_api::{
|
||||
requests::SetRoleGrantsRequest,
|
||||
responses::{ComputeStatus, SetRoleGrantsResponse},
|
||||
};
|
||||
use http::StatusCode;
|
||||
|
||||
use crate::{
|
||||
compute::ComputeNode,
|
||||
http::{extract::Json, JsonResponse},
|
||||
};
|
||||
|
||||
/// Add grants for a role.
|
||||
pub(in crate::http) async fn add_grant(
|
||||
State(compute): State<Arc<ComputeNode>>,
|
||||
request: Json<SetRoleGrantsRequest>,
|
||||
) -> Response {
|
||||
let status = compute.get_status();
|
||||
if status != ComputeStatus::Running {
|
||||
return JsonResponse::invalid_status(status);
|
||||
}
|
||||
|
||||
match compute
|
||||
.set_role_grants(
|
||||
&request.database,
|
||||
&request.schema,
|
||||
&request.privileges,
|
||||
&request.role,
|
||||
)
|
||||
.await
|
||||
{
|
||||
Ok(()) => JsonResponse::success(
|
||||
StatusCode::CREATED,
|
||||
Some(SetRoleGrantsResponse {
|
||||
database: request.database.clone(),
|
||||
schema: request.schema.clone(),
|
||||
role: request.role.clone(),
|
||||
privileges: request.privileges.clone(),
|
||||
}),
|
||||
),
|
||||
Err(e) => JsonResponse::error(
|
||||
StatusCode::INTERNAL_SERVER_ERROR,
|
||||
format!("failed to grant role privileges to the schema: {e}"),
|
||||
),
|
||||
}
|
||||
}
|
||||
11
compute_tools/src/http/routes/info.rs
Normal file
11
compute_tools/src/http/routes/info.rs
Normal file
@@ -0,0 +1,11 @@
|
||||
use axum::response::Response;
|
||||
use compute_api::responses::InfoResponse;
|
||||
use http::StatusCode;
|
||||
|
||||
use crate::http::JsonResponse;
|
||||
|
||||
/// Get information about the physical characteristics about the compute.
|
||||
pub(in crate::http) async fn get_info() -> Response {
|
||||
let num_cpus = num_cpus::get_physical();
|
||||
JsonResponse::success(StatusCode::OK, &InfoResponse { num_cpus })
|
||||
}
|
||||
18
compute_tools/src/http/routes/insights.rs
Normal file
18
compute_tools/src/http/routes/insights.rs
Normal file
@@ -0,0 +1,18 @@
|
||||
use std::sync::Arc;
|
||||
|
||||
use axum::{extract::State, response::Response};
|
||||
use compute_api::responses::ComputeStatus;
|
||||
use http::StatusCode;
|
||||
|
||||
use crate::{compute::ComputeNode, http::JsonResponse};
|
||||
|
||||
/// Collect current Postgres usage insights.
|
||||
pub(in crate::http) async fn get_insights(State(compute): State<Arc<ComputeNode>>) -> Response {
|
||||
let status = compute.get_status();
|
||||
if status != ComputeStatus::Running {
|
||||
return JsonResponse::invalid_status(status);
|
||||
}
|
||||
|
||||
let insights = compute.collect_insights().await;
|
||||
JsonResponse::success(StatusCode::OK, insights)
|
||||
}
|
||||
33
compute_tools/src/http/routes/installed_extensions.rs
Normal file
33
compute_tools/src/http/routes/installed_extensions.rs
Normal file
@@ -0,0 +1,33 @@
|
||||
use std::sync::Arc;
|
||||
|
||||
use axum::{extract::State, response::Response};
|
||||
use compute_api::responses::ComputeStatus;
|
||||
use http::StatusCode;
|
||||
use tokio::task;
|
||||
|
||||
use crate::{compute::ComputeNode, http::JsonResponse, installed_extensions};
|
||||
|
||||
/// Get a list of installed extensions.
|
||||
pub(in crate::http) async fn get_installed_extensions(
|
||||
State(compute): State<Arc<ComputeNode>>,
|
||||
) -> Response {
|
||||
let status = compute.get_status();
|
||||
if status != ComputeStatus::Running {
|
||||
return JsonResponse::invalid_status(status);
|
||||
}
|
||||
|
||||
let conf = compute.get_conn_conf(None);
|
||||
let res = task::spawn_blocking(move || installed_extensions::get_installed_extensions(conf))
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
match res {
|
||||
Ok(installed_extensions) => {
|
||||
JsonResponse::success(StatusCode::OK, Some(installed_extensions))
|
||||
}
|
||||
Err(e) => JsonResponse::error(
|
||||
StatusCode::INTERNAL_SERVER_ERROR,
|
||||
format!("failed to get list of installed extensions: {e}"),
|
||||
),
|
||||
}
|
||||
}
|
||||
32
compute_tools/src/http/routes/metrics.rs
Normal file
32
compute_tools/src/http/routes/metrics.rs
Normal file
@@ -0,0 +1,32 @@
|
||||
use axum::{body::Body, response::Response};
|
||||
use http::header::CONTENT_TYPE;
|
||||
use http::StatusCode;
|
||||
use metrics::proto::MetricFamily;
|
||||
use metrics::Encoder;
|
||||
use metrics::TextEncoder;
|
||||
|
||||
use crate::{http::JsonResponse, installed_extensions};
|
||||
|
||||
/// Expose Prometheus metrics.
|
||||
pub(in crate::http) async fn get_metrics() -> Response {
|
||||
// When we call TextEncoder::encode() below, it will immediately return an
|
||||
// error if a metric family has no metrics, so we need to preemptively
|
||||
// filter out metric families with no metrics.
|
||||
let metrics = installed_extensions::collect()
|
||||
.into_iter()
|
||||
.filter(|m| !m.get_metric().is_empty())
|
||||
.collect::<Vec<MetricFamily>>();
|
||||
|
||||
let encoder = TextEncoder::new();
|
||||
let mut buffer = vec![];
|
||||
|
||||
if let Err(e) = encoder.encode(&metrics, &mut buffer) {
|
||||
return JsonResponse::error(StatusCode::INTERNAL_SERVER_ERROR, e);
|
||||
}
|
||||
|
||||
Response::builder()
|
||||
.status(StatusCode::OK)
|
||||
.header(CONTENT_TYPE, encoder.format_type())
|
||||
.body(Body::from(buffer))
|
||||
.unwrap()
|
||||
}
|
||||
12
compute_tools/src/http/routes/metrics_json.rs
Normal file
12
compute_tools/src/http/routes/metrics_json.rs
Normal file
@@ -0,0 +1,12 @@
|
||||
use std::sync::Arc;
|
||||
|
||||
use axum::{extract::State, response::Response};
|
||||
use http::StatusCode;
|
||||
|
||||
use crate::{compute::ComputeNode, http::JsonResponse};
|
||||
|
||||
/// Get startup metrics.
|
||||
pub(in crate::http) async fn get_metrics(State(compute): State<Arc<ComputeNode>>) -> Response {
|
||||
let metrics = compute.state.lock().unwrap().metrics.clone();
|
||||
JsonResponse::success(StatusCode::OK, metrics)
|
||||
}
|
||||
38
compute_tools/src/http/routes/mod.rs
Normal file
38
compute_tools/src/http/routes/mod.rs
Normal file
@@ -0,0 +1,38 @@
|
||||
use compute_api::responses::ComputeStatusResponse;
|
||||
|
||||
use crate::compute::ComputeState;
|
||||
|
||||
pub(in crate::http) mod check_writability;
|
||||
pub(in crate::http) mod configure;
|
||||
pub(in crate::http) mod database_schema;
|
||||
pub(in crate::http) mod dbs_and_roles;
|
||||
pub(in crate::http) mod extension_server;
|
||||
pub(in crate::http) mod extensions;
|
||||
pub(in crate::http) mod failpoints;
|
||||
pub(in crate::http) mod grants;
|
||||
pub(in crate::http) mod info;
|
||||
pub(in crate::http) mod insights;
|
||||
pub(in crate::http) mod installed_extensions;
|
||||
pub(in crate::http) mod metrics;
|
||||
pub(in crate::http) mod metrics_json;
|
||||
pub(in crate::http) mod status;
|
||||
pub(in crate::http) mod terminate;
|
||||
|
||||
impl From<&ComputeState> for ComputeStatusResponse {
|
||||
fn from(state: &ComputeState) -> Self {
|
||||
ComputeStatusResponse {
|
||||
start_time: state.start_time,
|
||||
tenant: state
|
||||
.pspec
|
||||
.as_ref()
|
||||
.map(|pspec| pspec.tenant_id.to_string()),
|
||||
timeline: state
|
||||
.pspec
|
||||
.as_ref()
|
||||
.map(|pspec| pspec.timeline_id.to_string()),
|
||||
status: state.status,
|
||||
last_active: state.last_active,
|
||||
error: state.error.clone(),
|
||||
}
|
||||
}
|
||||
}
|
||||
14
compute_tools/src/http/routes/status.rs
Normal file
14
compute_tools/src/http/routes/status.rs
Normal file
@@ -0,0 +1,14 @@
|
||||
use std::{ops::Deref, sync::Arc};
|
||||
|
||||
use axum::{extract::State, http::StatusCode, response::Response};
|
||||
use compute_api::responses::ComputeStatusResponse;
|
||||
|
||||
use crate::{compute::ComputeNode, http::JsonResponse};
|
||||
|
||||
/// Retrieve the state of the comute.
|
||||
pub(in crate::http) async fn get_status(State(compute): State<Arc<ComputeNode>>) -> Response {
|
||||
let state = compute.state.lock().unwrap();
|
||||
let body = ComputeStatusResponse::from(state.deref());
|
||||
|
||||
JsonResponse::success(StatusCode::OK, body)
|
||||
}
|
||||
58
compute_tools/src/http/routes/terminate.rs
Normal file
58
compute_tools/src/http/routes/terminate.rs
Normal file
@@ -0,0 +1,58 @@
|
||||
use std::sync::Arc;
|
||||
|
||||
use axum::{
|
||||
extract::State,
|
||||
response::{IntoResponse, Response},
|
||||
};
|
||||
use compute_api::responses::ComputeStatus;
|
||||
use http::StatusCode;
|
||||
use tokio::task;
|
||||
use tracing::info;
|
||||
|
||||
use crate::{
|
||||
compute::{forward_termination_signal, ComputeNode},
|
||||
http::JsonResponse,
|
||||
};
|
||||
|
||||
/// Terminate the compute.
|
||||
pub(in crate::http) async fn terminate(State(compute): State<Arc<ComputeNode>>) -> Response {
|
||||
{
|
||||
let mut state = compute.state.lock().unwrap();
|
||||
if state.status == ComputeStatus::Terminated {
|
||||
return StatusCode::CREATED.into_response();
|
||||
}
|
||||
|
||||
if !matches!(state.status, ComputeStatus::Empty | ComputeStatus::Running) {
|
||||
return JsonResponse::invalid_status(state.status);
|
||||
}
|
||||
|
||||
state.set_status(ComputeStatus::TerminationPending, &compute.state_changed);
|
||||
drop(state);
|
||||
}
|
||||
|
||||
forward_termination_signal();
|
||||
info!("sent signal and notified waiters");
|
||||
|
||||
// Spawn a blocking thread to wait for compute to become Terminated.
|
||||
// This is needed to do not block the main pool of workers and
|
||||
// be able to serve other requests while some particular request
|
||||
// is waiting for compute to finish configuration.
|
||||
let c = compute.clone();
|
||||
task::spawn_blocking(move || {
|
||||
let mut state = c.state.lock().unwrap();
|
||||
while state.status != ComputeStatus::Terminated {
|
||||
state = c.state_changed.wait(state).unwrap();
|
||||
info!(
|
||||
"waiting for compute to become {}, current status: {:?}",
|
||||
ComputeStatus::Terminated,
|
||||
state.status
|
||||
);
|
||||
}
|
||||
})
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
info!("terminated Postgres");
|
||||
|
||||
StatusCode::OK.into_response()
|
||||
}
|
||||
155
compute_tools/src/http/server.rs
Normal file
155
compute_tools/src/http/server.rs
Normal file
@@ -0,0 +1,155 @@
|
||||
use std::{
|
||||
net::{IpAddr, Ipv6Addr, SocketAddr},
|
||||
sync::Arc,
|
||||
thread,
|
||||
time::Duration,
|
||||
};
|
||||
|
||||
use anyhow::Result;
|
||||
use axum::{
|
||||
extract::Request,
|
||||
middleware::{self, Next},
|
||||
response::{IntoResponse, Response},
|
||||
routing::{get, post},
|
||||
Router,
|
||||
};
|
||||
use http::StatusCode;
|
||||
use tokio::net::TcpListener;
|
||||
use tower::ServiceBuilder;
|
||||
use tower_http::{request_id::PropagateRequestIdLayer, trace::TraceLayer};
|
||||
use tracing::{debug, error, info, Span};
|
||||
use uuid::Uuid;
|
||||
|
||||
use super::routes::{
|
||||
check_writability, configure, database_schema, dbs_and_roles, extension_server, extensions,
|
||||
grants, info as info_route, insights, installed_extensions, metrics, metrics_json, status,
|
||||
terminate,
|
||||
};
|
||||
use crate::compute::ComputeNode;
|
||||
|
||||
async fn handle_404() -> Response {
|
||||
StatusCode::NOT_FOUND.into_response()
|
||||
}
|
||||
|
||||
const X_REQUEST_ID: &str = "x-request-id";
|
||||
|
||||
/// This middleware function allows compute_ctl to generate its own request ID
|
||||
/// if one isn't supplied. The control plane will always send one as a UUID. The
|
||||
/// neon Postgres extension on the other hand does not send one.
|
||||
async fn maybe_add_request_id_header(mut request: Request, next: Next) -> Response {
|
||||
let headers = request.headers_mut();
|
||||
|
||||
if headers.get(X_REQUEST_ID).is_none() {
|
||||
headers.append(X_REQUEST_ID, Uuid::new_v4().to_string().parse().unwrap());
|
||||
}
|
||||
|
||||
next.run(request).await
|
||||
}
|
||||
|
||||
/// Run the HTTP server and wait on it forever.
|
||||
#[tokio::main]
|
||||
async fn serve(port: u16, compute: Arc<ComputeNode>) {
|
||||
let mut app = Router::new()
|
||||
.route("/check_writability", post(check_writability::is_writable))
|
||||
.route("/configure", post(configure::configure))
|
||||
.route("/database_schema", get(database_schema::get_schema_dump))
|
||||
.route("/dbs_and_roles", get(dbs_and_roles::get_catalog_objects))
|
||||
.route(
|
||||
"/extension_server/*filename",
|
||||
post(extension_server::download_extension),
|
||||
)
|
||||
.route("/extensions", post(extensions::install_extension))
|
||||
.route("/grants", post(grants::add_grant))
|
||||
.route("/info", get(info_route::get_info))
|
||||
.route("/insights", get(insights::get_insights))
|
||||
.route(
|
||||
"/installed_extensions",
|
||||
get(installed_extensions::get_installed_extensions),
|
||||
)
|
||||
.route("/metrics", get(metrics::get_metrics))
|
||||
.route("/metrics.json", get(metrics_json::get_metrics))
|
||||
.route("/status", get(status::get_status))
|
||||
.route("/terminate", post(terminate::terminate))
|
||||
.fallback(handle_404)
|
||||
.layer(
|
||||
ServiceBuilder::new()
|
||||
// Add this middleware since we assume the request ID exists
|
||||
.layer(middleware::from_fn(maybe_add_request_id_header))
|
||||
.layer(
|
||||
TraceLayer::new_for_http()
|
||||
.on_request(|request: &http::Request<_>, _span: &Span| {
|
||||
let request_id = request
|
||||
.headers()
|
||||
.get(X_REQUEST_ID)
|
||||
.unwrap()
|
||||
.to_str()
|
||||
.unwrap();
|
||||
|
||||
match request.uri().path() {
|
||||
"/metrics" => {
|
||||
debug!(%request_id, "{} {}", request.method(), request.uri())
|
||||
}
|
||||
_ => info!(%request_id, "{} {}", request.method(), request.uri()),
|
||||
};
|
||||
})
|
||||
.on_response(
|
||||
|response: &http::Response<_>, latency: Duration, _span: &Span| {
|
||||
let request_id = response
|
||||
.headers()
|
||||
.get(X_REQUEST_ID)
|
||||
.unwrap()
|
||||
.to_str()
|
||||
.unwrap();
|
||||
|
||||
info!(
|
||||
%request_id,
|
||||
code = response.status().as_u16(),
|
||||
latency = latency.as_millis()
|
||||
)
|
||||
},
|
||||
),
|
||||
)
|
||||
.layer(PropagateRequestIdLayer::x_request_id()),
|
||||
)
|
||||
.with_state(compute);
|
||||
|
||||
// Add in any testing support
|
||||
if cfg!(feature = "testing") {
|
||||
use super::routes::failpoints;
|
||||
|
||||
app = app.route("/failpoints", post(failpoints::configure_failpoints))
|
||||
}
|
||||
|
||||
// This usually binds to both IPv4 and IPv6 on Linux, see
|
||||
// https://github.com/rust-lang/rust/pull/34440 for more information
|
||||
let addr = SocketAddr::new(IpAddr::from(Ipv6Addr::UNSPECIFIED), port);
|
||||
let listener = match TcpListener::bind(&addr).await {
|
||||
Ok(listener) => listener,
|
||||
Err(e) => {
|
||||
error!(
|
||||
"failed to bind the compute_ctl HTTP server to port {}: {}",
|
||||
port, e
|
||||
);
|
||||
return;
|
||||
}
|
||||
};
|
||||
|
||||
if let Ok(local_addr) = listener.local_addr() {
|
||||
info!("compute_ctl HTTP server listening on {}", local_addr);
|
||||
} else {
|
||||
info!("compute_ctl HTTP server listening on port {}", port);
|
||||
}
|
||||
|
||||
if let Err(e) = axum::serve(listener, app).await {
|
||||
error!("compute_ctl HTTP server error: {}", e);
|
||||
}
|
||||
}
|
||||
|
||||
/// Launch a separate HTTP server thread and return its `JoinHandle`.
|
||||
pub fn launch_http_server(port: u16, state: &Arc<ComputeNode>) -> Result<thread::JoinHandle<()>> {
|
||||
let state = Arc::clone(state);
|
||||
|
||||
Ok(thread::Builder::new()
|
||||
.name("http-server".into())
|
||||
.spawn(move || serve(port, state))?)
|
||||
}
|
||||
@@ -3,8 +3,6 @@
|
||||
#![deny(unsafe_code)]
|
||||
#![deny(clippy::undocumented_unsafe_blocks)]
|
||||
|
||||
extern crate hyper0 as hyper;
|
||||
|
||||
pub mod checker;
|
||||
pub mod config;
|
||||
pub mod configurator;
|
||||
|
||||
@@ -1,13 +1,16 @@
|
||||
use anyhow::{Context, Result};
|
||||
use postgres::Client;
|
||||
use fail::fail_point;
|
||||
use postgres::{Client, Transaction};
|
||||
use tracing::info;
|
||||
|
||||
/// Runs a series of migrations on a target database
|
||||
pub(crate) struct MigrationRunner<'m> {
|
||||
client: &'m mut Client,
|
||||
migrations: &'m [&'m str],
|
||||
}
|
||||
|
||||
impl<'m> MigrationRunner<'m> {
|
||||
/// Create a new migration runner
|
||||
pub fn new(client: &'m mut Client, migrations: &'m [&'m str]) -> Self {
|
||||
// The neon_migration.migration_id::id column is a bigint, which is equivalent to an i64
|
||||
assert!(migrations.len() + 1 < i64::MAX as usize);
|
||||
@@ -15,87 +18,110 @@ impl<'m> MigrationRunner<'m> {
|
||||
Self { client, migrations }
|
||||
}
|
||||
|
||||
/// Get the current value neon_migration.migration_id
|
||||
fn get_migration_id(&mut self) -> Result<i64> {
|
||||
let query = "SELECT id FROM neon_migration.migration_id";
|
||||
let row = self
|
||||
.client
|
||||
.query_one(query, &[])
|
||||
.context("run_migrations get migration_id")?;
|
||||
.query_one("SELECT id FROM neon_migration.migration_id", &[])?;
|
||||
|
||||
Ok(row.get::<&str, i64>("id"))
|
||||
}
|
||||
|
||||
fn update_migration_id(&mut self, migration_id: i64) -> Result<()> {
|
||||
let setval = format!("UPDATE neon_migration.migration_id SET id={}", migration_id);
|
||||
/// Update the neon_migration.migration_id value
|
||||
///
|
||||
/// This function has a fail point called compute-migration, which can be
|
||||
/// used if you would like to fail the application of a series of migrations
|
||||
/// at some point.
|
||||
fn update_migration_id(txn: &mut Transaction, migration_id: i64) -> Result<()> {
|
||||
// We use this fail point in order to check that failing in the
|
||||
// middle of applying a series of migrations fails in an expected
|
||||
// manner
|
||||
if cfg!(feature = "testing") {
|
||||
let fail = (|| {
|
||||
fail_point!("compute-migration", |fail_migration_id| {
|
||||
migration_id == fail_migration_id.unwrap().parse::<i64>().unwrap()
|
||||
});
|
||||
|
||||
false
|
||||
})();
|
||||
|
||||
if fail {
|
||||
return Err(anyhow::anyhow!(format!(
|
||||
"migration {} was configured to fail because of a failpoint",
|
||||
migration_id
|
||||
)));
|
||||
}
|
||||
}
|
||||
|
||||
txn.query(
|
||||
"UPDATE neon_migration.migration_id SET id = $1",
|
||||
&[&migration_id],
|
||||
)
|
||||
.with_context(|| format!("update neon_migration.migration_id to {migration_id}"))?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Prepare the migrations the target database for handling migrations
|
||||
fn prepare_database(&mut self) -> Result<()> {
|
||||
self.client
|
||||
.simple_query(&setval)
|
||||
.context("run_migrations update id")?;
|
||||
.simple_query("CREATE SCHEMA IF NOT EXISTS neon_migration")?;
|
||||
self.client.simple_query("CREATE TABLE IF NOT EXISTS neon_migration.migration_id (key INT NOT NULL PRIMARY KEY, id bigint NOT NULL DEFAULT 0)")?;
|
||||
self.client.simple_query(
|
||||
"INSERT INTO neon_migration.migration_id VALUES (0, 0) ON CONFLICT DO NOTHING",
|
||||
)?;
|
||||
self.client
|
||||
.simple_query("ALTER SCHEMA neon_migration OWNER TO cloud_admin")?;
|
||||
self.client
|
||||
.simple_query("REVOKE ALL ON SCHEMA neon_migration FROM PUBLIC")?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn prepare_migrations(&mut self) -> Result<()> {
|
||||
let query = "CREATE SCHEMA IF NOT EXISTS neon_migration";
|
||||
self.client.simple_query(query)?;
|
||||
/// Run an individual migration
|
||||
fn run_migration(txn: &mut Transaction, migration_id: i64, migration: &str) -> Result<()> {
|
||||
if migration.starts_with("-- SKIP") {
|
||||
info!("Skipping migration id={}", migration_id);
|
||||
|
||||
let query = "CREATE TABLE IF NOT EXISTS neon_migration.migration_id (key INT NOT NULL PRIMARY KEY, id bigint NOT NULL DEFAULT 0)";
|
||||
self.client.simple_query(query)?;
|
||||
// Even though we are skipping the migration, updating the
|
||||
// migration ID should help keep logic easy to understand when
|
||||
// trying to understand the state of a cluster.
|
||||
Self::update_migration_id(txn, migration_id)?;
|
||||
} else {
|
||||
info!("Running migration id={}:\n{}\n", migration_id, migration);
|
||||
|
||||
let query = "INSERT INTO neon_migration.migration_id VALUES (0, 0) ON CONFLICT DO NOTHING";
|
||||
self.client.simple_query(query)?;
|
||||
txn.simple_query(migration)
|
||||
.with_context(|| format!("apply migration {migration_id}"))?;
|
||||
|
||||
let query = "ALTER SCHEMA neon_migration OWNER TO cloud_admin";
|
||||
self.client.simple_query(query)?;
|
||||
|
||||
let query = "REVOKE ALL ON SCHEMA neon_migration FROM PUBLIC";
|
||||
self.client.simple_query(query)?;
|
||||
Self::update_migration_id(txn, migration_id)?;
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Run the configured set of migrations
|
||||
pub fn run_migrations(mut self) -> Result<()> {
|
||||
self.prepare_migrations()?;
|
||||
self.prepare_database()
|
||||
.context("prepare database to handle migrations")?;
|
||||
|
||||
let mut current_migration = self.get_migration_id()? as usize;
|
||||
while current_migration < self.migrations.len() {
|
||||
macro_rules! migration_id {
|
||||
($cm:expr) => {
|
||||
($cm + 1) as i64
|
||||
};
|
||||
}
|
||||
// The index lags the migration ID by 1, so the current migration
|
||||
// ID is also the next index
|
||||
let migration_id = (current_migration + 1) as i64;
|
||||
|
||||
let migration = self.migrations[current_migration];
|
||||
let mut txn = self
|
||||
.client
|
||||
.transaction()
|
||||
.with_context(|| format!("begin transaction for migration {migration_id}"))?;
|
||||
|
||||
if migration.starts_with("-- SKIP") {
|
||||
info!("Skipping migration id={}", migration_id!(current_migration));
|
||||
} else {
|
||||
info!(
|
||||
"Running migration id={}:\n{}\n",
|
||||
migration_id!(current_migration),
|
||||
migration
|
||||
);
|
||||
Self::run_migration(&mut txn, migration_id, self.migrations[current_migration])
|
||||
.with_context(|| format!("running migration {migration_id}"))?;
|
||||
|
||||
self.client
|
||||
.simple_query("BEGIN")
|
||||
.context("begin migration")?;
|
||||
txn.commit()
|
||||
.with_context(|| format!("commit transaction for migration {migration_id}"))?;
|
||||
|
||||
self.client.simple_query(migration).with_context(|| {
|
||||
format!(
|
||||
"run_migrations migration id={}",
|
||||
migration_id!(current_migration)
|
||||
)
|
||||
})?;
|
||||
|
||||
// Migration IDs start at 1
|
||||
self.update_migration_id(migration_id!(current_migration))?;
|
||||
|
||||
self.client
|
||||
.simple_query("COMMIT")
|
||||
.context("commit migration")?;
|
||||
|
||||
info!("Finished migration id={}", migration_id!(current_migration));
|
||||
}
|
||||
info!("Finished migration id={}", migration_id);
|
||||
|
||||
current_migration += 1;
|
||||
}
|
||||
|
||||
@@ -0,0 +1,9 @@
|
||||
DO $$
|
||||
DECLARE
|
||||
bypassrls boolean;
|
||||
BEGIN
|
||||
SELECT rolbypassrls INTO bypassrls FROM pg_roles WHERE rolname = 'neon_superuser';
|
||||
IF NOT bypassrls THEN
|
||||
RAISE EXCEPTION 'neon_superuser cannot bypass RLS';
|
||||
END IF;
|
||||
END $$;
|
||||
25
compute_tools/src/migrations/tests/0002-alter_roles.sql
Normal file
25
compute_tools/src/migrations/tests/0002-alter_roles.sql
Normal file
@@ -0,0 +1,25 @@
|
||||
DO $$
|
||||
DECLARE
|
||||
role record;
|
||||
BEGIN
|
||||
FOR role IN
|
||||
SELECT rolname AS name, rolinherit AS inherit
|
||||
FROM pg_roles
|
||||
WHERE pg_has_role(rolname, 'neon_superuser', 'member')
|
||||
LOOP
|
||||
IF NOT role.inherit THEN
|
||||
RAISE EXCEPTION '% cannot inherit', quote_ident(role.name);
|
||||
END IF;
|
||||
END LOOP;
|
||||
|
||||
FOR role IN
|
||||
SELECT rolname AS name, rolbypassrls AS bypassrls
|
||||
FROM pg_roles
|
||||
WHERE NOT pg_has_role(rolname, 'neon_superuser', 'member')
|
||||
AND NOT starts_with(rolname, 'pg_')
|
||||
LOOP
|
||||
IF role.bypassrls THEN
|
||||
RAISE EXCEPTION '% can bypass RLS', quote_ident(role.name);
|
||||
END IF;
|
||||
END LOOP;
|
||||
END $$;
|
||||
@@ -0,0 +1,10 @@
|
||||
DO $$
|
||||
BEGIN
|
||||
IF (SELECT current_setting('server_version_num')::numeric < 160000) THEN
|
||||
RETURN;
|
||||
END IF;
|
||||
|
||||
IF NOT (SELECT pg_has_role('neon_superuser', 'pg_create_subscription', 'member')) THEN
|
||||
RAISE EXCEPTION 'neon_superuser cannot execute pg_create_subscription';
|
||||
END IF;
|
||||
END $$;
|
||||
@@ -0,0 +1,19 @@
|
||||
DO $$
|
||||
DECLARE
|
||||
monitor record;
|
||||
BEGIN
|
||||
SELECT pg_has_role('neon_superuser', 'pg_monitor', 'member') AS member,
|
||||
admin_option AS admin
|
||||
INTO monitor
|
||||
FROM pg_auth_members
|
||||
WHERE roleid = 'pg_monitor'::regrole
|
||||
AND member = 'pg_monitor'::regrole;
|
||||
|
||||
IF NOT monitor.member THEN
|
||||
RAISE EXCEPTION 'neon_superuser is not a member of pg_monitor';
|
||||
END IF;
|
||||
|
||||
IF NOT monitor.admin THEN
|
||||
RAISE EXCEPTION 'neon_superuser cannot grant pg_monitor';
|
||||
END IF;
|
||||
END $$;
|
||||
@@ -0,0 +1,2 @@
|
||||
-- This test was never written becuase at the time migration tests were added
|
||||
-- the accompanying migration was already skipped.
|
||||
@@ -0,0 +1,2 @@
|
||||
-- This test was never written becuase at the time migration tests were added
|
||||
-- the accompanying migration was already skipped.
|
||||
@@ -0,0 +1,2 @@
|
||||
-- This test was never written becuase at the time migration tests were added
|
||||
-- the accompanying migration was already skipped.
|
||||
@@ -0,0 +1,2 @@
|
||||
-- This test was never written becuase at the time migration tests were added
|
||||
-- the accompanying migration was already skipped.
|
||||
@@ -0,0 +1,2 @@
|
||||
-- This test was never written becuase at the time migration tests were added
|
||||
-- the accompanying migration was already skipped.
|
||||
@@ -0,0 +1,13 @@
|
||||
DO $$
|
||||
DECLARE
|
||||
can_execute boolean;
|
||||
BEGIN
|
||||
SELECT bool_and(has_function_privilege('neon_superuser', oid, 'execute'))
|
||||
INTO can_execute
|
||||
FROM pg_proc
|
||||
WHERE proname IN ('pg_export_snapshot', 'pg_log_standby_snapshot')
|
||||
AND pronamespace = 'pg_catalog'::regnamespace;
|
||||
IF NOT can_execute THEN
|
||||
RAISE EXCEPTION 'neon_superuser cannot execute both pg_export_snapshot and pg_log_standby_snapshot';
|
||||
END IF;
|
||||
END $$;
|
||||
@@ -0,0 +1,13 @@
|
||||
DO $$
|
||||
DECLARE
|
||||
can_execute boolean;
|
||||
BEGIN
|
||||
SELECT has_function_privilege('neon_superuser', oid, 'execute')
|
||||
INTO can_execute
|
||||
FROM pg_proc
|
||||
WHERE proname = 'pg_show_replication_origin_status'
|
||||
AND pronamespace = 'pg_catalog'::regnamespace;
|
||||
IF NOT can_execute THEN
|
||||
RAISE EXCEPTION 'neon_superuser cannot execute pg_show_replication_origin_status';
|
||||
END IF;
|
||||
END $$;
|
||||
@@ -47,6 +47,7 @@ pub enum PerDatabasePhase {
|
||||
DeleteDBRoleReferences,
|
||||
ChangeSchemaPerms,
|
||||
HandleAnonExtension,
|
||||
DropSubscriptionsForDeletedDatabases,
|
||||
}
|
||||
|
||||
#[derive(Clone, Debug)]
|
||||
@@ -74,7 +75,7 @@ pub struct MutableApplyContext {
|
||||
pub dbs: HashMap<String, Database>,
|
||||
}
|
||||
|
||||
/// Appply the operations that belong to the given spec apply phase.
|
||||
/// Apply the operations that belong to the given spec apply phase.
|
||||
///
|
||||
/// Commands within a single phase are executed in order of Iterator yield.
|
||||
/// Commands of ApplySpecPhase::RunInEachDatabase will execute in the database
|
||||
@@ -326,13 +327,12 @@ async fn get_operations<'a>(
|
||||
|
||||
// Use FORCE to drop database even if there are active connections.
|
||||
// We run this from `cloud_admin`, so it should have enough privileges.
|
||||
//
|
||||
// NB: there could be other db states, which prevent us from dropping
|
||||
// the database. For example, if db is used by any active subscription
|
||||
// or replication slot.
|
||||
// TODO: deal with it once we allow logical replication. Proper fix should
|
||||
// involve returning an error code to the control plane, so it could
|
||||
// figure out that this is a non-retryable error, return it to the user
|
||||
// and fail operation permanently.
|
||||
// Such cases are handled in the DropSubscriptionsForDeletedDatabases
|
||||
// phase. We do all the cleanup before actually dropping the database.
|
||||
let drop_db_query: String = format!(
|
||||
"DROP DATABASE IF EXISTS {} WITH (FORCE)",
|
||||
&op.name.pg_quote()
|
||||
@@ -444,6 +444,30 @@ async fn get_operations<'a>(
|
||||
}
|
||||
ApplySpecPhase::RunInEachDatabase { db, subphase } => {
|
||||
match subphase {
|
||||
PerDatabasePhase::DropSubscriptionsForDeletedDatabases => {
|
||||
match &db {
|
||||
DB::UserDB(db) => {
|
||||
let drop_subscription_query: String = format!(
|
||||
include_str!("sql/drop_subscription_for_drop_dbs.sql"),
|
||||
datname_str = escape_literal(&db.name),
|
||||
);
|
||||
|
||||
let operations = vec![Operation {
|
||||
query: drop_subscription_query,
|
||||
comment: Some(format!(
|
||||
"optionally dropping subscriptions for DB {}",
|
||||
db.name,
|
||||
)),
|
||||
}]
|
||||
.into_iter();
|
||||
|
||||
Ok(Box::new(operations))
|
||||
}
|
||||
// skip this cleanup for the system databases
|
||||
// because users can't drop them
|
||||
DB::SystemDB => Ok(Box::new(empty())),
|
||||
}
|
||||
}
|
||||
PerDatabasePhase::DeleteDBRoleReferences => {
|
||||
let ctx = ctx.read().await;
|
||||
|
||||
@@ -474,7 +498,19 @@ async fn get_operations<'a>(
|
||||
),
|
||||
comment: None,
|
||||
},
|
||||
// Revoke some potentially blocking privileges (Neon-specific currently)
|
||||
Operation {
|
||||
query: format!(
|
||||
include_str!("sql/pre_drop_role_revoke_privileges.sql"),
|
||||
role_name = quoted,
|
||||
),
|
||||
comment: None,
|
||||
},
|
||||
// This now will only drop privileges of the role
|
||||
// TODO: this is obviously not 100% true because of the above case,
|
||||
// there could be still some privileges that are not revoked. Maybe this
|
||||
// only drops privileges that were granted *by this* role, not *to this* role,
|
||||
// but this has to be checked.
|
||||
Operation {
|
||||
query: format!("DROP OWNED BY {}", quoted),
|
||||
comment: None,
|
||||
|
||||
11
compute_tools/src/sql/drop_subscription_for_drop_dbs.sql
Normal file
11
compute_tools/src/sql/drop_subscription_for_drop_dbs.sql
Normal file
@@ -0,0 +1,11 @@
|
||||
DO $$
|
||||
DECLARE
|
||||
subname TEXT;
|
||||
BEGIN
|
||||
FOR subname IN SELECT pg_subscription.subname FROM pg_subscription WHERE subdbid = (SELECT oid FROM pg_database WHERE datname = {datname_str}) LOOP
|
||||
EXECUTE format('ALTER SUBSCRIPTION %I DISABLE;', subname);
|
||||
EXECUTE format('ALTER SUBSCRIPTION %I SET (slot_name = NONE);', subname);
|
||||
EXECUTE format('DROP SUBSCRIPTION %I;', subname);
|
||||
END LOOP;
|
||||
END;
|
||||
$$;
|
||||
28
compute_tools/src/sql/pre_drop_role_revoke_privileges.sql
Normal file
28
compute_tools/src/sql/pre_drop_role_revoke_privileges.sql
Normal file
@@ -0,0 +1,28 @@
|
||||
SET SESSION ROLE neon_superuser;
|
||||
|
||||
DO $$
|
||||
DECLARE
|
||||
schema TEXT;
|
||||
revoke_query TEXT;
|
||||
BEGIN
|
||||
FOR schema IN
|
||||
SELECT schema_name
|
||||
FROM information_schema.schemata
|
||||
-- So far, we only had issues with 'public' schema. Probably, because we do some additional grants,
|
||||
-- e.g., make DB owner the owner of 'public' schema automatically (when created via API).
|
||||
-- See https://github.com/neondatabase/cloud/issues/13582 for the context.
|
||||
-- Still, keep the loop because i) it efficiently handles the case when there is no 'public' schema,
|
||||
-- ii) it's easy to add more schemas to the list if needed.
|
||||
WHERE schema_name IN ('public')
|
||||
LOOP
|
||||
revoke_query := format(
|
||||
'REVOKE ALL PRIVILEGES ON ALL TABLES IN SCHEMA %I FROM {role_name} GRANTED BY neon_superuser;',
|
||||
schema
|
||||
);
|
||||
|
||||
EXECUTE revoke_query;
|
||||
END LOOP;
|
||||
END;
|
||||
$$;
|
||||
|
||||
RESET ROLE;
|
||||
@@ -19,6 +19,7 @@ use control_plane::storage_controller::{
|
||||
NeonStorageControllerStartArgs, NeonStorageControllerStopArgs, StorageController,
|
||||
};
|
||||
use control_plane::{broker, local_env};
|
||||
use nix::fcntl::{flock, FlockArg};
|
||||
use pageserver_api::config::{
|
||||
DEFAULT_HTTP_LISTEN_PORT as DEFAULT_PAGESERVER_HTTP_PORT,
|
||||
DEFAULT_PG_LISTEN_PORT as DEFAULT_PAGESERVER_PG_PORT,
|
||||
@@ -36,6 +37,8 @@ use safekeeper_api::{
|
||||
};
|
||||
use std::borrow::Cow;
|
||||
use std::collections::{BTreeSet, HashMap};
|
||||
use std::fs::File;
|
||||
use std::os::fd::AsRawFd;
|
||||
use std::path::PathBuf;
|
||||
use std::process::exit;
|
||||
use std::str::FromStr;
|
||||
@@ -689,6 +692,21 @@ struct TimelineTreeEl {
|
||||
pub children: BTreeSet<TimelineId>,
|
||||
}
|
||||
|
||||
/// A flock-based guard over the neon_local repository directory
|
||||
struct RepoLock {
|
||||
_file: File,
|
||||
}
|
||||
|
||||
impl RepoLock {
|
||||
fn new() -> Result<Self> {
|
||||
let repo_dir = File::open(local_env::base_path())?;
|
||||
let repo_dir_fd = repo_dir.as_raw_fd();
|
||||
flock(repo_dir_fd, FlockArg::LockExclusive)?;
|
||||
|
||||
Ok(Self { _file: repo_dir })
|
||||
}
|
||||
}
|
||||
|
||||
// Main entry point for the 'neon_local' CLI utility
|
||||
//
|
||||
// This utility helps to manage neon installation. That includes following:
|
||||
@@ -700,9 +718,14 @@ fn main() -> Result<()> {
|
||||
let cli = Cli::parse();
|
||||
|
||||
// Check for 'neon init' command first.
|
||||
let subcommand_result = if let NeonLocalCmd::Init(args) = cli.command {
|
||||
handle_init(&args).map(|env| Some(Cow::Owned(env)))
|
||||
let (subcommand_result, _lock) = if let NeonLocalCmd::Init(args) = cli.command {
|
||||
(handle_init(&args).map(|env| Some(Cow::Owned(env))), None)
|
||||
} else {
|
||||
// This tool uses a collection of simple files to store its state, and consequently
|
||||
// it is not generally safe to run multiple commands concurrently. Rather than expect
|
||||
// all callers to know this, use a lock file to protect against concurrent execution.
|
||||
let _repo_lock = RepoLock::new().unwrap();
|
||||
|
||||
// all other commands need an existing config
|
||||
let env = LocalEnv::load_config(&local_env::base_path()).context("Error loading config")?;
|
||||
let original_env = env.clone();
|
||||
@@ -728,11 +751,12 @@ fn main() -> Result<()> {
|
||||
NeonLocalCmd::Mappings(subcmd) => handle_mappings(&subcmd, env),
|
||||
};
|
||||
|
||||
if &original_env != env {
|
||||
let subcommand_result = if &original_env != env {
|
||||
subcommand_result.map(|()| Some(Cow::Borrowed(env)))
|
||||
} else {
|
||||
subcommand_result.map(|()| None)
|
||||
}
|
||||
};
|
||||
(subcommand_result, Some(_repo_lock))
|
||||
};
|
||||
|
||||
match subcommand_result {
|
||||
@@ -922,7 +946,7 @@ fn handle_init(args: &InitCmdArgs) -> anyhow::Result<LocalEnv> {
|
||||
} else {
|
||||
// User (likely interactive) did not provide a description of the environment, give them the default
|
||||
NeonLocalInitConf {
|
||||
control_plane_api: Some(Some(DEFAULT_PAGESERVER_CONTROL_PLANE_API.parse().unwrap())),
|
||||
control_plane_api: Some(DEFAULT_PAGESERVER_CONTROL_PLANE_API.parse().unwrap()),
|
||||
broker: NeonBroker {
|
||||
listen_addr: DEFAULT_BROKER_ADDR.parse().unwrap(),
|
||||
},
|
||||
@@ -1718,18 +1742,15 @@ async fn handle_start_all_impl(
|
||||
broker::start_broker_process(env, &retry_timeout).await
|
||||
});
|
||||
|
||||
// Only start the storage controller if the pageserver is configured to need it
|
||||
if env.control_plane_api.is_some() {
|
||||
js.spawn(async move {
|
||||
let storage_controller = StorageController::from_env(env);
|
||||
storage_controller
|
||||
.start(NeonStorageControllerStartArgs::with_default_instance_id(
|
||||
retry_timeout,
|
||||
))
|
||||
.await
|
||||
.map_err(|e| e.context("start storage_controller"))
|
||||
});
|
||||
}
|
||||
js.spawn(async move {
|
||||
let storage_controller = StorageController::from_env(env);
|
||||
storage_controller
|
||||
.start(NeonStorageControllerStartArgs::with_default_instance_id(
|
||||
retry_timeout,
|
||||
))
|
||||
.await
|
||||
.map_err(|e| e.context("start storage_controller"))
|
||||
});
|
||||
|
||||
for ps_conf in &env.pageservers {
|
||||
js.spawn(async move {
|
||||
@@ -1774,10 +1795,6 @@ async fn neon_start_status_check(
|
||||
const RETRY_INTERVAL: Duration = Duration::from_millis(100);
|
||||
const NOTICE_AFTER_RETRIES: Duration = Duration::from_secs(5);
|
||||
|
||||
if env.control_plane_api.is_none() {
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
let storcon = StorageController::from_env(env);
|
||||
|
||||
let retries = retry_timeout.as_millis() / RETRY_INTERVAL.as_millis();
|
||||
|
||||
@@ -62,7 +62,7 @@ use crate::local_env::LocalEnv;
|
||||
use crate::postgresql_conf::PostgresConf;
|
||||
use crate::storage_controller::StorageController;
|
||||
|
||||
use compute_api::responses::{ComputeState, ComputeStatus};
|
||||
use compute_api::responses::{ComputeStatus, ComputeStatusResponse};
|
||||
use compute_api::spec::{Cluster, ComputeFeature, ComputeMode, ComputeSpec};
|
||||
|
||||
// contents of a endpoint.json file
|
||||
@@ -316,6 +316,10 @@ impl Endpoint {
|
||||
// and can cause errors like 'no unpinned buffers available', see
|
||||
// <https://github.com/neondatabase/neon/issues/9956>
|
||||
conf.append("shared_buffers", "1MB");
|
||||
// Postgres defaults to effective_io_concurrency=1, which does not exercise the pageserver's
|
||||
// batching logic. Set this to 2 so that we exercise the code a bit without letting
|
||||
// individual tests do a lot of concurrent work on underpowered test machines
|
||||
conf.append("effective_io_concurrency", "2");
|
||||
conf.append("fsync", "off");
|
||||
conf.append("max_connections", "100");
|
||||
conf.append("wal_level", "logical");
|
||||
@@ -581,6 +585,7 @@ impl Endpoint {
|
||||
features: self.features.clone(),
|
||||
swap_size_bytes: None,
|
||||
disk_quota_bytes: None,
|
||||
disable_lfc_resizing: None,
|
||||
cluster: Cluster {
|
||||
cluster_id: None, // project ID: not used
|
||||
name: None, // project name: not used
|
||||
@@ -734,7 +739,7 @@ impl Endpoint {
|
||||
}
|
||||
|
||||
// Call the /status HTTP API
|
||||
pub async fn get_status(&self) -> Result<ComputeState> {
|
||||
pub async fn get_status(&self) -> Result<ComputeStatusResponse> {
|
||||
let client = reqwest::Client::new();
|
||||
|
||||
let response = client
|
||||
|
||||
@@ -76,7 +76,7 @@ pub struct LocalEnv {
|
||||
|
||||
// Control plane upcall API for pageserver: if None, we will not run storage_controller If set, this will
|
||||
// be propagated into each pageserver's configuration.
|
||||
pub control_plane_api: Option<Url>,
|
||||
pub control_plane_api: Url,
|
||||
|
||||
// Control plane upcall API for storage controller. If set, this will be propagated into the
|
||||
// storage controller's configuration.
|
||||
@@ -133,7 +133,7 @@ pub struct NeonLocalInitConf {
|
||||
pub storage_controller: Option<NeonStorageControllerConf>,
|
||||
pub pageservers: Vec<NeonLocalInitPageserverConf>,
|
||||
pub safekeepers: Vec<SafekeeperConf>,
|
||||
pub control_plane_api: Option<Option<Url>>,
|
||||
pub control_plane_api: Option<Url>,
|
||||
pub control_plane_compute_hook_api: Option<Option<Url>>,
|
||||
}
|
||||
|
||||
@@ -180,7 +180,7 @@ impl NeonStorageControllerConf {
|
||||
const DEFAULT_MAX_WARMING_UP_INTERVAL: std::time::Duration = std::time::Duration::from_secs(30);
|
||||
|
||||
// Very tight heartbeat interval to speed up tests
|
||||
const DEFAULT_HEARTBEAT_INTERVAL: std::time::Duration = std::time::Duration::from_millis(100);
|
||||
const DEFAULT_HEARTBEAT_INTERVAL: std::time::Duration = std::time::Duration::from_millis(1000);
|
||||
}
|
||||
|
||||
impl Default for NeonStorageControllerConf {
|
||||
@@ -483,7 +483,6 @@ impl LocalEnv {
|
||||
.iter()
|
||||
.find(|(mapped_tenant_id, _)| mapped_tenant_id == &tenant_id)
|
||||
.map(|&(_, timeline_id)| timeline_id)
|
||||
.map(TimelineId::from)
|
||||
}
|
||||
|
||||
pub fn timeline_name_mappings(&self) -> HashMap<TenantTimelineId, String> {
|
||||
@@ -535,7 +534,7 @@ impl LocalEnv {
|
||||
storage_controller,
|
||||
pageservers,
|
||||
safekeepers,
|
||||
control_plane_api,
|
||||
control_plane_api: control_plane_api.unwrap(),
|
||||
control_plane_compute_hook_api,
|
||||
branch_name_mappings,
|
||||
}
|
||||
@@ -638,7 +637,7 @@ impl LocalEnv {
|
||||
storage_controller: self.storage_controller.clone(),
|
||||
pageservers: vec![], // it's skip_serializing anyway
|
||||
safekeepers: self.safekeepers.clone(),
|
||||
control_plane_api: self.control_plane_api.clone(),
|
||||
control_plane_api: Some(self.control_plane_api.clone()),
|
||||
control_plane_compute_hook_api: self.control_plane_compute_hook_api.clone(),
|
||||
branch_name_mappings: self.branch_name_mappings.clone(),
|
||||
},
|
||||
@@ -768,7 +767,7 @@ impl LocalEnv {
|
||||
storage_controller: storage_controller.unwrap_or_default(),
|
||||
pageservers: pageservers.iter().map(Into::into).collect(),
|
||||
safekeepers,
|
||||
control_plane_api: control_plane_api.unwrap_or_default(),
|
||||
control_plane_api: control_plane_api.unwrap(),
|
||||
control_plane_compute_hook_api: control_plane_compute_hook_api.unwrap_or_default(),
|
||||
branch_name_mappings: Default::default(),
|
||||
};
|
||||
|
||||
@@ -95,21 +95,19 @@ impl PageServerNode {
|
||||
|
||||
let mut overrides = vec![pg_distrib_dir_param, broker_endpoint_param];
|
||||
|
||||
if let Some(control_plane_api) = &self.env.control_plane_api {
|
||||
overrides.push(format!(
|
||||
"control_plane_api='{}'",
|
||||
control_plane_api.as_str()
|
||||
));
|
||||
overrides.push(format!(
|
||||
"control_plane_api='{}'",
|
||||
self.env.control_plane_api.as_str()
|
||||
));
|
||||
|
||||
// Storage controller uses the same auth as pageserver: if JWT is enabled
|
||||
// for us, we will also need it to talk to them.
|
||||
if matches!(conf.http_auth_type, AuthType::NeonJWT) {
|
||||
let jwt_token = self
|
||||
.env
|
||||
.generate_auth_token(&Claims::new(None, Scope::GenerationsApi))
|
||||
.unwrap();
|
||||
overrides.push(format!("control_plane_api_token='{}'", jwt_token));
|
||||
}
|
||||
// Storage controller uses the same auth as pageserver: if JWT is enabled
|
||||
// for us, we will also need it to talk to them.
|
||||
if matches!(conf.http_auth_type, AuthType::NeonJWT) {
|
||||
let jwt_token = self
|
||||
.env
|
||||
.generate_auth_token(&Claims::new(None, Scope::GenerationsApi))
|
||||
.unwrap();
|
||||
overrides.push(format!("control_plane_api_token='{}'", jwt_token));
|
||||
}
|
||||
|
||||
if !conf.other.contains_key("remote_storage") {
|
||||
|
||||
@@ -338,7 +338,7 @@ impl StorageController {
|
||||
.port(),
|
||||
)
|
||||
} else {
|
||||
let listen_url = self.env.control_plane_api.clone().unwrap();
|
||||
let listen_url = self.env.control_plane_api.clone();
|
||||
|
||||
let listen = format!(
|
||||
"{}:{}",
|
||||
@@ -708,7 +708,7 @@ impl StorageController {
|
||||
} else {
|
||||
// The configured URL has the /upcall path prefix for pageservers to use: we will strip that out
|
||||
// for general purpose API access.
|
||||
let listen_url = self.env.control_plane_api.clone().unwrap();
|
||||
let listen_url = self.env.control_plane_api.clone();
|
||||
Url::from_str(&format!(
|
||||
"http://{}:{}/{path}",
|
||||
listen_url.host_str().unwrap(),
|
||||
@@ -822,10 +822,7 @@ impl StorageController {
|
||||
self.dispatch(
|
||||
Method::PUT,
|
||||
format!("control/v1/tenant/{tenant_shard_id}/migrate"),
|
||||
Some(TenantShardMigrateRequest {
|
||||
tenant_shard_id,
|
||||
node_id,
|
||||
}),
|
||||
Some(TenantShardMigrateRequest { node_id }),
|
||||
)
|
||||
.await
|
||||
}
|
||||
|
||||
@@ -1,11 +1,17 @@
|
||||
use futures::StreamExt;
|
||||
use std::{str::FromStr, time::Duration};
|
||||
use std::{
|
||||
collections::{HashMap, HashSet},
|
||||
str::FromStr,
|
||||
time::Duration,
|
||||
};
|
||||
|
||||
use clap::{Parser, Subcommand};
|
||||
use pageserver_api::{
|
||||
controller_api::{
|
||||
AvailabilityZone, NodeAvailabilityWrapper, NodeDescribeResponse, NodeShardResponse,
|
||||
ShardSchedulingPolicy, TenantCreateRequest, TenantDescribeResponse, TenantPolicyRequest,
|
||||
SafekeeperDescribeResponse, SafekeeperSchedulingPolicyRequest, ShardSchedulingPolicy,
|
||||
ShardsPreferredAzsRequest, SkSchedulingPolicy, TenantCreateRequest, TenantDescribeResponse,
|
||||
TenantPolicyRequest,
|
||||
},
|
||||
models::{
|
||||
EvictionPolicy, EvictionPolicyLayerAccessThreshold, LocationConfigSecondary,
|
||||
@@ -111,6 +117,13 @@ enum Command {
|
||||
#[arg(long)]
|
||||
node: NodeId,
|
||||
},
|
||||
/// Migrate the secondary location for a tenant shard to a specific pageserver.
|
||||
TenantShardMigrateSecondary {
|
||||
#[arg(long)]
|
||||
tenant_shard_id: TenantShardId,
|
||||
#[arg(long)]
|
||||
node: NodeId,
|
||||
},
|
||||
/// Cancel any ongoing reconciliation for this shard
|
||||
TenantShardCancelReconcile {
|
||||
#[arg(long)]
|
||||
@@ -145,6 +158,12 @@ enum Command {
|
||||
#[arg(long)]
|
||||
tenant_id: TenantId,
|
||||
},
|
||||
TenantSetPreferredAz {
|
||||
#[arg(long)]
|
||||
tenant_id: TenantId,
|
||||
#[arg(long)]
|
||||
preferred_az: Option<String>,
|
||||
},
|
||||
/// Uncleanly drop a tenant from the storage controller: this doesn't delete anything from pageservers. Appropriate
|
||||
/// if you e.g. used `tenant-warmup` by mistake on a tenant ID that doesn't really exist, or is in some other region.
|
||||
TenantDrop {
|
||||
@@ -211,6 +230,15 @@ enum Command {
|
||||
#[arg(long)]
|
||||
timeout: humantime::Duration,
|
||||
},
|
||||
/// List safekeepers known to the storage controller
|
||||
Safekeepers {},
|
||||
/// Set the scheduling policy of the specified safekeeper
|
||||
SafekeeperScheduling {
|
||||
#[arg(long)]
|
||||
node_id: NodeId,
|
||||
#[arg(long)]
|
||||
scheduling_policy: SkSchedulingPolicyArg,
|
||||
},
|
||||
}
|
||||
|
||||
#[derive(Parser)]
|
||||
@@ -263,6 +291,17 @@ impl FromStr for PlacementPolicyArg {
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
struct SkSchedulingPolicyArg(SkSchedulingPolicy);
|
||||
|
||||
impl FromStr for SkSchedulingPolicyArg {
|
||||
type Err = anyhow::Error;
|
||||
|
||||
fn from_str(s: &str) -> Result<Self, Self::Err> {
|
||||
SkSchedulingPolicy::from_str(s).map(Self)
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
struct ShardSchedulingPolicyArg(ShardSchedulingPolicy);
|
||||
|
||||
@@ -392,11 +431,12 @@ async fn main() -> anyhow::Result<()> {
|
||||
resp.sort_by(|a, b| a.listen_http_addr.cmp(&b.listen_http_addr));
|
||||
|
||||
let mut table = comfy_table::Table::new();
|
||||
table.set_header(["Id", "Hostname", "Scheduling", "Availability"]);
|
||||
table.set_header(["Id", "Hostname", "AZ", "Scheduling", "Availability"]);
|
||||
for node in resp {
|
||||
table.add_row([
|
||||
format!("{}", node.id),
|
||||
node.listen_http_addr,
|
||||
node.availability_zone_id,
|
||||
format!("{:?}", node.scheduling),
|
||||
format!("{:?}", node.availability),
|
||||
]);
|
||||
@@ -456,33 +496,65 @@ async fn main() -> anyhow::Result<()> {
|
||||
println!("{table}");
|
||||
}
|
||||
Command::Tenants { node_id: None } => {
|
||||
let mut resp = storcon_client
|
||||
.dispatch::<(), Vec<TenantDescribeResponse>>(
|
||||
Method::GET,
|
||||
"control/v1/tenant".to_string(),
|
||||
None,
|
||||
)
|
||||
.await?;
|
||||
|
||||
resp.sort_by(|a, b| a.tenant_id.cmp(&b.tenant_id));
|
||||
|
||||
// Set up output formatting
|
||||
let mut table = comfy_table::Table::new();
|
||||
table.set_header([
|
||||
"TenantId",
|
||||
"Preferred AZ",
|
||||
"ShardCount",
|
||||
"StripeSize",
|
||||
"Placement",
|
||||
"Scheduling",
|
||||
]);
|
||||
for tenant in resp {
|
||||
let shard_zero = tenant.shards.into_iter().next().unwrap();
|
||||
table.add_row([
|
||||
format!("{}", tenant.tenant_id),
|
||||
format!("{}", shard_zero.tenant_shard_id.shard_count.literal()),
|
||||
format!("{:?}", tenant.stripe_size),
|
||||
format!("{:?}", tenant.policy),
|
||||
format!("{:?}", shard_zero.scheduling_policy),
|
||||
]);
|
||||
|
||||
// Pagination loop over listing API
|
||||
let mut start_after = None;
|
||||
const LIMIT: usize = 1000;
|
||||
loop {
|
||||
let path = match start_after {
|
||||
None => format!("control/v1/tenant?limit={LIMIT}"),
|
||||
Some(start_after) => {
|
||||
format!("control/v1/tenant?limit={LIMIT}&start_after={start_after}")
|
||||
}
|
||||
};
|
||||
|
||||
let resp = storcon_client
|
||||
.dispatch::<(), Vec<TenantDescribeResponse>>(Method::GET, path, None)
|
||||
.await?;
|
||||
|
||||
if resp.is_empty() {
|
||||
// End of data reached
|
||||
break;
|
||||
}
|
||||
|
||||
// Give some visual feedback while we're building up the table (comfy_table doesn't have
|
||||
// streaming output)
|
||||
if resp.len() >= LIMIT {
|
||||
eprint!(".");
|
||||
}
|
||||
|
||||
start_after = Some(resp.last().unwrap().tenant_id);
|
||||
|
||||
for tenant in resp {
|
||||
let shard_zero = tenant.shards.into_iter().next().unwrap();
|
||||
table.add_row([
|
||||
format!("{}", tenant.tenant_id),
|
||||
shard_zero
|
||||
.preferred_az_id
|
||||
.as_ref()
|
||||
.cloned()
|
||||
.unwrap_or("".to_string()),
|
||||
format!("{}", shard_zero.tenant_shard_id.shard_count.literal()),
|
||||
format!("{:?}", tenant.stripe_size),
|
||||
format!("{:?}", tenant.policy),
|
||||
format!("{:?}", shard_zero.scheduling_policy),
|
||||
]);
|
||||
}
|
||||
}
|
||||
|
||||
// Terminate progress dots
|
||||
if table.row_count() > LIMIT {
|
||||
eprint!("");
|
||||
}
|
||||
|
||||
println!("{table}");
|
||||
@@ -537,10 +609,7 @@ async fn main() -> anyhow::Result<()> {
|
||||
tenant_shard_id,
|
||||
node,
|
||||
} => {
|
||||
let req = TenantShardMigrateRequest {
|
||||
tenant_shard_id,
|
||||
node_id: node,
|
||||
};
|
||||
let req = TenantShardMigrateRequest { node_id: node };
|
||||
|
||||
storcon_client
|
||||
.dispatch::<TenantShardMigrateRequest, TenantShardMigrateResponse>(
|
||||
@@ -550,6 +619,20 @@ async fn main() -> anyhow::Result<()> {
|
||||
)
|
||||
.await?;
|
||||
}
|
||||
Command::TenantShardMigrateSecondary {
|
||||
tenant_shard_id,
|
||||
node,
|
||||
} => {
|
||||
let req = TenantShardMigrateRequest { node_id: node };
|
||||
|
||||
storcon_client
|
||||
.dispatch::<TenantShardMigrateRequest, TenantShardMigrateResponse>(
|
||||
Method::PUT,
|
||||
format!("control/v1/tenant/{tenant_shard_id}/migrate_secondary"),
|
||||
Some(req),
|
||||
)
|
||||
.await?;
|
||||
}
|
||||
Command::TenantShardCancelReconcile { tenant_shard_id } => {
|
||||
storcon_client
|
||||
.dispatch::<(), ()>(
|
||||
@@ -593,6 +676,19 @@ async fn main() -> anyhow::Result<()> {
|
||||
None,
|
||||
)
|
||||
.await?;
|
||||
|
||||
let nodes = storcon_client
|
||||
.dispatch::<(), Vec<NodeDescribeResponse>>(
|
||||
Method::GET,
|
||||
"control/v1/node".to_string(),
|
||||
None,
|
||||
)
|
||||
.await?;
|
||||
let nodes = nodes
|
||||
.into_iter()
|
||||
.map(|n| (n.id, n))
|
||||
.collect::<HashMap<_, _>>();
|
||||
|
||||
println!("Tenant {tenant_id}");
|
||||
let mut table = comfy_table::Table::new();
|
||||
table.add_row(["Policy", &format!("{:?}", policy)]);
|
||||
@@ -601,7 +697,14 @@ async fn main() -> anyhow::Result<()> {
|
||||
println!("{table}");
|
||||
println!("Shards:");
|
||||
let mut table = comfy_table::Table::new();
|
||||
table.set_header(["Shard", "Attached", "Secondary", "Last error", "status"]);
|
||||
table.set_header([
|
||||
"Shard",
|
||||
"Attached",
|
||||
"Attached AZ",
|
||||
"Secondary",
|
||||
"Last error",
|
||||
"status",
|
||||
]);
|
||||
for shard in shards {
|
||||
let secondary = shard
|
||||
.node_secondary
|
||||
@@ -624,11 +727,18 @@ async fn main() -> anyhow::Result<()> {
|
||||
}
|
||||
let status = status_parts.join(",");
|
||||
|
||||
let attached_node = shard
|
||||
.node_attached
|
||||
.as_ref()
|
||||
.map(|id| nodes.get(id).expect("Shard references nonexistent node"));
|
||||
|
||||
table.add_row([
|
||||
format!("{}", shard.tenant_shard_id),
|
||||
shard
|
||||
.node_attached
|
||||
.map(|n| format!("{}", n))
|
||||
attached_node
|
||||
.map(|n| format!("{} ({})", n.listen_http_addr, n.id))
|
||||
.unwrap_or(String::new()),
|
||||
attached_node
|
||||
.map(|n| n.availability_zone_id.clone())
|
||||
.unwrap_or(String::new()),
|
||||
secondary,
|
||||
shard.last_error,
|
||||
@@ -637,6 +747,66 @@ async fn main() -> anyhow::Result<()> {
|
||||
}
|
||||
println!("{table}");
|
||||
}
|
||||
Command::TenantSetPreferredAz {
|
||||
tenant_id,
|
||||
preferred_az,
|
||||
} => {
|
||||
// First learn about the tenant's shards
|
||||
let describe_response = storcon_client
|
||||
.dispatch::<(), TenantDescribeResponse>(
|
||||
Method::GET,
|
||||
format!("control/v1/tenant/{tenant_id}"),
|
||||
None,
|
||||
)
|
||||
.await?;
|
||||
|
||||
// Learn about nodes to validate the AZ ID
|
||||
let nodes = storcon_client
|
||||
.dispatch::<(), Vec<NodeDescribeResponse>>(
|
||||
Method::GET,
|
||||
"control/v1/node".to_string(),
|
||||
None,
|
||||
)
|
||||
.await?;
|
||||
|
||||
if let Some(preferred_az) = &preferred_az {
|
||||
let azs = nodes
|
||||
.into_iter()
|
||||
.map(|n| (n.availability_zone_id))
|
||||
.collect::<HashSet<_>>();
|
||||
if !azs.contains(preferred_az) {
|
||||
anyhow::bail!(
|
||||
"AZ {} not found on any node: known AZs are: {:?}",
|
||||
preferred_az,
|
||||
azs
|
||||
);
|
||||
}
|
||||
} else {
|
||||
// Make it obvious to the user that since they've omitted an AZ, we're clearing it
|
||||
eprintln!("Clearing preferred AZ for tenant {}", tenant_id);
|
||||
}
|
||||
|
||||
// Construct a request that modifies all the tenant's shards
|
||||
let req = ShardsPreferredAzsRequest {
|
||||
preferred_az_ids: describe_response
|
||||
.shards
|
||||
.into_iter()
|
||||
.map(|s| {
|
||||
(
|
||||
s.tenant_shard_id,
|
||||
preferred_az.clone().map(AvailabilityZone),
|
||||
)
|
||||
})
|
||||
.collect(),
|
||||
};
|
||||
storcon_client
|
||||
.dispatch::<ShardsPreferredAzsRequest, ()>(
|
||||
Method::PUT,
|
||||
"control/v1/preferred_azs".to_string(),
|
||||
Some(req),
|
||||
)
|
||||
.await?;
|
||||
}
|
||||
Command::TenantWarmup { tenant_id } => {
|
||||
let describe_response = storcon_client
|
||||
.dispatch::<(), TenantDescribeResponse>(
|
||||
@@ -912,10 +1082,7 @@ async fn main() -> anyhow::Result<()> {
|
||||
.dispatch::<TenantShardMigrateRequest, TenantShardMigrateResponse>(
|
||||
Method::PUT,
|
||||
format!("control/v1/tenant/{}/migrate", mv.tenant_shard_id),
|
||||
Some(TenantShardMigrateRequest {
|
||||
tenant_shard_id: mv.tenant_shard_id,
|
||||
node_id: mv.to,
|
||||
}),
|
||||
Some(TenantShardMigrateRequest { node_id: mv.to }),
|
||||
)
|
||||
.await
|
||||
.map_err(|e| (mv.tenant_shard_id, mv.from, mv.to, e))
|
||||
@@ -1020,6 +1187,57 @@ async fn main() -> anyhow::Result<()> {
|
||||
"Fill was cancelled for node {node_id}. Schedulling policy is now {final_policy:?}"
|
||||
);
|
||||
}
|
||||
Command::Safekeepers {} => {
|
||||
let mut resp = storcon_client
|
||||
.dispatch::<(), Vec<SafekeeperDescribeResponse>>(
|
||||
Method::GET,
|
||||
"control/v1/safekeeper".to_string(),
|
||||
None,
|
||||
)
|
||||
.await?;
|
||||
|
||||
resp.sort_by(|a, b| a.id.cmp(&b.id));
|
||||
|
||||
let mut table = comfy_table::Table::new();
|
||||
table.set_header([
|
||||
"Id",
|
||||
"Version",
|
||||
"Host",
|
||||
"Port",
|
||||
"Http Port",
|
||||
"AZ Id",
|
||||
"Scheduling",
|
||||
]);
|
||||
for sk in resp {
|
||||
table.add_row([
|
||||
format!("{}", sk.id),
|
||||
format!("{}", sk.version),
|
||||
sk.host,
|
||||
format!("{}", sk.port),
|
||||
format!("{}", sk.http_port),
|
||||
sk.availability_zone_id.clone(),
|
||||
String::from(sk.scheduling_policy),
|
||||
]);
|
||||
}
|
||||
println!("{table}");
|
||||
}
|
||||
Command::SafekeeperScheduling {
|
||||
node_id,
|
||||
scheduling_policy,
|
||||
} => {
|
||||
let scheduling_policy = scheduling_policy.0;
|
||||
storcon_client
|
||||
.dispatch::<SafekeeperSchedulingPolicyRequest, ()>(
|
||||
Method::POST,
|
||||
format!("control/v1/safekeeper/{node_id}/scheduling_policy"),
|
||||
Some(SafekeeperSchedulingPolicyRequest { scheduling_policy }),
|
||||
)
|
||||
.await?;
|
||||
println!(
|
||||
"Scheduling policy of {node_id} set to {}",
|
||||
String::from(scheduling_policy)
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
Ok(())
|
||||
|
||||
@@ -7,15 +7,11 @@ Currently we build two main images:
|
||||
- [neondatabase/neon](https://hub.docker.com/repository/docker/neondatabase/neon) — image with pre-built `pageserver`, `safekeeper` and `proxy` binaries and all the required runtime dependencies. Built from [/Dockerfile](/Dockerfile).
|
||||
- [neondatabase/compute-node-v16](https://hub.docker.com/repository/docker/neondatabase/compute-node-v16) — compute node image with pre-built Postgres binaries from [neondatabase/postgres](https://github.com/neondatabase/postgres). Similar images exist for v15 and v14. Built from [/compute-node/Dockerfile](/compute/compute-node.Dockerfile).
|
||||
|
||||
And additional intermediate image:
|
||||
|
||||
- [neondatabase/compute-tools](https://hub.docker.com/repository/docker/neondatabase/compute-tools) — compute node configuration management tools.
|
||||
|
||||
## Build pipeline
|
||||
|
||||
We build all images after a successful `release` tests run and push automatically to Docker Hub with two parallel CI jobs
|
||||
|
||||
1. `neondatabase/compute-tools` and `neondatabase/compute-node-v16` (and -v15 and -v14)
|
||||
1. `neondatabase/compute-node-v17` (and -16, -v15, -v14)
|
||||
|
||||
2. `neondatabase/neon`
|
||||
|
||||
|
||||
@@ -81,7 +81,7 @@ configuration generation in them is less than its current one. Namely, it
|
||||
refuses to vote, to truncate WAL in `handle_elected` and to accept WAL. In
|
||||
response it sends its current configuration generation to let walproposer know.
|
||||
|
||||
Safekeeper gets `PUT /v1/tenants/{tenant_id}/timelines/{timeline_id}/configuration`
|
||||
Safekeeper gets `PUT /v1/tenants/{tenant_id}/timelines/{timeline_id}/configuration`
|
||||
accepting `Configuration`. Safekeeper switches to the given conf it is higher than its
|
||||
current one and ignores it otherwise. In any case it replies with
|
||||
```
|
||||
@@ -103,7 +103,7 @@ currently and tries to communicate with all of them. However, the list does not
|
||||
define consensus members. Instead, on start walproposer tracks highest
|
||||
configuration it receives from `AcceptorGreeting`s. Once it assembles greetings
|
||||
from majority of `sk_set` and majority of `new_sk_set` (if it is present), it
|
||||
establishes this configuration as its own and moves to voting.
|
||||
establishes this configuration as its own and moves to voting.
|
||||
|
||||
It should stop talking to safekeepers not listed in the configuration at this
|
||||
point, though it is not unsafe to continue doing so.
|
||||
@@ -119,7 +119,7 @@ refusal to accept due to configuration change) it simply restarts.
|
||||
The following algorithm can be executed anywhere having access to configuration
|
||||
storage and safekeepers. It is safe to interrupt / restart it and run multiple
|
||||
instances of it concurrently, though likely one of them won't make
|
||||
progress then. It accepts `desired_set: Vec<NodeId>` as input.
|
||||
progress then. It accepts `desired_set: Vec<NodeId>` as input.
|
||||
|
||||
Algorithm will refuse to make the change if it encounters previous interrupted
|
||||
change attempt, but in this case it will try to finish it.
|
||||
@@ -140,7 +140,7 @@ storage are reachable.
|
||||
safe. Failed CAS aborts the procedure.
|
||||
4) Call `PUT` `configuration` on safekeepers from the current set,
|
||||
delivering them `joint_conf`. Collecting responses from majority is required
|
||||
to proceed. If any response returned generation higher than
|
||||
to proceed. If any response returned generation higher than
|
||||
`joint_conf.generation`, abort (another switch raced us). Otherwise, choose
|
||||
max `<last_log_term, flush_lsn>` among responses and establish it as
|
||||
(in memory) `sync_position`. Also choose max `term` and establish it as (in
|
||||
@@ -149,49 +149,49 @@ storage are reachable.
|
||||
without ack from the new set. Similarly, we'll bump term on new majority
|
||||
to `sync_term` so that two computes with the same term are never elected.
|
||||
4) Initialize timeline on safekeeper(s) from `new_sk_set` where it
|
||||
doesn't exist yet by doing `pull_timeline` from the majority of the
|
||||
doesn't exist yet by doing `pull_timeline` from the majority of the
|
||||
current set. Doing that on majority of `new_sk_set` is enough to
|
||||
proceed, but it is reasonable to ensure that all `new_sk_set` members
|
||||
are initialized -- if some of them are down why are we migrating there?
|
||||
5) Call `POST` `bump_term(sync_term)` on safekeepers from the new set.
|
||||
5) Call `POST` `bump_term(sync_term)` on safekeepers from the new set.
|
||||
Success on majority is enough.
|
||||
6) Repeatedly call `PUT` `configuration` on safekeepers from the new set,
|
||||
delivering them `joint_conf` and collecting their positions. This will
|
||||
switch them to the `joint_conf` which generally won't be needed
|
||||
switch them to the `joint_conf` which generally won't be needed
|
||||
because `pull_timeline` already includes it and plus additionally would be
|
||||
broadcast by compute. More importantly, we may proceed to the next step
|
||||
only when `<last_log_term, flush_lsn>` on the majority of the new set reached
|
||||
`sync_position`. Similarly, on the happy path no waiting is not needed because
|
||||
only when `<last_log_term, flush_lsn>` on the majority of the new set reached
|
||||
`sync_position`. Similarly, on the happy path no waiting is not needed because
|
||||
`pull_timeline` already includes it. However, we should double
|
||||
check to be safe. For example, timeline could have been created earlier e.g.
|
||||
manually or after try-to-migrate, abort, try-to-migrate-again sequence.
|
||||
7) Create `new_conf: Configuration` incrementing `join_conf` generation and having new
|
||||
safekeeper set as `sk_set` and None `new_sk_set`. Write it to configuration
|
||||
manually or after try-to-migrate, abort, try-to-migrate-again sequence.
|
||||
7) Create `new_conf: Configuration` incrementing `join_conf` generation and having new
|
||||
safekeeper set as `sk_set` and None `new_sk_set`. Write it to configuration
|
||||
storage under one more CAS.
|
||||
8) Call `PUT` `configuration` on safekeepers from the new set,
|
||||
delivering them `new_conf`. It is enough to deliver it to the majority
|
||||
delivering them `new_conf`. It is enough to deliver it to the majority
|
||||
of the new set; the rest can be updated by compute.
|
||||
|
||||
I haven't put huge effort to make the description above very precise, because it
|
||||
is natural language prone to interpretations anyway. Instead I'd like to make TLA+
|
||||
spec of it.
|
||||
|
||||
Description above focuses on safety. To make the flow practical and live, here a few more
|
||||
Description above focuses on safety. To make the flow practical and live, here a few more
|
||||
considerations.
|
||||
1) It makes sense to ping new set to ensure it we are migrating to live node(s) before
|
||||
1) It makes sense to ping new set to ensure it we are migrating to live node(s) before
|
||||
step 3.
|
||||
2) If e.g. accidentally wrong new sk set has been specified, before CAS in step `6` is completed
|
||||
2) If e.g. accidentally wrong new sk set has been specified, before CAS in step `6` is completed
|
||||
it is safe to rollback to the old conf with one more CAS.
|
||||
3) On step 4 timeline might be already created on members of the new set for various reasons;
|
||||
3) On step 4 timeline might be already created on members of the new set for various reasons;
|
||||
the simplest is the procedure restart. There are more complicated scenarious like mentioned
|
||||
in step 5. Deleting and re-doing `pull_timeline` is generally unsafe without involving
|
||||
generations, so seems simpler to treat existing timeline as success. However, this also
|
||||
in step 5. Deleting and re-doing `pull_timeline` is generally unsafe without involving
|
||||
generations, so seems simpler to treat existing timeline as success. However, this also
|
||||
has a disadvantage: you might imagine an surpassingly unlikely schedule where condition in
|
||||
the step 5 is never reached until compute is (re)awaken up to synchronize new member(s).
|
||||
I don't think we'll observe this in practice, but can add waking up compute if needed.
|
||||
4) In the end timeline should be locally deleted on the safekeeper(s) which are
|
||||
in the old set but not in the new one, unless they are unreachable. To be
|
||||
safe this also should be done under generation number (deletion proceeds only if
|
||||
safe this also should be done under generation number (deletion proceeds only if
|
||||
current configuration is <= than one in request and safekeeper is not memeber of it).
|
||||
5) If current conf fetched on step 1 is already not joint and members equal to `desired_set`,
|
||||
jump to step 7, using it as `new_conf`.
|
||||
@@ -202,47 +202,87 @@ The procedure ought to be driven from somewhere. Obvious candidates are control
|
||||
plane and storage_controller; and as each of them already has db we don't want
|
||||
yet another storage. I propose to manage safekeepers in storage_controller
|
||||
because 1) since it is in rust it simplifies simulation testing (more on this
|
||||
below) 2) it already manages pageservers.
|
||||
below) 2) it already manages pageservers.
|
||||
|
||||
This assumes that migration will be fully usable only after we migrate all
|
||||
tenants/timelines to storage_controller. It is discussible whether we want also
|
||||
to manage pageserver attachments for all of these, but likely we do.
|
||||
|
||||
This requires us to define storcon <-> cplane interface.
|
||||
This requires us to define storcon <-> cplane interface and changes.
|
||||
|
||||
### storage_controller <-> control plane interface
|
||||
### storage_controller <-> control plane interface and changes
|
||||
|
||||
First of all, control plane should
|
||||
[change](https://neondb.slack.com/archives/C03438W3FLZ/p1719226543199829)
|
||||
storing safekeepers per timeline instead of per tenant because we can't migrate
|
||||
tenants atomically.
|
||||
tenants atomically.
|
||||
|
||||
The important question is how updated configuration is delivered from
|
||||
storage_controller to control plane to provide it to computes. As always, there
|
||||
are two options, pull and push. Let's do it the same push as with pageserver
|
||||
`/notify-attach` because 1) it keeps storage_controller out of critical compute
|
||||
start path 2) provides easier upgrade: there won't be such a thing as 'timeline
|
||||
managed by control plane / storcon', cplane just takes the value out of its db
|
||||
when needed 3) uniformity. It makes storage_controller responsible for retrying notifying
|
||||
control plane until it succeeds.
|
||||
start path 2) uniformity. It makes storage_controller responsible for retrying
|
||||
notifying control plane until it succeeds.
|
||||
|
||||
So, cplane `/notify-safekeepers` for the timeline accepts `Configuration` and
|
||||
updates it in the db if the provided conf generation is higher (the cplane db
|
||||
should also store generations for this). Similarly to [`/notify-attach`](https://www.notion.so/neondatabase/Storage-Controller-Control-Plane-interface-6de56dd310a043bfa5c2f5564fa98365), it
|
||||
should update db which makes the call successful, and then try to schedule
|
||||
`apply_config` if possible, it is ok if not. storage_controller
|
||||
should rate limit calling the endpoint, but likely this won't be needed, as migration
|
||||
It is not needed for the control plane to fully know the `Configuration`. It is
|
||||
enough for it to only to be aware of the list of safekeepers in the latest
|
||||
configuration to supply it to compute, plus associated generation number to
|
||||
protect from stale update requests and to also pass it to compute.
|
||||
|
||||
So, cplane `/notify-safekeepers` for the timeline can accept JSON like
|
||||
```
|
||||
{
|
||||
tenant_id: String,
|
||||
timeline_id: String,
|
||||
generation: u32,
|
||||
safekeepers: Vec<SafekeeperId>,
|
||||
}
|
||||
```
|
||||
where `SafekeeperId` is
|
||||
```
|
||||
{
|
||||
node_id: u64,
|
||||
host: String
|
||||
}
|
||||
```
|
||||
In principle `host` is redundant, but may be useful for observability.
|
||||
|
||||
The request updates list of safekeepers in the db if the provided conf
|
||||
generation is higher (the cplane db should also store generations for this).
|
||||
Similarly to
|
||||
[`/notify-attach`](https://www.notion.so/neondatabase/Storage-Controller-Control-Plane-interface-6de56dd310a043bfa5c2f5564fa98365),
|
||||
it should update db which makes the call successful, and then try to schedule
|
||||
`apply_config` if possible, it is ok if not. storage_controller should rate
|
||||
limit calling the endpoint, but likely this won't be needed, as migration
|
||||
throughput is limited by `pull_timeline`.
|
||||
|
||||
Timeline (branch) creation in cplane should call storage_controller POST
|
||||
`tenant/:tenant_id/timeline` like it currently does for sharded tenants.
|
||||
Response should be augmented with `safekeeper_conf: Configuration`. The call
|
||||
should be retried until succeeds.
|
||||
Response should be augmented with `safekeepers_generation` and `safekeepers`
|
||||
fields like described in `/notify-safekeepers` above. Initially (currently)
|
||||
these fields may be absent; in this case cplane chooses safekeepers on its own
|
||||
like it currently does. The call should be retried until succeeds.
|
||||
|
||||
Timeline deletion and tenant deletion in cplane should call appropriate
|
||||
storage_controller endpoints like it currently does for sharded tenants. The
|
||||
calls should be retried until they succeed.
|
||||
|
||||
When compute receives safekeepers list from control plane it needs to know the
|
||||
generation to checked whether it should be updated (note that compute may get
|
||||
safekeeper list from either cplane or safekeepers). Currently `neon.safekeepers`
|
||||
GUC is just a comma separates list of `host:port`. Let's prefix it with
|
||||
`g#<generation>:` to this end, so it will look like
|
||||
```
|
||||
g#42:safekeeper-0.eu-central-1.aws.neon.tech:6401,safekeeper-2.eu-central-1.aws.neon.tech:6401,safekeeper-1.eu-central-1.aws.neon.tech:6401
|
||||
```
|
||||
|
||||
To summarize, list of cplane changes:
|
||||
- per tenant -> per timeline safekeepers management and addition of int `safekeeper_generation` field.
|
||||
- `/notify-safekeepers` endpoint.
|
||||
- Branch creation call may return list of safekeepers and when it is
|
||||
present cplane should adopt it instead of choosing on its own like it does currently.
|
||||
- `neon.safekeepers` GUC should be prefixed with `g#<generation>:`.
|
||||
|
||||
### storage_controller implementation
|
||||
|
||||
Current 'load everything on startup and keep in memory' easy design is fine.
|
||||
@@ -360,10 +400,10 @@ source safekeeper might fail, which is not a problem if we are going to
|
||||
decomission the node but leaves garbage otherwise. I'd propose in the first version
|
||||
1) Don't attempt deletion at all if node status is `offline`.
|
||||
2) If it failed, just issue warning.
|
||||
And add PUT `/control/v1/safekeepers/:node_id/scrub` endpoint which would find and
|
||||
remove garbage timelines for manual use. It will 1) list all timelines on the
|
||||
safekeeper 2) compare each one against configuration storage: if timeline
|
||||
doesn't exist at all (had been deleted), it can be deleted. Otherwise, it can
|
||||
And add PUT `/control/v1/safekeepers/:node_id/scrub` endpoint which would find and
|
||||
remove garbage timelines for manual use. It will 1) list all timelines on the
|
||||
safekeeper 2) compare each one against configuration storage: if timeline
|
||||
doesn't exist at all (had been deleted), it can be deleted. Otherwise, it can
|
||||
be deleted under generation number if node is not member of current generation.
|
||||
|
||||
Automating this is untrivial; we'd need to register all potential missing
|
||||
@@ -412,8 +452,8 @@ There should be following layers of tests:
|
||||
3) Since simulation testing injects at relatively high level points (not
|
||||
syscalls), it omits some code, in particular `pull_timeline`. Thus it is
|
||||
better to have basic tests covering whole system as well. Extended version of
|
||||
`test_restarts_under_load` would do: start background load and do migration
|
||||
under it, then restart endpoint and check that no reported commits
|
||||
`test_restarts_under_load` would do: start background load and do migration
|
||||
under it, then restart endpoint and check that no reported commits
|
||||
had been lost. I'd also add one more creating classic network split scenario, with
|
||||
one compute talking to AC and another to BD while migration from nodes ABC to ABD
|
||||
happens.
|
||||
@@ -422,35 +462,51 @@ There should be following layers of tests:
|
||||
|
||||
## Order of implementation and rollout
|
||||
|
||||
Note that
|
||||
Note that
|
||||
- Control plane parts and integration with it is fully independent from everything else
|
||||
(tests would use simulation and neon_local).
|
||||
- It is reasonable to make compute <-> safekeepers protocol change
|
||||
independent of enabling generations.
|
||||
- There is a lot of infra work making storage_controller aware of timelines and safekeepers
|
||||
and its impl/rollout should be separate from migration itself.
|
||||
- Initially walproposer can just stop working while it observers joint configuration.
|
||||
- Initially walproposer can just stop working while it observes joint configuration.
|
||||
Such window would be typically very short anyway.
|
||||
- Obviously we want to test the whole thing thoroughly on staging and only then
|
||||
gradually enable in prod.
|
||||
|
||||
To rollout smoothly, both walproposer and safekeeper should have flag
|
||||
`configurations_enabled`; when set to false, they would work as currently, i.e.
|
||||
walproposer is able to commit on whatever safekeeper set it is provided. Until
|
||||
all timelines are managed by storcon we'd need to use current script to migrate
|
||||
and update/drop entries in the storage_controller database if it has any.
|
||||
Let's have the following implementation bits for gradual rollout:
|
||||
- compute gets `neon.safekeepers_proto_version` flag.
|
||||
Initially both compute and safekeepers will be able to talk both
|
||||
versions so that we can delay force restart of them and for
|
||||
simplicity of rollback in case it is needed.
|
||||
- storcon gets `-set-safekeepers` config option disabled by
|
||||
default. Timeline creation request chooses safekeepers
|
||||
(and returns them in response to cplane) only when it is set to
|
||||
true.
|
||||
- control_plane [see above](storage_controller-<->-control-plane interface-and-changes)
|
||||
prefixes `neon.safekeepers` GUC with generation number. When it is 0
|
||||
(or prefix not present at all), walproposer behaves as currently, committing on
|
||||
the provided safekeeper list -- generations are disabled.
|
||||
If it is non 0 it follows this RFC rules.
|
||||
- We provide a script for manual migration to storage controller.
|
||||
It selects timeline(s) from control plane (specified or all of them) db
|
||||
and calls special import endpoint on storage controller which is very
|
||||
similar to timeline creation: it inserts into the db, sets
|
||||
configuration to initial on the safekeepers, calls cplane
|
||||
`notify-safekeepers`.
|
||||
|
||||
Safekeepers would need to be able to talk both current and new protocol version
|
||||
with compute to reduce number of computes restarted in prod once v2 protocol is
|
||||
deployed (though before completely switching we'd need to force this).
|
||||
|
||||
Let's have the following rollout order:
|
||||
- storage_controller becomes aware of safekeepers;
|
||||
- storage_controller gets timeline creation for new timelines and deletion requests, but
|
||||
doesn't manage all timelines yet. Migration can be tested on these new timelines.
|
||||
To keep control plane and storage_controller databases in sync while control
|
||||
plane still chooses the safekeepers initially (until all timelines are imported
|
||||
it can choose better), `TimelineCreateRequest` can get optional safekeepers
|
||||
field with safekeepers chosen by cplane.
|
||||
- Then we can import all existing timelines from control plane to
|
||||
storage_controller and gradually enable configurations region by region.
|
||||
Then the rollout for a region would be:
|
||||
- Current situation: safekeepers are choosen by control_plane.
|
||||
- We manually migrate some timelines, test moving them around.
|
||||
- Then we enable `--set-safekeepers` so that all new timelines
|
||||
are on storage controller.
|
||||
- Finally migrate all existing timelines using the script (no
|
||||
compute should be speaking old proto version at this point).
|
||||
|
||||
Until all timelines are managed by storcon we'd need to use current ad hoc
|
||||
script to migrate if needed. To keep state clean, all storage controller managed
|
||||
timelines must be migrated before that, or controller db and configurations
|
||||
state of safekeepers dropped manually.
|
||||
|
||||
Very rough implementation order:
|
||||
- Add concept of configurations to safekeepers (including control file),
|
||||
@@ -458,10 +514,10 @@ Very rough implementation order:
|
||||
- Implement walproposer changes, including protocol.
|
||||
- Implement storconn part. Use it in neon_local (and pytest).
|
||||
- Make cplane store safekeepers per timeline instead of per tenant.
|
||||
- Implement cplane/storcon integration. Route branch creation/deletion
|
||||
- Implement cplane/storcon integration. Route branch creation/deletion
|
||||
through storcon. Then we can test migration of new branches.
|
||||
- Finally import existing branches. Then we can drop cplane
|
||||
safekeeper selection code. Gradually enable configurations at
|
||||
- Finally import existing branches. Then we can drop cplane
|
||||
safekeeper selection code. Gradually enable configurations at
|
||||
computes and safekeepers. Before that, all computes must talk only
|
||||
v3 protocol version.
|
||||
|
||||
|
||||
247
docs/rfcs/040-profiling.md
Normal file
247
docs/rfcs/040-profiling.md
Normal file
@@ -0,0 +1,247 @@
|
||||
# CPU and Memory Profiling
|
||||
|
||||
Created 2025-01-12 by Erik Grinaker.
|
||||
|
||||
See also [internal user guide](https://www.notion.so/neondatabase/Storage-CPU-Memory-Profiling-14bf189e004780228ec7d04442742324?pvs=4).
|
||||
|
||||
## Summary
|
||||
|
||||
This document proposes a standard cross-team pattern for CPU and memory profiling across
|
||||
applications and languages, using the [pprof](https://github.com/google/pprof) profile format.
|
||||
|
||||
It enables both ad hoc profiles via HTTP endpoints, and continuous profiling across the fleet via
|
||||
[Grafana Cloud Profiles](https://grafana.com/docs/grafana-cloud/monitor-applications/profiles/).
|
||||
Continuous profiling incurs an overhead of about 0.1% CPU usage and 3% slower heap allocations.
|
||||
|
||||
## Motivation
|
||||
|
||||
CPU and memory profiles are crucial observability tools for understanding performance issues,
|
||||
resource exhaustion, and resource costs. They allow answering questions like:
|
||||
|
||||
* Why is this process using 100% CPU?
|
||||
* How do I make this go faster?
|
||||
* Why did this process run out of memory?
|
||||
* Why are we paying for all these CPU cores and memory chips?
|
||||
|
||||
Go has [first-class support](https://pkg.go.dev/net/http/pprof) for profiling included in its
|
||||
standard library, using the [pprof](https://github.com/google/pprof) profile format and associated
|
||||
tooling.
|
||||
|
||||
This is not the case for Rust and C, where obtaining profiles can be rather cumbersome. It requires
|
||||
installing and running additional tools like `perf` as root on production nodes, with analysis tools
|
||||
that can be hard to use and often don't give good results. This is not only annoying, but can also
|
||||
significantly affect the resolution time of production incidents.
|
||||
|
||||
This proposal will:
|
||||
|
||||
* Provide CPU and heap profiles in pprof format via HTTP API.
|
||||
* Record continuous profiles in Grafana for aggregate historical analysis.
|
||||
* Make it easy for anyone to see a flamegraph in less than one minute.
|
||||
* Be reasonably consistent across teams and services (Rust, Go, C).
|
||||
|
||||
## Non Goals (For Now)
|
||||
|
||||
* [Additional profile types](https://grafana.com/docs/pyroscope/next/configure-client/profile-types/)
|
||||
like mutexes, locks, goroutines, etc.
|
||||
* [Runtime trace integration](https://grafana.com/docs/pyroscope/next/configure-client/trace-span-profiles/).
|
||||
* [Profile-guided optimization](https://en.wikipedia.org/wiki/Profile-guided_optimization).
|
||||
|
||||
## Using Profiles
|
||||
|
||||
Ready-to-use profiles can be obtained using e.g. `curl`. For Rust services:
|
||||
|
||||
```
|
||||
$ curl localhost:9898/profile/cpu >profile.pb.gz
|
||||
```
|
||||
|
||||
pprof profiles can be explored using the [`pprof`](https://github.com/google/pprof) web UI, which
|
||||
provides flamegraphs, call graphs, plain text listings, and more:
|
||||
|
||||
```
|
||||
$ pprof -http :6060 <profile>
|
||||
```
|
||||
|
||||
Some endpoints (e.g. Rust-based ones) can also generate flamegraph SVGs directly:
|
||||
|
||||
```
|
||||
$ curl localhost:9898/profile/cpu?format=svg >profile.svg
|
||||
$ open profile.svg
|
||||
```
|
||||
|
||||
Continuous profiles are available in Grafana under Explore → Profiles → Explore Profiles
|
||||
(currently only in [staging](https://neonstaging.grafana.net/a/grafana-pyroscope-app/profiles-explorer)).
|
||||
|
||||
## API Requirements
|
||||
|
||||
* HTTP endpoints that return a profile in pprof format (with symbols).
|
||||
* CPU: records a profile over the request time interval (`seconds` query parameter).
|
||||
* Memory: returns the current in-use heap allocations.
|
||||
* Unauthenticated, as it should not expose user data or pose a denial-of-service risk.
|
||||
* Default sample frequency should not impact service (maximum 5% CPU overhead).
|
||||
* Linux-compatibility.
|
||||
|
||||
Nice to have:
|
||||
|
||||
* Return flamegraph SVG directly from the HTTP endpoint if requested.
|
||||
* Configurable sample frequency for CPU profiles.
|
||||
* Historical heap allocations, by count and bytes.
|
||||
* macOS-compatiblity.
|
||||
|
||||
## Rust Profiling
|
||||
|
||||
[`libs/utils/src/http/endpoint.rs`](https://github.com/neondatabase/neon/blob/8327f68043e692c77f70d6a6dafa463636c01578/libs/utils/src/http/endpoint.rs)
|
||||
contains ready-to-use HTTP endpoints for CPU and memory profiling:
|
||||
[`profile_cpu_handler`](https://github.com/neondatabase/neon/blob/8327f68043e692c77f70d6a6dafa463636c01578/libs/utils/src/http/endpoint.rs#L338) and [`profile_heap_handler`](https://github.com/neondatabase/neon/blob/8327f68043e692c77f70d6a6dafa463636c01578/libs/utils/src/http/endpoint.rs#L416).
|
||||
|
||||
### CPU
|
||||
|
||||
CPU profiles are provided by [pprof-rs](https://github.com/tikv/pprof-rs) via
|
||||
[`profile_cpu_handler`](https://github.com/neondatabase/neon/blob/8327f68043e692c77f70d6a6dafa463636c01578/libs/utils/src/http/endpoint.rs#L338).
|
||||
Expose it unauthenticated at `/profile/cpu`.
|
||||
|
||||
Parameters:
|
||||
|
||||
* `format`: profile output format (`pprof` or `svg`; default `pprof`).
|
||||
* `seconds`: duration to collect profile over, in seconds (default `5`).
|
||||
* `frequency`: how often to sample thread stacks, in Hz (default `99`).
|
||||
* `force`: if `true`, cancel a running profile and start a new one (default `false`).
|
||||
|
||||
Works on Linux and macOS.
|
||||
|
||||
### Memory
|
||||
|
||||
Use the jemalloc allocator via [`tikv-jemallocator`](https://github.com/tikv/jemallocator),
|
||||
and enable profiling with samples every 2 MB allocated:
|
||||
|
||||
```rust
|
||||
#[global_allocator]
|
||||
static GLOBAL: tikv_jemallocator::Jemalloc = tikv_jemallocator::Jemalloc;
|
||||
|
||||
#[allow(non_upper_case_globals)]
|
||||
#[export_name = "malloc_conf"]
|
||||
pub static malloc_conf: &[u8] = b"prof:true,prof_active:true,lg_prof_sample:21\0";
|
||||
```
|
||||
|
||||
pprof profiles are generated by
|
||||
[`jemalloc-pprof`](https://github.com/polarsignals/rust-jemalloc-pprof) via
|
||||
[`profile_heap_handler`](https://github.com/neondatabase/neon/blob/8327f68043e692c77f70d6a6dafa463636c01578/libs/utils/src/http/endpoint.rs#L416).
|
||||
Expose it unauthenticated at `/profile/heap`.
|
||||
|
||||
Parameters:
|
||||
|
||||
* `format`: profile output format (`pprof`, `svg`, or `jemalloc`; default `pprof`).
|
||||
|
||||
Works on Linux only, due to [jemalloc limitations](https://github.com/jemalloc/jemalloc/issues/26).
|
||||
|
||||
## Go Profiling
|
||||
|
||||
The Go standard library includes pprof profiling via HTTP API in
|
||||
[`net/http/pprof`](https://pkg.go.dev/net/http/pprof). Expose it unauthenticated at
|
||||
`/debug/pprof`.
|
||||
|
||||
Works on Linux and macOS.
|
||||
|
||||
### CPU
|
||||
|
||||
Via `/debug/pprof/profile`. Parameters:
|
||||
|
||||
* `debug`: profile output format (`0` is pprof, `1` or above is plaintext; default `0`).
|
||||
* `seconds`: duration to collect profile over, in seconds (default `30`).
|
||||
|
||||
Does not support a frequency parameter (see [#57488](https://github.com/golang/go/issues/57488)),
|
||||
and defaults to 100 Hz. A lower frequency can be hardcoded via `SetCPUProfileRate`, but the default
|
||||
is likely ok (estimated 1% overhead).
|
||||
|
||||
### Memory
|
||||
|
||||
Via `/debug/pprof/heap`. Parameters:
|
||||
|
||||
* `seconds`: take a delta profile over the given duration, in seconds (default `0`).
|
||||
* `gc`: if `1`, garbage collect before taking profile.
|
||||
|
||||
## C Profiling
|
||||
|
||||
[gperftools](https://github.com/gperftools/gperftools) provides in-process CPU and heap profiling
|
||||
with pprof output.
|
||||
|
||||
However, continuous profiling of PostgreSQL is expensive (many computes), and has limited value
|
||||
since we don't own the internals anyway.
|
||||
|
||||
Ad hoc profiling might still be useful, but the compute team considers existing tooling sufficient,
|
||||
so this is not a priority at the moment.
|
||||
|
||||
## Grafana Continuous Profiling
|
||||
|
||||
[Grafana Alloy](https://grafana.com/docs/alloy/latest/) continually scrapes CPU and memory profiles
|
||||
across the fleet, and archives them as time series. This can be used to analyze resource usage over
|
||||
time, either in aggregate or zoomed in to specific events and nodes.
|
||||
|
||||
Profiles are retained for 30 days. Profile ingestion volume for CPU+heap at 60-second intervals
|
||||
is about 0.5 GB/node/day, or about $0.25/node/day = $7.5/node/month ($0.50/GB).
|
||||
|
||||
It is currently enabled in [staging](https://neonstaging.grafana.net/a/grafana-pyroscope-app/profiles-explorer)
|
||||
for Pageserver and Safekeeper.
|
||||
|
||||
### Scraping
|
||||
|
||||
* CPU profiling: 59 seconds at 19 Hz every 60 seconds.
|
||||
* Heap profiling: heap snapshot with 2 MB frequency every 60 seconds.
|
||||
|
||||
There are two main approaches that can be taken for CPU profiles:
|
||||
|
||||
* Continuous low-frequency profiles (e.g. 19 Hz for 60 seconds every 60 seconds).
|
||||
* Occasional high-frequency profiles (e.g. 99 Hz for 5 seconds every 60 seconds).
|
||||
|
||||
We choose continuous low-frequency profiles where possible. This has a fixed low overhead, instead
|
||||
of a spiky high overhead. It likely also gives a more representative view of resource usage.
|
||||
However, a 19 Hz rate gives a minimum resolution of 52.6 ms per sample, which may be larger than the
|
||||
actual runtime of small functions. Note that Go does not support a frequency parameter, so we must
|
||||
use a fixed frequency for all profiles via `SetCPUProfileRate()` (default 100 Hz).
|
||||
|
||||
Only one CPU profile can be taken at a time. With continuous profiling, one will always be running.
|
||||
To allow also taking an ad hoc CPU profile, the Rust endpoint supports a `force` query parameter to
|
||||
cancel a running profile and start a new one.
|
||||
|
||||
### Overhead
|
||||
|
||||
With Rust:
|
||||
|
||||
* CPU profiles at 19 Hz frequency: 0.1% overhead.
|
||||
* Heap profiles at 2 MB frequency: 3% allocation overhead.
|
||||
* Profile call/encoding/symbolization: 20 ms every 60 seconds, or 0.03% of 1 CPU (for Pageserver).
|
||||
* Profile symbolization caches: 125 MB memory, or 0.4% of 32 GB (for Pageserver).
|
||||
|
||||
Benchmarks with pprof-rs showed that the CPU time for taking a stack trace of a 40-frame stack was
|
||||
11 µs using the `frame-pointer` feature, and 1.4 µs using `libunwind` with DWARF. `libunwind` saw
|
||||
frequent seg faults, so we use `frame-pointer` and build binaries with frame pointers (negligible
|
||||
overhead).
|
||||
|
||||
CPU profiles work by installing an `ITIMER_PROF` for the process, which triggers a `SIGPROF` signal
|
||||
after a given amount of cumulative CPU time across all CPUs. The signal handler will run for one
|
||||
of the currently executing threads and take a stack trace. Thus, a 19 Hz profile will take 1 stack
|
||||
trace every 52.6 ms CPU time -- assuming 11 µs for a stack trace, this is 0.02% overhead, but
|
||||
likely 0.1% in practice (given e.g. context switches).
|
||||
|
||||
Heap profiles work by probabilistically taking a stack trace on allocations, adjusted for the
|
||||
allocation size. A 1 MB allocation takes about 15 µs in benchmarks, and a stack trace about 1 µs,
|
||||
so we can estimate that a 2 MB sampling frequency has about 3% allocation overhead -- this is
|
||||
consistent with benchmarks. This is significantly larger than CPU profiles, but mitigated by the
|
||||
fact that performance-sensitive code will avoid allocations as far as possible.
|
||||
|
||||
Profile symbolization uses in-memory caches for symbol lookups. These take about 125 MB for
|
||||
Pageserver.
|
||||
|
||||
## Alternatives Considered
|
||||
|
||||
* eBPF profiles.
|
||||
* Don't require instrumenting the binary.
|
||||
* Use less resources.
|
||||
* Can profile in kernel space too.
|
||||
* Supported by Grafana.
|
||||
* Less information about stack frames and spans.
|
||||
* Limited tooling for local analysis.
|
||||
* Does not support heap profiles.
|
||||
* Does not work on macOS.
|
||||
|
||||
* [Polar Signals](https://www.polarsignals.com) instead of Grafana.
|
||||
* We already use Grafana for everything else. Appears good enough.
|
||||
@@ -15,6 +15,17 @@ pub struct GenericAPIError {
|
||||
pub error: String,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize)]
|
||||
pub struct InfoResponse {
|
||||
pub num_cpus: usize,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize)]
|
||||
pub struct ExtensionInstallResponse {
|
||||
pub extension: PgIdent,
|
||||
pub version: ExtVersion,
|
||||
}
|
||||
|
||||
/// Response of the /status API
|
||||
#[derive(Serialize, Debug, Deserialize)]
|
||||
#[serde(rename_all = "snake_case")]
|
||||
@@ -28,16 +39,6 @@ pub struct ComputeStatusResponse {
|
||||
pub error: Option<String>,
|
||||
}
|
||||
|
||||
#[derive(Deserialize, Serialize)]
|
||||
#[serde(rename_all = "snake_case")]
|
||||
pub struct ComputeState {
|
||||
pub status: ComputeStatus,
|
||||
/// Timestamp of the last Postgres activity
|
||||
#[serde(serialize_with = "rfc3339_serialize")]
|
||||
pub last_active: Option<DateTime<Utc>>,
|
||||
pub error: Option<String>,
|
||||
}
|
||||
|
||||
#[derive(Serialize, Clone, Copy, Debug, Deserialize, PartialEq, Eq)]
|
||||
#[serde(rename_all = "snake_case")]
|
||||
pub enum ComputeStatus {
|
||||
@@ -78,7 +79,7 @@ impl Display for ComputeStatus {
|
||||
}
|
||||
}
|
||||
|
||||
fn rfc3339_serialize<S>(x: &Option<DateTime<Utc>>, s: S) -> Result<S::Ok, S::Error>
|
||||
pub fn rfc3339_serialize<S>(x: &Option<DateTime<Utc>>, s: S) -> Result<S::Ok, S::Error>
|
||||
where
|
||||
S: Serializer,
|
||||
{
|
||||
|
||||
@@ -67,6 +67,15 @@ pub struct ComputeSpec {
|
||||
#[serde(default)]
|
||||
pub disk_quota_bytes: Option<u64>,
|
||||
|
||||
/// Disables the vm-monitor behavior that resizes LFC on upscale/downscale, instead relying on
|
||||
/// the initial size of LFC.
|
||||
///
|
||||
/// This is intended for use when the LFC size is being overridden from the default but
|
||||
/// autoscaling is still enabled, and we don't want the vm-monitor to interfere with the custom
|
||||
/// LFC sizing.
|
||||
#[serde(default)]
|
||||
pub disable_lfc_resizing: Option<bool>,
|
||||
|
||||
/// Expected cluster state at the end of transition process.
|
||||
pub cluster: Cluster,
|
||||
pub delta_operations: Option<Vec<DeltaOp>>,
|
||||
|
||||
@@ -87,7 +87,7 @@ impl Display for AvailabilityZone {
|
||||
#[derive(Serialize, Deserialize)]
|
||||
pub struct ShardsPreferredAzsRequest {
|
||||
#[serde(flatten)]
|
||||
pub preferred_az_ids: HashMap<TenantShardId, AvailabilityZone>,
|
||||
pub preferred_az_ids: HashMap<TenantShardId, Option<AvailabilityZone>>,
|
||||
}
|
||||
|
||||
#[derive(Serialize, Deserialize)]
|
||||
@@ -144,6 +144,8 @@ pub struct NodeDescribeResponse {
|
||||
pub availability: NodeAvailabilityWrapper,
|
||||
pub scheduling: NodeSchedulingPolicy,
|
||||
|
||||
pub availability_zone_id: String,
|
||||
|
||||
pub listen_http_addr: String,
|
||||
pub listen_http_port: u16,
|
||||
|
||||
@@ -179,7 +181,6 @@ pub struct TenantDescribeResponseShard {
|
||||
/// specifies some constraints, e.g. asking it to get off particular node(s)
|
||||
#[derive(Serialize, Deserialize, Debug)]
|
||||
pub struct TenantShardMigrateRequest {
|
||||
pub tenant_shard_id: TenantShardId,
|
||||
pub node_id: NodeId,
|
||||
}
|
||||
|
||||
@@ -320,6 +321,42 @@ impl From<NodeSchedulingPolicy> for String {
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Serialize, Deserialize, Clone, Copy, Eq, PartialEq, Debug)]
|
||||
pub enum SkSchedulingPolicy {
|
||||
Active,
|
||||
Pause,
|
||||
Decomissioned,
|
||||
}
|
||||
|
||||
impl FromStr for SkSchedulingPolicy {
|
||||
type Err = anyhow::Error;
|
||||
|
||||
fn from_str(s: &str) -> Result<Self, Self::Err> {
|
||||
Ok(match s {
|
||||
"active" => Self::Active,
|
||||
"pause" => Self::Pause,
|
||||
"decomissioned" => Self::Decomissioned,
|
||||
_ => {
|
||||
return Err(anyhow::anyhow!(
|
||||
"Unknown scheduling policy '{s}', try active,pause,decomissioned"
|
||||
))
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
impl From<SkSchedulingPolicy> for String {
|
||||
fn from(value: SkSchedulingPolicy) -> String {
|
||||
use SkSchedulingPolicy::*;
|
||||
match value {
|
||||
Active => "active",
|
||||
Pause => "pause",
|
||||
Decomissioned => "decomissioned",
|
||||
}
|
||||
.to_string()
|
||||
}
|
||||
}
|
||||
|
||||
/// Controls how tenant shards are mapped to locations on pageservers, e.g. whether
|
||||
/// to create secondary locations.
|
||||
#[derive(Clone, Serialize, Deserialize, Debug, PartialEq, Eq)]
|
||||
@@ -336,6 +373,16 @@ pub enum PlacementPolicy {
|
||||
Detached,
|
||||
}
|
||||
|
||||
impl PlacementPolicy {
|
||||
pub fn want_secondaries(&self) -> usize {
|
||||
match self {
|
||||
PlacementPolicy::Attached(secondary_count) => *secondary_count,
|
||||
PlacementPolicy::Secondary => 1,
|
||||
PlacementPolicy::Detached => 0,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Serialize, Deserialize, Debug)]
|
||||
pub struct TenantShardMigrateResponse {}
|
||||
|
||||
@@ -372,6 +419,27 @@ pub struct MetadataHealthListOutdatedResponse {
|
||||
pub health_records: Vec<MetadataHealthRecord>,
|
||||
}
|
||||
|
||||
/// Publicly exposed safekeeper description
|
||||
#[derive(Serialize, Deserialize, Clone)]
|
||||
pub struct SafekeeperDescribeResponse {
|
||||
pub id: NodeId,
|
||||
pub region_id: String,
|
||||
/// 1 is special, it means just created (not currently posted to storcon).
|
||||
/// Zero or negative is not really expected.
|
||||
/// Otherwise the number from `release-$(number_of_commits_on_branch)` tag.
|
||||
pub version: i64,
|
||||
pub host: String,
|
||||
pub port: i32,
|
||||
pub http_port: i32,
|
||||
pub availability_zone_id: String,
|
||||
pub scheduling_policy: SkSchedulingPolicy,
|
||||
}
|
||||
|
||||
#[derive(Serialize, Deserialize, Clone)]
|
||||
pub struct SafekeeperSchedulingPolicyRequest {
|
||||
pub scheduling_policy: SkSchedulingPolicy,
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod test {
|
||||
use super::*;
|
||||
|
||||
@@ -24,7 +24,9 @@ pub struct Key {
|
||||
|
||||
/// When working with large numbers of Keys in-memory, it is more efficient to handle them as i128 than as
|
||||
/// a struct of fields.
|
||||
#[derive(Clone, Copy, Hash, PartialEq, Eq, Ord, PartialOrd, Serialize, Deserialize, Debug)]
|
||||
#[derive(
|
||||
Clone, Copy, Default, Hash, PartialEq, Eq, Ord, PartialOrd, Serialize, Deserialize, Debug,
|
||||
)]
|
||||
pub struct CompactKey(i128);
|
||||
|
||||
/// The storage key size.
|
||||
@@ -706,7 +708,7 @@ pub fn repl_origin_key_range() -> Range<Key> {
|
||||
/// Non inherited range for vectored get.
|
||||
pub const NON_INHERITED_RANGE: Range<Key> = AUX_FILES_KEY..AUX_FILES_KEY.next();
|
||||
/// Sparse keyspace range for vectored get. Missing key error will be ignored for this range.
|
||||
pub const NON_INHERITED_SPARSE_RANGE: Range<Key> = Key::metadata_key_range();
|
||||
pub const SPARSE_RANGE: Range<Key> = Key::metadata_key_range();
|
||||
|
||||
impl Key {
|
||||
// AUX_FILES currently stores only data for logical replication (slots etc), and
|
||||
@@ -714,7 +716,42 @@ impl Key {
|
||||
// switch (and generally it likely should be optional), so ignore these.
|
||||
#[inline(always)]
|
||||
pub fn is_inherited_key(self) -> bool {
|
||||
!NON_INHERITED_RANGE.contains(&self) && !NON_INHERITED_SPARSE_RANGE.contains(&self)
|
||||
if self.is_sparse() {
|
||||
self.is_inherited_sparse_key()
|
||||
} else {
|
||||
!NON_INHERITED_RANGE.contains(&self)
|
||||
}
|
||||
}
|
||||
|
||||
#[inline(always)]
|
||||
pub fn is_sparse(self) -> bool {
|
||||
self.field1 >= METADATA_KEY_BEGIN_PREFIX && self.field1 < METADATA_KEY_END_PREFIX
|
||||
}
|
||||
|
||||
/// Check if the key belongs to the inherited keyspace.
|
||||
fn is_inherited_sparse_key(self) -> bool {
|
||||
debug_assert!(self.is_sparse());
|
||||
self.field1 == RELATION_SIZE_PREFIX
|
||||
}
|
||||
|
||||
pub fn sparse_non_inherited_keyspace() -> Range<Key> {
|
||||
// The two keys are adjacent; if we will have non-adjancent keys in the future, we should return a keyspace
|
||||
debug_assert_eq!(AUX_KEY_PREFIX + 1, REPL_ORIGIN_KEY_PREFIX);
|
||||
Key {
|
||||
field1: AUX_KEY_PREFIX,
|
||||
field2: 0,
|
||||
field3: 0,
|
||||
field4: 0,
|
||||
field5: 0,
|
||||
field6: 0,
|
||||
}..Key {
|
||||
field1: REPL_ORIGIN_KEY_PREFIX + 1,
|
||||
field2: 0,
|
||||
field3: 0,
|
||||
field4: 0,
|
||||
field5: 0,
|
||||
field6: 0,
|
||||
}
|
||||
}
|
||||
|
||||
#[inline(always)]
|
||||
|
||||
@@ -6,6 +6,7 @@ pub mod utilization;
|
||||
use camino::Utf8PathBuf;
|
||||
pub use utilization::PageserverUtilization;
|
||||
|
||||
use core::ops::Range;
|
||||
use std::{
|
||||
collections::HashMap,
|
||||
fmt::Display,
|
||||
@@ -28,10 +29,10 @@ use utils::{
|
||||
};
|
||||
|
||||
use crate::{
|
||||
key::{CompactKey, Key},
|
||||
reltag::RelTag,
|
||||
shard::{ShardCount, ShardStripeSize, TenantShardId},
|
||||
};
|
||||
use anyhow::bail;
|
||||
use bytes::{Buf, BufMut, Bytes, BytesMut};
|
||||
|
||||
/// The state of a tenant in this pageserver.
|
||||
@@ -210,6 +211,70 @@ pub enum TimelineState {
|
||||
Broken { reason: String, backtrace: String },
|
||||
}
|
||||
|
||||
#[serde_with::serde_as]
|
||||
#[derive(Debug, Clone, serde::Deserialize, serde::Serialize)]
|
||||
pub struct CompactLsnRange {
|
||||
pub start: Lsn,
|
||||
pub end: Lsn,
|
||||
}
|
||||
|
||||
#[serde_with::serde_as]
|
||||
#[derive(Debug, Clone, serde::Deserialize, serde::Serialize)]
|
||||
pub struct CompactKeyRange {
|
||||
#[serde_as(as = "serde_with::DisplayFromStr")]
|
||||
pub start: Key,
|
||||
#[serde_as(as = "serde_with::DisplayFromStr")]
|
||||
pub end: Key,
|
||||
}
|
||||
|
||||
impl From<Range<Lsn>> for CompactLsnRange {
|
||||
fn from(range: Range<Lsn>) -> Self {
|
||||
Self {
|
||||
start: range.start,
|
||||
end: range.end,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl From<Range<Key>> for CompactKeyRange {
|
||||
fn from(range: Range<Key>) -> Self {
|
||||
Self {
|
||||
start: range.start,
|
||||
end: range.end,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl From<CompactLsnRange> for Range<Lsn> {
|
||||
fn from(range: CompactLsnRange) -> Self {
|
||||
range.start..range.end
|
||||
}
|
||||
}
|
||||
|
||||
impl From<CompactKeyRange> for Range<Key> {
|
||||
fn from(range: CompactKeyRange) -> Self {
|
||||
range.start..range.end
|
||||
}
|
||||
}
|
||||
|
||||
impl CompactLsnRange {
|
||||
pub fn above(lsn: Lsn) -> Self {
|
||||
Self {
|
||||
start: lsn,
|
||||
end: Lsn::MAX,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize)]
|
||||
pub struct CompactInfoResponse {
|
||||
pub compact_key_range: Option<CompactKeyRange>,
|
||||
pub compact_lsn_range: Option<CompactLsnRange>,
|
||||
pub sub_compaction: bool,
|
||||
pub running: bool,
|
||||
pub job_id: usize,
|
||||
}
|
||||
|
||||
#[derive(Serialize, Deserialize, Clone)]
|
||||
pub struct TimelineCreateRequest {
|
||||
pub new_timeline_id: TimelineId,
|
||||
@@ -1334,6 +1399,8 @@ pub enum PagestreamFeMessage {
|
||||
GetPage(PagestreamGetPageRequest),
|
||||
DbSize(PagestreamDbSizeRequest),
|
||||
GetSlruSegment(PagestreamGetSlruSegmentRequest),
|
||||
#[cfg(feature = "testing")]
|
||||
Test(PagestreamTestRequest),
|
||||
}
|
||||
|
||||
// Wrapped in libpq CopyData
|
||||
@@ -1345,6 +1412,22 @@ pub enum PagestreamBeMessage {
|
||||
Error(PagestreamErrorResponse),
|
||||
DbSize(PagestreamDbSizeResponse),
|
||||
GetSlruSegment(PagestreamGetSlruSegmentResponse),
|
||||
#[cfg(feature = "testing")]
|
||||
Test(PagestreamTestResponse),
|
||||
}
|
||||
|
||||
// Keep in sync with `pagestore_client.h`
|
||||
#[repr(u8)]
|
||||
enum PagestreamFeMessageTag {
|
||||
Exists = 0,
|
||||
Nblocks = 1,
|
||||
GetPage = 2,
|
||||
DbSize = 3,
|
||||
GetSlruSegment = 4,
|
||||
/* future tags above this line */
|
||||
/// For testing purposes, not available in production.
|
||||
#[cfg(feature = "testing")]
|
||||
Test = 99,
|
||||
}
|
||||
|
||||
// Keep in sync with `pagestore_client.h`
|
||||
@@ -1356,7 +1439,28 @@ enum PagestreamBeMessageTag {
|
||||
Error = 103,
|
||||
DbSize = 104,
|
||||
GetSlruSegment = 105,
|
||||
/* future tags above this line */
|
||||
/// For testing purposes, not available in production.
|
||||
#[cfg(feature = "testing")]
|
||||
Test = 199,
|
||||
}
|
||||
|
||||
impl TryFrom<u8> for PagestreamFeMessageTag {
|
||||
type Error = u8;
|
||||
fn try_from(value: u8) -> Result<Self, u8> {
|
||||
match value {
|
||||
0 => Ok(PagestreamFeMessageTag::Exists),
|
||||
1 => Ok(PagestreamFeMessageTag::Nblocks),
|
||||
2 => Ok(PagestreamFeMessageTag::GetPage),
|
||||
3 => Ok(PagestreamFeMessageTag::DbSize),
|
||||
4 => Ok(PagestreamFeMessageTag::GetSlruSegment),
|
||||
#[cfg(feature = "testing")]
|
||||
99 => Ok(PagestreamFeMessageTag::Test),
|
||||
_ => Err(value),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl TryFrom<u8> for PagestreamBeMessageTag {
|
||||
type Error = u8;
|
||||
fn try_from(value: u8) -> Result<Self, u8> {
|
||||
@@ -1367,6 +1471,8 @@ impl TryFrom<u8> for PagestreamBeMessageTag {
|
||||
103 => Ok(PagestreamBeMessageTag::Error),
|
||||
104 => Ok(PagestreamBeMessageTag::DbSize),
|
||||
105 => Ok(PagestreamBeMessageTag::GetSlruSegment),
|
||||
#[cfg(feature = "testing")]
|
||||
199 => Ok(PagestreamBeMessageTag::Test),
|
||||
_ => Err(value),
|
||||
}
|
||||
}
|
||||
@@ -1396,78 +1502,108 @@ impl TryFrom<u8> for PagestreamBeMessageTag {
|
||||
// interface allows sending both LSNs, and let the pageserver do the right thing. There was no
|
||||
// difference in the responses between V1 and V2.
|
||||
//
|
||||
#[derive(Clone, Copy)]
|
||||
// V3 version of protocol adds request ID to all requests. This request ID is also included in response
|
||||
// as well as other fields from requests, which allows to verify that we receive response for our request.
|
||||
// We copy fields from request to response to make checking more reliable: request ID is formed from process ID
|
||||
// and local counter, so in principle there can be duplicated requests IDs if process PID is reused.
|
||||
//
|
||||
#[derive(Debug, PartialEq, Eq, Clone, Copy)]
|
||||
pub enum PagestreamProtocolVersion {
|
||||
V2,
|
||||
V3,
|
||||
}
|
||||
|
||||
#[derive(Debug, PartialEq, Eq)]
|
||||
pub type RequestId = u64;
|
||||
|
||||
#[derive(Debug, PartialEq, Eq, Clone, Copy)]
|
||||
pub struct PagestreamRequest {
|
||||
pub reqid: RequestId,
|
||||
pub request_lsn: Lsn,
|
||||
pub not_modified_since: Lsn,
|
||||
}
|
||||
|
||||
#[derive(Debug, PartialEq, Eq, Clone, Copy)]
|
||||
pub struct PagestreamExistsRequest {
|
||||
pub request_lsn: Lsn,
|
||||
pub not_modified_since: Lsn,
|
||||
pub hdr: PagestreamRequest,
|
||||
pub rel: RelTag,
|
||||
}
|
||||
|
||||
#[derive(Debug, PartialEq, Eq)]
|
||||
#[derive(Debug, PartialEq, Eq, Clone, Copy)]
|
||||
pub struct PagestreamNblocksRequest {
|
||||
pub request_lsn: Lsn,
|
||||
pub not_modified_since: Lsn,
|
||||
pub hdr: PagestreamRequest,
|
||||
pub rel: RelTag,
|
||||
}
|
||||
|
||||
#[derive(Debug, PartialEq, Eq)]
|
||||
#[derive(Debug, PartialEq, Eq, Clone, Copy)]
|
||||
pub struct PagestreamGetPageRequest {
|
||||
pub request_lsn: Lsn,
|
||||
pub not_modified_since: Lsn,
|
||||
pub hdr: PagestreamRequest,
|
||||
pub rel: RelTag,
|
||||
pub blkno: u32,
|
||||
}
|
||||
|
||||
#[derive(Debug, PartialEq, Eq)]
|
||||
#[derive(Debug, PartialEq, Eq, Clone, Copy)]
|
||||
pub struct PagestreamDbSizeRequest {
|
||||
pub request_lsn: Lsn,
|
||||
pub not_modified_since: Lsn,
|
||||
pub hdr: PagestreamRequest,
|
||||
pub dbnode: u32,
|
||||
}
|
||||
|
||||
#[derive(Debug, PartialEq, Eq)]
|
||||
#[derive(Debug, PartialEq, Eq, Clone, Copy)]
|
||||
pub struct PagestreamGetSlruSegmentRequest {
|
||||
pub request_lsn: Lsn,
|
||||
pub not_modified_since: Lsn,
|
||||
pub hdr: PagestreamRequest,
|
||||
pub kind: u8,
|
||||
pub segno: u32,
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
pub struct PagestreamExistsResponse {
|
||||
pub req: PagestreamExistsRequest,
|
||||
pub exists: bool,
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
pub struct PagestreamNblocksResponse {
|
||||
pub req: PagestreamNblocksRequest,
|
||||
pub n_blocks: u32,
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
pub struct PagestreamGetPageResponse {
|
||||
pub req: PagestreamGetPageRequest,
|
||||
pub page: Bytes,
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
pub struct PagestreamGetSlruSegmentResponse {
|
||||
pub req: PagestreamGetSlruSegmentRequest,
|
||||
pub segment: Bytes,
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
pub struct PagestreamErrorResponse {
|
||||
pub req: PagestreamRequest,
|
||||
pub message: String,
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
pub struct PagestreamDbSizeResponse {
|
||||
pub req: PagestreamDbSizeRequest,
|
||||
pub db_size: i64,
|
||||
}
|
||||
|
||||
#[cfg(feature = "testing")]
|
||||
#[derive(Debug, PartialEq, Eq, Clone)]
|
||||
pub struct PagestreamTestRequest {
|
||||
pub hdr: PagestreamRequest,
|
||||
pub batch_key: u64,
|
||||
pub message: String,
|
||||
}
|
||||
|
||||
#[cfg(feature = "testing")]
|
||||
#[derive(Debug)]
|
||||
pub struct PagestreamTestResponse {
|
||||
pub req: PagestreamTestRequest,
|
||||
}
|
||||
|
||||
// This is a cut-down version of TenantHistorySize from the pageserver crate, omitting fields
|
||||
// that require pageserver-internal types. It is sufficient to get the total size.
|
||||
#[derive(Serialize, Deserialize, Debug)]
|
||||
@@ -1481,15 +1617,16 @@ pub struct TenantHistorySize {
|
||||
|
||||
impl PagestreamFeMessage {
|
||||
/// Serialize a compute -> pageserver message. This is currently only used in testing
|
||||
/// tools. Always uses protocol version 2.
|
||||
/// tools. Always uses protocol version 3.
|
||||
pub fn serialize(&self) -> Bytes {
|
||||
let mut bytes = BytesMut::new();
|
||||
|
||||
match self {
|
||||
Self::Exists(req) => {
|
||||
bytes.put_u8(0);
|
||||
bytes.put_u64(req.request_lsn.0);
|
||||
bytes.put_u64(req.not_modified_since.0);
|
||||
bytes.put_u8(PagestreamFeMessageTag::Exists as u8);
|
||||
bytes.put_u64(req.hdr.reqid);
|
||||
bytes.put_u64(req.hdr.request_lsn.0);
|
||||
bytes.put_u64(req.hdr.not_modified_since.0);
|
||||
bytes.put_u32(req.rel.spcnode);
|
||||
bytes.put_u32(req.rel.dbnode);
|
||||
bytes.put_u32(req.rel.relnode);
|
||||
@@ -1497,9 +1634,10 @@ impl PagestreamFeMessage {
|
||||
}
|
||||
|
||||
Self::Nblocks(req) => {
|
||||
bytes.put_u8(1);
|
||||
bytes.put_u64(req.request_lsn.0);
|
||||
bytes.put_u64(req.not_modified_since.0);
|
||||
bytes.put_u8(PagestreamFeMessageTag::Nblocks as u8);
|
||||
bytes.put_u64(req.hdr.reqid);
|
||||
bytes.put_u64(req.hdr.request_lsn.0);
|
||||
bytes.put_u64(req.hdr.not_modified_since.0);
|
||||
bytes.put_u32(req.rel.spcnode);
|
||||
bytes.put_u32(req.rel.dbnode);
|
||||
bytes.put_u32(req.rel.relnode);
|
||||
@@ -1507,9 +1645,10 @@ impl PagestreamFeMessage {
|
||||
}
|
||||
|
||||
Self::GetPage(req) => {
|
||||
bytes.put_u8(2);
|
||||
bytes.put_u64(req.request_lsn.0);
|
||||
bytes.put_u64(req.not_modified_since.0);
|
||||
bytes.put_u8(PagestreamFeMessageTag::GetPage as u8);
|
||||
bytes.put_u64(req.hdr.reqid);
|
||||
bytes.put_u64(req.hdr.request_lsn.0);
|
||||
bytes.put_u64(req.hdr.not_modified_since.0);
|
||||
bytes.put_u32(req.rel.spcnode);
|
||||
bytes.put_u32(req.rel.dbnode);
|
||||
bytes.put_u32(req.rel.relnode);
|
||||
@@ -1518,123 +1657,278 @@ impl PagestreamFeMessage {
|
||||
}
|
||||
|
||||
Self::DbSize(req) => {
|
||||
bytes.put_u8(3);
|
||||
bytes.put_u64(req.request_lsn.0);
|
||||
bytes.put_u64(req.not_modified_since.0);
|
||||
bytes.put_u8(PagestreamFeMessageTag::DbSize as u8);
|
||||
bytes.put_u64(req.hdr.reqid);
|
||||
bytes.put_u64(req.hdr.request_lsn.0);
|
||||
bytes.put_u64(req.hdr.not_modified_since.0);
|
||||
bytes.put_u32(req.dbnode);
|
||||
}
|
||||
|
||||
Self::GetSlruSegment(req) => {
|
||||
bytes.put_u8(4);
|
||||
bytes.put_u64(req.request_lsn.0);
|
||||
bytes.put_u64(req.not_modified_since.0);
|
||||
bytes.put_u8(PagestreamFeMessageTag::GetSlruSegment as u8);
|
||||
bytes.put_u64(req.hdr.reqid);
|
||||
bytes.put_u64(req.hdr.request_lsn.0);
|
||||
bytes.put_u64(req.hdr.not_modified_since.0);
|
||||
bytes.put_u8(req.kind);
|
||||
bytes.put_u32(req.segno);
|
||||
}
|
||||
#[cfg(feature = "testing")]
|
||||
Self::Test(req) => {
|
||||
bytes.put_u8(PagestreamFeMessageTag::Test as u8);
|
||||
bytes.put_u64(req.hdr.reqid);
|
||||
bytes.put_u64(req.hdr.request_lsn.0);
|
||||
bytes.put_u64(req.hdr.not_modified_since.0);
|
||||
bytes.put_u64(req.batch_key);
|
||||
let message = req.message.as_bytes();
|
||||
bytes.put_u64(message.len() as u64);
|
||||
bytes.put_slice(message);
|
||||
}
|
||||
}
|
||||
|
||||
bytes.into()
|
||||
}
|
||||
|
||||
pub fn parse<R: std::io::Read>(body: &mut R) -> anyhow::Result<PagestreamFeMessage> {
|
||||
pub fn parse<R: std::io::Read>(
|
||||
body: &mut R,
|
||||
protocol_version: PagestreamProtocolVersion,
|
||||
) -> anyhow::Result<PagestreamFeMessage> {
|
||||
// these correspond to the NeonMessageTag enum in pagestore_client.h
|
||||
//
|
||||
// TODO: consider using protobuf or serde bincode for less error prone
|
||||
// serialization.
|
||||
let msg_tag = body.read_u8()?;
|
||||
let (reqid, request_lsn, not_modified_since) = match protocol_version {
|
||||
PagestreamProtocolVersion::V2 => (
|
||||
0,
|
||||
Lsn::from(body.read_u64::<BigEndian>()?),
|
||||
Lsn::from(body.read_u64::<BigEndian>()?),
|
||||
),
|
||||
PagestreamProtocolVersion::V3 => (
|
||||
body.read_u64::<BigEndian>()?,
|
||||
Lsn::from(body.read_u64::<BigEndian>()?),
|
||||
Lsn::from(body.read_u64::<BigEndian>()?),
|
||||
),
|
||||
};
|
||||
|
||||
// these two fields are the same for every request type
|
||||
let request_lsn = Lsn::from(body.read_u64::<BigEndian>()?);
|
||||
let not_modified_since = Lsn::from(body.read_u64::<BigEndian>()?);
|
||||
|
||||
match msg_tag {
|
||||
0 => Ok(PagestreamFeMessage::Exists(PagestreamExistsRequest {
|
||||
request_lsn,
|
||||
not_modified_since,
|
||||
rel: RelTag {
|
||||
spcnode: body.read_u32::<BigEndian>()?,
|
||||
match PagestreamFeMessageTag::try_from(msg_tag)
|
||||
.map_err(|tag: u8| anyhow::anyhow!("invalid tag {tag}"))?
|
||||
{
|
||||
PagestreamFeMessageTag::Exists => {
|
||||
Ok(PagestreamFeMessage::Exists(PagestreamExistsRequest {
|
||||
hdr: PagestreamRequest {
|
||||
reqid,
|
||||
request_lsn,
|
||||
not_modified_since,
|
||||
},
|
||||
rel: RelTag {
|
||||
spcnode: body.read_u32::<BigEndian>()?,
|
||||
dbnode: body.read_u32::<BigEndian>()?,
|
||||
relnode: body.read_u32::<BigEndian>()?,
|
||||
forknum: body.read_u8()?,
|
||||
},
|
||||
}))
|
||||
}
|
||||
PagestreamFeMessageTag::Nblocks => {
|
||||
Ok(PagestreamFeMessage::Nblocks(PagestreamNblocksRequest {
|
||||
hdr: PagestreamRequest {
|
||||
reqid,
|
||||
request_lsn,
|
||||
not_modified_since,
|
||||
},
|
||||
rel: RelTag {
|
||||
spcnode: body.read_u32::<BigEndian>()?,
|
||||
dbnode: body.read_u32::<BigEndian>()?,
|
||||
relnode: body.read_u32::<BigEndian>()?,
|
||||
forknum: body.read_u8()?,
|
||||
},
|
||||
}))
|
||||
}
|
||||
PagestreamFeMessageTag::GetPage => {
|
||||
Ok(PagestreamFeMessage::GetPage(PagestreamGetPageRequest {
|
||||
hdr: PagestreamRequest {
|
||||
reqid,
|
||||
request_lsn,
|
||||
not_modified_since,
|
||||
},
|
||||
rel: RelTag {
|
||||
spcnode: body.read_u32::<BigEndian>()?,
|
||||
dbnode: body.read_u32::<BigEndian>()?,
|
||||
relnode: body.read_u32::<BigEndian>()?,
|
||||
forknum: body.read_u8()?,
|
||||
},
|
||||
blkno: body.read_u32::<BigEndian>()?,
|
||||
}))
|
||||
}
|
||||
PagestreamFeMessageTag::DbSize => {
|
||||
Ok(PagestreamFeMessage::DbSize(PagestreamDbSizeRequest {
|
||||
hdr: PagestreamRequest {
|
||||
reqid,
|
||||
request_lsn,
|
||||
not_modified_since,
|
||||
},
|
||||
dbnode: body.read_u32::<BigEndian>()?,
|
||||
relnode: body.read_u32::<BigEndian>()?,
|
||||
forknum: body.read_u8()?,
|
||||
},
|
||||
})),
|
||||
1 => Ok(PagestreamFeMessage::Nblocks(PagestreamNblocksRequest {
|
||||
request_lsn,
|
||||
not_modified_since,
|
||||
rel: RelTag {
|
||||
spcnode: body.read_u32::<BigEndian>()?,
|
||||
dbnode: body.read_u32::<BigEndian>()?,
|
||||
relnode: body.read_u32::<BigEndian>()?,
|
||||
forknum: body.read_u8()?,
|
||||
},
|
||||
})),
|
||||
2 => Ok(PagestreamFeMessage::GetPage(PagestreamGetPageRequest {
|
||||
request_lsn,
|
||||
not_modified_since,
|
||||
rel: RelTag {
|
||||
spcnode: body.read_u32::<BigEndian>()?,
|
||||
dbnode: body.read_u32::<BigEndian>()?,
|
||||
relnode: body.read_u32::<BigEndian>()?,
|
||||
forknum: body.read_u8()?,
|
||||
},
|
||||
blkno: body.read_u32::<BigEndian>()?,
|
||||
})),
|
||||
3 => Ok(PagestreamFeMessage::DbSize(PagestreamDbSizeRequest {
|
||||
request_lsn,
|
||||
not_modified_since,
|
||||
dbnode: body.read_u32::<BigEndian>()?,
|
||||
})),
|
||||
4 => Ok(PagestreamFeMessage::GetSlruSegment(
|
||||
}))
|
||||
}
|
||||
PagestreamFeMessageTag::GetSlruSegment => Ok(PagestreamFeMessage::GetSlruSegment(
|
||||
PagestreamGetSlruSegmentRequest {
|
||||
request_lsn,
|
||||
not_modified_since,
|
||||
hdr: PagestreamRequest {
|
||||
reqid,
|
||||
request_lsn,
|
||||
not_modified_since,
|
||||
},
|
||||
kind: body.read_u8()?,
|
||||
segno: body.read_u32::<BigEndian>()?,
|
||||
},
|
||||
)),
|
||||
_ => bail!("unknown smgr message tag: {:?}", msg_tag),
|
||||
#[cfg(feature = "testing")]
|
||||
PagestreamFeMessageTag::Test => Ok(PagestreamFeMessage::Test(PagestreamTestRequest {
|
||||
hdr: PagestreamRequest {
|
||||
reqid,
|
||||
request_lsn,
|
||||
not_modified_since,
|
||||
},
|
||||
batch_key: body.read_u64::<BigEndian>()?,
|
||||
message: {
|
||||
let len = body.read_u64::<BigEndian>()?;
|
||||
let mut buf = vec![0; len as usize];
|
||||
body.read_exact(&mut buf)?;
|
||||
String::from_utf8(buf)?
|
||||
},
|
||||
})),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl PagestreamBeMessage {
|
||||
pub fn serialize(&self) -> Bytes {
|
||||
pub fn serialize(&self, protocol_version: PagestreamProtocolVersion) -> Bytes {
|
||||
let mut bytes = BytesMut::new();
|
||||
|
||||
use PagestreamBeMessageTag as Tag;
|
||||
match self {
|
||||
Self::Exists(resp) => {
|
||||
bytes.put_u8(Tag::Exists as u8);
|
||||
bytes.put_u8(resp.exists as u8);
|
||||
}
|
||||
match protocol_version {
|
||||
PagestreamProtocolVersion::V2 => {
|
||||
match self {
|
||||
Self::Exists(resp) => {
|
||||
bytes.put_u8(Tag::Exists as u8);
|
||||
bytes.put_u8(resp.exists as u8);
|
||||
}
|
||||
|
||||
Self::Nblocks(resp) => {
|
||||
bytes.put_u8(Tag::Nblocks as u8);
|
||||
bytes.put_u32(resp.n_blocks);
|
||||
}
|
||||
Self::Nblocks(resp) => {
|
||||
bytes.put_u8(Tag::Nblocks as u8);
|
||||
bytes.put_u32(resp.n_blocks);
|
||||
}
|
||||
|
||||
Self::GetPage(resp) => {
|
||||
bytes.put_u8(Tag::GetPage as u8);
|
||||
bytes.put(&resp.page[..]);
|
||||
}
|
||||
Self::GetPage(resp) => {
|
||||
bytes.put_u8(Tag::GetPage as u8);
|
||||
bytes.put(&resp.page[..])
|
||||
}
|
||||
|
||||
Self::Error(resp) => {
|
||||
bytes.put_u8(Tag::Error as u8);
|
||||
bytes.put(resp.message.as_bytes());
|
||||
bytes.put_u8(0); // null terminator
|
||||
}
|
||||
Self::DbSize(resp) => {
|
||||
bytes.put_u8(Tag::DbSize as u8);
|
||||
bytes.put_i64(resp.db_size);
|
||||
}
|
||||
Self::Error(resp) => {
|
||||
bytes.put_u8(Tag::Error as u8);
|
||||
bytes.put(resp.message.as_bytes());
|
||||
bytes.put_u8(0); // null terminator
|
||||
}
|
||||
Self::DbSize(resp) => {
|
||||
bytes.put_u8(Tag::DbSize as u8);
|
||||
bytes.put_i64(resp.db_size);
|
||||
}
|
||||
|
||||
Self::GetSlruSegment(resp) => {
|
||||
bytes.put_u8(Tag::GetSlruSegment as u8);
|
||||
bytes.put_u32((resp.segment.len() / BLCKSZ as usize) as u32);
|
||||
bytes.put(&resp.segment[..]);
|
||||
Self::GetSlruSegment(resp) => {
|
||||
bytes.put_u8(Tag::GetSlruSegment as u8);
|
||||
bytes.put_u32((resp.segment.len() / BLCKSZ as usize) as u32);
|
||||
bytes.put(&resp.segment[..]);
|
||||
}
|
||||
|
||||
#[cfg(feature = "testing")]
|
||||
Self::Test(resp) => {
|
||||
bytes.put_u8(Tag::Test as u8);
|
||||
bytes.put_u64(resp.req.batch_key);
|
||||
let message = resp.req.message.as_bytes();
|
||||
bytes.put_u64(message.len() as u64);
|
||||
bytes.put_slice(message);
|
||||
}
|
||||
}
|
||||
}
|
||||
PagestreamProtocolVersion::V3 => {
|
||||
match self {
|
||||
Self::Exists(resp) => {
|
||||
bytes.put_u8(Tag::Exists as u8);
|
||||
bytes.put_u64(resp.req.hdr.reqid);
|
||||
bytes.put_u64(resp.req.hdr.request_lsn.0);
|
||||
bytes.put_u64(resp.req.hdr.not_modified_since.0);
|
||||
bytes.put_u32(resp.req.rel.spcnode);
|
||||
bytes.put_u32(resp.req.rel.dbnode);
|
||||
bytes.put_u32(resp.req.rel.relnode);
|
||||
bytes.put_u8(resp.req.rel.forknum);
|
||||
bytes.put_u8(resp.exists as u8);
|
||||
}
|
||||
|
||||
Self::Nblocks(resp) => {
|
||||
bytes.put_u8(Tag::Nblocks as u8);
|
||||
bytes.put_u64(resp.req.hdr.reqid);
|
||||
bytes.put_u64(resp.req.hdr.request_lsn.0);
|
||||
bytes.put_u64(resp.req.hdr.not_modified_since.0);
|
||||
bytes.put_u32(resp.req.rel.spcnode);
|
||||
bytes.put_u32(resp.req.rel.dbnode);
|
||||
bytes.put_u32(resp.req.rel.relnode);
|
||||
bytes.put_u8(resp.req.rel.forknum);
|
||||
bytes.put_u32(resp.n_blocks);
|
||||
}
|
||||
|
||||
Self::GetPage(resp) => {
|
||||
bytes.put_u8(Tag::GetPage as u8);
|
||||
bytes.put_u64(resp.req.hdr.reqid);
|
||||
bytes.put_u64(resp.req.hdr.request_lsn.0);
|
||||
bytes.put_u64(resp.req.hdr.not_modified_since.0);
|
||||
bytes.put_u32(resp.req.rel.spcnode);
|
||||
bytes.put_u32(resp.req.rel.dbnode);
|
||||
bytes.put_u32(resp.req.rel.relnode);
|
||||
bytes.put_u8(resp.req.rel.forknum);
|
||||
bytes.put_u32(resp.req.blkno);
|
||||
bytes.put(&resp.page[..])
|
||||
}
|
||||
|
||||
Self::Error(resp) => {
|
||||
bytes.put_u8(Tag::Error as u8);
|
||||
bytes.put_u64(resp.req.reqid);
|
||||
bytes.put_u64(resp.req.request_lsn.0);
|
||||
bytes.put_u64(resp.req.not_modified_since.0);
|
||||
bytes.put(resp.message.as_bytes());
|
||||
bytes.put_u8(0); // null terminator
|
||||
}
|
||||
Self::DbSize(resp) => {
|
||||
bytes.put_u8(Tag::DbSize as u8);
|
||||
bytes.put_u64(resp.req.hdr.reqid);
|
||||
bytes.put_u64(resp.req.hdr.request_lsn.0);
|
||||
bytes.put_u64(resp.req.hdr.not_modified_since.0);
|
||||
bytes.put_u32(resp.req.dbnode);
|
||||
bytes.put_i64(resp.db_size);
|
||||
}
|
||||
|
||||
Self::GetSlruSegment(resp) => {
|
||||
bytes.put_u8(Tag::GetSlruSegment as u8);
|
||||
bytes.put_u64(resp.req.hdr.reqid);
|
||||
bytes.put_u64(resp.req.hdr.request_lsn.0);
|
||||
bytes.put_u64(resp.req.hdr.not_modified_since.0);
|
||||
bytes.put_u8(resp.req.kind);
|
||||
bytes.put_u32(resp.req.segno);
|
||||
bytes.put_u32((resp.segment.len() / BLCKSZ as usize) as u32);
|
||||
bytes.put(&resp.segment[..]);
|
||||
}
|
||||
|
||||
#[cfg(feature = "testing")]
|
||||
Self::Test(resp) => {
|
||||
bytes.put_u8(Tag::Test as u8);
|
||||
bytes.put_u64(resp.req.hdr.reqid);
|
||||
bytes.put_u64(resp.req.hdr.request_lsn.0);
|
||||
bytes.put_u64(resp.req.hdr.not_modified_since.0);
|
||||
bytes.put_u64(resp.req.batch_key);
|
||||
let message = resp.req.message.as_bytes();
|
||||
bytes.put_u64(message.len() as u64);
|
||||
bytes.put_slice(message);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
bytes.into()
|
||||
}
|
||||
|
||||
@@ -1646,41 +1940,156 @@ impl PagestreamBeMessage {
|
||||
let ok =
|
||||
match Tag::try_from(msg_tag).map_err(|tag: u8| anyhow::anyhow!("invalid tag {tag}"))? {
|
||||
Tag::Exists => {
|
||||
let exists = buf.read_u8()?;
|
||||
let reqid = buf.read_u64::<BigEndian>()?;
|
||||
let request_lsn = Lsn(buf.read_u64::<BigEndian>()?);
|
||||
let not_modified_since = Lsn(buf.read_u64::<BigEndian>()?);
|
||||
let rel = RelTag {
|
||||
spcnode: buf.read_u32::<BigEndian>()?,
|
||||
dbnode: buf.read_u32::<BigEndian>()?,
|
||||
relnode: buf.read_u32::<BigEndian>()?,
|
||||
forknum: buf.read_u8()?,
|
||||
};
|
||||
let exists = buf.read_u8()? != 0;
|
||||
Self::Exists(PagestreamExistsResponse {
|
||||
exists: exists != 0,
|
||||
req: PagestreamExistsRequest {
|
||||
hdr: PagestreamRequest {
|
||||
reqid,
|
||||
request_lsn,
|
||||
not_modified_since,
|
||||
},
|
||||
rel,
|
||||
},
|
||||
exists,
|
||||
})
|
||||
}
|
||||
Tag::Nblocks => {
|
||||
let reqid = buf.read_u64::<BigEndian>()?;
|
||||
let request_lsn = Lsn(buf.read_u64::<BigEndian>()?);
|
||||
let not_modified_since = Lsn(buf.read_u64::<BigEndian>()?);
|
||||
let rel = RelTag {
|
||||
spcnode: buf.read_u32::<BigEndian>()?,
|
||||
dbnode: buf.read_u32::<BigEndian>()?,
|
||||
relnode: buf.read_u32::<BigEndian>()?,
|
||||
forknum: buf.read_u8()?,
|
||||
};
|
||||
let n_blocks = buf.read_u32::<BigEndian>()?;
|
||||
Self::Nblocks(PagestreamNblocksResponse { n_blocks })
|
||||
Self::Nblocks(PagestreamNblocksResponse {
|
||||
req: PagestreamNblocksRequest {
|
||||
hdr: PagestreamRequest {
|
||||
reqid,
|
||||
request_lsn,
|
||||
not_modified_since,
|
||||
},
|
||||
rel,
|
||||
},
|
||||
n_blocks,
|
||||
})
|
||||
}
|
||||
Tag::GetPage => {
|
||||
let reqid = buf.read_u64::<BigEndian>()?;
|
||||
let request_lsn = Lsn(buf.read_u64::<BigEndian>()?);
|
||||
let not_modified_since = Lsn(buf.read_u64::<BigEndian>()?);
|
||||
let rel = RelTag {
|
||||
spcnode: buf.read_u32::<BigEndian>()?,
|
||||
dbnode: buf.read_u32::<BigEndian>()?,
|
||||
relnode: buf.read_u32::<BigEndian>()?,
|
||||
forknum: buf.read_u8()?,
|
||||
};
|
||||
let blkno = buf.read_u32::<BigEndian>()?;
|
||||
let mut page = vec![0; 8192]; // TODO: use MaybeUninit
|
||||
buf.read_exact(&mut page)?;
|
||||
PagestreamBeMessage::GetPage(PagestreamGetPageResponse { page: page.into() })
|
||||
Self::GetPage(PagestreamGetPageResponse {
|
||||
req: PagestreamGetPageRequest {
|
||||
hdr: PagestreamRequest {
|
||||
reqid,
|
||||
request_lsn,
|
||||
not_modified_since,
|
||||
},
|
||||
rel,
|
||||
blkno,
|
||||
},
|
||||
page: page.into(),
|
||||
})
|
||||
}
|
||||
Tag::Error => {
|
||||
let reqid = buf.read_u64::<BigEndian>()?;
|
||||
let request_lsn = Lsn(buf.read_u64::<BigEndian>()?);
|
||||
let not_modified_since = Lsn(buf.read_u64::<BigEndian>()?);
|
||||
let mut msg = Vec::new();
|
||||
buf.read_until(0, &mut msg)?;
|
||||
let cstring = std::ffi::CString::from_vec_with_nul(msg)?;
|
||||
let rust_str = cstring.to_str()?;
|
||||
PagestreamBeMessage::Error(PagestreamErrorResponse {
|
||||
Self::Error(PagestreamErrorResponse {
|
||||
req: PagestreamRequest {
|
||||
reqid,
|
||||
request_lsn,
|
||||
not_modified_since,
|
||||
},
|
||||
message: rust_str.to_owned(),
|
||||
})
|
||||
}
|
||||
Tag::DbSize => {
|
||||
let reqid = buf.read_u64::<BigEndian>()?;
|
||||
let request_lsn = Lsn(buf.read_u64::<BigEndian>()?);
|
||||
let not_modified_since = Lsn(buf.read_u64::<BigEndian>()?);
|
||||
let dbnode = buf.read_u32::<BigEndian>()?;
|
||||
let db_size = buf.read_i64::<BigEndian>()?;
|
||||
Self::DbSize(PagestreamDbSizeResponse { db_size })
|
||||
Self::DbSize(PagestreamDbSizeResponse {
|
||||
req: PagestreamDbSizeRequest {
|
||||
hdr: PagestreamRequest {
|
||||
reqid,
|
||||
request_lsn,
|
||||
not_modified_since,
|
||||
},
|
||||
dbnode,
|
||||
},
|
||||
db_size,
|
||||
})
|
||||
}
|
||||
Tag::GetSlruSegment => {
|
||||
let reqid = buf.read_u64::<BigEndian>()?;
|
||||
let request_lsn = Lsn(buf.read_u64::<BigEndian>()?);
|
||||
let not_modified_since = Lsn(buf.read_u64::<BigEndian>()?);
|
||||
let kind = buf.read_u8()?;
|
||||
let segno = buf.read_u32::<BigEndian>()?;
|
||||
let n_blocks = buf.read_u32::<BigEndian>()?;
|
||||
let mut segment = vec![0; n_blocks as usize * BLCKSZ as usize];
|
||||
buf.read_exact(&mut segment)?;
|
||||
Self::GetSlruSegment(PagestreamGetSlruSegmentResponse {
|
||||
req: PagestreamGetSlruSegmentRequest {
|
||||
hdr: PagestreamRequest {
|
||||
reqid,
|
||||
request_lsn,
|
||||
not_modified_since,
|
||||
},
|
||||
kind,
|
||||
segno,
|
||||
},
|
||||
segment: segment.into(),
|
||||
})
|
||||
}
|
||||
#[cfg(feature = "testing")]
|
||||
Tag::Test => {
|
||||
let reqid = buf.read_u64::<BigEndian>()?;
|
||||
let request_lsn = Lsn(buf.read_u64::<BigEndian>()?);
|
||||
let not_modified_since = Lsn(buf.read_u64::<BigEndian>()?);
|
||||
let batch_key = buf.read_u64::<BigEndian>()?;
|
||||
let len = buf.read_u64::<BigEndian>()?;
|
||||
let mut msg = vec![0; len as usize];
|
||||
buf.read_exact(&mut msg)?;
|
||||
let message = String::from_utf8(msg)?;
|
||||
Self::Test(PagestreamTestResponse {
|
||||
req: PagestreamTestRequest {
|
||||
hdr: PagestreamRequest {
|
||||
reqid,
|
||||
request_lsn,
|
||||
not_modified_since,
|
||||
},
|
||||
batch_key,
|
||||
message,
|
||||
},
|
||||
})
|
||||
}
|
||||
};
|
||||
let remaining = buf.into_inner();
|
||||
if !remaining.is_empty() {
|
||||
@@ -1700,6 +2109,25 @@ impl PagestreamBeMessage {
|
||||
Self::Error(_) => "Error",
|
||||
Self::DbSize(_) => "DbSize",
|
||||
Self::GetSlruSegment(_) => "GetSlruSegment",
|
||||
#[cfg(feature = "testing")]
|
||||
Self::Test(_) => "Test",
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Serialize, Deserialize)]
|
||||
pub struct PageTraceEvent {
|
||||
pub key: CompactKey,
|
||||
pub effective_lsn: Lsn,
|
||||
pub time: SystemTime,
|
||||
}
|
||||
|
||||
impl Default for PageTraceEvent {
|
||||
fn default() -> Self {
|
||||
Self {
|
||||
key: Default::default(),
|
||||
effective_lsn: Default::default(),
|
||||
time: std::time::UNIX_EPOCH,
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -1716,8 +2144,11 @@ mod tests {
|
||||
// Test serialization/deserialization of PagestreamFeMessage
|
||||
let messages = vec![
|
||||
PagestreamFeMessage::Exists(PagestreamExistsRequest {
|
||||
request_lsn: Lsn(4),
|
||||
not_modified_since: Lsn(3),
|
||||
hdr: PagestreamRequest {
|
||||
reqid: 0,
|
||||
request_lsn: Lsn(4),
|
||||
not_modified_since: Lsn(3),
|
||||
},
|
||||
rel: RelTag {
|
||||
forknum: 1,
|
||||
spcnode: 2,
|
||||
@@ -1726,8 +2157,11 @@ mod tests {
|
||||
},
|
||||
}),
|
||||
PagestreamFeMessage::Nblocks(PagestreamNblocksRequest {
|
||||
request_lsn: Lsn(4),
|
||||
not_modified_since: Lsn(4),
|
||||
hdr: PagestreamRequest {
|
||||
reqid: 0,
|
||||
request_lsn: Lsn(4),
|
||||
not_modified_since: Lsn(4),
|
||||
},
|
||||
rel: RelTag {
|
||||
forknum: 1,
|
||||
spcnode: 2,
|
||||
@@ -1736,8 +2170,11 @@ mod tests {
|
||||
},
|
||||
}),
|
||||
PagestreamFeMessage::GetPage(PagestreamGetPageRequest {
|
||||
request_lsn: Lsn(4),
|
||||
not_modified_since: Lsn(3),
|
||||
hdr: PagestreamRequest {
|
||||
reqid: 0,
|
||||
request_lsn: Lsn(4),
|
||||
not_modified_since: Lsn(3),
|
||||
},
|
||||
rel: RelTag {
|
||||
forknum: 1,
|
||||
spcnode: 2,
|
||||
@@ -1747,14 +2184,19 @@ mod tests {
|
||||
blkno: 7,
|
||||
}),
|
||||
PagestreamFeMessage::DbSize(PagestreamDbSizeRequest {
|
||||
request_lsn: Lsn(4),
|
||||
not_modified_since: Lsn(3),
|
||||
hdr: PagestreamRequest {
|
||||
reqid: 0,
|
||||
request_lsn: Lsn(4),
|
||||
not_modified_since: Lsn(3),
|
||||
},
|
||||
dbnode: 7,
|
||||
}),
|
||||
];
|
||||
for msg in messages {
|
||||
let bytes = msg.serialize();
|
||||
let reconstructed = PagestreamFeMessage::parse(&mut bytes.reader()).unwrap();
|
||||
let reconstructed =
|
||||
PagestreamFeMessage::parse(&mut bytes.reader(), PagestreamProtocolVersion::V3)
|
||||
.unwrap();
|
||||
assert!(msg == reconstructed);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -31,6 +31,8 @@
|
||||
//! - In a tenant with 4 shards, each shard has ShardCount(N), ShardNumber(i) where i in 0..N-1 (inclusive),
|
||||
//! and their slugs are 0004, 0104, 0204, and 0304.
|
||||
|
||||
use std::hash::{Hash, Hasher};
|
||||
|
||||
use crate::{key::Key, models::ShardParameters};
|
||||
use postgres_ffi::relfile_utils::INIT_FORKNUM;
|
||||
use serde::{Deserialize, Serialize};
|
||||
@@ -48,6 +50,23 @@ pub struct ShardIdentity {
|
||||
layout: ShardLayout,
|
||||
}
|
||||
|
||||
/// Hash implementation
|
||||
///
|
||||
/// The stripe size cannot change dynamically, so it can be ignored for efficiency reasons.
|
||||
impl Hash for ShardIdentity {
|
||||
fn hash<H: Hasher>(&self, state: &mut H) {
|
||||
let ShardIdentity {
|
||||
number,
|
||||
count,
|
||||
stripe_size: _,
|
||||
layout: _,
|
||||
} = self;
|
||||
|
||||
number.0.hash(state);
|
||||
count.0.hash(state);
|
||||
}
|
||||
}
|
||||
|
||||
/// Stripe size in number of pages
|
||||
#[derive(Clone, Copy, Serialize, Deserialize, Eq, PartialEq, Debug)]
|
||||
pub struct ShardStripeSize(pub u32);
|
||||
@@ -59,7 +78,7 @@ impl Default for ShardStripeSize {
|
||||
}
|
||||
|
||||
/// Layout version: for future upgrades where we might change how the key->shard mapping works
|
||||
#[derive(Clone, Copy, Serialize, Deserialize, Eq, PartialEq, Debug)]
|
||||
#[derive(Clone, Copy, Serialize, Deserialize, Eq, PartialEq, Hash, Debug)]
|
||||
pub struct ShardLayout(u8);
|
||||
|
||||
const LAYOUT_V1: ShardLayout = ShardLayout(1);
|
||||
|
||||
@@ -106,11 +106,11 @@ impl<R: RecordGenerator> WalGenerator<R> {
|
||||
const TIMELINE_ID: u32 = 1;
|
||||
|
||||
/// Creates a new WAL generator with the given record generator.
|
||||
pub fn new(record_generator: R) -> WalGenerator<R> {
|
||||
pub fn new(record_generator: R, start_lsn: Lsn) -> WalGenerator<R> {
|
||||
Self {
|
||||
record_generator,
|
||||
lsn: Lsn(0),
|
||||
prev_lsn: Lsn(0),
|
||||
lsn: start_lsn,
|
||||
prev_lsn: start_lsn,
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -16,7 +16,7 @@ use utils::bin_ser::DeserializeError;
|
||||
use utils::lsn::Lsn;
|
||||
|
||||
#[repr(C)]
|
||||
#[derive(Debug, Serialize, Deserialize)]
|
||||
#[derive(Clone, Debug, Serialize, Deserialize)]
|
||||
pub struct XlMultiXactCreate {
|
||||
pub mid: MultiXactId,
|
||||
/* new MultiXact's ID */
|
||||
@@ -46,7 +46,7 @@ impl XlMultiXactCreate {
|
||||
}
|
||||
|
||||
#[repr(C)]
|
||||
#[derive(Debug, Serialize, Deserialize)]
|
||||
#[derive(Clone, Debug, Serialize, Deserialize)]
|
||||
pub struct XlMultiXactTruncate {
|
||||
pub oldest_multi_db: Oid,
|
||||
/* to-be-truncated range of multixact offsets */
|
||||
@@ -72,7 +72,7 @@ impl XlMultiXactTruncate {
|
||||
}
|
||||
|
||||
#[repr(C)]
|
||||
#[derive(Debug, Serialize, Deserialize)]
|
||||
#[derive(Clone, Debug, Serialize, Deserialize)]
|
||||
pub struct XlRelmapUpdate {
|
||||
pub dbid: Oid, /* database ID, or 0 for shared map */
|
||||
pub tsid: Oid, /* database's tablespace, or pg_global */
|
||||
@@ -90,7 +90,7 @@ impl XlRelmapUpdate {
|
||||
}
|
||||
|
||||
#[repr(C)]
|
||||
#[derive(Debug, Serialize, Deserialize)]
|
||||
#[derive(Clone, Debug, Serialize, Deserialize)]
|
||||
pub struct XlReploriginDrop {
|
||||
pub node_id: RepOriginId,
|
||||
}
|
||||
@@ -104,7 +104,7 @@ impl XlReploriginDrop {
|
||||
}
|
||||
|
||||
#[repr(C)]
|
||||
#[derive(Debug, Serialize, Deserialize)]
|
||||
#[derive(Clone, Debug, Serialize, Deserialize)]
|
||||
pub struct XlReploriginSet {
|
||||
pub remote_lsn: Lsn,
|
||||
pub node_id: RepOriginId,
|
||||
@@ -911,7 +911,7 @@ impl XlSmgrCreate {
|
||||
}
|
||||
|
||||
#[repr(C)]
|
||||
#[derive(Debug, Serialize, Deserialize)]
|
||||
#[derive(Clone, Debug, Serialize, Deserialize)]
|
||||
pub struct XlSmgrTruncate {
|
||||
pub blkno: BlockNumber,
|
||||
pub rnode: RelFileNode,
|
||||
@@ -984,7 +984,7 @@ impl XlDropDatabase {
|
||||
/// xl_xact_parsed_abort structs in PostgreSQL, but we use the same
|
||||
/// struct for commits and aborts.
|
||||
///
|
||||
#[derive(Debug, Serialize, Deserialize)]
|
||||
#[derive(Clone, Debug, Serialize, Deserialize)]
|
||||
pub struct XlXactParsedRecord {
|
||||
pub xid: TransactionId,
|
||||
pub info: u8,
|
||||
|
||||
@@ -44,7 +44,7 @@ pub struct ProtocolVersion(u32);
|
||||
|
||||
impl ProtocolVersion {
|
||||
pub const fn new(major: u16, minor: u16) -> Self {
|
||||
Self((major as u32) << 16 | minor as u32)
|
||||
Self(((major as u32) << 16) | minor as u32)
|
||||
}
|
||||
pub const fn minor(self) -> u16 {
|
||||
self.0 as u16
|
||||
|
||||
@@ -33,10 +33,14 @@ pub struct Response {
|
||||
#[derive(PartialEq, Debug)]
|
||||
enum State {
|
||||
Active,
|
||||
Terminating,
|
||||
Closing,
|
||||
}
|
||||
|
||||
enum WriteReady {
|
||||
Terminating,
|
||||
WaitingOnRead,
|
||||
}
|
||||
|
||||
/// A connection to a PostgreSQL database.
|
||||
///
|
||||
/// This is one half of what is returned when a new connection is established. It performs the actual IO with the
|
||||
@@ -51,7 +55,6 @@ pub struct Connection<S, T> {
|
||||
/// HACK: we need this in the Neon Proxy to forward params.
|
||||
pub parameters: HashMap<String, String>,
|
||||
receiver: mpsc::UnboundedReceiver<Request>,
|
||||
pending_request: Option<RequestMessages>,
|
||||
pending_responses: VecDeque<BackendMessage>,
|
||||
responses: VecDeque<Response>,
|
||||
state: State,
|
||||
@@ -72,7 +75,6 @@ where
|
||||
stream,
|
||||
parameters,
|
||||
receiver,
|
||||
pending_request: None,
|
||||
pending_responses,
|
||||
responses: VecDeque::new(),
|
||||
state: State::Active,
|
||||
@@ -93,26 +95,23 @@ where
|
||||
.map(|o| o.map(|r| r.map_err(Error::io)))
|
||||
}
|
||||
|
||||
fn poll_read(&mut self, cx: &mut Context<'_>) -> Result<Option<AsyncMessage>, Error> {
|
||||
if self.state != State::Active {
|
||||
trace!("poll_read: done");
|
||||
return Ok(None);
|
||||
}
|
||||
|
||||
/// Read and process messages from the connection to postgres.
|
||||
/// client <- postgres
|
||||
fn poll_read(&mut self, cx: &mut Context<'_>) -> Poll<Result<AsyncMessage, Error>> {
|
||||
loop {
|
||||
let message = match self.poll_response(cx)? {
|
||||
Poll::Ready(Some(message)) => message,
|
||||
Poll::Ready(None) => return Err(Error::closed()),
|
||||
Poll::Ready(None) => return Poll::Ready(Err(Error::closed())),
|
||||
Poll::Pending => {
|
||||
trace!("poll_read: waiting on response");
|
||||
return Ok(None);
|
||||
return Poll::Pending;
|
||||
}
|
||||
};
|
||||
|
||||
let (mut messages, request_complete) = match message {
|
||||
BackendMessage::Async(Message::NoticeResponse(body)) => {
|
||||
let error = DbError::parse(&mut body.fields()).map_err(Error::parse)?;
|
||||
return Ok(Some(AsyncMessage::Notice(error)));
|
||||
return Poll::Ready(Ok(AsyncMessage::Notice(error)));
|
||||
}
|
||||
BackendMessage::Async(Message::NotificationResponse(body)) => {
|
||||
let notification = Notification {
|
||||
@@ -120,7 +119,7 @@ where
|
||||
channel: body.channel().map_err(Error::parse)?.to_string(),
|
||||
payload: body.message().map_err(Error::parse)?.to_string(),
|
||||
};
|
||||
return Ok(Some(AsyncMessage::Notification(notification)));
|
||||
return Poll::Ready(Ok(AsyncMessage::Notification(notification)));
|
||||
}
|
||||
BackendMessage::Async(Message::ParameterStatus(body)) => {
|
||||
self.parameters.insert(
|
||||
@@ -139,8 +138,10 @@ where
|
||||
let mut response = match self.responses.pop_front() {
|
||||
Some(response) => response,
|
||||
None => match messages.next().map_err(Error::parse)? {
|
||||
Some(Message::ErrorResponse(error)) => return Err(Error::db(error)),
|
||||
_ => return Err(Error::unexpected_message()),
|
||||
Some(Message::ErrorResponse(error)) => {
|
||||
return Poll::Ready(Err(Error::db(error)))
|
||||
}
|
||||
_ => return Poll::Ready(Err(Error::unexpected_message())),
|
||||
},
|
||||
};
|
||||
|
||||
@@ -164,18 +165,14 @@ where
|
||||
request_complete,
|
||||
});
|
||||
trace!("poll_read: waiting on sender");
|
||||
return Ok(None);
|
||||
return Poll::Pending;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Fetch the next client request and enqueue the response sender.
|
||||
fn poll_request(&mut self, cx: &mut Context<'_>) -> Poll<Option<RequestMessages>> {
|
||||
if let Some(messages) = self.pending_request.take() {
|
||||
trace!("retrying pending request");
|
||||
return Poll::Ready(Some(messages));
|
||||
}
|
||||
|
||||
if self.receiver.is_closed() {
|
||||
return Poll::Ready(None);
|
||||
}
|
||||
@@ -193,74 +190,80 @@ where
|
||||
}
|
||||
}
|
||||
|
||||
fn poll_write(&mut self, cx: &mut Context<'_>) -> Result<bool, Error> {
|
||||
/// Process client requests and write them to the postgres connection, flushing if necessary.
|
||||
/// client -> postgres
|
||||
fn poll_write(&mut self, cx: &mut Context<'_>) -> Poll<Result<WriteReady, Error>> {
|
||||
loop {
|
||||
if self.state == State::Closing {
|
||||
trace!("poll_write: done");
|
||||
return Ok(false);
|
||||
}
|
||||
|
||||
if Pin::new(&mut self.stream)
|
||||
.poll_ready(cx)
|
||||
.map_err(Error::io)?
|
||||
.is_pending()
|
||||
{
|
||||
trace!("poll_write: waiting on socket");
|
||||
return Ok(false);
|
||||
|
||||
// poll_ready is self-flushing.
|
||||
return Poll::Pending;
|
||||
}
|
||||
|
||||
let request = match self.poll_request(cx) {
|
||||
Poll::Ready(Some(request)) => request,
|
||||
Poll::Ready(None) if self.responses.is_empty() && self.state == State::Active => {
|
||||
match self.poll_request(cx) {
|
||||
// send the message to postgres
|
||||
Poll::Ready(Some(RequestMessages::Single(request))) => {
|
||||
Pin::new(&mut self.stream)
|
||||
.start_send(request)
|
||||
.map_err(Error::io)?;
|
||||
}
|
||||
// No more messages from the client, and no more responses to wait for.
|
||||
// Send a terminate message to postgres
|
||||
Poll::Ready(None) if self.responses.is_empty() => {
|
||||
trace!("poll_write: at eof, terminating");
|
||||
self.state = State::Terminating;
|
||||
let mut request = BytesMut::new();
|
||||
frontend::terminate(&mut request);
|
||||
RequestMessages::Single(FrontendMessage::Raw(request.freeze()))
|
||||
let request = FrontendMessage::Raw(request.freeze());
|
||||
|
||||
Pin::new(&mut self.stream)
|
||||
.start_send(request)
|
||||
.map_err(Error::io)?;
|
||||
|
||||
trace!("poll_write: sent eof, closing");
|
||||
trace!("poll_write: done");
|
||||
return Poll::Ready(Ok(WriteReady::Terminating));
|
||||
}
|
||||
// No more messages from the client, but there are still some responses to wait for.
|
||||
Poll::Ready(None) => {
|
||||
trace!(
|
||||
"poll_write: at eof, pending responses {}",
|
||||
self.responses.len()
|
||||
);
|
||||
return Ok(true);
|
||||
ready!(self.poll_flush(cx))?;
|
||||
return Poll::Ready(Ok(WriteReady::WaitingOnRead));
|
||||
}
|
||||
// Still waiting for a message from the client.
|
||||
Poll::Pending => {
|
||||
trace!("poll_write: waiting on request");
|
||||
return Ok(true);
|
||||
}
|
||||
};
|
||||
|
||||
match request {
|
||||
RequestMessages::Single(request) => {
|
||||
Pin::new(&mut self.stream)
|
||||
.start_send(request)
|
||||
.map_err(Error::io)?;
|
||||
if self.state == State::Terminating {
|
||||
trace!("poll_write: sent eof, closing");
|
||||
self.state = State::Closing;
|
||||
}
|
||||
ready!(self.poll_flush(cx))?;
|
||||
return Poll::Pending;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn poll_flush(&mut self, cx: &mut Context<'_>) -> Result<(), Error> {
|
||||
fn poll_flush(&mut self, cx: &mut Context<'_>) -> Poll<Result<(), Error>> {
|
||||
match Pin::new(&mut self.stream)
|
||||
.poll_flush(cx)
|
||||
.map_err(Error::io)?
|
||||
{
|
||||
Poll::Ready(()) => trace!("poll_flush: flushed"),
|
||||
Poll::Pending => trace!("poll_flush: waiting on socket"),
|
||||
Poll::Ready(()) => {
|
||||
trace!("poll_flush: flushed");
|
||||
Poll::Ready(Ok(()))
|
||||
}
|
||||
Poll::Pending => {
|
||||
trace!("poll_flush: waiting on socket");
|
||||
Poll::Pending
|
||||
}
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn poll_shutdown(&mut self, cx: &mut Context<'_>) -> Poll<Result<(), Error>> {
|
||||
if self.state != State::Closing {
|
||||
return Poll::Pending;
|
||||
}
|
||||
|
||||
match Pin::new(&mut self.stream)
|
||||
.poll_close(cx)
|
||||
.map_err(Error::io)?
|
||||
@@ -289,18 +292,30 @@ where
|
||||
&mut self,
|
||||
cx: &mut Context<'_>,
|
||||
) -> Poll<Option<Result<AsyncMessage, Error>>> {
|
||||
let message = self.poll_read(cx)?;
|
||||
let want_flush = self.poll_write(cx)?;
|
||||
if want_flush {
|
||||
self.poll_flush(cx)?;
|
||||
if self.state != State::Closing {
|
||||
// if the state is still active, try read from and write to postgres.
|
||||
let message = self.poll_read(cx)?;
|
||||
let closing = self.poll_write(cx)?;
|
||||
if let Poll::Ready(WriteReady::Terminating) = closing {
|
||||
self.state = State::Closing;
|
||||
}
|
||||
|
||||
if let Poll::Ready(message) = message {
|
||||
return Poll::Ready(Some(Ok(message)));
|
||||
}
|
||||
|
||||
// poll_read returned Pending.
|
||||
// poll_write returned Pending or Ready(WriteReady::WaitingOnRead).
|
||||
// if poll_write returned Ready(WriteReady::WaitingOnRead), then we are waiting to read more data from postgres.
|
||||
if self.state != State::Closing {
|
||||
return Poll::Pending;
|
||||
}
|
||||
}
|
||||
match message {
|
||||
Some(message) => Poll::Ready(Some(Ok(message))),
|
||||
None => match self.poll_shutdown(cx) {
|
||||
Poll::Ready(Ok(())) => Poll::Ready(None),
|
||||
Poll::Ready(Err(e)) => Poll::Ready(Some(Err(e))),
|
||||
Poll::Pending => Poll::Pending,
|
||||
},
|
||||
|
||||
match self.poll_shutdown(cx) {
|
||||
Poll::Ready(Ok(())) => Poll::Ready(None),
|
||||
Poll::Ready(Err(e)) => Poll::Ready(Some(Err(e))),
|
||||
Poll::Pending => Poll::Pending,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -43,6 +43,17 @@ impl RemoteStorageKind {
|
||||
}
|
||||
}
|
||||
|
||||
impl RemoteStorageConfig {
|
||||
/// Helper to fetch the configured concurrency limit.
|
||||
pub fn concurrency_limit(&self) -> Option<usize> {
|
||||
match &self.storage {
|
||||
RemoteStorageKind::LocalFs { .. } => None,
|
||||
RemoteStorageKind::AwsS3(c) => Some(c.concurrency_limit.into()),
|
||||
RemoteStorageKind::AzureContainer(c) => Some(c.concurrency_limit.into()),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn default_timeout() -> Duration {
|
||||
RemoteStorageConfig::DEFAULT_TIMEOUT
|
||||
}
|
||||
@@ -115,13 +126,15 @@ fn default_max_keys_per_list_response() -> Option<i32> {
|
||||
}
|
||||
|
||||
fn default_azure_conn_pool_size() -> usize {
|
||||
// Conservative default: no connection pooling. At time of writing this is the Azure
|
||||
// SDK's default as well, due to historic reports of hard-to-reproduce issues
|
||||
// By default, the Azure SDK does no connection pooling, due to historic reports of hard-to-reproduce issues
|
||||
// (https://github.com/hyperium/hyper/issues/2312)
|
||||
//
|
||||
// However, using connection pooling is important to avoid exhausting client ports when
|
||||
// doing huge numbers of requests (https://github.com/neondatabase/cloud/issues/20971)
|
||||
0
|
||||
//
|
||||
// We therefore enable a modest pool size by default: this may be configured to zero if
|
||||
// issues like the alleged upstream hyper issue appear.
|
||||
8
|
||||
}
|
||||
|
||||
impl Debug for S3Config {
|
||||
|
||||
@@ -5,9 +5,12 @@ edition.workspace = true
|
||||
license.workspace = true
|
||||
|
||||
[dependencies]
|
||||
anyhow.workspace = true
|
||||
const_format.workspace = true
|
||||
serde.workspace = true
|
||||
serde_json.workspace = true
|
||||
postgres_ffi.workspace = true
|
||||
pq_proto.workspace = true
|
||||
tokio.workspace = true
|
||||
utils.workspace = true
|
||||
pageserver_api.workspace = true
|
||||
|
||||
@@ -4,12 +4,15 @@ use const_format::formatcp;
|
||||
use pq_proto::SystemId;
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
pub mod membership;
|
||||
/// Public API types
|
||||
pub mod models;
|
||||
|
||||
/// Consensus logical timestamp. Note: it is a part of sk control file.
|
||||
pub type Term = u64;
|
||||
pub const INVALID_TERM: Term = 0;
|
||||
/// With this term timeline is created initially. It
|
||||
/// is a normal term except wp is never elected with it.
|
||||
pub const INITIAL_TERM: Term = 0;
|
||||
|
||||
/// Information about Postgres. Safekeeper gets it once and then verifies all
|
||||
/// further connections from computes match. Note: it is a part of sk control
|
||||
|
||||
166
libs/safekeeper_api/src/membership.rs
Normal file
166
libs/safekeeper_api/src/membership.rs
Normal file
@@ -0,0 +1,166 @@
|
||||
//! Types defining safekeeper membership, see
|
||||
//! rfcs/035-safekeeper-dynamic-membership-change.md
|
||||
//! for details.
|
||||
|
||||
use std::{collections::HashSet, fmt::Display};
|
||||
|
||||
use anyhow;
|
||||
use anyhow::bail;
|
||||
use serde::{Deserialize, Serialize};
|
||||
use utils::id::NodeId;
|
||||
|
||||
/// Number uniquely identifying safekeeper configuration.
|
||||
/// Note: it is a part of sk control file.
|
||||
pub type Generation = u32;
|
||||
/// 1 is the first valid generation, 0 is used as
|
||||
/// a placeholder before we fully migrate to generations.
|
||||
pub const INVALID_GENERATION: Generation = 0;
|
||||
pub const INITIAL_GENERATION: Generation = 1;
|
||||
|
||||
/// Membership is defined by ids so e.g. walproposer uses them to figure out
|
||||
/// quorums, but we also carry host and port to give wp idea where to connect.
|
||||
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
|
||||
pub struct SafekeeperId {
|
||||
pub id: NodeId,
|
||||
pub host: String,
|
||||
/// We include here only port for computes -- that is, pg protocol tenant
|
||||
/// only port, or wide pg protocol port if the former is not configured.
|
||||
pub pg_port: u16,
|
||||
}
|
||||
|
||||
impl Display for SafekeeperId {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
write!(f, "[id={}, ep={}:{}]", self.id, self.host, self.pg_port)
|
||||
}
|
||||
}
|
||||
|
||||
/// Set of safekeepers.
|
||||
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
|
||||
#[serde(transparent)]
|
||||
pub struct MemberSet {
|
||||
pub members: Vec<SafekeeperId>,
|
||||
}
|
||||
|
||||
impl MemberSet {
|
||||
pub fn empty() -> Self {
|
||||
MemberSet {
|
||||
members: Vec::new(),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn new(members: Vec<SafekeeperId>) -> anyhow::Result<Self> {
|
||||
let hs: HashSet<NodeId> = HashSet::from_iter(members.iter().map(|sk| sk.id));
|
||||
if hs.len() != members.len() {
|
||||
bail!("duplicate safekeeper id in the set {:?}", members);
|
||||
}
|
||||
Ok(MemberSet { members })
|
||||
}
|
||||
|
||||
pub fn contains(&self, sk: &SafekeeperId) -> bool {
|
||||
self.members.iter().any(|m| m.id == sk.id)
|
||||
}
|
||||
|
||||
pub fn add(&mut self, sk: SafekeeperId) -> anyhow::Result<()> {
|
||||
if self.contains(&sk) {
|
||||
bail!(format!(
|
||||
"sk {} is already member of the set {}",
|
||||
sk.id, self
|
||||
));
|
||||
}
|
||||
self.members.push(sk);
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
impl Display for MemberSet {
|
||||
/// Display as a comma separated list of members.
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
let sks_str = self
|
||||
.members
|
||||
.iter()
|
||||
.map(|m| m.to_string())
|
||||
.collect::<Vec<_>>();
|
||||
write!(f, "({})", sks_str.join(", "))
|
||||
}
|
||||
}
|
||||
|
||||
/// Safekeeper membership configuration.
|
||||
/// Note: it is a part of both control file and http API.
|
||||
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
|
||||
pub struct Configuration {
|
||||
/// Unique id.
|
||||
pub generation: Generation,
|
||||
/// Current members of the configuration.
|
||||
pub members: MemberSet,
|
||||
/// Some means it is a joint conf.
|
||||
pub new_members: Option<MemberSet>,
|
||||
}
|
||||
|
||||
impl Configuration {
|
||||
/// Used for pre-generations timelines, will be removed eventually.
|
||||
pub fn empty() -> Self {
|
||||
Configuration {
|
||||
generation: INVALID_GENERATION,
|
||||
members: MemberSet::empty(),
|
||||
new_members: None,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl Display for Configuration {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
write!(
|
||||
f,
|
||||
"gen={}, members={}, new_members={}",
|
||||
self.generation,
|
||||
self.members,
|
||||
self.new_members
|
||||
.as_ref()
|
||||
.map(ToString::to_string)
|
||||
.unwrap_or(String::from("none"))
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::{MemberSet, SafekeeperId};
|
||||
use utils::id::NodeId;
|
||||
|
||||
#[test]
|
||||
fn test_member_set() {
|
||||
let mut members = MemberSet::empty();
|
||||
members
|
||||
.add(SafekeeperId {
|
||||
id: NodeId(42),
|
||||
host: String::from("lala.org"),
|
||||
pg_port: 5432,
|
||||
})
|
||||
.unwrap();
|
||||
|
||||
members
|
||||
.add(SafekeeperId {
|
||||
id: NodeId(42),
|
||||
host: String::from("lala.org"),
|
||||
pg_port: 5432,
|
||||
})
|
||||
.expect_err("duplicate must not be allowed");
|
||||
|
||||
members
|
||||
.add(SafekeeperId {
|
||||
id: NodeId(43),
|
||||
host: String::from("bubu.org"),
|
||||
pg_port: 5432,
|
||||
})
|
||||
.unwrap();
|
||||
|
||||
println!("members: {}", members);
|
||||
|
||||
let j = serde_json::to_string(&members).expect("failed to serialize");
|
||||
println!("members json: {}", j);
|
||||
assert_eq!(
|
||||
j,
|
||||
r#"[{"id":42,"host":"lala.org","pg_port":5432},{"id":43,"host":"bubu.org","pg_port":5432}]"#
|
||||
);
|
||||
}
|
||||
}
|
||||
@@ -1,5 +1,6 @@
|
||||
//! Types used in safekeeper http API. Many of them are also reused internally.
|
||||
|
||||
use pageserver_api::shard::ShardIdentity;
|
||||
use postgres_ffi::TimestampTz;
|
||||
use serde::{Deserialize, Serialize};
|
||||
use std::net::SocketAddr;
|
||||
@@ -11,7 +12,7 @@ use utils::{
|
||||
pageserver_feedback::PageserverFeedback,
|
||||
};
|
||||
|
||||
use crate::{ServerInfo, Term};
|
||||
use crate::{membership::Configuration, ServerInfo, Term};
|
||||
|
||||
#[derive(Debug, Serialize)]
|
||||
pub struct SafekeeperStatus {
|
||||
@@ -22,13 +23,16 @@ pub struct SafekeeperStatus {
|
||||
pub struct TimelineCreateRequest {
|
||||
pub tenant_id: TenantId,
|
||||
pub timeline_id: TimelineId,
|
||||
pub peer_ids: Option<Vec<NodeId>>,
|
||||
pub mconf: Configuration,
|
||||
pub pg_version: u32,
|
||||
pub system_id: Option<u64>,
|
||||
// By default WAL_SEGMENT_SIZE
|
||||
pub wal_seg_size: Option<u32>,
|
||||
pub commit_lsn: Lsn,
|
||||
// If not passed, it is assigned to the beginning of commit_lsn segment.
|
||||
pub local_start_lsn: Option<Lsn>,
|
||||
pub start_lsn: Lsn,
|
||||
// Normal creation should omit this field (start_lsn initializes all LSNs).
|
||||
// However, we allow specifying custom value higher than start_lsn for
|
||||
// manual recovery case, see test_s3_wal_replay.
|
||||
pub commit_lsn: Option<Lsn>,
|
||||
}
|
||||
|
||||
/// Same as TermLsn, but serializes LSN using display serializer
|
||||
@@ -143,7 +147,13 @@ pub type ConnectionId = u32;
|
||||
|
||||
/// Serialize is used only for json'ing in API response. Also used internally.
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub struct WalSenderState {
|
||||
pub enum WalSenderState {
|
||||
Vanilla(VanillaWalSenderState),
|
||||
Interpreted(InterpretedWalSenderState),
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub struct VanillaWalSenderState {
|
||||
pub ttid: TenantTimelineId,
|
||||
pub addr: SocketAddr,
|
||||
pub conn_id: ConnectionId,
|
||||
@@ -152,6 +162,17 @@ pub struct WalSenderState {
|
||||
pub feedback: ReplicationFeedback,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub struct InterpretedWalSenderState {
|
||||
pub ttid: TenantTimelineId,
|
||||
pub shard: ShardIdentity,
|
||||
pub addr: SocketAddr,
|
||||
pub conn_id: ConnectionId,
|
||||
// postgres application_name
|
||||
pub appname: Option<String>,
|
||||
pub feedback: ReplicationFeedback,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub struct WalReceiverState {
|
||||
/// None means it is recovery initiated by us (this safekeeper).
|
||||
@@ -172,6 +193,7 @@ pub enum WalReceiverStatus {
|
||||
pub struct TimelineStatus {
|
||||
pub tenant_id: TenantId,
|
||||
pub timeline_id: TimelineId,
|
||||
pub mconf: Configuration,
|
||||
pub acceptor_state: AcceptorStateStatus,
|
||||
pub pg_info: ServerInfo,
|
||||
pub flush_lsn: Lsn,
|
||||
@@ -186,6 +208,20 @@ pub struct TimelineStatus {
|
||||
pub walreceivers: Vec<WalReceiverState>,
|
||||
}
|
||||
|
||||
/// Request to switch membership configuration.
|
||||
#[derive(Serialize, Deserialize)]
|
||||
#[serde(transparent)]
|
||||
pub struct TimelineMembershipSwitchRequest {
|
||||
pub mconf: Configuration,
|
||||
}
|
||||
|
||||
/// In response both previous and current configuration are sent.
|
||||
#[derive(Serialize, Deserialize)]
|
||||
pub struct TimelineMembershipSwitchResponse {
|
||||
pub previous_conf: Configuration,
|
||||
pub current_conf: Configuration,
|
||||
}
|
||||
|
||||
fn lsn_invalid() -> Lsn {
|
||||
Lsn::INVALID
|
||||
}
|
||||
@@ -241,3 +277,8 @@ pub struct TimelineTermBumpResponse {
|
||||
pub previous_term: u64,
|
||||
pub current_term: u64,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Deserialize, Serialize)]
|
||||
pub struct SafekeeperUtilization {
|
||||
pub timeline_count: u64,
|
||||
}
|
||||
|
||||
@@ -38,7 +38,6 @@ pub mod http;
|
||||
|
||||
use opentelemetry::trace::TracerProvider;
|
||||
use opentelemetry::KeyValue;
|
||||
use opentelemetry_sdk::Resource;
|
||||
use tracing::Subscriber;
|
||||
use tracing_subscriber::registry::LookupSpan;
|
||||
use tracing_subscriber::Layer;
|
||||
@@ -121,7 +120,10 @@ where
|
||||
S: Subscriber + for<'span> LookupSpan<'span>,
|
||||
{
|
||||
// Sets up exporter from the OTEL_EXPORTER_* environment variables.
|
||||
let exporter = opentelemetry_otlp::new_exporter().http();
|
||||
let exporter = opentelemetry_otlp::SpanExporter::builder()
|
||||
.with_http()
|
||||
.build()
|
||||
.expect("could not initialize opentelemetry exporter");
|
||||
|
||||
// TODO: opentelemetry::global::set_error_handler() with custom handler that
|
||||
// bypasses default tracing layers, but logs regular looking log
|
||||
@@ -132,17 +134,13 @@ where
|
||||
opentelemetry_sdk::propagation::TraceContextPropagator::new(),
|
||||
);
|
||||
|
||||
let tracer = opentelemetry_otlp::new_pipeline()
|
||||
.tracing()
|
||||
.with_exporter(exporter)
|
||||
.with_trace_config(opentelemetry_sdk::trace::Config::default().with_resource(
|
||||
Resource::new(vec![KeyValue::new(
|
||||
opentelemetry_semantic_conventions::resource::SERVICE_NAME,
|
||||
service_name,
|
||||
)]),
|
||||
))
|
||||
.install_batch(opentelemetry_sdk::runtime::Tokio)
|
||||
.expect("could not initialize opentelemetry exporter")
|
||||
let tracer = opentelemetry_sdk::trace::TracerProvider::builder()
|
||||
.with_batch_exporter(exporter, opentelemetry_sdk::runtime::Tokio)
|
||||
.with_resource(opentelemetry_sdk::Resource::new(vec![KeyValue::new(
|
||||
opentelemetry_semantic_conventions::resource::SERVICE_NAME,
|
||||
service_name,
|
||||
)]))
|
||||
.build()
|
||||
.tracer("global");
|
||||
|
||||
tracing_opentelemetry::layer().with_tracer(tracer)
|
||||
|
||||
@@ -26,6 +26,7 @@ git-version.workspace = true
|
||||
hex = { workspace = true, features = ["serde"] }
|
||||
humantime.workspace = true
|
||||
hyper0 = { workspace = true, features = ["full"] }
|
||||
inferno.workspace = true
|
||||
itertools.workspace = true
|
||||
fail.workspace = true
|
||||
futures = { workspace = true }
|
||||
|
||||
@@ -9,7 +9,7 @@ use serde::{Deserialize, Serialize};
|
||||
use tokio_util::sync::CancellationToken;
|
||||
use tracing::*;
|
||||
|
||||
/// Declare a failpoint that can use the `pause` failpoint action.
|
||||
/// Declare a failpoint that can use to `pause` failpoint action.
|
||||
/// We don't want to block the executor thread, hence, spawn_blocking + await.
|
||||
#[macro_export]
|
||||
macro_rules! pausable_failpoint {
|
||||
@@ -181,7 +181,7 @@ pub async fn failpoints_handler(
|
||||
) -> Result<Response<Body>, ApiError> {
|
||||
if !fail::has_failpoints() {
|
||||
return Err(ApiError::BadRequest(anyhow::anyhow!(
|
||||
"Cannot manage failpoints because storage was compiled without failpoints support"
|
||||
"Cannot manage failpoints because neon was compiled without failpoints support"
|
||||
)));
|
||||
}
|
||||
|
||||
|
||||
@@ -112,9 +112,9 @@ impl Serialize for Generation {
|
||||
// We should never be asked to serialize a None. Structures
|
||||
// that include an optional generation should convert None to an
|
||||
// Option<Generation>::None
|
||||
Err(serde::ser::Error::custom(
|
||||
"Tried to serialize invalid generation ({self})",
|
||||
))
|
||||
Err(serde::ser::Error::custom(format!(
|
||||
"Tried to serialize invalid generation ({self:?})"
|
||||
)))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
54
libs/utils/src/guard_arc_swap.rs
Normal file
54
libs/utils/src/guard_arc_swap.rs
Normal file
@@ -0,0 +1,54 @@
|
||||
//! A wrapper around `ArcSwap` that ensures there is only one writer at a time and writes
|
||||
//! don't block reads.
|
||||
|
||||
use arc_swap::ArcSwap;
|
||||
use std::sync::Arc;
|
||||
use tokio::sync::TryLockError;
|
||||
|
||||
pub struct GuardArcSwap<T> {
|
||||
inner: ArcSwap<T>,
|
||||
guard: tokio::sync::Mutex<()>,
|
||||
}
|
||||
|
||||
pub struct Guard<'a, T> {
|
||||
_guard: tokio::sync::MutexGuard<'a, ()>,
|
||||
inner: &'a ArcSwap<T>,
|
||||
}
|
||||
|
||||
impl<T> GuardArcSwap<T> {
|
||||
pub fn new(inner: T) -> Self {
|
||||
Self {
|
||||
inner: ArcSwap::new(Arc::new(inner)),
|
||||
guard: tokio::sync::Mutex::new(()),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn read(&self) -> Arc<T> {
|
||||
self.inner.load_full()
|
||||
}
|
||||
|
||||
pub async fn write_guard(&self) -> Guard<'_, T> {
|
||||
Guard {
|
||||
_guard: self.guard.lock().await,
|
||||
inner: &self.inner,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn try_write_guard(&self) -> Result<Guard<'_, T>, TryLockError> {
|
||||
let guard = self.guard.try_lock()?;
|
||||
Ok(Guard {
|
||||
_guard: guard,
|
||||
inner: &self.inner,
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
impl<T> Guard<'_, T> {
|
||||
pub fn read(&self) -> Arc<T> {
|
||||
self.inner.load_full()
|
||||
}
|
||||
|
||||
pub fn write(&mut self, value: T) {
|
||||
self.inner.store(Arc::new(value));
|
||||
}
|
||||
}
|
||||
@@ -15,7 +15,7 @@ use once_cell::sync::Lazy;
|
||||
use regex::Regex;
|
||||
use routerify::ext::RequestExt;
|
||||
use routerify::{Middleware, RequestInfo, Router, RouterBuilder};
|
||||
use tokio::sync::{mpsc, Mutex};
|
||||
use tokio::sync::{mpsc, Mutex, Notify};
|
||||
use tokio_stream::wrappers::ReceiverStream;
|
||||
use tokio_util::io::ReaderStream;
|
||||
use tracing::{debug, info, info_span, warn, Instrument};
|
||||
@@ -350,33 +350,53 @@ pub async fn profile_cpu_handler(req: Request<Body>) -> Result<Response<Body>, A
|
||||
};
|
||||
let seconds = match parse_query_param(&req, "seconds")? {
|
||||
None => 5,
|
||||
Some(seconds @ 1..=30) => seconds,
|
||||
Some(_) => return Err(ApiError::BadRequest(anyhow!("duration must be 1-30 secs"))),
|
||||
Some(seconds @ 1..=60) => seconds,
|
||||
Some(_) => return Err(ApiError::BadRequest(anyhow!("duration must be 1-60 secs"))),
|
||||
};
|
||||
let frequency_hz = match parse_query_param(&req, "frequency")? {
|
||||
None => 99,
|
||||
Some(1001..) => return Err(ApiError::BadRequest(anyhow!("frequency must be <=1000 Hz"))),
|
||||
Some(frequency) => frequency,
|
||||
};
|
||||
|
||||
// Only allow one profiler at a time.
|
||||
static PROFILE_LOCK: Lazy<Mutex<()>> = Lazy::new(|| Mutex::new(()));
|
||||
let _lock = PROFILE_LOCK
|
||||
.try_lock()
|
||||
.map_err(|_| ApiError::Conflict("profiler already running".into()))?;
|
||||
let force: bool = parse_query_param(&req, "force")?.unwrap_or_default();
|
||||
|
||||
// Take the profile.
|
||||
let report = tokio::task::spawn_blocking(move || {
|
||||
static PROFILE_LOCK: Lazy<Mutex<()>> = Lazy::new(|| Mutex::new(()));
|
||||
static PROFILE_CANCEL: Lazy<Notify> = Lazy::new(Notify::new);
|
||||
|
||||
let report = {
|
||||
// Only allow one profiler at a time. If force is true, cancel a running profile (e.g. a
|
||||
// Grafana continuous profile). We use a try_lock() loop when cancelling instead of waiting
|
||||
// for a lock(), to avoid races where the notify isn't currently awaited.
|
||||
let _lock = loop {
|
||||
match PROFILE_LOCK.try_lock() {
|
||||
Ok(lock) => break lock,
|
||||
Err(_) if force => PROFILE_CANCEL.notify_waiters(),
|
||||
Err(_) => {
|
||||
return Err(ApiError::Conflict(
|
||||
"profiler already running (use ?force=true to cancel it)".into(),
|
||||
))
|
||||
}
|
||||
}
|
||||
tokio::time::sleep(Duration::from_millis(1)).await; // don't busy-wait
|
||||
};
|
||||
|
||||
let guard = ProfilerGuardBuilder::default()
|
||||
.frequency(frequency_hz)
|
||||
.blocklist(&["libc", "libgcc", "pthread", "vdso"])
|
||||
.build()?;
|
||||
std::thread::sleep(Duration::from_secs(seconds));
|
||||
guard.report().build()
|
||||
})
|
||||
.await
|
||||
.map_err(|join_err| ApiError::InternalServerError(join_err.into()))?
|
||||
.map_err(|pprof_err| ApiError::InternalServerError(pprof_err.into()))?;
|
||||
.build()
|
||||
.map_err(|err| ApiError::InternalServerError(err.into()))?;
|
||||
|
||||
tokio::select! {
|
||||
_ = tokio::time::sleep(Duration::from_secs(seconds)) => {},
|
||||
_ = PROFILE_CANCEL.notified() => {},
|
||||
};
|
||||
|
||||
guard
|
||||
.report()
|
||||
.build()
|
||||
.map_err(|err| ApiError::InternalServerError(err.into()))?
|
||||
};
|
||||
|
||||
// Return the report in the requested format.
|
||||
match format {
|
||||
@@ -417,6 +437,7 @@ pub async fn profile_heap_handler(req: Request<Body>) -> Result<Response<Body>,
|
||||
enum Format {
|
||||
Jemalloc,
|
||||
Pprof,
|
||||
Svg,
|
||||
}
|
||||
|
||||
// Parameters.
|
||||
@@ -424,9 +445,24 @@ pub async fn profile_heap_handler(req: Request<Body>) -> Result<Response<Body>,
|
||||
None => Format::Pprof,
|
||||
Some("jemalloc") => Format::Jemalloc,
|
||||
Some("pprof") => Format::Pprof,
|
||||
Some("svg") => Format::Svg,
|
||||
Some(format) => return Err(ApiError::BadRequest(anyhow!("invalid format {format}"))),
|
||||
};
|
||||
|
||||
// Functions and mappings to strip when symbolizing pprof profiles. If true,
|
||||
// also remove child frames.
|
||||
static STRIP_FUNCTIONS: Lazy<Vec<(Regex, bool)>> = Lazy::new(|| {
|
||||
vec![
|
||||
(Regex::new("^__rust").unwrap(), false),
|
||||
(Regex::new("^_start$").unwrap(), false),
|
||||
(Regex::new("^irallocx_prof").unwrap(), true),
|
||||
(Regex::new("^prof_alloc_prep").unwrap(), true),
|
||||
(Regex::new("^std::rt::lang_start").unwrap(), false),
|
||||
(Regex::new("^std::sys::backtrace::__rust").unwrap(), false),
|
||||
]
|
||||
});
|
||||
const STRIP_MAPPINGS: &[&str] = &["libc", "libgcc", "pthread", "vdso"];
|
||||
|
||||
// Obtain profiler handle.
|
||||
let mut prof_ctl = jemalloc_pprof::PROF_CTL
|
||||
.as_ref()
|
||||
@@ -464,24 +500,9 @@ pub async fn profile_heap_handler(req: Request<Body>) -> Result<Response<Body>,
|
||||
// Symbolize the profile.
|
||||
// TODO: consider moving this upstream to jemalloc_pprof and avoiding the
|
||||
// serialization roundtrip.
|
||||
static STRIP_FUNCTIONS: Lazy<Vec<(Regex, bool)>> = Lazy::new(|| {
|
||||
// Functions to strip from profiles. If true, also remove child frames.
|
||||
vec![
|
||||
(Regex::new("^__rust").unwrap(), false),
|
||||
(Regex::new("^_start$").unwrap(), false),
|
||||
(Regex::new("^irallocx_prof").unwrap(), true),
|
||||
(Regex::new("^prof_alloc_prep").unwrap(), true),
|
||||
(Regex::new("^std::rt::lang_start").unwrap(), false),
|
||||
(Regex::new("^std::sys::backtrace::__rust").unwrap(), false),
|
||||
]
|
||||
});
|
||||
let profile = pprof::decode(&bytes)?;
|
||||
let profile = pprof::symbolize(profile)?;
|
||||
let profile = pprof::strip_locations(
|
||||
profile,
|
||||
&["libc", "libgcc", "pthread", "vdso"],
|
||||
&STRIP_FUNCTIONS,
|
||||
);
|
||||
let profile = pprof::strip_locations(profile, STRIP_MAPPINGS, &STRIP_FUNCTIONS);
|
||||
pprof::encode(&profile)
|
||||
})
|
||||
.await
|
||||
@@ -494,6 +515,27 @@ pub async fn profile_heap_handler(req: Request<Body>) -> Result<Response<Body>,
|
||||
.body(Body::from(data))
|
||||
.map_err(|err| ApiError::InternalServerError(err.into()))
|
||||
}
|
||||
|
||||
Format::Svg => {
|
||||
let body = tokio::task::spawn_blocking(move || {
|
||||
let bytes = prof_ctl.dump_pprof()?;
|
||||
let profile = pprof::decode(&bytes)?;
|
||||
let profile = pprof::symbolize(profile)?;
|
||||
let profile = pprof::strip_locations(profile, STRIP_MAPPINGS, &STRIP_FUNCTIONS);
|
||||
let mut opts = inferno::flamegraph::Options::default();
|
||||
opts.title = "Heap inuse".to_string();
|
||||
opts.count_name = "bytes".to_string();
|
||||
pprof::flamegraph(profile, &mut opts)
|
||||
})
|
||||
.await
|
||||
.map_err(|join_err| ApiError::InternalServerError(join_err.into()))?
|
||||
.map_err(ApiError::InternalServerError)?;
|
||||
Response::builder()
|
||||
.status(200)
|
||||
.header(CONTENT_TYPE, "image/svg+xml")
|
||||
.body(Body::from(body))
|
||||
.map_err(|err| ApiError::InternalServerError(err.into()))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -98,6 +98,8 @@ pub mod try_rcu;
|
||||
|
||||
pub mod pprof;
|
||||
|
||||
pub mod guard_arc_swap;
|
||||
|
||||
// Re-export used in macro. Avoids adding git-version as dep in target crates.
|
||||
#[doc(hidden)]
|
||||
pub use git_version;
|
||||
|
||||
@@ -260,7 +260,7 @@ impl FromStr for Lsn {
|
||||
{
|
||||
let left_num = u32::from_str_radix(left, 16).map_err(|_| LsnParseError)?;
|
||||
let right_num = u32::from_str_radix(right, 16).map_err(|_| LsnParseError)?;
|
||||
Ok(Lsn((left_num as u64) << 32 | right_num as u64))
|
||||
Ok(Lsn(((left_num as u64) << 32) | right_num as u64))
|
||||
} else {
|
||||
Err(LsnParseError)
|
||||
}
|
||||
|
||||
@@ -1,8 +1,9 @@
|
||||
use anyhow::bail;
|
||||
use flate2::write::{GzDecoder, GzEncoder};
|
||||
use flate2::Compression;
|
||||
use itertools::Itertools as _;
|
||||
use once_cell::sync::Lazy;
|
||||
use pprof::protos::{Function, Line, Message as _, Profile};
|
||||
use pprof::protos::{Function, Line, Location, Message as _, Profile};
|
||||
use regex::Regex;
|
||||
|
||||
use std::borrow::Cow;
|
||||
@@ -188,3 +189,59 @@ pub fn strip_locations(
|
||||
|
||||
profile
|
||||
}
|
||||
|
||||
/// Generates an SVG flamegraph from a symbolized pprof profile.
|
||||
pub fn flamegraph(
|
||||
profile: Profile,
|
||||
opts: &mut inferno::flamegraph::Options,
|
||||
) -> anyhow::Result<Vec<u8>> {
|
||||
if profile.mapping.iter().any(|m| !m.has_functions) {
|
||||
bail!("profile not symbolized");
|
||||
}
|
||||
|
||||
// Index locations, functions, and strings.
|
||||
let locations: HashMap<u64, Location> =
|
||||
profile.location.into_iter().map(|l| (l.id, l)).collect();
|
||||
let functions: HashMap<u64, Function> =
|
||||
profile.function.into_iter().map(|f| (f.id, f)).collect();
|
||||
let strings = profile.string_table;
|
||||
|
||||
// Resolve stacks as function names, and sum sample values per stack. Also reverse the stack,
|
||||
// since inferno expects it bottom-up.
|
||||
let mut stacks: HashMap<Vec<&str>, i64> = HashMap::new();
|
||||
for sample in profile.sample {
|
||||
let mut stack = Vec::with_capacity(sample.location_id.len());
|
||||
for location in sample.location_id.into_iter().rev() {
|
||||
let Some(location) = locations.get(&location) else {
|
||||
bail!("missing location {location}");
|
||||
};
|
||||
for line in location.line.iter().rev() {
|
||||
let Some(function) = functions.get(&line.function_id) else {
|
||||
bail!("missing function {}", line.function_id);
|
||||
};
|
||||
let Some(name) = strings.get(function.name as usize) else {
|
||||
bail!("missing string {}", function.name);
|
||||
};
|
||||
stack.push(name.as_str());
|
||||
}
|
||||
}
|
||||
let Some(&value) = sample.value.first() else {
|
||||
bail!("missing value");
|
||||
};
|
||||
*stacks.entry(stack).or_default() += value;
|
||||
}
|
||||
|
||||
// Construct stack lines for inferno.
|
||||
let lines = stacks
|
||||
.into_iter()
|
||||
.map(|(stack, value)| (stack.into_iter().join(";"), value))
|
||||
.map(|(stack, value)| format!("{stack} {value}"))
|
||||
.sorted()
|
||||
.collect_vec();
|
||||
|
||||
// Construct the flamegraph.
|
||||
let mut bytes = Vec::new();
|
||||
let lines = lines.iter().map(|line| line.as_str());
|
||||
inferno::flamegraph::from_lines(opts, lines, &mut bytes)?;
|
||||
Ok(bytes)
|
||||
}
|
||||
|
||||
@@ -96,7 +96,11 @@ impl<T: Send> Sender<T> {
|
||||
}
|
||||
}
|
||||
State::SenderWaitsForReceiverToConsume(_data) => {
|
||||
// Really, we shouldn't be polled until receiver has consumed and wakes us.
|
||||
// SAFETY: send is single threaded due to `&mut self` requirement,
|
||||
// therefore register is not concurrent.
|
||||
unsafe {
|
||||
self.state.wake_sender.register(cx.waker());
|
||||
}
|
||||
Poll::Pending
|
||||
}
|
||||
State::ReceiverGone => Poll::Ready(Err(SendError::ReceiverGone)),
|
||||
@@ -449,4 +453,38 @@ mod tests {
|
||||
let err = recv_task.await.unwrap().expect_err("should error");
|
||||
assert!(matches!(err, RecvError::SenderGone));
|
||||
}
|
||||
|
||||
#[tokio::test(start_paused = true)]
|
||||
async fn test_receiver_drop_while_waiting_for_receiver_to_consume_unblocks_sender() {
|
||||
let (mut sender, receiver) = channel();
|
||||
|
||||
let state = receiver.state.clone();
|
||||
|
||||
sender.send((), |_, _| unreachable!()).await.unwrap();
|
||||
|
||||
assert!(matches!(&*state.value.lock().unwrap(), &State::HasData(_)));
|
||||
|
||||
let unmergeable = sender.send((), |_, _| Err(()));
|
||||
let mut unmergeable = std::pin::pin!(unmergeable);
|
||||
tokio::select! {
|
||||
_ = tokio::time::sleep(FOREVER) => {},
|
||||
_ = &mut unmergeable => {
|
||||
panic!("unmergeable should not complete");
|
||||
},
|
||||
}
|
||||
|
||||
assert!(matches!(
|
||||
&*state.value.lock().unwrap(),
|
||||
&State::SenderWaitsForReceiverToConsume(_)
|
||||
));
|
||||
|
||||
drop(receiver);
|
||||
|
||||
assert!(matches!(
|
||||
&*state.value.lock().unwrap(),
|
||||
&State::ReceiverGone
|
||||
));
|
||||
|
||||
unmergeable.await.unwrap_err();
|
||||
}
|
||||
}
|
||||
|
||||
@@ -24,3 +24,18 @@ workspace_hack = { version = "0.1", path = "../../workspace_hack" }
|
||||
|
||||
[build-dependencies]
|
||||
tonic-build.workspace = true
|
||||
|
||||
[dev-dependencies]
|
||||
criterion.workspace = true
|
||||
camino.workspace = true
|
||||
camino-tempfile.workspace = true
|
||||
remote_storage.workspace = true
|
||||
tokio-util.workspace = true
|
||||
serde_json.workspace = true
|
||||
futures.workspace = true
|
||||
tikv-jemallocator.workspace = true
|
||||
pprof.workspace = true
|
||||
|
||||
[[bench]]
|
||||
name = "bench_interpret_wal"
|
||||
harness = false
|
||||
|
||||
34
libs/wal_decoder/benches/README.md
Normal file
34
libs/wal_decoder/benches/README.md
Normal file
@@ -0,0 +1,34 @@
|
||||
## WAL Decoding and Interpretation Benchmarks
|
||||
|
||||
Note that these benchmarks pull WAL from a public bucket in S3
|
||||
as a preparation step. Hence, you need a way to auth with AWS.
|
||||
You can achieve this by copying the `~/.aws/config` file from
|
||||
the AWS SSO notion page and exporting `AWS_PROFILE=dev` when invoking
|
||||
the benchmarks.
|
||||
|
||||
To run benchmarks:
|
||||
|
||||
```sh
|
||||
aws sso login --profile dev
|
||||
|
||||
# All benchmarks.
|
||||
AWS_PROFILE=dev cargo bench --package wal_decoder
|
||||
|
||||
# Specific file.
|
||||
AWS_PROFILE=dev cargo bench --package wal_decoder --bench bench_interpret_wal
|
||||
|
||||
# Specific benchmark.
|
||||
AWS_PROFILE=dev cargo bench --package wal_decoder --bench bench_interpret_wal unsharded
|
||||
|
||||
# List available benchmarks.
|
||||
cargo bench --package wal_decoder --benches -- --list
|
||||
|
||||
# Generate flamegraph profiles using pprof-rs, profiling for 10 seconds.
|
||||
# Output in target/criterion/*/profile/flamegraph.svg.
|
||||
AWS_PROFILE=dev cargo bench --package wal_decoder --bench bench_interpret_wal unsharded -- --profile-time 10
|
||||
```
|
||||
|
||||
Additional charts and statistics are available in `target/criterion/report/index.html`.
|
||||
|
||||
Benchmarks are automatically compared against the previous run. To compare against other runs, see
|
||||
`--baseline` and `--save-baseline`.
|
||||
250
libs/wal_decoder/benches/bench_interpret_wal.rs
Normal file
250
libs/wal_decoder/benches/bench_interpret_wal.rs
Normal file
@@ -0,0 +1,250 @@
|
||||
use anyhow::Context;
|
||||
use criterion::{criterion_group, criterion_main, Criterion};
|
||||
use futures::{stream::FuturesUnordered, StreamExt};
|
||||
use pageserver_api::shard::{ShardIdentity, ShardStripeSize};
|
||||
use postgres_ffi::{waldecoder::WalStreamDecoder, MAX_SEND_SIZE, WAL_SEGMENT_SIZE};
|
||||
use pprof::criterion::{Output, PProfProfiler};
|
||||
use serde::Deserialize;
|
||||
use std::{env, num::NonZeroUsize, sync::Arc};
|
||||
|
||||
use camino::{Utf8Path, Utf8PathBuf};
|
||||
use camino_tempfile::Utf8TempDir;
|
||||
use remote_storage::{
|
||||
DownloadOpts, GenericRemoteStorage, ListingMode, RemoteStorageConfig, RemoteStorageKind,
|
||||
S3Config,
|
||||
};
|
||||
use tokio_util::sync::CancellationToken;
|
||||
use utils::{
|
||||
lsn::Lsn,
|
||||
shard::{ShardCount, ShardNumber},
|
||||
};
|
||||
use wal_decoder::models::InterpretedWalRecord;
|
||||
|
||||
const S3_BUCKET: &str = "neon-github-public-dev";
|
||||
const S3_REGION: &str = "eu-central-1";
|
||||
const BUCKET_PREFIX: &str = "wal-snapshots/bulk-insert/";
|
||||
const METADATA_FILENAME: &str = "metadata.json";
|
||||
|
||||
/// Use jemalloc, and configure it to sample allocations for profiles every 1 MB.
|
||||
/// This mirrors the configuration in bin/safekeeper.rs.
|
||||
#[global_allocator]
|
||||
static GLOBAL: tikv_jemallocator::Jemalloc = tikv_jemallocator::Jemalloc;
|
||||
|
||||
#[allow(non_upper_case_globals)]
|
||||
#[export_name = "malloc_conf"]
|
||||
pub static malloc_conf: &[u8] = b"prof:true,prof_active:true,lg_prof_sample:20\0";
|
||||
|
||||
async fn create_s3_client() -> anyhow::Result<Arc<GenericRemoteStorage>> {
|
||||
let remote_storage_config = RemoteStorageConfig {
|
||||
storage: RemoteStorageKind::AwsS3(S3Config {
|
||||
bucket_name: S3_BUCKET.to_string(),
|
||||
bucket_region: S3_REGION.to_string(),
|
||||
prefix_in_bucket: Some(BUCKET_PREFIX.to_string()),
|
||||
endpoint: None,
|
||||
concurrency_limit: NonZeroUsize::new(100).unwrap(),
|
||||
max_keys_per_list_response: None,
|
||||
upload_storage_class: None,
|
||||
}),
|
||||
timeout: RemoteStorageConfig::DEFAULT_TIMEOUT,
|
||||
small_timeout: RemoteStorageConfig::DEFAULT_SMALL_TIMEOUT,
|
||||
};
|
||||
Ok(Arc::new(
|
||||
GenericRemoteStorage::from_config(&remote_storage_config)
|
||||
.await
|
||||
.context("remote storage init")?,
|
||||
))
|
||||
}
|
||||
|
||||
async fn download_bench_data(
|
||||
client: Arc<GenericRemoteStorage>,
|
||||
cancel: &CancellationToken,
|
||||
) -> anyhow::Result<Utf8TempDir> {
|
||||
let temp_dir_parent: Utf8PathBuf = env::current_dir().unwrap().try_into()?;
|
||||
let temp_dir = camino_tempfile::tempdir_in(temp_dir_parent)?;
|
||||
|
||||
eprintln!("Downloading benchmark data to {:?}", temp_dir);
|
||||
|
||||
let listing = client
|
||||
.list(None, ListingMode::NoDelimiter, None, cancel)
|
||||
.await?;
|
||||
|
||||
let mut downloads = listing
|
||||
.keys
|
||||
.into_iter()
|
||||
.map(|obj| {
|
||||
let client = client.clone();
|
||||
let temp_dir_path = temp_dir.path().to_owned();
|
||||
|
||||
async move {
|
||||
let remote_path = obj.key;
|
||||
let download = client
|
||||
.download(&remote_path, &DownloadOpts::default(), cancel)
|
||||
.await?;
|
||||
let mut body = tokio_util::io::StreamReader::new(download.download_stream);
|
||||
|
||||
let file_name = remote_path.object_name().unwrap();
|
||||
let file_path = temp_dir_path.join(file_name);
|
||||
let file = tokio::fs::OpenOptions::new()
|
||||
.create(true)
|
||||
.truncate(true)
|
||||
.write(true)
|
||||
.open(&file_path)
|
||||
.await?;
|
||||
|
||||
let mut writer = tokio::io::BufWriter::new(file);
|
||||
tokio::io::copy_buf(&mut body, &mut writer).await?;
|
||||
|
||||
Ok::<(), anyhow::Error>(())
|
||||
}
|
||||
})
|
||||
.collect::<FuturesUnordered<_>>();
|
||||
|
||||
while let Some(download) = downloads.next().await {
|
||||
download?;
|
||||
}
|
||||
|
||||
Ok(temp_dir)
|
||||
}
|
||||
|
||||
struct BenchmarkData {
|
||||
wal: Vec<u8>,
|
||||
meta: BenchmarkMetadata,
|
||||
}
|
||||
|
||||
#[derive(Deserialize)]
|
||||
struct BenchmarkMetadata {
|
||||
pg_version: u32,
|
||||
start_lsn: Lsn,
|
||||
}
|
||||
|
||||
async fn load_bench_data(path: &Utf8Path, input_size: usize) -> anyhow::Result<BenchmarkData> {
|
||||
eprintln!("Loading benchmark data from {:?}", path);
|
||||
|
||||
let mut entries = tokio::fs::read_dir(path).await?;
|
||||
let mut ordered_segment_paths = Vec::new();
|
||||
let mut metadata = None;
|
||||
|
||||
while let Some(entry) = entries.next_entry().await? {
|
||||
if entry.file_name() == METADATA_FILENAME {
|
||||
let bytes = tokio::fs::read(entry.path()).await?;
|
||||
metadata = Some(
|
||||
serde_json::from_slice::<BenchmarkMetadata>(&bytes)
|
||||
.context("failed to deserialize metadata.json")?,
|
||||
);
|
||||
} else {
|
||||
ordered_segment_paths.push(entry.path());
|
||||
}
|
||||
}
|
||||
|
||||
ordered_segment_paths.sort();
|
||||
|
||||
let mut buffer = Vec::new();
|
||||
for path in ordered_segment_paths {
|
||||
if buffer.len() >= input_size {
|
||||
break;
|
||||
}
|
||||
|
||||
use async_compression::tokio::bufread::ZstdDecoder;
|
||||
let file = tokio::fs::File::open(path).await?;
|
||||
let reader = tokio::io::BufReader::new(file);
|
||||
let decoder = ZstdDecoder::new(reader);
|
||||
let mut reader = tokio::io::BufReader::new(decoder);
|
||||
tokio::io::copy_buf(&mut reader, &mut buffer).await?;
|
||||
}
|
||||
|
||||
buffer.truncate(input_size);
|
||||
|
||||
Ok(BenchmarkData {
|
||||
wal: buffer,
|
||||
meta: metadata.unwrap(),
|
||||
})
|
||||
}
|
||||
|
||||
fn criterion_benchmark(c: &mut Criterion) {
|
||||
const INPUT_SIZE: usize = 128 * 1024 * 1024;
|
||||
|
||||
let setup_runtime = tokio::runtime::Builder::new_current_thread()
|
||||
.enable_all()
|
||||
.build()
|
||||
.unwrap();
|
||||
|
||||
let (_temp_dir, bench_data) = setup_runtime.block_on(async move {
|
||||
let cancel = CancellationToken::new();
|
||||
let client = create_s3_client().await.unwrap();
|
||||
let temp_dir = download_bench_data(client, &cancel).await.unwrap();
|
||||
let bench_data = load_bench_data(temp_dir.path(), INPUT_SIZE).await.unwrap();
|
||||
|
||||
(temp_dir, bench_data)
|
||||
});
|
||||
|
||||
eprintln!(
|
||||
"Benchmarking against {} MiB of WAL",
|
||||
INPUT_SIZE / 1024 / 1024
|
||||
);
|
||||
|
||||
let mut group = c.benchmark_group("decode-interpret-wal");
|
||||
group.throughput(criterion::Throughput::Bytes(bench_data.wal.len() as u64));
|
||||
group.sample_size(10);
|
||||
|
||||
group.bench_function("unsharded", |b| {
|
||||
b.iter(|| decode_interpret_main(&bench_data, &[ShardIdentity::unsharded()]))
|
||||
});
|
||||
|
||||
let eight_shards = (0..8)
|
||||
.map(|i| ShardIdentity::new(ShardNumber(i), ShardCount(8), ShardStripeSize(8)).unwrap())
|
||||
.collect::<Vec<_>>();
|
||||
|
||||
group.bench_function("8/8-shards", |b| {
|
||||
b.iter(|| decode_interpret_main(&bench_data, &eight_shards))
|
||||
});
|
||||
|
||||
let four_shards = eight_shards
|
||||
.into_iter()
|
||||
.filter(|s| s.number.0 % 2 == 0)
|
||||
.collect::<Vec<_>>();
|
||||
group.bench_function("4/8-shards", |b| {
|
||||
b.iter(|| decode_interpret_main(&bench_data, &four_shards))
|
||||
});
|
||||
|
||||
let two_shards = four_shards
|
||||
.into_iter()
|
||||
.filter(|s| s.number.0 % 4 == 0)
|
||||
.collect::<Vec<_>>();
|
||||
group.bench_function("2/8-shards", |b| {
|
||||
b.iter(|| decode_interpret_main(&bench_data, &two_shards))
|
||||
});
|
||||
}
|
||||
|
||||
fn decode_interpret_main(bench: &BenchmarkData, shards: &[ShardIdentity]) {
|
||||
let r = decode_interpret(bench, shards);
|
||||
if let Err(e) = r {
|
||||
panic!("{e:?}");
|
||||
}
|
||||
}
|
||||
|
||||
fn decode_interpret(bench: &BenchmarkData, shard: &[ShardIdentity]) -> anyhow::Result<()> {
|
||||
let mut decoder = WalStreamDecoder::new(bench.meta.start_lsn, bench.meta.pg_version);
|
||||
let xlogoff: usize = bench.meta.start_lsn.segment_offset(WAL_SEGMENT_SIZE);
|
||||
|
||||
for chunk in bench.wal[xlogoff..].chunks(MAX_SEND_SIZE) {
|
||||
decoder.feed_bytes(chunk);
|
||||
while let Some((lsn, recdata)) = decoder.poll_decode().unwrap() {
|
||||
assert!(lsn.is_aligned());
|
||||
let _ = InterpretedWalRecord::from_bytes_filtered(
|
||||
recdata,
|
||||
shard,
|
||||
lsn,
|
||||
bench.meta.pg_version,
|
||||
)
|
||||
.unwrap();
|
||||
}
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
criterion_group!(
|
||||
name=benches;
|
||||
config=Criterion::default().with_profiler(PProfProfiler::new(100, Output::Flamegraph(None)));
|
||||
targets=criterion_benchmark
|
||||
);
|
||||
criterion_main!(benches);
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user