Mirror of https://github.com/neondatabase/neon.git, synced 2026-01-27 23:30:38 +00:00

Compare commits: release-pr...cloneable/ (1 commit)

| Author | SHA1 | Date |
|---|---|---|
| | ef737e7d7c | |
@@ -21,14 +21,13 @@ platforms = [
    # "x86_64-apple-darwin",
    # "x86_64-pc-windows-msvc",
]

[final-excludes]
workspace-members = [
    # vm_monitor benefits from the same Cargo.lock as the rest of our artifacts, but
    # it is built primarily in the separate repo neondatabase/autoscaling and is thus excluded
    # from depending on workspace-hack because most of the dependencies are not used.
    "vm_monitor",
    # subzero-core is a stub crate that should be excluded from workspace-hack
    "subzero-core",
    # All of these exist in libs and are not usually built independently.
    # Putting workspace hack there adds a bottleneck for cargo builds.
    "compute_api",
28  .github/actions/prepare-for-subzero/action.yml  vendored
@@ -1,28 +0,0 @@
name: 'Prepare current job for subzero'
description: >
  Set git token to access `neondatabase/subzero` from cargo build,
  and set `CARGO_NET_GIT_FETCH_WITH_CLI=true` env variable to use git CLI

inputs:
  token:
    description: 'GitHub token with access to neondatabase/subzero'
    required: true

runs:
  using: "composite"

  steps:
    - name: Set git token for neondatabase/subzero
      uses: pyTooling/Actions/with-post-step@2307b526df64d55e95884e072e49aac2a00a9afa # v5.1.0
      env:
        SUBZERO_ACCESS_TOKEN: ${{ inputs.token }}
      with:
        main: |
          git config --global url."https://x-access-token:${SUBZERO_ACCESS_TOKEN}@github.com/neondatabase/subzero".insteadOf "https://github.com/neondatabase/subzero"
          cargo add -p proxy subzero-core --git https://github.com/neondatabase/subzero --rev 396264617e78e8be428682f87469bb25429af88a
        post: |
          git config --global --unset url."https://x-access-token:${SUBZERO_ACCESS_TOKEN}@github.com/neondatabase/subzero".insteadOf "https://github.com/neondatabase/subzero"

    - name: Set `CARGO_NET_GIT_FETCH_WITH_CLI=true` env variable
      shell: bash -euxo pipefail {0}
      run: echo "CARGO_NET_GIT_FETCH_WITH_CLI=true" >> ${GITHUB_ENV}
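For context: the deleted action's `main` step rewrites every anonymous fetch of neondatabase/subzero into a token-authenticated one via git's `insteadOf` mechanism, and `CARGO_NET_GIT_FETCH_WITH_CLI=true` then makes cargo shell out to the git CLI so the rewrite applies. A minimal Rust sketch of what that step does (the helper is hypothetical; the real action just calls git and cargo directly):

    use std::process::Command;

    // Hypothetical helper mirroring the removed action's `main` step: rewrite
    // anonymous subzero URLs to token-authenticated ones via git's insteadOf.
    fn configure_subzero_access(token: &str) -> std::io::Result<()> {
        let plain = "https://github.com/neondatabase/subzero";
        let with_token =
            format!("https://x-access-token:{token}@github.com/neondatabase/subzero");
        let status = Command::new("git")
            .args(["config", "--global", &format!("url.{with_token}.insteadOf"), plain])
            .status()?;
        if !status.success() {
            return Err(std::io::Error::other("git config failed"));
        }
        // The caller must also export CARGO_NET_GIT_FETCH_WITH_CLI=true so cargo
        // shells out to the git CLI, where the insteadOf rewrite takes effect.
        Ok(())
    }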
@@ -181,8 +181,6 @@ runs:
        # Ref https://github.com/neondatabase/neon/issues/4540
        # cov_prefix=(scripts/coverage "--profraw-prefix=$GITHUB_JOB" --dir=/tmp/coverage run)
        cov_prefix=()
        # Explicitly set LLVM_PROFILE_FILE to /dev/null to avoid writing *.profraw files
        export LLVM_PROFILE_FILE=/dev/null
      else
        cov_prefix=()
      fi
@@ -86,10 +86,6 @@ jobs:
        with:
          submodules: true

      - uses: ./.github/actions/prepare-for-subzero
        with:
          token: ${{ secrets.CI_ACCESS_TOKEN }}

      - name: Set pg 14 revision for caching
        id: pg_v14_rev
        run: echo pg_rev=$(git rev-parse HEAD:vendor/postgres-v14) >> $GITHUB_OUTPUT
@@ -120,7 +116,7 @@ jobs:
          ARCH: ${{ inputs.arch }}
          SANITIZERS: ${{ inputs.sanitizers }}
        run: |
          CARGO_FLAGS="--locked --features testing,rest_broker"
          CARGO_FLAGS="--locked --features testing"
          if [[ $BUILD_TYPE == "debug" && $ARCH == 'x64' ]]; then
            cov_prefix="scripts/coverage --profraw-prefix=$GITHUB_JOB --dir=/tmp/coverage run"
            CARGO_PROFILE=""
4  .github/workflows/_check-codestyle-rust.yml  vendored
@@ -46,10 +46,6 @@ jobs:
        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
        with:
          submodules: true

      - uses: ./.github/actions/prepare-for-subzero
        with:
          token: ${{ secrets.CI_ACCESS_TOKEN }}

      - name: Cache cargo deps
        uses: tespkg/actions-cache@b7bf5fcc2f98a52ac6080eb0fd282c2f752074b1 # v1.8.0
4  .github/workflows/build-macos.yml  vendored
@@ -54,10 +54,6 @@ jobs:
        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
        with:
          submodules: true

      - uses: ./.github/actions/prepare-for-subzero
        with:
          token: ${{ secrets.CI_ACCESS_TOKEN }}

      - name: Install build dependencies
        run: |
45  .github/workflows/build_and_test.yml  vendored
@@ -87,27 +87,22 @@ jobs:
    uses: ./.github/workflows/build-build-tools-image.yml
    secrets: inherit

  lint-yamls:
    needs: [ meta, check-permissions, build-build-tools-image ]
  lint-openapi-spec:
    runs-on: ubuntu-22.04
    needs: [ meta, check-permissions ]
    # We do need to run this in `.*-rc-pr` because of hotfixes.
    if: ${{ contains(fromJSON('["pr", "push-main", "storage-rc-pr", "proxy-rc-pr", "compute-rc-pr"]'), needs.meta.outputs.run-kind) }}
    runs-on: [ self-hosted, small ]
    container:
      image: ${{ needs.build-build-tools-image.outputs.image }}
      credentials:
        username: ${{ github.actor }}
        password: ${{ secrets.GITHUB_TOKEN }}
      options: --init

    steps:
      - name: Harden the runner (Audit all outbound calls)
        uses: step-security/harden-runner@4d991eb9b905ef189e4c376166672c3f2f230481 # v2.11.0
        with:
          egress-policy: audit

      - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2

      - run: make -C compute manifest-schema-validation
      - uses: docker/login-action@74a5d142397b4f367a81961eba4e8cd7edddf772 # v3.4.0
        with:
          registry: ghcr.io
          username: ${{ github.actor }}
          password: ${{ secrets.GITHUB_TOKEN }}
      - run: make lint-openapi-spec

  check-codestyle-python:
@@ -222,6 +217,28 @@ jobs:
      build-tools-image: ${{ needs.build-build-tools-image.outputs.image }}-bookworm
    secrets: inherit

  validate-compute-manifest:
    runs-on: ubuntu-22.04
    needs: [ meta, check-permissions ]
    # We do need to run this in `.*-rc-pr` because of hotfixes.
    if: ${{ contains(fromJSON('["pr", "push-main", "storage-rc-pr", "proxy-rc-pr", "compute-rc-pr"]'), needs.meta.outputs.run-kind) }}
    steps:
      - name: Harden the runner (Audit all outbound calls)
        uses: step-security/harden-runner@4d991eb9b905ef189e4c376166672c3f2f230481 # v2.11.0
        with:
          egress-policy: audit

      - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2

      - name: Set up Node.js
        uses: actions/setup-node@49933ea5288caeca8642d1e84afbd3f7d6820020 # v4.4.0
        with:
          node-version: '24'

      - name: Validate manifest against schema
        run: |
          make -C compute manifest-schema-validation

  build-and-test-locally:
    needs: [ meta, build-build-tools-image ]
    # We do need to run this in `.*-rc-pr` because of hotfixes.
@@ -632,8 +649,6 @@ jobs:
          BUILD_TAG=${{ needs.meta.outputs.release-tag || needs.meta.outputs.build-tag }}
          TAG=${{ needs.build-build-tools-image.outputs.image-tag }}-bookworm
          DEBIAN_VERSION=bookworm
        secrets: |
          SUBZERO_ACCESS_TOKEN=${{ secrets.CI_ACCESS_TOKEN }}
        provenance: false
        push: true
        pull: true
1  .github/workflows/neon_extra_builds.yml  vendored
@@ -72,7 +72,6 @@ jobs:
  check-macos-build:
    needs: [ check-permissions, files-changed ]
    uses: ./.github/workflows/build-macos.yml
    secrets: inherit
    with:
      pg_versions: ${{ needs.files-changed.outputs.postgres_changes }}
      rebuild_rust_code: ${{ fromJSON(needs.files-changed.outputs.rebuild_rust_code) }}
8  .gitignore  vendored
@@ -26,14 +26,6 @@ docker-compose/docker-compose-parallel.yml
*.o
*.so
*.Po
*.pid

# pgindent typedef lists
*.list

# Node
**/node_modules/

# various files for local testing
/proxy/.subzero
local_proxy.json
257  Cargo.lock  generated
@@ -52,12 +52,6 @@ dependencies = [
 "memchr",
]

[[package]]
name = "aliasable"
version = "0.1.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "250f629c0161ad8107cf89319e990051fae62832fd343083bea452d93e2205fd"

[[package]]
name = "aligned-vec"
version = "0.6.1"
@@ -496,7 +490,7 @@ dependencies = [
 "hex",
 "hmac",
 "http 0.2.9",
 "http 1.3.1",
 "http 1.1.0",
 "once_cell",
 "p256 0.11.1",
 "percent-encoding",
@@ -637,7 +631,7 @@ dependencies = [
 "aws-smithy-types",
 "bytes",
 "http 0.2.9",
 "http 1.3.1",
 "http 1.1.0",
 "pin-project-lite",
 "tokio",
 "tracing",
@@ -655,7 +649,7 @@ dependencies = [
 "bytes-utils",
 "futures-core",
 "http 0.2.9",
 "http 1.3.1",
 "http 1.1.0",
 "http-body 0.4.5",
 "http-body 1.0.0",
 "http-body-util",
@@ -704,7 +698,7 @@ dependencies = [
 "bytes",
 "form_urlencoded",
 "futures-util",
 "http 1.3.1",
 "http 1.1.0",
 "http-body 1.0.0",
 "http-body-util",
 "hyper 1.4.1",
@@ -738,7 +732,7 @@ checksum = "df1362f362fd16024ae199c1970ce98f9661bf5ef94b9808fee734bc3698b733"
dependencies = [
 "bytes",
 "futures-util",
 "http 1.3.1",
 "http 1.1.0",
 "http-body 1.0.0",
 "http-body-util",
 "mime",
@@ -762,7 +756,7 @@ dependencies = [
 "form_urlencoded",
 "futures-util",
 "headers",
 "http 1.3.1",
 "http 1.1.0",
 "http-body 1.0.0",
 "http-body-util",
 "mime",
@@ -1096,7 +1090,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "975982cdb7ad6a142be15bdf84aea7ec6a9e5d4d797c004d43185b24cfe4e684"
dependencies = [
 "clap",
 "heck 0.5.0",
 "heck",
 "indexmap 2.9.0",
 "log",
 "proc-macro2",
@@ -1234,7 +1228,7 @@ version = "4.5.18"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4ac6a0c7b1a9e9a5186361f67dfa1b88213572f427fb9ab038efb2bd8c582dab"
dependencies = [
 "heck 0.5.0",
 "heck",
 "proc-macro2",
 "quote",
 "syn 2.0.100",
@@ -1340,7 +1334,7 @@ dependencies = [
 "flate2",
 "futures",
 "hostname-validator",
 "http 1.3.1",
 "http 1.1.0",
 "indexmap 2.9.0",
 "itertools 0.10.5",
 "jsonwebtoken",
@@ -1878,7 +1872,6 @@ dependencies = [
 "diesel_derives",
 "itoa",
 "serde_json",
 "uuid",
]

[[package]]
@@ -1975,7 +1968,7 @@ checksum = "0892a17df262a24294c382f0d5997571006e7a4348b4327557c4ff1cd4a8bccc"
dependencies = [
 "darling",
 "either",
 "heck 0.5.0",
 "heck",
 "proc-macro2",
 "quote",
 "syn 2.0.100",
@@ -2540,18 +2533,6 @@ dependencies = [
 "wasm-bindgen",
]

[[package]]
name = "getrandom"
version = "0.3.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "26145e563e54f2cadc477553f1ec5ee650b00862f0a58bcd12cbdc5f0ea2d2f4"
dependencies = [
 "cfg-if",
 "libc",
 "r-efi",
 "wasi 0.14.2+wasi-0.2.4",
]

[[package]]
name = "gettid"
version = "0.1.3"
@@ -2667,7 +2648,7 @@ dependencies = [
 "futures-core",
 "futures-sink",
 "futures-util",
 "http 1.3.1",
 "http 1.1.0",
 "indexmap 2.9.0",
 "slab",
 "tokio",
@@ -2749,7 +2730,7 @@ dependencies = [
 "base64 0.21.7",
 "bytes",
 "headers-core",
 "http 1.3.1",
 "http 1.1.0",
 "httpdate",
 "mime",
 "sha1",
@@ -2761,15 +2742,9 @@ version = "0.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "54b4a22553d4242c49fddb9ba998a99962b5cc6f22cb5a3482bec22522403ce4"
dependencies = [
 "http 1.3.1",
 "http 1.1.0",
]

[[package]]
name = "heck"
version = "0.4.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "95505c38b4572b2d910cecb0281560f54b440a19336cbbcb27bf6ce6adc6f5a8"

[[package]]
name = "heck"
version = "0.5.0"
@@ -2845,9 +2820,9 @@ dependencies = [

[[package]]
name = "http"
version = "1.3.1"
version = "1.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f4a85d31aea989eead29a3aaf9e1115a180df8282431156e533de47660892565"
checksum = "21b9ddb458710bc376481b842f5da65cdf31522de232c1ca8146abce2a358258"
dependencies = [
 "bytes",
 "fnv",
@@ -2872,7 +2847,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1cac85db508abc24a2e48553ba12a996e87244a0395ce011e62b37158745d643"
dependencies = [
 "bytes",
 "http 1.3.1",
 "http 1.1.0",
]

[[package]]
@@ -2883,7 +2858,7 @@ checksum = "793429d76616a256bcb62c2a2ec2bed781c8307e797e2598c50010f2bee2544f"
dependencies = [
 "bytes",
 "futures-util",
 "http 1.3.1",
 "http 1.1.0",
 "http-body 1.0.0",
 "pin-project-lite",
]
@@ -3007,7 +2982,7 @@ dependencies = [
 "futures-channel",
 "futures-util",
 "h2 0.4.4",
 "http 1.3.1",
 "http 1.1.0",
 "http-body 1.0.0",
 "httparse",
 "httpdate",
@@ -3040,7 +3015,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a0bea761b46ae2b24eb4aef630d8d1c398157b6fc29e6350ecf090a0b70c952c"
dependencies = [
 "futures-util",
 "http 1.3.1",
 "http 1.1.0",
 "hyper 1.4.1",
 "hyper-util",
 "rustls 0.22.4",
@@ -3072,7 +3047,7 @@ dependencies = [
 "bytes",
 "futures-channel",
 "futures-util",
 "http 1.3.1",
 "http 1.1.0",
 "http-body 1.0.0",
 "hyper 1.4.1",
 "pin-project-lite",
@@ -3631,9 +3606,9 @@ checksum = "4ee93343901ab17bd981295f2cf0026d4ad018c7c31ba84549a4ddbb47a45104"

[[package]]
name = "lock_api"
version = "0.4.13"
version = "0.4.10"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "96936507f153605bddfcda068dd804796c84324ed2510809e5b2a624c81da765"
checksum = "c1cc9717a20b1bb222f333e6a92fd32f7d8a18ddc5a3191a11af45dcbf4dcd16"
dependencies = [
 "autocfg",
 "scopeguard",
@@ -3721,7 +3696,7 @@ version = "0.0.22"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b9e6777fc80a575f9503d908c8b498782a6c3ee88a06cb416dc3941401e43b94"
dependencies = [
 "heck 0.5.0",
 "heck",
 "proc-macro2",
 "quote",
 "syn 2.0.100",
@@ -3783,7 +3758,7 @@ dependencies = [
 "procfs",
 "prometheus",
 "rand 0.8.5",
 "rand_distr 0.4.3",
 "rand_distr",
 "twox-hash",
]

@@ -3871,12 +3846,7 @@ checksum = "e5ce46fe64a9d73be07dcbe690a38ce1b293be448fd8ce1e6c1b8062c9f72c6a"
name = "neon-shmem"
version = "0.1.0"
dependencies = [
 "libc",
 "lock_api",
 "nix 0.30.1",
 "rand 0.9.1",
 "rand_distr 0.5.1",
 "rustc-hash 2.1.1",
 "tempfile",
 "thiserror 1.0.69",
 "workspace_hack",
@@ -4172,7 +4142,7 @@ checksum = "10a8a7f5f6ba7c1b286c2fbca0454eaba116f63bbe69ed250b642d36fbb04d80"
dependencies = [
 "async-trait",
 "bytes",
 "http 1.3.1",
 "http 1.1.0",
 "opentelemetry",
 "reqwest",
]
@@ -4185,7 +4155,7 @@ checksum = "91cf61a1868dacc576bf2b2a1c3e9ab150af7272909e80085c3173384fe11f76"
dependencies = [
 "async-trait",
 "futures-core",
 "http 1.3.1",
 "http 1.1.0",
 "opentelemetry",
 "opentelemetry-http",
 "opentelemetry-proto",
@@ -4264,30 +4234,6 @@ dependencies = [
 "winapi",
]

[[package]]
name = "ouroboros"
version = "0.18.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1e0f050db9c44b97a94723127e6be766ac5c340c48f2c4bb3ffa11713744be59"
dependencies = [
 "aliasable",
 "ouroboros_macro",
 "static_assertions",
]

[[package]]
name = "ouroboros_macro"
version = "0.18.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3c7028bdd3d43083f6d8d4d5187680d0d3560d54df4cc9d752005268b41e64d0"
dependencies = [
 "heck 0.4.1",
 "proc-macro2",
 "proc-macro2-diagnostics",
 "quote",
 "syn 2.0.100",
]

[[package]]
name = "outref"
version = "0.5.1"
@@ -4417,7 +4363,7 @@ dependencies = [
 "hashlink",
 "hex",
 "hex-literal",
 "http 1.3.1",
 "http 1.1.0",
 "http-utils",
 "humantime",
 "humantime-serde",
@@ -5184,19 +5130,6 @@ dependencies = [
 "unicode-ident",
]

[[package]]
name = "proc-macro2-diagnostics"
version = "0.10.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "af066a9c399a26e020ada66a034357a868728e72cd426f3adcd35f80d88d88c8"
dependencies = [
 "proc-macro2",
 "quote",
 "syn 2.0.100",
 "version_check",
 "yansi",
]

[[package]]
name = "procfs"
version = "0.16.0"
@@ -5266,7 +5199,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "22505a5c94da8e3b7c2996394d1c933236c4d743e81a410bcca4e6989fc066a4"
dependencies = [
 "bytes",
 "heck 0.5.0",
 "heck",
 "itertools 0.12.1",
 "log",
 "multimap",
@@ -5287,7 +5220,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0c1318b19085f08681016926435853bbf7858f9c082d0999b80550ff5d9abe15"
dependencies = [
 "bytes",
 "heck 0.5.0",
 "heck",
 "itertools 0.12.1",
 "log",
 "multimap",
@@ -5370,6 +5303,7 @@ dependencies = [
 "clashmap",
 "compute_api",
 "consumption_metrics",
 "criterion",
 "ecdsa 0.16.9",
 "ed25519-dalek",
 "env_logger",
@@ -5383,7 +5317,7 @@ dependencies = [
 "hex",
 "hmac",
 "hostname",
 "http 1.3.1",
 "http 1.1.0",
 "http-body-util",
 "http-utils",
 "humantime",
@@ -5403,7 +5337,6 @@ dependencies = [
 "metrics",
 "once_cell",
 "opentelemetry",
 "ouroboros",
 "p256 0.13.2",
 "papaya",
 "parking_lot 0.12.1",
@@ -5415,7 +5348,7 @@ dependencies = [
 "postgres_backend",
 "pq_proto",
 "rand 0.8.5",
 "rand_distr 0.4.3",
 "rand_distr",
 "rcgen",
 "redis",
 "regex",
@@ -5426,7 +5359,7 @@ dependencies = [
 "reqwest-tracing",
 "rsa",
 "rstest",
 "rustc-hash 2.1.1",
 "rustc-hash 1.1.0",
 "rustls 0.23.27",
 "rustls-native-certs 0.8.0",
 "rustls-pemfile 2.1.1",
@@ -5440,7 +5373,6 @@ dependencies = [
 "socket2",
 "strum_macros",
 "subtle",
 "subzero-core",
 "thiserror 1.0.69",
 "tikv-jemalloc-ctl",
 "tikv-jemallocator",
@@ -5520,12 +5452,6 @@ dependencies = [
 "proc-macro2",
]

[[package]]
name = "r-efi"
version = "5.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "69cdb34c158ceb288df11e18b4bd39de994f6657d83847bdffdbd7f346754b0f"

[[package]]
name = "rand"
version = "0.7.3"
@@ -5550,16 +5476,6 @@ dependencies = [
 "rand_core 0.6.4",
]

[[package]]
name = "rand"
version = "0.9.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9fbfd9d094a40bf3ae768db9361049ace4c0e04a4fd6b359518bd7b73a73dd97"
dependencies = [
 "rand_chacha 0.9.0",
 "rand_core 0.9.3",
]

[[package]]
name = "rand_chacha"
version = "0.2.2"
@@ -5580,16 +5496,6 @@ dependencies = [
 "rand_core 0.6.4",
]

[[package]]
name = "rand_chacha"
version = "0.9.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d3022b5f1df60f26e1ffddd6c66e8aa15de382ae63b3a0c1bfc0e4d3e3f325cb"
dependencies = [
 "ppv-lite86",
 "rand_core 0.9.3",
]

[[package]]
name = "rand_core"
version = "0.5.1"
@@ -5608,15 +5514,6 @@ dependencies = [
 "getrandom 0.2.11",
]

[[package]]
name = "rand_core"
version = "0.9.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "99d9a13982dcf210057a8a78572b2217b667c3beacbf3a0d8b454f6f82837d38"
dependencies = [
 "getrandom 0.3.3",
]

[[package]]
name = "rand_distr"
version = "0.4.3"
@@ -5627,16 +5524,6 @@ dependencies = [
 "rand 0.8.5",
]

[[package]]
name = "rand_distr"
version = "0.5.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6a8615d50dcf34fa31f7ab52692afec947c4dd0ab803cc87cb3b0b4570ff7463"
dependencies = [
 "num-traits",
 "rand 0.9.1",
]

[[package]]
name = "rand_hc"
version = "0.2.0"
@@ -5756,14 +5643,14 @@ dependencies = [

[[package]]
name = "regex"
version = "1.11.1"
version = "1.10.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b544ef1b4eac5dc2db33ea63606ae9ffcfac26c1416a2806ae0bf5f56b201191"
checksum = "380b951a9c5e80ddfd6136919eef32310721aa4aacd4889a8d39124b026ab343"
dependencies = [
 "aho-corasick",
 "memchr",
 "regex-automata 0.4.9",
 "regex-syntax 0.8.5",
 "regex-automata 0.4.3",
 "regex-syntax 0.8.2",
]

[[package]]
@@ -5777,13 +5664,13 @@ dependencies = [

[[package]]
name = "regex-automata"
version = "0.4.9"
version = "0.4.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "809e8dc61f6de73b46c85f4c96486310fe304c434cfa43669d7b40f711150908"
checksum = "5f804c7828047e88b2d32e2d7fe5a105da8ee3264f01902f796c8e067dc2483f"
dependencies = [
 "aho-corasick",
 "memchr",
 "regex-syntax 0.8.5",
 "regex-syntax 0.8.2",
]

[[package]]
@@ -5800,9 +5687,9 @@ checksum = "f162c6dd7b008981e4d40210aca20b4bd0f9b60ca9271061b07f78537722f2e1"

[[package]]
name = "regex-syntax"
version = "0.8.5"
version = "0.8.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2b15c43186be67a4fd63bee50d0303afffcef381492ebe2c5d87f324e1b8815c"
checksum = "c08c74e62047bb2de4ff487b251e4a92e24f48745648451635cec7d591162d9f"

[[package]]
name = "relative-path"
@@ -5872,7 +5759,7 @@ dependencies = [
 "futures-channel",
 "futures-core",
 "futures-util",
 "http 1.3.1",
 "http 1.1.0",
 "http-body 1.0.0",
 "http-body-util",
 "hyper 1.4.1",
@@ -5914,7 +5801,7 @@ checksum = "d1ccd3b55e711f91a9885a2fa6fbbb2e39db1776420b062efc058c6410f7e5e3"
dependencies = [
 "anyhow",
 "async-trait",
 "http 1.3.1",
 "http 1.1.0",
 "reqwest",
 "serde",
 "thiserror 1.0.69",
@@ -5931,7 +5818,7 @@ dependencies = [
 "async-trait",
 "futures",
 "getrandom 0.2.11",
 "http 1.3.1",
 "http 1.1.0",
 "hyper 1.4.1",
 "parking_lot 0.11.2",
 "reqwest",
@@ -5952,7 +5839,7 @@ dependencies = [
 "anyhow",
 "async-trait",
 "getrandom 0.2.11",
 "http 1.3.1",
 "http 1.1.0",
 "matchit",
 "opentelemetry",
 "reqwest",
@@ -6311,14 +6198,13 @@ dependencies = [
 "fail",
 "futures",
 "hex",
 "http 1.3.1",
 "http 1.1.0",
 "http-utils",
 "humantime",
 "hyper 0.14.30",
 "itertools 0.10.5",
 "jsonwebtoken",
 "metrics",
 "nix 0.30.1",
 "once_cell",
 "pageserver_api",
 "parking_lot 0.12.1",
@@ -7047,7 +6933,6 @@ dependencies = [
 "tokio-util",
 "tracing",
 "utils",
 "uuid",
 "workspace_hack",
]

@@ -7160,7 +7045,7 @@ version = "0.26.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4c6bee85a5a24955dc440386795aa378cd9cf82acd5f764469152d2270e581be"
dependencies = [
 "heck 0.5.0",
 "heck",
 "proc-macro2",
 "quote",
 "rustversion",
@@ -7173,10 +7058,6 @@ version = "2.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "81cdd64d312baedb58e21336b31bc043b77e01cc99033ce76ef539f78e965ebc"

[[package]]
name = "subzero-core"
version = "3.0.1"

[[package]]
name = "svg_fmt"
version = "0.4.3"
@@ -7787,7 +7668,7 @@ dependencies = [
 "async-trait",
 "base64 0.22.1",
 "bytes",
 "http 1.3.1",
 "http 1.1.0",
 "http-body 1.0.0",
 "http-body-util",
 "percent-encoding",
@@ -7811,7 +7692,7 @@ dependencies = [
 "bytes",
 "flate2",
 "h2 0.4.4",
 "http 1.3.1",
 "http 1.1.0",
 "http-body 1.0.0",
 "http-body-util",
 "hyper 1.4.1",
@@ -7902,7 +7783,7 @@ dependencies = [
 "base64 0.22.1",
 "bitflags 2.8.0",
 "bytes",
 "http 1.3.1",
 "http 1.1.0",
 "http-body 1.0.0",
 "mime",
 "pin-project-lite",
@@ -7923,7 +7804,7 @@ name = "tower-otel"
version = "0.2.0"
source = "git+https://github.com/mattiapenati/tower-otel?rev=56a7321053bcb72443888257b622ba0d43a11fcd#56a7321053bcb72443888257b622ba0d43a11fcd"
dependencies = [
 "http 1.3.1",
 "http 1.1.0",
 "opentelemetry",
 "pin-project",
 "tower-layer",
@@ -8104,7 +7985,7 @@ dependencies = [
 "byteorder",
 "bytes",
 "data-encoding",
 "http 1.3.1",
 "http 1.1.0",
 "httparse",
 "log",
 "rand 0.8.5",
@@ -8123,7 +8004,7 @@ dependencies = [
 "byteorder",
 "bytes",
 "data-encoding",
 "http 1.3.1",
 "http 1.1.0",
 "httparse",
 "log",
 "rand 0.8.5",
@@ -8325,7 +8206,6 @@ dependencies = [
 "tracing-error",
 "tracing-subscriber",
 "tracing-utils",
 "uuid",
 "walkdir",
]

@@ -8468,15 +8348,6 @@ version = "0.11.0+wasi-snapshot-preview1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423"

[[package]]
name = "wasi"
version = "0.14.2+wasi-0.2.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9683f9a5a998d873c0d21fcbe3c083009670149a8fab228644b8bd36b2c48cb3"
dependencies = [
 "wit-bindgen-rt",
]

[[package]]
name = "wasite"
version = "0.1.0"
@@ -8834,15 +8705,6 @@ dependencies = [
 "windows-sys 0.48.0",
]

[[package]]
name = "wit-bindgen-rt"
version = "0.39.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6f42320e61fe2cfd34354ecb597f86f413484a798ba44a8ca1165c58d42da6c1"
dependencies = [
 "bitflags 2.8.0",
]

[[package]]
name = "workspace_hack"
version = "0.1.0"
@@ -8912,8 +8774,8 @@ dependencies = [
 "quote",
 "rand 0.8.5",
 "regex",
 "regex-automata 0.4.9",
 "regex-syntax 0.8.5",
 "regex-automata 0.4.3",
 "regex-syntax 0.8.2",
 "reqwest",
 "rustls 0.23.27",
 "rustls-pki-types",
@@ -8945,6 +8807,7 @@ dependencies = [
 "tracing-log",
 "tracing-subscriber",
 "url",
 "uuid",
 "zeroize",
 "zstd",
 "zstd-safe",
@@ -9009,12 +8872,6 @@ version = "0.13.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4d25c75bf9ea12c4040a97f829154768bbbce366287e2dc044af160cd79a13fd"

[[package]]
name = "yansi"
version = "1.0.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "cfe53a6657fd280eaa890a3bc59152892ffa3e30101319d168b781ed6529b049"

[[package]]
name = "yasna"
version = "0.5.2"
@@ -49,7 +49,6 @@ members = [
    "libs/proxy/tokio-postgres2",
    "endpoint_storage",
    "pgxn/neon/communicator",
    "proxy/subzero_core",
]

[workspace.package]
@@ -131,7 +130,6 @@ jemalloc_pprof = { version = "0.7", features = ["symbolize", "flamegraph"] }
jsonwebtoken = "9"
lasso = "0.7"
libc = "0.2"
lock_api = "0.4.13"
md5 = "0.7.0"
measured = { version = "0.0.22", features=["lasso"] }
measured-process = { version = "0.0.22" }
@@ -167,7 +165,7 @@ reqwest-middleware = "0.4"
reqwest-retry = "0.7"
routerify = "3"
rpds = "0.13"
rustc-hash = "2.1.1"
rustc-hash = "1.1.0"
rustls = { version = "0.23.16", default-features = false }
rustls-pemfile = "2"
rustls-pki-types = "1.11"
26  Dockerfile
@@ -63,14 +63,7 @@ WORKDIR /home/nonroot

COPY --chown=nonroot . .

RUN --mount=type=secret,uid=1000,id=SUBZERO_ACCESS_TOKEN \
    set -e \
    && if [ -s /run/secrets/SUBZERO_ACCESS_TOKEN ]; then \
        export CARGO_NET_GIT_FETCH_WITH_CLI=true && \
        git config --global url."https://$(cat /run/secrets/SUBZERO_ACCESS_TOKEN)@github.com/neondatabase/subzero".insteadOf "https://github.com/neondatabase/subzero" && \
        cargo add -p proxy subzero-core --git https://github.com/neondatabase/subzero --rev 396264617e78e8be428682f87469bb25429af88a; \
    fi \
    && cargo chef prepare --recipe-path recipe.json
RUN cargo chef prepare --recipe-path recipe.json

# Main build image
FROM $REPOSITORY/$IMAGE:$TAG AS build
@@ -78,33 +71,20 @@ WORKDIR /home/nonroot
ARG GIT_VERSION=local
ARG BUILD_TAG
ARG ADDITIONAL_RUSTFLAGS=""
ENV CARGO_FEATURES="default"

# 3. Build cargo dependencies. Note that this step doesn't depend on anything else than
# `recipe.json`, so the layer can be reused as long as none of the dependencies change.
COPY --from=plan /home/nonroot/recipe.json recipe.json
RUN --mount=type=secret,uid=1000,id=SUBZERO_ACCESS_TOKEN \
    set -e \
    && if [ -s /run/secrets/SUBZERO_ACCESS_TOKEN ]; then \
        export CARGO_NET_GIT_FETCH_WITH_CLI=true && \
        git config --global url."https://$(cat /run/secrets/SUBZERO_ACCESS_TOKEN)@github.com/neondatabase/subzero".insteadOf "https://github.com/neondatabase/subzero"; \
    fi \
RUN set -e \
    && RUSTFLAGS="-Clinker=clang -Clink-arg=-fuse-ld=mold -Clink-arg=-Wl,--no-rosegment -Cforce-frame-pointers=yes ${ADDITIONAL_RUSTFLAGS}" cargo chef cook --locked --release --recipe-path recipe.json

# Perform the main build. We reuse the Postgres build artifacts from the intermediate 'pg-build'
# layer, and the cargo dependencies built in the previous step.
COPY --chown=nonroot --from=pg-build /home/nonroot/pg_install/ pg_install
COPY --chown=nonroot . .
COPY --chown=nonroot --from=plan /home/nonroot/proxy/Cargo.toml proxy/Cargo.toml
COPY --chown=nonroot --from=plan /home/nonroot/Cargo.lock Cargo.lock

RUN --mount=type=secret,uid=1000,id=SUBZERO_ACCESS_TOKEN \
    set -e \
    && if [ -s /run/secrets/SUBZERO_ACCESS_TOKEN ]; then \
        export CARGO_FEATURES="rest_broker"; \
    fi \
RUN set -e \
    && RUSTFLAGS="-Clinker=clang -Clink-arg=-fuse-ld=mold -Clink-arg=-Wl,--no-rosegment -Cforce-frame-pointers=yes ${ADDITIONAL_RUSTFLAGS}" cargo build \
    --features $CARGO_FEATURES \
    --bin pg_sni_router \
    --bin pageserver \
    --bin pagectl \
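The removed steps above follow one pattern: a BuildKit secret decides at build time whether the optional subzero-backed feature set is compiled in, by testing that the secret file is non-empty (`[ -s ... ]`). A minimal Rust sketch of that same check (the path and feature name come from this Dockerfile; the function itself is illustrative, not repo code):

    use std::fs;

    // Illustrative: mirrors `if [ -s /run/secrets/SUBZERO_ACCESS_TOKEN ]` from the
    // removed Dockerfile step — pick the extra feature set only when the secret
    // file exists and is non-empty.
    fn cargo_features() -> &'static str {
        match fs::metadata("/run/secrets/SUBZERO_ACCESS_TOKEN") {
            Ok(meta) if meta.len() > 0 => "rest_broker",
            _ => "default",
        }
    }

    fn main() {
        println!("cargo build --features {}", cargo_features());
    }

In the Dockerfile itself the same decision is made in shell by exporting CARGO_FEATURES before the cargo invocation.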
14  Makefile
@@ -2,7 +2,7 @@ ROOT_PROJECT_DIR := $(dir $(abspath $(lastword $(MAKEFILE_LIST))))

# Where to install Postgres, default is ./pg_install, maybe useful for package
# managers.
POSTGRES_INSTALL_DIR ?= $(ROOT_PROJECT_DIR)/pg_install
POSTGRES_INSTALL_DIR ?= $(ROOT_PROJECT_DIR)/pg_install/

# Supported PostgreSQL versions
POSTGRES_VERSIONS = v17 v16 v15 v14
@@ -14,7 +14,7 @@ POSTGRES_VERSIONS = v17 v16 v15 v14
# it is derived from BUILD_TYPE.

# All intermediate build artifacts are stored here.
BUILD_DIR := $(ROOT_PROJECT_DIR)/build
BUILD_DIR := build

ICU_PREFIX_DIR := /usr/local/icu

@@ -212,7 +212,7 @@ neon-pgindent: postgres-v17-pg-bsd-indent neon-pg-ext-v17
    FIND_TYPEDEF=$(ROOT_PROJECT_DIR)/vendor/postgres-v17/src/tools/find_typedef \
    INDENT=$(BUILD_DIR)/v17/src/tools/pg_bsd_indent/pg_bsd_indent \
    PGINDENT_SCRIPT=$(ROOT_PROJECT_DIR)/vendor/postgres-v17/src/tools/pgindent/pgindent \
    -C $(BUILD_DIR)/pgxn-v17/neon \
    -C $(BUILD_DIR)/neon-v17 \
    -f $(ROOT_PROJECT_DIR)/pgxn/neon/Makefile pgindent


@@ -220,15 +220,11 @@ neon-pgindent: postgres-v17-pg-bsd-indent neon-pg-ext-v17
setup-pre-commit-hook:
    ln -s -f $(ROOT_PROJECT_DIR)/pre-commit.py .git/hooks/pre-commit

build-tools/node_modules: build-tools/package.json
    cd build-tools && $(if $(CI),npm ci,npm install)
    touch build-tools/node_modules

.PHONY: lint-openapi-spec
lint-openapi-spec: build-tools/node_modules
lint-openapi-spec:
    # operation-2xx-response: pageserver timeline delete returns 404 on success
    find . -iname "openapi_spec.y*ml" -exec\
    npx --prefix=build-tools/ redocly\
    docker run --rm -v ${PWD}:/spec ghcr.io/redocly/cli:1.34.4\
    --skip-rule=operation-operationId --skip-rule=operation-summary --extends=minimal\
    --skip-rule=no-server-example.com --skip-rule=operation-2xx-response\
    lint {} \+
@@ -188,12 +188,6 @@ RUN curl -fsSL 'https://apt.llvm.org/llvm-snapshot.gpg.key' | apt-key add - \
    && bash -c 'for f in /usr/bin/clang*-${LLVM_VERSION} /usr/bin/llvm*-${LLVM_VERSION}; do ln -s "${f}" "${f%-${LLVM_VERSION}}"; done' \
    && rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/*

# Install node
ENV NODE_VERSION=24
RUN curl -fsSL https://deb.nodesource.com/setup_${NODE_VERSION}.x | bash - \
    && apt install -y nodejs \
    && rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/*

# Install docker
RUN curl -fsSL https://download.docker.com/linux/ubuntu/gpg | gpg --dearmor -o /usr/share/keyrings/docker-archive-keyring.gpg \
    && echo "deb [arch=$(dpkg --print-architecture) signed-by=/usr/share/keyrings/docker-archive-keyring.gpg] https://download.docker.com/linux/debian ${DEBIAN_VERSION} stable" > /etc/apt/sources.list.d/docker.list \
@@ -317,14 +311,14 @@ RUN curl -sSO https://static.rust-lang.org/rustup/dist/$(uname -m)-unknown-linux
    . "$HOME/.cargo/env" && \
    cargo --version && rustup --version && \
    rustup component add llvm-tools rustfmt clippy && \
    cargo install rustfilt --locked --version ${RUSTFILT_VERSION} && \
    cargo install cargo-hakari --locked --version ${CARGO_HAKARI_VERSION} && \
    cargo install cargo-deny --locked --version ${CARGO_DENY_VERSION} && \
    cargo install cargo-hack --locked --version ${CARGO_HACK_VERSION} && \
    cargo install cargo-nextest --locked --version ${CARGO_NEXTEST_VERSION} && \
    cargo install cargo-chef --locked --version ${CARGO_CHEF_VERSION} && \
    cargo install diesel_cli --locked --version ${CARGO_DIESEL_CLI_VERSION} \
        --features postgres-bundled --no-default-features && \
    cargo install rustfilt --version ${RUSTFILT_VERSION} --locked && \
    cargo install cargo-hakari --version ${CARGO_HAKARI_VERSION} --locked && \
    cargo install cargo-deny --version ${CARGO_DENY_VERSION} --locked && \
    cargo install cargo-hack --version ${CARGO_HACK_VERSION} --locked && \
    cargo install cargo-nextest --version ${CARGO_NEXTEST_VERSION} --locked && \
    cargo install cargo-chef --version ${CARGO_CHEF_VERSION} --locked && \
    cargo install diesel_cli --version ${CARGO_DIESEL_CLI_VERSION} --locked \
        --features postgres-bundled --no-default-features && \
    rm -rf /home/nonroot/.cargo/registry && \
    rm -rf /home/nonroot/.cargo/git
3189  build-tools/package-lock.json  generated
File diff suppressed because it is too large
@@ -1,8 +0,0 @@
{
  "name": "build-tools",
  "private": true,
  "devDependencies": {
    "@redocly/cli": "1.34.4",
    "@sourcemeta/jsonschema": "10.0.0"
  }
}
@@ -50,9 +50,9 @@ jsonnetfmt-format:
    jsonnetfmt --in-place $(jsonnet_files)

.PHONY: manifest-schema-validation
manifest-schema-validation: ../build-tools/node_modules
    npx --prefix=../build-tools/ jsonschema validate -d https://json-schema.org/draft/2020-12/schema manifest.schema.json manifest.yaml
manifest-schema-validation: node_modules
    node_modules/.bin/jsonschema validate -d https://json-schema.org/draft/2020-12/schema manifest.schema.json manifest.yaml

../build-tools/node_modules: ../build-tools/package.json
    cd ../build-tools && $(if $(CI),npm ci,npm install)
    touch ../build-tools/node_modules
node_modules: package.json
    npm install
    touch node_modules
@@ -170,29 +170,7 @@ RUN case $DEBIAN_VERSION in \
FROM build-deps AS pg-build
ARG PG_VERSION
COPY vendor/postgres-${PG_VERSION:?} postgres
COPY compute/patches/postgres_fdw.patch .
COPY compute/patches/pg_stat_statements_pg14-16.patch .
COPY compute/patches/pg_stat_statements_pg17.patch .
RUN cd postgres && \
    # Apply patches to some contrib extensions
    # For example, we need to grant EXECUTE on pg_stat_statements_reset() to {privileged_role_name}.
    # In vanilla Postgres this function is limited to the Postgres superuser role.
    # In Neon we have the {privileged_role_name} role, which is not a superuser but replaces superuser in some cases.
    # We could add the additional grant statements to the Postgres repository, but that would be hard to maintain
    # whenever we need to pick up a new Postgres version, and we want to limit the changes in our Postgres fork,
    # so we do it here.
    case "${PG_VERSION}" in \
        "v14" | "v15" | "v16") \
            patch -p1 < /pg_stat_statements_pg14-16.patch; \
            ;; \
        "v17") \
            patch -p1 < /pg_stat_statements_pg17.patch; \
            ;; \
        *) \
            # So we do not forget to migrate patches to the next major version
            echo "No contrib patches for this PostgreSQL version" && exit 1;; \
    esac && \
    patch -p1 < /postgres_fdw.patch && \
    export CONFIGURE_CMD="./configure CFLAGS='-O2 -g3 -fsigned-char' --enable-debug --with-openssl --with-uuid=ossp \
    --with-icu --with-libxml --with-libxslt --with-lz4" && \
    if [ "${PG_VERSION:?}" != "v14" ]; then \
@@ -206,6 +184,8 @@ RUN cd postgres && \
    echo 'trusted = true' >> /usr/local/pgsql/share/extension/autoinc.control && \
    echo 'trusted = true' >> /usr/local/pgsql/share/extension/dblink.control && \
    echo 'trusted = true' >> /usr/local/pgsql/share/extension/postgres_fdw.control && \
    file=/usr/local/pgsql/share/extension/postgres_fdw--1.0.sql && [ -e $file ] && \
    echo 'GRANT USAGE ON FOREIGN DATA WRAPPER postgres_fdw TO neon_superuser;' >> $file && \
    echo 'trusted = true' >> /usr/local/pgsql/share/extension/bloom.control && \
    echo 'trusted = true' >> /usr/local/pgsql/share/extension/earthdistance.control && \
    echo 'trusted = true' >> /usr/local/pgsql/share/extension/insert_username.control && \
@@ -215,7 +195,34 @@ RUN cd postgres && \
    echo 'trusted = true' >> /usr/local/pgsql/share/extension/pgrowlocks.control && \
    echo 'trusted = true' >> /usr/local/pgsql/share/extension/pgstattuple.control && \
    echo 'trusted = true' >> /usr/local/pgsql/share/extension/refint.control && \
    echo 'trusted = true' >> /usr/local/pgsql/share/extension/xml2.control
    echo 'trusted = true' >> /usr/local/pgsql/share/extension/xml2.control && \
    # We need to grant EXECUTE on pg_stat_statements_reset() to neon_superuser.
    # In vanilla Postgres this function is limited to the Postgres superuser role.
    # In Neon we have the neon_superuser role, which is not a superuser but replaces superuser in some cases.
    # We could add the additional grant statements to the Postgres repository, but that would be hard to maintain
    # whenever we need to pick up a new Postgres version, and we want to limit the changes in our Postgres fork,
    # so we do it here.
    for file in /usr/local/pgsql/share/extension/pg_stat_statements--*.sql; do \
        filename=$(basename "$file"); \
        # Note that there are no downgrade scripts for pg_stat_statements, so we \
        # don't have to modify any downgrade paths or (much) older versions: we only \
        # have to make sure every creation of the pg_stat_statements_reset function \
        # also adds execute permissions to the neon_superuser.
        case $filename in \
            pg_stat_statements--1.4.sql) \
                # pg_stat_statements_reset is first created with 1.4
                echo 'GRANT EXECUTE ON FUNCTION pg_stat_statements_reset() TO neon_superuser;' >> $file; \
                ;; \
            pg_stat_statements--1.6--1.7.sql) \
                # Then with the 1.6-1.7 migration it is re-created with a new signature, thus add the permissions back
                echo 'GRANT EXECUTE ON FUNCTION pg_stat_statements_reset(Oid, Oid, bigint) TO neon_superuser;' >> $file; \
                ;; \
            pg_stat_statements--1.10--1.11.sql) \
                # Then with the 1.10-1.11 migration it is re-created with a new signature again, thus add the permissions back
                echo 'GRANT EXECUTE ON FUNCTION pg_stat_statements_reset(Oid, Oid, bigint, boolean) TO neon_superuser;' >> $file; \
                ;; \
        esac; \
    done;

# Set PATH for all the subsequent build steps
ENV PATH="/usr/local/pgsql/bin:$PATH"
@@ -1517,7 +1524,7 @@ WORKDIR /ext-src
COPY compute/patches/pg_duckdb_v031.patch .
COPY compute/patches/duckdb_v120.patch .
# pg_duckdb build requires source dir to be a git repo to get submodules
# allow {privileged_role_name} to execute some functions that in pg_duckdb are available to superuser only:
# allow neon_superuser to execute some functions that in pg_duckdb are available to superuser only:
# - extension management function duckdb.install_extension()
# - access to duckdb.extensions table and its sequence
RUN git clone --depth 1 --branch v0.3.1 https://github.com/duckdb/pg_duckdb.git pg_duckdb-src && \
7  compute/package.json  Normal file
@@ -0,0 +1,7 @@
{
  "name": "neon-compute",
  "private": true,
  "dependencies": {
    "@sourcemeta/jsonschema": "9.3.4"
  }
}
@@ -1,26 +1,22 @@
diff --git a/sql/anon.sql b/sql/anon.sql
index 0cdc769..5eab1d6 100644
index 0cdc769..b450327 100644
--- a/sql/anon.sql
+++ b/sql/anon.sql
@@ -1141,3 +1141,19 @@ $$
@@ -1141,3 +1141,15 @@ $$
 -- TODO : https://en.wikipedia.org/wiki/L-diversity

 -- TODO : https://en.wikipedia.org/wiki/T-closeness
+
+-- NEON Patches
+
+GRANT ALL ON SCHEMA anon to neon_superuser;
+GRANT ALL ON ALL TABLES IN SCHEMA anon TO neon_superuser;
+
+DO $$
+DECLARE
+ privileged_role_name text;
+BEGIN
+ privileged_role_name := current_setting('neon.privileged_role_name');
+
+ EXECUTE format('GRANT ALL ON SCHEMA anon to %I', privileged_role_name);
+ EXECUTE format('GRANT ALL ON ALL TABLES IN SCHEMA anon TO %I', privileged_role_name);
+
+ IF current_setting('server_version_num')::int >= 150000 THEN
+   EXECUTE format('GRANT SET ON PARAMETER anon.transparent_dynamic_masking TO %I', privileged_role_name);
+ END IF;
+ IF current_setting('server_version_num')::int >= 150000 THEN
+   GRANT SET ON PARAMETER anon.transparent_dynamic_masking TO neon_superuser;
+ END IF;
+END $$;
diff --git a/sql/init.sql b/sql/init.sql
index 7da6553..9b6164b 100644
@@ -21,21 +21,13 @@ index 3235cc8..6b892bc 100644
 include Makefile.global

diff --git a/sql/pg_duckdb--0.2.0--0.3.0.sql b/sql/pg_duckdb--0.2.0--0.3.0.sql
index d777d76..3b54396 100644
index d777d76..af60106 100644
--- a/sql/pg_duckdb--0.2.0--0.3.0.sql
+++ b/sql/pg_duckdb--0.2.0--0.3.0.sql
@@ -1056,3 +1056,14 @@ GRANT ALL ON FUNCTION duckdb.cache(TEXT, TEXT) TO PUBLIC;
@@ -1056,3 +1056,6 @@ GRANT ALL ON FUNCTION duckdb.cache(TEXT, TEXT) TO PUBLIC;
 GRANT ALL ON FUNCTION duckdb.cache_info() TO PUBLIC;
 GRANT ALL ON FUNCTION duckdb.cache_delete(TEXT) TO PUBLIC;
 GRANT ALL ON PROCEDURE duckdb.recycle_ddb() TO PUBLIC;
+
+DO $$
+DECLARE
+ privileged_role_name text;
+BEGIN
+ privileged_role_name := current_setting('neon.privileged_role_name');
+
+ EXECUTE format('GRANT ALL ON FUNCTION duckdb.install_extension(TEXT) TO %I', privileged_role_name);
+ EXECUTE format('GRANT ALL ON TABLE duckdb.extensions TO %I', privileged_role_name);
+ EXECUTE format('GRANT ALL ON SEQUENCE duckdb.extensions_table_seq TO %I', privileged_role_name);
+END $$;
+GRANT ALL ON FUNCTION duckdb.install_extension(TEXT) TO neon_superuser;
+GRANT ALL ON TABLE duckdb.extensions TO neon_superuser;
+GRANT ALL ON SEQUENCE duckdb.extensions_table_seq TO neon_superuser;
@@ -1,34 +0,0 @@
diff --git a/contrib/pg_stat_statements/pg_stat_statements--1.4.sql b/contrib/pg_stat_statements/pg_stat_statements--1.4.sql
index 58cdf600fce..8be57a996f6 100644
--- a/contrib/pg_stat_statements/pg_stat_statements--1.4.sql
+++ b/contrib/pg_stat_statements/pg_stat_statements--1.4.sql
@@ -46,3 +46,12 @@ GRANT SELECT ON pg_stat_statements TO PUBLIC;

 -- Don't want this to be available to non-superusers.
 REVOKE ALL ON FUNCTION pg_stat_statements_reset() FROM PUBLIC;
+
+DO $$
+DECLARE
+ privileged_role_name text;
+BEGIN
+ privileged_role_name := current_setting('neon.privileged_role_name');
+
+ EXECUTE format('GRANT EXECUTE ON FUNCTION pg_stat_statements_reset() TO %I', privileged_role_name);
+END $$;
diff --git a/contrib/pg_stat_statements/pg_stat_statements--1.6--1.7.sql b/contrib/pg_stat_statements/pg_stat_statements--1.6--1.7.sql
index 6fc3fed4c93..256345a8f79 100644
--- a/contrib/pg_stat_statements/pg_stat_statements--1.6--1.7.sql
+++ b/contrib/pg_stat_statements/pg_stat_statements--1.6--1.7.sql
@@ -20,3 +20,12 @@ LANGUAGE C STRICT PARALLEL SAFE;

 -- Don't want this to be available to non-superusers.
 REVOKE ALL ON FUNCTION pg_stat_statements_reset(Oid, Oid, bigint) FROM PUBLIC;
+
+DO $$
+DECLARE
+ privileged_role_name text;
+BEGIN
+ privileged_role_name := current_setting('neon.privileged_role_name');
+
+ EXECUTE format('GRANT EXECUTE ON FUNCTION pg_stat_statements_reset(Oid, Oid, bigint) TO %I', privileged_role_name);
+END $$;
@@ -1,52 +0,0 @@
diff --git a/contrib/pg_stat_statements/pg_stat_statements--1.10--1.11.sql b/contrib/pg_stat_statements/pg_stat_statements--1.10--1.11.sql
index 0bb2c397711..32764db1d8b 100644
--- a/contrib/pg_stat_statements/pg_stat_statements--1.10--1.11.sql
+++ b/contrib/pg_stat_statements/pg_stat_statements--1.10--1.11.sql
@@ -80,3 +80,12 @@ LANGUAGE C STRICT PARALLEL SAFE;

 -- Don't want this to be available to non-superusers.
 REVOKE ALL ON FUNCTION pg_stat_statements_reset(Oid, Oid, bigint, boolean) FROM PUBLIC;
+
+DO $$
+DECLARE
+ privileged_role_name text;
+BEGIN
+ privileged_role_name := current_setting('neon.privileged_role_name');
+
+ EXECUTE format('GRANT EXECUTE ON FUNCTION pg_stat_statements_reset(Oid, Oid, bigint, boolean) TO %I', privileged_role_name);
+END $$;
\ No newline at end of file
diff --git a/contrib/pg_stat_statements/pg_stat_statements--1.4.sql b/contrib/pg_stat_statements/pg_stat_statements--1.4.sql
index 58cdf600fce..8be57a996f6 100644
--- a/contrib/pg_stat_statements/pg_stat_statements--1.4.sql
+++ b/contrib/pg_stat_statements/pg_stat_statements--1.4.sql
@@ -46,3 +46,12 @@ GRANT SELECT ON pg_stat_statements TO PUBLIC;

 -- Don't want this to be available to non-superusers.
 REVOKE ALL ON FUNCTION pg_stat_statements_reset() FROM PUBLIC;
+
+DO $$
+DECLARE
+ privileged_role_name text;
+BEGIN
+ privileged_role_name := current_setting('neon.privileged_role_name');
+
+ EXECUTE format('GRANT EXECUTE ON FUNCTION pg_stat_statements_reset() TO %I', privileged_role_name);
+END $$;
diff --git a/contrib/pg_stat_statements/pg_stat_statements--1.6--1.7.sql b/contrib/pg_stat_statements/pg_stat_statements--1.6--1.7.sql
index 6fc3fed4c93..256345a8f79 100644
--- a/contrib/pg_stat_statements/pg_stat_statements--1.6--1.7.sql
+++ b/contrib/pg_stat_statements/pg_stat_statements--1.6--1.7.sql
@@ -20,3 +20,12 @@ LANGUAGE C STRICT PARALLEL SAFE;

 -- Don't want this to be available to non-superusers.
 REVOKE ALL ON FUNCTION pg_stat_statements_reset(Oid, Oid, bigint) FROM PUBLIC;
+
+DO $$
+DECLARE
+ privileged_role_name text;
+BEGIN
+ privileged_role_name := current_setting('neon.privileged_role_name');
+
+ EXECUTE format('GRANT EXECUTE ON FUNCTION pg_stat_statements_reset(Oid, Oid, bigint) TO %I', privileged_role_name);
+END $$;
@@ -1,17 +0,0 @@
diff --git a/contrib/postgres_fdw/postgres_fdw--1.0.sql b/contrib/postgres_fdw/postgres_fdw--1.0.sql
index a0f0fc1bf45..ee077f2eea6 100644
--- a/contrib/postgres_fdw/postgres_fdw--1.0.sql
+++ b/contrib/postgres_fdw/postgres_fdw--1.0.sql
@@ -16,3 +16,12 @@ LANGUAGE C STRICT;
 CREATE FOREIGN DATA WRAPPER postgres_fdw
 HANDLER postgres_fdw_handler
 VALIDATOR postgres_fdw_validator;
+
+DO $$
+DECLARE
+ privileged_role_name text;
+BEGIN
+ privileged_role_name := current_setting('neon.privileged_role_name');
+
+ EXECUTE format('GRANT USAGE ON FOREIGN DATA WRAPPER postgres_fdw TO %I', privileged_role_name);
+END $$;
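All of the deleted patch files above share one mechanism: a SQL DO block reads the role name from the `neon.privileged_role_name` GUC and issues the grant through `format('%I', ...)`, which quotes the name as an identifier. A minimal Rust sketch of the same idea using tokio-postgres (the helper below is hypothetical, not code from this repo):

    use tokio_postgres::{Client, Error};

    // Hypothetical helper mirroring the deleted SQL DO blocks: look up the
    // privileged role from the GUC, then grant to it with identifier quoting
    // (the moral equivalent of format('%I', ...) in PL/pgSQL).
    async fn grant_to_privileged_role(client: &Client, privilege: &str) -> Result<(), Error> {
        let row = client
            .query_one("SELECT current_setting('neon.privileged_role_name')", &[])
            .await?;
        let role: String = row.get(0);
        let quoted = format!("\"{}\"", role.replace('"', "\"\""));
        client
            .batch_execute(&format!("GRANT {privilege} TO {quoted}"))
            .await?;
        Ok(())
    }

For example, `grant_to_privileged_role(&client, "EXECUTE ON FUNCTION pg_stat_statements_reset()")` would reproduce the first deleted grant.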
@@ -87,14 +87,6 @@ struct Cli {
    #[arg(short = 'C', long, value_name = "DATABASE_URL")]
    pub connstr: String,

    #[arg(
        long,
        default_value = "neon_superuser",
        value_name = "PRIVILEGED_ROLE_NAME",
        value_parser = Self::parse_privileged_role_name
    )]
    pub privileged_role_name: String,

    #[cfg(target_os = "linux")]
    #[arg(long, default_value = "neon-postgres")]
    pub cgroup: String,
@@ -157,21 +149,6 @@ impl Cli {

        Ok(url)
    }

    /// For simplicity, we do not escape `privileged_role_name` anywhere in the code.
    /// Since it's a system role, which we fully control, that's fine. Still, let's
    /// validate it to avoid any surprises.
    fn parse_privileged_role_name(value: &str) -> Result<String> {
        use regex::Regex;

        let pattern = Regex::new(r"^[a-z_]+$").unwrap();

        if !pattern.is_match(value) {
            bail!("--privileged-role-name can only contain lowercase letters and underscores")
        }

        Ok(value.to_string())
    }
}

fn main() -> Result<()> {
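The removed validator is what justified interpolating the role name into SQL without escaping. A minimal sketch of that reasoning (assumes the `regex` crate, as the original code does; `grant_statement` is a hypothetical helper):

    use regex::Regex;

    // Illustrative sketch of the removed doc comment's argument: because the
    // role name is validated against ^[a-z_]+$, interpolating it unescaped into
    // SQL cannot break out of the identifier position.
    fn grant_statement(role: &str) -> Option<String> {
        let ok = Regex::new(r"^[a-z_]+$").unwrap().is_match(role);
        ok.then(|| format!("GRANT EXECUTE ON FUNCTION pg_stat_statements_reset() TO {role};"))
    }

    fn main() {
        assert!(grant_statement("neon_superuser").is_some());
        // An injection attempt is rejected before any SQL is built.
        assert!(grant_statement("evil; DROP TABLE t; --").is_none());
    }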
@@ -201,7 +178,6 @@ fn main() -> Result<()> {
        ComputeNodeParams {
            compute_id: cli.compute_id,
            connstr,
            privileged_role_name: cli.privileged_role_name.clone(),
            pgdata: cli.pgdata.clone(),
            pgbin: cli.pgbin.clone(),
            pgversion: get_pg_version_string(&cli.pgbin),
@@ -351,49 +327,4 @@ mod test {
        ])
        .expect_err("URL parameters are not allowed");
    }

    #[test]
    fn verify_privileged_role_name() {
        // Valid name
        let cli = Cli::parse_from([
            "compute_ctl",
            "--pgdata=test",
            "--connstr=test",
            "--compute-id=test",
            "--privileged-role-name",
            "my_superuser",
        ]);
        assert_eq!(cli.privileged_role_name, "my_superuser");

        // Invalid names
        Cli::try_parse_from([
            "compute_ctl",
            "--pgdata=test",
            "--connstr=test",
            "--compute-id=test",
            "--privileged-role-name",
            "NeonSuperuser",
        ])
        .expect_err("uppercase letters are not allowed");

        Cli::try_parse_from([
            "compute_ctl",
            "--pgdata=test",
            "--connstr=test",
            "--compute-id=test",
            "--privileged-role-name",
            "$'neon_superuser",
        ])
        .expect_err("special characters are not allowed");

        Cli::try_parse_from([
            "compute_ctl",
            "--pgdata=test",
            "--connstr=test",
            "--compute-id=test",
            "--privileged-role-name",
            "",
        ])
        .expect_err("empty name is not allowed");
    }
}
@@ -74,20 +74,12 @@ const DEFAULT_INSTALLED_EXTENSIONS_COLLECTION_INTERVAL: u64 = 3600;

/// Static configuration params that don't change after startup. These mostly
/// come from the CLI args, or are derived from them.
#[derive(Clone, Debug)]
pub struct ComputeNodeParams {
    /// The ID of the compute
    pub compute_id: String,

    /// Url type maintains proper escaping
    // Url type maintains proper escaping
    pub connstr: url::Url,

    /// The name of the 'weak' superuser role, which we give to the users.
    /// It follows the allow list approach, i.e., we take a standard role
    /// and grant it extra permissions with explicit GRANTs here and there,
    /// and core patches.
    pub privileged_role_name: String,

    pub resize_swap_on_bind: bool,
    pub set_disk_quota_for_fs: Option<String>,

@@ -1397,7 +1389,6 @@ impl ComputeNode {
        self.create_pgdata()?;
        config::write_postgres_conf(
            pgdata_path,
            &self.params,
            &pspec.spec,
            self.params.internal_http_port,
            tls_config,
@@ -1746,7 +1737,6 @@ impl ComputeNode {
        }

        // Run migrations separately to not hold up cold starts
        let params = self.params.clone();
        tokio::spawn(async move {
            let mut conf = conf.as_ref().clone();
            conf.application_name("compute_ctl:migrations");
@@ -1758,7 +1748,7 @@ impl ComputeNode {
                    eprintln!("connection error: {e}");
                }
            });
            if let Err(e) = handle_migrations(params, &mut client).await {
            if let Err(e) = handle_migrations(&mut client).await {
                error!("Failed to run migrations: {}", e);
            }
        }
@@ -1837,7 +1827,6 @@ impl ComputeNode {
|
||||
let pgdata_path = Path::new(&self.params.pgdata);
|
||||
config::write_postgres_conf(
|
||||
pgdata_path,
|
||||
&self.params,
|
||||
&spec,
|
||||
self.params.internal_http_port,
|
||||
tls_config,
|
||||
@@ -2450,31 +2439,14 @@ LIMIT 100",
|
||||
pub fn spawn_lfc_offload_task(self: &Arc<Self>, interval: Duration) {
|
||||
self.terminate_lfc_offload_task();
|
||||
let secs = interval.as_secs();
|
||||
info!("spawning lfc offload worker with {secs}s interval");
|
||||
let this = self.clone();
|
||||
|
||||
info!("spawning LFC offload worker with {secs}s interval");
|
||||
let handle = spawn(async move {
|
||||
let mut interval = time::interval(interval);
|
||||
interval.tick().await; // returns immediately
|
||||
loop {
|
||||
interval.tick().await;
|
||||
|
||||
let prewarm_state = this.state.lock().unwrap().lfc_prewarm_state.clone();
|
||||
// Do not offload LFC state if we are currently prewarming or any issue occurred.
|
||||
// If we'd do that, we might override the LFC state in endpoint storage with some
|
||||
// incomplete state. Imagine a situation:
|
||||
// 1. Endpoint started with `autoprewarm: true`
|
||||
// 2. While prewarming is not completed, we upload the new incomplete state
|
||||
// 3. Compute gets interrupted and restarts
|
||||
// 4. We start again and try to prewarm with the state from 2. instead of the previous complete state
|
||||
if matches!(
|
||||
prewarm_state,
|
||||
LfcPrewarmState::Completed
|
||||
| LfcPrewarmState::NotPrewarmed
|
||||
| LfcPrewarmState::Skipped
|
||||
) {
|
||||
this.offload_lfc_async().await;
|
||||
}
|
||||
this.offload_lfc_async().await;
|
||||
}
|
||||
});
|
||||
*self.lfc_offload_task.lock().unwrap() = Some(handle);
|
||||
|
||||
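The guard in the removed loop body is the interesting part: offloading is skipped unless the prewarm state machine is in a terminal, known-good state. A self-contained sketch of the same tick-and-guard shape, assuming only the `tokio` crate (`busy_prewarming` is a stand-in for the real `lfc_prewarm_state` check):

```rust
use std::sync::Arc;
use std::sync::atomic::{AtomicBool, Ordering};
use std::time::Duration;

async fn offload_loop(busy_prewarming: Arc<AtomicBool>, period: Duration) {
    let mut ticker = tokio::time::interval(period);
    ticker.tick().await; // the first tick completes immediately
    loop {
        ticker.tick().await;
        // Skip the upload while a prewarm is in flight, so an incomplete
        // LFC state never overwrites the last complete snapshot.
        if busy_prewarming.load(Ordering::Relaxed) {
            continue;
        }
        // ... upload the current LFC state here ...
    }
}
```
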
@@ -89,7 +89,7 @@ impl ComputeNode {
        self.state.lock().unwrap().lfc_offload_state.clone()
    }

    /// If there is a prewarm request ongoing, return `false`, `true` otherwise.
    /// If there is a prewarm request ongoing, return false, true otherwise
    pub fn prewarm_lfc(self: &Arc<Self>, from_endpoint: Option<String>) -> bool {
        {
            let state = &mut self.state.lock().unwrap().lfc_prewarm_state;
@@ -101,25 +101,15 @@ impl ComputeNode {

        let cloned = self.clone();
        spawn(async move {
            let state = match cloned.prewarm_impl(from_endpoint).await {
                Ok(true) => LfcPrewarmState::Completed,
                Ok(false) => {
                    info!(
                        "skipping LFC prewarm because LFC state is not found in endpoint storage"
                    );
                    LfcPrewarmState::Skipped
                }
                Err(err) => {
                    crate::metrics::LFC_PREWARM_ERRORS.inc();
                    error!(%err, "could not prewarm LFC");

                    LfcPrewarmState::Failed {
                        error: err.to_string(),
                    }
                }
            let Err(err) = cloned.prewarm_impl(from_endpoint).await else {
                cloned.state.lock().unwrap().lfc_prewarm_state = LfcPrewarmState::Completed;
                return;
            };
            crate::metrics::LFC_PREWARM_ERRORS.inc();
            error!(%err, "prewarming lfc");
            cloned.state.lock().unwrap().lfc_prewarm_state = LfcPrewarmState::Failed {
                error: err.to_string(),
            };

            cloned.state.lock().unwrap().lfc_prewarm_state = state;
        });
        true
    }
@@ -130,21 +120,15 @@ impl ComputeNode {
        EndpointStoragePair::from_spec_and_endpoint(state.pspec.as_ref().unwrap(), from_endpoint)
    }

    /// Request LFC state from endpoint storage and load corresponding pages into Postgres.
    /// Returns a result with `false` if the LFC state is not found in endpoint storage.
    async fn prewarm_impl(&self, from_endpoint: Option<String>) -> Result<bool> {
    async fn prewarm_impl(&self, from_endpoint: Option<String>) -> Result<()> {
        let EndpointStoragePair { url, token } = self.endpoint_storage_pair(from_endpoint)?;

        info!(%url, "requesting LFC state from endpoint storage");

        let request = Client::new().get(&url).bearer_auth(token);
        let res = request.send().await.context("querying endpoint storage")?;
        let status = res.status();
        match status {
            StatusCode::OK => (),
            StatusCode::NOT_FOUND => {
                return Ok(false);
            }
            _ => bail!("{status} querying endpoint storage"),
        if status != StatusCode::OK {
            bail!("{status} querying endpoint storage")
        }

        let mut uncompressed = Vec::new();
@@ -157,8 +141,7 @@ impl ComputeNode {
            .await
            .context("decoding LFC state")?;
        let uncompressed_len = uncompressed.len();

        info!(%url, "downloaded LFC state, uncompressed size {uncompressed_len}, loading into Postgres");
        info!(%url, "downloaded LFC state, uncompressed size {uncompressed_len}, loading into postgres");

        ComputeNode::get_maintenance_client(&self.tokio_conn_conf)
            .await
@@ -166,9 +149,7 @@ impl ComputeNode {
            .query_one("select neon.prewarm_local_cache($1)", &[&uncompressed])
            .await
            .context("loading LFC state into postgres")
            .map(|_| ())?;

        Ok(true)
            .map(|_| ())
    }

    /// If offload request is ongoing, return false, true otherwise
@@ -196,14 +177,12 @@ impl ComputeNode {

    async fn offload_lfc_with_state_update(&self) {
        crate::metrics::LFC_OFFLOADS.inc();

        let Err(err) = self.offload_lfc_impl().await else {
            self.state.lock().unwrap().lfc_offload_state = LfcOffloadState::Completed;
            return;
        };

        crate::metrics::LFC_OFFLOAD_ERRORS.inc();
        error!(%err, "could not offload LFC state to endpoint storage");
        error!(%err, "offloading lfc");
        self.state.lock().unwrap().lfc_offload_state = LfcOffloadState::Failed {
            error: err.to_string(),
        };
@@ -211,7 +190,7 @@ impl ComputeNode {

    async fn offload_lfc_impl(&self) -> Result<()> {
        let EndpointStoragePair { url, token } = self.endpoint_storage_pair(None)?;
        info!(%url, "requesting LFC state from Postgres");
        info!(%url, "requesting LFC state from postgres");

        let mut compressed = Vec::new();
        ComputeNode::get_maintenance_client(&self.tokio_conn_conf)
@@ -226,17 +205,13 @@ impl ComputeNode {
            .read_to_end(&mut compressed)
            .await
            .context("compressing LFC state")?;

        let compressed_len = compressed.len();
        info!(%url, "downloaded LFC state, compressed size {compressed_len}, writing to endpoint storage");

        let request = Client::new().put(url).bearer_auth(token).body(compressed);
        match request.send().await {
            Ok(res) if res.status() == StatusCode::OK => Ok(()),
            Ok(res) => bail!(
                "Request to endpoint storage failed with status: {}",
                res.status()
            ),
            Ok(res) => bail!("Error writing to endpoint storage: {}", res.status()),
            Err(err) => Err(err).context("writing to endpoint storage"),
        }
    }

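The removed `Result<bool>` signature treated `404 Not Found` as a normal outcome rather than an error; the replacement collapses it back into a `bail!`. A standalone sketch of the 404-tolerant variant, assuming the `reqwest` and `anyhow` crates (the URL and token are placeholders):

```rust
use anyhow::{Context, Result, bail};
use reqwest::StatusCode;

/// Returns Ok(None) when the state object does not exist yet, which is
/// expected on the very first start of an endpoint.
async fn fetch_lfc_state(url: &str, token: &str) -> Result<Option<Vec<u8>>> {
    let res = reqwest::Client::new()
        .get(url)
        .bearer_auth(token)
        .send()
        .await
        .context("querying endpoint storage")?;
    match res.status() {
        StatusCode::OK => Ok(Some(res.bytes().await?.to_vec())),
        StatusCode::NOT_FOUND => Ok(None),
        status => bail!("{status} querying endpoint storage"),
    }
}
```
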
@@ -9,7 +9,6 @@ use std::path::Path;
use compute_api::responses::TlsConfig;
use compute_api::spec::{ComputeAudit, ComputeMode, ComputeSpec, GenericOption};

use crate::compute::ComputeNodeParams;
use crate::pg_helpers::{
    GenericOptionExt, GenericOptionsSearch, PgOptionsSerialize, escape_conf_value,
};
@@ -42,7 +41,6 @@ pub fn line_in_file(path: &Path, line: &str) -> Result<bool> {
/// Create or completely rewrite configuration file specified by `path`
pub fn write_postgres_conf(
    pgdata_path: &Path,
    params: &ComputeNodeParams,
    spec: &ComputeSpec,
    extension_server_port: u16,
    tls_config: &Option<TlsConfig>,
@@ -56,15 +54,14 @@ pub fn write_postgres_conf(
        writeln!(file, "{conf}")?;
    }

    // Stripe size GUC should be defined prior to connection string
    if let Some(stripe_size) = spec.shard_stripe_size {
        writeln!(file, "neon.stripe_size={stripe_size}")?;
    }
    // Add options for connecting to storage
    writeln!(file, "# Neon storage settings")?;
    if let Some(s) = &spec.pageserver_connstring {
        writeln!(file, "neon.pageserver_connstring={}", escape_conf_value(s))?;
    }
    if let Some(stripe_size) = spec.shard_stripe_size {
        writeln!(file, "neon.stripe_size={stripe_size}")?;
    }
    if !spec.safekeeper_connstrings.is_empty() {
        let mut neon_safekeepers_value = String::new();
        tracing::info!(
@@ -164,12 +161,6 @@ pub fn write_postgres_conf(
        }
    }

    writeln!(
        file,
        "neon.privileged_role_name={}",
        escape_conf_value(params.privileged_role_name.as_str())
    )?;

    // If there are any extra options in the 'settings' field, append those
    if spec.cluster.settings.is_some() {
        writeln!(file, "# Managed by compute_ctl: begin")?;

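`write_postgres_conf` leans on `escape_conf_value` for every string GUC it emits. The project's implementation is not shown in this diff; a plausible sketch of such a helper, following the postgresql.conf quoting rules (single-quote the value, double embedded quotes, escape backslashes):

```rust
// Hypothetical body, for illustration only; the real helper lives in
// compute_tools/src/pg_helpers.rs.
fn escape_conf_value(s: &str) -> String {
    let escaped = s.replace('\\', "\\\\").replace('\'', "''");
    format!("'{escaped}'")
}

fn main() {
    // Prints: neon.pageserver_connstring='host=127.0.0.1 port=6400'
    println!(
        "neon.pageserver_connstring={}",
        escape_conf_value("host=127.0.0.1 port=6400")
    );
}
```
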
@@ -613,11 +613,11 @@ components:
          - skipped
        properties:
          status:
            description: LFC prewarm status
            enum: [not_prewarmed, prewarming, completed, failed, skipped]
            description: Lfc prewarm status
            enum: [not_prewarmed, prewarming, completed, failed]
            type: string
          error:
            description: LFC prewarm error, if any
            description: Lfc prewarm error, if any
            type: string
          total:
            description: Total pages processed
@@ -635,11 +635,11 @@ components:
          - status
        properties:
          status:
            description: LFC offload status
            description: Lfc offload status
            enum: [not_offloaded, offloading, completed, failed]
            type: string
          error:
            description: LFC offload error, if any
            description: Lfc offload error, if any
            type: string

    PromoteState:

@@ -1 +0,0 @@
ALTER ROLE {privileged_role_name} BYPASSRLS;
@@ -0,0 +1 @@
ALTER ROLE neon_superuser BYPASSRLS;
@@ -15,7 +15,7 @@ DO $$
DECLARE
    role_name text;
BEGIN
    FOR role_name IN SELECT rolname FROM pg_roles WHERE pg_has_role(rolname, '{privileged_role_name}', 'member')
    FOR role_name IN SELECT rolname FROM pg_roles WHERE pg_has_role(rolname, 'neon_superuser', 'member')
    LOOP
        RAISE NOTICE 'EXECUTING ALTER ROLE % INHERIT', quote_ident(role_name);
        EXECUTE 'ALTER ROLE ' || quote_ident(role_name) || ' INHERIT';
@@ -23,7 +23,7 @@ BEGIN

    FOR role_name IN SELECT rolname FROM pg_roles
    WHERE
        NOT pg_has_role(rolname, '{privileged_role_name}', 'member') AND NOT starts_with(rolname, 'pg_')
        NOT pg_has_role(rolname, 'neon_superuser', 'member') AND NOT starts_with(rolname, 'pg_')
    LOOP
        RAISE NOTICE 'EXECUTING ALTER ROLE % NOBYPASSRLS', quote_ident(role_name);
        EXECUTE 'ALTER ROLE ' || quote_ident(role_name) || ' NOBYPASSRLS';

@@ -1,6 +1,6 @@
DO $$
BEGIN
    IF (SELECT setting::numeric >= 160000 FROM pg_settings WHERE name = 'server_version_num') THEN
        EXECUTE 'GRANT pg_create_subscription TO {privileged_role_name}';
        EXECUTE 'GRANT pg_create_subscription TO neon_superuser';
    END IF;
END $$;
@@ -0,0 +1 @@
GRANT pg_monitor TO neon_superuser WITH ADMIN OPTION;
@@ -1 +0,0 @@
GRANT pg_monitor TO {privileged_role_name} WITH ADMIN OPTION;
@@ -1,4 +1,4 @@
-- SKIP: Deemed insufficient for allowing relations created by extensions to be
-- interacted with by {privileged_role_name} without permission issues.
-- interacted with by neon_superuser without permission issues.

ALTER DEFAULT PRIVILEGES IN SCHEMA public GRANT ALL ON TABLES TO {privileged_role_name};
ALTER DEFAULT PRIVILEGES IN SCHEMA public GRANT ALL ON TABLES TO neon_superuser;
@@ -1,4 +1,4 @@
-- SKIP: Deemed insufficient for allowing relations created by extensions to be
-- interacted with by {privileged_role_name} without permission issues.
-- interacted with by neon_superuser without permission issues.

ALTER DEFAULT PRIVILEGES IN SCHEMA public GRANT ALL ON SEQUENCES TO {privileged_role_name};
ALTER DEFAULT PRIVILEGES IN SCHEMA public GRANT ALL ON SEQUENCES TO neon_superuser;
@@ -1,3 +1,3 @@
-- SKIP: Moved inline to the handle_grants() functions.

ALTER DEFAULT PRIVILEGES IN SCHEMA public GRANT ALL ON TABLES TO {privileged_role_name} WITH GRANT OPTION;
ALTER DEFAULT PRIVILEGES IN SCHEMA public GRANT ALL ON TABLES TO neon_superuser WITH GRANT OPTION;
@@ -1,3 +1,3 @@
-- SKIP: Moved inline to the handle_grants() functions.

ALTER DEFAULT PRIVILEGES IN SCHEMA public GRANT ALL ON SEQUENCES TO {privileged_role_name} WITH GRANT OPTION;
ALTER DEFAULT PRIVILEGES IN SCHEMA public GRANT ALL ON SEQUENCES TO neon_superuser WITH GRANT OPTION;
@@ -1,7 +1,7 @@
DO $$
BEGIN
    IF (SELECT setting::numeric >= 160000 FROM pg_settings WHERE name = 'server_version_num') THEN
        EXECUTE 'GRANT EXECUTE ON FUNCTION pg_export_snapshot TO {privileged_role_name}';
        EXECUTE 'GRANT EXECUTE ON FUNCTION pg_log_standby_snapshot TO {privileged_role_name}';
        EXECUTE 'GRANT EXECUTE ON FUNCTION pg_export_snapshot TO neon_superuser';
        EXECUTE 'GRANT EXECUTE ON FUNCTION pg_log_standby_snapshot TO neon_superuser';
    END IF;
END $$;
@@ -0,0 +1 @@
GRANT EXECUTE ON FUNCTION pg_show_replication_origin_status TO neon_superuser;
@@ -1 +0,0 @@
GRANT EXECUTE ON FUNCTION pg_show_replication_origin_status TO {privileged_role_name};
@@ -0,0 +1 @@
GRANT pg_signal_backend TO neon_superuser WITH ADMIN OPTION;
@@ -1 +0,0 @@
GRANT pg_signal_backend TO {privileged_role_name} WITH ADMIN OPTION;
@@ -9,7 +9,6 @@ use reqwest::StatusCode;
use tokio_postgres::Client;
use tracing::{error, info, instrument};

use crate::compute::ComputeNodeParams;
use crate::config;
use crate::metrics::{CPLANE_REQUESTS_TOTAL, CPlaneRequestRPC, UNKNOWN_HTTP_STATUS};
use crate::migration::MigrationRunner;
@@ -170,7 +169,7 @@ pub async fn handle_neon_extension_upgrade(client: &mut Client) -> Result<()> {
}

#[instrument(skip_all)]
pub async fn handle_migrations(params: ComputeNodeParams, client: &mut Client) -> Result<()> {
pub async fn handle_migrations(client: &mut Client) -> Result<()> {
    info!("handle migrations");

    // !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
@@ -179,59 +178,26 @@ pub async fn handle_migrations(params: ComputeNodeParams, client: &mut Client) -

    // Add new migrations in numerical order.
    let migrations = [
        &format!(
            include_str!("./migrations/0001-add_bypass_rls_to_privileged_role.sql"),
            privileged_role_name = params.privileged_role_name
        include_str!("./migrations/0001-neon_superuser_bypass_rls.sql"),
        include_str!("./migrations/0002-alter_roles.sql"),
        include_str!("./migrations/0003-grant_pg_create_subscription_to_neon_superuser.sql"),
        include_str!("./migrations/0004-grant_pg_monitor_to_neon_superuser.sql"),
        include_str!("./migrations/0005-grant_all_on_tables_to_neon_superuser.sql"),
        include_str!("./migrations/0006-grant_all_on_sequences_to_neon_superuser.sql"),
        include_str!(
            "./migrations/0007-grant_all_on_tables_to_neon_superuser_with_grant_option.sql"
        ),
        &format!(
            include_str!("./migrations/0002-alter_roles.sql"),
            privileged_role_name = params.privileged_role_name
        ),
        &format!(
            include_str!("./migrations/0003-grant_pg_create_subscription_to_privileged_role.sql"),
            privileged_role_name = params.privileged_role_name
        ),
        &format!(
            include_str!("./migrations/0004-grant_pg_monitor_to_privileged_role.sql"),
            privileged_role_name = params.privileged_role_name
        ),
        &format!(
            include_str!("./migrations/0005-grant_all_on_tables_to_privileged_role.sql"),
            privileged_role_name = params.privileged_role_name
        ),
        &format!(
            include_str!("./migrations/0006-grant_all_on_sequences_to_privileged_role.sql"),
            privileged_role_name = params.privileged_role_name
        ),
        &format!(
            include_str!(
                "./migrations/0007-grant_all_on_tables_with_grant_option_to_privileged_role.sql"
            ),
            privileged_role_name = params.privileged_role_name
        ),
        &format!(
            include_str!(
                "./migrations/0008-grant_all_on_sequences_with_grant_option_to_privileged_role.sql"
            ),
            privileged_role_name = params.privileged_role_name
        include_str!(
            "./migrations/0008-grant_all_on_sequences_to_neon_superuser_with_grant_option.sql"
        ),
        include_str!("./migrations/0009-revoke_replication_for_previously_allowed_roles.sql"),
        &format!(
            include_str!(
                "./migrations/0010-grant_snapshot_synchronization_funcs_to_privileged_role.sql"
            ),
            privileged_role_name = params.privileged_role_name
        include_str!(
            "./migrations/0010-grant_snapshot_synchronization_funcs_to_neon_superuser.sql"
        ),
        &format!(
            include_str!(
                "./migrations/0011-grant_pg_show_replication_origin_status_to_privileged_role.sql"
            ),
            privileged_role_name = params.privileged_role_name
        ),
        &format!(
            include_str!("./migrations/0012-grant_pg_signal_backend_to_privileged_role.sql"),
            privileged_role_name = params.privileged_role_name
        include_str!(
            "./migrations/0011-grant_pg_show_replication_origin_status_to_neon_superuser.sql"
        ),
        include_str!("./migrations/0012-grant_pg_signal_backend_to_neon_superuser.sql"),
    ];

    MigrationRunner::new(client, &migrations)

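A detail worth calling out in the removed code: `include_str!` expands to a string literal at compile time, so it can be used directly as the `format!` template, with named placeholders substituted from runtime values. A minimal sketch (the `grant.sql` file here is hypothetical, so this only compiles next to such a file):

```rust
fn main() {
    // Suppose grant.sql contains exactly:
    //   GRANT pg_monitor TO {privileged_role_name};
    let sql = format!(
        include_str!("grant.sql"),
        privileged_role_name = "neon_superuser"
    );
    assert_eq!(sql, "GRANT pg_monitor TO neon_superuser;");
}
```
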
@@ -13,14 +13,14 @@ use tokio_postgres::Client;
use tokio_postgres::error::SqlState;
use tracing::{Instrument, debug, error, info, info_span, instrument, warn};

use crate::compute::{ComputeNode, ComputeNodeParams, ComputeState};
use crate::compute::{ComputeNode, ComputeState};
use crate::pg_helpers::{
    DatabaseExt, Escaping, GenericOptionsSearch, RoleExt, get_existing_dbs_async,
    get_existing_roles_async,
};
use crate::spec_apply::ApplySpecPhase::{
    CreateAndAlterDatabases, CreateAndAlterRoles, CreateAvailabilityCheck, CreatePgauditExtension,
    CreatePgauditlogtofileExtension, CreatePrivilegedRole, CreateSchemaNeon,
    CreateAndAlterDatabases, CreateAndAlterRoles, CreateAvailabilityCheck, CreateNeonSuperuser,
    CreatePgauditExtension, CreatePgauditlogtofileExtension, CreateSchemaNeon,
    DisablePostgresDBPgAudit, DropInvalidDatabases, DropRoles, FinalizeDropLogicalSubscriptions,
    HandleNeonExtension, HandleOtherExtensions, RenameAndDeleteDatabases, RenameRoles,
    RunInEachDatabase,
@@ -49,7 +49,6 @@ impl ComputeNode {
        // Proceed with post-startup configuration. Note, that order of operations is important.
        let client = Self::get_maintenance_client(&conf).await?;
        let spec = spec.clone();
        let params = Arc::new(self.params.clone());

        let databases = get_existing_dbs_async(&client).await?;
        let roles = get_existing_roles_async(&client)
@@ -158,7 +157,6 @@ impl ComputeNode {

        let conf = Arc::new(conf);
        let fut = Self::apply_spec_sql_db(
            params.clone(),
            spec.clone(),
            conf,
            ctx.clone(),
@@ -187,7 +185,7 @@ impl ComputeNode {
        }

        for phase in [
            CreatePrivilegedRole,
            CreateNeonSuperuser,
            DropInvalidDatabases,
            RenameRoles,
            CreateAndAlterRoles,
@@ -197,7 +195,6 @@ impl ComputeNode {
        ] {
            info!("Applying phase {:?}", &phase);
            apply_operations(
                params.clone(),
                spec.clone(),
                ctx.clone(),
                jwks_roles.clone(),
@@ -246,7 +243,6 @@ impl ComputeNode {
            }

            let fut = Self::apply_spec_sql_db(
                params.clone(),
                spec.clone(),
                conf,
                ctx.clone(),
@@ -297,7 +293,6 @@ impl ComputeNode {
        for phase in phases {
            debug!("Applying phase {:?}", &phase);
            apply_operations(
                params.clone(),
                spec.clone(),
                ctx.clone(),
                jwks_roles.clone(),
@@ -318,9 +313,7 @@ impl ComputeNode {
    /// May opt to not connect to databases that don't have any scheduled
    /// operations. The function is concurrency-controlled with the provided
    /// semaphore. The caller has to make sure the semaphore isn't exhausted.
    #[allow(clippy::too_many_arguments)] // TODO: needs bigger refactoring
    async fn apply_spec_sql_db(
        params: Arc<ComputeNodeParams>,
        spec: Arc<ComputeSpec>,
        conf: Arc<tokio_postgres::Config>,
        ctx: Arc<tokio::sync::RwLock<MutableApplyContext>>,
@@ -335,7 +328,6 @@ impl ComputeNode {

        for subphase in subphases {
            apply_operations(
                params.clone(),
                spec.clone(),
                ctx.clone(),
                jwks_roles.clone(),
@@ -411,8 +403,7 @@ impl ComputeNode {
            .map(|limit| match limit {
                0..10 => limit,
                10..30 => 10,
                30..300 => limit / 3,
                300.. => 100,
                30.. => limit / 3,
            })
            // If we didn't find max_connections, default to 10 concurrent connections.
            .unwrap_or(10)
@@ -476,7 +467,7 @@ pub enum PerDatabasePhase {

#[derive(Clone, Debug)]
pub enum ApplySpecPhase {
    CreatePrivilegedRole,
    CreateNeonSuperuser,
    DropInvalidDatabases,
    RenameRoles,
    CreateAndAlterRoles,
@@ -519,7 +510,6 @@ pub struct MutableApplyContext {
/// - No timeouts have (yet) been implemented.
/// - The caller is responsible for limiting and/or applying concurrency.
pub async fn apply_operations<'a, Fut, F>(
    params: Arc<ComputeNodeParams>,
    spec: Arc<ComputeSpec>,
    ctx: Arc<RwLock<MutableApplyContext>>,
    jwks_roles: Arc<HashSet<String>>,
@@ -537,7 +527,7 @@ where
    debug!("Processing phase {:?}", &apply_spec_phase);
    let ctx = ctx;

    let mut ops = get_operations(&params, &spec, &ctx, &jwks_roles, &apply_spec_phase)
    let mut ops = get_operations(&spec, &ctx, &jwks_roles, &apply_spec_phase)
        .await?
        .peekable();

@@ -598,18 +588,14 @@ where
/// sort/merge/batch execution, but for now this is a nice way to improve
/// batching behavior of the commands.
async fn get_operations<'a>(
    params: &'a ComputeNodeParams,
    spec: &'a ComputeSpec,
    ctx: &'a RwLock<MutableApplyContext>,
    jwks_roles: &'a HashSet<String>,
    apply_spec_phase: &'a ApplySpecPhase,
) -> Result<Box<dyn Iterator<Item = Operation> + 'a + Send>> {
    match apply_spec_phase {
        ApplySpecPhase::CreatePrivilegedRole => Ok(Box::new(once(Operation {
            query: format!(
                include_str!("sql/create_privileged_role.sql"),
                privileged_role_name = params.privileged_role_name
            ),
        ApplySpecPhase::CreateNeonSuperuser => Ok(Box::new(once(Operation {
            query: include_str!("sql/create_neon_superuser.sql").to_string(),
            comment: None,
        }))),
        ApplySpecPhase::DropInvalidDatabases => {
@@ -711,9 +697,8 @@ async fn get_operations<'a>(
            None => {
                let query = if !jwks_roles.contains(role.name.as_str()) {
                    format!(
                        "CREATE ROLE {} INHERIT CREATEROLE CREATEDB BYPASSRLS REPLICATION IN ROLE {} {}",
                        "CREATE ROLE {} INHERIT CREATEROLE CREATEDB BYPASSRLS REPLICATION IN ROLE neon_superuser {}",
                        role.name.pg_quote(),
                        params.privileged_role_name,
                        role.to_pg_options(),
                    )
                } else {
@@ -864,9 +849,8 @@ async fn get_operations<'a>(
                    // ALL PRIVILEGES grants CREATE, CONNECT, and TEMPORARY on the database
                    // (see https://www.postgresql.org/docs/current/ddl-priv.html)
                    query: format!(
                        "GRANT ALL PRIVILEGES ON DATABASE {} TO {}",
                        db.name.pg_quote(),
                        params.privileged_role_name
                        "GRANT ALL PRIVILEGES ON DATABASE {} TO neon_superuser",
                        db.name.pg_quote()
                    ),
                    comment: None,
                },

8 compute_tools/src/sql/create_neon_superuser.sql Normal file
@@ -0,0 +1,8 @@
DO $$
BEGIN
    IF NOT EXISTS (SELECT FROM pg_catalog.pg_roles WHERE rolname = 'neon_superuser')
    THEN
        CREATE ROLE neon_superuser CREATEDB CREATEROLE NOLOGIN REPLICATION BYPASSRLS IN ROLE pg_read_all_data, pg_write_all_data;
    END IF;
END
$$;
@@ -1,8 +0,0 @@
DO $$
BEGIN
    IF NOT EXISTS (SELECT FROM pg_catalog.pg_roles WHERE rolname = '{privileged_role_name}')
    THEN
        CREATE ROLE {privileged_role_name} CREATEDB CREATEROLE NOLOGIN REPLICATION BYPASSRLS IN ROLE pg_read_all_data, pg_write_all_data;
    END IF;
END
$$;
@@ -8,10 +8,10 @@ code changes locally, but not suitable for running production systems.

## Example: Start with Postgres 16

To create and start a local development environment with Postgres 16, you will need to provide `--pg-version` flag to 2 of the start-up commands.
To create and start a local development environment with Postgres 16, you will need to provide `--pg-version` flag to 3 of the start-up commands.

```shell
cargo neon init
cargo neon init --pg-version 16
cargo neon start
cargo neon tenant create --set-default --pg-version 16
cargo neon endpoint create main --pg-version 16
```

@@ -631,10 +631,6 @@ struct EndpointCreateCmdArgs {
        help = "Allow multiple primary endpoints running on the same branch. Shouldn't be used normally, but useful for tests."
    )]
    allow_multiple: bool,

    /// Only allow changing it on creation
    #[clap(long, help = "Name of the privileged role for the endpoint")]
    privileged_role_name: Option<String>,
}

#[derive(clap::Args)]
@@ -1484,7 +1480,6 @@ async fn handle_endpoint(subcmd: &EndpointCmd, env: &local_env::LocalEnv) -> Res
                args.grpc,
                !args.update_catalog,
                false,
                args.privileged_role_name.clone(),
            )?;
        }
        EndpointCmd::Start(args) => {

@@ -65,6 +65,7 @@ use jsonwebtoken::jwk::{
    OctetKeyPairParameters, OctetKeyPairType, PublicKeyUse,
};
use nix::sys::signal::{Signal, kill};
use pageserver_api::shard::ShardStripeSize;
use pem::Pem;
use reqwest::header::CONTENT_TYPE;
use safekeeper_api::PgMajorVersion;
@@ -76,7 +77,6 @@ use spki::{SubjectPublicKeyInfo, SubjectPublicKeyInfoRef};
use tracing::debug;
use url::Host;
use utils::id::{NodeId, TenantId, TimelineId};
use utils::shard::ShardStripeSize;

use crate::local_env::LocalEnv;
use crate::postgresql_conf::PostgresConf;
@@ -99,7 +99,6 @@ pub struct EndpointConf {
    features: Vec<ComputeFeature>,
    cluster: Option<Cluster>,
    compute_ctl_config: ComputeCtlConfig,
    privileged_role_name: Option<String>,
}

//
@@ -200,7 +199,6 @@ impl ComputeControlPlane {
        grpc: bool,
        skip_pg_catalog_updates: bool,
        drop_subscriptions_before_start: bool,
        privileged_role_name: Option<String>,
    ) -> Result<Arc<Endpoint>> {
        let pg_port = pg_port.unwrap_or_else(|| self.get_port());
        let external_http_port = external_http_port.unwrap_or_else(|| self.get_port() + 1);
@@ -238,7 +236,6 @@ impl ComputeControlPlane {
            features: vec![],
            cluster: None,
            compute_ctl_config: compute_ctl_config.clone(),
            privileged_role_name: privileged_role_name.clone(),
        });

        ep.create_endpoint_dir()?;
@@ -260,7 +257,6 @@ impl ComputeControlPlane {
                features: vec![],
                cluster: None,
                compute_ctl_config,
                privileged_role_name,
            })?,
        )?;
        std::fs::write(
@@ -336,9 +332,6 @@ pub struct Endpoint {

    /// The compute_ctl config for the endpoint's compute.
    compute_ctl_config: ComputeCtlConfig,

    /// The name of the privileged role for the endpoint.
    privileged_role_name: Option<String>,
}

#[derive(PartialEq, Eq)]
@@ -439,7 +432,6 @@ impl Endpoint {
            features: conf.features,
            cluster: conf.cluster,
            compute_ctl_config: conf.compute_ctl_config,
            privileged_role_name: conf.privileged_role_name,
        })
    }

@@ -878,10 +870,6 @@ impl Endpoint {
            cmd.arg("--dev");
        }

        if let Some(privileged_role_name) = self.privileged_role_name.clone() {
            cmd.args(["--privileged-role-name", &privileged_role_name]);
        }

        let child = cmd.spawn()?;
        // set up a scopeguard to kill & wait for the child in case we panic or bail below
        let child = scopeguard::guard(child, |mut child| {

@@ -76,12 +76,6 @@ enum Command {
    NodeStartDelete {
        #[arg(long)]
        node_id: NodeId,
        /// When `force` is true, skip waiting for shards to prewarm during migration.
        /// This can significantly speed up node deletion since prewarming all shards
        /// can take considerable time, but may result in slower initial access to
        /// migrated shards until they warm up naturally.
        #[arg(long)]
        force: bool,
    },
    /// Cancel deletion of the specified pageserver and wait for `timeout`
    /// for the operation to be canceled. May be retried.
@@ -958,14 +952,13 @@ async fn main() -> anyhow::Result<()> {
                .dispatch::<(), ()>(Method::DELETE, format!("control/v1/node/{node_id}"), None)
                .await?;
        }
        Command::NodeStartDelete { node_id, force } => {
            let query = if force {
                format!("control/v1/node/{node_id}/delete?force=true")
            } else {
                format!("control/v1/node/{node_id}/delete")
            };
        Command::NodeStartDelete { node_id } => {
            storcon_client
                .dispatch::<(), ()>(Method::PUT, query, None)
                .dispatch::<(), ()>(
                    Method::PUT,
                    format!("control/v1/node/{node_id}/delete"),
                    None,
                )
                .await?;
            println!("Delete started for {node_id}");
        }

@@ -35,7 +35,6 @@ reason = "The paste crate is a build-only dependency with no runtime components."
# More documentation for the licenses section can be found here:
# https://embarkstudios.github.io/cargo-deny/checks/licenses/cfg.html
[licenses]
version = 2
allow = [
    "0BSD",
    "Apache-2.0",

@@ -46,33 +46,16 @@ pub struct ExtensionInstallResponse {
    pub version: ExtVersion,
}

/// Status of the LFC prewarm process. The same state machine is reused for
/// both autoprewarm (prewarm after compute/Postgres start using the previously
/// stored LFC state) and explicit prewarming via API.
#[derive(Serialize, Default, Debug, Clone, PartialEq)]
#[serde(tag = "status", rename_all = "snake_case")]
pub enum LfcPrewarmState {
    /// Default value when compute boots up.
    #[default]
    NotPrewarmed,
    /// Prewarming thread is active and loading pages into LFC.
    Prewarming,
    /// We found requested LFC state in the endpoint storage and
    /// completed prewarming successfully.
    Completed,
    /// Unexpected error happened during prewarming. Note, `Not Found 404`
    /// response from the endpoint storage is explicitly excluded here
    /// because it can normally happen on the first compute start,
    /// since LFC state is not available yet.
    Failed { error: String },
    /// We tried to fetch the corresponding LFC state from the endpoint storage,
    /// but received `Not Found 404`. This should normally happen only during the
    /// first endpoint start after creation with `autoprewarm: true`.
    ///
    /// During the orchestrated prewarm via API, when a caller explicitly
    /// provides the LFC state key to prewarm from, it's the caller's
    /// responsibility to handle this status as an error state.
    Skipped,
    Failed {
        error: String,
    },
}

impl Display for LfcPrewarmState {
@@ -81,7 +64,6 @@ impl Display for LfcPrewarmState {
            LfcPrewarmState::NotPrewarmed => f.write_str("NotPrewarmed"),
            LfcPrewarmState::Prewarming => f.write_str("Prewarming"),
            LfcPrewarmState::Completed => f.write_str("Completed"),
            LfcPrewarmState::Skipped => f.write_str("Skipped"),
            LfcPrewarmState::Failed { error } => write!(f, "Error({error})"),
        }
    }

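Because of the `#[serde(tag = "status", rename_all = "snake_case")]` attributes, the enum serializes to exactly the internally tagged JSON shape the OpenAPI spec above describes. A reduced sketch, assuming the `serde` and `serde_json` crates:

```rust
use serde::Serialize;

#[derive(Serialize)]
#[serde(tag = "status", rename_all = "snake_case")]
enum LfcPrewarmState {
    NotPrewarmed,
    Failed { error: String },
}

fn main() {
    let failed = LfcPrewarmState::Failed { error: "boom".into() };
    // The tag is folded into the object alongside the variant's fields.
    assert_eq!(
        serde_json::to_string(&failed).unwrap(),
        r#"{"status":"failed","error":"boom"}"#
    );
    assert_eq!(
        serde_json::to_string(&LfcPrewarmState::NotPrewarmed).unwrap(),
        r#"{"status":"not_prewarmed"}"#
    );
}
```
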
@@ -4,14 +4,12 @@
//! a default registry.
#![deny(clippy::undocumented_unsafe_blocks)]

use std::sync::RwLock;

use measured::label::{LabelGroupSet, LabelGroupVisitor, LabelName, NoLabels};
use measured::metric::counter::CounterState;
use measured::metric::gauge::GaugeState;
use measured::metric::group::Encoding;
use measured::metric::name::{MetricName, MetricNameEncoder};
use measured::metric::{MetricEncoding, MetricFamilyEncoding, MetricType};
use measured::metric::{MetricEncoding, MetricFamilyEncoding};
use measured::{FixedCardinalityLabel, LabelGroup, MetricGroup};
use once_cell::sync::Lazy;
use prometheus::Registry;
@@ -118,52 +116,12 @@ pub fn pow2_buckets(start: usize, end: usize) -> Vec<f64> {
        .collect()
}

pub struct InfoMetric<L: LabelGroup, M: MetricType = GaugeState> {
    label: RwLock<L>,
    metric: M,
}

impl<L: LabelGroup> InfoMetric<L> {
    pub fn new(label: L) -> Self {
        Self::with_metric(label, GaugeState::new(1))
    }
}

impl<L: LabelGroup, M: MetricType<Metadata = ()>> InfoMetric<L, M> {
    pub fn with_metric(label: L, metric: M) -> Self {
        Self {
            label: RwLock::new(label),
            metric,
        }
    }

    pub fn set_label(&self, label: L) {
        *self.label.write().unwrap() = label;
    }
}

impl<L, M, E> MetricFamilyEncoding<E> for InfoMetric<L, M>
where
    L: LabelGroup,
    M: MetricEncoding<E, Metadata = ()>,
    E: Encoding,
{
    fn collect_family_into(
        &self,
        name: impl measured::metric::name::MetricNameEncoder,
        enc: &mut E,
    ) -> Result<(), E::Err> {
        M::write_type(&name, enc)?;
        self.metric
            .collect_into(&(), &*self.label.read().unwrap(), name, enc)
    }
}

pub struct BuildInfo {
    pub revision: &'static str,
    pub build_tag: &'static str,
}

// todo: allow label group without the set
impl LabelGroup for BuildInfo {
    fn visit_values(&self, v: &mut impl LabelGroupVisitor) {
        const REVISION: &LabelName = LabelName::from_str("revision");
@@ -173,6 +131,24 @@ impl LabelGroup for BuildInfo {
    }
}

impl<T: Encoding> MetricFamilyEncoding<T> for BuildInfo
where
    GaugeState: MetricEncoding<T>,
{
    fn collect_family_into(
        &self,
        name: impl measured::metric::name::MetricNameEncoder,
        enc: &mut T,
    ) -> Result<(), T::Err> {
        enc.write_help(&name, "Build/version information")?;
        GaugeState::write_type(&name, enc)?;
        GaugeState {
            count: std::sync::atomic::AtomicI64::new(1),
        }
        .collect_into(&(), self, name, enc)
    }
}

#[derive(MetricGroup)]
#[metric(new(build_info: BuildInfo))]
pub struct NeonMetrics {
@@ -189,8 +165,8 @@ pub struct NeonMetrics {
#[derive(MetricGroup)]
#[metric(new(build_info: BuildInfo))]
pub struct LibMetrics {
    #[metric(init = InfoMetric::new(build_info))]
    build_info: InfoMetric<BuildInfo>,
    #[metric(init = build_info)]
    build_info: BuildInfo,

    #[metric(flatten)]
    rusage: Rusage,

@@ -8,13 +8,6 @@ license.workspace = true
thiserror.workspace = true
nix.workspace=true
workspace_hack = { version = "0.1", path = "../../workspace_hack" }
libc.workspace = true
lock_api.workspace = true
rustc-hash.workspace = true

[target.'cfg(target_os = "macos")'.dependencies]
tempfile = "3.14.0"

[dev-dependencies]
rand = "0.9"
rand_distr = "0.5.1"

@@ -1,583 +0,0 @@
//! Resizable hash table implementation on top of byte-level storage (either a [`ShmemHandle`] or a fixed byte array).
//!
//! This hash table has two major components: the bucket array and the dictionary. Each bucket within the
//! bucket array contains an `Option<(K, V)>` and an index of another bucket. In this way there is both an
//! implicit freelist within the bucket array (`None` buckets point to other `None` entries) and various hash
//! chains within the bucket array (a `Some` bucket will point to other `Some` buckets that had the same hash).
//!
//! Buckets are never moved unless they are within a region that is being shrunk, and so the actual hash-
//! dependent component is done with the dictionary. When a new key is inserted into the map, a position
//! within the dictionary is decided based on its hash, the data is inserted into an empty bucket based
//! off of the freelist, and then the index of said bucket is placed in the dictionary.
//!
//! This map is resizable (if initialized on top of a [`ShmemHandle`]). Both growing and shrinking happen
//! in-place and are at a high level achieved by expanding/reducing the bucket array and rebuilding the
//! dictionary by rehashing all keys.
//!
//! Concurrency is managed very simply: the entire map is guarded by one shared-memory RwLock.

use std::hash::{BuildHasher, Hash};
use std::mem::MaybeUninit;

use crate::shmem::ShmemHandle;
use crate::{shmem, sync::*};

mod core;
pub mod entry;

#[cfg(test)]
mod tests;

use core::{Bucket, CoreHashMap, INVALID_POS};
use entry::{Entry, OccupiedEntry, PrevPos, VacantEntry};

use thiserror::Error;

/// Error type for a hashmap shrink operation.
#[derive(Error, Debug)]
pub enum HashMapShrinkError {
    /// There was an error encountered while resizing the memory area.
    #[error("shmem resize failed: {0}")]
    ResizeError(shmem::Error),
    /// Occupied entries in to-be-shrunk space were encountered beginning at the given index.
    #[error("occupied entry in deallocated space found at {0}")]
    RemainingEntries(usize),
}

/// This represents a hash table that (possibly) lives in shared memory.
/// If a new process is launched with fork(), the child process inherits
/// this struct.
#[must_use]
pub struct HashMapInit<'a, K, V, S = rustc_hash::FxBuildHasher> {
    shmem_handle: Option<ShmemHandle>,
    shared_ptr: *mut HashMapShared<'a, K, V>,
    shared_size: usize,
    hasher: S,
    num_buckets: u32,
}

/// This is a per-process handle to a hash table that (possibly) lives in shared memory.
/// If a child process is launched with fork(), the child process should
/// get its own HashMapAccess by calling HashMapInit::attach_writer/reader().
///
/// XXX: We're not making use of it at the moment, but this struct could
/// hold process-local information in the future.
pub struct HashMapAccess<'a, K, V, S = rustc_hash::FxBuildHasher> {
    shmem_handle: Option<ShmemHandle>,
    shared_ptr: *mut HashMapShared<'a, K, V>,
    hasher: S,
}

unsafe impl<K: Sync, V: Sync, S> Sync for HashMapAccess<'_, K, V, S> {}
unsafe impl<K: Send, V: Send, S> Send for HashMapAccess<'_, K, V, S> {}

impl<'a, K: Clone + Hash + Eq, V, S> HashMapInit<'a, K, V, S> {
    /// Change the 'hasher' used by the hash table.
    ///
    /// NOTE: This must be called right after creating the hash table,
    /// before inserting any entries and before calling attach_writer/reader.
    /// Otherwise different accessors could be using different hash functions,
    /// with confusing results.
    pub fn with_hasher<T: BuildHasher>(self, hasher: T) -> HashMapInit<'a, K, V, T> {
        HashMapInit {
            hasher,
            shmem_handle: self.shmem_handle,
            shared_ptr: self.shared_ptr,
            shared_size: self.shared_size,
            num_buckets: self.num_buckets,
        }
    }

    /// Loosely (over)estimate the size needed to store a hash table with `num_buckets` buckets.
    pub fn estimate_size(num_buckets: u32) -> usize {
        // add some margin to cover alignment etc.
        CoreHashMap::<K, V>::estimate_size(num_buckets) + size_of::<HashMapShared<K, V>>() + 1000
    }

    fn new(
        num_buckets: u32,
        shmem_handle: Option<ShmemHandle>,
        area_ptr: *mut u8,
        area_size: usize,
        hasher: S,
    ) -> Self {
        let mut ptr: *mut u8 = area_ptr;
        let end_ptr: *mut u8 = unsafe { ptr.add(area_size) };

        // carve out area for the One Big Lock (TM) and the HashMapShared.
        ptr = unsafe { ptr.add(ptr.align_offset(align_of::<libc::pthread_rwlock_t>())) };
        let raw_lock_ptr = ptr;
        ptr = unsafe { ptr.add(size_of::<libc::pthread_rwlock_t>()) };
        ptr = unsafe { ptr.add(ptr.align_offset(align_of::<HashMapShared<K, V>>())) };
        let shared_ptr: *mut HashMapShared<K, V> = ptr.cast();
        ptr = unsafe { ptr.add(size_of::<HashMapShared<K, V>>()) };

        // carve out the buckets
        ptr = unsafe { ptr.byte_add(ptr.align_offset(align_of::<core::Bucket<K, V>>())) };
        let buckets_ptr = ptr;
        ptr = unsafe { ptr.add(size_of::<core::Bucket<K, V>>() * num_buckets as usize) };

        // use remaining space for the dictionary
        ptr = unsafe { ptr.byte_add(ptr.align_offset(align_of::<u32>())) };
        assert!(ptr.addr() < end_ptr.addr());
        let dictionary_ptr = ptr;
        let dictionary_size = unsafe { end_ptr.byte_offset_from(ptr) / size_of::<u32>() as isize };
        assert!(dictionary_size > 0);

        let buckets =
            unsafe { std::slice::from_raw_parts_mut(buckets_ptr.cast(), num_buckets as usize) };
        let dictionary = unsafe {
            std::slice::from_raw_parts_mut(dictionary_ptr.cast(), dictionary_size as usize)
        };

        let hashmap = CoreHashMap::new(buckets, dictionary);
        unsafe {
            let lock = RwLock::from_raw(PthreadRwLock::new(raw_lock_ptr.cast()), hashmap);
            std::ptr::write(shared_ptr, lock);
        }

        Self {
            num_buckets,
            shmem_handle,
            shared_ptr,
            shared_size: area_size,
            hasher,
        }
    }

    /// Attach to a hash table for writing.
    pub fn attach_writer(self) -> HashMapAccess<'a, K, V, S> {
        HashMapAccess {
            shmem_handle: self.shmem_handle,
            shared_ptr: self.shared_ptr,
            hasher: self.hasher,
        }
    }

    /// Initialize a table for reading. Currently identical to [`HashMapInit::attach_writer`].
    ///
    /// This is a holdover from a previous implementation and is being kept around for
    /// backwards compatibility reasons.
    pub fn attach_reader(self) -> HashMapAccess<'a, K, V, S> {
        self.attach_writer()
    }
}

/// Hash table data that is actually stored in the shared memory area.
///
/// NOTE: We carve out the parts from a contiguous chunk. Growing and shrinking the hash table
/// relies on the memory layout! The data structures are laid out in the contiguous shared memory
/// area as follows:
///
/// [`libc::pthread_rwlock_t`]
/// [`HashMapShared`]
/// buckets
/// dictionary
///
/// In between the above parts, there can be padding bytes to align the parts correctly.
type HashMapShared<'a, K, V> = RwLock<CoreHashMap<'a, K, V>>;

impl<'a, K, V> HashMapInit<'a, K, V, rustc_hash::FxBuildHasher>
where
    K: Clone + Hash + Eq,
{
    /// Place the hash table within a user-supplied fixed memory area.
    pub fn with_fixed(num_buckets: u32, area: &'a mut [MaybeUninit<u8>]) -> Self {
        Self::new(
            num_buckets,
            None,
            area.as_mut_ptr().cast(),
            area.len(),
            rustc_hash::FxBuildHasher,
        )
    }

    /// Place a new hash map in the given shared memory area
    ///
    /// # Panics
    /// Will panic on failure to resize area to expected map size.
    pub fn with_shmem(num_buckets: u32, shmem: ShmemHandle) -> Self {
        let size = Self::estimate_size(num_buckets);
        shmem
            .set_size(size)
            .expect("could not resize shared memory area");
        let ptr = shmem.data_ptr.as_ptr().cast();
        Self::new(
            num_buckets,
            Some(shmem),
            ptr,
            size,
            rustc_hash::FxBuildHasher,
        )
    }

    /// Make a resizable hash map within a new shared memory area with the given name.
    pub fn new_resizeable_named(num_buckets: u32, max_buckets: u32, name: &str) -> Self {
        let size = Self::estimate_size(num_buckets);
        let max_size = Self::estimate_size(max_buckets);
        let shmem =
            ShmemHandle::new(name, size, max_size).expect("failed to make shared memory area");
        let ptr = shmem.data_ptr.as_ptr().cast();

        Self::new(
            num_buckets,
            Some(shmem),
            ptr,
            size,
            rustc_hash::FxBuildHasher,
        )
    }

    /// Make a resizable hash map within a new anonymous shared memory area.
    pub fn new_resizeable(num_buckets: u32, max_buckets: u32) -> Self {
        use std::sync::atomic::{AtomicUsize, Ordering};
        static COUNTER: AtomicUsize = AtomicUsize::new(0);
        let val = COUNTER.fetch_add(1, Ordering::Relaxed);
        let name = format!("neon_shmem_hmap{val}");
        Self::new_resizeable_named(num_buckets, max_buckets, &name)
    }
}

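A hypothetical usage sketch of the constructors above (not a test from the repo): create a resizable map in anonymous shared memory, then attach a writer handle.

```rust
fn demo() {
    // Start with 1024 buckets; allow in-place growth up to 65536.
    let map = HashMapInit::<u64, u64>::new_resizeable(1024, 65536).attach_writer();
    map.insert(42, 1).expect("map should not be full");
    assert_eq!(map.remove(&42), Some(1));
}
```
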
impl<'a, K, V, S: BuildHasher> HashMapAccess<'a, K, V, S>
where
    K: Clone + Hash + Eq,
{
    /// Hash a key using the map's hasher.
    #[inline]
    fn get_hash_value(&self, key: &K) -> u64 {
        self.hasher.hash_one(key)
    }

    fn entry_with_hash(&self, key: K, hash: u64) -> Entry<'a, '_, K, V> {
        let mut map = unsafe { self.shared_ptr.as_ref() }.unwrap().write();
        let dict_pos = hash as usize % map.dictionary.len();
        let first = map.dictionary[dict_pos];
        if first == INVALID_POS {
            // no existing entry
            return Entry::Vacant(VacantEntry {
                map,
                key,
                dict_pos: dict_pos as u32,
            });
        }

        let mut prev_pos = PrevPos::First(dict_pos as u32);
        let mut next = first;
        loop {
            let bucket = &mut map.buckets[next as usize];
            let (bucket_key, _bucket_value) = bucket.inner.as_mut().expect("entry is in use");
            if *bucket_key == key {
                // found existing entry
                return Entry::Occupied(OccupiedEntry {
                    map,
                    _key: key,
                    prev_pos,
                    bucket_pos: next,
                });
            }

            if bucket.next == INVALID_POS {
                // No existing entry
                return Entry::Vacant(VacantEntry {
                    map,
                    key,
                    dict_pos: dict_pos as u32,
                });
            }
            prev_pos = PrevPos::Chained(next);
            next = bucket.next;
        }
    }

    /// Get a reference to the corresponding value for a key.
    pub fn get<'e>(&'e self, key: &K) -> Option<ValueReadGuard<'e, V>> {
        let hash = self.get_hash_value(key);
        let map = unsafe { self.shared_ptr.as_ref() }.unwrap().read();
        RwLockReadGuard::try_map(map, |m| m.get_with_hash(key, hash)).ok()
    }

    /// Get a reference to the entry containing a key.
    ///
    /// NB: This takes a write lock as there's no way to distinguish whether the intention
    /// is to use the entry for reading or for writing in advance.
    pub fn entry(&self, key: K) -> Entry<'a, '_, K, V> {
        let hash = self.get_hash_value(&key);
        self.entry_with_hash(key, hash)
    }

    /// Remove a key given its hash. Returns the associated value if it existed.
    pub fn remove(&self, key: &K) -> Option<V> {
        let hash = self.get_hash_value(key);
        match self.entry_with_hash(key.clone(), hash) {
            Entry::Occupied(e) => Some(e.remove()),
            Entry::Vacant(_) => None,
        }
    }

    /// Insert/update a key. Returns the previous associated value if it existed.
    ///
    /// # Errors
    /// Will return [`core::FullError`] if there is no more space left in the map.
    pub fn insert(&self, key: K, value: V) -> Result<Option<V>, core::FullError> {
        let hash = self.get_hash_value(&key);
        match self.entry_with_hash(key.clone(), hash) {
            Entry::Occupied(mut e) => Ok(Some(e.insert(value))),
            Entry::Vacant(e) => {
                _ = e.insert(value)?;
                Ok(None)
            }
        }
    }

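The entry API above mirrors `std::collections::HashMap`: one hash-and-lookup decides between updating an occupied bucket and filling a vacant one. An illustrative upsert built only from the methods shown in this file:

```rust
fn upsert(map: &HashMapAccess<String, u64>, key: &str, val: u64) {
    match map.entry(key.to_string()) {
        // Replace the value in place; the bucket keeps its position.
        Entry::Occupied(mut e) => {
            e.insert(val);
        }
        // May fail with `core::FullError` when no free bucket is left.
        Entry::Vacant(e) => {
            let _ = e.insert(val);
        }
    }
}
```
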
    /// Optionally return the entry for a bucket at a given index if it exists.
    ///
    /// Has more overhead than one would intuitively expect: performs both a clone of the key
    /// due to the [`OccupiedEntry`] type owning the key and also a hash of the key in order
    /// to enable repairing the hash chain if the entry is removed.
    pub fn entry_at_bucket(&self, pos: usize) -> Option<OccupiedEntry<'a, '_, K, V>> {
        let map = unsafe { self.shared_ptr.as_mut() }.unwrap().write();
        if pos >= map.buckets.len() {
            return None;
        }

        let entry = map.buckets[pos].inner.as_ref();
        match entry {
            Some((key, _)) => Some(OccupiedEntry {
                _key: key.clone(),
                bucket_pos: pos as u32,
                prev_pos: entry::PrevPos::Unknown(self.get_hash_value(key)),
                map,
            }),
            _ => None,
        }
    }

    /// Returns the number of buckets in the table.
    pub fn get_num_buckets(&self) -> usize {
        let map = unsafe { self.shared_ptr.as_ref() }.unwrap().read();
        map.get_num_buckets()
    }

    /// Return the key and value stored in bucket with given index. This can be used to
    /// iterate through the hash map.
    // TODO: An Iterator might be nicer. The communicator's clock algorithm needs to
    // _slowly_ iterate through all buckets with its clock hand, without holding a lock.
    // If we switch to an Iterator, it must not hold the lock.
    pub fn get_at_bucket(&self, pos: usize) -> Option<ValueReadGuard<(K, V)>> {
        let map = unsafe { self.shared_ptr.as_ref() }.unwrap().read();
        if pos >= map.buckets.len() {
            return None;
        }
        RwLockReadGuard::try_map(map, |m| m.buckets[pos].inner.as_ref()).ok()
    }

    /// Returns the index of the bucket a given value corresponds to.
    pub fn get_bucket_for_value(&self, val_ptr: *const V) -> usize {
        let map = unsafe { self.shared_ptr.as_ref() }.unwrap().read();

        let origin = map.buckets.as_ptr();
        let idx = (val_ptr as usize - origin as usize) / size_of::<Bucket<K, V>>();
        assert!(idx < map.buckets.len());

        idx
    }

    /// Returns the number of occupied buckets in the table.
    pub fn get_num_buckets_in_use(&self) -> usize {
        let map = unsafe { self.shared_ptr.as_ref() }.unwrap().read();
        map.buckets_in_use as usize
    }

    /// Clears all entries in a table. Does not reset any shrinking operations.
    pub fn clear(&self) {
        let mut map = unsafe { self.shared_ptr.as_mut() }.unwrap().write();
        map.clear();
    }

    /// Perform an in-place rehash of some region (0..`rehash_buckets`) of the table and reset
    /// the `buckets` and `dictionary` slices to be as long as `num_buckets`. Resets the freelist
    /// in the process.
    fn rehash_dict(
        &self,
        inner: &mut CoreHashMap<'a, K, V>,
        buckets_ptr: *mut core::Bucket<K, V>,
        end_ptr: *mut u8,
        num_buckets: u32,
        rehash_buckets: u32,
    ) {
        inner.free_head = INVALID_POS;

        let buckets;
        let dictionary;
        unsafe {
            let buckets_end_ptr = buckets_ptr.add(num_buckets as usize);
            let dictionary_ptr: *mut u32 = buckets_end_ptr
                .byte_add(buckets_end_ptr.align_offset(align_of::<u32>()))
                .cast();
            let dictionary_size: usize =
                end_ptr.byte_offset_from(buckets_end_ptr) as usize / size_of::<u32>();

            buckets = std::slice::from_raw_parts_mut(buckets_ptr, num_buckets as usize);
            dictionary = std::slice::from_raw_parts_mut(dictionary_ptr, dictionary_size);
        }
        for e in dictionary.iter_mut() {
            *e = INVALID_POS;
        }

        for (i, bucket) in buckets.iter_mut().enumerate().take(rehash_buckets as usize) {
            if bucket.inner.is_none() {
                bucket.next = inner.free_head;
                inner.free_head = i as u32;
                continue;
            }

            let hash = self.hasher.hash_one(&bucket.inner.as_ref().unwrap().0);
            let pos: usize = (hash % dictionary.len() as u64) as usize;
            bucket.next = dictionary[pos];
            dictionary[pos] = i as u32;
        }

        inner.dictionary = dictionary;
        inner.buckets = buckets;
    }

    /// Rehash the map without growing or shrinking.
    pub fn shuffle(&self) {
        let mut map = unsafe { self.shared_ptr.as_mut() }.unwrap().write();
        let num_buckets = map.get_num_buckets() as u32;
        let size_bytes = HashMapInit::<K, V, S>::estimate_size(num_buckets);
        let end_ptr: *mut u8 = unsafe { self.shared_ptr.byte_add(size_bytes).cast() };
        let buckets_ptr = map.buckets.as_mut_ptr();
        self.rehash_dict(&mut map, buckets_ptr, end_ptr, num_buckets, num_buckets);
    }

    /// Grow the number of buckets within the table.
    ///
    /// 1. Grows the underlying shared memory area
    /// 2. Initializes new buckets and overwrites the current dictionary
    /// 3. Rehashes the dictionary
    ///
    /// # Panics
    /// Panics if called on a map initialized with [`HashMapInit::with_fixed`].
    ///
    /// # Errors
    /// Returns a [`shmem::Error`] if any errors occur resizing the memory region.
    pub fn grow(&self, num_buckets: u32) -> Result<(), shmem::Error> {
        let mut map = unsafe { self.shared_ptr.as_mut() }.unwrap().write();
        let old_num_buckets = map.buckets.len() as u32;

        assert!(
            num_buckets >= old_num_buckets,
            "grow called with a smaller number of buckets"
        );
        if num_buckets == old_num_buckets {
            return Ok(());
        }
        let shmem_handle = self
            .shmem_handle
            .as_ref()
            .expect("grow called on a fixed-size hash table");

        let size_bytes = HashMapInit::<K, V, S>::estimate_size(num_buckets);
        shmem_handle.set_size(size_bytes)?;
        let end_ptr: *mut u8 = unsafe { shmem_handle.data_ptr.as_ptr().add(size_bytes) };

        // Initialize new buckets. The new buckets are linked to the free list.
        // NB: This overwrites the dictionary!
        let buckets_ptr = map.buckets.as_mut_ptr();
        unsafe {
            for i in old_num_buckets..num_buckets {
                let bucket = buckets_ptr.add(i as usize);
                bucket.write(core::Bucket {
                    next: if i < num_buckets - 1 {
                        i + 1
                    } else {
                        map.free_head
                    },
                    inner: None,
                });
            }
        }

        self.rehash_dict(&mut map, buckets_ptr, end_ptr, num_buckets, old_num_buckets);
        map.free_head = old_num_buckets;

        Ok(())
    }

    /// Begin a shrink, limiting all new allocations to be in buckets with index below `num_buckets`.
    ///
    /// # Panics
    /// Panics if called on a map initialized with [`HashMapInit::with_fixed`] or if `num_buckets` is
    /// greater than the number of buckets in the map.
    pub fn begin_shrink(&mut self, num_buckets: u32) {
        let mut map = unsafe { self.shared_ptr.as_mut() }.unwrap().write();
        assert!(
            num_buckets <= map.get_num_buckets() as u32,
            "shrink called with a larger number of buckets"
        );
        _ = self
            .shmem_handle
            .as_ref()
            .expect("shrink called on a fixed-size hash table");
        map.alloc_limit = num_buckets;
    }

    /// If a shrink operation is underway, returns the target size of the map. Otherwise, returns None.
    pub fn shrink_goal(&self) -> Option<usize> {
        let map = unsafe { self.shared_ptr.as_mut() }.unwrap().read();
        let goal = map.alloc_limit;
        if goal == INVALID_POS {
            None
        } else {
            Some(goal as usize)
        }
    }

    /// Complete a shrink after the caller has evicted entries, removing the unused buckets and rehashing.
    ///
    /// # Panics
    /// The following cases result in a panic:
    /// - Calling this function on a map initialized with [`HashMapInit::with_fixed`].
    /// - Calling this function on a map when no shrink operation is in progress.
    pub fn finish_shrink(&self) -> Result<(), HashMapShrinkError> {
        let mut map = unsafe { self.shared_ptr.as_mut() }.unwrap().write();
        assert!(
            map.alloc_limit != INVALID_POS,
            "called finish_shrink when no shrink is in progress"
        );

        let num_buckets = map.alloc_limit;

        if map.get_num_buckets() == num_buckets as usize {
            return Ok(());
        }

        assert!(
            map.buckets_in_use <= num_buckets,
            "called finish_shrink before enough entries were removed"
        );

        for i in (num_buckets as usize)..map.buckets.len() {
            if map.buckets[i].inner.is_some() {
                return Err(HashMapShrinkError::RemainingEntries(i));
            }
        }

        let shmem_handle = self
            .shmem_handle
            .as_ref()
            .expect("shrink called on a fixed-size hash table");

        let size_bytes = HashMapInit::<K, V, S>::estimate_size(num_buckets);
        if let Err(e) = shmem_handle.set_size(size_bytes) {
            return Err(HashMapShrinkError::ResizeError(e));
        }
        let end_ptr: *mut u8 = unsafe { shmem_handle.data_ptr.as_ptr().add(size_bytes) };
|
||||
let buckets_ptr = map.buckets.as_mut_ptr();
|
||||
self.rehash_dict(&mut map, buckets_ptr, end_ptr, num_buckets, num_buckets);
|
||||
map.alloc_limit = INVALID_POS;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
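
// A minimal sketch of the two-phase shrink protocol above, mirroring the
// `do_shrink` helper in the tests further down. `writer` is assumed to be a
// `HashMapAccess` attached to a resizeable map (not one built with
// `HashMapInit::with_fixed`, which would panic in `begin_shrink`).
fn shrink_to<K: Clone + std::hash::Hash + Eq, V>(writer: &mut HashMapAccess<K, V>, goal: u32) {
    // Phase 1: announce the goal; new allocations now stay below `goal`.
    writer.begin_shrink(goal);
    // Phase 2: evict everything still living in the doomed tail region...
    for i in (goal as usize)..writer.get_num_buckets() {
        if let Some(entry) = writer.entry_at_bucket(i) {
            entry.remove();
        }
    }
    // ...then release the memory and rehash the surviving entries.
    writer.finish_shrink().expect("tail was emptied above");
}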
@@ -1,174 +0,0 @@
//! Simple hash table with chaining.

use std::hash::Hash;
use std::mem::MaybeUninit;

use crate::hash::entry::*;

/// Invalid position within the map (either within the dictionary or bucket array).
pub(crate) const INVALID_POS: u32 = u32::MAX;

/// Fundamental storage unit within the hash table. Either empty or contains a key-value pair.
/// Always part of a chain of some kind (either a freelist if empty or a hash chain if full).
pub(crate) struct Bucket<K, V> {
    /// Index of next bucket in the chain.
    pub(crate) next: u32,
    /// Key-value pair contained within bucket.
    pub(crate) inner: Option<(K, V)>,
}

/// Core hash table implementation.
pub(crate) struct CoreHashMap<'a, K, V> {
    /// Dictionary used to map hashes to bucket indices.
    pub(crate) dictionary: &'a mut [u32],
    /// Buckets containing key-value pairs.
    pub(crate) buckets: &'a mut [Bucket<K, V>],
    /// Head of the freelist.
    pub(crate) free_head: u32,
    /// Maximum index of a bucket allowed to be allocated. [`INVALID_POS`] if no limit.
    pub(crate) alloc_limit: u32,
    /// The number of currently occupied buckets.
    pub(crate) buckets_in_use: u32,
}

/// Error for when there are no empty buckets left but one is needed.
#[derive(Debug, PartialEq)]
pub struct FullError;

impl<'a, K: Clone + Hash + Eq, V> CoreHashMap<'a, K, V> {
    const FILL_FACTOR: f32 = 0.60;

    /// Estimate the size of data contained within the hash map.
    pub fn estimate_size(num_buckets: u32) -> usize {
        let mut size = 0;

        // buckets
        size += size_of::<Bucket<K, V>>() * num_buckets as usize;

        // dictionary
        size += (f32::ceil((size_of::<u32>() * num_buckets as usize) as f32 / Self::FILL_FACTOR))
            as usize;

        size
    }
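
    // Worked example of the estimate above, assuming a hypothetical
    // Bucket<K, V> of 24 bytes and num_buckets = 600:
    //   buckets:    600 * 24                    = 14_400 bytes
    //   dictionary: ceil(600 * 4 / FILL_FACTOR) = ceil(2_400 / 0.60) = 4_000 bytes
    // i.e. estimate_size(600) = 18_400. The dictionary deliberately holds
    // ~1.67x as many slots as there are buckets, to keep hash chains short.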

    pub fn new(
        buckets: &'a mut [MaybeUninit<Bucket<K, V>>],
        dictionary: &'a mut [MaybeUninit<u32>],
    ) -> Self {
        // Initialize the buckets
        for i in 0..buckets.len() {
            buckets[i].write(Bucket {
                next: if i < buckets.len() - 1 {
                    i as u32 + 1
                } else {
                    INVALID_POS
                },
                inner: None,
            });
        }

        // Initialize the dictionary
        for e in dictionary.iter_mut() {
            e.write(INVALID_POS);
        }

        // TODO: use std::slice::assume_init_mut() once it stabilizes
        let buckets =
            unsafe { std::slice::from_raw_parts_mut(buckets.as_mut_ptr().cast(), buckets.len()) };
        let dictionary = unsafe {
            std::slice::from_raw_parts_mut(dictionary.as_mut_ptr().cast(), dictionary.len())
        };

        Self {
            dictionary,
            buckets,
            free_head: 0,
            buckets_in_use: 0,
            alloc_limit: INVALID_POS,
        }
    }

    /// Get the value associated with a key (if it exists) given its hash.
    pub fn get_with_hash(&self, key: &K, hash: u64) -> Option<&V> {
        let mut next = self.dictionary[hash as usize % self.dictionary.len()];
        loop {
            if next == INVALID_POS {
                return None;
            }

            let bucket = &self.buckets[next as usize];
            let (bucket_key, bucket_value) = bucket.inner.as_ref().expect("entry is in use");
            if bucket_key == key {
                return Some(bucket_value);
            }
            next = bucket.next;
        }
    }

    /// Get number of buckets in the map.
    pub fn get_num_buckets(&self) -> usize {
        self.buckets.len()
    }

    /// Clears all entries from the hashmap.
    ///
    /// Does not reset any allocation limits, but does clear any entries beyond them.
    pub fn clear(&mut self) {
        for i in 0..self.buckets.len() {
            self.buckets[i] = Bucket {
                next: if i < self.buckets.len() - 1 {
                    i as u32 + 1
                } else {
                    INVALID_POS
                },
                inner: None,
            }
        }
        for i in 0..self.dictionary.len() {
            self.dictionary[i] = INVALID_POS;
        }

        self.free_head = 0;
        self.buckets_in_use = 0;
    }

    /// Find the position of an unused bucket via the freelist and initialize it.
    pub(crate) fn alloc_bucket(&mut self, key: K, value: V) -> Result<u32, FullError> {
        let mut pos = self.free_head;

        // Find the first bucket we're *allowed* to use.
        let mut prev = PrevPos::First(self.free_head);
        while pos != INVALID_POS && pos >= self.alloc_limit {
            let bucket = &mut self.buckets[pos as usize];
            prev = PrevPos::Chained(pos);
            pos = bucket.next;
        }
        if pos == INVALID_POS {
            return Err(FullError);
        }

        // Repair the freelist.
        match prev {
            PrevPos::First(_) => {
                let next_pos = self.buckets[pos as usize].next;
                self.free_head = next_pos;
            }
            PrevPos::Chained(p) => {
                if p != INVALID_POS {
                    let next_pos = self.buckets[pos as usize].next;
                    self.buckets[p as usize].next = next_pos;
                }
            }
            _ => unreachable!(),
        }

        // Initialize the bucket.
        let bucket = &mut self.buckets[pos as usize];
        self.buckets_in_use += 1;
        bucket.next = INVALID_POS;
        bucket.inner = Some((key, value));

        Ok(pos)
    }
}
@@ -1,130 +0,0 @@
//! Equivalent of [`std::collections::hash_map::Entry`] for this hashmap.

use crate::hash::core::{CoreHashMap, FullError, INVALID_POS};
use crate::sync::{RwLockWriteGuard, ValueWriteGuard};

use std::hash::Hash;
use std::mem;

pub enum Entry<'a, 'b, K, V> {
    Occupied(OccupiedEntry<'a, 'b, K, V>),
    Vacant(VacantEntry<'a, 'b, K, V>),
}

/// Enum representing the previous position within a chain.
#[derive(Clone, Copy)]
pub(crate) enum PrevPos {
    /// Starting index within the dictionary.
    First(u32),
    /// Regular index within the buckets.
    Chained(u32),
    /// Unknown - e.g. the associated entry was retrieved by index instead of chain.
    Unknown(u64),
}

pub struct OccupiedEntry<'a, 'b, K, V> {
    /// Mutable reference to the map containing this entry.
    pub(crate) map: RwLockWriteGuard<'b, CoreHashMap<'a, K, V>>,
    /// The key of the occupied entry.
    pub(crate) _key: K,
    /// The index of the previous entry in the chain.
    pub(crate) prev_pos: PrevPos,
    /// The position of the bucket in the [`CoreHashMap`] bucket array.
    pub(crate) bucket_pos: u32,
}

impl<K, V> OccupiedEntry<'_, '_, K, V> {
    pub fn get(&self) -> &V {
        &self.map.buckets[self.bucket_pos as usize]
            .inner
            .as_ref()
            .unwrap()
            .1
    }

    pub fn get_mut(&mut self) -> &mut V {
        &mut self.map.buckets[self.bucket_pos as usize]
            .inner
            .as_mut()
            .unwrap()
            .1
    }

    /// Inserts a value into the entry, replacing (and returning) the existing value.
    pub fn insert(&mut self, value: V) -> V {
        let bucket = &mut self.map.buckets[self.bucket_pos as usize];
        // This assumes inner is Some, which it must be for an OccupiedEntry
        mem::replace(&mut bucket.inner.as_mut().unwrap().1, value)
    }

    /// Removes the entry from the hash map, returning the value originally stored within it.
    ///
    /// This may result in multiple bucket accesses if the entry was obtained by index, since the
    /// previous entry in the chain must be discovered first in that case.
    pub fn remove(mut self) -> V {
        // If this bucket was queried by index, go ahead and follow its chain from the start.
        let prev = if let PrevPos::Unknown(hash) = self.prev_pos {
            let dict_idx = hash as usize % self.map.dictionary.len();
            let mut prev = PrevPos::First(dict_idx as u32);
            let mut curr = self.map.dictionary[dict_idx];
            while curr != self.bucket_pos {
                assert!(curr != INVALID_POS);
                prev = PrevPos::Chained(curr);
                curr = self.map.buckets[curr as usize].next;
            }
            prev
        } else {
            self.prev_pos
        };

        // The bucket's `inner` is always Some for an OccupiedEntry; read its
        // outgoing link before touching the rest of the map.
        let bucket_next = self.map.buckets[self.bucket_pos as usize].next;

        // unlink it from the chain
        match prev {
            PrevPos::First(dict_pos) => {
                self.map.dictionary[dict_pos as usize] = bucket_next;
            }
            PrevPos::Chained(bucket_pos) => {
                self.map.buckets[bucket_pos as usize].next = bucket_next;
            }
            _ => unreachable!(),
        }

        // and add it to the freelist
        let free = self.map.free_head;
        let bucket = &mut self.map.buckets[self.bucket_pos as usize];
        let old_value = bucket.inner.take();
        bucket.next = free;
        self.map.free_head = self.bucket_pos;
        self.map.buckets_in_use -= 1;

        old_value.unwrap().1
    }
}

/// An abstract view into a vacant entry within the map.
pub struct VacantEntry<'a, 'b, K, V> {
    /// Mutable reference to the map containing this entry.
    pub(crate) map: RwLockWriteGuard<'b, CoreHashMap<'a, K, V>>,
    /// The key to be inserted into this entry.
    pub(crate) key: K,
    /// The position within the dictionary corresponding to the key's hash.
    pub(crate) dict_pos: u32,
}

impl<'b, K: Clone + Hash + Eq, V> VacantEntry<'_, 'b, K, V> {
    /// Insert a value into the vacant entry, finding and populating an empty bucket in the process.
    ///
    /// # Errors
    /// Will return [`FullError`] if there are no unoccupied buckets in the map.
    pub fn insert(mut self, value: V) -> Result<ValueWriteGuard<'b, V>, FullError> {
        let pos = self.map.alloc_bucket(self.key, value)?;
        self.map.buckets[pos as usize].next = self.map.dictionary[self.dict_pos as usize];
        self.map.dictionary[self.dict_pos as usize] = pos;

        Ok(RwLockWriteGuard::map(self.map, |m| {
            &mut m.buckets[pos as usize].inner.as_mut().unwrap().1
        }))
    }
}
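
// A small upsert sketch using the entry API above, following the pattern in
// the hash map tests; the `HashMapAccess::entry` signature is assumed from
// its usage there, and a full table is treated as a caller error.
fn upsert<K: Clone + std::hash::Hash + Eq, V>(map: &HashMapAccess<K, V>, key: K, value: V) {
    match map.entry(key) {
        // The key already exists: replace the value in place.
        Entry::Occupied(mut e) => {
            e.insert(value);
        }
        // The key is new: allocate a bucket from the freelist and link it in.
        Entry::Vacant(e) => {
            e.insert(value).expect("no free buckets left");
        }
    }
}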
@@ -1,428 +0,0 @@
use std::collections::BTreeMap;
use std::collections::HashSet;
use std::fmt::Debug;
use std::mem::MaybeUninit;

use crate::hash::Entry;
use crate::hash::HashMapAccess;
use crate::hash::HashMapInit;
use crate::hash::core::FullError;

use rand::seq::SliceRandom;
use rand::{Rng, RngCore};
use rand_distr::Zipf;

const TEST_KEY_LEN: usize = 16;

#[derive(Clone, Copy, Debug, Hash, PartialEq, Eq, PartialOrd, Ord)]
struct TestKey([u8; TEST_KEY_LEN]);

impl From<&TestKey> for u128 {
    fn from(val: &TestKey) -> u128 {
        u128::from_be_bytes(val.0)
    }
}

impl From<u128> for TestKey {
    fn from(val: u128) -> TestKey {
        TestKey(val.to_be_bytes())
    }
}

impl<'a> From<&'a [u8]> for TestKey {
    fn from(bytes: &'a [u8]) -> TestKey {
        TestKey(bytes.try_into().unwrap())
    }
}

fn test_inserts<K: Into<TestKey> + Copy>(keys: &[K]) {
    let w = HashMapInit::<TestKey, usize>::new_resizeable_named(100000, 120000, "test_inserts")
        .attach_writer();

    for (idx, k) in keys.iter().enumerate() {
        let res = w.entry((*k).into());
        match res {
            Entry::Occupied(mut e) => {
                e.insert(idx);
            }
            Entry::Vacant(e) => {
                let res = e.insert(idx);
                assert!(res.is_ok());
            }
        };
    }

    for (idx, k) in keys.iter().enumerate() {
        let x = w.get(&(*k).into());
        let value = x.as_deref().copied();
        assert_eq!(value, Some(idx));
    }
}

#[test]
fn dense() {
    // This exercises splitting a node with prefix
    let keys: &[u128] = &[0, 1, 2, 3, 256];
    test_inserts(keys);

    // Dense keys
    let mut keys: Vec<u128> = (0..10000).collect();
    test_inserts(&keys);

    // Do the same in random orders
    for _ in 1..10 {
        keys.shuffle(&mut rand::rng());
        test_inserts(&keys);
    }
}

#[test]
fn sparse() {
    // sparse keys
    let mut keys: Vec<TestKey> = Vec::new();
    let mut used_keys = HashSet::new();
    for _ in 0..10000 {
        loop {
            let key = rand::random::<u128>();
            if used_keys.contains(&key) {
                continue;
            }
            used_keys.insert(key);
            keys.push(key.into());
            break;
        }
    }
    test_inserts(&keys);
}

#[derive(Clone, Debug)]
struct TestOp(TestKey, Option<usize>);

fn apply_op(
    op: &TestOp,
    map: &mut HashMapAccess<TestKey, usize>,
    shadow: &mut BTreeMap<TestKey, usize>,
) {
    // apply the change to the shadow tree first
    let shadow_existing = if let Some(v) = op.1 {
        shadow.insert(op.0, v)
    } else {
        shadow.remove(&op.0)
    };

    let entry = map.entry(op.0);
    let hash_existing = match op.1 {
        Some(new) => match entry {
            Entry::Occupied(mut e) => Some(e.insert(new)),
            Entry::Vacant(e) => {
                _ = e.insert(new).unwrap();
                None
            }
        },
        None => match entry {
            Entry::Occupied(e) => Some(e.remove()),
            Entry::Vacant(_) => None,
        },
    };

    assert_eq!(shadow_existing, hash_existing);
}

fn do_random_ops(
    num_ops: usize,
    size: u32,
    insert_prob: f64,
    writer: &mut HashMapAccess<TestKey, usize>,
    shadow: &mut BTreeMap<TestKey, usize>,
    rng: &mut rand::rngs::ThreadRng,
) {
    for i in 0..num_ops {
        let key: TestKey = ((rng.next_u32() % size) as u128).into();
        let op = TestOp(
            key,
            if rng.random_bool(insert_prob) {
                Some(i)
            } else {
                None
            },
        );
        apply_op(&op, writer, shadow);
    }
}

fn do_deletes(
    num_ops: usize,
    writer: &mut HashMapAccess<TestKey, usize>,
    shadow: &mut BTreeMap<TestKey, usize>,
) {
    for _ in 0..num_ops {
        let (k, _) = shadow.pop_first().unwrap();
        writer.remove(&k);
    }
}

fn do_shrink(
    writer: &mut HashMapAccess<TestKey, usize>,
    shadow: &mut BTreeMap<TestKey, usize>,
    from: u32,
    to: u32,
) {
    assert!(writer.shrink_goal().is_none());
    writer.begin_shrink(to);
    assert_eq!(writer.shrink_goal(), Some(to as usize));
    for i in to..from {
        if let Some(entry) = writer.entry_at_bucket(i as usize) {
            shadow.remove(&entry._key);
            entry.remove();
        }
    }
    let old_usage = writer.get_num_buckets_in_use();
    writer.finish_shrink().unwrap();
    assert!(writer.shrink_goal().is_none());
    assert_eq!(writer.get_num_buckets_in_use(), old_usage);
}

#[test]
fn random_ops() {
    let mut writer =
        HashMapInit::<TestKey, usize>::new_resizeable_named(100000, 120000, "test_random")
            .attach_writer();
    let mut shadow: std::collections::BTreeMap<TestKey, usize> = BTreeMap::new();

    let distribution = Zipf::new(u128::MAX as f64, 1.1).unwrap();
    let mut rng = rand::rng();
    for i in 0..100000 {
        let key: TestKey = (rng.sample(distribution) as u128).into();

        let op = TestOp(key, if rng.random_bool(0.75) { Some(i) } else { None });

        apply_op(&op, &mut writer, &mut shadow);
    }
}

#[test]
fn test_shuffle() {
    let mut writer = HashMapInit::<TestKey, usize>::new_resizeable_named(1000, 1200, "test_shuf")
        .attach_writer();
    let mut shadow: std::collections::BTreeMap<TestKey, usize> = BTreeMap::new();
    let mut rng = rand::rng();

    do_random_ops(10000, 1000, 0.75, &mut writer, &mut shadow, &mut rng);
    writer.shuffle();
    do_random_ops(10000, 1000, 0.75, &mut writer, &mut shadow, &mut rng);
}

#[test]
fn test_grow() {
    let mut writer = HashMapInit::<TestKey, usize>::new_resizeable_named(1000, 2000, "test_grow")
        .attach_writer();
    let mut shadow: std::collections::BTreeMap<TestKey, usize> = BTreeMap::new();
    let mut rng = rand::rng();

    do_random_ops(10000, 1000, 0.75, &mut writer, &mut shadow, &mut rng);
    let old_usage = writer.get_num_buckets_in_use();
    writer.grow(1500).unwrap();
    assert_eq!(writer.get_num_buckets_in_use(), old_usage);
    assert_eq!(writer.get_num_buckets(), 1500);
    do_random_ops(10000, 1500, 0.75, &mut writer, &mut shadow, &mut rng);
}

#[test]
fn test_clear() {
    let mut writer = HashMapInit::<TestKey, usize>::new_resizeable_named(1500, 2000, "test_clear")
        .attach_writer();
    let mut shadow: std::collections::BTreeMap<TestKey, usize> = BTreeMap::new();
    let mut rng = rand::rng();
    do_random_ops(2000, 1500, 0.75, &mut writer, &mut shadow, &mut rng);
    writer.clear();
    assert_eq!(writer.get_num_buckets_in_use(), 0);
    assert_eq!(writer.get_num_buckets(), 1500);
    while let Some((key, _)) = shadow.pop_first() {
        assert!(writer.get(&key).is_none());
    }
    do_random_ops(2000, 1500, 0.75, &mut writer, &mut shadow, &mut rng);
    for i in 0..(1500 - writer.get_num_buckets_in_use()) {
        writer.insert((1500 + i as u128).into(), 0).unwrap();
    }
    assert_eq!(writer.insert(5000.into(), 0), Err(FullError {}));
    writer.clear();
    assert!(writer.insert(5000.into(), 0).is_ok());
}

#[test]
fn test_idx_remove() {
    let mut writer =
        HashMapInit::<TestKey, usize>::new_resizeable_named(1500, 2000, "test_idx_remove")
            .attach_writer();
    let mut shadow: std::collections::BTreeMap<TestKey, usize> = BTreeMap::new();
    let mut rng = rand::rng();
    do_random_ops(2000, 1500, 0.25, &mut writer, &mut shadow, &mut rng);
    for _ in 0..100 {
        let idx = (rng.next_u32() % 1500) as usize;
        if let Some(e) = writer.entry_at_bucket(idx) {
            shadow.remove(&e._key);
            e.remove();
        }
    }
    while let Some((key, val)) = shadow.pop_first() {
        assert_eq!(*writer.get(&key).unwrap(), val);
    }
}

#[test]
fn test_idx_get() {
    let mut writer =
        HashMapInit::<TestKey, usize>::new_resizeable_named(1500, 2000, "test_idx_get")
            .attach_writer();
    let mut shadow: std::collections::BTreeMap<TestKey, usize> = BTreeMap::new();
    let mut rng = rand::rng();
    do_random_ops(2000, 1500, 0.25, &mut writer, &mut shadow, &mut rng);
    for _ in 0..100 {
        let idx = (rng.next_u32() % 1500) as usize;
        if let Some(pair) = writer.get_at_bucket(idx) {
            {
                let v: *const usize = &pair.1;
                assert_eq!(writer.get_bucket_for_value(v), idx);
            }
            {
                let v: *const usize = &pair.1;
                assert_eq!(writer.get_bucket_for_value(v), idx);
            }
        }
    }
}

#[test]
fn test_shrink() {
    let mut writer = HashMapInit::<TestKey, usize>::new_resizeable_named(1500, 2000, "test_shrink")
        .attach_writer();
    let mut shadow: std::collections::BTreeMap<TestKey, usize> = BTreeMap::new();
    let mut rng = rand::rng();

    do_random_ops(10000, 1500, 0.75, &mut writer, &mut shadow, &mut rng);
    do_shrink(&mut writer, &mut shadow, 1500, 1000);
    assert_eq!(writer.get_num_buckets(), 1000);
    do_deletes(500, &mut writer, &mut shadow);
    do_random_ops(10000, 500, 0.75, &mut writer, &mut shadow, &mut rng);
    assert!(writer.get_num_buckets_in_use() <= 1000);
}

#[test]
fn test_shrink_grow_seq() {
    let mut writer =
        HashMapInit::<TestKey, usize>::new_resizeable_named(1000, 20000, "test_grow_seq")
            .attach_writer();
    let mut shadow: std::collections::BTreeMap<TestKey, usize> = BTreeMap::new();
    let mut rng = rand::rng();

    do_random_ops(500, 1000, 0.1, &mut writer, &mut shadow, &mut rng);
    eprintln!("Shrinking to 750");
    do_shrink(&mut writer, &mut shadow, 1000, 750);
    do_random_ops(200, 1000, 0.5, &mut writer, &mut shadow, &mut rng);
    eprintln!("Growing to 1500");
    writer.grow(1500).unwrap();
    do_random_ops(600, 1500, 0.1, &mut writer, &mut shadow, &mut rng);
    eprintln!("Shrinking to 200");
    while shadow.len() > 100 {
        do_deletes(1, &mut writer, &mut shadow);
    }
    do_shrink(&mut writer, &mut shadow, 1500, 200);
    do_random_ops(50, 1500, 0.25, &mut writer, &mut shadow, &mut rng);
    eprintln!("Growing to 10k");
    writer.grow(10000).unwrap();
    do_random_ops(10000, 5000, 0.25, &mut writer, &mut shadow, &mut rng);
}

#[test]
fn test_bucket_ops() {
    let writer = HashMapInit::<TestKey, usize>::new_resizeable_named(1000, 1200, "test_bucket_ops")
        .attach_writer();
    match writer.entry(1.into()) {
        Entry::Occupied(mut e) => {
            e.insert(2);
        }
        Entry::Vacant(e) => {
            _ = e.insert(2).unwrap();
        }
    }
    assert_eq!(writer.get_num_buckets_in_use(), 1);
    assert_eq!(writer.get_num_buckets(), 1000);
    assert_eq!(*writer.get(&1.into()).unwrap(), 2);
    let pos = match writer.entry(1.into()) {
        Entry::Occupied(e) => {
            assert_eq!(e._key, 1.into());
            e.bucket_pos as usize
        }
        Entry::Vacant(_) => {
            panic!("Insert didn't affect entry");
        }
    };
    assert_eq!(writer.entry_at_bucket(pos).unwrap()._key, 1.into());
    assert_eq!(*writer.get_at_bucket(pos).unwrap(), (1.into(), 2));
    {
        let ptr: *const usize = &*writer.get(&1.into()).unwrap();
        assert_eq!(writer.get_bucket_for_value(ptr), pos);
    }
    writer.remove(&1.into());
    assert!(writer.get(&1.into()).is_none());
}

#[test]
fn test_shrink_zero() {
    let mut writer =
        HashMapInit::<TestKey, usize>::new_resizeable_named(1500, 2000, "test_shrink_zero")
            .attach_writer();
    writer.begin_shrink(0);
    for i in 0..1500 {
        writer.entry_at_bucket(i).map(|x| x.remove());
    }
    writer.finish_shrink().unwrap();
    assert_eq!(writer.get_num_buckets_in_use(), 0);
    let entry = writer.entry(1.into());
    if let Entry::Vacant(v) = entry {
        assert!(v.insert(2).is_err());
    } else {
        panic!("Somehow got non-vacant entry in empty map.")
    }
    writer.grow(50).unwrap();
    let entry = writer.entry(1.into());
    if let Entry::Vacant(v) = entry {
        assert!(v.insert(2).is_ok());
    } else {
        panic!("Somehow got non-vacant entry in empty map.")
    }
    assert_eq!(writer.get_num_buckets_in_use(), 1);
}

#[test]
#[should_panic]
fn test_grow_oom() {
    let writer = HashMapInit::<TestKey, usize>::new_resizeable_named(1500, 2000, "test_grow_oom")
        .attach_writer();
    writer.grow(20000).unwrap();
}

#[test]
#[should_panic]
fn test_shrink_bigger() {
    let mut writer =
        HashMapInit::<TestKey, usize>::new_resizeable_named(1500, 2500, "test_shrink_bigger")
            .attach_writer();
    writer.begin_shrink(2000);
}

#[test]
#[should_panic]
fn test_shrink_early_finish() {
    let writer =
        HashMapInit::<TestKey, usize>::new_resizeable_named(1500, 2500, "test_shrink_early_finish")
            .attach_writer();
    writer.finish_shrink().unwrap();
}

#[test]
#[should_panic]
fn test_shrink_fixed_size() {
    let mut area = [MaybeUninit::uninit(); 10000];
    let init_struct = HashMapInit::<TestKey, usize>::with_fixed(3, &mut area);
    let mut writer = init_struct.attach_writer();
    writer.begin_shrink(1);
}
@@ -1,3 +1,418 @@
pub mod hash;
pub mod shmem;
pub mod sync;
//! Shared memory utilities for neon communicator

use std::num::NonZeroUsize;
use std::os::fd::{AsFd, BorrowedFd, OwnedFd};
use std::ptr::NonNull;
use std::sync::atomic::{AtomicUsize, Ordering};

use nix::errno::Errno;
use nix::sys::mman::MapFlags;
use nix::sys::mman::ProtFlags;
use nix::sys::mman::mmap as nix_mmap;
use nix::sys::mman::munmap as nix_munmap;
use nix::unistd::ftruncate as nix_ftruncate;

/// ShmemHandle represents a shared memory area that can be shared by processes over fork().
/// Unlike shared memory allocated by Postgres, this area is resizable, up to 'max_size' that's
/// specified at creation.
///
/// The area is backed by an anonymous file created with memfd_create(). The full address space for
/// 'max_size' is reserved up-front with mmap(), but whenever you call [`ShmemHandle::set_size`],
/// the underlying file is resized. Do not access the area beyond the current size. Currently, that
/// will cause the file to be expanded, but we might use mprotect() etc. to enforce that in the
/// future.
pub struct ShmemHandle {
    /// memfd file descriptor
    fd: OwnedFd,

    max_size: usize,

    // Pointer to the beginning of the shared memory area. The header is stored there.
    shared_ptr: NonNull<SharedStruct>,

    // Pointer to the beginning of the user data
    pub data_ptr: NonNull<u8>,
}

/// This is stored at the beginning in the shared memory area.
struct SharedStruct {
    max_size: usize,

    /// Current size of the backing file. The high-order bit is used for the RESIZE_IN_PROGRESS flag
    current_size: AtomicUsize,
}

const RESIZE_IN_PROGRESS: usize = 1 << 63;

const HEADER_SIZE: usize = std::mem::size_of::<SharedStruct>();

/// Error type returned by the ShmemHandle functions.
#[derive(thiserror::Error, Debug)]
#[error("{msg}: {errno}")]
pub struct Error {
    pub msg: String,
    pub errno: Errno,
}

impl Error {
    fn new(msg: &str, errno: Errno) -> Error {
        Error {
            msg: msg.to_string(),
            errno,
        }
    }
}

impl ShmemHandle {
    /// Create a new shared memory area. To communicate between processes, the processes need to be
    /// fork()'d after calling this, so that the ShmemHandle is inherited by all processes.
    ///
    /// If the ShmemHandle is dropped, the memory is unmapped from the current process. Other
    /// processes can continue using it, however.
    pub fn new(name: &str, initial_size: usize, max_size: usize) -> Result<ShmemHandle, Error> {
        // create the backing anonymous file.
        let fd = create_backing_file(name)?;

        Self::new_with_fd(fd, initial_size, max_size)
    }

    fn new_with_fd(
        fd: OwnedFd,
        initial_size: usize,
        max_size: usize,
    ) -> Result<ShmemHandle, Error> {
        // We reserve the high-order bit for the RESIZE_IN_PROGRESS flag, and the actual size
        // is a little larger than this because of the SharedStruct header. Make the upper limit
        // somewhat smaller than that, because with anything close to that, you'll run out of
        // memory anyway.
        if max_size >= 1 << 48 {
            panic!("max size {max_size} too large");
        }
        if initial_size > max_size {
            panic!("initial size {initial_size} larger than max size {max_size}");
        }

        // The actual initial / max size is the one given by the caller, plus the size of
        // 'SharedStruct'.
        let initial_size = HEADER_SIZE + initial_size;
        let max_size = NonZeroUsize::new(HEADER_SIZE + max_size).unwrap();

        // Reserve address space for it with mmap
        //
        // TODO: Use MAP_HUGETLB if possible
        let start_ptr = unsafe {
            nix_mmap(
                None,
                max_size,
                ProtFlags::PROT_READ | ProtFlags::PROT_WRITE,
                MapFlags::MAP_SHARED,
                &fd,
                0,
            )
        }
        .map_err(|e| Error::new("mmap failed", e))?;

        // Reserve space for the initial size
        enlarge_file(fd.as_fd(), initial_size as u64)?;

        // Initialize the header
        let shared: NonNull<SharedStruct> = start_ptr.cast();
        unsafe {
            shared.write(SharedStruct {
                max_size: max_size.into(),
                current_size: AtomicUsize::new(initial_size),
            })
        };

        // The user data begins after the header
        let data_ptr = unsafe { start_ptr.cast().add(HEADER_SIZE) };

        Ok(ShmemHandle {
            fd,
            max_size: max_size.into(),
            shared_ptr: shared,
            data_ptr,
        })
    }

    // return reference to the header
    fn shared(&self) -> &SharedStruct {
        unsafe { self.shared_ptr.as_ref() }
    }

    /// Resize the shared memory area. 'new_size' must not be larger than the 'max_size' specified
    /// when creating the area.
    ///
    /// This may only be called from one process/thread concurrently. We detect that case
    /// and return an Error.
    pub fn set_size(&self, new_size: usize) -> Result<(), Error> {
        let new_size = new_size + HEADER_SIZE;
        let shared = self.shared();

        if new_size > self.max_size {
            panic!(
                "new size ({}) is greater than max size ({})",
                new_size, self.max_size
            );
        }
        assert_eq!(self.max_size, shared.max_size);

        // Lock the area by setting the bit in 'current_size'
        //
        // Ordering::Relaxed would probably be sufficient here, as we don't access any other memory
        // and the posix_fallocate/ftruncate call is surely a synchronization point anyway. But
        // since this is not performance-critical, better safe than sorry.
        let mut old_size = shared.current_size.load(Ordering::Acquire);
        loop {
            if (old_size & RESIZE_IN_PROGRESS) != 0 {
                return Err(Error::new(
                    "concurrent resize detected",
                    Errno::UnknownErrno,
                ));
            }
            match shared.current_size.compare_exchange(
                old_size,
                new_size,
                Ordering::Acquire,
                Ordering::Relaxed,
            ) {
                Ok(_) => break,
                Err(x) => old_size = x,
            }
        }

        // Ok, we got the lock.
        //
        // NB: If anything goes wrong, we *must* clear the bit!
        let result = {
            use std::cmp::Ordering::{Equal, Greater, Less};
            match new_size.cmp(&old_size) {
                Less => nix_ftruncate(&self.fd, new_size as i64).map_err(|e| {
                    Error::new("could not shrink shmem segment, ftruncate failed", e)
                }),
                Equal => Ok(()),
                Greater => enlarge_file(self.fd.as_fd(), new_size as u64),
            }
        };

        // Unlock
        shared.current_size.store(
            if result.is_ok() { new_size } else { old_size },
            Ordering::Release,
        );

        result
    }

    /// Returns the current user-visible size of the shared memory segment.
    ///
    /// NOTE: a concurrent set_size() call can change the size at any time. It is the caller's
    /// responsibility not to access the area beyond the current size.
    pub fn current_size(&self) -> usize {
        let total_current_size =
            self.shared().current_size.load(Ordering::Relaxed) & !RESIZE_IN_PROGRESS;
        total_current_size - HEADER_SIZE
    }
}

impl Drop for ShmemHandle {
    fn drop(&mut self) {
        // SAFETY: The pointer was obtained from mmap() with the given size.
        // We unmap the entire region.
        let _ = unsafe { nix_munmap(self.shared_ptr.cast(), self.max_size) };
        // The fd is dropped automatically by OwnedFd.
    }
}

/// Create a "backing file" for the shared memory area. On Linux, use memfd_create(), to create an
/// anonymous in-memory file. On macOS, fall back to a regular file. That's good enough for
/// development and testing, but in production we want the file to stay in memory.
///
/// Disable 'unused_variables' warnings, because in the macOS path, 'name' is unused.
#[allow(unused_variables)]
fn create_backing_file(name: &str) -> Result<OwnedFd, Error> {
    #[cfg(not(target_os = "macos"))]
    {
        nix::sys::memfd::memfd_create(name, nix::sys::memfd::MFdFlags::empty())
            .map_err(|e| Error::new("memfd_create failed", e))
    }
    #[cfg(target_os = "macos")]
    {
        let file = tempfile::tempfile().map_err(|e| {
            Error::new(
                "could not create temporary file to back shmem area",
                nix::errno::Errno::from_raw(e.raw_os_error().unwrap_or(0)),
            )
        })?;
        Ok(OwnedFd::from(file))
    }
}

fn enlarge_file(fd: BorrowedFd, size: u64) -> Result<(), Error> {
    // Use posix_fallocate() to enlarge the file. It reserves the space correctly, so that
    // we don't get a segfault later when trying to actually use it.
    #[cfg(not(target_os = "macos"))]
    {
        nix::fcntl::posix_fallocate(fd, 0, size as i64).map_err(|e| {
            Error::new(
                "could not grow shmem segment, posix_fallocate failed",
                e,
            )
        })
    }
    // As a fallback on macOS, which doesn't have posix_fallocate, use plain ftruncate
    #[cfg(target_os = "macos")]
    {
        nix::unistd::ftruncate(fd, size as i64)
            .map_err(|e| Error::new("could not grow shmem segment, ftruncate failed", e))
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    use nix::unistd::ForkResult;
    use std::ops::Range;

    /// check that all bytes in given range have the expected value.
    fn assert_range(ptr: *const u8, expected: u8, range: Range<usize>) {
        for i in range {
            let b = unsafe { *(ptr.add(i)) };
            assert_eq!(expected, b, "unexpected byte at offset {i}");
        }
    }

    /// Write 'b' to all bytes in the given range
    fn write_range(ptr: *mut u8, b: u8, range: Range<usize>) {
        unsafe { std::ptr::write_bytes(ptr.add(range.start), b, range.end - range.start) };
    }

    // simple single-process test of growing and shrinking
    #[test]
    fn test_shmem_resize() -> Result<(), Error> {
        let max_size = 1024 * 1024;
        let init_struct = ShmemHandle::new("test_shmem_resize", 0, max_size)?;

        assert_eq!(init_struct.current_size(), 0);

        // Initial grow
        let size1 = 10000;
        init_struct.set_size(size1).unwrap();
        assert_eq!(init_struct.current_size(), size1);

        // Write some data
        let data_ptr = init_struct.data_ptr.as_ptr();
        write_range(data_ptr, 0xAA, 0..size1);
        assert_range(data_ptr, 0xAA, 0..size1);

        // Shrink
        let size2 = 5000;
        init_struct.set_size(size2).unwrap();
        assert_eq!(init_struct.current_size(), size2);

        // Grow again
        let size3 = 20000;
        init_struct.set_size(size3).unwrap();
        assert_eq!(init_struct.current_size(), size3);

        // Try to read it. The area that was shrunk and grown again should read as all zeros now
        assert_range(data_ptr, 0xAA, 0..5000);
        assert_range(data_ptr, 0, 5000..size1);

        // Try to grow beyond max_size
        //let size4 = max_size + 1;
        //assert!(init_struct.set_size(size4).is_err());

        // Dropping init_struct should unmap the memory
        drop(init_struct);

        Ok(())
    }

    /// This is used in tests to coordinate between test processes. It's like std::sync::Barrier,
    /// but is stored in the shared memory area and works across processes. It's implemented by
    /// polling, because e.g. standard rust mutexes are not guaranteed to work across processes.
    struct SimpleBarrier {
        num_procs: usize,
        count: AtomicUsize,
    }

    impl SimpleBarrier {
        unsafe fn init(ptr: *mut SimpleBarrier, num_procs: usize) {
            unsafe {
                *ptr = SimpleBarrier {
                    num_procs,
                    count: AtomicUsize::new(0),
                }
            }
        }

        pub fn wait(&self) {
            let old = self.count.fetch_add(1, Ordering::Relaxed);

            let generation = old / self.num_procs;

            let mut current = old + 1;
            while current < (generation + 1) * self.num_procs {
                std::thread::sleep(std::time::Duration::from_millis(10));
                current = self.count.load(Ordering::Relaxed);
            }
        }
    }

    #[test]
    fn test_multi_process() {
        // Initialize
        let max_size = 1_000_000_000_000;
        let init_struct = ShmemHandle::new("test_multi_process", 0, max_size).unwrap();
        let ptr = init_struct.data_ptr.as_ptr();

        // Store the SimpleBarrier in the first 1k of the area.
        init_struct.set_size(10000).unwrap();
        let barrier_ptr: *mut SimpleBarrier = unsafe {
            ptr.add(ptr.align_offset(std::mem::align_of::<SimpleBarrier>()))
                .cast()
        };
        unsafe { SimpleBarrier::init(barrier_ptr, 2) };
        let barrier = unsafe { barrier_ptr.as_ref().unwrap() };

        // Fork another test process. The code after this runs in both processes concurrently.
        let fork_result = unsafe { nix::unistd::fork().unwrap() };

        // In the parent, fill bytes between 1000..2000. In the child, between 2000..3000
        if fork_result.is_parent() {
            write_range(ptr, 0xAA, 1000..2000);
        } else {
            write_range(ptr, 0xBB, 2000..3000);
        }
        barrier.wait();
        // Verify the contents. (in both processes)
        assert_range(ptr, 0xAA, 1000..2000);
        assert_range(ptr, 0xBB, 2000..3000);

        // Grow, from the child this time
        let size = 10_000_000;
        if !fork_result.is_parent() {
            init_struct.set_size(size).unwrap();
        }
        barrier.wait();

        // make some writes at the end
        if fork_result.is_parent() {
            write_range(ptr, 0xAA, (size - 10)..size);
        } else {
            write_range(ptr, 0xBB, (size - 20)..(size - 10));
        }
        barrier.wait();

        // Verify the contents. (This runs in both processes)
        assert_range(ptr, 0, (size - 1000)..(size - 20));
        assert_range(ptr, 0xBB, (size - 20)..(size - 10));
        assert_range(ptr, 0xAA, (size - 10)..size);

        if let ForkResult::Parent { child } = fork_result {
            nix::sys::wait::waitpid(child, None).unwrap();
        }
    }
}
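
// A minimal single-process sketch of the ShmemHandle API above; the name and
// sizes are arbitrary, and a real caller would fork() after creation so the
// mapping is shared with the child processes.
fn shmem_example() -> Result<(), Error> {
    // Reserve 1 MiB of address space, but only back 4 KiB with memory.
    let shmem = ShmemHandle::new("shmem_example", 4096, 1 << 20)?;
    assert_eq!(shmem.current_size(), 4096);
    shmem.set_size(8192)?; // grow the backing file
    shmem.set_size(1024)?; // shrink it again
    assert_eq!(shmem.current_size(), 1024);
    Ok(())
}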
@@ -1,409 +0,0 @@
//! Dynamically resizable contiguous chunk of shared memory

use std::num::NonZeroUsize;
use std::os::fd::{AsFd, BorrowedFd, OwnedFd};
use std::ptr::NonNull;
use std::sync::atomic::{AtomicUsize, Ordering};

use nix::errno::Errno;
use nix::sys::mman::MapFlags;
use nix::sys::mman::ProtFlags;
use nix::sys::mman::mmap as nix_mmap;
use nix::sys::mman::munmap as nix_munmap;
use nix::unistd::ftruncate as nix_ftruncate;

/// `ShmemHandle` represents a shared memory area that can be shared by processes over `fork()`.
/// Unlike shared memory allocated by Postgres, this area is resizable, up to `max_size` that's
/// specified at creation.
///
/// The area is backed by an anonymous file created with `memfd_create()`. The full address space for
/// `max_size` is reserved up-front with `mmap()`, but whenever you call [`ShmemHandle::set_size`],
/// the underlying file is resized. Do not access the area beyond the current size. Currently, that
/// will cause the file to be expanded, but we might use `mprotect()` etc. to enforce that in the
/// future.
pub struct ShmemHandle {
    /// memfd file descriptor
    fd: OwnedFd,

    max_size: usize,

    // Pointer to the beginning of the shared memory area. The header is stored there.
    shared_ptr: NonNull<SharedStruct>,

    // Pointer to the beginning of the user data
    pub data_ptr: NonNull<u8>,
}

/// This is stored at the beginning in the shared memory area.
struct SharedStruct {
    max_size: usize,

    /// Current size of the backing file. The high-order bit is used for the [`RESIZE_IN_PROGRESS`] flag.
    current_size: AtomicUsize,
}

const RESIZE_IN_PROGRESS: usize = 1 << 63;

const HEADER_SIZE: usize = std::mem::size_of::<SharedStruct>();
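
// Example of the encoding above: with RESIZE_IN_PROGRESS = 1 << 63, a
// `current_size` value of (1 << 63) | 16384 means "16384 bytes, resize under
// way"; masking with !RESIZE_IN_PROGRESS recovers the plain size, which is
// what `current_size()` below does.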

/// Error type returned by the [`ShmemHandle`] functions.
#[derive(thiserror::Error, Debug)]
#[error("{msg}: {errno}")]
pub struct Error {
    pub msg: String,
    pub errno: Errno,
}

impl Error {
    fn new(msg: &str, errno: Errno) -> Self {
        Self {
            msg: msg.to_string(),
            errno,
        }
    }
}

impl ShmemHandle {
    /// Create a new shared memory area. To communicate between processes, the processes need to be
    /// `fork()`'d after calling this, so that the `ShmemHandle` is inherited by all processes.
    ///
    /// If the `ShmemHandle` is dropped, the memory is unmapped from the current process. Other
    /// processes can continue using it, however.
    pub fn new(name: &str, initial_size: usize, max_size: usize) -> Result<Self, Error> {
        // create the backing anonymous file.
        let fd = create_backing_file(name)?;

        Self::new_with_fd(fd, initial_size, max_size)
    }

    fn new_with_fd(fd: OwnedFd, initial_size: usize, max_size: usize) -> Result<Self, Error> {
        // We reserve the high-order bit for the `RESIZE_IN_PROGRESS` flag, and the actual size
        // is a little larger than this because of the SharedStruct header. Make the upper limit
        // somewhat smaller than that, because with anything close to that, you'll run out of
        // memory anyway.
        assert!(max_size < 1 << 48, "max size {max_size} too large");

        assert!(
            initial_size <= max_size,
            "initial size {initial_size} larger than max size {max_size}"
        );

        // The actual initial / max size is the one given by the caller, plus the size of
        // 'SharedStruct'.
        let initial_size = HEADER_SIZE + initial_size;
        let max_size = NonZeroUsize::new(HEADER_SIZE + max_size).unwrap();

        // Reserve address space for it with mmap
        //
        // TODO: Use MAP_HUGETLB if possible
        let start_ptr = unsafe {
            nix_mmap(
                None,
                max_size,
                ProtFlags::PROT_READ | ProtFlags::PROT_WRITE,
                MapFlags::MAP_SHARED,
                &fd,
                0,
            )
        }
        .map_err(|e| Error::new("mmap failed", e))?;

        // Reserve space for the initial size
        enlarge_file(fd.as_fd(), initial_size as u64)?;

        // Initialize the header
        let shared: NonNull<SharedStruct> = start_ptr.cast();
        unsafe {
            shared.write(SharedStruct {
                max_size: max_size.into(),
                current_size: AtomicUsize::new(initial_size),
            });
        }

        // The user data begins after the header
        let data_ptr = unsafe { start_ptr.cast().add(HEADER_SIZE) };

        Ok(Self {
            fd,
            max_size: max_size.into(),
            shared_ptr: shared,
            data_ptr,
        })
    }

    // return reference to the header
    fn shared(&self) -> &SharedStruct {
        unsafe { self.shared_ptr.as_ref() }
    }

    /// Resize the shared memory area. `new_size` must not be larger than the `max_size` specified
    /// when creating the area.
    ///
    /// This may only be called from one process/thread concurrently. We detect that case
    /// and return an [`shmem::Error`](Error).
    pub fn set_size(&self, new_size: usize) -> Result<(), Error> {
        let new_size = new_size + HEADER_SIZE;
        let shared = self.shared();

        assert!(
            new_size <= self.max_size,
            "new size ({new_size}) is greater than max size ({})",
            self.max_size
        );

        assert_eq!(self.max_size, shared.max_size);

        // Lock the area by setting the bit in `current_size`
        //
        // Ordering::Relaxed would probably be sufficient here, as we don't access any other memory
        // and the `posix_fallocate`/`ftruncate` call is surely a synchronization point anyway. But
        // since this is not performance-critical, better safe than sorry.
        let mut old_size = shared.current_size.load(Ordering::Acquire);
        loop {
            if (old_size & RESIZE_IN_PROGRESS) != 0 {
                return Err(Error::new(
                    "concurrent resize detected",
                    Errno::UnknownErrno,
                ));
            }
            match shared.current_size.compare_exchange(
                old_size,
                new_size,
                Ordering::Acquire,
                Ordering::Relaxed,
            ) {
                Ok(_) => break,
                Err(x) => old_size = x,
            }
        }

        // Ok, we got the lock.
        //
        // NB: If anything goes wrong, we *must* clear the bit!
        let result = {
            use std::cmp::Ordering::{Equal, Greater, Less};
            match new_size.cmp(&old_size) {
                Less => nix_ftruncate(&self.fd, new_size as i64)
                    .map_err(|e| Error::new("could not shrink shmem segment, ftruncate failed", e)),
                Equal => Ok(()),
                Greater => enlarge_file(self.fd.as_fd(), new_size as u64),
            }
        };

        // Unlock
        shared.current_size.store(
            if result.is_ok() { new_size } else { old_size },
            Ordering::Release,
        );

        result
    }

    /// Returns the current user-visible size of the shared memory segment.
    ///
    /// NOTE: a concurrent [`ShmemHandle::set_size()`] call can change the size at any time.
    /// It is the caller's responsibility not to access the area beyond the current size.
    pub fn current_size(&self) -> usize {
        let total_current_size =
            self.shared().current_size.load(Ordering::Relaxed) & !RESIZE_IN_PROGRESS;
        total_current_size - HEADER_SIZE
    }
}

impl Drop for ShmemHandle {
    fn drop(&mut self) {
        // SAFETY: The pointer was obtained from mmap() with the given size.
        // We unmap the entire region.
        let _ = unsafe { nix_munmap(self.shared_ptr.cast(), self.max_size) };
        // The fd is dropped automatically by OwnedFd.
    }
}

/// Create a "backing file" for the shared memory area. On Linux, use `memfd_create()`, to create an
/// anonymous in-memory file. On macOS, fall back to a regular file. That's good enough for
/// development and testing, but in production we want the file to stay in memory.
///
/// Disable unused variables warnings because `name` is unused in the macOS path.
#[allow(unused_variables)]
fn create_backing_file(name: &str) -> Result<OwnedFd, Error> {
    #[cfg(not(target_os = "macos"))]
    {
        nix::sys::memfd::memfd_create(name, nix::sys::memfd::MFdFlags::empty())
            .map_err(|e| Error::new("memfd_create failed", e))
    }
    #[cfg(target_os = "macos")]
    {
        let file = tempfile::tempfile().map_err(|e| {
            Error::new(
                "could not create temporary file to back shmem area",
                nix::errno::Errno::from_raw(e.raw_os_error().unwrap_or(0)),
            )
        })?;
        Ok(OwnedFd::from(file))
    }
}

fn enlarge_file(fd: BorrowedFd, size: u64) -> Result<(), Error> {
    // Use posix_fallocate() to enlarge the file. It reserves the space correctly, so that
    // we don't get a segfault later when trying to actually use it.
    #[cfg(not(target_os = "macos"))]
    {
        nix::fcntl::posix_fallocate(fd, 0, size as i64)
            .map_err(|e| Error::new("could not grow shmem segment, posix_fallocate failed", e))
    }
    // As a fallback on macOS, which doesn't have posix_fallocate, use plain ftruncate
    #[cfg(target_os = "macos")]
    {
        nix::unistd::ftruncate(fd, size as i64)
            .map_err(|e| Error::new("could not grow shmem segment, ftruncate failed", e))
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    use nix::unistd::ForkResult;
    use std::ops::Range;

    /// check that all bytes in given range have the expected value.
    fn assert_range(ptr: *const u8, expected: u8, range: Range<usize>) {
        for i in range {
            let b = unsafe { *(ptr.add(i)) };
            assert_eq!(expected, b, "unexpected byte at offset {i}");
        }
    }

    /// Write 'b' to all bytes in the given range
    fn write_range(ptr: *mut u8, b: u8, range: Range<usize>) {
        unsafe { std::ptr::write_bytes(ptr.add(range.start), b, range.end - range.start) };
    }

    // simple single-process test of growing and shrinking
    #[test]
    fn test_shmem_resize() -> Result<(), Error> {
        let max_size = 1024 * 1024;
        let init_struct = ShmemHandle::new("test_shmem_resize", 0, max_size)?;

        assert_eq!(init_struct.current_size(), 0);

        // Initial grow
        let size1 = 10000;
        init_struct.set_size(size1).unwrap();
        assert_eq!(init_struct.current_size(), size1);

        // Write some data
        let data_ptr = init_struct.data_ptr.as_ptr();
        write_range(data_ptr, 0xAA, 0..size1);
        assert_range(data_ptr, 0xAA, 0..size1);

        // Shrink
        let size2 = 5000;
        init_struct.set_size(size2).unwrap();
        assert_eq!(init_struct.current_size(), size2);

        // Grow again
        let size3 = 20000;
        init_struct.set_size(size3).unwrap();
        assert_eq!(init_struct.current_size(), size3);

        // Try to read it. The area that was shrunk and grown again should read as all zeros now
        assert_range(data_ptr, 0xAA, 0..5000);
        assert_range(data_ptr, 0, 5000..size1);

        // Try to grow beyond max_size
        //let size4 = max_size + 1;
        //assert!(init_struct.set_size(size4).is_err());

        // Dropping init_struct should unmap the memory
        drop(init_struct);

        Ok(())
    }

    /// This is used in tests to coordinate between test processes. It's like `std::sync::Barrier`,
    /// but is stored in the shared memory area and works across processes. It's implemented by
    /// polling, because e.g. standard rust mutexes are not guaranteed to work across processes.
    struct SimpleBarrier {
        num_procs: usize,
        count: AtomicUsize,
    }

    impl SimpleBarrier {
        unsafe fn init(ptr: *mut SimpleBarrier, num_procs: usize) {
            unsafe {
                *ptr = SimpleBarrier {
                    num_procs,
                    count: AtomicUsize::new(0),
                }
            }
        }

        pub fn wait(&self) {
            let old = self.count.fetch_add(1, Ordering::Relaxed);

            let generation = old / self.num_procs;

            let mut current = old + 1;
            while current < (generation + 1) * self.num_procs {
                std::thread::sleep(std::time::Duration::from_millis(10));
                current = self.count.load(Ordering::Relaxed);
            }
        }
    }
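
    // Worked example of the generation arithmetic above with num_procs = 2:
    // the third arrival overall sees old = 2, so generation = 1, and it spins
    // until count reaches (1 + 1) * 2 = 4, i.e. until its partner from the
    // same generation has also called wait().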
|
||||
#[test]
|
||||
fn test_multi_process() {
|
||||
// Initialize
|
||||
let max_size = 1_000_000_000_000;
|
||||
let init_struct = ShmemHandle::new("test_multi_process", 0, max_size).unwrap();
|
||||
let ptr = init_struct.data_ptr.as_ptr();
|
||||
|
||||
// Store the SimpleBarrier in the first 1k of the area.
|
||||
init_struct.set_size(10000).unwrap();
|
||||
let barrier_ptr: *mut SimpleBarrier = unsafe {
|
||||
ptr.add(ptr.align_offset(std::mem::align_of::<SimpleBarrier>()))
|
||||
.cast()
|
||||
};
|
||||
unsafe { SimpleBarrier::init(barrier_ptr, 2) };
|
||||
let barrier = unsafe { barrier_ptr.as_ref().unwrap() };
|
||||
|
||||
// Fork another test process. The code after this runs in both processes concurrently.
|
||||
let fork_result = unsafe { nix::unistd::fork().unwrap() };
|
||||
|
||||
// In the parent, fill bytes between 1000..2000. In the child, between 2000..3000
|
||||
if fork_result.is_parent() {
|
||||
write_range(ptr, 0xAA, 1000..2000);
|
||||
} else {
|
||||
write_range(ptr, 0xBB, 2000..3000);
|
||||
}
|
||||
barrier.wait();
|
||||
// Verify the contents. (in both processes)
|
||||
assert_range(ptr, 0xAA, 1000..2000);
|
||||
assert_range(ptr, 0xBB, 2000..3000);
|
||||
|
||||
// Grow, from the child this time
|
||||
let size = 10_000_000;
|
||||
if !fork_result.is_parent() {
|
||||
init_struct.set_size(size).unwrap();
|
||||
}
|
||||
barrier.wait();
|
||||
|
||||
// make some writes at the end
|
||||
if fork_result.is_parent() {
|
||||
write_range(ptr, 0xAA, (size - 10)..size);
|
||||
} else {
|
||||
write_range(ptr, 0xBB, (size - 20)..(size - 10));
|
||||
}
|
||||
barrier.wait();
|
||||
|
||||
// Verify the contents. (This runs in both processes)
|
||||
assert_range(ptr, 0, (size - 1000)..(size - 20));
|
||||
assert_range(ptr, 0xBB, (size - 20)..(size - 10));
|
||||
assert_range(ptr, 0xAA, (size - 10)..size);
|
||||
|
||||
if let ForkResult::Parent { child } = fork_result {
|
||||
nix::sys::wait::waitpid(child, None).unwrap();
|
||||
}
|
||||
}
|
||||
}

@@ -1,111 +0,0 @@
//! Simple utilities akin to what's in [`std::sync`] but designed to work with shared memory.

use std::mem::MaybeUninit;
use std::ptr::NonNull;

use nix::errno::Errno;

pub type RwLock<T> = lock_api::RwLock<PthreadRwLock, T>;
pub type RwLockReadGuard<'a, T> = lock_api::RwLockReadGuard<'a, PthreadRwLock, T>;
pub type RwLockWriteGuard<'a, T> = lock_api::RwLockWriteGuard<'a, PthreadRwLock, T>;
pub type ValueReadGuard<'a, T> = lock_api::MappedRwLockReadGuard<'a, PthreadRwLock, T>;
pub type ValueWriteGuard<'a, T> = lock_api::MappedRwLockWriteGuard<'a, PthreadRwLock, T>;

/// Shared memory read-write lock.
pub struct PthreadRwLock(Option<NonNull<libc::pthread_rwlock_t>>);

/// Simple macro that calls a function in the libc namespace and panics if return value is nonzero.
macro_rules! libc_checked {
    ($fn_name:ident ( $($arg:expr),* )) => {{
        let res = libc::$fn_name($($arg),*);
        if res != 0 {
            panic!("{} failed with {}", stringify!($fn_name), Errno::from_raw(res));
        }
    }};
}

impl PthreadRwLock {
    /// Creates a new `PthreadRwLock` on top of a pointer to a pthread rwlock.
    ///
    /// # Safety
    /// `lock` must be non-null. Every unsafe operation will panic in the event of an error.
    pub unsafe fn new(lock: *mut libc::pthread_rwlock_t) -> Self {
        unsafe {
            let mut attrs = MaybeUninit::uninit();
            libc_checked!(pthread_rwlockattr_init(attrs.as_mut_ptr()));
            libc_checked!(pthread_rwlockattr_setpshared(
                attrs.as_mut_ptr(),
                libc::PTHREAD_PROCESS_SHARED
            ));
            libc_checked!(pthread_rwlock_init(lock, attrs.as_mut_ptr()));
            // Safety: POSIX specifies that "any function affecting the attributes
            // object (including destruction) shall not affect any previously
            // initialized read-write locks".
            libc_checked!(pthread_rwlockattr_destroy(attrs.as_mut_ptr()));
            Self(Some(NonNull::new_unchecked(lock)))
        }
    }

    fn inner(&self) -> NonNull<libc::pthread_rwlock_t> {
        match self.0 {
            None => {
                panic!("PthreadRwLock constructed badly - something likely used RawRwLock::INIT")
            }
            Some(x) => x,
        }
    }
}

unsafe impl lock_api::RawRwLock for PthreadRwLock {
    type GuardMarker = lock_api::GuardSend;
    const INIT: Self = Self(None);

    fn try_lock_shared(&self) -> bool {
        unsafe {
            let res = libc::pthread_rwlock_tryrdlock(self.inner().as_ptr());
            match res {
                0 => true,
                libc::EAGAIN => false,
                _ => panic!(
                    "pthread_rwlock_tryrdlock failed with {}",
                    Errno::from_raw(res)
                ),
            }
        }
    }

    fn try_lock_exclusive(&self) -> bool {
        unsafe {
            let res = libc::pthread_rwlock_trywrlock(self.inner().as_ptr());
            match res {
                0 => true,
                libc::EAGAIN => false,
                _ => panic!("try_wrlock failed with {}", Errno::from_raw(res)),
            }
        }
    }

    fn lock_shared(&self) {
        unsafe {
            libc_checked!(pthread_rwlock_rdlock(self.inner().as_ptr()));
        }
    }

    fn lock_exclusive(&self) {
        unsafe {
            libc_checked!(pthread_rwlock_wrlock(self.inner().as_ptr()));
        }
    }

    unsafe fn unlock_exclusive(&self) {
        unsafe {
            libc_checked!(pthread_rwlock_unlock(self.inner().as_ptr()));
        }
    }

    unsafe fn unlock_shared(&self) {
        unsafe {
            libc_checked!(pthread_rwlock_unlock(self.inner().as_ptr()));
        }
    }
}
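
The lock_api wrappers above are meant to be driven by a `pthread_rwlock_t` that lives inside the shared mapping itself. A minimal sketch of the intended usage under that assumption (the local `storage` stands in for shared memory here, and `lock_api::RwLock::from_raw` is assumed available in the lock_api version in use):

```rust
fn shared_counter_demo() {
    // In real use this storage would live inside the shared memory area, so
    // that every process sees the same pthread rwlock state.
    let mut storage: libc::pthread_rwlock_t = unsafe { std::mem::zeroed() };
    // Safety: the pointer is non-null, and pthread_rwlock_init overwrites the storage.
    let raw = unsafe { PthreadRwLock::new(&mut storage) };
    let lock: RwLock<u64> = lock_api::RwLock::from_raw(raw, 0);

    *lock.write() += 1;
    assert_eq!(*lock.read(), 1);
}
```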

@@ -596,7 +596,6 @@ pub struct TimelineImportRequest {
    pub timeline_id: TimelineId,
    pub start_lsn: Lsn,
    pub sk_set: Vec<NodeId>,
    pub force_upsert: bool,
}

#[derive(serde::Serialize, serde::Deserialize, Clone)]

@@ -69,6 +69,22 @@ impl Hash for ShardIdentity {
    }
}

/// Stripe size in number of pages
#[derive(Clone, Copy, Serialize, Deserialize, Eq, PartialEq, Debug)]
pub struct ShardStripeSize(pub u32);

impl Default for ShardStripeSize {
    fn default() -> Self {
        DEFAULT_STRIPE_SIZE
    }
}

impl std::fmt::Display for ShardStripeSize {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        self.0.fmt(f)
    }
}

/// Layout version: for future upgrades where we might change how the key->shard mapping works
#[derive(Clone, Copy, Serialize, Deserialize, Eq, PartialEq, Hash, Debug)]
pub struct ShardLayout(u8);

@@ -749,18 +749,7 @@ impl<IO: AsyncRead + AsyncWrite + Unpin> PostgresBackend<IO> {
        trace!("got query {query_string:?}");
        if let Err(e) = handler.process_query(self, query_string).await {
            match e {
                err @ QueryError::Shutdown => {
                    // Notify postgres of the connection shutdown at the libpq
                    // protocol level. This avoids postgres having to tell apart
                    // from an idle connection and a stale one, which is bug prone.
                    let shutdown_error = short_error(&err);
                    self.write_message_noflush(&BeMessage::ErrorResponse(
                        &shutdown_error,
                        Some(err.pg_error_code()),
                    ))?;

                    return Ok(ProcessMsgResult::Break);
                }
                QueryError::Shutdown => return Ok(ProcessMsgResult::Break),
                QueryError::SimulatedConnectionError => {
                    return Err(QueryError::SimulatedConnectionError);
                }

@@ -74,6 +74,7 @@ impl Header {
}

/// An enum representing Postgres backend messages.
#[non_exhaustive]
pub enum Message {
    AuthenticationCleartextPassword,
    AuthenticationGss,
@@ -144,7 +145,16 @@ impl Message {
            PARSE_COMPLETE_TAG => Message::ParseComplete,
            BIND_COMPLETE_TAG => Message::BindComplete,
            CLOSE_COMPLETE_TAG => Message::CloseComplete,
            NOTIFICATION_RESPONSE_TAG => Message::NotificationResponse(NotificationResponseBody {}),
            NOTIFICATION_RESPONSE_TAG => {
                let process_id = buf.read_i32::<BigEndian>()?;
                let channel = buf.read_cstr()?;
                let message = buf.read_cstr()?;
                Message::NotificationResponse(NotificationResponseBody {
                    process_id,
                    channel,
                    message,
                })
            }
            COPY_DONE_TAG => Message::CopyDone,
            COMMAND_COMPLETE_TAG => {
                let tag = buf.read_cstr()?;
@@ -533,7 +543,28 @@ impl NoticeResponseBody {
    }
}

pub struct NotificationResponseBody {}
pub struct NotificationResponseBody {
    process_id: i32,
    channel: Bytes,
    message: Bytes,
}

impl NotificationResponseBody {
    #[inline]
    pub fn process_id(&self) -> i32 {
        self.process_id
    }

    #[inline]
    pub fn channel(&self) -> io::Result<&str> {
        get_str(&self.channel)
    }

    #[inline]
    pub fn message(&self) -> io::Result<&str> {
        get_str(&self.message)
    }
}
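
For illustration, the body that the new `NOTIFICATION_RESPONSE_TAG` branch above reads is a big-endian i32 process ID followed by two NUL-terminated strings (channel, then payload). A hypothetical encoder for such a body, not part of this change:

```rust
fn encode_notification_body(process_id: i32, channel: &str, message: &str) -> Vec<u8> {
    let mut body = Vec::new();
    body.extend_from_slice(&process_id.to_be_bytes()); // read_i32::<BigEndian> reads this
    body.extend_from_slice(channel.as_bytes());
    body.push(0); // read_cstr stops at the NUL terminator
    body.extend_from_slice(message.as_bytes());
    body.push(0);
    body
}
```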

pub struct ParameterDescriptionBody {
    storage: Bytes,

@@ -13,7 +13,7 @@ use serde::{Deserialize, Serialize};
use tokio::sync::mpsc;

use crate::cancel_token::RawCancelToken;
use crate::codec::{BackendMessages, FrontendMessage, RecordNotices};
use crate::codec::{BackendMessages, FrontendMessage};
use crate::config::{Host, SslMode};
use crate::query::RowStream;
use crate::simple_query::SimpleQueryStream;
@@ -221,18 +221,6 @@ impl Client {
        &mut self.inner
    }

    pub fn record_notices(&mut self, limit: usize) -> mpsc::UnboundedReceiver<Box<str>> {
        let (tx, rx) = mpsc::unbounded_channel();

        let notices = RecordNotices { sender: tx, limit };
        self.inner
            .sender
            .send(FrontendMessage::RecordNotices(notices))
            .ok();

        rx
    }

    /// Pass text directly to the Postgres backend to allow it to sort out typing itself and
    /// to save a roundtrip
    pub async fn query_raw_txt<S, I>(

@@ -3,17 +3,10 @@ use std::io;
use bytes::{Bytes, BytesMut};
use fallible_iterator::FallibleIterator;
use postgres_protocol2::message::backend;
use tokio::sync::mpsc::UnboundedSender;
use tokio_util::codec::{Decoder, Encoder};

pub enum FrontendMessage {
    Raw(Bytes),
    RecordNotices(RecordNotices),
}

pub struct RecordNotices {
    pub sender: UnboundedSender<Box<str>>,
    pub limit: usize,
}

pub enum BackendMessage {
@@ -40,11 +33,14 @@ impl FallibleIterator for BackendMessages {

pub struct PostgresCodec;

impl Encoder<Bytes> for PostgresCodec {
impl Encoder<FrontendMessage> for PostgresCodec {
    type Error = io::Error;

    fn encode(&mut self, item: Bytes, dst: &mut BytesMut) -> io::Result<()> {
        dst.extend_from_slice(&item);
    fn encode(&mut self, item: FrontendMessage, dst: &mut BytesMut) -> io::Result<()> {
        match item {
            FrontendMessage::Raw(buf) => dst.extend_from_slice(&buf),
        }

        Ok(())
    }
}
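
For illustration, a minimal sketch of framing a raw frontend message with the codec above (the `encode_raw` helper is hypothetical, not part of this change):

```rust
use bytes::{Bytes, BytesMut};
use tokio_util::codec::Encoder;

fn encode_raw(codec: &mut PostgresCodec, payload: Bytes) -> std::io::Result<BytesMut> {
    let mut dst = BytesMut::new();
    // FrontendMessage::Raw bytes are copied into the output frame unchanged.
    codec.encode(FrontendMessage::Raw(payload), &mut dst)?;
    Ok(dst)
}
```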

@@ -1,9 +1,11 @@
use std::net::IpAddr;

use postgres_protocol2::message::backend::Message;
use tokio::net::TcpStream;
use tokio::sync::mpsc;

use crate::client::SocketConfig;
use crate::codec::BackendMessage;
use crate::config::Host;
use crate::connect_raw::connect_raw;
use crate::connect_socket::connect_socket;
@@ -46,8 +48,8 @@ where
    let stream = connect_tls(socket, config.ssl_mode, tls).await?;
    let RawConnection {
        stream,
        parameters: _,
        delayed_notice: _,
        parameters,
        delayed_notice,
        process_id,
        secret_key,
    } = connect_raw(stream, config).await?;
@@ -70,7 +72,13 @@ where
        secret_key,
    );

    let connection = Connection::new(stream, conn_tx, conn_rx);
    // delayed notices are always sent as "Async" messages.
    let delayed = delayed_notice
        .into_iter()
        .map(|m| BackendMessage::Async(Message::NoticeResponse(m)))
        .collect();

    let connection = Connection::new(stream, delayed, parameters, conn_tx, conn_rx);

    Ok((client, connection))
}

@@ -3,7 +3,7 @@ use std::io;
use std::pin::Pin;
use std::task::{Context, Poll};

use bytes::{Bytes, BytesMut};
use bytes::BytesMut;
use fallible_iterator::FallibleIterator;
use futures_util::{Sink, SinkExt, Stream, TryStreamExt, ready};
use postgres_protocol2::authentication::sasl;
@@ -14,7 +14,7 @@ use tokio::io::{AsyncRead, AsyncWrite};
use tokio_util::codec::Framed;

use crate::Error;
use crate::codec::{BackendMessage, BackendMessages, PostgresCodec};
use crate::codec::{BackendMessage, BackendMessages, FrontendMessage, PostgresCodec};
use crate::config::{self, AuthKeys, Config};
use crate::maybe_tls_stream::MaybeTlsStream;
use crate::tls::TlsStream;
@@ -25,7 +25,7 @@ pub struct StartupStream<S, T> {
    delayed_notice: Vec<NoticeResponseBody>,
}

impl<S, T> Sink<Bytes> for StartupStream<S, T>
impl<S, T> Sink<FrontendMessage> for StartupStream<S, T>
where
    S: AsyncRead + AsyncWrite + Unpin,
    T: AsyncRead + AsyncWrite + Unpin,
@@ -36,7 +36,7 @@ where
        Pin::new(&mut self.inner).poll_ready(cx)
    }

    fn start_send(mut self: Pin<&mut Self>, item: Bytes) -> io::Result<()> {
    fn start_send(mut self: Pin<&mut Self>, item: FrontendMessage) -> io::Result<()> {
        Pin::new(&mut self.inner).start_send(item)
    }

@@ -120,7 +120,10 @@ where
    let mut buf = BytesMut::new();
    frontend::startup_message(&config.server_params, &mut buf).map_err(Error::encode)?;

    stream.send(buf.freeze()).await.map_err(Error::io)
    stream
        .send(FrontendMessage::Raw(buf.freeze()))
        .await
        .map_err(Error::io)
}

async fn authenticate<S, T>(stream: &mut StartupStream<S, T>, config: &Config) -> Result<(), Error>
@@ -188,7 +191,10 @@ where
    let mut buf = BytesMut::new();
    frontend::password_message(password, &mut buf).map_err(Error::encode)?;

    stream.send(buf.freeze()).await.map_err(Error::io)
    stream
        .send(FrontendMessage::Raw(buf.freeze()))
        .await
        .map_err(Error::io)
}

async fn authenticate_sasl<S, T>(
@@ -247,7 +253,10 @@ where

    let mut buf = BytesMut::new();
    frontend::sasl_initial_response(mechanism, scram.message(), &mut buf).map_err(Error::encode)?;
    stream.send(buf.freeze()).await.map_err(Error::io)?;
    stream
        .send(FrontendMessage::Raw(buf.freeze()))
        .await
        .map_err(Error::io)?;

    let body = match stream.try_next().await.map_err(Error::io)? {
        Some(Message::AuthenticationSaslContinue(body)) => body,
@@ -263,7 +272,10 @@ where

    let mut buf = BytesMut::new();
    frontend::sasl_response(scram.message(), &mut buf).map_err(Error::encode)?;
    stream.send(buf.freeze()).await.map_err(Error::io)?;
    stream
        .send(FrontendMessage::Raw(buf.freeze()))
        .await
        .map_err(Error::io)?;

    let body = match stream.try_next().await.map_err(Error::io)? {
        Some(Message::AuthenticationSaslFinal(body)) => body,

@@ -1,23 +1,22 @@
use std::collections::{HashMap, VecDeque};
use std::future::Future;
use std::pin::Pin;
use std::task::{Context, Poll};

use bytes::BytesMut;
use fallible_iterator::FallibleIterator;
use futures_util::{Sink, StreamExt, ready};
use postgres_protocol2::message::backend::{Message, NoticeResponseBody};
use futures_util::{Sink, Stream, ready};
use postgres_protocol2::message::backend::Message;
use postgres_protocol2::message::frontend;
use tokio::io::{AsyncRead, AsyncWrite};
use tokio::sync::mpsc;
use tokio_util::codec::Framed;
use tokio_util::sync::PollSender;
use tracing::trace;
use tracing::{info, trace};

use crate::Error;
use crate::codec::{
    BackendMessage, BackendMessages, FrontendMessage, PostgresCodec, RecordNotices,
};
use crate::codec::{BackendMessage, BackendMessages, FrontendMessage, PostgresCodec};
use crate::error::DbError;
use crate::maybe_tls_stream::MaybeTlsStream;
use crate::{AsyncMessage, Error, Notification};

#[derive(PartialEq, Debug)]
enum State {
@@ -34,18 +33,18 @@ enum State {
/// occurred, or because its associated `Client` has dropped and all outstanding work has completed.
#[must_use = "futures do nothing unless polled"]
pub struct Connection<S, T> {
    stream: Framed<MaybeTlsStream<S, T>, PostgresCodec>,
    /// HACK: we need this in the Neon Proxy.
    pub stream: Framed<MaybeTlsStream<S, T>, PostgresCodec>,
    /// HACK: we need this in the Neon Proxy to forward params.
    pub parameters: HashMap<String, String>,

    sender: PollSender<BackendMessages>,
    receiver: mpsc::UnboundedReceiver<FrontendMessage>,
    notices: Option<RecordNotices>,

    pending_response: Option<BackendMessages>,
    pending_responses: VecDeque<BackendMessage>,
    state: State,
}

pub enum Never {}

impl<S, T> Connection<S, T>
where
    S: AsyncRead + AsyncWrite + Unpin,
@@ -53,44 +52,72 @@ where
{
    pub(crate) fn new(
        stream: Framed<MaybeTlsStream<S, T>, PostgresCodec>,
        pending_responses: VecDeque<BackendMessage>,
        parameters: HashMap<String, String>,
        sender: mpsc::Sender<BackendMessages>,
        receiver: mpsc::UnboundedReceiver<FrontendMessage>,
    ) -> Connection<S, T> {
        Connection {
            stream,
            parameters,
            sender: PollSender::new(sender),
            receiver,
            notices: None,
            pending_response: None,
            pending_responses,
            state: State::Active,
        }
    }

    fn poll_response(
        &mut self,
        cx: &mut Context<'_>,
    ) -> Poll<Option<Result<BackendMessage, Error>>> {
        if let Some(message) = self.pending_responses.pop_front() {
            trace!("retrying pending response");
            return Poll::Ready(Some(Ok(message)));
        }

        Pin::new(&mut self.stream)
            .poll_next(cx)
            .map(|o| o.map(|r| r.map_err(Error::io)))
    }

    /// Read and process messages from the connection to postgres.
    /// client <- postgres
    fn poll_read(&mut self, cx: &mut Context<'_>) -> Poll<Result<Never, Error>> {
    fn poll_read(&mut self, cx: &mut Context<'_>) -> Poll<Result<AsyncMessage, Error>> {
        loop {
            let messages = match self.pending_response.take() {
                Some(messages) => messages,
                None => {
                    let message = match self.stream.poll_next_unpin(cx) {
                        Poll::Pending => return Poll::Pending,
                        Poll::Ready(None) => return Poll::Ready(Err(Error::closed())),
                        Poll::Ready(Some(Err(e))) => return Poll::Ready(Err(Error::io(e))),
                        Poll::Ready(Some(Ok(message))) => message,
                    };

                    match message {
                        BackendMessage::Async(Message::NoticeResponse(body)) => {
                            self.handle_notice(body)?;
                            continue;
                        }
                        BackendMessage::Async(_) => continue,
                        BackendMessage::Normal { messages } => messages,
                    }
            let message = match self.poll_response(cx)? {
                Poll::Ready(Some(message)) => message,
                Poll::Ready(None) => return Poll::Ready(Err(Error::closed())),
                Poll::Pending => {
                    trace!("poll_read: waiting on response");
                    return Poll::Pending;
                }
            };

            let messages = match message {
                BackendMessage::Async(Message::NoticeResponse(body)) => {
                    let error = DbError::parse(&mut body.fields()).map_err(Error::parse)?;
                    return Poll::Ready(Ok(AsyncMessage::Notice(error)));
                }
                BackendMessage::Async(Message::NotificationResponse(body)) => {
                    let notification = Notification {
                        process_id: body.process_id(),
                        channel: body.channel().map_err(Error::parse)?.to_string(),
                        payload: body.message().map_err(Error::parse)?.to_string(),
                    };
                    return Poll::Ready(Ok(AsyncMessage::Notification(notification)));
                }
                BackendMessage::Async(Message::ParameterStatus(body)) => {
                    self.parameters.insert(
                        body.name().map_err(Error::parse)?.to_string(),
                        body.value().map_err(Error::parse)?.to_string(),
                    );
                    continue;
                }
                BackendMessage::Async(_) => unreachable!(),
                BackendMessage::Normal { messages } => messages,
            };

            match self.sender.poll_reserve(cx) {
                Poll::Ready(Ok(())) => {
                    let _ = self.sender.send_item(messages);
@@ -99,7 +126,8 @@ where
                    return Poll::Ready(Err(Error::closed()));
                }
                Poll::Pending => {
                    self.pending_response = Some(messages);
                    self.pending_responses
                        .push_back(BackendMessage::Normal { messages });
                    trace!("poll_read: waiting on sender");
                    return Poll::Pending;
                }
@@ -107,31 +135,6 @@ where
        }
    }

    fn handle_notice(&mut self, body: NoticeResponseBody) -> Result<(), Error> {
        let Some(notices) = &mut self.notices else {
            return Ok(());
        };

        let mut fields = body.fields();
        while let Some(field) = fields.next().map_err(Error::parse)? {
            // loop until we find the message field
            if field.type_() == b'M' {
                // if the message field is within the limit, send it.
                if let Some(new_limit) = notices.limit.checked_sub(field.value().len()) {
                    match notices.sender.send(field.value().into()) {
                        // set the new limit.
                        Ok(()) => notices.limit = new_limit,
                        // closed.
                        Err(_) => self.notices = None,
                    }
                }
                break;
            }
        }

        Ok(())
    }

    /// Fetch the next client request and enqueue the response sender.
    fn poll_request(&mut self, cx: &mut Context<'_>) -> Poll<Option<FrontendMessage>> {
        if self.receiver.is_closed() {
@@ -165,23 +168,21 @@ where

        match self.poll_request(cx) {
            // send the message to postgres
            Poll::Ready(Some(FrontendMessage::Raw(request))) => {
            Poll::Ready(Some(request)) => {
                Pin::new(&mut self.stream)
                    .start_send(request)
                    .map_err(Error::io)?;
            }
            Poll::Ready(Some(FrontendMessage::RecordNotices(notices))) => {
                self.notices = Some(notices)
            }
            // No more messages from the client, and no more responses to wait for.
            // Send a terminate message to postgres
            Poll::Ready(None) => {
                trace!("poll_write: at eof, terminating");
                let mut request = BytesMut::new();
                frontend::terminate(&mut request);
                let request = FrontendMessage::Raw(request.freeze());

                Pin::new(&mut self.stream)
                    .start_send(request.freeze())
                    .start_send(request)
                    .map_err(Error::io)?;

                trace!("poll_write: sent eof, closing");
@@ -230,17 +231,34 @@ where
        }
    }

    fn poll_message(&mut self, cx: &mut Context<'_>) -> Poll<Option<Result<Never, Error>>> {
    /// Returns the value of a runtime parameter for this connection.
    pub fn parameter(&self, name: &str) -> Option<&str> {
        self.parameters.get(name).map(|s| &**s)
    }

    /// Polls for asynchronous messages from the server.
    ///
    /// The server can send notices as well as notifications asynchronously to the client. Applications that wish to
    /// examine those messages should use this method to drive the connection rather than its `Future` implementation.
    pub fn poll_message(
        &mut self,
        cx: &mut Context<'_>,
    ) -> Poll<Option<Result<AsyncMessage, Error>>> {
        if self.state != State::Closing {
            // if the state is still active, try read from and write to postgres.
            let Poll::Pending = self.poll_read(cx)?;
            if self.poll_write(cx)?.is_ready() {
            let message = self.poll_read(cx)?;
            let closing = self.poll_write(cx)?;
            if let Poll::Ready(()) = closing {
                self.state = State::Closing;
            }

            if let Poll::Ready(message) = message {
                return Poll::Ready(Some(Ok(message)));
            }

            // poll_read returned Pending.
            // poll_write returned Pending or Ready(()).
            // if poll_write returned Ready(()), then we are waiting to read more data from postgres.
            // poll_write returned Pending or Ready(WriteReady::WaitingOnRead).
            // if poll_write returned Ready(WriteReady::WaitingOnRead), then we are waiting to read more data from postgres.
            if self.state != State::Closing {
                return Poll::Pending;
            }
@@ -262,9 +280,11 @@ where
    type Output = Result<(), Error>;

    fn poll(mut self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll<Result<(), Error>> {
        match self.poll_message(cx)? {
            Poll::Ready(None) => Poll::Ready(Ok(())),
            Poll::Pending => Poll::Pending,
        while let Some(message) = ready!(self.poll_message(cx)?) {
            if let AsyncMessage::Notice(notice) = message {
                info!("{}: {}", notice.severity(), notice.message());
            }
        }
        Poll::Ready(Ok(()))
    }
}
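
For illustration, an application that cares about the restored `AsyncMessage` stream can drive the connection through `poll_message` instead of awaiting it as a plain `Future`. A minimal sketch, assuming the `Notification` accessors and `DbError::message` from the surrounding code:

```rust
use std::future::poll_fn;

async fn drive_connection<S, T>(mut conn: Connection<S, T>) -> Result<(), Error>
where
    S: tokio::io::AsyncRead + tokio::io::AsyncWrite + Unpin,
    T: tokio::io::AsyncRead + tokio::io::AsyncWrite + Unpin,
{
    // poll_message yields async messages until the connection closes cleanly.
    while let Some(message) = poll_fn(|cx| conn.poll_message(cx)).await {
        match message? {
            AsyncMessage::Notification(n) => {
                println!("notify on {}: {}", n.channel(), n.payload());
            }
            AsyncMessage::Notice(notice) => eprintln!("notice: {}", notice.message()),
            _ => {} // AsyncMessage is #[non_exhaustive]
        }
    }
    Ok(())
}
```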

@@ -8,6 +8,7 @@ pub use crate::client::{Client, SocketConfig};
pub use crate::config::Config;
pub use crate::connect_raw::RawConnection;
pub use crate::connection::Connection;
use crate::error::DbError;
pub use crate::error::Error;
pub use crate::generic_client::GenericClient;
pub use crate::query::RowStream;
@@ -92,6 +93,21 @@ impl Notification {
    }
}

/// An asynchronous message from the server.
#[allow(clippy::large_enum_variant)]
#[derive(Debug, Clone)]
#[non_exhaustive]
pub enum AsyncMessage {
    /// A notice.
    ///
    /// Notices use the same format as errors, but aren't "errors" per-se.
    Notice(DbError),
    /// A notification.
    ///
    /// Connections can subscribe to notifications with the `LISTEN` command.
    Notification(Notification),
}

/// Message returned by the `SimpleQuery` stream.
#[derive(Debug)]
#[non_exhaustive]

@@ -47,7 +47,6 @@ tracing-subscriber = { workspace = true, features = ["json", "registry"] }
tracing-utils.workspace = true
rand.workspace = true
scopeguard.workspace = true
uuid.workspace = true
strum.workspace = true
strum_macros.workspace = true
walkdir.workspace = true

@@ -12,8 +12,7 @@ use jsonwebtoken::{
    Algorithm, DecodingKey, EncodingKey, Header, TokenData, Validation, decode, encode,
};
use pem::Pem;
use serde::{Deserialize, Deserializer, Serialize, de::DeserializeOwned};
use uuid::Uuid;
use serde::{Deserialize, Serialize, de::DeserializeOwned};

use crate::id::TenantId;

@@ -26,11 +25,6 @@ pub enum Scope {
    /// Provides access to all data for a specific tenant (specified in `struct Claims` below)
    // TODO: join these two?
    Tenant,
    /// Provides access to all data for a specific tenant, but based on endpoint ID. This token scope
    /// is only used by compute to fetch the spec for a specific endpoint. The spec contains a Tenant-scoped
    /// token authorizing access to all data of a tenant, so the spec-fetch API requires a TenantEndpoint
    /// scope token to ensure that untrusted compute nodes can't fetch spec for arbitrary endpoints.
    TenantEndpoint,
    /// Provides blanket access to all tenants on the pageserver plus pageserver-wide APIs.
    /// Should only be used e.g. for status check/tenant creation/list.
    PageServerApi,
@@ -57,43 +51,17 @@ pub enum Scope {
    ControllerPeer,
}

fn deserialize_empty_string_as_none_uuid<'de, D>(deserializer: D) -> Result<Option<Uuid>, D::Error>
where
    D: Deserializer<'de>,
{
    let opt = Option::<String>::deserialize(deserializer)?;
    match opt.as_deref() {
        Some("") => Ok(None),
        Some(s) => Uuid::parse_str(s)
            .map(Some)
            .map_err(serde::de::Error::custom),
        None => Ok(None),
    }
}

/// JWT payload. See docs/authentication.md for the format
#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
pub struct Claims {
    #[serde(default)]
    pub tenant_id: Option<TenantId>,
    #[serde(
        default,
        skip_serializing_if = "Option::is_none",
        // Neon control plane includes this field as empty in the claims.
        // Consider it None in those cases.
        deserialize_with = "deserialize_empty_string_as_none_uuid"
    )]
    pub endpoint_id: Option<Uuid>,
    pub scope: Scope,
}

impl Claims {
    pub fn new(tenant_id: Option<TenantId>, scope: Scope) -> Self {
        Self {
            tenant_id,
            scope,
            endpoint_id: None,
        }
        Self { tenant_id, scope }
    }
}
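
For illustration, after this change `Claims` is back to the two documented fields, so a round-trip looks like this minimal sketch (serde_json and the error handling are used purely for illustration):

```rust
use std::str::FromStr;

fn claims_roundtrip() -> anyhow::Result<()> {
    let claims = Claims::new(
        Some(TenantId::from_str("3d1f7595b468230304e0b73cecbcb081")?),
        Scope::Tenant,
    );
    // With endpoint_id gone, the serialized payload carries only tenant_id and scope.
    let json = serde_json::to_string(&claims)?;
    let back: Claims = serde_json::from_str(&json)?;
    assert_eq!(back, claims);
    Ok(())
}
```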

@@ -244,7 +212,6 @@ MC4CAQAwBQYDK2VwBCIEID/Drmc1AA6U/znNRWpF3zEGegOATQxfkdWxitcOMsIH
    let expected_claims = Claims {
        tenant_id: Some(TenantId::from_str("3d1f7595b468230304e0b73cecbcb081").unwrap()),
        scope: Scope::Tenant,
        endpoint_id: None,
    };

    // A test token containing the following payload, signed using TEST_PRIV_KEY_ED25519:
@@ -273,7 +240,6 @@ MC4CAQAwBQYDK2VwBCIEID/Drmc1AA6U/znNRWpF3zEGegOATQxfkdWxitcOMsIH
    let claims = Claims {
        tenant_id: Some(TenantId::from_str("3d1f7595b468230304e0b73cecbcb081").unwrap()),
        scope: Scope::Tenant,
        endpoint_id: None,
    };

    let pem = pem::parse(TEST_PRIV_KEY_ED25519).unwrap();

@@ -25,12 +25,6 @@ pub struct ShardIndex {
    pub shard_count: ShardCount,
}

/// Stripe size as number of pages.
///
/// NB: don't implement Default, so callers don't lazily use it by mistake. See DEFAULT_STRIPE_SIZE.
#[derive(Clone, Copy, Serialize, Deserialize, Eq, PartialEq, Debug)]
pub struct ShardStripeSize(pub u32);

/// Formatting helper, for generating the `shard_id` label in traces.
pub struct ShardSlug<'a>(&'a TenantShardId);

@@ -183,12 +177,6 @@ impl std::fmt::Display for ShardCount {
    }
}

impl std::fmt::Display for ShardStripeSize {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        self.0.fmt(f)
    }
}

impl std::fmt::Display for ShardSlug<'_> {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        write!(

@@ -431,7 +431,7 @@ pub fn empty_shmem() -> crate::bindings::WalproposerShmemState {
    let empty_wal_rate_limiter = crate::bindings::WalRateLimiter {
        should_limit: crate::bindings::pg_atomic_uint32 { value: 0 },
        sent_bytes: 0,
        last_recorded_time_us: crate::bindings::pg_atomic_uint64 { value: 0 },
        last_recorded_time_us: 0,
    };

    crate::bindings::WalproposerShmemState {

@@ -873,22 +873,6 @@ impl Client {
        .map_err(Error::ReceiveBody)
    }

    pub async fn reset_alert_gauges(&self) -> Result<()> {
        let uri = format!(
            "{}/hadron-internal/reset_alert_gauges",
            self.mgmt_api_endpoint
        );
        self.start_request(Method::POST, uri)
            .send()
            .await
            .map_err(Error::SendRequest)?
            .error_from_body()
            .await?
            .json()
            .await
            .map_err(Error::ReceiveBody)
    }

    pub async fn wait_lsn(
        &self,
        tenant_shard_id: TenantShardId,

@@ -16,9 +16,10 @@ use crate::pool::{ChannelPool, ClientGuard, ClientPool, StreamGuard, StreamPool}
use crate::retry::Retry;
use crate::split::GetPageSplitter;
use compute_api::spec::PageserverProtocol;
use pageserver_api::shard::ShardStripeSize;
use pageserver_page_api as page_api;
use utils::id::{TenantId, TimelineId};
use utils::shard::{ShardCount, ShardIndex, ShardNumber, ShardStripeSize};
use utils::shard::{ShardCount, ShardIndex, ShardNumber};

/// Max number of concurrent clients per channel (i.e. TCP connection). New channels will be spun up
/// when full.
@@ -140,8 +141,8 @@ impl PageserverClient {
        if !old.count.is_unsharded() && shard_spec.stripe_size != old.stripe_size {
            return Err(anyhow!(
                "can't change stripe size from {} to {}",
                old.stripe_size.expect("always Some when sharded"),
                shard_spec.stripe_size.expect("always Some when sharded")
                old.stripe_size,
                shard_spec.stripe_size
            ));
        }

@@ -156,6 +157,23 @@ impl PageserverClient {
        Ok(())
    }

    /// Returns whether a relation exists.
    #[instrument(skip_all, fields(rel=%req.rel, lsn=%req.read_lsn))]
    pub async fn check_rel_exists(
        &self,
        req: page_api::CheckRelExistsRequest,
    ) -> tonic::Result<page_api::CheckRelExistsResponse> {
        debug!("sending request: {req:?}");
        let resp = Self::with_retries(CALL_TIMEOUT, async |_| {
            // Relation metadata is only available on shard 0.
            let mut client = self.shards.load_full().get_zero().client().await?;
            Self::with_timeout(REQUEST_TIMEOUT, client.check_rel_exists(req)).await
        })
        .await?;
        debug!("received response: {resp:?}");
        Ok(resp)
    }

    /// Returns the total size of a database, as # of bytes.
    #[instrument(skip_all, fields(db_oid=%req.db_oid, lsn=%req.read_lsn))]
    pub async fn get_db_size(
@@ -231,15 +249,13 @@ impl PageserverClient {
        // Fast path: request is for a single shard.
        if let Some(shard_id) =
            GetPageSplitter::for_single_shard(&req, shards.count, shards.stripe_size)
                .map_err(|err| tonic::Status::internal(err.to_string()))?
        {
            return Self::get_page_with_shard(req, shards.get(shard_id)?).await;
        }

        // Request spans multiple shards. Split it, dispatch concurrent per-shard requests, and
        // reassemble the responses.
        let mut splitter = GetPageSplitter::split(req, shards.count, shards.stripe_size)
            .map_err(|err| tonic::Status::internal(err.to_string()))?;
        let mut splitter = GetPageSplitter::split(req, shards.count, shards.stripe_size);

        let mut shard_requests = FuturesUnordered::new();
        for (shard_id, shard_req) in splitter.drain_requests() {
@@ -249,14 +265,10 @@ impl PageserverClient {
        }

        while let Some((shard_id, shard_response)) = shard_requests.next().await.transpose()? {
            splitter
                .add_response(shard_id, shard_response)
                .map_err(|err| tonic::Status::internal(err.to_string()))?;
            splitter.add_response(shard_id, shard_response)?;
        }

        splitter
            .get_response()
            .map_err(|err| tonic::Status::internal(err.to_string()))
        splitter.get_response()
    }

    /// Fetches pages on the given shard. Does not retry internally.
@@ -384,14 +396,12 @@ pub struct ShardSpec {
    /// NB: this is 0 for unsharded tenants, following `ShardIndex::unsharded()` convention.
    count: ShardCount,
    /// The stripe size for these shards.
    ///
    /// INVARIANT: None for unsharded tenants, Some for sharded.
    stripe_size: Option<ShardStripeSize>,
    stripe_size: ShardStripeSize,
}

impl ShardSpec {
    /// Creates a new shard spec with the given URLs and stripe size. All shards must be given.
    /// The stripe size must be Some for sharded tenants, or None for unsharded tenants.
    /// The stripe size may be omitted for unsharded tenants.
    pub fn new(
        urls: HashMap<ShardIndex, String>,
        stripe_size: Option<ShardStripeSize>,
@@ -404,13 +414,11 @@ impl ShardSpec {
            n => ShardCount::new(n as u8),
        };

        // Validate the stripe size.
        // Determine the stripe size. It doesn't matter for unsharded tenants.
        if stripe_size.is_none() && !count.is_unsharded() {
            return Err(anyhow!("stripe size must be given for sharded tenants"));
        }
        if stripe_size.is_some() && count.is_unsharded() {
            return Err(anyhow!("stripe size can't be given for unsharded tenants"));
        }
        let stripe_size = stripe_size.unwrap_or_default();

        // Validate the shard spec.
        for (shard_id, url) in &urls {
@@ -450,10 +458,8 @@ struct Shards {
    ///
    /// NB: this is 0 for unsharded tenants, following `ShardIndex::unsharded()` convention.
    count: ShardCount,
    /// The stripe size.
    ///
    /// INVARIANT: None for unsharded tenants, Some for sharded.
    stripe_size: Option<ShardStripeSize>,
    /// The stripe size. Only used for sharded tenants.
    stripe_size: ShardStripeSize,
}
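
For illustration, the new `ShardSpec::new` contract keeps the hard error for sharded tenants but lets unsharded callers omit the stripe size and fall back to the default. A minimal sketch under that reading (the URLs and setup are illustrative only):

```rust
use std::collections::HashMap;

fn build_specs() -> anyhow::Result<()> {
    // Unsharded: stripe size may now be None; ShardStripeSize::default() is used.
    let _unsharded = ShardSpec::new(
        HashMap::from([(ShardIndex::unsharded(), "grpc://127.0.0.1:51051".to_string())]),
        None,
    )?;

    // Sharded: omitting the stripe size is still a hard error.
    let sharded = ShardSpec::new(
        HashMap::from([
            (
                ShardIndex::new(ShardNumber(0), ShardCount::new(2)),
                "grpc://ps0:51051".to_string(),
            ),
            (
                ShardIndex::new(ShardNumber(1), ShardCount::new(2)),
                "grpc://ps1:51051".to_string(),
            ),
        ]),
        None,
    );
    assert!(sharded.is_err());
    Ok(())
}
```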

impl Shards {

@@ -1,12 +1,11 @@
use std::collections::HashMap;

use anyhow::anyhow;
use bytes::Bytes;

use pageserver_api::key::rel_block_to_key;
use pageserver_api::shard::key_to_shard_number;
use pageserver_api::shard::{ShardStripeSize, key_to_shard_number};
use pageserver_page_api as page_api;
use utils::shard::{ShardCount, ShardIndex, ShardStripeSize};
use utils::shard::{ShardCount, ShardIndex, ShardNumber};

/// Splits GetPageRequests that straddle shard boundaries and assembles the responses.
/// TODO: add tests for this.
@@ -26,54 +25,43 @@ impl GetPageSplitter {
    pub fn for_single_shard(
        req: &page_api::GetPageRequest,
        count: ShardCount,
        stripe_size: Option<ShardStripeSize>,
    ) -> anyhow::Result<Option<ShardIndex>> {
        stripe_size: ShardStripeSize,
    ) -> Option<ShardIndex> {
        // Fast path: unsharded tenant.
        if count.is_unsharded() {
            return Ok(Some(ShardIndex::unsharded()));
            return Some(ShardIndex::unsharded());
        }

        let Some(stripe_size) = stripe_size else {
            return Err(anyhow!("stripe size must be given for sharded tenants"));
        };

        // Find the first page's shard, for comparison.
        // Find the first page's shard, for comparison. If there are no pages, just return the first
        // shard (caller likely checked already, otherwise the server will reject it).
        let Some(&first_page) = req.block_numbers.first() else {
            return Err(anyhow!("no block numbers in request"));
            return Some(ShardIndex::new(ShardNumber(0), count));
        };
        let key = rel_block_to_key(req.rel, first_page);
        let shard_number = key_to_shard_number(count, stripe_size, &key);

        Ok(req
            .block_numbers
        req.block_numbers
            .iter()
            .skip(1) // computed above
            .all(|&blkno| {
                let key = rel_block_to_key(req.rel, blkno);
                key_to_shard_number(count, stripe_size, &key) == shard_number
            })
            .then_some(ShardIndex::new(shard_number, count)))
            .then_some(ShardIndex::new(shard_number, count))
    }
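
For illustration, the single-shard fast path above boils down to mapping every requested block to a shard and checking that they all agree. A hypothetical standalone version of that check, assuming the same pageserver_api helpers:

```rust
fn all_same_shard(
    rel: RelTag,
    blocks: &[u32],
    count: ShardCount,
    stripe_size: ShardStripeSize,
) -> bool {
    let mut shards = blocks
        .iter()
        .map(|&blkno| key_to_shard_number(count, stripe_size, &rel_block_to_key(rel, blkno)));
    let first = shards.next();
    // A request with zero or one block trivially lands on a single shard.
    shards.all(|shard| Some(shard) == first)
}
```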

    /// Splits the given request.
    pub fn split(
        req: page_api::GetPageRequest,
        count: ShardCount,
        stripe_size: Option<ShardStripeSize>,
    ) -> anyhow::Result<Self> {
        stripe_size: ShardStripeSize,
    ) -> Self {
        // The caller should make sure we don't split requests unnecessarily.
        debug_assert!(
            Self::for_single_shard(&req, count, stripe_size)?.is_none(),
            Self::for_single_shard(&req, count, stripe_size).is_none(),
            "unnecessary request split"
        );

        if count.is_unsharded() {
            return Err(anyhow!("unsharded tenant, no point in splitting request"));
        }
        let Some(stripe_size) = stripe_size else {
            return Err(anyhow!("stripe size must be given for sharded tenants"));
        };

        // Split the requests by shard index.
        let mut requests = HashMap::with_capacity(2); // common case
        let mut block_shards = Vec::with_capacity(req.block_numbers.len());
@@ -115,11 +103,11 @@ impl GetPageSplitter {
                .collect(),
        };

        Ok(Self {
        Self {
            requests,
            response,
            block_shards,
        })
        }
    }

    /// Drains the per-shard requests, moving them out of the splitter to avoid extra allocations.
@@ -136,30 +124,21 @@ impl GetPageSplitter {
        &mut self,
        shard_id: ShardIndex,
        response: page_api::GetPageResponse,
    ) -> anyhow::Result<()> {
    ) -> tonic::Result<()> {
        // The caller should already have converted status codes into tonic::Status.
        if response.status_code != page_api::GetPageStatusCode::Ok {
            return Err(anyhow!(
            return Err(tonic::Status::internal(format!(
                "unexpected non-OK response for shard {shard_id}: {} {}",
                response.status_code,
                response.reason.unwrap_or_default()
            ));
            )));
        }

        if response.request_id != self.response.request_id {
            return Err(anyhow!(
            return Err(tonic::Status::internal(format!(
                "response ID mismatch for shard {shard_id}: expected {}, got {}",
                self.response.request_id,
                response.request_id
            ));
        }

        if response.request_id != self.response.request_id {
            return Err(anyhow!(
                "response ID mismatch for shard {shard_id}: expected {}, got {}",
                self.response.request_id,
                response.request_id
            ));
                self.response.request_id, response.request_id
            )));
        }

        // Place the shard response pages into the assembled response, in request order.
@@ -171,26 +150,27 @@ impl GetPageSplitter {
        }

        let Some(slot) = self.response.pages.get_mut(i) else {
            return Err(anyhow!("no block_shards slot {i} for shard {shard_id}"));
            return Err(tonic::Status::internal(format!(
                "no block_shards slot {i} for shard {shard_id}"
            )));
        };
        let Some(page) = pages.next() else {
            return Err(anyhow!(
            return Err(tonic::Status::internal(format!(
                "missing page {} in shard {shard_id} response",
                slot.block_number
            ));
            )));
        };
        if page.block_number != slot.block_number {
            return Err(anyhow!(
            return Err(tonic::Status::internal(format!(
                "shard {shard_id} returned wrong page at index {i}, expected {} got {}",
                slot.block_number,
                page.block_number
            ));
                slot.block_number, page.block_number
            )));
        }
        if !slot.image.is_empty() {
            return Err(anyhow!(
            return Err(tonic::Status::internal(format!(
                "shard {shard_id} returned duplicate page {} at index {i}",
                slot.block_number
            ));
            )));
        }

        *slot = page;
@@ -198,10 +178,10 @@ impl GetPageSplitter {

        // Make sure we've consumed all pages from the shard response.
        if let Some(extra_page) = pages.next() {
            return Err(anyhow!(
            return Err(tonic::Status::internal(format!(
                "shard {shard_id} returned extra page: {}",
                extra_page.block_number
            ));
            )));
        }

        Ok(())
@@ -209,18 +189,18 @@ impl GetPageSplitter {

    /// Fetches the final, assembled response.
    #[allow(clippy::result_large_err)]
    pub fn get_response(self) -> anyhow::Result<page_api::GetPageResponse> {
    pub fn get_response(self) -> tonic::Result<page_api::GetPageResponse> {
        // Check that the response is complete.
        for (i, page) in self.response.pages.iter().enumerate() {
            if page.image.is_empty() {
                return Err(anyhow!(
                return Err(tonic::Status::internal(format!(
                    "missing page {} for shard {}",
                    page.block_number,
                    self.block_shards
                        .get(i)
                        .map(|s| s.to_string())
                        .unwrap_or_else(|| "?".to_string())
                ));
                )));
            }
        }

@@ -4,7 +4,7 @@ use anyhow::Context;
use clap::Parser;
use pageserver_api::key::Key;
use pageserver_api::reltag::{BlockNumber, RelTag, SlruKind};
use pageserver_api::shard::{DEFAULT_STRIPE_SIZE, ShardCount, ShardStripeSize};
use pageserver_api::shard::{ShardCount, ShardStripeSize};

#[derive(Parser)]
pub(super) struct DescribeKeyCommand {
@@ -128,9 +128,7 @@ impl DescribeKeyCommand {
            // seeing the sharding placement might be confusing, so leave it out unless shard
            // count was given.

            let stripe_size = stripe_size
                .map(ShardStripeSize)
                .unwrap_or(DEFAULT_STRIPE_SIZE);
            let stripe_size = stripe_size.map(ShardStripeSize).unwrap_or_default();
            println!(
                "# placement with shard_count: {} and stripe_size: {}:",
                shard_count.0, stripe_size.0

@@ -17,11 +17,11 @@
// grpcurl \
//   -plaintext \
//   -H "neon-tenant-id: 7c4a1f9e3bd6470c8f3e21a65bd2e980" \
//   -H "neon-shard-id: 0000" \
//   -H "neon-shard-id: 0b10" \
//   -H "neon-timeline-id: f08c4e9a2d5f76b1e3a7c2d8910f4b3e" \
//   -H "authorization: Bearer $JWT" \
//   -d '{"read_lsn": {"request_lsn": 100000000, "not_modified_since_lsn": 1}, "db_oid": 1}' \
//   localhost:51051 page_api.PageService/GetDbSize
//   -d '{"read_lsn": {"request_lsn": 1234567890}, "rel": {"spc_oid": 1663, "db_oid": 1234, "rel_number": 5678, "fork_number": 0}}'
//   localhost:51051 page_api.PageService/CheckRelExists
// ```
//
// TODO: consider adding neon-compute-mode ("primary", "static", "replica").
@@ -38,8 +38,8 @@ package page_api;
import "google/protobuf/timestamp.proto";

service PageService {
  // NB: unlike libpq, there is no CheckRelExists in gRPC, at the compute team's request. Instead,
  // use GetRelSize with allow_missing=true to check existence.
  // Returns whether a relation exists.
  rpc CheckRelExists(CheckRelExistsRequest) returns (CheckRelExistsResponse);

  // Fetches a base backup.
  rpc GetBaseBackup (GetBaseBackupRequest) returns (stream GetBaseBackupResponseChunk);
@@ -97,6 +97,17 @@ message RelTag {
  uint32 fork_number = 4;
}

// Checks whether a relation exists, at the given LSN. Only valid on shard 0,
// other shards will error.
message CheckRelExistsRequest {
  ReadLsn read_lsn = 1;
  RelTag rel = 2;
}

message CheckRelExistsResponse {
  bool exists = 1;
}

// Requests a base backup.
message GetBaseBackupRequest {
  // The LSN to fetch the base backup at. 0 or absent means the latest LSN known to the Pageserver.
@@ -249,15 +260,10 @@ enum GetPageStatusCode {
message GetRelSizeRequest {
  ReadLsn read_lsn = 1;
  RelTag rel = 2;
  // If true, return missing=true for missing relations instead of a NotFound error.
  bool allow_missing = 3;
}

message GetRelSizeResponse {
  // The number of blocks in the relation.
  uint32 num_blocks = 1;
  // If allow_missing=true, this is true for missing relations.
  bool missing = 2;
}

// Requests an SLRU segment. Only valid on shard 0, other shards will error.

@@ -69,6 +69,16 @@ impl Client {
        Ok(Self { inner })
    }

    /// Returns whether a relation exists.
    pub async fn check_rel_exists(
        &mut self,
        req: CheckRelExistsRequest,
    ) -> tonic::Result<CheckRelExistsResponse> {
        let req = proto::CheckRelExistsRequest::from(req);
        let resp = self.inner.check_rel_exists(req).await?.into_inner();
        Ok(resp.into())
    }
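
For illustration, a minimal sketch of calling the restored RPC through this client; `CheckRelExistsResponse` is a plain `bool` alias, so the flag comes back directly (the `rel_exists` wrapper is hypothetical):

```rust
async fn rel_exists(
    client: &mut Client,
    req: CheckRelExistsRequest,
) -> tonic::Result<bool> {
    // The From/TryFrom conversions in the model module handle the proto
    // round-trip; callers only see the domain types.
    client.check_rel_exists(req).await
}
```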

    /// Fetches a base backup.
    pub async fn get_base_backup(
        &mut self,
@@ -104,8 +114,7 @@ impl Client {
        Ok(resps.and_then(|resp| ready(GetPageResponse::try_from(resp).map_err(|err| err.into()))))
    }

    /// Returns the size of a relation as # of blocks, or None if allow_missing=true and the
    /// relation does not exist.
    /// Returns the size of a relation, as # of blocks.
    pub async fn get_rel_size(
        &mut self,
        req: GetRelSizeRequest,

@@ -139,6 +139,50 @@ impl From<RelTag> for proto::RelTag {
    }
}

/// Checks whether a relation exists, at the given LSN. Only valid on shard 0, other shards error.
#[derive(Clone, Copy, Debug)]
pub struct CheckRelExistsRequest {
    pub read_lsn: ReadLsn,
    pub rel: RelTag,
}

impl TryFrom<proto::CheckRelExistsRequest> for CheckRelExistsRequest {
    type Error = ProtocolError;

    fn try_from(pb: proto::CheckRelExistsRequest) -> Result<Self, Self::Error> {
        Ok(Self {
            read_lsn: pb
                .read_lsn
                .ok_or(ProtocolError::Missing("read_lsn"))?
                .try_into()?,
            rel: pb.rel.ok_or(ProtocolError::Missing("rel"))?.try_into()?,
        })
    }
}

impl From<CheckRelExistsRequest> for proto::CheckRelExistsRequest {
    fn from(request: CheckRelExistsRequest) -> Self {
        Self {
            read_lsn: Some(request.read_lsn.into()),
            rel: Some(request.rel.into()),
        }
    }
}

pub type CheckRelExistsResponse = bool;

impl From<proto::CheckRelExistsResponse> for CheckRelExistsResponse {
    fn from(pb: proto::CheckRelExistsResponse) -> Self {
        pb.exists
    }
}

impl From<CheckRelExistsResponse> for proto::CheckRelExistsResponse {
    fn from(exists: CheckRelExistsResponse) -> Self {
        Self { exists }
    }
}

/// Requests a base backup.
#[derive(Clone, Copy, Debug)]
pub struct GetBaseBackupRequest {
@@ -663,8 +707,6 @@ impl From<GetPageStatusCode> for tonic::Code {
pub struct GetRelSizeRequest {
    pub read_lsn: ReadLsn,
    pub rel: RelTag,
    /// If true, return missing=true for missing relations instead of a NotFound error.
    pub allow_missing: bool,
}

impl TryFrom<proto::GetRelSizeRequest> for GetRelSizeRequest {
@@ -677,7 +719,6 @@ impl TryFrom<proto::GetRelSizeRequest> for GetRelSizeRequest {
                .ok_or(ProtocolError::Missing("read_lsn"))?
                .try_into()?,
            rel: proto.rel.ok_or(ProtocolError::Missing("rel"))?.try_into()?,
            allow_missing: proto.allow_missing,
        })
    }
}
@@ -687,29 +728,21 @@ impl From<GetRelSizeRequest> for proto::GetRelSizeRequest {
        Self {
            read_lsn: Some(request.read_lsn.into()),
            rel: Some(request.rel.into()),
            allow_missing: request.allow_missing,
        }
    }
}

/// The size of a relation as number of blocks, or None if `allow_missing=true` and the relation
/// does not exist.
///
/// INVARIANT: never None if `allow_missing=false` (returns `NotFound` error instead).
pub type GetRelSizeResponse = Option<u32>;
pub type GetRelSizeResponse = u32;

impl From<proto::GetRelSizeResponse> for GetRelSizeResponse {
    fn from(pb: proto::GetRelSizeResponse) -> Self {
        (!pb.missing).then_some(pb.num_blocks)
    fn from(proto: proto::GetRelSizeResponse) -> Self {
        proto.num_blocks
    }
}

impl From<GetRelSizeResponse> for proto::GetRelSizeResponse {
    fn from(resp: GetRelSizeResponse) -> Self {
        Self {
            num_blocks: resp.unwrap_or_default(),
            missing: resp.is_none(),
        }
    fn from(num_blocks: GetRelSizeResponse) -> Self {
        Self { num_blocks }
    }
}
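
For illustration, with `allow_missing` gone the response type collapses to a bare block count, and the proto conversion becomes a plain field copy in both directions. A minimal round-trip sketch, assuming the generated `proto::GetRelSizeResponse` now carries only `num_blocks`:

```rust
fn rel_size_roundtrip() {
    let wire = proto::GetRelSizeResponse { num_blocks: 42 };
    let size: GetRelSizeResponse = wire.into();
    assert_eq!(size, 42);
    assert_eq!(proto::GetRelSizeResponse::from(size).num_blocks, 42);
}
```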
|
||||
|
||||
|
||||
@@ -20,8 +20,7 @@ pub fn check_permission(claims: &Claims, tenant_id: Option<TenantId>) -> Result<
|
||||
| Scope::GenerationsApi
|
||||
| Scope::Infra
|
||||
| Scope::Scrubber
|
||||
| Scope::ControllerPeer
|
||||
| Scope::TenantEndpoint,
|
||||
| Scope::ControllerPeer,
|
||||
_,
|
||||
) => Err(AuthError(
|
||||
format!(
|
||||
|
||||
@@ -1,5 +1,4 @@
|
||||
use std::collections::HashMap;
|
||||
use std::net::IpAddr;
|
||||
|
||||
use futures::Future;
|
||||
use pageserver_api::config::NodeMetadata;
|
||||
@@ -17,7 +16,7 @@ use tokio_util::sync::CancellationToken;
|
||||
use url::Url;
|
||||
use utils::generation::Generation;
|
||||
use utils::id::{NodeId, TimelineId};
|
||||
use utils::{backoff, failpoint_support, ip_address};
|
||||
use utils::{backoff, failpoint_support};
|
||||
|
||||
use crate::config::PageServerConf;
|
||||
use crate::virtual_file::on_fatal_io_error;
|
||||
@@ -28,7 +27,6 @@ pub struct StorageControllerUpcallClient {
|
||||
http_client: reqwest::Client,
|
||||
base_url: Url,
|
||||
node_id: NodeId,
|
||||
node_ip_addr: Option<IpAddr>,
|
||||
cancel: CancellationToken,
|
||||
}
|
||||
|
||||
@@ -93,18 +91,11 @@ impl StorageControllerUpcallClient {
|
||||
);
|
||||
}
|
||||
|
||||
// Intentionally panics if we encountered any errors parsing or reading the IP address.
|
||||
// Note that if the required environment variable is not set, `read_node_ip_addr_from_env` returns `Ok(None)`
|
||||
// instead of an error.
|
||||
let node_ip_addr =
|
||||
ip_address::read_node_ip_addr_from_env().expect("Error reading node IP address.");
|
||||
|
||||
Self {
|
||||
http_client: client.build().expect("Failed to construct HTTP client"),
|
||||
base_url: url,
|
||||
node_id: conf.id,
|
||||
cancel: cancel.clone(),
|
||||
node_ip_addr,
|
||||
}
|
||||
}
|
||||
|
||||
@@ -202,8 +193,8 @@ impl StorageControllerUpcallApi for StorageControllerUpcallClient {
|
||||
listen_http_addr: m.http_host,
|
||||
listen_http_port: m.http_port,
|
||||
listen_https_port: m.https_port,
|
||||
node_ip_addr: self.node_ip_addr,
|
||||
availability_zone_id: az_id.expect("Checked above"),
|
||||
node_ip_addr: None,
|
||||
})
|
||||
}
|
||||
Err(e) => {
|
||||
|
||||
@@ -2357,7 +2357,6 @@ async fn timeline_compact_handler(
|
||||
flags,
|
||||
sub_compaction,
|
||||
sub_compaction_max_job_size_mb,
|
||||
gc_compaction_do_metadata_compaction: false,
|
||||
};
|
||||
|
||||
let scheduled = compact_request
|
||||
|
||||
@@ -1636,10 +1636,9 @@ impl PageServerHandler {
|
||||
let (shard, ctx) = upgrade_handle_and_set_context!(shard);
|
||||
(
|
||||
vec![
|
||||
Self::handle_get_nblocks_request(&shard, &req, false, &ctx)
|
||||
Self::handle_get_nblocks_request(&shard, &req, &ctx)
|
||||
.instrument(span.clone())
|
||||
.await
|
||||
.map(|msg| msg.expect("allow_missing=false"))
|
||||
.map(|msg| (PagestreamBeMessage::Nblocks(msg), timer, ctx))
|
||||
.map_err(|err| BatchedPageStreamError { err, req: req.hdr }),
|
||||
],
|
||||
@@ -2304,16 +2303,12 @@ impl PageServerHandler {
|
||||
Ok(PagestreamExistsResponse { req: *req, exists })
|
||||
}
|
||||
|
||||
/// If `allow_missing` is true, returns None instead of Err on missing relations. Otherwise,
|
||||
/// never returns None. It is only supported by the gRPC protocol, so we pass it separately to
|
||||
/// avoid changing the libpq protocol types.
|
||||
#[instrument(skip_all, fields(shard_id))]
|
||||
async fn handle_get_nblocks_request(
|
||||
timeline: &Timeline,
|
||||
req: &PagestreamNblocksRequest,
|
||||
allow_missing: bool,
|
||||
ctx: &RequestContext,
|
||||
) -> Result<Option<PagestreamNblocksResponse>, PageStreamError> {
|
||||
) -> Result<PagestreamNblocksResponse, PageStreamError> {
|
||||
let latest_gc_cutoff_lsn = timeline.get_applied_gc_cutoff_lsn();
|
||||
let lsn = Self::wait_or_get_last_lsn(
|
||||
timeline,
|
||||
@@ -2325,25 +2320,20 @@ impl PageServerHandler {
        .await?;

        let n_blocks = timeline
            .get_rel_size_in_reldir(
            .get_rel_size(
                req.rel,
                Version::LsnRange(LsnRange {
                    effective_lsn: lsn,
                    request_lsn: req.hdr.request_lsn,
                }),
                None,
                allow_missing,
                ctx,
            )
            .await?;
        let Some(n_blocks) = n_blocks else {
            return Ok(None);
        };

        Ok(Some(PagestreamNblocksResponse {
        Ok(PagestreamNblocksResponse {
            req: *req,
            n_blocks,
        }))
        })
    }

    #[instrument(skip_all, fields(shard_id))]
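A minimal sketch of the allow_missing calling convention removed above, with placeholder types (only the Result<Option<_>, _> shape mirrors the real code): gRPC callers pass allow_missing=true and receive None for a missing relation, while libpq callers pass false and may treat None as unreachable.

// Placeholder types; illustrative only.
fn get_nblocks_sketch(rel_exists: bool, allow_missing: bool) -> Result<Option<u32>, String> {
    if !rel_exists {
        return if allow_missing {
            Ok(None) // gRPC: a missing relation is a valid answer
        } else {
            Err("relation not found".into()) // libpq: a missing relation is an error
        };
    }
    Ok(Some(42)) // stand-in for the actual block-count lookup
}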
@@ -3228,25 +3218,13 @@ where
pub struct GrpcPageServiceHandler {
    tenant_manager: Arc<TenantManager>,
    ctx: RequestContext,

    /// Cancelled to shut down the server. Tonic will shut down in response to this, but wait for
    /// in-flight requests to complete. Any tasks we spawn ourselves must respect this token.
    cancel: CancellationToken,

    /// Any tasks we spawn ourselves should clone this gate guard, so that we can wait for them to
    /// complete during shutdown. Request handlers implicitly hold this guard already.
    gate_guard: GateGuard,

    /// `get_vectored` concurrency setting.
    get_vectored_concurrent_io: GetVectoredConcurrentIo,
}

impl GrpcPageServiceHandler {
    /// Spawns a gRPC server for the page service.
    ///
    /// Returns a `CancellableTask` handle that can be used to shut down the server. It waits for
    /// any in-flight requests and tasks to complete first.
    ///
    /// TODO: this doesn't support TLS. We need TLS reloading via ReloadingCertificateResolver, so we
    /// need to reimplement the TCP+TLS accept loop ourselves.
    pub fn spawn(
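The cancel-then-drain shutdown that the cancel and gate_guard fields implement can be sketched with tokio_util's TaskTracker standing in for Neon's Gate/GateGuard (an assumed equivalence: both wait for every registered task to finish):

use tokio_util::{sync::CancellationToken, task::TaskTracker};

#[tokio::main]
async fn main() {
    let cancel = CancellationToken::new();
    let tracker = TaskTracker::new(); // plays the role of Gate

    for i in 0..4 {
        let cancel = cancel.clone();
        tracker.spawn(async move {
            cancel.cancelled().await; // spawned tasks must respect the token
            println!("task {i} drained");
        });
    }

    cancel.cancel();      // 1. signal shutdown
    tracker.close();      // 2. refuse new tasks
    tracker.wait().await; // 3. wait for in-flight tasks, like gate.close().await
}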
@@ -3256,15 +3234,12 @@ impl GrpcPageServiceHandler {
        get_vectored_concurrent_io: GetVectoredConcurrentIo,
        listener: std::net::TcpListener,
    ) -> anyhow::Result<CancellableTask> {
        // Set up a cancellation token for shutting down the server, and a gate to wait for all
        // requests and spawned tasks to complete.
        let cancel = CancellationToken::new();
        let gate = Gate::default();

        let ctx = RequestContextBuilder::new(TaskKind::PageRequestHandler)
            .download_behavior(DownloadBehavior::Download)
            .perf_span_dispatch(perf_trace_dispatch)
            .detached_child();
        let gate = Gate::default();

        // Set up the TCP socket. We take a preconfigured TcpListener to bind the
        // port early during startup.
@@ -3295,7 +3270,6 @@ impl GrpcPageServiceHandler {
        let page_service_handler = GrpcPageServiceHandler {
            tenant_manager,
            ctx,
            cancel: cancel.clone(),
            gate_guard: gate.enter().expect("gate was just created"),
            get_vectored_concurrent_io,
        };
@@ -3332,20 +3306,19 @@ impl GrpcPageServiceHandler {
            .build_v1()?;
        let server = server.add_service(reflection_service);

        // Spawn server task. It runs until the cancellation token fires and in-flight requests and
        // tasks complete. The `CancellableTask` will wait for the task's join handle, which
        // implicitly waits for the gate to close.
        // Spawn server task.
        let task_cancel = cancel.clone();
        let task = COMPUTE_REQUEST_RUNTIME.spawn(task_mgr::exit_on_panic_or_error(
            "grpc pageservice listener",
            "grpc listener",
            async move {
                server
                let result = server
                    .serve_with_incoming_shutdown(incoming, task_cancel.cancelled())
                    .await?;
                // Server exited cleanly. All requests should have completed by now. Wait for any
                // spawned tasks to complete as well (e.g. IoConcurrency sidecars) via the gate.
                gate.close().await;
                anyhow::Ok(())
                    .await;
                if result.is_ok() {
                    // TODO: revisit shutdown logic once page service is implemented.
                    gate.close().await;
                }
                result
            },
        ));

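A compile-ready sketch of the listener wiring above, registering only a reflection service so no generated protos are needed (crates assumed: tonic, tonic-reflection, tokio-stream, tokio-util, anyhow):

use tokio_util::sync::CancellationToken;
use tonic::transport::Server;

async fn serve(listener: tokio::net::TcpListener, cancel: CancellationToken) -> anyhow::Result<()> {
    let reflection = tonic_reflection::server::Builder::configure().build_v1()?;
    let incoming = tokio_stream::wrappers::TcpListenerStream::new(listener);
    Server::builder()
        .add_service(reflection)
        .serve_with_incoming_shutdown(incoming, cancel.cancelled())
        .await?;
    // The server has stopped accepting and in-flight RPCs are done; this is the
    // point where the code above closes the gate to drain spawned tasks.
    Ok(())
}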
@@ -3535,10 +3508,7 @@ impl GrpcPageServiceHandler {

/// Implements the gRPC page service.
///
/// On client disconnect (e.g. timeout or client shutdown), Tonic will drop the request handler
/// futures, so the read path must be cancellation-safe. On server shutdown, Tonic will wait for
/// in-flight requests to complete.
///
/// TODO: cancellation.
/// TODO: when the libpq impl is removed, remove the Pagestream types and inline the handler code.
#[tonic::async_trait]
impl proto::PageService for GrpcPageServiceHandler {
@@ -3549,6 +3519,39 @@ impl proto::PageService for GrpcPageServiceHandler {
    type GetPagesStream =
        Pin<Box<dyn Stream<Item = Result<proto::GetPageResponse, tonic::Status>> + Send>>;

    #[instrument(skip_all, fields(rel, lsn))]
    async fn check_rel_exists(
        &self,
        req: tonic::Request<proto::CheckRelExistsRequest>,
    ) -> Result<tonic::Response<proto::CheckRelExistsResponse>, tonic::Status> {
        let received_at = extract::<ReceivedAt>(&req).0;
        let timeline = self.get_request_timeline(&req).await?;
        let ctx = self.ctx.with_scope_page_service_pagestream(&timeline);

        // Validate the request, decorate the span, and convert it to a Pagestream request.
        Self::ensure_shard_zero(&timeline)?;
        let req: page_api::CheckRelExistsRequest = req.into_inner().try_into()?;

        span_record!(rel=%req.rel, lsn=%req.read_lsn);

        let req = PagestreamExistsRequest {
            hdr: Self::make_hdr(req.read_lsn, None),
            rel: req.rel,
        };

        // Execute the request and convert the response.
        let _timer = Self::record_op_start_and_throttle(
            &timeline,
            metrics::SmgrQueryType::GetRelExists,
            received_at,
        )
        .await?;

        let resp = PageServerHandler::handle_get_rel_exists_request(&timeline, &req, &ctx).await?;
        let resp: page_api::CheckRelExistsResponse = resp.exists;
        Ok(tonic::Response::new(resp.into()))
    }

    #[instrument(skip_all, fields(lsn))]
    async fn get_base_backup(
        &self,
@@ -3590,14 +3593,8 @@ impl proto::PageService for GrpcPageServiceHandler {

        // Spawn a task to run the basebackup.
        let span = Span::current();
        let gate_guard = self
            .gate_guard
            .try_clone()
            .map_err(|_| tonic::Status::unavailable("shutting down"))?;
        let (mut simplex_read, mut simplex_write) = tokio::io::simplex(CHUNK_SIZE);
        let jh = tokio::spawn(async move {
            let _gate_guard = gate_guard; // keep gate open until task completes

            let gzip_level = match req.compression {
                page_api::BaseBackupCompression::None => None,
                // NB: using fast compression because it's on the critical path for compute
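tokio::io::simplex, used above to pipe the basebackup, gives an in-memory byte pipe with a bounded buffer: a producer task writes while the consumer reads until EOF. A self-contained sketch; CHUNK_SIZE is a stand-in for the constant in the diff:

use tokio::io::{simplex, AsyncReadExt, AsyncWriteExt};

const CHUNK_SIZE: usize = 8192; // assumed value for this sketch

#[tokio::main]
async fn main() -> std::io::Result<()> {
    let (mut rd, mut wr) = simplex(CHUNK_SIZE);

    let producer = tokio::spawn(async move {
        wr.write_all(b"tarball bytes would stream through here").await?;
        wr.shutdown().await // signals EOF to the reader
    });

    let mut buf = vec![0u8; CHUNK_SIZE];
    loop {
        let n = rd.read(&mut buf).await?;
        if n == 0 {
            break; // writer shut down
        }
        println!("read {n} bytes");
    }
    producer.await.expect("producer panicked")?;
    Ok(())
}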
@@ -3721,17 +3718,15 @@ impl proto::PageService for GrpcPageServiceHandler {
        .await?;

        // Spawn an IoConcurrency sidecar, if enabled.
        let gate_guard = self
            .gate_guard
            .try_clone()
            .map_err(|_| tonic::Status::unavailable("shutting down"))?;
        let Ok(gate_guard) = self.gate_guard.try_clone() else {
            return Err(tonic::Status::unavailable("shutting down"));
        };
        let io_concurrency =
            IoConcurrency::spawn_from_conf(self.get_vectored_concurrent_io, gate_guard);

        // Construct the GetPageRequest stream handler.
        // Spawn a task to handle the GetPageRequest stream.
        let span = Span::current();
        let ctx = self.ctx.attached_child();
        let cancel = self.cancel.clone();
        let mut reqs = req.into_inner();

        let resps = async_stream::try_stream! {
@@ -3739,19 +3734,7 @@ impl proto::PageService for GrpcPageServiceHandler {
                .get(ttid.tenant_id, ttid.timeline_id, shard_selector)
                .await?
                .downgrade();
            loop {
                // NB: Tonic considers the entire stream to be an in-flight request and will wait
                // for it to complete before shutting down. React to cancellation between requests.
                let req = tokio::select! {
                    biased;
                    _ = cancel.cancelled() => Err(tonic::Status::unavailable("shutting down")),

                    result = reqs.message() => match result {
                        Ok(Some(req)) => Ok(req),
                        Ok(None) => break, // client closed the stream
                        Err(err) => Err(err),
                    },
                }?;
            while let Some(req) = reqs.message().await? {
                let req_id = req.request_id.map(page_api::RequestID::from).unwrap_or_default();
                let result = Self::get_page(&ctx, &timeline, req, io_concurrency.clone())
                    .instrument(span.clone()) // propagate request span
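The removed loop's shape, isolated: between requests it races the cancellation token against the next inbound message, with `biased` so cancellation wins ties. A sketch with a placeholder item type:

use tokio_util::sync::CancellationToken;

async fn pump(
    mut reqs: tonic::Streaming<Vec<u8>>, // placeholder item type
    cancel: CancellationToken,
) -> Result<(), tonic::Status> {
    loop {
        let req = tokio::select! {
            biased;
            _ = cancel.cancelled() => return Err(tonic::Status::unavailable("shutting down")),
            result = reqs.message() => match result? {
                Some(req) => req,
                None => break, // client closed the stream
            },
        };
        let _ = req; // the real handler dispatches get_page here
    }
    Ok(())
}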
@@ -3775,7 +3758,7 @@ impl proto::PageService for GrpcPageServiceHandler {
        Ok(tonic::Response::new(Box::pin(resps)))
    }

    #[instrument(skip_all, fields(rel, lsn, allow_missing))]
    #[instrument(skip_all, fields(rel, lsn))]
    async fn get_rel_size(
        &self,
        req: tonic::Request<proto::GetRelSizeRequest>,
@@ -3787,9 +3770,8 @@ impl proto::PageService for GrpcPageServiceHandler {
        // Validate the request, decorate the span, and convert it to a Pagestream request.
        Self::ensure_shard_zero(&timeline)?;
        let req: page_api::GetRelSizeRequest = req.into_inner().try_into()?;
        let allow_missing = req.allow_missing;

        span_record!(rel=%req.rel, lsn=%req.read_lsn, allow_missing=%req.allow_missing);
        span_record!(rel=%req.rel, lsn=%req.read_lsn);

        let req = PagestreamNblocksRequest {
            hdr: Self::make_hdr(req.read_lsn, None),
@@ -3804,11 +3786,8 @@ impl proto::PageService for GrpcPageServiceHandler {
        )
        .await?;

        let resp =
            PageServerHandler::handle_get_nblocks_request(&timeline, &req, allow_missing, &ctx)
                .await?;
        let resp: page_api::GetRelSizeResponse = resp.map(|resp| resp.n_blocks);

        let resp = PageServerHandler::handle_get_nblocks_request(&timeline, &req, &ctx).await?;
        let resp: page_api::GetRelSizeResponse = resp.n_blocks;
        Ok(tonic::Response::new(resp.into()))
    }


@@ -286,10 +286,6 @@ impl Timeline {
    /// Like [`Self::get_rel_page_at_lsn`], but returns a batch of pages.
    ///
    /// The ordering of the returned vec corresponds to the ordering of `pages`.
    ///
    /// NB: the read path must be cancellation-safe. The Tonic gRPC service will drop the future
    /// if the client goes away (e.g. due to timeout or cancellation).
    /// TODO: verify that it actually is cancellation-safe.
    pub(crate) async fn get_rel_page_at_lsn_batched(
        &self,
        pages: impl ExactSizeIterator<Item = (&RelTag, &BlockNumber, LsnRange, RequestContext)>,
@@ -504,9 +500,8 @@ impl Timeline {

        for rel in rels {
            let n_blocks = self
                .get_rel_size_in_reldir(rel, version, Some((reldir_key, &reldir)), false, ctx)
                .await?
                .expect("allow_missing=false");
                .get_rel_size_in_reldir(rel, version, Some((reldir_key, &reldir)), ctx)
                .await?;
            total_blocks += n_blocks as usize;
        }
        Ok(total_blocks)
@@ -522,16 +517,10 @@ impl Timeline {
        version: Version<'_>,
        ctx: &RequestContext,
    ) -> Result<BlockNumber, PageReconstructError> {
        Ok(self
            .get_rel_size_in_reldir(tag, version, None, false, ctx)
            .await?
            .expect("allow_missing=false"))
        self.get_rel_size_in_reldir(tag, version, None, ctx).await
    }

    /// Get size of a relation file. If `allow_missing` is true, returns None for missing relations,
    /// otherwise errors.
    ///
    /// INVARIANT: never returns None if `allow_missing=false`.
    /// Get size of a relation file. The relation must exist, otherwise an error is returned.
    ///
    /// See [`Self::get_rel_exists_in_reldir`] on why we need `deserialized_reldir_v1`.
    pub(crate) async fn get_rel_size_in_reldir(
@@ -539,9 +528,8 @@ impl Timeline {
        tag: RelTag,
        version: Version<'_>,
        deserialized_reldir_v1: Option<(Key, &RelDirectory)>,
        allow_missing: bool,
        ctx: &RequestContext,
    ) -> Result<Option<BlockNumber>, PageReconstructError> {
    ) -> Result<BlockNumber, PageReconstructError> {
        if tag.relnode == 0 {
            return Err(PageReconstructError::Other(
                RelationError::InvalidRelnode.into(),
@@ -549,15 +537,7 @@ impl Timeline {
        }

        if let Some(nblocks) = self.get_cached_rel_size(&tag, version) {
            return Ok(Some(nblocks));
        }

        if allow_missing
            && !self
                .get_rel_exists_in_reldir(tag, version, deserialized_reldir_v1, ctx)
                .await?
        {
            return Ok(None);
            return Ok(nblocks);
        }

        if (tag.forknum == FSM_FORKNUM || tag.forknum == VISIBILITYMAP_FORKNUM)
@@ -569,7 +549,7 @@ impl Timeline {
            // FSM, and smgrnblocks() on it immediately afterwards,
            // without extending it. Tolerate that by claiming that
            // any non-existent FSM fork has size 0.
            return Ok(Some(0));
            return Ok(0);
        }

        let key = rel_size_to_key(tag);
@@ -578,7 +558,7 @@ impl Timeline {

        self.update_cached_rel_size(tag, version, nblocks);

        Ok(Some(nblocks))
        Ok(nblocks)
    }

    /// Does the relation exist?
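Distilled shape of the removed allow_missing handling in get_rel_size_in_reldir, with placeholder types: cache hit first, then the optional existence probe, then the FSM/VM tolerance.

use std::collections::HashMap;

struct SizeCacheSketch {
    cached: HashMap<u32, u32>, // placeholder: rel id -> block count
}

impl SizeCacheSketch {
    fn get_rel_size(
        &self,
        rel: u32,
        exists: bool,
        is_fsm_or_vm_fork: bool,
        allow_missing: bool,
    ) -> Result<Option<u32>, String> {
        if let Some(&nblocks) = self.cached.get(&rel) {
            return Ok(Some(nblocks)); // cache hit skips the existence probe
        }
        if allow_missing && !exists {
            return Ok(None); // gRPC callers get None instead of an error
        }
        if is_fsm_or_vm_fork && !exists {
            return Ok(Some(0)); // Postgres may smgrnblocks() an FSM fork before extending it
        }
        if !exists {
            return Err("relation not found".into());
        }
        Ok(Some(8)) // stand-in for the real rel_size_to_key lookup
    }
}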
@@ -833,7 +813,6 @@ impl Timeline {
        let gc_cutoff_lsn_guard = self.get_applied_gc_cutoff_lsn();
        let gc_cutoff_planned = {
            let gc_info = self.gc_info.read().unwrap();
            info!(cutoffs=?gc_info.cutoffs, applied_cutoff=%*gc_cutoff_lsn_guard, "starting find_lsn_for_timestamp");
            gc_info.min_cutoff()
        };
        // Usually the planned cutoff is newer than the cutoff of the last gc run,
@@ -2928,8 +2907,9 @@ static ZERO_PAGE: Bytes = Bytes::from_static(&[0u8; BLCKSZ as usize]);
mod tests {
    use hex_literal::hex;
    use pageserver_api::models::ShardParameters;
    use pageserver_api::shard::ShardStripeSize;
    use utils::id::TimelineId;
    use utils::shard::{ShardCount, ShardNumber, ShardStripeSize};
    use utils::shard::{ShardCount, ShardNumber};

    use super::*;
    use crate::DEFAULT_PG_VERSION;

@@ -9216,11 +9216,7 @@ mod tests {

        let cancel = CancellationToken::new();
        tline
            .compact_with_gc(
                &cancel,
                CompactOptions::default_for_gc_compaction_unit_tests(),
                &ctx,
            )
            .compact_with_gc(&cancel, CompactOptions::default(), &ctx)
            .await
            .unwrap();

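The tests below all switch from CompactOptions::default() to a dedicated default_for_gc_compaction_unit_tests() constructor, composed via struct-update syntax. A sketch of that pattern; the field set is illustrative, and the assumption that the helper enables metadata compaction is inferred from the gc_compaction_do_metadata_compaction field above, not confirmed by this diff:

#[derive(Default)]
struct CompactOptionsSketch {
    gc_compaction_do_metadata_compaction: bool,
    flags: u32,
}

impl CompactOptionsSketch {
    fn default_for_gc_compaction_unit_tests() -> Self {
        Self {
            gc_compaction_do_metadata_compaction: true, // assumed toggle
            ..Default::default()
        }
    }
}

fn main() {
    // Per-test overrides layer on top of the unit-test defaults.
    let opts = CompactOptionsSketch {
        flags: 0b1,
        ..CompactOptionsSketch::default_for_gc_compaction_unit_tests()
    };
    assert!(opts.gc_compaction_do_metadata_compaction);
}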
@@ -9303,11 +9299,7 @@ mod tests {
            guard.cutoffs.space = Lsn(0x40);
        }
        tline
            .compact_with_gc(
                &cancel,
                CompactOptions::default_for_gc_compaction_unit_tests(),
                &ctx,
            )
            .compact_with_gc(&cancel, CompactOptions::default(), &ctx)
            .await
            .unwrap();

@@ -9844,11 +9836,7 @@ mod tests {

        let cancel = CancellationToken::new();
        tline
            .compact_with_gc(
                &cancel,
                CompactOptions::default_for_gc_compaction_unit_tests(),
                &ctx,
            )
            .compact_with_gc(&cancel, CompactOptions::default(), &ctx)
            .await
            .unwrap();

@@ -9883,11 +9871,7 @@ mod tests {
            guard.cutoffs.space = Lsn(0x40);
        }
        tline
            .compact_with_gc(
                &cancel,
                CompactOptions::default_for_gc_compaction_unit_tests(),
                &ctx,
            )
            .compact_with_gc(&cancel, CompactOptions::default(), &ctx)
            .await
            .unwrap();

@@ -10462,7 +10446,7 @@ mod tests {
            &cancel,
            CompactOptions {
                flags: dryrun_flags,
                ..CompactOptions::default_for_gc_compaction_unit_tests()
                ..Default::default()
            },
            &ctx,
        )
@@ -10473,22 +10457,14 @@ mod tests {
        verify_result().await;

        tline
            .compact_with_gc(
                &cancel,
                CompactOptions::default_for_gc_compaction_unit_tests(),
                &ctx,
            )
            .compact_with_gc(&cancel, CompactOptions::default(), &ctx)
            .await
            .unwrap();
        verify_result().await;

        // compact again
        tline
            .compact_with_gc(
                &cancel,
                CompactOptions::default_for_gc_compaction_unit_tests(),
                &ctx,
            )
            .compact_with_gc(&cancel, CompactOptions::default(), &ctx)
            .await
            .unwrap();
        verify_result().await;
@@ -10507,22 +10483,14 @@ mod tests {
            guard.cutoffs.space = Lsn(0x38);
        }
        tline
            .compact_with_gc(
                &cancel,
                CompactOptions::default_for_gc_compaction_unit_tests(),
                &ctx,
            )
            .compact_with_gc(&cancel, CompactOptions::default(), &ctx)
            .await
            .unwrap();
        verify_result().await; // no wals between 0x30 and 0x38, so we should obtain the same result

        // not increasing the GC horizon and compact again
        tline
            .compact_with_gc(
                &cancel,
                CompactOptions::default_for_gc_compaction_unit_tests(),
                &ctx,
            )
            .compact_with_gc(&cancel, CompactOptions::default(), &ctx)
            .await
            .unwrap();
        verify_result().await;
@@ -10727,7 +10695,7 @@ mod tests {
            &cancel,
            CompactOptions {
                flags: dryrun_flags,
                ..CompactOptions::default_for_gc_compaction_unit_tests()
                ..Default::default()
            },
            &ctx,
        )
@@ -10738,22 +10706,14 @@ mod tests {
        verify_result().await;

        tline
            .compact_with_gc(
                &cancel,
                CompactOptions::default_for_gc_compaction_unit_tests(),
                &ctx,
            )
            .compact_with_gc(&cancel, CompactOptions::default(), &ctx)
            .await
            .unwrap();
        verify_result().await;

        // compact again
        tline
            .compact_with_gc(
                &cancel,
                CompactOptions::default_for_gc_compaction_unit_tests(),
                &ctx,
            )
            .compact_with_gc(&cancel, CompactOptions::default(), &ctx)
            .await
            .unwrap();
        verify_result().await;
@@ -10953,11 +10913,7 @@ mod tests {

        let cancel = CancellationToken::new();
        branch_tline
            .compact_with_gc(
                &cancel,
                CompactOptions::default_for_gc_compaction_unit_tests(),
                &ctx,
            )
            .compact_with_gc(&cancel, CompactOptions::default(), &ctx)
            .await
            .unwrap();

@@ -10970,7 +10926,7 @@ mod tests {
            &cancel,
            CompactOptions {
                compact_lsn_range: Some(CompactLsnRange::above(Lsn(0x40))),
                ..CompactOptions::default_for_gc_compaction_unit_tests()
                ..Default::default()
            },
            &ctx,
        )
@@ -11638,7 +11594,7 @@ mod tests {
            CompactOptions {
                flags: EnumSet::new(),
                compact_key_range: Some((get_key(0)..get_key(2)).into()),
                ..CompactOptions::default_for_gc_compaction_unit_tests()
                ..Default::default()
            },
            &ctx,
        )
@@ -11685,7 +11641,7 @@ mod tests {
            CompactOptions {
                flags: EnumSet::new(),
                compact_key_range: Some((get_key(2)..get_key(4)).into()),
                ..CompactOptions::default_for_gc_compaction_unit_tests()
                ..Default::default()
            },
            &ctx,
        )
@@ -11737,7 +11693,7 @@ mod tests {
            CompactOptions {
                flags: EnumSet::new(),
                compact_key_range: Some((get_key(4)..get_key(9)).into()),
                ..CompactOptions::default_for_gc_compaction_unit_tests()
                ..Default::default()
            },
            &ctx,
        )
@@ -11788,7 +11744,7 @@ mod tests {
            CompactOptions {
                flags: EnumSet::new(),
                compact_key_range: Some((get_key(9)..get_key(10)).into()),
                ..CompactOptions::default_for_gc_compaction_unit_tests()
                ..Default::default()
            },
            &ctx,
        )
@@ -11844,7 +11800,7 @@ mod tests {
            CompactOptions {
                flags: EnumSet::new(),
                compact_key_range: Some((get_key(0)..get_key(10)).into()),
                ..CompactOptions::default_for_gc_compaction_unit_tests()
                ..Default::default()
            },
            &ctx,
        )
@@ -12115,7 +12071,7 @@ mod tests {
            &cancel,
            CompactOptions {
                compact_lsn_range: Some(CompactLsnRange::above(Lsn(0x28))),
                ..CompactOptions::default_for_gc_compaction_unit_tests()
                ..Default::default()
            },
            &ctx,
        )
@@ -12150,11 +12106,7 @@ mod tests {

        // compact again
        tline
            .compact_with_gc(
                &cancel,
                CompactOptions::default_for_gc_compaction_unit_tests(),
                &ctx,
            )
            .compact_with_gc(&cancel, CompactOptions::default(), &ctx)
            .await
            .unwrap();
        verify_result().await;
@@ -12373,7 +12325,7 @@ mod tests {
            CompactOptions {
                compact_key_range: Some((get_key(0)..get_key(2)).into()),
                compact_lsn_range: Some((Lsn(0x20)..Lsn(0x28)).into()),
                ..CompactOptions::default_for_gc_compaction_unit_tests()
                ..Default::default()
            },
            &ctx,
        )
@@ -12419,7 +12371,7 @@ mod tests {
            CompactOptions {
                compact_key_range: Some((get_key(3)..get_key(8)).into()),
                compact_lsn_range: Some((Lsn(0x28)..Lsn(0x40)).into()),
                ..CompactOptions::default_for_gc_compaction_unit_tests()
                ..Default::default()
            },
            &ctx,
        )
@@ -12467,7 +12419,7 @@ mod tests {
            CompactOptions {
                compact_key_range: Some((get_key(0)..get_key(5)).into()),
                compact_lsn_range: Some((Lsn(0x20)..Lsn(0x50)).into()),
                ..CompactOptions::default_for_gc_compaction_unit_tests()
                ..Default::default()
            },
            &ctx,
        )
@@ -12502,11 +12454,7 @@ mod tests {

        // final full compaction
        tline
            .compact_with_gc(
                &cancel,
                CompactOptions::default_for_gc_compaction_unit_tests(),
                &ctx,
            )
            .compact_with_gc(&cancel, CompactOptions::default(), &ctx)
            .await
            .unwrap();
        verify_result().await;
@@ -12616,7 +12564,7 @@ mod tests {
            CompactOptions {
                compact_key_range: None,
                compact_lsn_range: None,
                ..CompactOptions::default_for_gc_compaction_unit_tests()
                ..Default::default()
            },
            &ctx,
        )

Some files were not shown because too many files have changed in this diff