Compare commits

..

4 Commits

Author SHA1 Message Date
Christian Schwarz
e093c42c33 pagebench: fake queue depth of 10 2024-09-12 10:30:33 +00:00
Christian Schwarz
08fad5b0db debouncer 2024-09-12 10:30:21 +00:00
Christian Schwarz
942bc9544b fixup 2024-09-11 20:04:39 +00:00
Christian Schwarz
02b7cdb305 HACK: instrument page_service to count nonblocking consecutive getpage requests 2024-09-11 19:25:19 +01:00
124 changed files with 1604 additions and 3871 deletions

View File

@@ -62,7 +62,7 @@ jobs:
#
git config --global --add safe.directory ${{ github.workspace }}
git config --global --add safe.directory ${GITHUB_WORKSPACE}
for r in 14 15 16 17; do
for r in 14 15 16; do
git config --global --add safe.directory "${{ github.workspace }}/vendor/postgres-v$r"
git config --global --add safe.directory "${GITHUB_WORKSPACE}/vendor/postgres-v$r"
done
@@ -83,10 +83,6 @@ jobs:
id: pg_v16_rev
run: echo pg_rev=$(git rev-parse HEAD:vendor/postgres-v16) >> $GITHUB_OUTPUT
- name: Set pg 17 revision for caching
id: pg_v17_rev
run: echo pg_rev=$(git rev-parse HEAD:vendor/postgres-v17) >> $GITHUB_OUTPUT
# Set some environment variables used by all the steps.
#
# CARGO_FLAGS is extra options to pass to "cargo build", "cargo test" etc.
@@ -140,13 +136,6 @@ jobs:
path: pg_install/v16
key: v1-${{ runner.os }}-${{ runner.arch }}-${{ inputs.build-type }}-pg-${{ steps.pg_v16_rev.outputs.pg_rev }}-${{ hashFiles('Makefile', 'Dockerfile.build-tools') }}
- name: Cache postgres v17 build
id: cache_pg_17
uses: actions/cache@v4
with:
path: pg_install/v17
key: v1-${{ runner.os }}-${{ runner.arch }}-${{ inputs.build-type }}-pg-${{ steps.pg_v17_rev.outputs.pg_rev }}-${{ hashFiles('Makefile', 'Dockerfile.build-tools') }}
- name: Build postgres v14
if: steps.cache_pg_14.outputs.cache-hit != 'true'
run: mold -run make postgres-v14 -j$(nproc)
@@ -159,10 +148,6 @@ jobs:
if: steps.cache_pg_16.outputs.cache-hit != 'true'
run: mold -run make postgres-v16 -j$(nproc)
- name: Build postgres v17
if: steps.cache_pg_17.outputs.cache-hit != 'true'
run: mold -run make postgres-v17 -j$(nproc)
- name: Build neon extensions
run: mold -run make neon-pg-ext -j$(nproc)
@@ -225,7 +210,7 @@ jobs:
run: |
PQ_LIB_DIR=$(pwd)/pg_install/v16/lib
export PQ_LIB_DIR
LD_LIBRARY_PATH=$(pwd)/pg_install/v17/lib
LD_LIBRARY_PATH=$(pwd)/pg_install/v16/lib
export LD_LIBRARY_PATH
#nextest does not yet support running doctests

View File

@@ -211,7 +211,7 @@ jobs:
build-tag: ${{ needs.tag.outputs.build-tag }}
build-type: ${{ matrix.build-type }}
# Run tests on all Postgres versions in release builds and only on the latest version in debug builds
pg-versions: ${{ matrix.build-type == 'release' && '["v14", "v15", "v16", "v17"]' || '["v17"]' }}
pg-versions: ${{ matrix.build-type == 'release' && '["v14", "v15", "v16"]' || '["v16"]' }}
secrets: inherit
# Keep `benchmarks` job outside of `build-and-test-locally` workflow to make job failures non-blocking
@@ -548,7 +548,7 @@ jobs:
strategy:
fail-fast: false
matrix:
version: [ v14, v15, v16, v17 ]
version: [ v14, v15, v16 ]
arch: [ x64, arm64 ]
runs-on: ${{ fromJson(format('["self-hosted", "{0}"]', matrix.arch == 'arm64' && 'large-arm64' || 'large')) }}
@@ -627,7 +627,7 @@ jobs:
- name: Build compute-tools image
# compute-tools are Postgres independent, so build it only once
if: matrix.version == 'v17'
if: matrix.version == 'v16'
uses: docker/build-push-action@v6
with:
target: compute-tools-image
@@ -649,7 +649,7 @@ jobs:
strategy:
matrix:
version: [ v14, v15, v16, v17 ]
version: [ v14, v15, v16 ]
steps:
- uses: docker/login-action@v3
@@ -671,7 +671,7 @@ jobs:
neondatabase/neon-test-extensions-${{ matrix.version }}:${{ needs.tag.outputs.build-tag }}-arm64
- name: Create multi-arch compute-tools image
if: matrix.version == 'v17'
if: matrix.version == 'v16'
run: |
docker buildx imagetools create -t neondatabase/compute-tools:${{ needs.tag.outputs.build-tag }} \
neondatabase/compute-tools:${{ needs.tag.outputs.build-tag }}-x64 \
@@ -689,7 +689,7 @@ jobs:
neondatabase/compute-node-${{ matrix.version }}:${{ needs.tag.outputs.build-tag }}
- name: Push multi-arch compute-tools image to ECR
if: matrix.version == 'v17'
if: matrix.version == 'v16'
run: |
docker buildx imagetools create -t 369495373322.dkr.ecr.eu-central-1.amazonaws.com/compute-tools:${{ needs.tag.outputs.build-tag }} \
neondatabase/compute-tools:${{ needs.tag.outputs.build-tag }}
@@ -700,7 +700,7 @@ jobs:
strategy:
fail-fast: false
matrix:
version: [ v14, v15, v16, v17 ]
version: [ v14, v15, v16 ]
env:
VM_BUILDER_VERSION: v0.29.3
@@ -798,7 +798,7 @@ jobs:
runs-on: ubuntu-22.04
env:
VERSIONS: v14 v15 v16 v17
VERSIONS: v14 v15 v16
steps:
- uses: docker/login-action@v3
@@ -839,7 +839,7 @@ jobs:
done
done
docker buildx imagetools create -t neondatabase/neon-test-extensions-v16:latest \
neondatabase/neon-test-extensions-v16:${{ needs.tag.outputs.build-tag }}
neondatabase/neon-test-extensions-v16:${{ needs.tag.outputs.build-tag }}
- name: Login to prod ECR
uses: docker/login-action@v3
@@ -852,7 +852,7 @@ jobs:
- name: Copy all images to prod ECR
if: github.ref_name == 'release'|| github.ref_name == 'release-proxy'
run: |
for image in neon compute-tools {vm-,}compute-node-{v14,v15,v16,v17}; do
for image in neon compute-tools {vm-,}compute-node-{v14,v15,v16}; do
docker buildx imagetools create -t 093970136003.dkr.ecr.eu-central-1.amazonaws.com/${image}:${{ needs.tag.outputs.build-tag }} \
369495373322.dkr.ecr.eu-central-1.amazonaws.com/${image}:${{ needs.tag.outputs.build-tag }}
done
@@ -864,7 +864,7 @@ jobs:
with:
client_id: ${{ vars.AZURE_DEV_CLIENT_ID }}
image_tag: ${{ needs.tag.outputs.build-tag }}
images: neon compute-tools vm-compute-node-v14 vm-compute-node-v15 vm-compute-node-v16 vm-compute-node-v17 compute-node-v14 compute-node-v15 compute-node-v16 compute-node-v17
images: neon compute-tools vm-compute-node-v14 vm-compute-node-v15 vm-compute-node-v16 compute-node-v14 compute-node-v15 compute-node-v16
registry_name: ${{ vars.AZURE_DEV_REGISTRY_NAME }}
subscription_id: ${{ vars.AZURE_DEV_SUBSCRIPTION_ID }}
tenant_id: ${{ vars.AZURE_TENANT_ID }}
@@ -876,7 +876,7 @@ jobs:
with:
client_id: ${{ vars.AZURE_PROD_CLIENT_ID }}
image_tag: ${{ needs.tag.outputs.build-tag }}
images: neon compute-tools vm-compute-node-v14 vm-compute-node-v15 vm-compute-node-v16 vm-compute-node-v17 compute-node-v14 compute-node-v15 compute-node-v16 compute-node-v17
images: neon compute-tools vm-compute-node-v14 vm-compute-node-v15 vm-compute-node-v16 compute-node-v14 compute-node-v15 compute-node-v16
registry_name: ${{ vars.AZURE_PROD_REGISTRY_NAME }}
subscription_id: ${{ vars.AZURE_PROD_SUBSCRIPTION_ID }}
tenant_id: ${{ vars.AZURE_TENANT_ID }}
@@ -971,7 +971,7 @@ jobs:
#
git config --global --add safe.directory ${{ github.workspace }}
git config --global --add safe.directory ${GITHUB_WORKSPACE}
for r in 14 15 16 17; do
for r in 14 15 16; do
git config --global --add safe.directory "${{ github.workspace }}/vendor/postgres-v$r"
git config --global --add safe.directory "${GITHUB_WORKSPACE}/vendor/postgres-v$r"
done
@@ -1117,7 +1117,6 @@ jobs:
files_to_promote+=("s3://${BUCKET}/${s3_key}")
# TODO Add v17
for pg_version in v14 v15 v16; do
# We run less tests for debug builds, so we don't need to promote them
if [ "${build_type}" == "debug" ] && { [ "${arch}" == "ARM64" ] || [ "${pg_version}" != "v16" ] ; }; then

View File

@@ -72,10 +72,6 @@ jobs:
id: pg_v16_rev
run: echo pg_rev=$(git rev-parse HEAD:vendor/postgres-v16) >> $GITHUB_OUTPUT
- name: Set pg 17 revision for caching
id: pg_v17_rev
run: echo pg_rev=$(git rev-parse HEAD:vendor/postgres-v17) >> $GITHUB_OUTPUT
- name: Cache postgres v14 build
id: cache_pg_14
uses: actions/cache@v4
@@ -97,13 +93,6 @@ jobs:
path: pg_install/v16
key: v1-${{ runner.os }}-${{ runner.arch }}-${{ env.BUILD_TYPE }}-pg-${{ steps.pg_v16_rev.outputs.pg_rev }}-${{ hashFiles('Makefile') }}
- name: Cache postgres v17 build
id: cache_pg_17
uses: actions/cache@v4
with:
path: pg_install/v17
key: v1-${{ runner.os }}-${{ runner.arch }}-${{ env.BUILD_TYPE }}-pg-${{ steps.pg_v17_rev.outputs.pg_rev }}-${{ hashFiles('Makefile') }}
- name: Set extra env for macOS
run: |
echo 'LDFLAGS=-L/usr/local/opt/openssl@3/lib' >> $GITHUB_ENV
@@ -131,10 +120,6 @@ jobs:
if: steps.cache_pg_16.outputs.cache-hit != 'true'
run: make postgres-v16 -j$(sysctl -n hw.ncpu)
- name: Build postgres v17
if: steps.cache_pg_17.outputs.cache-hit != 'true'
run: make postgres-v17 -j$(sysctl -n hw.ncpu)
- name: Build neon extensions
run: make neon-pg-ext -j$(sysctl -n hw.ncpu)

4
.gitmodules vendored
View File

@@ -10,7 +10,3 @@
path = vendor/postgres-v16
url = https://github.com/neondatabase/postgres.git
branch = REL_16_STABLE_neon
[submodule "vendor/postgres-v17"]
path = vendor/postgres-v17
url = https://github.com/neondatabase/postgres.git
branch = REL_17_STABLE_neon

115
Cargo.lock generated
View File

@@ -1189,9 +1189,9 @@ dependencies = [
[[package]]
name = "comfy-table"
version = "7.1.1"
version = "6.1.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b34115915337defe99b2aff5c2ce6771e5fbc4079f4b506301f5cf394c8452f7"
checksum = "6e7b787b0dc42e8111badfdbe4c3059158ccb2db8780352fa1b01e8ccf45cc4d"
dependencies = [
"crossterm",
"strum",
@@ -1246,7 +1246,7 @@ dependencies = [
"tokio-postgres",
"tokio-stream",
"tokio-util",
"toml_edit",
"toml_edit 0.19.10",
"tracing",
"tracing-opentelemetry",
"tracing-subscriber",
@@ -1360,8 +1360,8 @@ dependencies = [
"tokio",
"tokio-postgres",
"tokio-util",
"toml",
"toml_edit",
"toml 0.7.4",
"toml_edit 0.19.10",
"tracing",
"url",
"utils",
@@ -1485,22 +1485,25 @@ checksum = "248e3bacc7dc6baa3b21e405ee045c3047101a49145e7e9eca583ab4c2ca5345"
[[package]]
name = "crossterm"
version = "0.27.0"
version = "0.25.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f476fe445d41c9e991fd07515a6f463074b782242ccf4a5b7b1d1012e70824df"
checksum = "e64e6c0fbe2c17357405f7c758c1ef960fce08bdfb2c03d88d2a18d7e09c4b67"
dependencies = [
"bitflags 2.4.1",
"bitflags 1.3.2",
"crossterm_winapi",
"libc",
"mio",
"parking_lot 0.12.1",
"signal-hook",
"signal-hook-mio",
"winapi",
]
[[package]]
name = "crossterm_winapi"
version = "0.9.1"
version = "0.9.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "acdd7c62a3665c7f6830a51635d9ac9b23ed385797f70a83bb8bafe9c572ab2b"
checksum = "2ae1b35a484aa10e07fe0638d02301c5ad24de82d310ccbd2f3693da5f09bf1c"
dependencies = [
"winapi",
]
@@ -3141,7 +3144,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fd01039851e82f8799046eabbb354056283fb265c8ec0996af940f4e85a380ff"
dependencies = [
"serde",
"toml",
"toml 0.8.14",
]
[[package]]
@@ -3657,7 +3660,7 @@ dependencies = [
"thiserror",
"tokio",
"tokio-util",
"toml_edit",
"toml_edit 0.19.10",
"utils",
"workspace_hack",
]
@@ -3744,7 +3747,7 @@ dependencies = [
"tokio-stream",
"tokio-tar",
"tokio-util",
"toml_edit",
"toml_edit 0.19.10",
"tracing",
"twox-hash",
"url",
@@ -3907,9 +3910,8 @@ dependencies = [
[[package]]
name = "parquet"
version = "53.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f0fbf928021131daaa57d334ca8e3904fe9ae22f73c56244fc7db9b04eedc3d8"
version = "51.0.0"
source = "git+https://github.com/apache/arrow-rs?branch=master#2534976a564be3d2d56312dc88fb1b6ed4cef829"
dependencies = [
"ahash",
"bytes",
@@ -3928,9 +3930,8 @@ dependencies = [
[[package]]
name = "parquet_derive"
version = "53.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "86e9fcfae007533a06b580429a3f7e07cb833ec8aa37c041c16563e7918f057e"
version = "51.0.0"
source = "git+https://github.com/apache/arrow-rs?branch=master#2534976a564be3d2d56312dc88fb1b6ed4cef829"
dependencies = [
"parquet",
"proc-macro2",
@@ -4120,7 +4121,7 @@ dependencies = [
[[package]]
name = "postgres"
version = "0.19.4"
source = "git+https://github.com/neondatabase/rust-postgres.git?rev=20031d7a9ee1addeae6e0968e3899ae6bf01cee2#20031d7a9ee1addeae6e0968e3899ae6bf01cee2"
source = "git+https://github.com/neondatabase/rust-postgres.git?branch=neon#20031d7a9ee1addeae6e0968e3899ae6bf01cee2"
dependencies = [
"bytes",
"fallible-iterator",
@@ -4133,7 +4134,7 @@ dependencies = [
[[package]]
name = "postgres-protocol"
version = "0.6.4"
source = "git+https://github.com/neondatabase/rust-postgres.git?rev=20031d7a9ee1addeae6e0968e3899ae6bf01cee2#20031d7a9ee1addeae6e0968e3899ae6bf01cee2"
source = "git+https://github.com/neondatabase/rust-postgres.git?branch=neon#20031d7a9ee1addeae6e0968e3899ae6bf01cee2"
dependencies = [
"base64 0.20.0",
"byteorder",
@@ -4152,7 +4153,7 @@ dependencies = [
[[package]]
name = "postgres-types"
version = "0.2.4"
source = "git+https://github.com/neondatabase/rust-postgres.git?rev=20031d7a9ee1addeae6e0968e3899ae6bf01cee2#20031d7a9ee1addeae6e0968e3899ae6bf01cee2"
source = "git+https://github.com/neondatabase/rust-postgres.git?branch=neon#20031d7a9ee1addeae6e0968e3899ae6bf01cee2"
dependencies = [
"bytes",
"fallible-iterator",
@@ -4811,7 +4812,7 @@ dependencies = [
"tokio",
"tokio-stream",
"tokio-util",
"toml_edit",
"toml_edit 0.19.10",
"tracing",
"utils",
]
@@ -5321,7 +5322,7 @@ dependencies = [
"tokio-stream",
"tokio-tar",
"tokio-util",
"toml_edit",
"toml_edit 0.19.10",
"tracing",
"tracing-subscriber",
"url",
@@ -5730,6 +5731,17 @@ dependencies = [
"signal-hook-registry",
]
[[package]]
name = "signal-hook-mio"
version = "0.2.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "29ad2e15f37ec9a6cc544097b78a1ec90001e9f71b81338ca39f430adaca99af"
dependencies = [
"libc",
"mio",
"signal-hook",
]
[[package]]
name = "signal-hook-registry"
version = "1.4.1"
@@ -6042,21 +6054,21 @@ checksum = "73473c0e59e6d5812c5dfe2a064a6444949f089e20eec9a2e5506596494e4623"
[[package]]
name = "strum"
version = "0.26.3"
version = "0.24.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8fec0f0aef304996cf250b31b5a10dee7980c85da9d759361292b8bca5a18f06"
checksum = "063e6045c0e62079840579a7e47a355ae92f60eb74daaf156fb1e84ba164e63f"
[[package]]
name = "strum_macros"
version = "0.26.4"
version = "0.24.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4c6bee85a5a24955dc440386795aa378cd9cf82acd5f764469152d2270e581be"
checksum = "1e385be0d24f186b4ce2f9982191e7101bb737312ad61c1f2f984f34bcf85d59"
dependencies = [
"heck 0.5.0",
"heck 0.4.1",
"proc-macro2",
"quote",
"rustversion",
"syn 2.0.52",
"syn 1.0.109",
]
[[package]]
@@ -6397,7 +6409,7 @@ dependencies = [
[[package]]
name = "tokio-postgres"
version = "0.7.7"
source = "git+https://github.com/neondatabase/rust-postgres.git?rev=20031d7a9ee1addeae6e0968e3899ae6bf01cee2#20031d7a9ee1addeae6e0968e3899ae6bf01cee2"
source = "git+https://github.com/neondatabase/rust-postgres.git?branch=neon#20031d7a9ee1addeae6e0968e3899ae6bf01cee2"
dependencies = [
"async-trait",
"byteorder",
@@ -6508,6 +6520,18 @@ dependencies = [
"tracing",
]
[[package]]
name = "toml"
version = "0.7.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d6135d499e69981f9ff0ef2167955a5333c35e36f6937d382974566b3d5b94ec"
dependencies = [
"serde",
"serde_spanned",
"toml_datetime",
"toml_edit 0.19.10",
]
[[package]]
name = "toml"
version = "0.8.14"
@@ -6517,7 +6541,7 @@ dependencies = [
"serde",
"serde_spanned",
"toml_datetime",
"toml_edit",
"toml_edit 0.22.14",
]
[[package]]
@@ -6529,6 +6553,19 @@ dependencies = [
"serde",
]
[[package]]
name = "toml_edit"
version = "0.19.10"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2380d56e8670370eee6566b0bfd4265f65b3f432e8c6d85623f728d4fa31f739"
dependencies = [
"indexmap 1.9.3",
"serde",
"serde_spanned",
"toml_datetime",
"winnow 0.4.6",
]
[[package]]
name = "toml_edit"
version = "0.22.14"
@@ -6539,7 +6576,7 @@ dependencies = [
"serde",
"serde_spanned",
"toml_datetime",
"winnow",
"winnow 0.6.13",
]
[[package]]
@@ -6952,7 +6989,7 @@ dependencies = [
"tokio-stream",
"tokio-tar",
"tokio-util",
"toml_edit",
"toml_edit 0.19.10",
"tracing",
"tracing-error",
"tracing-subscriber",
@@ -7498,6 +7535,15 @@ version = "0.52.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "32b752e52a2da0ddfbdbcc6fceadfeede4c939ed16d13e648833a61dfb611ed8"
[[package]]
name = "winnow"
version = "0.4.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "61de7bac303dc551fe038e2b3cef0f571087a47571ea6e79a87692ac99b99699"
dependencies = [
"memchr",
]
[[package]]
name = "winnow"
version = "0.6.13"
@@ -7605,7 +7651,6 @@ dependencies = [
"tokio",
"tokio-rustls 0.24.0",
"tokio-util",
"toml_edit",
"tonic",
"tower",
"tracing",

View File

@@ -73,7 +73,7 @@ camino = "1.1.6"
cfg-if = "1.0.0"
chrono = { version = "0.4", default-features = false, features = ["clock"] }
clap = { version = "4.0", features = ["derive"] }
comfy-table = "7.1"
comfy-table = "6.1"
const_format = "0.2"
crc32c = "0.6"
crossbeam-deque = "0.8.5"
@@ -123,8 +123,8 @@ opentelemetry = "0.20.0"
opentelemetry-otlp = { version = "0.13.0", default-features=false, features = ["http-proto", "trace", "http", "reqwest-client"] }
opentelemetry-semantic-conventions = "0.12.0"
parking_lot = "0.12"
parquet = { version = "53", default-features = false, features = ["zstd"] }
parquet_derive = "53"
parquet = { version = "51.0.0", default-features = false, features = ["zstd"] }
parquet_derive = "51.0.0"
pbkdf2 = { version = "0.12.1", features = ["simple", "std"] }
pin-project-lite = "0.2"
procfs = "0.16"
@@ -158,8 +158,8 @@ signal-hook = "0.3"
smallvec = "1.11"
smol_str = { version = "0.2.0", features = ["serde"] }
socket2 = "0.5"
strum = "0.26"
strum_macros = "0.26"
strum = "0.24"
strum_macros = "0.24"
"subtle" = "2.5.0"
svg_fmt = "0.4.3"
sync_wrapper = "0.1.2"
@@ -177,8 +177,8 @@ tokio-rustls = "0.25"
tokio-stream = "0.1"
tokio-tar = "0.3"
tokio-util = { version = "0.7.10", features = ["io", "rt"] }
toml = "0.8"
toml_edit = "0.22"
toml = "0.7"
toml_edit = "0.19"
tonic = {version = "0.9", features = ["tls", "tls-roots"]}
tower-service = "0.3.2"
tracing = "0.1"
@@ -201,21 +201,10 @@ env_logger = "0.10"
log = "0.4"
## Libraries from neondatabase/ git forks, ideally with changes to be upstreamed
# We want to use the 'neon' branch for these, but there's currently one
# incompatible change on the branch. See:
#
# - PR #8076 which contained changes that depended on the new changes in
# the rust-postgres crate, and
# - PR #8654 which reverted those changes and made the code in proxy incompatible
# with the tip of the 'neon' branch again.
#
# When those proxy changes are re-applied (see PR #8747), we can switch using
# the tip of the 'neon' branch again.
postgres = { git = "https://github.com/neondatabase/rust-postgres.git", rev = "20031d7a9ee1addeae6e0968e3899ae6bf01cee2" }
postgres-protocol = { git = "https://github.com/neondatabase/rust-postgres.git", rev = "20031d7a9ee1addeae6e0968e3899ae6bf01cee2" }
postgres-types = { git = "https://github.com/neondatabase/rust-postgres.git", rev = "20031d7a9ee1addeae6e0968e3899ae6bf01cee2" }
tokio-postgres = { git = "https://github.com/neondatabase/rust-postgres.git", rev = "20031d7a9ee1addeae6e0968e3899ae6bf01cee2" }
postgres = { git = "https://github.com/neondatabase/rust-postgres.git", branch="neon" }
postgres-protocol = { git = "https://github.com/neondatabase/rust-postgres.git", branch="neon" }
postgres-types = { git = "https://github.com/neondatabase/rust-postgres.git", branch="neon" }
tokio-postgres = { git = "https://github.com/neondatabase/rust-postgres.git", branch="neon" }
## Local libraries
compute_api = { version = "0.1", path = "./libs/compute_api/" }
@@ -252,7 +241,11 @@ tonic-build = "0.9"
[patch.crates-io]
# Needed to get `tokio-postgres-rustls` to depend on our fork.
tokio-postgres = { git = "https://github.com/neondatabase/rust-postgres.git", rev = "20031d7a9ee1addeae6e0968e3899ae6bf01cee2" }
tokio-postgres = { git = "https://github.com/neondatabase/rust-postgres.git", branch="neon" }
# bug fixes for UUID
parquet = { git = "https://github.com/apache/arrow-rs", branch = "master" }
parquet_derive = { git = "https://github.com/apache/arrow-rs", branch = "master" }
################# Binary contents sections

View File

@@ -5,8 +5,6 @@
ARG REPOSITORY=neondatabase
ARG IMAGE=build-tools
ARG TAG=pinned
ARG DEFAULT_PG_VERSION=17
ARG STABLE_PG_VERSION=16
# Build Postgres
FROM $REPOSITORY/$IMAGE:$TAG AS pg-build
@@ -15,7 +13,6 @@ WORKDIR /home/nonroot
COPY --chown=nonroot vendor/postgres-v14 vendor/postgres-v14
COPY --chown=nonroot vendor/postgres-v15 vendor/postgres-v15
COPY --chown=nonroot vendor/postgres-v16 vendor/postgres-v16
COPY --chown=nonroot vendor/postgres-v17 vendor/postgres-v17
COPY --chown=nonroot pgxn pgxn
COPY --chown=nonroot Makefile Makefile
COPY --chown=nonroot scripts/ninstall.sh scripts/ninstall.sh
@@ -31,19 +28,16 @@ FROM $REPOSITORY/$IMAGE:$TAG AS build
WORKDIR /home/nonroot
ARG GIT_VERSION=local
ARG BUILD_TAG
ARG STABLE_PG_VERSION
COPY --from=pg-build /home/nonroot/pg_install/v14/include/postgresql/server pg_install/v14/include/postgresql/server
COPY --from=pg-build /home/nonroot/pg_install/v15/include/postgresql/server pg_install/v15/include/postgresql/server
COPY --from=pg-build /home/nonroot/pg_install/v16/include/postgresql/server pg_install/v16/include/postgresql/server
COPY --from=pg-build /home/nonroot/pg_install/v17/include/postgresql/server pg_install/v17/include/postgresql/server
COPY --from=pg-build /home/nonroot/pg_install/v16/lib pg_install/v16/lib
COPY --from=pg-build /home/nonroot/pg_install/v17/lib pg_install/v17/lib
COPY --chown=nonroot . .
ARG ADDITIONAL_RUSTFLAGS
RUN set -e \
&& PQ_LIB_DIR=$(pwd)/pg_install/v${STABLE_PG_VERSION}/lib RUSTFLAGS="-Clinker=clang -Clink-arg=-fuse-ld=mold -Clink-arg=-Wl,--no-rosegment ${ADDITIONAL_RUSTFLAGS}" cargo build \
&& PQ_LIB_DIR=$(pwd)/pg_install/v16/lib RUSTFLAGS="-Clinker=clang -Clink-arg=-fuse-ld=mold -Clink-arg=-Wl,--no-rosegment ${ADDITIONAL_RUSTFLAGS}" cargo build \
--bin pg_sni_router \
--bin pageserver \
--bin pagectl \
@@ -58,7 +52,6 @@ RUN set -e \
# Build final image
#
FROM debian:bullseye-slim
ARG DEFAULT_PG_VERSION
WORKDIR /data
RUN set -e \
@@ -84,7 +77,6 @@ COPY --from=build --chown=neon:neon /home/nonroot/target/release/storage_scrubbe
COPY --from=pg-build /home/nonroot/pg_install/v14 /usr/local/v14/
COPY --from=pg-build /home/nonroot/pg_install/v15 /usr/local/v15/
COPY --from=pg-build /home/nonroot/pg_install/v16 /usr/local/v16/
COPY --from=pg-build /home/nonroot/pg_install/v17 /usr/local/v17/
COPY --from=pg-build /home/nonroot/postgres_install.tar.gz /data/
# By default, pageserver uses `.neon/` working directory in WORKDIR, so create one and fill it with the dummy config.
@@ -101,7 +93,7 @@ RUN mkdir -p /data/.neon/ && \
# When running a binary that links with libpq, default to using our most recent postgres version. Binaries
# that want a particular postgres version will select it explicitly: this is just a default.
ENV LD_LIBRARY_PATH=/usr/local/v${DEFAULT_PG_VERSION}/lib
ENV LD_LIBRARY_PATH=/usr/local/v16/lib
VOLUME ["/data"]

View File

@@ -79,7 +79,6 @@ RUN cd postgres && \
#
#########################################################################################
FROM build-deps AS postgis-build
ARG PG_VERSION
COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
RUN apt update && \
apt install -y cmake gdal-bin libboost-dev libboost-thread-dev libboost-filesystem-dev \
@@ -88,11 +87,7 @@ RUN apt update && \
protobuf-c-compiler xsltproc
# SFCGAL > 1.3 requires CGAL > 5.2, Bullseye's libcgal-dev is 5.2
RUN case "${PG_VERSION}" in "v17") \
mkdir -p /sfcgal && \
echo "Postgis doensn't yet support PG17 (needs 3.4.3, if not higher)" && exit 0;; \
esac && \
wget https://gitlab.com/Oslandia/SFCGAL/-/archive/v1.3.10/SFCGAL-v1.3.10.tar.gz -O SFCGAL.tar.gz && \
RUN wget https://gitlab.com/Oslandia/SFCGAL/-/archive/v1.3.10/SFCGAL-v1.3.10.tar.gz -O SFCGAL.tar.gz && \
echo "4e39b3b2adada6254a7bdba6d297bb28e1a9835a9f879b74f37e2dab70203232 SFCGAL.tar.gz" | sha256sum --check && \
mkdir sfcgal-src && cd sfcgal-src && tar xzf ../SFCGAL.tar.gz --strip-components=1 -C . && \
cmake -DCMAKE_BUILD_TYPE=Release . && make -j $(getconf _NPROCESSORS_ONLN) && \
@@ -101,10 +96,7 @@ RUN case "${PG_VERSION}" in "v17") \
ENV PATH="/usr/local/pgsql/bin:$PATH"
RUN case "${PG_VERSION}" in "v17") \
echo "Postgis doensn't yet support PG17 (needs 3.4.3, if not higher)" && exit 0;; \
esac && \
wget https://download.osgeo.org/postgis/source/postgis-3.3.3.tar.gz -O postgis.tar.gz && \
RUN wget https://download.osgeo.org/postgis/source/postgis-3.3.3.tar.gz -O postgis.tar.gz && \
echo "74eb356e3f85f14233791013360881b6748f78081cc688ff9d6f0f673a762d13 postgis.tar.gz" | sha256sum --check && \
mkdir postgis-src && cd postgis-src && tar xzf ../postgis.tar.gz --strip-components=1 -C . && \
find /usr/local/pgsql -type f | sed 's|^/usr/local/pgsql/||' > /before.txt &&\
@@ -130,10 +122,7 @@ RUN case "${PG_VERSION}" in "v17") \
cp /usr/local/pgsql/share/extension/address_standardizer.control /extensions/postgis && \
cp /usr/local/pgsql/share/extension/address_standardizer_data_us.control /extensions/postgis
RUN case "${PG_VERSION}" in "v17") \
echo "v17 extensions are not supported yet. Quit" && exit 0;; \
esac && \
wget https://github.com/pgRouting/pgrouting/archive/v3.4.2.tar.gz -O pgrouting.tar.gz && \
RUN wget https://github.com/pgRouting/pgrouting/archive/v3.4.2.tar.gz -O pgrouting.tar.gz && \
echo "cac297c07d34460887c4f3b522b35c470138760fe358e351ad1db4edb6ee306e pgrouting.tar.gz" | sha256sum --check && \
mkdir pgrouting-src && cd pgrouting-src && tar xzf ../pgrouting.tar.gz --strip-components=1 -C . && \
mkdir build && cd build && \
@@ -153,19 +142,12 @@ RUN case "${PG_VERSION}" in "v17") \
#
#########################################################################################
FROM build-deps AS plv8-build
ARG PG_VERSION
COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
RUN case "${PG_VERSION}" in "v17") \
echo "v17 extensions are not supported yet. Quit" && exit 0;; \
esac && \
apt update && \
RUN apt update && \
apt install -y ninja-build python3-dev libncurses5 binutils clang
RUN case "${PG_VERSION}" in "v17") \
echo "v17 extensions are not supported yet. Quit" && exit 0;; \
esac && \
wget https://github.com/plv8/plv8/archive/refs/tags/v3.1.10.tar.gz -O plv8.tar.gz && \
RUN wget https://github.com/plv8/plv8/archive/refs/tags/v3.1.10.tar.gz -O plv8.tar.gz && \
echo "7096c3290928561f0d4901b7a52794295dc47f6303102fae3f8e42dd575ad97d plv8.tar.gz" | sha256sum --check && \
mkdir plv8-src && cd plv8-src && tar xzf ../plv8.tar.gz --strip-components=1 -C . && \
# generate and copy upgrade scripts
@@ -190,13 +172,9 @@ RUN case "${PG_VERSION}" in "v17") \
#
#########################################################################################
FROM build-deps AS h3-pg-build
ARG PG_VERSION
COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
RUN case "${PG_VERSION}" in "v17") \
echo "v17 extensions are not supported yet. Quit" && exit 0;; \
esac && \
case "$(uname -m)" in \
RUN case "$(uname -m)" in \
"x86_64") \
export CMAKE_CHECKSUM=739d372726cb23129d57a539ce1432453448816e345e1545f6127296926b6754 \
;; \
@@ -214,11 +192,7 @@ RUN case "${PG_VERSION}" in "v17") \
&& /tmp/cmake-install.sh --skip-license --prefix=/usr/local/ \
&& rm /tmp/cmake-install.sh
RUN case "${PG_VERSION}" in "v17") \
mkdir -p /h3/usr/ && \
echo "v17 extensions are not supported yet. Quit" && exit 0;; \
esac && \
wget https://github.com/uber/h3/archive/refs/tags/v4.1.0.tar.gz -O h3.tar.gz && \
RUN wget https://github.com/uber/h3/archive/refs/tags/v4.1.0.tar.gz -O h3.tar.gz && \
echo "ec99f1f5974846bde64f4513cf8d2ea1b8d172d2218ab41803bf6a63532272bc h3.tar.gz" | sha256sum --check && \
mkdir h3-src && cd h3-src && tar xzf ../h3.tar.gz --strip-components=1 -C . && \
mkdir build && cd build && \
@@ -228,10 +202,7 @@ RUN case "${PG_VERSION}" in "v17") \
cp -R /h3/usr / && \
rm -rf build
RUN case "${PG_VERSION}" in "v17") \
echo "v17 extensions are not supported yet. Quit" && exit 0;; \
esac && \
wget https://github.com/zachasme/h3-pg/archive/refs/tags/v4.1.3.tar.gz -O h3-pg.tar.gz && \
RUN wget https://github.com/zachasme/h3-pg/archive/refs/tags/v4.1.3.tar.gz -O h3-pg.tar.gz && \
echo "5c17f09a820859ffe949f847bebf1be98511fb8f1bd86f94932512c00479e324 h3-pg.tar.gz" | sha256sum --check && \
mkdir h3-pg-src && cd h3-pg-src && tar xzf ../h3-pg.tar.gz --strip-components=1 -C . && \
export PATH="/usr/local/pgsql/bin:$PATH" && \
@@ -247,13 +218,9 @@ RUN case "${PG_VERSION}" in "v17") \
#
#########################################################################################
FROM build-deps AS unit-pg-build
ARG PG_VERSION
COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
RUN case "${PG_VERSION}" in "v17") \
echo "v17 extensions are not supported yet. Quit" && exit 0;; \
esac && \
wget https://github.com/df7cb/postgresql-unit/archive/refs/tags/7.7.tar.gz -O postgresql-unit.tar.gz && \
RUN wget https://github.com/df7cb/postgresql-unit/archive/refs/tags/7.7.tar.gz -O postgresql-unit.tar.gz && \
echo "411d05beeb97e5a4abf17572bfcfbb5a68d98d1018918feff995f6ee3bb03e79 postgresql-unit.tar.gz" | sha256sum --check && \
mkdir postgresql-unit-src && cd postgresql-unit-src && tar xzf ../postgresql-unit.tar.gz --strip-components=1 -C . && \
make -j $(getconf _NPROCESSORS_ONLN) PG_CONFIG=/usr/local/pgsql/bin/pg_config && \
@@ -272,7 +239,6 @@ RUN case "${PG_VERSION}" in "v17") \
#
#########################################################################################
FROM build-deps AS vector-pg-build
ARG PG_VERSION
COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
COPY patches/pgvector.patch /pgvector.patch
@@ -280,10 +246,7 @@ COPY patches/pgvector.patch /pgvector.patch
# By default, pgvector Makefile uses `-march=native`. We don't want that,
# because we build the images on different machines than where we run them.
# Pass OPTFLAGS="" to remove it.
RUN case "${PG_VERSION}" in "v17") \
echo "v17 extensions are not supported yet. Quit" && exit 0;; \
esac && \
wget https://github.com/pgvector/pgvector/archive/refs/tags/v0.7.2.tar.gz -O pgvector.tar.gz && \
RUN wget https://github.com/pgvector/pgvector/archive/refs/tags/v0.7.2.tar.gz -O pgvector.tar.gz && \
echo "617fba855c9bcb41a2a9bc78a78567fd2e147c72afd5bf9d37b31b9591632b30 pgvector.tar.gz" | sha256sum --check && \
mkdir pgvector-src && cd pgvector-src && tar xzf ../pgvector.tar.gz --strip-components=1 -C . && \
patch -p1 < /pgvector.patch && \
@@ -298,14 +261,10 @@ RUN case "${PG_VERSION}" in "v17") \
#
#########################################################################################
FROM build-deps AS pgjwt-pg-build
ARG PG_VERSION
COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
# 9742dab1b2f297ad3811120db7b21451bca2d3c9 made on 13/11/2021
RUN case "${PG_VERSION}" in "v17") \
echo "v17 extensions are not supported yet. Quit" && exit 0;; \
esac && \
wget https://github.com/michelp/pgjwt/archive/9742dab1b2f297ad3811120db7b21451bca2d3c9.tar.gz -O pgjwt.tar.gz && \
RUN wget https://github.com/michelp/pgjwt/archive/9742dab1b2f297ad3811120db7b21451bca2d3c9.tar.gz -O pgjwt.tar.gz && \
echo "cfdefb15007286f67d3d45510f04a6a7a495004be5b3aecb12cda667e774203f pgjwt.tar.gz" | sha256sum --check && \
mkdir pgjwt-src && cd pgjwt-src && tar xzf ../pgjwt.tar.gz --strip-components=1 -C . && \
make -j $(getconf _NPROCESSORS_ONLN) install PG_CONFIG=/usr/local/pgsql/bin/pg_config && \
@@ -318,13 +277,9 @@ RUN case "${PG_VERSION}" in "v17") \
#
#########################################################################################
FROM build-deps AS hypopg-pg-build
ARG PG_VERSION
COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
RUN case "${PG_VERSION}" in "v17") \
echo "v17 extensions are not supported yet. Quit" && exit 0;; \
esac && \
wget https://github.com/HypoPG/hypopg/archive/refs/tags/1.4.0.tar.gz -O hypopg.tar.gz && \
RUN wget https://github.com/HypoPG/hypopg/archive/refs/tags/1.4.0.tar.gz -O hypopg.tar.gz && \
echo "0821011743083226fc9b813c1f2ef5897a91901b57b6bea85a78e466187c6819 hypopg.tar.gz" | sha256sum --check && \
mkdir hypopg-src && cd hypopg-src && tar xzf ../hypopg.tar.gz --strip-components=1 -C . && \
make -j $(getconf _NPROCESSORS_ONLN) PG_CONFIG=/usr/local/pgsql/bin/pg_config && \
@@ -338,13 +293,9 @@ RUN case "${PG_VERSION}" in "v17") \
#
#########################################################################################
FROM build-deps AS pg-hashids-pg-build
ARG PG_VERSION
COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
RUN case "${PG_VERSION}" in "v17") \
echo "v17 extensions are not supported yet. Quit" && exit 0;; \
esac && \
wget https://github.com/iCyberon/pg_hashids/archive/refs/tags/v1.2.1.tar.gz -O pg_hashids.tar.gz && \
RUN wget https://github.com/iCyberon/pg_hashids/archive/refs/tags/v1.2.1.tar.gz -O pg_hashids.tar.gz && \
echo "74576b992d9277c92196dd8d816baa2cc2d8046fe102f3dcd7f3c3febed6822a pg_hashids.tar.gz" | sha256sum --check && \
mkdir pg_hashids-src && cd pg_hashids-src && tar xzf ../pg_hashids.tar.gz --strip-components=1 -C . && \
make -j $(getconf _NPROCESSORS_ONLN) PG_CONFIG=/usr/local/pgsql/bin/pg_config USE_PGXS=1 && \
@@ -358,15 +309,11 @@ RUN case "${PG_VERSION}" in "v17") \
#
#########################################################################################
FROM build-deps AS rum-pg-build
ARG PG_VERSION
COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
COPY patches/rum.patch /rum.patch
RUN case "${PG_VERSION}" in "v17") \
echo "v17 extensions are not supported yet. Quit" && exit 0;; \
esac && \
wget https://github.com/postgrespro/rum/archive/refs/tags/1.3.13.tar.gz -O rum.tar.gz && \
RUN wget https://github.com/postgrespro/rum/archive/refs/tags/1.3.13.tar.gz -O rum.tar.gz && \
echo "6ab370532c965568df6210bd844ac6ba649f53055e48243525b0b7e5c4d69a7d rum.tar.gz" | sha256sum --check && \
mkdir rum-src && cd rum-src && tar xzf ../rum.tar.gz --strip-components=1 -C . && \
patch -p1 < /rum.patch && \
@@ -381,13 +328,9 @@ RUN case "${PG_VERSION}" in "v17") \
#
#########################################################################################
FROM build-deps AS pgtap-pg-build
ARG PG_VERSION
COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
RUN case "${PG_VERSION}" in "v17") \
echo "v17 extensions are not supported yet. Quit" && exit 0;; \
esac && \
wget https://github.com/theory/pgtap/archive/refs/tags/v1.2.0.tar.gz -O pgtap.tar.gz && \
RUN wget https://github.com/theory/pgtap/archive/refs/tags/v1.2.0.tar.gz -O pgtap.tar.gz && \
echo "9c7c3de67ea41638e14f06da5da57bac6f5bd03fea05c165a0ec862205a5c052 pgtap.tar.gz" | sha256sum --check && \
mkdir pgtap-src && cd pgtap-src && tar xzf ../pgtap.tar.gz --strip-components=1 -C . && \
make -j $(getconf _NPROCESSORS_ONLN) PG_CONFIG=/usr/local/pgsql/bin/pg_config && \
@@ -401,13 +344,9 @@ RUN case "${PG_VERSION}" in "v17") \
#
#########################################################################################
FROM build-deps AS ip4r-pg-build
ARG PG_VERSION
COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
RUN case "${PG_VERSION}" in "v17") \
echo "v17 extensions are not supported yet. Quit" && exit 0;; \
esac && \
wget https://github.com/RhodiumToad/ip4r/archive/refs/tags/2.4.2.tar.gz -O ip4r.tar.gz && \
RUN wget https://github.com/RhodiumToad/ip4r/archive/refs/tags/2.4.2.tar.gz -O ip4r.tar.gz && \
echo "0f7b1f159974f49a47842a8ab6751aecca1ed1142b6d5e38d81b064b2ead1b4b ip4r.tar.gz" | sha256sum --check && \
mkdir ip4r-src && cd ip4r-src && tar xzf ../ip4r.tar.gz --strip-components=1 -C . && \
make -j $(getconf _NPROCESSORS_ONLN) PG_CONFIG=/usr/local/pgsql/bin/pg_config && \
@@ -421,13 +360,9 @@ RUN case "${PG_VERSION}" in "v17") \
#
#########################################################################################
FROM build-deps AS prefix-pg-build
ARG PG_VERSION
COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
RUN case "${PG_VERSION}" in "v17") \
echo "v17 extensions are not supported yet. Quit" && exit 0;; \
esac && \
wget https://github.com/dimitri/prefix/archive/refs/tags/v1.2.10.tar.gz -O prefix.tar.gz && \
RUN wget https://github.com/dimitri/prefix/archive/refs/tags/v1.2.10.tar.gz -O prefix.tar.gz && \
echo "4342f251432a5f6fb05b8597139d3ccde8dcf87e8ca1498e7ee931ca057a8575 prefix.tar.gz" | sha256sum --check && \
mkdir prefix-src && cd prefix-src && tar xzf ../prefix.tar.gz --strip-components=1 -C . && \
make -j $(getconf _NPROCESSORS_ONLN) PG_CONFIG=/usr/local/pgsql/bin/pg_config && \
@@ -441,13 +376,9 @@ RUN case "${PG_VERSION}" in "v17") \
#
#########################################################################################
FROM build-deps AS hll-pg-build
ARG PG_VERSION
COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
RUN case "${PG_VERSION}" in "v17") \
echo "v17 extensions are not supported yet. Quit" && exit 0;; \
esac && \
wget https://github.com/citusdata/postgresql-hll/archive/refs/tags/v2.18.tar.gz -O hll.tar.gz && \
RUN wget https://github.com/citusdata/postgresql-hll/archive/refs/tags/v2.18.tar.gz -O hll.tar.gz && \
echo "e2f55a6f4c4ab95ee4f1b4a2b73280258c5136b161fe9d059559556079694f0e hll.tar.gz" | sha256sum --check && \
mkdir hll-src && cd hll-src && tar xzf ../hll.tar.gz --strip-components=1 -C . && \
make -j $(getconf _NPROCESSORS_ONLN) PG_CONFIG=/usr/local/pgsql/bin/pg_config && \
@@ -461,13 +392,9 @@ RUN case "${PG_VERSION}" in "v17") \
#
#########################################################################################
FROM build-deps AS plpgsql-check-pg-build
ARG PG_VERSION
COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
RUN case "${PG_VERSION}" in "v17") \
echo "v17 extensions are not supported yet. Quit" && exit 0;; \
esac && \
wget https://github.com/okbob/plpgsql_check/archive/refs/tags/v2.5.3.tar.gz -O plpgsql_check.tar.gz && \
RUN wget https://github.com/okbob/plpgsql_check/archive/refs/tags/v2.5.3.tar.gz -O plpgsql_check.tar.gz && \
echo "6631ec3e7fb3769eaaf56e3dfedb829aa761abf163d13dba354b4c218508e1c0 plpgsql_check.tar.gz" | sha256sum --check && \
mkdir plpgsql_check-src && cd plpgsql_check-src && tar xzf ../plpgsql_check.tar.gz --strip-components=1 -C . && \
make -j $(getconf _NPROCESSORS_ONLN) PG_CONFIG=/usr/local/pgsql/bin/pg_config USE_PGXS=1 && \
@@ -486,10 +413,7 @@ COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
ARG PG_VERSION
ENV PATH="/usr/local/pgsql/bin:$PATH"
RUN case "${PG_VERSION}" in "v17") \
echo "v17 extensions are not supported yet. Quit" && exit 0;; \
esac && \
case "${PG_VERSION}" in \
RUN case "${PG_VERSION}" in \
"v14" | "v15") \
export TIMESCALEDB_VERSION=2.10.1 \
export TIMESCALEDB_CHECKSUM=6fca72a6ed0f6d32d2b3523951ede73dc5f9b0077b38450a029a5f411fdb8c73 \
@@ -522,10 +446,7 @@ COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
ARG PG_VERSION
ENV PATH="/usr/local/pgsql/bin:$PATH"
RUN case "${PG_VERSION}" in "v17") \
echo "v17 extensions are not supported yet. Quit" && exit 0;; \
esac && \
case "${PG_VERSION}" in \
RUN case "${PG_VERSION}" in \
"v14") \
export PG_HINT_PLAN_VERSION=14_1_4_1 \
export PG_HINT_PLAN_CHECKSUM=c3501becf70ead27f70626bce80ea401ceac6a77e2083ee5f3ff1f1444ec1ad1 \
@@ -538,9 +459,6 @@ RUN case "${PG_VERSION}" in "v17") \
export PG_HINT_PLAN_VERSION=16_1_6_0 \
export PG_HINT_PLAN_CHECKSUM=fc85a9212e7d2819d4ae4ac75817481101833c3cfa9f0fe1f980984e12347d00 \
;; \
"v17") \
echo "TODO: PG17 pg_hint_plan support" && exit 0 \
;; \
*) \
echo "Export the valid PG_HINT_PLAN_VERSION variable" && exit 1 \
;; \
@@ -560,14 +478,10 @@ RUN case "${PG_VERSION}" in "v17") \
#
#########################################################################################
FROM build-deps AS pg-cron-pg-build
ARG PG_VERSION
COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
ENV PATH="/usr/local/pgsql/bin/:$PATH"
RUN case "${PG_VERSION}" in "v17") \
echo "v17 extensions are not supported yet. Quit" && exit 0;; \
esac && \
wget https://github.com/citusdata/pg_cron/archive/refs/tags/v1.6.0.tar.gz -O pg_cron.tar.gz && \
RUN wget https://github.com/citusdata/pg_cron/archive/refs/tags/v1.6.0.tar.gz -O pg_cron.tar.gz && \
echo "383a627867d730222c272bfd25cd5e151c578d73f696d32910c7db8c665cc7db pg_cron.tar.gz" | sha256sum --check && \
mkdir pg_cron-src && cd pg_cron-src && tar xzf ../pg_cron.tar.gz --strip-components=1 -C . && \
make -j $(getconf _NPROCESSORS_ONLN) && \
@@ -581,13 +495,9 @@ RUN case "${PG_VERSION}" in "v17") \
#
#########################################################################################
FROM build-deps AS rdkit-pg-build
ARG PG_VERSION
COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
RUN case "${PG_VERSION}" in "v17") \
echo "v17 extensions are not supported yet. Quit" && exit 0;; \
esac && \
apt-get update && \
RUN apt-get update && \
apt-get install -y \
cmake \
libboost-iostreams1.74-dev \
@@ -597,10 +507,7 @@ RUN case "${PG_VERSION}" in "v17") \
libeigen3-dev
ENV PATH="/usr/local/pgsql/bin/:/usr/local/pgsql/:$PATH"
RUN case "${PG_VERSION}" in "v17") \
echo "v17 extensions are not supported yet. Quit" && exit 0;; \
esac && \
wget https://github.com/rdkit/rdkit/archive/refs/tags/Release_2023_03_3.tar.gz -O rdkit.tar.gz && \
RUN wget https://github.com/rdkit/rdkit/archive/refs/tags/Release_2023_03_3.tar.gz -O rdkit.tar.gz && \
echo "bdbf9a2e6988526bfeb8c56ce3cdfe2998d60ac289078e2215374288185e8c8d rdkit.tar.gz" | sha256sum --check && \
mkdir rdkit-src && cd rdkit-src && tar xzf ../rdkit.tar.gz --strip-components=1 -C . && \
cmake \
@@ -637,14 +544,10 @@ RUN case "${PG_VERSION}" in "v17") \
#
#########################################################################################
FROM build-deps AS pg-uuidv7-pg-build
ARG PG_VERSION
COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
ENV PATH="/usr/local/pgsql/bin/:$PATH"
RUN case "${PG_VERSION}" in "v17") \
echo "v17 extensions are not supported yet. Quit" && exit 0;; \
esac && \
wget https://github.com/fboulnois/pg_uuidv7/archive/refs/tags/v1.0.1.tar.gz -O pg_uuidv7.tar.gz && \
RUN wget https://github.com/fboulnois/pg_uuidv7/archive/refs/tags/v1.0.1.tar.gz -O pg_uuidv7.tar.gz && \
echo "0d0759ab01b7fb23851ecffb0bce27822e1868a4a5819bfd276101c716637a7a pg_uuidv7.tar.gz" | sha256sum --check && \
mkdir pg_uuidv7-src && cd pg_uuidv7-src && tar xzf ../pg_uuidv7.tar.gz --strip-components=1 -C . && \
make -j $(getconf _NPROCESSORS_ONLN) && \
@@ -658,14 +561,10 @@ RUN case "${PG_VERSION}" in "v17") \
#
#########################################################################################
FROM build-deps AS pg-roaringbitmap-pg-build
ARG PG_VERSION
COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
ENV PATH="/usr/local/pgsql/bin/:$PATH"
RUN case "${PG_VERSION}" in "v17") \
echo "v17 extensions is not supported yet by pg_roaringbitmap. Quit" && exit 0;; \
esac && \
wget https://github.com/ChenHuajun/pg_roaringbitmap/archive/refs/tags/v0.5.4.tar.gz -O pg_roaringbitmap.tar.gz && \
RUN wget https://github.com/ChenHuajun/pg_roaringbitmap/archive/refs/tags/v0.5.4.tar.gz -O pg_roaringbitmap.tar.gz && \
echo "b75201efcb1c2d1b014ec4ae6a22769cc7a224e6e406a587f5784a37b6b5a2aa pg_roaringbitmap.tar.gz" | sha256sum --check && \
mkdir pg_roaringbitmap-src && cd pg_roaringbitmap-src && tar xzf ../pg_roaringbitmap.tar.gz --strip-components=1 -C . && \
make -j $(getconf _NPROCESSORS_ONLN) && \
@@ -679,14 +578,10 @@ RUN case "${PG_VERSION}" in "v17") \
#
#########################################################################################
FROM build-deps AS pg-semver-pg-build
ARG PG_VERSION
COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
ENV PATH="/usr/local/pgsql/bin/:$PATH"
RUN case "${PG_VERSION}" in "v17") \
echo "v17 is not supported yet by pg_semver. Quit" && exit 0;; \
esac && \
wget https://github.com/theory/pg-semver/archive/refs/tags/v0.32.1.tar.gz -O pg_semver.tar.gz && \
RUN wget https://github.com/theory/pg-semver/archive/refs/tags/v0.32.1.tar.gz -O pg_semver.tar.gz && \
echo "fbdaf7512026d62eec03fad8687c15ed509b6ba395bff140acd63d2e4fbe25d7 pg_semver.tar.gz" | sha256sum --check && \
mkdir pg_semver-src && cd pg_semver-src && tar xzf ../pg_semver.tar.gz --strip-components=1 -C . && \
make -j $(getconf _NPROCESSORS_ONLN) && \
@@ -725,14 +620,10 @@ RUN case "${PG_VERSION}" in \
#
#########################################################################################
FROM build-deps AS pg-anon-pg-build
ARG PG_VERSION
COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
ENV PATH="/usr/local/pgsql/bin/:$PATH"
RUN case "${PG_VERSION}" in "v17") \
echo "postgresql_anonymizer does not yet support PG17" && exit 0;; \
esac && \
wget https://github.com/neondatabase/postgresql_anonymizer/archive/refs/tags/neon_1.1.1.tar.gz -O pg_anon.tar.gz && \
RUN wget https://github.com/neondatabase/postgresql_anonymizer/archive/refs/tags/neon_1.1.1.tar.gz -O pg_anon.tar.gz && \
echo "321ea8d5c1648880aafde850a2c576e4a9e7b9933a34ce272efc839328999fa9 pg_anon.tar.gz" | sha256sum --check && \
mkdir pg_anon-src && cd pg_anon-src && tar xzf ../pg_anon.tar.gz --strip-components=1 -C . && \
find /usr/local/pgsql -type f | sed 's|^/usr/local/pgsql/||' > /before.txt &&\
@@ -750,7 +641,6 @@ RUN case "${PG_VERSION}" in "v17") \
#
#########################################################################################
FROM build-deps AS rust-extensions-build
ARG PG_VERSION
COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
RUN apt-get update && \
@@ -761,11 +651,9 @@ ENV HOME=/home/nonroot
ENV PATH="/home/nonroot/.cargo/bin:/usr/local/pgsql/bin/:$PATH"
USER nonroot
WORKDIR /home/nonroot
ARG PG_VERSION
RUN case "${PG_VERSION}" in "v17") \
echo "v17 is not supported yet by pgrx. Quit" && exit 0;; \
esac && \
curl -sSO https://static.rust-lang.org/rustup/dist/$(uname -m)-unknown-linux-gnu/rustup-init && \
RUN curl -sSO https://static.rust-lang.org/rustup/dist/$(uname -m)-unknown-linux-gnu/rustup-init && \
chmod +x rustup-init && \
./rustup-init -y --no-modify-path --profile minimal --default-toolchain stable && \
rm rustup-init && \
@@ -784,10 +672,7 @@ USER root
FROM rust-extensions-build AS pg-jsonschema-pg-build
ARG PG_VERSION
RUN case "${PG_VERSION}" in "v17") \
echo "pg_jsonschema does not yet have a release that supports pg17" && exit 0;; \
esac && \
wget https://github.com/supabase/pg_jsonschema/archive/refs/tags/v0.3.1.tar.gz -O pg_jsonschema.tar.gz && \
RUN wget https://github.com/supabase/pg_jsonschema/archive/refs/tags/v0.3.1.tar.gz -O pg_jsonschema.tar.gz && \
echo "61df3db1ed83cf24f6aa39c826f8818bfa4f0bd33b587fd6b2b1747985642297 pg_jsonschema.tar.gz" | sha256sum --check && \
mkdir pg_jsonschema-src && cd pg_jsonschema-src && tar xzf ../pg_jsonschema.tar.gz --strip-components=1 -C . && \
# see commit 252b3685a27a0f4c31a0f91e983c6314838e89e8
@@ -809,10 +694,7 @@ RUN case "${PG_VERSION}" in "v17") \
FROM rust-extensions-build AS pg-graphql-pg-build
ARG PG_VERSION
RUN case "${PG_VERSION}" in "v17") \
echo "pg_graphql does not yet have a release that supports pg17 as of now" && exit 0;; \
esac && \
wget https://github.com/supabase/pg_graphql/archive/refs/tags/v1.5.7.tar.gz -O pg_graphql.tar.gz && \
RUN wget https://github.com/supabase/pg_graphql/archive/refs/tags/v1.5.7.tar.gz -O pg_graphql.tar.gz && \
echo "2b3e567a5b31019cb97ae0e33263c1bcc28580be5a444ac4c8ece5c4be2aea41 pg_graphql.tar.gz" | sha256sum --check && \
mkdir pg_graphql-src && cd pg_graphql-src && tar xzf ../pg_graphql.tar.gz --strip-components=1 -C . && \
sed -i 's/pgrx = "=0.11.3"/pgrx = { version = "0.11.3", features = [ "unsafe-postgres" ] }/g' Cargo.toml && \
@@ -832,10 +714,7 @@ FROM rust-extensions-build AS pg-tiktoken-pg-build
ARG PG_VERSION
# 26806147b17b60763039c6a6878884c41a262318 made on 26/09/2023
RUN case "${PG_VERSION}" in "v17") \
echo "pg_tiktoken does not have versions, nor support for pg17" && exit 0;; \
esac && \
wget https://github.com/kelvich/pg_tiktoken/archive/26806147b17b60763039c6a6878884c41a262318.tar.gz -O pg_tiktoken.tar.gz && \
RUN wget https://github.com/kelvich/pg_tiktoken/archive/26806147b17b60763039c6a6878884c41a262318.tar.gz -O pg_tiktoken.tar.gz && \
echo "e64e55aaa38c259512d3e27c572da22c4637418cf124caba904cd50944e5004e pg_tiktoken.tar.gz" | sha256sum --check && \
mkdir pg_tiktoken-src && cd pg_tiktoken-src && tar xzf ../pg_tiktoken.tar.gz --strip-components=1 -C . && \
# TODO update pgrx version in the pg_tiktoken repo and remove this line
@@ -854,10 +733,7 @@ RUN case "${PG_VERSION}" in "v17") \
FROM rust-extensions-build AS pg-pgx-ulid-build
ARG PG_VERSION
RUN case "${PG_VERSION}" in "v17") \
echo "pgx_ulid does not support pg17 as of the latest version (0.1.5)" && exit 0;; \
esac && \
wget https://github.com/pksunkara/pgx_ulid/archive/refs/tags/v0.1.5.tar.gz -O pgx_ulid.tar.gz && \
RUN wget https://github.com/pksunkara/pgx_ulid/archive/refs/tags/v0.1.5.tar.gz -O pgx_ulid.tar.gz && \
echo "9d1659a2da65af0133d5451c454de31b37364e3502087dadf579f790bc8bef17 pgx_ulid.tar.gz" | sha256sum --check && \
mkdir pgx_ulid-src && cd pgx_ulid-src && tar xzf ../pgx_ulid.tar.gz --strip-components=1 -C . && \
sed -i 's/pgrx = "^0.11.2"/pgrx = { version = "=0.11.3", features = [ "unsafe-postgres" ] }/g' Cargo.toml && \
@@ -872,14 +748,10 @@ RUN case "${PG_VERSION}" in "v17") \
#########################################################################################
FROM build-deps AS wal2json-pg-build
ARG PG_VERSION
COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
ENV PATH="/usr/local/pgsql/bin/:$PATH"
RUN case "${PG_VERSION}" in "v17") \
echo "We'll need to update wal2json to 2.6+ for pg17 support" && exit 0;; \
esac && \
wget https://github.com/eulerto/wal2json/archive/refs/tags/wal2json_2_5.tar.gz && \
RUN wget https://github.com/eulerto/wal2json/archive/refs/tags/wal2json_2_5.tar.gz && \
echo "b516653575541cf221b99cf3f8be9b6821f6dbcfc125675c85f35090f824f00e wal2json_2_5.tar.gz" | sha256sum --check && \
mkdir wal2json-src && cd wal2json-src && tar xzf ../wal2json_2_5.tar.gz --strip-components=1 -C . && \
make -j $(getconf _NPROCESSORS_ONLN) && \
@@ -892,14 +764,10 @@ RUN case "${PG_VERSION}" in "v17") \
#
#########################################################################################
FROM build-deps AS pg-ivm-build
ARG PG_VERSION
COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
ENV PATH="/usr/local/pgsql/bin/:$PATH"
RUN case "${PG_VERSION}" in "v17") \
echo "We'll need to update pg_ivm to 1.9+ for pg17 support" && exit 0;; \
esac && \
wget https://github.com/sraoss/pg_ivm/archive/refs/tags/v1.7.tar.gz -O pg_ivm.tar.gz && \
RUN wget https://github.com/sraoss/pg_ivm/archive/refs/tags/v1.7.tar.gz -O pg_ivm.tar.gz && \
echo "ebfde04f99203c7be4b0e873f91104090e2e83e5429c32ac242d00f334224d5e pg_ivm.tar.gz" | sha256sum --check && \
mkdir pg_ivm-src && cd pg_ivm-src && tar xzf ../pg_ivm.tar.gz --strip-components=1 -C . && \
make -j $(getconf _NPROCESSORS_ONLN) && \
@@ -913,14 +781,10 @@ RUN case "${PG_VERSION}" in "v17") \
#
#########################################################################################
FROM build-deps AS pg-partman-build
ARG PG_VERSION
COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
ENV PATH="/usr/local/pgsql/bin/:$PATH"
RUN case "${PG_VERSION}" in "v17") \
echo "pg_partman doesn't support PG17 yet" && exit 0;; \
esac && \
wget https://github.com/pgpartman/pg_partman/archive/refs/tags/v5.0.1.tar.gz -O pg_partman.tar.gz && \
RUN wget https://github.com/pgpartman/pg_partman/archive/refs/tags/v5.0.1.tar.gz -O pg_partman.tar.gz && \
echo "75b541733a9659a6c90dbd40fccb904a630a32880a6e3044d0c4c5f4c8a65525 pg_partman.tar.gz" | sha256sum --check && \
mkdir pg_partman-src && cd pg_partman-src && tar xzf ../pg_partman.tar.gz --strip-components=1 -C . && \
make -j $(getconf _NPROCESSORS_ONLN) && \
@@ -990,8 +854,8 @@ RUN make -j $(getconf _NPROCESSORS_ONLN) \
case "${PG_VERSION}" in \
"v14" | "v15") \
;; \
"v16" | "v17") \
echo "Skipping HNSW for PostgreSQL ${PG_VERSION}" && exit 0 \
"v16") \
echo "Skipping HNSW for PostgreSQL 16" && exit 0 \
;; \
*) \
echo "unexpected PostgreSQL version" && exit 1 \
@@ -1035,7 +899,7 @@ FROM neon-pg-ext-build AS postgres-cleanup-layer
COPY --from=neon-pg-ext-build /usr/local/pgsql /usr/local/pgsql
# Remove binaries from /bin/ that we won't use (or would manually copy & install otherwise)
RUN cd /usr/local/pgsql/bin && rm -f ecpg raster2pgsql shp2pgsql pgtopo_export pgtopo_import pgsql2shp
RUN cd /usr/local/pgsql/bin && rm ecpg raster2pgsql shp2pgsql pgtopo_export pgtopo_import pgsql2shp
# Remove headers that we won't need anymore - we've completed installation of all extensions
RUN rm -r /usr/local/pgsql/include
@@ -1054,10 +918,7 @@ RUN rm /usr/local/pgsql/lib/lib*.a
FROM neon-pg-ext-build AS neon-pg-ext-test
ARG PG_VERSION
RUN case "${PG_VERSION}" in "v17") \
echo "v17 extensions are not supported yet. Quit" && exit 0;; \
esac && \
mkdir /ext-src
RUN mkdir /ext-src
#COPY --from=postgis-build /postgis.tar.gz /ext-src/
#COPY --from=postgis-build /sfcgal/* /usr
@@ -1095,39 +956,18 @@ COPY --from=pg-anon-pg-build /pg_anon.tar.gz /ext-src
COPY patches/pg_anon.patch /ext-src
COPY --from=pg-ivm-build /pg_ivm.tar.gz /ext-src
COPY --from=pg-partman-build /pg_partman.tar.gz /ext-src
RUN case "${PG_VERSION}" in "v17") \
echo "v17 extensions are not supported yet. Quit" && exit 0;; \
esac && \
cd /ext-src/ && for f in *.tar.gz; \
RUN cd /ext-src/ && for f in *.tar.gz; \
do echo $f; dname=$(echo $f | sed 's/\.tar.*//')-src; \
rm -rf $dname; mkdir $dname; tar xzf $f --strip-components=1 -C $dname \
|| exit 1; rm -f $f; done
RUN case "${PG_VERSION}" in "v17") \
echo "v17 extensions are not supported yet. Quit" && exit 0;; \
esac && \
cd /ext-src/rum-src && patch -p1 <../rum.patch
RUN case "${PG_VERSION}" in "v17") \
echo "v17 extensions are not supported yet. Quit" && exit 0;; \
esac && \
cd /ext-src/pgvector-src && patch -p1 <../pgvector.patch
RUN cd /ext-src/pgvector-src && patch -p1 <../pgvector.patch
RUN cd /ext-src/rum-src && patch -p1 <../rum.patch
# cmake is required for the h3 test
RUN case "${PG_VERSION}" in "v17") \
echo "v17 extensions are not supported yet. Quit" && exit 0;; \
esac && \
apt-get update && apt-get install -y cmake
RUN case "${PG_VERSION}" in "v17") \
echo "v17 extensions are not supported yet. Quit" && exit 0;; \
esac && \
cd /ext-src/pg_hint_plan-src && patch -p1 < /ext-src/pg_hint_plan.patch
RUN apt-get update && apt-get install -y cmake
RUN cd /ext-src/pg_hint_plan-src && patch -p1 < /ext-src/pg_hint_plan.patch
COPY --chmod=755 docker-compose/run-tests.sh /run-tests.sh
RUN case "${PG_VERSION}" in "v17") \
echo "v17 extensions are not supported yet. Quit" && exit 0;; \
esac && \
patch -p1 </ext-src/pg_anon.patch
RUN case "${PG_VERSION}" in "v17") \
echo "v17 extensions are not supported yet. Quit" && exit 0;; \
esac && \
patch -p1 </ext-src/pg_cron.patch
RUN patch -p1 </ext-src/pg_anon.patch
RUN patch -p1 </ext-src/pg_cron.patch
ENV PATH=/usr/local/pgsql/bin:$PATH
ENV PGHOST=compute
ENV PGPORT=55433

View File

@@ -119,8 +119,6 @@ $(POSTGRES_INSTALL_DIR)/build/%/config.status:
# I'm not sure why it wouldn't work, but this is the only place (apart from
# the "build-all-versions" entry points) where direct mention of PostgreSQL
# versions is used.
.PHONY: postgres-configure-v17
postgres-configure-v17: $(POSTGRES_INSTALL_DIR)/build/v17/config.status
.PHONY: postgres-configure-v16
postgres-configure-v16: $(POSTGRES_INSTALL_DIR)/build/v16/config.status
.PHONY: postgres-configure-v15
@@ -217,31 +215,29 @@ neon-pg-clean-ext-%:
# they depend on openssl and other libraries that are not included in our
# Rust build.
.PHONY: walproposer-lib
walproposer-lib: neon-pg-ext-v17
walproposer-lib: neon-pg-ext-v16
+@echo "Compiling walproposer-lib"
mkdir -p $(POSTGRES_INSTALL_DIR)/build/walproposer-lib
$(MAKE) PG_CONFIG=$(POSTGRES_INSTALL_DIR)/v17/bin/pg_config CFLAGS='$(PG_CFLAGS) $(COPT)' \
$(MAKE) PG_CONFIG=$(POSTGRES_INSTALL_DIR)/v16/bin/pg_config CFLAGS='$(PG_CFLAGS) $(COPT)' \
-C $(POSTGRES_INSTALL_DIR)/build/walproposer-lib \
-f $(ROOT_PROJECT_DIR)/pgxn/neon/Makefile walproposer-lib
cp $(POSTGRES_INSTALL_DIR)/v17/lib/libpgport.a $(POSTGRES_INSTALL_DIR)/build/walproposer-lib
cp $(POSTGRES_INSTALL_DIR)/v17/lib/libpgcommon.a $(POSTGRES_INSTALL_DIR)/build/walproposer-lib
cp $(POSTGRES_INSTALL_DIR)/v16/lib/libpgport.a $(POSTGRES_INSTALL_DIR)/build/walproposer-lib
cp $(POSTGRES_INSTALL_DIR)/v16/lib/libpgcommon.a $(POSTGRES_INSTALL_DIR)/build/walproposer-lib
ifeq ($(UNAME_S),Linux)
$(AR) d $(POSTGRES_INSTALL_DIR)/build/walproposer-lib/libpgport.a \
pg_strong_random.o
$(AR) d $(POSTGRES_INSTALL_DIR)/build/walproposer-lib/libpgcommon.a \
checksum_helper.o \
cryptohash_openssl.o \
pg_crc32c.o \
hmac_openssl.o \
cryptohash_openssl.o \
scram-common.o \
md5_common.o \
parse_manifest.o \
scram-common.o
ifeq ($(UNAME_S),Linux)
$(AR) d $(POSTGRES_INSTALL_DIR)/build/walproposer-lib/libpgcommon.a \
pg_crc32c.o
checksum_helper.o
endif
.PHONY: walproposer-lib-clean
walproposer-lib-clean:
$(MAKE) PG_CONFIG=$(POSTGRES_INSTALL_DIR)/v17/bin/pg_config \
$(MAKE) PG_CONFIG=$(POSTGRES_INSTALL_DIR)/v16/bin/pg_config \
-C $(POSTGRES_INSTALL_DIR)/build/walproposer-lib \
-f $(ROOT_PROJECT_DIR)/pgxn/neon/Makefile clean
@@ -249,44 +245,38 @@ walproposer-lib-clean:
neon-pg-ext: \
neon-pg-ext-v14 \
neon-pg-ext-v15 \
neon-pg-ext-v16 \
neon-pg-ext-v17
neon-pg-ext-v16
.PHONY: neon-pg-clean-ext
neon-pg-clean-ext: \
neon-pg-clean-ext-v14 \
neon-pg-clean-ext-v15 \
neon-pg-clean-ext-v16 \
neon-pg-clean-ext-v17
neon-pg-clean-ext-v16
# shorthand to build all Postgres versions
.PHONY: postgres
postgres: \
postgres-v14 \
postgres-v15 \
postgres-v16 \
postgres-v17
postgres-v16
.PHONY: postgres-headers
postgres-headers: \
postgres-headers-v14 \
postgres-headers-v15 \
postgres-headers-v16 \
postgres-headers-v17
postgres-headers-v16
.PHONY: postgres-clean
postgres-clean: \
postgres-clean-v14 \
postgres-clean-v15 \
postgres-clean-v16 \
postgres-clean-v17
postgres-clean-v16
.PHONY: postgres-check
postgres-check: \
postgres-check-v14 \
postgres-check-v15 \
postgres-check-v16 \
postgres-check-v17
postgres-check-v16
# This doesn't remove the effects of 'configure'.
.PHONY: clean
@@ -331,13 +321,13 @@ postgres-%-pgindent: postgres-%-pg-bsd-indent postgres-%-typedefs.list
rm -f pg*.BAK
# Indent pxgn/neon.
.PHONY: neon-pgindent
neon-pgindent: postgres-v17-pg-bsd-indent neon-pg-ext-v17
$(MAKE) PG_CONFIG=$(POSTGRES_INSTALL_DIR)/v17/bin/pg_config CFLAGS='$(PG_CFLAGS) $(COPT)' \
FIND_TYPEDEF=$(ROOT_PROJECT_DIR)/vendor/postgres-v17/src/tools/find_typedef \
INDENT=$(POSTGRES_INSTALL_DIR)/build/v17/src/tools/pg_bsd_indent/pg_bsd_indent \
PGINDENT_SCRIPT=$(ROOT_PROJECT_DIR)/vendor/postgres-v17/src/tools/pgindent/pgindent \
-C $(POSTGRES_INSTALL_DIR)/build/neon-v17 \
.PHONY: pgindent
neon-pgindent: postgres-v16-pg-bsd-indent neon-pg-ext-v16
$(MAKE) PG_CONFIG=$(POSTGRES_INSTALL_DIR)/v16/bin/pg_config CFLAGS='$(PG_CFLAGS) $(COPT)' \
FIND_TYPEDEF=$(ROOT_PROJECT_DIR)/vendor/postgres-v16/src/tools/find_typedef \
INDENT=$(POSTGRES_INSTALL_DIR)/build/v16/src/tools/pg_bsd_indent/pg_bsd_indent \
PGINDENT_SCRIPT=$(ROOT_PROJECT_DIR)/vendor/postgres-v16/src/tools/pgindent/pgindent \
-C $(POSTGRES_INSTALL_DIR)/build/neon-v16 \
-f $(ROOT_PROJECT_DIR)/pgxn/neon/Makefile pgindent

View File

@@ -1052,19 +1052,26 @@ impl ComputeNode {
let pg_process = self.start_postgres(pspec.storage_auth_token.clone())?;
let config_time = Utc::now();
if pspec.spec.mode == ComputeMode::Primary && !pspec.spec.skip_pg_catalog_updates {
let pgdata_path = Path::new(&self.pgdata);
// temporarily reset max_cluster_size in config
// to avoid the possibility of hitting the limit, while we are applying config:
// creating new extensions, roles, etc...
config::with_compute_ctl_tmp_override(pgdata_path, "neon.max_cluster_size=-1", || {
if pspec.spec.mode == ComputeMode::Primary {
if !pspec.spec.skip_pg_catalog_updates {
let pgdata_path = Path::new(&self.pgdata);
// temporarily reset max_cluster_size in config
// to avoid the possibility of hitting the limit, while we are applying config:
// creating new extensions, roles, etc...
config::with_compute_ctl_tmp_override(
pgdata_path,
"neon.max_cluster_size=-1",
|| {
self.pg_reload_conf()?;
self.apply_config(&compute_state)?;
Ok(())
},
)?;
self.pg_reload_conf()?;
self.apply_config(&compute_state)?;
Ok(())
})?;
self.pg_reload_conf()?;
}
self.post_apply_config()?;
}
let startup_end_time = Utc::now();

View File

@@ -124,7 +124,6 @@ fn parse_pg_version(human_version: &str) -> &str {
"14" => return "v14",
"15" => return "v15",
"16" => return "v16",
"17" => return "v17",
_ => {}
},
_ => {}

View File

@@ -640,8 +640,6 @@ async fn handle_timeline(timeline_match: &ArgMatches, env: &mut local_env::Local
}
Some(("branch", branch_match)) => {
let tenant_id = get_tenant_id(branch_match, env)?;
let new_timeline_id =
parse_timeline_id(branch_match)?.unwrap_or(TimelineId::generate());
let new_branch_name = branch_match
.get_one::<String>("branch-name")
.ok_or_else(|| anyhow!("No branch name provided"))?;
@@ -660,6 +658,7 @@ async fn handle_timeline(timeline_match: &ArgMatches, env: &mut local_env::Local
.map(|lsn_str| Lsn::from_str(lsn_str))
.transpose()
.context("Failed to parse ancestor start Lsn from the request")?;
let new_timeline_id = TimelineId::generate();
let storage_controller = StorageController::from_env(env);
let create_req = TimelineCreateRequest {
new_timeline_id,
@@ -1571,6 +1570,7 @@ fn cli() -> Command {
.value_parser(value_parser!(PathBuf))
.value_name("config")
)
.arg(pg_version_arg.clone())
.arg(force_arg)
)
.subcommand(
@@ -1583,7 +1583,6 @@ fn cli() -> Command {
.subcommand(Command::new("branch")
.about("Create a new timeline, using another timeline as a base, copying its data")
.arg(tenant_id_arg.clone())
.arg(timeline_id_arg.clone())
.arg(branch_name_arg.clone())
.arg(Arg::new("ancestor-branch-name").long("ancestor-branch-name")
.help("Use last Lsn of another timeline (and its data) as base when creating the new timeline. The timeline gets resolved by its branch name.").required(false))

View File

@@ -342,7 +342,7 @@ impl LocalEnv {
#[allow(clippy::manual_range_patterns)]
match pg_version {
14 | 15 | 16 | 17 => Ok(path.join(format!("v{pg_version}"))),
14 | 15 | 16 => Ok(path.join(format!("v{pg_version}"))),
_ => bail!("Unsupported postgres version: {}", pg_version),
}
}

View File

@@ -75,14 +75,14 @@ impl PageServerNode {
}
}
fn pageserver_make_identity_toml(&self, node_id: NodeId) -> toml_edit::DocumentMut {
toml_edit::DocumentMut::from_str(&format!("id={node_id}")).unwrap()
fn pageserver_make_identity_toml(&self, node_id: NodeId) -> toml_edit::Document {
toml_edit::Document::from_str(&format!("id={node_id}")).unwrap()
}
fn pageserver_init_make_toml(
&self,
conf: NeonLocalInitPageserverConf,
) -> anyhow::Result<toml_edit::DocumentMut> {
) -> anyhow::Result<toml_edit::Document> {
assert_eq!(&PageServerConf::from(&conf), &self.conf, "during neon_local init, we derive the runtime state of ps conf (self.conf) from the --config flag fully");
// TODO(christian): instead of what we do here, create a pageserver_api::config::ConfigToml (PR #7656)
@@ -137,9 +137,9 @@ impl PageServerNode {
// Turn `overrides` into a toml document.
// TODO: above code is legacy code, it should be refactored to use toml_edit directly.
let mut config_toml = toml_edit::DocumentMut::new();
let mut config_toml = toml_edit::Document::new();
for fragment_str in overrides {
let fragment = toml_edit::DocumentMut::from_str(&fragment_str)
let fragment = toml_edit::Document::from_str(&fragment_str)
.expect("all fragments in `overrides` are valid toml documents, this function controls that");
for (key, item) in fragment.iter() {
config_toml.insert(key, item.clone());

View File

@@ -28,7 +28,6 @@ use utils::{
auth::{encode_from_key_file, Claims, Scope},
id::{NodeId, TenantId},
};
use whoami::username;
pub struct StorageController {
env: LocalEnv,
@@ -184,7 +183,7 @@ impl StorageController {
/// to other versions if that one isn't found. Some automated tests create circumstances
/// where only one version is available in pg_distrib_dir, such as `test_remote_extensions`.
async fn get_pg_dir(&self, dir_name: &str) -> anyhow::Result<Utf8PathBuf> {
let prefer_versions = [STORAGE_CONTROLLER_POSTGRES_VERSION, 16, 15, 14];
let prefer_versions = [STORAGE_CONTROLLER_POSTGRES_VERSION, 15, 14];
for v in prefer_versions {
let path = Utf8PathBuf::from_path_buf(self.env.pg_dir(v, dir_name)?).unwrap();
@@ -212,16 +211,7 @@ impl StorageController {
/// Readiness check for our postgres process
async fn pg_isready(&self, pg_bin_dir: &Utf8Path, postgres_port: u16) -> anyhow::Result<bool> {
let bin_path = pg_bin_dir.join("pg_isready");
let args = [
"-h",
"localhost",
"-U",
&username(),
"-d",
DB_NAME,
"-p",
&format!("{}", postgres_port),
];
let args = ["-h", "localhost", "-p", &format!("{}", postgres_port)];
let exitcode = Command::new(bin_path).args(args).spawn()?.wait().await?;
Ok(exitcode.success())
@@ -235,11 +225,7 @@ impl StorageController {
///
/// Returns the database url
pub async fn setup_database(&self, postgres_port: u16) -> anyhow::Result<String> {
let database_url = format!(
"postgresql://{}@localhost:{}/{DB_NAME}",
&username(),
postgres_port
);
let database_url = format!("postgresql://localhost:{}/{DB_NAME}", postgres_port);
let pg_bin_dir = self.get_pg_bin_dir().await?;
let createdb_path = pg_bin_dir.join("createdb");
@@ -249,10 +235,6 @@ impl StorageController {
"localhost",
"-p",
&format!("{}", postgres_port),
"-U",
&username(),
"-O",
&username(),
DB_NAME,
])
.output()
@@ -289,7 +271,7 @@ impl StorageController {
// But tokio-postgres fork doesn't have this upstream commit:
// https://github.com/sfackler/rust-postgres/commit/cb609be758f3fb5af537f04b584a2ee0cebd5e79
// => we should rebase our fork => TODO https://github.com/neondatabase/neon/issues/8399
.user(&username())
.user(&whoami::username())
.dbname(DB_NAME)
.connect(tokio_postgres::NoTls)
.await
@@ -346,12 +328,6 @@ impl StorageController {
let pg_log_path = pg_data_path.join("postgres.log");
if !tokio::fs::try_exists(&pg_data_path).await? {
let initdb_args = ["-D", pg_data_path.as_ref(), "--username", &username()];
tracing::info!(
"Initializing storage controller database with args: {:?}",
initdb_args
);
// Initialize empty database
let initdb_path = pg_bin_dir.join("initdb");
let mut child = Command::new(&initdb_path)
@@ -359,7 +335,7 @@ impl StorageController {
("LD_LIBRARY_PATH".to_owned(), pg_lib_dir.to_string()),
("DYLD_LIBRARY_PATH".to_owned(), pg_lib_dir.to_string()),
])
.args(initdb_args)
.args(["-D", pg_data_path.as_ref()])
.spawn()
.expect("Failed to spawn initdb");
let status = child.wait().await?;
@@ -388,14 +364,8 @@ impl StorageController {
pg_data_path.as_ref(),
"-l",
pg_log_path.as_ref(),
"-U",
&username(),
"start",
];
tracing::info!(
"Starting storage controller database with args: {:?}",
db_start_args
);
background_process::start_process(
"storage_controller_db",

View File

@@ -1,8 +1,8 @@
use anyhow::{bail, Result};
use byteorder::{ByteOrder, BE};
use postgres_ffi::relfile_utils::{FSM_FORKNUM, VISIBILITYMAP_FORKNUM};
use postgres_ffi::Oid;
use postgres_ffi::RepOriginId;
use postgres_ffi::{Oid, TransactionId};
use serde::{Deserialize, Serialize};
use std::{fmt, ops::Range};
@@ -263,6 +263,15 @@ impl Key {
field5: u8::MAX,
field6: u32::MAX,
};
/// A key slightly smaller than [`Key::MAX`] for use in layer key ranges to avoid them to be confused with L0 layers
pub const NON_L0_MAX: Key = Key {
field1: u8::MAX,
field2: u32::MAX,
field3: u32::MAX,
field4: u32::MAX,
field5: u8::MAX,
field6: u32::MAX - 1,
};
pub fn from_hex(s: &str) -> Result<Self> {
if s.len() != 36 {
@@ -350,17 +359,7 @@ impl Key {
// 02 00000000 00000000 00000000 00 00000000
//
// TwoPhaseFile:
//
// 02 00000000 00000000 00XXXXXX XX XXXXXXXX
//
// \______XID_________/
//
// The 64-bit XID is stored a little awkwardly in field6, field5 and
// field4. PostgreSQL v16 and below only stored a 32-bit XID, which
// fit completely in field6, but starting with PostgreSQL v17, a full
// 64-bit XID is used. Most pageserver code that accesses
// TwoPhaseFiles now deals with 64-bit XIDs even on v16, the high bits
// are just unused.
// 02 00000000 00000000 00000000 00 XID
//
// ControlFile:
// 03 00000000 00000000 00000000 00 00000000
@@ -592,36 +591,35 @@ pub const TWOPHASEDIR_KEY: Key = Key {
};
#[inline(always)]
pub fn twophase_file_key(xid: u64) -> Key {
pub fn twophase_file_key(xid: TransactionId) -> Key {
Key {
field1: 0x02,
field2: 0,
field3: 0,
field4: ((xid & 0xFFFFFF0000000000) >> 40) as u32,
field5: ((xid & 0x000000FF00000000) >> 32) as u8,
field6: (xid & 0x00000000FFFFFFFF) as u32,
field4: 0,
field5: 0,
field6: xid,
}
}
#[inline(always)]
pub fn twophase_key_range(xid: u64) -> Range<Key> {
// 64-bit XIDs really should not overflow
pub fn twophase_key_range(xid: TransactionId) -> Range<Key> {
let (next_xid, overflowed) = xid.overflowing_add(1);
Key {
field1: 0x02,
field2: 0,
field3: 0,
field4: ((xid & 0xFFFFFF0000000000) >> 40) as u32,
field5: ((xid & 0x000000FF00000000) >> 32) as u8,
field6: (xid & 0x00000000FFFFFFFF) as u32,
field4: 0,
field5: 0,
field6: xid,
}..Key {
field1: 0x02,
field2: 0,
field3: u32::from(overflowed),
field4: ((next_xid & 0xFFFFFF0000000000) >> 40) as u32,
field5: ((next_xid & 0x000000FF00000000) >> 32) as u8,
field6: (next_xid & 0x00000000FFFFFFFF) as u32,
field3: 0,
field4: 0,
field5: u8::from(overflowed),
field6: next_xid,
}
}

View File

@@ -62,7 +62,7 @@ use bytes::{Buf, BufMut, Bytes, BytesMut};
serde::Serialize,
serde::Deserialize,
strum_macros::Display,
strum_macros::VariantNames,
strum_macros::EnumVariantNames,
strum_macros::AsRefStr,
strum_macros::IntoStaticStr,
)]

View File

@@ -89,19 +89,8 @@ impl PageserverUtilization {
/// If a node is currently hosting more work than it can comfortably handle. This does not indicate that
/// it will fail, but it is a strong signal that more work should not be added unless there is no alternative.
///
/// When a node is overloaded, we may override soft affinity preferences and do things like scheduling
/// into a node in a less desirable AZ, if all the nodes in the preferred AZ are overloaded.
pub fn is_overloaded(score: RawScore) -> bool {
// Why the factor of two? This is unscientific but reflects behavior of real systems:
// - In terms of shard counts, a node's preferred max count is a soft limit intended to keep
// startup and housekeeping jobs nice and responsive. We can go to double this limit if needed
// until some more nodes are deployed.
// - In terms of disk space, the node's utilization heuristic assumes every tenant needs to
// hold its biggest timeline fully on disk, which is tends to be an over estimate when
// some tenants are very idle and have dropped layers from disk. In practice going up to
// double is generally better than giving up and scheduling in a sub-optimal AZ.
score >= 2 * Self::UTILIZATION_FULL
score >= Self::UTILIZATION_FULL
}
pub fn adjust_shard_count_max(&mut self, shard_count: u32) {

View File

@@ -81,16 +81,17 @@ pub fn is_expected_io_error(e: &io::Error) -> bool {
)
}
#[async_trait::async_trait]
pub trait Handler<IO> {
/// Handle single query.
/// postgres_backend will issue ReadyForQuery after calling this (this
/// might be not what we want after CopyData streaming, but currently we don't
/// care). It will also flush out the output buffer.
fn process_query(
async fn process_query(
&mut self,
pgb: &mut PostgresBackend<IO>,
query_string: &str,
) -> impl Future<Output = Result<(), QueryError>>;
) -> Result<(), QueryError>;
/// Called on startup packet receival, allows to process params.
///

View File

@@ -23,6 +23,7 @@ async fn make_tcp_pair() -> (TcpStream, TcpStream) {
struct TestHandler {}
#[async_trait::async_trait]
impl<IO: AsyncRead + AsyncWrite + Unpin + Send> Handler<IO> for TestHandler {
// return single col 'hey' for any query
async fn process_query(

View File

@@ -56,7 +56,7 @@ fn main() -> anyhow::Result<()> {
PathBuf::from("pg_install")
};
for pg_version in &["v14", "v15", "v16", "v17"] {
for pg_version in &["v14", "v15", "v16"] {
let mut pg_install_dir_versioned = pg_install_dir.join(pg_version);
if pg_install_dir_versioned.is_relative() {
let cwd = env::current_dir().context("Failed to get current_dir")?;
@@ -121,7 +121,6 @@ fn main() -> anyhow::Result<()> {
.allowlist_type("XLogPageHeaderData")
.allowlist_type("XLogLongPageHeaderData")
.allowlist_var("XLOG_PAGE_MAGIC")
.allowlist_var("PG_MAJORVERSION_NUM")
.allowlist_var("PG_CONTROL_FILE_SIZE")
.allowlist_var("PG_CONTROLFILEDATA_OFFSETOF_CRC")
.allowlist_type("PageHeaderData")

View File

@@ -44,9 +44,6 @@ macro_rules! postgres_ffi {
// Re-export some symbols from bindings
pub use bindings::DBState_DB_SHUTDOWNED;
pub use bindings::{CheckPoint, ControlFileData, XLogRecord};
pub const ZERO_CHECKPOINT: bytes::Bytes =
bytes::Bytes::from_static(&[0u8; xlog_utils::SIZEOF_CHECKPOINT]);
}
};
}
@@ -57,7 +54,6 @@ macro_rules! for_all_postgres_versions {
$macro!(v14);
$macro!(v15);
$macro!(v16);
$macro!(v17);
};
}
@@ -92,7 +88,6 @@ macro_rules! dispatch_pgversion {
14 : v14,
15 : v15,
16 : v16,
17 : v17,
]
)
};
@@ -111,110 +106,6 @@ macro_rules! dispatch_pgversion {
};
}
#[macro_export]
macro_rules! enum_pgversion_dispatch {
($name:expr, $typ:ident, $bind:ident, $code:block) => {
enum_pgversion_dispatch!(
name = $name,
bind = $bind,
typ = $typ,
code = $code,
pgversions = [
V14 : v14,
V15 : v15,
V16 : v16,
V17 : v17,
]
)
};
(name = $name:expr,
bind = $bind:ident,
typ = $typ:ident,
code = $code:block,
pgversions = [$($variant:ident : $md:ident),+ $(,)?]) => {
match $name {
$(
self::$typ::$variant($bind) => {
use $crate::$md as pgv;
$code
}
),+,
}
};
}
#[macro_export]
macro_rules! enum_pgversion {
{$name:ident, pgv :: $t:ident} => {
enum_pgversion!{
name = $name,
typ = $t,
pgversions = [
V14 : v14,
V15 : v15,
V16 : v16,
V17 : v17,
]
}
};
{$name:ident, pgv :: $p:ident :: $t:ident} => {
enum_pgversion!{
name = $name,
path = $p,
typ = $t,
pgversions = [
V14 : v14,
V15 : v15,
V16 : v16,
V17 : v17,
]
}
};
{name = $name:ident,
typ = $t:ident,
pgversions = [$($variant:ident : $md:ident),+ $(,)?]} => {
pub enum $name {
$($variant ( $crate::$md::$t )),+
}
impl self::$name {
pub fn pg_version(&self) -> u32 {
enum_pgversion_dispatch!(self, $name, _ign, {
pgv::bindings::PG_MAJORVERSION_NUM
})
}
}
$(
impl Into<self::$name> for $crate::$md::$t {
fn into(self) -> self::$name {
self::$name::$variant (self)
}
}
)+
};
{name = $name:ident,
path = $p:ident,
typ = $t:ident,
pgversions = [$($variant:ident : $md:ident),+ $(,)?]} => {
pub enum $name {
$($variant ($crate::$md::$p::$t)),+
}
impl $name {
pub fn pg_version(&self) -> u32 {
enum_pgversion_dispatch!(self, $name, _ign, {
pgv::bindings::PG_MAJORVERSION_NUM
})
}
}
$(
impl Into<$name> for $crate::$md::$p::$t {
fn into(self) -> $name {
$name::$variant (self)
}
}
)+
};
}
pub mod pg_constants;
pub mod relfile_utils;

View File

@@ -152,9 +152,6 @@ pub const XLH_UPDATE_OLD_ALL_VISIBLE_CLEARED: u8 = (1 << 0) as u8;
pub const XLH_UPDATE_NEW_ALL_VISIBLE_CLEARED: u8 = (1 << 1) as u8;
pub const XLH_DELETE_ALL_VISIBLE_CLEARED: u8 = (1 << 0) as u8;
// From heapam_xlog.h
pub const XLOG_HEAP2_REWRITE: u8 = 0x00;
// From replication/message.h
pub const XLOG_LOGICAL_MESSAGE: u8 = 0x00;
@@ -222,20 +219,15 @@ pub const INVALID_TRANSACTION_ID: u32 = 0;
pub const FIRST_BOOTSTRAP_OBJECT_ID: u32 = 12000;
pub const FIRST_NORMAL_OBJECT_ID: u32 = 16384;
/* pg_control.h */
pub const XLOG_CHECKPOINT_SHUTDOWN: u8 = 0x00;
pub const XLOG_CHECKPOINT_ONLINE: u8 = 0x10;
pub const XLOG_PARAMETER_CHANGE: u8 = 0x60;
pub const XLOG_END_OF_RECOVERY: u8 = 0x90;
pub const XLP_FIRST_IS_CONTRECORD: u16 = 0x0001;
pub const XLP_LONG_HEADER: u16 = 0x0002;
/* From xlog.h */
pub const XLOG_REPLORIGIN_SET: u8 = 0x00;
pub const XLOG_REPLORIGIN_DROP: u8 = 0x10;
/* xlog_internal.h */
pub const XLP_FIRST_IS_CONTRECORD: u16 = 0x0001;
pub const XLP_LONG_HEADER: u16 = 0x0002;
/* From replication/slot.h */
pub const REPL_SLOT_ON_DISK_OFFSETOF_RESTART_LSN: usize = 4*4 /* offset of `slotdata` in ReplicationSlotOnDisk */
+ 64 /* NameData */ + 4*4;
@@ -253,6 +245,33 @@ pub const VM_HEAPBLOCKS_PER_PAGE: u32 =
/* From origin.c */
pub const REPLICATION_STATE_MAGIC: u32 = 0x1257DADE;
// List of subdirectories inside pgdata.
// Copied from src/bin/initdb/initdb.c
pub const PGDATA_SUBDIRS: [&str; 22] = [
"global",
"pg_wal/archive_status",
"pg_commit_ts",
"pg_dynshmem",
"pg_notify",
"pg_serial",
"pg_snapshots",
"pg_subtrans",
"pg_twophase",
"pg_multixact",
"pg_multixact/members",
"pg_multixact/offsets",
"base",
"base/1",
"pg_replslot",
"pg_tblspc",
"pg_stat",
"pg_stat_tmp",
"pg_xact",
"pg_logical",
"pg_logical/snapshots",
"pg_logical/mappings",
];
// Don't include postgresql.conf as it is inconvenient on node start:
// we need postgresql.conf before basebackup to synchronize safekeepers
// so no point in overwriting it during backup restore. Rest of the files

View File

@@ -5,33 +5,6 @@ pub const BKPIMAGE_IS_COMPRESSED: u8 = 0x02; /* page image is compressed */
pub const BKPIMAGE_APPLY: u8 = 0x04; /* page image should be restored during replay */
pub const SIZEOF_RELMAPFILE: usize = 512; /* sizeof(RelMapFile) in relmapper.c */
// List of subdirectories inside pgdata.
// Copied from src/bin/initdb/initdb.c
pub const PGDATA_SUBDIRS: [&str; 22] = [
"global",
"pg_wal/archive_status",
"pg_commit_ts",
"pg_dynshmem",
"pg_notify",
"pg_serial",
"pg_snapshots",
"pg_subtrans",
"pg_twophase",
"pg_multixact",
"pg_multixact/members",
"pg_multixact/offsets",
"base",
"base/1",
"pg_replslot",
"pg_tblspc",
"pg_stat",
"pg_stat_tmp",
"pg_xact",
"pg_logical",
"pg_logical/snapshots",
"pg_logical/mappings",
];
pub fn bkpimg_is_compressed(bimg_info: u8) -> bool {
(bimg_info & BKPIMAGE_IS_COMPRESSED) != 0
}

View File

@@ -11,8 +11,6 @@ pub const BKPIMAGE_COMPRESS_ZSTD: u8 = 0x10; /* page image is compressed */
pub const SIZEOF_RELMAPFILE: usize = 512; /* sizeof(RelMapFile) in relmapper.c */
pub use super::super::v14::bindings::PGDATA_SUBDIRS;
pub fn bkpimg_is_compressed(bimg_info: u8) -> bool {
const ANY_COMPRESS_FLAG: u8 = BKPIMAGE_COMPRESS_PGLZ | BKPIMAGE_COMPRESS_LZ4 | BKPIMAGE_COMPRESS_ZSTD;

View File

@@ -11,8 +11,6 @@ pub const BKPIMAGE_COMPRESS_ZSTD: u8 = 0x10; /* page image is compressed */
pub const SIZEOF_RELMAPFILE: usize = 524; /* sizeof(RelMapFile) in relmapper.c */
pub use super::super::v14::bindings::PGDATA_SUBDIRS;
pub fn bkpimg_is_compressed(bimg_info: u8) -> bool {
const ANY_COMPRESS_FLAG: u8 = BKPIMAGE_COMPRESS_PGLZ | BKPIMAGE_COMPRESS_LZ4 | BKPIMAGE_COMPRESS_ZSTD;

View File

@@ -1,55 +0,0 @@
pub const XACT_XINFO_HAS_DROPPED_STATS: u32 = 1u32 << 8;
pub const XLOG_DBASE_CREATE_FILE_COPY: u8 = 0x00;
pub const XLOG_DBASE_CREATE_WAL_LOG: u8 = 0x10;
pub const XLOG_DBASE_DROP: u8 = 0x20;
pub const BKPIMAGE_APPLY: u8 = 0x02; /* page image should be restored during replay */
pub const BKPIMAGE_COMPRESS_PGLZ: u8 = 0x04; /* page image is compressed */
pub const BKPIMAGE_COMPRESS_LZ4: u8 = 0x08; /* page image is compressed */
pub const BKPIMAGE_COMPRESS_ZSTD: u8 = 0x10; /* page image is compressed */
pub const SIZEOF_RELMAPFILE: usize = 524; /* sizeof(RelMapFile) in relmapper.c */
// List of subdirectories inside pgdata.
// Copied from src/bin/initdb/initdb.c
pub const PGDATA_SUBDIRS: [&str; 23] = [
"global",
"pg_wal/archive_status",
"pg_wal/summaries",
"pg_commit_ts",
"pg_dynshmem",
"pg_notify",
"pg_serial",
"pg_snapshots",
"pg_subtrans",
"pg_twophase",
"pg_multixact",
"pg_multixact/members",
"pg_multixact/offsets",
"base",
"base/1",
"pg_replslot",
"pg_tblspc",
"pg_stat",
"pg_stat_tmp",
"pg_xact",
"pg_logical",
"pg_logical/snapshots",
"pg_logical/mappings",
];
pub fn bkpimg_is_compressed(bimg_info: u8) -> bool {
const ANY_COMPRESS_FLAG: u8 = BKPIMAGE_COMPRESS_PGLZ | BKPIMAGE_COMPRESS_LZ4 | BKPIMAGE_COMPRESS_ZSTD;
(bimg_info & ANY_COMPRESS_FLAG) != 0
}
pub const XLOG_HEAP2_PRUNE_ON_ACCESS: u8 = 0x10;
pub const XLOG_HEAP2_PRUNE_VACUUM_SCAN: u8 = 0x20;
pub const XLOG_HEAP2_PRUNE_VACUUM_CLEANUP: u8 = 0x30;
pub const XLOG_OVERWRITE_CONTRECORD: u8 = 0xD0;
pub const XLOG_CHECKPOINT_REDO: u8 = 0xE0;

View File

@@ -53,7 +53,7 @@ impl Conf {
#[allow(clippy::manual_range_patterns)]
match self.pg_version {
14 | 15 | 16 | 17 => Ok(path.join(format!("v{}", self.pg_version))),
14 | 15 | 16 => Ok(path.join(format!("v{}", self.pg_version))),
_ => bail!("Unsupported postgres version: {}", self.pg_version),
}
}

View File

@@ -185,7 +185,7 @@ mod tests {
use super::*;
fn parse(input: &str) -> anyhow::Result<RemoteStorageConfig> {
let toml = input.parse::<toml_edit::DocumentMut>().unwrap();
let toml = input.parse::<toml_edit::Document>().unwrap();
RemoteStorageConfig::from_toml(toml.as_item())
}

View File

@@ -3,9 +3,11 @@ use std::str::FromStr;
use anyhow::Context;
use metrics::{IntCounter, IntCounterVec};
use once_cell::sync::Lazy;
use strum_macros::{EnumString, VariantNames};
use strum_macros::{EnumString, EnumVariantNames};
#[derive(EnumString, strum_macros::Display, VariantNames, Eq, PartialEq, Debug, Clone, Copy)]
#[derive(
EnumString, strum_macros::Display, EnumVariantNames, Eq, PartialEq, Debug, Clone, Copy,
)]
#[strum(serialize_all = "snake_case")]
pub enum LogFormat {
Plain,

View File

@@ -10,7 +10,7 @@ pub fn deserialize_item<T>(item: &toml_edit::Item) -> Result<T, Error>
where
T: serde::de::DeserializeOwned,
{
let document: toml_edit::DocumentMut = match item {
let document: toml_edit::Document = match item {
toml_edit::Item::Table(toml) => toml.clone().into(),
toml_edit::Item::Value(toml_edit::Value::InlineTable(toml)) => {
toml.clone().into_table().into()

View File

@@ -5,8 +5,6 @@ use std::{env, path::PathBuf, process::Command};
use anyhow::{anyhow, Context};
const WALPROPOSER_PG_VERSION: &str = "v17";
fn main() -> anyhow::Result<()> {
// Tell cargo to invalidate the built crate whenever the wrapper changes
println!("cargo:rerun-if-changed=bindgen_deps.h");
@@ -38,10 +36,7 @@ fn main() -> anyhow::Result<()> {
// Rebuild crate when libwalproposer.a changes
println!("cargo:rerun-if-changed={walproposer_lib_search_str}/libwalproposer.a");
let pg_config_bin = pg_install_abs
.join(WALPROPOSER_PG_VERSION)
.join("bin")
.join("pg_config");
let pg_config_bin = pg_install_abs.join("v16").join("bin").join("pg_config");
let inc_server_path: String = if pg_config_bin.exists() {
let output = Command::new(pg_config_bin)
.arg("--includedir-server")
@@ -58,7 +53,7 @@ fn main() -> anyhow::Result<()> {
.into()
} else {
let server_path = pg_install_abs
.join(WALPROPOSER_PG_VERSION)
.join("v16")
.join("include")
.join("postgresql")
.join("server")

View File

@@ -142,11 +142,16 @@ impl PagestreamClient {
) -> anyhow::Result<PagestreamGetPageResponse> {
let req = PagestreamFeMessage::GetPage(req);
let req: bytes::Bytes = req.serialize();
// let mut req = tokio_util::io::ReaderStream::new(&req);
let mut req = tokio_stream::once(Ok(req));
self.copy_both.send_all(&mut req).await?;
for i in 0..10 {
let mut req = tokio_stream::once(Ok(req.clone()));
self.copy_both.send_all(&mut req).await?;
}
for i in 0..9 {
let next: Option<Result<bytes::Bytes, _>> = self.copy_both.next().await;
let next: bytes::Bytes = next.unwrap()?;
}
let next: Option<Result<bytes::Bytes, _>> = self.copy_both.next().await;
let next: bytes::Bytes = next.unwrap()?;

View File

@@ -79,24 +79,16 @@ pub(crate) fn parse_filename(name: &str) -> Option<LayerFile> {
return None;
}
let keys: Vec<&str> = split[0].split('-').collect();
let lsn_and_opt_generation: Vec<&str> = split[1].split('v').collect();
let lsns: Vec<&str> = lsn_and_opt_generation[0].split('-').collect();
let the_lsns: [&str; 2];
/*
* Generations add a -vX-XXXXXX postfix, which causes issues when we try to
* parse 'vX' as an LSN.
*/
let is_delta = if lsns.len() == 1 || lsns[1].is_empty() {
the_lsns = [lsns[0], lsns[0]];
let mut lsns: Vec<&str> = split[1].split('-').collect();
let is_delta = if lsns.len() == 1 {
lsns.push(lsns[0]);
false
} else {
the_lsns = [lsns[0], lsns[1]];
true
};
let key_range = Key::from_hex(keys[0]).unwrap()..Key::from_hex(keys[1]).unwrap();
let lsn_range = Lsn::from_hex(the_lsns[0]).unwrap()..Lsn::from_hex(the_lsns[1]).unwrap();
let lsn_range = Lsn::from_hex(lsns[0]).unwrap()..Lsn::from_hex(lsns[1]).unwrap();
let holes = Vec::new();
Some(LayerFile {
key_range,

View File

@@ -174,7 +174,7 @@ async fn main() -> anyhow::Result<()> {
println!("specified prefix '{}' failed validation", cmd.prefix);
return Ok(());
};
let toml_document = toml_edit::DocumentMut::from_str(&cmd.config_toml_str)?;
let toml_document = toml_edit::Document::from_str(&cmd.config_toml_str)?;
let toml_item = toml_document
.get("remote_storage")
.expect("need remote_storage");

View File

@@ -30,8 +30,9 @@ use pageserver_api::reltag::{RelTag, SlruKind};
use postgres_ffi::dispatch_pgversion;
use postgres_ffi::pg_constants::{DEFAULTTABLESPACE_OID, GLOBALTABLESPACE_OID};
use postgres_ffi::pg_constants::{PGDATA_SPECIAL_FILES, PG_HBA};
use postgres_ffi::pg_constants::{PGDATA_SPECIAL_FILES, PGDATA_SUBDIRS, PG_HBA};
use postgres_ffi::relfile_utils::{INIT_FORKNUM, MAIN_FORKNUM};
use postgres_ffi::TransactionId;
use postgres_ffi::XLogFileName;
use postgres_ffi::PG_TLI;
use postgres_ffi::{BLCKSZ, RELSEG_SIZE, WAL_SEGMENT_SIZE};
@@ -254,11 +255,8 @@ where
let lazy_slru_download = self.timeline.get_lazy_slru_download() && !self.full_backup;
let pgversion = self.timeline.pg_version;
let subdirs = dispatch_pgversion!(pgversion, &pgv::bindings::PGDATA_SUBDIRS[..]);
// Create pgdata subdirs structure
for dir in subdirs.iter() {
for dir in PGDATA_SUBDIRS.iter() {
let header = new_tar_header_dir(dir)?;
self.ar
.append(&header, &mut io::empty())
@@ -608,7 +606,7 @@ where
//
// Extract twophase state files
//
async fn add_twophase_file(&mut self, xid: u64) -> Result<(), BasebackupError> {
async fn add_twophase_file(&mut self, xid: TransactionId) -> Result<(), BasebackupError> {
let img = self
.timeline
.get_twophase_file(xid, self.lsn, self.ctx)
@@ -619,11 +617,7 @@ where
buf.extend_from_slice(&img[..]);
let crc = crc32c::crc32c(&img[..]);
buf.put_u32_le(crc);
let path = if self.timeline.pg_version < 17 {
format!("pg_twophase/{:>08X}", xid)
} else {
format!("pg_twophase/{:>016X}", xid)
};
let path = format!("pg_twophase/{:>08X}", xid);
let header = new_tar_header(&path, buf.len() as u64)?;
self.ar
.append(&header, &buf[..])

View File

@@ -281,7 +281,7 @@ impl PageServerConf {
#[allow(clippy::manual_range_patterns)]
match pg_version {
14 | 15 | 16 | 17 => Ok(path.join(format!("v{pg_version}"))),
14 | 15 | 16 => Ok(path.join(format!("v{pg_version}"))),
_ => bail!("Unsupported postgres version: {}", pg_version),
}
}

View File

@@ -580,11 +580,9 @@ async fn import_file(
import_slru(modification, slru, file_path, reader, len, ctx).await?;
debug!("imported multixact members slru");
} else if file_path.starts_with("pg_twophase") {
let bytes = read_all_bytes(reader).await?;
let xid = u32::from_str_radix(file_name.as_ref(), 16)?;
// In PostgreSQL v17, this is a 64-bit FullTransactionid. In previous versions,
// it's a 32-bit TransactionId, which fits in u64 anyway.
let xid = u64::from_str_radix(file_name.as_ref(), 16)?;
let bytes = read_all_bytes(reader).await?;
modification
.put_twophase_file(xid, Bytes::copy_from_slice(&bytes[..]), ctx)
.await?;

View File

@@ -9,7 +9,7 @@ use metrics::{
use once_cell::sync::Lazy;
use pageserver_api::shard::TenantShardId;
use strum::{EnumCount, VariantNames};
use strum_macros::{IntoStaticStr, VariantNames};
use strum_macros::{EnumVariantNames, IntoStaticStr};
use tracing::warn;
use utils::id::TimelineId;
@@ -27,7 +27,7 @@ const CRITICAL_OP_BUCKETS: &[f64] = &[
];
// Metrics collected on operations on the storage repository.
#[derive(Debug, VariantNames, IntoStaticStr)]
#[derive(Debug, EnumVariantNames, IntoStaticStr)]
#[strum(serialize_all = "kebab_case")]
pub(crate) enum StorageTimeOperation {
#[strum(serialize = "layer flush")]
@@ -3170,6 +3170,16 @@ static TOKIO_EXECUTOR_THREAD_COUNT: Lazy<UIntGaugeVec> = Lazy::new(|| {
.unwrap()
});
pub(crate) static CONSECUTIVE_NONBLOCKING_GETPAGE_REQUESTS_HISTOGRAM: Lazy<Histogram> =
Lazy::new(|| {
register_histogram!(
"pageserver_consecutive_nonblocking_getpage_requests",
"Number of consecutive nonblocking getpage requests",
0..=256.map(|x| x as f64).collect::<Vec<f64>>(),
)
.unwrap()
});
pub(crate) fn set_tokio_runtime_setup(setup: &str, num_threads: NonZeroUsize) {
static SERIALIZE: std::sync::Mutex<()> = std::sync::Mutex::new(());
let _guard = SERIALIZE.lock().unwrap();

View File

@@ -5,7 +5,7 @@ use anyhow::Context;
use async_compression::tokio::write::GzipEncoder;
use bytes::Buf;
use futures::FutureExt;
use once_cell::sync::OnceCell;
use once_cell::sync::{Lazy, OnceCell};
use pageserver_api::models::TenantState;
use pageserver_api::models::{
PagestreamBeMessage, PagestreamDbSizeRequest, PagestreamDbSizeResponse,
@@ -43,7 +43,7 @@ use crate::basebackup;
use crate::basebackup::BasebackupError;
use crate::config::PageServerConf;
use crate::context::{DownloadBehavior, RequestContext};
use crate::metrics;
use crate::metrics::{self, CONSECUTIVE_NONBLOCKING_GETPAGE_REQUESTS_HISTOGRAM};
use crate::metrics::{ComputeCommandKind, COMPUTE_COMMANDS_COUNTERS, LIVE_CONNECTIONS};
use crate::pgdatadir_mapping::Version;
use crate::span::debug_assert_current_span_has_tenant_and_timeline_id;
@@ -577,133 +577,186 @@ impl PageServerHandler {
}
}
loop {
let mut requests = Vec::new();
let mut num_consecutive_getpage_requests = 0;
'outer: loop {
// read request bytes (it's exactly 1 PagestreamFeMessage per CopyData)
let msg = tokio::select! {
biased;
_ = self.cancel.cancelled() => {
return Err(QueryError::Shutdown)
}
msg = pgb.read_message() => { msg }
};
let copy_data_bytes = match msg? {
Some(FeMessage::CopyData(bytes)) => bytes,
Some(FeMessage::Terminate) => break,
Some(m) => {
return Err(QueryError::Other(anyhow::anyhow!(
"unexpected message: {m:?} during COPY"
)));
}
None => break, // client disconnected
};
trace!("query: {copy_data_bytes:?}");
fail::fail_point!("ps::handle-pagerequest-message");
// parse request
let neon_fe_msg = PagestreamFeMessage::parse(&mut copy_data_bytes.reader())?;
// invoke handler function
let (handler_result, span) = match neon_fe_msg {
PagestreamFeMessage::Exists(req) => {
fail::fail_point!("ps::handle-pagerequest-message::exists");
let span = tracing::info_span!("handle_get_rel_exists_request", rel = %req.rel, req_lsn = %req.request_lsn);
(
self.handle_get_rel_exists_request(tenant_id, timeline_id, &req, &ctx)
.instrument(span.clone())
.await,
span,
)
}
PagestreamFeMessage::Nblocks(req) => {
fail::fail_point!("ps::handle-pagerequest-message::nblocks");
let span = tracing::info_span!("handle_get_nblocks_request", rel = %req.rel, req_lsn = %req.request_lsn);
(
self.handle_get_nblocks_request(tenant_id, timeline_id, &req, &ctx)
.instrument(span.clone())
.await,
span,
)
}
PagestreamFeMessage::GetPage(req) => {
fail::fail_point!("ps::handle-pagerequest-message::getpage");
// shard_id is filled in by the handler
let span = tracing::info_span!("handle_get_page_at_lsn_request", rel = %req.rel, blkno = %req.blkno, req_lsn = %req.request_lsn);
(
self.handle_get_page_at_lsn_request(tenant_id, timeline_id, &req, &ctx)
.instrument(span.clone())
.await,
span,
)
}
PagestreamFeMessage::DbSize(req) => {
fail::fail_point!("ps::handle-pagerequest-message::dbsize");
let span = tracing::info_span!("handle_db_size_request", dbnode = %req.dbnode, req_lsn = %req.request_lsn);
(
self.handle_db_size_request(tenant_id, timeline_id, &req, &ctx)
.instrument(span.clone())
.await,
span,
)
}
PagestreamFeMessage::GetSlruSegment(req) => {
fail::fail_point!("ps::handle-pagerequest-message::slrusegment");
let span = tracing::info_span!("handle_get_slru_segment_request", kind = %req.kind, segno = %req.segno, req_lsn = %req.request_lsn);
(
self.handle_get_slru_segment_request(tenant_id, timeline_id, &req, &ctx)
.instrument(span.clone())
.await,
span,
)
}
};
// Map handler result to protocol behavior.
// Some handler errors cause exit from pagestream protocol.
// Other handler errors are sent back as an error message and we stay in pagestream protocol.
let response_msg = match handler_result {
Err(e) => match &e {
PageStreamError::Shutdown => {
// If we fail to fulfil a request during shutdown, which may be _because_ of
// shutdown, then do not send the error to the client. Instead just drop the
// connection.
span.in_scope(|| info!("dropping connection due to shutdown"));
return Err(QueryError::Shutdown);
let mut debounce: Option<std::time::Instant> = None;
requests.clear();
loop {
static BOUNCE_TIMEOUT: Lazy<Duration> = Lazy::new(|| {
utils::env::var::<humantime::Duration, _>("NEON_PAGESERVER_DEBOUNCE")
.unwrap()
.into()
});
let sleep_fut = if let Some(started_at) = debounce {
futures::future::Either::Left(tokio::time::sleep_until(
(started_at + *BOUNCE_TIMEOUT).into(),
))
} else {
futures::future::Either::Right(futures::future::pending())
};
tokio::select! {
biased;
_ = self.cancel.cancelled() => {
return Err(QueryError::Shutdown)
}
PageStreamError::Reconnect(reason) => {
span.in_scope(|| info!("handler requested reconnect: {reason}"));
return Err(QueryError::Reconnect);
msg = pgb.read_message() => {
requests.push(msg);
let started_at = debounce.get_or_insert_with(Instant::now);
if started_at.elapsed() > *BOUNCE_TIMEOUT {
break;
}
}
PageStreamError::Read(_)
| PageStreamError::LsnTimeout(_)
| PageStreamError::NotFound(_)
| PageStreamError::BadRequest(_) => {
// print the all details to the log with {:#}, but for the client the
// error message is enough. Do not log if shutting down, as the anyhow::Error
// here includes cancellation which is not an error.
let full = utils::error::report_compact_sources(&e);
span.in_scope(|| {
error!("error reading relation or page version: {full:#}")
});
PagestreamBeMessage::Error(PagestreamErrorResponse {
message: e.to_string(),
})
_ = sleep_fut => {
break;
}
},
Ok(response_msg) => response_msg,
};
// marshal & transmit response message
pgb.write_message_noflush(&BeMessage::CopyData(&response_msg.serialize()))?;
tokio::select! {
biased;
_ = self.cancel.cancelled() => {
// We were requested to shut down.
info!("shutdown request received in page handler");
return Err(QueryError::Shutdown)
}
res = pgb.flush() => {
res?;
}
CONSECUTIVE_NONBLOCKING_GETPAGE_REQUESTS_HISTOGRAM
.observe(num_consecutive_getpage_requests as f64);
num_consecutive_getpage_requests = 0;
for msg in requests.drain(..) {
let copy_data_bytes = match msg? {
Some(FeMessage::CopyData(bytes)) => bytes,
Some(FeMessage::Terminate) => break 'outer,
Some(m) => {
return Err(QueryError::Other(anyhow::anyhow!(
"unexpected message: {m:?} during COPY"
)));
}
None => break 'outer, // client disconnected
};
trace!("query: {copy_data_bytes:?}");
fail::fail_point!("ps::handle-pagerequest-message");
// parse request
let neon_fe_msg = PagestreamFeMessage::parse(&mut copy_data_bytes.reader())?;
// invoke handler function
let (handler_result, span) = match neon_fe_msg {
PagestreamFeMessage::Exists(req) => {
CONSECUTIVE_NONBLOCKING_GETPAGE_REQUESTS_HISTOGRAM
.observe(num_consecutive_getpage_requests as f64);
num_consecutive_getpage_requests = 0;
fail::fail_point!("ps::handle-pagerequest-message::exists");
let span = tracing::info_span!("handle_get_rel_exists_request", rel = %req.rel, req_lsn = %req.request_lsn);
(
self.handle_get_rel_exists_request(tenant_id, timeline_id, &req, &ctx)
.instrument(span.clone())
.await,
span,
)
}
PagestreamFeMessage::Nblocks(req) => {
CONSECUTIVE_NONBLOCKING_GETPAGE_REQUESTS_HISTOGRAM
.observe(num_consecutive_getpage_requests as f64);
num_consecutive_getpage_requests = 0;
fail::fail_point!("ps::handle-pagerequest-message::nblocks");
let span = tracing::info_span!("handle_get_nblocks_request", rel = %req.rel, req_lsn = %req.request_lsn);
(
self.handle_get_nblocks_request(tenant_id, timeline_id, &req, &ctx)
.instrument(span.clone())
.await,
span,
)
}
PagestreamFeMessage::GetPage(req) => {
num_consecutive_getpage_requests += 1;
fail::fail_point!("ps::handle-pagerequest-message::getpage");
// shard_id is filled in by the handler
let span = tracing::info_span!("handle_get_page_at_lsn_request", rel = %req.rel, blkno = %req.blkno, req_lsn = %req.request_lsn);
(
self.handle_get_page_at_lsn_request(tenant_id, timeline_id, &req, &ctx)
.instrument(span.clone())
.await,
span,
)
}
PagestreamFeMessage::DbSize(req) => {
CONSECUTIVE_NONBLOCKING_GETPAGE_REQUESTS_HISTOGRAM
.observe(num_consecutive_getpage_requests as f64);
num_consecutive_getpage_requests = 0;
fail::fail_point!("ps::handle-pagerequest-message::dbsize");
let span = tracing::info_span!("handle_db_size_request", dbnode = %req.dbnode, req_lsn = %req.request_lsn);
(
self.handle_db_size_request(tenant_id, timeline_id, &req, &ctx)
.instrument(span.clone())
.await,
span,
)
}
PagestreamFeMessage::GetSlruSegment(req) => {
CONSECUTIVE_NONBLOCKING_GETPAGE_REQUESTS_HISTOGRAM
.observe(num_consecutive_getpage_requests as f64);
num_consecutive_getpage_requests = 0;
fail::fail_point!("ps::handle-pagerequest-message::slrusegment");
let span = tracing::info_span!("handle_get_slru_segment_request", kind = %req.kind, segno = %req.segno, req_lsn = %req.request_lsn);
(
self.handle_get_slru_segment_request(
tenant_id,
timeline_id,
&req,
&ctx,
)
.instrument(span.clone())
.await,
span,
)
}
};
// Map handler result to protocol behavior.
// Some handler errors cause exit from pagestream protocol.
// Other handler errors are sent back as an error message and we stay in pagestream protocol.
let response_msg = match handler_result {
Err(e) => match &e {
PageStreamError::Shutdown => {
// If we fail to fulfil a request during shutdown, which may be _because_ of
// shutdown, then do not send the error to the client. Instead just drop the
// connection.
span.in_scope(|| info!("dropping connection due to shutdown"));
return Err(QueryError::Shutdown);
}
PageStreamError::Reconnect(reason) => {
span.in_scope(|| info!("handler requested reconnect: {reason}"));
return Err(QueryError::Reconnect);
}
PageStreamError::Read(_)
| PageStreamError::LsnTimeout(_)
| PageStreamError::NotFound(_)
| PageStreamError::BadRequest(_) => {
// print the all details to the log with {:#}, but for the client the
// error message is enough. Do not log if shutting down, as the anyhow::Error
// here includes cancellation which is not an error.
let full = utils::error::report_compact_sources(&e);
span.in_scope(|| {
error!("error reading relation or page version: {full:#}")
});
PagestreamBeMessage::Error(PagestreamErrorResponse {
message: e.to_string(),
})
}
},
Ok(response_msg) => response_msg,
};
// marshal & transmit response message
pgb.write_message_noflush(&BeMessage::CopyData(&response_msg.serialize()))?;
tokio::select! {
biased;
_ = self.cancel.cancelled() => {
// We were requested to shut down.
info!("shutdown request received in page handler");
return Err(QueryError::Shutdown)
}
res = pgb.flush() => {
res?;
}
}
}
}
@@ -1199,6 +1252,7 @@ impl PageServerHandler {
}
}
#[async_trait::async_trait]
impl<IO> postgres_backend::Handler<IO> for PageServerHandler
where
IO: AsyncRead + AsyncWrite + Send + Sync + Unpin,

View File

@@ -633,7 +633,7 @@ impl Timeline {
pub(crate) async fn get_twophase_file(
&self,
xid: u64,
xid: TransactionId,
lsn: Lsn,
ctx: &RequestContext,
) -> Result<Bytes, PageReconstructError> {
@@ -646,19 +646,11 @@ impl Timeline {
&self,
lsn: Lsn,
ctx: &RequestContext,
) -> Result<HashSet<u64>, PageReconstructError> {
) -> Result<HashSet<TransactionId>, PageReconstructError> {
// fetch directory entry
let buf = self.get(TWOPHASEDIR_KEY, lsn, ctx).await?;
if self.pg_version >= 17 {
Ok(TwoPhaseDirectoryV17::des(&buf)?.xids)
} else {
Ok(TwoPhaseDirectory::des(&buf)?
.xids
.iter()
.map(|x| u64::from(*x))
.collect())
}
Ok(TwoPhaseDirectory::des(&buf)?.xids)
}
pub(crate) async fn get_control_file(
@@ -910,13 +902,9 @@ impl Timeline {
// Then pg_twophase
result.add_key(TWOPHASEDIR_KEY);
let mut xids: Vec<u64> = self
.list_twophase_files(lsn, ctx)
.await?
.iter()
.cloned()
.collect();
let buf = self.get(TWOPHASEDIR_KEY, lsn, ctx).await?;
let twophase_dir = TwoPhaseDirectory::des(&buf)?;
let mut xids: Vec<TransactionId> = twophase_dir.xids.iter().cloned().collect();
xids.sort_unstable();
for xid in xids {
result.add_key(twophase_file_key(xid));
@@ -1139,15 +1127,9 @@ impl<'a> DatadirModification<'a> {
// Create AuxFilesDirectory
self.init_aux_dir()?;
let buf = if self.tline.pg_version >= 17 {
TwoPhaseDirectoryV17::ser(&TwoPhaseDirectoryV17 {
xids: HashSet::new(),
})
} else {
TwoPhaseDirectory::ser(&TwoPhaseDirectory {
xids: HashSet::new(),
})
}?;
let buf = TwoPhaseDirectory::ser(&TwoPhaseDirectory {
xids: HashSet::new(),
})?;
self.pending_directory_entries
.push((DirectoryKind::TwoPhase, 0));
self.put(TWOPHASEDIR_KEY, Value::Image(buf.into()));
@@ -1223,13 +1205,6 @@ impl<'a> DatadirModification<'a> {
img: Bytes,
) -> anyhow::Result<()> {
anyhow::ensure!(rel.relnode != 0, RelationError::InvalidRelnode);
let key = rel_block_to_key(rel, blknum);
if !key.is_valid_key_on_write_path() {
anyhow::bail!(
"the request contains data not supported by pageserver at {}",
key
);
}
self.put(rel_block_to_key(rel, blknum), Value::Image(img));
Ok(())
}
@@ -1241,34 +1216,14 @@ impl<'a> DatadirModification<'a> {
blknum: BlockNumber,
img: Bytes,
) -> anyhow::Result<()> {
let key = slru_block_to_key(kind, segno, blknum);
if !key.is_valid_key_on_write_path() {
anyhow::bail!(
"the request contains data not supported by pageserver at {}",
key
);
}
self.put(key, Value::Image(img));
self.put(slru_block_to_key(kind, segno, blknum), Value::Image(img));
Ok(())
}
pub(crate) fn put_rel_page_image_zero(
&mut self,
rel: RelTag,
blknum: BlockNumber,
) -> anyhow::Result<()> {
anyhow::ensure!(rel.relnode != 0, RelationError::InvalidRelnode);
let key = rel_block_to_key(rel, blknum);
if !key.is_valid_key_on_write_path() {
anyhow::bail!(
"the request contains data not supported by pageserver: {} @ {}",
key,
self.lsn
);
}
self.pending_zero_data_pages.insert(key.to_compact());
pub(crate) fn put_rel_page_image_zero(&mut self, rel: RelTag, blknum: BlockNumber) {
self.pending_zero_data_pages
.insert(rel_block_to_key(rel, blknum).to_compact());
self.pending_bytes += ZERO_PAGE.len();
Ok(())
}
pub(crate) fn put_slru_page_image_zero(
@@ -1276,18 +1231,10 @@ impl<'a> DatadirModification<'a> {
kind: SlruKind,
segno: u32,
blknum: BlockNumber,
) -> anyhow::Result<()> {
let key = slru_block_to_key(kind, segno, blknum);
if !key.is_valid_key_on_write_path() {
anyhow::bail!(
"the request contains data not supported by pageserver: {} @ {}",
key,
self.lsn
);
}
self.pending_zero_data_pages.insert(key.to_compact());
) {
self.pending_zero_data_pages
.insert(slru_block_to_key(kind, segno, blknum).to_compact());
self.pending_bytes += ZERO_PAGE.len();
Ok(())
}
/// Call this at the end of each WAL record.
@@ -1339,31 +1286,22 @@ impl<'a> DatadirModification<'a> {
pub async fn put_twophase_file(
&mut self,
xid: u64,
xid: TransactionId,
img: Bytes,
ctx: &RequestContext,
) -> anyhow::Result<()> {
// Add it to the directory entry
let dirbuf = self.get(TWOPHASEDIR_KEY, ctx).await?;
let newdirbuf = if self.tline.pg_version >= 17 {
let mut dir = TwoPhaseDirectoryV17::des(&dirbuf)?;
if !dir.xids.insert(xid) {
anyhow::bail!("twophase file for xid {} already exists", xid);
}
self.pending_directory_entries
.push((DirectoryKind::TwoPhase, dir.xids.len()));
Bytes::from(TwoPhaseDirectoryV17::ser(&dir)?)
} else {
let xid = xid as u32;
let mut dir = TwoPhaseDirectory::des(&dirbuf)?;
if !dir.xids.insert(xid) {
anyhow::bail!("twophase file for xid {} already exists", xid);
}
self.pending_directory_entries
.push((DirectoryKind::TwoPhase, dir.xids.len()));
Bytes::from(TwoPhaseDirectory::ser(&dir)?)
};
self.put(TWOPHASEDIR_KEY, Value::Image(newdirbuf));
let buf = self.get(TWOPHASEDIR_KEY, ctx).await?;
let mut dir = TwoPhaseDirectory::des(&buf)?;
if !dir.xids.insert(xid) {
anyhow::bail!("twophase file for xid {} already exists", xid);
}
self.pending_directory_entries
.push((DirectoryKind::TwoPhase, dir.xids.len()));
self.put(
TWOPHASEDIR_KEY,
Value::Image(Bytes::from(TwoPhaseDirectory::ser(&dir)?)),
);
self.put(twophase_file_key(xid), Value::Image(img));
Ok(())
@@ -1666,32 +1604,22 @@ impl<'a> DatadirModification<'a> {
/// This method is used for marking truncated SLRU files
pub async fn drop_twophase_file(
&mut self,
xid: u64,
xid: TransactionId,
ctx: &RequestContext,
) -> anyhow::Result<()> {
// Remove it from the directory entry
let buf = self.get(TWOPHASEDIR_KEY, ctx).await?;
let newdirbuf = if self.tline.pg_version >= 17 {
let mut dir = TwoPhaseDirectoryV17::des(&buf)?;
let mut dir = TwoPhaseDirectory::des(&buf)?;
if !dir.xids.remove(&xid) {
warn!("twophase file for xid {} does not exist", xid);
}
self.pending_directory_entries
.push((DirectoryKind::TwoPhase, dir.xids.len()));
Bytes::from(TwoPhaseDirectoryV17::ser(&dir)?)
} else {
let xid: u32 = u32::try_from(xid)?;
let mut dir = TwoPhaseDirectory::des(&buf)?;
if !dir.xids.remove(&xid) {
warn!("twophase file for xid {} does not exist", xid);
}
self.pending_directory_entries
.push((DirectoryKind::TwoPhase, dir.xids.len()));
Bytes::from(TwoPhaseDirectory::ser(&dir)?)
};
self.put(TWOPHASEDIR_KEY, Value::Image(newdirbuf));
if !dir.xids.remove(&xid) {
warn!("twophase file for xid {} does not exist", xid);
}
self.pending_directory_entries
.push((DirectoryKind::TwoPhase, dir.xids.len()));
self.put(
TWOPHASEDIR_KEY,
Value::Image(Bytes::from(TwoPhaseDirectory::ser(&dir)?)),
);
// Delete it
self.delete(twophase_key_range(xid));
@@ -2161,21 +2089,11 @@ struct DbDirectory {
dbdirs: HashMap<(Oid, Oid), bool>,
}
// The format of TwoPhaseDirectory changed in PostgreSQL v17, because the filenames of
// pg_twophase files was expanded from 32-bit XIDs to 64-bit XIDs. Previously, the files
// were named like "pg_twophase/000002E5", now they're like
// "pg_twophsae/0000000A000002E4".
#[derive(Debug, Serialize, Deserialize)]
struct TwoPhaseDirectory {
xids: HashSet<TransactionId>,
}
#[derive(Debug, Serialize, Deserialize)]
struct TwoPhaseDirectoryV17 {
xids: HashSet<u64>,
}
#[derive(Debug, Serialize, Deserialize, Default)]
struct RelDirectory {
// Set of relations that exist. (relfilenode, forknum)

View File

@@ -7091,13 +7091,13 @@ mod tests {
vec![
// Image layer at GC horizon
PersistentLayerKey {
key_range: Key::MIN..Key::MAX,
key_range: Key::MIN..Key::NON_L0_MAX,
lsn_range: Lsn(0x30)..Lsn(0x31),
is_delta: false
},
// The delta layer below the horizon
// The delta layer covers the full range (with the layer key hack to avoid being recognized as L0)
PersistentLayerKey {
key_range: get_key(3)..get_key(4),
key_range: Key::MIN..Key::NON_L0_MAX,
lsn_range: Lsn(0x30)..Lsn(0x48),
is_delta: true
},

View File

@@ -452,8 +452,7 @@ impl TryFrom<toml_edit::Item> for TenantConfOpt {
.map_err(|e| anyhow::anyhow!("{}: {}", e.path(), e.inner().message()));
}
toml_edit::Item::Table(table) => {
let deserializer =
toml_edit::de::Deserializer::from(toml_edit::DocumentMut::from(table));
let deserializer = toml_edit::de::Deserializer::new(table.into());
return serde_path_to_error::deserialize(deserializer)
.map_err(|e| anyhow::anyhow!("{}: {}", e.path(), e.inner().message()));
}

View File

@@ -188,7 +188,7 @@ impl SplitImageLayerWriter {
.await
}
/// This function will be deprecated with #8841.
/// When split writer fails, the caller should call this function and handle partially generated layers.
pub(crate) fn take(self) -> anyhow::Result<(Vec<SplitWriterResult>, ImageLayerWriter)> {
Ok((self.generated_layers, self.inner))
}
@@ -204,7 +204,7 @@ impl SplitImageLayerWriter {
/// will split them into multiple files based on size.
#[must_use]
pub struct SplitDeltaLayerWriter {
inner: Option<(Key, DeltaLayerWriter)>,
inner: DeltaLayerWriter,
target_layer_size: u64,
generated_layers: Vec<SplitWriterResult>,
conf: &'static PageServerConf,
@@ -212,6 +212,7 @@ pub struct SplitDeltaLayerWriter {
tenant_shard_id: TenantShardId,
lsn_range: Range<Lsn>,
last_key_written: Key,
start_key: Key,
}
impl SplitDeltaLayerWriter {
@@ -219,18 +220,29 @@ impl SplitDeltaLayerWriter {
conf: &'static PageServerConf,
timeline_id: TimelineId,
tenant_shard_id: TenantShardId,
start_key: Key,
lsn_range: Range<Lsn>,
target_layer_size: u64,
ctx: &RequestContext,
) -> anyhow::Result<Self> {
Ok(Self {
target_layer_size,
inner: None,
inner: DeltaLayerWriter::new(
conf,
timeline_id,
tenant_shard_id,
start_key,
lsn_range.clone(),
ctx,
)
.await?,
generated_layers: Vec::new(),
conf,
timeline_id,
tenant_shard_id,
lsn_range,
last_key_written: Key::MIN,
start_key,
})
}
@@ -253,26 +265,9 @@ impl SplitDeltaLayerWriter {
//
// Also, keep all updates of a single key in a single file. TODO: split them using the legacy compaction
// strategy. https://github.com/neondatabase/neon/issues/8837
if self.inner.is_none() {
self.inner = Some((
key,
DeltaLayerWriter::new(
self.conf,
self.timeline_id,
self.tenant_shard_id,
key,
self.lsn_range.clone(),
ctx,
)
.await?,
));
}
let (_, inner) = self.inner.as_mut().unwrap();
let addition_size_estimation = KEY_SIZE as u64 + 8 /* LSN u64 size */ + 80 /* value size estimation */;
if inner.num_keys() >= 1
&& inner.estimated_size() + addition_size_estimation >= self.target_layer_size
if self.inner.num_keys() >= 1
&& self.inner.estimated_size() + addition_size_estimation >= self.target_layer_size
{
if key != self.last_key_written {
let next_delta_writer = DeltaLayerWriter::new(
@@ -284,13 +279,13 @@ impl SplitDeltaLayerWriter {
ctx,
)
.await?;
let (start_key, prev_delta_writer) =
std::mem::replace(&mut self.inner, Some((key, next_delta_writer))).unwrap();
let prev_delta_writer = std::mem::replace(&mut self.inner, next_delta_writer);
let layer_key = PersistentLayerKey {
key_range: start_key..key,
key_range: self.start_key..key,
lsn_range: self.lsn_range.clone(),
is_delta: true,
};
self.start_key = key;
if discard(&layer_key).await {
drop(prev_delta_writer);
self.generated_layers
@@ -301,18 +296,17 @@ impl SplitDeltaLayerWriter {
self.generated_layers
.push(SplitWriterResult::Produced(delta_layer));
}
} else if inner.estimated_size() >= S3_UPLOAD_LIMIT {
} else if self.inner.estimated_size() >= S3_UPLOAD_LIMIT {
// We have to produce a very large file b/c a key is updated too often.
anyhow::bail!(
"a single key is updated too often: key={}, estimated_size={}, and the layer file cannot be produced",
key,
inner.estimated_size()
self.inner.estimated_size()
);
}
}
self.last_key_written = key;
let (_, inner) = self.inner.as_mut().unwrap();
inner.put_value(key, lsn, val, ctx).await
self.inner.put_value(key, lsn, val, ctx).await
}
pub async fn put_value(
@@ -331,6 +325,7 @@ impl SplitDeltaLayerWriter {
self,
tline: &Arc<Timeline>,
ctx: &RequestContext,
end_key: Key,
discard: D,
) -> anyhow::Result<Vec<SplitWriterResult>>
where
@@ -342,15 +337,11 @@ impl SplitDeltaLayerWriter {
inner,
..
} = self;
let Some((start_key, inner)) = inner else {
return Ok(generated_layers);
};
if inner.num_keys() == 0 {
return Ok(generated_layers);
}
let end_key = self.last_key_written.next();
let layer_key = PersistentLayerKey {
key_range: start_key..end_key,
key_range: self.start_key..end_key,
lsn_range: self.lsn_range.clone(),
is_delta: true,
};
@@ -369,14 +360,15 @@ impl SplitDeltaLayerWriter {
self,
tline: &Arc<Timeline>,
ctx: &RequestContext,
end_key: Key,
) -> anyhow::Result<Vec<SplitWriterResult>> {
self.finish_with_discard_fn(tline, ctx, |_| async { false })
self.finish_with_discard_fn(tline, ctx, end_key, |_| async { false })
.await
}
/// This function will be deprecated with #8841.
pub(crate) fn take(self) -> anyhow::Result<(Vec<SplitWriterResult>, Option<DeltaLayerWriter>)> {
Ok((self.generated_layers, self.inner.map(|x| x.1)))
/// When split writer fails, the caller should call this function and handle partially generated layers.
pub(crate) fn take(self) -> anyhow::Result<(Vec<SplitWriterResult>, DeltaLayerWriter)> {
Ok((self.generated_layers, self.inner))
}
}
@@ -440,8 +432,10 @@ mod tests {
tenant.conf,
tline.timeline_id,
tenant.tenant_shard_id,
get_key(0),
Lsn(0x18)..Lsn(0x20),
4 * 1024 * 1024,
&ctx,
)
.await
.unwrap();
@@ -466,22 +460,11 @@ mod tests {
)
.await
.unwrap();
let layers = delta_writer.finish(&tline, &ctx).await.unwrap();
let layers = delta_writer
.finish(&tline, &ctx, get_key(10))
.await
.unwrap();
assert_eq!(layers.len(), 1);
assert_eq!(
layers
.into_iter()
.next()
.unwrap()
.into_resident_layer()
.layer_desc()
.key(),
PersistentLayerKey {
key_range: get_key(0)..get_key(1),
lsn_range: Lsn(0x18)..Lsn(0x20),
is_delta: true
}
);
}
#[tokio::test]
@@ -518,8 +501,10 @@ mod tests {
tenant.conf,
tline.timeline_id,
tenant.tenant_shard_id,
get_key(0),
Lsn(0x18)..Lsn(0x20),
4 * 1024 * 1024,
&ctx,
)
.await
.unwrap();
@@ -548,7 +533,10 @@ mod tests {
.finish(&tline, &ctx, get_key(N as u32))
.await
.unwrap();
let delta_layers = delta_writer.finish(&tline, &ctx).await.unwrap();
let delta_layers = delta_writer
.finish(&tline, &ctx, get_key(N as u32))
.await
.unwrap();
if discard {
for layer in image_layers {
layer.into_discarded_layer();
@@ -567,14 +555,6 @@ mod tests {
.collect_vec();
assert_eq!(image_layers.len(), N / 512 + 1);
assert_eq!(delta_layers.len(), N / 512 + 1);
assert_eq!(
delta_layers.first().unwrap().layer_desc().key_range.start,
get_key(0)
);
assert_eq!(
delta_layers.last().unwrap().layer_desc().key_range.end,
get_key(N as u32)
);
for idx in 0..image_layers.len() {
assert_ne!(image_layers[idx].layer_desc().key_range.start, Key::MIN);
assert_ne!(image_layers[idx].layer_desc().key_range.end, Key::MAX);
@@ -622,8 +602,10 @@ mod tests {
tenant.conf,
tline.timeline_id,
tenant.tenant_shard_id,
get_key(0),
Lsn(0x18)..Lsn(0x20),
4 * 1024,
&ctx,
)
.await
.unwrap();
@@ -662,35 +644,11 @@ mod tests {
)
.await
.unwrap();
let layers = delta_writer.finish(&tline, &ctx).await.unwrap();
let layers = delta_writer
.finish(&tline, &ctx, get_key(10))
.await
.unwrap();
assert_eq!(layers.len(), 2);
let mut layers_iter = layers.into_iter();
assert_eq!(
layers_iter
.next()
.unwrap()
.into_resident_layer()
.layer_desc()
.key(),
PersistentLayerKey {
key_range: get_key(0)..get_key(1),
lsn_range: Lsn(0x18)..Lsn(0x20),
is_delta: true
}
);
assert_eq!(
layers_iter
.next()
.unwrap()
.into_resident_layer()
.layer_desc()
.key(),
PersistentLayerKey {
key_range: get_key(1)..get_key(2),
lsn_range: Lsn(0x18)..Lsn(0x20),
is_delta: true
}
);
}
#[tokio::test]
@@ -710,8 +668,10 @@ mod tests {
tenant.conf,
tline.timeline_id,
tenant.tenant_shard_id,
get_key(0),
Lsn(0x10)..Lsn(N as u64 * 16 + 0x10),
4 * 1024 * 1024,
&ctx,
)
.await
.unwrap();
@@ -729,20 +689,10 @@ mod tests {
.await
.unwrap();
}
let delta_layers = delta_writer.finish(&tline, &ctx).await.unwrap();
let delta_layers = delta_writer
.finish(&tline, &ctx, get_key(N as u32))
.await
.unwrap();
assert_eq!(delta_layers.len(), 1);
let delta_layer = delta_layers
.into_iter()
.next()
.unwrap()
.into_resident_layer();
assert_eq!(
delta_layer.layer_desc().key(),
PersistentLayerKey {
key_range: get_key(0)..get_key(1),
lsn_range: Lsn(0x10)..Lsn(N as u64 * 16 + 0x10),
is_delta: true
}
);
}
}

View File

@@ -1809,6 +1809,7 @@ impl Timeline {
.unwrap();
// We don't want any of the produced layers to cover the full key range (i.e., MIN..MAX) b/c it will then be recognized
// as an L0 layer.
let hack_end_key = Key::NON_L0_MAX;
let mut delta_layers = Vec::new();
let mut image_layers = Vec::new();
let mut downloaded_layers = Vec::new();
@@ -1854,8 +1855,10 @@ impl Timeline {
self.conf,
self.timeline_id,
self.tenant_shard_id,
Key::MIN,
lowest_retain_lsn..end_lsn,
self.get_compaction_target_size(),
ctx,
)
.await?;
@@ -1962,7 +1965,7 @@ impl Timeline {
let produced_image_layers = if let Some(writer) = image_layer_writer {
if !dry_run {
writer
.finish_with_discard_fn(self, ctx, Key::MAX, discard)
.finish_with_discard_fn(self, ctx, hack_end_key, discard)
.await?
} else {
let (layers, _) = writer.take()?;
@@ -1975,7 +1978,7 @@ impl Timeline {
let produced_delta_layers = if !dry_run {
delta_layer_writer
.finish_with_discard_fn(self, ctx, discard)
.finish_with_discard_fn(self, ctx, hack_end_key, discard)
.await?
} else {
let (layers, _) = delta_layer_writer.take()?;

View File

@@ -25,7 +25,9 @@ use std::time::Duration;
use std::time::SystemTime;
use pageserver_api::shard::ShardIdentity;
use postgres_ffi::{dispatch_pgversion, enum_pgversion, enum_pgversion_dispatch, TimestampTz};
use postgres_ffi::v14::nonrelfile_utils::clogpage_precedes;
use postgres_ffi::v14::nonrelfile_utils::slru_may_delete_clogsegment;
use postgres_ffi::TimestampTz;
use postgres_ffi::{fsm_logical_to_physical, page_is_new, page_set_lsn};
use anyhow::{bail, Context, Result};
@@ -46,31 +48,16 @@ use pageserver_api::key::rel_block_to_key;
use pageserver_api::reltag::{BlockNumber, RelTag, SlruKind};
use postgres_ffi::pg_constants;
use postgres_ffi::relfile_utils::{FSM_FORKNUM, INIT_FORKNUM, MAIN_FORKNUM, VISIBILITYMAP_FORKNUM};
use postgres_ffi::v14::nonrelfile_utils::mx_offset_to_member_segment;
use postgres_ffi::v14::xlog_utils::*;
use postgres_ffi::v14::CheckPoint;
use postgres_ffi::TransactionId;
use postgres_ffi::BLCKSZ;
use utils::bin_ser::SerializeError;
use utils::lsn::Lsn;
enum_pgversion! {CheckPoint, pgv::CheckPoint}
impl CheckPoint {
fn encode(&self) -> Result<Bytes, SerializeError> {
enum_pgversion_dispatch!(self, CheckPoint, cp, { cp.encode() })
}
fn update_next_xid(&mut self, xid: u32) -> bool {
enum_pgversion_dispatch!(self, CheckPoint, cp, { cp.update_next_xid(xid) })
}
pub fn update_next_multixid(&mut self, multi_xid: u32, multi_offset: u32) -> bool {
enum_pgversion_dispatch!(self, CheckPoint, cp, {
cp.update_next_multixid(multi_xid, multi_offset)
})
}
}
pub struct WalIngest {
shard: ShardIdentity,
pg_version: u32,
checkpoint: CheckPoint,
checkpoint_modified: bool,
warn_ingest_lag: WarnIngestLag,
@@ -91,16 +78,12 @@ impl WalIngest {
// Fetch the latest checkpoint into memory, so that we can compare with it
// quickly in `ingest_record` and update it when it changes.
let checkpoint_bytes = timeline.get_checkpoint(startpoint, ctx).await?;
let pgversion = timeline.pg_version;
let checkpoint = dispatch_pgversion!(pgversion, {
let checkpoint = pgv::CheckPoint::decode(&checkpoint_bytes)?;
trace!("CheckPoint.nextXid = {}", checkpoint.nextXid.value);
<pgv::CheckPoint as Into<CheckPoint>>::into(checkpoint)
});
let checkpoint = CheckPoint::decode(&checkpoint_bytes)?;
trace!("CheckPoint.nextXid = {}", checkpoint.nextXid.value);
Ok(WalIngest {
shard: *timeline.get_shard_identity(),
pg_version: timeline.pg_version,
checkpoint,
checkpoint_modified: false,
warn_ingest_lag: WarnIngestLag {
@@ -134,7 +117,7 @@ impl WalIngest {
modification.set_lsn(lsn)?;
if decoded.is_dbase_create_copy(pg_version) {
if decoded.is_dbase_create_copy(self.pg_version) {
// Records of this type should always be preceded by a commit(), as they
// rely on reading data pages back from the Timeline.
assert!(!modification.has_dirty_data_pages());
@@ -237,26 +220,6 @@ impl WalIngest {
.await?;
}
}
} else if pg_version == 17 {
if info == postgres_ffi::v17::bindings::XLOG_DBASE_CREATE_WAL_LOG {
debug!("XLOG_DBASE_CREATE_WAL_LOG: noop");
} else if info == postgres_ffi::v17::bindings::XLOG_DBASE_CREATE_FILE_COPY {
// The XLOG record was renamed between v14 and v15,
// but the record format is the same.
// So we can reuse XlCreateDatabase here.
debug!("XLOG_DBASE_CREATE_FILE_COPY");
let createdb = XlCreateDatabase::decode(&mut buf);
self.ingest_xlog_dbase_create(modification, &createdb, ctx)
.await?;
} else if info == postgres_ffi::v17::bindings::XLOG_DBASE_DROP {
let dropdb = XlDropDatabase::decode(&mut buf);
for tablespace_id in dropdb.tablespace_ids {
trace!("Drop db {}, {}", tablespace_id, dropdb.db_id);
modification
.drop_dbdir(tablespace_id, dropdb.db_id, ctx)
.await?;
}
}
}
}
pg_constants::RM_TBLSPC_ID => {
@@ -266,11 +229,7 @@ impl WalIngest {
let info = decoded.xl_info & !pg_constants::XLR_INFO_MASK;
if info == pg_constants::CLOG_ZEROPAGE {
let pageno = if pg_version < 17 {
buf.get_u32_le()
} else {
buf.get_u64_le() as u32
};
let pageno = buf.get_u32_le();
let segno = pageno / pg_constants::SLRU_PAGES_PER_SEGMENT;
let rpageno = pageno % pg_constants::SLRU_PAGES_PER_SEGMENT;
self.put_slru_page_image(
@@ -284,7 +243,7 @@ impl WalIngest {
.await?;
} else {
assert!(info == pg_constants::CLOG_TRUNCATE);
let xlrec = XlClogTruncate::decode(&mut buf, pg_version);
let xlrec = XlClogTruncate::decode(&mut buf);
self.ingest_clog_truncate_record(modification, &xlrec, ctx)
.await?;
}
@@ -323,21 +282,12 @@ impl WalIngest {
parsed_xact.xid,
lsn,
);
let xid: u64 = if pg_version >= 17 {
self.adjust_to_full_transaction_id(parsed_xact.xid)?
} else {
parsed_xact.xid as u64
};
modification.drop_twophase_file(xid, ctx).await?;
} else if info == pg_constants::XLOG_XACT_PREPARE {
let xid: u64 = if pg_version >= 17 {
self.adjust_to_full_transaction_id(decoded.xl_xid)?
} else {
decoded.xl_xid as u64
};
modification
.put_twophase_file(xid, Bytes::copy_from_slice(&buf[..]), ctx)
.drop_twophase_file(parsed_xact.xid, ctx)
.await?;
} else if info == pg_constants::XLOG_XACT_PREPARE {
modification
.put_twophase_file(decoded.xl_xid, Bytes::copy_from_slice(&buf[..]), ctx)
.await?;
}
}
@@ -345,11 +295,7 @@ impl WalIngest {
let info = decoded.xl_info & pg_constants::XLR_RMGR_INFO_MASK;
if info == pg_constants::XLOG_MULTIXACT_ZERO_OFF_PAGE {
let pageno = if pg_version < 17 {
buf.get_u32_le()
} else {
buf.get_u64_le() as u32
};
let pageno = buf.get_u32_le();
let segno = pageno / pg_constants::SLRU_PAGES_PER_SEGMENT;
let rpageno = pageno % pg_constants::SLRU_PAGES_PER_SEGMENT;
self.put_slru_page_image(
@@ -362,11 +308,7 @@ impl WalIngest {
)
.await?;
} else if info == pg_constants::XLOG_MULTIXACT_ZERO_MEM_PAGE {
let pageno = if pg_version < 17 {
buf.get_u32_le()
} else {
buf.get_u64_le() as u32
};
let pageno = buf.get_u32_le();
let segno = pageno / pg_constants::SLRU_PAGES_PER_SEGMENT;
let rpageno = pageno % pg_constants::SLRU_PAGES_PER_SEGMENT;
self.put_slru_page_image(
@@ -395,93 +337,70 @@ impl WalIngest {
pg_constants::RM_XLOG_ID => {
let info = decoded.xl_info & pg_constants::XLR_RMGR_INFO_MASK;
if info == pg_constants::XLOG_PARAMETER_CHANGE {
if let CheckPoint::V17(cp) = &mut self.checkpoint {
let rec = v17::XlParameterChange::decode(&mut buf);
cp.wal_level = rec.wal_level;
if info == pg_constants::XLOG_NEXTOID {
let next_oid = buf.get_u32_le();
if self.checkpoint.nextOid != next_oid {
self.checkpoint.nextOid = next_oid;
self.checkpoint_modified = true;
}
} else if info == pg_constants::XLOG_END_OF_RECOVERY {
if let CheckPoint::V17(cp) = &mut self.checkpoint {
let rec = v17::XlEndOfRecovery::decode(&mut buf);
cp.wal_level = rec.wal_level;
self.checkpoint_modified = true;
}
}
enum_pgversion_dispatch!(&mut self.checkpoint, CheckPoint, cp, {
if info == pg_constants::XLOG_NEXTOID {
let next_oid = buf.get_u32_le();
if cp.nextOid != next_oid {
cp.nextOid = next_oid;
self.checkpoint_modified = true;
}
} else if info == pg_constants::XLOG_CHECKPOINT_ONLINE
|| info == pg_constants::XLOG_CHECKPOINT_SHUTDOWN
} else if info == pg_constants::XLOG_CHECKPOINT_ONLINE
|| info == pg_constants::XLOG_CHECKPOINT_SHUTDOWN
{
let mut checkpoint_bytes = [0u8; SIZEOF_CHECKPOINT];
buf.copy_to_slice(&mut checkpoint_bytes);
let xlog_checkpoint = CheckPoint::decode(&checkpoint_bytes)?;
trace!(
"xlog_checkpoint.oldestXid={}, checkpoint.oldestXid={}",
xlog_checkpoint.oldestXid,
self.checkpoint.oldestXid
);
if (self
.checkpoint
.oldestXid
.wrapping_sub(xlog_checkpoint.oldestXid) as i32)
< 0
{
let mut checkpoint_bytes = [0u8; pgv::xlog_utils::SIZEOF_CHECKPOINT];
buf.copy_to_slice(&mut checkpoint_bytes);
let xlog_checkpoint = pgv::CheckPoint::decode(&checkpoint_bytes)?;
trace!(
"xlog_checkpoint.oldestXid={}, checkpoint.oldestXid={}",
xlog_checkpoint.oldestXid,
cp.oldestXid
);
if (cp.oldestXid.wrapping_sub(xlog_checkpoint.oldestXid) as i32) < 0 {
cp.oldestXid = xlog_checkpoint.oldestXid;
}
trace!(
"xlog_checkpoint.oldestActiveXid={}, checkpoint.oldestActiveXid={}",
xlog_checkpoint.oldestActiveXid,
cp.oldestActiveXid
);
// A shutdown checkpoint has `oldestActiveXid == InvalidTransactionid`,
// because at shutdown, all in-progress transactions will implicitly
// end. Postgres startup code knows that, and allows hot standby to start
// immediately from a shutdown checkpoint.
//
// In Neon, Postgres hot standby startup always behaves as if starting from
// an online checkpoint. It needs a valid `oldestActiveXid` value, so
// instead of overwriting self.checkpoint.oldestActiveXid with
// InvalidTransactionid from the checkpoint WAL record, update it to a
// proper value, knowing that there are no in-progress transactions at this
// point, except for prepared transactions.
//
// See also the neon code changes in the InitWalRecovery() function.
if xlog_checkpoint.oldestActiveXid == pg_constants::INVALID_TRANSACTION_ID
&& info == pg_constants::XLOG_CHECKPOINT_SHUTDOWN
{
let oldest_active_xid = if pg_version >= 17 {
let mut oldest_active_full_xid = cp.nextXid.value;
for xid in modification.tline.list_twophase_files(lsn, ctx).await? {
if xid < oldest_active_full_xid {
oldest_active_full_xid = xid;
}
}
oldest_active_full_xid as u32
} else {
let mut oldest_active_xid = cp.nextXid.value as u32;
for xid in modification.tline.list_twophase_files(lsn, ctx).await? {
let narrow_xid = xid as u32;
if (narrow_xid.wrapping_sub(oldest_active_xid) as i32) < 0 {
oldest_active_xid = narrow_xid;
}
}
oldest_active_xid
};
cp.oldestActiveXid = oldest_active_xid;
} else {
cp.oldestActiveXid = xlog_checkpoint.oldestActiveXid;
}
// Write a new checkpoint key-value pair on every checkpoint record, even
// if nothing really changed. Not strictly required, but it seems nice to
// have some trace of the checkpoint records in the layer files at the same
// LSNs.
self.checkpoint_modified = true;
self.checkpoint.oldestXid = xlog_checkpoint.oldestXid;
}
});
trace!(
"xlog_checkpoint.oldestActiveXid={}, checkpoint.oldestActiveXid={}",
xlog_checkpoint.oldestActiveXid,
self.checkpoint.oldestActiveXid
);
// A shutdown checkpoint has `oldestActiveXid == InvalidTransactionid`,
// because at shutdown, all in-progress transactions will implicitly
// end. Postgres startup code knows that, and allows hot standby to start
// immediately from a shutdown checkpoint.
//
// In Neon, Postgres hot standby startup always behaves as if starting from
// an online checkpoint. It needs a valid `oldestActiveXid` value, so
// instead of overwriting self.checkpoint.oldestActiveXid with
// InvalidTransactionid from the checkpoint WAL record, update it to a
// proper value, knowing that there are no in-progress transactions at this
// point, except for prepared transactions.
//
// See also the neon code changes in the InitWalRecovery() function.
if xlog_checkpoint.oldestActiveXid == pg_constants::INVALID_TRANSACTION_ID
&& info == pg_constants::XLOG_CHECKPOINT_SHUTDOWN
{
let mut oldest_active_xid = self.checkpoint.nextXid.value as u32;
for xid in modification.tline.list_twophase_files(lsn, ctx).await? {
if (xid.wrapping_sub(oldest_active_xid) as i32) < 0 {
oldest_active_xid = xid;
}
}
self.checkpoint.oldestActiveXid = oldest_active_xid;
} else {
self.checkpoint.oldestActiveXid = xlog_checkpoint.oldestActiveXid;
}
// Write a new checkpoint key-value pair on every checkpoint record, even
// if nothing really changed. Not strictly required, but it seems nice to
// have some trace of the checkpoint records in the layer files at the same
// LSNs.
self.checkpoint_modified = true;
}
}
pg_constants::RM_LOGICALMSG_ID => {
let info = decoded.xl_info & pg_constants::XLR_RMGR_INFO_MASK;
@@ -505,11 +424,7 @@ impl WalIngest {
let info = decoded.xl_info & pg_constants::XLR_RMGR_INFO_MASK;
if info == pg_constants::XLOG_RUNNING_XACTS {
let xlrec = crate::walrecord::XlRunningXacts::decode(&mut buf);
enum_pgversion_dispatch!(&mut self.checkpoint, CheckPoint, cp, {
cp.oldestActiveXid = xlrec.oldest_running_xid;
});
self.checkpoint.oldestActiveXid = xlrec.oldest_running_xid;
self.checkpoint_modified = true;
}
}
@@ -582,25 +497,6 @@ impl WalIngest {
Ok(modification.len() > prev_len)
}
/// This is the same as AdjustToFullTransactionId(xid) in PostgreSQL
fn adjust_to_full_transaction_id(&self, xid: TransactionId) -> Result<u64> {
let next_full_xid =
enum_pgversion_dispatch!(&self.checkpoint, CheckPoint, cp, { cp.nextXid.value });
let next_xid = (next_full_xid) as u32;
let mut epoch = (next_full_xid >> 32) as u32;
if xid > next_xid {
// Wraparound occurred, must be from a prev epoch.
if epoch == 0 {
bail!("apparent XID wraparound with prepared transaction XID {xid}, nextXid is {next_full_xid}");
}
epoch -= 1;
}
Ok((epoch as u64) << 32 | xid as u64)
}
/// Do not store this block, but observe it for the purposes of updating our relation size state.
async fn observe_decoded_block(
&mut self,
@@ -643,7 +539,7 @@ impl WalIngest {
&& blk.has_image
&& decoded.xl_rmid == pg_constants::RM_XLOG_ID
&& (decoded.xl_info == pg_constants::XLOG_FPI
|| decoded.xl_info == pg_constants::XLOG_FPI_FOR_HINT)
|| decoded.xl_info == pg_constants::XLOG_FPI_FOR_HINT)
// compression of WAL is not yet supported: fall back to storing the original WAL record
&& !postgres_ffi::bkpimage_is_compressed(blk.bimg_info, modification.tline.pg_version)
// do not materialize null pages because them most likely be soon replaced with real data
@@ -901,73 +797,6 @@ impl WalIngest {
bail!("Unknown RMGR {} for Heap decoding", decoded.xl_rmid);
}
}
17 => {
if decoded.xl_rmid == pg_constants::RM_HEAP_ID {
let info = decoded.xl_info & pg_constants::XLOG_HEAP_OPMASK;
if info == pg_constants::XLOG_HEAP_INSERT {
let xlrec = v17::XlHeapInsert::decode(buf);
assert_eq!(0, buf.remaining());
if (xlrec.flags & pg_constants::XLH_INSERT_ALL_VISIBLE_CLEARED) != 0 {
new_heap_blkno = Some(decoded.blocks[0].blkno);
}
} else if info == pg_constants::XLOG_HEAP_DELETE {
let xlrec = v17::XlHeapDelete::decode(buf);
if (xlrec.flags & pg_constants::XLH_DELETE_ALL_VISIBLE_CLEARED) != 0 {
new_heap_blkno = Some(decoded.blocks[0].blkno);
}
} else if info == pg_constants::XLOG_HEAP_UPDATE
|| info == pg_constants::XLOG_HEAP_HOT_UPDATE
{
let xlrec = v17::XlHeapUpdate::decode(buf);
// the size of tuple data is inferred from the size of the record.
// we can't validate the remaining number of bytes without parsing
// the tuple data.
if (xlrec.flags & pg_constants::XLH_UPDATE_OLD_ALL_VISIBLE_CLEARED) != 0 {
old_heap_blkno = Some(decoded.blocks.last().unwrap().blkno);
}
if (xlrec.flags & pg_constants::XLH_UPDATE_NEW_ALL_VISIBLE_CLEARED) != 0 {
// PostgreSQL only uses XLH_UPDATE_NEW_ALL_VISIBLE_CLEARED on a
// non-HOT update where the new tuple goes to different page than
// the old one. Otherwise, only XLH_UPDATE_OLD_ALL_VISIBLE_CLEARED is
// set.
new_heap_blkno = Some(decoded.blocks[0].blkno);
}
} else if info == pg_constants::XLOG_HEAP_LOCK {
let xlrec = v17::XlHeapLock::decode(buf);
if (xlrec.flags & pg_constants::XLH_LOCK_ALL_FROZEN_CLEARED) != 0 {
old_heap_blkno = Some(decoded.blocks[0].blkno);
flags = pg_constants::VISIBILITYMAP_ALL_FROZEN;
}
}
} else if decoded.xl_rmid == pg_constants::RM_HEAP2_ID {
let info = decoded.xl_info & pg_constants::XLOG_HEAP_OPMASK;
if info == pg_constants::XLOG_HEAP2_MULTI_INSERT {
let xlrec = v17::XlHeapMultiInsert::decode(buf);
let offset_array_len =
if decoded.xl_info & pg_constants::XLOG_HEAP_INIT_PAGE > 0 {
// the offsets array is omitted if XLOG_HEAP_INIT_PAGE is set
0
} else {
size_of::<u16>() * xlrec.ntuples as usize
};
assert_eq!(offset_array_len, buf.remaining());
if (xlrec.flags & pg_constants::XLH_INSERT_ALL_VISIBLE_CLEARED) != 0 {
new_heap_blkno = Some(decoded.blocks[0].blkno);
}
} else if info == pg_constants::XLOG_HEAP2_LOCK_UPDATED {
let xlrec = v17::XlHeapLockUpdated::decode(buf);
if (xlrec.flags & pg_constants::XLH_LOCK_ALL_FROZEN_CLEARED) != 0 {
old_heap_blkno = Some(decoded.blocks[0].blkno);
flags = pg_constants::VISIBILITYMAP_ALL_FROZEN;
}
}
} else {
bail!("Unknown RMGR {} for Heap decoding", decoded.xl_rmid);
}
}
_ => {}
}
@@ -1076,26 +905,26 @@ impl WalIngest {
assert_eq!(decoded.xl_rmid, pg_constants::RM_NEON_ID);
match pg_version {
16 | 17 => {
16 => {
let info = decoded.xl_info & pg_constants::XLOG_HEAP_OPMASK;
match info {
pg_constants::XLOG_NEON_HEAP_INSERT => {
let xlrec = v17::rm_neon::XlNeonHeapInsert::decode(buf);
let xlrec = v16::rm_neon::XlNeonHeapInsert::decode(buf);
assert_eq!(0, buf.remaining());
if (xlrec.flags & pg_constants::XLH_INSERT_ALL_VISIBLE_CLEARED) != 0 {
new_heap_blkno = Some(decoded.blocks[0].blkno);
}
}
pg_constants::XLOG_NEON_HEAP_DELETE => {
let xlrec = v17::rm_neon::XlNeonHeapDelete::decode(buf);
let xlrec = v16::rm_neon::XlNeonHeapDelete::decode(buf);
if (xlrec.flags & pg_constants::XLH_DELETE_ALL_VISIBLE_CLEARED) != 0 {
new_heap_blkno = Some(decoded.blocks[0].blkno);
}
}
pg_constants::XLOG_NEON_HEAP_UPDATE
| pg_constants::XLOG_NEON_HEAP_HOT_UPDATE => {
let xlrec = v17::rm_neon::XlNeonHeapUpdate::decode(buf);
let xlrec = v16::rm_neon::XlNeonHeapUpdate::decode(buf);
// the size of tuple data is inferred from the size of the record.
// we can't validate the remaining number of bytes without parsing
// the tuple data.
@@ -1111,7 +940,7 @@ impl WalIngest {
}
}
pg_constants::XLOG_NEON_HEAP_MULTI_INSERT => {
let xlrec = v17::rm_neon::XlNeonHeapMultiInsert::decode(buf);
let xlrec = v16::rm_neon::XlNeonHeapMultiInsert::decode(buf);
let offset_array_len =
if decoded.xl_info & pg_constants::XLOG_HEAP_INIT_PAGE > 0 {
@@ -1127,7 +956,7 @@ impl WalIngest {
}
}
pg_constants::XLOG_NEON_HEAP_LOCK => {
let xlrec = v17::rm_neon::XlNeonHeapLock::decode(buf);
let xlrec = v16::rm_neon::XlNeonHeapLock::decode(buf);
if (xlrec.flags & pg_constants::XLH_LOCK_ALL_FROZEN_CLEARED) != 0 {
old_heap_blkno = Some(decoded.blocks[0].blkno);
flags = pg_constants::VISIBILITYMAP_ALL_FROZEN;
@@ -1375,7 +1204,7 @@ impl WalIngest {
if rec.blkno % pg_constants::SLOTS_PER_FSM_PAGE != 0 {
// Tail of last remaining FSM page has to be zeroed.
// We are not precise here and instead of digging in FSM bitmap format just clear the whole page.
modification.put_rel_page_image_zero(rel, fsm_physical_page_no)?;
modification.put_rel_page_image_zero(rel, fsm_physical_page_no);
fsm_physical_page_no += 1;
}
let nblocks = get_relsize(modification, rel, ctx).await?;
@@ -1397,7 +1226,7 @@ impl WalIngest {
if rec.blkno % pg_constants::VM_HEAPBLOCKS_PER_PAGE != 0 {
// Tail of last remaining vm page has to be zeroed.
// We are not precise here and instead of digging in VM bitmap format just clear the whole page.
modification.put_rel_page_image_zero(rel, vm_page_no)?;
modification.put_rel_page_image_zero(rel, vm_page_no);
vm_page_no += 1;
}
let nblocks = get_relsize(modification, rel, ctx).await?;
@@ -1413,17 +1242,12 @@ impl WalIngest {
fn warn_on_ingest_lag(
&mut self,
conf: &crate::config::PageServerConf,
wal_timestamp: TimestampTz,
wal_timestmap: TimestampTz,
) {
debug_assert_current_span_has_tenant_and_timeline_id();
let now = SystemTime::now();
let rate_limits = &mut self.warn_ingest_lag;
let ts = enum_pgversion_dispatch!(&self.checkpoint, CheckPoint, _cp, {
pgv::xlog_utils::try_from_pg_timestamp(wal_timestamp)
});
match ts {
match try_from_pg_timestamp(wal_timestmap) {
Ok(ts) => {
match now.duration_since(ts) {
Ok(lag) => {
@@ -1433,7 +1257,7 @@ impl WalIngest {
warn!(%rate_limit_stats, %lag, "ingesting record with timestamp lagging more than wait_lsn_timeout");
})
}
}
},
Err(e) => {
let delta_t = e.duration();
// determined by prod victoriametrics query: 1000 * (timestamp(node_time_seconds{neon_service="pageserver"}) - node_time_seconds)
@@ -1447,6 +1271,7 @@ impl WalIngest {
}
}
};
}
Err(error) => {
rate_limits.timestamp_invalid_msg_ratelimit.call2(|rate_limit_stats| {
@@ -1554,17 +1379,14 @@ impl WalIngest {
// truncated, but a checkpoint record with the updated values isn't written until
// later. In Neon, a server can start at any LSN, not just on a checkpoint record,
// so we keep the oldestXid and oldestXidDB up-to-date.
enum_pgversion_dispatch!(&mut self.checkpoint, CheckPoint, cp, {
cp.oldestXid = xlrec.oldest_xid;
cp.oldestXidDB = xlrec.oldest_xid_db;
});
self.checkpoint.oldestXid = xlrec.oldest_xid;
self.checkpoint.oldestXidDB = xlrec.oldest_xid_db;
self.checkpoint_modified = true;
// TODO Treat AdvanceOldestClogXid() or write a comment why we don't need it
let latest_page_number =
enum_pgversion_dispatch!(self.checkpoint, CheckPoint, cp, { cp.nextXid.value }) as u32
/ pg_constants::CLOG_XACTS_PER_PAGE;
self.checkpoint.nextXid.value as u32 / pg_constants::CLOG_XACTS_PER_PAGE;
// Now delete all segments containing pages between xlrec.pageno
// and latest_page_number.
@@ -1572,9 +1394,7 @@ impl WalIngest {
// First, make an important safety check:
// the current endpoint page must not be eligible for removal.
// See SimpleLruTruncate() in slru.c
if dispatch_pgversion!(modification.tline.pg_version, {
pgv::nonrelfile_utils::clogpage_precedes(latest_page_number, xlrec.pageno)
}) {
if clogpage_precedes(latest_page_number, xlrec.pageno) {
info!("could not truncate directory pg_xact apparent wraparound");
return Ok(());
}
@@ -1591,12 +1411,7 @@ impl WalIngest {
.await?
{
let segpage = segno * pg_constants::SLRU_PAGES_PER_SEGMENT;
let may_delete = dispatch_pgversion!(modification.tline.pg_version, {
pgv::nonrelfile_utils::slru_may_delete_clogsegment(segpage, xlrec.pageno)
});
if may_delete {
if slru_may_delete_clogsegment(segpage, xlrec.pageno) {
modification
.drop_slru_segment(SlruKind::Clog, segno, ctx)
.await?;
@@ -1715,23 +1530,14 @@ impl WalIngest {
xlrec: &XlMultiXactTruncate,
ctx: &RequestContext,
) -> Result<()> {
let (maxsegment, startsegment, endsegment) =
enum_pgversion_dispatch!(&mut self.checkpoint, CheckPoint, cp, {
cp.oldestMulti = xlrec.end_trunc_off;
cp.oldestMultiDB = xlrec.oldest_multi_db;
let maxsegment: i32 = pgv::nonrelfile_utils::mx_offset_to_member_segment(
pg_constants::MAX_MULTIXACT_OFFSET,
);
let startsegment: i32 =
pgv::nonrelfile_utils::mx_offset_to_member_segment(xlrec.start_trunc_memb);
let endsegment: i32 =
pgv::nonrelfile_utils::mx_offset_to_member_segment(xlrec.end_trunc_memb);
(maxsegment, startsegment, endsegment)
});
self.checkpoint.oldestMulti = xlrec.end_trunc_off;
self.checkpoint.oldestMultiDB = xlrec.oldest_multi_db;
self.checkpoint_modified = true;
// PerformMembersTruncation
let maxsegment: i32 = mx_offset_to_member_segment(pg_constants::MAX_MULTIXACT_OFFSET);
let startsegment: i32 = mx_offset_to_member_segment(xlrec.start_trunc_memb);
let endsegment: i32 = mx_offset_to_member_segment(xlrec.end_trunc_memb);
let mut segment: i32 = startsegment;
// Delete all the segments except the last one. The last segment can still
@@ -1890,7 +1696,7 @@ impl WalIngest {
continue;
}
modification.put_rel_page_image_zero(rel, gap_blknum)?;
modification.put_rel_page_image_zero(rel, gap_blknum);
}
}
Ok(())
@@ -1956,7 +1762,7 @@ impl WalIngest {
// fill the gap with zeros
for gap_blknum in old_nblocks..blknum {
modification.put_slru_page_image_zero(kind, segno, gap_blknum)?;
modification.put_slru_page_image_zero(kind, segno, gap_blknum);
}
}
Ok(())
@@ -2005,23 +1811,11 @@ mod tests {
// TODO
}
#[tokio::test]
async fn test_zeroed_checkpoint_decodes_correctly() -> Result<()> {
for i in 14..=16 {
dispatch_pgversion!(i, {
pgv::CheckPoint::decode(&pgv::ZERO_CHECKPOINT)?;
});
}
Ok(())
}
static ZERO_CHECKPOINT: Bytes = Bytes::from_static(&[0u8; SIZEOF_CHECKPOINT]);
async fn init_walingest_test(tline: &Timeline, ctx: &RequestContext) -> Result<WalIngest> {
let mut m = tline.begin_modification(Lsn(0x10));
m.put_checkpoint(dispatch_pgversion!(
tline.pg_version,
pgv::ZERO_CHECKPOINT.clone()
))?;
m.put_checkpoint(ZERO_CHECKPOINT.clone())?;
m.put_relmap_file(0, 111, Bytes::from(""), ctx).await?; // dummy relmapper file
m.commit(ctx).await?;
let walingest = WalIngest::new(tline, Lsn(0x10), ctx).await?;

View File

@@ -174,7 +174,6 @@ impl DecodedWALRecord {
}
15 => info == postgres_ffi::v15::bindings::XLOG_DBASE_CREATE_FILE_COPY,
16 => info == postgres_ffi::v16::bindings::XLOG_DBASE_CREATE_FILE_COPY,
17 => info == postgres_ffi::v17::bindings::XLOG_DBASE_CREATE_FILE_COPY,
_ => {
panic!("Unsupported postgres version {pg_version}")
}
@@ -342,47 +341,16 @@ pub mod v14 {
}
}
}
#[repr(C)]
#[derive(Debug)]
pub struct XlParameterChange {
pub max_connections: i32,
pub max_worker_processes: i32,
pub max_wal_senders: i32,
pub max_prepared_xacts: i32,
pub max_locks_per_xact: i32,
pub wal_level: i32,
pub wal_log_hints: bool,
pub track_commit_timestamp: bool,
pub _padding: [u8; 2],
}
impl XlParameterChange {
pub fn decode(buf: &mut Bytes) -> XlParameterChange {
XlParameterChange {
max_connections: buf.get_i32_le(),
max_worker_processes: buf.get_i32_le(),
max_wal_senders: buf.get_i32_le(),
max_prepared_xacts: buf.get_i32_le(),
max_locks_per_xact: buf.get_i32_le(),
wal_level: buf.get_i32_le(),
wal_log_hints: buf.get_u8() != 0,
track_commit_timestamp: buf.get_u8() != 0,
_padding: [buf.get_u8(), buf.get_u8()],
}
}
}
}
pub mod v15 {
pub use super::v14::{
XlHeapDelete, XlHeapInsert, XlHeapLock, XlHeapLockUpdated, XlHeapMultiInsert, XlHeapUpdate,
XlParameterChange,
};
}
pub mod v16 {
pub use super::v14::{XlHeapInsert, XlHeapLockUpdated, XlHeapMultiInsert, XlParameterChange};
pub use super::v14::{XlHeapInsert, XlHeapLockUpdated, XlHeapMultiInsert};
use bytes::{Buf, Bytes};
use postgres_ffi::{OffsetNumber, TransactionId};
@@ -561,37 +529,6 @@ pub mod v16 {
}
}
pub mod v17 {
pub use super::v14::XlHeapLockUpdated;
use bytes::{Buf, Bytes};
pub use postgres_ffi::{TimeLineID, TimestampTz};
pub use super::v16::rm_neon;
pub use super::v16::{
XlHeapDelete, XlHeapInsert, XlHeapLock, XlHeapMultiInsert, XlHeapUpdate, XlParameterChange,
};
#[repr(C)]
#[derive(Debug)]
pub struct XlEndOfRecovery {
pub end_time: TimestampTz,
pub this_time_line_id: TimeLineID,
pub prev_time_line_id: TimeLineID,
pub wal_level: i32,
}
impl XlEndOfRecovery {
pub fn decode(buf: &mut Bytes) -> XlEndOfRecovery {
XlEndOfRecovery {
end_time: buf.get_i64_le(),
this_time_line_id: buf.get_u32_le(),
prev_time_line_id: buf.get_u32_le(),
wal_level: buf.get_i32_le(),
}
}
}
}
#[repr(C)]
#[derive(Debug)]
pub struct XlSmgrCreate {
@@ -809,13 +746,9 @@ pub struct XlClogTruncate {
}
impl XlClogTruncate {
pub fn decode(buf: &mut Bytes, pg_version: u32) -> XlClogTruncate {
pub fn decode(buf: &mut Bytes) -> XlClogTruncate {
XlClogTruncate {
pageno: if pg_version < 17 {
buf.get_u32_le()
} else {
buf.get_u64_le() as u32
},
pageno: buf.get_u32_le(),
oldest_xid: buf.get_u32_le(),
oldest_xid_db: buf.get_u32_le(),
}

View File

@@ -23,7 +23,7 @@ SHLIB_LINK_INTERNAL = $(libpq)
SHLIB_LINK = -lcurl
EXTENSION = neon
DATA = neon--1.0.sql neon--1.0--1.1.sql neon--1.1--1.2.sql neon--1.2--1.3.sql neon--1.3--1.2.sql neon--1.2--1.1.sql neon--1.1--1.0.sql neon--1.3--1.4.sql neon--1.4--1.3.sql neon--1.4--1.5.sql neon--1.5--1.4.sql
DATA = neon--1.0.sql neon--1.0--1.1.sql neon--1.1--1.2.sql neon--1.2--1.3.sql neon--1.3--1.2.sql neon--1.2--1.1.sql neon--1.1--1.0.sql neon--1.3--1.4.sql neon--1.4--1.3.sql
PGFILEDESC = "neon - cloud storage for PostgreSQL"
EXTRA_CLEAN = \

View File

@@ -1,12 +0,0 @@
#ifndef NEON_BITMAP_H
#define NEON_BITMAP_H
/*
* Utilities for manipulating bits8* as bitmaps.
*/
#define BITMAP_ISSET(bm, bit) ((bm)[(bit) >> 3] & (1 << ((bit) & 7)))
#define BITMAP_SET(bm, bit) (bm)[(bit) >> 3] |= (1 << ((bit) & 7))
#define BITMAP_CLR(bm, bit) (bm)[(bit) >> 3] &= ~(1 << ((bit) & 7))
#endif //NEON_BITMAP_H

View File

@@ -27,7 +27,6 @@
#include "pagestore_client.h"
#include "common/hashfn.h"
#include "pgstat.h"
#include "port/pg_iovec.h"
#include "postmaster/bgworker.h"
#include RELFILEINFO_HDR
#include "storage/buf_internals.h"
@@ -41,7 +40,6 @@
#include "utils/guc.h"
#include "hll.h"
#include "bitmap.h"
#define CriticalAssert(cond) do if (!(cond)) elog(PANIC, "Assertion %s failed at %s:%d: ", #cond, __FILE__, __LINE__); while (0)
@@ -471,99 +469,6 @@ lfc_cache_contains(NRelFileInfo rinfo, ForkNumber forkNum, BlockNumber blkno)
return found;
}
/*
* Check if page is present in the cache.
* Returns true if page is found in local cache.
*/
int
lfc_cache_containsv(NRelFileInfo rinfo, ForkNumber forkNum, BlockNumber blkno,
int nblocks, bits8 *bitmap)
{
BufferTag tag;
FileCacheEntry *entry;
uint32 chunk_offs;
int found = 0;
uint32 hash;
int i = 0;
if (lfc_maybe_disabled()) /* fast exit if file cache is disabled */
return 0;
CopyNRelFileInfoToBufTag(tag, rinfo);
tag.forkNum = forkNum;
CriticalAssert(BufTagGetRelNumber(&tag) != InvalidRelFileNumber);
tag.blockNum = (blkno + i) & ~(BLOCKS_PER_CHUNK - 1);
hash = get_hash_value(lfc_hash, &tag);
chunk_offs = (blkno + i) & (BLOCKS_PER_CHUNK - 1);
LWLockAcquire(lfc_lock, LW_SHARED);
while (true)
{
int this_chunk = Min(nblocks, BLOCKS_PER_CHUNK - chunk_offs);
if (LFC_ENABLED())
{
entry = hash_search_with_hash_value(lfc_hash, &tag, hash, HASH_FIND, NULL);
if (entry != NULL)
{
for (; chunk_offs < BLOCKS_PER_CHUNK && i < nblocks; chunk_offs++, i++)
{
if ((entry->bitmap[chunk_offs >> 5] &
(1 << (chunk_offs & 31))) != 0)
{
BITMAP_SET(bitmap, i);
found++;
}
}
}
else
{
i += this_chunk;
}
}
else
{
return found;
}
/*
* Break out of the iteration before doing expensive stuff for
* a next iteration
*/
if (i + 1 >= nblocks)
break;
/*
* Prepare for the next iteration. We don't unlock here, as that'd
* probably be more expensive than the gains it'd get us.
*/
tag.blockNum = (blkno + i) & ~(BLOCKS_PER_CHUNK - 1);
hash = get_hash_value(lfc_hash, &tag);
chunk_offs = (blkno + i) & (BLOCKS_PER_CHUNK - 1);
}
LWLockRelease(lfc_lock);
#if USE_ASSERT_CHECKING
do {
int count = 0;
for (int j = 0; j < nblocks; j++)
{
if (BITMAP_ISSET(bitmap, j))
count++;
}
Assert(count == found);
} while (false);
#endif
return found;
}
/*
* Evict a page (if present) from the local file cache
*/
@@ -643,171 +548,91 @@ lfc_evict(NRelFileInfo rinfo, ForkNumber forkNum, BlockNumber blkno)
}
/*
* Try to read pages from local cache.
* Returns the number of pages read from the local cache, and sets bits in
* 'read' for the pages which were read. This may scribble over buffers not
* marked in 'read', so be careful with operation ordering.
*
* In case of error local file cache is disabled (lfc->limit is set to zero),
* and -1 is returned. Note that 'read' and the buffers may be touched and in
* an otherwise invalid state.
*
* If the mask argument is supplied, bits will be set at the offsets of pages
* that were present and read from the LFC.
* Try to read page from local cache.
* Returns true if page is found in local cache.
* In case of error local file cache is disabled (lfc->limit is set to zero).
*/
int
lfc_readv_select(NRelFileInfo rinfo, ForkNumber forkNum, BlockNumber blkno,
void **buffers, BlockNumber nblocks, bits8 *mask)
bool
lfc_read(NRelFileInfo rinfo, ForkNumber forkNum, BlockNumber blkno,
char *buffer)
{
BufferTag tag;
FileCacheEntry *entry;
ssize_t rc;
int chunk_offs = blkno & (BLOCKS_PER_CHUNK - 1);
bool result = true;
uint32 hash;
uint64 generation;
uint32 entry_offset;
int blocks_read = 0;
int buf_offset = 0;
if (lfc_maybe_disabled()) /* fast exit if file cache is disabled */
return 0;
return false;
if (!lfc_ensure_opened())
return 0;
return false;
CopyNRelFileInfoToBufTag(tag, rinfo);
tag.forkNum = forkNum;
tag.blockNum = blkno & ~(BLOCKS_PER_CHUNK - 1);
CriticalAssert(BufTagGetRelNumber(&tag) != InvalidRelFileNumber);
hash = get_hash_value(lfc_hash, &tag);
/*
* For every chunk that has blocks we're interested in, we
* 1. get the chunk header
* 2. Check if the chunk actually has the blocks we're interested in
* 3. Read the blocks we're looking for (in one preadv), assuming they exist
* 4. Update the statistics for the read call.
*
* If there is an error, we do an early return.
*/
while (nblocks > 0)
LWLockAcquire(lfc_lock, LW_EXCLUSIVE);
if (!LFC_ENABLED())
{
struct iovec iov[PG_IOV_MAX];
int chunk_offs = blkno & (BLOCKS_PER_CHUNK - 1);
int blocks_in_chunk = Min(nblocks, BLOCKS_PER_CHUNK - (blkno % BLOCKS_PER_CHUNK));
int iteration_hits = 0;
int iteration_misses = 0;
Assert(blocks_in_chunk > 0);
for (int i = 0; i < blocks_in_chunk; i++)
{
iov[i].iov_base = buffers[buf_offset + i];
iov[i].iov_len = BLCKSZ;
}
tag.blockNum = blkno - chunk_offs;
hash = get_hash_value(lfc_hash, &tag);
LWLockAcquire(lfc_lock, LW_EXCLUSIVE);
/* We can return the blocks we've read before LFC got disabled;
* assuming we read any. */
if (!LFC_ENABLED())
{
LWLockRelease(lfc_lock);
return blocks_read;
}
entry = hash_search_with_hash_value(lfc_hash, &tag, hash, HASH_FIND, NULL);
/* Approximate working set for the blocks assumed in this entry */
for (int i = 0; i < blocks_in_chunk; i++)
{
tag.blockNum = blkno + i;
addSHLL(&lfc_ctl->wss_estimation, hash_bytes((uint8_t const*)&tag, sizeof(tag)));
}
if (entry == NULL)
{
/* Pages are not cached */
lfc_ctl->misses += blocks_in_chunk;
pgBufferUsage.file_cache.misses += blocks_in_chunk;
LWLockRelease(lfc_lock);
buf_offset += blocks_in_chunk;
nblocks -= blocks_in_chunk;
blkno += blocks_in_chunk;
continue;
}
/* Unlink entry from LRU list to pin it for the duration of IO operation */
if (entry->access_count++ == 0)
dlist_delete(&entry->list_node);
generation = lfc_ctl->generation;
entry_offset = entry->offset;
LWLockRelease(lfc_lock);
for (int i = 0; i < blocks_in_chunk; i++)
{
/*
* If the page is valid, we consider it "read".
* All other pages will be fetched separately by the next cache
*/
if (entry->bitmap[(chunk_offs + i) / 32] & (1 << ((chunk_offs + i) % 32)))
{
BITMAP_SET(mask, buf_offset + i);
iteration_hits++;
}
else
iteration_misses++;
}
Assert(iteration_hits + iteration_misses > 0);
if (iteration_hits != 0)
{
rc = preadv(lfc_desc, iov, blocks_in_chunk,
((off_t) entry_offset * BLOCKS_PER_CHUNK + chunk_offs) * BLCKSZ);
if (rc != (BLCKSZ * blocks_in_chunk))
{
lfc_disable("read");
return -1;
}
}
/* Place entry to the head of LRU list */
LWLockAcquire(lfc_lock, LW_EXCLUSIVE);
if (lfc_ctl->generation == generation)
{
CriticalAssert(LFC_ENABLED());
lfc_ctl->hits += iteration_hits;
lfc_ctl->misses += iteration_misses;
pgBufferUsage.file_cache.hits += iteration_hits;
pgBufferUsage.file_cache.misses += iteration_misses;
CriticalAssert(entry->access_count > 0);
if (--entry->access_count == 0)
dlist_push_tail(&lfc_ctl->lru, &entry->list_node);
}
else
{
/* generation mismatch, assume error condition */
LWLockRelease(lfc_lock);
return -1;
}
LWLockRelease(lfc_lock);
buf_offset += blocks_in_chunk;
nblocks -= blocks_in_chunk;
blkno += blocks_in_chunk;
blocks_read += iteration_hits;
return false;
}
return blocks_read;
entry = hash_search_with_hash_value(lfc_hash, &tag, hash, HASH_FIND, NULL);
/* Approximate working set */
tag.blockNum = blkno;
addSHLL(&lfc_ctl->wss_estimation, hash_bytes((uint8_t const*)&tag, sizeof(tag)));
if (entry == NULL || (entry->bitmap[chunk_offs >> 5] & (1 << (chunk_offs & 31))) == 0)
{
/* Page is not cached */
lfc_ctl->misses += 1;
pgBufferUsage.file_cache.misses += 1;
LWLockRelease(lfc_lock);
return false;
}
/* Unlink entry from LRU list to pin it for the duration of IO operation */
if (entry->access_count++ == 0)
dlist_delete(&entry->list_node);
generation = lfc_ctl->generation;
entry_offset = entry->offset;
LWLockRelease(lfc_lock);
rc = pread(lfc_desc, buffer, BLCKSZ, ((off_t) entry_offset * BLOCKS_PER_CHUNK + chunk_offs) * BLCKSZ);
if (rc != BLCKSZ)
{
lfc_disable("read");
return false;
}
/* Place entry to the head of LRU list */
LWLockAcquire(lfc_lock, LW_EXCLUSIVE);
if (lfc_ctl->generation == generation)
{
CriticalAssert(LFC_ENABLED());
lfc_ctl->hits += 1;
pgBufferUsage.file_cache.hits += 1;
CriticalAssert(entry->access_count > 0);
if (--entry->access_count == 0)
dlist_push_tail(&lfc_ctl->lru, &entry->list_node);
}
else
result = false;
LWLockRelease(lfc_lock);
return result;
}
/*
@@ -815,17 +640,20 @@ lfc_readv_select(NRelFileInfo rinfo, ForkNumber forkNum, BlockNumber blkno,
* If cache is full then evict some other page.
*/
void
lfc_writev(NRelFileInfo rinfo, ForkNumber forkNum, BlockNumber blkno,
const void *const *buffers, BlockNumber nblocks)
#if PG_MAJORVERSION_NUM < 16
lfc_write(NRelFileInfo rinfo, ForkNumber forkNum, BlockNumber blkno, char *buffer)
#else
lfc_write(NRelFileInfo rinfo, ForkNumber forkNum, BlockNumber blkno, const void *buffer)
#endif
{
BufferTag tag;
FileCacheEntry *entry;
ssize_t rc;
bool found;
int chunk_offs = blkno & (BLOCKS_PER_CHUNK - 1);
uint32 hash;
uint64 generation;
uint32 entry_offset;
int buf_offset = 0;
if (lfc_maybe_disabled()) /* fast exit if file cache is disabled */
return;
@@ -833,142 +661,110 @@ lfc_writev(NRelFileInfo rinfo, ForkNumber forkNum, BlockNumber blkno,
if (!lfc_ensure_opened())
return;
CopyNRelFileInfoToBufTag(tag, rinfo);
tag.forkNum = forkNum;
tag.blockNum = blkno & ~(BLOCKS_PER_CHUNK - 1);
CopyNRelFileInfoToBufTag(tag, rinfo);
CriticalAssert(BufTagGetRelNumber(&tag) != InvalidRelFileNumber);
hash = get_hash_value(lfc_hash, &tag);
/*
* For every chunk that has blocks we're interested in, we
* 1. get the chunk header
* 2. Check if the chunk actually has the blocks we're interested in
* 3. Read the blocks we're looking for (in one preadv), assuming they exist
* 4. Update the statistics for the read call.
*
* If there is an error, we do an early return.
*/
while (nblocks > 0)
LWLockAcquire(lfc_lock, LW_EXCLUSIVE);
if (!LFC_ENABLED())
{
struct iovec iov[PG_IOV_MAX];
int chunk_offs = blkno & (BLOCKS_PER_CHUNK - 1);
int blocks_in_chunk = Min(nblocks, BLOCKS_PER_CHUNK - (blkno % BLOCKS_PER_CHUNK));
Assert(blocks_in_chunk > 0);
LWLockRelease(lfc_lock);
return;
}
for (int i = 0; i < blocks_in_chunk; i++)
entry = hash_search_with_hash_value(lfc_hash, &tag, hash, HASH_ENTER, &found);
if (found)
{
/*
* Unlink entry from LRU list to pin it for the duration of IO
* operation
*/
if (entry->access_count++ == 0)
dlist_delete(&entry->list_node);
}
else
{
/*
* We have two choices if all cache pages are pinned (i.e. used in IO
* operations):
*
* 1) Wait until some of this operation is completed and pages is
* unpinned.
*
* 2) Allocate one more chunk, so that specified cache size is more
* recommendation than hard limit.
*
* As far as probability of such event (that all pages are pinned) is
* considered to be very very small: there are should be very large
* number of concurrent IO operations and them are limited by
* max_connections, we prefer not to complicate code and use second
* approach.
*/
if (lfc_ctl->used >= lfc_ctl->limit && !dlist_is_empty(&lfc_ctl->lru))
{
iov[i].iov_base = unconstify(void *, buffers[buf_offset + i]);
iov[i].iov_len = BLCKSZ;
/* Cache overflow: evict least recently used chunk */
FileCacheEntry *victim = dlist_container(FileCacheEntry, list_node, dlist_pop_head_node(&lfc_ctl->lru));
CriticalAssert(victim->access_count == 0);
entry->offset = victim->offset; /* grab victim's chunk */
hash_search_with_hash_value(lfc_hash, &victim->key, victim->hash, HASH_REMOVE, NULL);
neon_log(DEBUG2, "Swap file cache page");
}
else if (!dlist_is_empty(&lfc_ctl->holes))
{
/* We can reuse a hole that was left behind when the LFC was shrunk previously */
FileCacheEntry *hole = dlist_container(FileCacheEntry, list_node, dlist_pop_head_node(&lfc_ctl->holes));
uint32 offset = hole->offset;
bool found;
tag.blockNum = blkno & ~(BLOCKS_PER_CHUNK - 1);
hash = get_hash_value(lfc_hash, &tag);
hash_search_with_hash_value(lfc_hash, &hole->key, hole->hash, HASH_REMOVE, &found);
CriticalAssert(found);
lfc_ctl->used += 1;
entry->offset = offset; /* reuse the hole */
}
else
{
lfc_ctl->used += 1;
entry->offset = lfc_ctl->size++; /* allocate new chunk at end
* of file */
}
entry->access_count = 1;
entry->hash = hash;
memset(entry->bitmap, 0, sizeof entry->bitmap);
}
generation = lfc_ctl->generation;
entry_offset = entry->offset;
lfc_ctl->writes += 1;
LWLockRelease(lfc_lock);
rc = pwrite(lfc_desc, buffer, BLCKSZ, ((off_t) entry_offset * BLOCKS_PER_CHUNK + chunk_offs) * BLCKSZ);
if (rc != BLCKSZ)
{
lfc_disable("write");
}
else
{
LWLockAcquire(lfc_lock, LW_EXCLUSIVE);
if (!LFC_ENABLED())
if (lfc_ctl->generation == generation)
{
LWLockRelease(lfc_lock);
return;
CriticalAssert(LFC_ENABLED());
/* Place entry to the head of LRU list */
CriticalAssert(entry->access_count > 0);
if (--entry->access_count == 0)
dlist_push_tail(&lfc_ctl->lru, &entry->list_node);
entry->bitmap[chunk_offs >> 5] |= (1 << (chunk_offs & 31));
}
entry = hash_search_with_hash_value(lfc_hash, &tag, hash, HASH_ENTER, &found);
if (found)
{
/*
* Unlink entry from LRU list to pin it for the duration of IO
* operation
*/
if (entry->access_count++ == 0)
dlist_delete(&entry->list_node);
}
else
{
/*
* We have two choices if all cache pages are pinned (i.e. used in IO
* operations):
*
* 1) Wait until some of this operation is completed and pages is
* unpinned.
*
* 2) Allocate one more chunk, so that specified cache size is more
* recommendation than hard limit.
*
* As far as probability of such event (that all pages are pinned) is
* considered to be very very small: there are should be very large
* number of concurrent IO operations and them are limited by
* max_connections, we prefer not to complicate code and use second
* approach.
*/
if (lfc_ctl->used >= lfc_ctl->limit && !dlist_is_empty(&lfc_ctl->lru))
{
/* Cache overflow: evict least recently used chunk */
FileCacheEntry *victim = dlist_container(FileCacheEntry, list_node, dlist_pop_head_node(&lfc_ctl->lru));
CriticalAssert(victim->access_count == 0);
entry->offset = victim->offset; /* grab victim's chunk */
hash_search_with_hash_value(lfc_hash, &victim->key, victim->hash, HASH_REMOVE, NULL);
neon_log(DEBUG2, "Swap file cache page");
}
else if (!dlist_is_empty(&lfc_ctl->holes))
{
/* We can reuse a hole that was left behind when the LFC was shrunk previously */
FileCacheEntry *hole = dlist_container(FileCacheEntry, list_node, dlist_pop_head_node(&lfc_ctl->holes));
uint32 offset = hole->offset;
bool found;
hash_search_with_hash_value(lfc_hash, &hole->key, hole->hash, HASH_REMOVE, &found);
CriticalAssert(found);
lfc_ctl->used += 1;
entry->offset = offset; /* reuse the hole */
}
else
{
lfc_ctl->used += 1;
entry->offset = lfc_ctl->size++; /* allocate new chunk at end
* of file */
}
entry->access_count = 1;
entry->hash = hash;
memset(entry->bitmap, 0, sizeof entry->bitmap);
}
generation = lfc_ctl->generation;
entry_offset = entry->offset;
lfc_ctl->writes += blocks_in_chunk;
LWLockRelease(lfc_lock);
rc = pwritev(lfc_desc, iov, blocks_in_chunk,
((off_t) entry_offset * BLOCKS_PER_CHUNK + chunk_offs) * BLCKSZ);
if (rc != BLCKSZ * blocks_in_chunk)
{
lfc_disable("write");
}
else
{
LWLockAcquire(lfc_lock, LW_EXCLUSIVE);
if (lfc_ctl->generation == generation)
{
CriticalAssert(LFC_ENABLED());
/* Place entry to the head of LRU list */
CriticalAssert(entry->access_count > 0);
if (--entry->access_count == 0)
dlist_push_tail(&lfc_ctl->lru, &entry->list_node);
for (int i = 0; i < blocks_in_chunk; i++)
{
entry->bitmap[(chunk_offs + i) >> 5] |=
(1 << ((chunk_offs + i) & 31));
}
}
LWLockRelease(lfc_lock);
}
blkno += blocks_in_chunk;
buf_offset += blocks_in_chunk;
nblocks -= blocks_in_chunk;
}
}
@@ -1263,7 +1059,7 @@ approximate_working_set_size_seconds(PG_FUNCTION_ARGS)
int32 dc;
time_t duration = PG_ARGISNULL(0) ? (time_t)-1 : PG_GETARG_INT32(0);
LWLockAcquire(lfc_lock, LW_SHARED);
dc = (int32) estimateSHLL(&lfc_ctl->wss_estimation, duration, 1.0);
dc = (int32) estimateSHLL(&lfc_ctl->wss_estimation, duration);
LWLockRelease(lfc_lock);
PG_RETURN_INT32(dc);
}
@@ -1280,7 +1076,7 @@ approximate_working_set_size(PG_FUNCTION_ARGS)
int32 dc;
bool reset = PG_GETARG_BOOL(0);
LWLockAcquire(lfc_lock, reset ? LW_EXCLUSIVE : LW_SHARED);
dc = (int32) estimateSHLL(&lfc_ctl->wss_estimation, (time_t)-1, 1.0);
dc = (int32) estimateSHLL(&lfc_ctl->wss_estimation, (time_t)-1);
if (reset)
memset(lfc_ctl->wss_estimation.regs, 0, sizeof lfc_ctl->wss_estimation.regs);
LWLockRelease(lfc_lock);
@@ -1288,21 +1084,3 @@ approximate_working_set_size(PG_FUNCTION_ARGS)
}
PG_RETURN_NULL();
}
PG_FUNCTION_INFO_V1(approximate_optimal_cache_size);
Datum
approximate_optimal_cache_size(PG_FUNCTION_ARGS)
{
if (lfc_size_limit != 0)
{
int32 dc;
time_t duration = PG_ARGISNULL(0) ? (time_t)-1 : PG_GETARG_INT32(0);
double min_hit_ratio = PG_ARGISNULL(1) ? 1.0 : PG_GETARG_FLOAT8(1);
LWLockAcquire(lfc_lock, LW_SHARED);
dc = (int32) estimateSHLL(&lfc_ctl->wss_estimation, duration, min_hit_ratio);
LWLockRelease(lfc_lock);
PG_RETURN_INT32(dc);
}
PG_RETURN_NULL();
}

View File

@@ -6,7 +6,7 @@
* Portions Copyright (c) 2014-2023, PostgreSQL Global Development Group
*
* Implements https://hal.science/hal-00465313/document
*
*
* Based on Hideaki Ohno's C++ implementation. This is probably not ideally
* suited to estimating the cardinality of very large sets; in particular, we
* have not attempted to further optimize the implementation as described in
@@ -126,78 +126,22 @@ addSHLL(HyperLogLogState *cState, uint32 hash)
/* Compute the rank of the remaining 32 - "k" (registerWidth) bits */
count = rho(hash << HLL_BIT_WIDTH, HLL_C_BITS);
if (cState->regs[index][count].ts)
{
/* update histgoram */
int64_t delta = (now - cState->regs[index][count].ts)/USECS_PER_SEC;
uint32_t new_histogram[HIST_SIZE] = {0};
for (int i = 0; i < HIST_SIZE; i++) {
/* Use middle point of interval */
uint32 interval_log2 = pg_ceil_log2_32((delta + (HIST_MIN_INTERVAL*((1<<i) + ((1<<i)/2))/2)) / HIST_MIN_INTERVAL);
uint32 cell = Min(interval_log2, HIST_SIZE-1);
new_histogram[cell] += cState->regs[index][count].histogram[i];
}
memcpy(cState->regs[index][count].histogram, new_histogram, sizeof new_histogram);
}
cState->regs[index][count].ts = now;
cState->regs[index][count].histogram[0] += 1; // most recent access always goes to first histogram backet
}
static uint32_t
getAccessCount(const HyperLogLogRegister* reg, time_t duration)
{
uint32_t count = 0;
// Simplest solution is to take in account all points fro overlapped interval
// for (size_t i = 0; i < HIST_SIZE && HIST_MIN_INTERVAL*((1 << i)/2) <= duration; i++) {
for (size_t i = 0; i < HIST_SIZE; i++) {
uint32_t high_boundary = HIST_MIN_INTERVAL*(1 << i);
uint32_t low_boundary = HIST_MIN_INTERVAL*((1 << i)/2);
if (high_boundary >= duration) {
// Assume uniform distribution of points within interval and use proportional number of points
Assert(duration >= low_boundary);
count += reg->histogram[i] * (duration - low_boundary) / (high_boundary - low_boundary);
break; // it's last interval within specified time range
} else {
count += reg->histogram[i];
}
}
return count;
cState->regs[index][count] = now;
}
static uint8
getMaximum(const HyperLogLogRegister* reg, TimestampTz since, time_t duration, double min_hit_ratio)
getMaximum(const TimestampTz* reg, TimestampTz since)
{
uint8 max = 0;
size_t i, j;
if (min_hit_ratio == 1.0)
for (size_t i = 0; i < HLL_C_BITS + 1; i++)
{
for (i = 0; i < HLL_C_BITS + 1; i++)
if (reg[i] >= since)
{
if (reg[i].ts >= since)
{
max = i;
}
}
}
else
{
uint32_t total_count = 0;
for (i = 0; i < HLL_C_BITS + 1; i++)
{
total_count += getAccessCount(&reg[i], duration);
}
if (total_count != 0)
{
for (i = 0; i < HLL_C_BITS + 1; i++)
{
// Take in account only bits with access frequncy exceeding maximal miss rate (1 - hit rate)
if (reg[i].ts >= since && 1.0 - (double)getAccessCount(&reg[i], duration) / total_count <= min_hit_ratio)
{
max = i;
}
}
max = i;
}
}
return max;
}
@@ -206,7 +150,7 @@ getMaximum(const HyperLogLogRegister* reg, TimestampTz since, time_t duration, d
* Estimates cardinality, based on elements added so far
*/
double
estimateSHLL(HyperLogLogState *cState, time_t duration, double min_hit_ratio)
estimateSHLL(HyperLogLogState *cState, time_t duration)
{
double result;
double sum = 0.0;
@@ -217,7 +161,7 @@ estimateSHLL(HyperLogLogState *cState, time_t duration, double min_hit_ratio)
for (i = 0; i < HLL_N_REGISTERS; i++)
{
R[i] = getMaximum(cState->regs[i], since, duration, min_hit_ratio);
R[i] = getMaximum(cState->regs[i], since);
sum += 1.0 / pow(2.0, R[i]);
}

View File

@@ -53,14 +53,6 @@
#define HLL_C_BITS (32 - HLL_BIT_WIDTH)
#define HLL_N_REGISTERS (1 << HLL_BIT_WIDTH)
/*
* Number of histogram cells. We use exponential histogram with first interval
* equals to one minutes. Autoscaler request LFC statistic with intervals 1,2,...,60 minutes
* so 2^8=64 seems to be enough for our needs.
*/
#define HIST_SIZE 8
#define HIST_MIN_INTERVAL 60 /* seconds */
/*
* HyperLogLog is an approximate technique for computing the number of distinct
* entries in a set. Importantly, it does this by using a fixed amount of
@@ -77,21 +69,18 @@
* modified timestamp >= the query timestamp. This value is the number of bits
* for this register in the normal HLL calculation.
*
* The memory usage is 2^B * (C + 1) * sizeof(HyperLogLogRegister), or 920kiB.
* The memory usage is 2^B * (C + 1) * sizeof(TimetampTz), or 184kiB.
* Usage could be halved if we decide to reduce the required time dimension
* precision; as 32 bits in second precision should be enough for statistics.
* However, that is not yet implemented.
*/
typedef struct
{
TimestampTz ts; /* last access timestamp */
uint32_t histogram[HIST_SIZE]; /* access counter exponential histogram */
} HyperLogLogRegister;
typedef struct HyperLogLogState
{
HyperLogLogRegister regs[HLL_N_REGISTERS][HLL_C_BITS + 1];
TimestampTz regs[HLL_N_REGISTERS][HLL_C_BITS + 1];
} HyperLogLogState;
extern void initSHLL(HyperLogLogState *cState);
extern void addSHLL(HyperLogLogState *cState, uint32 hash);
extern double estimateSHLL(HyperLogLogState *cState, time_t dutration, double min_hit_ratio);
extern double estimateSHLL(HyperLogLogState *cState, time_t dutration);
#endif

View File

@@ -537,11 +537,7 @@ pageserver_connect(shardno_t shard_no, int elevel)
/* No more polling needed; connection succeeded */
shard->last_connect_time = GetCurrentTimestamp();
#if PG_MAJORVERSION_NUM >= 17
shard->wes_read = CreateWaitEventSet(NULL, 3);
#else
shard->wes_read = CreateWaitEventSet(TopMemoryContext, 3);
#endif
AddWaitEventToSet(shard->wes_read, WL_LATCH_SET, PGINVALID_SOCKET,
MyLatch, NULL);
AddWaitEventToSet(shard->wes_read, WL_EXIT_ON_PM_DEATH, PGINVALID_SOCKET,

View File

@@ -639,7 +639,9 @@ _PG_init(void)
pg_init_libpagestore();
pg_init_walproposer();
Custom_XLogReaderRoutines = NeonOnDemandXLogReaderRoutines;
WalSender_Custom_XLogReaderRoutines = NeonOnDemandXLogReaderRoutines;
LogicalFuncs_Custom_XLogReaderRoutines = NeonOnDemandXLogReaderRoutines;
SlotFuncs_Custom_XLogReaderRoutines = NeonOnDemandXLogReaderRoutines;
InitLogicalReplicationMonitor();

View File

@@ -6,11 +6,7 @@
#ifndef NEON_PGVERSIONCOMPAT_H
#define NEON_PGVERSIONCOMPAT_H
#if PG_MAJORVERSION_NUM < 17
#define NRelFileInfoBackendIsTemp(rinfo) (rinfo.backend != InvalidBackendId)
#else
#define NRelFileInfoBackendIsTemp(rinfo) (rinfo.backend != INVALID_PROC_NUMBER)
#endif
#define RelFileInfoEquals(a, b) ( \
NInfoGetSpcOid(a) == NInfoGetSpcOid(b) && \
@@ -54,7 +50,7 @@
#define CopyNRelFileInfoToBufTag(tag, rinfo) \
do { \
(tag).rnode = (rinfo); \
} while (false)
} while (false);
#define BufTagGetNRelFileInfo(tag) tag.rnode
@@ -102,7 +98,7 @@
(tag).spcOid = (rinfo).spcOid; \
(tag).dbOid = (rinfo).dbOid; \
(tag).relNumber = (rinfo).relNumber; \
} while (false)
} while (false);
#define BufTagGetNRelFileInfo(tag) \
((RelFileLocator) { \
@@ -117,10 +113,4 @@
#define DropRelationAllLocalBuffers DropRelationAllLocalBuffers
#endif
#if PG_MAJORVERSION_NUM < 17
#define ProcNumber BackendId
#define INVALID_PROC_NUMBER InvalidBackendId
#define AmAutoVacuumWorkerProcess() (IsAutoVacuumWorkerProcess())
#endif
#endif /* NEON_PGVERSIONCOMPAT_H */

View File

@@ -6,6 +6,8 @@
* Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
* contrib/neon/pagestore_client.h
*
*-------------------------------------------------------------------------
*/
#ifndef pageserver_h
@@ -185,7 +187,7 @@ extern char *nm_to_string(NeonMessage *msg);
* API
*/
typedef uint16 shardno_t;
typedef unsigned shardno_t;
typedef struct
{
@@ -209,7 +211,7 @@ extern int neon_protocol_version;
extern shardno_t get_shard_number(BufferTag* tag);
extern const f_smgr *smgr_neon(ProcNumber backend, NRelFileInfo rinfo);
extern const f_smgr *smgr_neon(BackendId backend, NRelFileInfo rinfo);
extern void smgr_init_neon(void);
extern void readahead_buffer_resize(int newsize, void *extra);
@@ -231,13 +233,8 @@ extern void neon_zeroextend(SMgrRelation reln, ForkNumber forknum,
BlockNumber blocknum, int nbuffers, bool skipFsync);
#endif
#if PG_MAJORVERSION_NUM >=17
extern bool neon_prefetch(SMgrRelation reln, ForkNumber forknum,
BlockNumber blocknum, int nblocks);
#else
extern bool neon_prefetch(SMgrRelation reln, ForkNumber forknum,
BlockNumber blocknum);
#endif
/*
* LSN values associated with each request to the pageserver
@@ -272,11 +269,19 @@ typedef struct
} neon_request_lsns;
#if PG_MAJORVERSION_NUM < 16
extern void neon_read(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum,
char *buffer);
extern PGDLLEXPORT void neon_read_at_lsn(NRelFileInfo rnode, ForkNumber forkNum, BlockNumber blkno,
neon_request_lsns request_lsns, char *buffer);
extern void neon_write(SMgrRelation reln, ForkNumber forknum,
BlockNumber blocknum, char *buffer, bool skipFsync);
#else
extern void neon_read(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum,
void *buffer);
extern PGDLLEXPORT void neon_read_at_lsn(NRelFileInfo rnode, ForkNumber forkNum, BlockNumber blkno,
neon_request_lsns request_lsns, void *buffer);
extern void neon_write(SMgrRelation reln, ForkNumber forknum,
BlockNumber blocknum, const void *buffer, bool skipFsync);
#endif
extern void neon_writeback(SMgrRelation reln, ForkNumber forknum,
BlockNumber blocknum, BlockNumber nblocks);
@@ -294,34 +299,17 @@ extern void update_cached_relsize(NRelFileInfo rinfo, ForkNumber forknum, BlockN
extern void forget_cached_relsize(NRelFileInfo rinfo, ForkNumber forknum);
/* functions for local file cache */
extern void lfc_writev(NRelFileInfo rinfo, ForkNumber forkNum,
BlockNumber blkno, const void *const *buffers,
BlockNumber nblocks);
/* returns number of blocks read, with one bit set in *read for each */
extern int lfc_readv_select(NRelFileInfo rinfo, ForkNumber forkNum,
BlockNumber blkno, void **buffers,
BlockNumber nblocks, bits8 *mask);
extern bool lfc_cache_contains(NRelFileInfo rinfo, ForkNumber forkNum,
BlockNumber blkno);
extern int lfc_cache_containsv(NRelFileInfo rinfo, ForkNumber forkNum,
BlockNumber blkno, int nblocks, bits8 *bitmap);
#if PG_MAJORVERSION_NUM < 16
extern void lfc_write(NRelFileInfo rinfo, ForkNumber forkNum, BlockNumber blkno,
char *buffer);
#else
extern void lfc_write(NRelFileInfo rinfo, ForkNumber forkNum, BlockNumber blkno,
const void *buffer);
#endif
extern bool lfc_read(NRelFileInfo rinfo, ForkNumber forkNum, BlockNumber blkno, char *buffer);
extern bool lfc_cache_contains(NRelFileInfo rinfo, ForkNumber forkNum, BlockNumber blkno);
extern void lfc_evict(NRelFileInfo rinfo, ForkNumber forkNum, BlockNumber blkno);
extern void lfc_init(void);
static inline bool
lfc_read(NRelFileInfo rinfo, ForkNumber forkNum, BlockNumber blkno,
void *buffer)
{
bits8 rv = 0;
return lfc_readv_select(rinfo, forkNum, blkno, &buffer, 1, &rv) == 1;
}
static inline void
lfc_write(NRelFileInfo rinfo, ForkNumber forkNum, BlockNumber blkno,
const void *buffer)
{
return lfc_writev(rinfo, forkNum, blkno, &buffer, 1);
}
#endif

File diff suppressed because it is too large Load Diff

View File

@@ -81,7 +81,6 @@ static void nwp_register_gucs(void);
static void assign_neon_safekeepers(const char *newval, void *extra);
static void nwp_prepare_shmem(void);
static uint64 backpressure_lag_impl(void);
static uint64 startup_backpressure_wrap(void);
static bool backpressure_throttling_impl(void);
static void walprop_register_bgworker(void);
@@ -91,7 +90,7 @@ static void walprop_pg_init_bgworker(void);
static TimestampTz walprop_pg_get_current_timestamp(WalProposer *wp);
static void walprop_pg_load_libpqwalreceiver(void);
static process_interrupts_callback_t PrevProcessInterruptsCallback = NULL;
static process_interrupts_callback_t PrevProcessInterruptsCallback;
static shmem_startup_hook_type prev_shmem_startup_hook_type;
#if PG_VERSION_NUM >= 150000
static shmem_request_hook_type prev_shmem_request_hook = NULL;
@@ -179,7 +178,7 @@ pg_init_walproposer(void)
nwp_prepare_shmem();
delay_backend_us = &startup_backpressure_wrap;
delay_backend_us = &backpressure_lag_impl;
PrevProcessInterruptsCallback = ProcessInterruptsCallback;
ProcessInterruptsCallback = backpressure_throttling_impl;
@@ -353,22 +352,6 @@ backpressure_lag_impl(void)
return 0;
}
/*
* We don't apply backpressure when we're the postmaster, or the startup
* process, because in postmaster we can't apply backpressure, and in
* the startup process we can't afford to slow down.
*/
static uint64
startup_backpressure_wrap(void)
{
if (AmStartupProcess() || !IsUnderPostmaster)
return 0;
delay_backend_us = &backpressure_lag_impl;
return backpressure_lag_impl();
}
/*
* WalproposerShmemSize --- report amount of shared memory space needed
*/
@@ -418,13 +401,12 @@ WalproposerShmemInit_SyncSafekeeper(void)
static bool
backpressure_throttling_impl(void)
{
uint64 lag;
int64 lag;
TimestampTz start,
stop;
bool retry = false;
if (PointerIsValid(PrevProcessInterruptsCallback))
retry = PrevProcessInterruptsCallback();
bool retry = PrevProcessInterruptsCallback
? PrevProcessInterruptsCallback()
: false;
/*
* Don't throttle read only transactions or wal sender. Do throttle CREATE
@@ -620,12 +602,7 @@ walprop_pg_init_walsender(void)
/* Create replication slot for WAL proposer if not exists */
if (SearchNamedReplicationSlot(WAL_PROPOSER_SLOT_NAME, false) == NULL)
{
#if PG_MAJORVERSION_NUM >= 17
ReplicationSlotCreate(WAL_PROPOSER_SLOT_NAME, false, RS_PERSISTENT,
false, false, false);
#else
ReplicationSlotCreate(WAL_PROPOSER_SLOT_NAME, false, RS_PERSISTENT, false);
#endif
ReplicationSlotReserveWal();
/* Write this slot to disk */
ReplicationSlotMarkDirty();
@@ -1532,11 +1509,7 @@ walprop_pg_init_event_set(WalProposer *wp)
wpg_log(FATAL, "double-initialization of event set");
/* for each sk, we have socket plus potentially socket for neon walreader */
#if PG_MAJORVERSION_NUM >= 17
waitEvents = CreateWaitEventSet(NULL, 2 + 2 * wp->n_safekeepers);
#else
waitEvents = CreateWaitEventSet(TopMemoryContext, 2 + 2 * wp->n_safekeepers);
#endif
AddWaitEventToSet(waitEvents, WL_LATCH_SET, PGINVALID_SOCKET,
MyLatch, NULL);
AddWaitEventToSet(waitEvents, WL_EXIT_ON_PM_DEATH, PGINVALID_SOCKET,

View File

@@ -1,7 +1,6 @@
#include "postgres.h"
#if PG_MAJORVERSION_NUM >= 16
#include "access/heapam_xlog.h"
#include "access/neon_xlog.h"
#include "replication/decode.h"
@@ -10,10 +9,6 @@
#include "neon_rmgr.h"
#endif /* PG >= 16 */
#if PG_MAJORVERSION_NUM == 16
/* individual record(group)'s handlers */
static void DecodeNeonInsert(LogicalDecodingContext *ctx, XLogRecordBuffer *buf);
static void DecodeNeonUpdate(LogicalDecodingContext *ctx, XLogRecordBuffer *buf);
@@ -404,398 +399,6 @@ DecodeXLogTuple(char *data, Size len, ReorderBufferTupleBuf *tuple)
header->t_infomask2 = xlhdr.t_infomask2;
header->t_hoff = xlhdr.t_hoff;
}
#endif
#if PG_MAJORVERSION_NUM == 17
/* individual record(group)'s handlers */
static void DecodeNeonInsert(LogicalDecodingContext *ctx, XLogRecordBuffer *buf);
static void DecodeNeonUpdate(LogicalDecodingContext *ctx, XLogRecordBuffer *buf);
static void DecodeNeonDelete(LogicalDecodingContext *ctx, XLogRecordBuffer *buf);
static void DecodeNeonMultiInsert(LogicalDecodingContext *ctx, XLogRecordBuffer *buf);
/* common function to decode tuples */
static void DecodeXLogTuple(char *data, Size len, HeapTuple tuple);
void
neon_rm_decode(LogicalDecodingContext *ctx, XLogRecordBuffer *buf)
{
uint8 info = XLogRecGetInfo(buf->record) & XLOG_NEON_OPMASK;
TransactionId xid = XLogRecGetXid(buf->record);
SnapBuild *builder = ctx->snapshot_builder;
ReorderBufferProcessXid(ctx->reorder, xid, buf->origptr);
/*
* If we don't have snapshot or we are just fast-forwarding, there is no
* point in decoding data changes.
*/
if (SnapBuildCurrentState(builder) < SNAPBUILD_FULL_SNAPSHOT ||
ctx->fast_forward)
return;
switch (info)
{
case XLOG_NEON_HEAP_INSERT:
if (SnapBuildProcessChange(builder, xid, buf->origptr))
DecodeNeonInsert(ctx, buf);
break;
case XLOG_NEON_HEAP_DELETE:
if (SnapBuildProcessChange(builder, xid, buf->origptr))
DecodeNeonDelete(ctx, buf);
break;
case XLOG_NEON_HEAP_UPDATE:
case XLOG_NEON_HEAP_HOT_UPDATE:
if (SnapBuildProcessChange(builder, xid, buf->origptr))
DecodeNeonUpdate(ctx, buf);
break;
case XLOG_NEON_HEAP_LOCK:
break;
case XLOG_NEON_HEAP_MULTI_INSERT:
if (SnapBuildProcessChange(builder, xid, buf->origptr))
DecodeNeonMultiInsert(ctx, buf);
break;
default:
elog(ERROR, "unexpected RM_HEAP_ID record type: %u", info);
break;
}
}
static inline bool
FilterByOrigin(LogicalDecodingContext *ctx, RepOriginId origin_id)
{
if (ctx->callbacks.filter_by_origin_cb == NULL)
return false;
return filter_by_origin_cb_wrapper(ctx, origin_id);
}
/*
* Parse XLOG_HEAP_INSERT (not MULTI_INSERT!) records into tuplebufs.
*
* Deletes can contain the new tuple.
*/
static void
DecodeNeonInsert(LogicalDecodingContext *ctx, XLogRecordBuffer *buf)
{
Size datalen;
char *tupledata;
Size tuplelen;
XLogReaderState *r = buf->record;
xl_neon_heap_insert *xlrec;
ReorderBufferChange *change;
RelFileLocator target_locator;
xlrec = (xl_neon_heap_insert *) XLogRecGetData(r);
/*
* Ignore insert records without new tuples (this does happen when
* raw_heap_insert marks the TOAST record as HEAP_INSERT_NO_LOGICAL).
*/
if (!(xlrec->flags & XLH_INSERT_CONTAINS_NEW_TUPLE))
return;
/* only interested in our database */
XLogRecGetBlockTag(r, 0, &target_locator, NULL, NULL);
if (target_locator.dbOid != ctx->slot->data.database)
return;
/* output plugin doesn't look for this origin, no need to queue */
if (FilterByOrigin(ctx, XLogRecGetOrigin(r)))
return;
change = ReorderBufferGetChange(ctx->reorder);
if (!(xlrec->flags & XLH_INSERT_IS_SPECULATIVE))
change->action = REORDER_BUFFER_CHANGE_INSERT;
else
change->action = REORDER_BUFFER_CHANGE_INTERNAL_SPEC_INSERT;
change->origin_id = XLogRecGetOrigin(r);
memcpy(&change->data.tp.rlocator, &target_locator, sizeof(RelFileLocator));
tupledata = XLogRecGetBlockData(r, 0, &datalen);
tuplelen = datalen - SizeOfHeapHeader;
change->data.tp.newtuple =
ReorderBufferGetTupleBuf(ctx->reorder, tuplelen);
DecodeXLogTuple(tupledata, datalen, change->data.tp.newtuple);
change->data.tp.clear_toast_afterwards = true;
ReorderBufferQueueChange(ctx->reorder, XLogRecGetXid(r), buf->origptr,
change,
xlrec->flags & XLH_INSERT_ON_TOAST_RELATION);
}
/*
* Parse XLOG_HEAP_DELETE from wal into proper tuplebufs.
*
* Deletes can possibly contain the old primary key.
*/
static void
DecodeNeonDelete(LogicalDecodingContext *ctx, XLogRecordBuffer *buf)
{
XLogReaderState *r = buf->record;
xl_neon_heap_delete *xlrec;
ReorderBufferChange *change;
RelFileLocator target_locator;
xlrec = (xl_neon_heap_delete *) XLogRecGetData(r);
/* only interested in our database */
XLogRecGetBlockTag(r, 0, &target_locator, NULL, NULL);
if (target_locator.dbOid != ctx->slot->data.database)
return;
/* output plugin doesn't look for this origin, no need to queue */
if (FilterByOrigin(ctx, XLogRecGetOrigin(r)))
return;
change = ReorderBufferGetChange(ctx->reorder);
if (xlrec->flags & XLH_DELETE_IS_SUPER)
change->action = REORDER_BUFFER_CHANGE_INTERNAL_SPEC_ABORT;
else
change->action = REORDER_BUFFER_CHANGE_DELETE;
change->origin_id = XLogRecGetOrigin(r);
memcpy(&change->data.tp.rlocator, &target_locator, sizeof(RelFileLocator));
/* old primary key stored */
if (xlrec->flags & XLH_DELETE_CONTAINS_OLD)
{
Size datalen = XLogRecGetDataLen(r) - SizeOfNeonHeapHeader;
Size tuplelen = datalen - SizeOfNeonHeapHeader;
Assert(XLogRecGetDataLen(r) > (SizeOfNeonHeapDelete + SizeOfNeonHeapHeader));
change->data.tp.oldtuple =
ReorderBufferGetTupleBuf(ctx->reorder, tuplelen);
DecodeXLogTuple((char *) xlrec + SizeOfNeonHeapDelete,
datalen, change->data.tp.oldtuple);
}
change->data.tp.clear_toast_afterwards = true;
ReorderBufferQueueChange(ctx->reorder, XLogRecGetXid(r), buf->origptr,
change, false);
}
/*
* Parse XLOG_HEAP_UPDATE and XLOG_HEAP_HOT_UPDATE, which have the same layout
* in the record, from wal into proper tuplebufs.
*
* Updates can possibly contain a new tuple and the old primary key.
*/
static void
DecodeNeonUpdate(LogicalDecodingContext *ctx, XLogRecordBuffer *buf)
{
XLogReaderState *r = buf->record;
xl_neon_heap_update *xlrec;
ReorderBufferChange *change;
char *data;
RelFileLocator target_locator;
xlrec = (xl_neon_heap_update *) XLogRecGetData(r);
/* only interested in our database */
XLogRecGetBlockTag(r, 0, &target_locator, NULL, NULL);
if (target_locator.dbOid != ctx->slot->data.database)
return;
/* output plugin doesn't look for this origin, no need to queue */
if (FilterByOrigin(ctx, XLogRecGetOrigin(r)))
return;
change = ReorderBufferGetChange(ctx->reorder);
change->action = REORDER_BUFFER_CHANGE_UPDATE;
change->origin_id = XLogRecGetOrigin(r);
memcpy(&change->data.tp.rlocator, &target_locator, sizeof(RelFileLocator));
if (xlrec->flags & XLH_UPDATE_CONTAINS_NEW_TUPLE)
{
Size datalen;
Size tuplelen;
data = XLogRecGetBlockData(r, 0, &datalen);
tuplelen = datalen - SizeOfNeonHeapHeader;
change->data.tp.newtuple =
ReorderBufferGetTupleBuf(ctx->reorder, tuplelen);
DecodeXLogTuple(data, datalen, change->data.tp.newtuple);
}
if (xlrec->flags & XLH_UPDATE_CONTAINS_OLD)
{
Size datalen;
Size tuplelen;
/* caution, remaining data in record is not aligned */
data = XLogRecGetData(r) + SizeOfNeonHeapUpdate;
datalen = XLogRecGetDataLen(r) - SizeOfNeonHeapUpdate;
tuplelen = datalen - SizeOfNeonHeapHeader;
change->data.tp.oldtuple =
ReorderBufferGetTupleBuf(ctx->reorder, tuplelen);
DecodeXLogTuple(data, datalen, change->data.tp.oldtuple);
}
change->data.tp.clear_toast_afterwards = true;
ReorderBufferQueueChange(ctx->reorder, XLogRecGetXid(r), buf->origptr,
change, false);
}
/*
* Decode XLOG_HEAP2_MULTI_INSERT_insert record into multiple tuplebufs.
*
* Currently MULTI_INSERT will always contain the full tuples.
*/
static void
DecodeNeonMultiInsert(LogicalDecodingContext *ctx, XLogRecordBuffer *buf)
{
XLogReaderState *r = buf->record;
xl_neon_heap_multi_insert *xlrec;
int i;
char *data;
char *tupledata;
Size tuplelen;
RelFileLocator rlocator;
xlrec = (xl_neon_heap_multi_insert *) XLogRecGetData(r);
/*
* Ignore insert records without new tuples. This happens when a
* multi_insert is done on a catalog or on a non-persistent relation.
*/
if (!(xlrec->flags & XLH_INSERT_CONTAINS_NEW_TUPLE))
return;
/* only interested in our database */
XLogRecGetBlockTag(r, 0, &rlocator, NULL, NULL);
if (rlocator.dbOid != ctx->slot->data.database)
return;
/* output plugin doesn't look for this origin, no need to queue */
if (FilterByOrigin(ctx, XLogRecGetOrigin(r)))
return;
/*
* We know that this multi_insert isn't for a catalog, so the block should
* always have data even if a full-page write of it is taken.
*/
tupledata = XLogRecGetBlockData(r, 0, &tuplelen);
Assert(tupledata != NULL);
data = tupledata;
for (i = 0; i < xlrec->ntuples; i++)
{
ReorderBufferChange *change;
xl_neon_multi_insert_tuple *xlhdr;
int datalen;
HeapTuple tuple;
HeapTupleHeader header;
change = ReorderBufferGetChange(ctx->reorder);
change->action = REORDER_BUFFER_CHANGE_INSERT;
change->origin_id = XLogRecGetOrigin(r);
memcpy(&change->data.tp.rlocator, &rlocator, sizeof(RelFileLocator));
xlhdr = (xl_neon_multi_insert_tuple *) SHORTALIGN(data);
data = ((char *) xlhdr) + SizeOfNeonMultiInsertTuple;
datalen = xlhdr->datalen;
change->data.tp.newtuple =
ReorderBufferGetTupleBuf(ctx->reorder, datalen);
tuple = change->data.tp.newtuple;
header = tuple->t_data;
/* not a disk based tuple */
ItemPointerSetInvalid(&tuple->t_self);
/*
* We can only figure this out after reassembling the transactions.
*/
tuple->t_tableOid = InvalidOid;
tuple->t_len = datalen + SizeofHeapTupleHeader;
memset(header, 0, SizeofHeapTupleHeader);
memcpy((char *) tuple->t_data + SizeofHeapTupleHeader,
(char *) data,
datalen);
header->t_infomask = xlhdr->t_infomask;
header->t_infomask2 = xlhdr->t_infomask2;
header->t_hoff = xlhdr->t_hoff;
/*
* Reset toast reassembly state only after the last row in the last
* xl_multi_insert_tuple record emitted by one heap_multi_insert()
* call.
*/
if (xlrec->flags & XLH_INSERT_LAST_IN_MULTI &&
(i + 1) == xlrec->ntuples)
change->data.tp.clear_toast_afterwards = true;
else
change->data.tp.clear_toast_afterwards = false;
ReorderBufferQueueChange(ctx->reorder, XLogRecGetXid(r),
buf->origptr, change, false);
/* move to the next xl_neon_multi_insert_tuple entry */
data += datalen;
}
Assert(data == tupledata + tuplelen);
}
/*
* Read a HeapTuple as WAL logged by heap_insert, heap_update and heap_delete
* (but not by heap_multi_insert) into a tuplebuf.
*
* The size 'len' and the pointer 'data' in the record need to be
* computed outside as they are record specific.
*/
static void
DecodeXLogTuple(char *data, Size len, HeapTuple tuple)
{
xl_neon_heap_header xlhdr;
int datalen = len - SizeOfNeonHeapHeader;
HeapTupleHeader header;
Assert(datalen >= 0);
tuple->t_len = datalen + SizeofHeapTupleHeader;
header = tuple->t_data;
/* not a disk based tuple */
ItemPointerSetInvalid(&tuple->t_self);
/* we can only figure this out after reassembling the transactions */
tuple->t_tableOid = InvalidOid;
/* data is not stored aligned, copy to aligned storage */
memcpy((char *) &xlhdr,
data,
SizeOfNeonHeapHeader);
memset(header, 0, SizeofHeapTupleHeader);
memcpy(((char *) tuple->t_data) + SizeofHeapTupleHeader,
data + SizeOfNeonHeapHeader,
datalen);
header->t_infomask = xlhdr.t_infomask;
header->t_infomask2 = xlhdr.t_infomask2;
header->t_hoff = xlhdr.t_hoff;
}
#endif
#endif

View File

@@ -68,13 +68,8 @@ static void inmem_close(SMgrRelation reln, ForkNumber forknum);
static void inmem_create(SMgrRelation reln, ForkNumber forknum, bool isRedo);
static bool inmem_exists(SMgrRelation reln, ForkNumber forknum);
static void inmem_unlink(NRelFileInfoBackend rinfo, ForkNumber forknum, bool isRedo);
#if PG_MAJORVERSION_NUM >= 17
static bool inmem_prefetch(SMgrRelation reln, ForkNumber forknum,
BlockNumber blocknum, int nblocks);
#else
static bool inmem_prefetch(SMgrRelation reln, ForkNumber forknum,
BlockNumber blocknum);
#endif
#if PG_MAJORVERSION_NUM < 16
static void inmem_extend(SMgrRelation reln, ForkNumber forknum,
BlockNumber blocknum, char *buffer, bool skipFsync);
@@ -98,9 +93,7 @@ static BlockNumber inmem_nblocks(SMgrRelation reln, ForkNumber forknum);
static void inmem_truncate(SMgrRelation reln, ForkNumber forknum,
BlockNumber nblocks);
static void inmem_immedsync(SMgrRelation reln, ForkNumber forknum);
#if PG_MAJORVERSION_NUM >= 17
static void inmem_registersync(SMgrRelation reln, ForkNumber forknum);
#endif
/*
* inmem_init() -- Initialize private state
@@ -197,14 +190,6 @@ inmem_close(SMgrRelation reln, ForkNumber forknum)
{
}
#if PG_MAJORVERSION_NUM >= 17
static bool
inmem_prefetch(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum,
int nblocks)
{
return true;
}
#else
/*
* inmem_prefetch() -- Initiate asynchronous read of the specified block of a relation
*/
@@ -213,7 +198,6 @@ inmem_prefetch(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum)
{
return true;
}
#endif
/*
* inmem_writeback() -- Tell the kernel to write pages back to storage.
@@ -227,13 +211,11 @@ inmem_writeback(SMgrRelation reln, ForkNumber forknum,
/*
* inmem_read() -- Read the specified block from a relation.
*/
#if PG_MAJORVERSION_NUM < 16
static void
inmem_read(SMgrRelation reln, ForkNumber forknum, BlockNumber blkno,
#if PG_MAJORVERSION_NUM < 16
char *buffer)
#else
static void
inmem_read(SMgrRelation reln, ForkNumber forknum, BlockNumber blkno,
void *buffer)
#endif
{
@@ -246,18 +228,6 @@ inmem_read(SMgrRelation reln, ForkNumber forknum, BlockNumber blkno,
memcpy(buffer, page_body[pg], BLCKSZ);
}
#if PG_MAJORVERSION_NUM >= 17
static void
inmem_readv(SMgrRelation reln, ForkNumber forknum, BlockNumber blkno,
void **buffers, BlockNumber nblocks)
{
for (int i = 0; i < nblocks; i++)
{
inmem_read(reln, forknum, blkno, buffers[i]);
}
}
#endif
/*
* inmem_write() -- Write the supplied block at the appropriate location.
*
@@ -310,18 +280,6 @@ inmem_write(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum,
memcpy(page_body[pg], buffer, BLCKSZ);
}
#if PG_MAJORVERSION_NUM >= 17
static void
inmem_writev(SMgrRelation reln, ForkNumber forknum, BlockNumber blkno,
const void **buffers, BlockNumber nblocks, bool skipFsync)
{
for (int i = 0; i < nblocks; i++)
{
inmem_write(reln, forknum, blkno, buffers[i], skipFsync);
}
}
#endif
/*
* inmem_nblocks() -- Get the number of blocks stored in a relation.
*/
@@ -357,13 +315,6 @@ inmem_immedsync(SMgrRelation reln, ForkNumber forknum)
{
}
#if PG_MAJORVERSION_NUM >= 17
static void
inmem_registersync(SMgrRelation reln, ForkNumber forknum)
{
}
#endif
static const struct f_smgr inmem_smgr =
{
.smgr_init = inmem_init,
@@ -377,39 +328,23 @@ static const struct f_smgr inmem_smgr =
#if PG_MAJORVERSION_NUM >= 16
.smgr_zeroextend = inmem_zeroextend,
#endif
#if PG_MAJORVERSION_NUM >= 17
.smgr_prefetch = inmem_prefetch,
.smgr_readv = inmem_readv,
.smgr_writev = inmem_writev,
#else
.smgr_prefetch = inmem_prefetch,
.smgr_read = inmem_read,
.smgr_write = inmem_write,
#endif
.smgr_writeback = inmem_writeback,
.smgr_nblocks = inmem_nblocks,
.smgr_truncate = inmem_truncate,
.smgr_immedsync = inmem_immedsync,
#if PG_MAJORVERSION_NUM >= 17
.smgr_registersync = inmem_registersync,
#endif
.smgr_start_unlogged_build = NULL,
.smgr_finish_unlogged_build_phase_1 = NULL,
.smgr_end_unlogged_build = NULL,
.smgr_read_slru_segment = NULL,
};
const f_smgr *
smgr_inmem(ProcNumber backend, NRelFileInfo rinfo)
smgr_inmem(BackendId backend, NRelFileInfo rinfo)
{
Assert(InRecovery);
// // What does this code do?
// if (backend != INVALID_PROC_NUMBER)
// return smgr_standard(backend, rinfo);
// else
return &inmem_smgr;
if (backend != InvalidBackendId)
return smgr_standard(backend, rinfo);
else
return &inmem_smgr;
}
void

View File

@@ -11,7 +11,7 @@
#ifndef INMEM_SMGR_H
#define INMEM_SMGR_H
extern const f_smgr *smgr_inmem(ProcNumber backend, NRelFileInfo rinfo);
extern const f_smgr *smgr_inmem(BackendId backend, NRelFileInfo rinfo);
extern void smgr_init_inmem(void);
#endif /* INMEM_SMGR_H */

View File

@@ -100,9 +100,6 @@
#include "storage/buf_internals.h"
#include "storage/bufmgr.h"
#include "storage/dsm.h"
#if PG_MAJORVERSION_NUM >= 17
#include "storage/dsm_registry.h"
#endif
#include "storage/ipc.h"
#include "storage/pg_shmem.h"
#include "storage/pmsignal.h"
@@ -140,7 +137,7 @@ static BufferTag target_redo_tag;
static XLogReaderState *reader_state;
#define TRACE LOG
#define TRACE DEBUG5
#ifdef HAVE_LIBSECCOMP
@@ -520,10 +517,6 @@ CreateFakeSharedMemoryAndSemaphores()
/*
* Set up xlog, clog, and buffers
*/
#if PG_MAJORVERSION_NUM >= 17
DSMRegistryShmemInit();
VarsupShmemInit();
#endif
XLOGShmemInit();
CLOGShmemInit();
CommitTsShmemInit();
@@ -573,10 +566,7 @@ CreateFakeSharedMemoryAndSemaphores()
/*
* Set up other modules that need some shared memory space
*/
#if PG_MAJORVERSION_NUM < 17
/* "snapshot too old" was removed in PG17, and with it the SnapMgr */
SnapMgrInit();
#endif
BTreeShmemInit();
SyncScanShmemInit();
/* Skip due to the 'pg_notify' directory check */
@@ -752,7 +742,7 @@ BeginRedoForBlock(StringInfo input_message)
target_redo_tag.forkNum,
target_redo_tag.blockNum);
reln = smgropen(rinfo, INVALID_PROC_NUMBER, RELPERSISTENCE_PERMANENT);
reln = smgropen(rinfo, InvalidBackendId, RELPERSISTENCE_PERMANENT);
if (reln->smgr_cached_nblocks[forknum] == InvalidBlockNumber ||
reln->smgr_cached_nblocks[forknum] < blknum + 1)
{

View File

@@ -311,9 +311,7 @@ async fn auth_quirks(
let (allowed_ips, maybe_secret) = api.get_allowed_ips_and_secret(ctx, &info).await?;
// check allowed list
if config.ip_allowlist_check_enabled
&& !check_peer_addr_is_in_list(&ctx.peer_addr(), &allowed_ips)
{
if !check_peer_addr_is_in_list(&ctx.peer_addr(), &allowed_ips) {
return Err(auth::AuthError::ip_address_not_allowed(ctx.peer_addr()));
}
@@ -605,7 +603,6 @@ mod tests {
rate_limiter_enabled: true,
rate_limiter: AuthRateLimiter::new(&RateBucketInfo::DEFAULT_AUTH_SET),
rate_limit_ip_subnet: 64,
ip_allowlist_check_enabled: true,
});
async fn read_message(r: &mut (impl AsyncRead + Unpin), b: &mut BytesMut) -> PgMessage {

View File

@@ -538,17 +538,4 @@ mod tests {
));
Ok(())
}
#[test]
fn test_connection_blocker() {
fn check(v: serde_json::Value) -> bool {
let peer_addr = IpAddr::from([127, 0, 0, 1]);
let ip_list: Vec<IpPattern> = serde_json::from_value(v).unwrap();
check_peer_addr_is_in_list(&peer_addr, &ip_list)
}
assert!(check(json!([])));
assert!(check(json!(["127.0.0.1"])));
assert!(!check(json!(["255.255.255.255"])));
}
}

View File

@@ -224,7 +224,6 @@ fn build_config(args: &LocalProxyCliArgs) -> anyhow::Result<&'static ProxyConfig
rate_limiter_enabled: false,
rate_limiter: BucketRateLimiter::new(vec![]),
rate_limit_ip_subnet: 64,
ip_allowlist_check_enabled: true,
},
require_client_ip: false,
handshake_timeout: Duration::from_secs(10),

View File

@@ -224,10 +224,6 @@ struct ProxyCliArgs {
/// Whether to retry the wake_compute request
#[clap(long, default_value = config::RetryConfig::WAKE_COMPUTE_DEFAULT_VALUES)]
wake_compute_retry: String,
/// Configure if this is a private access proxy for the POC: In that case the proxy will ignore the IP allowlist
#[clap(long, default_value_t = false, value_parser = clap::builder::BoolishValueParser::new(), action = clap::ArgAction::Set)]
is_private_access_proxy: bool,
}
#[derive(clap::Args, Clone, Copy, Debug)]
@@ -686,7 +682,6 @@ fn build_config(args: &ProxyCliArgs) -> anyhow::Result<&'static ProxyConfig> {
rate_limiter_enabled: args.auth_rate_limit_enabled,
rate_limiter: AuthRateLimiter::new(args.auth_rate_limit.clone()),
rate_limit_ip_subnet: args.auth_rate_limit_ip_subnet,
ip_allowlist_check_enabled: !args.is_private_access_proxy,
};
let config = Box::leak(Box::new(ProxyConfig {

View File

@@ -242,6 +242,6 @@ mod tests {
#[test]
fn test() {
let s = "{\"branch_created\":null,\"endpoint_created\":{\"endpoint_id\":\"ep-rapid-thunder-w0qqw2q9\"},\"project_created\":null,\"type\":\"endpoint_created\"}";
serde_json::from_str::<ControlPlaneEventKey>(s).unwrap();
let _: ControlPlaneEventKey = serde_json::from_str(s).unwrap();
}
}

View File

@@ -64,7 +64,6 @@ pub struct AuthenticationConfig {
pub rate_limiter_enabled: bool,
pub rate_limiter: AuthRateLimiter,
pub rate_limit_ip_subnet: u8,
pub ip_allowlist_check_enabled: bool,
}
impl TlsConfig {

View File

@@ -395,7 +395,7 @@ mod tests {
}
}
});
serde_json::from_str::<KickSession<'_>>(&json.to_string())?;
let _: KickSession<'_> = serde_json::from_str(&json.to_string())?;
Ok(())
}
@@ -403,7 +403,7 @@ mod tests {
#[test]
fn parse_db_info() -> anyhow::Result<()> {
// with password
serde_json::from_value::<DatabaseInfo>(json!({
let _: DatabaseInfo = serde_json::from_value(json!({
"host": "localhost",
"port": 5432,
"dbname": "postgres",
@@ -413,7 +413,7 @@ mod tests {
}))?;
// without password
serde_json::from_value::<DatabaseInfo>(json!({
let _: DatabaseInfo = serde_json::from_value(json!({
"host": "localhost",
"port": 5432,
"dbname": "postgres",
@@ -422,7 +422,7 @@ mod tests {
}))?;
// new field (forward compatibility)
serde_json::from_value::<DatabaseInfo>(json!({
let _: DatabaseInfo = serde_json::from_value(json!({
"host": "localhost",
"port": 5432,
"dbname": "postgres",
@@ -441,7 +441,7 @@ mod tests {
"address": "0.0.0.0",
"aux": dummy_aux(),
});
serde_json::from_str::<WakeCompute>(&json.to_string())?;
let _: WakeCompute = serde_json::from_str(&json.to_string())?;
Ok(())
}
@@ -451,18 +451,18 @@ mod tests {
let json = json!({
"role_secret": "secret",
});
serde_json::from_str::<GetRoleSecret>(&json.to_string())?;
let _: GetRoleSecret = serde_json::from_str(&json.to_string())?;
let json = json!({
"role_secret": "secret",
"allowed_ips": ["8.8.8.8"],
});
serde_json::from_str::<GetRoleSecret>(&json.to_string())?;
let _: GetRoleSecret = serde_json::from_str(&json.to_string())?;
let json = json!({
"role_secret": "secret",
"allowed_ips": ["8.8.8.8"],
"project_id": "project",
});
serde_json::from_str::<GetRoleSecret>(&json.to_string())?;
let _: GetRoleSecret = serde_json::from_str(&json.to_string())?;
Ok(())
}

View File

@@ -78,7 +78,7 @@ pub(crate) type ComputeReady = DatabaseInfo;
// TODO: replace with an http-based protocol.
struct MgmtHandler;
#[async_trait::async_trait]
impl postgres_backend::Handler<tokio::net::TcpStream> for MgmtHandler {
async fn process_query(
&mut self,

View File

@@ -6,7 +6,7 @@ use pq_proto::StartupMessageParams;
use smol_str::SmolStr;
use std::net::IpAddr;
use tokio::sync::mpsc;
use tracing::{debug, field::display, info, info_span, Span};
use tracing::{field::display, info, info_span, Span};
use try_lock::TryLock;
use uuid::Uuid;
@@ -362,9 +362,7 @@ impl RequestMonitoringInner {
});
}
if let Some(tx) = self.sender.take() {
tx.send(RequestData::from(&*self))
.inspect_err(|e| debug!("tx send failed: {e}"))
.ok();
let _: Result<(), _> = tx.send(RequestData::from(&*self));
}
}
@@ -373,9 +371,7 @@ impl RequestMonitoringInner {
// Here we log the length of the session.
self.disconnect_timestamp = Some(Utc::now());
if let Some(tx) = self.disconnect_sender.take() {
tx.send(RequestData::from(&*self))
.inspect_err(|e| debug!("tx send failed: {e}"))
.ok();
let _: Result<(), _> = tx.send(RequestData::from(&*self));
}
}
}

View File

@@ -290,7 +290,7 @@ async fn worker_inner(
}
if !w.flushed_row_groups().is_empty() {
let _rtchk: Writer<BytesMut> = upload_parquet(w, len, &storage).await?;
let _: Writer<BytesMut> = upload_parquet(w, len, &storage).await?;
}
Ok(())
@@ -598,15 +598,15 @@ mod tests {
assert_eq!(
file_stats,
[
(1312632, 3, 6000),
(1312621, 3, 6000),
(1312680, 3, 6000),
(1312637, 3, 6000),
(1312773, 3, 6000),
(1312610, 3, 6000),
(1312404, 3, 6000),
(1312639, 3, 6000),
(437848, 1, 2000)
(1315874, 3, 6000),
(1315867, 3, 6000),
(1315927, 3, 6000),
(1315884, 3, 6000),
(1316014, 3, 6000),
(1315856, 3, 6000),
(1315648, 3, 6000),
(1315884, 3, 6000),
(438913, 1, 2000)
]
);
@@ -638,11 +638,11 @@ mod tests {
assert_eq!(
file_stats,
[
(1203465, 5, 10000),
(1203189, 5, 10000),
(1203490, 5, 10000),
(1203475, 5, 10000),
(1203729, 5, 10000)
(1208861, 5, 10000),
(1208592, 5, 10000),
(1208885, 5, 10000),
(1208873, 5, 10000),
(1209128, 5, 10000)
]
);
@@ -667,15 +667,15 @@ mod tests {
assert_eq!(
file_stats,
[
(1312632, 3, 6000),
(1312621, 3, 6000),
(1312680, 3, 6000),
(1312637, 3, 6000),
(1312773, 3, 6000),
(1312610, 3, 6000),
(1312404, 3, 6000),
(1312639, 3, 6000),
(437848, 1, 2000)
(1315874, 3, 6000),
(1315867, 3, 6000),
(1315927, 3, 6000),
(1315884, 3, 6000),
(1316014, 3, 6000),
(1315856, 3, 6000),
(1315648, 3, 6000),
(1315884, 3, 6000),
(438913, 1, 2000)
]
);
@@ -712,7 +712,7 @@ mod tests {
// files are smaller than the size threshold, but they took too long to fill so were flushed early
assert_eq!(
file_stats,
[(657696, 2, 3001), (657410, 2, 3000), (657206, 2, 2999)]
[(659836, 2, 3001), (659550, 2, 3000), (659346, 2, 2999)]
);
tmpdir.close().unwrap();

View File

@@ -3,7 +3,7 @@
#![deny(
deprecated,
future_incompatible,
let_underscore,
// TODO: consider let_underscore
nonstandard_style,
rust_2024_compatibility
)]

View File

@@ -268,7 +268,7 @@ async fn keepalive_is_inherited() -> anyhow::Result<()> {
anyhow::Ok(keepalive)
});
TcpStream::connect(("127.0.0.1", port)).await?;
let _ = TcpStream::connect(("127.0.0.1", port)).await?;
assert!(t.await??, "keepalive should be inherited");
Ok(())

View File

@@ -6,7 +6,7 @@ use redis::{
ConnectionInfo, IntoConnectionInfo, RedisConnectionInfo, RedisResult,
};
use tokio::task::JoinHandle;
use tracing::{debug, error, info};
use tracing::{error, info};
use super::elasticache::CredentialsProvider;
@@ -109,10 +109,7 @@ impl ConnectionWithCredentialsProvider {
let credentials_provider = credentials_provider.clone();
let con2 = con.clone();
let f = tokio::spawn(async move {
Self::keep_connection(con2, credentials_provider)
.await
.inspect_err(|e| debug!("keep_connection failed: {e}"))
.ok();
let _ = Self::keep_connection(con2, credentials_provider).await;
});
self.refresh_token_task = Some(f);
}

View File

@@ -50,9 +50,7 @@ impl PoolingBackend {
.as_ref()
.map(|()| user_info.clone());
let (allowed_ips, maybe_secret) = backend.get_allowed_ips_and_secret(ctx).await?;
if config.ip_allowlist_check_enabled
&& !check_peer_addr_is_in_list(&ctx.peer_addr(), &allowed_ips)
{
if !check_peer_addr_is_in_list(&ctx.peer_addr(), &allowed_ips) {
return Err(AuthError::ip_address_not_allowed(ctx.peer_addr()));
}
if !self

View File

@@ -12,7 +12,6 @@ use std::{io, task};
use thiserror::Error;
use tokio::io::{AsyncRead, AsyncWrite, ReadBuf};
use tokio_rustls::server::TlsStream;
use tracing::debug;
/// Stream wrapper which implements libpq's protocol.
///
@@ -139,10 +138,9 @@ impl<S: AsyncWrite + Unpin> PqStream<S> {
);
// already error case, ignore client IO error
self.write_message(&BeMessage::ErrorResponse(msg, None))
.await
.inspect_err(|e| debug!("write_message failed: {e}"))
.ok();
let _: Result<_, std::io::Error> = self
.write_message(&BeMessage::ErrorResponse(msg, None))
.await;
Err(ReportedError {
source: anyhow::anyhow!(msg),
@@ -166,10 +164,9 @@ impl<S: AsyncWrite + Unpin> PqStream<S> {
);
// already error case, ignore client IO error
self.write_message(&BeMessage::ErrorResponse(&msg, None))
.await
.inspect_err(|e| debug!("write_message failed: {e}"))
.ok();
let _: Result<_, std::io::Error> = self
.write_message(&BeMessage::ErrorResponse(&msg, None))
.await;
Err(ReportedError {
source: anyhow::anyhow!(error),

View File

@@ -57,7 +57,7 @@ mod tests {
fn bad_url() {
let url = "test:foobar";
url.parse::<url::Url>().expect("unexpected parsing failure");
url.parse::<ApiUrl>().expect_err("should not parse");
let _ = url.parse::<ApiUrl>().expect_err("should not parse");
}
#[test]

View File

@@ -2,7 +2,6 @@
//! protocol commands.
use anyhow::Context;
use std::future::Future;
use std::str::{self, FromStr};
use std::sync::Arc;
use tokio::io::{AsyncRead, AsyncWrite};
@@ -96,6 +95,7 @@ fn cmd_to_string(cmd: &SafekeeperPostgresCommand) -> &str {
}
}
#[async_trait::async_trait]
impl<IO: AsyncRead + AsyncWrite + Unpin + Send> postgres_backend::Handler<IO>
for SafekeeperPostgresHandler
{
@@ -197,51 +197,49 @@ impl<IO: AsyncRead + AsyncWrite + Unpin + Send> postgres_backend::Handler<IO>
Ok(())
}
fn process_query(
async fn process_query(
&mut self,
pgb: &mut PostgresBackend<IO>,
query_string: &str,
) -> impl Future<Output = Result<(), QueryError>> {
Box::pin(async move {
if query_string
.to_ascii_lowercase()
.starts_with("set datestyle to ")
{
// important for debug because psycopg2 executes "SET datestyle TO 'ISO'" on connect
pgb.write_message_noflush(&BeMessage::CommandComplete(b"SELECT 1"))?;
return Ok(());
) -> Result<(), QueryError> {
if query_string
.to_ascii_lowercase()
.starts_with("set datestyle to ")
{
// important for debug because psycopg2 executes "SET datestyle TO 'ISO'" on connect
pgb.write_message_noflush(&BeMessage::CommandComplete(b"SELECT 1"))?;
return Ok(());
}
let cmd = parse_cmd(query_string)?;
let cmd_str = cmd_to_string(&cmd);
let _guard = PG_QUERIES_GAUGE.with_label_values(&[cmd_str]).guard();
info!("got query {:?}", query_string);
let tenant_id = self.tenant_id.context("tenantid is required")?;
let timeline_id = self.timeline_id.context("timelineid is required")?;
self.check_permission(Some(tenant_id))?;
self.ttid = TenantTimelineId::new(tenant_id, timeline_id);
match cmd {
SafekeeperPostgresCommand::StartWalPush => {
self.handle_start_wal_push(pgb)
.instrument(info_span!("WAL receiver"))
.await
}
let cmd = parse_cmd(query_string)?;
let cmd_str = cmd_to_string(&cmd);
let _guard = PG_QUERIES_GAUGE.with_label_values(&[cmd_str]).guard();
info!("got query {:?}", query_string);
let tenant_id = self.tenant_id.context("tenantid is required")?;
let timeline_id = self.timeline_id.context("timelineid is required")?;
self.check_permission(Some(tenant_id))?;
self.ttid = TenantTimelineId::new(tenant_id, timeline_id);
match cmd {
SafekeeperPostgresCommand::StartWalPush => {
self.handle_start_wal_push(pgb)
.instrument(info_span!("WAL receiver"))
.await
}
SafekeeperPostgresCommand::StartReplication { start_lsn, term } => {
self.handle_start_replication(pgb, start_lsn, term)
.instrument(info_span!("WAL sender"))
.await
}
SafekeeperPostgresCommand::IdentifySystem => self.handle_identify_system(pgb).await,
SafekeeperPostgresCommand::TimelineStatus => self.handle_timeline_status(pgb).await,
SafekeeperPostgresCommand::JSONCtrl { ref cmd } => {
handle_json_ctrl(self, pgb, cmd).await
}
SafekeeperPostgresCommand::StartReplication { start_lsn, term } => {
self.handle_start_replication(pgb, start_lsn, term)
.instrument(info_span!("WAL sender"))
.await
}
})
SafekeeperPostgresCommand::IdentifySystem => self.handle_identify_system(pgb).await,
SafekeeperPostgresCommand::TimelineStatus => self.handle_timeline_status(pgb).await,
SafekeeperPostgresCommand::JSONCtrl { ref cmd } => {
handle_json_ctrl(self, pgb, cmd).await
}
}
}
}

View File

@@ -18,7 +18,8 @@ Prerequisites:
Regression tests are in the 'regress' directory. They can be run in
parallel to minimize total runtime. Most regression test sets up their
environment with its own pageservers and safekeepers.
environment with its own pageservers and safekeepers (but see
`TEST_SHARED_FIXTURES`).
'pg_clients' contains tests for connecting with various client
libraries. Each client test uses a Dockerfile that pulls an image that
@@ -73,6 +74,7 @@ This is used to construct full path to the postgres binaries.
Format is 2-digit major version nubmer, i.e. `DEFAULT_PG_VERSION=16`
`TEST_OUTPUT`: Set the directory where test state and test output files
should go.
`TEST_SHARED_FIXTURES`: Try to re-use a single pageserver for all the tests.
`RUST_LOG`: logging configuration to pass into Neon CLI
Useful parameters and commands:
@@ -257,8 +259,11 @@ compute Postgres nodes. The connections between them can be configured to use JW
authentication tokens, and some other configuration options can be tweaked too.
The easiest way to get access to a Neon Environment is by using the `neon_simple_env`
fixture. For convenience, there is a branch called `main` in environments created with
'neon_simple_env', ready to be used in the test.
fixture. The 'simple' env may be shared across multiple tests, so don't shut down the nodes
or make other destructive changes in that environment. Also don't assume that
there are no tenants or branches or data in the cluster. For convenience, there is a
branch called `empty`, though. The convention is to create a test-specific branch of
that and load any test data there, instead of the 'main' branch.
For more complicated cases, you can build a custom Neon Environment, with the `neon_env`
fixture:

View File

@@ -13,7 +13,7 @@ DEFAULT_WAL_SEG_SIZE = 16 * 1024 * 1024
class Lsn:
"""
Datatype for an LSN. Internally it is a 64-bit integer, but the string
representation is like "1/0123abcd". See also pg_lsn datatype in Postgres
representation is like "1/123abcd". See also pg_lsn datatype in Postgres
"""
def __init__(self, x: Union[int, str]):

View File

@@ -57,6 +57,7 @@ from _pytest.fixtures import FixtureRequest
from psycopg2.extensions import connection as PgConnection
from psycopg2.extensions import cursor as PgCursor
from psycopg2.extensions import make_dsn, parse_dsn
from typing_extensions import Literal
from urllib3.util.retry import Retry
from fixtures import overlayfs
@@ -220,6 +221,33 @@ def neon_api(neon_api_key: str, neon_api_base_url: str) -> NeonAPI:
return NeonAPI(neon_api_key, neon_api_base_url)
def shareable_scope(fixture_name: str, config: Config) -> Literal["session", "function"]:
"""Return either session of function scope, depending on TEST_SHARED_FIXTURES envvar.
This function can be used as a scope like this:
@pytest.fixture(scope=shareable_scope)
def myfixture(...)
...
"""
scope: Literal["session", "function"]
if os.environ.get("TEST_SHARED_FIXTURES") is None:
# Create the environment in the per-test output directory
scope = "function"
elif (
os.environ.get("BUILD_TYPE") is not None
and os.environ.get("DEFAULT_PG_VERSION") is not None
):
scope = "session"
else:
pytest.fail(
"Shared environment(TEST_SHARED_FIXTURES) requires BUILD_TYPE and DEFAULT_PG_VERSION to be set",
pytrace=False,
)
return scope
@pytest.fixture(scope="session")
def worker_port_num():
return (32768 - BASE_PORT) // int(os.environ.get("PYTEST_XDIST_WORKER_COUNT", "1"))
@@ -553,6 +581,10 @@ class NeonEnvBuilder:
self.env = NeonEnv(self)
return self.env
def start(self):
assert self.env is not None, "environment is not already initialized, call init() first"
self.env.start()
def init_start(
self,
initial_tenant_conf: Optional[Dict[str, Any]] = None,
@@ -568,7 +600,7 @@ class NeonEnvBuilder:
Configuring pageserver with remote storage is now the default. There will be a warning if pageserver is created without one.
"""
env = self.init_configs(default_remote_storage_if_missing=default_remote_storage_if_missing)
env.start()
self.start()
# Prepare the default branch to start the postgres on later.
# Pageserver itself does not create tenants and timelines, until started first and asked via HTTP API.
@@ -933,11 +965,8 @@ class NeonEnvBuilder:
for directory_to_clean in reversed(directories_to_clean):
if not os.listdir(directory_to_clean):
log.info(f"Removing empty directory {directory_to_clean}")
try:
directory_to_clean.rmdir()
except Exception as e:
log.error(f"Error removing empty directory {directory_to_clean}: {e}")
log.debug(f"Removing empty directory {directory_to_clean}")
directory_to_clean.rmdir()
def cleanup_remote_storage(self):
for x in [self.pageserver_remote_storage, self.safekeepers_remote_storage]:
@@ -1072,6 +1101,9 @@ class NeonEnv:
self.pg_distrib_dir = config.pg_distrib_dir
self.endpoint_counter = 0
self.storage_controller_config = config.storage_controller_config
# generate initial tenant ID here instead of letting 'neon init' generate it,
# so that we don't need to dig it out of the config file afterwards.
self.initial_tenant = config.initial_tenant
self.initial_timeline = config.initial_timeline
@@ -1399,8 +1431,8 @@ class NeonEnv:
return "ep-" + str(self.endpoint_counter)
@pytest.fixture(scope="function")
def neon_simple_env(
@pytest.fixture(scope=shareable_scope)
def _shared_simple_env(
request: FixtureRequest,
pytestconfig: Config,
port_distributor: PortDistributor,
@@ -1418,13 +1450,19 @@ def neon_simple_env(
pageserver_io_buffer_alignment: Optional[int],
) -> Iterator[NeonEnv]:
"""
Simple Neon environment, with no authentication and no safekeepers.
# Internal fixture backing the `neon_simple_env` fixture. If TEST_SHARED_FIXTURES
is set, this is shared by all tests using `neon_simple_env`.
This fixture will use RemoteStorageKind.LOCAL_FS with pageserver.
"""
# Create the environment in the per-test output directory
repo_dir = get_test_repo_dir(request, top_output_dir)
if os.environ.get("TEST_SHARED_FIXTURES") is None:
# Create the environment in the per-test output directory
repo_dir = get_test_repo_dir(request, top_output_dir)
else:
# We're running shared fixtures. Share a single directory.
repo_dir = top_output_dir / "shared_repo"
shutil.rmtree(repo_dir, ignore_errors=True)
with NeonEnvBuilder(
top_output_dir=top_output_dir,
@@ -1446,9 +1484,27 @@ def neon_simple_env(
) as builder:
env = builder.init_start()
# For convenience in tests, create a branch from the freshly-initialized cluster.
env.neon_cli.create_branch("empty", ancestor_branch_name=DEFAULT_BRANCH_NAME)
yield env
@pytest.fixture(scope="function")
def neon_simple_env(_shared_simple_env: NeonEnv) -> Iterator[NeonEnv]:
"""
Simple Neon environment, with no authentication and no safekeepers.
If TEST_SHARED_FIXTURES environment variable is set, we reuse the same
environment for all tests that use 'neon_simple_env', keeping the
page server and safekeepers running. Any compute nodes are stopped after
each the test, however.
"""
yield _shared_simple_env
_shared_simple_env.endpoints.stop_all()
@pytest.fixture(scope="function")
def neon_env_builder(
pytestconfig: Config,
@@ -1517,6 +1573,14 @@ class PageserverPort:
http: int
CREATE_TIMELINE_ID_EXTRACTOR: re.Pattern = re.compile( # type: ignore[type-arg]
r"^Created timeline '(?P<timeline_id>[^']+)'", re.MULTILINE
)
TIMELINE_DATA_EXTRACTOR: re.Pattern = re.compile( # type: ignore[type-arg]
r"\s?(?P<branch_name>[^\s]+)\s\[(?P<timeline_id>[^\]]+)\]", re.MULTILINE
)
class AbstractNeonCli(abc.ABC):
"""
A typed wrapper around an arbitrary Neon CLI tool.
@@ -1745,9 +1809,6 @@ class NeonCli(AbstractNeonCli):
tenant_id: Optional[TenantId] = None,
timeline_id: Optional[TimelineId] = None,
) -> TimelineId:
if timeline_id is None:
timeline_id = TimelineId.generate()
cmd = [
"timeline",
"create",
@@ -1755,16 +1816,23 @@ class NeonCli(AbstractNeonCli):
new_branch_name,
"--tenant-id",
str(tenant_id or self.env.initial_tenant),
"--timeline-id",
str(timeline_id),
"--pg-version",
self.env.pg_version,
]
if timeline_id is not None:
cmd.extend(["--timeline-id", str(timeline_id)])
res = self.raw_cli(cmd)
res.check_returncode()
return timeline_id
matches = CREATE_TIMELINE_ID_EXTRACTOR.search(res.stdout)
created_timeline_id = None
if matches is not None:
created_timeline_id = matches.group("timeline_id")
return TimelineId(str(created_timeline_id))
def create_branch(
self,
@@ -1772,17 +1840,12 @@ class NeonCli(AbstractNeonCli):
ancestor_branch_name: Optional[str] = None,
tenant_id: Optional[TenantId] = None,
ancestor_start_lsn: Optional[Lsn] = None,
new_timeline_id: Optional[TimelineId] = None,
) -> TimelineId:
if new_timeline_id is None:
new_timeline_id = TimelineId.generate()
cmd = [
"timeline",
"branch",
"--branch-name",
new_branch_name,
"--timeline-id",
str(new_timeline_id),
"--tenant-id",
str(tenant_id or self.env.initial_tenant),
]
@@ -1794,7 +1857,16 @@ class NeonCli(AbstractNeonCli):
res = self.raw_cli(cmd)
res.check_returncode()
return TimelineId(str(new_timeline_id))
matches = CREATE_TIMELINE_ID_EXTRACTOR.search(res.stdout)
created_timeline_id = None
if matches is not None:
created_timeline_id = matches.group("timeline_id")
if created_timeline_id is None:
raise Exception("could not find timeline id after `neon timeline create` invocation")
else:
return TimelineId(str(created_timeline_id))
def list_timelines(self, tenant_id: Optional[TenantId] = None) -> List[Tuple[str, TimelineId]]:
"""
@@ -1803,9 +1875,6 @@ class NeonCli(AbstractNeonCli):
# main [b49f7954224a0ad25cc0013ea107b54b]
# ┣━ @0/16B5A50: test_cli_branch_list_main [20f98c79111b9015d84452258b7d5540]
TIMELINE_DATA_EXTRACTOR: re.Pattern = re.compile( # type: ignore[type-arg]
r"\s?(?P<branch_name>[^\s]+)\s\[(?P<timeline_id>[^\]]+)\]", re.MULTILINE
)
res = self.raw_cli(
["timeline", "list", "--tenant-id", str(tenant_id or self.env.initial_tenant)]
)
@@ -3426,7 +3495,6 @@ class VanillaPostgres(PgProtocol):
assert not self.running
with open(os.path.join(self.pgdatadir, "postgresql.conf"), "a") as conf_file:
conf_file.write("\n".join(options))
conf_file.write("\n")
def edit_hba(self, hba: List[str]):
"""Prepend hba lines into pg_hba.conf file."""
@@ -3480,7 +3548,6 @@ def vanilla_pg(
pg_bin = PgBin(test_output_dir, pg_distrib_dir, pg_version)
port = port_distributor.get_port()
with VanillaPostgres(pgdatadir, pg_bin, port) as vanilla_pg:
vanilla_pg.configure(["shared_preload_libraries='neon_rmgr'"])
yield vanilla_pg
@@ -4831,7 +4898,14 @@ SMALL_DB_FILE_NAME_REGEX: re.Pattern = re.compile( # type: ignore[type-arg]
# This is autouse, so the test output directory always gets created, even
# if a test doesn't put anything there.
# if a test doesn't put anything there. It also solves a problem with the
# neon_simple_env fixture: if TEST_SHARED_FIXTURES is not set, it
# creates the repo in the test output directory. But it cannot depend on
# 'test_output_dir' fixture, because when TEST_SHARED_FIXTURES is not set,
# it has 'session' scope and cannot access fixtures with 'function'
# scope. So it uses the get_test_output_dir() function to get the path, and
# this fixture ensures that the directory exists. That works because
# 'autouse' fixtures are run before other fixtures.
#
# NB: we request the overlay dir fixture so the fixture does its cleanups
@pytest.fixture(scope="function", autouse=True)

View File

@@ -16,7 +16,6 @@ class PgVersion(str, enum.Enum):
V14 = "14"
V15 = "15"
V16 = "16"
V17 = "17"
# Instead of making version an optional parameter in methods, we can use this fake entry
# to explicitly rely on the default server version (could be different from pg_version fixture value)
NOT_SET = "<-POSTRGRES VERSION IS NOT SET->"

View File

@@ -22,8 +22,10 @@ if TYPE_CHECKING:
def test_logical_replication(neon_simple_env: NeonEnv, pg_bin: PgBin, vanilla_pg):
env = neon_simple_env
endpoint = env.endpoints.create_start("main")
env.neon_cli.create_branch("test_logical_replication", "empty")
endpoint = env.endpoints.create_start("test_logical_replication")
log.info("postgres is running on 'test_logical_replication' branch")
pg_bin.run_capture(["pgbench", "-i", "-s10", endpoint.connstr()])
endpoint.safe_psql("create publication pub1 for table pgbench_accounts, pgbench_history")

View File

@@ -84,7 +84,7 @@ def test_storage_controller_many_tenants(
compute_reconfigure_listener.register_on_notify(lambda body: time.sleep(0.01))
env = neon_env_builder.init_configs()
env.start()
neon_env_builder.start()
# We will intentionally stress reconciler concurrrency, which triggers a warning when lots
# of shards are hitting the delayed path.

View File

@@ -1,7 +0,0 @@
{
"public_extensions": [],
"library_index": {
"TODO": "We still need PG17 extensions"
},
"extension_data": {}
}

View File

@@ -8,10 +8,11 @@ from fixtures.neon_fixtures import NeonEnv
#
def test_basebackup_error(neon_simple_env: NeonEnv):
env = neon_simple_env
env.neon_cli.create_branch("test_basebackup_error", "empty")
pageserver_http = env.pageserver.http_client()
# Introduce failpoint
pageserver_http.configure_failpoints(("basebackup-before-control-file", "return"))
with pytest.raises(Exception, match="basebackup-before-control-file"):
env.endpoints.create_start("main")
env.endpoints.create_start("test_basebackup_error")

View File

@@ -11,6 +11,7 @@ from fixtures.utils import query_scalar
#
def test_clog_truncate(neon_simple_env: NeonEnv):
env = neon_simple_env
env.neon_cli.create_branch("test_clog_truncate", "empty")
# set aggressive autovacuum to make sure that truncation will happen
config = [
@@ -23,7 +24,7 @@ def test_clog_truncate(neon_simple_env: NeonEnv):
"autovacuum_freeze_max_age=100000",
]
endpoint = env.endpoints.create_start("main", config_lines=config)
endpoint = env.endpoints.create_start("test_clog_truncate", config_lines=config)
# Install extension containing function needed for test
endpoint.safe_psql("CREATE EXTENSION neon_test_utils")
@@ -57,7 +58,7 @@ def test_clog_truncate(neon_simple_env: NeonEnv):
# create new branch after clog truncation and start a compute node on it
log.info(f"create branch at lsn_after_truncation {lsn_after_truncation}")
env.neon_cli.create_branch(
"test_clog_truncate_new", "main", ancestor_start_lsn=lsn_after_truncation
"test_clog_truncate_new", "test_clog_truncate", ancestor_start_lsn=lsn_after_truncation
)
endpoint2 = env.endpoints.create_start("test_clog_truncate_new")

View File

@@ -21,7 +21,7 @@ from fixtures.pageserver.http import PageserverApiException
from fixtures.pageserver.utils import (
timeline_delete_wait_completed,
)
from fixtures.pg_version import PgVersion, skip_on_postgres
from fixtures.pg_version import PgVersion
from fixtures.remote_storage import RemoteStorageKind, S3Storage, s3_storage
from fixtures.workload import Workload
@@ -156,9 +156,6 @@ ingest_lag_log_line = ".*ingesting record with timestamp lagging more than wait_
@check_ondisk_data_compatibility_if_enabled
@pytest.mark.xdist_group("compatibility")
@pytest.mark.order(after="test_create_snapshot")
@skip_on_postgres(
PgVersion.V17, "There are no snapshots yet"
) # TODO: revert this once we have snapshots
def test_backward_compatibility(
neon_env_builder: NeonEnvBuilder,
test_output_dir: Path,
@@ -181,7 +178,7 @@ def test_backward_compatibility(
neon_env_builder.num_safekeepers = 3
env = neon_env_builder.from_repo_dir(compatibility_snapshot_dir / "repo")
env.pageserver.allowed_errors.append(ingest_lag_log_line)
env.start()
neon_env_builder.start()
check_neon_works(
env,
@@ -206,9 +203,6 @@ def test_backward_compatibility(
@check_ondisk_data_compatibility_if_enabled
@pytest.mark.xdist_group("compatibility")
@pytest.mark.order(after="test_create_snapshot")
@skip_on_postgres(
PgVersion.V17, "There are no snapshots yet"
) # TODO: revert this once we have snapshots
def test_forward_compatibility(
neon_env_builder: NeonEnvBuilder,
test_output_dir: Path,
@@ -271,7 +265,7 @@ def test_forward_compatibility(
# does not include logs from previous runs
assert not env.pageserver.log_contains("git-env:" + prev_pageserver_version)
env.start()
neon_env_builder.start()
# ensure the specified pageserver is running
assert env.pageserver.log_contains("git-env:" + prev_pageserver_version)

View File

@@ -4,8 +4,9 @@ from fixtures.neon_fixtures import NeonEnv
def test_compute_catalog(neon_simple_env: NeonEnv):
env = neon_simple_env
env.neon_cli.create_branch("test_config", "empty")
endpoint = env.endpoints.create_start("main", config_lines=["log_min_messages=debug1"])
endpoint = env.endpoints.create_start("test_config", config_lines=["log_min_messages=debug1"])
client = endpoint.http_client()
objects = client.dbs_and_roles()

View File

@@ -9,9 +9,10 @@ from fixtures.neon_fixtures import NeonEnv, NeonEnvBuilder
#
def test_config(neon_simple_env: NeonEnv):
env = neon_simple_env
env.neon_cli.create_branch("test_config", "empty")
# change config
endpoint = env.endpoints.create_start("main", config_lines=["log_min_messages=debug1"])
endpoint = env.endpoints.create_start("test_config", config_lines=["log_min_messages=debug1"])
with closing(endpoint.connect()) as conn:
with conn.cursor() as cur:

View File

@@ -17,7 +17,9 @@ def test_createdb(neon_simple_env: NeonEnv, strategy: str):
if env.pg_version == PgVersion.V14 and strategy == "wal_log":
pytest.skip("wal_log strategy not supported on PostgreSQL 14")
endpoint = env.endpoints.create_start("main")
env.neon_cli.create_branch("test_createdb", "empty")
endpoint = env.endpoints.create_start("test_createdb")
with endpoint.cursor() as cur:
# Cause a 'relmapper' change in the original branch
@@ -31,7 +33,7 @@ def test_createdb(neon_simple_env: NeonEnv, strategy: str):
lsn = query_scalar(cur, "SELECT pg_current_wal_insert_lsn()")
# Create a branch
env.neon_cli.create_branch("test_createdb2", "main", ancestor_start_lsn=lsn)
env.neon_cli.create_branch("test_createdb2", "test_createdb", ancestor_start_lsn=lsn)
endpoint2 = env.endpoints.create_start("test_createdb2")
# Test that you can connect to the new database on both branches
@@ -60,7 +62,8 @@ def test_createdb(neon_simple_env: NeonEnv, strategy: str):
#
def test_dropdb(neon_simple_env: NeonEnv, test_output_dir):
env = neon_simple_env
endpoint = env.endpoints.create_start("main")
env.neon_cli.create_branch("test_dropdb", "empty")
endpoint = env.endpoints.create_start("test_dropdb")
with endpoint.cursor() as cur:
cur.execute("CREATE DATABASE foodb")
@@ -77,10 +80,14 @@ def test_dropdb(neon_simple_env: NeonEnv, test_output_dir):
lsn_after_drop = query_scalar(cur, "SELECT pg_current_wal_insert_lsn()")
# Create two branches before and after database drop.
env.neon_cli.create_branch("test_before_dropdb", "main", ancestor_start_lsn=lsn_before_drop)
env.neon_cli.create_branch(
"test_before_dropdb", "test_dropdb", ancestor_start_lsn=lsn_before_drop
)
endpoint_before = env.endpoints.create_start("test_before_dropdb")
env.neon_cli.create_branch("test_after_dropdb", "main", ancestor_start_lsn=lsn_after_drop)
env.neon_cli.create_branch(
"test_after_dropdb", "test_dropdb", ancestor_start_lsn=lsn_after_drop
)
endpoint_after = env.endpoints.create_start("test_after_dropdb")
# Test that database exists on the branch before drop

View File

@@ -7,7 +7,8 @@ from fixtures.utils import query_scalar
#
def test_createuser(neon_simple_env: NeonEnv):
env = neon_simple_env
endpoint = env.endpoints.create_start("main")
env.neon_cli.create_branch("test_createuser", "empty")
endpoint = env.endpoints.create_start("test_createuser")
with endpoint.cursor() as cur:
# Cause a 'relmapper' change in the original branch
@@ -18,7 +19,7 @@ def test_createuser(neon_simple_env: NeonEnv):
lsn = query_scalar(cur, "SELECT pg_current_wal_insert_lsn()")
# Create a branch
env.neon_cli.create_branch("test_createuser2", "main", ancestor_start_lsn=lsn)
env.neon_cli.create_branch("test_createuser2", "test_createuser", ancestor_start_lsn=lsn)
endpoint2 = env.endpoints.create_start("test_createuser2")
# Test that you can connect to new branch as a new user

View File

@@ -290,8 +290,9 @@ def assert_db_connlimit(endpoint: Any, db_name: str, connlimit: int, msg: str):
# Here we test the latter. The first one is tested in test_ddl_forwarding
def test_ddl_forwarding_invalid_db(neon_simple_env: NeonEnv):
env = neon_simple_env
env.neon_cli.create_branch("test_ddl_forwarding_invalid_db", "empty")
endpoint = env.endpoints.create_start(
"main",
"test_ddl_forwarding_invalid_db",
# Some non-existent url
config_lines=["neon.console_url=http://localhost:9999/unknown/api/v0/roles_and_databases"],
)

View File

@@ -44,8 +44,6 @@ def test_remote_extensions(
):
if pg_version == PgVersion.V16:
pytest.skip("TODO: PG16 extension building")
if pg_version == PgVersion.V17:
pytest.skip("TODO: PG17 extension building")
# setup mock http server
# that expects request for anon.tar.zst

View File

@@ -10,9 +10,11 @@ def test_explain_with_lfc_stats(neon_simple_env: NeonEnv):
cache_dir = Path(env.repo_dir) / "file_cache"
cache_dir.mkdir(exist_ok=True)
log.info("Creating endpoint with 1MB shared_buffers and 64 MB LFC")
branchname = "test_explain_with_lfc_stats"
env.neon_cli.create_branch(branchname, "empty")
log.info(f"Creating endopint with 1MB shared_buffers and 64 MB LFC for branch {branchname}")
endpoint = env.endpoints.create_start(
"main",
branchname,
config_lines=[
"shared_buffers='1MB'",
f"neon.file_cache_path='{cache_dir}/file.cache'",

View File

@@ -16,8 +16,9 @@ from fixtures.neon_fixtures import NeonEnv, PgBin
@pytest.mark.timeout(600)
def test_lfc_resize(neon_simple_env: NeonEnv, pg_bin: PgBin):
env = neon_simple_env
env.neon_cli.create_branch("test_lfc_resize", "empty")
endpoint = env.endpoints.create_start(
"main",
"test_lfc_resize",
config_lines=[
"neon.file_cache_path='file.cache'",
"neon.max_file_cache_size=512MB",

View File

@@ -12,9 +12,11 @@ def test_lfc_working_set_approximation(neon_simple_env: NeonEnv):
cache_dir = Path(env.repo_dir) / "file_cache"
cache_dir.mkdir(exist_ok=True)
log.info("Creating endpoint with 1MB shared_buffers and 64 MB LFC")
branchname = "test_approximate_working_set_size"
env.neon_cli.create_branch(branchname, "empty")
log.info(f"Creating endopint with 1MB shared_buffers and 64 MB LFC for branch {branchname}")
endpoint = env.endpoints.create_start(
"main",
branchname,
config_lines=[
"shared_buffers='1MB'",
f"neon.file_cache_path='{cache_dir}/file.cache'",
@@ -114,46 +116,3 @@ def test_sliding_working_set_approximation(neon_simple_env: NeonEnv):
assert estimation_1k >= 20 and estimation_1k <= 40
assert estimation_10k >= 200 and estimation_10k <= 400
def test_optimal_cache_size_approximation(neon_simple_env: NeonEnv):
env = neon_simple_env
endpoint = env.endpoints.create_start(
branch_name="main",
config_lines=[
"autovacuum = off",
"shared_buffers=1MB",
"neon.max_file_cache_size=256MB",
"neon.file_cache_size_limit=245MB",
],
)
conn = endpoint.connect()
cur = conn.cursor()
cur.execute("create extension neon version '1.5'")
cur.execute(
"create table t_huge(pk integer primary key, count integer default 0, payload text default repeat('?', 128))"
)
cur.execute(
"create table t_small(pk integer primary key, count integer default 0, payload text default repeat('?', 128))"
)
cur.execute(
"insert into t_huge(pk) values (generate_series(1,1000000))"
) # table size is 21277 pages
cur.execute(
"insert into t_small(pk) values (generate_series(1,100000))"
) # table size is 2128 pages
time.sleep(2)
before = time.monotonic()
for _ in range(100):
cur.execute("select sum(count) from t_small")
cur.execute("select sum(count) from t_huge")
after = time.monotonic()
cur.execute(f"select approximate_working_set_size_seconds({int(after - before + 1)})")
ws_estimation = cur.fetchall()[0][0]
log.info(f"Working set size estimaton {ws_estimation}")
cur.execute(f"select approximate_optimal_cache_size({int(after - before + 1)}, 0.99)")
optimal_cache_size = cur.fetchall()[0][0]
log.info(f"Optimal cache size for 99% hit rate {optimal_cache_size}")
assert ws_estimation >= 20000 and ws_estimation <= 30000
assert optimal_cache_size >= 2000 and optimal_cache_size <= 3000

View File

@@ -5,7 +5,7 @@ import threading
import time
from typing import List
from fixtures.neon_fixtures import NeonEnvBuilder
from fixtures.neon_fixtures import DEFAULT_BRANCH_NAME, NeonEnvBuilder
from fixtures.utils import query_scalar
@@ -15,8 +15,11 @@ def test_local_file_cache_unlink(neon_env_builder: NeonEnvBuilder):
cache_dir = os.path.join(env.repo_dir, "file_cache")
os.mkdir(cache_dir)
env.neon_cli.create_branch("empty", ancestor_branch_name=DEFAULT_BRANCH_NAME)
env.neon_cli.create_branch("test_local_file_cache_unlink", "empty")
endpoint = env.endpoints.create_start(
"main",
"test_local_file_cache_unlink",
config_lines=[
"shared_buffers='1MB'",
f"neon.file_cache_path='{cache_dir}/file.cache'",

View File

@@ -36,8 +36,10 @@ def test_logical_replication(neon_simple_env: NeonEnv, vanilla_pg):
env = neon_simple_env
tenant_id = env.initial_tenant
timeline_id = env.initial_timeline
endpoint = env.endpoints.create_start("main", config_lines=["log_statement=all"])
timeline_id = env.neon_cli.create_branch("test_logical_replication", "empty")
endpoint = env.endpoints.create_start(
"test_logical_replication", config_lines=["log_statement=all"]
)
pg_conn = endpoint.connect()
cur = pg_conn.cursor()
@@ -183,9 +185,10 @@ def test_obsolete_slot_drop(neon_simple_env: NeonEnv, vanilla_pg):
env = neon_simple_env
env.neon_cli.create_branch("test_logical_replication", "empty")
# set low neon.logical_replication_max_snap_files
endpoint = env.endpoints.create_start(
"main",
"test_logical_replication",
config_lines=["log_statement=all", "neon.logical_replication_max_snap_files=1"],
)
@@ -469,7 +472,7 @@ def test_slots_and_branching(neon_simple_env: NeonEnv):
def test_replication_shutdown(neon_simple_env: NeonEnv):
# Ensure Postgres can exit without stuck when a replication job is active + neon extension installed
env = neon_simple_env
env.neon_cli.create_branch("test_replication_shutdown_publisher", "main")
env.neon_cli.create_branch("test_replication_shutdown_publisher", "empty")
pub = env.endpoints.create("test_replication_shutdown_publisher")
env.neon_cli.create_branch("test_replication_shutdown_subscriber")

Some files were not shown because too many files have changed in this diff Show More