Compare commits

..

1 Commits

Author SHA1 Message Date
Arthur Petukhovsky
84f7d0ac92 Disable XLP_FIRST_IS_CONTRECORD check 2022-08-15 19:38:12 +00:00
78 changed files with 1232 additions and 1457 deletions

View File

@@ -1,8 +1,7 @@
#!/bin/sh
# fetch params from meta-data service
# get instance id from meta-data service
INSTANCE_ID=$(curl -s http://169.254.169.254/latest/meta-data/instance-id)
AZ_ID=$(curl -s http://169.254.169.254/latest/meta-data/placement/availability-zone)
# store fqdn hostname in var
HOST=$(hostname -f)
@@ -15,8 +14,7 @@ cat <<EOF | tee /tmp/payload
"port": 6500,
"http_port": 7676,
"region_id": {{ console_region_id }},
"instance_id": "${INSTANCE_ID}",
"availability_zone_id": "${AZ_ID}"
"instance_id": "${INSTANCE_ID}"
}
EOF

View File

@@ -106,7 +106,7 @@ jobs:
mkdir -p perf-report-staging
# Set --sparse-ordering option of pytest-order plugin to ensure tests are running in order of appears in the file,
# it's important for test_perf_pgbench.py::test_pgbench_remote_* tests
./scripts/pytest test_runner/performance/ -v -m "remote_cluster" --sparse-ordering --skip-interfering-proc-check --out-dir perf-report-staging --timeout 5400
./scripts/pytest test_runner/performance/ -v -m "remote_cluster" --sparse-ordering --skip-interfering-proc-check --out-dir perf-report-staging --timeout 3600
- name: Submit result
env:

View File

@@ -7,6 +7,10 @@ on:
- release
pull_request:
defaults:
run:
shell: bash -euxo pipefail {0}
concurrency:
# Allow only one workflow per any non-`main` branch.
group: ${{ github.workflow }}-${{ github.ref }}-${{ github.ref == 'refs/heads/main' && github.sha || 'anysha' }}
@@ -17,39 +21,9 @@ env:
COPT: '-Werror'
jobs:
tag:
runs-on: dev
container: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/base:latest
outputs:
build-tag: ${{steps.build-tag.outputs.tag}}
steps:
- name: Checkout
uses: actions/checkout@v3
with:
fetch-depth: 0
- name: Get build tag
run: |
echo run:$GITHUB_RUN_ID
echo ref:$GITHUB_REF_NAME
echo rev:$(git rev-list --count HEAD)
if [[ "$GITHUB_REF_NAME" == "main" ]]; then
echo "::set-output name=tag::$(git rev-list --count HEAD)"
elif [[ "$GITHUB_REF_NAME" == "release" ]]; then
echo "::set-output name=tag::release-$(git rev-list --count HEAD)"
else
echo "GITHUB_REF_NAME (value '$GITHUB_REF_NAME') is not set to either 'main' or 'release'"
echo "::set-output name=tag::$GITHUB_RUN_ID"
fi
shell: bash
id: build-tag
build-neon:
runs-on: dev
container:
image: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/rust:pinned
options: --init
container: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/rustlegacy:2746987948
strategy:
fail-fast: false
matrix:
@@ -61,7 +35,7 @@ jobs:
GIT_VERSION: ${{ github.sha }}
steps:
- name: Fix git ownership
- name: Fix git ownerwhip
run: |
# Workaround for `fatal: detected dubious ownership in repository at ...`
#
@@ -80,7 +54,6 @@ jobs:
- name: Set pg revision for caching
id: pg_ver
run: echo ::set-output name=pg_rev::$(git rev-parse HEAD:vendor/postgres)
shell: bash -euxo pipefail {0}
# Set some environment variables used by all the steps.
#
@@ -104,7 +77,6 @@ jobs:
echo "cov_prefix=${cov_prefix}" >> $GITHUB_ENV
echo "CARGO_FEATURES=${CARGO_FEATURES}" >> $GITHUB_ENV
echo "CARGO_FLAGS=${CARGO_FLAGS}" >> $GITHUB_ENV
shell: bash -euxo pipefail {0}
# Don't include the ~/.cargo/registry/src directory. It contains just
# uncompressed versions of the crates in ~/.cargo/registry/cache
@@ -121,8 +93,8 @@ jobs:
target/
# Fall back to older versions of the key, if no cache for current Cargo.lock was found
key: |
v6-${{ runner.os }}-${{ matrix.build_type }}-cargo-${{ matrix.rust_toolchain }}-${{ hashFiles('Cargo.lock') }}
v6-${{ runner.os }}-${{ matrix.build_type }}-cargo-${{ matrix.rust_toolchain }}-
v3-${{ runner.os }}-${{ matrix.build_type }}-cargo-${{ matrix.rust_toolchain }}-${{ hashFiles('Cargo.lock') }}
v3-${{ runner.os }}-${{ matrix.build_type }}-cargo-${{ matrix.rust_toolchain }}-
- name: Cache postgres build
id: cache_pg
@@ -134,17 +106,14 @@ jobs:
- name: Build postgres
if: steps.cache_pg.outputs.cache-hit != 'true'
run: mold -run make postgres -j$(nproc)
shell: bash -euxo pipefail {0}
- name: Run cargo build
run: |
${cov_prefix} mold -run cargo build $CARGO_FLAGS --features failpoints --bins --tests
shell: bash -euxo pipefail {0}
- name: Run cargo test
run: |
${cov_prefix} cargo test $CARGO_FLAGS
shell: bash -euxo pipefail {0}
- name: Install rust binaries
run: |
@@ -185,11 +154,9 @@ jobs:
echo "/tmp/neon/bin/$bin" >> /tmp/coverage/binaries.list
done
fi
shell: bash -euxo pipefail {0}
- name: Install postgres binaries
run: cp -a tmp_install /tmp/neon/pg_install
shell: bash -euxo pipefail {0}
- name: Upload Neon artifact
uses: ./.github/actions/upload
@@ -204,9 +171,7 @@ jobs:
pg_regress-tests:
runs-on: dev
container:
image: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/rust:pinned
options: --init
container: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/rustlegacy:2746987948
needs: [ build-neon ]
strategy:
fail-fast: false
@@ -234,9 +199,7 @@ jobs:
other-tests:
runs-on: dev
container:
image: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/rust:pinned
options: --init
container: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/rustlegacy:2746987948
needs: [ build-neon ]
strategy:
fail-fast: false
@@ -267,9 +230,7 @@ jobs:
benchmarks:
runs-on: dev
container:
image: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/rust:pinned
options: --init
container: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/rustlegacy:2746987948
needs: [ build-neon ]
if: github.ref_name == 'main' || contains(github.event.pull_request.labels.*.name, 'run-benchmarks')
strategy:
@@ -300,9 +261,7 @@ jobs:
coverage-report:
runs-on: dev
container:
image: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/rust:pinned
options: --init
container: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/rustlegacy:2746987948
needs: [ other-tests, pg_regress-tests ]
strategy:
fail-fast: false
@@ -325,7 +284,7 @@ jobs:
!~/.cargo/registry/src
~/.cargo/git/
target/
key: v5-${{ runner.os }}-${{ matrix.build_type }}-cargo-${{ matrix.rust_toolchain }}-${{ hashFiles('Cargo.lock') }}
key: v3-${{ runner.os }}-${{ matrix.build_type }}-cargo-${{ matrix.rust_toolchain }}-${{ hashFiles('Cargo.lock') }}
- name: Get Neon artifact
uses: ./.github/actions/download
@@ -341,7 +300,6 @@ jobs:
- name: Merge coverage data
run: scripts/coverage "--profraw-prefix=$GITHUB_JOB" --dir=/tmp/coverage merge
shell: bash -euxo pipefail {0}
- name: Build and upload coverage report
run: |
@@ -374,13 +332,9 @@ jobs:
\"description\": \"Coverage report is ready\",
\"target_url\": \"$REPORT_URL\"
}"
shell: bash -euxo pipefail {0}
trigger-e2e-tests:
runs-on: dev
container:
image: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/rust:pinned
options: --init
runs-on: [ self-hosted, Linux, k8s-runner ]
needs: [ build-neon ]
steps:
- name: Set PR's status to pending and request a remote CI test
@@ -415,150 +369,150 @@ jobs:
}
}"
neon-image:
runs-on: dev
container: gcr.io/kaniko-project/executor:v1.9.0-debug
docker-image:
runs-on: [ self-hosted, Linux, k8s-runner ]
needs: [ pg_regress-tests, other-tests ]
if: |
(github.ref_name == 'main' || github.ref_name == 'release') &&
github.event_name != 'workflow_dispatch'
outputs:
build-tag: ${{steps.build-tag.outputs.tag}}
steps:
- name: Checkout
uses: actions/checkout@v1 # v3 won't work with kaniko
uses: actions/checkout@v3
with:
submodules: true
fetch-depth: 0
- name: Configure ECR login
run: echo "{\"credsStore\":\"ecr-login\"}" > /kaniko/.docker/config.json
- name: Login to DockerHub
uses: docker/login-action@v1
with:
username: ${{ secrets.NEON_DOCKERHUB_USERNAME }}
password: ${{ secrets.NEON_DOCKERHUB_PASSWORD }}
- name: Kaniko build neon
run: /kaniko/executor --snapshotMode=redo --cache=true --cache-repo 369495373322.dkr.ecr.eu-central-1.amazonaws.com/cache --snapshotMode=redo --context . --destination 369495373322.dkr.ecr.eu-central-1.amazonaws.com/neon:$GITHUB_RUN_ID
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@v1
with:
driver: docker
compute-tools-image:
runs-on: dev
container: gcr.io/kaniko-project/executor:v1.9.0-debug
- name: Get build tag
run: |
if [[ "$GITHUB_REF_NAME" == "main" ]]; then
echo "::set-output name=tag::$(git rev-list --count HEAD)"
elif [[ "$GITHUB_REF_NAME" == "release" ]]; then
echo "::set-output name=tag::release-$(git rev-list --count HEAD)"
else
echo "GITHUB_REF_NAME (value '$GITHUB_REF_NAME') is not set to either 'main' or 'release'"
exit 1
fi
id: build-tag
- name: Get legacy build tag
run: |
if [[ "$GITHUB_REF_NAME" == "main" ]]; then
echo "::set-output name=tag::latest"
elif [[ "$GITHUB_REF_NAME" == "release" ]]; then
echo "::set-output name=tag::release"
else
echo "GITHUB_REF_NAME (value '$GITHUB_REF_NAME') is not set to either 'main' or 'release'"
exit 1
fi
id: legacy-build-tag
- name: Build neon Docker image
uses: docker/build-push-action@v2
with:
context: .
build-args: |
GIT_VERSION="${{github.sha}}"
AWS_ACCESS_KEY_ID="${{secrets.CACHEPOT_AWS_ACCESS_KEY_ID}}"
AWS_SECRET_ACCESS_KEY="${{secrets.CACHEPOT_AWS_SECRET_ACCESS_KEY}}"
pull: true
push: true
tags: neondatabase/neon:${{steps.legacy-build-tag.outputs.tag}}, neondatabase/neon:${{steps.build-tag.outputs.tag}}
docker-image-compute:
runs-on: [ self-hosted, Linux, k8s-runner ]
needs: [ pg_regress-tests, other-tests ]
if: |
(github.ref_name == 'main' || github.ref_name == 'release') &&
github.event_name != 'workflow_dispatch'
outputs:
build-tag: ${{steps.build-tag.outputs.tag}}
steps:
- name: Checkout
uses: actions/checkout@v1 # v3 won't work with kaniko
- name: Configure ECR login
run: echo "{\"credsStore\":\"ecr-login\"}" > /kaniko/.docker/config.json
- name: Kaniko build compute tools
run: /kaniko/executor --snapshotMode=redo --cache=true --cache-repo 369495373322.dkr.ecr.eu-central-1.amazonaws.com/cache --snapshotMode=redo --context . --dockerfile Dockerfile.compute-tools --destination 369495373322.dkr.ecr.eu-central-1.amazonaws.com/compute-tools:$GITHUB_RUN_ID
promote-image-compute-tools:
runs-on: dev
needs: [ compute-tools-image ]
if: github.event_name != 'workflow_dispatch'
container: amazon/aws-cli
strategy:
fail-fast: false
matrix:
name: [ compute-tools ]
steps:
- name: Promote image to latest
run:
MANIFEST=$(aws ecr batch-get-image --repository-name ${{ matrix.name }} --image-ids imageTag=$GITHUB_RUN_ID --query 'images[].imageManifest' --output text) && aws ecr put-image --repository-name ${{ matrix.name }} --image-tag latest --image-manifest "$MANIFEST"
compute-node-image:
runs-on: dev
container: gcr.io/kaniko-project/executor:v1.9.0-debug
# note: This image depends on neondatabase/compute-tools:latest (or :thisversion),
# which isn't available until after the image is promoted.
# Ergo, we must explicitly build and promote compute-tools separately.
needs:
- promote-image-compute-tools
steps:
- name: Checkout
uses: actions/checkout@v1 # v3 won't work with kaniko
uses: actions/checkout@v3
with:
submodules: true
fetch-depth: 0
- name: Configure ECR login
run: echo "{\"credsStore\":\"ecr-login\"}" > /kaniko/.docker/config.json
- name: Login to DockerHub
uses: docker/login-action@v1
with:
username: ${{ secrets.NEON_DOCKERHUB_USERNAME }}
password: ${{ secrets.NEON_DOCKERHUB_PASSWORD }}
- name: Kaniko build compute node
working-directory: ./vendor/postgres/
run: /kaniko/executor --snapshotMode=redo --cache=true --cache-repo 369495373322.dkr.ecr.eu-central-1.amazonaws.com/cache --snapshotMode=redo --context . --build-arg=COMPUTE_TOOLS_TAG=$GITHUB_RUN_ID --destination 369495373322.dkr.ecr.eu-central-1.amazonaws.com/compute-node:$GITHUB_RUN_ID
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@v1
with:
driver: docker
promote-images:
runs-on: dev
needs: [ neon-image, compute-node-image ]
if: github.event_name != 'workflow_dispatch'
container: amazon/aws-cli
strategy:
fail-fast: false
matrix:
name: [ neon, compute-node ]
steps:
- name: Promote image to latest
run:
MANIFEST=$(aws ecr batch-get-image --repository-name ${{ matrix.name }} --image-ids imageTag=$GITHUB_RUN_ID --query 'images[].imageManifest' --output text) && aws ecr put-image --repository-name ${{ matrix.name }} --image-tag latest --image-manifest "$MANIFEST"
push-docker-hub:
runs-on: dev
needs: [ promote-images, tag ]
container: golang:1.19-bullseye
steps:
- name: Install Crane & ECR helper
- name: Get build tag
run: |
go install github.com/google/go-containerregistry/cmd/crane@31786c6cbb82d6ec4fb8eb79cd9387905130534e # v0.11.0
go install github.com/awslabs/amazon-ecr-credential-helper/ecr-login/cli/docker-credential-ecr-login@69c85dc22db6511932bbf119e1a0cc5c90c69a7f # v0.6.0
# - name: Get build tag
# run: |
# if [[ "$GITHUB_REF_NAME" == "main" ]]; then
# echo "::set-output name=tag::$(git rev-list --count HEAD)"
# elif [[ "$GITHUB_REF_NAME" == "release" ]]; then
# echo "::set-output name=tag::release-$(git rev-list --count HEAD)"
# else
# echo "GITHUB_REF_NAME (value '$GITHUB_REF_NAME') is not set to either 'main' or 'release' "
# echo "::set-output name=tag::$GITHUB_RUN_ID"
# fi
# id: build-tag
if [[ "$GITHUB_REF_NAME" == "main" ]]; then
echo "::set-output name=tag::$(git rev-list --count HEAD)"
elif [[ "$GITHUB_REF_NAME" == "release" ]]; then
echo "::set-output name=tag::release-$(git rev-list --count HEAD)"
else
echo "GITHUB_REF_NAME (value '$GITHUB_REF_NAME') is not set to either 'main' or 'release'"
exit 1
fi
id: build-tag
- name: Configure ECR login
- name: Get legacy build tag
run: |
mkdir /github/home/.docker/
echo "{\"credsStore\":\"ecr-login\"}" > /github/home/.docker/config.json
if [[ "$GITHUB_REF_NAME" == "main" ]]; then
echo "::set-output name=tag::latest"
elif [[ "$GITHUB_REF_NAME" == "release" ]]; then
echo "::set-output name=tag::release"
else
echo "GITHUB_REF_NAME (value '$GITHUB_REF_NAME') is not set to either 'main' or 'release'"
exit 1
fi
id: legacy-build-tag
- name: Pull neon image from ECR
run: crane pull 369495373322.dkr.ecr.eu-central-1.amazonaws.com/neon:latest neon
- name: Build compute-tools Docker image
uses: docker/build-push-action@v2
with:
context: .
build-args: |
GIT_VERSION="${{github.sha}}"
AWS_ACCESS_KEY_ID="${{secrets.CACHEPOT_AWS_ACCESS_KEY_ID}}"
AWS_SECRET_ACCESS_KEY="${{secrets.CACHEPOT_AWS_SECRET_ACCESS_KEY}}"
push: false
file: Dockerfile.compute-tools
tags: neondatabase/compute-tools:local
- name: Pull compute tools image from ECR
run: crane pull 369495373322.dkr.ecr.eu-central-1.amazonaws.com/compute-tools:latest compute-tools
- name: Push compute-tools Docker image
uses: docker/build-push-action@v2
with:
context: .
build-args: |
GIT_VERSION="${{github.sha}}"
AWS_ACCESS_KEY_ID="${{secrets.CACHEPOT_AWS_ACCESS_KEY_ID}}"
AWS_SECRET_ACCESS_KEY="${{secrets.CACHEPOT_AWS_SECRET_ACCESS_KEY}}"
push: true
file: Dockerfile.compute-tools
tags: neondatabase/compute-tools:${{steps.legacy-build-tag.outputs.tag}}
- name: Pull compute node image from ECR
run: crane pull 369495373322.dkr.ecr.eu-central-1.amazonaws.com/compute-node:latest compute-node
- name: Configure docker login
run: |
# ECR Credential Helper & Docker Hub don't work together in config, hence reset
echo "" > /github/home/.docker/config.json
crane auth login -u ${{ secrets.NEON_DOCKERHUB_USERNAME }} -p ${{ secrets.NEON_DOCKERHUB_PASSWORD }} index.docker.io
- name: Push neon image to Docker Hub
run: crane push neon neondatabase/neon:${{needs.tag.outputs.build-tag}}
- name: Push compute tools image to Docker Hub
run: crane push compute-tools neondatabase/compute-tools:${{needs.tag.outputs.build-tag}}
- name: Push compute node image to Docker Hub
run: crane push compute-node neondatabase/compute-node:${{needs.tag.outputs.build-tag}}
- name: Add latest tag to images
if: |
(github.ref_name == 'main' || github.ref_name == 'release') &&
github.event_name != 'workflow_dispatch'
run: |
crane tag neondatabase/neon:${{needs.tag.outputs.build-tag}} latest
crane tag neondatabase/compute-tools:${{needs.tag.outputs.build-tag}} latest
crane tag neondatabase/compute-node:${{needs.tag.outputs.build-tag}} latest
- name: Build compute-node Docker image
uses: docker/build-push-action@v2
with:
context: ./vendor/postgres/
build-args:
COMPUTE_TOOLS_TAG=local
push: true
tags: neondatabase/compute-node:${{steps.legacy-build-tag.outputs.tag}}, neondatabase/compute-node:${{steps.build-tag.outputs.tag}}
calculate-deploy-targets:
runs-on: [ self-hosted, Linux, k8s-runner ]
@@ -584,16 +538,14 @@ jobs:
deploy:
runs-on: [ self-hosted, Linux, k8s-runner ]
#container: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/base:latest
# We need both storage **and** compute images for deploy, because control plane picks the compute version based on the storage version.
# If it notices a fresh storage it may bump the compute version. And if compute image failed to build it may break things badly
needs: [ push-docker-hub, calculate-deploy-targets, tag, other-tests, pg_regress-tests ]
# We need both storage **and** compute images for deploy, because control plane
# picks the compute version based on the storage version. If it notices a fresh
# storage it may bump the compute version. And if compute image failed to build
# it may break things badly.
needs: [ docker-image, docker-image-compute, calculate-deploy-targets ]
if: |
(github.ref_name == 'main' || github.ref_name == 'release') &&
github.event_name != 'workflow_dispatch'
defaults:
run:
shell: bash
strategy:
matrix:
include: ${{fromJSON(needs.calculate-deploy-targets.outputs.matrix-include)}}
@@ -604,19 +556,13 @@ jobs:
submodules: true
fetch-depth: 0
- name: Setup python
uses: actions/setup-python@v4
with:
python-version: '3.10'
- name: Setup ansible
run: |
export PATH="/root/.local/bin:$PATH"
pip install --progress-bar off --user ansible boto3
- name: Redeploy
run: |
export DOCKER_TAG=${{needs.tag.outputs.build-tag}}
export DOCKER_TAG=${{needs.docker-image.outputs.build-tag}}
cd "$(pwd)/.github/ansible"
if [[ "$GITHUB_REF_NAME" == "main" ]]; then
@@ -639,16 +585,13 @@ jobs:
rm -f neon_install.tar.gz .neon_current_version
deploy-proxy:
runs-on: dev
container: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/base:latest
# Compute image isn't strictly required for proxy deploy, but let's still wait for it to run all deploy jobs consistently.
needs: [ push-docker-hub, calculate-deploy-targets, tag, other-tests, pg_regress-tests ]
runs-on: [ self-hosted, Linux, k8s-runner ]
# Compute image isn't strictly required for proxy deploy, but let's still wait for it
# to run all deploy jobs consistently.
needs: [ docker-image, docker-image-compute, calculate-deploy-targets ]
if: |
(github.ref_name == 'main' || github.ref_name == 'release') &&
github.event_name != 'workflow_dispatch'
defaults:
run:
shell: bash
strategy:
matrix:
include: ${{fromJSON(needs.calculate-deploy-targets.outputs.matrix-include)}}
@@ -661,9 +604,6 @@ jobs:
submodules: true
fetch-depth: 0
- name: Add curl
run: apt update && apt install curl -y
- name: Store kubeconfig file
run: |
echo "${{ secrets[matrix.kubeconfig_secret] }}" | base64 --decode > ${KUBECONFIG}
@@ -676,6 +616,6 @@ jobs:
- name: Re-deploy proxy
run: |
DOCKER_TAG=${{needs.tag.outputs.build-tag}}
DOCKER_TAG=${{needs.docker-image.outputs.build-tag}}
helm upgrade ${{ matrix.proxy_job }} neondatabase/neon-proxy --namespace default --install -f .github/helm-values/${{ matrix.proxy_config }}.yaml --set image.tag=${DOCKER_TAG} --wait --timeout 15m0s
helm upgrade ${{ matrix.proxy_job }}-scram neondatabase/neon-proxy --namespace default --install -f .github/helm-values/${{ matrix.proxy_config }}-scram.yaml --set image.tag=${DOCKER_TAG} --wait --timeout 15m0s

View File

@@ -101,7 +101,7 @@ jobs:
!~/.cargo/registry/src
~/.cargo/git
target
key: v2-${{ runner.os }}-cargo-${{ hashFiles('./Cargo.lock') }}-rust-${{ matrix.rust_toolchain }}
key: v1-${{ runner.os }}-cargo-${{ hashFiles('./Cargo.lock') }}-rust-${{ matrix.rust_toolchain }}
- name: Run cargo clippy
run: ./run_clippy.sh

202
Cargo.lock generated
View File

@@ -48,9 +48,9 @@ dependencies = [
[[package]]
name = "anyhow"
version = "1.0.59"
version = "1.0.58"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c91f1f46651137be86f3a2b9a8359f9ab421d04d941c62b5982e1ca21113adf9"
checksum = "bb07d2053ccdbe10e2af2995a2f116c1330396493dc1269f6a91d0ae82e19704"
dependencies = [
"backtrace",
]
@@ -77,7 +77,7 @@ dependencies = [
"num-traits",
"rusticata-macros",
"thiserror",
"time 0.3.12",
"time 0.3.11",
]
[[package]]
@@ -126,9 +126,9 @@ dependencies = [
[[package]]
name = "async-trait"
version = "0.1.57"
version = "0.1.56"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "76464446b8bc32758d7e88ee1a804d9914cd9b1cb264c029899680b0be29826f"
checksum = "96cf8829f67d2eab0b2dfa42c5d0ef737e0724e4a82b01b3e292456202b19716"
dependencies = [
"proc-macro2",
"quote",
@@ -166,7 +166,7 @@ dependencies = [
"http",
"http-body",
"hyper",
"itoa 1.0.3",
"itoa 1.0.2",
"matchit",
"memchr",
"mime",
@@ -298,9 +298,9 @@ checksum = "37ccbd214614c6783386c1af30caf03192f17891059cecc394b4fb119e363de3"
[[package]]
name = "bytemuck"
version = "1.11.0"
version = "1.10.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a5377c8865e74a160d21f29c2d40669f53286db6eab59b88540cbb12ffc8b835"
checksum = "c53dfa917ec274df8ed3c572698f381a24eef2efba9492d797301b72b6db408a"
[[package]]
name = "byteorder"
@@ -310,9 +310,9 @@ checksum = "14c189c53d098945499cdfa7ecc63567cf3886b3332b312a5b4585d8d3a6a610"
[[package]]
name = "bytes"
version = "1.2.1"
version = "1.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ec8a7b6a70fde80372154c65702f00a0f56f3e1c36abbc6c440484be248856db"
checksum = "c4872d67bab6358e59559027aa3b9157c53d9358c51423c17554809a8858e0f8"
dependencies = [
"serde",
]
@@ -386,9 +386,9 @@ dependencies = [
[[package]]
name = "clap"
version = "3.2.16"
version = "3.2.12"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a3dbbb6653e7c55cc8595ad3e1f7be8f32aba4eb7ff7f0fd1163d4f3d137c0a9"
checksum = "ab8b79fe3946ceb4a0b1c080b4018992b8d27e9ff363644c1c9b6387c854614d"
dependencies = [
"atty",
"bitflags",
@@ -455,7 +455,7 @@ version = "0.1.0"
dependencies = [
"anyhow",
"chrono",
"clap 3.2.16",
"clap 3.2.12",
"env_logger",
"hyper",
"log",
@@ -601,9 +601,9 @@ dependencies = [
[[package]]
name = "crossbeam-channel"
version = "0.5.6"
version = "0.5.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c2dd04ddaf88237dc3b8d8f9a3c1004b506b54b3313403944054d23c0870c521"
checksum = "4c02a4d71819009c192cf4872265391563fd6a84c81ff2c0f2a7026ca4c1d85c"
dependencies = [
"cfg-if",
"crossbeam-utils",
@@ -611,9 +611,9 @@ dependencies = [
[[package]]
name = "crossbeam-deque"
version = "0.8.2"
version = "0.8.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "715e8152b692bba2d374b53d4875445368fdf21a94751410af607a5ac677d1fc"
checksum = "6455c0ca19f0d2fbf751b908d5c55c1f5cbc65e03c4225427254b46890bdde1e"
dependencies = [
"cfg-if",
"crossbeam-epoch",
@@ -622,9 +622,9 @@ dependencies = [
[[package]]
name = "crossbeam-epoch"
version = "0.9.10"
version = "0.9.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "045ebe27666471bb549370b4b0b3e51b07f56325befa4284db65fc89c02511b1"
checksum = "07db9d94cbd326813772c968ccd25999e5f8ae22f4f8d1b11effa37ef6ce281d"
dependencies = [
"autocfg",
"cfg-if",
@@ -636,9 +636,9 @@ dependencies = [
[[package]]
name = "crossbeam-utils"
version = "0.8.11"
version = "0.8.10"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "51887d4adc7b564537b15adcfb307936f8075dfcd5f00dde9a9f1d29383682bc"
checksum = "7d82ee10ce34d7bc12c2122495e7593a9c41347ecdd64185af4ecf72cb1a7f83"
dependencies = [
"cfg-if",
"once_cell",
@@ -917,9 +917,9 @@ checksum = "4443176a9f2c162692bd3d352d745ef9413eec5782a80d8fd6f8a1ac692a07f7"
[[package]]
name = "fastrand"
version = "1.8.0"
version = "1.7.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a7a407cfaa3385c4ae6b23e84623d48c2798d06e3e6a1878f7f59f17b3f86499"
checksum = "c3fcf0cee53519c866c09b5de1f6c56ff9d647101f81c1964fa632e148896cdf"
dependencies = [
"instant",
]
@@ -1086,9 +1086,9 @@ dependencies = [
[[package]]
name = "generic-array"
version = "0.14.6"
version = "0.14.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bff49e947297f3312447abdca79f45f4738097cc82b06e72054d2223f601f1b9"
checksum = "fd48d33ec7f05fbfa152300fdad764757cbded343c1aa1cff2fbaf4134851803"
dependencies = [
"typenum",
"version_check",
@@ -1245,7 +1245,7 @@ checksum = "75f43d41e26995c17e71ee126451dd3941010b0514a81a9d11f3b341debc2399"
dependencies = [
"bytes",
"fnv",
"itoa 1.0.3",
"itoa 1.0.2",
]
[[package]]
@@ -1308,7 +1308,7 @@ dependencies = [
"http-body",
"httparse",
"httpdate",
"itoa 1.0.3",
"itoa 1.0.2",
"pin-project-lite",
"socket2",
"tokio",
@@ -1391,7 +1391,7 @@ dependencies = [
"ahash",
"atty",
"indexmap",
"itoa 1.0.3",
"itoa 1.0.2",
"lazy_static",
"log",
"num-format",
@@ -1432,15 +1432,15 @@ checksum = "b71991ff56294aa922b450139ee08b3bfc70982c6b2c7562771375cf73542dd4"
[[package]]
name = "itoa"
version = "1.0.3"
version = "1.0.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6c8af84674fe1f223a982c933a0ee1086ac4d4052aa0fb8060c12c6ad838e754"
checksum = "112c678d4050afce233f4f2852bb2eb519230b3cf12f33585275537d7e41578d"
[[package]]
name = "js-sys"
version = "0.3.59"
version = "0.3.58"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "258451ab10b34f8af53416d1fdab72c22e805f0c92a1136d59470ec0b11138b2"
checksum = "c3fac17f7123a73ca62df411b1bf727ccc805daa070338fda671c86dac1bdc27"
dependencies = [
"wasm-bindgen",
]
@@ -1482,9 +1482,9 @@ checksum = "830d08ce1d1d941e6b30645f1a0eb5643013d835ce3779a5fc208261dbe10f55"
[[package]]
name = "libc"
version = "0.2.127"
version = "0.2.126"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "505e71a4706fa491e9b1b55f51b95d4037d0821ee40131190475f692b35b009b"
checksum = "349d5a591cd28b49e1d1037471617a32ddcda5731b99419008085f72d5a53836"
[[package]]
name = "libloading"
@@ -1659,7 +1659,7 @@ name = "neon_local"
version = "0.1.0"
dependencies = [
"anyhow",
"clap 3.2.16",
"clap 3.2.12",
"comfy-table",
"control_plane",
"git-version",
@@ -1854,7 +1854,7 @@ dependencies = [
"byteorder",
"bytes",
"chrono",
"clap 3.2.16",
"clap 3.2.12",
"close_fds",
"const_format",
"crc32c",
@@ -2155,9 +2155,9 @@ checksum = "eb9f9e6e233e5c4a35559a617bf40a4ec447db2e84c20b55a6f83167b7e57872"
[[package]]
name = "prettyplease"
version = "0.1.18"
version = "0.1.16"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "697ae720ee02011f439e0701db107ffe2916d83f718342d65d7f8bf7b8a5fee9"
checksum = "da6ffbe862780245013cb1c0a48c4e44b7d665548088f91f6b90876d0625e4c2"
dependencies = [
"proc-macro2",
"syn",
@@ -2171,9 +2171,9 @@ checksum = "dbf0c48bc1d91375ae5c3cd81e3722dff1abcf81a30960240640d223f59fe0e5"
[[package]]
name = "proc-macro2"
version = "1.0.43"
version = "1.0.40"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0a2ca2c61bc9f3d74d2886294ab7b9853abd9c1ad903a3ac7815c58989bb7bab"
checksum = "dd96a1e8ed2596c337f8eae5f24924ec83f5ad5ab21ea8e455d3566c69fbcaf7"
dependencies = [
"unicode-ident",
]
@@ -2271,7 +2271,7 @@ dependencies = [
"base64",
"bstr",
"bytes",
"clap 3.2.16",
"clap 3.2.12",
"futures",
"git-version",
"hashbrown 0.11.2",
@@ -2326,9 +2326,9 @@ dependencies = [
[[package]]
name = "quote"
version = "1.0.21"
version = "1.0.20"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bbe448f377a7d6961e30f5955f9b8d106c3f5e449d493ee1b125c1d43c2b5179"
checksum = "3bcdf212e9776fbcb2d23ab029360416bb1706b1aea2d1a5ba002727cbcab804"
dependencies = [
"proc-macro2",
]
@@ -2411,9 +2411,9 @@ dependencies = [
[[package]]
name = "redox_syscall"
version = "0.2.16"
version = "0.2.13"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fb5a58c1855b4b6819d59012155603f0b22ad30cad752600aadfcb695265519a"
checksum = "62f25bc4c7e55e0b0b7a1d43fb893f4fa1361d0abe38b9ce4f323c2adfe6ef42"
dependencies = [
"bitflags",
]
@@ -2508,7 +2508,7 @@ dependencies = [
"percent-encoding",
"pin-project-lite",
"rustls",
"rustls-pemfile 1.0.1",
"rustls-pemfile 1.0.0",
"serde",
"serde_json",
"serde_urlencoded",
@@ -2708,9 +2708,9 @@ dependencies = [
[[package]]
name = "rustls-pemfile"
version = "1.0.1"
version = "1.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0864aeff53f8c05aa08d86e5ef839d3dfcf07aeba2db32f12db0ef716e87bd55"
checksum = "e7522c9de787ff061458fe9a829dc790a3f5b22dc571694fc5883f448b94d9a9"
dependencies = [
"base64",
]
@@ -2726,15 +2726,15 @@ dependencies = [
[[package]]
name = "rustversion"
version = "1.0.9"
version = "1.0.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "97477e48b4cf8603ad5f7aaf897467cf42ab4218a38ef76fb14c2d6773a6d6a8"
checksum = "24c8ad4f0c00e1eb5bc7614d236a7f1300e3dbd76b68cac8e06fb00b015ad8d8"
[[package]]
name = "ryu"
version = "1.0.11"
version = "1.0.10"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4501abdff3ae82a1c1b477a17252eb69cee9e66eb915c1abaa4f44d873df9f09"
checksum = "f3f6f92acf49d1b98f7a81226834412ada05458b7364277387724a237f062695"
[[package]]
name = "safekeeper"
@@ -2744,7 +2744,7 @@ dependencies = [
"async-trait",
"byteorder",
"bytes",
"clap 3.2.16",
"clap 3.2.12",
"const_format",
"crc32c",
"daemonize",
@@ -2835,15 +2835,15 @@ dependencies = [
[[package]]
name = "semver"
version = "1.0.13"
version = "1.0.12"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "93f6841e709003d68bb2deee8c343572bf446003ec20a583e76f7b15cebf3711"
checksum = "a2333e6df6d6598f2b1974829f853c2b4c5f4a6e503c10af918081aa6f8564e1"
[[package]]
name = "serde"
version = "1.0.142"
version = "1.0.139"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e590c437916fb6b221e1d00df6e3294f3fccd70ca7e92541c475d6ed6ef5fee2"
checksum = "0171ebb889e45aa68b44aee0859b3eede84c6f5f5c228e6f140c0b2a0a46cad6"
dependencies = [
"serde_derive",
]
@@ -2860,9 +2860,9 @@ dependencies = [
[[package]]
name = "serde_derive"
version = "1.0.142"
version = "1.0.139"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "34b5b8d809babe02f538c2cfec6f2c1ed10804c0e5a6a041a049a4f5588ccc2e"
checksum = "dc1d3230c1de7932af58ad8ffbe1d784bd55efd5a9d84ac24f69c72d83543dfb"
dependencies = [
"proc-macro2",
"quote",
@@ -2871,11 +2871,11 @@ dependencies = [
[[package]]
name = "serde_json"
version = "1.0.83"
version = "1.0.82"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "38dd04e3c8279e75b31ef29dbdceebfe5ad89f4d0937213c53f7d49d01b3d5a7"
checksum = "82c2c1fdcd807d1098552c5b9a36e425e42e9fbd7c6a37a8425f390f781f7fa7"
dependencies = [
"itoa 1.0.3",
"itoa 1.0.2",
"ryu",
"serde",
]
@@ -2887,7 +2887,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d3491c14715ca2294c4d6a88f15e84739788c1d030eed8c110436aafdaa2f3fd"
dependencies = [
"form_urlencoded",
"itoa 1.0.3",
"itoa 1.0.2",
"ryu",
"serde",
]
@@ -2992,7 +2992,7 @@ dependencies = [
"num-bigint",
"num-traits",
"thiserror",
"time 0.3.12",
"time 0.3.11",
]
[[package]]
@@ -3003,12 +3003,9 @@ checksum = "7bd3e3206899af3f8b12af284fafc038cc1dc2b41d1b89dd17297221c5d225de"
[[package]]
name = "slab"
version = "0.4.7"
version = "0.4.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4614a76b2a8be0058caa9dbbaf66d988527d86d003c11a94fbd335d7661edcef"
dependencies = [
"autocfg",
]
checksum = "eb703cfe953bccee95685111adeedb76fabe4e97549a58d16f03ea7b9367bb32"
[[package]]
name = "smallvec"
@@ -3116,9 +3113,9 @@ dependencies = [
[[package]]
name = "syn"
version = "1.0.99"
version = "1.0.98"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "58dbef6ec655055e20b86b15a8cc6d439cca19b667537ac6a1369572d151ab13"
checksum = "c50aef8a904de4c23c788f104b7dddc7d6f79c647c7c8ce4cc8f73eb0ca773dd"
dependencies = [
"proc-macro2",
"quote",
@@ -3194,18 +3191,18 @@ checksum = "b1141d4d61095b28419e22cb0bbf02755f5e54e0526f97f1e3d1d160e60885fb"
[[package]]
name = "thiserror"
version = "1.0.32"
version = "1.0.31"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f5f6586b7f764adc0231f4c79be7b920e766bb2f3e51b3661cdb263828f19994"
checksum = "bd829fe32373d27f76265620b5309d0340cb8550f523c1dda251d6298069069a"
dependencies = [
"thiserror-impl",
]
[[package]]
name = "thiserror-impl"
version = "1.0.32"
version = "1.0.31"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "12bafc5b54507e0149cdf1b145a5d80ab80a90bcd9275df43d4fff68460f6c21"
checksum = "0396bc89e626244658bef819e22d0cc459e795a5ebe878e6ec336d1674a8d79a"
dependencies = [
"proc-macro2",
"quote",
@@ -3234,12 +3231,11 @@ dependencies = [
[[package]]
name = "time"
version = "0.3.12"
version = "0.3.11"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "74b7cc93fc23ba97fde84f7eea56c55d1ba183f495c6715defdfc7b9cb8c870f"
checksum = "72c91f41dcb2f096c05f0873d667dceec1087ce5bcf984ec8ffb19acddbb3217"
dependencies = [
"itoa 1.0.3",
"js-sys",
"itoa 1.0.2",
"libc",
"num_threads",
"quickcheck",
@@ -3279,9 +3275,9 @@ checksum = "cda74da7e1a664f795bb1f8a87ec406fb89a02522cf6e50620d016add6dbbf5c"
[[package]]
name = "tokio"
version = "1.20.1"
version = "1.20.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7a8325f63a7d4774dd041e363b2409ed1c5cbbd0f867795e661df066b2b0a581"
checksum = "57aec3cfa4c296db7255446efb4928a6be304b431a806216105542a67b6ca82e"
dependencies = [
"autocfg",
"bytes",
@@ -3611,9 +3607,9 @@ checksum = "099b7128301d285f79ddd55b9a83d5e6b9e97c92e0ea0daebee7263e932de992"
[[package]]
name = "unicode-ident"
version = "1.0.3"
version = "1.0.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c4f5b37a154999a8f3f98cc23a628d850e154479cd94decf3414696e12e31aaf"
checksum = "15c61ba63f9235225a22310255a29b806b907c9b8c964bcbd0a2c70f3f2deea7"
[[package]]
name = "unicode-normalization"
@@ -3732,7 +3728,7 @@ name = "wal_craft"
version = "0.1.0"
dependencies = [
"anyhow",
"clap 3.2.16",
"clap 3.2.12",
"env_logger",
"log",
"once_cell",
@@ -3776,9 +3772,9 @@ checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423"
[[package]]
name = "wasm-bindgen"
version = "0.2.82"
version = "0.2.81"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fc7652e3f6c4706c8d9cd54832c4a4ccb9b5336e2c3bd154d5cccfbf1c1f5f7d"
checksum = "7c53b543413a17a202f4be280a7e5c62a1c69345f5de525ee64f8cfdbc954994"
dependencies = [
"cfg-if",
"wasm-bindgen-macro",
@@ -3786,13 +3782,13 @@ dependencies = [
[[package]]
name = "wasm-bindgen-backend"
version = "0.2.82"
version = "0.2.81"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "662cd44805586bd52971b9586b1df85cdbbd9112e4ef4d8f41559c334dc6ac3f"
checksum = "5491a68ab4500fa6b4d726bd67408630c3dbe9c4fe7bda16d5c82a1fd8c7340a"
dependencies = [
"bumpalo",
"lazy_static",
"log",
"once_cell",
"proc-macro2",
"quote",
"syn",
@@ -3801,9 +3797,9 @@ dependencies = [
[[package]]
name = "wasm-bindgen-futures"
version = "0.4.32"
version = "0.4.31"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fa76fb221a1f8acddf5b54ace85912606980ad661ac7a503b4570ffd3a624dad"
checksum = "de9a9cec1733468a8c657e57fa2413d2ae2c0129b95e87c5b72b8ace4d13f31f"
dependencies = [
"cfg-if",
"js-sys",
@@ -3813,9 +3809,9 @@ dependencies = [
[[package]]
name = "wasm-bindgen-macro"
version = "0.2.82"
version = "0.2.81"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b260f13d3012071dfb1512849c033b1925038373aea48ced3012c09df952c602"
checksum = "c441e177922bc58f1e12c022624b6216378e5febc2f0533e41ba443d505b80aa"
dependencies = [
"quote",
"wasm-bindgen-macro-support",
@@ -3823,9 +3819,9 @@ dependencies = [
[[package]]
name = "wasm-bindgen-macro-support"
version = "0.2.82"
version = "0.2.81"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5be8e654bdd9b79216c2929ab90721aa82faf65c48cdf08bdc4e7f51357b80da"
checksum = "7d94ac45fcf608c1f45ef53e748d35660f168490c10b23704c7779ab8f5c3048"
dependencies = [
"proc-macro2",
"quote",
@@ -3836,15 +3832,15 @@ dependencies = [
[[package]]
name = "wasm-bindgen-shared"
version = "0.2.82"
version = "0.2.81"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6598dd0bd3c7d51095ff6531a5b23e02acdc81804e30d8f07afb77b7215a140a"
checksum = "6a89911bd99e5f3659ec4acf9c4d93b0a90fe4a2a11f15328472058edc5261be"
[[package]]
name = "web-sys"
version = "0.3.59"
version = "0.3.58"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ed055ab27f941423197eb86b2035720b1a3ce40504df082cac2ecc6ed73335a1"
checksum = "2fed94beee57daf8dd7d51f2b15dc2bcde92d7a72304cdf662a4371008b71b90"
dependencies = [
"js-sys",
"wasm-bindgen",
@@ -3997,7 +3993,7 @@ dependencies = [
"scopeguard",
"serde",
"syn",
"time 0.3.12",
"time 0.3.11",
"tokio",
"tokio-util",
"tracing",
@@ -4019,7 +4015,7 @@ dependencies = [
"oid-registry",
"rusticata-macros",
"thiserror",
"time 0.3.12",
"time 0.3.11",
]
[[package]]
@@ -4048,6 +4044,6 @@ dependencies = [
[[package]]
name = "zeroize"
version = "1.5.7"
version = "1.5.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c394b5bd0c6f669e7275d9c20aa90ae064cb22e75a1cad54e1b34088034b149f"
checksum = "20b578acffd8516a6c3f2a1bdefc1ec37e547bb4e0fb8b6b01a4cafc886b4442"

View File

@@ -1,6 +1,8 @@
# Build Postgres
FROM 369495373322.dkr.ecr.eu-central-1.amazonaws.com/rust:pinned AS pg-build
WORKDIR /home/nonroot
FROM neondatabase/rust:1.58 AS pg-build
WORKDIR /pg
USER root
COPY vendor/postgres vendor/postgres
COPY Makefile Makefile
@@ -9,30 +11,27 @@ ENV BUILD_TYPE release
RUN set -e \
&& mold -run make -j $(nproc) -s postgres \
&& rm -rf tmp_install/build \
&& tar -C tmp_install -czf /home/nonroot/postgres_install.tar.gz .
&& tar -C tmp_install -czf /postgres_install.tar.gz .
# Build zenith binaries
FROM 369495373322.dkr.ecr.eu-central-1.amazonaws.com/rust:pinned AS build
WORKDIR /home/nonroot
FROM neondatabase/rust:1.58 AS build
ARG GIT_VERSION=local
# Enable https://github.com/paritytech/cachepot to cache Rust crates' compilation results in Docker builds.
# Set up cachepot to use an AWS S3 bucket for cache results, to reuse it between `docker build` invocations.
# cachepot falls back to local filesystem if S3 is misconfigured, not failing the build
# cachepot falls back to local filesystem if S3 is misconfigured, not failing the build.
ARG RUSTC_WRAPPER=cachepot
ENV AWS_REGION=eu-central-1
ENV CACHEPOT_S3_KEY_PREFIX=cachepot
ARG CACHEPOT_BUCKET=neon-github-dev
#ARG AWS_ACCESS_KEY_ID
#ARG AWS_SECRET_ACCESS_KEY
ARG CACHEPOT_BUCKET=zenith-rust-cachepot
ARG AWS_ACCESS_KEY_ID
ARG AWS_SECRET_ACCESS_KEY
COPY --from=pg-build /home/nonroot/tmp_install/include/postgresql/server tmp_install/include/postgresql/server
COPY --from=pg-build /pg/tmp_install/include/postgresql/server tmp_install/include/postgresql/server
COPY . .
# Show build caching stats to check if it was used in the end.
# Has to be the part of the same RUN since cachepot daemon is killed in the end of this RUN, losing the compilation stats.
RUN set -e \
&& mold -run cargo build --release \
&& sudo -E "PATH=$PATH" mold -run cargo build --release \
&& cachepot -s
# Build final image
@@ -41,8 +40,8 @@ FROM debian:bullseye-slim
WORKDIR /data
RUN set -e \
&& apt update \
&& apt install -y \
&& apt-get update \
&& apt-get install -y \
libreadline-dev \
libseccomp-dev \
openssl \
@@ -51,14 +50,17 @@ RUN set -e \
&& useradd -d /data zenith \
&& chown -R zenith:zenith /data
COPY --from=build --chown=zenith:zenith /home/nonroot/target/release/pageserver /usr/local/bin
COPY --from=build --chown=zenith:zenith /home/nonroot/target/release/safekeeper /usr/local/bin
COPY --from=build --chown=zenith:zenith /home/nonroot/target/release/proxy /usr/local/bin
COPY --from=build --chown=zenith:zenith /home/runner/target/release/pageserver /usr/local/bin
COPY --from=build --chown=zenith:zenith /home/runner/target/release/safekeeper /usr/local/bin
COPY --from=build --chown=zenith:zenith /home/runner/target/release/proxy /usr/local/bin
COPY --from=pg-build /home/nonroot/tmp_install/ /usr/local/
COPY --from=pg-build /home/nonroot/postgres_install.tar.gz /data/
COPY --from=pg-build /pg/tmp_install/ /usr/local/
COPY --from=pg-build /postgres_install.tar.gz /data/
COPY docker-entrypoint.sh /docker-entrypoint.sh
VOLUME ["/data"]
USER zenith
EXPOSE 6400
ENTRYPOINT ["/docker-entrypoint.sh"]
CMD ["pageserver"]

View File

@@ -1,25 +1,22 @@
# First transient image to build compute_tools binaries
# NB: keep in sync with rust image version in .github/workflows/build_and_test.yml
FROM 369495373322.dkr.ecr.eu-central-1.amazonaws.com/rust:pinned AS rust-build
WORKDIR /home/nonroot
FROM neondatabase/rust:1.58 AS rust-build
# Enable https://github.com/paritytech/cachepot to cache Rust crates' compilation results in Docker builds.
# Set up cachepot to use an AWS S3 bucket for cache results, to reuse it between `docker build` invocations.
# cachepot falls back to local filesystem if S3 is misconfigured, not failing the build.
ARG RUSTC_WRAPPER=cachepot
ENV AWS_REGION=eu-central-1
ENV CACHEPOT_S3_KEY_PREFIX=cachepot
ARG CACHEPOT_BUCKET=neon-github-dev
#ARG AWS_ACCESS_KEY_ID
#ARG AWS_SECRET_ACCESS_KEY
ARG CACHEPOT_BUCKET=zenith-rust-cachepot
ARG AWS_ACCESS_KEY_ID
ARG AWS_SECRET_ACCESS_KEY
COPY . .
RUN set -e \
&& mold -run cargo build -p compute_tools --release \
&& sudo -E "PATH=$PATH" mold -run cargo build -p compute_tools --release \
&& cachepot -s
# Final image that only has one binary
FROM debian:bullseye-slim
FROM debian:buster-slim
COPY --from=rust-build /home/nonroot/target/release/compute_ctl /usr/local/bin/compute_ctl
COPY --from=rust-build /home/runner/target/release/compute_ctl /usr/local/bin/compute_ctl

View File

@@ -178,7 +178,6 @@ impl ComputeNode {
.args(&["--sync-safekeepers"])
.env("PGDATA", &self.pgdata) // we cannot use -D in this mode
.stdout(Stdio::piped())
.stderr(Stdio::piped())
.spawn()
.expect("postgres --sync-safekeepers failed to start");
@@ -188,13 +187,10 @@ impl ComputeNode {
let sync_output = sync_handle
.wait_with_output()
.expect("postgres --sync-safekeepers failed");
if !sync_output.status.success() {
anyhow::bail!(
"postgres --sync-safekeepers exited with non-zero status: {}. stdout: {}, stderr: {}",
"postgres --sync-safekeepers exited with non-zero status: {}",
sync_output.status,
String::from_utf8(sync_output.stdout).expect("postgres --sync-safekeepers exited, and stdout is not utf-8"),
String::from_utf8(sync_output.stderr).expect("postgres --sync-safekeepers exited, and stderr is not utf-8"),
);
}

View File

@@ -62,16 +62,9 @@ impl GenericOption {
/// Represent `GenericOption` as configuration option.
pub fn to_pg_setting(&self) -> String {
if let Some(val) = &self.value {
let name = match self.name.as_str() {
"safekeepers" => "neon.safekeepers",
"wal_acceptor_reconnect" => "neon.safekeeper_reconnect_timeout",
"wal_acceptor_connect_timeout" => "neon.safekeeper_connect_timeout",
it => it,
};
match self.vartype.as_ref() {
"string" => format!("{} = '{}'", name, val),
_ => format!("{} = {}", name, val),
"string" => format!("{} = '{}'", self.name, val),
_ => format!("{} = {}", self.name, val),
}
} else {
self.name.to_owned()

View File

@@ -85,7 +85,7 @@
"vartype": "bool"
},
{
"name": "neon.safekeepers",
"name": "safekeepers",
"value": "127.0.0.1:6502,127.0.0.1:6503,127.0.0.1:6501",
"vartype": "string"
},
@@ -181,6 +181,7 @@
}
]
},
"delta_operations": [
{
"action": "delete_db",

View File

@@ -28,7 +28,7 @@ mod pg_helpers_tests {
assert_eq!(
spec.cluster.settings.as_pg_settings(),
"fsync = off\nwal_level = replica\nhot_standby = on\nneon.safekeepers = '127.0.0.1:6502,127.0.0.1:6503,127.0.0.1:6501'\nwal_log_hints = on\nlog_connections = on\nshared_buffers = 32768\nport = 55432\nmax_connections = 100\nmax_wal_senders = 10\nlisten_addresses = '0.0.0.0'\nwal_sender_timeout = 0\npassword_encryption = md5\nmaintenance_work_mem = 65536\nmax_parallel_workers = 8\nmax_worker_processes = 8\nneon.tenant_id = 'b0554b632bd4d547a63b86c3630317e8'\nmax_replication_slots = 10\nneon.timeline_id = '2414a61ffc94e428f14b5758fe308e13'\nshared_preload_libraries = 'neon'\nsynchronous_standby_names = 'walproposer'\nneon.pageserver_connstring = 'host=127.0.0.1 port=6400'"
"fsync = off\nwal_level = replica\nhot_standby = on\nsafekeepers = '127.0.0.1:6502,127.0.0.1:6503,127.0.0.1:6501'\nwal_log_hints = on\nlog_connections = on\nshared_buffers = 32768\nport = 55432\nmax_connections = 100\nmax_wal_senders = 10\nlisten_addresses = '0.0.0.0'\nwal_sender_timeout = 0\npassword_encryption = md5\nmaintenance_work_mem = 65536\nmax_parallel_workers = 8\nmax_worker_processes = 8\nneon.tenant_id = 'b0554b632bd4d547a63b86c3630317e8'\nmax_replication_slots = 10\nneon.timeline_id = '2414a61ffc94e428f14b5758fe308e13'\nshared_preload_libraries = 'neon'\nsynchronous_standby_names = 'walproposer'\nneon.pageserver_connstring = 'host=127.0.0.1 port=6400'"
);
}

View File

@@ -150,7 +150,7 @@ impl PostgresNode {
let port: u16 = conf.parse_field("port", &context)?;
let timeline_id: ZTimelineId = conf.parse_field("neon.timeline_id", &context)?;
let tenant_id: ZTenantId = conf.parse_field("neon.tenant_id", &context)?;
let uses_wal_proposer = conf.get("neon.safekeepers").is_some();
let uses_wal_proposer = conf.get("safekeepers").is_some();
// parse recovery_target_lsn, if any
let recovery_target_lsn: Option<Lsn> =
@@ -341,7 +341,7 @@ impl PostgresNode {
.map(|sk| format!("localhost:{}", sk.pg_port))
.collect::<Vec<String>>()
.join(",");
conf.append("neon.safekeepers", &safekeepers);
conf.append("safekeepers", &safekeepers);
} else {
// We only use setup without safekeepers for tests,
// and don't care about data durability on pageserver,

View File

@@ -24,7 +24,7 @@ use crate::safekeeper::SafekeeperNode;
// This data structures represents neon_local CLI config
//
// It is deserialized from the .neon/config file, or the config file passed
// to 'neon_local init --config=<path>' option. See control_plane/simple.conf for
// to 'zenith init --config=<path>' option. See control_plane/simple.conf for
// an example.
//
#[serde_as]
@@ -320,7 +320,7 @@ impl LocalEnv {
if !repopath.exists() {
bail!(
"Zenith config is not found in {}. You need to run 'neon_local init' first",
"Zenith config is not found in {}. You need to run 'zenith init' first",
repopath.to_str().unwrap()
);
}
@@ -337,12 +337,12 @@ impl LocalEnv {
}
pub fn persist_config(&self, base_path: &Path) -> anyhow::Result<()> {
// Currently, the user first passes a config file with 'neon_local init --config=<path>'
// Currently, the user first passes a config file with 'zenith init --config=<path>'
// We read that in, in `create_config`, and fill any missing defaults. Then it's saved
// to .neon/config. TODO: We lose any formatting and comments along the way, which is
// a bit sad.
let mut conf_content = r#"# This file describes a locale deployment of the page server
# and safekeeeper node. It is read by the 'neon_local' command-line
# and safekeeeper node. It is read by the 'zenith' command-line
# utility.
"#
.to_string();
@@ -382,7 +382,7 @@ impl LocalEnv {
}
//
// Initialize a new Neon repository
// Initialize a new Zenith repository
//
pub fn init(&mut self) -> anyhow::Result<()> {
// check if config already exists

View File

@@ -1,4 +1,5 @@
use std::io::Write;
use std::net::TcpStream;
use std::path::PathBuf;
use std::process::Command;
use std::sync::Arc;
@@ -51,7 +52,7 @@ impl ResponseErrorMessageExt for Response {
Err(SafekeeperHttpError::Response(
match self.json::<HttpErrorBody>() {
Ok(err_body) => format!("Error: {}", err_body.msg),
Err(_) => format!("Http error ({}) at {}.", status.as_u16(), url),
Err(_) => format!("Http error ({}) at {url}.", status.as_u16()),
},
))
}
@@ -240,23 +241,37 @@ impl SafekeeperNode {
),
}
// Wait until process is gone
for i in 0..600 {
let signal = None; // Send no signal, just get the error code
match kill(pid, signal) {
Ok(_) => (), // Process exists, keep waiting
Err(Errno::ESRCH) => {
// Process not found, we're done
println!("done!");
return Ok(());
}
Err(err) => bail!(
"Failed to send signal to pageserver with pid {}: {}",
pid,
err.desc()
),
};
let address = connection_address(&self.pg_connection_config);
// TODO Remove this "timeout" and handle it on caller side instead.
// Shutting down may take a long time,
// if safekeeper flushes a lot of data
let mut tcp_stopped = false;
for i in 0..600 {
if !tcp_stopped {
if let Err(err) = TcpStream::connect(&address) {
tcp_stopped = true;
if err.kind() != io::ErrorKind::ConnectionRefused {
eprintln!("\nSafekeeper connection failed with error: {err}");
}
}
}
if tcp_stopped {
// Also check status on the HTTP port
match self.check_status() {
Err(SafekeeperHttpError::Transport(err)) if err.is_connect() => {
println!("done!");
return Ok(());
}
Err(err) => {
eprintln!("\nSafekeeper status check failed with error: {err}");
return Ok(());
}
Ok(()) => {
// keep waiting
}
}
}
if i % 10 == 0 {
print!(".");
io::stdout().flush().unwrap();

View File

@@ -1,8 +1,9 @@
use std::collections::HashMap;
use std::fs::File;
use std::io::{BufReader, Write};
use std::net::TcpStream;
use std::num::NonZeroU64;
use std::path::{Path, PathBuf};
use std::path::PathBuf;
use std::process::Command;
use std::time::Duration;
use std::{io, result, thread};
@@ -102,19 +103,23 @@ impl PageServerNode {
/// Construct libpq connection string for connecting to the pageserver.
fn pageserver_connection_config(password: &str, listen_addr: &str) -> Config {
format!("postgresql://no_user:{password}@{listen_addr}/no_db")
format!("postgresql://no_user:{}@{}/no_db", password, listen_addr)
.parse()
.unwrap()
}
pub fn initialize(
pub fn init(
&self,
create_tenant: Option<ZTenantId>,
initial_timeline_id: Option<ZTimelineId>,
config_overrides: &[&str],
) -> anyhow::Result<ZTimelineId> {
let mut cmd = Command::new(self.env.pageserver_bin()?);
let id = format!("id={}", self.env.pageserver.id);
// FIXME: the paths should be shell-escaped to handle paths with spaces, quotas etc.
let base_data_dir_param = self.env.base_data_dir.display().to_string();
let pg_distrib_dir_param =
format!("pg_distrib_dir='{}'", self.env.pg_distrib_dir.display());
let authg_type_param = format!("auth_type='{}'", self.env.pageserver.auth_type);
@@ -134,52 +139,67 @@ impl PageServerNode {
.collect::<Vec<_>>()
.join(",")
);
let mut args = Vec::with_capacity(20);
args.push("--init");
args.extend(["-D", &base_data_dir_param]);
args.extend(["-c", &pg_distrib_dir_param]);
args.extend(["-c", &authg_type_param]);
args.extend(["-c", &listen_http_addr_param]);
args.extend(["-c", &listen_pg_addr_param]);
args.extend(["-c", &broker_endpoints_param]);
args.extend(["-c", &id]);
let broker_etcd_prefix_param = self
.env
.etcd_broker
.broker_etcd_prefix
.as_ref()
.map(|prefix| format!("broker_etcd_prefix='{prefix}'"));
let mut init_config_overrides = config_overrides.to_vec();
init_config_overrides.push(&id);
init_config_overrides.push(&pg_distrib_dir_param);
init_config_overrides.push(&authg_type_param);
init_config_overrides.push(&listen_http_addr_param);
init_config_overrides.push(&listen_pg_addr_param);
init_config_overrides.push(&broker_endpoints_param);
if let Some(broker_etcd_prefix_param) = broker_etcd_prefix_param.as_deref() {
init_config_overrides.push(broker_etcd_prefix_param);
args.extend(["-c", broker_etcd_prefix_param]);
}
for config_override in config_overrides {
args.extend(["-c", config_override]);
}
if self.env.pageserver.auth_type != AuthType::Trust {
init_config_overrides.push("auth_validation_public_key_path='auth_public_key.pem'");
args.extend([
"-c",
"auth_validation_public_key_path='auth_public_key.pem'",
]);
}
self.start_node(&init_config_overrides, &self.env.base_data_dir, true)?;
let init_result = self
.try_init_timeline(create_tenant, initial_timeline_id)
.context("Failed to create initial tenant and timeline for pageserver");
match &init_result {
Ok(initial_timeline_id) => {
println!("Successfully initialized timeline {initial_timeline_id}")
}
Err(e) => eprintln!("{e:#}"),
let create_tenant = create_tenant.map(|id| id.to_string());
if let Some(tenant_id) = create_tenant.as_deref() {
args.extend(["--create-tenant", tenant_id])
}
self.stop(false)?;
init_result
}
fn try_init_timeline(
&self,
new_tenant_id: Option<ZTenantId>,
new_timeline_id: Option<ZTimelineId>,
) -> anyhow::Result<ZTimelineId> {
let initial_tenant_id = self.tenant_create(new_tenant_id, HashMap::new())?;
let initial_timeline_info =
self.timeline_create(initial_tenant_id, new_timeline_id, None, None)?;
Ok(initial_timeline_info.timeline_id)
let initial_timeline_id = initial_timeline_id.unwrap_or_else(ZTimelineId::generate);
let initial_timeline_id_string = initial_timeline_id.to_string();
args.extend(["--initial-timeline-id", &initial_timeline_id_string]);
let cmd_with_args = cmd.args(args);
let init_output = fill_rust_env_vars(cmd_with_args)
.output()
.with_context(|| {
format!("failed to init pageserver with command {:?}", cmd_with_args)
})?;
if !init_output.status.success() {
bail!(
"init invocation failed, {}\nStdout: {}\nStderr: {}",
init_output.status,
String::from_utf8_lossy(&init_output.stdout),
String::from_utf8_lossy(&init_output.stderr)
);
}
// echo the captured output of the init command
println!("{}", String::from_utf8_lossy(&init_output.stdout));
Ok(initial_timeline_id)
}
pub fn repo_path(&self) -> PathBuf {
@@ -191,35 +211,15 @@ impl PageServerNode {
}
pub fn start(&self, config_overrides: &[&str]) -> anyhow::Result<()> {
self.start_node(config_overrides, &self.repo_path(), false)
}
fn start_node(
&self,
config_overrides: &[&str],
datadir: &Path,
update_config: bool,
) -> anyhow::Result<()> {
println!(
print!(
"Starting pageserver at '{}' in '{}'",
connection_address(&self.pg_connection_config),
datadir.display()
self.repo_path().display()
);
io::stdout().flush()?;
io::stdout().flush().unwrap();
let mut args = vec![
"-D",
datadir.to_str().with_context(|| {
format!(
"Datadir path '{}' cannot be represented as a unicode string",
datadir.display()
)
})?,
];
if update_config {
args.push("--update-config");
}
let repo_path = self.repo_path();
let mut args = vec!["-D", repo_path.to_str().unwrap()];
for config_override in config_overrides {
args.extend(["-c", config_override]);
@@ -231,8 +231,8 @@ impl PageServerNode {
if !filled_cmd.status()?.success() {
bail!(
"Pageserver failed to start. See console output and '{}' for details.",
datadir.join("pageserver.log").display()
"Pageserver failed to start. See '{}' for details.",
self.repo_path().join("pageserver.log").display()
);
}
@@ -241,7 +241,7 @@ impl PageServerNode {
const RETRIES: i8 = 15;
for retries in 1..RETRIES {
match self.check_status() {
Ok(()) => {
Ok(_) => {
println!("\nPageserver started");
return Ok(());
}
@@ -255,18 +255,21 @@ impl PageServerNode {
if retries == 5 {
println!() // put a line break after dots for second message
}
println!("Pageserver not responding yet, err {err} retrying ({retries})...");
println!(
"Pageserver not responding yet, err {} retrying ({})...",
err, retries
);
}
}
PageserverHttpError::Response(msg) => {
bail!("pageserver failed to start: {msg} ")
bail!("pageserver failed to start: {} ", msg)
}
}
thread::sleep(Duration::from_secs(1));
}
}
}
bail!("pageserver failed to start in {RETRIES} seconds");
bail!("pageserver failed to start in {} seconds", RETRIES);
}
///
@@ -296,32 +299,51 @@ impl PageServerNode {
match kill(pid, sig) {
Ok(_) => (),
Err(Errno::ESRCH) => {
println!("Pageserver with pid {pid} does not exist, but a PID file was found");
println!(
"Pageserver with pid {} does not exist, but a PID file was found",
pid
);
return Ok(());
}
Err(err) => bail!(
"Failed to send signal to pageserver with pid {pid}: {}",
"Failed to send signal to pageserver with pid {}: {}",
pid,
err.desc()
),
}
// Wait until process is gone
for i in 0..600 {
let signal = None; // Send no signal, just get the error code
match kill(pid, signal) {
Ok(_) => (), // Process exists, keep waiting
Err(Errno::ESRCH) => {
// Process not found, we're done
println!("done!");
return Ok(());
}
Err(err) => bail!(
"Failed to send signal to pageserver with pid {}: {}",
pid,
err.desc()
),
};
let address = connection_address(&self.pg_connection_config);
// TODO Remove this "timeout" and handle it on caller side instead.
// Shutting down may take a long time,
// if pageserver checkpoints a lot of data
let mut tcp_stopped = false;
for i in 0..600 {
if !tcp_stopped {
if let Err(err) = TcpStream::connect(&address) {
tcp_stopped = true;
if err.kind() != io::ErrorKind::ConnectionRefused {
eprintln!("\nPageserver connection failed with error: {err}");
}
}
}
if tcp_stopped {
// Also check status on the HTTP port
match self.check_status() {
Err(PageserverHttpError::Transport(err)) if err.is_connect() => {
println!("done!");
return Ok(());
}
Err(err) => {
eprintln!("\nPageserver status check failed with error: {err}");
return Ok(());
}
Ok(()) => {
// keep waiting
}
}
}
if i % 10 == 0 {
print!(".");
io::stdout().flush().unwrap();
@@ -329,13 +351,13 @@ impl PageServerNode {
thread::sleep(Duration::from_millis(100));
}
bail!("Failed to stop pageserver with pid {pid}");
bail!("Failed to stop pageserver with pid {}", pid);
}
pub fn page_server_psql(&self, sql: &str) -> Vec<postgres::SimpleQueryMessage> {
let mut client = self.pg_connection_config.connect(NoTls).unwrap();
println!("Pageserver query: '{sql}'");
println!("Pageserver query: '{}'", sql);
client.simple_query(sql).unwrap()
}
@@ -370,8 +392,9 @@ impl PageServerNode {
&self,
new_tenant_id: Option<ZTenantId>,
settings: HashMap<&str, &str>,
) -> anyhow::Result<ZTenantId> {
self.http_request(Method::POST, format!("{}/tenant", self.http_base_url))
) -> anyhow::Result<Option<ZTenantId>> {
let tenant_id_string = self
.http_request(Method::POST, format!("{}/tenant", self.http_base_url))
.json(&TenantCreateRequest {
new_tenant_id,
checkpoint_distance: settings
@@ -410,16 +433,18 @@ impl PageServerNode {
})
.send()?
.error_from_body()?
.json::<Option<String>>()
.with_context(|| {
format!("Failed to parse tenant creation response for tenant id: {new_tenant_id:?}")
})?
.context("No tenant id was found in the tenant creation response")
.and_then(|tenant_id_string| {
tenant_id_string.parse().with_context(|| {
format!("Failed to parse response string as tenant id: '{tenant_id_string}'")
.json::<Option<String>>()?;
tenant_id_string
.map(|id| {
id.parse().with_context(|| {
format!(
"Failed to parse tennat creation response as tenant id: {}",
id
)
})
})
.transpose()
}
pub fn tenant_config(&self, tenant_id: ZTenantId, settings: HashMap<&str, &str>) -> Result<()> {
@@ -490,27 +515,22 @@ impl PageServerNode {
new_timeline_id: Option<ZTimelineId>,
ancestor_start_lsn: Option<Lsn>,
ancestor_timeline_id: Option<ZTimelineId>,
) -> anyhow::Result<TimelineInfo> {
self.http_request(
Method::POST,
format!("{}/tenant/{}/timeline", self.http_base_url, tenant_id),
)
.json(&TimelineCreateRequest {
new_timeline_id,
ancestor_start_lsn,
ancestor_timeline_id,
})
.send()?
.error_from_body()?
.json::<Option<TimelineInfo>>()
.with_context(|| {
format!("Failed to parse timeline creation response for tenant id: {tenant_id}")
})?
.with_context(|| {
format!(
"No timeline id was found in the timeline creation response for tenant {tenant_id}"
) -> anyhow::Result<Option<TimelineInfo>> {
let timeline_info_response = self
.http_request(
Method::POST,
format!("{}/tenant/{}/timeline", self.http_base_url, tenant_id),
)
})
.json(&TimelineCreateRequest {
new_timeline_id,
ancestor_start_lsn,
ancestor_timeline_id,
})
.send()?
.error_from_body()?
.json::<Option<TimelineInfo>>()?;
Ok(timeline_info_response)
}
/// Import a basebackup prepared using either:

24
docker-entrypoint.sh Executable file
View File

@@ -0,0 +1,24 @@
#!/bin/sh
set -eux
pageserver_id_param="${NODE_ID:-10}"
broker_endpoints_param="${BROKER_ENDPOINT:-absent}"
if [ "$broker_endpoints_param" != "absent" ]; then
broker_endpoints_param="-c broker_endpoints=['$broker_endpoints_param']"
else
broker_endpoints_param=''
fi
remote_storage_param="${REMOTE_STORAGE:-}"
if [ "$1" = 'pageserver' ]; then
if [ ! -d "/data/tenants" ]; then
echo "Initializing pageserver data directory"
pageserver --init -D /data -c "pg_distrib_dir='/usr/local'" -c "id=${pageserver_id_param}" $broker_endpoints_param $remote_storage_param
fi
echo "Staring pageserver at 0.0.0.0:6400"
pageserver -c "listen_pg_addr='0.0.0.0:6400'" -c "listen_http_addr='0.0.0.0:9898'" $broker_endpoints_param -D /data
else
"$@"
fi

View File

@@ -44,7 +44,7 @@ impl ParseCallbacks for PostgresFfiCallbacks {
fn main() {
// Tell cargo to invalidate the built crate whenever the wrapper changes
println!("cargo:rerun-if-changed=bindgen_deps.h");
println!("cargo:rerun-if-changed=pg_control_ffi.h");
// Finding the location of C headers for the Postgres server:
// - if POSTGRES_INSTALL_DIR is set look into it, otherwise look into `<project_root>/tmp_install`
@@ -88,9 +88,9 @@ fn main() {
// the resulting bindings.
let bindings = bindgen::Builder::default()
//
// All the needed PostgreSQL headers are included from 'bindgen_deps.h'
// All the needed PostgreSQL headers are included from 'pg_control_ffi.h'
//
.header("bindgen_deps.h")
.header("pg_control_ffi.h")
//
// Tell cargo to invalidate the built crate whenever any of the
// included header files changed.

View File

@@ -23,7 +23,7 @@
//! information. You can use PostgreSQL's pg_controldata utility to view its
//! contents.
//!
use super::bindings::{ControlFileData, PG_CONTROL_FILE_SIZE};
use crate::{ControlFileData, PG_CONTROL_FILE_SIZE};
use anyhow::{bail, Result};
use bytes::{Bytes, BytesMut};

View File

@@ -7,62 +7,21 @@
// https://github.com/rust-lang/rust-bindgen/issues/1651
#![allow(deref_nullptr)]
use serde::{Deserialize, Serialize};
use utils::lsn::Lsn;
macro_rules! postgres_ffi {
($version:ident) => {
#[path = "."]
pub mod $version {
// fixme: does this have to be 'pub'?
pub mod bindings {
// bindgen generates bindings for a lot of stuff we don't need
#![allow(dead_code)]
include!(concat!(env!("OUT_DIR"), "/bindings.rs"));
use serde::{Deserialize, Serialize};
include!(concat!(env!("OUT_DIR"), "/bindings.rs"));
}
pub mod controlfile_utils;
pub mod nonrelfile_utils;
pub mod pg_constants;
pub mod relfile_utils;
pub mod waldecoder;
pub mod xlog_utils;
// Re-export some symbols from bindings
pub use bindings::DBState_DB_SHUTDOWNED;
pub use bindings::{CheckPoint, ControlFileData, XLogRecord};
}
};
}
postgres_ffi!(v14);
// Export some widely used datatypes that are unlikely to change across Postgres versions
pub use v14::bindings::{uint32, uint64, Oid};
pub use v14::bindings::{BlockNumber, OffsetNumber};
pub use v14::bindings::{MultiXactId, TransactionId};
// Likewise for these, although the assumption that these don't change is a little more iffy.
pub use v14::bindings::{MultiXactOffset, MultiXactStatus};
// from pg_config.h. These can be changed with configure options --with-blocksize=BLOCKSIZE and
// --with-segsize=SEGSIZE, but assume the defaults for now.
pub const BLCKSZ: u16 = 8192;
pub const RELSEG_SIZE: u32 = 1024 * 1024 * 1024 / (BLCKSZ as u32);
pub const XLOG_BLCKSZ: usize = 8192;
// PG timeline is always 1, changing it doesn't have any useful meaning in Neon.
//
// NOTE: this is not to be confused with Neon timelines; different concept!
//
// It's a shaky assumption, that it's always 1. We might import a
// PostgreSQL data directory that has gone through timeline bumps,
// for example. FIXME later.
pub const PG_TLI: u32 = 1;
pub mod controlfile_utils;
pub mod nonrelfile_utils;
pub mod pg_constants;
pub mod relfile_utils;
pub mod waldecoder;
pub mod xlog_utils;
// See TransactionIdIsNormal in transam.h
pub const fn transaction_id_is_normal(id: TransactionId) -> bool {
id > v14::pg_constants::FIRST_NORMAL_TRANSACTION_ID
id > pg_constants::FIRST_NORMAL_TRANSACTION_ID
}
// See TransactionIdPrecedes in transam.c

View File

@@ -1,12 +1,11 @@
//!
//! Common utilities for dealing with PostgreSQL non-relation files.
//!
use crate::transaction_id_precedes;
use super::pg_constants;
use crate::{pg_constants, transaction_id_precedes};
use bytes::BytesMut;
use log::*;
use super::bindings::MultiXactId;
use crate::MultiXactId;
pub fn transaction_id_set_status(xid: u32, status: u8, page: &mut BytesMut) {
trace!(

View File

@@ -7,8 +7,7 @@
//! comments on them.
//!
use super::bindings::PageHeaderData;
use crate::BLCKSZ;
use crate::PageHeaderData;
//
// From pg_tablespace_d.h
@@ -32,6 +31,11 @@ pub const SMGR_TRUNCATE_HEAP: u32 = 0x0001;
pub const SMGR_TRUNCATE_VM: u32 = 0x0002;
pub const SMGR_TRUNCATE_FSM: u32 = 0x0004;
// from pg_config.h. These can be changed with configure options --with-blocksize=BLOCKSIZE and
// --with-segsize=SEGSIZE, but assume the defaults for now.
pub const BLCKSZ: u16 = 8192;
pub const RELSEG_SIZE: u32 = 1024 * 1024 * 1024 / (BLCKSZ as u32);
//
// From bufpage.h
//
@@ -209,6 +213,7 @@ pub const FIRST_NORMAL_OBJECT_ID: u32 = 16384;
/* FIXME: pageserver should request wal_seg_size from compute node */
pub const WAL_SEGMENT_SIZE: usize = 16 * 1024 * 1024;
pub const XLOG_BLCKSZ: usize = 8192;
pub const XLOG_CHECKPOINT_SHUTDOWN: u8 = 0x00;
pub const XLOG_CHECKPOINT_ONLINE: u8 = 0x10;
pub const XLP_LONG_HEADER: u16 = 0x0002;

View File

@@ -1,11 +1,11 @@
//!
//! Common utilities for dealing with PostgreSQL relation files.
//!
use super::pg_constants;
use crate::pg_constants;
use once_cell::sync::OnceCell;
use regex::Regex;
#[derive(Debug, Clone, thiserror::Error, PartialEq, Eq)]
#[derive(Debug, Clone, thiserror::Error, PartialEq)]
pub enum FilePathError {
#[error("invalid relation fork name")]
InvalidForkName,

View File

@@ -10,7 +10,10 @@
//!
use super::pg_constants;
use super::xlog_utils::*;
use super::bindings::{XLogLongPageHeaderData, XLogPageHeaderData, XLogRecord, XLOG_PAGE_MAGIC};
use super::XLogLongPageHeaderData;
use super::XLogPageHeaderData;
use super::XLogRecord;
use super::XLOG_PAGE_MAGIC;
use bytes::{Buf, BufMut, Bytes, BytesMut};
use crc32c::*;
use log::*;
@@ -81,11 +84,11 @@ impl WalStreamDecoder {
}
match self.state {
State::WaitingForRecord => {
if hdr.xlp_info & XLP_FIRST_IS_CONTRECORD != 0 {
return Err(
"invalid xlog page header: unexpected XLP_FIRST_IS_CONTRECORD".into(),
);
}
// if hdr.xlp_info & XLP_FIRST_IS_CONTRECORD != 0 {
// return Err(
// "invalid xlog page header: unexpected XLP_FIRST_IS_CONTRECORD".into(),
// );
// }
if hdr.xlp_rem_len != 0 {
return Err(format!(
"invalid xlog page header: xlp_rem_len={}, but it's not a contrecord",

View File

@@ -7,24 +7,22 @@
// have been named the same as the corresponding PostgreSQL functions instead.
//
use crc32c::crc32c_append;
use crate::pg_constants;
use crate::CheckPoint;
use crate::FullTransactionId;
use crate::XLogLongPageHeaderData;
use crate::XLogPageHeaderData;
use crate::XLogRecord;
use crate::XLOG_PAGE_MAGIC;
use super::bindings::{
CheckPoint, FullTransactionId, XLogLongPageHeaderData, XLogPageHeaderData, XLogRecord,
XLOG_PAGE_MAGIC,
};
use super::pg_constants;
use super::pg_constants::WAL_SEGMENT_SIZE;
use crate::v14::waldecoder::WalStreamDecoder;
use crate::PG_TLI;
use crate::{uint32, uint64, Oid};
use crate::pg_constants::WAL_SEGMENT_SIZE;
use crate::waldecoder::WalStreamDecoder;
use bytes::BytesMut;
use bytes::{Buf, Bytes};
use log::*;
use serde::Serialize;
use std::fs::File;
use std::io::prelude::*;
use std::io::ErrorKind;
@@ -49,6 +47,9 @@ pub const XLOG_SIZE_OF_XLOG_RECORD: usize = std::mem::size_of::<XLogRecord>();
#[allow(clippy::identity_op)]
pub const SIZE_OF_XLOG_RECORD_DATA_HEADER_SHORT: usize = 1 * 2;
// PG timeline is always 1, changing it doesn't have useful meaning in Zenith.
pub const PG_TLI: u32 = 1;
pub type XLogRecPtr = u64;
pub type TimeLineID = u32;
pub type TimestampTz = i64;
@@ -79,12 +80,12 @@ pub fn XLogSegNoOffsetToRecPtr(
#[allow(non_snake_case)]
pub fn XLogFileName(tli: TimeLineID, logSegNo: XLogSegNo, wal_segsz_bytes: usize) -> String {
format!(
return format!(
"{:>08X}{:>08X}{:>08X}",
tli,
logSegNo / XLogSegmentsPerXLogId(wal_segsz_bytes),
logSegNo % XLogSegmentsPerXLogId(wal_segsz_bytes)
)
);
}
#[allow(non_snake_case)]
@@ -345,85 +346,6 @@ pub fn generate_wal_segment(segno: u64, system_id: u64) -> Result<Bytes, Seriali
Ok(seg_buf.freeze())
}
#[repr(C)]
#[derive(Serialize)]
struct XlLogicalMessage {
db_id: Oid,
transactional: uint32, // bool, takes 4 bytes due to alignment in C structures
prefix_size: uint64,
message_size: uint64,
}
impl XlLogicalMessage {
pub fn encode(&self) -> Bytes {
use utils::bin_ser::LeSer;
self.ser().unwrap().into()
}
}
/// Create new WAL record for non-transactional logical message.
/// Used for creating artificial WAL for tests, as LogicalMessage
/// record is basically no-op.
///
/// NOTE: This leaves the xl_prev field zero. The safekeeper and
/// pageserver tolerate that, but PostgreSQL does not.
pub fn encode_logical_message(prefix: &str, message: &str) -> Vec<u8> {
let mut prefix_bytes: Vec<u8> = Vec::with_capacity(prefix.len() + 1);
prefix_bytes.write_all(prefix.as_bytes()).unwrap();
prefix_bytes.push(0);
let message_bytes = message.as_bytes();
let logical_message = XlLogicalMessage {
db_id: 0,
transactional: 0,
prefix_size: prefix_bytes.len() as u64,
message_size: message_bytes.len() as u64,
};
let mainrdata = logical_message.encode();
let mainrdata_len: usize = mainrdata.len() + prefix_bytes.len() + message_bytes.len();
// only short mainrdata is supported for now
assert!(mainrdata_len <= 255);
let mainrdata_len = mainrdata_len as u8;
let mut data: Vec<u8> = vec![pg_constants::XLR_BLOCK_ID_DATA_SHORT, mainrdata_len];
data.extend_from_slice(&mainrdata);
data.extend_from_slice(&prefix_bytes);
data.extend_from_slice(message_bytes);
let total_len = XLOG_SIZE_OF_XLOG_RECORD + data.len();
let mut header = XLogRecord {
xl_tot_len: total_len as u32,
xl_xid: 0,
xl_prev: 0,
xl_info: 0,
xl_rmid: 21,
__bindgen_padding_0: [0u8; 2usize],
xl_crc: 0, // crc will be calculated later
};
let header_bytes = header.encode().expect("failed to encode header");
let crc = crc32c_append(0, &data);
let crc = crc32c_append(crc, &header_bytes[0..XLOG_RECORD_CRC_OFFS]);
header.xl_crc = crc;
let mut wal: Vec<u8> = Vec::new();
wal.extend_from_slice(&header.encode().expect("failed to encode header"));
wal.extend_from_slice(&data);
// WAL start position must be aligned at 8 bytes,
// this will add padding for the next WAL record.
const PADDING: usize = 8;
let padding_rem = wal.len() % PADDING;
if padding_rem != 0 {
wal.resize(wal.len() + PADDING - padding_rem, 0);
}
wal
}
#[cfg(test)]
mod tests {
use super::*;
@@ -625,15 +547,4 @@ mod tests {
checkpoint.update_next_xid(1024);
assert_eq!(checkpoint.nextXid.value, 2048);
}
#[test]
pub fn test_encode_logical_message() {
let expected = [
64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 21, 0, 0, 170, 34, 166, 227, 255,
38, 0, 0, 0, 0, 0, 0, 0, 0, 7, 0, 0, 0, 0, 0, 0, 0, 7, 0, 0, 0, 0, 0, 0, 0, 112, 114,
101, 102, 105, 120, 0, 109, 101, 115, 115, 97, 103, 101,
];
let actual = encode_logical_message("prefix", "message");
assert_eq!(expected, actual[..]);
}
}

View File

@@ -4,8 +4,8 @@ use log::*;
use once_cell::sync::Lazy;
use postgres::types::PgLsn;
use postgres::Client;
use postgres_ffi::v14::pg_constants::WAL_SEGMENT_SIZE;
use postgres_ffi::v14::xlog_utils::{
use postgres_ffi::pg_constants::WAL_SEGMENT_SIZE;
use postgres_ffi::xlog_utils::{
XLOG_BLCKSZ, XLOG_SIZE_OF_XLOG_RECORD, XLOG_SIZE_OF_XLOG_SHORT_PHD,
};
use std::cmp::Ordering;

View File

@@ -265,7 +265,7 @@ mod tests {
use serde::{Deserialize, Serialize};
use std::io::Cursor;
#[derive(Debug, PartialEq, Eq, Serialize, Deserialize)]
#[derive(Debug, PartialEq, Serialize, Deserialize)]
pub struct ShortStruct {
a: u8,
b: u32,
@@ -286,7 +286,7 @@ mod tests {
const SHORT2_ENC_LE: &[u8] = &[8, 0, 0, 3, 7];
const SHORT2_ENC_LE_TRAILING: &[u8] = &[8, 0, 0, 3, 7, 0xff, 0xff, 0xff];
#[derive(Debug, PartialEq, Eq, Serialize, Deserialize)]
#[derive(Debug, PartialEq, Serialize, Deserialize)]
pub struct LongMsg {
pub tag: u8,
pub blockpos: u32,

View File

@@ -10,10 +10,12 @@ pub fn get_request_param<'a>(
) -> Result<&'a str, ApiError> {
match request.param(param_name) {
Some(arg) => Ok(arg),
None => Err(ApiError::BadRequest(format!(
"no {} specified in path param",
param_name
))),
None => {
return Err(ApiError::BadRequest(format!(
"no {} specified in path param",
param_name
)))
}
}
}

View File

@@ -18,7 +18,7 @@ pub const XLOG_BLCKSZ: u32 = 8192;
pub struct Lsn(pub u64);
/// We tried to parse an LSN from a string, but failed
#[derive(Debug, PartialEq, Eq, thiserror::Error)]
#[derive(Debug, PartialEq, thiserror::Error)]
#[error("LsnParseError")]
pub struct LsnParseError;

View File

@@ -50,7 +50,7 @@ pub trait Handler {
/// PostgresBackend protocol state.
/// XXX: The order of the constructors matters.
#[derive(Clone, Copy, PartialEq, Eq, PartialOrd)]
#[derive(Clone, Copy, PartialEq, PartialOrd)]
pub enum ProtoState {
Initialization,
Encrypted,

View File

@@ -930,7 +930,7 @@ impl<'a> BeMessage<'a> {
// Neon extension of postgres replication protocol
// See NEON_STATUS_UPDATE_TAG_BYTE
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
#[derive(Debug, Clone, Copy, PartialEq, Serialize, Deserialize)]
pub struct ReplicationFeedback {
// Last known size of the timeline. Used to enforce timeline size limit.
pub current_timeline_size: u64,

View File

@@ -9,7 +9,7 @@ use std::sync::Mutex;
use std::time::Duration;
/// An error happened while waiting for a number
#[derive(Debug, PartialEq, Eq, thiserror::Error)]
#[derive(Debug, PartialEq, thiserror::Error)]
#[error("SeqWaitError")]
pub enum SeqWaitError {
/// The wait timeout was reached

View File

@@ -4,7 +4,7 @@ use serde::Deserialize;
use std::io::Read;
use utils::bin_ser::LeSer;
#[derive(Debug, PartialEq, Eq, Deserialize)]
#[derive(Debug, PartialEq, Deserialize)]
pub struct HeaderData {
magic: u16,
info: u16,

View File

@@ -30,9 +30,6 @@ static CERT: Lazy<rustls::Certificate> = Lazy::new(|| {
});
#[test]
// [false-positive](https://github.com/rust-lang/rust-clippy/issues/9274),
// we resize the vector so doing some modifications after all
#[allow(clippy::read_zero_byte_vec)]
fn ssl() {
let (mut client_sock, server_sock) = make_tcp_pair();

View File

@@ -501,10 +501,10 @@ fn handle_init(init_match: &ArgMatches) -> anyhow::Result<LocalEnv> {
// default_tenantid was generated by the `env.init()` call above
let initial_tenant_id = env.default_tenant_id.unwrap();
// Initialize pageserver, create initial tenant and timeline.
// Call 'pageserver init'.
let pageserver = PageServerNode::from_env(&env);
let initial_timeline_id = pageserver
.initialize(
.init(
Some(initial_tenant_id),
initial_timeline_id_arg,
&pageserver_config_overrides(init_match),
@@ -551,15 +551,25 @@ fn handle_tenant(tenant_match: &ArgMatches, env: &mut local_env::LocalEnv) -> an
.values_of("config")
.map(|vals| vals.flat_map(|c| c.split_once(':')).collect())
.unwrap_or_default();
let new_tenant_id = pageserver.tenant_create(initial_tenant_id, tenant_conf)?;
println!("tenant {new_tenant_id} successfully created on the pageserver");
let new_tenant_id = pageserver
.tenant_create(initial_tenant_id, tenant_conf)?
.ok_or_else(|| {
anyhow!("Tenant with id {:?} was already created", initial_tenant_id)
})?;
println!(
"tenant {} successfully created on the pageserver",
new_tenant_id
);
// Create an initial timeline for the new tenant
let new_timeline_id = parse_timeline_id(create_match)?;
let timeline_info =
pageserver.timeline_create(new_tenant_id, new_timeline_id, None, None)?;
let new_timeline_id = timeline_info.timeline_id;
let last_record_lsn = timeline_info
let timeline = pageserver
.timeline_create(new_tenant_id, new_timeline_id, None, None)?
.context(format!(
"Failed to create initial timeline for tenant {new_tenant_id}"
))?;
let new_timeline_id = timeline.timeline_id;
let last_record_lsn = timeline
.local
.context(format!("Failed to get last record LSN: no local timeline info for timeline {new_timeline_id}"))?
.last_record_lsn;
@@ -606,18 +616,20 @@ fn handle_timeline(timeline_match: &ArgMatches, env: &mut local_env::LocalEnv) -
let new_branch_name = create_match
.value_of("branch-name")
.ok_or_else(|| anyhow!("No branch name provided"))?;
let timeline_info = pageserver.timeline_create(tenant_id, None, None, None)?;
let new_timeline_id = timeline_info.timeline_id;
let timeline = pageserver
.timeline_create(tenant_id, None, None, None)?
.ok_or_else(|| anyhow!("Failed to create new timeline for tenant {}", tenant_id))?;
let new_timeline_id = timeline.timeline_id;
let last_record_lsn = timeline_info
let last_record_lsn = timeline
.local
.expect("no local timeline info")
.last_record_lsn;
env.register_branch_mapping(new_branch_name.to_string(), tenant_id, new_timeline_id)?;
println!(
"Created timeline '{}' at Lsn {last_record_lsn} for tenant: {tenant_id}",
timeline_info.timeline_id
"Created timeline '{}' at Lsn {} for tenant: {}",
timeline.timeline_id, last_record_lsn, tenant_id,
);
}
Some(("import", import_match)) => {
@@ -668,7 +680,10 @@ fn handle_timeline(timeline_match: &ArgMatches, env: &mut local_env::LocalEnv) -
let ancestor_timeline_id = env
.get_branch_timeline_id(ancestor_branch_name, tenant_id)
.ok_or_else(|| {
anyhow!("Found no timeline id for branch name '{ancestor_branch_name}'")
anyhow!(
"Found no timeline id for branch name '{}'",
ancestor_branch_name
)
})?;
let start_lsn = branch_match
@@ -676,15 +691,12 @@ fn handle_timeline(timeline_match: &ArgMatches, env: &mut local_env::LocalEnv) -
.map(Lsn::from_str)
.transpose()
.context("Failed to parse ancestor start Lsn from the request")?;
let timeline_info = pageserver.timeline_create(
tenant_id,
None,
start_lsn,
Some(ancestor_timeline_id),
)?;
let new_timeline_id = timeline_info.timeline_id;
let timeline = pageserver
.timeline_create(tenant_id, None, start_lsn, Some(ancestor_timeline_id))?
.ok_or_else(|| anyhow!("Failed to create new timeline for tenant {}", tenant_id))?;
let new_timeline_id = timeline.timeline_id;
let last_record_lsn = timeline_info
let last_record_lsn = timeline
.local
.expect("no local timeline info")
.last_record_lsn;
@@ -692,11 +704,11 @@ fn handle_timeline(timeline_match: &ArgMatches, env: &mut local_env::LocalEnv) -
env.register_branch_mapping(new_branch_name.to_string(), tenant_id, new_timeline_id)?;
println!(
"Created timeline '{}' at Lsn {last_record_lsn} for tenant: {tenant_id}. Ancestor timeline: '{ancestor_branch_name}'",
timeline_info.timeline_id
"Created timeline '{}' at Lsn {} for tenant: {}. Ancestor timeline: '{}'",
timeline.timeline_id, last_record_lsn, tenant_id, ancestor_branch_name,
);
}
Some((sub_name, _)) => bail!("Unexpected tenant subcommand '{sub_name}'"),
Some((sub_name, _)) => bail!("Unexpected tenant subcommand '{}'", sub_name),
None => bail!("no tenant subcommand provided"),
}

View File

@@ -24,13 +24,8 @@ use tracing::*;
use crate::reltag::{RelTag, SlruKind};
use crate::DatadirTimeline;
use postgres_ffi::v14::pg_constants;
use postgres_ffi::v14::xlog_utils::{generate_wal_segment, normalize_lsn, XLogFileName};
use postgres_ffi::v14::{CheckPoint, ControlFileData};
use postgres_ffi::TransactionId;
use postgres_ffi::PG_TLI;
use postgres_ffi::{BLCKSZ, RELSEG_SIZE};
use postgres_ffi::xlog_utils::*;
use postgres_ffi::*;
use utils::lsn::Lsn;
/// This is short-living object only for the time of tarball creation,
@@ -205,7 +200,7 @@ where
}
// Add a file for each chunk of blocks (aka segment)
let chunks = (0..nblocks).chunks(RELSEG_SIZE as usize);
let chunks = (0..nblocks).chunks(pg_constants::RELSEG_SIZE as usize);
for (seg, blocks) in chunks.into_iter().enumerate() {
let mut segment_data: Vec<u8> = vec![];
for blknum in blocks {
@@ -225,19 +220,23 @@ where
fn add_slru_segment(&mut self, slru: SlruKind, segno: u32) -> anyhow::Result<()> {
let nblocks = self.timeline.get_slru_segment_size(slru, segno, self.lsn)?;
let mut slru_buf: Vec<u8> = Vec::with_capacity(nblocks as usize * BLCKSZ as usize);
let mut slru_buf: Vec<u8> =
Vec::with_capacity(nblocks as usize * pg_constants::BLCKSZ as usize);
for blknum in 0..nblocks {
let img = self
.timeline
.get_slru_page_at_lsn(slru, segno, blknum, self.lsn)?;
if slru == SlruKind::Clog {
ensure!(img.len() == BLCKSZ as usize || img.len() == BLCKSZ as usize + 8);
ensure!(
img.len() == pg_constants::BLCKSZ as usize
|| img.len() == pg_constants::BLCKSZ as usize + 8
);
} else {
ensure!(img.len() == BLCKSZ as usize);
ensure!(img.len() == pg_constants::BLCKSZ as usize);
}
slru_buf.extend_from_slice(&img[..BLCKSZ as usize]);
slru_buf.extend_from_slice(&img[..pg_constants::BLCKSZ as usize]);
}
let segname = format!("{}/{:>04X}", slru.to_str(), segno);

View File

@@ -1,6 +1,6 @@
//! Main entry point for the Page Server executable.
use std::{env, ops::ControlFlow, path::Path, str::FromStr};
use std::{env, path::Path, str::FromStr};
use tracing::*;
use anyhow::{bail, Context, Result};
@@ -13,7 +13,7 @@ use pageserver::{
config::{defaults::*, PageServerConf},
http, page_cache, page_service, profiling, tenant_mgr, thread_mgr,
thread_mgr::ThreadKind,
virtual_file, LOG_FILE_NAME,
timelines, virtual_file, LOG_FILE_NAME,
};
use utils::{
auth::JwtAuth,
@@ -24,6 +24,7 @@ use utils::{
shutdown::exit_now,
signals::{self, Signal},
tcp_listener,
zid::{ZTenantId, ZTimelineId},
};
project_git_version!(GIT_VERSION);
@@ -41,7 +42,6 @@ fn main() -> anyhow::Result<()> {
.about("Materializes WAL stream to pages and serves them to the postgres")
.version(&*version())
.arg(
Arg::new("daemonize")
.short('d')
.long("daemonize")
@@ -52,7 +52,7 @@ fn main() -> anyhow::Result<()> {
Arg::new("init")
.long("init")
.takes_value(false)
.help("Initialize pageserver with all given config overrides"),
.help("Initialize pageserver service: creates an initial config, tenant and timeline, if specified"),
)
.arg(
Arg::new("workdir")
@@ -61,6 +61,20 @@ fn main() -> anyhow::Result<()> {
.takes_value(true)
.help("Working directory for the pageserver"),
)
.arg(
Arg::new("create-tenant")
.long("create-tenant")
.takes_value(true)
.help("Create tenant during init")
.requires("init"),
)
.arg(
Arg::new("initial-timeline-id")
.long("initial-timeline-id")
.takes_value(true)
.help("Use a specific timeline id during init and tenant creation")
.requires("create-tenant"),
)
// See `settings.md` for more details on the extra configuration patameters pageserver can process
.arg(
Arg::new("config-override")
@@ -71,9 +85,6 @@ fn main() -> anyhow::Result<()> {
.help("Additional configuration overrides of the ones from the toml config file (or new ones to add there).
Any option has to be a valid toml document, example: `-c=\"foo='hey'\"` `-c=\"foo={value=1}\"`"),
)
.arg(Arg::new("update-config").long("update-config").takes_value(false).help(
"Update the config file when started",
))
.arg(
Arg::new("enabled-features")
.long("enabled-features")
@@ -99,6 +110,18 @@ fn main() -> anyhow::Result<()> {
.with_context(|| format!("Error opening workdir '{}'", workdir.display()))?;
let cfg_file_path = workdir.join("pageserver.toml");
let init = arg_matches.is_present("init");
let create_tenant = arg_matches
.value_of("create-tenant")
.map(ZTenantId::from_str)
.transpose()
.context("Failed to parse tenant id from the arguments")?;
let initial_timeline_id = arg_matches
.value_of("initial-timeline-id")
.map(ZTimelineId::from_str)
.transpose()
.context("Failed to parse timeline id from the arguments")?;
// Set CWD to workdir for non-daemon modes
env::set_current_dir(&workdir).with_context(|| {
format!(
@@ -108,86 +131,30 @@ fn main() -> anyhow::Result<()> {
})?;
let daemonize = arg_matches.is_present("daemonize");
let conf = match initialize_config(&cfg_file_path, arg_matches, &workdir)? {
ControlFlow::Continue(conf) => conf,
ControlFlow::Break(()) => {
info!("Pageserver config init successful");
return Ok(());
}
};
let tenants_path = conf.tenants_path();
if !tenants_path.exists() {
utils::crashsafe_dir::create_dir_all(conf.tenants_path()).with_context(|| {
format!(
"Failed to create tenants root dir at '{}'",
tenants_path.display()
)
})?;
if init && daemonize {
bail!("--daemonize cannot be used with --init")
}
// Initialize up failpoints support
let scenario = FailScenario::setup();
// Basic initialization of things that don't change after startup
virtual_file::init(conf.max_file_descriptors);
page_cache::init(conf.page_cache_size);
start_pageserver(conf, daemonize).context("Failed to start pageserver")?;
scenario.teardown();
Ok(())
}
fn initialize_config(
cfg_file_path: &Path,
arg_matches: clap::ArgMatches,
workdir: &Path,
) -> anyhow::Result<ControlFlow<(), &'static PageServerConf>> {
let init = arg_matches.is_present("init");
let update_config = init || arg_matches.is_present("update-config");
let (mut toml, config_file_exists) = if cfg_file_path.is_file() {
if init {
anyhow::bail!(
"Config file '{}' already exists, cannot init it, use --update-config to update it",
cfg_file_path.display()
);
}
// Supplement the CLI arguments with the config file
let cfg_file_contents = std::fs::read_to_string(&cfg_file_path).with_context(|| {
format!(
"Failed to read pageserver config at '{}'",
cfg_file_path.display()
)
})?;
(
cfg_file_contents
.parse::<toml_edit::Document>()
.with_context(|| {
format!(
"Failed to parse '{}' as pageserver config",
cfg_file_path.display()
)
})?,
true,
)
} else if cfg_file_path.exists() {
anyhow::bail!(
"Config file '{}' exists but is not a regular file",
cfg_file_path.display()
);
} else {
let mut toml = if init {
// We're initializing the repo, so there's no config file yet
(
DEFAULT_CONFIG_FILE
.parse::<toml_edit::Document>()
.context("could not parse built-in config file")?,
false,
)
DEFAULT_CONFIG_FILE
.parse::<toml_edit::Document>()
.context("could not parse built-in config file")?
} else {
// Supplement the CLI arguments with the config file
let cfg_file_contents = std::fs::read_to_string(&cfg_file_path)
.with_context(|| format!("No pageserver config at '{}'", cfg_file_path.display()))?;
cfg_file_contents
.parse::<toml_edit::Document>()
.with_context(|| {
format!(
"Failed to read '{}' as pageserver config",
cfg_file_path.display()
)
})?
};
// Process any extra options given with -c
if let Some(values) = arg_matches.values_of("config-override") {
for option_line in values {
let doc = toml_edit::Document::from_str(option_line).with_context(|| {
@@ -198,38 +165,49 @@ fn initialize_config(
})?;
for (key, item) in doc.iter() {
if config_file_exists && update_config && key == "id" && toml.contains_key(key) {
anyhow::bail!("Pageserver config file exists at '{}' and has node id already, it cannot be overridden", cfg_file_path.display());
if key == "id" {
anyhow::ensure!(
init,
"node id can only be set during pageserver init and cannot be overridden"
);
}
toml.insert(key, item.clone());
}
}
}
debug!("Resulting toml: {toml}");
let conf = PageServerConf::parse_and_validate(&toml, workdir)
trace!("Resulting toml: {}", toml);
let conf = PageServerConf::parse_and_validate(&toml, &workdir)
.context("Failed to parse pageserver configuration")?;
if update_config {
info!("Writing pageserver config to '{}'", cfg_file_path.display());
// The configuration is all set up now. Turn it into a 'static
// that can be freely stored in structs and passed across threads
// as a ref.
let conf: &'static PageServerConf = Box::leak(Box::new(conf));
// Initialize up failpoints support
let scenario = FailScenario::setup();
// Basic initialization of things that don't change after startup
virtual_file::init(conf.max_file_descriptors);
page_cache::init(conf.page_cache_size);
// Create repo and exit if init was requested
if init {
timelines::init_pageserver(conf, create_tenant, initial_timeline_id)
.context("Failed to init pageserver")?;
// write the config file
std::fs::write(&cfg_file_path, toml.to_string()).with_context(|| {
format!(
"Failed to write pageserver config to '{}'",
"Failed to initialize pageserver config at '{}'",
cfg_file_path.display()
)
})?;
info!(
"Config successfully written to '{}'",
cfg_file_path.display()
)
} else {
start_pageserver(conf, daemonize).context("Failed to start pageserver")?;
}
Ok(if init {
ControlFlow::Break(())
} else {
ControlFlow::Continue(Box::leak(Box::new(conf)))
})
scenario.teardown();
Ok(())
}
fn start_pageserver(conf: &'static PageServerConf, daemonize: bool) -> Result<()> {

View File

@@ -11,13 +11,14 @@ use super::models::{
StatusResponse, TenantConfigRequest, TenantCreateRequest, TenantCreateResponse, TenantInfo,
TimelineCreateRequest,
};
use crate::layered_repository::{metadata::TimelineMetadata, LayeredTimeline};
use crate::layered_repository::metadata::TimelineMetadata;
use crate::pgdatadir_mapping::DatadirTimeline;
use crate::repository::{LocalTimelineState, RepositoryTimeline};
use crate::repository::{Repository, Timeline};
use crate::storage_sync;
use crate::storage_sync::index::{RemoteIndex, RemoteTimeline};
use crate::tenant_config::TenantConfOpt;
use crate::TimelineImpl;
use crate::{config::PageServerConf, tenant_mgr, timelines};
use utils::{
auth::JwtAuth,
@@ -85,7 +86,7 @@ fn get_config(request: &Request<Body>) -> &'static PageServerConf {
// Helper functions to construct a LocalTimelineInfo struct for a timeline
fn local_timeline_info_from_loaded_timeline(
timeline: &LayeredTimeline,
timeline: &TimelineImpl,
include_non_incremental_logical_size: bool,
include_non_incremental_physical_size: bool,
) -> anyhow::Result<LocalTimelineInfo> {
@@ -160,13 +161,13 @@ fn local_timeline_info_from_unloaded_timeline(metadata: &TimelineMetadata) -> Lo
}
fn local_timeline_info_from_repo_timeline(
repo_timeline: &RepositoryTimeline<LayeredTimeline>,
repo_timeline: &RepositoryTimeline<TimelineImpl>,
include_non_incremental_logical_size: bool,
include_non_incremental_physical_size: bool,
) -> anyhow::Result<LocalTimelineInfo> {
match repo_timeline {
RepositoryTimeline::Loaded(timeline) => local_timeline_info_from_loaded_timeline(
timeline,
&*timeline,
include_non_incremental_logical_size,
include_non_incremental_physical_size,
),

View File

@@ -15,24 +15,13 @@ use crate::pgdatadir_mapping::*;
use crate::reltag::{RelTag, SlruKind};
use crate::walingest::WalIngest;
use crate::walrecord::DecodedWALRecord;
use postgres_ffi::v14::relfile_utils::*;
use postgres_ffi::v14::waldecoder::*;
use postgres_ffi::v14::xlog_utils::*;
use postgres_ffi::v14::{pg_constants, ControlFileData, DBState_DB_SHUTDOWNED};
use postgres_ffi::relfile_utils::*;
use postgres_ffi::waldecoder::*;
use postgres_ffi::xlog_utils::*;
use postgres_ffi::Oid;
use postgres_ffi::BLCKSZ;
use postgres_ffi::{pg_constants, ControlFileData, DBState_DB_SHUTDOWNED};
use utils::lsn::Lsn;
// Returns checkpoint LSN from controlfile
pub fn get_lsn_from_controlfile(path: &Path) -> Result<Lsn> {
// Read control file to extract the LSN
let controlfile_path = path.join("global").join("pg_control");
let controlfile = ControlFileData::decode(&std::fs::read(controlfile_path)?)?;
let lsn = controlfile.checkPoint;
Ok(Lsn(lsn))
}
///
/// Import all relation data pages from local disk into the repository.
///
@@ -121,8 +110,8 @@ fn import_rel<T: DatadirTimeline, Reader: Read>(
let mut buf: [u8; 8192] = [0u8; 8192];
ensure!(len % BLCKSZ as usize == 0);
let nblocks = len / BLCKSZ as usize;
ensure!(len % pg_constants::BLCKSZ as usize == 0);
let nblocks = len / pg_constants::BLCKSZ as usize;
let rel = RelTag {
spcnode: spcoid,
@@ -131,7 +120,7 @@ fn import_rel<T: DatadirTimeline, Reader: Read>(
forknum,
};
let mut blknum: u32 = segno * (1024 * 1024 * 1024 / BLCKSZ as u32);
let mut blknum: u32 = segno * (1024 * 1024 * 1024 / pg_constants::BLCKSZ as u32);
// Call put_rel_creation for every segment of the relation,
// because there is no guarantee about the order in which we are processing segments.
@@ -155,7 +144,8 @@ fn import_rel<T: DatadirTimeline, Reader: Read>(
Err(err) => match err.kind() {
std::io::ErrorKind::UnexpectedEof => {
// reached EOF. That's expected.
let relative_blknum = blknum - segno * (1024 * 1024 * 1024 / BLCKSZ as u32);
let relative_blknum =
blknum - segno * (1024 * 1024 * 1024 / pg_constants::BLCKSZ as u32);
ensure!(relative_blknum == nblocks as u32, "unexpected EOF");
break;
}
@@ -194,8 +184,8 @@ fn import_slru<T: DatadirTimeline, Reader: Read>(
.to_string_lossy();
let segno = u32::from_str_radix(filename, 16)?;
ensure!(len % BLCKSZ as usize == 0); // we assume SLRU block size is the same as BLCKSZ
let nblocks = len / BLCKSZ as usize;
ensure!(len % pg_constants::BLCKSZ as usize == 0); // we assume SLRU block size is the same as BLCKSZ
let nblocks = len / pg_constants::BLCKSZ as usize;
ensure!(nblocks <= pg_constants::SLRU_PAGES_PER_SEGMENT as usize);

View File

@@ -1,5 +1,5 @@
use crate::repository::{key_range_size, singleton_range, Key};
use postgres_ffi::BLCKSZ;
use postgres_ffi::pg_constants;
use std::ops::Range;
///
@@ -19,7 +19,7 @@ impl KeySpace {
///
pub fn partition(&self, target_size: u64) -> KeyPartitioning {
// Assume that each value is 8k in size.
let target_nblocks = (target_size / BLCKSZ as u64) as usize;
let target_nblocks = (target_size / pg_constants::BLCKSZ as u64) as usize;
let mut parts = Vec::new();
let mut current_part = Vec::new();

View File

@@ -59,9 +59,7 @@ mod storage_layer;
mod timeline;
use storage_layer::Layer;
use timeline::LayeredTimelineEntry;
pub use timeline::LayeredTimeline;
use timeline::{LayeredTimeline, LayeredTimelineEntry};
// re-export this function so that page_cache.rs can use it.
pub use crate::layered_repository::ephemeral_file::writeback as writeback_ephemeral_file;

View File

@@ -157,14 +157,7 @@ where
// Look up the right page
let cache = page_cache::get();
loop {
match cache
.read_immutable_buf(self.file_id, blknum)
.map_err(|e| {
std::io::Error::new(
std::io::ErrorKind::Other,
format!("Failed to read immutable buf: {e:#}"),
)
})? {
match cache.read_immutable_buf(self.file_id, blknum) {
ReadBufResult::Found(guard) => break Ok(guard),
ReadBufResult::NotFound(mut write_guard) => {
// Read the page from disk into the buffer

View File

@@ -209,7 +209,7 @@ where
reader: R,
}
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
#[derive(Clone, Copy, Debug, PartialEq)]
pub enum VisitDirection {
Forwards,
Backwards,

View File

@@ -12,7 +12,7 @@ use once_cell::sync::Lazy;
use std::cmp::min;
use std::collections::HashMap;
use std::fs::OpenOptions;
use std::io::{self, ErrorKind};
use std::io::{Error, ErrorKind};
use std::ops::DerefMut;
use std::path::PathBuf;
use std::sync::{Arc, RwLock};
@@ -51,7 +51,7 @@ impl EphemeralFile {
conf: &PageServerConf,
tenantid: ZTenantId,
timelineid: ZTimelineId,
) -> Result<EphemeralFile, io::Error> {
) -> Result<EphemeralFile, std::io::Error> {
let mut l = EPHEMERAL_FILES.write().unwrap();
let file_id = l.next_file_id;
l.next_file_id += 1;
@@ -76,7 +76,7 @@ impl EphemeralFile {
})
}
fn fill_buffer(&self, buf: &mut [u8], blkno: u32) -> Result<(), io::Error> {
fn fill_buffer(&self, buf: &mut [u8], blkno: u32) -> Result<(), Error> {
let mut off = 0;
while off < PAGE_SZ {
let n = self
@@ -96,13 +96,10 @@ impl EphemeralFile {
Ok(())
}
fn get_buf_for_write(&self, blkno: u32) -> Result<page_cache::PageWriteGuard, io::Error> {
fn get_buf_for_write(&self, blkno: u32) -> Result<page_cache::PageWriteGuard, Error> {
// Look up the right page
let cache = page_cache::get();
let mut write_guard = match cache
.write_ephemeral_buf(self.file_id, blkno)
.map_err(|e| to_io_error(e, "Failed to write ephemeral buf"))?
{
let mut write_guard = match cache.write_ephemeral_buf(self.file_id, blkno) {
WriteBufResult::Found(guard) => guard,
WriteBufResult::NotFound(mut guard) => {
// Read the page from disk into the buffer
@@ -130,7 +127,7 @@ pub fn is_ephemeral_file(filename: &str) -> bool {
}
impl FileExt for EphemeralFile {
fn read_at(&self, dstbuf: &mut [u8], offset: u64) -> Result<usize, io::Error> {
fn read_at(&self, dstbuf: &mut [u8], offset: u64) -> Result<usize, Error> {
// Look up the right page
let blkno = (offset / PAGE_SZ as u64) as u32;
let off = offset as usize % PAGE_SZ;
@@ -140,10 +137,7 @@ impl FileExt for EphemeralFile {
let mut write_guard;
let cache = page_cache::get();
let buf = match cache
.read_ephemeral_buf(self.file_id, blkno)
.map_err(|e| to_io_error(e, "Failed to read ephemeral buf"))?
{
let buf = match cache.read_ephemeral_buf(self.file_id, blkno) {
ReadBufResult::Found(guard) => {
read_guard = guard;
read_guard.as_ref()
@@ -164,7 +158,7 @@ impl FileExt for EphemeralFile {
Ok(len)
}
fn write_at(&self, srcbuf: &[u8], offset: u64) -> Result<usize, io::Error> {
fn write_at(&self, srcbuf: &[u8], offset: u64) -> Result<usize, Error> {
// Look up the right page
let blkno = (offset / PAGE_SZ as u64) as u32;
let off = offset as usize % PAGE_SZ;
@@ -172,10 +166,7 @@ impl FileExt for EphemeralFile {
let mut write_guard;
let cache = page_cache::get();
let buf = match cache
.write_ephemeral_buf(self.file_id, blkno)
.map_err(|e| to_io_error(e, "Failed to write ephemeral buf"))?
{
let buf = match cache.write_ephemeral_buf(self.file_id, blkno) {
WriteBufResult::Found(guard) => {
write_guard = guard;
write_guard.deref_mut()
@@ -199,7 +190,7 @@ impl FileExt for EphemeralFile {
}
impl BlobWriter for EphemeralFile {
fn write_blob(&mut self, srcbuf: &[u8]) -> Result<u64, io::Error> {
fn write_blob(&mut self, srcbuf: &[u8]) -> Result<u64, Error> {
let pos = self.size;
let mut blknum = (self.size / PAGE_SZ as u64) as u32;
@@ -277,11 +268,11 @@ impl Drop for EphemeralFile {
}
}
pub fn writeback(file_id: u64, blkno: u32, buf: &[u8]) -> Result<(), io::Error> {
pub fn writeback(file_id: u64, blkno: u32, buf: &[u8]) -> Result<(), std::io::Error> {
if let Some(file) = EPHEMERAL_FILES.read().unwrap().files.get(&file_id) {
match file.write_all_at(buf, blkno as u64 * PAGE_SZ as u64) {
Ok(_) => Ok(()),
Err(e) => Err(io::Error::new(
Err(e) => Err(std::io::Error::new(
ErrorKind::Other,
format!(
"failed to write back to ephemeral file at {} error: {}",
@@ -291,7 +282,7 @@ pub fn writeback(file_id: u64, blkno: u32, buf: &[u8]) -> Result<(), io::Error>
)),
}
} else {
Err(io::Error::new(
Err(std::io::Error::new(
ErrorKind::Other,
"could not write back page, not found in ephemeral files hash",
))
@@ -301,14 +292,11 @@ pub fn writeback(file_id: u64, blkno: u32, buf: &[u8]) -> Result<(), io::Error>
impl BlockReader for EphemeralFile {
type BlockLease = page_cache::PageReadGuard<'static>;
fn read_blk(&self, blknum: u32) -> Result<Self::BlockLease, io::Error> {
fn read_blk(&self, blknum: u32) -> Result<Self::BlockLease, std::io::Error> {
// Look up the right page
let cache = page_cache::get();
loop {
match cache
.read_ephemeral_buf(self.file_id, blknum)
.map_err(|e| to_io_error(e, "Failed to read ephemeral buf"))?
{
match cache.read_ephemeral_buf(self.file_id, blknum) {
ReadBufResult::Found(guard) => return Ok(guard),
ReadBufResult::NotFound(mut write_guard) => {
// Read the page from disk into the buffer
@@ -323,10 +311,6 @@ impl BlockReader for EphemeralFile {
}
}
fn to_io_error(e: anyhow::Error, context: &str) -> io::Error {
io::Error::new(ErrorKind::Other, format!("{context}: {e:#}"))
}
#[cfg(test)]
mod tests {
use super::*;
@@ -338,7 +322,7 @@ mod tests {
fn repo_harness(
test_name: &str,
) -> Result<(&'static PageServerConf, ZTenantId, ZTimelineId), io::Error> {
) -> Result<(&'static PageServerConf, ZTenantId, ZTimelineId), Error> {
let repo_dir = PageServerConf::test_repo_dir(test_name);
let _ = fs::remove_dir_all(&repo_dir);
let conf = PageServerConf::dummy_conf(repo_dir);
@@ -355,7 +339,7 @@ mod tests {
// Helper function to slurp contents of a file, starting at the current position,
// into a string
fn read_string(efile: &EphemeralFile, offset: u64, len: usize) -> Result<String, io::Error> {
fn read_string(efile: &EphemeralFile, offset: u64, len: usize) -> Result<String, Error> {
let mut buf = Vec::new();
buf.resize(len, 0u8);
@@ -367,7 +351,7 @@ mod tests {
}
#[test]
fn test_ephemeral_files() -> Result<(), io::Error> {
fn test_ephemeral_files() -> Result<(), Error> {
let (conf, tenantid, timelineid) = repo_harness("ephemeral_files")?;
let file_a = EphemeralFile::create(conf, tenantid, timelineid)?;
@@ -398,7 +382,7 @@ mod tests {
}
#[test]
fn test_ephemeral_blobs() -> Result<(), io::Error> {
fn test_ephemeral_blobs() -> Result<(), Error> {
let (conf, tenantid, timelineid) = repo_harness("ephemeral_blobs")?;
let mut file = EphemeralFile::create(conf, tenantid, timelineid)?;

View File

@@ -4,7 +4,6 @@ use anyhow::{anyhow, bail, ensure, Context, Result};
use bytes::Bytes;
use fail::fail_point;
use itertools::Itertools;
use metrics::core::{AtomicU64, GenericCounter};
use once_cell::sync::Lazy;
use tracing::*;
@@ -45,7 +44,7 @@ use crate::reltag::RelTag;
use crate::tenant_config::TenantConfOpt;
use crate::DatadirTimeline;
use postgres_ffi::v14::xlog_utils::to_pg_timestamp;
use postgres_ffi::xlog_utils::to_pg_timestamp;
use utils::{
lsn::{AtomicLsn, Lsn, RecordLsn},
seqwait::SeqWait,
@@ -224,70 +223,6 @@ impl From<LayeredTimelineEntry> for RepositoryTimeline<LayeredTimeline> {
}
}
struct TimelineMetrics {
pub reconstruct_time_histo: Histogram,
pub materialized_page_cache_hit_counter: GenericCounter<AtomicU64>,
pub flush_time_histo: Histogram,
pub compact_time_histo: Histogram,
pub create_images_time_histo: Histogram,
pub init_logical_size_histo: Histogram,
pub load_layer_map_histo: Histogram,
pub last_record_gauge: IntGauge,
pub wait_lsn_time_histo: Histogram,
pub current_physical_size_gauge: UIntGauge,
}
impl TimelineMetrics {
fn new(tenant_id: &ZTenantId, timeline_id: &ZTimelineId) -> Self {
let tenant_id = tenant_id.to_string();
let timeline_id = timeline_id.to_string();
let reconstruct_time_histo = RECONSTRUCT_TIME
.get_metric_with_label_values(&[&tenant_id, &timeline_id])
.unwrap();
let materialized_page_cache_hit_counter = MATERIALIZED_PAGE_CACHE_HIT
.get_metric_with_label_values(&[&tenant_id, &timeline_id])
.unwrap();
let flush_time_histo = STORAGE_TIME
.get_metric_with_label_values(&["layer flush", &tenant_id, &timeline_id])
.unwrap();
let compact_time_histo = STORAGE_TIME
.get_metric_with_label_values(&["compact", &tenant_id, &timeline_id])
.unwrap();
let create_images_time_histo = STORAGE_TIME
.get_metric_with_label_values(&["create images", &tenant_id, &timeline_id])
.unwrap();
let init_logical_size_histo = STORAGE_TIME
.get_metric_with_label_values(&["init logical size", &tenant_id, &timeline_id])
.unwrap();
let load_layer_map_histo = STORAGE_TIME
.get_metric_with_label_values(&["load layer map", &tenant_id, &timeline_id])
.unwrap();
let last_record_gauge = LAST_RECORD_LSN
.get_metric_with_label_values(&[&tenant_id, &timeline_id])
.unwrap();
let wait_lsn_time_histo = WAIT_LSN_TIME
.get_metric_with_label_values(&[&tenant_id, &timeline_id])
.unwrap();
let current_physical_size_gauge = CURRENT_PHYSICAL_SIZE
.get_metric_with_label_values(&[&tenant_id, &timeline_id])
.unwrap();
TimelineMetrics {
reconstruct_time_histo,
materialized_page_cache_hit_counter,
flush_time_histo,
compact_time_histo,
create_images_time_histo,
init_logical_size_histo,
load_layer_map_histo,
last_record_gauge,
wait_lsn_time_histo,
current_physical_size_gauge,
}
}
}
pub struct LayeredTimeline {
conf: &'static PageServerConf,
tenant_conf: Arc<RwLock<TenantConfOpt>>,
@@ -334,7 +269,14 @@ pub struct LayeredTimeline {
ancestor_lsn: Lsn,
// Metrics
metrics: TimelineMetrics,
reconstruct_time_histo: Histogram,
materialized_page_cache_hit_counter: IntCounter,
flush_time_histo: Histogram,
compact_time_histo: Histogram,
create_images_time_histo: Histogram,
last_record_gauge: IntGauge,
wait_lsn_time_histo: Histogram,
current_physical_size_gauge: UIntGauge,
/// If `true`, will backup its files that appear after each checkpointing to the remote storage.
upload_layers: AtomicBool,
@@ -484,7 +426,7 @@ impl Timeline for LayeredTimeline {
"wait_lsn called by WAL receiver thread"
);
self.metrics.wait_lsn_time_histo.observe_closure_duration(
self.wait_lsn_time_histo.observe_closure_duration(
|| self.last_record_lsn
.wait_for_timeout(lsn, self.conf.wait_lsn_timeout)
.with_context(|| {
@@ -526,8 +468,7 @@ impl Timeline for LayeredTimeline {
self.get_reconstruct_data(key, lsn, &mut reconstruct_state)?;
self.metrics
.reconstruct_time_histo
self.reconstruct_time_histo
.observe_closure_duration(|| self.reconstruct_value(key, lsn, reconstruct_state))
}
@@ -589,7 +530,7 @@ impl Timeline for LayeredTimeline {
}
fn get_physical_size(&self) -> u64 {
self.metrics.current_physical_size_gauge.get()
self.current_physical_size_gauge.get()
}
fn get_physical_size_non_incremental(&self) -> anyhow::Result<u64> {
@@ -663,6 +604,43 @@ impl LayeredTimeline {
walredo_mgr: Arc<dyn WalRedoManager + Send + Sync>,
upload_layers: bool,
) -> LayeredTimeline {
let reconstruct_time_histo = RECONSTRUCT_TIME
.get_metric_with_label_values(&[&tenant_id.to_string(), &timeline_id.to_string()])
.unwrap();
let materialized_page_cache_hit_counter = MATERIALIZED_PAGE_CACHE_HIT
.get_metric_with_label_values(&[&tenant_id.to_string(), &timeline_id.to_string()])
.unwrap();
let flush_time_histo = STORAGE_TIME
.get_metric_with_label_values(&[
"layer flush",
&tenant_id.to_string(),
&timeline_id.to_string(),
])
.unwrap();
let compact_time_histo = STORAGE_TIME
.get_metric_with_label_values(&[
"compact",
&tenant_id.to_string(),
&timeline_id.to_string(),
])
.unwrap();
let create_images_time_histo = STORAGE_TIME
.get_metric_with_label_values(&[
"create images",
&tenant_id.to_string(),
&timeline_id.to_string(),
])
.unwrap();
let last_record_gauge = LAST_RECORD_LSN
.get_metric_with_label_values(&[&tenant_id.to_string(), &timeline_id.to_string()])
.unwrap();
let wait_lsn_time_histo = WAIT_LSN_TIME
.get_metric_with_label_values(&[&tenant_id.to_string(), &timeline_id.to_string()])
.unwrap();
let current_physical_size_gauge = CURRENT_PHYSICAL_SIZE
.get_metric_with_label_values(&[&tenant_id.to_string(), &timeline_id.to_string()])
.unwrap();
let mut result = LayeredTimeline {
conf,
tenant_conf,
@@ -685,7 +663,14 @@ impl LayeredTimeline {
ancestor_timeline: ancestor,
ancestor_lsn: metadata.ancestor_lsn(),
metrics: TimelineMetrics::new(&tenant_id, &timeline_id),
reconstruct_time_histo,
materialized_page_cache_hit_counter,
flush_time_histo,
compact_time_histo,
create_images_time_histo,
last_record_gauge,
wait_lsn_time_histo,
current_physical_size_gauge,
upload_layers: AtomicBool::new(upload_layers),
@@ -721,8 +706,6 @@ impl LayeredTimeline {
let mut layers = self.layers.write().unwrap();
let mut num_layers = 0;
let timer = self.metrics.load_layer_map_histo.start_timer();
// Scan timeline directory and create ImageFileName and DeltaFilename
// structs representing all files on disk
let timeline_path = self.conf.timeline_path(&self.timeline_id, &self.tenant_id);
@@ -794,11 +777,7 @@ impl LayeredTimeline {
"loaded layer map with {} layers at {}, total physical size: {}",
num_layers, disk_consistent_lsn, total_physical_size
);
self.metrics
.current_physical_size_gauge
.set(total_physical_size);
timer.stop_and_record();
self.current_physical_size_gauge.set(total_physical_size);
Ok(())
}
@@ -829,16 +808,12 @@ impl LayeredTimeline {
}
}
let timer = self.metrics.init_logical_size_histo.start_timer();
// Have to calculate it the hard way
let last_lsn = self.get_last_record_lsn();
let logical_size = self.get_current_logical_size_non_incremental(last_lsn)?;
self.current_logical_size
.store(logical_size as isize, AtomicOrdering::SeqCst);
debug!("calculated logical size the hard way: {}", logical_size);
timer.stop_and_record();
Ok(())
}
@@ -903,7 +878,7 @@ impl LayeredTimeline {
ValueReconstructResult::Continue => {
// If we reached an earlier cached page image, we're done.
if cont_lsn == cached_lsn + 1 {
self.metrics.materialized_page_cache_hit_counter.inc_by(1);
self.materialized_page_cache_hit_counter.inc_by(1);
return Ok(());
}
if prev_lsn <= cont_lsn {
@@ -1099,7 +1074,7 @@ impl LayeredTimeline {
fn finish_write(&self, new_lsn: Lsn) {
assert!(new_lsn.is_aligned());
self.metrics.last_record_gauge.set(new_lsn.0 as i64);
self.last_record_gauge.set(new_lsn.0 as i64);
self.last_record_lsn.advance(new_lsn);
}
@@ -1203,7 +1178,7 @@ impl LayeredTimeline {
}
};
let timer = self.metrics.flush_time_histo.start_timer();
let timer = self.flush_time_histo.start_timer();
loop {
let layers = self.layers.read().unwrap();
@@ -1374,7 +1349,7 @@ impl LayeredTimeline {
// update the timeline's physical size
let sz = new_delta_path.metadata()?.len();
self.metrics.current_physical_size_gauge.add(sz);
self.current_physical_size_gauge.add(sz);
// update metrics
NUM_PERSISTENT_FILES_CREATED.inc_by(1);
PERSISTENT_BYTES_WRITTEN.inc_by(sz);
@@ -1443,7 +1418,7 @@ impl LayeredTimeline {
}
// 3. Compact
let timer = self.metrics.compact_time_histo.start_timer();
let timer = self.compact_time_histo.start_timer();
self.compact_level0(target_file_size)?;
timer.stop_and_record();
}
@@ -1519,7 +1494,7 @@ impl LayeredTimeline {
lsn: Lsn,
force: bool,
) -> Result<HashSet<PathBuf>> {
let timer = self.metrics.create_images_time_histo.start_timer();
let timer = self.create_images_time_histo.start_timer();
let mut image_layers: Vec<ImageLayer> = Vec::new();
let mut layer_paths_to_upload = HashSet::new();
for partition in partitioning.parts.iter() {
@@ -1563,8 +1538,7 @@ impl LayeredTimeline {
let mut layers = self.layers.write().unwrap();
for l in image_layers {
self.metrics
.current_physical_size_gauge
self.current_physical_size_gauge
.add(l.path().metadata()?.len());
layers.insert_historic(Arc::new(l));
}
@@ -1814,8 +1788,7 @@ impl LayeredTimeline {
let new_delta_path = l.path();
// update the timeline's physical size
self.metrics
.current_physical_size_gauge
self.current_physical_size_gauge
.add(new_delta_path.metadata()?.len());
new_layer_paths.insert(new_delta_path);
@@ -1828,9 +1801,7 @@ impl LayeredTimeline {
drop(all_keys_iter);
for l in deltas_to_compact {
if let Some(path) = l.local_path() {
self.metrics
.current_physical_size_gauge
.sub(path.metadata()?.len());
self.current_physical_size_gauge.sub(path.metadata()?.len());
layer_paths_do_delete.insert(path);
}
l.delete()?;
@@ -2087,9 +2058,7 @@ impl LayeredTimeline {
let mut layer_paths_to_delete = HashSet::with_capacity(layers_to_remove.len());
for doomed_layer in layers_to_remove {
if let Some(path) = doomed_layer.local_path() {
self.metrics
.current_physical_size_gauge
.sub(path.metadata()?.len());
self.current_physical_size_gauge.sub(path.metadata()?.len());
layer_paths_to_delete.insert(path);
}
doomed_layer.delete()?;
@@ -2117,7 +2086,7 @@ impl LayeredTimeline {
key: Key,
request_lsn: Lsn,
mut data: ValueReconstructState,
) -> anyhow::Result<Bytes> {
) -> Result<Bytes> {
// Perform WAL redo if needed
data.records.reverse();
@@ -2167,15 +2136,13 @@ impl LayeredTimeline {
if img.len() == page_cache::PAGE_SZ {
let cache = page_cache::get();
cache
.memorize_materialized_page(
self.tenant_id,
self.timeline_id,
key,
last_rec_lsn,
&img,
)
.context("Materialized page memoization failed")?;
cache.memorize_materialized_page(
self.tenant_id,
self.timeline_id,
key,
last_rec_lsn,
&img,
);
}
Ok(img)

View File

@@ -28,6 +28,7 @@ use tracing::info;
use crate::thread_mgr::ThreadKind;
use metrics::{register_int_gauge_vec, IntGaugeVec};
use layered_repository::LayeredRepository;
use pgdatadir_mapping::DatadirTimeline;
/// Current storage format version
@@ -61,6 +62,9 @@ pub enum CheckpointConfig {
Forced,
}
pub type RepositoryImpl = LayeredRepository;
pub type TimelineImpl = <LayeredRepository as repository::Repository>::Timeline;
pub fn shutdown_pageserver(exit_code: i32) {
// Shut down the libpq endpoint thread. This prevents new connections from
// being accepted.

View File

@@ -45,7 +45,6 @@ use std::{
},
};
use anyhow::Context;
use once_cell::sync::OnceCell;
use tracing::error;
use utils::{
@@ -84,7 +83,7 @@ pub fn get() -> &'static PageCache {
}
}
pub const PAGE_SZ: usize = postgres_ffi::BLCKSZ as usize;
pub const PAGE_SZ: usize = postgres_ffi::pg_constants::BLCKSZ as usize;
const MAX_USAGE_COUNT: u8 = 5;
///
@@ -343,7 +342,7 @@ impl PageCache {
key: Key,
lsn: Lsn,
img: &[u8],
) -> anyhow::Result<()> {
) {
let cache_key = CacheKey::MaterializedPage {
hash_key: MaterializedPageHashKey {
tenant_id,
@@ -353,7 +352,7 @@ impl PageCache {
lsn,
};
match self.lock_for_write(&cache_key)? {
match self.lock_for_write(&cache_key) {
WriteBufResult::Found(write_guard) => {
// We already had it in cache. Another thread must've put it there
// concurrently. Check that it had the same contents that we
@@ -365,19 +364,17 @@ impl PageCache {
write_guard.mark_valid();
}
}
Ok(())
}
// Section 1.2: Public interface functions for working with Ephemeral pages.
pub fn read_ephemeral_buf(&self, file_id: u64, blkno: u32) -> anyhow::Result<ReadBufResult> {
pub fn read_ephemeral_buf(&self, file_id: u64, blkno: u32) -> ReadBufResult {
let mut cache_key = CacheKey::EphemeralPage { file_id, blkno };
self.lock_for_read(&mut cache_key)
}
pub fn write_ephemeral_buf(&self, file_id: u64, blkno: u32) -> anyhow::Result<WriteBufResult> {
pub fn write_ephemeral_buf(&self, file_id: u64, blkno: u32) -> WriteBufResult {
let cache_key = CacheKey::EphemeralPage { file_id, blkno };
self.lock_for_write(&cache_key)
@@ -405,7 +402,7 @@ impl PageCache {
// Section 1.3: Public interface functions for working with immutable file pages.
pub fn read_immutable_buf(&self, file_id: u64, blkno: u32) -> anyhow::Result<ReadBufResult> {
pub fn read_immutable_buf(&self, file_id: u64, blkno: u32) -> ReadBufResult {
let mut cache_key = CacheKey::ImmutableFilePage { file_id, blkno };
self.lock_for_read(&mut cache_key)
@@ -498,16 +495,15 @@ impl PageCache {
/// }
/// ```
///
fn lock_for_read(&self, cache_key: &mut CacheKey) -> anyhow::Result<ReadBufResult> {
fn lock_for_read(&self, cache_key: &mut CacheKey) -> ReadBufResult {
loop {
// First check if the key already exists in the cache.
if let Some(read_guard) = self.try_lock_for_read(cache_key) {
return Ok(ReadBufResult::Found(read_guard));
return ReadBufResult::Found(read_guard);
}
// Not found. Find a victim buffer
let (slot_idx, mut inner) =
self.find_victim().context("Failed to find evict victim")?;
let (slot_idx, mut inner) = self.find_victim();
// Insert mapping for this. At this point, we may find that another
// thread did the same thing concurrently. In that case, we evicted
@@ -530,10 +526,10 @@ impl PageCache {
inner.dirty = false;
slot.usage_count.store(1, Ordering::Relaxed);
return Ok(ReadBufResult::NotFound(PageWriteGuard {
return ReadBufResult::NotFound(PageWriteGuard {
inner,
valid: false,
}));
});
}
}
@@ -560,16 +556,15 @@ impl PageCache {
///
/// Similar to lock_for_read(), but the returned buffer is write-locked and
/// may be modified by the caller even if it's already found in the cache.
fn lock_for_write(&self, cache_key: &CacheKey) -> anyhow::Result<WriteBufResult> {
fn lock_for_write(&self, cache_key: &CacheKey) -> WriteBufResult {
loop {
// First check if the key already exists in the cache.
if let Some(write_guard) = self.try_lock_for_write(cache_key) {
return Ok(WriteBufResult::Found(write_guard));
return WriteBufResult::Found(write_guard);
}
// Not found. Find a victim buffer
let (slot_idx, mut inner) =
self.find_victim().context("Failed to find evict victim")?;
let (slot_idx, mut inner) = self.find_victim();
// Insert mapping for this. At this point, we may find that another
// thread did the same thing concurrently. In that case, we evicted
@@ -592,10 +587,10 @@ impl PageCache {
inner.dirty = false;
slot.usage_count.store(1, Ordering::Relaxed);
return Ok(WriteBufResult::NotFound(PageWriteGuard {
return WriteBufResult::NotFound(PageWriteGuard {
inner,
valid: false,
}));
});
}
}
@@ -759,7 +754,7 @@ impl PageCache {
/// Find a slot to evict.
///
/// On return, the slot is empty and write-locked.
fn find_victim(&self) -> anyhow::Result<(usize, RwLockWriteGuard<SlotInner>)> {
fn find_victim(&self) -> (usize, RwLockWriteGuard<SlotInner>) {
let iter_limit = self.slots.len() * 10;
let mut iters = 0;
loop {
@@ -772,7 +767,7 @@ impl PageCache {
let mut inner = match slot.inner.try_write() {
Ok(inner) => inner,
Err(TryLockError::Poisoned(err)) => {
anyhow::bail!("buffer lock was poisoned: {err:?}")
panic!("buffer lock was poisoned: {:?}", err)
}
Err(TryLockError::WouldBlock) => {
// If we have looped through the whole buffer pool 10 times
@@ -782,7 +777,7 @@ impl PageCache {
// there are buffers in the pool. In practice, with a reasonably
// large buffer pool it really shouldn't happen.
if iters > iter_limit {
anyhow::bail!("exceeded evict iter limit");
panic!("could not find a victim buffer to evict");
}
continue;
}
@@ -809,7 +804,7 @@ impl PageCache {
inner.dirty = false;
inner.key = None;
}
return Ok((slot_idx, inner));
return (slot_idx, inner);
}
}
}

View File

@@ -40,10 +40,9 @@ use crate::thread_mgr;
use crate::thread_mgr::ThreadKind;
use crate::CheckpointConfig;
use metrics::{register_histogram_vec, HistogramVec};
use postgres_ffi::v14::xlog_utils::to_pg_timestamp;
use postgres_ffi::xlog_utils::to_pg_timestamp;
use postgres_ffi::v14::pg_constants::DEFAULTTABLESPACE_OID;
use postgres_ffi::BLCKSZ;
use postgres_ffi::pg_constants;
// Wrapped in libpq CopyData
enum PagestreamFeMessage {
@@ -726,9 +725,10 @@ impl PageServerHandler {
let latest_gc_cutoff_lsn = timeline.get_latest_gc_cutoff_lsn();
let lsn = Self::wait_or_get_last_lsn(timeline, req.lsn, req.latest, &latest_gc_cutoff_lsn)?;
let total_blocks = timeline.get_db_size(DEFAULTTABLESPACE_OID, req.dbnode, lsn)?;
let total_blocks =
timeline.get_db_size(pg_constants::DEFAULTTABLESPACE_OID, req.dbnode, lsn)?;
let db_size = total_blocks as i64 * BLCKSZ as i64;
let db_size = total_blocks as i64 * pg_constants::BLCKSZ as i64;
Ok(PagestreamBeMessage::DbSize(PagestreamDbSizeResponse {
db_size,

View File

@@ -13,10 +13,8 @@ use crate::repository::*;
use crate::walrecord::ZenithWalRecord;
use anyhow::{bail, ensure, Result};
use bytes::{Buf, Bytes};
use postgres_ffi::v14::pg_constants;
use postgres_ffi::v14::xlog_utils::TimestampTz;
use postgres_ffi::BLCKSZ;
use postgres_ffi::{Oid, TransactionId};
use postgres_ffi::xlog_utils::TimestampTz;
use postgres_ffi::{pg_constants, Oid, TransactionId};
use serde::{Deserialize, Serialize};
use std::collections::{HashMap, HashSet};
use std::ops::Range;
@@ -299,9 +297,9 @@ pub trait DatadirTimeline: Timeline {
let clog_page =
self.get_slru_page_at_lsn(SlruKind::Clog, segno, blknum, probe_lsn)?;
if clog_page.len() == BLCKSZ as usize + 8 {
if clog_page.len() == pg_constants::BLCKSZ as usize + 8 {
let mut timestamp_bytes = [0u8; 8];
timestamp_bytes.copy_from_slice(&clog_page[BLCKSZ as usize..]);
timestamp_bytes.copy_from_slice(&clog_page[pg_constants::BLCKSZ as usize..]);
let timestamp = TimestampTz::from_be_bytes(timestamp_bytes);
if timestamp >= search_timestamp {
@@ -384,7 +382,7 @@ pub trait DatadirTimeline: Timeline {
total_size += relsize as usize;
}
}
Ok(total_size * BLCKSZ as usize)
Ok(total_size * pg_constants::BLCKSZ as usize)
}
///
@@ -914,7 +912,7 @@ impl<'a, T: DatadirTimeline> DatadirModification<'a, T> {
result?;
if pending_nblocks != 0 {
writer.update_current_logical_size(pending_nblocks * BLCKSZ as isize);
writer.update_current_logical_size(pending_nblocks * pg_constants::BLCKSZ as isize);
self.pending_nblocks = 0;
}
@@ -942,7 +940,7 @@ impl<'a, T: DatadirTimeline> DatadirModification<'a, T> {
writer.finish_write(lsn);
if pending_nblocks != 0 {
writer.update_current_logical_size(pending_nblocks * BLCKSZ as isize);
writer.update_current_logical_size(pending_nblocks * pg_constants::BLCKSZ as isize);
}
Ok(())
@@ -1016,7 +1014,7 @@ struct SlruSegmentDirectory {
segments: HashSet<u32>,
}
static ZERO_PAGE: Bytes = Bytes::from_static(&[0u8; BLCKSZ as usize]);
static ZERO_PAGE: Bytes = Bytes::from_static(&[0u8; pg_constants::BLCKSZ as usize]);
// Layout of the Key address space
//

View File

@@ -2,9 +2,8 @@ use serde::{Deserialize, Serialize};
use std::cmp::Ordering;
use std::fmt;
use postgres_ffi::v14::pg_constants;
use postgres_ffi::v14::relfile_utils::forknumber_to_name;
use postgres_ffi::Oid;
use postgres_ffi::relfile_utils::forknumber_to_name;
use postgres_ffi::{pg_constants, Oid};
///
/// Relation data file segment id throughout the Postgres cluster.

View File

@@ -412,6 +412,7 @@ pub mod repo_harness {
use std::sync::{Arc, RwLock, RwLockReadGuard, RwLockWriteGuard};
use std::{fs, path::PathBuf};
use crate::RepositoryImpl;
use crate::{
config::PageServerConf,
layered_repository::LayeredRepository,
@@ -507,11 +508,11 @@ pub mod repo_harness {
})
}
pub fn load(&self) -> LayeredRepository {
pub fn load(&self) -> RepositoryImpl {
self.try_load().expect("failed to load test repo")
}
pub fn try_load(&self) -> Result<LayeredRepository> {
pub fn try_load(&self) -> Result<RepositoryImpl> {
let walredo_mgr = Arc::new(TestRedoManager);
let repo = LayeredRepository::new(

View File

@@ -3,14 +3,16 @@
use crate::config::PageServerConf;
use crate::http::models::TenantInfo;
use crate::layered_repository::{load_metadata, LayeredRepository, LayeredTimeline};
use crate::layered_repository::{load_metadata, LayeredRepository};
use crate::repository::Repository;
use crate::storage_sync::index::{RemoteIndex, RemoteTimelineIndex};
use crate::storage_sync::{self, LocalTimelineInitStatus, SyncStartupData};
use crate::tenant_config::TenantConfOpt;
use crate::thread_mgr::ThreadKind;
use crate::timelines::CreateRepo;
use crate::walredo::PostgresRedoManager;
use crate::{thread_mgr, timelines, walreceiver};
use crate::{RepositoryImpl, TimelineImpl};
use anyhow::Context;
use serde::{Deserialize, Serialize};
use std::collections::hash_map::Entry;
@@ -94,13 +96,13 @@ mod tenants_state {
struct Tenant {
state: TenantState,
/// Contains in-memory state, including the timeline that might not yet flushed on disk or loaded form disk.
repo: Arc<LayeredRepository>,
repo: Arc<RepositoryImpl>,
/// Timelines, located locally in the pageserver's datadir.
/// Timelines can entirely be removed entirely by the `detach` operation only.
///
/// Local timelines have more metadata that's loaded into memory,
/// that is located in the `repo.timelines` field, [`crate::layered_repository::LayeredTimelineEntry`].
local_timelines: HashMap<ZTimelineId, Arc<LayeredTimeline>>,
local_timelines: HashMap<ZTimelineId, Arc<<RepositoryImpl as Repository>::Timeline>>,
}
#[derive(Debug, Serialize, Deserialize, Clone, Copy, PartialEq, Eq)]
@@ -177,7 +179,7 @@ pub enum LocalTimelineUpdate {
},
Attach {
id: ZTenantTimelineId,
datadir: Arc<LayeredTimeline>,
datadir: Arc<<RepositoryImpl as Repository>::Timeline>,
},
}
@@ -283,8 +285,10 @@ pub fn create_tenant_repository(
conf,
tenant_conf,
tenant_id,
wal_redo_manager,
remote_index,
CreateRepo::Real {
wal_redo_manager,
remote_index,
},
)?;
v.insert(Tenant {
state: TenantState::Idle,
@@ -365,7 +369,7 @@ pub fn set_tenant_state(tenant_id: ZTenantId, new_state: TenantState) -> anyhow:
Ok(())
}
pub fn get_repository_for_tenant(tenant_id: ZTenantId) -> anyhow::Result<Arc<LayeredRepository>> {
pub fn get_repository_for_tenant(tenant_id: ZTenantId) -> anyhow::Result<Arc<RepositoryImpl>> {
let m = tenants_state::read_tenants();
let tenant = m
.get(&tenant_id)
@@ -379,7 +383,7 @@ pub fn get_repository_for_tenant(tenant_id: ZTenantId) -> anyhow::Result<Arc<Lay
pub fn get_local_timeline_with_load(
tenant_id: ZTenantId,
timeline_id: ZTimelineId,
) -> anyhow::Result<Arc<LayeredTimeline>> {
) -> anyhow::Result<Arc<TimelineImpl>> {
let mut m = tenants_state::write_tenants();
let tenant = m
.get_mut(&tenant_id)
@@ -484,9 +488,9 @@ pub fn detach_tenant(conf: &'static PageServerConf, tenant_id: ZTenantId) -> any
}
fn load_local_timeline(
repo: &LayeredRepository,
repo: &RepositoryImpl,
timeline_id: ZTimelineId,
) -> anyhow::Result<Arc<LayeredTimeline>> {
) -> anyhow::Result<Arc<TimelineImpl>> {
let inmem_timeline = repo.get_timeline_load(timeline_id).with_context(|| {
format!("Inmem timeline {timeline_id} not found in tenant's repository")
})?;
@@ -630,7 +634,7 @@ fn load_local_repo(
conf: &'static PageServerConf,
tenant_id: ZTenantId,
remote_index: &RemoteIndex,
) -> anyhow::Result<Arc<LayeredRepository>> {
) -> anyhow::Result<Arc<RepositoryImpl>> {
let mut m = tenants_state::write_tenants();
let tenant = m.entry(tenant_id).or_insert_with(|| {
// Set up a WAL redo manager, for applying WAL records.

View File

@@ -3,7 +3,7 @@
//
use anyhow::{bail, ensure, Context, Result};
use postgres_ffi::ControlFileData;
use std::{
fs,
path::Path,
@@ -13,21 +13,18 @@ use std::{
use tracing::*;
use utils::{
crashsafe_dir,
crashsafe_dir, logging,
lsn::Lsn,
zid::{ZTenantId, ZTimelineId},
};
use crate::import_datadir;
use crate::tenant_mgr;
use crate::{
config::PageServerConf, repository::Repository, storage_sync::index::RemoteIndex,
tenant_config::TenantConfOpt,
};
use crate::{
layered_repository::{LayeredRepository, LayeredTimeline},
walredo::WalRedoManager,
tenant_config::TenantConfOpt, RepositoryImpl, TimelineImpl,
};
use crate::{import_datadir, LOG_FILE_NAME};
use crate::{layered_repository::LayeredRepository, walredo::WalRedoManager};
use crate::{repository::Timeline, CheckpointConfig};
#[derive(Debug, Clone, Copy)]
@@ -36,13 +33,69 @@ pub struct PointInTime {
pub lsn: Lsn,
}
pub fn init_pageserver(
conf: &'static PageServerConf,
create_tenant: Option<ZTenantId>,
initial_timeline_id: Option<ZTimelineId>,
) -> anyhow::Result<()> {
// Initialize logger
// use true as daemonize parameter because otherwise we pollute zenith cli output with a few pages long output of info messages
let _log_file = logging::init(LOG_FILE_NAME, true)?;
crashsafe_dir::create_dir_all(conf.tenants_path())?;
if let Some(tenant_id) = create_tenant {
println!("initializing tenantid {}", tenant_id);
let repo = create_repo(conf, TenantConfOpt::default(), tenant_id, CreateRepo::Dummy)
.context("failed to create repo")?;
let new_timeline_id = initial_timeline_id.unwrap_or_else(ZTimelineId::generate);
bootstrap_timeline(conf, tenant_id, new_timeline_id, repo.as_ref())
.context("failed to create initial timeline")?;
println!("initial timeline {} created", new_timeline_id)
} else if initial_timeline_id.is_some() {
println!("Ignoring initial timeline parameter, due to no tenant id to create given");
}
println!("pageserver init succeeded");
Ok(())
}
pub enum CreateRepo {
Real {
wal_redo_manager: Arc<dyn WalRedoManager + Send + Sync>,
remote_index: RemoteIndex,
},
Dummy,
}
pub fn create_repo(
conf: &'static PageServerConf,
tenant_conf: TenantConfOpt,
tenant_id: ZTenantId,
wal_redo_manager: Arc<dyn WalRedoManager + Send + Sync>,
remote_index: RemoteIndex,
) -> Result<Arc<LayeredRepository>> {
create_repo: CreateRepo,
) -> Result<Arc<RepositoryImpl>> {
let (wal_redo_manager, remote_index) = match create_repo {
CreateRepo::Real {
wal_redo_manager,
remote_index,
} => (wal_redo_manager, remote_index),
CreateRepo::Dummy => {
// We don't use the real WAL redo manager, because we don't want to spawn the WAL redo
// process during repository initialization.
//
// FIXME: That caused trouble, because the WAL redo manager spawned a thread that launched
// initdb in the background, and it kept running even after the "zenith init" had exited.
// In tests, we started the page server immediately after that, so that initdb was still
// running in the background, and we failed to run initdb again in the same directory. This
// has been solved for the rapid init+start case now, but the general race condition remains
// if you restart the server quickly. The WAL redo manager doesn't use a separate thread
// anymore, but I think that could still happen.
let wal_redo_manager = Arc::new(crate::walredo::DummyRedoManager {});
(wal_redo_manager as _, RemoteIndex::default())
}
};
let repo_dir = conf.tenant_path(&tenant_id);
ensure!(
!repo_dir.exists(),
@@ -69,6 +122,16 @@ pub fn create_repo(
)))
}
// Returns checkpoint LSN from controlfile
fn get_lsn_from_controlfile(path: &Path) -> Result<Lsn> {
// Read control file to extract the LSN
let controlfile_path = path.join("global").join("pg_control");
let controlfile = ControlFileData::decode(&fs::read(controlfile_path)?)?;
let lsn = controlfile.checkPoint;
Ok(Lsn(lsn))
}
// Create the cluster temporarily in 'initdbpath' directory inside the repository
// to get bootstrap data for timeline initialization.
//
@@ -118,7 +181,7 @@ fn bootstrap_timeline<R: Repository>(
run_initdb(conf, &initdb_path)?;
let pgdata_path = initdb_path;
let lsn = import_datadir::get_lsn_from_controlfile(&pgdata_path)?.align();
let lsn = get_lsn_from_controlfile(&pgdata_path)?.align();
// Import the contents of the data directory at the initial checkpoint
// LSN, and any WAL after that.
@@ -160,7 +223,7 @@ pub(crate) fn create_timeline(
new_timeline_id: Option<ZTimelineId>,
ancestor_timeline_id: Option<ZTimelineId>,
mut ancestor_start_lsn: Option<Lsn>,
) -> Result<Option<(ZTimelineId, Arc<LayeredTimeline>)>> {
) -> Result<Option<(ZTimelineId, Arc<TimelineImpl>)>> {
let new_timeline_id = new_timeline_id.unwrap_or_else(ZTimelineId::generate);
let repo = tenant_mgr::get_repository_for_tenant(tenant_id)?;

View File

@@ -22,8 +22,8 @@
//! bespoken Rust code.
use anyhow::Context;
use postgres_ffi::v14::nonrelfile_utils::clogpage_precedes;
use postgres_ffi::v14::nonrelfile_utils::slru_may_delete_clogsegment;
use postgres_ffi::nonrelfile_utils::clogpage_precedes;
use postgres_ffi::nonrelfile_utils::slru_may_delete_clogsegment;
use postgres_ffi::{page_is_new, page_set_lsn};
use anyhow::Result;
@@ -33,12 +33,10 @@ use tracing::*;
use crate::pgdatadir_mapping::*;
use crate::reltag::{RelTag, SlruKind};
use crate::walrecord::*;
use postgres_ffi::v14::nonrelfile_utils::mx_offset_to_member_segment;
use postgres_ffi::v14::pg_constants;
use postgres_ffi::v14::xlog_utils::*;
use postgres_ffi::v14::CheckPoint;
use postgres_ffi::nonrelfile_utils::mx_offset_to_member_segment;
use postgres_ffi::xlog_utils::*;
use postgres_ffi::TransactionId;
use postgres_ffi::BLCKSZ;
use postgres_ffi::{pg_constants, CheckPoint};
use utils::lsn::Lsn;
static ZERO_PAGE: Bytes = Bytes::from_static(&[0u8; 8192]);
@@ -295,7 +293,7 @@ impl<'a, T: DatadirTimeline> WalIngest<'a, T> {
// Extract page image from FPI record
let img_len = blk.bimg_len as usize;
let img_offs = blk.bimg_offset as usize;
let mut image = BytesMut::with_capacity(BLCKSZ as usize);
let mut image = BytesMut::with_capacity(pg_constants::BLCKSZ as usize);
image.extend_from_slice(&decoded.record[img_offs..img_offs + img_len]);
if blk.hole_length != 0 {
@@ -311,7 +309,7 @@ impl<'a, T: DatadirTimeline> WalIngest<'a, T> {
if !page_is_new(&image) {
page_set_lsn(&mut image, lsn)
}
assert_eq!(image.len(), BLCKSZ as usize);
assert_eq!(image.len(), pg_constants::BLCKSZ as usize);
self.put_rel_page_image(modification, rel, blk.blkno, image.freeze())?;
} else {
let rec = ZenithWalRecord::Postgres {
@@ -1035,8 +1033,7 @@ mod tests {
use crate::pgdatadir_mapping::create_test_timeline;
use crate::repository::repo_harness::*;
use crate::repository::Timeline;
use postgres_ffi::v14::xlog_utils::SIZEOF_CHECKPOINT;
use postgres_ffi::RELSEG_SIZE;
use postgres_ffi::pg_constants;
/// Arbitrary relation tag, for testing.
const TESTREL_A: RelTag = RelTag {
@@ -1325,7 +1322,7 @@ mod tests {
let mut walingest = init_walingest_test(&*tline)?;
let mut lsn = 0x10;
for blknum in 0..RELSEG_SIZE + 1 {
for blknum in 0..pg_constants::RELSEG_SIZE + 1 {
lsn += 0x10;
let mut m = tline.begin_modification(Lsn(lsn));
let img = TEST_IMG(&format!("foo blk {} at {}", blknum, Lsn(lsn)));
@@ -1335,22 +1332,31 @@ mod tests {
assert_current_logical_size(&*tline, Lsn(lsn));
assert_eq!(tline.get_rel_size(TESTREL_A, Lsn(lsn))?, RELSEG_SIZE + 1);
assert_eq!(
tline.get_rel_size(TESTREL_A, Lsn(lsn))?,
pg_constants::RELSEG_SIZE + 1
);
// Truncate one block
lsn += 0x10;
let mut m = tline.begin_modification(Lsn(lsn));
walingest.put_rel_truncation(&mut m, TESTREL_A, RELSEG_SIZE)?;
walingest.put_rel_truncation(&mut m, TESTREL_A, pg_constants::RELSEG_SIZE)?;
m.commit()?;
assert_eq!(tline.get_rel_size(TESTREL_A, Lsn(lsn))?, RELSEG_SIZE);
assert_eq!(
tline.get_rel_size(TESTREL_A, Lsn(lsn))?,
pg_constants::RELSEG_SIZE
);
assert_current_logical_size(&*tline, Lsn(lsn));
// Truncate another block
lsn += 0x10;
let mut m = tline.begin_modification(Lsn(lsn));
walingest.put_rel_truncation(&mut m, TESTREL_A, RELSEG_SIZE - 1)?;
walingest.put_rel_truncation(&mut m, TESTREL_A, pg_constants::RELSEG_SIZE - 1)?;
m.commit()?;
assert_eq!(tline.get_rel_size(TESTREL_A, Lsn(lsn))?, RELSEG_SIZE - 1);
assert_eq!(
tline.get_rel_size(TESTREL_A, Lsn(lsn))?,
pg_constants::RELSEG_SIZE - 1
);
assert_current_logical_size(&*tline, Lsn(lsn));
// Truncate to 1500, and then truncate all the way down to 0, one block at a time

View File

@@ -16,7 +16,6 @@ use std::{
time::Duration,
};
use crate::{layered_repository::LayeredTimeline, repository::Timeline};
use anyhow::Context;
use chrono::{NaiveDateTime, Utc};
use etcd_broker::{
@@ -26,7 +25,12 @@ use etcd_broker::{
use tokio::select;
use tracing::*;
use crate::{exponential_backoff, DEFAULT_BASE_BACKOFF_SECONDS, DEFAULT_MAX_BACKOFF_SECONDS};
use crate::{
exponential_backoff,
repository::{Repository, Timeline},
DEFAULT_BASE_BACKOFF_SECONDS, DEFAULT_MAX_BACKOFF_SECONDS,
};
use crate::{RepositoryImpl, TimelineImpl};
use utils::{
lsn::Lsn,
zid::{NodeId, ZTenantTimelineId},
@@ -39,7 +43,7 @@ pub(super) fn spawn_connection_manager_task(
id: ZTenantTimelineId,
broker_loop_prefix: String,
mut client: Client,
local_timeline: Arc<LayeredTimeline>,
local_timeline: Arc<TimelineImpl>,
wal_connect_timeout: Duration,
lagging_wal_timeout: Duration,
max_lsn_wal_lag: NonZeroU64,
@@ -96,8 +100,6 @@ async fn connection_manager_loop_step(
info!("Subscribed for etcd timeline changes, waiting for new etcd data");
loop {
let time_until_next_retry = walreceiver_state.time_until_next_retry();
select! {
broker_connection_result = &mut broker_subscription.watcher_handle => {
cleanup_broker_connection(broker_connection_result, walreceiver_state);
@@ -112,23 +114,27 @@ async fn connection_manager_loop_step(
} => {
let wal_connection = walreceiver_state.wal_connection.as_mut().expect("Should have a connection, as checked by the corresponding select! guard");
match wal_connection_update {
TaskEvent::Started => {},
TaskEvent::Started => {
*walreceiver_state.wal_connection_attempts.entry(wal_connection.sk_id).or_insert(0) += 1;
},
TaskEvent::NewEvent(status) => {
if status.has_processed_wal {
// We have advanced last_record_lsn by processing the WAL received
// from this safekeeper. This is good enough to clean unsuccessful
// retries history and allow reconnecting to this safekeeper without
// sleeping for a long time.
walreceiver_state.wal_connection_retries.remove(&wal_connection.sk_id);
if status.has_received_wal {
// Reset connection attempts here only, we know that safekeeper is healthy
// because it can send us a WAL update.
walreceiver_state.wal_connection_attempts.remove(&wal_connection.sk_id);
}
wal_connection.status = status;
},
TaskEvent::End(end_result) => {
match end_result {
Ok(()) => debug!("WAL receiving task finished"),
Err(e) => warn!("WAL receiving task failed: {e}"),
Err(e) => {
warn!("WAL receiving task failed: {e}");
// If the task failed, set the connection attempts to at least 1, to try other safekeepers.
let _ = *walreceiver_state.wal_connection_attempts.entry(wal_connection.sk_id).or_insert(1);
}
};
walreceiver_state.drop_old_connection(false).await;
walreceiver_state.wal_connection = None;
},
}
},
@@ -152,8 +158,6 @@ async fn connection_manager_loop_step(
}
}
},
_ = async { tokio::time::sleep(time_until_next_retry.unwrap()).await }, if time_until_next_retry.is_some() => {}
}
// Fetch more etcd timeline updates, but limit ourselves since they may arrive quickly.
@@ -234,15 +238,11 @@ async fn subscribe_for_timeline_updates(
}
}
const WALCONNECTION_RETRY_MIN_BACKOFF_SECONDS: f64 = 0.1;
const WALCONNECTION_RETRY_MAX_BACKOFF_SECONDS: f64 = 15.0;
const WALCONNECTION_RETRY_BACKOFF_MULTIPLIER: f64 = 1.5;
/// All data that's needed to run endless broker loop and keep the WAL streaming connection alive, if possible.
struct WalreceiverState {
id: ZTenantTimelineId,
/// Use pageserver data about the timeline to filter out some of the safekeepers.
local_timeline: Arc<LayeredTimeline>,
local_timeline: Arc<TimelineImpl>,
/// The timeout on the connection to safekeeper for WAL streaming.
wal_connect_timeout: Duration,
/// The timeout to use to determine when the current connection is "stale" and reconnect to the other one.
@@ -251,8 +251,7 @@ struct WalreceiverState {
max_lsn_wal_lag: NonZeroU64,
/// Current connection to safekeeper for WAL streaming.
wal_connection: Option<WalConnection>,
/// Info about retries and unsuccessful attempts to connect to safekeepers.
wal_connection_retries: HashMap<NodeId, RetryInfo>,
wal_connection_attempts: HashMap<NodeId, u32>,
/// Data about all timelines, available for connection, fetched from etcd, grouped by their corresponding safekeeper node id.
wal_stream_candidates: HashMap<NodeId, EtcdSkTimeline>,
}
@@ -260,8 +259,6 @@ struct WalreceiverState {
/// Current connection data.
#[derive(Debug)]
struct WalConnection {
/// Time when the connection was initiated.
started_at: NaiveDateTime,
/// Current safekeeper pageserver is connected to for WAL streaming.
sk_id: NodeId,
/// Status of the connection.
@@ -281,12 +278,6 @@ struct NewCommittedWAL {
discovered_at: NaiveDateTime,
}
#[derive(Debug)]
struct RetryInfo {
next_retry_at: Option<NaiveDateTime>,
retry_duration_seconds: f64,
}
/// Data about the timeline to connect to, received from etcd.
#[derive(Debug)]
struct EtcdSkTimeline {
@@ -300,7 +291,7 @@ struct EtcdSkTimeline {
impl WalreceiverState {
fn new(
id: ZTenantTimelineId,
local_timeline: Arc<LayeredTimeline>,
local_timeline: Arc<<RepositoryImpl as Repository>::Timeline>,
wal_connect_timeout: Duration,
lagging_wal_timeout: Duration,
max_lsn_wal_lag: NonZeroU64,
@@ -313,18 +304,31 @@ impl WalreceiverState {
max_lsn_wal_lag,
wal_connection: None,
wal_stream_candidates: HashMap::new(),
wal_connection_retries: HashMap::new(),
wal_connection_attempts: HashMap::new(),
}
}
/// Shuts down the current connection (if any) and immediately starts another one with the given connection string.
async fn change_connection(&mut self, new_sk_id: NodeId, new_wal_source_connstr: String) {
self.drop_old_connection(true).await;
if let Some(old_connection) = self.wal_connection.take() {
old_connection.connection_task.shutdown().await
}
let id = self.id;
let connect_timeout = self.wal_connect_timeout;
let connection_attempt = self
.wal_connection_attempts
.get(&new_sk_id)
.copied()
.unwrap_or(0);
let connection_handle = TaskHandle::spawn(move |events_sender, cancellation| {
async move {
exponential_backoff(
connection_attempt,
DEFAULT_BASE_BACKOFF_SECONDS,
DEFAULT_MAX_BACKOFF_SECONDS,
)
.await;
super::walreceiver_connection::handle_walreceiver_connection(
id,
&new_wal_source_connstr,
@@ -340,11 +344,10 @@ impl WalreceiverState {
let now = Utc::now().naive_utc();
self.wal_connection = Some(WalConnection {
started_at: now,
sk_id: new_sk_id,
status: WalConnectionStatus {
is_connected: false,
has_processed_wal: false,
has_received_wal: false,
latest_connection_update: now,
latest_wal_update: now,
streaming_lsn: None,
@@ -355,71 +358,6 @@ impl WalreceiverState {
});
}
/// Drops the current connection (if any) and updates retry timeout for the next
/// connection attempt to the same safekeeper.
async fn drop_old_connection(&mut self, needs_shutdown: bool) {
let wal_connection = match self.wal_connection.take() {
Some(wal_connection) => wal_connection,
None => return,
};
if needs_shutdown {
wal_connection.connection_task.shutdown().await;
}
let retry = self
.wal_connection_retries
.entry(wal_connection.sk_id)
.or_insert(RetryInfo {
next_retry_at: None,
retry_duration_seconds: WALCONNECTION_RETRY_MIN_BACKOFF_SECONDS,
});
let now = Utc::now().naive_utc();
// Schedule the next retry attempt. We want to have exponential backoff for connection attempts,
// and we add backoff to the time when we started the connection attempt. If the connection
// was active for a long time, then next_retry_at will be in the past.
retry.next_retry_at =
wal_connection
.started_at
.checked_add_signed(chrono::Duration::milliseconds(
(retry.retry_duration_seconds * 1000.0) as i64,
));
if let Some(next) = &retry.next_retry_at {
if next > &now {
info!(
"Next connection retry to {:?} is at {}",
wal_connection.sk_id, next
);
}
}
let next_retry_duration =
retry.retry_duration_seconds * WALCONNECTION_RETRY_BACKOFF_MULTIPLIER;
// Clamp the next retry duration to the maximum allowed.
let next_retry_duration = next_retry_duration.min(WALCONNECTION_RETRY_MAX_BACKOFF_SECONDS);
// Clamp the next retry duration to the minimum allowed.
let next_retry_duration = next_retry_duration.max(WALCONNECTION_RETRY_MIN_BACKOFF_SECONDS);
retry.retry_duration_seconds = next_retry_duration;
}
/// Returns time needed to wait to have a new candidate for WAL streaming.
fn time_until_next_retry(&self) -> Option<Duration> {
let now = Utc::now().naive_utc();
let next_retry_at = self
.wal_connection_retries
.values()
.filter_map(|retry| retry.next_retry_at)
.filter(|next_retry_at| next_retry_at > &now)
.min();
next_retry_at.and_then(|next_retry_at| (next_retry_at - now).to_std().ok())
}
/// Adds another etcd timeline into the state, if its more recent than the one already added there for the same key.
fn register_timeline_update(&mut self, timeline_update: BrokerUpdate<SkTimelineInfo>) {
match self
@@ -613,37 +551,52 @@ impl WalreceiverState {
/// Optionally, omits the given node, to support gracefully switching from a healthy safekeeper to another.
///
/// The candidate that is chosen:
/// * has no pending retry cooldown
/// * has greatest commit_lsn among the ones that are left
/// * has fewest connection attempts from pageserver to safekeeper node (reset every time we receive a WAL message from the node)
/// * has greatest data Lsn among the ones that are left
///
/// NOTE:
/// We evict timeline data received from etcd based on time passed since it was registered, along with its connection attempts values, but
/// otherwise to reset the connection attempts, a successful connection to that node is needed.
/// That won't happen now, before all nodes with less connection attempts are connected to first, which might leave the sk node with more advanced state to be ignored.
fn select_connection_candidate(
&self,
node_to_omit: Option<NodeId>,
) -> Option<(NodeId, &SkTimelineInfo, String)> {
self.applicable_connection_candidates()
let all_candidates = self
.applicable_connection_candidates()
.filter(|&(sk_id, _, _)| Some(sk_id) != node_to_omit)
.collect::<Vec<_>>();
let smallest_attempts_allowed = all_candidates
.iter()
.map(|(sk_id, _, _)| {
self.wal_connection_attempts
.get(sk_id)
.copied()
.unwrap_or(0)
})
.min()?;
all_candidates
.into_iter()
.filter(|(sk_id, _, _)| {
smallest_attempts_allowed
>= self
.wal_connection_attempts
.get(sk_id)
.copied()
.unwrap_or(0)
})
.max_by_key(|(_, info, _)| info.commit_lsn)
}
/// Returns a list of safekeepers that have valid info and ready for connection.
/// Some safekeepers are filtered by the retry cooldown.
fn applicable_connection_candidates(
&self,
) -> impl Iterator<Item = (NodeId, &SkTimelineInfo, String)> {
let now = Utc::now().naive_utc();
self.wal_stream_candidates
.iter()
.filter(|(_, info)| info.timeline.commit_lsn.is_some())
.filter(move |(sk_id, _)| {
let next_retry_at = self
.wal_connection_retries
.get(sk_id)
.and_then(|retry_info| {
retry_info.next_retry_at
});
next_retry_at.is_none() || next_retry_at.unwrap() <= now
})
.filter_map(|(sk_id, etcd_info)| {
let info = &etcd_info.timeline;
match wal_stream_connection_string(
@@ -678,7 +631,7 @@ impl WalreceiverState {
});
for node_id in node_ids_to_remove {
self.wal_connection_retries.remove(&node_id);
self.wal_connection_attempts.remove(&node_id);
}
}
}
@@ -735,6 +688,7 @@ fn wal_stream_connection_string(
#[cfg(test)]
mod tests {
use crate::repository::{
repo_harness::{RepoHarness, TIMELINE_ID},
Repository,
@@ -839,7 +793,7 @@ mod tests {
let connection_status = WalConnectionStatus {
is_connected: true,
has_processed_wal: true,
has_received_wal: true,
latest_connection_update: now,
latest_wal_update: now,
commit_lsn: Some(Lsn(current_lsn)),
@@ -848,7 +802,6 @@ mod tests {
state.max_lsn_wal_lag = NonZeroU64::new(100).unwrap();
state.wal_connection = Some(WalConnection {
started_at: now,
sk_id: connected_sk_id,
status: connection_status.clone(),
connection_task: TaskHandle::spawn(move |sender, _| async move {
@@ -1068,13 +1021,7 @@ mod tests {
},
),
]);
state.wal_connection_retries = HashMap::from([(
NodeId(0),
RetryInfo {
next_retry_at: now.checked_add_signed(chrono::Duration::hours(1)),
retry_duration_seconds: WALCONNECTION_RETRY_MAX_BACKOFF_SECONDS,
},
)]);
state.wal_connection_attempts = HashMap::from([(NodeId(0), 1), (NodeId(1), 0)]);
let candidate_with_less_errors = state
.next_connection_candidate()
@@ -1082,7 +1029,7 @@ mod tests {
assert_eq!(
candidate_with_less_errors.safekeeper_id,
NodeId(1),
"Should select the node with no pending retry cooldown"
"Should select the node with less connection errors"
);
Ok(())
@@ -1100,7 +1047,7 @@ mod tests {
let connection_status = WalConnectionStatus {
is_connected: true,
has_processed_wal: true,
has_received_wal: true,
latest_connection_update: now,
latest_wal_update: now,
commit_lsn: Some(current_lsn),
@@ -1108,7 +1055,6 @@ mod tests {
};
state.wal_connection = Some(WalConnection {
started_at: now,
sk_id: connected_sk_id,
status: connection_status.clone(),
connection_task: TaskHandle::spawn(move |sender, _| async move {
@@ -1188,7 +1134,7 @@ mod tests {
let connection_status = WalConnectionStatus {
is_connected: true,
has_processed_wal: true,
has_received_wal: true,
latest_connection_update: time_over_threshold,
latest_wal_update: time_over_threshold,
commit_lsn: Some(current_lsn),
@@ -1196,7 +1142,6 @@ mod tests {
};
state.wal_connection = Some(WalConnection {
started_at: now,
sk_id: NodeId(1),
status: connection_status.clone(),
connection_task: TaskHandle::spawn(move |sender, _| async move {
@@ -1261,7 +1206,7 @@ mod tests {
let connection_status = WalConnectionStatus {
is_connected: true,
has_processed_wal: true,
has_received_wal: true,
latest_connection_update: now,
latest_wal_update: time_over_threshold,
commit_lsn: Some(current_lsn),
@@ -1269,7 +1214,6 @@ mod tests {
};
state.wal_connection = Some(WalConnection {
started_at: now,
sk_id: NodeId(1),
status: connection_status,
connection_task: TaskHandle::spawn(move |_, _| async move { Ok(()) }),
@@ -1341,7 +1285,7 @@ mod tests {
max_lsn_wal_lag: NonZeroU64::new(1024 * 1024).unwrap(),
wal_connection: None,
wal_stream_candidates: HashMap::new(),
wal_connection_retries: HashMap::new(),
wal_connection_attempts: HashMap::new(),
}
}
}

View File

@@ -27,7 +27,7 @@ use crate::{
walingest::WalIngest,
walrecord::DecodedWALRecord,
};
use postgres_ffi::v14::waldecoder::WalStreamDecoder;
use postgres_ffi::waldecoder::WalStreamDecoder;
use utils::{lsn::Lsn, pq_proto::ReplicationFeedback, zid::ZTenantTimelineId};
/// Status of the connection.
@@ -35,9 +35,8 @@ use utils::{lsn::Lsn, pq_proto::ReplicationFeedback, zid::ZTenantTimelineId};
pub struct WalConnectionStatus {
/// If we were able to initiate a postgres connection, this means that safekeeper process is at least running.
pub is_connected: bool,
/// Defines a healthy connection as one on which pageserver received WAL from safekeeper
/// and is able to process it in walingest without errors.
pub has_processed_wal: bool,
/// Defines a healthy connection as one on which we have received at least some WAL bytes.
pub has_received_wal: bool,
/// Connection establishment time or the timestamp of a latest connection message received.
pub latest_connection_update: NaiveDateTime,
/// Time of the latest WAL message received.
@@ -72,7 +71,7 @@ pub async fn handle_walreceiver_connection(
info!("connected!");
let mut connection_status = WalConnectionStatus {
is_connected: true,
has_processed_wal: false,
has_received_wal: false,
latest_connection_update: Utc::now().naive_utc(),
latest_wal_update: Utc::now().naive_utc(),
streaming_lsn: None,
@@ -118,6 +117,13 @@ pub async fn handle_walreceiver_connection(
let identify = identify_system(&mut replication_client).await?;
info!("{identify:?}");
connection_status.latest_connection_update = Utc::now().naive_utc();
if let Err(e) = events_sender.send(TaskEvent::NewEvent(connection_status.clone())) {
warn!("Wal connection event listener dropped after IDENTIFY_SYSTEM, aborting the connection: {e}");
return Ok(());
}
// NB: this is a flush_lsn, not a commit_lsn.
let end_of_wal = Lsn::from(u64::from(identify.xlogpos));
let mut caught_up = false;
let ZTenantTimelineId {
@@ -125,14 +131,6 @@ pub async fn handle_walreceiver_connection(
timeline_id,
} = id;
connection_status.latest_connection_update = Utc::now().naive_utc();
connection_status.latest_wal_update = Utc::now().naive_utc();
connection_status.commit_lsn = Some(end_of_wal);
if let Err(e) = events_sender.send(TaskEvent::NewEvent(connection_status.clone())) {
warn!("Wal connection event listener dropped after IDENTIFY_SYSTEM, aborting the connection: {e}");
return Ok(());
}
let (repo, timeline) = tokio::task::spawn_blocking(move || {
let repo = tenant_mgr::get_repository_for_tenant(tenant_id)
.with_context(|| format!("no repository found for tenant {tenant_id}"))?;
@@ -183,7 +181,6 @@ pub async fn handle_walreceiver_connection(
} {
let replication_message = replication_message?;
let now = Utc::now().naive_utc();
let last_rec_lsn_before_msg = last_rec_lsn;
// Update the connection status before processing the message. If the message processing
// fails (e.g. in walingest), we still want to know latests LSNs from the safekeeper.
@@ -196,6 +193,7 @@ pub async fn handle_walreceiver_connection(
));
if !xlog_data.data().is_empty() {
connection_status.latest_wal_update = now;
connection_status.has_received_wal = true;
}
}
ReplicationMessage::PrimaryKeepAlive(keepalive) => {
@@ -267,15 +265,6 @@ pub async fn handle_walreceiver_connection(
_ => None,
};
if !connection_status.has_processed_wal && last_rec_lsn > last_rec_lsn_before_msg {
// We have successfully processed at least one WAL record.
connection_status.has_processed_wal = true;
if let Err(e) = events_sender.send(TaskEvent::NewEvent(connection_status.clone())) {
warn!("Wal connection event listener dropped, aborting the connection: {e}");
return Ok(());
}
}
let timeline_to_check = Arc::clone(&timeline);
tokio::task::spawn_blocking(move || timeline_to_check.check_checkpoint_distance())
.await

View File

@@ -3,10 +3,9 @@
//!
use anyhow::Result;
use bytes::{Buf, Bytes};
use postgres_ffi::v14::pg_constants;
use postgres_ffi::v14::xlog_utils::{TimestampTz, XLOG_SIZE_OF_XLOG_RECORD};
use postgres_ffi::v14::XLogRecord;
use postgres_ffi::BLCKSZ;
use postgres_ffi::pg_constants;
use postgres_ffi::xlog_utils::{TimestampTz, XLOG_SIZE_OF_XLOG_RECORD};
use postgres_ffi::XLogRecord;
use postgres_ffi::{BlockNumber, OffsetNumber};
use postgres_ffi::{MultiXactId, MultiXactOffset, MultiXactStatus, Oid, TransactionId};
use serde::{Deserialize, Serialize};
@@ -619,7 +618,7 @@ pub fn decode_wal_record(
blk.hole_length = 0;
}
} else {
blk.hole_length = BLCKSZ - blk.bimg_len;
blk.hole_length = pg_constants::BLCKSZ - blk.bimg_len;
}
datatotal += blk.bimg_len as u32;
blocks_total_len += blk.bimg_len as u32;
@@ -629,7 +628,9 @@ pub fn decode_wal_record(
* bimg_len < BLCKSZ if the HAS_HOLE flag is set.
*/
if blk.bimg_info & pg_constants::BKPIMAGE_HAS_HOLE != 0
&& (blk.hole_offset == 0 || blk.hole_length == 0 || blk.bimg_len == BLCKSZ)
&& (blk.hole_offset == 0
|| blk.hole_length == 0
|| blk.bimg_len == pg_constants::BLCKSZ)
{
// TODO
/*
@@ -666,7 +667,7 @@ pub fn decode_wal_record(
* flag is set.
*/
if (blk.bimg_info & pg_constants::BKPIMAGE_IS_COMPRESSED == 0)
&& blk.bimg_len == BLCKSZ
&& blk.bimg_len == pg_constants::BLCKSZ
{
// TODO
/*
@@ -684,7 +685,7 @@ pub fn decode_wal_record(
*/
if blk.bimg_info & pg_constants::BKPIMAGE_HAS_HOLE == 0
&& blk.bimg_info & pg_constants::BKPIMAGE_IS_COMPRESSED == 0
&& blk.bimg_len != BLCKSZ
&& blk.bimg_len != pg_constants::BLCKSZ
{
// TODO
/*

View File

@@ -44,12 +44,11 @@ use crate::reltag::{RelTag, SlruKind};
use crate::repository::Key;
use crate::walrecord::ZenithWalRecord;
use metrics::{register_histogram, register_int_counter, Histogram, IntCounter};
use postgres_ffi::v14::nonrelfile_utils::{
mx_offset_to_flags_bitshift, mx_offset_to_flags_offset, mx_offset_to_member_offset,
transaction_id_set_status,
};
use postgres_ffi::v14::pg_constants;
use postgres_ffi::BLCKSZ;
use postgres_ffi::nonrelfile_utils::mx_offset_to_flags_bitshift;
use postgres_ffi::nonrelfile_utils::mx_offset_to_flags_offset;
use postgres_ffi::nonrelfile_utils::mx_offset_to_member_offset;
use postgres_ffi::nonrelfile_utils::transaction_id_set_status;
use postgres_ffi::pg_constants;
///
/// `RelTag` + block number (`blknum`) gives us a unique id of the page in the cluster.
@@ -83,6 +82,24 @@ pub trait WalRedoManager: Send + Sync {
) -> Result<Bytes, WalRedoError>;
}
///
/// A dummy WAL Redo Manager implementation that doesn't allow replaying
/// anything. Currently used during bootstrapping (zenith init), to create
/// a Repository object without launching the real WAL redo process.
///
pub struct DummyRedoManager {}
impl crate::walredo::WalRedoManager for DummyRedoManager {
fn request_redo(
&self,
_key: Key,
_lsn: Lsn,
_base_img: Option<Bytes>,
_records: Vec<(Lsn, ZenithWalRecord)>,
) -> Result<Bytes, WalRedoError> {
Err(WalRedoError::InvalidState)
}
}
// Metrics collected on WAL redo operations
//
// We collect the time spent in actual WAL redo ('redo'), and time waiting
@@ -418,10 +435,10 @@ impl PostgresRedoManager {
}
// Append the timestamp
if page.len() == BLCKSZ as usize + 8 {
page.truncate(BLCKSZ as usize);
if page.len() == pg_constants::BLCKSZ as usize + 8 {
page.truncate(pg_constants::BLCKSZ as usize);
}
if page.len() == BLCKSZ as usize {
if page.len() == pg_constants::BLCKSZ as usize {
page.extend_from_slice(&timestamp.to_be_bytes());
} else {
warn!(
@@ -742,7 +759,7 @@ impl PostgresRedoProcess {
// We expect the WAL redo process to respond with an 8k page image. We read it
// into this buffer.
let mut resultbuf = vec![0; BLCKSZ.into()];
let mut resultbuf = vec![0; pg_constants::BLCKSZ.into()];
let mut nresult: usize = 0; // # of bytes read into 'resultbuf' so far
// Prepare for calling poll()
@@ -755,7 +772,7 @@ impl PostgresRedoProcess {
// We do three things simultaneously: send the old base image and WAL records to
// the child process's stdin, read the result from child's stdout, and forward any logging
// information that the child writes to its stderr to the page server's log.
while nresult < BLCKSZ.into() {
while nresult < pg_constants::BLCKSZ.into() {
// If we have more data to write, wake up if 'stdin' becomes writeable or
// we have data to read. Otherwise only wake up if there's data to read.
let nfds = if nwrite < writebuf.len() { 3 } else { 2 };

View File

@@ -86,7 +86,7 @@ impl From<DatabaseInfo> for tokio_postgres::Config {
/// * However, when we substitute `T` with [`ClientCredentials`],
/// this helps us provide the credentials only to those auth
/// backends which require them for the authentication process.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
#[derive(Debug, Clone, Copy, PartialEq)]
pub enum BackendType<T> {
/// Legacy Cloud API (V1) + link auth.
LegacyConsole(T),

View File

@@ -65,17 +65,8 @@ impl NodeInfo {
// require for our business.
let mut connection_error = None;
let ports = self.config.get_ports();
let hosts = self.config.get_hosts();
// the ports array is supposed to have 0 entries, 1 entry, or as many entries as in the hosts array
if ports.len() > 1 && ports.len() != hosts.len() {
return Err(io::Error::new(
io::ErrorKind::Other,
format!("couldn't connect: bad compute config, ports and hosts entries' count does not match: {:?}", self.config),
));
}
for (i, host) in hosts.iter().enumerate() {
let port = ports.get(i).or_else(|| ports.first()).unwrap_or(&5432);
for (i, host) in self.config.get_hosts().iter().enumerate() {
let port = ports.get(i).or_else(|| ports.get(0)).unwrap_or(&5432);
let host = match host {
Host::Tcp(host) => host.as_str(),
Host::Unix(_) => continue, // unix sockets are not welcome here

View File

@@ -14,7 +14,7 @@ pub const SCRAM_RAW_NONCE_LEN: usize = 18;
fn validate_sasl_extensions<'a>(parts: impl Iterator<Item = &'a str>) -> Option<()> {
for mut chars in parts.map(|s| s.chars()) {
let attr = chars.next()?;
if !('a'..='z').contains(&attr) && !('A'..='Z').contains(&attr) {
if !('a'..'z').contains(&attr) && !('A'..'Z').contains(&attr) {
return None;
}
let eq = chars.next()?;

View File

@@ -40,7 +40,7 @@ struct SafeKeeperStateV1 {
wal_start_lsn: Lsn,
}
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct ServerInfoV2 {
/// Postgres server version
pub pg_version: u32,
@@ -70,7 +70,7 @@ pub struct SafeKeeperStateV2 {
pub wal_start_lsn: Lsn,
}
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct ServerInfoV3 {
/// Postgres server version
pub pg_version: u32,

View File

@@ -9,7 +9,7 @@ use crate::timeline::{Timeline, TimelineTools};
use crate::SafeKeeperConf;
use anyhow::{bail, Context, Result};
use postgres_ffi::PG_TLI;
use postgres_ffi::xlog_utils::PG_TLI;
use regex::Regex;
use std::str::FromStr;
use std::sync::Arc;
@@ -90,10 +90,7 @@ impl postgres_backend::Handler for SafekeeperPostgresHandler {
fn process_query(&mut self, pgb: &mut PostgresBackend, query_string: &str) -> Result<()> {
let cmd = parse_cmd(query_string)?;
info!(
"got query {:?} in timeline {:?}",
query_string, self.ztimelineid
);
info!("got query {:?}", query_string);
let create = !(matches!(cmd, SafekeeperPostgresCommand::StartReplication { .. })
|| matches!(cmd, SafekeeperPostgresCommand::IdentifySystem));
@@ -109,17 +106,23 @@ impl postgres_backend::Handler for SafekeeperPostgresHandler {
}
match cmd {
SafekeeperPostgresCommand::StartWalPush => ReceiveWalConn::new(pgb)
.run(self)
.context("failed to run ReceiveWalConn"),
SafekeeperPostgresCommand::StartReplication { start_lsn } => ReplicationConn::new(pgb)
.run(self, pgb, start_lsn)
.context("failed to run ReplicationConn"),
SafekeeperPostgresCommand::IdentifySystem => self.handle_identify_system(pgb),
SafekeeperPostgresCommand::JSONCtrl { ref cmd } => handle_json_ctrl(self, pgb, cmd),
SafekeeperPostgresCommand::StartWalPush => {
ReceiveWalConn::new(pgb)
.run(self)
.context("failed to run ReceiveWalConn")?;
}
SafekeeperPostgresCommand::StartReplication { start_lsn } => {
ReplicationConn::new(pgb)
.run(self, pgb, start_lsn)
.context("failed to run ReplicationConn")?;
}
SafekeeperPostgresCommand::IdentifySystem => {
self.handle_identify_system(pgb)?;
}
SafekeeperPostgresCommand::JSONCtrl { ref cmd } => {
handle_json_ctrl(self, pgb, cmd)?;
}
}
.context(format!("timeline {timelineid}"))?;
Ok(())
}
}
@@ -150,15 +153,8 @@ impl SafekeeperPostgresHandler {
/// Handle IDENTIFY_SYSTEM replication command
///
fn handle_identify_system(&mut self, pgb: &mut PostgresBackend) -> Result<()> {
let lsn = if self.is_walproposer_recovery() {
// walproposer should get all local WAL until flush_lsn
self.timeline.get().get_end_of_wal()
} else {
// other clients shouldn't get any uncommitted WAL
self.timeline.get().get_state().0.commit_lsn
}
.to_string();
let start_pos = self.timeline.get().get_end_of_wal();
let lsn = start_pos.to_string();
let sysid = self
.timeline
.get()
@@ -207,11 +203,4 @@ impl SafekeeperPostgresHandler {
.write_message(&BeMessage::CommandComplete(b"IDENTIFY_SYSTEM"))?;
Ok(())
}
/// Returns true if current connection is a replication connection, originating
/// from a walproposer recovery function. This connection gets a special handling:
/// safekeeper must stream all local WAL till the flush_lsn, whether committed or not.
pub fn is_walproposer_recovery(&self) -> bool {
self.appname == Some("wal_proposer_recovery".to_string())
}
}

View File

@@ -7,7 +7,8 @@
//!
use anyhow::Result;
use bytes::Bytes;
use bytes::{BufMut, Bytes, BytesMut};
use crc32c::crc32c_append;
use serde::{Deserialize, Serialize};
use tracing::*;
@@ -18,8 +19,9 @@ use crate::safekeeper::{
};
use crate::safekeeper::{SafeKeeperState, Term, TermHistory, TermSwitchEntry};
use crate::timeline::TimelineTools;
use postgres_ffi::v14::pg_constants;
use postgres_ffi::v14::xlog_utils;
use postgres_ffi::pg_constants;
use postgres_ffi::xlog_utils;
use postgres_ffi::{uint32, uint64, Oid, XLogRecord};
use utils::{
lsn::Lsn,
postgres_backend::PostgresBackend,
@@ -142,7 +144,7 @@ fn append_logical_message(
spg: &mut SafekeeperPostgresHandler,
msg: &AppendLogicalMessage,
) -> Result<InsertedWAL> {
let wal_data = xlog_utils::encode_logical_message(&msg.lm_prefix, &msg.lm_message);
let wal_data = encode_logical_message(&msg.lm_prefix, &msg.lm_message);
let sk_state = spg.timeline.get().get_state().1;
let begin_lsn = msg.begin_lsn;
@@ -180,3 +182,90 @@ fn append_logical_message(
append_response,
})
}
#[repr(C)]
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
struct XlLogicalMessage {
db_id: Oid,
transactional: uint32, // bool, takes 4 bytes due to alignment in C structures
prefix_size: uint64,
message_size: uint64,
}
impl XlLogicalMessage {
pub fn encode(&self) -> Bytes {
use utils::bin_ser::LeSer;
self.ser().unwrap().into()
}
}
/// Create new WAL record for non-transactional logical message.
/// Used for creating artificial WAL for tests, as LogicalMessage
/// record is basically no-op.
fn encode_logical_message(prefix: &str, message: &str) -> Vec<u8> {
let mut prefix_bytes = BytesMut::with_capacity(prefix.len() + 1);
prefix_bytes.put(prefix.as_bytes());
prefix_bytes.put_u8(0);
let message_bytes = message.as_bytes();
let logical_message = XlLogicalMessage {
db_id: 0,
transactional: 0,
prefix_size: prefix_bytes.len() as u64,
message_size: message_bytes.len() as u64,
};
let mainrdata = logical_message.encode();
let mainrdata_len: usize = mainrdata.len() + prefix_bytes.len() + message_bytes.len();
// only short mainrdata is supported for now
assert!(mainrdata_len <= 255);
let mainrdata_len = mainrdata_len as u8;
let mut data: Vec<u8> = vec![pg_constants::XLR_BLOCK_ID_DATA_SHORT, mainrdata_len];
data.extend_from_slice(&mainrdata);
data.extend_from_slice(&prefix_bytes);
data.extend_from_slice(message_bytes);
let total_len = xlog_utils::XLOG_SIZE_OF_XLOG_RECORD + data.len();
let mut header = XLogRecord {
xl_tot_len: total_len as u32,
xl_xid: 0,
xl_prev: 0,
xl_info: 0,
xl_rmid: 21,
__bindgen_padding_0: [0u8; 2usize],
xl_crc: 0, // crc will be calculated later
};
let header_bytes = header.encode().expect("failed to encode header");
let crc = crc32c_append(0, &data);
let crc = crc32c_append(crc, &header_bytes[0..xlog_utils::XLOG_RECORD_CRC_OFFS]);
header.xl_crc = crc;
let mut wal: Vec<u8> = Vec::new();
wal.extend_from_slice(&header.encode().expect("failed to encode header"));
wal.extend_from_slice(&data);
// WAL start position must be aligned at 8 bytes,
// this will add padding for the next WAL record.
const PADDING: usize = 8;
let padding_rem = wal.len() % PADDING;
if padding_rem != 0 {
wal.resize(wal.len() + PADDING - padding_rem, 0);
}
wal
}
#[test]
fn test_encode_logical_message() {
let expected = [
64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 21, 0, 0, 170, 34, 166, 227, 255, 38,
0, 0, 0, 0, 0, 0, 0, 0, 7, 0, 0, 0, 0, 0, 0, 0, 7, 0, 0, 0, 0, 0, 0, 0, 112, 114, 101, 102,
105, 120, 0, 109, 101, 115, 115, 97, 103, 101,
];
let actual = encode_logical_message("prefix", "message");
assert_eq!(expected, actual[..]);
}

View File

@@ -7,7 +7,7 @@ use metrics::{
proto::MetricFamily,
Gauge, IntGaugeVec,
};
use postgres_ffi::v14::xlog_utils::XLogSegNo;
use postgres_ffi::xlog_utils::XLogSegNo;
use utils::{lsn::Lsn, zid::ZTenantTimelineId};
use crate::{

View File

@@ -5,7 +5,9 @@ use byteorder::{LittleEndian, ReadBytesExt};
use bytes::{Buf, BufMut, Bytes, BytesMut};
use etcd_broker::subscription_value::SkTimelineInfo;
use postgres_ffi::v14::xlog_utils::{TimeLineID, XLogSegNo, MAX_SEND_SIZE};
use postgres_ffi::xlog_utils::TimeLineID;
use postgres_ffi::xlog_utils::XLogSegNo;
use serde::{Deserialize, Serialize};
use std::cmp::max;
use std::cmp::min;
@@ -17,6 +19,7 @@ use crate::control_file;
use crate::send_wal::HotStandbyFeedback;
use crate::wal_storage;
use postgres_ffi::xlog_utils::MAX_SEND_SIZE;
use utils::{
bin_ser::LeSer,
lsn::Lsn,
@@ -124,7 +127,7 @@ impl AcceptorState {
/// Information about Postgres. Safekeeper gets it once and then verifies
/// all further connections from computes match.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct ServerInfo {
/// Postgres server version
pub pg_version: u32,

View File

@@ -6,7 +6,7 @@ use crate::timeline::{ReplicaState, Timeline, TimelineTools};
use crate::wal_storage::WalReader;
use anyhow::{bail, Context, Result};
use postgres_ffi::v14::xlog_utils::{get_current_timestamp, TimestampTz, MAX_SEND_SIZE};
use postgres_ffi::xlog_utils::{get_current_timestamp, TimestampTz, MAX_SEND_SIZE};
use bytes::Bytes;
use serde::{Deserialize, Serialize};
@@ -36,7 +36,7 @@ const NEON_STATUS_UPDATE_TAG_BYTE: u8 = b'z';
type FullTransactionId = u64;
/// Hot standby feedback received from replica
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
#[derive(Debug, Clone, Copy, PartialEq, Serialize, Deserialize)]
pub struct HotStandbyFeedback {
pub ts: TimestampTz,
pub xmin: FullTransactionId,
@@ -170,7 +170,6 @@ impl ReplicationConn {
// spawn the background thread which receives HotStandbyFeedback messages.
let bg_timeline = Arc::clone(spg.timeline.get());
let bg_stream_in = self.stream_in.take().unwrap();
let bg_timeline_id = spg.ztimelineid.unwrap();
let state = ReplicaState::new();
// This replica_id is used below to check if it's time to stop replication.
@@ -189,8 +188,6 @@ impl ReplicationConn {
let _ = thread::Builder::new()
.name("HotStandbyFeedback thread".into())
.spawn(move || {
let _enter =
info_span!("HotStandbyFeedback thread", timeline = %bg_timeline_id).entered();
if let Err(err) = Self::background_thread(bg_stream_in, bg_replica_guard) {
error!("Replication background thread failed: {}", err);
}
@@ -201,12 +198,13 @@ impl ReplicationConn {
.build()?;
runtime.block_on(async move {
let (inmem_state, persisted_state) = spg.timeline.get().get_state();
let (_, persisted_state) = spg.timeline.get().get_state();
// add persisted_state.timeline_start_lsn == Lsn(0) check
if persisted_state.server.wal_seg_size == 0 {
bail!("Cannot start replication before connecting to walproposer");
}
let wal_end = spg.timeline.get().get_end_of_wal();
// Walproposer gets special handling: safekeeper must give proposer all
// local WAL till the end, whether committed or not (walproposer will
// hang otherwise). That's because walproposer runs the consensus and
@@ -216,8 +214,8 @@ impl ReplicationConn {
// another compute rises which collects majority and starts fixing log
// on this safekeeper itself. That's ok as (old) proposer will never be
// able to commit such WAL.
let stop_pos: Option<Lsn> = if spg.is_walproposer_recovery() {
let wal_end = spg.timeline.get().get_end_of_wal();
let stop_pos: Option<Lsn> = if spg.appname == Some("wal_proposer_recovery".to_string())
{
Some(wal_end)
} else {
None
@@ -228,7 +226,7 @@ impl ReplicationConn {
// switch to copy
pgb.write_message(&BeMessage::CopyBothResponse)?;
let mut end_pos = stop_pos.unwrap_or(inmem_state.commit_lsn);
let mut end_pos = Lsn(0);
let mut wal_reader = WalReader::new(
spg.conf.timeline_dir(&spg.timeline.get().zttid),

View File

@@ -4,9 +4,8 @@
use anyhow::{bail, Context, Result};
use etcd_broker::subscription_value::SkTimelineInfo;
use once_cell::sync::Lazy;
use postgres_ffi::v14::xlog_utils::XLogSegNo;
use postgres_ffi::xlog_utils::XLogSegNo;
use serde::Serialize;
use tokio::sync::watch;

View File

@@ -11,8 +11,7 @@ use std::pin::Pin;
use std::sync::Arc;
use std::time::Duration;
use postgres_ffi::v14::xlog_utils::{XLogFileName, XLogSegNo, XLogSegNoOffsetToRecPtr};
use postgres_ffi::PG_TLI;
use postgres_ffi::xlog_utils::{XLogFileName, XLogSegNo, XLogSegNoOffsetToRecPtr, PG_TLI};
use remote_storage::{GenericRemoteStorage, RemoteStorage};
use tokio::fs::File;
use tokio::runtime::Builder;

View File

@@ -13,10 +13,9 @@ use std::pin::Pin;
use tokio::io::AsyncRead;
use once_cell::sync::Lazy;
use postgres_ffi::v14::xlog_utils::{
find_end_of_wal, IsPartialXLogFileName, IsXLogFileName, XLogFromFileName, XLogSegNo,
use postgres_ffi::xlog_utils::{
find_end_of_wal, IsPartialXLogFileName, IsXLogFileName, XLogFromFileName, XLogSegNo, PG_TLI,
};
use postgres_ffi::PG_TLI;
use std::cmp::min;
use std::fs::{self, remove_file, File, OpenOptions};
@@ -31,10 +30,9 @@ use crate::safekeeper::SafeKeeperState;
use crate::wal_backup::read_object;
use crate::SafeKeeperConf;
use postgres_ffi::v14::xlog_utils::XLogFileName;
use postgres_ffi::XLOG_BLCKSZ;
use postgres_ffi::xlog_utils::{XLogFileName, XLOG_BLCKSZ};
use postgres_ffi::v14::waldecoder::WalStreamDecoder;
use postgres_ffi::waldecoder::WalStreamDecoder;
use metrics::{register_histogram_vec, Histogram, HistogramVec, DISK_WRITE_SECONDS_BUCKETS};

View File

@@ -18,10 +18,6 @@ exclude = ^vendor/
# some tests don't typecheck when this flag is set
check_untyped_defs = false
# Help mypy find imports when running against list of individual files.
# Without this line it would behave differently when executed on the entire project.
mypy_path = $MYPY_CONFIG_FILE_DIR:$MYPY_CONFIG_FILE_DIR/test_runner
disallow_incomplete_defs = false
disallow_untyped_calls = false
disallow_untyped_decorators = false

View File

@@ -1,11 +1,7 @@
from typing import Optional
from uuid import uuid4, UUID
import pytest
import pathlib
import os
import subprocess
from fixtures.utils import lsn_from_hex
from fixtures.log_helper import log
from fixtures.neon_fixtures import (
DEFAULT_BRANCH_NAME,
NeonEnv,
@@ -13,43 +9,16 @@ from fixtures.neon_fixtures import (
NeonPageserverHttpClient,
NeonPageserverApiException,
wait_until,
neon_binpath,
pg_distrib_dir,
)
# test that we cannot override node id after init
def test_pageserver_init_node_id(neon_simple_env: NeonEnv):
repo_dir = neon_simple_env.repo_dir
pageserver_config = repo_dir / 'pageserver.toml'
pageserver_bin = pathlib.Path(neon_binpath) / 'pageserver'
run_pageserver = lambda args: subprocess.run([str(pageserver_bin), '-D', str(repo_dir), *args],
check=False,
universal_newlines=True,
stdout=subprocess.PIPE,
stderr=subprocess.PIPE)
# remove initial config
pageserver_config.unlink()
bad_init = run_pageserver(['--init', '-c', f'pg_distrib_dir="{pg_distrib_dir}"'])
assert bad_init.returncode == 1, 'pageserver should not be able to init new config without the node id'
assert "missing id" in bad_init.stderr
assert not pageserver_config.exists(), 'config file should not be created after init error'
completed_init = run_pageserver(
['--init', '-c', 'id = 12345', '-c', f'pg_distrib_dir="{pg_distrib_dir}"'])
assert completed_init.returncode == 0, 'pageserver should be able to create a new config with the node id given'
assert pageserver_config.exists(), 'config file should be created successfully'
bad_reinit = run_pageserver(
['--init', '-c', 'id = 12345', '-c', f'pg_distrib_dir="{pg_distrib_dir}"'])
assert bad_reinit.returncode == 1, 'pageserver should not be able to init new config without the node id'
assert "already exists, cannot init it" in bad_reinit.stderr
bad_update = run_pageserver(['--update-config', '-c', 'id = 3'])
assert bad_update.returncode == 1, 'pageserver should not allow updating node id'
assert "has node id already, it cannot be overridden" in bad_update.stderr
# test that we cannot override node id
def test_pageserver_init_node_id(neon_env_builder: NeonEnvBuilder):
env = neon_env_builder.init()
with pytest.raises(
Exception,
match="node id can only be set during pageserver init and cannot be overridden"):
env.pageserver.start(overrides=['--pageserver-config-override=id=10'])
def check_client(client: NeonPageserverHttpClient, initial_tenant: UUID):

View File

@@ -1,9 +1,7 @@
import pytest
from fixtures.log_helper import log
from fixtures.neon_fixtures import NeonEnv, wait_for_last_record_lsn, wait_for_upload
from fixtures.utils import lsn_from_hex, lsn_to_hex, query_scalar
import time
from uuid import UUID
from fixtures.neon_fixtures import NeonEnv
from fixtures.utils import query_scalar
#
@@ -92,49 +90,3 @@ def test_readonly_node(neon_simple_env: NeonEnv):
env.postgres.create_start(branch_name='test_readonly_node',
node_name='test_readonly_node_preinitdb',
lsn='0/42')
# Similar test, but with more data, and we force checkpoints
def test_timetravel(neon_simple_env: NeonEnv):
env = neon_simple_env
env.neon_cli.create_branch('test_timetravel', 'empty')
pg = env.postgres.create_start('test_timetravel')
client = env.pageserver.http_client()
tenant_id = pg.safe_psql("show neon.tenant_id")[0][0]
timeline_id = pg.safe_psql("show neon.timeline_id")[0][0]
lsns = []
with pg.cursor() as cur:
cur.execute(f"""
CREATE TABLE testtab(id serial primary key, iteration int, data text);
INSERT INTO testtab (iteration, data) SELECT 0, 'data' FROM generate_series(1, 100000);
""")
current_lsn = lsn_from_hex(query_scalar(cur, "SELECT pg_current_wal_flush_lsn()"))
lsns.append((0, current_lsn))
for i in range(1, 5):
with pg.cursor() as cur:
cur.execute(f'UPDATE testtab SET iteration = {i}')
current_lsn = lsn_from_hex(query_scalar(cur, "SELECT pg_current_wal_flush_lsn()"))
lsns.append((i, current_lsn))
# wait until pageserver receives that data
wait_for_last_record_lsn(client, UUID(tenant_id), UUID(timeline_id), current_lsn)
# run checkpoint manually to force a new layer file
env.pageserver.safe_psql(f"checkpoint {tenant_id} {timeline_id}")
##### Restart pageserver
env.postgres.stop_all()
env.pageserver.stop()
env.pageserver.start()
for (i, lsn) in lsns:
pg_old = env.postgres.create_start(branch_name='test_timetravel',
node_name=f'test_old_lsn_{i}',
lsn=lsn_to_hex(lsn))
with pg_old.cursor() as cur:
assert query_scalar(cur, f"select count(*) from testtab where iteration={i}") == 100000
assert query_scalar(cur, f"select count(*) from testtab where iteration<>{i}") == 0

View File

@@ -1,17 +0,0 @@
"""Tests for the code in test fixtures"""
from fixtures.neon_fixtures import NeonEnvBuilder
# Test that pageserver and safekeeper can restart quickly.
# This is a regression test, see https://github.com/neondatabase/neon/issues/2247
def test_fixture_restart(neon_env_builder: NeonEnvBuilder):
env = neon_env_builder.init_start()
for i in range(3):
env.pageserver.stop()
env.pageserver.start()
for i in range(3):
env.safekeepers[0].stop()
env.safekeepers[0].start()

View File

@@ -44,22 +44,30 @@ def new_pageserver_helper(new_pageserver_dir: pathlib.Path,
cannot use NeonPageserver yet because it depends on neon cli
which currently lacks support for multiple pageservers
"""
# actually run new pageserver
cmd = [
str(pageserver_bin),
'--init',
'--workdir',
str(new_pageserver_dir),
'--daemonize',
'--update-config',
f"-c listen_pg_addr='localhost:{pg_port}'",
f"-c listen_http_addr='localhost:{http_port}'",
f"-c pg_distrib_dir='{pg_distrib_dir}'",
f"-c id=2",
f"-c remote_storage={{local_path='{remote_storage_mock_path}'}}",
]
if broker is not None:
cmd.append(f"-c broker_endpoints=['{broker.client_url()}']", )
subprocess.check_output(cmd, text=True)
# actually run new pageserver
cmd = [
str(pageserver_bin),
'--workdir',
str(new_pageserver_dir),
'--daemonize',
]
log.info("starting new pageserver %s", cmd)
out = subprocess.check_output(cmd, text=True)
log.info("started new pageserver %s", out)

View File

@@ -569,7 +569,7 @@ class ProposerPostgres(PgProtocol):
f"neon.timeline_id = '{self.timeline_id.hex}'\n",
f"neon.tenant_id = '{self.tenant_id.hex}'\n",
f"neon.pageserver_connstring = ''\n",
f"neon.safekeepers = '{safekeepers}'\n",
f"safekeepers = '{safekeepers}'\n",
f"listen_addresses = '{self.listen_addr}'\n",
f"port = '{self.port}'\n",
]

View File

@@ -1855,11 +1855,11 @@ class Postgres(PgProtocol):
# walproposer uses different application_name
if ("synchronous_standby_names" in cfg_line or
# don't repeat safekeepers/wal_acceptors multiple times
"neon.safekeepers" in cfg_line):
"safekeepers" in cfg_line):
continue
f.write(cfg_line)
f.write("synchronous_standby_names = 'walproposer'\n")
f.write("neon.safekeepers = '{}'\n".format(safekeepers))
f.write("safekeepers = '{}'\n".format(safekeepers))
return self
def config(self, lines: List[str]) -> 'Postgres':