Compare commits

..

11 Commits

Author SHA1 Message Date
Arpad Müller
c056db20c7 wip 2023-09-08 18:25:05 +02:00
Arpad Müller
f417db52e0 wip 2023-09-07 19:32:42 +02:00
Arpad Müller
2050136437 Switch the locks to tokio ones 2023-09-07 19:32:42 +02:00
Arpad Müller
eb2dd7118e Fix test_vfile_concurrency test 2023-09-07 19:25:56 +02:00
Arpad Müller
0890120517 fix 2023-09-07 17:47:18 +02:00
Arpad Müller
9e23a91c0b Fix them for real this time 2023-09-07 17:38:04 +02:00
Arpad Müller
f64a2d723a Fix tests 2023-09-06 20:04:27 +02:00
Arpad Müller
bd04abbcab Make VirtualFile::{open_with_options, create} async fn 2023-09-06 20:04:27 +02:00
Arpad Müller
ae1af9d10e Make VirtualFile::open async fn 2023-09-06 20:04:27 +02:00
Arpad Müller
41e87f92c3 Make VirtualFile::with_file async 2023-09-06 20:04:26 +02:00
Arpad Müller
3a8b630f90 Make VirtualFile::sync_all async 2023-09-06 18:39:22 +02:00
124 changed files with 1579 additions and 4941 deletions

View File

@@ -19,7 +19,6 @@
!trace/
!vendor/postgres-v14/
!vendor/postgres-v15/
!vendor/postgres-v16/
!workspace_hack/
!neon_local/
!scripts/ninstall.sh

View File

@@ -1,8 +0,0 @@
self-hosted-runner:
labels:
- gen3
- large
- small
- us-east-2
config-variables:
- SLACK_UPCOMING_RELEASE_CHANNEL_ID

View File

@@ -70,9 +70,6 @@ runs:
name: compatibility-snapshot-${{ inputs.build_type }}-pg${{ inputs.pg_version }}
path: /tmp/compatibility_snapshot_pg${{ inputs.pg_version }}
prefix: latest
# The lack of compatibility snapshot (for example, for the new Postgres version)
# shouldn't fail the whole job. Only relevant test should fail.
skip-if-does-not-exist: true
- name: Checkout
if: inputs.needs_postgres_source == 'true'

View File

@@ -1,31 +0,0 @@
name: Lint GitHub Workflows
on:
push:
branches:
- main
- release
paths:
- '.github/workflows/*.ya?ml'
pull_request:
paths:
- '.github/workflows/*.ya?ml'
concurrency:
group: ${{ github.workflow }}-${{ github.ref }}
cancel-in-progress: ${{ github.event_name == 'pull_request' }}
jobs:
actionlint:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
- uses: reviewdog/action-actionlint@v1
env:
# SC2046 - Quote this to prevent word splitting. - https://www.shellcheck.net/wiki/SC2046
# SC2086 - Double quote to prevent globbing and word splitting. - https://www.shellcheck.net/wiki/SC2086
SHELLCHECK_OPTS: --exclude=SC2046,SC2086
with:
fail_on_error: true
filter_mode: nofilter
level: error

View File

@@ -19,7 +19,6 @@ on:
env:
GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
PR_NUMBER: ${{ github.event.pull_request.number }}
BRANCH: "ci-run/pr-${{ github.event.pull_request.number }}"
permissions: write-all
@@ -55,47 +54,25 @@ jobs:
- uses: actions/checkout@v3
with:
ref: main
token: ${{ secrets.CI_ACCESS_TOKEN }}
- run: gh pr checkout "${PR_NUMBER}"
- run: git checkout -b "${BRANCH}"
- run: git checkout -b "ci-run/pr-${PR_NUMBER}"
- run: git push --force origin "${BRANCH}"
- run: git push --force origin "ci-run/pr-${PR_NUMBER}"
- name: Create a Pull Request for CI run (if required)
env:
GH_TOKEN: ${{ secrets.CI_ACCESS_TOKEN }}
run: |
cat << EOF > body.md
This Pull Request is created automatically to run the CI pipeline for #${PR_NUMBER}
HEAD="ci-run/pr-${PR_NUMBER}"
BODY="This Pull Request was create automatically to run CI pipeline for #${PR_NUMBER}.\n\nPlease do not alter or merge/close it.\n\nFeel free to comment the original PR."
Please do not alter or merge/close it.
Feel free to review/comment/discuss the original PR #${PR_NUMBER}.
EOF
ALREADY_CREATED="$(gh pr --repo ${GITHUB_REPOSITORY} list --head ${HEAD} --base main --json number --jq '.[].number')"
ALREADY_CREATED=$(gh pr --repo "${GITHUB_REPOSITORY}" list --head "${HEAD}" --base "main" --json "number" --jq '.[].number')
if [ -z "${ALREADY_CREATED}" ]; then
gh pr --repo "${GITHUB_REPOSITORY}" create --title "CI run for PR #${PR_NUMBER}" \
--body-file "body.md" \
--head "${BRANCH}" \
--base "main" \
--draft
fi
cleanup:
# Close PRs and delete branchs if the original PR is closed.
if: |
github.event.action == 'closed' &&
github.event.pull_request.head.repo.full_name != github.repository
runs-on: ubuntu-latest
steps:
- run: |
CLOSED="$(gh pr --repo ${GITHUB_REPOSITORY} list --head ${HEAD} --json 'closed' --jq '.[].closed')"
if [ "${CLOSED}" == "false" ]; then
gh pr --repo "${GITHUB_REPOSITORY}" close "${BRANCH}" --delete-branch
gh pr --repo "${GITHUB_REPOSITORY}" create --title "CI run for PR #${PR_NUMBER}" \
--body "${BODY}" \
--head "${HEAD}" \
--base "main" \
--draft
fi

View File

@@ -137,11 +137,11 @@ jobs:
}'
if [ "$(date +%A)" = "Saturday" ]; then
matrix=$(echo "$matrix" | jq '.include += [{ "platform": "rds-postgres", "db_size": "10gb"},
matrix=$(echo $matrix | jq '.include += [{ "platform": "rds-postgres", "db_size": "10gb"},
{ "platform": "rds-aurora", "db_size": "50gb"}]')
fi
echo "matrix=$(echo "$matrix" | jq --compact-output '.')" >> $GITHUB_OUTPUT
echo "matrix=$(echo $matrix | jq --compact-output '.')" >> $GITHUB_OUTPUT
- name: Generate matrix for OLAP benchmarks
id: olap-compare-matrix
@@ -153,11 +153,11 @@ jobs:
}'
if [ "$(date +%A)" = "Saturday" ]; then
matrix=$(echo "$matrix" | jq '.include += [{ "platform": "rds-postgres" },
matrix=$(echo $matrix | jq '.include += [{ "platform": "rds-postgres" },
{ "platform": "rds-aurora" }]')
fi
echo "matrix=$(echo "$matrix" | jq --compact-output '.')" >> $GITHUB_OUTPUT
echo "matrix=$(echo $matrix | jq --compact-output '.')" >> $GITHUB_OUTPUT
- name: Generate matrix for TPC-H benchmarks
id: tpch-compare-matrix
@@ -172,11 +172,11 @@ jobs:
}'
if [ "$(date +%A)" = "Saturday" ]; then
matrix=$(echo "$matrix" | jq '.include += [{ "platform": "rds-postgres", "scale": "10" },
matrix=$(echo $matrix | jq '.include += [{ "platform": "rds-postgres", "scale": "10" },
{ "platform": "rds-aurora", "scale": "10" }]')
fi
echo "matrix=$(echo "$matrix" | jq --compact-output '.')" >> $GITHUB_OUTPUT
echo "matrix=$(echo $matrix | jq --compact-output '.')" >> $GITHUB_OUTPUT
pgbench-compare:
needs: [ generate-matrices ]
@@ -254,7 +254,7 @@ jobs:
echo "connstr=${CONNSTR}" >> $GITHUB_OUTPUT
QUERY="SELECT version();"
if [[ "${PLATFORM}" = "neon"* ]]; then
if [ "${PLATFORM}" = "neon"* ]; then
QUERY="${QUERY} SHOW neon.tenant_id; SHOW neon.timeline_id;"
fi
psql ${CONNSTR} -c "${QUERY}"
@@ -383,7 +383,7 @@ jobs:
echo "connstr=${CONNSTR}" >> $GITHUB_OUTPUT
QUERY="SELECT version();"
if [[ "${PLATFORM}" = "neon"* ]]; then
if [ "${PLATFORM}" = "neon"* ]; then
QUERY="${QUERY} SHOW neon.tenant_id; SHOW neon.timeline_id;"
fi
psql ${CONNSTR} -c "${QUERY}"
@@ -476,7 +476,7 @@ jobs:
;;
esac
CONNSTR_SECRET_NAME="BENCHMARK_${ENV_PLATFORM}_S${TEST_OLAP_SCALE}_CONNSTR"
CONNSTR_SECRET_NAME="BENCHMARK_${ENV_PLATFORM}_S${SCALE}_CONNSTR"
echo "CONNSTR_SECRET_NAME=${CONNSTR_SECRET_NAME}" >> $GITHUB_ENV
- name: Set up Connection String
@@ -487,7 +487,7 @@ jobs:
echo "connstr=${CONNSTR}" >> $GITHUB_OUTPUT
QUERY="SELECT version();"
if [[ "${PLATFORM}" = "neon"* ]]; then
if [ "${PLATFORM}" = "neon"* ]; then
QUERY="${QUERY} SHOW neon.tenant_id; SHOW neon.timeline_id;"
fi
psql ${CONNSTR} -c "${QUERY}"
@@ -577,7 +577,7 @@ jobs:
echo "connstr=${CONNSTR}" >> $GITHUB_OUTPUT
QUERY="SELECT version();"
if [[ "${PLATFORM}" = "neon"* ]]; then
if [ "${PLATFORM}" = "neon"* ]; then
QUERY="${QUERY} SHOW neon.tenant_id; SHOW neon.timeline_id;"
fi
psql ${CONNSTR} -c "${QUERY}"

View File

@@ -23,30 +23,7 @@ env:
AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_KEY_DEV }}
jobs:
check-permissions:
runs-on: ubuntu-latest
steps:
- name: Disallow PRs from forks
if: |
github.event_name == 'pull_request' &&
github.event.pull_request.head.repo.full_name != github.repository
run: |
if [ "${{ contains(fromJSON('["OWNER", "MEMBER", "COLLABORATOR"]'), github.event.pull_request.author_association) }}" = "true" ]; then
MESSAGE="Please create a PR from a branch of ${GITHUB_REPOSITORY} instead of a fork"
else
MESSAGE="The PR should be reviewed and labelled with 'approved-for-ci-run' to trigger a CI run"
fi
echo >&2 "We don't run CI for PRs from forks"
echo >&2 "${MESSAGE}"
exit 1
tag:
needs: [ check-permissions ]
runs-on: [ self-hosted, gen3, small ]
container: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/base:pinned
outputs:
@@ -75,7 +52,6 @@ jobs:
id: build-tag
check-codestyle-python:
needs: [ check-permissions ]
runs-on: [ self-hosted, gen3, small ]
container:
image: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/rust:pinned
@@ -108,7 +84,6 @@ jobs:
run: poetry run mypy .
check-codestyle-rust:
needs: [ check-permissions ]
runs-on: [ self-hosted, gen3, large ]
container:
image: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/rust:pinned
@@ -175,7 +150,6 @@ jobs:
run: cargo deny check
build-neon:
needs: [ check-permissions ]
runs-on: [ self-hosted, gen3, large ]
container:
image: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/rust:pinned
@@ -212,7 +186,7 @@ jobs:
# Eventually it will be replaced by a regression test https://github.com/neondatabase/neon/pull/4603
FAILED=false
for postgres in postgres-v14 postgres-v15 postgres-v16; do
for postgres in postgres-v14 postgres-v15; do
expected=$(cat vendor/revisions.json | jq --raw-output '."'"${postgres}"'"')
actual=$(git rev-parse "HEAD:vendor/${postgres}")
if [ "${expected}" != "${actual}" ]; then
@@ -234,10 +208,6 @@ jobs:
id: pg_v15_rev
run: echo pg_rev=$(git rev-parse HEAD:vendor/postgres-v15) >> $GITHUB_OUTPUT
- name: Set pg 16 revision for caching
id: pg_v16_rev
run: echo pg_rev=$(git rev-parse HEAD:vendor/postgres-v16) >> $GITHUB_OUTPUT
# Set some environment variables used by all the steps.
#
# CARGO_FLAGS is extra options to pass to "cargo build", "cargo test" etc.
@@ -258,12 +228,10 @@ jobs:
cov_prefix=""
CARGO_FLAGS="--locked --release"
fi
{
echo "cov_prefix=${cov_prefix}"
echo "CARGO_FEATURES=${CARGO_FEATURES}"
echo "CARGO_FLAGS=${CARGO_FLAGS}"
echo "CARGO_HOME=${GITHUB_WORKSPACE}/.cargo"
} >> $GITHUB_ENV
echo "cov_prefix=${cov_prefix}" >> $GITHUB_ENV
echo "CARGO_FEATURES=${CARGO_FEATURES}" >> $GITHUB_ENV
echo "CARGO_FLAGS=${CARGO_FLAGS}" >> $GITHUB_ENV
echo "CARGO_HOME=${GITHUB_WORKSPACE}/.cargo" >> $GITHUB_ENV
# Disabled for now
# Don't include the ~/.cargo/registry/src directory. It contains just
@@ -298,13 +266,6 @@ jobs:
path: pg_install/v15
key: v1-${{ runner.os }}-${{ matrix.build_type }}-pg-${{ steps.pg_v15_rev.outputs.pg_rev }}-${{ hashFiles('Makefile') }}
- name: Cache postgres v16 build
id: cache_pg_16
uses: actions/cache@v3
with:
path: pg_install/v16
key: v1-${{ runner.os }}-${{ matrix.build_type }}-pg-${{ steps.pg_v16_rev.outputs.pg_rev }}-${{ hashFiles('Makefile') }}
- name: Build postgres v14
if: steps.cache_pg_14.outputs.cache-hit != 'true'
run: mold -run make postgres-v14 -j$(nproc)
@@ -313,10 +274,6 @@ jobs:
if: steps.cache_pg_15.outputs.cache-hit != 'true'
run: mold -run make postgres-v15 -j$(nproc)
- name: Build postgres v16
if: steps.cache_pg_16.outputs.cache-hit != 'true'
run: mold -run make postgres-v16 -j$(nproc)
- name: Build neon extensions
run: mold -run make neon-pg-ext -j$(nproc)
@@ -390,17 +347,17 @@ jobs:
uses: ./.github/actions/save-coverage-data
regress-tests:
needs: [ check-permissions, build-neon ]
runs-on: [ self-hosted, gen3, large ]
container:
image: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/rust:pinned
# Default shared memory is 64mb
options: --init --shm-size=512mb
needs: [ build-neon ]
strategy:
fail-fast: false
matrix:
build_type: [ debug, release ]
pg_version: [ v14, v15, v16 ]
pg_version: [ v14, v15 ]
steps:
- name: Checkout
uses: actions/checkout@v3
@@ -428,12 +385,12 @@ jobs:
uses: ./.github/actions/save-coverage-data
benchmarks:
needs: [ check-permissions, build-neon ]
runs-on: [ self-hosted, gen3, small ]
container:
image: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/rust:pinned
# Default shared memory is 64mb
options: --init --shm-size=512mb
needs: [ build-neon ]
if: github.ref_name == 'main' || contains(github.event.pull_request.labels.*.name, 'run-benchmarks')
strategy:
fail-fast: false
@@ -460,13 +417,12 @@ jobs:
# while coverage is currently collected for the debug ones
create-test-report:
needs: [ check-permissions, regress-tests, coverage-report, benchmarks ]
if: ${{ !cancelled() && contains(fromJSON('["skipped", "success"]'), needs.check-permissions.result) }}
runs-on: [ self-hosted, gen3, small ]
container:
image: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/rust:pinned
options: --init
needs: [ regress-tests, coverage-report, benchmarks ]
if: ${{ !cancelled() }}
steps:
- uses: actions/checkout@v3
@@ -507,12 +463,11 @@ jobs:
})
coverage-report:
needs: [ check-permissions, regress-tests ]
runs-on: [ self-hosted, gen3, small ]
container:
image: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/rust:pinned
options: --init
needs: [ regress-tests ]
strategy:
fail-fast: false
matrix:
@@ -627,11 +582,11 @@ jobs:
})
trigger-e2e-tests:
needs: [ check-permissions, promote-images, tag ]
runs-on: [ self-hosted, gen3, small ]
container:
image: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/base:pinned
options: --init
needs: [ promote-images, tag ]
steps:
- name: Set PR's status to pending and request a remote CI test
run: |
@@ -672,8 +627,8 @@ jobs:
}"
neon-image:
needs: [ check-permissions, tag ]
runs-on: [ self-hosted, gen3, large ]
needs: [ tag ]
container: gcr.io/kaniko-project/executor:v1.9.2-debug
defaults:
run:
@@ -720,7 +675,7 @@ jobs:
compute-tools-image:
runs-on: [ self-hosted, gen3, large ]
needs: [ check-permissions, tag ]
needs: [ tag ]
container: gcr.io/kaniko-project/executor:v1.9.2-debug
defaults:
run:
@@ -765,17 +720,17 @@ jobs:
run: rm -rf ~/.ecr
compute-node-image:
needs: [ check-permissions, tag ]
runs-on: [ self-hosted, gen3, large ]
container:
image: gcr.io/kaniko-project/executor:v1.9.2-debug
# Workaround for "Resolving download.osgeo.org (download.osgeo.org)... failed: Temporary failure in name resolution.""
# Should be prevented by https://github.com/neondatabase/neon/issues/4281
options: --add-host=download.osgeo.org:140.211.15.30
needs: [ tag ]
strategy:
fail-fast: false
matrix:
version: [ v14, v15, v16 ]
version: [ v14, v15 ]
defaults:
run:
shell: sh -eu {0}
@@ -824,12 +779,12 @@ jobs:
run: rm -rf ~/.ecr
vm-compute-node-image:
needs: [ check-permissions, tag, compute-node-image ]
runs-on: [ self-hosted, gen3, large ]
needs: [ tag, compute-node-image ]
strategy:
fail-fast: false
matrix:
version: [ v14, v15, v16 ]
version: [ v14, v15 ]
defaults:
run:
shell: sh -eu {0}
@@ -866,7 +821,7 @@ jobs:
docker push 369495373322.dkr.ecr.eu-central-1.amazonaws.com/vm-compute-node-${{ matrix.version }}:${{needs.tag.outputs.build-tag}}
test-images:
needs: [ check-permissions, tag, neon-image, compute-node-image, compute-tools-image ]
needs: [ tag, neon-image, compute-node-image, compute-tools-image ]
runs-on: [ self-hosted, gen3, small ]
steps:
@@ -909,8 +864,8 @@ jobs:
docker compose -f ./docker-compose/docker-compose.yml down
promote-images:
needs: [ check-permissions, tag, test-images, vm-compute-node-image ]
runs-on: [ self-hosted, gen3, small ]
needs: [ tag, test-images, vm-compute-node-image ]
container: golang:1.19-bullseye
# Don't add if-condition here.
# The job should always be run because we have dependant other jobs that shouldn't be skipped
@@ -930,7 +885,6 @@ jobs:
run: |
crane pull 369495373322.dkr.ecr.eu-central-1.amazonaws.com/vm-compute-node-v14:${{needs.tag.outputs.build-tag}} vm-compute-node-v14
crane pull 369495373322.dkr.ecr.eu-central-1.amazonaws.com/vm-compute-node-v15:${{needs.tag.outputs.build-tag}} vm-compute-node-v15
crane pull 369495373322.dkr.ecr.eu-central-1.amazonaws.com/vm-compute-node-v16:${{needs.tag.outputs.build-tag}} vm-compute-node-v16
- name: Add latest tag to images
if: |
@@ -943,8 +897,6 @@ jobs:
crane tag 369495373322.dkr.ecr.eu-central-1.amazonaws.com/vm-compute-node-v14:${{needs.tag.outputs.build-tag}} latest
crane tag 369495373322.dkr.ecr.eu-central-1.amazonaws.com/compute-node-v15:${{needs.tag.outputs.build-tag}} latest
crane tag 369495373322.dkr.ecr.eu-central-1.amazonaws.com/vm-compute-node-v15:${{needs.tag.outputs.build-tag}} latest
crane tag 369495373322.dkr.ecr.eu-central-1.amazonaws.com/compute-node-v16:${{needs.tag.outputs.build-tag}} latest
crane tag 369495373322.dkr.ecr.eu-central-1.amazonaws.com/vm-compute-node-v16:${{needs.tag.outputs.build-tag}} latest
- name: Push images to production ECR
if: |
@@ -957,8 +909,6 @@ jobs:
crane copy 369495373322.dkr.ecr.eu-central-1.amazonaws.com/vm-compute-node-v14:${{needs.tag.outputs.build-tag}} 093970136003.dkr.ecr.eu-central-1.amazonaws.com/vm-compute-node-v14:latest
crane copy 369495373322.dkr.ecr.eu-central-1.amazonaws.com/compute-node-v15:${{needs.tag.outputs.build-tag}} 093970136003.dkr.ecr.eu-central-1.amazonaws.com/compute-node-v15:latest
crane copy 369495373322.dkr.ecr.eu-central-1.amazonaws.com/vm-compute-node-v15:${{needs.tag.outputs.build-tag}} 093970136003.dkr.ecr.eu-central-1.amazonaws.com/vm-compute-node-v15:latest
crane copy 369495373322.dkr.ecr.eu-central-1.amazonaws.com/compute-node-v16:${{needs.tag.outputs.build-tag}} 093970136003.dkr.ecr.eu-central-1.amazonaws.com/compute-node-v16:latest
crane copy 369495373322.dkr.ecr.eu-central-1.amazonaws.com/vm-compute-node-v16:${{needs.tag.outputs.build-tag}} 093970136003.dkr.ecr.eu-central-1.amazonaws.com/vm-compute-node-v16:latest
- name: Configure Docker Hub login
run: |
@@ -970,7 +920,6 @@ jobs:
run: |
crane push vm-compute-node-v14 neondatabase/vm-compute-node-v14:${{needs.tag.outputs.build-tag}}
crane push vm-compute-node-v15 neondatabase/vm-compute-node-v15:${{needs.tag.outputs.build-tag}}
crane push vm-compute-node-v16 neondatabase/vm-compute-node-v16:${{needs.tag.outputs.build-tag}}
- name: Push latest tags to Docker Hub
if: |
@@ -983,15 +932,13 @@ jobs:
crane tag neondatabase/vm-compute-node-v14:${{needs.tag.outputs.build-tag}} latest
crane tag neondatabase/compute-node-v15:${{needs.tag.outputs.build-tag}} latest
crane tag neondatabase/vm-compute-node-v15:${{needs.tag.outputs.build-tag}} latest
crane tag neondatabase/compute-node-v16:${{needs.tag.outputs.build-tag}} latest
crane tag neondatabase/vm-compute-node-v16:${{needs.tag.outputs.build-tag}} latest
- name: Cleanup ECR folder
run: rm -rf ~/.ecr
trigger-custom-extensions-build-and-wait:
needs: [ check-permissions, tag ]
runs-on: ubuntu-latest
needs: [ tag ]
steps:
- name: Set PR's status to pending and request a remote CI test
run: |
@@ -1034,7 +981,7 @@ jobs:
last_status="" # a variable to carry the last status of the "build-and-upload-extensions" context
for ((i=0; i <= TIMEOUT; i+=INTERVAL)); do
for ((i=0; i <= $TIMEOUT; i+=$INTERVAL)); do
sleep $INTERVAL
# Get statuses for the latest commit in the PR / branch
@@ -1064,11 +1011,10 @@ jobs:
exit 1
deploy:
needs: [ check-permissions, promote-images, tag, regress-tests, trigger-custom-extensions-build-and-wait ]
if: ( github.ref_name == 'main' || github.ref_name == 'release' ) && github.event_name != 'workflow_dispatch'
runs-on: [ self-hosted, gen3, small ]
container: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/ansible:latest
needs: [ promote-images, tag, regress-tests, trigger-custom-extensions-build-and-wait ]
if: ( github.ref_name == 'main' || github.ref_name == 'release' ) && github.event_name != 'workflow_dispatch'
steps:
- name: Fix git ownership
run: |
@@ -1106,35 +1052,20 @@ jobs:
# Retry script for 5XX server errors: https://github.com/actions/github-script#retries
retries: 5
script: |
await github.rest.git.createRef({
github.rest.git.createRef({
owner: context.repo.owner,
repo: context.repo.repo,
ref: "refs/tags/${{ needs.tag.outputs.build-tag }}",
sha: context.sha,
})
- name: Create GitHub release
if: github.ref_name == 'release'
uses: actions/github-script@v6
with:
# Retry script for 5XX server errors: https://github.com/actions/github-script#retries
retries: 5
script: |
await github.rest.repos.createRelease({
owner: context.repo.owner,
repo: context.repo.repo,
tag_name: "${{ needs.tag.outputs.build-tag }}",
generate_release_notes: true,
})
promote-compatibility-data:
needs: [ check-permissions, promote-images, tag, regress-tests ]
if: github.ref_name == 'release'
runs-on: [ self-hosted, gen3, small ]
container:
image: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/base:pinned
options: --init
needs: [ promote-images, tag, regress-tests ]
if: github.ref_name == 'release' && github.event_name != 'workflow_dispatch'
steps:
- name: Promote compatibility snapshot for the release
env:
@@ -1142,7 +1073,7 @@ jobs:
PREFIX: artifacts/latest
run: |
# Update compatibility snapshot for the release
for pg_version in v14 v15 v16; do
for pg_version in v14 v15; do
for build_type in debug release; do
OLD_FILENAME=compatibility-snapshot-${build_type}-pg${pg_version}-${GITHUB_RUN_ID}.tar.zst
NEW_FILENAME=compatibility-snapshot-${build_type}-pg${pg_version}.tar.zst

View File

@@ -38,7 +38,7 @@ jobs:
fetch-depth: 1
- name: Install macOS postgres dependencies
run: brew install flex bison openssl protobuf icu4c pkg-config
run: brew install flex bison openssl protobuf
- name: Set pg 14 revision for caching
id: pg_v14_rev
@@ -48,10 +48,6 @@ jobs:
id: pg_v15_rev
run: echo pg_rev=$(git rev-parse HEAD:vendor/postgres-v15) >> $GITHUB_OUTPUT
- name: Set pg 16 revision for caching
id: pg_v16_rev
run: echo pg_rev=$(git rev-parse HEAD:vendor/postgres-v16) >> $GITHUB_OUTPUT
- name: Cache postgres v14 build
id: cache_pg_14
uses: actions/cache@v3
@@ -66,13 +62,6 @@ jobs:
path: pg_install/v15
key: v1-${{ runner.os }}-${{ env.BUILD_TYPE }}-pg-${{ steps.pg_v15_rev.outputs.pg_rev }}-${{ hashFiles('Makefile') }}
- name: Cache postgres v16 build
id: cache_pg_16
uses: actions/cache@v3
with:
path: pg_install/v16
key: v1-${{ runner.os }}-${{ env.BUILD_TYPE }}-pg-${{ steps.pg_v16_rev.outputs.pg_rev }}-${{ hashFiles('Makefile') }}
- name: Set extra env for macOS
run: |
echo 'LDFLAGS=-L/usr/local/opt/openssl@3/lib' >> $GITHUB_ENV
@@ -96,10 +85,6 @@ jobs:
if: steps.cache_pg_15.outputs.cache-hit != 'true'
run: make postgres-v15 -j$(nproc)
- name: Build postgres v16
if: steps.cache_pg_16.outputs.cache-hit != 'true'
run: make postgres-v16 -j$(nproc)
- name: Build neon extensions
run: make neon-pg-ext -j$(nproc)

View File

@@ -1,29 +0,0 @@
name: Notify Slack channel about upcoming release
concurrency:
group: ${{ github.workflow }}-${{ github.event.number }}
cancel-in-progress: true
on:
pull_request:
branches:
- release
types:
# Default types that triggers a workflow:
# - https://docs.github.com/en/actions/using-workflows/events-that-trigger-workflows#pull_request
- opened
- synchronize
- reopened
# Additional types that we want to handle:
- closed
jobs:
notify:
runs-on: [ ubuntu-latest ]
steps:
- uses: neondatabase/dev-actions/release-pr-notify@main
with:
slack-token: ${{ secrets.SLACK_BOT_TOKEN }}
slack-channel-id: ${{ vars.SLACK_UPCOMING_RELEASE_CHANNEL_ID || 'C05QQ9J1BRC' }} # if not set, then `#test-release-notifications`
github-token: ${{ secrets.GITHUB_TOKEN }}

View File

@@ -7,14 +7,11 @@ on:
jobs:
create_release_branch:
runs-on: [ ubuntu-latest ]
permissions:
contents: write # for `git push`
runs-on: [ubuntu-latest]
steps:
- name: Check out code
uses: actions/checkout@v4
uses: actions/checkout@v3
with:
ref: main
@@ -29,16 +26,9 @@ jobs:
run: git push origin releases/${{ steps.date.outputs.date }}
- name: Create pull request into release
env:
GH_TOKEN: ${{ secrets.CI_ACCESS_TOKEN }}
run: |
cat << EOF > body.md
## Release ${{ steps.date.outputs.date }}
**Please merge this PR using 'Create a merge commit'!**
EOF
gh pr create --title "Release ${{ steps.date.outputs.date }}" \
--body-file "body.md" \
--head "releases/${{ steps.date.outputs.date }}" \
--base "release"
uses: thomaseizinger/create-pull-request@e3972219c86a56550fb70708d96800d8e24ba862 # 1.3.0
with:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
head: releases/${{ steps.date.outputs.date }}
base: release
title: Release ${{ steps.date.outputs.date }}

4
.gitmodules vendored
View File

@@ -6,7 +6,3 @@
path = vendor/postgres-v15
url = https://github.com/neondatabase/postgres.git
branch = REL_15_STABLE_neon
[submodule "vendor/postgres-v16"]
path = vendor/postgres-v16
url = https://github.com/neondatabase/postgres.git
branch = REL_16_STABLE_neon

View File

@@ -27,28 +27,3 @@ your patch's fault. Help to fix the root cause if something else has
broken the CI, before pushing.
*Happy Hacking!*
# How to run a CI pipeline on Pull Requests from external contributors
_An instruction for maintainers_
## TL;DR:
- Review the PR
- If and only if it looks **safe** (i.e. it doesn't contain any malicious code which could expose secrets or harm the CI), then:
- Press the "Approve and run" button in GitHub UI
- Add the `approved-for-ci-run` label to the PR
Repeat all steps after any change to the PR.
- When the changes are ready to get merged — merge the original PR (not the internal one)
## Longer version:
GitHub Actions triggered by the `pull_request` event don't share repository secrets with the forks (for security reasons).
So, passing the CI pipeline on Pull Requests from external contributors is impossible.
We're using the following approach to make it work:
- After the review, assign the `approved-for-ci-run` label to the PR if changes look safe
- A GitHub Action will create an internal branch and a new PR with the same changes (for example, for a PR `#1234`, it'll be a branch `ci-run/pr-1234`)
- Because the PR is created from the internal branch, it is able to access repository secrets (that's why it's crucial to make sure that the PR doesn't contain any malicious code that could expose our secrets or intentionally harm the CI)
- The label gets removed automatically, so to run CI again with new changes, the label should be added again (after the review)
For details see [`approved-for-ci-run.yml`](.github/workflows/approved-for-ci-run.yml)

View File

@@ -1,5 +1,4 @@
[workspace]
resolver = "2"
members = [
"compute_tools",
"control_plane",

View File

@@ -12,7 +12,6 @@ WORKDIR /home/nonroot
COPY --chown=nonroot vendor/postgres-v14 vendor/postgres-v14
COPY --chown=nonroot vendor/postgres-v15 vendor/postgres-v15
COPY --chown=nonroot vendor/postgres-v16 vendor/postgres-v16
COPY --chown=nonroot pgxn pgxn
COPY --chown=nonroot Makefile Makefile
COPY --chown=nonroot scripts/ninstall.sh scripts/ninstall.sh
@@ -40,7 +39,6 @@ ARG CACHEPOT_BUCKET=neon-github-dev
COPY --from=pg-build /home/nonroot/pg_install/v14/include/postgresql/server pg_install/v14/include/postgresql/server
COPY --from=pg-build /home/nonroot/pg_install/v15/include/postgresql/server pg_install/v15/include/postgresql/server
COPY --from=pg-build /home/nonroot/pg_install/v16/include/postgresql/server pg_install/v16/include/postgresql/server
COPY --chown=nonroot . .
# Show build caching stats to check if it was used in the end.
@@ -67,7 +65,6 @@ RUN set -e \
&& apt install -y \
libreadline-dev \
libseccomp-dev \
libicu67 \
openssl \
ca-certificates \
&& rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/* \
@@ -84,7 +81,6 @@ COPY --from=build --chown=neon:neon /home/nonroot/target/release/neon_local
COPY --from=pg-build /home/nonroot/pg_install/v14 /usr/local/v14/
COPY --from=pg-build /home/nonroot/pg_install/v15 /usr/local/v15/
COPY --from=pg-build /home/nonroot/pg_install/v16 /usr/local/v16/
COPY --from=pg-build /home/nonroot/postgres_install.tar.gz /data/
# By default, pageserver uses `.neon/` working directory in WORKDIR, so create one and fill it with the dummy config.

View File

@@ -74,8 +74,8 @@ RUN wget https://gitlab.com/Oslandia/SFCGAL/-/archive/v1.3.10/SFCGAL-v1.3.10.tar
ENV PATH "/usr/local/pgsql/bin:$PATH"
RUN wget https://download.osgeo.org/postgis/source/postgis-3.3.3.tar.gz -O postgis.tar.gz && \
echo "74eb356e3f85f14233791013360881b6748f78081cc688ff9d6f0f673a762d13 postgis.tar.gz" | sha256sum --check && \
RUN wget https://download.osgeo.org/postgis/source/postgis-3.3.2.tar.gz -O postgis.tar.gz && \
echo "9a2a219da005a1730a39d1959a1c7cec619b1efb009b65be80ffc25bad299068 postgis.tar.gz" | sha256sum --check && \
mkdir postgis-src && cd postgis-src && tar xvzf ../postgis.tar.gz --strip-components=1 -C . && \
find /usr/local/pgsql -type f | sed 's|^/usr/local/pgsql/||' > /before.txt &&\
./autogen.sh && \
@@ -124,8 +124,8 @@ COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
RUN apt update && \
apt install -y ninja-build python3-dev libncurses5 binutils clang
RUN wget https://github.com/plv8/plv8/archive/refs/tags/v3.1.8.tar.gz -O plv8.tar.gz && \
echo "92b10c7db39afdae97ff748c9ec54713826af222c459084ad002571b79eb3f49 plv8.tar.gz" | sha256sum --check && \
RUN wget https://github.com/plv8/plv8/archive/refs/tags/v3.1.5.tar.gz -O plv8.tar.gz && \
echo "1e108d5df639e4c189e1c5bdfa2432a521c126ca89e7e5a969d46899ca7bf106 plv8.tar.gz" | sha256sum --check && \
mkdir plv8-src && cd plv8-src && tar xvzf ../plv8.tar.gz --strip-components=1 -C . && \
export PATH="/usr/local/pgsql/bin:$PATH" && \
make DOCKER=1 -j $(getconf _NPROCESSORS_ONLN) install && \
@@ -172,8 +172,8 @@ RUN wget https://github.com/uber/h3/archive/refs/tags/v4.1.0.tar.gz -O h3.tar.gz
cp -R /h3/usr / && \
rm -rf build
RUN wget https://github.com/zachasme/h3-pg/archive/refs/tags/v4.1.3.tar.gz -O h3-pg.tar.gz && \
echo "5c17f09a820859ffe949f847bebf1be98511fb8f1bd86f94932512c00479e324 h3-pg.tar.gz" | sha256sum --check && \
RUN wget https://github.com/zachasme/h3-pg/archive/refs/tags/v4.1.2.tar.gz -O h3-pg.tar.gz && \
echo "c135aa45999b2ad1326d2537c1cadef96d52660838e4ca371706c08fdea1a956 h3-pg.tar.gz" | sha256sum --check && \
mkdir h3-pg-src && cd h3-pg-src && tar xvzf ../h3-pg.tar.gz --strip-components=1 -C . && \
export PATH="/usr/local/pgsql/bin:$PATH" && \
make -j $(getconf _NPROCESSORS_ONLN) && \
@@ -243,8 +243,8 @@ RUN wget https://github.com/michelp/pgjwt/archive/9742dab1b2f297ad3811120db7b214
FROM build-deps AS hypopg-pg-build
COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
RUN wget https://github.com/HypoPG/hypopg/archive/refs/tags/1.4.0.tar.gz -O hypopg.tar.gz && \
echo "0821011743083226fc9b813c1f2ef5897a91901b57b6bea85a78e466187c6819 hypopg.tar.gz" | sha256sum --check && \
RUN wget https://github.com/HypoPG/hypopg/archive/refs/tags/1.3.1.tar.gz -O hypopg.tar.gz && \
echo "e7f01ee0259dc1713f318a108f987663d60f3041948c2ada57a94b469565ca8e hypopg.tar.gz" | sha256sum --check && \
mkdir hypopg-src && cd hypopg-src && tar xvzf ../hypopg.tar.gz --strip-components=1 -C . && \
make -j $(getconf _NPROCESSORS_ONLN) PG_CONFIG=/usr/local/pgsql/bin/pg_config && \
make -j $(getconf _NPROCESSORS_ONLN) install PG_CONFIG=/usr/local/pgsql/bin/pg_config && \
@@ -307,8 +307,8 @@ RUN wget https://github.com/theory/pgtap/archive/refs/tags/v1.2.0.tar.gz -O pgta
FROM build-deps AS ip4r-pg-build
COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
RUN wget https://github.com/RhodiumToad/ip4r/archive/refs/tags/2.4.2.tar.gz -O ip4r.tar.gz && \
echo "0f7b1f159974f49a47842a8ab6751aecca1ed1142b6d5e38d81b064b2ead1b4b ip4r.tar.gz" | sha256sum --check && \
RUN wget https://github.com/RhodiumToad/ip4r/archive/refs/tags/2.4.1.tar.gz -O ip4r.tar.gz && \
echo "78b9f0c1ae45c22182768fe892a32d533c82281035e10914111400bf6301c726 ip4r.tar.gz" | sha256sum --check && \
mkdir ip4r-src && cd ip4r-src && tar xvzf ../ip4r.tar.gz --strip-components=1 -C . && \
make -j $(getconf _NPROCESSORS_ONLN) PG_CONFIG=/usr/local/pgsql/bin/pg_config && \
make -j $(getconf _NPROCESSORS_ONLN) install PG_CONFIG=/usr/local/pgsql/bin/pg_config && \
@@ -323,8 +323,8 @@ RUN wget https://github.com/RhodiumToad/ip4r/archive/refs/tags/2.4.2.tar.gz -O i
FROM build-deps AS prefix-pg-build
COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
RUN wget https://github.com/dimitri/prefix/archive/refs/tags/v1.2.10.tar.gz -O prefix.tar.gz && \
echo "4342f251432a5f6fb05b8597139d3ccde8dcf87e8ca1498e7ee931ca057a8575 prefix.tar.gz" | sha256sum --check && \
RUN wget https://github.com/dimitri/prefix/archive/refs/tags/v1.2.9.tar.gz -O prefix.tar.gz && \
echo "38d30a08d0241a8bbb8e1eb8f0152b385051665a8e621c8899e7c5068f8b511e prefix.tar.gz" | sha256sum --check && \
mkdir prefix-src && cd prefix-src && tar xvzf ../prefix.tar.gz --strip-components=1 -C . && \
make -j $(getconf _NPROCESSORS_ONLN) PG_CONFIG=/usr/local/pgsql/bin/pg_config && \
make -j $(getconf _NPROCESSORS_ONLN) install PG_CONFIG=/usr/local/pgsql/bin/pg_config && \
@@ -339,8 +339,8 @@ RUN wget https://github.com/dimitri/prefix/archive/refs/tags/v1.2.10.tar.gz -O p
FROM build-deps AS hll-pg-build
COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
RUN wget https://github.com/citusdata/postgresql-hll/archive/refs/tags/v2.18.tar.gz -O hll.tar.gz && \
echo "e2f55a6f4c4ab95ee4f1b4a2b73280258c5136b161fe9d059559556079694f0e hll.tar.gz" | sha256sum --check && \
RUN wget https://github.com/citusdata/postgresql-hll/archive/refs/tags/v2.17.tar.gz -O hll.tar.gz && \
echo "9a18288e884f197196b0d29b9f178ba595b0dfc21fbf7a8699380e77fa04c1e9 hll.tar.gz" | sha256sum --check && \
mkdir hll-src && cd hll-src && tar xvzf ../hll.tar.gz --strip-components=1 -C . && \
make -j $(getconf _NPROCESSORS_ONLN) PG_CONFIG=/usr/local/pgsql/bin/pg_config && \
make -j $(getconf _NPROCESSORS_ONLN) install PG_CONFIG=/usr/local/pgsql/bin/pg_config && \
@@ -355,8 +355,8 @@ RUN wget https://github.com/citusdata/postgresql-hll/archive/refs/tags/v2.18.tar
FROM build-deps AS plpgsql-check-pg-build
COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
RUN wget https://github.com/okbob/plpgsql_check/archive/refs/tags/v2.4.0.tar.gz -O plpgsql_check.tar.gz && \
echo "9ba58387a279b35a3bfa39ee611e5684e6cddb2ba046ddb2c5190b3bd2ca254a plpgsql_check.tar.gz" | sha256sum --check && \
RUN wget https://github.com/okbob/plpgsql_check/archive/refs/tags/v2.3.2.tar.gz -O plpgsql_check.tar.gz && \
echo "9d81167c4bbeb74eebf7d60147b21961506161addc2aee537f95ad8efeae427b plpgsql_check.tar.gz" | sha256sum --check && \
mkdir plpgsql_check-src && cd plpgsql_check-src && tar xvzf ../plpgsql_check.tar.gz --strip-components=1 -C . && \
make -j $(getconf _NPROCESSORS_ONLN) PG_CONFIG=/usr/local/pgsql/bin/pg_config USE_PGXS=1 && \
make -j $(getconf _NPROCESSORS_ONLN) install PG_CONFIG=/usr/local/pgsql/bin/pg_config USE_PGXS=1 && \
@@ -371,21 +371,12 @@ RUN wget https://github.com/okbob/plpgsql_check/archive/refs/tags/v2.4.0.tar.gz
FROM build-deps AS timescaledb-pg-build
COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
ARG PG_VERSION
ENV PATH "/usr/local/pgsql/bin:$PATH"
RUN case "${PG_VERSION}" in \
"v14" | "v15") \
export TIMESCALEDB_VERSION=2.10.1 \
export TIMESCALEDB_CHECKSUM=6fca72a6ed0f6d32d2b3523951ede73dc5f9b0077b38450a029a5f411fdb8c73 \
;; \
*) \
echo "TimescaleDB not supported on this PostgreSQL version. See https://github.com/timescale/timescaledb/issues/5752" && exit 0;; \
esac && \
apt-get update && \
RUN apt-get update && \
apt-get install -y cmake && \
wget https://github.com/timescale/timescaledb/archive/refs/tags/${TIMESCALEDB_VERSION}.tar.gz -O timescaledb.tar.gz && \
echo "${TIMESCALEDB_CHECKSUM} timescaledb.tar.gz" | sha256sum --check && \
wget https://github.com/timescale/timescaledb/archive/refs/tags/2.10.1.tar.gz -O timescaledb.tar.gz && \
echo "6fca72a6ed0f6d32d2b3523951ede73dc5f9b0077b38450a029a5f411fdb8c73 timescaledb.tar.gz" | sha256sum --check && \
mkdir timescaledb-src && cd timescaledb-src && tar xvzf ../timescaledb.tar.gz --strip-components=1 -C . && \
./bootstrap -DSEND_TELEMETRY_DEFAULT:BOOL=OFF -DUSE_TELEMETRY:BOOL=OFF -DAPACHE_ONLY:BOOL=ON -DCMAKE_BUILD_TYPE=Release && \
cd build && \
@@ -414,10 +405,6 @@ RUN case "${PG_VERSION}" in \
export PG_HINT_PLAN_VERSION=15_1_5_0 \
export PG_HINT_PLAN_CHECKSUM=564cbbf4820973ffece63fbf76e3c0af62c4ab23543142c7caaa682bc48918be \
;; \
"v16") \
export PG_HINT_PLAN_VERSION=16_1_6_0 \
export PG_HINT_PLAN_CHECKSUM=ce6a8040c78012000f5da7240caf6a971401412f41d33f930f09291e6c304b99 \
;; \
*) \
echo "Export the valid PG_HINT_PLAN_VERSION variable" && exit 1 \
;; \
@@ -465,8 +452,8 @@ FROM build-deps AS pg-cron-pg-build
COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
ENV PATH "/usr/local/pgsql/bin/:$PATH"
RUN wget https://github.com/citusdata/pg_cron/archive/refs/tags/v1.6.0.tar.gz -O pg_cron.tar.gz && \
echo "383a627867d730222c272bfd25cd5e151c578d73f696d32910c7db8c665cc7db pg_cron.tar.gz" | sha256sum --check && \
RUN wget https://github.com/citusdata/pg_cron/archive/refs/tags/v1.5.2.tar.gz -O pg_cron.tar.gz && \
echo "6f7f0980c03f1e2a6a747060e67bf4a303ca2a50e941e2c19daeed2b44dec744 pg_cron.tar.gz" | sha256sum --check && \
mkdir pg_cron-src && cd pg_cron-src && tar xvzf ../pg_cron.tar.gz --strip-components=1 -C . && \
make -j $(getconf _NPROCESSORS_ONLN) && \
make -j $(getconf _NPROCESSORS_ONLN) install && \
@@ -492,8 +479,8 @@ RUN apt-get update && \
libfreetype6-dev
ENV PATH "/usr/local/pgsql/bin/:/usr/local/pgsql/:$PATH"
RUN wget https://github.com/rdkit/rdkit/archive/refs/tags/Release_2023_03_3.tar.gz -O rdkit.tar.gz && \
echo "bdbf9a2e6988526bfeb8c56ce3cdfe2998d60ac289078e2215374288185e8c8d rdkit.tar.gz" | sha256sum --check && \
RUN wget https://github.com/rdkit/rdkit/archive/refs/tags/Release_2023_03_1.tar.gz -O rdkit.tar.gz && \
echo "db346afbd0ba52c843926a2a62f8a38c7b774ffab37eaf382d789a824f21996c rdkit.tar.gz" | sha256sum --check && \
mkdir rdkit-src && cd rdkit-src && tar xvzf ../rdkit.tar.gz --strip-components=1 -C . && \
cmake \
-D RDK_BUILD_CAIRO_SUPPORT=OFF \
@@ -564,16 +551,8 @@ FROM build-deps AS pg-embedding-pg-build
COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
ENV PATH "/usr/local/pgsql/bin/:$PATH"
RUN case "${PG_VERSION}" in \
"v14" | "v15") \
export PG_EMBEDDING_VERSION=0.3.6 \
export PG_EMBEDDING_CHECKSUM=b2e2b359335d26987778c7fae0c9bcc8ebc3530fc214113be1ddbc8a136e52ac \
;; \
*) \
echo "pg_embedding not supported on this PostgreSQL version. Use pgvector instead." && exit 0;; \
esac && \
wget https://github.com/neondatabase/pg_embedding/archive/refs/tags/${PG_EMBEDDING_VERSION}.tar.gz -O pg_embedding.tar.gz && \
echo "${PG_EMBEDDING_CHECKSUM} pg_embedding.tar.gz" | sha256sum --check && \
RUN wget https://github.com/neondatabase/pg_embedding/archive/refs/tags/0.3.5.tar.gz -O pg_embedding.tar.gz && \
echo "0e95b27b8b6196e2cf0a0c9ec143fe2219b82e54c5bb4ee064e76398cbe69ae9 pg_embedding.tar.gz" | sha256sum --check && \
mkdir pg_embedding-src && cd pg_embedding-src && tar xvzf ../pg_embedding.tar.gz --strip-components=1 -C . && \
make -j $(getconf _NPROCESSORS_ONLN) && \
make -j $(getconf _NPROCESSORS_ONLN) install && \
@@ -605,10 +584,6 @@ RUN wget https://gitlab.com/dalibo/postgresql_anonymizer/-/archive/1.1.0/postgre
# Layer "rust extensions"
# This layer is used to build `pgx` deps
#
# FIXME: This needs to be updated to latest version of 'pgrx' (it was renamed from
# 'pgx' to 'pgrx') for PostgreSQL 16. And that in turn requires bumping the pgx
# dependency on all the rust extension that depend on it, too.
#
#########################################################################################
FROM build-deps AS rust-extensions-build
COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
@@ -623,17 +598,7 @@ USER nonroot
WORKDIR /home/nonroot
ARG PG_VERSION
RUN case "${PG_VERSION}" in \
"v14" | "v15") \
;; \
"v16") \
echo "TODO: Not yet supported for PostgreSQL 16. Need to update pgrx dependencies" && exit 0 \
;; \
*) \
echo "unexpected PostgreSQL version ${PG_VERSION}" && exit 1 \
;; \
esac && \
curl -sSO https://static.rust-lang.org/rustup/dist/$(uname -m)-unknown-linux-gnu/rustup-init && \
RUN curl -sSO https://static.rust-lang.org/rustup/dist/$(uname -m)-unknown-linux-gnu/rustup-init && \
chmod +x rustup-init && \
./rustup-init -y --no-modify-path --profile minimal --default-toolchain stable && \
rm rustup-init && \
@@ -650,21 +615,10 @@ USER root
#########################################################################################
FROM rust-extensions-build AS pg-jsonschema-pg-build
ARG PG_VERSION
# caeab60d70b2fd3ae421ec66466a3abbb37b7ee6 made on 06/03/2023
# there is no release tag yet, but we need it due to the superuser fix in the control file, switch to git tag after release >= 0.1.5
RUN case "${PG_VERSION}" in \
"v14" | "v15") \
;; \
"v16") \
echo "TODO: Not yet supported for PostgreSQL 16. Need to update pgrx dependencies" && exit 0 \
;; \
*) \
echo "unexpected PostgreSQL version \"${PG_VERSION}\"" && exit 1 \
;; \
esac && \
wget https://github.com/supabase/pg_jsonschema/archive/caeab60d70b2fd3ae421ec66466a3abbb37b7ee6.tar.gz -O pg_jsonschema.tar.gz && \
RUN wget https://github.com/supabase/pg_jsonschema/archive/caeab60d70b2fd3ae421ec66466a3abbb37b7ee6.tar.gz -O pg_jsonschema.tar.gz && \
echo "54129ce2e7ee7a585648dbb4cef6d73f795d94fe72f248ac01119992518469a4 pg_jsonschema.tar.gz" | sha256sum --check && \
mkdir pg_jsonschema-src && cd pg_jsonschema-src && tar xvzf ../pg_jsonschema.tar.gz --strip-components=1 -C . && \
sed -i 's/pgx = "0.7.1"/pgx = { version = "0.7.3", features = [ "unsafe-postgres" ] }/g' Cargo.toml && \
@@ -679,23 +633,12 @@ RUN case "${PG_VERSION}" in \
#########################################################################################
FROM rust-extensions-build AS pg-graphql-pg-build
ARG PG_VERSION
# b4988843647450a153439be367168ed09971af85 made on 22/02/2023 (from remove-pgx-contrib-spiext branch)
# Currently pgx version bump to >= 0.7.2 causes "call to unsafe function" compliation errors in
# pgx-contrib-spiext. There is a branch that removes that dependency, so use it. It is on the
# same 1.1 version we've used before.
RUN case "${PG_VERSION}" in \
"v14" | "v15") \
;; \
"v16") \
echo "TODO: Not yet supported for PostgreSQL 16. Need to update pgrx dependencies" && exit 0 \
;; \
*) \
echo "unexpected PostgreSQL version" && exit 1 \
;; \
esac && \
wget https://github.com/yrashk/pg_graphql/archive/b4988843647450a153439be367168ed09971af85.tar.gz -O pg_graphql.tar.gz && \
RUN wget https://github.com/yrashk/pg_graphql/archive/b4988843647450a153439be367168ed09971af85.tar.gz -O pg_graphql.tar.gz && \
echo "0c7b0e746441b2ec24187d0e03555faf935c2159e2839bddd14df6dafbc8c9bd pg_graphql.tar.gz" | sha256sum --check && \
mkdir pg_graphql-src && cd pg_graphql-src && tar xvzf ../pg_graphql.tar.gz --strip-components=1 -C . && \
sed -i 's/pgx = "~0.7.1"/pgx = { version = "0.7.3", features = [ "unsafe-postgres" ] }/g' Cargo.toml && \
@@ -713,20 +656,9 @@ RUN case "${PG_VERSION}" in \
#########################################################################################
FROM rust-extensions-build AS pg-tiktoken-pg-build
ARG PG_VERSION
# 801f84f08c6881c8aa30f405fafbf00eec386a72 made on 10/03/2023
RUN case "${PG_VERSION}" in \
"v14" | "v15") \
;; \
"v16") \
echo "TODO: Not yet supported for PostgreSQL 16. Need to update pgrx dependencies" && exit 0 \
;; \
*) \
echo "unexpected PostgreSQL version" && exit 1 \
;; \
esac && \
wget https://github.com/kelvich/pg_tiktoken/archive/801f84f08c6881c8aa30f405fafbf00eec386a72.tar.gz -O pg_tiktoken.tar.gz && \
RUN wget https://github.com/kelvich/pg_tiktoken/archive/801f84f08c6881c8aa30f405fafbf00eec386a72.tar.gz -O pg_tiktoken.tar.gz && \
echo "52f60ac800993a49aa8c609961842b611b6b1949717b69ce2ec9117117e16e4a pg_tiktoken.tar.gz" | sha256sum --check && \
mkdir pg_tiktoken-src && cd pg_tiktoken-src && tar xvzf ../pg_tiktoken.tar.gz --strip-components=1 -C . && \
cargo pgx install --release && \
@@ -740,19 +672,8 @@ RUN case "${PG_VERSION}" in \
#########################################################################################
FROM rust-extensions-build AS pg-pgx-ulid-build
ARG PG_VERSION
RUN case "${PG_VERSION}" in \
"v14" | "v15") \
;; \
"v16") \
echo "TODO: Not yet supported for PostgreSQL 16. Need to update pgrx dependencies" && exit 0 \
;; \
*) \
echo "unexpected PostgreSQL version" && exit 1 \
;; \
esac && \
wget https://github.com/pksunkara/pgx_ulid/archive/refs/tags/v0.1.0.tar.gz -O pgx_ulid.tar.gz && \
RUN wget https://github.com/pksunkara/pgx_ulid/archive/refs/tags/v0.1.0.tar.gz -O pgx_ulid.tar.gz && \
echo "908b7358e6f846e87db508ae5349fb56a88ee6305519074b12f3d5b0ff09f791 pgx_ulid.tar.gz" | sha256sum --check && \
mkdir pgx_ulid-src && cd pgx_ulid-src && tar xvzf ../pgx_ulid.tar.gz --strip-components=1 -C . && \
sed -i 's/pgx = "=0.7.3"/pgx = { version = "0.7.3", features = [ "unsafe-postgres" ] }/g' Cargo.toml && \
@@ -805,20 +726,6 @@ RUN make -j $(getconf _NPROCESSORS_ONLN) \
PG_CONFIG=/usr/local/pgsql/bin/pg_config \
-C pgxn/neon_utils \
-s install && \
make -j $(getconf _NPROCESSORS_ONLN) \
PG_CONFIG=/usr/local/pgsql/bin/pg_config \
-C pgxn/neon_rmgr \
-s install && \
case "${PG_VERSION}" in \
"v14" | "v15") \
;; \
"v16") \
echo "Skipping HNSW for PostgreSQL 16" && exit 0 \
;; \
*) \
echo "unexpected PostgreSQL version" && exit 1 \
;; \
esac && \
make -j $(getconf _NPROCESSORS_ONLN) \
PG_CONFIG=/usr/local/pgsql/bin/pg_config \
-C pgxn/hnsw \

View File

@@ -29,7 +29,6 @@ else ifeq ($(UNAME_S),Darwin)
# It can be configured with OPENSSL_PREFIX variable
OPENSSL_PREFIX ?= $(shell brew --prefix openssl@3)
PG_CONFIGURE_OPTS += --with-includes=$(OPENSSL_PREFIX)/include --with-libraries=$(OPENSSL_PREFIX)/lib
PG_CONFIGURE_OPTS += PKG_CONFIG_PATH=$(shell brew --prefix icu4c)/lib/pkgconfig
# macOS already has bison and flex in the system, but they are old and result in postgres-v14 target failure
# brew formulae are keg-only and not symlinked into HOMEBREW_PREFIX, force their usage
EXTRA_PATH_OVERRIDES += $(shell brew --prefix bison)/bin/:$(shell brew --prefix flex)/bin/:
@@ -84,8 +83,6 @@ $(POSTGRES_INSTALL_DIR)/build/%/config.status:
# I'm not sure why it wouldn't work, but this is the only place (apart from
# the "build-all-versions" entry points) where direct mention of PostgreSQL
# versions is used.
.PHONY: postgres-configure-v16
postgres-configure-v16: $(POSTGRES_INSTALL_DIR)/build/v16/config.status
.PHONY: postgres-configure-v15
postgres-configure-v15: $(POSTGRES_INSTALL_DIR)/build/v15/config.status
.PHONY: postgres-configure-v14
@@ -121,10 +118,6 @@ postgres-clean-%:
$(MAKE) -C $(POSTGRES_INSTALL_DIR)/build/$*/contrib/pageinspect clean
$(MAKE) -C $(POSTGRES_INSTALL_DIR)/build/$*/src/interfaces/libpq clean
.PHONY: postgres-check-%
postgres-check-%: postgres-%
$(MAKE) -C $(POSTGRES_INSTALL_DIR)/build/$* MAKELEVEL=0 check
.PHONY: neon-pg-ext-%
neon-pg-ext-%: postgres-%
+@echo "Compiling neon $*"
@@ -137,11 +130,6 @@ neon-pg-ext-%: postgres-%
$(MAKE) PG_CONFIG=$(POSTGRES_INSTALL_DIR)/$*/bin/pg_config CFLAGS='$(PG_CFLAGS) $(COPT)' \
-C $(POSTGRES_INSTALL_DIR)/build/neon-walredo-$* \
-f $(ROOT_PROJECT_DIR)/pgxn/neon_walredo/Makefile install
+@echo "Compiling neon_rmgr $*"
mkdir -p $(POSTGRES_INSTALL_DIR)/build/neon-rmgr-$*
$(MAKE) PG_CONFIG=$(POSTGRES_INSTALL_DIR)/$*/bin/pg_config CFLAGS='$(PG_CFLAGS) $(COPT)' \
-C $(POSTGRES_INSTALL_DIR)/build/neon-rmgr-$* \
-f $(ROOT_PROJECT_DIR)/pgxn/neon_rmgr/Makefile install
+@echo "Compiling neon_test_utils $*"
mkdir -p $(POSTGRES_INSTALL_DIR)/build/neon-test-utils-$*
$(MAKE) PG_CONFIG=$(POSTGRES_INSTALL_DIR)/$*/bin/pg_config CFLAGS='$(PG_CFLAGS) $(COPT)' \
@@ -152,13 +140,6 @@ neon-pg-ext-%: postgres-%
$(MAKE) PG_CONFIG=$(POSTGRES_INSTALL_DIR)/$*/bin/pg_config CFLAGS='$(PG_CFLAGS) $(COPT)' \
-C $(POSTGRES_INSTALL_DIR)/build/neon-utils-$* \
-f $(ROOT_PROJECT_DIR)/pgxn/neon_utils/Makefile install
# pg_embedding was temporarily released as hnsw from this repo, when we only
# supported PostgreSQL 14 and 15
neon-pg-ext-v14: neon-pg-ext-hnsw-v14
neon-pg-ext-v15: neon-pg-ext-hnsw-v15
neon-pg-ext-hnsw-%: postgres-headers-% postgres-%
+@echo "Compiling hnsw $*"
mkdir -p $(POSTGRES_INSTALL_DIR)/build/hnsw-$*
$(MAKE) PG_CONFIG=$(POSTGRES_INSTALL_DIR)/$*/bin/pg_config CFLAGS='$(PG_CFLAGS) $(COPT)' \
@@ -186,39 +167,28 @@ neon-pg-ext-clean-%:
.PHONY: neon-pg-ext
neon-pg-ext: \
neon-pg-ext-v14 \
neon-pg-ext-v15 \
neon-pg-ext-v16
neon-pg-ext-v15
.PHONY: neon-pg-ext-clean
neon-pg-ext-clean: \
neon-pg-ext-clean-v14 \
neon-pg-ext-clean-v15 \
neon-pg-ext-clean-v16
neon-pg-ext-clean-v15
# shorthand to build all Postgres versions
.PHONY: postgres
postgres: \
postgres-v14 \
postgres-v15 \
postgres-v16
postgres-v15
.PHONY: postgres-headers
postgres-headers: \
postgres-headers-v14 \
postgres-headers-v15 \
postgres-headers-v16
postgres-headers-v15
.PHONY: postgres-clean
postgres-clean: \
postgres-clean-v14 \
postgres-clean-v15 \
postgres-clean-v16
.PHONY: postgres-check
postgres-check: \
postgres-check-v14 \
postgres-check-v15 \
postgres-check-v16
postgres-clean-v15
# This doesn't remove the effects of 'configure'.
.PHONY: clean

View File

@@ -29,18 +29,18 @@ See developer documentation in [SUMMARY.md](/docs/SUMMARY.md) for more informati
```bash
apt install build-essential libtool libreadline-dev zlib1g-dev flex bison libseccomp-dev \
libssl-dev clang pkg-config libpq-dev cmake postgresql-client protobuf-compiler \
libcurl4-openssl-dev openssl python-poetry lsof
libcurl4-openssl-dev openssl python-poetry
```
* On Fedora, these packages are needed:
```bash
dnf install flex bison readline-devel zlib-devel openssl-devel \
libseccomp-devel perl clang cmake postgresql postgresql-contrib protobuf-compiler \
protobuf-devel libcurl-devel openssl poetry lsof
protobuf-devel libcurl-devel openssl poetry
```
* On Arch based systems, these packages are needed:
```bash
pacman -S base-devel readline zlib libseccomp openssl clang \
postgresql-libs cmake postgresql protobuf curl lsof
postgresql-libs cmake postgresql protobuf curl
```
Building Neon requires 3.15+ version of `protoc` (protobuf-compiler). If your distribution provides an older version, you can install a newer version from [here](https://github.com/protocolbuffers/protobuf/releases).
@@ -55,7 +55,7 @@ curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh
1. Install XCode and dependencies
```
xcode-select --install
brew install protobuf openssl flex bison icu4c pkg-config
brew install protobuf openssl flex bison
# add openssl to PATH, required for ed25519 keys generation in neon_local
echo 'export PATH="$(brew --prefix openssl)/bin:$PATH"' >> ~/.zshrc

View File

@@ -1,5 +0,0 @@
disallowed-methods = [
"tokio::task::block_in_place",
# Allow this for now, to deny it later once we stop using Handle::block_on completely
# "tokio::runtime::Handle::block_on",
]

View File

@@ -1,7 +1,7 @@
use anyhow::{anyhow, Ok, Result};
use postgres::Client;
use tokio_postgres::NoTls;
use tracing::{error, instrument, warn};
use tracing::{error, instrument};
use crate::compute::ComputeNode;
@@ -55,24 +55,13 @@ pub async fn check_writability(compute: &ComputeNode) -> Result<()> {
ON CONFLICT (id) DO UPDATE
SET updated_at = now();";
match client.simple_query(query).await {
Result::Ok(result) => {
if result.len() != 1 {
return Err(anyhow::anyhow!(
"expected 1 query results, but got {}",
result.len()
));
}
}
Err(err) => {
if let Some(state) = err.code() {
if state == &tokio_postgres::error::SqlState::DISK_FULL {
warn!("Tenant disk is full");
return Ok(());
}
}
return Err(err.into());
}
let result = client.simple_query(query).await?;
if result.len() != 1 {
return Err(anyhow::format_err!(
"expected 1 query result, but got {}",
result.len()
));
}
Ok(())

View File

@@ -46,6 +46,8 @@ pub fn write_postgres_conf(
writeln!(file, "{}", conf)?;
}
write!(file, "{}", &spec.cluster.settings.as_pg_settings())?;
// Add options for connecting to storage
writeln!(file, "# Neon storage settings")?;
if let Some(s) = &spec.pageserver_connstring {

View File

@@ -74,7 +74,6 @@ More specifically, here is an example ext_index.json
use anyhow::Context;
use anyhow::{self, Result};
use compute_api::spec::RemoteExtSpec;
use regex::Regex;
use remote_storage::*;
use serde_json;
use std::io::Read;
@@ -107,71 +106,16 @@ fn get_pg_config(argument: &str, pgbin: &str) -> String {
pub fn get_pg_version(pgbin: &str) -> String {
// pg_config --version returns a (platform specific) human readable string
// such as "PostgreSQL 15.4". We parse this to v14/v15/v16 etc.
// such as "PostgreSQL 15.4". We parse this to v14/v15
let human_version = get_pg_config("--version", pgbin);
return parse_pg_version(&human_version).to_string();
}
fn parse_pg_version(human_version: &str) -> &str {
// Normal releases have version strings like "PostgreSQL 15.4". But there
// are also pre-release versions like "PostgreSQL 17devel" or "PostgreSQL
// 16beta2" or "PostgreSQL 17rc1". And with the --with-extra-version
// configure option, you can tack any string to the version number,
// e.g. "PostgreSQL 15.4foobar".
match Regex::new(r"^PostgreSQL (?<major>\d+).+")
.unwrap()
.captures(human_version)
{
Some(captures) if captures.len() == 2 => match &captures["major"] {
"14" => return "v14",
"15" => return "v15",
"16" => return "v16",
_ => {}
},
_ => {}
if human_version.contains("15") {
return "v15".to_string();
} else if human_version.contains("14") {
return "v14".to_string();
}
panic!("Unsuported postgres version {human_version}");
}
#[cfg(test)]
mod tests {
use super::parse_pg_version;
#[test]
fn test_parse_pg_version() {
assert_eq!(parse_pg_version("PostgreSQL 15.4"), "v15");
assert_eq!(parse_pg_version("PostgreSQL 15.14"), "v15");
assert_eq!(
parse_pg_version("PostgreSQL 15.4 (Ubuntu 15.4-0ubuntu0.23.04.1)"),
"v15"
);
assert_eq!(parse_pg_version("PostgreSQL 14.15"), "v14");
assert_eq!(parse_pg_version("PostgreSQL 14.0"), "v14");
assert_eq!(
parse_pg_version("PostgreSQL 14.9 (Debian 14.9-1.pgdg120+1"),
"v14"
);
assert_eq!(parse_pg_version("PostgreSQL 16devel"), "v16");
assert_eq!(parse_pg_version("PostgreSQL 16beta1"), "v16");
assert_eq!(parse_pg_version("PostgreSQL 16rc2"), "v16");
assert_eq!(parse_pg_version("PostgreSQL 16extra"), "v16");
}
#[test]
#[should_panic]
fn test_parse_pg_unsupported_version() {
parse_pg_version("PostgreSQL 13.14");
}
#[test]
#[should_panic]
fn test_parse_pg_incorrect_version_format() {
parse_pg_version("PostgreSQL 14");
}
}
// download the archive for a given extension,
// unzip it, and place files in the appropriate locations (share/lib)
pub async fn download_extension(

View File

@@ -1,7 +1,6 @@
# Minimal neon environment with one safekeeper. This is equivalent to the built-in
# defaults that you get with no --config
[[pageservers]]
id=1
[pageserver]
listen_pg_addr = '127.0.0.1:64000'
listen_http_addr = '127.0.0.1:9898'
pg_auth_type = 'Trust'

View File

@@ -32,7 +32,7 @@ impl AttachmentService {
// Makes no sense to construct this if pageservers aren't going to use it: assume
// pageservers have control plane API set
let listen_url = env.control_plane_api.clone().unwrap();
let listen_url = env.pageserver.control_plane_api.clone().unwrap();
let listen = format!(
"{}:{}",
@@ -80,6 +80,7 @@ impl AttachmentService {
let url = self
.env
.pageserver
.control_plane_api
.clone()
.unwrap()

View File

@@ -50,17 +50,16 @@ fn default_conf() -> String {
format!(
r#"
# Default built-in configuration, defined in main.rs
control_plane_api = '{DEFAULT_PAGESERVER_CONTROL_PLANE_API}'
[broker]
listen_addr = '{DEFAULT_BROKER_ADDR}'
[[pageservers]]
[pageserver]
id = {DEFAULT_PAGESERVER_ID}
listen_pg_addr = '{DEFAULT_PAGESERVER_PG_ADDR}'
listen_http_addr = '{DEFAULT_PAGESERVER_HTTP_ADDR}'
pg_auth_type = '{trust_auth}'
http_auth_type = '{trust_auth}'
control_plane_api = '{DEFAULT_PAGESERVER_CONTROL_PLANE_API}'
[[safekeepers]]
id = {DEFAULT_SAFEKEEPER_ID}
@@ -259,7 +258,7 @@ fn get_timeline_infos(
env: &local_env::LocalEnv,
tenant_id: &TenantId,
) -> Result<HashMap<TimelineId, TimelineInfo>> {
Ok(get_default_pageserver(env)
Ok(PageServerNode::from_env(env)
.timeline_list(tenant_id)?
.into_iter()
.map(|timeline_info| (timeline_info.timeline_id, timeline_info))
@@ -320,30 +319,17 @@ fn handle_init(init_match: &ArgMatches) -> anyhow::Result<LocalEnv> {
.context("Failed to initialize neon repository")?;
// Initialize pageserver, create initial tenant and timeline.
for ps_conf in &env.pageservers {
PageServerNode::from_env(&env, ps_conf)
.initialize(&pageserver_config_overrides(init_match))
.unwrap_or_else(|e| {
eprintln!("pageserver init failed: {e:?}");
exit(1);
});
}
let pageserver = PageServerNode::from_env(&env);
pageserver
.initialize(&pageserver_config_overrides(init_match))
.unwrap_or_else(|e| {
eprintln!("pageserver init failed: {e:?}");
exit(1);
});
Ok(env)
}
/// The default pageserver is the one where CLI tenant/timeline operations are sent by default.
/// For typical interactive use, one would just run with a single pageserver. Scenarios with
/// tenant/timeline placement across multiple pageservers are managed by python test code rather
/// than this CLI.
fn get_default_pageserver(env: &local_env::LocalEnv) -> PageServerNode {
let ps_conf = env
.pageservers
.first()
.expect("Config is validated to contain at least one pageserver");
PageServerNode::from_env(env, ps_conf)
}
fn pageserver_config_overrides(init_match: &ArgMatches) -> Vec<&str> {
init_match
.get_many::<String>("pageserver-config-override")
@@ -354,7 +340,7 @@ fn pageserver_config_overrides(init_match: &ArgMatches) -> Vec<&str> {
}
fn handle_tenant(tenant_match: &ArgMatches, env: &mut local_env::LocalEnv) -> anyhow::Result<()> {
let pageserver = get_default_pageserver(env);
let pageserver = PageServerNode::from_env(env);
match tenant_match.subcommand() {
Some(("list", _)) => {
for t in pageserver.tenant_list()? {
@@ -370,11 +356,11 @@ fn handle_tenant(tenant_match: &ArgMatches, env: &mut local_env::LocalEnv) -> an
// If tenant ID was not specified, generate one
let tenant_id = parse_tenant_id(create_match)?.unwrap_or_else(TenantId::generate);
let generation = if env.control_plane_api.is_some() {
let generation = if env.pageserver.control_plane_api.is_some() {
// We must register the tenant with the attachment service, so
// that when the pageserver restarts, it will be re-attached.
let attachment_service = AttachmentService::from_env(env);
attachment_service.attach_hook(tenant_id, pageserver.conf.id)?
attachment_service.attach_hook(tenant_id, env.pageserver.id)?
} else {
None
};
@@ -439,7 +425,7 @@ fn handle_tenant(tenant_match: &ArgMatches, env: &mut local_env::LocalEnv) -> an
}
fn handle_timeline(timeline_match: &ArgMatches, env: &mut local_env::LocalEnv) -> Result<()> {
let pageserver = get_default_pageserver(env);
let pageserver = PageServerNode::from_env(env);
match timeline_match.subcommand() {
Some(("list", list_match)) => {
@@ -516,7 +502,6 @@ fn handle_timeline(timeline_match: &ArgMatches, env: &mut local_env::LocalEnv) -
None,
pg_version,
ComputeMode::Primary,
DEFAULT_PAGESERVER_ID,
)?;
println!("Done");
}
@@ -570,6 +555,7 @@ fn handle_endpoint(ep_match: &ArgMatches, env: &local_env::LocalEnv) -> Result<(
Some(ep_subcommand_data) => ep_subcommand_data,
None => bail!("no endpoint subcommand provided"),
};
let mut cplane = ComputeControlPlane::load(env.clone())?;
// All subcommands take an optional --tenant-id option
@@ -666,13 +652,6 @@ fn handle_endpoint(ep_match: &ArgMatches, env: &local_env::LocalEnv) -> Result<(
.copied()
.unwrap_or(false);
let pageserver_id =
if let Some(id_str) = sub_args.get_one::<String>("endpoint-pageserver-id") {
NodeId(id_str.parse().context("while parsing pageserver id")?)
} else {
DEFAULT_PAGESERVER_ID
};
let mode = match (lsn, hot_standby) {
(Some(lsn), false) => ComputeMode::Static(lsn),
(None, true) => ComputeMode::Replica,
@@ -688,7 +667,6 @@ fn handle_endpoint(ep_match: &ArgMatches, env: &local_env::LocalEnv) -> Result<(
http_port,
pg_version,
mode,
pageserver_id,
)?;
}
"start" => {
@@ -698,13 +676,6 @@ fn handle_endpoint(ep_match: &ArgMatches, env: &local_env::LocalEnv) -> Result<(
.get_one::<String>("endpoint_id")
.ok_or_else(|| anyhow!("No endpoint ID was provided to start"))?;
let pageserver_id =
if let Some(id_str) = sub_args.get_one::<String>("endpoint-pageserver-id") {
NodeId(id_str.parse().context("while parsing pageserver id")?)
} else {
DEFAULT_PAGESERVER_ID
};
let remote_ext_config = sub_args.get_one::<String>("remote-ext-config");
// If --safekeepers argument is given, use only the listed safekeeper nodes.
@@ -724,8 +695,7 @@ fn handle_endpoint(ep_match: &ArgMatches, env: &local_env::LocalEnv) -> Result<(
let endpoint = cplane.endpoints.get(endpoint_id.as_str());
let ps_conf = env.get_pageserver_conf(pageserver_id)?;
let auth_token = if matches!(ps_conf.pg_auth_type, AuthType::NeonJWT) {
let auth_token = if matches!(env.pageserver.pg_auth_type, AuthType::NeonJWT) {
let claims = Claims::new(Some(tenant_id), Scope::Tenant);
Some(env.generate_auth_token(&claims)?)
@@ -792,7 +762,6 @@ fn handle_endpoint(ep_match: &ArgMatches, env: &local_env::LocalEnv) -> Result<(
http_port,
pg_version,
mode,
pageserver_id,
)?;
ep.start(&auth_token, safekeepers, remote_ext_config)?;
}
@@ -817,64 +786,48 @@ fn handle_endpoint(ep_match: &ArgMatches, env: &local_env::LocalEnv) -> Result<(
}
fn handle_pageserver(sub_match: &ArgMatches, env: &local_env::LocalEnv) -> Result<()> {
fn get_pageserver(env: &local_env::LocalEnv, args: &ArgMatches) -> Result<PageServerNode> {
let node_id = if let Some(id_str) = args.get_one::<String>("pageserver-id") {
NodeId(id_str.parse().context("while parsing pageserver id")?)
} else {
DEFAULT_PAGESERVER_ID
};
Ok(PageServerNode::from_env(
env,
env.get_pageserver_conf(node_id)?,
))
}
let pageserver = PageServerNode::from_env(env);
match sub_match.subcommand() {
Some(("start", subcommand_args)) => {
if let Err(e) = get_pageserver(env, subcommand_args)?
.start(&pageserver_config_overrides(subcommand_args))
{
Some(("start", start_match)) => {
if let Err(e) = pageserver.start(&pageserver_config_overrides(start_match)) {
eprintln!("pageserver start failed: {e}");
exit(1);
}
}
Some(("stop", subcommand_args)) => {
let immediate = subcommand_args
Some(("stop", stop_match)) => {
let immediate = stop_match
.get_one::<String>("stop-mode")
.map(|s| s.as_str())
== Some("immediate");
if let Err(e) = get_pageserver(env, subcommand_args)?.stop(immediate) {
if let Err(e) = pageserver.stop(immediate) {
eprintln!("pageserver stop failed: {}", e);
exit(1);
}
}
Some(("restart", subcommand_args)) => {
let pageserver = get_pageserver(env, subcommand_args)?;
Some(("restart", restart_match)) => {
//TODO what shutdown strategy should we use here?
if let Err(e) = pageserver.stop(false) {
eprintln!("pageserver stop failed: {}", e);
exit(1);
}
if let Err(e) = pageserver.start(&pageserver_config_overrides(subcommand_args)) {
if let Err(e) = pageserver.start(&pageserver_config_overrides(restart_match)) {
eprintln!("pageserver start failed: {e}");
exit(1);
}
}
Some(("status", subcommand_args)) => {
match get_pageserver(env, subcommand_args)?.check_status() {
Ok(_) => println!("Page server is up and running"),
Err(err) => {
eprintln!("Page server is not available: {}", err);
exit(1);
}
Some(("status", _)) => match PageServerNode::from_env(env).check_status() {
Ok(_) => println!("Page server is up and running"),
Err(err) => {
eprintln!("Page server is not available: {}", err);
exit(1);
}
}
},
Some((sub_name, _)) => bail!("Unexpected pageserver subcommand '{}'", sub_name),
None => bail!("no pageserver subcommand provided"),
@@ -990,7 +943,7 @@ fn handle_start_all(sub_match: &ArgMatches, env: &local_env::LocalEnv) -> anyhow
broker::start_broker_process(env)?;
// Only start the attachment service if the pageserver is configured to need it
if env.control_plane_api.is_some() {
if env.pageserver.control_plane_api.is_some() {
let attachment_service = AttachmentService::from_env(env);
if let Err(e) = attachment_service.start() {
eprintln!("attachment_service start failed: {:#}", e);
@@ -999,13 +952,11 @@ fn handle_start_all(sub_match: &ArgMatches, env: &local_env::LocalEnv) -> anyhow
}
}
for ps_conf in &env.pageservers {
let pageserver = PageServerNode::from_env(env, ps_conf);
if let Err(e) = pageserver.start(&pageserver_config_overrides(sub_match)) {
eprintln!("pageserver {} start failed: {:#}", ps_conf.id, e);
try_stop_all(env, true);
exit(1);
}
let pageserver = PageServerNode::from_env(env);
if let Err(e) = pageserver.start(&pageserver_config_overrides(sub_match)) {
eprintln!("pageserver {} start failed: {:#}", env.pageserver.id, e);
try_stop_all(env, true);
exit(1);
}
for node in env.safekeepers.iter() {
@@ -1029,6 +980,8 @@ fn handle_stop_all(sub_match: &ArgMatches, env: &local_env::LocalEnv) -> Result<
}
fn try_stop_all(env: &local_env::LocalEnv, immediate: bool) {
let pageserver = PageServerNode::from_env(env);
// Stop all endpoints
match ComputeControlPlane::load(env.clone()) {
Ok(cplane) => {
@@ -1043,11 +996,8 @@ fn try_stop_all(env: &local_env::LocalEnv, immediate: bool) {
}
}
for ps_conf in &env.pageservers {
let pageserver = PageServerNode::from_env(env, ps_conf);
if let Err(e) = pageserver.stop(immediate) {
eprintln!("pageserver {} stop failed: {:#}", ps_conf.id, e);
}
if let Err(e) = pageserver.stop(immediate) {
eprintln!("pageserver {} stop failed: {:#}", env.pageserver.id, e);
}
for node in env.safekeepers.iter() {
@@ -1061,7 +1011,7 @@ fn try_stop_all(env: &local_env::LocalEnv, immediate: bool) {
eprintln!("neon broker stop failed: {e:#}");
}
if env.control_plane_api.is_some() {
if env.pageserver.control_plane_api.is_some() {
let attachment_service = AttachmentService::from_env(env);
if let Err(e) = attachment_service.stop(immediate) {
eprintln!("attachment service stop failed: {e:#}");
@@ -1081,16 +1031,6 @@ fn cli() -> Command {
let safekeeper_id_arg = Arg::new("id").help("safekeeper id").required(false);
// --id, when using a pageserver command
let pageserver_id_arg = Arg::new("pageserver-id")
.long("id")
.help("pageserver id")
.required(false);
// --pageserver-id when using a non-pageserver command
let endpoint_pageserver_id_arg = Arg::new("endpoint-pageserver-id")
.long("pageserver-id")
.required(false);
let safekeeper_extra_opt_arg = Arg::new("safekeeper-extra-opt")
.short('e')
.long("safekeeper-extra-opt")
@@ -1255,16 +1195,10 @@ fn cli() -> Command {
.arg_required_else_help(true)
.about("Manage pageserver")
.subcommand(Command::new("status"))
.arg(pageserver_id_arg.clone())
.subcommand(Command::new("start").about("Start local pageserver")
.arg(pageserver_id_arg.clone())
.arg(pageserver_config_args.clone()))
.subcommand(Command::new("start").about("Start local pageserver").arg(pageserver_config_args.clone()))
.subcommand(Command::new("stop").about("Stop local pageserver")
.arg(pageserver_id_arg.clone())
.arg(stop_mode_arg.clone()))
.subcommand(Command::new("restart").about("Restart local pageserver")
.arg(pageserver_id_arg.clone())
.arg(pageserver_config_args.clone()))
.subcommand(Command::new("restart").about("Restart local pageserver").arg(pageserver_config_args.clone()))
)
.subcommand(
Command::new("attachment_service")
@@ -1308,7 +1242,6 @@ fn cli() -> Command {
.arg(lsn_arg.clone())
.arg(pg_port_arg.clone())
.arg(http_port_arg.clone())
.arg(endpoint_pageserver_id_arg.clone())
.arg(
Arg::new("config-only")
.help("Don't do basebackup, create endpoint directory with only config files")
@@ -1326,7 +1259,6 @@ fn cli() -> Command {
.arg(lsn_arg)
.arg(pg_port_arg)
.arg(http_port_arg)
.arg(endpoint_pageserver_id_arg.clone())
.arg(pg_version_arg)
.arg(hot_standby_arg)
.arg(safekeepers_arg)

View File

@@ -70,7 +70,6 @@ pub struct EndpointConf {
http_port: u16,
pg_version: u32,
skip_pg_catalog_updates: bool,
pageserver_id: NodeId,
}
//
@@ -83,16 +82,19 @@ pub struct ComputeControlPlane {
pub endpoints: BTreeMap<String, Arc<Endpoint>>,
env: LocalEnv,
pageserver: Arc<PageServerNode>,
}
impl ComputeControlPlane {
// Load current endpoints from the endpoints/ subdirectories
pub fn load(env: LocalEnv) -> Result<ComputeControlPlane> {
let pageserver = Arc::new(PageServerNode::from_env(&env));
let mut endpoints = BTreeMap::default();
for endpoint_dir in std::fs::read_dir(env.endpoints_path())
.with_context(|| format!("failed to list {}", env.endpoints_path().display()))?
{
let ep = Endpoint::from_dir_entry(endpoint_dir?, &env)?;
let ep = Endpoint::from_dir_entry(endpoint_dir?, &env, &pageserver)?;
endpoints.insert(ep.endpoint_id.clone(), Arc::new(ep));
}
@@ -100,6 +102,7 @@ impl ComputeControlPlane {
base_port: 55431,
endpoints,
env,
pageserver,
})
}
@@ -122,18 +125,15 @@ impl ComputeControlPlane {
http_port: Option<u16>,
pg_version: u32,
mode: ComputeMode,
pageserver_id: NodeId,
) -> Result<Arc<Endpoint>> {
let pg_port = pg_port.unwrap_or_else(|| self.get_port());
let http_port = http_port.unwrap_or_else(|| self.get_port() + 1);
let pageserver =
PageServerNode::from_env(&self.env, self.env.get_pageserver_conf(pageserver_id)?);
let ep = Arc::new(Endpoint {
endpoint_id: endpoint_id.to_owned(),
pg_address: SocketAddr::new("127.0.0.1".parse().unwrap(), pg_port),
http_address: SocketAddr::new("127.0.0.1".parse().unwrap(), http_port),
env: self.env.clone(),
pageserver,
pageserver: Arc::clone(&self.pageserver),
timeline_id,
mode,
tenant_id,
@@ -159,7 +159,6 @@ impl ComputeControlPlane {
pg_port,
pg_version,
skip_pg_catalog_updates: true,
pageserver_id,
})?,
)?;
std::fs::write(
@@ -194,14 +193,18 @@ pub struct Endpoint {
// These are not part of the endpoint as such, but the environment
// the endpoint runs in.
pub env: LocalEnv,
pageserver: PageServerNode,
pageserver: Arc<PageServerNode>,
// Optimizations
skip_pg_catalog_updates: bool,
}
impl Endpoint {
fn from_dir_entry(entry: std::fs::DirEntry, env: &LocalEnv) -> Result<Endpoint> {
fn from_dir_entry(
entry: std::fs::DirEntry,
env: &LocalEnv,
pageserver: &Arc<PageServerNode>,
) -> Result<Endpoint> {
if !entry.file_type()?.is_dir() {
anyhow::bail!(
"Endpoint::from_dir_entry failed: '{}' is not a directory",
@@ -217,15 +220,12 @@ impl Endpoint {
let conf: EndpointConf =
serde_json::from_slice(&std::fs::read(entry.path().join("endpoint.json"))?)?;
let pageserver =
PageServerNode::from_env(env, env.get_pageserver_conf(conf.pageserver_id)?);
Ok(Endpoint {
pg_address: SocketAddr::new("127.0.0.1".parse().unwrap(), conf.pg_port),
http_address: SocketAddr::new("127.0.0.1".parse().unwrap(), conf.http_port),
endpoint_id,
env: env.clone(),
pageserver,
pageserver: Arc::clone(pageserver),
timeline_id: conf.timeline_id,
mode: conf.mode,
tenant_id: conf.tenant_id,

View File

@@ -68,17 +68,11 @@ pub struct LocalEnv {
pub broker: NeonBroker,
/// This Vec must always contain at least one pageserver
pub pageservers: Vec<PageServerConf>,
pub pageserver: PageServerConf,
#[serde(default)]
pub safekeepers: Vec<SafekeeperConf>,
// Control plane location: if None, we will not run attachment_service. If set, this will
// be propagated into each pageserver's configuration.
#[serde(default)]
pub control_plane_api: Option<Url>,
/// Keep human-readable aliases in memory (and persist them to config), to hide ZId hex strings from the user.
#[serde(default)]
// A `HashMap<String, HashMap<TenantId, TimelineId>>` would be more appropriate here,
@@ -124,6 +118,9 @@ pub struct PageServerConf {
// auth type used for the PG and HTTP ports
pub pg_auth_type: AuthType,
pub http_auth_type: AuthType,
// Control plane location
pub control_plane_api: Option<Url>,
}
impl Default for PageServerConf {
@@ -134,6 +131,7 @@ impl Default for PageServerConf {
listen_http_addr: String::new(),
pg_auth_type: AuthType::Trust,
http_auth_type: AuthType::Trust,
control_plane_api: None,
}
}
}
@@ -182,18 +180,26 @@ impl LocalEnv {
pub fn pg_distrib_dir(&self, pg_version: u32) -> anyhow::Result<PathBuf> {
let path = self.pg_distrib_dir.clone();
#[allow(clippy::manual_range_patterns)]
match pg_version {
14 | 15 | 16 => Ok(path.join(format!("v{pg_version}"))),
14 => Ok(path.join(format!("v{pg_version}"))),
15 => Ok(path.join(format!("v{pg_version}"))),
_ => bail!("Unsupported postgres version: {}", pg_version),
}
}
pub fn pg_bin_dir(&self, pg_version: u32) -> anyhow::Result<PathBuf> {
Ok(self.pg_distrib_dir(pg_version)?.join("bin"))
match pg_version {
14 => Ok(self.pg_distrib_dir(pg_version)?.join("bin")),
15 => Ok(self.pg_distrib_dir(pg_version)?.join("bin")),
_ => bail!("Unsupported postgres version: {}", pg_version),
}
}
pub fn pg_lib_dir(&self, pg_version: u32) -> anyhow::Result<PathBuf> {
Ok(self.pg_distrib_dir(pg_version)?.join("lib"))
match pg_version {
14 => Ok(self.pg_distrib_dir(pg_version)?.join("lib")),
15 => Ok(self.pg_distrib_dir(pg_version)?.join("lib")),
_ => bail!("Unsupported postgres version: {}", pg_version),
}
}
pub fn pageserver_bin(&self) -> PathBuf {
@@ -216,23 +222,15 @@ impl LocalEnv {
self.base_data_dir.join("endpoints")
}
pub fn pageserver_data_dir(&self, pageserver_id: NodeId) -> PathBuf {
self.base_data_dir
.join(format!("pageserver_{pageserver_id}"))
// TODO: move pageserver files into ./pageserver
pub fn pageserver_data_dir(&self) -> PathBuf {
self.base_data_dir.clone()
}
pub fn safekeeper_data_dir(&self, data_dir_name: &str) -> PathBuf {
self.base_data_dir.join("safekeepers").join(data_dir_name)
}
pub fn get_pageserver_conf(&self, id: NodeId) -> anyhow::Result<&PageServerConf> {
if let Some(conf) = self.pageservers.iter().find(|node| node.id == id) {
Ok(conf)
} else {
bail!("could not find pageserver {id}")
}
}
pub fn register_branch_mapping(
&mut self,
branch_name: String,
@@ -309,10 +307,6 @@ impl LocalEnv {
env.neon_distrib_dir = env::current_exe()?.parent().unwrap().to_owned();
}
if env.pageservers.is_empty() {
anyhow::bail!("Configuration must contain at least one pageserver");
}
env.base_data_dir = base_path();
Ok(env)
@@ -345,7 +339,7 @@ impl LocalEnv {
// We read that in, in `create_config`, and fill any missing defaults. Then it's saved
// to .neon/config. TODO: We lose any formatting and comments along the way, which is
// a bit sad.
let mut conf_content = r#"# This file describes a local deployment of the page server
let mut conf_content = r#"# This file describes a locale deployment of the page server
# and safekeeeper node. It is read by the 'neon_local' command-line
# utility.
"#
@@ -475,9 +469,9 @@ impl LocalEnv {
}
fn auth_keys_needed(&self) -> bool {
self.pageservers.iter().any(|ps| {
ps.pg_auth_type == AuthType::NeonJWT || ps.http_auth_type == AuthType::NeonJWT
}) || self.safekeepers.iter().any(|sk| sk.auth_enabled)
self.pageserver.pg_auth_type == AuthType::NeonJWT
|| self.pageserver.http_auth_type == AuthType::NeonJWT
|| self.safekeepers.iter().any(|sk| sk.auth_enabled)
}
}

View File

@@ -27,7 +27,6 @@ use utils::{
lsn::Lsn,
};
use crate::local_env::PageServerConf;
use crate::{background_process, local_env::LocalEnv};
#[derive(Error, Debug)]
@@ -77,40 +76,43 @@ impl ResponseErrorMessageExt for Response {
#[derive(Debug)]
pub struct PageServerNode {
pub pg_connection_config: PgConnectionConfig,
pub conf: PageServerConf,
pub env: LocalEnv,
pub http_client: Client,
pub http_base_url: String,
}
impl PageServerNode {
pub fn from_env(env: &LocalEnv, conf: &PageServerConf) -> PageServerNode {
let (host, port) =
parse_host_port(&conf.listen_pg_addr).expect("Unable to parse listen_pg_addr");
pub fn from_env(env: &LocalEnv) -> PageServerNode {
let (host, port) = parse_host_port(&env.pageserver.listen_pg_addr)
.expect("Unable to parse listen_pg_addr");
let port = port.unwrap_or(5432);
Self {
pg_connection_config: PgConnectionConfig::new_host_port(host, port),
conf: conf.clone(),
env: env.clone(),
http_client: Client::new(),
http_base_url: format!("http://{}/v1", conf.listen_http_addr),
http_base_url: format!("http://{}/v1", env.pageserver.listen_http_addr),
}
}
// pageserver conf overrides defined by neon_local configuration.
fn neon_local_overrides(&self) -> Vec<String> {
let id = format!("id={}", self.conf.id);
let id = format!("id={}", self.env.pageserver.id);
// FIXME: the paths should be shell-escaped to handle paths with spaces, quotas etc.
let pg_distrib_dir_param = format!(
"pg_distrib_dir='{}'",
self.env.pg_distrib_dir_raw().display()
);
let http_auth_type_param = format!("http_auth_type='{}'", self.conf.http_auth_type);
let listen_http_addr_param = format!("listen_http_addr='{}'", self.conf.listen_http_addr);
let http_auth_type_param =
format!("http_auth_type='{}'", self.env.pageserver.http_auth_type);
let listen_http_addr_param = format!(
"listen_http_addr='{}'",
self.env.pageserver.listen_http_addr
);
let pg_auth_type_param = format!("pg_auth_type='{}'", self.conf.pg_auth_type);
let listen_pg_addr_param = format!("listen_pg_addr='{}'", self.conf.listen_pg_addr);
let pg_auth_type_param = format!("pg_auth_type='{}'", self.env.pageserver.pg_auth_type);
let listen_pg_addr_param =
format!("listen_pg_addr='{}'", self.env.pageserver.listen_pg_addr);
let broker_endpoint_param = format!("broker_endpoint='{}'", self.env.broker.client_url());
@@ -124,18 +126,17 @@ impl PageServerNode {
broker_endpoint_param,
];
if let Some(control_plane_api) = &self.env.control_plane_api {
if let Some(control_plane_api) = &self.env.pageserver.control_plane_api {
overrides.push(format!(
"control_plane_api='{}'",
control_plane_api.as_str()
));
}
if self.conf.http_auth_type != AuthType::Trust || self.conf.pg_auth_type != AuthType::Trust
if self.env.pageserver.http_auth_type != AuthType::Trust
|| self.env.pageserver.pg_auth_type != AuthType::Trust
{
// Keys are generated in the toplevel repo dir, pageservers' workdirs
// are one level below that, so refer to keys with ../
overrides.push("auth_validation_public_key_path='../auth_public_key.pem'".to_owned());
overrides.push("auth_validation_public_key_path='auth_public_key.pem'".to_owned());
}
overrides
}
@@ -143,12 +144,16 @@ impl PageServerNode {
/// Initializes a pageserver node by creating its config with the overrides provided.
pub fn initialize(&self, config_overrides: &[&str]) -> anyhow::Result<()> {
// First, run `pageserver --init` and wait for it to write a config into FS and exit.
self.pageserver_init(config_overrides)
.with_context(|| format!("Failed to run init for pageserver node {}", self.conf.id,))
self.pageserver_init(config_overrides).with_context(|| {
format!(
"Failed to run init for pageserver node {}",
self.env.pageserver.id,
)
})
}
pub fn repo_path(&self) -> PathBuf {
self.env.pageserver_data_dir(self.conf.id)
self.env.pageserver_data_dir()
}
/// The pid file is created by the pageserver process, with its pid stored inside.
@@ -164,7 +169,7 @@ impl PageServerNode {
fn pageserver_init(&self, config_overrides: &[&str]) -> anyhow::Result<()> {
let datadir = self.repo_path();
let node_id = self.conf.id;
let node_id = self.env.pageserver.id;
println!(
"Initializing pageserver node {} at '{}' in {:?}",
node_id,
@@ -173,10 +178,6 @@ impl PageServerNode {
);
io::stdout().flush()?;
if !datadir.exists() {
std::fs::create_dir(&datadir)?;
}
let datadir_path_str = datadir.to_str().with_context(|| {
format!("Cannot start pageserver node {node_id} in path that has no string representation: {datadir:?}")
})?;
@@ -207,7 +208,7 @@ impl PageServerNode {
let datadir = self.repo_path();
print!(
"Starting pageserver node {} at '{}' in {:?}",
self.conf.id,
self.env.pageserver.id,
self.pg_connection_config.raw_address(),
datadir
);
@@ -216,7 +217,7 @@ impl PageServerNode {
let datadir_path_str = datadir.to_str().with_context(|| {
format!(
"Cannot start pageserver node {} in path that has no string representation: {:?}",
self.conf.id, datadir,
self.env.pageserver.id, datadir,
)
})?;
let mut args = self.pageserver_basic_args(config_overrides, datadir_path_str);
@@ -260,7 +261,7 @@ impl PageServerNode {
// FIXME: why is this tied to pageserver's auth type? Whether or not the safekeeper
// needs a token, and how to generate that token, seems independent to whether
// the pageserver requires a token in incoming requests.
Ok(if self.conf.http_auth_type != AuthType::Trust {
Ok(if self.env.pageserver.http_auth_type != AuthType::Trust {
// Generate a token to connect from the pageserver to a safekeeper
let token = self
.env
@@ -285,7 +286,7 @@ impl PageServerNode {
pub fn page_server_psql_client(&self) -> anyhow::Result<postgres::Client> {
let mut config = self.pg_connection_config.clone();
if self.conf.pg_auth_type == AuthType::NeonJWT {
if self.env.pageserver.pg_auth_type == AuthType::NeonJWT {
let token = self
.env
.generate_auth_token(&Claims::new(None, Scope::PageServerApi))?;
@@ -296,7 +297,7 @@ impl PageServerNode {
fn http_request<U: IntoUrl>(&self, method: Method, url: U) -> anyhow::Result<RequestBuilder> {
let mut builder = self.http_client.request(method, url);
if self.conf.http_auth_type == AuthType::NeonJWT {
if self.env.pageserver.http_auth_type == AuthType::NeonJWT {
let token = self
.env
.generate_auth_token(&Claims::new(None, Scope::PageServerApi))?;

View File

@@ -30,7 +30,7 @@ cleanup() {
echo "clean up containers if exists"
cleanup
for pg_version in 14 15 16; do
for pg_version in 14 15; do
echo "start containers (pg_version=$pg_version)."
PG_VERSION=$pg_version docker compose -f $COMPOSE_FILE up --build -d

View File

@@ -381,8 +381,6 @@ pub struct TimelineInfo {
pub pg_version: u32,
pub state: TimelineState,
pub walreceiver_status: String,
}
#[derive(Debug, Clone, Serialize)]

View File

@@ -10,11 +10,9 @@ should be auto-generated too, but that's a TODO.
The PostgreSQL on-disk file format is not portable across different
CPU architectures and operating systems. It is also subject to change
in each major PostgreSQL version. Currently, this module supports
PostgreSQL v14, v15 and v16: bindings and code that depends on them are
version-specific.
This code is organized in modules `postgres_ffi::v14`, `postgres_ffi::v15` and
`postgres_ffi::v16`. Version independent code is explicitly exported into
shared `postgres_ffi`.
PostgreSQL v14 and v15: bindings and code that depends on them are version-specific.
This code is organized in modules: `postgres_ffi::v14` and `postgres_ffi::v15`
Version independend code is explicitly exported into shared `postgres_ffi`.
TODO: Currently, there is also some code that deals with WAL records

View File

@@ -56,7 +56,7 @@ fn main() -> anyhow::Result<()> {
PathBuf::from("pg_install")
};
for pg_version in &["v14", "v15", "v16"] {
for pg_version in &["v14", "v15"] {
let mut pg_install_dir_versioned = pg_install_dir.join(pg_version);
if pg_install_dir_versioned.is_relative() {
let cwd = env::current_dir().context("Failed to get current_dir")?;
@@ -125,7 +125,6 @@ fn main() -> anyhow::Result<()> {
.allowlist_var("PG_CONTROLFILEDATA_OFFSETOF_CRC")
.allowlist_type("PageHeaderData")
.allowlist_type("DBState")
.allowlist_type("RelMapFile")
// Because structs are used for serialization, tell bindgen to emit
// explicit padding fields.
.explicit_padding(true)

View File

@@ -51,59 +51,11 @@ macro_rules! for_all_postgres_versions {
($macro:tt) => {
$macro!(v14);
$macro!(v15);
$macro!(v16);
};
}
for_all_postgres_versions! { postgres_ffi }
/// dispatch_pgversion
///
/// Run a code block in a context where the postgres_ffi bindings for a
/// specific (supported) PostgreSQL version are `use`-ed in scope under the pgv
/// identifier.
/// If the provided pg_version is not supported, we panic!(), unless the
/// optional third argument was provided (in which case that code will provide
/// the default handling instead).
///
/// Use like
///
/// dispatch_pgversion!(my_pgversion, { pgv::constants::XLOG_DBASE_CREATE })
/// dispatch_pgversion!(my_pgversion, pgv::constants::XLOG_DBASE_CREATE)
///
/// Other uses are for macro-internal purposes only and strictly unsupported.
///
#[macro_export]
macro_rules! dispatch_pgversion {
($version:expr, $code:expr) => {
dispatch_pgversion!($version, $code, panic!("Unknown PostgreSQL version {}", $version))
};
($version:expr, $code:expr, $invalid_pgver_handling:expr) => {
dispatch_pgversion!(
$version => $code,
default = $invalid_pgver_handling,
pgversions = [
14 : v14,
15 : v15,
16 : v16,
]
)
};
($pgversion:expr => $code:expr,
default = $default:expr,
pgversions = [$($sv:literal : $vsv:ident),+ $(,)?]) => {
match ($pgversion) {
$($sv => {
use $crate::$vsv as pgv;
$code
},)+
_ => {
$default
}
}
};
}
pub mod pg_constants;
pub mod relfile_utils;
@@ -138,7 +90,13 @@ pub use v14::xlog_utils::XLogFileName;
pub use v14::bindings::DBState_DB_SHUTDOWNED;
pub fn bkpimage_is_compressed(bimg_info: u8, version: u32) -> anyhow::Result<bool> {
dispatch_pgversion!(version, Ok(pgv::bindings::bkpimg_is_compressed(bimg_info)))
match version {
14 => Ok(bimg_info & v14::bindings::BKPIMAGE_IS_COMPRESSED != 0),
15 => Ok(bimg_info & v15::bindings::BKPIMAGE_COMPRESS_PGLZ != 0
|| bimg_info & v15::bindings::BKPIMAGE_COMPRESS_LZ4 != 0
|| bimg_info & v15::bindings::BKPIMAGE_COMPRESS_ZSTD != 0),
_ => anyhow::bail!("Unknown version {}", version),
}
}
pub fn generate_wal_segment(
@@ -149,11 +107,11 @@ pub fn generate_wal_segment(
) -> Result<Bytes, SerializeError> {
assert_eq!(segno, lsn.segment_number(WAL_SEGMENT_SIZE));
dispatch_pgversion!(
pg_version,
pgv::xlog_utils::generate_wal_segment(segno, system_id, lsn),
Err(SerializeError::BadInput)
)
match pg_version {
14 => v14::xlog_utils::generate_wal_segment(segno, system_id, lsn),
15 => v15::xlog_utils::generate_wal_segment(segno, system_id, lsn),
_ => Err(SerializeError::BadInput),
}
}
pub fn generate_pg_control(
@@ -162,11 +120,11 @@ pub fn generate_pg_control(
lsn: Lsn,
pg_version: u32,
) -> anyhow::Result<(Bytes, u64)> {
dispatch_pgversion!(
pg_version,
pgv::xlog_utils::generate_pg_control(pg_control_bytes, checkpoint_bytes, lsn),
anyhow::bail!("Unknown version {}", pg_version)
)
match pg_version {
14 => v14::xlog_utils::generate_pg_control(pg_control_bytes, checkpoint_bytes, lsn),
15 => v15::xlog_utils::generate_pg_control(pg_control_bytes, checkpoint_bytes, lsn),
_ => anyhow::bail!("Unknown version {}", pg_version),
}
}
// PG timeline is always 1, changing it doesn't have any useful meaning in Neon.
@@ -238,6 +196,8 @@ pub fn fsm_logical_to_physical(addr: BlockNumber) -> BlockNumber {
}
pub mod waldecoder {
use crate::{v14, v15};
use bytes::{Buf, Bytes, BytesMut};
use std::num::NonZeroU32;
use thiserror::Error;
@@ -288,17 +248,22 @@ pub mod waldecoder {
}
pub fn poll_decode(&mut self) -> Result<Option<(Lsn, Bytes)>, WalDecodeError> {
dispatch_pgversion!(
self.pg_version,
{
use pgv::waldecoder_handler::WalStreamDecoderHandler;
match self.pg_version {
// This is a trick to support both versions simultaneously.
// See WalStreamDecoderHandler comments.
14 => {
use self::v14::waldecoder_handler::WalStreamDecoderHandler;
self.poll_decode_internal()
},
Err(WalDecodeError {
}
15 => {
use self::v15::waldecoder_handler::WalStreamDecoderHandler;
self.poll_decode_internal()
}
_ => Err(WalDecodeError {
msg: format!("Unknown version {}", self.pg_version),
lsn: self.lsn,
})
)
}),
}
}
}
}

View File

@@ -163,20 +163,6 @@ pub const RM_HEAP2_ID: u8 = 9;
pub const RM_HEAP_ID: u8 = 10;
pub const RM_LOGICALMSG_ID: u8 = 21;
// from neon_rmgr.h
pub const RM_NEON_ID: u8 = 134;
pub const XLOG_NEON_HEAP_INIT_PAGE: u8 = 0x80;
pub const XLOG_NEON_HEAP_INSERT: u8 = 0x00;
pub const XLOG_NEON_HEAP_DELETE: u8 = 0x10;
pub const XLOG_NEON_HEAP_UPDATE: u8 = 0x20;
pub const XLOG_NEON_HEAP_HOT_UPDATE: u8 = 0x30;
pub const XLOG_NEON_HEAP_LOCK: u8 = 0x40;
pub const XLOG_NEON_HEAP_MULTI_INSERT: u8 = 0x50;
pub const XLOG_NEON_HEAP_VISIBLE: u8 = 0x40;
// from xlogreader.h
pub const XLR_INFO_MASK: u8 = 0x0F;
pub const XLR_RMGR_INFO_MASK: u8 = 0xF0;

View File

@@ -3,8 +3,3 @@ pub const XLOG_DBASE_DROP: u8 = 0x10;
pub const BKPIMAGE_IS_COMPRESSED: u8 = 0x02; /* page image is compressed */
pub const BKPIMAGE_APPLY: u8 = 0x04; /* page image should be restored during replay */
pub const SIZEOF_RELMAPFILE: usize = 512; /* sizeof(RelMapFile) in relmapper.c */
pub fn bkpimg_is_compressed(bimg_info: u8) -> bool {
(bimg_info & BKPIMAGE_IS_COMPRESSED) != 0
}

View File

@@ -1,18 +1,10 @@
pub const XACT_XINFO_HAS_DROPPED_STATS: u32 = 1u32 << 8;
pub const XLOG_DBASE_CREATE_FILE_COPY: u8 = 0x00;
pub const XLOG_DBASE_CREATE_WAL_LOG: u8 = 0x10;
pub const XLOG_DBASE_CREATE_WAL_LOG: u8 = 0x00;
pub const XLOG_DBASE_DROP: u8 = 0x20;
pub const BKPIMAGE_APPLY: u8 = 0x02; /* page image should be restored during replay */
pub const BKPIMAGE_COMPRESS_PGLZ: u8 = 0x04; /* page image is compressed */
pub const BKPIMAGE_COMPRESS_LZ4: u8 = 0x08; /* page image is compressed */
pub const BKPIMAGE_COMPRESS_ZSTD: u8 = 0x10; /* page image is compressed */
pub const SIZEOF_RELMAPFILE: usize = 512; /* sizeof(RelMapFile) in relmapper.c */
pub fn bkpimg_is_compressed(bimg_info: u8) -> bool {
const ANY_COMPRESS_FLAG: u8 = BKPIMAGE_COMPRESS_PGLZ | BKPIMAGE_COMPRESS_LZ4 | BKPIMAGE_COMPRESS_ZSTD;
(bimg_info & ANY_COMPRESS_FLAG) != 0
}

View File

@@ -1,18 +0,0 @@
pub const XACT_XINFO_HAS_DROPPED_STATS: u32 = 1u32 << 8;
pub const XLOG_DBASE_CREATE_FILE_COPY: u8 = 0x00;
pub const XLOG_DBASE_CREATE_WAL_LOG: u8 = 0x10;
pub const XLOG_DBASE_DROP: u8 = 0x20;
pub const BKPIMAGE_APPLY: u8 = 0x02; /* page image should be restored during replay */
pub const BKPIMAGE_COMPRESS_PGLZ: u8 = 0x04; /* page image is compressed */
pub const BKPIMAGE_COMPRESS_LZ4: u8 = 0x08; /* page image is compressed */
pub const BKPIMAGE_COMPRESS_ZSTD: u8 = 0x10; /* page image is compressed */
pub const SIZEOF_RELMAPFILE: usize = 524; /* sizeof(RelMapFile) in relmapper.c */
pub fn bkpimg_is_compressed(bimg_info: u8) -> bool {
const ANY_COMPRESS_FLAG: u8 = BKPIMAGE_COMPRESS_PGLZ | BKPIMAGE_COMPRESS_LZ4 | BKPIMAGE_COMPRESS_ZSTD;
(bimg_info & ANY_COMPRESS_FLAG) != 0
}

View File

@@ -49,9 +49,9 @@ impl Conf {
pub fn pg_distrib_dir(&self) -> anyhow::Result<PathBuf> {
let path = self.pg_distrib_dir.clone();
#[allow(clippy::manual_range_patterns)]
match self.pg_version {
14 | 15 | 16 => Ok(path.join(format!("v{}", self.pg_version))),
14 => Ok(path.join(format!("v{}", self.pg_version))),
15 => Ok(path.join(format!("v{}", self.pg_version))),
_ => bail!("Unsupported postgres version: {}", self.pg_version),
}
}
@@ -250,18 +250,11 @@ fn craft_internal<C: postgres::GenericClient>(
let (mut intermediate_lsns, last_lsn) = f(client, initial_lsn)?;
let last_lsn = match last_lsn {
None => client.pg_current_wal_insert_lsn()?,
Some(last_lsn) => {
let insert_lsn = client.pg_current_wal_insert_lsn()?;
match last_lsn.cmp(&insert_lsn) {
Ordering::Less => bail!(
"Some records were inserted after the crafted WAL: {} vs {}",
last_lsn,
insert_lsn
),
Ordering::Equal => last_lsn,
Ordering::Greater => bail!("Reported LSN is greater than insert_lsn"),
}
}
Some(last_lsn) => match last_lsn.cmp(&client.pg_current_wal_insert_lsn()?) {
Ordering::Less => bail!("Some records were inserted after the crafted WAL"),
Ordering::Equal => last_lsn,
Ordering::Greater => bail!("Reported LSN is greater than insert_lsn"),
},
};
if !intermediate_lsns.starts_with(&[initial_lsn]) {
intermediate_lsns.insert(0, initial_lsn);
@@ -370,9 +363,8 @@ impl Crafter for LastWalRecordXlogSwitchEndsOnPageBoundary {
);
ensure!(
u64::from(after_xlog_switch) as usize % XLOG_BLCKSZ == XLOG_SIZE_OF_XLOG_SHORT_PHD,
"XLOG_SWITCH message ended not on page boundary: {}, offset = {}",
after_xlog_switch,
u64::from(after_xlog_switch) as usize % XLOG_BLCKSZ
"XLOG_SWITCH message ended not on page boundary: {}",
after_xlog_switch
);
Ok((vec![before_xlog_switch, after_xlog_switch], next_segment))
}

View File

@@ -959,7 +959,7 @@ mod tests {
let make_params = |options| StartupMessageParams::new([("options", options)]);
let params = StartupMessageParams::new([]);
assert!(params.options_escaped().is_none());
assert!(matches!(params.options_escaped(), None));
let params = make_params("");
assert!(split_options(&params).is_empty());

View File

@@ -573,7 +573,7 @@ mod tests {
#[test]
fn relative_path() {
let all_paths = ["", "some/path", "some/path/"];
let all_paths = vec!["", "some/path", "some/path/"];
let all_paths: Vec<RemotePath> = all_paths
.iter()
.map(|x| RemotePath::new(Path::new(x)).expect("bad path"))

View File

@@ -9,12 +9,11 @@ PORT=$4
SYSID=$(od -A n -j 24 -N 8 -t d8 "$WAL_PATH"/000000010000000000000002* | cut -c 3-)
rm -fr "$DATA_DIR"
env -i LD_LIBRARY_PATH="$PG_BIN"/../lib "$PG_BIN"/initdb -E utf8 -U cloud_admin -D "$DATA_DIR" --sysid="$SYSID"
echo "port=$PORT" >> "$DATA_DIR"/postgresql.conf
echo "shared_preload_libraries='\$libdir/neon_rmgr.so'" >> "$DATA_DIR"/postgresql.conf
echo port="$PORT" >> "$DATA_DIR"/postgresql.conf
REDO_POS=0x$("$PG_BIN"/pg_controldata -D "$DATA_DIR" | grep -F "REDO location"| cut -c 42-)
declare -i WAL_SIZE=$REDO_POS+114
"$PG_BIN"/pg_ctl -D "$DATA_DIR" -l "$DATA_DIR/logfile.log" start
"$PG_BIN"/pg_ctl -D "$DATA_DIR" -l "$DATA_DIR/logfile.log" stop -m immediate
"$PG_BIN"/pg_ctl -D "$DATA_DIR" -l logfile start
"$PG_BIN"/pg_ctl -D "$DATA_DIR" -l logfile stop -m immediate
cp "$DATA_DIR"/pg_wal/000000010000000000000001 .
cp "$WAL_PATH"/* "$DATA_DIR"/pg_wal/
for partial in "$DATA_DIR"/pg_wal/*.partial ; do mv "$partial" "${partial%.partial}" ; done

View File

@@ -178,17 +178,14 @@ pub async fn ws_handler(
/// Starts the monitor. If startup fails or the monitor exits, an error will
/// be logged and our internal state will be reset to allow for new connections.
#[tracing::instrument(skip_all)]
#[tracing::instrument(skip_all, fields(?args))]
async fn start_monitor(
ws: WebSocket,
args: &Args,
kill: broadcast::Receiver<()>,
token: CancellationToken,
) {
info!(
?args,
"accepted new websocket connection -> starting monitor"
);
info!("accepted new websocket connection -> starting monitor");
let timeout = Duration::from_secs(4);
let monitor = tokio::time::timeout(
timeout,

View File

@@ -5,7 +5,6 @@
//! all functionality.
use std::sync::Arc;
use std::time::{Duration, Instant};
use std::{fmt::Debug, mem};
use anyhow::{bail, Context};
@@ -37,8 +36,6 @@ pub struct Runner {
/// by us vs the autoscaler-agent.
counter: usize,
last_upscale_request_at: Option<Instant>,
/// A signal to kill the main thread produced by `self.run()`. This is triggered
/// when the server receives a new connection. When the thread receives the
/// signal off this channel, it will gracefully shutdown.
@@ -102,7 +99,6 @@ impl Runner {
cgroup: None,
dispatcher,
counter: 1, // NB: must be odd, see the comment about the field for more.
last_upscale_request_at: None,
kill,
};
@@ -401,20 +397,6 @@ impl Runner {
if request.is_none() {
bail!("failed to listen for upscale event from cgroup")
}
// If it's been less than 1 second since the last time we requested upscaling,
// ignore the event, to avoid spamming the agent (otherwise, this can happen
// ~1k times per second).
if let Some(t) = self.last_upscale_request_at {
let elapsed = t.elapsed();
if elapsed < Duration::from_secs(1) {
info!(elapsed_millis = elapsed.as_millis(), "cgroup asked for upscale but too soon to forward the request, ignoring");
continue;
}
}
self.last_upscale_request_at = Some(Instant::now());
info!("cgroup asking for upscale; forwarding request");
self.counter += 2; // Increment, preserving parity (i.e. keep the
// counter odd). See the field comment for more.

View File

@@ -25,7 +25,6 @@ use crate::context::RequestContext;
use crate::tenant::Timeline;
use pageserver_api::reltag::{RelTag, SlruKind};
use postgres_ffi::dispatch_pgversion;
use postgres_ffi::pg_constants::{DEFAULTTABLESPACE_OID, GLOBALTABLESPACE_OID};
use postgres_ffi::pg_constants::{PGDATA_SPECIAL_FILES, PGDATA_SUBDIRS, PG_HBA};
use postgres_ffi::relfile_utils::{INIT_FORKNUM, MAIN_FORKNUM};
@@ -324,25 +323,14 @@ where
.timeline
.get_relmap_file(spcnode, dbnode, self.lsn, self.ctx)
.await?;
ensure!(
img.len()
== dispatch_pgversion!(
self.timeline.pg_version,
pgv::bindings::SIZEOF_RELMAPFILE
)
);
ensure!(img.len() == 512);
Some(img)
} else {
None
};
if spcnode == GLOBALTABLESPACE_OID {
let pg_version_str = match self.timeline.pg_version {
14 | 15 => self.timeline.pg_version.to_string(),
ver => format!("{ver}\x0A"),
};
let pg_version_str = self.timeline.pg_version.to_string();
let header = new_tar_header("PG_VERSION", pg_version_str.len() as u64)?;
self.ar.append(&header, pg_version_str.as_bytes()).await?;
@@ -386,10 +374,7 @@ where
if let Some(img) = relmap_img {
let dst_path = format!("base/{}/PG_VERSION", dbnode);
let pg_version_str = match self.timeline.pg_version {
14 | 15 => self.timeline.pg_version.to_string(),
ver => format!("{ver}\x0A"),
};
let pg_version_str = self.timeline.pg_version.to_string();
let header = new_tar_header(&dst_path, pg_version_str.len() as u64)?;
self.ar.append(&header, pg_version_str.as_bytes()).await?;

View File

@@ -477,19 +477,16 @@ fn start_pageserver(
{
let _rt_guard = MGMT_REQUEST_RUNTIME.enter();
let router_state = Arc::new(
http::routes::State::new(
conf,
http_auth.clone(),
remote_storage,
broker_client.clone(),
disk_usage_eviction_state,
)
.context("Failed to initialize router state")?,
);
let router = http::make_router(router_state, launch_ts, http_auth.clone())?
.build()
.map_err(|err| anyhow!(err))?;
let router = http::make_router(
conf,
launch_ts,
http_auth,
broker_client.clone(),
remote_storage,
disk_usage_eviction_state,
)?
.build()
.map_err(|err| anyhow!(err))?;
let service = utils::http::RouterService::new(router).unwrap();
let server = hyper::Server::from_tcp(http_listener)?
.serve(service)

View File

@@ -73,7 +73,7 @@ pub mod defaults {
/// Default built-in configuration file.
///
pub const DEFAULT_CONFIG_FILE: &str = formatcp!(
r#"
r###"
# Initial configuration file created by 'pageserver --init'
#listen_pg_addr = '{DEFAULT_PG_LISTEN_ADDR}'
#listen_http_addr = '{DEFAULT_HTTP_LISTEN_ADDR}'
@@ -118,7 +118,7 @@ pub mod defaults {
[remote_storage]
"#
"###
);
}
@@ -668,18 +668,26 @@ impl PageServerConf {
pub fn pg_distrib_dir(&self, pg_version: u32) -> anyhow::Result<PathBuf> {
let path = self.pg_distrib_dir.clone();
#[allow(clippy::manual_range_patterns)]
match pg_version {
14 | 15 | 16 => Ok(path.join(format!("v{pg_version}"))),
14 => Ok(path.join(format!("v{pg_version}"))),
15 => Ok(path.join(format!("v{pg_version}"))),
_ => bail!("Unsupported postgres version: {}", pg_version),
}
}
pub fn pg_bin_dir(&self, pg_version: u32) -> anyhow::Result<PathBuf> {
Ok(self.pg_distrib_dir(pg_version)?.join("bin"))
match pg_version {
14 => Ok(self.pg_distrib_dir(pg_version)?.join("bin")),
15 => Ok(self.pg_distrib_dir(pg_version)?.join("bin")),
_ => bail!("Unsupported postgres version: {}", pg_version),
}
}
pub fn pg_lib_dir(&self, pg_version: u32) -> anyhow::Result<PathBuf> {
Ok(self.pg_distrib_dir(pg_version)?.join("lib"))
match pg_version {
14 => Ok(self.pg_distrib_dir(pg_version)?.join("lib")),
15 => Ok(self.pg_distrib_dir(pg_version)?.join("lib")),
_ => bail!("Unsupported postgres version: {}", pg_version),
}
}
/// Parse a configuration file (pageserver.toml) into a PageServerConf struct,

View File

@@ -12,6 +12,7 @@ use pageserver_api::models::{
DownloadRemoteLayersTaskSpawnRequest, TenantAttachRequest, TenantLoadRequest,
};
use remote_storage::GenericRemoteStorage;
use storage_broker::BrokerClientChannel;
use tenant_size_model::{SizeResult, StorageModel};
use tokio_util::sync::CancellationToken;
use tracing::*;
@@ -54,7 +55,7 @@ use utils::{
// Imports only used for testing APIs
use super::models::ConfigureFailpointsRequest;
pub struct State {
struct State {
conf: &'static PageServerConf,
auth: Option<Arc<JwtAuth>>,
allowlist_routes: Vec<Uri>,
@@ -64,7 +65,7 @@ pub struct State {
}
impl State {
pub fn new(
fn new(
conf: &'static PageServerConf,
auth: Option<Arc<JwtAuth>>,
remote_storage: Option<GenericRemoteStorage>,
@@ -285,8 +286,6 @@ async fn build_timeline_info_common(
let state = timeline.current_state();
let remote_consistent_lsn = timeline.get_remote_consistent_lsn().unwrap_or(Lsn(0));
let walreceiver_status = timeline.walreceiver_status();
let info = TimelineInfo {
tenant_id: timeline.tenant_id,
timeline_id: timeline.timeline_id,
@@ -307,8 +306,6 @@ async fn build_timeline_info_common(
pg_version: timeline.pg_version,
state,
walreceiver_status,
};
Ok(info)
}
@@ -1357,9 +1354,12 @@ where
}
pub fn make_router(
state: Arc<State>,
conf: &'static PageServerConf,
launch_ts: &'static LaunchTimestamp,
auth: Option<Arc<JwtAuth>>,
broker_client: BrokerClientChannel,
remote_storage: Option<GenericRemoteStorage>,
disk_usage_eviction_state: Arc<disk_usage_eviction_task::State>,
) -> anyhow::Result<RouterBuilder<hyper::Body, ApiError>> {
let spec = include_bytes!("openapi_spec.yml");
let mut router = attach_openapi_ui(endpoint::make_router(), spec, "/swagger.yml", "/v1/doc");
@@ -1383,7 +1383,16 @@ pub fn make_router(
);
Ok(router
.data(state)
.data(Arc::new(
State::new(
conf,
auth,
remote_storage,
broker_client,
disk_usage_eviction_state,
)
.context("Failed to initialize router state")?,
))
.get("/v1/status", |r| api_handler(r, status_handler))
.put("/v1/failpoints", |r| {
testing_api_handler("manage failpoints", r, failpoints_handler)

View File

@@ -537,7 +537,7 @@ const STORAGE_IO_TIME_BUCKETS: &[f64] = &[
30.000, // 30000 ms
];
/// VirtualFile fs operation variants.
/// Tracks time taken by fs operations near VirtualFile.
///
/// Operations:
/// - open ([`std::fs::OpenOptions::open`])
@@ -548,66 +548,15 @@ const STORAGE_IO_TIME_BUCKETS: &[f64] = &[
/// - seek (modify internal position or file length query)
/// - fsync ([`std::fs::File::sync_all`])
/// - metadata ([`std::fs::File::metadata`])
#[derive(
Debug, Clone, Copy, strum_macros::EnumCount, strum_macros::EnumIter, strum_macros::FromRepr,
)]
pub(crate) enum StorageIoOperation {
Open,
Close,
CloseByReplace,
Read,
Write,
Seek,
Fsync,
Metadata,
}
impl StorageIoOperation {
pub fn as_str(&self) -> &'static str {
match self {
StorageIoOperation::Open => "open",
StorageIoOperation::Close => "close",
StorageIoOperation::CloseByReplace => "close-by-replace",
StorageIoOperation::Read => "read",
StorageIoOperation::Write => "write",
StorageIoOperation::Seek => "seek",
StorageIoOperation::Fsync => "fsync",
StorageIoOperation::Metadata => "metadata",
}
}
}
/// Tracks time taken by fs operations near VirtualFile.
#[derive(Debug)]
pub(crate) struct StorageIoTime {
metrics: [Histogram; StorageIoOperation::COUNT],
}
impl StorageIoTime {
fn new() -> Self {
let storage_io_histogram_vec = register_histogram_vec!(
"pageserver_io_operations_seconds",
"Time spent in IO operations",
&["operation"],
STORAGE_IO_TIME_BUCKETS.into()
)
.expect("failed to define a metric");
let metrics = std::array::from_fn(|i| {
let op = StorageIoOperation::from_repr(i).unwrap();
let metric = storage_io_histogram_vec
.get_metric_with_label_values(&[op.as_str()])
.unwrap();
metric
});
Self { metrics }
}
pub(crate) fn get(&self, op: StorageIoOperation) -> &Histogram {
&self.metrics[op as usize]
}
}
pub(crate) static STORAGE_IO_TIME_METRIC: Lazy<StorageIoTime> = Lazy::new(StorageIoTime::new);
pub(crate) static STORAGE_IO_TIME: Lazy<HistogramVec> = Lazy::new(|| {
register_histogram_vec!(
"pageserver_io_operations_seconds",
"Time spent in IO operations",
&["operation"],
STORAGE_IO_TIME_BUCKETS.into()
)
.expect("failed to define a metric")
});
const STORAGE_IO_SIZE_OPERATIONS: &[&str] = &["read", "write"];
@@ -1216,12 +1165,6 @@ impl TimelineMetrics {
),
}
}
pub fn record_new_file_metrics(&self, sz: u64) {
self.resident_physical_size_gauge.add(sz);
self.num_persistent_files_created.inc_by(1);
self.persistent_bytes_written.inc_by(sz);
}
}
impl Drop for TimelineMetrics {

View File

@@ -799,9 +799,8 @@ impl PageCache {
fn new(num_pages: usize) -> Self {
assert!(num_pages > 0, "page cache size must be > 0");
// We could use Vec::leak here, but that potentially also leaks
// uninitialized reserved capacity. With into_boxed_slice and Box::leak
// this is avoided.
// We use Box::leak here and into_boxed_slice to avoid leaking uninitialized
// memory that Vec's might contain.
let page_buffer = Box::leak(vec![0u8; num_pages * PAGE_SZ].into_boxed_slice());
let size_metrics = &crate::metrics::PAGE_CACHE_SIZE;

View File

@@ -71,7 +71,7 @@ impl<'a> Deref for BlockLease<'a> {
///
/// Unlike traits, we also support the read function to be async though.
pub(crate) enum BlockReaderRef<'a> {
FileBlockReader(&'a FileBlockReader),
FileBlockReaderVirtual(&'a FileBlockReader),
EphemeralFile(&'a EphemeralFile),
Adapter(Adapter<&'a DeltaLayerInner>),
#[cfg(test)]
@@ -85,7 +85,7 @@ impl<'a> BlockReaderRef<'a> {
async fn read_blk(&self, blknum: u32) -> Result<BlockLease, std::io::Error> {
use BlockReaderRef::*;
match self {
FileBlockReader(r) => r.read_blk(blknum).await,
FileBlockReaderVirtual(r) => r.read_blk(blknum).await,
EphemeralFile(r) => r.read_blk(blknum).await,
Adapter(r) => r.read_blk(blknum).await,
#[cfg(test)]
@@ -124,7 +124,7 @@ impl<'a> BlockCursor<'a> {
// Needed by cli
pub fn new_fileblockreader(reader: &'a FileBlockReader) -> Self {
BlockCursor {
reader: BlockReaderRef::FileBlockReader(reader),
reader: BlockReaderRef::FileBlockReaderVirtual(reader),
}
}
@@ -197,7 +197,7 @@ impl FileBlockReader {
impl BlockReader for FileBlockReader {
fn block_cursor(&self) -> BlockCursor<'_> {
BlockCursor::new(BlockReaderRef::FileBlockReader(self))
BlockCursor::new(BlockReaderRef::FileBlockReaderVirtual(self))
}
}

View File

@@ -1,10 +1,9 @@
//! This module acts as a switchboard to access different repositories managed by this
//! page server.
use rand::{distributions::Alphanumeric, Rng};
use std::collections::{hash_map, HashMap};
use std::ffi::OsStr;
use std::path::{Path, PathBuf};
use std::path::Path;
use std::sync::Arc;
use tokio::fs;
@@ -71,11 +70,6 @@ impl TenantsMap {
/// This is pageserver-specific, as it relies on future processes after a crash to check
/// for TEMP_FILE_SUFFIX when loading things.
async fn safe_remove_tenant_dir_all(path: impl AsRef<Path>) -> std::io::Result<()> {
let tmp_path = safe_rename_tenant_dir(path).await?;
fs::remove_dir_all(tmp_path).await
}
async fn safe_rename_tenant_dir(path: impl AsRef<Path>) -> std::io::Result<PathBuf> {
let parent = path
.as_ref()
.parent()
@@ -85,16 +79,11 @@ async fn safe_rename_tenant_dir(path: impl AsRef<Path>) -> std::io::Result<PathB
std::io::ErrorKind::InvalidInput,
"Path must be absolute",
))?;
let rand_suffix = rand::thread_rng()
.sample_iter(&Alphanumeric)
.take(8)
.map(char::from)
.collect::<String>()
+ TEMP_FILE_SUFFIX;
let tmp_path = path_with_suffix_extension(&path, &rand_suffix);
let tmp_path = path_with_suffix_extension(&path, TEMP_FILE_SUFFIX);
fs::rename(&path, &tmp_path).await?;
fs::File::open(parent).await?.sync_all().await?;
Ok(tmp_path)
fs::remove_dir_all(tmp_path).await
}
static TENANTS: Lazy<RwLock<TenantsMap>> = Lazy::new(|| RwLock::new(TenantsMap::Initializing));
@@ -503,8 +492,6 @@ pub enum GetTenantError {
/// Gets the tenant from the in-memory data, erroring if it's absent or is not fitting to the query.
/// `active_only = true` allows to query only tenants that are ready for operations, erroring on other kinds of tenants.
///
/// This method is cancel-safe.
pub async fn get_tenant(
tenant_id: TenantId,
active_only: bool,
@@ -564,24 +551,7 @@ pub async fn detach_tenant(
tenant_id: TenantId,
detach_ignored: bool,
) -> Result<(), TenantStateError> {
let tmp_path = detach_tenant0(conf, &TENANTS, tenant_id, detach_ignored).await?;
// Although we are cleaning up the tenant, this task is not meant to be bound by the lifetime of the tenant in memory.
// After a tenant is detached, there are no more task_mgr tasks for that tenant_id.
let task_tenant_id = None;
task_mgr::spawn(
task_mgr::BACKGROUND_RUNTIME.handle(),
TaskKind::MgmtRequest,
task_tenant_id,
None,
"tenant_files_delete",
false,
async move {
fs::remove_dir_all(tmp_path.as_path())
.await
.with_context(|| format!("tenant directory {:?} deletion", tmp_path))
},
);
Ok(())
detach_tenant0(conf, &TENANTS, tenant_id, detach_ignored).await
}
async fn detach_tenant0(
@@ -589,16 +559,20 @@ async fn detach_tenant0(
tenants: &tokio::sync::RwLock<TenantsMap>,
tenant_id: TenantId,
detach_ignored: bool,
) -> Result<PathBuf, TenantStateError> {
let tenant_dir_rename_operation = |tenant_id_to_clean| async move {
) -> Result<(), TenantStateError> {
let local_files_cleanup_operation = |tenant_id_to_clean| async move {
let local_tenant_directory = conf.tenant_path(&tenant_id_to_clean);
safe_rename_tenant_dir(&local_tenant_directory)
safe_remove_tenant_dir_all(&local_tenant_directory)
.await
.with_context(|| format!("local tenant directory {local_tenant_directory:?} rename"))
.with_context(|| {
format!("local tenant directory {local_tenant_directory:?} removal")
})?;
Ok(())
};
let removal_result =
remove_tenant_from_memory(tenants, tenant_id, tenant_dir_rename_operation(tenant_id)).await;
remove_tenant_from_memory(tenants, tenant_id, local_files_cleanup_operation(tenant_id))
.await;
// Ignored tenants are not present in memory and will bail the removal from memory operation.
// Before returning the error, check for ignored tenant removal case — we only need to clean its local files then.
@@ -606,10 +580,10 @@ async fn detach_tenant0(
let tenant_ignore_mark = conf.tenant_ignore_mark_file_path(&tenant_id);
if tenant_ignore_mark.exists() {
info!("Detaching an ignored tenant");
let tmp_path = tenant_dir_rename_operation(tenant_id)
local_files_cleanup_operation(tenant_id)
.await
.with_context(|| format!("Ignored tenant {tenant_id} local directory rename"))?;
return Ok(tmp_path);
.with_context(|| format!("Ignored tenant {tenant_id} local files cleanup"))?;
return Ok(());
}
}

View File

@@ -74,7 +74,7 @@ impl Layer for RemoteLayer {
_reconstruct_state: &mut ValueReconstructState,
_ctx: &RequestContext,
) -> Result<ValueReconstructResult> {
Err(anyhow::anyhow!("layer {self} needs to be downloaded"))
bail!("layer {self} needs to be downloaded");
}
}

View File

@@ -585,7 +585,15 @@ impl Timeline {
Err(e) => {
// don't count the time spent waiting for lock below, and also in walreceiver.status(), towards the wait_lsn_time_histo
drop(_timer);
let walreceiver_status = self.walreceiver_status();
let walreceiver_status = {
match &*self.walreceiver.lock().unwrap() {
None => "stopping or stopped".to_string(),
Some(walreceiver) => match walreceiver.status() {
Some(status) => status.to_human_readable_string(),
None => "Not active".to_string(),
},
}
};
Err(anyhow::Error::new(e).context({
format!(
"Timed out while waiting for WAL record at LSN {} to arrive, last_record_lsn {} disk consistent LSN={}, WalReceiver status: {}",
@@ -599,16 +607,6 @@ impl Timeline {
}
}
pub(crate) fn walreceiver_status(&self) -> String {
match &*self.walreceiver.lock().unwrap() {
None => "stopping or stopped".to_string(),
Some(walreceiver) => match walreceiver.status() {
Some(status) => status.to_human_readable_string(),
None => "Not active".to_string(),
},
}
}
/// Check that it is valid to request operations with that lsn.
pub fn check_lsn_is_in_scope(
&self,
@@ -2758,7 +2756,9 @@ impl Timeline {
// update metrics
let sz = l.layer_desc().file_size;
self.metrics.record_new_file_metrics(sz);
self.metrics.resident_physical_size_gauge.add(sz);
self.metrics.num_persistent_files_created.inc_by(1);
self.metrics.persistent_bytes_written.inc_by(sz);
}
guard.finish_flush_l0_layer(delta_layer_to_add, &frozen_layer);
@@ -3133,8 +3133,9 @@ impl Timeline {
LayerFileMetadata::new(metadata.len(), self.generation),
);
// update metrics
self.metrics.record_new_file_metrics(metadata.len());
self.metrics
.resident_physical_size_gauge
.add(metadata.len());
let l = Arc::new(l);
l.access_stats().record_residence_event(
LayerResidenceStatus::Resident,
@@ -3816,8 +3817,10 @@ impl Timeline {
)?;
}
// update metrics, including the timeline's physical size
self.metrics.record_new_file_metrics(metadata.len());
// update the timeline's physical size
self.metrics
.resident_physical_size_gauge
.add(metadata.len());
new_layer_paths.insert(
new_delta_path,

View File

@@ -328,24 +328,9 @@ impl Timeline {
// Make one of the tenant's timelines draw the short straw and run the calculation.
// The others wait until the calculation is done so that they take into account the
// imitated accesses that the winner made.
//
// It is critical we are responsive to cancellation here. Otherwise, we deadlock with
// tenant deletion (holds TENANTS in read mode) any other task that attempts to
// acquire TENANTS in write mode before we here call get_tenant.
// See https://github.com/neondatabase/neon/issues/5284.
let res = tokio::select! {
_ = cancel.cancelled() => {
return ControlFlow::Break(());
}
res = crate::tenant::mgr::get_tenant(self.tenant_id, true) => {
res
}
};
let tenant = match res {
Ok(t) => t,
Err(_) => {
return ControlFlow::Break(());
}
let Ok(tenant) = crate::tenant::mgr::get_tenant(self.tenant_id, true).await else {
// likely, we're shutting down
return ControlFlow::Break(());
};
let mut state = tenant.eviction_task_tenant_state.lock().await;
match state.last_layer_access_imitation {

View File

@@ -135,7 +135,7 @@ impl WalReceiver {
.await;
}
pub(crate) fn status(&self) -> Option<ConnectionManagerStatus> {
pub(super) fn status(&self) -> Option<ConnectionManagerStatus> {
self.manager_status.read().unwrap().clone()
}
}

View File

@@ -10,15 +10,16 @@
//! This is similar to PostgreSQL's virtual file descriptor facility in
//! src/backend/storage/file/fd.c
//!
use crate::metrics::{StorageIoOperation, STORAGE_IO_SIZE, STORAGE_IO_TIME_METRIC};
use crate::metrics::{STORAGE_IO_SIZE, STORAGE_IO_TIME};
use crate::tenant::TENANTS_SEGMENT_NAME;
use futures::Future;
use once_cell::sync::OnceCell;
use std::fs::{self, File, OpenOptions};
use std::io::{Error, ErrorKind, Seek, SeekFrom};
use std::os::unix::fs::FileExt;
use std::path::{Path, PathBuf};
use std::sync::atomic::{AtomicBool, AtomicUsize, Ordering};
use std::sync::{RwLock, RwLockWriteGuard};
use tokio::sync::{RwLock, RwLockWriteGuard};
///
/// A virtual file descriptor. You can use this just like std::fs::File, but internally
@@ -110,7 +111,7 @@ impl OpenFiles {
///
/// On return, we hold a lock on the slot, and its 'tag' has been updated
/// recently_used has been set. It's all ready for reuse.
fn find_victim_slot(&self) -> (SlotHandle, RwLockWriteGuard<SlotInner>) {
async fn find_victim_slot(&self) -> (SlotHandle, RwLockWriteGuard<SlotInner>) {
//
// Run the clock algorithm to find a slot to replace.
//
@@ -142,7 +143,7 @@ impl OpenFiles {
}
retries += 1;
} else {
slot_guard = slot.inner.write().unwrap();
slot_guard = slot.inner.write().await;
index = next;
break;
}
@@ -155,8 +156,8 @@ impl OpenFiles {
if let Some(old_file) = slot_guard.file.take() {
// the normal path of dropping VirtualFile uses "close", use "close-by-replace" here to
// distinguish the two.
STORAGE_IO_TIME_METRIC
.get(StorageIoOperation::CloseByReplace)
STORAGE_IO_TIME
.with_label_values(&["close-by-replace"])
.observe_closure_duration(|| drop(old_file));
}
@@ -244,10 +245,9 @@ impl VirtualFile {
tenant_id = "*".to_string();
timeline_id = "*".to_string();
}
let (handle, mut slot_guard) = get_open_files().find_victim_slot();
let file = STORAGE_IO_TIME_METRIC
.get(StorageIoOperation::Open)
let (handle, mut slot_guard) = get_open_files().find_victim_slot().await;
let file = STORAGE_IO_TIME
.with_label_values(&["open"])
.observe_closure_duration(|| open_options.open(path))?;
// Strip all options other than read and write.
@@ -331,21 +331,22 @@ impl VirtualFile {
/// Call File::sync_all() on the underlying File.
pub async fn sync_all(&self) -> Result<(), Error> {
self.with_file(StorageIoOperation::Fsync, |file| file.sync_all())
self.with_file("fsync", |file| async move { file.sync_all() })
.await?
}
pub async fn metadata(&self) -> Result<fs::Metadata, Error> {
self.with_file(StorageIoOperation::Metadata, |file| file.metadata())
self.with_file("metadata", |file| async move { file.metadata() })
.await?
}
/// Helper function that looks up the underlying File for this VirtualFile,
/// opening it and evicting some other File if necessary. It calls 'func'
/// with the physical File.
async fn with_file<F, R>(&self, op: StorageIoOperation, mut func: F) -> Result<R, Error>
async fn with_file<F, R, FR>(&self, _op: &str, func: F) -> Result<R, Error>
where
F: FnMut(&File) -> R,
F: FnOnce(&File) -> FR,
FR: Future<Output = R>,
{
let open_files = get_open_files();
@@ -356,19 +357,17 @@ impl VirtualFile {
// We only need to hold the handle lock while we read the current handle. If
// another thread closes the file and recycles the slot for a different file,
// we will notice that the handle we read is no longer valid and retry.
let mut handle = *self.handle.read().unwrap();
let mut handle = *self.handle.read().await;
loop {
// Check if the slot contains our File
{
let slot = &open_files.slots[handle.index];
let slot_guard = slot.inner.read().unwrap();
let slot_guard = slot.inner.read().await;
if slot_guard.tag == handle.tag {
if let Some(file) = &slot_guard.file {
// Found a cached file descriptor.
slot.recently_used.store(true, Ordering::Relaxed);
return Ok(STORAGE_IO_TIME_METRIC
.get(op)
.observe_closure_duration(|| func(file)));
return Ok(func(file).await);
}
}
}
@@ -376,7 +375,7 @@ impl VirtualFile {
// The slot didn't contain our File. We will have to open it ourselves,
// but before that, grab a write lock on handle in the VirtualFile, so
// that no other thread will try to concurrently open the same file.
let handle_guard = self.handle.write().unwrap();
let handle_guard = self.handle.write().await;
// If another thread changed the handle while we were not holding the lock,
// then the handle might now be valid again. Loop back to retry.
@@ -390,17 +389,15 @@ impl VirtualFile {
// We need to open the file ourselves. The handle in the VirtualFile is
// now locked in write-mode. Find a free slot to put it in.
let (handle, mut slot_guard) = open_files.find_victim_slot();
let (handle, mut slot_guard) = open_files.find_victim_slot().await;
// Open the physical file
let file = STORAGE_IO_TIME_METRIC
.get(StorageIoOperation::Open)
let file = STORAGE_IO_TIME
.with_label_values(&["open"])
.observe_closure_duration(|| self.open_options.open(&self.path))?;
// Perform the requested operation on it
let result = STORAGE_IO_TIME_METRIC
.get(op)
.observe_closure_duration(|| func(&file));
let result = func(&file).await;
// Store the File in the slot and update the handle in the VirtualFile
// to point to it.
@@ -424,7 +421,7 @@ impl VirtualFile {
}
SeekFrom::End(offset) => {
self.pos = self
.with_file(StorageIoOperation::Seek, |mut file| {
.with_file("seek", |mut file| async move {
file.seek(SeekFrom::End(offset))
})
.await??
@@ -516,7 +513,7 @@ impl VirtualFile {
pub async fn read_at(&self, buf: &mut [u8], offset: u64) -> Result<usize, Error> {
let result = self
.with_file(StorageIoOperation::Read, |file| file.read_at(buf, offset))
.with_file("read", |file| async move { file.read_at(buf, offset) })
.await?;
if let Ok(size) = result {
STORAGE_IO_SIZE
@@ -528,7 +525,7 @@ impl VirtualFile {
async fn write_at(&self, buf: &[u8], offset: u64) -> Result<usize, Error> {
let result = self
.with_file(StorageIoOperation::Write, |file| file.write_at(buf, offset))
.with_file("write", |file| async move { file.write_at(buf, offset) })
.await?;
if let Ok(size) = result {
STORAGE_IO_SIZE
@@ -571,18 +568,25 @@ impl VirtualFile {
impl Drop for VirtualFile {
/// If a VirtualFile is dropped, close the underlying file if it was open.
fn drop(&mut self) {
let handle = self.handle.get_mut().unwrap();
let handle = self.handle.get_mut();
// We could check with a read-lock first, to avoid waiting on an
// unrelated I/O.
// We don't have async drop so we cannot wait for the lock here.
// Instead, do a best-effort attempt at closing the underlying
// file descriptor by using `try_write`.
// This best-effort attempt should be quite good though
// as we have `&mut self` access. In other words, if the slot
// is still occupied by our file, we should be the only ones
// accessing it (and if it has been reassigned since, we don't
// need to bother with dropping anyways).
let slot = &get_open_files().slots[handle.index];
let mut slot_guard = slot.inner.write().unwrap();
let Ok(mut slot_guard) = slot.inner.try_write() else { return };
if slot_guard.tag == handle.tag {
slot.recently_used.store(false, Ordering::Relaxed);
// there is also operation "close-by-replace" for closes done on eviction for
// comparison.
STORAGE_IO_TIME_METRIC
.get(StorageIoOperation::Close)
STORAGE_IO_TIME
.with_label_values(&["close"])
.observe_closure_duration(|| drop(slot_guard.file.take()));
}
}

View File

@@ -25,7 +25,7 @@ use postgres_ffi::v14::nonrelfile_utils::clogpage_precedes;
use postgres_ffi::v14::nonrelfile_utils::slru_may_delete_clogsegment;
use postgres_ffi::{fsm_logical_to_physical, page_is_new, page_set_lsn};
use anyhow::{bail, Context, Result};
use anyhow::{Context, Result};
use bytes::{Buf, Bytes, BytesMut};
use tracing::*;
@@ -106,10 +106,6 @@ impl<'a> WalIngest<'a> {
self.ingest_heapam_record(&mut buf, modification, decoded, ctx)
.await?;
}
if decoded.xl_rmid == pg_constants::RM_NEON_ID {
self.ingest_neonrmgr_record(&mut buf, modification, decoded, ctx)
.await?;
}
// Handle other special record types
if decoded.xl_rmid == pg_constants::RM_SMGR_ID
&& (decoded.xl_info & pg_constants::XLR_RMGR_INFO_MASK)
@@ -176,32 +172,6 @@ impl<'a> WalIngest<'a> {
.await?;
}
}
} else if self.timeline.pg_version == 16 {
if (decoded.xl_info & pg_constants::XLR_RMGR_INFO_MASK)
== postgres_ffi::v16::bindings::XLOG_DBASE_CREATE_WAL_LOG
{
debug!("XLOG_DBASE_CREATE_WAL_LOG: noop");
} else if (decoded.xl_info & pg_constants::XLR_RMGR_INFO_MASK)
== postgres_ffi::v16::bindings::XLOG_DBASE_CREATE_FILE_COPY
{
// The XLOG record was renamed between v14 and v15,
// but the record format is the same.
// So we can reuse XlCreateDatabase here.
debug!("XLOG_DBASE_CREATE_FILE_COPY");
let createdb = XlCreateDatabase::decode(&mut buf);
self.ingest_xlog_dbase_create(modification, &createdb, ctx)
.await?;
} else if (decoded.xl_info & pg_constants::XLR_RMGR_INFO_MASK)
== postgres_ffi::v16::bindings::XLOG_DBASE_DROP
{
let dropdb = XlDropDatabase::decode(&mut buf);
for tablespace_id in dropdb.tablespace_ids {
trace!("Drop db {}, {}", tablespace_id, dropdb.db_id);
modification
.drop_dbdir(tablespace_id, dropdb.db_id, ctx)
.await?;
}
}
}
} else if decoded.xl_rmid == pg_constants::RM_TBLSPC_ID {
trace!("XLOG_TBLSPC_CREATE/DROP is not handled yet");
@@ -444,346 +414,57 @@ impl<'a> WalIngest<'a> {
// need to clear the corresponding bits in the visibility map.
let mut new_heap_blkno: Option<u32> = None;
let mut old_heap_blkno: Option<u32> = None;
if decoded.xl_rmid == pg_constants::RM_HEAP_ID {
let info = decoded.xl_info & pg_constants::XLOG_HEAP_OPMASK;
if info == pg_constants::XLOG_HEAP_INSERT {
let xlrec = XlHeapInsert::decode(buf);
assert_eq!(0, buf.remaining());
if (xlrec.flags & pg_constants::XLH_INSERT_ALL_VISIBLE_CLEARED) != 0 {
new_heap_blkno = Some(decoded.blocks[0].blkno);
}
} else if info == pg_constants::XLOG_HEAP_DELETE {
let xlrec = XlHeapDelete::decode(buf);
assert_eq!(0, buf.remaining());
if (xlrec.flags & pg_constants::XLH_DELETE_ALL_VISIBLE_CLEARED) != 0 {
new_heap_blkno = Some(decoded.blocks[0].blkno);
}
} else if info == pg_constants::XLOG_HEAP_UPDATE
|| info == pg_constants::XLOG_HEAP_HOT_UPDATE
{
let xlrec = XlHeapUpdate::decode(buf);
// the size of tuple data is inferred from the size of the record.
// we can't validate the remaining number of bytes without parsing
// the tuple data.
if (xlrec.flags & pg_constants::XLH_UPDATE_OLD_ALL_VISIBLE_CLEARED) != 0 {
old_heap_blkno = Some(decoded.blocks[0].blkno);
}
if (xlrec.flags & pg_constants::XLH_UPDATE_NEW_ALL_VISIBLE_CLEARED) != 0 {
// PostgreSQL only uses XLH_UPDATE_NEW_ALL_VISIBLE_CLEARED on a
// non-HOT update where the new tuple goes to different page than
// the old one. Otherwise, only XLH_UPDATE_OLD_ALL_VISIBLE_CLEARED is
// set.
new_heap_blkno = Some(decoded.blocks[1].blkno);
}
}
} else if decoded.xl_rmid == pg_constants::RM_HEAP2_ID {
let info = decoded.xl_info & pg_constants::XLOG_HEAP_OPMASK;
if info == pg_constants::XLOG_HEAP2_MULTI_INSERT {
let xlrec = XlHeapMultiInsert::decode(buf);
match self.timeline.pg_version {
14 => {
if decoded.xl_rmid == pg_constants::RM_HEAP_ID {
let info = decoded.xl_info & pg_constants::XLOG_HEAP_OPMASK;
if info == pg_constants::XLOG_HEAP_INSERT {
let xlrec = v14::XlHeapInsert::decode(buf);
assert_eq!(0, buf.remaining());
if (xlrec.flags & pg_constants::XLH_INSERT_ALL_VISIBLE_CLEARED) != 0 {
new_heap_blkno = Some(decoded.blocks[0].blkno);
}
} else if info == pg_constants::XLOG_HEAP_DELETE {
let xlrec = v14::XlHeapDelete::decode(buf);
assert_eq!(0, buf.remaining());
if (xlrec.flags & pg_constants::XLH_DELETE_ALL_VISIBLE_CLEARED) != 0 {
new_heap_blkno = Some(decoded.blocks[0].blkno);
}
} else if info == pg_constants::XLOG_HEAP_UPDATE
|| info == pg_constants::XLOG_HEAP_HOT_UPDATE
{
let xlrec = v14::XlHeapUpdate::decode(buf);
// the size of tuple data is inferred from the size of the record.
// we can't validate the remaining number of bytes without parsing
// the tuple data.
if (xlrec.flags & pg_constants::XLH_UPDATE_OLD_ALL_VISIBLE_CLEARED) != 0 {
old_heap_blkno = Some(decoded.blocks[0].blkno);
}
if (xlrec.flags & pg_constants::XLH_UPDATE_NEW_ALL_VISIBLE_CLEARED) != 0 {
// PostgreSQL only uses XLH_UPDATE_NEW_ALL_VISIBLE_CLEARED on a
// non-HOT update where the new tuple goes to different page than
// the old one. Otherwise, only XLH_UPDATE_OLD_ALL_VISIBLE_CLEARED is
// set.
new_heap_blkno = Some(decoded.blocks[1].blkno);
}
}
} else if decoded.xl_rmid == pg_constants::RM_HEAP2_ID {
let info = decoded.xl_info & pg_constants::XLOG_HEAP_OPMASK;
if info == pg_constants::XLOG_HEAP2_MULTI_INSERT {
let xlrec = v14::XlHeapMultiInsert::decode(buf);
let offset_array_len =
if decoded.xl_info & pg_constants::XLOG_HEAP_INIT_PAGE > 0 {
// the offsets array is omitted if XLOG_HEAP_INIT_PAGE is set
0
} else {
std::mem::size_of::<u16>() * xlrec.ntuples as usize
};
assert_eq!(offset_array_len, buf.remaining());
if (xlrec.flags & pg_constants::XLH_INSERT_ALL_VISIBLE_CLEARED) != 0 {
new_heap_blkno = Some(decoded.blocks[0].blkno);
}
}
let offset_array_len = if decoded.xl_info & pg_constants::XLOG_HEAP_INIT_PAGE > 0 {
// the offsets array is omitted if XLOG_HEAP_INIT_PAGE is set
0
} else {
bail!("Unknown RMGR {} for Heap decoding", decoded.xl_rmid);
}
}
15 => {
if decoded.xl_rmid == pg_constants::RM_HEAP_ID {
let info = decoded.xl_info & pg_constants::XLOG_HEAP_OPMASK;
std::mem::size_of::<u16>() * xlrec.ntuples as usize
};
assert_eq!(offset_array_len, buf.remaining());
if info == pg_constants::XLOG_HEAP_INSERT {
let xlrec = v15::XlHeapInsert::decode(buf);
assert_eq!(0, buf.remaining());
if (xlrec.flags & pg_constants::XLH_INSERT_ALL_VISIBLE_CLEARED) != 0 {
new_heap_blkno = Some(decoded.blocks[0].blkno);
}
} else if info == pg_constants::XLOG_HEAP_DELETE {
let xlrec = v15::XlHeapDelete::decode(buf);
assert_eq!(0, buf.remaining());
if (xlrec.flags & pg_constants::XLH_DELETE_ALL_VISIBLE_CLEARED) != 0 {
new_heap_blkno = Some(decoded.blocks[0].blkno);
}
} else if info == pg_constants::XLOG_HEAP_UPDATE
|| info == pg_constants::XLOG_HEAP_HOT_UPDATE
{
let xlrec = v15::XlHeapUpdate::decode(buf);
// the size of tuple data is inferred from the size of the record.
// we can't validate the remaining number of bytes without parsing
// the tuple data.
if (xlrec.flags & pg_constants::XLH_UPDATE_OLD_ALL_VISIBLE_CLEARED) != 0 {
old_heap_blkno = Some(decoded.blocks[0].blkno);
}
if (xlrec.flags & pg_constants::XLH_UPDATE_NEW_ALL_VISIBLE_CLEARED) != 0 {
// PostgreSQL only uses XLH_UPDATE_NEW_ALL_VISIBLE_CLEARED on a
// non-HOT update where the new tuple goes to different page than
// the old one. Otherwise, only XLH_UPDATE_OLD_ALL_VISIBLE_CLEARED is
// set.
new_heap_blkno = Some(decoded.blocks[1].blkno);
}
}
} else if decoded.xl_rmid == pg_constants::RM_HEAP2_ID {
let info = decoded.xl_info & pg_constants::XLOG_HEAP_OPMASK;
if info == pg_constants::XLOG_HEAP2_MULTI_INSERT {
let xlrec = v15::XlHeapMultiInsert::decode(buf);
let offset_array_len =
if decoded.xl_info & pg_constants::XLOG_HEAP_INIT_PAGE > 0 {
// the offsets array is omitted if XLOG_HEAP_INIT_PAGE is set
0
} else {
std::mem::size_of::<u16>() * xlrec.ntuples as usize
};
assert_eq!(offset_array_len, buf.remaining());
if (xlrec.flags & pg_constants::XLH_INSERT_ALL_VISIBLE_CLEARED) != 0 {
new_heap_blkno = Some(decoded.blocks[0].blkno);
}
}
} else {
bail!("Unknown RMGR {} for Heap decoding", decoded.xl_rmid);
}
}
16 => {
if decoded.xl_rmid == pg_constants::RM_HEAP_ID {
let info = decoded.xl_info & pg_constants::XLOG_HEAP_OPMASK;
if info == pg_constants::XLOG_HEAP_INSERT {
let xlrec = v16::XlHeapInsert::decode(buf);
assert_eq!(0, buf.remaining());
if (xlrec.flags & pg_constants::XLH_INSERT_ALL_VISIBLE_CLEARED) != 0 {
new_heap_blkno = Some(decoded.blocks[0].blkno);
}
} else if info == pg_constants::XLOG_HEAP_DELETE {
let xlrec = v16::XlHeapDelete::decode(buf);
assert_eq!(0, buf.remaining());
if (xlrec.flags & pg_constants::XLH_DELETE_ALL_VISIBLE_CLEARED) != 0 {
new_heap_blkno = Some(decoded.blocks[0].blkno);
}
} else if info == pg_constants::XLOG_HEAP_UPDATE
|| info == pg_constants::XLOG_HEAP_HOT_UPDATE
{
let xlrec = v16::XlHeapUpdate::decode(buf);
// the size of tuple data is inferred from the size of the record.
// we can't validate the remaining number of bytes without parsing
// the tuple data.
if (xlrec.flags & pg_constants::XLH_UPDATE_OLD_ALL_VISIBLE_CLEARED) != 0 {
old_heap_blkno = Some(decoded.blocks[0].blkno);
}
if (xlrec.flags & pg_constants::XLH_UPDATE_NEW_ALL_VISIBLE_CLEARED) != 0 {
// PostgreSQL only uses XLH_UPDATE_NEW_ALL_VISIBLE_CLEARED on a
// non-HOT update where the new tuple goes to different page than
// the old one. Otherwise, only XLH_UPDATE_OLD_ALL_VISIBLE_CLEARED is
// set.
new_heap_blkno = Some(decoded.blocks[1].blkno);
}
}
} else if decoded.xl_rmid == pg_constants::RM_HEAP2_ID {
let info = decoded.xl_info & pg_constants::XLOG_HEAP_OPMASK;
if info == pg_constants::XLOG_HEAP2_MULTI_INSERT {
let xlrec = v16::XlHeapMultiInsert::decode(buf);
let offset_array_len =
if decoded.xl_info & pg_constants::XLOG_HEAP_INIT_PAGE > 0 {
// the offsets array is omitted if XLOG_HEAP_INIT_PAGE is set
0
} else {
std::mem::size_of::<u16>() * xlrec.ntuples as usize
};
assert_eq!(offset_array_len, buf.remaining());
if (xlrec.flags & pg_constants::XLH_INSERT_ALL_VISIBLE_CLEARED) != 0 {
new_heap_blkno = Some(decoded.blocks[0].blkno);
}
}
} else {
bail!("Unknown RMGR {} for Heap decoding", decoded.xl_rmid);
}
}
_ => {}
}
// FIXME: What about XLOG_HEAP_LOCK and XLOG_HEAP2_LOCK_UPDATED?
// Clear the VM bits if required.
if new_heap_blkno.is_some() || old_heap_blkno.is_some() {
let vm_rel = RelTag {
forknum: VISIBILITYMAP_FORKNUM,
spcnode: decoded.blocks[0].rnode_spcnode,
dbnode: decoded.blocks[0].rnode_dbnode,
relnode: decoded.blocks[0].rnode_relnode,
};
let mut new_vm_blk = new_heap_blkno.map(pg_constants::HEAPBLK_TO_MAPBLOCK);
let mut old_vm_blk = old_heap_blkno.map(pg_constants::HEAPBLK_TO_MAPBLOCK);
// Sometimes, Postgres seems to create heap WAL records with the
// ALL_VISIBLE_CLEARED flag set, even though the bit in the VM page is
// not set. In fact, it's possible that the VM page does not exist at all.
// In that case, we don't want to store a record to clear the VM bit;
// replaying it would fail to find the previous image of the page, because
// it doesn't exist. So check if the VM page(s) exist, and skip the WAL
// record if it doesn't.
let vm_size = self.get_relsize(vm_rel, modification.lsn, ctx).await?;
if let Some(blknum) = new_vm_blk {
if blknum >= vm_size {
new_vm_blk = None;
}
}
if let Some(blknum) = old_vm_blk {
if blknum >= vm_size {
old_vm_blk = None;
}
}
if new_vm_blk.is_some() || old_vm_blk.is_some() {
if new_vm_blk == old_vm_blk {
// An UPDATE record that needs to clear the bits for both old and the
// new page, both of which reside on the same VM page.
self.put_rel_wal_record(
modification,
vm_rel,
new_vm_blk.unwrap(),
NeonWalRecord::ClearVisibilityMapFlags {
new_heap_blkno,
old_heap_blkno,
flags: pg_constants::VISIBILITYMAP_VALID_BITS,
},
ctx,
)
.await?;
} else {
// Clear VM bits for one heap page, or for two pages that reside on
// different VM pages.
if let Some(new_vm_blk) = new_vm_blk {
self.put_rel_wal_record(
modification,
vm_rel,
new_vm_blk,
NeonWalRecord::ClearVisibilityMapFlags {
new_heap_blkno,
old_heap_blkno: None,
flags: pg_constants::VISIBILITYMAP_VALID_BITS,
},
ctx,
)
.await?;
}
if let Some(old_vm_blk) = old_vm_blk {
self.put_rel_wal_record(
modification,
vm_rel,
old_vm_blk,
NeonWalRecord::ClearVisibilityMapFlags {
new_heap_blkno: None,
old_heap_blkno,
flags: pg_constants::VISIBILITYMAP_VALID_BITS,
},
ctx,
)
.await?;
}
if (xlrec.flags & pg_constants::XLH_INSERT_ALL_VISIBLE_CLEARED) != 0 {
new_heap_blkno = Some(decoded.blocks[0].blkno);
}
}
}
Ok(())
}
async fn ingest_neonrmgr_record(
&mut self,
buf: &mut Bytes,
modification: &mut DatadirModification<'_>,
decoded: &mut DecodedWALRecord,
ctx: &RequestContext,
) -> anyhow::Result<()> {
// Handle VM bit updates that are implicitly part of heap records.
// First, look at the record to determine which VM bits need
// to be cleared. If either of these variables is set, we
// need to clear the corresponding bits in the visibility map.
let mut new_heap_blkno: Option<u32> = None;
let mut old_heap_blkno: Option<u32> = None;
assert_eq!(decoded.xl_rmid, pg_constants::RM_NEON_ID);
match self.timeline.pg_version {
16 => {
let info = decoded.xl_info & pg_constants::XLOG_HEAP_OPMASK;
match info {
pg_constants::XLOG_NEON_HEAP_INSERT => {
let xlrec = v16::rm_neon::XlNeonHeapInsert::decode(buf);
assert_eq!(0, buf.remaining());
if (xlrec.flags & pg_constants::XLH_INSERT_ALL_VISIBLE_CLEARED) != 0 {
new_heap_blkno = Some(decoded.blocks[0].blkno);
}
}
pg_constants::XLOG_NEON_HEAP_DELETE => {
let xlrec = v16::rm_neon::XlNeonHeapDelete::decode(buf);
assert_eq!(0, buf.remaining());
if (xlrec.flags & pg_constants::XLH_DELETE_ALL_VISIBLE_CLEARED) != 0 {
new_heap_blkno = Some(decoded.blocks[0].blkno);
}
}
pg_constants::XLOG_NEON_HEAP_UPDATE
| pg_constants::XLOG_NEON_HEAP_HOT_UPDATE => {
let xlrec = v16::rm_neon::XlNeonHeapUpdate::decode(buf);
// the size of tuple data is inferred from the size of the record.
// we can't validate the remaining number of bytes without parsing
// the tuple data.
if (xlrec.flags & pg_constants::XLH_UPDATE_OLD_ALL_VISIBLE_CLEARED) != 0 {
old_heap_blkno = Some(decoded.blocks[0].blkno);
}
if (xlrec.flags & pg_constants::XLH_UPDATE_NEW_ALL_VISIBLE_CLEARED) != 0 {
// PostgreSQL only uses XLH_UPDATE_NEW_ALL_VISIBLE_CLEARED on a
// non-HOT update where the new tuple goes to different page than
// the old one. Otherwise, only XLH_UPDATE_OLD_ALL_VISIBLE_CLEARED is
// set.
new_heap_blkno = Some(decoded.blocks[1].blkno);
}
}
pg_constants::XLOG_NEON_HEAP_MULTI_INSERT => {
let xlrec = v16::rm_neon::XlNeonHeapMultiInsert::decode(buf);
let offset_array_len =
if decoded.xl_info & pg_constants::XLOG_HEAP_INIT_PAGE > 0 {
// the offsets array is omitted if XLOG_HEAP_INIT_PAGE is set
0
} else {
std::mem::size_of::<u16>() * xlrec.ntuples as usize
};
assert_eq!(offset_array_len, buf.remaining());
if (xlrec.flags & pg_constants::XLH_INSERT_ALL_VISIBLE_CLEARED) != 0 {
new_heap_blkno = Some(decoded.blocks[0].blkno);
}
}
pg_constants::XLOG_NEON_HEAP_LOCK => {
/* XLOG_NEON_HEAP_LOCK doesn't need special care */
}
info => bail!("Unknown WAL record type for Neon RMGR: {}", info),
}
}
_ => bail!(
"Neon RMGR has no known compatibility with PostgreSQL version {}",
self.timeline.pg_version
),
}
// FIXME: What about XLOG_NEON_HEAP_LOCK?
// FIXME: What about XLOG_HEAP_LOCK and XLOG_HEAP2_LOCK_UPDATED?
// Clear the VM bits if required.
if new_heap_blkno.is_some() || old_heap_blkno.is_some() {

View File

@@ -4,10 +4,9 @@
use anyhow::Result;
use bytes::{Buf, Bytes};
use postgres_ffi::dispatch_pgversion;
use postgres_ffi::pg_constants;
use postgres_ffi::BLCKSZ;
use postgres_ffi::{BlockNumber, TimestampTz};
use postgres_ffi::{BlockNumber, OffsetNumber, TimestampTz};
use postgres_ffi::{MultiXactId, MultiXactOffset, MultiXactStatus, Oid, TransactionId};
use postgres_ffi::{XLogRecord, XLOG_SIZE_OF_XLOG_RECORD};
use serde::{Deserialize, Serialize};
@@ -77,12 +76,9 @@ pub struct DecodedBkpBlock {
pub flags: u8,
/* Information on full-page image, if any */
pub has_image: bool,
/* has image, even for consistency checking */
pub apply_image: bool,
/* has image that should be restored */
pub will_init: bool,
/* record doesn't need previous page version to apply */
pub has_image: bool, /* has image, even for consistency checking */
pub apply_image: bool, /* has image that should be restored */
pub will_init: bool, /* record doesn't need previous page version to apply */
//char *bkp_image;
pub hole_offset: u16,
pub hole_length: u16,
@@ -138,237 +134,6 @@ impl XlRelmapUpdate {
}
}
pub mod v14 {
use bytes::{Buf, Bytes};
use postgres_ffi::{OffsetNumber, TransactionId};
#[repr(C)]
#[derive(Debug)]
pub struct XlHeapInsert {
pub offnum: OffsetNumber,
pub flags: u8,
}
impl XlHeapInsert {
pub fn decode(buf: &mut Bytes) -> XlHeapInsert {
XlHeapInsert {
offnum: buf.get_u16_le(),
flags: buf.get_u8(),
}
}
}
#[repr(C)]
#[derive(Debug)]
pub struct XlHeapMultiInsert {
pub flags: u8,
pub _padding: u8,
pub ntuples: u16,
}
impl XlHeapMultiInsert {
pub fn decode(buf: &mut Bytes) -> XlHeapMultiInsert {
XlHeapMultiInsert {
flags: buf.get_u8(),
_padding: buf.get_u8(),
ntuples: buf.get_u16_le(),
}
}
}
#[repr(C)]
#[derive(Debug)]
pub struct XlHeapDelete {
pub xmax: TransactionId,
pub offnum: OffsetNumber,
pub _padding: u16,
pub t_cid: u32,
pub infobits_set: u8,
pub flags: u8,
}
impl XlHeapDelete {
pub fn decode(buf: &mut Bytes) -> XlHeapDelete {
XlHeapDelete {
xmax: buf.get_u32_le(),
offnum: buf.get_u16_le(),
_padding: buf.get_u16_le(),
t_cid: buf.get_u32_le(),
infobits_set: buf.get_u8(),
flags: buf.get_u8(),
}
}
}
#[repr(C)]
#[derive(Debug)]
pub struct XlHeapUpdate {
pub old_xmax: TransactionId,
pub old_offnum: OffsetNumber,
pub old_infobits_set: u8,
pub flags: u8,
pub t_cid: u32,
pub new_xmax: TransactionId,
pub new_offnum: OffsetNumber,
}
impl XlHeapUpdate {
pub fn decode(buf: &mut Bytes) -> XlHeapUpdate {
XlHeapUpdate {
old_xmax: buf.get_u32_le(),
old_offnum: buf.get_u16_le(),
old_infobits_set: buf.get_u8(),
flags: buf.get_u8(),
t_cid: buf.get_u32(),
new_xmax: buf.get_u32_le(),
new_offnum: buf.get_u16_le(),
}
}
}
}
pub mod v15 {
pub use super::v14::{XlHeapDelete, XlHeapInsert, XlHeapMultiInsert, XlHeapUpdate};
}
pub mod v16 {
pub use super::v14::{XlHeapInsert, XlHeapMultiInsert};
use bytes::{Buf, Bytes};
use postgres_ffi::{OffsetNumber, TransactionId};
pub struct XlHeapDelete {
pub xmax: TransactionId,
pub offnum: OffsetNumber,
pub infobits_set: u8,
pub flags: u8,
}
impl XlHeapDelete {
pub fn decode(buf: &mut Bytes) -> XlHeapDelete {
XlHeapDelete {
xmax: buf.get_u32_le(),
offnum: buf.get_u16_le(),
infobits_set: buf.get_u8(),
flags: buf.get_u8(),
}
}
}
#[repr(C)]
#[derive(Debug)]
pub struct XlHeapUpdate {
pub old_xmax: TransactionId,
pub old_offnum: OffsetNumber,
pub old_infobits_set: u8,
pub flags: u8,
pub new_xmax: TransactionId,
pub new_offnum: OffsetNumber,
}
impl XlHeapUpdate {
pub fn decode(buf: &mut Bytes) -> XlHeapUpdate {
XlHeapUpdate {
old_xmax: buf.get_u32_le(),
old_offnum: buf.get_u16_le(),
old_infobits_set: buf.get_u8(),
flags: buf.get_u8(),
new_xmax: buf.get_u32_le(),
new_offnum: buf.get_u16_le(),
}
}
}
/* Since PG16, we have the Neon RMGR (RM_NEON_ID) to manage Neon-flavored WAL. */
pub mod rm_neon {
use bytes::{Buf, Bytes};
use postgres_ffi::{OffsetNumber, TransactionId};
#[repr(C)]
#[derive(Debug)]
pub struct XlNeonHeapInsert {
pub offnum: OffsetNumber,
pub flags: u8,
}
impl XlNeonHeapInsert {
pub fn decode(buf: &mut Bytes) -> XlNeonHeapInsert {
XlNeonHeapInsert {
offnum: buf.get_u16_le(),
flags: buf.get_u8(),
}
}
}
#[repr(C)]
#[derive(Debug)]
pub struct XlNeonHeapMultiInsert {
pub flags: u8,
pub _padding: u8,
pub ntuples: u16,
pub t_cid: u32,
}
impl XlNeonHeapMultiInsert {
pub fn decode(buf: &mut Bytes) -> XlNeonHeapMultiInsert {
XlNeonHeapMultiInsert {
flags: buf.get_u8(),
_padding: buf.get_u8(),
ntuples: buf.get_u16_le(),
t_cid: buf.get_u32_le(),
}
}
}
#[repr(C)]
#[derive(Debug)]
pub struct XlNeonHeapDelete {
pub xmax: TransactionId,
pub offnum: OffsetNumber,
pub infobits_set: u8,
pub flags: u8,
pub t_cid: u32,
}
impl XlNeonHeapDelete {
pub fn decode(buf: &mut Bytes) -> XlNeonHeapDelete {
XlNeonHeapDelete {
xmax: buf.get_u32_le(),
offnum: buf.get_u16_le(),
infobits_set: buf.get_u8(),
flags: buf.get_u8(),
t_cid: buf.get_u32_le(),
}
}
}
#[repr(C)]
#[derive(Debug)]
pub struct XlNeonHeapUpdate {
pub old_xmax: TransactionId,
pub old_offnum: OffsetNumber,
pub old_infobits_set: u8,
pub flags: u8,
pub t_cid: u32,
pub new_xmax: TransactionId,
pub new_offnum: OffsetNumber,
}
impl XlNeonHeapUpdate {
pub fn decode(buf: &mut Bytes) -> XlNeonHeapUpdate {
XlNeonHeapUpdate {
old_xmax: buf.get_u32_le(),
old_offnum: buf.get_u16_le(),
old_infobits_set: buf.get_u8(),
flags: buf.get_u8(),
t_cid: buf.get_u32(),
new_xmax: buf.get_u32_le(),
new_offnum: buf.get_u16_le(),
}
}
}
}
}
#[repr(C)]
#[derive(Debug)]
pub struct XlSmgrCreate {
@@ -458,6 +223,90 @@ impl XlDropDatabase {
}
}
#[repr(C)]
#[derive(Debug)]
pub struct XlHeapInsert {
pub offnum: OffsetNumber,
pub flags: u8,
}
impl XlHeapInsert {
pub fn decode(buf: &mut Bytes) -> XlHeapInsert {
XlHeapInsert {
offnum: buf.get_u16_le(),
flags: buf.get_u8(),
}
}
}
#[repr(C)]
#[derive(Debug)]
pub struct XlHeapMultiInsert {
pub flags: u8,
pub _padding: u8,
pub ntuples: u16,
}
impl XlHeapMultiInsert {
pub fn decode(buf: &mut Bytes) -> XlHeapMultiInsert {
XlHeapMultiInsert {
flags: buf.get_u8(),
_padding: buf.get_u8(),
ntuples: buf.get_u16_le(),
}
}
}
#[repr(C)]
#[derive(Debug)]
pub struct XlHeapDelete {
pub xmax: TransactionId,
pub offnum: OffsetNumber,
pub _padding: u16,
pub t_cid: u32,
pub infobits_set: u8,
pub flags: u8,
}
impl XlHeapDelete {
pub fn decode(buf: &mut Bytes) -> XlHeapDelete {
XlHeapDelete {
xmax: buf.get_u32_le(),
offnum: buf.get_u16_le(),
_padding: buf.get_u16_le(),
t_cid: buf.get_u32_le(),
infobits_set: buf.get_u8(),
flags: buf.get_u8(),
}
}
}
#[repr(C)]
#[derive(Debug)]
pub struct XlHeapUpdate {
pub old_xmax: TransactionId,
pub old_offnum: OffsetNumber,
pub old_infobits_set: u8,
pub flags: u8,
pub t_cid: u32,
pub new_xmax: TransactionId,
pub new_offnum: OffsetNumber,
}
impl XlHeapUpdate {
pub fn decode(buf: &mut Bytes) -> XlHeapUpdate {
XlHeapUpdate {
old_xmax: buf.get_u32_le(),
old_offnum: buf.get_u16_le(),
old_infobits_set: buf.get_u8(),
flags: buf.get_u8(),
t_cid: buf.get_u32(),
new_xmax: buf.get_u32_le(),
new_offnum: buf.get_u16_le(),
}
}
}
///
/// Note: Parsing some fields is missing, because they're not needed.
///
@@ -472,10 +321,9 @@ pub struct XlXactParsedRecord {
pub xact_time: TimestampTz,
pub xinfo: u32,
pub db_id: Oid,
/* MyDatabaseId */
pub ts_id: Oid,
/* MyDatabaseTableSpace */
pub db_id: Oid, /* MyDatabaseId */
pub ts_id: Oid, /* MyDatabaseTableSpace */
pub subxacts: Vec<TransactionId>,
pub xnodes: Vec<RelFileNode>,
@@ -607,12 +455,9 @@ impl MultiXactMember {
#[repr(C)]
#[derive(Debug)]
pub struct XlMultiXactCreate {
pub mid: MultiXactId,
/* new MultiXact's ID */
pub moff: MultiXactOffset,
/* its starting offset in members file */
pub nmembers: u32,
/* number of member XIDs */
pub mid: MultiXactId, /* new MultiXact's ID */
pub moff: MultiXactOffset, /* its starting offset in members file */
pub nmembers: u32, /* number of member XIDs */
pub members: Vec<MultiXactMember>,
}
@@ -639,8 +484,7 @@ impl XlMultiXactCreate {
pub struct XlMultiXactTruncate {
pub oldest_multi_db: Oid,
/* to-be-truncated range of multixact offsets */
pub start_trunc_off: MultiXactId,
/* just for completeness' sake */
pub start_trunc_off: MultiXactId, /* just for completeness' sake */
pub end_trunc_off: MultiXactId,
/* to-be-truncated range of multixact members */
@@ -782,10 +626,12 @@ pub fn decode_wal_record(
blk.hole_offset = buf.get_u16_le();
blk.bimg_info = buf.get_u8();
blk.apply_image = dispatch_pgversion!(
pg_version,
(blk.bimg_info & pgv::bindings::BKPIMAGE_APPLY) != 0
);
blk.apply_image = if pg_version == 14 {
(blk.bimg_info & postgres_ffi::v14::bindings::BKPIMAGE_APPLY) != 0
} else {
assert_eq!(pg_version, 15);
(blk.bimg_info & postgres_ffi::v15::bindings::BKPIMAGE_APPLY) != 0
};
let blk_img_is_compressed =
postgres_ffi::bkpimage_is_compressed(blk.bimg_info, pg_version)?;

View File

@@ -19,16 +19,13 @@
#include <fcntl.h>
#include "postgres.h"
#include "neon_pgversioncompat.h"
#include "funcapi.h"
#include "miscadmin.h"
#include "pgstat.h"
#include "pagestore_client.h"
#include "access/parallel.h"
#include "postmaster/bgworker.h"
#include RELFILEINFO_HDR
#include "storage/relfilenode.h"
#include "storage/buf_internals.h"
#include "storage/latch.h"
#include "storage/ipc.h"
@@ -80,7 +77,6 @@ typedef struct FileCacheEntry
typedef struct FileCacheControl
{
uint64 generation; /* generation is needed to handle correct hash reenabling */
uint32 size; /* size of cache file in chunks */
uint32 used; /* number of used chunks */
dlist_head lru; /* double linked list for LRU replacement algorithm */
@@ -92,6 +88,7 @@ static LWLockId lfc_lock;
static int lfc_max_size;
static int lfc_size_limit;
static int lfc_free_space_watermark;
static bool lfc_disabled_by_failure = false;
static char* lfc_path;
static FileCacheControl* lfc_ctl;
static shmem_startup_hook_type prev_shmem_startup_hook;
@@ -100,62 +97,10 @@ static shmem_request_hook_type prev_shmem_request_hook;
#endif
static int lfc_shrinking_factor; /* power of two by which local cache size will be shrinked when lfc_free_space_watermark is reached */
#define DISABLE_LFC() (lfc_max_size = 0, lfc_disabled_by_failure = true, lfc_desc = -1)
void FileCacheMonitorMain(Datum main_arg);
/*
* Local file cache is mandatory and Neon can work without it.
* In case of any any errors with this cache, we should disable it but to not throw error.
* Also we should allow re-enable it if source of failure (lack of disk space, permissions,...) is fixed.
* All cache content should be invalidated to avoid reading of stale or corrupted data
*/
static void
lfc_disable(char const* op)
{
HASH_SEQ_STATUS status;
FileCacheEntry* entry;
elog(WARNING, "Failed to %s local file cache at %s: %m, disabling local file cache", op, lfc_path);
if (lfc_desc > 0)
close(lfc_desc);
lfc_desc = -1;
lfc_size_limit = 0;
/* Invalidate hash */
LWLockAcquire(lfc_lock, LW_EXCLUSIVE);
hash_seq_init(&status, lfc_hash);
while ((entry = hash_seq_search(&status)) != NULL)
{
hash_search(lfc_hash, &entry->key, HASH_REMOVE, NULL);
memset(entry->bitmap, 0, sizeof entry->bitmap);
}
hash_seq_term(&status);
lfc_ctl->generation += 1;
lfc_ctl->size = 0;
lfc_ctl->used = 0;
dlist_init(&lfc_ctl->lru);
LWLockRelease(lfc_lock);
}
static bool
lfc_ensure_opened(void)
{
/* Open cache file if not done yet */
if (lfc_desc <= 0)
{
lfc_desc = BasicOpenFile(lfc_path, O_RDWR|O_CREAT);
if (lfc_desc < 0) {
lfc_disable("open");
return false;
}
}
return false;
}
static void
lfc_shmem_startup(void)
{
@@ -181,7 +126,6 @@ lfc_shmem_startup(void)
lfc_size+1, lfc_size+1,
&info,
HASH_ELEM | HASH_BLOBS);
lfc_ctl->generation = 0;
lfc_ctl->size = 0;
lfc_ctl->used = 0;
dlist_init(&lfc_ctl->lru);
@@ -387,7 +331,7 @@ lfc_init(void)
NULL,
NULL);
if (lfc_max_size == 0)
if (lfc_max_size == 0 || lfc_disabled_by_failure)
return;
if (lfc_free_space_watermark != 0)
@@ -408,7 +352,7 @@ lfc_init(void)
* Returns true if page is found in local cache.
*/
bool
lfc_cache_contains(NRelFileInfo rinfo, ForkNumber forkNum, BlockNumber blkno)
lfc_cache_contains(RelFileNode rnode, ForkNumber forkNum, BlockNumber blkno)
{
BufferTag tag;
FileCacheEntry* entry;
@@ -416,10 +360,10 @@ lfc_cache_contains(NRelFileInfo rinfo, ForkNumber forkNum, BlockNumber blkno)
bool found;
uint32 hash;
if (lfc_size_limit == 0) /* fast exit if file cache is disabled */
if (lfc_size_limit == 0 || lfc_disabled_by_failure) /* fast exit if file cache is disabled */
return false;
CopyNRelFileInfoToBufTag(tag, rinfo);
tag.rnode = rnode;
tag.forkNum = forkNum;
tag.blockNum = blkno & ~(BLOCKS_PER_CHUNK-1);
hash = get_hash_value(lfc_hash, &tag);
@@ -435,7 +379,7 @@ lfc_cache_contains(NRelFileInfo rinfo, ForkNumber forkNum, BlockNumber blkno)
* Evict a page (if present) from the local file cache
*/
void
lfc_evict(NRelFileInfo rinfo, ForkNumber forkNum, BlockNumber blkno)
lfc_evict(RelFileNode rnode, ForkNumber forkNum, BlockNumber blkno)
{
BufferTag tag;
FileCacheEntry* entry;
@@ -443,12 +387,10 @@ lfc_evict(NRelFileInfo rinfo, ForkNumber forkNum, BlockNumber blkno)
int chunk_offs = blkno & (BLOCKS_PER_CHUNK-1);
uint32 hash;
if (lfc_size_limit == 0) /* fast exit if file cache is disabled */
if (lfc_size_limit == 0 || lfc_disabled_by_failure) /* fast exit if file cache is disabled */
return;
CopyNRelFileInfoToBufTag(tag, rinfo);
tag.forkNum = forkNum;
tag.blockNum = (blkno & ~(BLOCKS_PER_CHUNK - 1));
INIT_BUFFERTAG(tag, rnode, forkNum, (blkno & ~(BLOCKS_PER_CHUNK-1)));
hash = get_hash_value(lfc_hash, &tag);
@@ -506,7 +448,7 @@ lfc_evict(NRelFileInfo rinfo, ForkNumber forkNum, BlockNumber blkno)
* In case of error lfc_size_limit is set to zero to disable any further opera-tins with cache.
*/
bool
lfc_read(NRelFileInfo rinfo, ForkNumber forkNum, BlockNumber blkno,
lfc_read(RelFileNode rnode, ForkNumber forkNum, BlockNumber blkno,
char *buffer)
{
BufferTag tag;
@@ -515,16 +457,11 @@ lfc_read(NRelFileInfo rinfo, ForkNumber forkNum, BlockNumber blkno,
int chunk_offs = blkno & (BLOCKS_PER_CHUNK-1);
bool result = true;
uint32 hash;
uint64 generation;
uint32 entry_offset;
if (lfc_size_limit == 0) /* fast exit if file cache is disabled */
if (lfc_size_limit == 0 || lfc_disabled_by_failure) /* fast exit if file cache is disabled */
return false;
if (!lfc_ensure_opened())
return false;
CopyNRelFileInfoToBufTag(tag, rinfo);
tag.rnode = rnode;
tag.forkNum = forkNum;
tag.blockNum = blkno & ~(BLOCKS_PER_CHUNK-1);
hash = get_hash_value(lfc_hash, &tag);
@@ -540,29 +477,37 @@ lfc_read(NRelFileInfo rinfo, ForkNumber forkNum, BlockNumber blkno,
/* Unlink entry from LRU list to pin it for the duration of IO operation */
if (entry->access_count++ == 0)
dlist_delete(&entry->lru_node);
generation = lfc_ctl->generation;
entry_offset = entry->offset;
LWLockRelease(lfc_lock);
rc = pread(lfc_desc, buffer, BLCKSZ, ((off_t)entry_offset*BLOCKS_PER_CHUNK + chunk_offs)*BLCKSZ);
if (rc != BLCKSZ)
/* Open cache file if not done yet */
if (lfc_desc <= 0)
{
lfc_disable("read");
return false;
lfc_desc = BasicOpenFile(lfc_path, O_RDWR|O_CREAT);
if (lfc_desc < 0) {
elog(LOG, "Failed to open file cache %s: %m", lfc_path);
DISABLE_LFC();
result = false;
}
}
if (lfc_desc > 0)
{
rc = pread(lfc_desc, buffer, BLCKSZ, ((off_t)entry->offset*BLOCKS_PER_CHUNK + chunk_offs)*BLCKSZ);
if (rc != BLCKSZ)
{
elog(INFO, "Failed to read file cache: %m");
DISABLE_LFC();
result = false;
}
}
/* Place entry to the head of LRU list */
LWLockAcquire(lfc_lock, LW_EXCLUSIVE);
if (lfc_ctl->generation == generation)
{
Assert(entry->access_count > 0);
if (--entry->access_count == 0)
dlist_push_tail(&lfc_ctl->lru, &entry->lru_node);
}
else
result = false;
Assert(entry->access_count > 0);
if (--entry->access_count == 0)
dlist_push_tail(&lfc_ctl->lru, &entry->lru_node);
LWLockRelease(lfc_lock);
return result;
@@ -573,12 +518,8 @@ lfc_read(NRelFileInfo rinfo, ForkNumber forkNum, BlockNumber blkno,
* If cache is full then evict some other page.
*/
void
lfc_write(NRelFileInfo rinfo, ForkNumber forkNum, BlockNumber blkno,
#if PG_MAJORVERSION_NUM < 16
lfc_write(RelFileNode rnode, ForkNumber forkNum, BlockNumber blkno,
char *buffer)
#else
const void *buffer)
#endif
{
BufferTag tag;
FileCacheEntry* entry;
@@ -587,17 +528,12 @@ lfc_write(NRelFileInfo rinfo, ForkNumber forkNum, BlockNumber blkno,
int chunk_offs = blkno & (BLOCKS_PER_CHUNK-1);
uint32 hash;
if (lfc_size_limit == 0) /* fast exit if file cache is disabled */
return;
if (!lfc_ensure_opened())
if (lfc_size_limit == 0 || lfc_disabled_by_failure) /* fast exit if file cache is disabled */
return;
tag.rnode = rnode;
tag.forkNum = forkNum;
tag.blockNum = blkno & ~(BLOCKS_PER_CHUNK-1);
CopyNRelFileInfoToBufTag(tag, rinfo);
hash = get_hash_value(lfc_hash, &tag);
LWLockAcquire(lfc_lock, LW_EXCLUSIVE);
@@ -636,22 +572,33 @@ lfc_write(NRelFileInfo rinfo, ForkNumber forkNum, BlockNumber blkno,
entry->access_count = 1;
memset(entry->bitmap, 0, sizeof entry->bitmap);
}
LWLockRelease(lfc_lock);
rc = pwrite(lfc_desc, buffer, BLCKSZ, ((off_t)entry->offset*BLOCKS_PER_CHUNK + chunk_offs)*BLCKSZ);
if (rc != BLCKSZ)
/* Open cache file if not done yet */
if (lfc_desc <= 0)
{
lfc_disable("write");
lfc_desc = BasicOpenFile(lfc_path, O_RDWR|O_CREAT);
if (lfc_desc < 0) {
elog(WARNING, "Failed to open file cache %s: %m, disabling file cache", lfc_path);
DISABLE_LFC(); /* disable file cache */
}
}
else
if (lfc_desc > 0)
{
/* Place entry to the head of LRU list */
Assert(entry->access_count > 0);
if (--entry->access_count == 0)
dlist_push_tail(&lfc_ctl->lru, &entry->lru_node);
rc = pwrite(lfc_desc, buffer, BLCKSZ, ((off_t)entry->offset*BLOCKS_PER_CHUNK + chunk_offs)*BLCKSZ);
if (rc != BLCKSZ)
{
elog(WARNING, "Failed to write file cache: %m, disabling file cache");
DISABLE_LFC(); /* disable file cache */
}
}
/* Place entry to the head of LRU list */
LWLockAcquire(lfc_lock, LW_EXCLUSIVE);
Assert(entry->access_count > 0);
if (--entry->access_count == 0)
dlist_push_tail(&lfc_ctl->lru, &entry->lru_node);
if (lfc_size_limit != 0)
entry->bitmap[chunk_offs >> 5] |= (1 << (chunk_offs & 31));
}
LWLockRelease(lfc_lock);
}
@@ -729,13 +676,8 @@ local_cache_pages(PG_FUNCTION_ARGS)
tupledesc = CreateTemplateTupleDesc(expected_tupledesc->natts);
TupleDescInitEntry(tupledesc, (AttrNumber) 1, "pageoffs",
INT8OID, -1, 0);
#if PG_MAJORVERSION_NUM < 16
TupleDescInitEntry(tupledesc, (AttrNumber) 2, "relfilenode",
OIDOID, -1, 0);
#else
TupleDescInitEntry(tupledesc, (AttrNumber) 2, "relfilenumber",
OIDOID, -1, 0);
#endif
TupleDescInitEntry(tupledesc, (AttrNumber) 3, "reltablespace",
OIDOID, -1, 0);
TupleDescInitEntry(tupledesc, (AttrNumber) 4, "reldatabase",
@@ -757,7 +699,6 @@ local_cache_pages(PG_FUNCTION_ARGS)
for (int i = 0; i < BLOCKS_PER_CHUNK; i++)
n_pages += (entry->bitmap[i >> 5] & (1 << (i & 31))) != 0;
}
hash_seq_term(&status);
fctx->record = (LocalCachePagesRec *)
MemoryContextAllocHuge(CurrentMemoryContext,
sizeof(LocalCachePagesRec) * n_pages);
@@ -786,9 +727,9 @@ local_cache_pages(PG_FUNCTION_ARGS)
if (entry->bitmap[i >> 5] & (1 << (i & 31)))
{
fctx->record[n_pages].pageoffs = entry->offset*BLOCKS_PER_CHUNK + i;
fctx->record[n_pages].relfilenode = NInfoGetRelNumber(BufTagGetNRelFileInfo(entry->key));
fctx->record[n_pages].reltablespace = NInfoGetSpcOid(BufTagGetNRelFileInfo(entry->key));
fctx->record[n_pages].reldatabase = NInfoGetDbOid(BufTagGetNRelFileInfo(entry->key));
fctx->record[n_pages].relfilenode = entry->key.rnode.relNode;
fctx->record[n_pages].reltablespace = entry->key.rnode.spcNode;
fctx->record[n_pages].reldatabase = entry->key.rnode.dbNode;
fctx->record[n_pages].forknum = entry->key.forkNum;
fctx->record[n_pages].blocknum = entry->key.blockNum + i;
fctx->record[n_pages].accesscount = entry->access_count;
@@ -796,7 +737,6 @@ local_cache_pages(PG_FUNCTION_ARGS)
}
}
}
hash_seq_term(&status);
Assert(n_pages == funcctx->max_calls);
LWLockRelease(lfc_lock);
}

View File

@@ -442,7 +442,7 @@ pg_init_libpagestore(void)
"Maximal attempts to reconnect to pages server (with 1 second timeout)",
NULL,
&max_reconnect_attempts,
60, 0, INT_MAX,
10, 0, INT_MAX,
PGC_USERSET,
0,
NULL, NULL, NULL);

View File

@@ -33,14 +33,6 @@ void _PG_init(void);
void
_PG_init(void)
{
/*
* Also load 'neon_rmgr'. This makes it unnecessary to list both 'neon'
* and 'neon_rmgr' in shared_preload_libraries.
*/
#if PG_VERSION_NUM >= 160000
load_file("$libdir/neon_rmgr", false);
#endif
pg_init_libpagestore();
pg_init_walproposer();
@@ -48,9 +40,9 @@ _PG_init(void)
pg_init_extension_server();
// Important: This must happen after other parts of the extension
// are loaded, otherwise any settings to GUCs that were set before
// the extension was loaded will be removed.
// Important: This must happen after other parts of the extension
// are loaded, otherwise any settings to GUCs that were set before
// the extension was loaded will be removed.
EmitWarningsOnPlaceholders("neon");
}

View File

@@ -1,112 +0,0 @@
/*
* Compatibility macros to cover up differences between supported PostgreSQL versions,
* to help with compiling the same sources for all of them.
*/
#ifndef NEON_PGVERSIONCOMPAT_H
#define NEON_PGVERSIONCOMPAT_H
#define NRelFileInfoBackendIsTemp(rinfo) (rinfo.backend != InvalidBackendId)
#define RelFileInfoEquals(a, b) ( \
NInfoGetSpcOid(a) == NInfoGetSpcOid(b) && \
NInfoGetDbOid(a) == NInfoGetDbOid(b) && \
NInfoGetRelNumber(a) == NInfoGetRelNumber(b) \
)
/* buftag population & RelFileNode/RelFileLocator rework */
#if PG_MAJORVERSION_NUM < 16
#define InitBufferTag(tag, rfn, fn, bn) INIT_BUFFERTAG(*tag, *rfn, fn, bn)
#define USE_RELFILENODE
#define RELFILEINFO_HDR "storage/relfilenode.h"
#define NRelFileInfo RelFileNode
#define NRelFileInfoBackend RelFileNodeBackend
#define NRelFileNumber Oid
#define InfoFromRelation(rel) (rel)->rd_node
#define InfoFromSMgrRel(srel) (srel)->smgr_rnode.node
#define InfoBFromSMgrRel(srel) (srel)->smgr_rnode
#define InfoFromNInfoB(ninfob) ninfob.node
#define RelFileInfoFmt(rinfo) \
(rinfo).spcNode, \
(rinfo).dbNode, \
(rinfo).relNode
#define RelFileInfoBackendFmt(ninfob) \
(ninfob).backend, \
(ninfob).node.spcNode, \
(ninfob).node.dbNode, \
(ninfob).node.relNode
#define NInfoGetSpcOid(ninfo) (ninfo).spcNode
#define NInfoGetDbOid(ninfo) (ninfo).dbNode
#define NInfoGetRelNumber(ninfo) (ninfo).relNode
#define CopyNRelFileInfoToBufTag(tag, rinfo) \
do { \
(tag).rnode = (rinfo); \
} while (false);
#define BufTagGetNRelFileInfo(tag) tag.rnode
#define SMgrRelGetRelInfo(reln) \
(reln->smgr_rnode.node)
#define DropRelationAllLocalBuffers DropRelFileNodeAllLocalBuffers
#else /* major version >= 16 */
#define USE_RELFILELOCATOR
#define BUFFERTAGS_EQUAL(a, b) BufferTagsEqual(&(a), &(b))
#define RELFILEINFO_HDR "storage/relfilelocator.h"
#define NRelFileInfo RelFileLocator
#define NRelFileInfoBackend RelFileLocatorBackend
#define InfoFromRelation(rel) (rel)->rd_locator
#define InfoFromSMgrRel(srel) (srel)->smgr_rlocator.locator
#define InfoBFromSMgrRel(srel) (srel)->smgr_rlocator
#define InfoFromNInfoB(ninfob) (ninfob).locator
#define RelFileInfoFmt(rinfo) \
(rinfo).spcOid, \
(rinfo).dbOid, \
(rinfo).relNumber
#define RelFileInfoBackendFmt(ninfob) \
(ninfob).backend, \
(ninfob).locator.spcOid, \
(ninfob).locator.dbOid, \
(ninfob).locator.relNumber
#define NInfoGetSpcOid(ninfo) (ninfo).spcOid
#define NInfoGetDbOid(ninfo) (ninfo).dbOid
#define NInfoGetRelNumber(ninfo) (ninfo).relNumber
#define CopyNRelFileInfoToBufTag(tag, rinfo) \
do { \
(tag).spcOid = (rinfo).spcOid; \
(tag).dbOid = (rinfo).dbOid; \
(tag).relNumber = (rinfo).relNumber; \
} while (false);
#define BufTagGetNRelFileInfo(tag) \
((RelFileLocator) { \
.spcOid = (tag).spcOid, \
.dbOid = (tag).dbOid, \
.relNumber = (tag).relNumber, \
})
#define SMgrRelGetRelInfo(reln) \
((reln)->smgr_rlocator)
#define DropRelationAllLocalBuffers DropRelationAllLocalBuffers
#endif
#endif //NEON_PGVERSIONCOMPAT_H

View File

@@ -14,10 +14,9 @@
#define pageserver_h
#include "postgres.h"
#include "neon_pgversioncompat.h"
#include "access/xlogdefs.h"
#include RELFILEINFO_HDR
#include "storage/relfilenode.h"
#include "storage/block.h"
#include "storage/smgr.h"
#include "lib/stringinfo.h"
@@ -72,14 +71,14 @@ typedef struct
typedef struct
{
NeonRequest req;
NRelFileInfo rinfo;
RelFileNode rnode;
ForkNumber forknum;
} NeonExistsRequest;
typedef struct
{
NeonRequest req;
NRelFileInfo rinfo;
RelFileNode rnode;
ForkNumber forknum;
} NeonNblocksRequest;
@@ -92,7 +91,7 @@ typedef struct
typedef struct
{
NeonRequest req;
NRelFileInfo rinfo;
RelFileNode rnode;
ForkNumber forknum;
BlockNumber blkno;
} NeonGetPageRequest;
@@ -165,7 +164,7 @@ extern char *neon_tenant;
extern bool wal_redo;
extern int32 max_cluster_size;
extern const f_smgr *smgr_neon(BackendId backend, NRelFileInfo rinfo);
extern const f_smgr *smgr_neon(BackendId backend, RelFileNode rnode);
extern void smgr_init_neon(void);
extern void readahead_buffer_resize(int newsize, void *extra);
@@ -176,35 +175,19 @@ extern void neon_open(SMgrRelation reln);
extern void neon_close(SMgrRelation reln, ForkNumber forknum);
extern void neon_create(SMgrRelation reln, ForkNumber forknum, bool isRedo);
extern bool neon_exists(SMgrRelation reln, ForkNumber forknum);
extern void neon_unlink(NRelFileInfoBackend rnode, ForkNumber forknum, bool isRedo);
#if PG_MAJORVERSION_NUM < 16
extern void neon_unlink(RelFileNodeBackend rnode, ForkNumber forknum, bool isRedo);
extern void neon_extend(SMgrRelation reln, ForkNumber forknum,
BlockNumber blocknum, char *buffer, bool skipFsync);
#else
extern void neon_extend(SMgrRelation reln, ForkNumber forknum,
BlockNumber blocknum, const void *buffer, bool skipFsync);
extern void neon_zeroextend(SMgrRelation reln, ForkNumber forknum,
BlockNumber blocknum, int nbuffers, bool skipFsync);
#endif
extern bool neon_prefetch(SMgrRelation reln, ForkNumber forknum,
BlockNumber blocknum);
#if PG_MAJORVERSION_NUM < 16
extern void neon_read(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum,
char *buffer);
extern PGDLLEXPORT void neon_read_at_lsn(NRelFileInfo rnode, ForkNumber forkNum, BlockNumber blkno,
extern void neon_read_at_lsn(RelFileNode rnode, ForkNumber forkNum, BlockNumber blkno,
XLogRecPtr request_lsn, bool request_latest, char *buffer);
extern void neon_write(SMgrRelation reln, ForkNumber forknum,
BlockNumber blocknum, char *buffer, bool skipFsync);
#else
extern void neon_read(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum,
void *buffer);
extern PGDLLEXPORT void neon_read_at_lsn(NRelFileInfo rnode, ForkNumber forkNum, BlockNumber blkno,
XLogRecPtr request_lsn, bool request_latest, void *buffer);
extern void neon_write(SMgrRelation reln, ForkNumber forknum,
BlockNumber blocknum, const void *buffer, bool skipFsync);
#endif
extern void neon_writeback(SMgrRelation reln, ForkNumber forknum,
BlockNumber blocknum, BlockNumber nblocks);
extern BlockNumber neon_nblocks(SMgrRelation reln, ForkNumber forknum);
@@ -215,22 +198,16 @@ extern void neon_immedsync(SMgrRelation reln, ForkNumber forknum);
/* utils for neon relsize cache */
extern void relsize_hash_init(void);
extern bool get_cached_relsize(NRelFileInfo rinfo, ForkNumber forknum, BlockNumber *size);
extern void set_cached_relsize(NRelFileInfo rinfo, ForkNumber forknum, BlockNumber size);
extern void update_cached_relsize(NRelFileInfo rinfo, ForkNumber forknum, BlockNumber size);
extern void forget_cached_relsize(NRelFileInfo rinfo, ForkNumber forknum);
extern bool get_cached_relsize(RelFileNode rnode, ForkNumber forknum, BlockNumber *size);
extern void set_cached_relsize(RelFileNode rnode, ForkNumber forknum, BlockNumber size);
extern void update_cached_relsize(RelFileNode rnode, ForkNumber forknum, BlockNumber size);
extern void forget_cached_relsize(RelFileNode rnode, ForkNumber forknum);
/* functions for local file cache */
#if PG_MAJORVERSION_NUM < 16
extern void lfc_write(NRelFileInfo rinfo, ForkNumber forkNum, BlockNumber blkno,
char *buffer);
#else
extern void lfc_write(NRelFileInfo rinfo, ForkNumber forkNum, BlockNumber blkno,
const void *buffer);
#endif
extern bool lfc_read(NRelFileInfo rinfo, ForkNumber forkNum, BlockNumber blkno, char *buffer);
extern bool lfc_cache_contains(NRelFileInfo rinfo, ForkNumber forkNum, BlockNumber blkno);
extern void lfc_evict(NRelFileInfo rinfo, ForkNumber forkNum, BlockNumber blkno);
extern void lfc_write(RelFileNode rnode, ForkNumber forkNum, BlockNumber blkno, char *buffer);
extern bool lfc_read(RelFileNode rnode, ForkNumber forkNum, BlockNumber blkno, char *buffer);
extern bool lfc_cache_contains(RelFileNode rnode, ForkNumber forkNum, BlockNumber blkno);
extern void lfc_evict(RelFileNode rnode, ForkNumber forkNum, BlockNumber blkno);
extern void lfc_init(void);

View File

@@ -58,6 +58,7 @@
#include "postmaster/autovacuum.h"
#include "replication/walsender.h"
#include "storage/bufmgr.h"
#include "storage/relfilenode.h"
#include "storage/buf_internals.h"
#include "storage/smgr.h"
#include "storage/md.h"
@@ -85,10 +86,7 @@
static char *hexdump_page(char *page);
#endif
#define IS_LOCAL_REL(reln) (\
NInfoGetDbOid(InfoFromSMgrRel(reln)) != 0 && \
NInfoGetRelNumber(InfoFromSMgrRel(reln)) > FirstNormalObjectId \
)
#define IS_LOCAL_REL(reln) (reln->smgr_rnode.node.dbNode != 0 && reln->smgr_rnode.node.relNode > FirstNormalObjectId)
const int SmgrTrace = DEBUG5;
@@ -162,7 +160,6 @@ typedef enum PrefetchStatus {
typedef struct PrefetchRequest {
BufferTag buftag; /* must be first entry in the struct */
XLogRecPtr effective_request_lsn;
XLogRecPtr actual_request_lsn;
NeonResponse *response; /* may be null */
PrefetchStatus status;
uint64 my_ring_index;
@@ -258,7 +255,7 @@ static bool prefetch_wait_for(uint64 ring_index);
static void prefetch_cleanup_trailing_unused(void);
static inline void prefetch_set_unused(uint64 ring_index);
static XLogRecPtr neon_get_request_lsn(bool *latest, NRelFileInfo rinfo,
static XLogRecPtr neon_get_request_lsn(bool *latest, RelFileNode rnode,
ForkNumber forknum, BlockNumber blkno);
static bool
@@ -317,7 +314,6 @@ compact_prefetch_buffers(void)
target_slot->status = source_slot->status;
target_slot->response = source_slot->response;
target_slot->effective_request_lsn = source_slot->effective_request_lsn;
target_slot->actual_request_lsn = source_slot->actual_request_lsn;
target_slot->my_ring_index = empty_ring_index;
prfh_delete(MyPState->prf_hash, source_slot);
@@ -638,7 +634,7 @@ prefetch_do_request(PrefetchRequest *slot, bool *force_latest, XLogRecPtr *force
.req.tag = T_NeonGetPageRequest,
.req.latest = false,
.req.lsn = 0,
.rinfo = BufTagGetNRelFileInfo(slot->buftag),
.rnode = slot->buftag.rnode,
.forknum = slot->buftag.forkNum,
.blkno = slot->buftag.blockNum,
};
@@ -647,13 +643,13 @@ prefetch_do_request(PrefetchRequest *slot, bool *force_latest, XLogRecPtr *force
{
request.req.lsn = *force_lsn;
request.req.latest = *force_latest;
slot->actual_request_lsn = slot->effective_request_lsn = *force_lsn;
slot->effective_request_lsn = *force_lsn;
}
else
{
XLogRecPtr lsn = neon_get_request_lsn(
&request.req.latest,
BufTagGetNRelFileInfo(slot->buftag),
slot->buftag.rnode,
slot->buftag.forkNum,
slot->buftag.blockNum
);
@@ -675,7 +671,7 @@ prefetch_do_request(PrefetchRequest *slot, bool *force_latest, XLogRecPtr *force
* The best LSN to use for effective_request_lsn would be
* XLogCtl->Insert.RedoRecPtr, but that's expensive to access.
*/
slot->actual_request_lsn = request.req.lsn = lsn;
request.req.lsn = lsn;
prefetch_lsn = Max(prefetch_lsn, lsn);
slot->effective_request_lsn = prefetch_lsn;
}
@@ -897,9 +893,9 @@ nm_pack_request(NeonRequest * msg)
pq_sendbyte(&s, msg_req->req.latest);
pq_sendint64(&s, msg_req->req.lsn);
pq_sendint32(&s, NInfoGetSpcOid(msg_req->rinfo));
pq_sendint32(&s, NInfoGetDbOid(msg_req->rinfo));
pq_sendint32(&s, NInfoGetRelNumber(msg_req->rinfo));
pq_sendint32(&s, msg_req->rnode.spcNode);
pq_sendint32(&s, msg_req->rnode.dbNode);
pq_sendint32(&s, msg_req->rnode.relNode);
pq_sendbyte(&s, msg_req->forknum);
break;
@@ -910,9 +906,9 @@ nm_pack_request(NeonRequest * msg)
pq_sendbyte(&s, msg_req->req.latest);
pq_sendint64(&s, msg_req->req.lsn);
pq_sendint32(&s, NInfoGetSpcOid(msg_req->rinfo));
pq_sendint32(&s, NInfoGetDbOid(msg_req->rinfo));
pq_sendint32(&s, NInfoGetRelNumber(msg_req->rinfo));
pq_sendint32(&s, msg_req->rnode.spcNode);
pq_sendint32(&s, msg_req->rnode.dbNode);
pq_sendint32(&s, msg_req->rnode.relNode);
pq_sendbyte(&s, msg_req->forknum);
break;
@@ -933,9 +929,9 @@ nm_pack_request(NeonRequest * msg)
pq_sendbyte(&s, msg_req->req.latest);
pq_sendint64(&s, msg_req->req.lsn);
pq_sendint32(&s, NInfoGetSpcOid(msg_req->rinfo));
pq_sendint32(&s, NInfoGetDbOid(msg_req->rinfo));
pq_sendint32(&s, NInfoGetRelNumber(msg_req->rinfo));
pq_sendint32(&s, msg_req->rnode.spcNode);
pq_sendint32(&s, msg_req->rnode.dbNode);
pq_sendint32(&s, msg_req->rnode.relNode);
pq_sendbyte(&s, msg_req->forknum);
pq_sendint32(&s, msg_req->blkno);
@@ -1067,7 +1063,10 @@ nm_to_string(NeonMessage * msg)
NeonExistsRequest *msg_req = (NeonExistsRequest *) msg;
appendStringInfoString(&s, "{\"type\": \"NeonExistsRequest\"");
appendStringInfo(&s, ", \"rinfo\": \"%u/%u/%u\"", RelFileInfoFmt(msg_req->rinfo));
appendStringInfo(&s, ", \"rnode\": \"%u/%u/%u\"",
msg_req->rnode.spcNode,
msg_req->rnode.dbNode,
msg_req->rnode.relNode);
appendStringInfo(&s, ", \"forknum\": %d", msg_req->forknum);
appendStringInfo(&s, ", \"lsn\": \"%X/%X\"", LSN_FORMAT_ARGS(msg_req->req.lsn));
appendStringInfo(&s, ", \"latest\": %d", msg_req->req.latest);
@@ -1080,7 +1079,10 @@ nm_to_string(NeonMessage * msg)
NeonNblocksRequest *msg_req = (NeonNblocksRequest *) msg;
appendStringInfoString(&s, "{\"type\": \"NeonNblocksRequest\"");
appendStringInfo(&s, ", \"rinfo\": \"%u/%u/%u\"", RelFileInfoFmt(msg_req->rinfo));
appendStringInfo(&s, ", \"rnode\": \"%u/%u/%u\"",
msg_req->rnode.spcNode,
msg_req->rnode.dbNode,
msg_req->rnode.relNode);
appendStringInfo(&s, ", \"forknum\": %d", msg_req->forknum);
appendStringInfo(&s, ", \"lsn\": \"%X/%X\"", LSN_FORMAT_ARGS(msg_req->req.lsn));
appendStringInfo(&s, ", \"latest\": %d", msg_req->req.latest);
@@ -1093,7 +1095,10 @@ nm_to_string(NeonMessage * msg)
NeonGetPageRequest *msg_req = (NeonGetPageRequest *) msg;
appendStringInfoString(&s, "{\"type\": \"NeonGetPageRequest\"");
appendStringInfo(&s, ", \"rinfo\": \"%u/%u/%u\"", RelFileInfoFmt(msg_req->rinfo));
appendStringInfo(&s, ", \"rnode\": \"%u/%u/%u\"",
msg_req->rnode.spcNode,
msg_req->rnode.dbNode,
msg_req->rnode.relNode);
appendStringInfo(&s, ", \"forknum\": %d", msg_req->forknum);
appendStringInfo(&s, ", \"blkno\": %u", msg_req->blkno);
appendStringInfo(&s, ", \"lsn\": \"%X/%X\"", LSN_FORMAT_ARGS(msg_req->req.lsn));
@@ -1182,13 +1187,13 @@ nm_to_string(NeonMessage * msg)
* directly because it skips the logging if the LSN is new enough.
*/
static XLogRecPtr
log_newpage_copy(NRelFileInfo *rinfo, ForkNumber forkNum, BlockNumber blkno,
log_newpage_copy(RelFileNode *rnode, ForkNumber forkNum, BlockNumber blkno,
Page page, bool page_std)
{
PGAlignedBlock copied_buffer;
memcpy(copied_buffer.data, page, BLCKSZ);
return log_newpage(rinfo, forkNum, blkno, copied_buffer.data, page_std);
return log_newpage(rnode, forkNum, blkno, copied_buffer.data, page_std);
}
/*
@@ -1205,14 +1210,9 @@ PageIsEmptyHeapPage(char *buffer)
}
static void
neon_wallog_page(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum,
#if PG_MAJORVERSION_NUM < 16
char *buffer, bool force)
#else
const char *buffer, bool force)
#endif
neon_wallog_page(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, char *buffer, bool force)
{
XLogRecPtr lsn = PageGetLSN((Page) buffer);
XLogRecPtr lsn = PageGetLSN(buffer);
if (ShutdownRequestPending)
return;
@@ -1232,14 +1232,15 @@ neon_wallog_page(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum,
/* FSM is never WAL-logged and we don't care. */
XLogRecPtr recptr;
recptr = log_newpage_copy(&InfoFromSMgrRel(reln), forknum, blocknum,
(Page) buffer, false);
recptr = log_newpage_copy(&reln->smgr_rnode.node, forknum, blocknum, buffer, false);
XLogFlush(recptr);
lsn = recptr;
ereport(SmgrTrace,
(errmsg("Page %u of relation %u/%u/%u.%u was force logged. Evicted at lsn=%X/%X",
blocknum,
RelFileInfoFmt(InfoFromSMgrRel(reln)),
reln->smgr_rnode.node.spcNode,
reln->smgr_rnode.node.dbNode,
reln->smgr_rnode.node.relNode,
forknum, LSN_FORMAT_ARGS(lsn))));
}
else if (lsn == InvalidXLogRecPtr)
@@ -1262,20 +1263,24 @@ neon_wallog_page(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum,
* sign: it implies that the page was not WAL-logged, and its contents
* will be lost when it's evicted.
*/
if (PageIsNew((Page) buffer))
if (PageIsNew(buffer))
{
ereport(SmgrTrace,
(errmsg("Page %u of relation %u/%u/%u.%u is all-zeros",
blocknum,
RelFileInfoFmt(InfoFromSMgrRel(reln)),
reln->smgr_rnode.node.spcNode,
reln->smgr_rnode.node.dbNode,
reln->smgr_rnode.node.relNode,
forknum)));
}
else if (PageIsEmptyHeapPage((Page) buffer))
else if (PageIsEmptyHeapPage(buffer))
{
ereport(SmgrTrace,
(errmsg("Page %u of relation %u/%u/%u.%u is an empty heap page with no LSN",
blocknum,
RelFileInfoFmt(InfoFromSMgrRel(reln)),
reln->smgr_rnode.node.spcNode,
reln->smgr_rnode.node.dbNode,
reln->smgr_rnode.node.relNode,
forknum)));
}
else
@@ -1283,7 +1288,9 @@ neon_wallog_page(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum,
ereport(PANIC,
(errmsg("Page %u of relation %u/%u/%u.%u is evicted with zero LSN",
blocknum,
RelFileInfoFmt(InfoFromSMgrRel(reln)),
reln->smgr_rnode.node.spcNode,
reln->smgr_rnode.node.dbNode,
reln->smgr_rnode.node.relNode,
forknum)));
}
}
@@ -1292,7 +1299,9 @@ neon_wallog_page(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum,
ereport(SmgrTrace,
(errmsg("Page %u of relation %u/%u/%u.%u is already wal logged at lsn=%X/%X",
blocknum,
RelFileInfoFmt(InfoFromSMgrRel(reln)),
reln->smgr_rnode.node.spcNode,
reln->smgr_rnode.node.dbNode,
reln->smgr_rnode.node.relNode,
forknum, LSN_FORMAT_ARGS(lsn))));
}
@@ -1300,7 +1309,7 @@ neon_wallog_page(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum,
* Remember the LSN on this page. When we read the page again, we must
* read the same or newer version of it.
*/
SetLastWrittenLSNForBlock(lsn, InfoFromSMgrRel(reln), forknum, blocknum);
SetLastWrittenLSNForBlock(lsn, reln->smgr_rnode.node, forknum, blocknum);
}
/*
@@ -1370,7 +1379,7 @@ nm_adjust_lsn(XLogRecPtr lsn)
* Return LSN for requesting pages and number of blocks from page server
*/
static XLogRecPtr
neon_get_request_lsn(bool *latest, NRelFileInfo rinfo, ForkNumber forknum, BlockNumber blkno)
neon_get_request_lsn(bool *latest, RelFileNode rnode, ForkNumber forknum, BlockNumber blkno)
{
XLogRecPtr lsn;
@@ -1385,7 +1394,7 @@ neon_get_request_lsn(bool *latest, NRelFileInfo rinfo, ForkNumber forknum, Block
/*
* Get the last written LSN of this page.
*/
lsn = GetLastWrittenLSN(rinfo, forknum, blkno);
lsn = GetLastWrittenLSN(rnode, forknum, blkno);
lsn = nm_adjust_lsn(lsn);
elog(DEBUG1, "neon_get_request_lsn GetXLogReplayRecPtr %X/%X request lsn 0 ",
@@ -1407,7 +1416,7 @@ neon_get_request_lsn(bool *latest, NRelFileInfo rinfo, ForkNumber forknum, Block
* so our request cannot concern those.
*/
*latest = true;
lsn = GetLastWrittenLSN(rinfo, forknum, blkno);
lsn = GetLastWrittenLSN(rnode, forknum, blkno);
Assert(lsn != InvalidXLogRecPtr);
elog(DEBUG1, "neon_get_request_lsn GetLastWrittenLSN lsn %X/%X ",
(uint32) ((lsn) >> 32), (uint32) (lsn));
@@ -1476,7 +1485,7 @@ neon_exists(SMgrRelation reln, ForkNumber forkNum)
elog(ERROR, "unknown relpersistence '%c'", reln->smgr_relpersistence);
}
if (get_cached_relsize(InfoFromSMgrRel(reln), forkNum, &n_blocks))
if (get_cached_relsize(reln->smgr_rnode.node, forkNum, &n_blocks))
{
return true;
}
@@ -1491,26 +1500,20 @@ neon_exists(SMgrRelation reln, ForkNumber forkNum)
*
* For now, handle that special case here.
*/
#if PG_MAJORVERSION_NUM >= 16
if (reln->smgr_rlocator.locator.spcOid == 0 &&
reln->smgr_rlocator.locator.dbOid == 0 &&
reln->smgr_rlocator.locator.relNumber == 0)
#else
if (reln->smgr_rnode.node.spcNode == 0 &&
reln->smgr_rnode.node.dbNode == 0 &&
reln->smgr_rnode.node.relNode == 0)
#endif
{
return false;
}
request_lsn = neon_get_request_lsn(&latest, InfoFromSMgrRel(reln), forkNum, REL_METADATA_PSEUDO_BLOCKNO);
request_lsn = neon_get_request_lsn(&latest, reln->smgr_rnode.node, forkNum, REL_METADATA_PSEUDO_BLOCKNO);
{
NeonExistsRequest request = {
.req.tag = T_NeonExistsRequest,
.req.latest = latest,
.req.lsn = request_lsn,
.rinfo = InfoFromSMgrRel(reln),
.rnode = reln->smgr_rnode.node,
.forknum = forkNum};
resp = page_server_request(&request);
@@ -1526,7 +1529,9 @@ neon_exists(SMgrRelation reln, ForkNumber forkNum)
ereport(ERROR,
(errcode(ERRCODE_IO_ERROR),
errmsg("could not read relation existence of rel %u/%u/%u.%u from page server at lsn %X/%08X",
RelFileInfoFmt(InfoFromSMgrRel(reln)),
reln->smgr_rnode.node.spcNode,
reln->smgr_rnode.node.dbNode,
reln->smgr_rnode.node.relNode,
forkNum,
(uint32) (request_lsn >> 32), (uint32) request_lsn),
errdetail("page server returned error: %s",
@@ -1566,7 +1571,9 @@ neon_create(SMgrRelation reln, ForkNumber forkNum, bool isRedo)
}
elog(SmgrTrace, "Create relation %u/%u/%u.%u",
RelFileInfoFmt(InfoFromSMgrRel(reln)),
reln->smgr_rnode.node.spcNode,
reln->smgr_rnode.node.dbNode,
reln->smgr_rnode.node.relNode,
forkNum);
/*
@@ -1590,12 +1597,12 @@ neon_create(SMgrRelation reln, ForkNumber forkNum, bool isRedo)
*/
if (isRedo)
{
update_cached_relsize(InfoFromSMgrRel(reln), forkNum, 0);
get_cached_relsize(InfoFromSMgrRel(reln), forkNum,
update_cached_relsize(reln->smgr_rnode.node, forkNum, 0);
get_cached_relsize(reln->smgr_rnode.node, forkNum,
&reln->smgr_cached_nblocks[forkNum]);
}
else
set_cached_relsize(InfoFromSMgrRel(reln), forkNum, 0);
set_cached_relsize(reln->smgr_rnode.node, forkNum, 0);
#ifdef DEBUG_COMPARE_LOCAL
if (IS_LOCAL_REL(reln))
@@ -1622,17 +1629,17 @@ neon_create(SMgrRelation reln, ForkNumber forkNum, bool isRedo)
* we are usually not in a transaction anymore when this is called.
*/
void
neon_unlink(NRelFileInfoBackend rinfo, ForkNumber forkNum, bool isRedo)
neon_unlink(RelFileNodeBackend rnode, ForkNumber forkNum, bool isRedo)
{
/*
* Might or might not exist locally, depending on whether it's an unlogged
* or permanent relation (or if DEBUG_COMPARE_LOCAL is set). Try to
* unlink, it won't do any harm if the file doesn't exist.
*/
mdunlink(rinfo, forkNum, isRedo);
if (!NRelFileInfoBackendIsTemp(rinfo))
mdunlink(rnode, forkNum, isRedo);
if (!RelFileNodeBackendIsTemp(rnode))
{
forget_cached_relsize(InfoFromNInfoB(rinfo), forkNum);
forget_cached_relsize(rnode.node, forkNum);
}
}
@@ -1646,13 +1653,8 @@ neon_unlink(NRelFileInfoBackend rinfo, ForkNumber forkNum, bool isRedo)
* causes intervening file space to become filled with zeroes.
*/
void
#if PG_MAJORVERSION_NUM < 16
neon_extend(SMgrRelation reln, ForkNumber forkNum, BlockNumber blkno,
char *buffer, bool skipFsync)
#else
neon_extend(SMgrRelation reln, ForkNumber forkNum, BlockNumber blkno,
const void *buffer, bool skipFsync)
#endif
{
XLogRecPtr lsn;
BlockNumber n_blocks = 0;
@@ -1705,15 +1707,17 @@ neon_extend(SMgrRelation reln, ForkNumber forkNum, BlockNumber blkno,
neon_wallog_page(reln, forkNum, n_blocks++, buffer, true);
neon_wallog_page(reln, forkNum, blkno, buffer, false);
set_cached_relsize(InfoFromSMgrRel(reln), forkNum, blkno + 1);
set_cached_relsize(reln->smgr_rnode.node, forkNum, blkno + 1);
lsn = PageGetLSN((Page) buffer);
lsn = PageGetLSN(buffer);
elog(SmgrTrace, "smgrextend called for %u/%u/%u.%u blk %u, page LSN: %X/%08X",
RelFileInfoFmt(InfoFromSMgrRel(reln)),
reln->smgr_rnode.node.spcNode,
reln->smgr_rnode.node.dbNode,
reln->smgr_rnode.node.relNode,
forkNum, blkno,
(uint32) (lsn >> 32), (uint32) lsn);
lfc_write(InfoFromSMgrRel(reln), forkNum, blkno, buffer);
lfc_write(reln->smgr_rnode.node, forkNum, blkno, buffer);
#ifdef DEBUG_COMPARE_LOCAL
if (IS_LOCAL_REL(reln))
@@ -1728,98 +1732,11 @@ neon_extend(SMgrRelation reln, ForkNumber forkNum, BlockNumber blkno,
if (lsn == InvalidXLogRecPtr)
{
lsn = GetXLogInsertRecPtr();
SetLastWrittenLSNForBlock(lsn, InfoFromSMgrRel(reln), forkNum, blkno);
SetLastWrittenLSNForBlock(lsn, reln->smgr_rnode.node, forkNum, blkno);
}
SetLastWrittenLSNForRelation(lsn, InfoFromSMgrRel(reln), forkNum);
SetLastWrittenLSNForRelation(lsn, reln->smgr_rnode.node, forkNum);
}
#if PG_MAJORVERSION_NUM >= 16
void
neon_zeroextend(SMgrRelation reln, ForkNumber forkNum, BlockNumber blocknum,
int nblocks, bool skipFsync)
{
const PGAlignedBlock buffer = {0};
BlockNumber curblocknum = blocknum;
int remblocks = nblocks;
XLogRecPtr lsn = 0;
switch (reln->smgr_relpersistence)
{
case 0:
elog(ERROR, "cannot call smgrextend() on rel with unknown persistence");
case RELPERSISTENCE_PERMANENT:
break;
case RELPERSISTENCE_TEMP:
case RELPERSISTENCE_UNLOGGED:
mdzeroextend(reln, forkNum, blocknum, nblocks, skipFsync);
return;
default:
elog(ERROR, "unknown relpersistence '%c'", reln->smgr_relpersistence);
}
if (max_cluster_size > 0 &&
reln->smgr_relpersistence == RELPERSISTENCE_PERMANENT &&
!IsAutoVacuumWorkerProcess())
{
uint64 current_size = GetZenithCurrentClusterSize();
if (current_size >= ((uint64) max_cluster_size) * 1024 * 1024)
ereport(ERROR,
(errcode(ERRCODE_DISK_FULL),
errmsg("could not extend file because cluster size limit (%d MB) has been exceeded",
max_cluster_size),
errhint("This limit is defined by neon.max_cluster_size GUC")));
}
/*
* If a relation manages to grow to 2^32-1 blocks, refuse to extend it any
* more --- we mustn't create a block whose number actually is
* InvalidBlockNumber or larger.
*/
if ((uint64) blocknum + nblocks >= (uint64) InvalidBlockNumber)
ereport(ERROR,
(errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
errmsg("cannot extend file \"%s\" beyond %u blocks",
relpath(reln->smgr_rlocator, forkNum),
InvalidBlockNumber)));
/* Don't log any pages if we're not allowed to do so. */
if (!XLogInsertAllowed())
return;
while (remblocks > 0)
{
int count = Min(remblocks, XLR_MAX_BLOCK_ID);
XLogBeginInsert();
for (int i = 0; i < count; i++)
XLogRegisterBlock(i, &InfoFromSMgrRel(reln), forkNum, blocknum + i,
(char *) buffer.data, REGBUF_FORCE_IMAGE | REGBUF_STANDARD);
lsn = XLogInsert(RM_XLOG_ID, XLOG_FPI);
for (int i = 0; i < count; i++)
{
lfc_write(InfoFromSMgrRel(reln), forkNum, blocknum + i, buffer.data);
SetLastWrittenLSNForBlock(lsn, InfoFromSMgrRel(reln), forkNum,
blocknum + i);
}
blocknum += count;
remblocks -= count;
}
Assert(lsn != 0);
SetLastWrittenLSNForRelation(lsn, InfoFromSMgrRel(reln), forkNum);
set_cached_relsize(InfoFromSMgrRel(reln), forkNum, blocknum);
}
#endif
/*
* neon_open() -- Initialize newly-opened relation.
*/
@@ -1875,14 +1792,14 @@ neon_prefetch(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum)
elog(ERROR, "unknown relpersistence '%c'", reln->smgr_relpersistence);
}
if (lfc_cache_contains(InfoFromSMgrRel(reln), forknum, blocknum))
if (lfc_cache_contains(reln->smgr_rnode.node, forknum, blocknum))
return false;
tag = (BufferTag) {
.rnode = reln->smgr_rnode.node,
.forkNum = forknum,
.blockNum = blocknum
};
CopyNRelFileInfoToBufTag(tag, InfoFromSMgrRel(reln));
ring_index = prefetch_register_buffer(tag, NULL, NULL);
@@ -1934,15 +1851,9 @@ neon_writeback(SMgrRelation reln, ForkNumber forknum,
* While function is defined in the neon extension it's used within neon_test_utils directly.
* To avoid breaking tests in the runtime please keep function signature in sync.
*/
#if PG_MAJORVERSION_NUM < 16
void PGDLLEXPORT
neon_read_at_lsn(NRelFileInfo rinfo, ForkNumber forkNum, BlockNumber blkno,
void
neon_read_at_lsn(RelFileNode rnode, ForkNumber forkNum, BlockNumber blkno,
XLogRecPtr request_lsn, bool request_latest, char *buffer)
#else
void PGDLLEXPORT
neon_read_at_lsn(NRelFileInfo rinfo, ForkNumber forkNum, BlockNumber blkno,
XLogRecPtr request_lsn, bool request_latest, void *buffer)
#endif
{
NeonResponse *resp;
BufferTag buftag;
@@ -1951,12 +1862,11 @@ neon_read_at_lsn(NRelFileInfo rinfo, ForkNumber forkNum, BlockNumber blkno,
PrefetchRequest *slot;
buftag = (BufferTag) {
.rnode = rnode,
.forkNum = forkNum,
.blockNum = blkno,
};
CopyNRelFileInfoToBufTag(buftag, rinfo);
/*
* The redo process does not lock pages that it needs to replay but are
* not in the shared buffers, so a concurrent process may request the
@@ -2047,7 +1957,7 @@ neon_read_at_lsn(NRelFileInfo rinfo, ForkNumber forkNum, BlockNumber blkno,
{
case T_NeonGetPageResponse:
memcpy(buffer, ((NeonGetPageResponse *) resp)->page, BLCKSZ);
lfc_write(rinfo, forkNum, blkno, buffer);
lfc_write(rnode, forkNum, blkno, buffer);
break;
case T_NeonErrorResponse:
@@ -2055,7 +1965,9 @@ neon_read_at_lsn(NRelFileInfo rinfo, ForkNumber forkNum, BlockNumber blkno,
(errcode(ERRCODE_IO_ERROR),
errmsg("could not read block %u in rel %u/%u/%u.%u from page server at lsn %X/%08X",
blkno,
RelFileInfoFmt(rinfo),
rnode.spcNode,
rnode.dbNode,
rnode.relNode,
forkNum,
(uint32) (request_lsn >> 32), (uint32) request_lsn),
errdetail("page server returned error: %s",
@@ -2075,11 +1987,7 @@ neon_read_at_lsn(NRelFileInfo rinfo, ForkNumber forkNum, BlockNumber blkno,
*/
void
neon_read(SMgrRelation reln, ForkNumber forkNum, BlockNumber blkno,
#if PG_MAJORVERSION_NUM < 16
char *buffer)
#else
void *buffer)
#endif
{
bool latest;
XLogRecPtr request_lsn;
@@ -2102,13 +2010,13 @@ neon_read(SMgrRelation reln, ForkNumber forkNum, BlockNumber blkno,
}
/* Try to read from local file cache */
if (lfc_read(InfoFromSMgrRel(reln), forkNum, blkno, buffer))
if (lfc_read(reln->smgr_rnode.node, forkNum, blkno, buffer))
{
return;
}
request_lsn = neon_get_request_lsn(&latest, InfoFromSMgrRel(reln), forkNum, blkno);
neon_read_at_lsn(InfoFromSMgrRel(reln), forkNum, blkno, request_lsn, latest, buffer);
request_lsn = neon_get_request_lsn(&latest, reln->smgr_rnode.node, forkNum, blkno);
neon_read_at_lsn(reln->smgr_rnode.node, forkNum, blkno, request_lsn, latest, buffer);
#ifdef DEBUG_COMPARE_LOCAL
if (forkNum == MAIN_FORKNUM && IS_LOCAL_REL(reln))
@@ -2122,23 +2030,27 @@ neon_read(SMgrRelation reln, ForkNumber forkNum, BlockNumber blkno,
memcpy(pageserver_masked, buffer, BLCKSZ);
memcpy(mdbuf_masked, mdbuf, BLCKSZ);
if (PageIsNew((Page) mdbuf))
if (PageIsNew(mdbuf))
{
if (!PageIsNew((Page) pageserver_masked))
if (!PageIsNew(pageserver_masked))
{
elog(PANIC, "page is new in MD but not in Page Server at blk %u in rel %u/%u/%u fork %u (request LSN %X/%08X):\n%s\n",
blkno,
RelFileInfoFmt(InfoFromSMgrRel(reln)),
reln->smgr_rnode.node.spcNode,
reln->smgr_rnode.node.dbNode,
reln->smgr_rnode.node.relNode,
forkNum,
(uint32) (request_lsn >> 32), (uint32) request_lsn,
hexdump_page(buffer));
}
}
else if (PageIsNew((Page) buffer))
else if (PageIsNew(buffer))
{
elog(PANIC, "page is new in Page Server but not in MD at blk %u in rel %u/%u/%u fork %u (request LSN %X/%08X):\n%s\n",
blkno,
RelFileInfoFmt(InfoFromSMgrRel(reln)),
reln->smgr_rnode.node.spcNode,
reln->smgr_rnode.node.dbNode,
reln->smgr_rnode.node.relNode,
forkNum,
(uint32) (request_lsn >> 32), (uint32) request_lsn,
hexdump_page(mdbuf));
@@ -2153,7 +2065,9 @@ neon_read(SMgrRelation reln, ForkNumber forkNum, BlockNumber blkno,
{
elog(PANIC, "heap buffers differ at blk %u in rel %u/%u/%u fork %u (request LSN %X/%08X):\n------ MD ------\n%s\n------ Page Server ------\n%s\n",
blkno,
RelFileInfoFmt(InfoFromSMgrRel(reln)),
reln->smgr_rnode.node.spcNode,
reln->smgr_rnode.node.dbNode,
reln->smgr_rnode.node.relNode,
forkNum,
(uint32) (request_lsn >> 32), (uint32) request_lsn,
hexdump_page(mdbuf_masked),
@@ -2172,7 +2086,9 @@ neon_read(SMgrRelation reln, ForkNumber forkNum, BlockNumber blkno,
{
elog(PANIC, "btree buffers differ at blk %u in rel %u/%u/%u fork %u (request LSN %X/%08X):\n------ MD ------\n%s\n------ Page Server ------\n%s\n",
blkno,
RelFileInfoFmt(InfoFromSMgrRel(reln)),
reln->smgr_rnode.node.spcNode,
reln->smgr_rnode.node.dbNode,
reln->smgr_rnode.node.relNode,
forkNum,
(uint32) (request_lsn >> 32), (uint32) request_lsn,
hexdump_page(mdbuf_masked),
@@ -2214,11 +2130,7 @@ hexdump_page(char *page)
*/
void
neon_write(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum,
#if PG_MAJORVERSION_NUM < 16
char *buffer, bool skipFsync)
#else
const void *buffer, bool skipFsync)
#endif
{
XLogRecPtr lsn;
@@ -2256,13 +2168,15 @@ neon_write(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum,
neon_wallog_page(reln, forknum, blocknum, buffer, false);
lsn = PageGetLSN((Page) buffer);
lsn = PageGetLSN(buffer);
elog(SmgrTrace, "smgrwrite called for %u/%u/%u.%u blk %u, page LSN: %X/%08X",
RelFileInfoFmt(InfoFromSMgrRel(reln)),
reln->smgr_rnode.node.spcNode,
reln->smgr_rnode.node.dbNode,
reln->smgr_rnode.node.relNode,
forknum, blocknum,
(uint32) (lsn >> 32), (uint32) lsn);
lfc_write(InfoFromSMgrRel(reln), forknum, blocknum, buffer);
lfc_write(reln->smgr_rnode.node, forknum, blocknum, buffer);
#ifdef DEBUG_COMPARE_LOCAL
if (IS_LOCAL_REL(reln))
@@ -2298,21 +2212,23 @@ neon_nblocks(SMgrRelation reln, ForkNumber forknum)
elog(ERROR, "unknown relpersistence '%c'", reln->smgr_relpersistence);
}
if (get_cached_relsize(InfoFromSMgrRel(reln), forknum, &n_blocks))
if (get_cached_relsize(reln->smgr_rnode.node, forknum, &n_blocks))
{
elog(SmgrTrace, "cached nblocks for %u/%u/%u.%u: %u blocks",
RelFileInfoFmt(InfoFromSMgrRel(reln)),
reln->smgr_rnode.node.spcNode,
reln->smgr_rnode.node.dbNode,
reln->smgr_rnode.node.relNode,
forknum, n_blocks);
return n_blocks;
}
request_lsn = neon_get_request_lsn(&latest, InfoFromSMgrRel(reln), forknum, REL_METADATA_PSEUDO_BLOCKNO);
request_lsn = neon_get_request_lsn(&latest, reln->smgr_rnode.node, forknum, REL_METADATA_PSEUDO_BLOCKNO);
{
NeonNblocksRequest request = {
.req.tag = T_NeonNblocksRequest,
.req.latest = latest,
.req.lsn = request_lsn,
.rinfo = InfoFromSMgrRel(reln),
.rnode = reln->smgr_rnode.node,
.forknum = forknum,
};
@@ -2329,7 +2245,9 @@ neon_nblocks(SMgrRelation reln, ForkNumber forknum)
ereport(ERROR,
(errcode(ERRCODE_IO_ERROR),
errmsg("could not read relation size of rel %u/%u/%u.%u from page server at lsn %X/%08X",
RelFileInfoFmt(InfoFromSMgrRel(reln)),
reln->smgr_rnode.node.spcNode,
reln->smgr_rnode.node.dbNode,
reln->smgr_rnode.node.relNode,
forknum,
(uint32) (request_lsn >> 32), (uint32) request_lsn),
errdetail("page server returned error: %s",
@@ -2339,10 +2257,12 @@ neon_nblocks(SMgrRelation reln, ForkNumber forknum)
default:
elog(ERROR, "unexpected response from page server with tag 0x%02x", resp->tag);
}
update_cached_relsize(InfoFromSMgrRel(reln), forknum, n_blocks);
update_cached_relsize(reln->smgr_rnode.node, forknum, n_blocks);
elog(SmgrTrace, "neon_nblocks: rel %u/%u/%u fork %u (request LSN %X/%08X): %u blocks",
RelFileInfoFmt(InfoFromSMgrRel(reln)),
reln->smgr_rnode.node.spcNode,
reln->smgr_rnode.node.dbNode,
reln->smgr_rnode.node.relNode,
forknum,
(uint32) (request_lsn >> 32), (uint32) request_lsn,
n_blocks);
@@ -2361,7 +2281,7 @@ neon_dbsize(Oid dbNode)
int64 db_size;
XLogRecPtr request_lsn;
bool latest;
NRelFileInfo dummy_node = {0};
RelFileNode dummy_node = {InvalidOid, InvalidOid, InvalidOid};
request_lsn = neon_get_request_lsn(&latest, dummy_node, MAIN_FORKNUM, REL_METADATA_PSEUDO_BLOCKNO);
{
@@ -2430,7 +2350,7 @@ neon_truncate(SMgrRelation reln, ForkNumber forknum, BlockNumber nblocks)
elog(ERROR, "unknown relpersistence '%c'", reln->smgr_relpersistence);
}
set_cached_relsize(InfoFromSMgrRel(reln), forknum, nblocks);
set_cached_relsize(reln->smgr_rnode.node, forknum, nblocks);
/*
* Truncating a relation drops all its buffers from the buffer cache
@@ -2458,7 +2378,7 @@ neon_truncate(SMgrRelation reln, ForkNumber forknum, BlockNumber nblocks)
* for the extended pages, so there's no harm in leaving behind obsolete
* entries for the truncated chunks.
*/
SetLastWrittenLSNForRelation(lsn, InfoFromSMgrRel(reln), forknum);
SetLastWrittenLSNForRelation(lsn, reln->smgr_rnode.node, forknum);
#ifdef DEBUG_COMPARE_LOCAL
if (IS_LOCAL_REL(reln))
@@ -2528,7 +2448,9 @@ neon_start_unlogged_build(SMgrRelation reln)
ereport(SmgrTrace,
(errmsg("starting unlogged build of relation %u/%u/%u",
RelFileInfoFmt(InfoFromSMgrRel(reln)))));
reln->smgr_rnode.node.spcNode,
reln->smgr_rnode.node.dbNode,
reln->smgr_rnode.node.relNode)));
switch (reln->smgr_relpersistence)
{
@@ -2578,7 +2500,9 @@ neon_finish_unlogged_build_phase_1(SMgrRelation reln)
ereport(SmgrTrace,
(errmsg("finishing phase 1 of unlogged build of relation %u/%u/%u",
RelFileInfoFmt(InfoFromSMgrRel(reln)))));
reln->smgr_rnode.node.spcNode,
reln->smgr_rnode.node.dbNode,
reln->smgr_rnode.node.relNode)));
if (unlogged_build_phase == UNLOGGED_BUILD_NOT_PERMANENT)
return;
@@ -2601,16 +2525,18 @@ neon_finish_unlogged_build_phase_1(SMgrRelation reln)
static void
neon_end_unlogged_build(SMgrRelation reln)
{
NRelFileInfoBackend rinfob = InfoBFromSMgrRel(reln);
Assert(unlogged_build_rel == reln);
ereport(SmgrTrace,
(errmsg("ending unlogged build of relation %u/%u/%u",
RelFileInfoFmt(InfoFromNInfoB(rinfob)))));
reln->smgr_rnode.node.spcNode,
reln->smgr_rnode.node.dbNode,
reln->smgr_rnode.node.relNode)));
if (unlogged_build_phase != UNLOGGED_BUILD_NOT_PERMANENT)
{
RelFileNodeBackend rnode;
Assert(unlogged_build_phase == UNLOGGED_BUILD_PHASE_2);
Assert(reln->smgr_relpersistence == RELPERSISTENCE_UNLOGGED);
@@ -2618,17 +2544,19 @@ neon_end_unlogged_build(SMgrRelation reln)
reln->smgr_relpersistence = RELPERSISTENCE_PERMANENT;
/* Remove local copy */
rinfob = InfoBFromSMgrRel(reln);
rnode = reln->smgr_rnode;
for (int forknum = 0; forknum <= MAX_FORKNUM; forknum++)
{
elog(SmgrTrace, "forgetting cached relsize for %u/%u/%u.%u",
RelFileInfoFmt(InfoFromNInfoB(rinfob)),
rnode.node.spcNode,
rnode.node.dbNode,
rnode.node.relNode,
forknum);
forget_cached_relsize(InfoFromNInfoB(rinfob), forknum);
forget_cached_relsize(rnode.node, forknum);
mdclose(reln, forknum);
/* use isRedo == true, so that we drop it immediately */
mdunlink(rinfob, forknum, true);
mdunlink(rnode, forknum, true);
}
}
@@ -2680,9 +2608,6 @@ static const struct f_smgr neon_smgr =
.smgr_exists = neon_exists,
.smgr_unlink = neon_unlink,
.smgr_extend = neon_extend,
#if PG_MAJORVERSION_NUM >= 16
.smgr_zeroextend = neon_zeroextend,
#endif
.smgr_prefetch = neon_prefetch,
.smgr_read = neon_read,
.smgr_write = neon_write,
@@ -2697,12 +2622,12 @@ static const struct f_smgr neon_smgr =
};
const f_smgr *
smgr_neon(BackendId backend, NRelFileInfo rinfo)
smgr_neon(BackendId backend, RelFileNode rnode)
{
/* Don't use page server for temp relations */
if (backend != InvalidBackendId)
return smgr_standard(backend, rinfo);
return smgr_standard(backend, rnode);
else
return &neon_smgr;
}
@@ -2756,7 +2681,7 @@ bool
neon_redo_read_buffer_filter(XLogReaderState *record, uint8 block_id)
{
XLogRecPtr end_recptr = record->EndRecPtr;
NRelFileInfo rinfo;
RelFileNode rnode;
ForkNumber forknum;
BlockNumber blkno;
BufferTag tag;
@@ -2770,10 +2695,10 @@ neon_redo_read_buffer_filter(XLogReaderState *record, uint8 block_id)
return true;
#if PG_VERSION_NUM < 150000
if (!XLogRecGetBlockTag(record, block_id, &rinfo, &forknum, &blkno))
if (!XLogRecGetBlockTag(record, block_id, &rnode, &forknum, &blkno))
elog(PANIC, "failed to locate backup block with ID %d", block_id);
#else
XLogRecGetBlockTag(record, block_id, &rinfo, &forknum, &blkno);
XLogRecGetBlockTag(record, block_id, &rnode, &forknum, &blkno);
#endif
/*
@@ -2781,13 +2706,10 @@ neon_redo_read_buffer_filter(XLogReaderState *record, uint8 block_id)
* regardless of whether the block is stored in shared buffers.
* See also this function's top comment.
*/
if (!OidIsValid(NInfoGetDbOid(rinfo)))
if (!OidIsValid(rnode.dbNode))
return false;
CopyNRelFileInfoToBufTag(tag, rinfo);
tag.forkNum = forknum;
tag.blockNum = blkno;
INIT_BUFFERTAG(tag, rnode, forknum, blkno);
hash = BufTableHashCode(&tag);
partitionLock = BufMappingPartitionLock(hash);
@@ -2803,24 +2725,24 @@ neon_redo_read_buffer_filter(XLogReaderState *record, uint8 block_id)
no_redo_needed = buffer < 0;
/* In both cases st lwlsn past this WAL record */
SetLastWrittenLSNForBlock(end_recptr, rinfo, forknum, blkno);
SetLastWrittenLSNForBlock(end_recptr, rnode, forknum, blkno);
/* we don't have the buffer in memory, update lwLsn past this record,
* also evict page fro file cache
*/
if (no_redo_needed)
lfc_evict(rinfo, forknum, blkno);
lfc_evict(rnode, forknum, blkno);
LWLockRelease(partitionLock);
/* Extend the relation if we know its size */
if (get_cached_relsize(rinfo, forknum, &relsize))
if (get_cached_relsize(rnode, forknum, &relsize))
{
if (relsize < blkno + 1)
{
update_cached_relsize(rinfo, forknum, blkno + 1);
SetLastWrittenLSNForRelation(end_recptr, rinfo, forknum);
update_cached_relsize(rnode, forknum, blkno + 1);
SetLastWrittenLSNForRelation(end_recptr, rnode, forknum);
}
}
else
@@ -2841,7 +2763,7 @@ neon_redo_read_buffer_filter(XLogReaderState *record, uint8 block_id)
.latest = false,
.tag = T_NeonNblocksRequest,
},
.rinfo = rinfo,
.rnode = rnode,
.forknum = forknum,
};
@@ -2852,8 +2774,8 @@ neon_redo_read_buffer_filter(XLogReaderState *record, uint8 block_id)
Assert(nbresponse->n_blocks > blkno);
set_cached_relsize(rinfo, forknum, nbresponse->n_blocks);
SetLastWrittenLSNForRelation(end_recptr, rinfo, forknum);
set_cached_relsize(rnode, forknum, nbresponse->n_blocks);
SetLastWrittenLSNForRelation(end_recptr, rnode, forknum);
elog(SmgrTrace, "Set length to %d", nbresponse->n_blocks);
}

View File

@@ -14,10 +14,8 @@
*/
#include "postgres.h"
#include "neon_pgversioncompat.h"
#include "pagestore_client.h"
#include RELFILEINFO_HDR
#include "storage/relfilenode.h"
#include "storage/smgr.h"
#include "storage/lwlock.h"
#include "storage/ipc.h"
@@ -32,7 +30,7 @@
typedef struct
{
NRelFileInfo rinfo;
RelFileNode rnode;
ForkNumber forknum;
} RelTag;
@@ -77,7 +75,7 @@ neon_smgr_shmem_startup(void)
}
bool
get_cached_relsize(NRelFileInfo rinfo, ForkNumber forknum, BlockNumber *size)
get_cached_relsize(RelFileNode rnode, ForkNumber forknum, BlockNumber *size)
{
bool found = false;
@@ -86,7 +84,7 @@ get_cached_relsize(NRelFileInfo rinfo, ForkNumber forknum, BlockNumber *size)
RelTag tag;
RelSizeEntry *entry;
tag.rinfo = rinfo;
tag.rnode = rnode;
tag.forknum = forknum;
LWLockAcquire(relsize_lock, LW_SHARED);
entry = hash_search(relsize_hash, &tag, HASH_FIND, NULL);
@@ -101,14 +99,14 @@ get_cached_relsize(NRelFileInfo rinfo, ForkNumber forknum, BlockNumber *size)
}
void
set_cached_relsize(NRelFileInfo rinfo, ForkNumber forknum, BlockNumber size)
set_cached_relsize(RelFileNode rnode, ForkNumber forknum, BlockNumber size)
{
if (relsize_hash_size > 0)
{
RelTag tag;
RelSizeEntry *entry;
tag.rinfo = rinfo;
tag.rnode = rnode;
tag.forknum = forknum;
LWLockAcquire(relsize_lock, LW_EXCLUSIVE);
entry = hash_search(relsize_hash, &tag, HASH_ENTER, NULL);
@@ -118,7 +116,7 @@ set_cached_relsize(NRelFileInfo rinfo, ForkNumber forknum, BlockNumber size)
}
void
update_cached_relsize(NRelFileInfo rinfo, ForkNumber forknum, BlockNumber size)
update_cached_relsize(RelFileNode rnode, ForkNumber forknum, BlockNumber size)
{
if (relsize_hash_size > 0)
{
@@ -126,7 +124,7 @@ update_cached_relsize(NRelFileInfo rinfo, ForkNumber forknum, BlockNumber size)
RelSizeEntry *entry;
bool found;
tag.rinfo = rinfo;
tag.rnode = rnode;
tag.forknum = forknum;
LWLockAcquire(relsize_lock, LW_EXCLUSIVE);
entry = hash_search(relsize_hash, &tag, HASH_ENTER, &found);
@@ -137,13 +135,13 @@ update_cached_relsize(NRelFileInfo rinfo, ForkNumber forknum, BlockNumber size)
}
void
forget_cached_relsize(NRelFileInfo rinfo, ForkNumber forknum)
forget_cached_relsize(RelFileNode rnode, ForkNumber forknum)
{
if (relsize_hash_size > 0)
{
RelTag tag;
tag.rinfo = rinfo;
tag.rnode = rnode;
tag.forknum = forknum;
LWLockAcquire(relsize_lock, LW_EXCLUSIVE);
hash_search(relsize_hash, &tag, HASH_REMOVE, NULL);

View File

@@ -51,9 +51,6 @@
#include "libpq/pqformat.h"
#include "replication/slot.h"
#include "replication/walreceiver.h"
#if PG_VERSION_NUM >= 160000
#include "replication/walsender_private.h"
#endif
#include "postmaster/bgworker.h"
#include "postmaster/interrupt.h"
#include "postmaster/postmaster.h"
@@ -76,10 +73,10 @@
static bool syncSafekeepers = false;
char *wal_acceptors_list = "";
int wal_acceptor_reconnect_timeout = 1000;
int wal_acceptor_connection_timeout = 10000;
bool am_wal_proposer = false;
char *wal_acceptors_list;
int wal_acceptor_reconnect_timeout;
int wal_acceptor_connection_timeout;
bool am_wal_proposer;
#define WAL_PROPOSER_SLOT_NAME "wal_proposer_slot"
@@ -194,7 +191,7 @@ pg_init_walproposer(void)
/*
* Entry point for `postgres --sync-safekeepers`.
*/
PGDLLEXPORT void
void
WalProposerSync(int argc, char *argv[])
{
struct stat stat_buf;
@@ -318,7 +315,7 @@ nwp_shmem_startup_hook(void)
/*
* WAL proposer bgworker entry point.
*/
PGDLLEXPORT void
void
WalProposerMain(Datum main_arg)
{
#if PG_VERSION_NUM >= 150000
@@ -386,55 +383,21 @@ WalProposerPoll(void)
{
while (true)
{
Safekeeper *sk = NULL;
bool wait_timeout = false;
bool late_cv_trigger = false;
WaitEvent event = {0};
int rc = 0;
Safekeeper *sk;
int rc;
WaitEvent event;
TimestampTz now = GetCurrentTimestamp();
long timeout = TimeToReconnect(now);
#if PG_MAJORVERSION_NUM >= 16
if (WalSndCtl != NULL)
ConditionVariablePrepareToSleep(&WalSndCtl->wal_flush_cv);
#endif
/*
* Wait for a wait event to happen, or timeout:
* - Safekeeper socket can become available for READ or WRITE
* - Our latch got set, because
* * PG15-: We got woken up by a process triggering the WalSender
* * PG16+: WalSndCtl->wal_flush_cv was triggered
*/
rc = WaitEventSetWait(waitEvents, timeout,
rc = WaitEventSetWait(waitEvents, TimeToReconnect(now),
&event, 1, WAIT_EVENT_WAL_SENDER_MAIN);
#if PG_MAJORVERSION_NUM >= 16
if (WalSndCtl != NULL)
late_cv_trigger = ConditionVariableCancelSleep();
#endif
sk = (Safekeeper *) event.user_data;
/*
* If wait is terminated by latch set (walsenders' latch is set on
* each wal flush), then exit loop. (no need for pm death check due to
* WL_EXIT_ON_PM_DEATH)
*/
if ((rc == 1 && event.events & WL_LATCH_SET) || late_cv_trigger)
{
/* Reset our latch */
ResetLatch(MyLatch);
break;
}
/*
* If the event contains something that one of our safekeeper states
* was waiting for, we'll advance its state.
*/
if (rc == 1 && (event.events & (WL_SOCKET_MASK)))
{
sk = (Safekeeper *) event.user_data;
if (rc != 0 && (event.events & (WL_SOCKET_READABLE | WL_SOCKET_WRITEABLE)))
AdvancePollState(sk, event.events);
}
/*
* If the timeout expired, attempt to reconnect to any safekeepers
@@ -442,26 +405,15 @@ WalProposerPoll(void)
*/
ReconnectSafekeepers();
if (rc == 0) /* timeout expired */
/*
* If wait is terminated by latch set (walsenders' latch is set on
* each wal flush), then exit loop. (no need for pm death check due to
* WL_EXIT_ON_PM_DEATH)
*/
if (rc != 0 && (event.events & WL_LATCH_SET))
{
wait_timeout = true;
/*
* Ensure flushrecptr is set to a recent value. This fixes a case
* where we've not been notified of new WAL records when we were
* planning on consuming them.
*/
if (!syncSafekeepers) {
XLogRecPtr flushed;
#if PG_MAJORVERSION_NUM < 15
flushed = GetFlushRecPtr();
#else
flushed = GetFlushRecPtr(NULL);
#endif
if (flushed > availableLsn)
break;
}
ResetLatch(MyLatch);
break;
}
now = GetCurrentTimestamp();
@@ -659,8 +611,7 @@ UpdateEventSet(Safekeeper *sk, uint32 events)
ModifyWaitEvent(waitEvents, sk->eventPos, events, NULL);
}
/*
* Hack: provides a way to remove the event corresponding to an individual walproposer from the set.
/* Hack: provides a way to remove the event corresponding to an individual walproposer from the set.
*
* Note: Internally, this completely reconstructs the event set. It should be avoided if possible.
*/
@@ -1457,12 +1408,7 @@ WalProposerRecovery(int donor, TimeLineID timeline, XLogRecPtr startpos, XLogRec
elog(FATAL, "could not append password to the safekeeper connection string");
}
#if PG_MAJORVERSION_NUM < 16
wrconn = walrcv_connect(conninfo, false, "wal_proposer_recovery", &err);
#else
wrconn = walrcv_connect(conninfo, false, false, "wal_proposer_recovery", &err);
#endif
if (!wrconn)
{
ereport(WARNING,
@@ -2296,10 +2242,9 @@ HandleSafekeeperResponse(void)
if (synced)
n_synced++;
}
if (n_synced >= quorum)
{
/* A quorum of safekeepers has been synced! */
/* All safekeepers synced! */
/*
* Send empty message to broadcast latest truncateLsn to all safekeepers.
@@ -2594,15 +2539,8 @@ backpressure_throttling_impl(void)
? PrevProcessInterruptsCallback()
: false;
/*
* Don't throttle read only transactions or wal sender.
* Do throttle CREATE INDEX CONCURRENTLY, however. It performs some
* stages outside a transaction, even though it writes a lot of WAL.
* Check PROC_IN_SAFE_IC flag to cover that case.
*/
if (am_walsender
|| (!(MyProc->statusFlags & PROC_IN_SAFE_IC)
&& !TransactionIdIsValid(GetCurrentTransactionIdIfAny())))
/* Don't throttle read only transactions and wal sender. */
if (am_walsender || !TransactionIdIsValid(GetCurrentTransactionIdIfAny()))
return retry;
/* Calculate replicas lag */

View File

@@ -379,8 +379,8 @@ typedef struct Safekeeper
AppendResponse appendResponse; /* feedback for master */
} Safekeeper;
extern void PGDLLEXPORT WalProposerSync(int argc, char *argv[]);
extern void PGDLLEXPORT WalProposerMain(Datum main_arg);
extern void WalProposerSync(int argc, char *argv[]);
extern void WalProposerMain(Datum main_arg);
extern void WalProposerBroadcast(XLogRecPtr startpos, XLogRecPtr endpos);
extern void WalProposerPoll(void);
extern void ParsePageserverFeedbackMessage(StringInfo reply_message,

View File

@@ -25,9 +25,6 @@
#include "access/xlogutils.h"
#include "access/xlogrecovery.h"
#endif
#if PG_MAJORVERSION_NUM >= 16
#include "utils/guc.h"
#endif
/*
* These variables are used similarly to openLogFile/SegNo,
@@ -561,11 +558,11 @@ StartProposerReplication(StartReplicationCmd *cmd)
static void
WalSndLoop(void)
{
/* Clear any already-pending wakeups */
ResetLatch(MyLatch);
for (;;)
{
/* Clear any already-pending wakeups */
ResetLatch(MyLatch);
CHECK_FOR_INTERRUPTS();
XLogBroadcastWalProposer();

View File

@@ -1,19 +0,0 @@
# pgxs/neon/Makefile
MODULE_big = neon_rmgr
OBJS = \
$(WIN32RES) \
neon_rmgr.o \
neon_rmgr_decode.o \
neon_rmgr_desc.o
EXTENSION = neon_rmgr
DATA =
PGFILEDESC = "Neon WAL Resource Manager - custom WAL records used to make Neon work (since PG 16)"
PG_CONFIG = pg_config
PGXS := $(shell $(PG_CONFIG) --pgxs)
include $(PGXS)

View File

@@ -1,886 +0,0 @@
#include "postgres.h"
#include "fmgr.h"
#if PG_MAJORVERSION_NUM >= 16
#include "access/bufmask.h"
#include "access/heapam_xlog.h"
#include "access/htup_details.h"
#include "access/neon_xlog.h"
#include "access/rmgr.h"
#include "access/visibilitymap.h"
#include "access/xlog_internal.h"
#include "access/xlogutils.h"
#include "miscadmin.h"
#include "storage/buf.h"
#include "storage/bufmgr.h"
#include "storage/bufpage.h"
#include "storage/freespace.h"
#include "neon_rmgr.h"
PG_MODULE_MAGIC;
void _PG_init(void);
static void neon_rm_redo(XLogReaderState *record);
static void neon_rm_startup(void);
static void neon_rm_cleanup(void);
static void neon_rm_mask(char *pagedata, BlockNumber blkno);
static void redo_neon_heap_insert(XLogReaderState *record);
static void redo_neon_heap_delete(XLogReaderState *record);
static void redo_neon_heap_update(XLogReaderState *record, bool hot_update);
static void redo_neon_heap_lock(XLogReaderState *record);
static void redo_neon_heap_multi_insert(XLogReaderState *record);
const static RmgrData NeonRmgr = {
.rm_name = "neon",
.rm_redo = neon_rm_redo,
.rm_desc = neon_rm_desc,
.rm_identify = neon_rm_identify,
.rm_startup = neon_rm_startup,
.rm_cleanup = neon_rm_cleanup,
.rm_mask = neon_rm_mask,
.rm_decode = neon_rm_decode,
};
void
_PG_init(void)
{
if (!process_shared_preload_libraries_in_progress)
return;
RegisterCustomRmgr(RM_NEON_ID, &NeonRmgr);
}
static void
neon_rm_redo(XLogReaderState *record)
{
uint8 info = XLogRecGetInfo(record) & ~XLR_INFO_MASK;
switch (info & XLOG_NEON_OPMASK)
{
case XLOG_NEON_HEAP_INSERT:
redo_neon_heap_insert(record);
break;
case XLOG_NEON_HEAP_DELETE:
redo_neon_heap_delete(record);
break;
case XLOG_NEON_HEAP_UPDATE:
redo_neon_heap_update(record, false);
break;
case XLOG_NEON_HEAP_HOT_UPDATE:
redo_neon_heap_update(record, true);
break;
case XLOG_NEON_HEAP_LOCK:
redo_neon_heap_lock(record);
break;
case XLOG_NEON_HEAP_MULTI_INSERT:
redo_neon_heap_multi_insert(record);
break;
default:
elog(PANIC, "neon_rm_redo: unknown op code %u", info);
}
}
static void
neon_rm_startup(void)
{
/* nothing to do here */
}
static void
neon_rm_cleanup(void)
{
/* nothing to do here */
}
static void
neon_rm_mask(char *pagedata, BlockNumber blkno)
{
Page page = (Page) pagedata;
OffsetNumber off;
mask_page_lsn_and_checksum(page);
mask_page_hint_bits(page);
mask_unused_space(page);
for (off = 1; off <= PageGetMaxOffsetNumber(page); off++)
{
ItemId iid = PageGetItemId(page, off);
char *page_item;
page_item = (char *) (page + ItemIdGetOffset(iid));
if (ItemIdIsNormal(iid))
{
HeapTupleHeader page_htup = (HeapTupleHeader) page_item;
/*
* If xmin of a tuple is not yet frozen, we should ignore
* differences in hint bits, since they can be set without
* emitting WAL.
*/
if (!HeapTupleHeaderXminFrozen(page_htup))
page_htup->t_infomask &= ~HEAP_XACT_MASK;
else
{
/* Still we need to mask xmax hint bits. */
page_htup->t_infomask &= ~HEAP_XMAX_INVALID;
page_htup->t_infomask &= ~HEAP_XMAX_COMMITTED;
}
/*
* During replay, we set Command Id to FirstCommandId. Hence, mask
* it. See heap_xlog_insert() for details.
*/
page_htup->t_choice.t_heap.t_field3.t_cid = MASK_MARKER;
/*
* For a speculative tuple, heap_insert() does not set ctid in the
* caller-passed heap tuple itself, leaving the ctid field to
* contain a speculative token value - a per-backend monotonically
* increasing identifier. Besides, it does not WAL-log ctid under
* any circumstances.
*
* During redo, heap_xlog_insert() sets t_ctid to current block
* number and self offset number. It doesn't care about any
* speculative insertions on the primary. Hence, we set t_ctid to
* current block number and self offset number to ignore any
* inconsistency.
*/
if (HeapTupleHeaderIsSpeculative(page_htup))
ItemPointerSet(&page_htup->t_ctid, blkno, off);
/*
* NB: Not ignoring ctid changes due to the tuple having moved
* (i.e. HeapTupleHeaderIndicatesMovedPartitions), because that's
* important information that needs to be in-sync between primary
* and standby, and thus is WAL logged.
*/
}
/*
* Ignore any padding bytes after the tuple, when the length of the
* item is not MAXALIGNed.
*/
if (ItemIdHasStorage(iid))
{
int len = ItemIdGetLength(iid);
int padlen = MAXALIGN(len) - len;
if (padlen > 0)
memset(page_item + len, MASK_MARKER, padlen);
}
}
}
/*
* COPIED FROM heapam.c
* Given an "infobits" field from an XLog record, set the correct bits in the
* given infomask and infomask2 for the tuple touched by the record.
*
* (This is the reverse of compute_infobits).
*/
static void
fix_infomask_from_infobits(uint8 infobits, uint16 *infomask, uint16 *infomask2)
{
*infomask &= ~(HEAP_XMAX_IS_MULTI | HEAP_XMAX_LOCK_ONLY |
HEAP_XMAX_KEYSHR_LOCK | HEAP_XMAX_EXCL_LOCK);
*infomask2 &= ~HEAP_KEYS_UPDATED;
if (infobits & XLHL_XMAX_IS_MULTI)
*infomask |= HEAP_XMAX_IS_MULTI;
if (infobits & XLHL_XMAX_LOCK_ONLY)
*infomask |= HEAP_XMAX_LOCK_ONLY;
if (infobits & XLHL_XMAX_EXCL_LOCK)
*infomask |= HEAP_XMAX_EXCL_LOCK;
/* note HEAP_XMAX_SHR_LOCK isn't considered here */
if (infobits & XLHL_XMAX_KEYSHR_LOCK)
*infomask |= HEAP_XMAX_KEYSHR_LOCK;
if (infobits & XLHL_KEYS_UPDATED)
*infomask2 |= HEAP_KEYS_UPDATED;
}
static void
redo_neon_heap_insert(XLogReaderState *record)
{
XLogRecPtr lsn = record->EndRecPtr;
xl_neon_heap_insert *xlrec = (xl_neon_heap_insert *) XLogRecGetData(record);
Buffer buffer;
Page page;
union
{
HeapTupleHeaderData hdr;
char data[MaxHeapTupleSize];
} tbuf;
HeapTupleHeader htup;
xl_neon_heap_header xlhdr;
uint32 newlen;
Size freespace = 0;
RelFileLocator target_locator;
BlockNumber blkno;
ItemPointerData target_tid;
XLogRedoAction action;
XLogRecGetBlockTag(record, 0, &target_locator, NULL, &blkno);
ItemPointerSetBlockNumber(&target_tid, blkno);
ItemPointerSetOffsetNumber(&target_tid, xlrec->offnum);
/*
* The visibility map may need to be fixed even if the heap page is
* already up-to-date.
*/
if (xlrec->flags & XLH_INSERT_ALL_VISIBLE_CLEARED)
{
Relation reln = CreateFakeRelcacheEntry(target_locator);
Buffer vmbuffer = InvalidBuffer;
visibilitymap_pin(reln, blkno, &vmbuffer);
visibilitymap_clear(reln, blkno, vmbuffer, VISIBILITYMAP_VALID_BITS);
ReleaseBuffer(vmbuffer);
FreeFakeRelcacheEntry(reln);
}
/*
* If we inserted the first and only tuple on the page, re-initialize the
* page from scratch.
*/
if (XLogRecGetInfo(record) & XLOG_HEAP_INIT_PAGE)
{
buffer = XLogInitBufferForRedo(record, 0);
page = BufferGetPage(buffer);
PageInit(page, BufferGetPageSize(buffer), 0);
action = BLK_NEEDS_REDO;
}
else
action = XLogReadBufferForRedo(record, 0, &buffer);
if (action == BLK_NEEDS_REDO)
{
Size datalen;
char *data;
page = BufferGetPage(buffer);
if (PageGetMaxOffsetNumber(page) + 1 < xlrec->offnum)
elog(PANIC, "neon_rm_redo: invalid max offset number");
data = XLogRecGetBlockData(record, 0, &datalen);
newlen = datalen - SizeOfNeonHeapHeader;
Assert(datalen > SizeOfNeonHeapHeader && newlen <= MaxHeapTupleSize);
memcpy((char *) &xlhdr, data, SizeOfNeonHeapHeader);
data += SizeOfNeonHeapHeader;
htup = &tbuf.hdr;
MemSet((char *) htup, 0, SizeofHeapTupleHeader);
/* PG73FORMAT: get bitmap [+ padding] [+ oid] + data */
memcpy((char *) htup + SizeofHeapTupleHeader,
data,
newlen);
newlen += SizeofHeapTupleHeader;
htup->t_infomask2 = xlhdr.t_infomask2;
htup->t_infomask = xlhdr.t_infomask;
htup->t_hoff = xlhdr.t_hoff;
HeapTupleHeaderSetXmin(htup, XLogRecGetXid(record));
HeapTupleHeaderSetCmin(htup, xlhdr.t_cid);
htup->t_ctid = target_tid;
if (PageAddItem(page, (Item) htup, newlen, xlrec->offnum,
true, true) == InvalidOffsetNumber)
elog(PANIC, "neon_rm_redo: failed to add tuple");
freespace = PageGetHeapFreeSpace(page); /* needed to update FSM below */
PageSetLSN(page, lsn);
if (xlrec->flags & XLH_INSERT_ALL_VISIBLE_CLEARED)
PageClearAllVisible(page);
/* XLH_INSERT_ALL_FROZEN_SET implies that all tuples are visible */
if (xlrec->flags & XLH_INSERT_ALL_FROZEN_SET)
PageSetAllVisible(page);
MarkBufferDirty(buffer);
}
if (BufferIsValid(buffer))
UnlockReleaseBuffer(buffer);
/*
* If the page is running low on free space, update the FSM as well.
* Arbitrarily, our definition of "low" is less than 20%. We can't do much
* better than that without knowing the fill-factor for the table.
*
* XXX: Don't do this if the page was restored from full page image. We
* don't bother to update the FSM in that case, it doesn't need to be
* totally accurate anyway.
*/
if (action == BLK_NEEDS_REDO && freespace < BLCKSZ / 5)
XLogRecordPageWithFreeSpace(target_locator, blkno, freespace);
}
static void
redo_neon_heap_delete(XLogReaderState *record)
{
XLogRecPtr lsn = record->EndRecPtr;
xl_neon_heap_delete *xlrec = (xl_neon_heap_delete *) XLogRecGetData(record);
Buffer buffer;
Page page;
ItemId lp = NULL;
HeapTupleHeader htup;
BlockNumber blkno;
RelFileLocator target_locator;
ItemPointerData target_tid;
XLogRecGetBlockTag(record, 0, &target_locator, NULL, &blkno);
ItemPointerSetBlockNumber(&target_tid, blkno);
ItemPointerSetOffsetNumber(&target_tid, xlrec->offnum);
/*
* The visibility map may need to be fixed even if the heap page is
* already up-to-date.
*/
if (xlrec->flags & XLH_DELETE_ALL_VISIBLE_CLEARED)
{
Relation reln = CreateFakeRelcacheEntry(target_locator);
Buffer vmbuffer = InvalidBuffer;
visibilitymap_pin(reln, blkno, &vmbuffer);
visibilitymap_clear(reln, blkno, vmbuffer, VISIBILITYMAP_VALID_BITS);
ReleaseBuffer(vmbuffer);
FreeFakeRelcacheEntry(reln);
}
if (XLogReadBufferForRedo(record, 0, &buffer) == BLK_NEEDS_REDO)
{
page = BufferGetPage(buffer);
if (PageGetMaxOffsetNumber(page) >= xlrec->offnum)
lp = PageGetItemId(page, xlrec->offnum);
if (PageGetMaxOffsetNumber(page) < xlrec->offnum || !ItemIdIsNormal(lp))
elog(PANIC, "neon_rm_redo: invalid lp");
htup = (HeapTupleHeader) PageGetItem(page, lp);
htup->t_infomask &= ~(HEAP_XMAX_BITS | HEAP_MOVED);
htup->t_infomask2 &= ~HEAP_KEYS_UPDATED;
HeapTupleHeaderClearHotUpdated(htup);
fix_infomask_from_infobits(xlrec->infobits_set,
&htup->t_infomask, &htup->t_infomask2);
if (!(xlrec->flags & XLH_DELETE_IS_SUPER))
HeapTupleHeaderSetXmax(htup, xlrec->xmax);
else
HeapTupleHeaderSetXmin(htup, InvalidTransactionId);
HeapTupleHeaderSetCmax(htup, xlrec->t_cid, false);
/* Mark the page as a candidate for pruning */
PageSetPrunable(page, XLogRecGetXid(record));
if (xlrec->flags & XLH_DELETE_ALL_VISIBLE_CLEARED)
PageClearAllVisible(page);
/* Make sure t_ctid is set correctly */
if (xlrec->flags & XLH_DELETE_IS_PARTITION_MOVE)
HeapTupleHeaderSetMovedPartitions(htup);
else
htup->t_ctid = target_tid;
PageSetLSN(page, lsn);
MarkBufferDirty(buffer);
}
if (BufferIsValid(buffer))
UnlockReleaseBuffer(buffer);
}
static void
redo_neon_heap_update(XLogReaderState *record, bool hot_update)
{
XLogRecPtr lsn = record->EndRecPtr;
xl_neon_heap_update *xlrec = (xl_neon_heap_update *) XLogRecGetData(record);
RelFileLocator rlocator;
BlockNumber oldblk;
BlockNumber newblk;
ItemPointerData newtid;
Buffer obuffer,
nbuffer;
Page page;
OffsetNumber offnum;
ItemId lp = NULL;
HeapTupleData oldtup;
HeapTupleHeader htup;
uint16 prefixlen = 0,
suffixlen = 0;
char *newp;
union
{
HeapTupleHeaderData hdr;
char data[MaxHeapTupleSize];
} tbuf;
xl_neon_heap_header xlhdr;
uint32 newlen;
Size freespace = 0;
XLogRedoAction oldaction;
XLogRedoAction newaction;
/* initialize to keep the compiler quiet */
oldtup.t_data = NULL;
oldtup.t_len = 0;
XLogRecGetBlockTag(record, 0, &rlocator, NULL, &newblk);
if (XLogRecGetBlockTagExtended(record, 1, NULL, NULL, &oldblk, NULL))
{
/* HOT updates are never done across pages */
Assert(!hot_update);
}
else
oldblk = newblk;
ItemPointerSet(&newtid, newblk, xlrec->new_offnum);
/*
* The visibility map may need to be fixed even if the heap page is
* already up-to-date.
*/
if (xlrec->flags & XLH_UPDATE_OLD_ALL_VISIBLE_CLEARED)
{
Relation reln = CreateFakeRelcacheEntry(rlocator);
Buffer vmbuffer = InvalidBuffer;
visibilitymap_pin(reln, oldblk, &vmbuffer);
visibilitymap_clear(reln, oldblk, vmbuffer, VISIBILITYMAP_VALID_BITS);
ReleaseBuffer(vmbuffer);
FreeFakeRelcacheEntry(reln);
}
/*
* In normal operation, it is important to lock the two pages in
* page-number order, to avoid possible deadlocks against other update
* operations going the other way. However, during WAL replay there can
* be no other update happening, so we don't need to worry about that. But
* we *do* need to worry that we don't expose an inconsistent state to Hot
* Standby queries --- so the original page can't be unlocked before we've
* added the new tuple to the new page.
*/
/* Deal with old tuple version */
oldaction = XLogReadBufferForRedo(record, (oldblk == newblk) ? 0 : 1,
&obuffer);
if (oldaction == BLK_NEEDS_REDO)
{
page = BufferGetPage(obuffer);
offnum = xlrec->old_offnum;
if (PageGetMaxOffsetNumber(page) >= offnum)
lp = PageGetItemId(page, offnum);
if (PageGetMaxOffsetNumber(page) < offnum || !ItemIdIsNormal(lp))
elog(PANIC, "neon_rm_redo: invalid lp");
htup = (HeapTupleHeader) PageGetItem(page, lp);
oldtup.t_data = htup;
oldtup.t_len = ItemIdGetLength(lp);
htup->t_infomask &= ~(HEAP_XMAX_BITS | HEAP_MOVED);
htup->t_infomask2 &= ~HEAP_KEYS_UPDATED;
if (hot_update)
HeapTupleHeaderSetHotUpdated(htup);
else
HeapTupleHeaderClearHotUpdated(htup);
fix_infomask_from_infobits(xlrec->old_infobits_set, &htup->t_infomask,
&htup->t_infomask2);
HeapTupleHeaderSetXmax(htup, xlrec->old_xmax);
HeapTupleHeaderSetCmax(htup, xlrec->t_cid, false);
/* Set forward chain link in t_ctid */
htup->t_ctid = newtid;
/* Mark the page as a candidate for pruning */
PageSetPrunable(page, XLogRecGetXid(record));
if (xlrec->flags & XLH_UPDATE_OLD_ALL_VISIBLE_CLEARED)
PageClearAllVisible(page);
PageSetLSN(page, lsn);
MarkBufferDirty(obuffer);
}
/*
* Read the page the new tuple goes into, if different from old.
*/
if (oldblk == newblk)
{
nbuffer = obuffer;
newaction = oldaction;
}
else if (XLogRecGetInfo(record) & XLOG_HEAP_INIT_PAGE)
{
nbuffer = XLogInitBufferForRedo(record, 0);
page = (Page) BufferGetPage(nbuffer);
PageInit(page, BufferGetPageSize(nbuffer), 0);
newaction = BLK_NEEDS_REDO;
}
else
newaction = XLogReadBufferForRedo(record, 0, &nbuffer);
/*
* The visibility map may need to be fixed even if the heap page is
* already up-to-date.
*/
if (xlrec->flags & XLH_UPDATE_NEW_ALL_VISIBLE_CLEARED)
{
Relation reln = CreateFakeRelcacheEntry(rlocator);
Buffer vmbuffer = InvalidBuffer;
visibilitymap_pin(reln, newblk, &vmbuffer);
visibilitymap_clear(reln, newblk, vmbuffer, VISIBILITYMAP_VALID_BITS);
ReleaseBuffer(vmbuffer);
FreeFakeRelcacheEntry(reln);
}
/* Deal with new tuple */
if (newaction == BLK_NEEDS_REDO)
{
char *recdata;
char *recdata_end;
Size datalen;
Size tuplen;
recdata = XLogRecGetBlockData(record, 0, &datalen);
recdata_end = recdata + datalen;
page = BufferGetPage(nbuffer);
offnum = xlrec->new_offnum;
if (PageGetMaxOffsetNumber(page) + 1 < offnum)
elog(PANIC, "neon_rm_redo: invalid max offset number");
if (xlrec->flags & XLH_UPDATE_PREFIX_FROM_OLD)
{
Assert(newblk == oldblk);
memcpy(&prefixlen, recdata, sizeof(uint16));
recdata += sizeof(uint16);
}
if (xlrec->flags & XLH_UPDATE_SUFFIX_FROM_OLD)
{
Assert(newblk == oldblk);
memcpy(&suffixlen, recdata, sizeof(uint16));
recdata += sizeof(uint16);
}
memcpy((char *) &xlhdr, recdata, SizeOfNeonHeapHeader);
recdata += SizeOfNeonHeapHeader;
tuplen = recdata_end - recdata;
Assert(tuplen <= MaxHeapTupleSize);
htup = &tbuf.hdr;
MemSet((char *) htup, 0, SizeofHeapTupleHeader);
/*
* Reconstruct the new tuple using the prefix and/or suffix from the
* old tuple, and the data stored in the WAL record.
*/
newp = (char *) htup + SizeofHeapTupleHeader;
if (prefixlen > 0)
{
int len;
/* copy bitmap [+ padding] [+ oid] from WAL record */
len = xlhdr.t_hoff - SizeofHeapTupleHeader;
memcpy(newp, recdata, len);
recdata += len;
newp += len;
/* copy prefix from old tuple */
memcpy(newp, (char *) oldtup.t_data + oldtup.t_data->t_hoff, prefixlen);
newp += prefixlen;
/* copy new tuple data from WAL record */
len = tuplen - (xlhdr.t_hoff - SizeofHeapTupleHeader);
memcpy(newp, recdata, len);
recdata += len;
newp += len;
}
else
{
/*
* copy bitmap [+ padding] [+ oid] + data from record, all in one
* go
*/
memcpy(newp, recdata, tuplen);
recdata += tuplen;
newp += tuplen;
}
Assert(recdata == recdata_end);
/* copy suffix from old tuple */
if (suffixlen > 0)
memcpy(newp, (char *) oldtup.t_data + oldtup.t_len - suffixlen, suffixlen);
newlen = SizeofHeapTupleHeader + tuplen + prefixlen + suffixlen;
htup->t_infomask2 = xlhdr.t_infomask2;
htup->t_infomask = xlhdr.t_infomask;
htup->t_hoff = xlhdr.t_hoff;
HeapTupleHeaderSetXmin(htup, XLogRecGetXid(record));
HeapTupleHeaderSetCmin(htup, xlhdr.t_cid);
HeapTupleHeaderSetXmax(htup, xlrec->new_xmax);
/* Make sure there is no forward chain link in t_ctid */
htup->t_ctid = newtid;
offnum = PageAddItem(page, (Item) htup, newlen, offnum, true, true);
if (offnum == InvalidOffsetNumber)
elog(PANIC, "neon_rm_redo: failed to add tuple");
if (xlrec->flags & XLH_UPDATE_NEW_ALL_VISIBLE_CLEARED)
PageClearAllVisible(page);
freespace = PageGetHeapFreeSpace(page); /* needed to update FSM below */
PageSetLSN(page, lsn);
MarkBufferDirty(nbuffer);
}
if (BufferIsValid(nbuffer) && nbuffer != obuffer)
UnlockReleaseBuffer(nbuffer);
if (BufferIsValid(obuffer))
UnlockReleaseBuffer(obuffer);
/*
* If the new page is running low on free space, update the FSM as well.
* Arbitrarily, our definition of "low" is less than 20%. We can't do much
* better than that without knowing the fill-factor for the table.
*
* However, don't update the FSM on HOT updates, because after crash
* recovery, either the old or the new tuple will certainly be dead and
* prunable. After pruning, the page will have roughly as much free space
* as it did before the update, assuming the new tuple is about the same
* size as the old one.
*
* XXX: Don't do this if the page was restored from full page image. We
* don't bother to update the FSM in that case, it doesn't need to be
* totally accurate anyway.
*/
if (newaction == BLK_NEEDS_REDO && !hot_update && freespace < BLCKSZ / 5)
XLogRecordPageWithFreeSpace(rlocator, newblk, freespace);
}
static void
redo_neon_heap_lock(XLogReaderState *record)
{
XLogRecPtr lsn = record->EndRecPtr;
xl_neon_heap_lock *xlrec = (xl_neon_heap_lock *) XLogRecGetData(record);
Buffer buffer;
Page page;
OffsetNumber offnum;
ItemId lp = NULL;
HeapTupleHeader htup;
/*
* The visibility map may need to be fixed even if the heap page is
* already up-to-date.
*/
if (xlrec->flags & XLH_LOCK_ALL_FROZEN_CLEARED)
{
RelFileLocator rlocator;
Buffer vmbuffer = InvalidBuffer;
BlockNumber block;
Relation reln;
XLogRecGetBlockTag(record, 0, &rlocator, NULL, &block);
reln = CreateFakeRelcacheEntry(rlocator);
visibilitymap_pin(reln, block, &vmbuffer);
visibilitymap_clear(reln, block, vmbuffer, VISIBILITYMAP_ALL_FROZEN);
ReleaseBuffer(vmbuffer);
FreeFakeRelcacheEntry(reln);
}
if (XLogReadBufferForRedo(record, 0, &buffer) == BLK_NEEDS_REDO)
{
page = (Page) BufferGetPage(buffer);
offnum = xlrec->offnum;
if (PageGetMaxOffsetNumber(page) >= offnum)
lp = PageGetItemId(page, offnum);
if (PageGetMaxOffsetNumber(page) < offnum || !ItemIdIsNormal(lp))
elog(PANIC, "neon_rm_redo: invalid lp");
htup = (HeapTupleHeader) PageGetItem(page, lp);
htup->t_infomask &= ~(HEAP_XMAX_BITS | HEAP_MOVED);
htup->t_infomask2 &= ~HEAP_KEYS_UPDATED;
fix_infomask_from_infobits(xlrec->infobits_set, &htup->t_infomask,
&htup->t_infomask2);
/*
* Clear relevant update flags, but only if the modified infomask says
* there's no update.
*/
if (HEAP_XMAX_IS_LOCKED_ONLY(htup->t_infomask))
{
HeapTupleHeaderClearHotUpdated(htup);
/* Make sure there is no forward chain link in t_ctid */
ItemPointerSet(&htup->t_ctid,
BufferGetBlockNumber(buffer),
offnum);
}
HeapTupleHeaderSetXmax(htup, xlrec->xmax);
HeapTupleHeaderSetCmax(htup, xlrec->t_cid, false);
PageSetLSN(page, lsn);
MarkBufferDirty(buffer);
}
if (BufferIsValid(buffer))
UnlockReleaseBuffer(buffer);
}
static void
redo_neon_heap_multi_insert(XLogReaderState *record)
{
XLogRecPtr lsn = record->EndRecPtr;
xl_neon_heap_multi_insert *xlrec;
RelFileLocator rlocator;
BlockNumber blkno;
Buffer buffer;
Page page;
union
{
HeapTupleHeaderData hdr;
char data[MaxHeapTupleSize];
} tbuf;
HeapTupleHeader htup;
uint32 newlen;
Size freespace = 0;
int i;
bool isinit = (XLogRecGetInfo(record) & XLOG_HEAP_INIT_PAGE) != 0;
XLogRedoAction action;
/*
* Insertion doesn't overwrite MVCC data, so no conflict processing is
* required.
*/
xlrec = (xl_neon_heap_multi_insert *) XLogRecGetData(record);
XLogRecGetBlockTag(record, 0, &rlocator, NULL, &blkno);
/* check that the mutually exclusive flags are not both set */
Assert(!((xlrec->flags & XLH_INSERT_ALL_VISIBLE_CLEARED) &&
(xlrec->flags & XLH_INSERT_ALL_FROZEN_SET)));
/*
* The visibility map may need to be fixed even if the heap page is
* already up-to-date.
*/
if (xlrec->flags & XLH_INSERT_ALL_VISIBLE_CLEARED)
{
Relation reln = CreateFakeRelcacheEntry(rlocator);
Buffer vmbuffer = InvalidBuffer;
visibilitymap_pin(reln, blkno, &vmbuffer);
visibilitymap_clear(reln, blkno, vmbuffer, VISIBILITYMAP_VALID_BITS);
ReleaseBuffer(vmbuffer);
FreeFakeRelcacheEntry(reln);
}
if (isinit)
{
buffer = XLogInitBufferForRedo(record, 0);
page = BufferGetPage(buffer);
PageInit(page, BufferGetPageSize(buffer), 0);
action = BLK_NEEDS_REDO;
}
else
action = XLogReadBufferForRedo(record, 0, &buffer);
if (action == BLK_NEEDS_REDO)
{
char *tupdata;
char *endptr;
Size len;
/* Tuples are stored as block data */
tupdata = XLogRecGetBlockData(record, 0, &len);
endptr = tupdata + len;
page = (Page) BufferGetPage(buffer);
for (i = 0; i < xlrec->ntuples; i++)
{
OffsetNumber offnum;
xl_neon_multi_insert_tuple *xlhdr;
/*
* If we're reinitializing the page, the tuples are stored in
* order from FirstOffsetNumber. Otherwise there's an array of
* offsets in the WAL record, and the tuples come after that.
*/
if (isinit)
offnum = FirstOffsetNumber + i;
else
offnum = xlrec->offsets[i];
if (PageGetMaxOffsetNumber(page) + 1 < offnum)
elog(PANIC, "neon_rm_redo: invalid max offset number");
xlhdr = (xl_neon_multi_insert_tuple *) SHORTALIGN(tupdata);
tupdata = ((char *) xlhdr) + SizeOfNeonMultiInsertTuple;
newlen = xlhdr->datalen;
Assert(newlen <= MaxHeapTupleSize);
htup = &tbuf.hdr;
MemSet((char *) htup, 0, SizeofHeapTupleHeader);
/* PG73FORMAT: get bitmap [+ padding] [+ oid] + data */
memcpy((char *) htup + SizeofHeapTupleHeader,
(char *) tupdata,
newlen);
tupdata += newlen;
newlen += SizeofHeapTupleHeader;
htup->t_infomask2 = xlhdr->t_infomask2;
htup->t_infomask = xlhdr->t_infomask;
htup->t_hoff = xlhdr->t_hoff;
HeapTupleHeaderSetXmin(htup, XLogRecGetXid(record));
HeapTupleHeaderSetCmin(htup, xlrec->t_cid);
ItemPointerSetBlockNumber(&htup->t_ctid, blkno);
ItemPointerSetOffsetNumber(&htup->t_ctid, offnum);
offnum = PageAddItem(page, (Item) htup, newlen, offnum, true, true);
if (offnum == InvalidOffsetNumber)
elog(PANIC, "neon_rm_redo: failed to add tuple");
}
if (tupdata != endptr)
elog(PANIC, "neon_rm_redo: total tuple length mismatch");
freespace = PageGetHeapFreeSpace(page); /* needed to update FSM below */
PageSetLSN(page, lsn);
if (xlrec->flags & XLH_INSERT_ALL_VISIBLE_CLEARED)
PageClearAllVisible(page);
/* XLH_INSERT_ALL_FROZEN_SET implies that all tuples are visible */
if (xlrec->flags & XLH_INSERT_ALL_FROZEN_SET)
PageSetAllVisible(page);
MarkBufferDirty(buffer);
}
if (BufferIsValid(buffer))
UnlockReleaseBuffer(buffer);
/*
* If the page is running low on free space, update the FSM as well.
* Arbitrarily, our definition of "low" is less than 20%. We can't do much
* better than that without knowing the fill-factor for the table.
*
* XXX: Don't do this if the page was restored from full page image. We
* don't bother to update the FSM in that case, it doesn't need to be
* totally accurate anyway.
*/
if (action == BLK_NEEDS_REDO && freespace < BLCKSZ / 5)
XLogRecordPageWithFreeSpace(rlocator, blkno, freespace);
}
#else
/* safeguard for older PostgreSQL versions */
PG_MODULE_MAGIC;
#endif

View File

@@ -1,4 +0,0 @@
# neon_rmgr extension
comment = 'Neon WAL Resource Manager - custom WAL records used to make Neon work (since PG 16)'
default_version = '1.0'
module_pathname = '$libdir/neon_rmgr'

View File

@@ -1,13 +0,0 @@
#ifndef NEON_RMGR_H
#define NEON_RMGR_H
#if PG_MAJORVERSION_NUM >= 16
#include "access/xlog_internal.h"
#include "replication/decode.h"
#include "replication/logical.h"
extern void neon_rm_desc(StringInfo buf, XLogReaderState *record);
extern void neon_rm_decode(LogicalDecodingContext *ctx, XLogRecordBuffer *buf);
extern const char *neon_rm_identify(uint8 info);
#endif
#endif //NEON_RMGR_H

View File

@@ -1,404 +0,0 @@
#include "postgres.h"
#if PG_MAJORVERSION_NUM >= 16
#include "access/heapam_xlog.h"
#include "access/neon_xlog.h"
#include "replication/decode.h"
#include "replication/logical.h"
#include "replication/snapbuild.h"
#include "neon_rmgr.h"
/* individual record(group)'s handlers */
static void DecodeNeonInsert(LogicalDecodingContext *ctx, XLogRecordBuffer *buf);
static void DecodeNeonUpdate(LogicalDecodingContext *ctx, XLogRecordBuffer *buf);
static void DecodeNeonDelete(LogicalDecodingContext *ctx, XLogRecordBuffer *buf);
static void DecodeNeonMultiInsert(LogicalDecodingContext *ctx, XLogRecordBuffer *buf);
/* common function to decode tuples */
static void DecodeXLogTuple(char *data, Size len, ReorderBufferTupleBuf *tuple);
void
neon_rm_decode(LogicalDecodingContext *ctx, XLogRecordBuffer *buf)
{
uint8 info = XLogRecGetInfo(buf->record) & XLOG_NEON_OPMASK;
TransactionId xid = XLogRecGetXid(buf->record);
SnapBuild *builder = ctx->snapshot_builder;
ReorderBufferProcessXid(ctx->reorder, xid, buf->origptr);
/*
* If we don't have snapshot or we are just fast-forwarding, there is no
* point in decoding data changes.
*/
if (SnapBuildCurrentState(builder) < SNAPBUILD_FULL_SNAPSHOT ||
ctx->fast_forward)
return;
switch (info)
{
case XLOG_NEON_HEAP_INSERT:
if (SnapBuildProcessChange(builder, xid, buf->origptr))
DecodeNeonInsert(ctx, buf);
break;
case XLOG_NEON_HEAP_DELETE:
if (SnapBuildProcessChange(builder, xid, buf->origptr))
DecodeNeonDelete(ctx, buf);
break;
case XLOG_NEON_HEAP_UPDATE:
case XLOG_NEON_HEAP_HOT_UPDATE:
if (SnapBuildProcessChange(builder, xid, buf->origptr))
DecodeNeonUpdate(ctx, buf);
break;
case XLOG_NEON_HEAP_LOCK:
break;
case XLOG_NEON_HEAP_MULTI_INSERT:
if (SnapBuildProcessChange(builder, xid, buf->origptr))
DecodeNeonMultiInsert(ctx, buf);
break;
default:
elog(ERROR, "unexpected RM_HEAP_ID record type: %u", info);
break;
}
}
static inline bool
FilterByOrigin(LogicalDecodingContext *ctx, RepOriginId origin_id)
{
if (ctx->callbacks.filter_by_origin_cb == NULL)
return false;
return filter_by_origin_cb_wrapper(ctx, origin_id);
}
/*
* Parse XLOG_HEAP_INSERT (not MULTI_INSERT!) records into tuplebufs.
*
* Deletes can contain the new tuple.
*/
static void
DecodeNeonInsert(LogicalDecodingContext *ctx, XLogRecordBuffer *buf)
{
Size datalen;
char *tupledata;
Size tuplelen;
XLogReaderState *r = buf->record;
xl_neon_heap_insert *xlrec;
ReorderBufferChange *change;
RelFileLocator target_locator;
xlrec = (xl_neon_heap_insert *) XLogRecGetData(r);
/*
* Ignore insert records without new tuples (this does happen when
* raw_heap_insert marks the TOAST record as HEAP_INSERT_NO_LOGICAL).
*/
if (!(xlrec->flags & XLH_INSERT_CONTAINS_NEW_TUPLE))
return;
/* only interested in our database */
XLogRecGetBlockTag(r, 0, &target_locator, NULL, NULL);
if (target_locator.dbOid != ctx->slot->data.database)
return;
/* output plugin doesn't look for this origin, no need to queue */
if (FilterByOrigin(ctx, XLogRecGetOrigin(r)))
return;
change = ReorderBufferGetChange(ctx->reorder);
if (!(xlrec->flags & XLH_INSERT_IS_SPECULATIVE))
change->action = REORDER_BUFFER_CHANGE_INSERT;
else
change->action = REORDER_BUFFER_CHANGE_INTERNAL_SPEC_INSERT;
change->origin_id = XLogRecGetOrigin(r);
memcpy(&change->data.tp.rlocator, &target_locator, sizeof(RelFileLocator));
tupledata = XLogRecGetBlockData(r, 0, &datalen);
tuplelen = datalen - SizeOfNeonHeapHeader;
change->data.tp.newtuple =
ReorderBufferGetTupleBuf(ctx->reorder, tuplelen);
DecodeXLogTuple(tupledata, datalen, change->data.tp.newtuple);
change->data.tp.clear_toast_afterwards = true;
ReorderBufferQueueChange(ctx->reorder, XLogRecGetXid(r), buf->origptr,
change,
xlrec->flags & XLH_INSERT_ON_TOAST_RELATION);
}
/*
* Parse XLOG_HEAP_DELETE from wal into proper tuplebufs.
*
* Deletes can possibly contain the old primary key.
*/
static void
DecodeNeonDelete(LogicalDecodingContext *ctx, XLogRecordBuffer *buf)
{
XLogReaderState *r = buf->record;
xl_neon_heap_delete *xlrec;
ReorderBufferChange *change;
RelFileLocator target_locator;
xlrec = (xl_neon_heap_delete *) XLogRecGetData(r);
/* only interested in our database */
XLogRecGetBlockTag(r, 0, &target_locator, NULL, NULL);
if (target_locator.dbOid != ctx->slot->data.database)
return;
/* output plugin doesn't look for this origin, no need to queue */
if (FilterByOrigin(ctx, XLogRecGetOrigin(r)))
return;
change = ReorderBufferGetChange(ctx->reorder);
if (xlrec->flags & XLH_DELETE_IS_SUPER)
change->action = REORDER_BUFFER_CHANGE_INTERNAL_SPEC_ABORT;
else
change->action = REORDER_BUFFER_CHANGE_DELETE;
change->origin_id = XLogRecGetOrigin(r);
memcpy(&change->data.tp.rlocator, &target_locator, sizeof(RelFileLocator));
/* old primary key stored */
if (xlrec->flags & XLH_DELETE_CONTAINS_OLD)
{
Size datalen = XLogRecGetDataLen(r) - SizeOfNeonHeapHeader;
Size tuplelen = datalen - SizeOfNeonHeapHeader;
Assert(XLogRecGetDataLen(r) > (SizeOfNeonHeapDelete + SizeOfNeonHeapHeader));
change->data.tp.oldtuple =
ReorderBufferGetTupleBuf(ctx->reorder, tuplelen);
DecodeXLogTuple((char *) xlrec + SizeOfNeonHeapDelete,
datalen, change->data.tp.oldtuple);
}
change->data.tp.clear_toast_afterwards = true;
ReorderBufferQueueChange(ctx->reorder, XLogRecGetXid(r), buf->origptr,
change, false);
}
/*
* Parse XLOG_HEAP_UPDATE and XLOG_HEAP_HOT_UPDATE, which have the same layout
* in the record, from wal into proper tuplebufs.
*
* Updates can possibly contain a new tuple and the old primary key.
*/
static void
DecodeNeonUpdate(LogicalDecodingContext *ctx, XLogRecordBuffer *buf)
{
XLogReaderState *r = buf->record;
xl_neon_heap_update *xlrec;
ReorderBufferChange *change;
char *data;
RelFileLocator target_locator;
xlrec = (xl_neon_heap_update *) XLogRecGetData(r);
/* only interested in our database */
XLogRecGetBlockTag(r, 0, &target_locator, NULL, NULL);
if (target_locator.dbOid != ctx->slot->data.database)
return;
/* output plugin doesn't look for this origin, no need to queue */
if (FilterByOrigin(ctx, XLogRecGetOrigin(r)))
return;
change = ReorderBufferGetChange(ctx->reorder);
change->action = REORDER_BUFFER_CHANGE_UPDATE;
change->origin_id = XLogRecGetOrigin(r);
memcpy(&change->data.tp.rlocator, &target_locator, sizeof(RelFileLocator));
if (xlrec->flags & XLH_UPDATE_CONTAINS_NEW_TUPLE)
{
Size datalen;
Size tuplelen;
data = XLogRecGetBlockData(r, 0, &datalen);
tuplelen = datalen - SizeOfNeonHeapHeader;
change->data.tp.newtuple =
ReorderBufferGetTupleBuf(ctx->reorder, tuplelen);
DecodeXLogTuple(data, datalen, change->data.tp.newtuple);
}
if (xlrec->flags & XLH_UPDATE_CONTAINS_OLD)
{
Size datalen;
Size tuplelen;
/* caution, remaining data in record is not aligned */
data = XLogRecGetData(r) + SizeOfNeonHeapUpdate;
datalen = XLogRecGetDataLen(r) - SizeOfNeonHeapUpdate;
tuplelen = datalen - SizeOfNeonHeapHeader;
change->data.tp.oldtuple =
ReorderBufferGetTupleBuf(ctx->reorder, tuplelen);
DecodeXLogTuple(data, datalen, change->data.tp.oldtuple);
}
change->data.tp.clear_toast_afterwards = true;
ReorderBufferQueueChange(ctx->reorder, XLogRecGetXid(r), buf->origptr,
change, false);
}
/*
* Decode XLOG_HEAP2_MULTI_INSERT_insert record into multiple tuplebufs.
*
* Currently MULTI_INSERT will always contain the full tuples.
*/
static void
DecodeNeonMultiInsert(LogicalDecodingContext *ctx, XLogRecordBuffer *buf)
{
XLogReaderState *r = buf->record;
xl_neon_heap_multi_insert *xlrec;
int i;
char *data;
char *tupledata;
Size tuplelen;
RelFileLocator rlocator;
xlrec = (xl_neon_heap_multi_insert *) XLogRecGetData(r);
/*
* Ignore insert records without new tuples. This happens when a
* multi_insert is done on a catalog or on a non-persistent relation.
*/
if (!(xlrec->flags & XLH_INSERT_CONTAINS_NEW_TUPLE))
return;
/* only interested in our database */
XLogRecGetBlockTag(r, 0, &rlocator, NULL, NULL);
if (rlocator.dbOid != ctx->slot->data.database)
return;
/* output plugin doesn't look for this origin, no need to queue */
if (FilterByOrigin(ctx, XLogRecGetOrigin(r)))
return;
/*
* We know that this multi_insert isn't for a catalog, so the block should
* always have data even if a full-page write of it is taken.
*/
tupledata = XLogRecGetBlockData(r, 0, &tuplelen);
Assert(tupledata != NULL);
data = tupledata;
for (i = 0; i < xlrec->ntuples; i++)
{
ReorderBufferChange *change;
xl_neon_multi_insert_tuple *xlhdr;
int datalen;
ReorderBufferTupleBuf *tuple;
HeapTupleHeader header;
change = ReorderBufferGetChange(ctx->reorder);
change->action = REORDER_BUFFER_CHANGE_INSERT;
change->origin_id = XLogRecGetOrigin(r);
memcpy(&change->data.tp.rlocator, &rlocator, sizeof(RelFileLocator));
xlhdr = (xl_neon_multi_insert_tuple *) SHORTALIGN(data);
data = ((char *) xlhdr) + SizeOfNeonMultiInsertTuple;
datalen = xlhdr->datalen;
change->data.tp.newtuple =
ReorderBufferGetTupleBuf(ctx->reorder, datalen);
tuple = change->data.tp.newtuple;
header = tuple->tuple.t_data;
/* not a disk based tuple */
ItemPointerSetInvalid(&tuple->tuple.t_self);
/*
* We can only figure this out after reassembling the transactions.
*/
tuple->tuple.t_tableOid = InvalidOid;
tuple->tuple.t_len = datalen + SizeofHeapTupleHeader;
memset(header, 0, SizeofHeapTupleHeader);
memcpy((char *) tuple->tuple.t_data + SizeofHeapTupleHeader,
(char *) data,
datalen);
header->t_infomask = xlhdr->t_infomask;
header->t_infomask2 = xlhdr->t_infomask2;
header->t_hoff = xlhdr->t_hoff;
/*
* Reset toast reassembly state only after the last row in the last
* xl_multi_insert_tuple record emitted by one heap_multi_insert()
* call.
*/
if (xlrec->flags & XLH_INSERT_LAST_IN_MULTI &&
(i + 1) == xlrec->ntuples)
change->data.tp.clear_toast_afterwards = true;
else
change->data.tp.clear_toast_afterwards = false;
ReorderBufferQueueChange(ctx->reorder, XLogRecGetXid(r),
buf->origptr, change, false);
/* move to the next xl_neon_multi_insert_tuple entry */
data += datalen;
}
Assert(data == tupledata + tuplelen);
}
/*
* Read a HeapTuple as WAL logged by heap_insert, heap_update and heap_delete
* (but not by heap_multi_insert) into a tuplebuf.
*
* The size 'len' and the pointer 'data' in the record need to be
* computed outside as they are record specific.
*/
static void
DecodeXLogTuple(char *data, Size len, ReorderBufferTupleBuf *tuple)
{
xl_neon_heap_header xlhdr;
int datalen = len - SizeOfNeonHeapHeader;
HeapTupleHeader header;
Assert(datalen >= 0);
tuple->tuple.t_len = datalen + SizeofHeapTupleHeader;
header = tuple->tuple.t_data;
/* not a disk based tuple */
ItemPointerSetInvalid(&tuple->tuple.t_self);
/* we can only figure this out after reassembling the transactions */
tuple->tuple.t_tableOid = InvalidOid;
/* data is not stored aligned, copy to aligned storage */
memcpy((char *) &xlhdr,
data,
SizeOfNeonHeapHeader);
memset(header, 0, SizeofHeapTupleHeader);
memcpy(((char *) tuple->tuple.t_data) + SizeofHeapTupleHeader,
data + SizeOfNeonHeapHeader,
datalen);
header->t_infomask = xlhdr.t_infomask;
header->t_infomask2 = xlhdr.t_infomask2;
header->t_hoff = xlhdr.t_hoff;
}
#endif

View File

@@ -1,181 +0,0 @@
#include "postgres.h"
#if PG_MAJORVERSION_NUM >= 16
#include "access/heapam_xlog.h"
#include "access/neon_xlog.h"
#include "access/rmgr.h"
#include "access/rmgrdesc_utils.h"
#include "access/xlog_internal.h"
#include "miscadmin.h"
#include "storage/buf.h"
#include "storage/bufpage.h"
#include "neon_rmgr.h"
/*
* NOTE: "keyname" argument cannot have trailing spaces or punctuation
* characters
*/
static void
infobits_desc(StringInfo buf, uint8 infobits, const char *keyname)
{
appendStringInfo(buf, "%s: [", keyname);
Assert(buf->data[buf->len - 1] != ' ');
if (infobits & XLHL_XMAX_IS_MULTI)
appendStringInfoString(buf, "IS_MULTI, ");
if (infobits & XLHL_XMAX_LOCK_ONLY)
appendStringInfoString(buf, "LOCK_ONLY, ");
if (infobits & XLHL_XMAX_EXCL_LOCK)
appendStringInfoString(buf, "EXCL_LOCK, ");
if (infobits & XLHL_XMAX_KEYSHR_LOCK)
appendStringInfoString(buf, "KEYSHR_LOCK, ");
if (infobits & XLHL_KEYS_UPDATED)
appendStringInfoString(buf, "KEYS_UPDATED, ");
if (buf->data[buf->len - 1] == ' ')
{
/* Truncate-away final unneeded ", " */
Assert(buf->data[buf->len - 2] == ',');
buf->len -= 2;
buf->data[buf->len] = '\0';
}
appendStringInfoString(buf, "]");
}
static void
truncate_flags_desc(StringInfo buf, uint8 flags)
{
appendStringInfoString(buf, "flags: [");
if (flags & XLH_TRUNCATE_CASCADE)
appendStringInfoString(buf, "CASCADE, ");
if (flags & XLH_TRUNCATE_RESTART_SEQS)
appendStringInfoString(buf, "RESTART_SEQS, ");
if (buf->data[buf->len - 1] == ' ')
{
/* Truncate-away final unneeded ", " */
Assert(buf->data[buf->len - 2] == ',');
buf->len -= 2;
buf->data[buf->len] = '\0';
}
appendStringInfoString(buf, "]");
}
void
neon_rm_desc(StringInfo buf, XLogReaderState *record)
{
char *rec = XLogRecGetData(record);
uint8 info = XLogRecGetInfo(record) & ~XLR_INFO_MASK;
info &= XLOG_NEON_OPMASK;
if (info == XLOG_NEON_HEAP_INSERT)
{
xl_neon_heap_insert *xlrec = (xl_neon_heap_insert *) rec;
appendStringInfo(buf, "off: %u, flags: 0x%02X",
xlrec->offnum,
xlrec->flags);
}
else if (info == XLOG_NEON_HEAP_DELETE)
{
xl_neon_heap_delete *xlrec = (xl_neon_heap_delete *) rec;
appendStringInfo(buf, "xmax: %u, off: %u, ",
xlrec->xmax, xlrec->offnum);
infobits_desc(buf, xlrec->infobits_set, "infobits");
appendStringInfo(buf, ", flags: 0x%02X", xlrec->flags);
}
else if (info == XLOG_NEON_HEAP_UPDATE)
{
xl_neon_heap_update *xlrec = (xl_neon_heap_update *) rec;
appendStringInfo(buf, "old_xmax: %u, old_off: %u, ",
xlrec->old_xmax, xlrec->old_offnum);
infobits_desc(buf, xlrec->old_infobits_set, "old_infobits");
appendStringInfo(buf, ", flags: 0x%02X, new_xmax: %u, new_off: %u",
xlrec->flags, xlrec->new_xmax, xlrec->new_offnum);
}
else if (info == XLOG_NEON_HEAP_HOT_UPDATE)
{
xl_neon_heap_update *xlrec = (xl_neon_heap_update *) rec;
appendStringInfo(buf, "old_xmax: %u, old_off: %u, ",
xlrec->old_xmax, xlrec->old_offnum);
infobits_desc(buf, xlrec->old_infobits_set, "old_infobits");
appendStringInfo(buf, ", flags: 0x%02X, new_xmax: %u, new_off: %u",
xlrec->flags, xlrec->new_xmax, xlrec->new_offnum);
}
else if (info == XLOG_NEON_HEAP_LOCK)
{
xl_neon_heap_lock *xlrec = (xl_neon_heap_lock *) rec;
appendStringInfo(buf, "xmax: %u, off: %u, ",
xlrec->xmax, xlrec->offnum);
infobits_desc(buf, xlrec->infobits_set, "infobits");
appendStringInfo(buf, ", flags: 0x%02X", xlrec->flags);
}
else if (info == XLOG_NEON_HEAP_MULTI_INSERT)
{
xl_neon_heap_multi_insert *xlrec = (xl_neon_heap_multi_insert *) rec;
bool isinit = (XLogRecGetInfo(record) & XLOG_NEON_INIT_PAGE) != 0;
appendStringInfo(buf, "ntuples: %d, flags: 0x%02X", xlrec->ntuples,
xlrec->flags);
if (XLogRecHasBlockData(record, 0) && !isinit)
{
appendStringInfoString(buf, ", offsets:");
array_desc(buf, xlrec->offsets, sizeof(OffsetNumber),
xlrec->ntuples, &offset_elem_desc, NULL);
}
}
}
const char *
neon_rm_identify(uint8 info)
{
const char *id = NULL;
switch (info & ~XLR_INFO_MASK)
{
case XLOG_NEON_HEAP_INSERT:
id = "INSERT";
break;
case XLOG_NEON_HEAP_INSERT | XLOG_NEON_INIT_PAGE:
id = "INSERT+INIT";
break;
case XLOG_NEON_HEAP_DELETE:
id = "DELETE";
break;
case XLOG_NEON_HEAP_UPDATE:
id = "UPDATE";
break;
case XLOG_NEON_HEAP_UPDATE | XLOG_NEON_INIT_PAGE:
id = "UPDATE+INIT";
break;
case XLOG_NEON_HEAP_HOT_UPDATE:
id = "HOT_UPDATE";
break;
case XLOG_NEON_HEAP_HOT_UPDATE | XLOG_HEAP_INIT_PAGE:
id = "HOT_UPDATE+INIT";
break;
case XLOG_NEON_HEAP_LOCK:
id = "LOCK";
break;
case XLOG_NEON_HEAP_MULTI_INSERT:
id = "MULTI_INSERT";
break;
case XLOG_NEON_HEAP_MULTI_INSERT | XLOG_NEON_INIT_PAGE:
id = "MULTI_INSERT+INIT";
break;
}
return id;
}
#endif

View File

@@ -10,8 +10,6 @@
*/
#include "postgres.h"
#include "../neon/neon_pgversioncompat.h"
#include "access/relation.h"
#include "access/xact.h"
#include "access/xlog.h"
@@ -41,13 +39,8 @@ PG_FUNCTION_INFO_V1(neon_xlogflush);
* Linkage to functions in neon module.
* The signature here would need to be updated whenever function parameters change in pagestore_smgr.c
*/
#if PG_MAJORVERSION_NUM < 16
typedef void (*neon_read_at_lsn_type) (NRelFileInfo rinfo, ForkNumber forkNum, BlockNumber blkno,
typedef void (*neon_read_at_lsn_type) (RelFileNode rnode, ForkNumber forkNum, BlockNumber blkno,
XLogRecPtr request_lsn, bool request_latest, char *buffer);
#else
typedef void (*neon_read_at_lsn_type) (NRelFileInfo rinfo, ForkNumber forkNum, BlockNumber blkno,
XLogRecPtr request_lsn, bool request_latest, void *buffer);
#endif
static neon_read_at_lsn_type neon_read_at_lsn_ptr;
@@ -122,7 +115,7 @@ clear_buffer_cache(PG_FUNCTION_ARGS)
uint32 buf_state;
Buffer bufferid;
bool isvalid;
NRelFileInfo rinfo;
RelFileNode rnode;
ForkNumber forknum;
BlockNumber blocknum;
@@ -135,7 +128,7 @@ clear_buffer_cache(PG_FUNCTION_ARGS)
else
isvalid = false;
bufferid = BufferDescriptorGetBuffer(bufHdr);
rinfo = BufTagGetNRelFileInfo(bufHdr->tag);
rnode = bufHdr->tag.rnode;
forknum = bufHdr->tag.forkNum;
blocknum = bufHdr->tag.blockNum;
@@ -148,7 +141,7 @@ clear_buffer_cache(PG_FUNCTION_ARGS)
*/
if (isvalid)
{
if (ReadRecentBuffer(rinfo, forknum, blocknum, bufferid))
if (ReadRecentBuffer(rnode, forknum, blocknum, bufferid))
ReleaseBuffer(bufferid);
}
}
@@ -245,7 +238,7 @@ get_raw_page_at_lsn(PG_FUNCTION_ARGS)
SET_VARSIZE(raw_page, BLCKSZ + VARHDRSZ);
raw_page_data = VARDATA(raw_page);
neon_read_at_lsn(InfoFromRelation(rel), forknum, blkno, read_lsn, request_latest, raw_page_data);
neon_read_at_lsn(rel->rd_node, forknum, blkno, read_lsn, request_latest, raw_page_data);
relation_close(rel, AccessShareLock);
@@ -274,17 +267,10 @@ get_raw_page_at_lsn_ex(PG_FUNCTION_ARGS)
PG_RETURN_NULL();
{
NRelFileInfo rinfo = {
#if PG_MAJORVERSION_NUM < 16
RelFileNode rnode = {
.spcNode = PG_GETARG_OID(0),
.dbNode = PG_GETARG_OID(1),
.relNode = PG_GETARG_OID(2)
#else
.spcOid = PG_GETARG_OID(0),
.dbOid = PG_GETARG_OID(1),
.relNumber = PG_GETARG_OID(2)
#endif
};
.relNode = PG_GETARG_OID(2)};
ForkNumber forknum = PG_GETARG_UINT32(3);
@@ -298,7 +284,7 @@ get_raw_page_at_lsn_ex(PG_FUNCTION_ARGS)
SET_VARSIZE(raw_page, BLCKSZ + VARHDRSZ);
raw_page_data = VARDATA(raw_page);
neon_read_at_lsn(rinfo, forknum, blkno, read_lsn, request_latest, raw_page_data);
neon_read_at_lsn(rnode, forknum, blkno, read_lsn, request_latest, raw_page_data);
PG_RETURN_BYTEA_P(raw_page);
}
}

View File

@@ -18,12 +18,10 @@
*/
#include "postgres.h"
#include "../neon/neon_pgversioncompat.h"
#include "access/xlog.h"
#include "storage/block.h"
#include "storage/buf_internals.h"
#include RELFILEINFO_HDR
#include "storage/relfilenode.h"
#include "storage/smgr.h"
#if PG_VERSION_NUM >= 150000
@@ -45,12 +43,10 @@ static int used_pages;
static int
locate_page(SMgrRelation reln, ForkNumber forknum, BlockNumber blkno)
{
NRelFileInfo rinfo = InfoFromSMgrRel(reln);
/* We only hold a small number of pages, so linear search */
for (int i = 0; i < used_pages; i++)
{
if (RelFileInfoEquals(rinfo, BufTagGetNRelFileInfo(page_tag[i]))
if (RelFileNodeEquals(reln->smgr_rnode.node, page_tag[i].rnode)
&& forknum == page_tag[i].forkNum
&& blkno == page_tag[i].blockNum)
{
@@ -67,26 +63,15 @@ static void inmem_open(SMgrRelation reln);
static void inmem_close(SMgrRelation reln, ForkNumber forknum);
static void inmem_create(SMgrRelation reln, ForkNumber forknum, bool isRedo);
static bool inmem_exists(SMgrRelation reln, ForkNumber forknum);
static void inmem_unlink(NRelFileInfoBackend rinfo, ForkNumber forknum, bool isRedo);
static bool inmem_prefetch(SMgrRelation reln, ForkNumber forknum,
BlockNumber blocknum);
#if PG_MAJORVERSION_NUM < 16
static void inmem_unlink(RelFileNodeBackend rnode, ForkNumber forknum, bool isRedo);
static void inmem_extend(SMgrRelation reln, ForkNumber forknum,
BlockNumber blocknum, char *buffer, bool skipFsync);
static bool inmem_prefetch(SMgrRelation reln, ForkNumber forknum,
BlockNumber blocknum);
static void inmem_read(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum,
char *buffer);
static void inmem_write(SMgrRelation reln, ForkNumber forknum,
BlockNumber blocknum, char *buffer, bool skipFsync);
#else
static void inmem_extend(SMgrRelation reln, ForkNumber forknum,
BlockNumber blocknum, const void *buffer, bool skipFsync);
static void inmem_zeroextend(SMgrRelation reln, ForkNumber forknum,
BlockNumber blocknum, int nblocks, bool skipFsync);
static void inmem_read(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum,
void *buffer);
static void inmem_write(SMgrRelation reln, ForkNumber forknum,
BlockNumber blocknum, const void *buffer, bool skipFsync);
#endif
static void inmem_writeback(SMgrRelation reln, ForkNumber forknum,
BlockNumber blocknum, BlockNumber nblocks);
static BlockNumber inmem_nblocks(SMgrRelation reln, ForkNumber forknum);
@@ -110,11 +95,9 @@ inmem_init(void)
static bool
inmem_exists(SMgrRelation reln, ForkNumber forknum)
{
NRelFileInfo rinfo = InfoFromSMgrRel(reln);
for (int i = 0; i < used_pages; i++)
{
if (RelFileInfoEquals(rinfo, BufTagGetNRelFileInfo(page_tag[i]))
if (RelFileNodeEquals(reln->smgr_rnode.node, page_tag[i].rnode)
&& forknum == page_tag[i].forkNum)
{
return true;
@@ -137,7 +120,7 @@ inmem_create(SMgrRelation reln, ForkNumber forknum, bool isRedo)
* inmem_unlink() -- Unlink a relation.
*/
static void
inmem_unlink(NRelFileInfoBackend rinfo, ForkNumber forknum, bool isRedo)
inmem_unlink(RelFileNodeBackend rnode, ForkNumber forknum, bool isRedo)
{
}
@@ -152,28 +135,12 @@ inmem_unlink(NRelFileInfoBackend rinfo, ForkNumber forknum, bool isRedo)
*/
static void
inmem_extend(SMgrRelation reln, ForkNumber forknum, BlockNumber blkno,
#if PG_MAJORVERSION_NUM < 16
char *buffer, bool skipFsync)
#else
const void *buffer, bool skipFsync)
#endif
{
/* same as smgwrite() for us */
inmem_write(reln, forknum, blkno, buffer, skipFsync);
}
#if PG_MAJORVERSION_NUM >= 16
static void
inmem_zeroextend(SMgrRelation reln, ForkNumber forknum,
BlockNumber blocknum, int nblocks, bool skipFsync)
{
char buffer[BLCKSZ] = {0};
for (int i = 0; i < nblocks; i++)
inmem_extend(reln, forknum, blocknum + i, buffer, skipFsync);
}
#endif
/*
* inmem_open() -- Initialize newly-opened relation.
*/
@@ -213,11 +180,7 @@ inmem_writeback(SMgrRelation reln, ForkNumber forknum,
*/
static void
inmem_read(SMgrRelation reln, ForkNumber forknum, BlockNumber blkno,
#if PG_MAJORVERSION_NUM < 16
char *buffer)
#else
void *buffer)
#endif
{
int pg;
@@ -237,11 +200,7 @@ inmem_read(SMgrRelation reln, ForkNumber forknum, BlockNumber blkno,
*/
static void
inmem_write(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum,
#if PG_MAJORVERSION_NUM < 16
char *buffer, bool skipFsync)
#else
const void *buffer, bool skipFsync)
#endif
{
int pg;
@@ -257,7 +216,9 @@ inmem_write(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum,
*/
elog(used_pages >= WARN_PAGES ? WARNING : DEBUG1,
"inmem_write() called for %u/%u/%u.%u blk %u: used_pages %u",
RelFileInfoFmt(InfoFromSMgrRel(reln)),
reln->smgr_rnode.node.spcNode,
reln->smgr_rnode.node.dbNode,
reln->smgr_rnode.node.relNode,
forknum,
blocknum,
used_pages);
@@ -266,13 +227,14 @@ inmem_write(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum,
pg = used_pages;
used_pages++;
InitBufferTag(&page_tag[pg], &InfoFromSMgrRel(reln), forknum, blocknum);
INIT_BUFFERTAG(page_tag[pg], reln->smgr_rnode.node, forknum, blocknum);
}
else
{
elog(DEBUG1, "inmem_write() called for %u/%u/%u.%u blk %u: found at %u",
RelFileInfoFmt(InfoFromSMgrRel(reln)),
reln->smgr_rnode.node.spcNode,
reln->smgr_rnode.node.dbNode,
reln->smgr_rnode.node.relNode,
forknum,
blocknum,
used_pages);
@@ -325,9 +287,6 @@ static const struct f_smgr inmem_smgr =
.smgr_exists = inmem_exists,
.smgr_unlink = inmem_unlink,
.smgr_extend = inmem_extend,
#if PG_MAJORVERSION_NUM >= 16
.smgr_zeroextend = inmem_zeroextend,
#endif
.smgr_prefetch = inmem_prefetch,
.smgr_read = inmem_read,
.smgr_write = inmem_write,
@@ -338,11 +297,11 @@ static const struct f_smgr inmem_smgr =
};
const f_smgr *
smgr_inmem(BackendId backend, NRelFileInfo rinfo)
smgr_inmem(BackendId backend, RelFileNode rnode)
{
Assert(InRecovery);
if (backend != InvalidBackendId)
return smgr_standard(backend, rinfo);
return smgr_standard(backend, rnode);
else
return &inmem_smgr;
}

View File

@@ -11,7 +11,7 @@
#ifndef INMEM_SMGR_H
#define INMEM_SMGR_H
extern const f_smgr *smgr_inmem(BackendId backend, NRelFileInfo rinfo);
extern const f_smgr *smgr_inmem(BackendId backend, RelFileNode rnode);
extern void smgr_init_inmem(void);
#endif /* INMEM_SMGR_H */

View File

@@ -43,8 +43,6 @@
#include "postgres.h"
#include "../neon/neon_pgversioncompat.h"
#include <fcntl.h>
#include <limits.h>
#include <signal.h>
@@ -63,11 +61,9 @@
#include <malloc.h>
#endif
#if PG_MAJORVERSION_NUM < 16
#ifndef HAVE_GETRUSAGE
#include "rusagestub.h"
#endif
#endif
#include "access/clog.h"
#include "access/commit_ts.h"
@@ -191,7 +187,7 @@ enter_seccomp_mode(void)
* backend processes. Some initialization was done in CallExtMain
* already.
*/
PGDLLEXPORT void
void
WalRedoMain(int argc, char *argv[])
{
int firstchar;
@@ -204,7 +200,7 @@ WalRedoMain(int argc, char *argv[])
/*
* WAL redo does not need a large number of buffers. And speed of
* DropRelationAllLocalBuffers() is proportional to the number of
* DropRelFileNodeAllLocalBuffers() is proportional to the number of
* buffers. So let's keep it small (default value is 1024)
*/
num_temp_buffers = 4;
@@ -216,12 +212,6 @@ WalRedoMain(int argc, char *argv[])
smgr_hook = smgr_inmem;
smgr_init_hook = smgr_init_inmem;
#if PG_VERSION_NUM >= 160000
/* make rmgr registry believe we can register the resource manager */
process_shared_preload_libraries_in_progress = true;
load_file("$libdir/neon_rmgr", false);
process_shared_preload_libraries_in_progress = false;
#endif
/* Initialize MaxBackends (if under postmaster, was done already) */
MaxConnections = 1;
@@ -310,9 +300,6 @@ WalRedoMain(int argc, char *argv[])
*/
MemoryContextSwitchTo(MessageContext);
initStringInfo(&input_message);
#if PG_MAJORVERSION_NUM >= 16
MyBackendType = B_BACKEND;
#endif
for (;;)
{
@@ -547,16 +534,16 @@ CreateFakeSharedMemoryAndSemaphores()
/* Version compatility wrapper for ReadBufferWithoutRelcache */
static inline Buffer
NeonRedoReadBuffer(NRelFileInfo rinfo,
NeonRedoReadBuffer(RelFileNode rnode,
ForkNumber forkNum, BlockNumber blockNum,
ReadBufferMode mode)
{
#if PG_VERSION_NUM >= 150000
return ReadBufferWithoutRelcache(rinfo, forkNum, blockNum, mode,
return ReadBufferWithoutRelcache(rnode, forkNum, blockNum, mode,
NULL, /* no strategy */
true); /* WAL redo is only performed on permanent rels */
#else
return ReadBufferWithoutRelcache(rinfo, forkNum, blockNum, mode,
return ReadBufferWithoutRelcache(rnode, forkNum, blockNum, mode,
NULL); /* no strategy */
#endif
}
@@ -660,7 +647,7 @@ ReadRedoCommand(StringInfo inBuf)
static void
BeginRedoForBlock(StringInfo input_message)
{
NRelFileInfo rinfo;
RelFileNode rnode;
ForkNumber forknum;
BlockNumber blknum;
SMgrRelation reln;
@@ -675,26 +662,22 @@ BeginRedoForBlock(StringInfo input_message)
* BlockNumber
*/
forknum = pq_getmsgbyte(input_message);
#if PG_MAJORVERSION_NUM < 16
rinfo.spcNode = pq_getmsgint(input_message, 4);
rinfo.dbNode = pq_getmsgint(input_message, 4);
rinfo.relNode = pq_getmsgint(input_message, 4);
#else
rinfo.spcOid = pq_getmsgint(input_message, 4);
rinfo.dbOid = pq_getmsgint(input_message, 4);
rinfo.relNumber = pq_getmsgint(input_message, 4);
#endif
rnode.spcNode = pq_getmsgint(input_message, 4);
rnode.dbNode = pq_getmsgint(input_message, 4);
rnode.relNode = pq_getmsgint(input_message, 4);
blknum = pq_getmsgint(input_message, 4);
wal_redo_buffer = InvalidBuffer;
InitBufferTag(&target_redo_tag, &rinfo, forknum, blknum);
INIT_BUFFERTAG(target_redo_tag, rnode, forknum, blknum);
elog(TRACE, "BeginRedoForBlock %u/%u/%u.%d blk %u",
RelFileInfoFmt(rinfo),
target_redo_tag.rnode.spcNode,
target_redo_tag.rnode.dbNode,
target_redo_tag.rnode.relNode,
target_redo_tag.forkNum,
target_redo_tag.blockNum);
reln = smgropen(rinfo, InvalidBackendId, RELPERSISTENCE_PERMANENT);
reln = smgropen(rnode, InvalidBackendId, RELPERSISTENCE_PERMANENT);
if (reln->smgr_cached_nblocks[forknum] == InvalidBlockNumber ||
reln->smgr_cached_nblocks[forknum] < blknum + 1)
{
@@ -708,7 +691,7 @@ BeginRedoForBlock(StringInfo input_message)
static void
PushPage(StringInfo input_message)
{
NRelFileInfo rinfo;
RelFileNode rnode;
ForkNumber forknum;
BlockNumber blknum;
const char *content;
@@ -726,19 +709,13 @@ PushPage(StringInfo input_message)
* 8k page content
*/
forknum = pq_getmsgbyte(input_message);
#if PG_MAJORVERSION_NUM < 16
rinfo.spcNode = pq_getmsgint(input_message, 4);
rinfo.dbNode = pq_getmsgint(input_message, 4);
rinfo.relNode = pq_getmsgint(input_message, 4);
#else
rinfo.spcOid = pq_getmsgint(input_message, 4);
rinfo.dbOid = pq_getmsgint(input_message, 4);
rinfo.relNumber = pq_getmsgint(input_message, 4);
#endif
rnode.spcNode = pq_getmsgint(input_message, 4);
rnode.dbNode = pq_getmsgint(input_message, 4);
rnode.relNode = pq_getmsgint(input_message, 4);
blknum = pq_getmsgint(input_message, 4);
content = pq_getmsgbytes(input_message, BLCKSZ);
buf = NeonRedoReadBuffer(rinfo, forknum, blknum, RBM_ZERO_AND_LOCK);
buf = NeonRedoReadBuffer(rnode, forknum, blknum, RBM_ZERO_AND_LOCK);
wal_redo_buffer = buf;
page = BufferGetPage(buf);
memcpy(page, content, BLCKSZ);
@@ -854,7 +831,7 @@ ApplyRecord(StringInfo input_message)
*/
if (BufferIsInvalid(wal_redo_buffer))
{
wal_redo_buffer = NeonRedoReadBuffer(BufTagGetNRelFileInfo(target_redo_tag),
wal_redo_buffer = NeonRedoReadBuffer(target_redo_tag.rnode,
target_redo_tag.forkNum,
target_redo_tag.blockNum,
RBM_NORMAL);
@@ -901,29 +878,26 @@ static bool
redo_block_filter(XLogReaderState *record, uint8 block_id)
{
BufferTag target_tag;
NRelFileInfo rinfo;
#if PG_VERSION_NUM >= 150000
XLogRecGetBlockTag(record, block_id,
&rinfo, &target_tag.forkNum, &target_tag.blockNum);
&target_tag.rnode, &target_tag.forkNum, &target_tag.blockNum);
#else
if (!XLogRecGetBlockTag(record, block_id,
&rinfo, &target_tag.forkNum, &target_tag.blockNum))
&target_tag.rnode, &target_tag.forkNum, &target_tag.blockNum))
{
/* Caller specified a bogus block_id */
elog(PANIC, "failed to locate backup block with ID %d", block_id);
}
#endif
CopyNRelFileInfoToBufTag(target_tag, rinfo);
/*
* Can a WAL redo function ever access a relation other than the one that
* it modifies? I don't see why it would.
* Custom RMGRs may be affected by this.
*/
if (!RelFileInfoEquals(rinfo, BufTagGetNRelFileInfo(target_redo_tag)))
if (!RelFileNodeEquals(target_tag.rnode, target_redo_tag.rnode))
elog(WARNING, "REDO accessing unexpected page: %u/%u/%u.%u blk %u",
RelFileInfoFmt(rinfo), target_tag.forkNum, target_tag.blockNum);
target_tag.rnode.spcNode, target_tag.rnode.dbNode, target_tag.rnode.relNode, target_tag.forkNum, target_tag.blockNum);
/*
* If this block isn't one we are currently restoring, then return 'true'
@@ -940,7 +914,7 @@ redo_block_filter(XLogReaderState *record, uint8 block_id)
static void
GetPage(StringInfo input_message)
{
NRelFileInfo rinfo;
RelFileNode rnode;
ForkNumber forknum;
BlockNumber blknum;
Buffer buf;
@@ -957,20 +931,14 @@ GetPage(StringInfo input_message)
* BlockNumber
*/
forknum = pq_getmsgbyte(input_message);
#if PG_MAJORVERSION_NUM < 16
rinfo.spcNode = pq_getmsgint(input_message, 4);
rinfo.dbNode = pq_getmsgint(input_message, 4);
rinfo.relNode = pq_getmsgint(input_message, 4);
#else
rinfo.spcOid = pq_getmsgint(input_message, 4);
rinfo.dbOid = pq_getmsgint(input_message, 4);
rinfo.relNumber = pq_getmsgint(input_message, 4);
#endif
rnode.spcNode = pq_getmsgint(input_message, 4);
rnode.dbNode = pq_getmsgint(input_message, 4);
rnode.relNode = pq_getmsgint(input_message, 4);
blknum = pq_getmsgint(input_message, 4);
/* FIXME: check that we got a BeginRedoForBlock message or this earlier */
buf = NeonRedoReadBuffer(rinfo, forknum, blknum, RBM_NORMAL);
buf = NeonRedoReadBuffer(rnode, forknum, blknum, RBM_NORMAL);
Assert(buf == wal_redo_buffer);
page = BufferGetPage(buf);
/* single thread, so don't bother locking the page */
@@ -993,7 +961,7 @@ GetPage(StringInfo input_message)
} while (tot_written < BLCKSZ);
ReleaseBuffer(buf);
DropRelationAllLocalBuffers(rinfo);
DropRelFileNodeAllLocalBuffers(rnode);
wal_redo_buffer = InvalidBuffer;
elog(TRACE, "Page sent back for block %u", blknum);

View File

@@ -1,5 +1,5 @@
[toolchain]
channel = "1.72.0"
channel = "1.71.0"
profile = "default"
# The default profile includes rustc, rust-std, cargo, rust-docs, rustfmt and clippy.
# https://rust-lang.github.io/rustup/concepts/profiles.html

View File

@@ -32,10 +32,10 @@ chrono = { workspace = true, default-features = false, features = ["clock", "ser
reqwest = { workspace = true, default-features = false, features = ["rustls-tls", "json"] }
aws-config = { workspace = true, default-features = false, features = ["rustls", "credentials-sso"] }
pageserver = { path = "../pageserver" }
pageserver = {path="../pageserver"}
tracing.workspace = true
tracing-subscriber.workspace = true
clap.workspace = true
tracing-appender = "0.2"
histogram = "0.7"
histogram = "0.7"

View File

@@ -46,10 +46,7 @@ pub async fn validate_pageserver_active_tenant_and_timelines(
.push(active_branch);
let Some(active_project) = s3_active_projects.get(&active_project_id) else {
error!(
"Branch {:?} for project {:?} has no such project in the active projects",
active_branch_id, active_project_id
);
error!("Branch {:?} for project {:?} has no such project in the active projects", active_branch_id, active_project_id);
continue;
};

View File

@@ -19,7 +19,6 @@ use aws_sdk_s3::{Client, Config};
use reqwest::Url;
pub use s3_deletion::S3Deleter;
use std::io::IsTerminal;
use tokio::io::AsyncReadExt;
use tracing::error;
use tracing_appender::non_blocking::WorkerGuard;
@@ -180,7 +179,6 @@ pub fn init_logging(file_name: &str) -> WorkerGuard {
.with_ansi(false)
.with_writer(file_writer);
let stdout_logs = fmt::Layer::new()
.with_ansi(std::io::stdout().is_terminal())
.with_target(false)
.with_writer(std::io::stdout);
tracing_subscriber::registry()

View File

@@ -11,7 +11,7 @@ use anyhow::{bail, Context, Result};
use bytes::Bytes;
use futures::future::BoxFuture;
use postgres_ffi::v14::xlog_utils::{IsPartialXLogFileName, IsXLogFileName, XLogFromFileName};
use postgres_ffi::{dispatch_pgversion, XLogSegNo, PG_TLI};
use postgres_ffi::{XLogSegNo, PG_TLI};
use remote_storage::RemotePath;
use std::cmp::{max, min};
use std::io::{self, SeekFrom};
@@ -138,13 +138,19 @@ impl PhysicalStorage {
let write_lsn = if state.commit_lsn == Lsn(0) {
Lsn(0)
} else {
let version = state.server.pg_version / 10000;
dispatch_pgversion!(
version,
pgv::xlog_utils::find_end_of_wal(&timeline_dir, wal_seg_size, state.commit_lsn,)?,
bail!("unsupported postgres version: {}", version)
)
match state.server.pg_version / 10000 {
14 => postgres_ffi::v14::xlog_utils::find_end_of_wal(
&timeline_dir,
wal_seg_size,
state.commit_lsn,
)?,
15 => postgres_ffi::v15::xlog_utils::find_end_of_wal(
&timeline_dir,
wal_seg_size,
state.commit_lsn,
)?,
_ => bail!("unsupported postgres version: {}", state.server.pg_version),
}
};
// TODO: do we really know that write_lsn is fully flushed to disk?

View File

@@ -139,7 +139,7 @@ const reportSummary = async (params) => {
// Print test resuls from the newest to the oldest Postgres version for release and debug builds.
for (const pgVersion of Array.from(pgVersions).sort().reverse()) {
if (Object.keys(failedTests[pgVersion]).length > 0) {
summary += `#### Failures on Postgres ${pgVersion}\n\n`
summary += `#### Failures on Posgres ${pgVersion}\n\n`
for (const [testName, tests] of Object.entries(failedTests[pgVersion])) {
const links = []
for (const test of tests) {
@@ -188,7 +188,7 @@ const reportSummary = async (params) => {
}
const parseCoverageSummary = async ({ summaryJsonUrl, coverageUrl, fetch }) => {
let summary = `\n### Code coverage ([full report](${coverageUrl}))\n`
let summary = `### Code coverage [full report](${coverageUrl})\n`
const coverage = await (await fetch(summaryJsonUrl)).json()
for (const covType of Object.keys(coverage).sort()) {
@@ -205,22 +205,10 @@ const parseCoverageSummary = async ({ summaryJsonUrl, coverageUrl, fetch }) => {
}
module.exports = async ({ github, context, fetch, report, coverage }) => {
// Marker to find the comment in the subsequent runs
const startMarker = `<!--AUTOMATIC COMMENT START #${context.payload.number}-->`
// If we run the script in the PR or in the branch (main/release/...)
const isPullRequest = !!context.payload.pull_request
// Which PR to comment (for ci-run/pr-* it will comment the parent PR, not the ci-run/pr-* PR)
let prToComment
if (isPullRequest) {
const branchName = context.payload.pull_request.head.ref.replace(/^refs\/heads\//, "")
const match = branchName.match(/ci-run\/pr-(?<prNumber>\d+)/)?.groups
if (match) {
({ prNumber } = match)
prToComment = parseInt(prNumber, 10)
} else {
prToComment = context.payload.number
}
}
// Marker to find the comment in the subsequent runs
const startMarker = `<!--AUTOMATIC COMMENT START #${prToComment}-->`
// Latest commit in PR or in the branch
const commitSha = isPullRequest ? context.payload.pull_request.head.sha : context.sha
// Let users know that the comment is updated automatically
@@ -268,7 +256,7 @@ module.exports = async ({ github, context, fetch, report, coverage }) => {
commentBody += "```\n"
}
} else {
commentBody += `\n#### Test coverage report is not available\n`
commentBody += `#### Test coverage report is not avaibale\n`
}
commentBody += autoupdateNotice
@@ -279,7 +267,7 @@ module.exports = async ({ github, context, fetch, report, coverage }) => {
listCommentsFn = github.rest.issues.listComments
updateCommentFn = github.rest.issues.updateComment
issueNumberOrSha = {
issue_number: prToComment,
issue_number: context.payload.number,
}
} else {
updateCommentFn = github.rest.repos.updateCommitComment

View File

@@ -14,7 +14,6 @@ from fixtures.neon_fixtures import (
PgProtocol,
RemotePostgres,
VanillaPostgres,
wait_for_last_flush_lsn,
)
from fixtures.pg_stats import PgStatTable
@@ -130,7 +129,6 @@ class NeonCompare(PgCompare):
return self._pg_bin
def flush(self):
wait_for_last_flush_lsn(self.env, self._pg, self.tenant, self.timeline)
self.pageserver_http_client.timeline_checkpoint(self.tenant, self.timeline)
self.pageserver_http_client.timeline_gc(self.tenant, self.timeline, 0)

View File

@@ -24,6 +24,7 @@ from urllib.parse import urlparse
import asyncpg
import backoff
import boto3
import jwt
import psycopg2
import pytest
@@ -31,6 +32,7 @@ import requests
from _pytest.config import Config
from _pytest.config.argparsing import Parser
from _pytest.fixtures import FixtureRequest
from mypy_boto3_s3 import S3Client
# Type-related stuff
from psycopg2.extensions import connection as PgConnection
@@ -45,10 +47,11 @@ from fixtures.pageserver.utils import wait_for_last_record_lsn, wait_for_upload
from fixtures.pg_version import PgVersion
from fixtures.port_distributor import PortDistributor
from fixtures.remote_storage import (
LocalFsStorage,
MockS3Server,
RemoteStorage,
RemoteStorageKind,
RemoteStorageUser,
RemoteStorageUsers,
S3Storage,
remote_storage_to_toml_inline_table,
)
@@ -413,10 +416,10 @@ class NeonEnvBuilder:
pg_version: PgVersion,
test_name: str,
test_output_dir: Path,
pageserver_remote_storage: Optional[RemoteStorage] = None,
remote_storage: Optional[RemoteStorage] = None,
remote_storage_users: RemoteStorageUsers = RemoteStorageUsers.PAGESERVER,
pageserver_config_override: Optional[str] = None,
num_safekeepers: int = 1,
num_pageservers: int = 1,
# Use non-standard SK ids to check for various parsing bugs
safekeepers_id_start: int = 0,
# fsync is disabled by default to make the tests go faster
@@ -431,25 +434,21 @@ class NeonEnvBuilder:
self.repo_dir = repo_dir
self.rust_log_override = rust_log_override
self.port_distributor = port_distributor
# Pageserver remote storage
self.pageserver_remote_storage = pageserver_remote_storage
# Extensions remote storage
self.remote_storage = remote_storage
self.ext_remote_storage: Optional[S3Storage] = None
# Safekeepers remote storage
self.sk_remote_storage: Optional[RemoteStorage] = None
self.remote_storage_client: Optional[S3Client] = None
self.remote_storage_users = remote_storage_users
self.broker = broker
self.run_id = run_id
self.mock_s3_server: MockS3Server = mock_s3_server
self.pageserver_config_override = pageserver_config_override
self.num_safekeepers = num_safekeepers
self.num_pageservers = num_pageservers
self.safekeepers_id_start = safekeepers_id_start
self.safekeepers_enable_fsync = safekeepers_enable_fsync
self.auth_enabled = auth_enabled
self.default_branch_name = default_branch_name
self.env: Optional[NeonEnv] = None
self.remote_storage_prefix: Optional[str] = None
self.keep_remote_storage_contents: bool = True
self.neon_binpath = neon_binpath
self.pg_distrib_dir = pg_distrib_dir
@@ -502,75 +501,150 @@ class NeonEnvBuilder:
the test didn't produce any invalid remote state.
"""
if not isinstance(self.pageserver_remote_storage, S3Storage):
if not isinstance(self.remote_storage, S3Storage):
# The scrubber can't talk to e.g. LocalFS -- it needs
# an HTTP endpoint (mock is fine) to connect to.
raise RuntimeError(
"Cannot scrub with remote_storage={self.pageserver_remote_storage}, require an S3 endpoint"
"Cannot scrub with remote_storage={self.remote_storage}, require an S3 endpoint"
)
self.scrub_on_exit = True
def enable_pageserver_remote_storage(
def enable_remote_storage(
self,
remote_storage_kind: RemoteStorageKind,
force_enable: bool = True,
enable_remote_extensions: bool = False,
):
assert self.pageserver_remote_storage is None, "remote storage is enabled already"
ret = self._configure_and_create_remote_storage(
remote_storage_kind, RemoteStorageUser.PAGESERVER
)
self.pageserver_remote_storage = ret
bucket_name = re.sub(r"[_\[\]]", "-", self.test_name)[:63]
def enable_extensions_remote_storage(self, kind: RemoteStorageKind):
assert self.ext_remote_storage is None, "already configured extensions remote storage"
if remote_storage_kind == RemoteStorageKind.NOOP:
return
elif remote_storage_kind == RemoteStorageKind.LOCAL_FS:
self.enable_local_fs_remote_storage(force_enable=force_enable)
elif remote_storage_kind == RemoteStorageKind.MOCK_S3:
self.enable_mock_s3_remote_storage(
bucket_name=bucket_name,
force_enable=force_enable,
enable_remote_extensions=enable_remote_extensions,
)
elif remote_storage_kind == RemoteStorageKind.REAL_S3:
self.enable_real_s3_remote_storage(
test_name=bucket_name,
force_enable=force_enable,
enable_remote_extensions=enable_remote_extensions,
)
else:
raise RuntimeError(f"Unknown storage type: {remote_storage_kind}")
# there is an assumption that REAL_S3 for extensions is never
# cleaned up these are also special in that they have a hardcoded
# bucket and region, which is most likely the same as our normal
ext = self._configure_and_create_remote_storage(
kind,
RemoteStorageUser.EXTENSIONS,
bucket_name="neon-dev-extensions-eu-central-1",
bucket_region="eu-central-1",
)
assert isinstance(
ext, S3Storage
), "unsure why, but only MOCK_S3 and REAL_S3 are currently supported for extensions"
ext.cleanup = False
self.ext_remote_storage = ext
self.remote_storage_kind = remote_storage_kind
def enable_safekeeper_remote_storage(self, kind: RemoteStorageKind):
assert self.sk_remote_storage is None, "sk_remote_storage already configured"
def enable_local_fs_remote_storage(self, force_enable: bool = True):
"""
Sets up the pageserver to use the local fs at the `test_dir/local_fs_remote_storage` path.
Errors, if the pageserver has some remote storage configuration already, unless `force_enable` is not set to `True`.
"""
assert force_enable or self.remote_storage is None, "remote storage is enabled already"
self.remote_storage = LocalFsStorage(Path(self.repo_dir / "local_fs_remote_storage"))
self.sk_remote_storage = self._configure_and_create_remote_storage(
kind, RemoteStorageUser.SAFEKEEPER
)
def _configure_and_create_remote_storage(
def enable_mock_s3_remote_storage(
self,
kind: RemoteStorageKind,
user: RemoteStorageUser,
bucket_name: Optional[str] = None,
bucket_region: Optional[str] = None,
) -> Optional[RemoteStorage]:
ret = kind.configure(
self.repo_dir,
self.mock_s3_server,
str(self.run_id),
self.test_name,
user,
bucket_name: str,
force_enable: bool = True,
enable_remote_extensions: bool = False,
):
"""
Sets up the pageserver to use the S3 mock server, creates the bucket, if it's not present already.
Starts up the mock server, if that does not run yet.
Errors, if the pageserver has some remote storage configuration already, unless `force_enable` is not set to `True`.
Also creates the bucket for extensions, self.ext_remote_storage bucket
"""
assert force_enable or self.remote_storage is None, "remote storage is enabled already"
mock_endpoint = self.mock_s3_server.endpoint()
mock_region = self.mock_s3_server.region()
self.remote_storage_client = boto3.client(
"s3",
endpoint_url=mock_endpoint,
region_name=mock_region,
aws_access_key_id=self.mock_s3_server.access_key(),
aws_secret_access_key=self.mock_s3_server.secret_key(),
)
self.remote_storage_client.create_bucket(Bucket=bucket_name)
self.remote_storage = S3Storage(
bucket_name=bucket_name,
bucket_region=bucket_region,
endpoint=mock_endpoint,
bucket_region=mock_region,
access_key=self.mock_s3_server.access_key(),
secret_key=self.mock_s3_server.secret_key(),
prefix_in_bucket="pageserver",
)
if kind == RemoteStorageKind.MOCK_S3:
assert isinstance(ret, S3Storage)
ret.client.create_bucket(Bucket=ret.bucket_name)
elif kind == RemoteStorageKind.REAL_S3:
assert isinstance(ret, S3Storage)
assert ret.cleanup, "we should not leave files in REAL_S3"
if enable_remote_extensions:
self.ext_remote_storage = S3Storage(
bucket_name=bucket_name,
endpoint=mock_endpoint,
bucket_region=mock_region,
access_key=self.mock_s3_server.access_key(),
secret_key=self.mock_s3_server.secret_key(),
prefix_in_bucket="ext",
)
return ret
def enable_real_s3_remote_storage(
self,
test_name: str,
force_enable: bool = True,
enable_remote_extensions: bool = False,
):
"""
Sets up configuration to use real s3 endpoint without mock server
"""
assert force_enable or self.remote_storage is None, "remote storage is enabled already"
access_key = os.getenv("AWS_ACCESS_KEY_ID")
assert access_key, "no aws access key provided"
secret_key = os.getenv("AWS_SECRET_ACCESS_KEY")
assert secret_key, "no aws access key provided"
# session token is needed for local runs with sso auth
session_token = os.getenv("AWS_SESSION_TOKEN")
bucket_name = os.getenv("REMOTE_STORAGE_S3_BUCKET")
assert bucket_name, "no remote storage bucket name provided"
region = os.getenv("REMOTE_STORAGE_S3_REGION")
assert region, "no remote storage region provided"
# do not leave data in real s3
self.keep_remote_storage_contents = False
# construct a prefix inside bucket for the particular test case and test run
self.remote_storage_prefix = f"{self.run_id}/{test_name}"
self.remote_storage_client = boto3.client(
"s3",
region_name=region,
aws_access_key_id=access_key,
aws_secret_access_key=secret_key,
aws_session_token=session_token,
)
self.remote_storage = S3Storage(
bucket_name=bucket_name,
bucket_region=region,
access_key=access_key,
secret_key=secret_key,
prefix_in_bucket=self.remote_storage_prefix,
)
if enable_remote_extensions:
self.ext_remote_storage = S3Storage(
bucket_name="neon-dev-extensions-eu-central-1",
bucket_region="eu-central-1",
access_key=access_key,
secret_key=secret_key,
prefix_in_bucket=None,
)
def cleanup_local_storage(self):
if self.preserve_database_files:
@@ -595,10 +669,60 @@ class NeonEnvBuilder:
directory_to_clean.rmdir()
def cleanup_remote_storage(self):
# extensions are currently not cleaned up, disabled when creating
for x in [self.pageserver_remote_storage, self.ext_remote_storage, self.sk_remote_storage]:
if isinstance(x, S3Storage):
x.do_cleanup()
# here wee check for true remote storage, no the local one
# local cleanup is not needed after test because in ci all env will be destroyed anyway
if self.remote_storage_prefix is None:
log.info("no remote storage was set up, skipping cleanup")
return
# Making mypy happy with allowing only `S3Storage` further.
# `self.remote_storage_prefix` is coupled with `S3Storage` storage type,
# so this line effectively a no-op
assert isinstance(self.remote_storage, S3Storage)
assert self.remote_storage_client is not None
if self.keep_remote_storage_contents:
log.info("keep_remote_storage_contents skipping remote storage cleanup")
return
log.info(
"removing data from test s3 bucket %s by prefix %s",
self.remote_storage.bucket_name,
self.remote_storage_prefix,
)
paginator = self.remote_storage_client.get_paginator("list_objects_v2")
pages = paginator.paginate(
Bucket=self.remote_storage.bucket_name,
Prefix=self.remote_storage_prefix,
)
# Using Any because DeleteTypeDef (from boto3-stubs) doesn't fit our case
objects_to_delete: Any = {"Objects": []}
cnt = 0
for item in pages.search("Contents"):
# weirdly when nothing is found it returns [None]
if item is None:
break
objects_to_delete["Objects"].append({"Key": item["Key"]})
# flush once aws limit reached
if len(objects_to_delete["Objects"]) >= 1000:
self.remote_storage_client.delete_objects(
Bucket=self.remote_storage.bucket_name,
Delete=objects_to_delete,
)
objects_to_delete = {"Objects": []}
cnt += 1
# flush rest
if len(objects_to_delete["Objects"]):
self.remote_storage_client.delete_objects(
Bucket=self.remote_storage.bucket_name,
Delete=objects_to_delete,
)
log.info(f"deleted {cnt} objects from remote storage")
def __enter__(self) -> "NeonEnvBuilder":
return self
@@ -615,9 +739,7 @@ class NeonEnvBuilder:
self.env.endpoints.stop_all()
for sk in self.env.safekeepers:
sk.stop(immediate=True)
for pageserver in self.env.pageservers:
pageserver.stop(immediate=True)
self.env.pageserver.stop(immediate=True)
if self.env.attachment_service is not None:
self.env.attachment_service.stop(immediate=True)
@@ -648,8 +770,7 @@ class NeonEnvBuilder:
if cleanup_error is not None:
raise cleanup_error
for pageserver in self.env.pageservers:
pageserver.assert_no_errors()
self.env.pageserver.assert_no_errors()
class NeonEnv:
@@ -669,7 +790,8 @@ class NeonEnv:
postgres - A factory object for creating postgres compute nodes.
pageservers - An array containing objects representing the pageservers
pageserver - An object that contains functions for manipulating and
connecting to the pageserver
safekeepers - An array containing objects representing the safekeepers
@@ -684,7 +806,7 @@ class NeonEnv:
the tenant id
"""
BASE_PAGESERVER_ID = 1
PAGESERVER_ID = 1
def __init__(self, config: NeonEnvBuilder):
self.repo_dir = config.repo_dir
@@ -694,20 +816,15 @@ class NeonEnv:
self.neon_cli = NeonCli(env=self)
self.endpoints = EndpointFactory(self)
self.safekeepers: List[Safekeeper] = []
self.pageservers: List[NeonPageserver] = []
self.broker = config.broker
self.pageserver_remote_storage = config.pageserver_remote_storage
self.ext_remote_storage = config.ext_remote_storage
self.safekeepers_remote_storage = config.sk_remote_storage
self.remote_storage = config.remote_storage
self.remote_storage_users = config.remote_storage_users
self.pg_version = config.pg_version
# Binary path for pageserver, safekeeper, etc
self.neon_binpath = config.neon_binpath
# Binary path for neon_local test-specific binaries: may be overridden
# after construction for compat testing
self.neon_local_binpath = config.neon_binpath
self.pg_distrib_dir = config.pg_distrib_dir
self.endpoint_counter = 0
self.pageserver_config_override = config.pageserver_config_override
self.remote_storage_client = config.remote_storage_client
self.ext_remote_storage = config.ext_remote_storage
# generate initial tenant ID here instead of letting 'neon init' generate it,
# so that we don't need to dig it out of the config file afterwards.
@@ -729,13 +846,6 @@ class NeonEnv:
"""
)
if self.control_plane_api is not None:
toml += textwrap.dedent(
f"""
control_plane_api = '{self.control_plane_api}'
"""
)
toml += textwrap.dedent(
f"""
[broker]
@@ -744,36 +854,36 @@ class NeonEnv:
)
# Create config for pageserver
pageserver_port = PageserverPort(
pg=self.port_distributor.get_port(),
http=self.port_distributor.get_port(),
)
http_auth_type = "NeonJWT" if config.auth_enabled else "Trust"
pg_auth_type = "NeonJWT" if config.auth_enabled else "Trust"
for ps_id in range(
self.BASE_PAGESERVER_ID, self.BASE_PAGESERVER_ID + config.num_pageservers
):
pageserver_port = PageserverPort(
pg=self.port_distributor.get_port(),
http=self.port_distributor.get_port(),
)
toml += textwrap.dedent(
f"""
[pageserver]
id={self.PAGESERVER_ID}
listen_pg_addr = 'localhost:{pageserver_port.pg}'
listen_http_addr = 'localhost:{pageserver_port.http}'
pg_auth_type = '{pg_auth_type}'
http_auth_type = '{http_auth_type}'
"""
)
if self.control_plane_api is not None:
toml += textwrap.dedent(
f"""
[[pageservers]]
id={ps_id}
listen_pg_addr = 'localhost:{pageserver_port.pg}'
listen_http_addr = 'localhost:{pageserver_port.http}'
pg_auth_type = '{pg_auth_type}'
http_auth_type = '{http_auth_type}'
control_plane_api = '{self.control_plane_api}'
"""
)
# Create a corresponding NeonPageserver object
self.pageservers.append(
NeonPageserver(
self,
ps_id,
port=pageserver_port,
config_override=config.pageserver_config_override,
)
)
# Create a corresponding NeonPageserver object
self.pageserver = NeonPageserver(
self, port=pageserver_port, config_override=config.pageserver_config_override
)
# Create config and a Safekeeper object for each safekeeper
for i in range(1, config.num_safekeepers + 1):
port = SafekeeperPort(
@@ -797,10 +907,13 @@ class NeonEnv:
auth_enabled = true
"""
)
if config.sk_remote_storage is not None:
if (
bool(self.remote_storage_users & RemoteStorageUsers.SAFEKEEPER)
and self.remote_storage is not None
):
toml += textwrap.dedent(
f"""
remote_storage = "{remote_storage_to_toml_inline_table(config.sk_remote_storage)}"
remote_storage = "{remote_storage_to_toml_inline_table(self.remote_storage)}"
"""
)
safekeeper = Safekeeper(env=self, id=id, port=port)
@@ -815,55 +928,25 @@ class NeonEnv:
if self.attachment_service is not None:
self.attachment_service.start()
for pageserver in self.pageservers:
pageserver.start()
self.pageserver.start()
for safekeeper in self.safekeepers:
safekeeper.start()
@property
def pageserver(self) -> NeonPageserver:
"""
For tests that are naive to multiple pageservers: give them the 1st in the list, and
assert that there is only one. Tests with multiple pageservers should always use
get_pageserver with an explicit ID.
"""
assert len(self.pageservers) == 1
return self.pageservers[0]
def get_pageserver(self, id: Optional[int]) -> NeonPageserver:
"""
Look up a pageserver by its node ID.
As a convenience for tests that do not use multiple pageservers, passing None
will yield the same default pageserver as `self.pageserver`.
"""
if id is None:
return self.pageserver
for ps in self.pageservers:
if ps.id == id:
return ps
raise RuntimeError(f"Pageserver with ID {id} not found")
def get_safekeeper_connstrs(self) -> str:
"""Get list of safekeeper endpoints suitable for safekeepers GUC"""
return ",".join(f"localhost:{wa.port.pg}" for wa in self.safekeepers)
def timeline_dir(
self, tenant_id: TenantId, timeline_id: TimelineId, pageserver_id: Optional[int] = None
) -> Path:
def timeline_dir(self, tenant_id: TenantId, timeline_id: TimelineId) -> Path:
"""Get a timeline directory's path based on the repo directory of the test environment"""
return (
self.tenant_dir(tenant_id, pageserver_id=pageserver_id) / "timelines" / str(timeline_id)
)
return self.tenant_dir(tenant_id) / "timelines" / str(timeline_id)
def tenant_dir(self, tenant_id: TenantId, pageserver_id: Optional[int] = None) -> Path:
def tenant_dir(
self,
tenant_id: TenantId,
) -> Path:
"""Get a tenant directory's path based on the repo directory of the test environment"""
return self.get_pageserver(pageserver_id).workdir / "tenants" / str(tenant_id)
return self.repo_dir / "tenants" / str(tenant_id)
def get_pageserver_version(self) -> str:
bin_pageserver = str(self.neon_binpath / "pageserver")
@@ -1031,7 +1114,6 @@ class AbstractNeonCli(abc.ABC):
extra_env_vars: Optional[Dict[str, str]] = None,
check_return_code=True,
timeout=None,
local_binpath=False,
) -> "subprocess.CompletedProcess[str]":
"""
Run the command with the specified arguments.
@@ -1045,19 +1127,12 @@ class AbstractNeonCli(abc.ABC):
>>> log.info(result.stdout)
If `check_return_code`, on non-zero exit code logs failure and raises.
If `local_binpath` is true, then we are invoking a test utility
"""
assert type(arguments) == list
assert type(self.COMMAND) == str
if local_binpath:
# Test utility
bin_neon = str(self.env.neon_local_binpath / self.COMMAND)
else:
# Normal binary
bin_neon = str(self.env.neon_binpath / self.COMMAND)
bin_neon = str(self.env.neon_binpath / self.COMMAND)
args = [bin_neon] + arguments
log.info('Running command "{}"'.format(" ".join(args)))
@@ -1111,10 +1186,6 @@ class NeonCli(AbstractNeonCli):
COMMAND = "neon_local"
def raw_cli(self, *args, **kwargs) -> subprocess.CompletedProcess[str]:
kwargs["local_binpath"] = True
return super().raw_cli(*args, **kwargs)
def create_tenant(
self,
tenant_id: Optional[TenantId] = None,
@@ -1268,17 +1339,18 @@ class NeonCli(AbstractNeonCli):
cmd = ["init", f"--config={tmp.name}", "--pg-version", self.env.pg_version]
storage = self.env.pageserver_remote_storage
append_pageserver_param_overrides(
params_to_update=cmd,
remote_storage=storage,
pageserver_config_override=self.env.pageserver_config_override,
remote_storage=self.env.remote_storage,
remote_storage_users=self.env.remote_storage_users,
pageserver_config_override=self.env.pageserver.config_override,
)
s3_env_vars = None
if isinstance(storage, S3Storage):
s3_env_vars = storage.access_env_vars()
if self.env.remote_storage is not None and isinstance(
self.env.remote_storage, S3Storage
):
s3_env_vars = self.env.remote_storage.access_env_vars()
res = self.raw_cli(cmd, extra_env_vars=s3_env_vars)
res.check_returncode()
return res
@@ -1295,26 +1367,25 @@ class NeonCli(AbstractNeonCli):
def pageserver_start(
self,
id: int,
overrides: Tuple[str, ...] = (),
extra_env_vars: Optional[Dict[str, str]] = None,
) -> "subprocess.CompletedProcess[str]":
start_args = ["pageserver", "start", f"--id={id}", *overrides]
storage = self.env.pageserver_remote_storage
start_args = ["pageserver", "start", *overrides]
append_pageserver_param_overrides(
params_to_update=start_args,
remote_storage=storage,
pageserver_config_override=self.env.pageserver_config_override,
remote_storage=self.env.remote_storage,
remote_storage_users=self.env.remote_storage_users,
pageserver_config_override=self.env.pageserver.config_override,
)
if isinstance(storage, S3Storage):
s3_env_vars = storage.access_env_vars()
if self.env.remote_storage is not None and isinstance(self.env.remote_storage, S3Storage):
s3_env_vars = self.env.remote_storage.access_env_vars()
extra_env_vars = (extra_env_vars or {}) | s3_env_vars
return self.raw_cli(start_args, extra_env_vars=extra_env_vars)
def pageserver_stop(self, id: int, immediate=False) -> "subprocess.CompletedProcess[str]":
cmd = ["pageserver", "stop", f"--id={id}"]
def pageserver_stop(self, immediate=False) -> "subprocess.CompletedProcess[str]":
cmd = ["pageserver", "stop"]
if immediate:
cmd.extend(["-m", "immediate"])
@@ -1325,8 +1396,8 @@ class NeonCli(AbstractNeonCli):
self, id: int, extra_opts: Optional[List[str]] = None
) -> "subprocess.CompletedProcess[str]":
s3_env_vars = None
if isinstance(self.env.safekeepers_remote_storage, S3Storage):
s3_env_vars = self.env.safekeepers_remote_storage.access_env_vars()
if self.env.remote_storage is not None and isinstance(self.env.remote_storage, S3Storage):
s3_env_vars = self.env.remote_storage.access_env_vars()
if extra_opts is not None:
extra_opts = [f"-e={opt}" for opt in extra_opts]
@@ -1355,7 +1426,6 @@ class NeonCli(AbstractNeonCli):
tenant_id: Optional[TenantId] = None,
hot_standby: bool = False,
lsn: Optional[Lsn] = None,
pageserver_id: Optional[int] = None,
) -> "subprocess.CompletedProcess[str]":
args = [
"endpoint",
@@ -1377,8 +1447,6 @@ class NeonCli(AbstractNeonCli):
args.append(endpoint_id)
if hot_standby:
args.extend(["--hot-standby", "true"])
if pageserver_id is not None:
args.extend(["--pageserver-id", str(pageserver_id)])
res = self.raw_cli(args)
res.check_returncode()
@@ -1394,7 +1462,6 @@ class NeonCli(AbstractNeonCli):
lsn: Optional[Lsn] = None,
branch_name: Optional[str] = None,
remote_ext_config: Optional[str] = None,
pageserver_id: Optional[int] = None,
) -> "subprocess.CompletedProcess[str]":
args = [
"endpoint",
@@ -1417,13 +1484,10 @@ class NeonCli(AbstractNeonCli):
args.extend(["--branch-name", branch_name])
if endpoint_id is not None:
args.append(endpoint_id)
if pageserver_id is not None:
args.extend(["--pageserver-id", str(pageserver_id)])
storage = self.env.ext_remote_storage
s3_env_vars = None
if isinstance(storage, S3Storage):
s3_env_vars = storage.access_env_vars()
if self.env.remote_storage is not None and isinstance(self.env.remote_storage, S3Storage):
s3_env_vars = self.env.remote_storage.access_env_vars()
res = self.raw_cli(args, extra_env_vars=s3_env_vars)
res.check_returncode()
@@ -1518,12 +1582,9 @@ class NeonPageserver(PgProtocol):
TEMP_FILE_SUFFIX = "___temp"
def __init__(
self, env: NeonEnv, id: int, port: PageserverPort, config_override: Optional[str] = None
):
def __init__(self, env: NeonEnv, port: PageserverPort, config_override: Optional[str] = None):
super().__init__(host="localhost", port=port.pg, user="cloud_admin")
self.env = env
self.id = id
self.running = False
self.service_port = port
self.config_override = config_override
@@ -1583,7 +1644,6 @@ class NeonPageserver(PgProtocol):
".*took more than expected to complete.*",
# these can happen during shutdown, but it should not be a reason to fail a test
".*completed, took longer than expected.*",
'.*registered custom resource manager "neon".*',
]
def start(
@@ -1598,9 +1658,7 @@ class NeonPageserver(PgProtocol):
"""
assert self.running is False
self.env.neon_cli.pageserver_start(
self.id, overrides=overrides, extra_env_vars=extra_env_vars
)
self.env.neon_cli.pageserver_start(overrides=overrides, extra_env_vars=extra_env_vars)
self.running = True
return self
@@ -1610,7 +1668,7 @@ class NeonPageserver(PgProtocol):
Returns self.
"""
if self.running:
self.env.neon_cli.pageserver_stop(self.id, immediate)
self.env.neon_cli.pageserver_stop(immediate)
self.running = False
return self
@@ -1636,12 +1694,8 @@ class NeonPageserver(PgProtocol):
is_testing_enabled_or_skip=self.is_testing_enabled_or_skip,
)
@property
def workdir(self) -> Path:
return Path(os.path.join(self.env.repo_dir, f"pageserver_{self.id}"))
def assert_no_errors(self):
logfile = open(os.path.join(self.workdir, "pageserver.log"), "r")
logfile = open(os.path.join(self.env.repo_dir, "pageserver.log"), "r")
error_or_warn = re.compile(r"\s(ERROR|WARN)")
errors = []
while True:
@@ -1664,7 +1718,7 @@ class NeonPageserver(PgProtocol):
def log_contains(self, pattern: str) -> Optional[str]:
"""Check that the pageserver log contains a line that matches the given regex"""
logfile = open(os.path.join(self.workdir, "pageserver.log"), "r")
logfile = open(os.path.join(self.env.repo_dir, "pageserver.log"), "r")
contains_re = re.compile(pattern)
@@ -1694,23 +1748,24 @@ class NeonPageserver(PgProtocol):
if self.env.attachment_service is not None:
response = requests.post(
f"{self.env.control_plane_api}/attach_hook",
json={"tenant_id": str(tenant_id), "pageserver_id": self.id},
json={"tenant_id": str(tenant_id), "pageserver_id": self.env.PAGESERVER_ID},
)
response.raise_for_status()
generation = response.json()["gen"]
else:
generation = None
client = self.http_client()
client = self.env.pageserver.http_client()
return client.tenant_attach(tenant_id, config, config_null, generation=generation)
def append_pageserver_param_overrides(
params_to_update: List[str],
remote_storage: Optional[RemoteStorage],
remote_storage_users: RemoteStorageUsers,
pageserver_config_override: Optional[str] = None,
):
if remote_storage is not None:
if bool(remote_storage_users & RemoteStorageUsers.PAGESERVER) and remote_storage is not None:
remote_storage_toml_table = remote_storage_to_toml_inline_table(remote_storage)
params_to_update.append(
@@ -2312,7 +2367,6 @@ class Endpoint(PgProtocol):
hot_standby: bool = False,
lsn: Optional[Lsn] = None,
config_lines: Optional[List[str]] = None,
pageserver_id: Optional[int] = None,
) -> "Endpoint":
"""
Create a new Postgres endpoint.
@@ -2334,7 +2388,6 @@ class Endpoint(PgProtocol):
hot_standby=hot_standby,
pg_port=self.pg_port,
http_port=self.http_port,
pageserver_id=pageserver_id,
)
path = Path("endpoints") / self.endpoint_id / "pgdata"
self.pgdata_dir = os.path.join(self.env.repo_dir, path)
@@ -2348,9 +2401,7 @@ class Endpoint(PgProtocol):
return self
def start(
self, remote_ext_config: Optional[str] = None, pageserver_id: Optional[int] = None
) -> "Endpoint":
def start(self, remote_ext_config: Optional[str] = None) -> "Endpoint":
"""
Start the Postgres instance.
Returns self.
@@ -2367,7 +2418,6 @@ class Endpoint(PgProtocol):
tenant_id=self.tenant_id,
safekeepers=self.active_safekeepers,
remote_ext_config=remote_ext_config,
pageserver_id=pageserver_id,
)
self.running = True
@@ -2458,7 +2508,6 @@ class Endpoint(PgProtocol):
lsn: Optional[Lsn] = None,
config_lines: Optional[List[str]] = None,
remote_ext_config: Optional[str] = None,
pageserver_id: Optional[int] = None,
) -> "Endpoint":
"""
Create an endpoint, apply config, and start Postgres.
@@ -2473,7 +2522,6 @@ class Endpoint(PgProtocol):
config_lines=config_lines,
hot_standby=hot_standby,
lsn=lsn,
pageserver_id=pageserver_id,
).start(remote_ext_config=remote_ext_config)
log.info(f"Postgres startup took {time.time() - started_at} seconds")
@@ -2509,7 +2557,6 @@ class EndpointFactory:
hot_standby: bool = False,
config_lines: Optional[List[str]] = None,
remote_ext_config: Optional[str] = None,
pageserver_id: Optional[int] = None,
) -> Endpoint:
ep = Endpoint(
self.env,
@@ -2527,7 +2574,6 @@ class EndpointFactory:
config_lines=config_lines,
lsn=lsn,
remote_ext_config=remote_ext_config,
pageserver_id=pageserver_id,
)
def create(
@@ -2812,8 +2858,8 @@ class S3Scrubber:
self.log_dir = log_dir
def scrubber_cli(self, args, timeout):
assert isinstance(self.env.pageserver_remote_storage, S3Storage)
s3_storage = self.env.pageserver_remote_storage
assert isinstance(self.env.remote_storage, S3Storage)
s3_storage = self.env.remote_storage
env = {
"REGION": s3_storage.bucket_region,
@@ -2962,7 +3008,9 @@ def list_files_to_compare(pgdata_dir: Path) -> List[str]:
# pg is the existing and running compute node, that we want to compare with a basebackup
def check_restored_datadir_content(
test_output_dir: Path, env: NeonEnv, endpoint: Endpoint, pageserver_id: Optional[int] = None
test_output_dir: Path,
env: NeonEnv,
endpoint: Endpoint,
):
# Get the timeline ID. We need it for the 'basebackup' command
timeline_id = TimelineId(endpoint.safe_psql("SHOW neon.timeline_id")[0][0])
@@ -2987,7 +3035,7 @@ def check_restored_datadir_content(
cmd = rf"""
{psql_path} \
--no-psqlrc \
postgres://localhost:{env.get_pageserver(pageserver_id).service_port.pg} \
postgres://localhost:{env.pageserver.service_port.pg} \
-c 'basebackup {endpoint.tenant_id} {timeline_id}' \
| tar -x -C {restored_dir_path}
"""
@@ -3037,32 +3085,19 @@ def check_restored_datadir_content(
def wait_for_last_flush_lsn(
env: NeonEnv,
endpoint: Endpoint,
tenant: TenantId,
timeline: TimelineId,
pageserver_id: Optional[int] = None,
env: NeonEnv, endpoint: Endpoint, tenant: TenantId, timeline: TimelineId
) -> Lsn:
"""Wait for pageserver to catch up the latest flush LSN, returns the last observed lsn."""
last_flush_lsn = Lsn(endpoint.safe_psql("SELECT pg_current_wal_flush_lsn()")[0][0])
return wait_for_last_record_lsn(
env.get_pageserver(pageserver_id).http_client(), tenant, timeline, last_flush_lsn
)
return wait_for_last_record_lsn(env.pageserver.http_client(), tenant, timeline, last_flush_lsn)
def wait_for_wal_insert_lsn(
env: NeonEnv,
endpoint: Endpoint,
tenant: TenantId,
timeline: TimelineId,
pageserver_id: Optional[int] = None,
env: NeonEnv, endpoint: Endpoint, tenant: TenantId, timeline: TimelineId
) -> Lsn:
"""Wait for pageserver to catch up the latest flush LSN, returns the last observed lsn."""
last_flush_lsn = Lsn(endpoint.safe_psql("SELECT pg_current_wal_insert_lsn()")[0][0])
return wait_for_last_record_lsn(
env.get_pageserver(pageserver_id).http_client(), tenant, timeline, last_flush_lsn
)
return wait_for_last_record_lsn(env.pageserver.http_client(), tenant, timeline, last_flush_lsn)
def fork_at_current_lsn(
@@ -3082,21 +3117,15 @@ def fork_at_current_lsn(
def last_flush_lsn_upload(
env: NeonEnv,
endpoint: Endpoint,
tenant_id: TenantId,
timeline_id: TimelineId,
pageserver_id: Optional[int] = None,
env: NeonEnv, endpoint: Endpoint, tenant_id: TenantId, timeline_id: TimelineId
) -> Lsn:
"""
Wait for pageserver to catch to the latest flush LSN of given endpoint,
checkpoint pageserver, and wait for it to be uploaded (remote_consistent_lsn
reaching flush LSN).
"""
last_flush_lsn = wait_for_last_flush_lsn(
env, endpoint, tenant_id, timeline_id, pageserver_id=pageserver_id
)
ps_http = env.get_pageserver(pageserver_id).http_client()
last_flush_lsn = wait_for_last_flush_lsn(env, endpoint, tenant_id, timeline_id)
ps_http = env.pageserver.http_client()
wait_for_last_record_lsn(ps_http, tenant_id, timeline_id, last_flush_lsn)
# force a checkpoint to trigger upload
ps_http.timeline_checkpoint(tenant_id, timeline_id)

View File

@@ -154,17 +154,16 @@ def wait_for_last_record_lsn(
lsn: Lsn,
) -> Lsn:
"""waits for pageserver to catch up to a certain lsn, returns the last observed lsn."""
for i in range(100):
for i in range(10):
current_lsn = last_record_lsn(pageserver_http, tenant, timeline)
if current_lsn >= lsn:
return current_lsn
if i % 10 == 0:
log.info(
"waiting for last_record_lsn to reach {}, now {}, iteration {}".format(
lsn, current_lsn, i + 1
)
log.info(
"waiting for last_record_lsn to reach {}, now {}, iteration {}".format(
lsn, current_lsn, i + 1
)
time.sleep(0.1)
)
time.sleep(1)
raise Exception(
"timed out while waiting for last_record_lsn to reach {}, was {}".format(lsn, current_lsn)
)
@@ -261,11 +260,15 @@ def list_prefix(
Note that this function takes into account prefix_in_bucket.
"""
# For local_fs we need to properly handle empty directories, which we currently dont, so for simplicity stick to s3 api.
remote = neon_env_builder.pageserver_remote_storage
assert isinstance(remote, S3Storage), "localfs is currently not supported"
assert remote.client is not None
assert neon_env_builder.remote_storage_kind in (
RemoteStorageKind.MOCK_S3,
RemoteStorageKind.REAL_S3,
)
# For mypy
assert isinstance(neon_env_builder.remote_storage, S3Storage)
assert neon_env_builder.remote_storage_client is not None
prefix_in_bucket = remote.prefix_in_bucket or ""
prefix_in_bucket = neon_env_builder.remote_storage.prefix_in_bucket or ""
if not prefix:
prefix = prefix_in_bucket
else:
@@ -274,9 +277,9 @@ def list_prefix(
prefix = "/".join((prefix_in_bucket, prefix))
# Note that this doesnt use pagination, so list is not guaranteed to be exhaustive.
response = remote.client.list_objects_v2(
response = neon_env_builder.remote_storage_client.list_objects_v2(
Delimiter="/",
Bucket=remote.bucket_name,
Bucket=neon_env_builder.remote_storage.bucket_name,
Prefix=prefix,
)
return response

View File

@@ -17,7 +17,6 @@ This fixture is used to determine which version of Postgres to use for tests.
class PgVersion(str, enum.Enum):
V14 = "14"
V15 = "15"
V16 = "16"
# Instead of making version an optional parameter in methods, we can use this fake entry
# to explicitly rely on the default server version (could be different from pg_version fixture value)
NOT_SET = "<-POSTRGRES VERSION IS NOT SET->"

View File

@@ -1,15 +1,10 @@
import enum
import hashlib
import json
import os
import re
import subprocess
from dataclasses import dataclass
from pathlib import Path
from typing import Any, Dict, List, Optional, Union
import boto3
from mypy_boto3_s3 import S3Client
from typing import Dict, List, Optional, Union
from fixtures.log_helper import log
from fixtures.types import TenantId, TimelineId
@@ -17,20 +12,6 @@ from fixtures.types import TenantId, TimelineId
TIMELINE_INDEX_PART_FILE_NAME = "index_part.json"
@enum.unique
class RemoteStorageUser(str, enum.Enum):
"""
Instead of using strings for the users, use a more strict enum.
"""
PAGESERVER = "pageserver"
EXTENSIONS = "ext"
SAFEKEEPER = "safekeeper"
def __str__(self) -> str:
return self.value
class MockS3Server:
"""
Starts a mock S3 server for testing on a port given, errors if the server fails to start or exits prematurely.
@@ -77,124 +58,6 @@ class MockS3Server:
self.subprocess.kill()
@dataclass
class LocalFsStorage:
root: Path
def tenant_path(self, tenant_id: TenantId) -> Path:
return self.root / "tenants" / str(tenant_id)
def timeline_path(self, tenant_id: TenantId, timeline_id: TimelineId) -> Path:
return self.tenant_path(tenant_id) / "timelines" / str(timeline_id)
def index_path(self, tenant_id: TenantId, timeline_id: TimelineId) -> Path:
return self.timeline_path(tenant_id, timeline_id) / TIMELINE_INDEX_PART_FILE_NAME
def index_content(self, tenant_id: TenantId, timeline_id: TimelineId):
with self.index_path(tenant_id, timeline_id).open("r") as f:
return json.load(f)
def to_toml_inline_table(self) -> str:
return f"local_path='{self.root}'"
def cleanup(self):
# no cleanup is done here, because there's NeonEnvBuilder.cleanup_local_storage which will remove everything, including localfs files
pass
@staticmethod
def component_path(repo_dir: Path, user: RemoteStorageUser) -> Path:
return repo_dir / "local_fs_remote_storage" / str(user)
@dataclass
class S3Storage:
bucket_name: str
bucket_region: str
access_key: str
secret_key: str
prefix_in_bucket: str
client: S3Client
cleanup: bool
endpoint: Optional[str] = None
def access_env_vars(self) -> Dict[str, str]:
return {
"AWS_ACCESS_KEY_ID": self.access_key,
"AWS_SECRET_ACCESS_KEY": self.secret_key,
}
def to_string(self) -> str:
return json.dumps(
{
"bucket": self.bucket_name,
"region": self.bucket_region,
"endpoint": self.endpoint,
"prefix": self.prefix_in_bucket,
}
)
def to_toml_inline_table(self) -> str:
s = [
f"bucket_name='{self.bucket_name}'",
f"bucket_region='{self.bucket_region}'",
]
if self.prefix_in_bucket is not None:
s.append(f"prefix_in_bucket='{self.prefix_in_bucket}'")
if self.endpoint is not None:
s.append(f"endpoint='{self.endpoint}'")
return ",".join(s)
def do_cleanup(self):
if not self.cleanup:
# handles previous keep_remote_storage_contents
return
log.info(
"removing data from test s3 bucket %s by prefix %s",
self.bucket_name,
self.prefix_in_bucket,
)
paginator = self.client.get_paginator("list_objects_v2")
pages = paginator.paginate(
Bucket=self.bucket_name,
Prefix=self.prefix_in_bucket,
)
# Using Any because DeleteTypeDef (from boto3-stubs) doesn't fit our case
objects_to_delete: Any = {"Objects": []}
cnt = 0
for item in pages.search("Contents"):
# weirdly when nothing is found it returns [None]
if item is None:
break
objects_to_delete["Objects"].append({"Key": item["Key"]})
# flush once aws limit reached
if len(objects_to_delete["Objects"]) >= 1000:
self.client.delete_objects(
Bucket=self.bucket_name,
Delete=objects_to_delete,
)
objects_to_delete = {"Objects": []}
cnt += 1
# flush rest
if len(objects_to_delete["Objects"]):
self.client.delete_objects(
Bucket=self.bucket_name,
Delete=objects_to_delete,
)
log.info(f"deleted {cnt} objects from remote storage")
RemoteStorage = Union[LocalFsStorage, S3Storage]
@enum.unique
class RemoteStorageKind(str, enum.Enum):
LOCAL_FS = "local_fs"
@@ -204,104 +67,6 @@ class RemoteStorageKind(str, enum.Enum):
# to ensure the test pass with or without the remote storage
NOOP = "noop"
def configure(
self,
repo_dir: Path,
mock_s3_server,
run_id: str,
test_name: str,
user: RemoteStorageUser,
bucket_name: Optional[str] = None,
bucket_region: Optional[str] = None,
) -> Optional[RemoteStorage]:
if self == RemoteStorageKind.NOOP:
return None
if self == RemoteStorageKind.LOCAL_FS:
return LocalFsStorage(LocalFsStorage.component_path(repo_dir, user))
# real_s3 uses this as part of prefix, mock_s3 uses this as part of
# bucket name, giving all users unique buckets because we have to
# create them
test_name = re.sub(r"[_\[\]]", "-", test_name)
def to_bucket_name(user: str, test_name: str) -> str:
s = f"{user}-{test_name}"
if len(s) > 63:
prefix = s[:30]
suffix = hashlib.sha256(test_name.encode()).hexdigest()[:32]
s = f"{prefix}-{suffix}"
assert len(s) == 63
return s
if self == RemoteStorageKind.MOCK_S3:
# there's a single mock_s3 server for each process running the tests
mock_endpoint = mock_s3_server.endpoint()
mock_region = mock_s3_server.region()
access_key, secret_key = mock_s3_server.access_key(), mock_s3_server.secret_key()
client = boto3.client(
"s3",
endpoint_url=mock_endpoint,
region_name=mock_region,
aws_access_key_id=access_key,
aws_secret_access_key=secret_key,
)
bucket_name = to_bucket_name(user, test_name)
log.info(
f"using mock_s3 bucket name {bucket_name} for user={user}, test_name={test_name}"
)
return S3Storage(
bucket_name=bucket_name,
endpoint=mock_endpoint,
bucket_region=mock_region,
access_key=access_key,
secret_key=secret_key,
prefix_in_bucket="",
client=client,
cleanup=False,
)
assert self == RemoteStorageKind.REAL_S3
env_access_key = os.getenv("AWS_ACCESS_KEY_ID")
assert env_access_key, "no aws access key provided"
env_secret_key = os.getenv("AWS_SECRET_ACCESS_KEY")
assert env_secret_key, "no aws access key provided"
# session token is needed for local runs with sso auth
session_token = os.getenv("AWS_SESSION_TOKEN")
bucket_name = bucket_name or os.getenv("REMOTE_STORAGE_S3_BUCKET")
assert bucket_name is not None, "no remote storage bucket name provided"
bucket_region = bucket_region or os.getenv("REMOTE_STORAGE_S3_REGION")
assert bucket_region is not None, "no remote storage region provided"
prefix_in_bucket = f"{run_id}/{test_name}/{user}"
client = boto3.client(
"s3",
region_name=bucket_region,
aws_access_key_id=env_access_key,
aws_secret_access_key=env_secret_key,
aws_session_token=session_token,
)
return S3Storage(
bucket_name=bucket_name,
bucket_region=bucket_region,
access_key=env_access_key,
secret_key=env_secret_key,
prefix_in_bucket=prefix_in_bucket,
client=client,
cleanup=True,
)
def available_remote_storages() -> List[RemoteStorageKind]:
remote_storages = [RemoteStorageKind.LOCAL_FS, RemoteStorageKind.MOCK_S3]
@@ -336,9 +101,72 @@ def s3_storage() -> RemoteStorageKind:
return RemoteStorageKind.MOCK_S3
@dataclass
class LocalFsStorage:
root: Path
def tenant_path(self, tenant_id: TenantId) -> Path:
return self.root / "tenants" / str(tenant_id)
def timeline_path(self, tenant_id: TenantId, timeline_id: TimelineId) -> Path:
return self.tenant_path(tenant_id) / "timelines" / str(timeline_id)
def index_path(self, tenant_id: TenantId, timeline_id: TimelineId) -> Path:
return self.timeline_path(tenant_id, timeline_id) / TIMELINE_INDEX_PART_FILE_NAME
def index_content(self, tenant_id: TenantId, timeline_id: TimelineId):
with self.index_path(tenant_id, timeline_id).open("r") as f:
return json.load(f)
@dataclass
class S3Storage:
bucket_name: str
bucket_region: str
access_key: str
secret_key: str
endpoint: Optional[str] = None
prefix_in_bucket: Optional[str] = ""
def access_env_vars(self) -> Dict[str, str]:
return {
"AWS_ACCESS_KEY_ID": self.access_key,
"AWS_SECRET_ACCESS_KEY": self.secret_key,
}
def to_string(self) -> str:
return json.dumps(
{
"bucket": self.bucket_name,
"region": self.bucket_region,
"endpoint": self.endpoint,
"prefix": self.prefix_in_bucket,
}
)
RemoteStorage = Union[LocalFsStorage, S3Storage]
# serialize as toml inline table
def remote_storage_to_toml_inline_table(remote_storage: RemoteStorage) -> str:
if not isinstance(remote_storage, (LocalFsStorage, S3Storage)):
if isinstance(remote_storage, LocalFsStorage):
remote_storage_config = f"local_path='{remote_storage.root}'"
elif isinstance(remote_storage, S3Storage):
remote_storage_config = f"bucket_name='{remote_storage.bucket_name}',\
bucket_region='{remote_storage.bucket_region}'"
if remote_storage.prefix_in_bucket is not None:
remote_storage_config += f",prefix_in_bucket='{remote_storage.prefix_in_bucket}'"
if remote_storage.endpoint is not None:
remote_storage_config += f",endpoint='{remote_storage.endpoint}'"
else:
raise Exception("invalid remote storage type")
return f"{{{remote_storage.to_toml_inline_table()}}}"
return f"{{{remote_storage_config}}}"
class RemoteStorageUsers(enum.Flag):
PAGESERVER = enum.auto()
SAFEKEEPER = enum.auto()

View File

@@ -1,8 +1,6 @@
import shutil
from contextlib import closing
from fixtures.compare_fixtures import NeonCompare, PgCompare
from fixtures.pg_version import PgVersion
from fixtures.compare_fixtures import PgCompare
#
@@ -30,30 +28,3 @@ def test_bulk_insert(neon_with_baseline: PgCompare):
env.report_peak_memory_use()
env.report_size()
# When testing neon, also check how long it takes the pageserver to reingest the
# wal from safekeepers. If this number is close to total runtime, then the pageserver
# is the bottleneck.
if isinstance(env, NeonCompare):
measure_recovery_time(env)
def measure_recovery_time(env: NeonCompare):
client = env.env.pageserver.http_client()
pg_version = PgVersion(client.timeline_detail(env.tenant, env.timeline)["pg_version"])
# Stop pageserver and remove tenant data
env.env.pageserver.stop()
timeline_dir = env.env.timeline_dir(env.tenant, env.timeline)
shutil.rmtree(timeline_dir)
# Start pageserver
env.env.pageserver.start()
# Measure recovery time
with env.record_duration("wal_recovery"):
# Create the tenant, which will start walingest
client.timeline_create(pg_version, env.tenant, env.timeline)
# Flush, which will also wait for lsn to catch up
env.flush()

View File

@@ -84,7 +84,7 @@ def test_startup_simple(neon_env_builder: NeonEnvBuilder, zenbenchmark: NeonBenc
# This test sometimes runs for longer than the global 5 minute timeout.
@pytest.mark.timeout(900)
@pytest.mark.timeout(600)
def test_startup(neon_env_builder: NeonEnvBuilder, zenbenchmark: NeonBenchmarker):
neon_env_builder.num_safekeepers = 3
env = neon_env_builder.init_start()

View File

@@ -48,6 +48,6 @@ def test_pg_clients(test_output_dir: Path, remote_pg: RemotePostgres, client: st
subprocess_capture(test_output_dir, build_cmd, check=True)
run_cmd = [docker_bin, "run", "--rm", "--env-file", env_file, image_tag]
basepath, _, _ = subprocess_capture(test_output_dir, run_cmd, check=True)
basepath = subprocess_capture(test_output_dir, run_cmd, check=True)
assert Path(f"{basepath}.stdout").read_text().strip() == "1"

View File

@@ -1,7 +0,0 @@
{
"public_extensions": [],
"library_index": {
"TODO": "We still need PG16 extensions"
},
"extension_data": {}
}

View File

@@ -14,14 +14,16 @@ from fixtures.utils import wait_until
@pytest.fixture
def positive_env(neon_env_builder: NeonEnvBuilder) -> NeonEnv:
neon_env_builder.enable_pageserver_remote_storage(RemoteStorageKind.LOCAL_FS)
neon_env_builder.enable_remote_storage(
remote_storage_kind=RemoteStorageKind.LOCAL_FS,
)
env = neon_env_builder.init_start()
# eviction might be the first one after an attach to access the layers
env.pageserver.allowed_errors.append(
".*unexpectedly on-demand downloading remote layer remote.* for task kind Eviction"
)
assert isinstance(env.pageserver_remote_storage, LocalFsStorage)
assert isinstance(env.remote_storage, LocalFsStorage)
return env
@@ -34,9 +36,11 @@ class NegativeTests:
@pytest.fixture
def negative_env(neon_env_builder: NeonEnvBuilder) -> Generator[NegativeTests, None, None]:
neon_env_builder.enable_pageserver_remote_storage(RemoteStorageKind.LOCAL_FS)
neon_env_builder.enable_remote_storage(
remote_storage_kind=RemoteStorageKind.LOCAL_FS,
)
env = neon_env_builder.init_start()
assert isinstance(env.pageserver_remote_storage, LocalFsStorage)
assert isinstance(env.remote_storage, LocalFsStorage)
ps_http = env.pageserver.http_client()
(tenant_id, _) = env.neon_cli.create_tenant()

View File

@@ -44,14 +44,14 @@ def test_broken_timeline(neon_env_builder: NeonEnvBuilder):
log.info(f"Timeline {tenant0}/{timeline0} is left intact")
(tenant1, timeline1, pg1) = tenant_timelines[1]
metadata_path = f"{env.pageserver.workdir}/tenants/{tenant1}/timelines/{timeline1}/metadata"
metadata_path = f"{env.repo_dir}/tenants/{tenant1}/timelines/{timeline1}/metadata"
f = open(metadata_path, "w")
f.write("overwritten with garbage!")
f.close()
log.info(f"Timeline {tenant1}/{timeline1} got its metadata spoiled")
(tenant2, timeline2, pg2) = tenant_timelines[2]
timeline_path = f"{env.pageserver.workdir}/tenants/{tenant2}/timelines/{timeline2}/"
timeline_path = f"{env.repo_dir}/tenants/{tenant2}/timelines/{timeline2}/"
for filename in os.listdir(timeline_path):
if filename.startswith("00000"):
# Looks like a layer file. Remove it
@@ -61,7 +61,7 @@ def test_broken_timeline(neon_env_builder: NeonEnvBuilder):
)
(tenant3, timeline3, pg3) = tenant_timelines[3]
timeline_path = f"{env.pageserver.workdir}/tenants/{tenant3}/timelines/{timeline3}/"
timeline_path = f"{env.repo_dir}/tenants/{tenant3}/timelines/{timeline3}/"
for filename in os.listdir(timeline_path):
if filename.startswith("00000"):
# Looks like a layer file. Corrupt it
@@ -122,8 +122,8 @@ def test_create_multiple_timelines_parallel(neon_simple_env: NeonEnv):
future.result()
def test_timeline_init_break_before_checkpoint(neon_env_builder: NeonEnvBuilder):
env = neon_env_builder.init_start()
def test_timeline_init_break_before_checkpoint(neon_simple_env: NeonEnv):
env = neon_simple_env
pageserver_http = env.pageserver.http_client()
env.pageserver.allowed_errors.extend(
@@ -133,9 +133,9 @@ def test_timeline_init_break_before_checkpoint(neon_env_builder: NeonEnvBuilder)
]
)
tenant_id = env.initial_tenant
tenant_id, _ = env.neon_cli.create_tenant()
timelines_dir = env.pageserver.workdir / "tenants" / str(tenant_id) / "timelines"
timelines_dir = env.repo_dir / "tenants" / str(tenant_id) / "timelines"
old_tenant_timelines = env.neon_cli.list_timelines(tenant_id)
initial_timeline_dirs = [d for d in timelines_dir.iterdir()]
@@ -145,8 +145,8 @@ def test_timeline_init_break_before_checkpoint(neon_env_builder: NeonEnvBuilder)
_ = env.neon_cli.create_timeline("test_timeline_init_break_before_checkpoint", tenant_id)
# Restart the page server
env.pageserver.stop(immediate=True)
env.pageserver.start()
env.neon_cli.pageserver_stop(immediate=True)
env.neon_cli.pageserver_start()
# Creating the timeline didn't finish. The other timelines on tenant should still be present and work normally.
new_tenant_timelines = env.neon_cli.list_timelines(tenant_id)
@@ -160,13 +160,13 @@ def test_timeline_init_break_before_checkpoint(neon_env_builder: NeonEnvBuilder)
), "pageserver should clean its temp timeline files on timeline creation failure"
def test_timeline_create_break_after_uninit_mark(neon_env_builder: NeonEnvBuilder):
env = neon_env_builder.init_start()
def test_timeline_create_break_after_uninit_mark(neon_simple_env: NeonEnv):
env = neon_simple_env
pageserver_http = env.pageserver.http_client()
tenant_id = env.initial_tenant
tenant_id, _ = env.neon_cli.create_tenant()
timelines_dir = env.pageserver.workdir / "tenants" / str(tenant_id) / "timelines"
timelines_dir = env.repo_dir / "tenants" / str(tenant_id) / "timelines"
old_tenant_timelines = env.neon_cli.list_timelines(tenant_id)
initial_timeline_dirs = [d for d in timelines_dir.iterdir()]

View File

@@ -33,7 +33,7 @@ def test_lsof_pageserver_pid(neon_simple_env: NeonEnv):
workload_thread = threading.Thread(target=start_workload, args=(), daemon=True)
workload_thread.start()
path = os.path.join(env.pageserver.workdir, "pageserver.pid")
path = os.path.join(env.repo_dir, "pageserver.pid")
lsof = lsof_path()
while workload_thread.is_alive():
res = subprocess.run(

View File

@@ -20,9 +20,8 @@ from fixtures.pageserver.utils import (
wait_for_last_record_lsn,
wait_for_upload,
)
from fixtures.pg_version import PgVersion, skip_on_postgres
from fixtures.pg_version import PgVersion
from fixtures.port_distributor import PortDistributor
from fixtures.remote_storage import LocalFsStorage, RemoteStorageKind, RemoteStorageUser
from fixtures.types import Lsn
from pytest import FixtureRequest
@@ -40,54 +39,6 @@ from pytest import FixtureRequest
# - prepare_snapshot copies the snapshot, cleans it up and makes it ready for the current version of Neon (replaces paths and ports in config files).
# - check_neon_works performs the test itself, feel free to add more checks there.
#
#
# How to run `test_backward_compatibility` locally:
#
# export DEFAULT_PG_VERSION=15
# export BUILD_TYPE=release
# export CHECK_ONDISK_DATA_COMPATIBILITY=true
#
# # Build previous version of binaries and create a data snapshot:
# rm -rf pg_install target
# git checkout <previous version>
# CARGO_BUILD_FLAGS="--features=testing" make -s -j`nproc`
# ./scripts/pytest -k test_create_snapshot
#
# # Build current version of binaries
# rm -rf pg_install target
# git checkout <current version>
# CARGO_BUILD_FLAGS="--features=testing" make -s -j`nproc`
#
# # Run backward compatibility test
# COMPATIBILITY_SNAPSHOT_DIR=test_output/compatibility_snapshot_pgv${DEFAULT_PG_VERSION} \
# ./scripts/pytest -k test_backward_compatibility
#
#
# How to run `test_forward_compatibility` locally:
#
# export DEFAULT_PG_VERSION=15
# export BUILD_TYPE=release
# export CHECK_ONDISK_DATA_COMPATIBILITY=true
#
# # Build previous version of binaries and store them somewhere:
# rm -rf pg_install target
# git checkout <previous version>
# CARGO_BUILD_FLAGS="--features=testing" make -s -j`nproc`
# mkdir -p neon_previous/target
# cp -a target/${BUILD_TYPE} ./neon_previous/target/${BUILD_TYPE}
# cp -a pg_install ./neon_previous/pg_install
#
# # Build current version of binaries and create a data snapshot:
# rm -rf pg_install target
# git checkout <current version>
# CARGO_BUILD_FLAGS="--features=testing" make -s -j`nproc`
# ./scripts/pytest -k test_create_snapshot
#
# # Run forward compatibility test
# COMPATIBILITY_NEON_BIN=neon_previous/target/${BUILD_TYPE} \
# COMPATIBILITY_POSTGRES_DISTRIB_DIR=neon_previous/pg_install \
# ./scripts/pytest -k test_forward_compatibility
#
check_ondisk_data_compatibility_if_enabled = pytest.mark.skipif(
os.environ.get("CHECK_ONDISK_DATA_COMPATIBILITY") is None,
@@ -110,7 +61,7 @@ def test_create_snapshot(
# There's no cleanup here, it allows to adjust the data in `test_backward_compatibility` itself without re-collecting it.
neon_env_builder.pg_version = pg_version
neon_env_builder.num_safekeepers = 3
neon_env_builder.enable_pageserver_remote_storage(RemoteStorageKind.LOCAL_FS)
neon_env_builder.enable_local_fs_remote_storage()
env = neon_env_builder.init_start()
endpoint = env.endpoints.create_start("main")
@@ -151,7 +102,6 @@ def test_create_snapshot(
shutil.copytree(test_output_dir, compatibility_snapshot_dir)
@skip_on_postgres(PgVersion.V16, reason="TODO: Enable after the first Postgres 16 release")
@check_ondisk_data_compatibility_if_enabled
@pytest.mark.xdist_group("compatibility")
@pytest.mark.order(after="test_create_snapshot")
@@ -182,6 +132,7 @@ def test_backward_compatibility(
prepare_snapshot(
from_dir=compatibility_snapshot_dir,
to_dir=test_output_dir / "compatibility_snapshot",
neon_binpath=neon_binpath,
port_distributor=port_distributor,
)
@@ -209,7 +160,6 @@ def test_backward_compatibility(
), "Breaking changes are allowed by ALLOW_BACKWARD_COMPATIBILITY_BREAKAGE, but the test has passed without any breakage"
@skip_on_postgres(PgVersion.V16, reason="TODO: Enable after the first Postgres 16 release")
@check_ondisk_data_compatibility_if_enabled
@pytest.mark.xdist_group("compatibility")
@pytest.mark.order(after="test_create_snapshot")
@@ -251,6 +201,7 @@ def test_forward_compatibility(
from_dir=compatibility_snapshot_dir,
to_dir=test_output_dir / "compatibility_snapshot",
port_distributor=port_distributor,
neon_binpath=compatibility_neon_bin,
pg_distrib_dir=compatibility_postgres_distrib_dir,
)
@@ -282,6 +233,7 @@ def prepare_snapshot(
from_dir: Path,
to_dir: Path,
port_distributor: PortDistributor,
neon_binpath: Path,
pg_distrib_dir: Optional[Path] = None,
):
assert from_dir.exists(), f"Snapshot '{from_dir}' doesn't exist"
@@ -293,9 +245,6 @@ def prepare_snapshot(
repo_dir = to_dir / "repo"
snapshot_config_toml = repo_dir / "config"
snapshot_config = toml.load(snapshot_config_toml)
# Remove old logs to avoid confusion in test artifacts
for logfile in repo_dir.glob("**/*.log"):
logfile.unlink()
@@ -309,53 +258,29 @@ def prepare_snapshot(
os.mkdir(repo_dir / "endpoints")
# Update paths and ports in config files
legacy_pageserver_toml = repo_dir / "pageserver.toml"
legacy_bundle = os.path.exists(legacy_pageserver_toml)
pageserver_toml = repo_dir / "pageserver.toml"
pageserver_config = toml.load(pageserver_toml)
pageserver_config["remote_storage"]["local_path"] = str(repo_dir / "local_fs_remote_storage")
for param in ("listen_http_addr", "listen_pg_addr", "broker_endpoint"):
pageserver_config[param] = port_distributor.replace_with_new_port(pageserver_config[param])
path_to_config: dict[Path, dict[Any, Any]] = {}
if legacy_bundle:
os.mkdir(repo_dir / "pageserver_1")
path_to_config[repo_dir / "pageserver_1" / "pageserver.toml"] = toml.load(
legacy_pageserver_toml
)
os.remove(legacy_pageserver_toml)
os.rename(repo_dir / "tenants", repo_dir / "pageserver_1" / "tenants")
else:
for ps_conf in snapshot_config["pageservers"]:
config_path = repo_dir / f"pageserver_{ps_conf['id']}" / "pageserver.toml"
path_to_config[config_path] = toml.load(config_path)
# We don't use authentication in compatibility tests
# so just remove authentication related settings.
pageserver_config.pop("pg_auth_type", None)
pageserver_config.pop("http_auth_type", None)
# For each pageserver config, edit it and rewrite
for config_path, pageserver_config in path_to_config.items():
pageserver_config["remote_storage"]["local_path"] = str(
LocalFsStorage.component_path(repo_dir, RemoteStorageUser.PAGESERVER)
)
if pg_distrib_dir:
pageserver_config["pg_distrib_dir"] = str(pg_distrib_dir)
for param in ("listen_http_addr", "listen_pg_addr", "broker_endpoint"):
pageserver_config[param] = port_distributor.replace_with_new_port(
pageserver_config[param]
)
# We don't use authentication in compatibility tests
# so just remove authentication related settings.
pageserver_config.pop("pg_auth_type", None)
pageserver_config.pop("http_auth_type", None)
if pg_distrib_dir:
pageserver_config["pg_distrib_dir"] = str(pg_distrib_dir)
with config_path.open("w") as f:
toml.dump(pageserver_config, f)
# neon_local config doesn't have to be backward compatible. If we're using a dump from before
# it supported multiple pageservers, fix it up.
if "pageservers" not in snapshot_config:
snapshot_config["pageservers"] = [snapshot_config["pageserver"]]
del snapshot_config["pageserver"]
with pageserver_toml.open("w") as f:
toml.dump(pageserver_config, f)
snapshot_config_toml = repo_dir / "config"
snapshot_config = toml.load(snapshot_config_toml)
for param in ("listen_http_addr", "listen_pg_addr"):
for pageserver in snapshot_config["pageservers"]:
pageserver[param] = port_distributor.replace_with_new_port(pageserver[param])
snapshot_config["pageserver"][param] = port_distributor.replace_with_new_port(
snapshot_config["pageserver"][param]
)
snapshot_config["broker"]["listen_addr"] = port_distributor.replace_with_new_port(
snapshot_config["broker"]["listen_addr"]
)
@@ -413,15 +338,10 @@ def check_neon_works(
config.initial_tenant = snapshot_config["default_tenant_id"]
config.pg_distrib_dir = pg_distrib_dir
config.remote_storage = None
config.ext_remote_storage = None
config.sk_remote_storage = None
# Use the "target" binaries to launch the storage nodes
config_target = config
config_target.neon_binpath = neon_target_binpath
# We are using maybe-old binaries for neon services, but want to use current
# binaries for test utilities like neon_local
config_target.neon_local_binpath = neon_current_binpath
cli_target = NeonCli(config_target)
# And the current binaries to launch computes
@@ -454,7 +374,7 @@ def check_neon_works(
# loosely based on https://github.com/neondatabase/cloud/wiki/Recovery-from-WAL
tenant_id = snapshot_config["default_tenant_id"]
timeline_id = dict(snapshot_config["branch_name_mappings"]["main"])[tenant_id]
pageserver_port = snapshot_config["pageservers"][0]["listen_http_addr"].split(":")[-1]
pageserver_port = snapshot_config["pageserver"]["listen_http_addr"].split(":")[-1]
pageserver_http = PageserverHttpClient(
port=pageserver_port,
is_testing_enabled_or_skip=lambda: True, # TODO: check if testing really enabled

View File

@@ -135,7 +135,7 @@ def eviction_env(request, neon_env_builder: NeonEnvBuilder, pg_bin: PgBin) -> Ev
log.info(f"setting up eviction_env for test {request.node.name}")
neon_env_builder.enable_pageserver_remote_storage(RemoteStorageKind.LOCAL_FS)
neon_env_builder.enable_remote_storage(RemoteStorageKind.LOCAL_FS)
# initial tenant will not be present on this pageserver
env = neon_env_builder.init_configs()

View File

@@ -9,12 +9,8 @@ from fixtures.log_helper import log
from fixtures.neon_fixtures import (
NeonEnvBuilder,
)
from fixtures.pg_version import PgVersion, skip_on_postgres
from fixtures.remote_storage import (
RemoteStorageKind,
S3Storage,
available_s3_storages,
)
from fixtures.pg_version import PgVersion
from fixtures.remote_storage import RemoteStorageKind, available_s3_storages
# Cleaning up downloaded files is important for local tests
@@ -76,8 +72,7 @@ def upload_files(env):
with open(full_path, "rb") as f:
log.info(f"UPLOAD {full_path} to ext/{full_path}")
assert isinstance(env.pageserver_remote_storage, S3Storage)
env.pageserver_remote_storage.client.upload_fileobj(
env.remote_storage_client.upload_fileobj(
f,
env.ext_remote_storage.bucket_name,
f"ext/{full_path}",
@@ -86,7 +81,6 @@ def upload_files(env):
# Test downloading remote extension.
@skip_on_postgres(PgVersion.V16, reason="TODO: PG16 extension building")
@pytest.mark.parametrize("remote_storage_kind", available_s3_storages())
@pytest.mark.skip(reason="https://github.com/neondatabase/neon/issues/4949")
def test_remote_extensions(
@@ -94,12 +88,16 @@ def test_remote_extensions(
remote_storage_kind: RemoteStorageKind,
pg_version: PgVersion,
):
neon_env_builder.enable_extensions_remote_storage(remote_storage_kind)
neon_env_builder.enable_remote_storage(
remote_storage_kind=remote_storage_kind,
enable_remote_extensions=True,
)
env = neon_env_builder.init_start()
tenant_id, _ = env.neon_cli.create_tenant()
env.neon_cli.create_timeline("test_remote_extensions", tenant_id=tenant_id)
assert env.ext_remote_storage is not None # satisfy mypy
assert env.remote_storage_client is not None # satisfy mypy
# For MOCK_S3 we upload test files.
# For REAL_S3 we use the files already in the bucket
@@ -149,7 +147,6 @@ def test_remote_extensions(
# Test downloading remote library.
@skip_on_postgres(PgVersion.V16, reason="TODO: PG16 extension building")
@pytest.mark.parametrize("remote_storage_kind", available_s3_storages())
@pytest.mark.skip(reason="https://github.com/neondatabase/neon/issues/4949")
def test_remote_library(
@@ -157,12 +154,16 @@ def test_remote_library(
remote_storage_kind: RemoteStorageKind,
pg_version: PgVersion,
):
neon_env_builder.enable_extensions_remote_storage(remote_storage_kind)
neon_env_builder.enable_remote_storage(
remote_storage_kind=remote_storage_kind,
enable_remote_extensions=True,
)
env = neon_env_builder.init_start()
tenant_id, _ = env.neon_cli.create_tenant()
env.neon_cli.create_timeline("test_remote_library", tenant_id=tenant_id)
assert env.ext_remote_storage is not None # satisfy mypy
assert env.remote_storage_client is not None # satisfy mypy
# For MOCK_S3 we upload test files.
# For REAL_S3 we use the files already in the bucket
@@ -208,18 +209,21 @@ def test_remote_library(
# RemoteStorageKind.REAL_S3 not in available_s3_storages(),
# reason="skipping test because real s3 not enabled",
# )
@skip_on_postgres(PgVersion.V16, reason="TODO: PG16 extension building")
@pytest.mark.skip(reason="https://github.com/neondatabase/neon/issues/4949")
def test_multiple_extensions_one_archive(
neon_env_builder: NeonEnvBuilder,
pg_version: PgVersion,
):
neon_env_builder.enable_extensions_remote_storage(RemoteStorageKind.REAL_S3)
neon_env_builder.enable_remote_storage(
remote_storage_kind=RemoteStorageKind.REAL_S3,
enable_remote_extensions=True,
)
env = neon_env_builder.init_start()
tenant_id, _ = env.neon_cli.create_tenant()
env.neon_cli.create_timeline("test_multiple_extensions_one_archive", tenant_id=tenant_id)
assert env.ext_remote_storage is not None # satisfy mypy
assert env.remote_storage_client is not None # satisfy mypy
endpoint = env.endpoints.create_start(
"test_multiple_extensions_one_archive",
@@ -254,16 +258,19 @@ def test_extension_download_after_restart(
neon_env_builder: NeonEnvBuilder,
pg_version: PgVersion,
):
# TODO: PG15 + PG16 extension building
if "v14" not in pg_version: # test set only has extension built for v14
if "15" in pg_version: # SKIP v15 for now because test set only has extension built for v14
return None
neon_env_builder.enable_extensions_remote_storage(RemoteStorageKind.MOCK_S3)
neon_env_builder.enable_remote_storage(
remote_storage_kind=RemoteStorageKind.MOCK_S3,
enable_remote_extensions=True,
)
env = neon_env_builder.init_start()
tenant_id, _ = env.neon_cli.create_tenant()
env.neon_cli.create_timeline("test_extension_download_after_restart", tenant_id=tenant_id)
assert env.ext_remote_storage is not None # satisfy mypy
assert env.remote_storage_client is not None # satisfy mypy
# For MOCK_S3 we upload test files.
upload_files(env)

View File

@@ -100,7 +100,9 @@ def test_gc_index_upload(neon_env_builder: NeonEnvBuilder, remote_storage_kind:
# Disable time-based pitr, we will use LSN-based thresholds in the manual GC calls
neon_env_builder.pageserver_config_override = "tenant_config={pitr_interval = '0 sec'}"
neon_env_builder.enable_pageserver_remote_storage(remote_storage_kind)
neon_env_builder.enable_remote_storage(
remote_storage_kind=remote_storage_kind,
)
env = neon_env_builder.init_start()
tenant_id = env.initial_tenant

View File

@@ -19,7 +19,6 @@ from fixtures.pageserver.utils import (
wait_for_last_record_lsn,
wait_for_upload,
)
from fixtures.remote_storage import RemoteStorageKind
from fixtures.types import Lsn, TenantId, TimelineId
from fixtures.utils import subprocess_capture
@@ -81,7 +80,7 @@ def test_import_from_vanilla(test_output_dir, pg_bin, vanilla_pg, neon_env_build
timeline = TimelineId.generate()
# Set up pageserver for import
neon_env_builder.enable_pageserver_remote_storage(RemoteStorageKind.LOCAL_FS)
neon_env_builder.enable_local_fs_remote_storage()
env = neon_env_builder.init_start()
client = env.pageserver.http_client()
@@ -164,7 +163,7 @@ def test_import_from_vanilla(test_output_dir, pg_bin, vanilla_pg, neon_env_build
def test_import_from_pageserver_small(pg_bin: PgBin, neon_env_builder: NeonEnvBuilder):
neon_env_builder.enable_pageserver_remote_storage(RemoteStorageKind.LOCAL_FS)
neon_env_builder.enable_local_fs_remote_storage()
env = neon_env_builder.init_start()
# FIXME: Is this expected?
@@ -186,7 +185,7 @@ def test_import_from_pageserver_small(pg_bin: PgBin, neon_env_builder: NeonEnvBu
# @pytest.mark.skipif(os.environ.get('BUILD_TYPE') == "debug", reason="only run with release build")
@pytest.mark.skip("See https://github.com/neondatabase/neon/issues/2255")
def test_import_from_pageserver_multisegment(pg_bin: PgBin, neon_env_builder: NeonEnvBuilder):
neon_env_builder.enable_pageserver_remote_storage(RemoteStorageKind.LOCAL_FS)
neon_env_builder.enable_local_fs_remote_storage()
env = neon_env_builder.init_start()
timeline = env.neon_cli.create_branch("test_import_from_pageserver_multisegment")
@@ -271,7 +270,7 @@ def _import(
env.endpoints.stop_all()
env.pageserver.stop()
dir_to_clear = Path(env.pageserver.workdir) / "tenants"
dir_to_clear = Path(env.repo_dir) / "tenants"
shutil.rmtree(dir_to_clear)
os.mkdir(dir_to_clear)

Some files were not shown because too many files have changed in this diff Show More