Mirror of https://github.com/neondatabase/neon.git (synced 2026-01-04 03:52:56 +00:00)
Feat/postgres 16 (#4761)

This adds PostgreSQL 16 as a vendored PostgreSQL version and adapts the code to support it. The most important change relative to the PostgreSQL 15 changeset is the addition of a neon_rmgr custom WAL resource manager, instead of altering Postgres's original WAL format.

Co-authored-by: Alexander Bayandin <alexander@neon.tech>
Co-authored-by: Heikki Linnakangas <heikki@neon.tech>
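For orientation, a minimal sketch of the neon_rmgr idea, using only the constants this commit adds (RM_NEON_ID and the XLOG_NEON_HEAP_* opcodes); the helper function and main() are illustrative, not repository code:

```rust
// Hypothetical illustration: the pageserver recognizes Neon-specific WAL
// records by their resource-manager id instead of relying on changes to
// Postgres's native heap WAL format.
const RM_NEON_ID: u8 = 134; // from neon_rmgr.h (added in this commit)
const XLR_RMGR_INFO_MASK: u8 = 0xF0; // high nibble of xl_info carries the opcode
const XLOG_NEON_HEAP_INSERT: u8 = 0x00;

fn is_neon_heap_insert(xl_rmid: u8, xl_info: u8) -> bool {
    xl_rmid == RM_NEON_ID && (xl_info & XLR_RMGR_INFO_MASK) == XLOG_NEON_HEAP_INSERT
}

fn main() {
    // A record tagged with the Neon rmgr and the INSERT opcode.
    assert!(is_neon_heap_insert(134, 0x00));
    // A plain heap record (RM_HEAP_ID = 10) is not handled by the new path.
    assert!(!is_neon_heap_insert(10, 0x00));
}
```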
@@ -19,6 +19,7 @@
!trace/
!vendor/postgres-v14/
!vendor/postgres-v15/
!vendor/postgres-v16/
!workspace_hack/
!neon_local/
!scripts/ninstall.sh
@@ -70,6 +70,9 @@ runs:
name: compatibility-snapshot-${{ inputs.build_type }}-pg${{ inputs.pg_version }}
path: /tmp/compatibility_snapshot_pg${{ inputs.pg_version }}
prefix: latest
# The lack of compatibility snapshot (for example, for the new Postgres version)
# shouldn't fail the whole job. Only relevant test should fail.
skip-if-does-not-exist: true

- name: Checkout
if: inputs.needs_postgres_source == 'true'
.github/workflows/build_and_test.yml (vendored, 33 changes)
@@ -212,7 +212,7 @@ jobs:
# Eventually it will be replaced by a regression test https://github.com/neondatabase/neon/pull/4603

FAILED=false
for postgres in postgres-v14 postgres-v15; do
for postgres in postgres-v14 postgres-v15 postgres-v16; do
expected=$(cat vendor/revisions.json | jq --raw-output '."'"${postgres}"'"')
actual=$(git rev-parse "HEAD:vendor/${postgres}")
if [ "${expected}" != "${actual}" ]; then
@@ -234,6 +234,10 @@ jobs:
id: pg_v15_rev
run: echo pg_rev=$(git rev-parse HEAD:vendor/postgres-v15) >> $GITHUB_OUTPUT

- name: Set pg 16 revision for caching
id: pg_v16_rev
run: echo pg_rev=$(git rev-parse HEAD:vendor/postgres-v16) >> $GITHUB_OUTPUT

# Set some environment variables used by all the steps.
#
# CARGO_FLAGS is extra options to pass to "cargo build", "cargo test" etc.
@@ -294,6 +298,13 @@ jobs:
|
||||
path: pg_install/v15
|
||||
key: v1-${{ runner.os }}-${{ matrix.build_type }}-pg-${{ steps.pg_v15_rev.outputs.pg_rev }}-${{ hashFiles('Makefile') }}
|
||||
|
||||
- name: Cache postgres v16 build
|
||||
id: cache_pg_16
|
||||
uses: actions/cache@v3
|
||||
with:
|
||||
path: pg_install/v16
|
||||
key: v1-${{ runner.os }}-${{ matrix.build_type }}-pg-${{ steps.pg_v16_rev.outputs.pg_rev }}-${{ hashFiles('Makefile') }}
|
||||
|
||||
- name: Build postgres v14
|
||||
if: steps.cache_pg_14.outputs.cache-hit != 'true'
|
||||
run: mold -run make postgres-v14 -j$(nproc)
|
||||
@@ -302,6 +313,10 @@ jobs:
|
||||
if: steps.cache_pg_15.outputs.cache-hit != 'true'
|
||||
run: mold -run make postgres-v15 -j$(nproc)
|
||||
|
||||
- name: Build postgres v16
|
||||
if: steps.cache_pg_16.outputs.cache-hit != 'true'
|
||||
run: mold -run make postgres-v16 -j$(nproc)
|
||||
|
||||
- name: Build neon extensions
|
||||
run: mold -run make neon-pg-ext -j$(nproc)
|
||||
|
||||
@@ -385,7 +400,7 @@ jobs:
fail-fast: false
matrix:
build_type: [ debug, release ]
pg_version: [ v14, v15 ]
pg_version: [ v14, v15, v16 ]
steps:
- name: Checkout
uses: actions/checkout@v3
@@ -760,7 +775,7 @@ jobs:
strategy:
fail-fast: false
matrix:
version: [ v14, v15 ]
version: [ v14, v15, v16 ]
defaults:
run:
shell: sh -eu {0}
@@ -814,7 +829,7 @@ jobs:
strategy:
fail-fast: false
matrix:
version: [ v14, v15 ]
version: [ v14, v15, v16 ]
defaults:
run:
shell: sh -eu {0}
@@ -915,6 +930,7 @@ jobs:
|
||||
run: |
|
||||
crane pull 369495373322.dkr.ecr.eu-central-1.amazonaws.com/vm-compute-node-v14:${{needs.tag.outputs.build-tag}} vm-compute-node-v14
|
||||
crane pull 369495373322.dkr.ecr.eu-central-1.amazonaws.com/vm-compute-node-v15:${{needs.tag.outputs.build-tag}} vm-compute-node-v15
|
||||
crane pull 369495373322.dkr.ecr.eu-central-1.amazonaws.com/vm-compute-node-v16:${{needs.tag.outputs.build-tag}} vm-compute-node-v16
|
||||
|
||||
- name: Add latest tag to images
|
||||
if: |
|
||||
@@ -927,6 +943,8 @@ jobs:
|
||||
crane tag 369495373322.dkr.ecr.eu-central-1.amazonaws.com/vm-compute-node-v14:${{needs.tag.outputs.build-tag}} latest
|
||||
crane tag 369495373322.dkr.ecr.eu-central-1.amazonaws.com/compute-node-v15:${{needs.tag.outputs.build-tag}} latest
|
||||
crane tag 369495373322.dkr.ecr.eu-central-1.amazonaws.com/vm-compute-node-v15:${{needs.tag.outputs.build-tag}} latest
|
||||
crane tag 369495373322.dkr.ecr.eu-central-1.amazonaws.com/compute-node-v16:${{needs.tag.outputs.build-tag}} latest
|
||||
crane tag 369495373322.dkr.ecr.eu-central-1.amazonaws.com/vm-compute-node-v16:${{needs.tag.outputs.build-tag}} latest
|
||||
|
||||
- name: Push images to production ECR
|
||||
if: |
|
||||
@@ -939,6 +957,8 @@ jobs:
|
||||
crane copy 369495373322.dkr.ecr.eu-central-1.amazonaws.com/vm-compute-node-v14:${{needs.tag.outputs.build-tag}} 093970136003.dkr.ecr.eu-central-1.amazonaws.com/vm-compute-node-v14:latest
|
||||
crane copy 369495373322.dkr.ecr.eu-central-1.amazonaws.com/compute-node-v15:${{needs.tag.outputs.build-tag}} 093970136003.dkr.ecr.eu-central-1.amazonaws.com/compute-node-v15:latest
|
||||
crane copy 369495373322.dkr.ecr.eu-central-1.amazonaws.com/vm-compute-node-v15:${{needs.tag.outputs.build-tag}} 093970136003.dkr.ecr.eu-central-1.amazonaws.com/vm-compute-node-v15:latest
|
||||
crane copy 369495373322.dkr.ecr.eu-central-1.amazonaws.com/compute-node-v16:${{needs.tag.outputs.build-tag}} 093970136003.dkr.ecr.eu-central-1.amazonaws.com/compute-node-v16:latest
|
||||
crane copy 369495373322.dkr.ecr.eu-central-1.amazonaws.com/vm-compute-node-v16:${{needs.tag.outputs.build-tag}} 093970136003.dkr.ecr.eu-central-1.amazonaws.com/vm-compute-node-v16:latest
|
||||
|
||||
- name: Configure Docker Hub login
|
||||
run: |
|
||||
@@ -950,6 +970,7 @@ jobs:
|
||||
run: |
|
||||
crane push vm-compute-node-v14 neondatabase/vm-compute-node-v14:${{needs.tag.outputs.build-tag}}
|
||||
crane push vm-compute-node-v15 neondatabase/vm-compute-node-v15:${{needs.tag.outputs.build-tag}}
|
||||
crane push vm-compute-node-v16 neondatabase/vm-compute-node-v16:${{needs.tag.outputs.build-tag}}
|
||||
|
||||
- name: Push latest tags to Docker Hub
|
||||
if: |
|
||||
@@ -962,6 +983,8 @@ jobs:
|
||||
crane tag neondatabase/vm-compute-node-v14:${{needs.tag.outputs.build-tag}} latest
|
||||
crane tag neondatabase/compute-node-v15:${{needs.tag.outputs.build-tag}} latest
|
||||
crane tag neondatabase/vm-compute-node-v15:${{needs.tag.outputs.build-tag}} latest
|
||||
crane tag neondatabase/compute-node-v16:${{needs.tag.outputs.build-tag}} latest
|
||||
crane tag neondatabase/vm-compute-node-v16:${{needs.tag.outputs.build-tag}} latest
|
||||
|
||||
- name: Cleanup ECR folder
|
||||
run: rm -rf ~/.ecr
|
||||
@@ -1119,7 +1142,7 @@ jobs:
|
||||
PREFIX: artifacts/latest
|
||||
run: |
|
||||
# Update compatibility snapshot for the release
|
||||
for pg_version in v14 v15; do
|
||||
for pg_version in v14 v15 v16; do
|
||||
for build_type in debug release; do
|
||||
OLD_FILENAME=compatibility-snapshot-${build_type}-pg${pg_version}-${GITHUB_RUN_ID}.tar.zst
|
||||
NEW_FILENAME=compatibility-snapshot-${build_type}-pg${pg_version}.tar.zst
|
||||
|
||||
.github/workflows/neon_extra_builds.yml (vendored, 17 changes)
@@ -38,7 +38,7 @@ jobs:
|
||||
fetch-depth: 1
|
||||
|
||||
- name: Install macOS postgres dependencies
|
||||
run: brew install flex bison openssl protobuf
|
||||
run: brew install flex bison openssl protobuf icu4c pkg-config
|
||||
|
||||
- name: Set pg 14 revision for caching
|
||||
id: pg_v14_rev
|
||||
@@ -48,6 +48,10 @@ jobs:
|
||||
id: pg_v15_rev
|
||||
run: echo pg_rev=$(git rev-parse HEAD:vendor/postgres-v15) >> $GITHUB_OUTPUT
|
||||
|
||||
- name: Set pg 16 revision for caching
|
||||
id: pg_v16_rev
|
||||
run: echo pg_rev=$(git rev-parse HEAD:vendor/postgres-v16) >> $GITHUB_OUTPUT
|
||||
|
||||
- name: Cache postgres v14 build
|
||||
id: cache_pg_14
|
||||
uses: actions/cache@v3
|
||||
@@ -62,6 +66,13 @@ jobs:
|
||||
path: pg_install/v15
|
||||
key: v1-${{ runner.os }}-${{ env.BUILD_TYPE }}-pg-${{ steps.pg_v15_rev.outputs.pg_rev }}-${{ hashFiles('Makefile') }}
|
||||
|
||||
- name: Cache postgres v16 build
|
||||
id: cache_pg_16
|
||||
uses: actions/cache@v3
|
||||
with:
|
||||
path: pg_install/v16
|
||||
key: v1-${{ runner.os }}-${{ env.BUILD_TYPE }}-pg-${{ steps.pg_v16_rev.outputs.pg_rev }}-${{ hashFiles('Makefile') }}
|
||||
|
||||
- name: Set extra env for macOS
|
||||
run: |
|
||||
echo 'LDFLAGS=-L/usr/local/opt/openssl@3/lib' >> $GITHUB_ENV
|
||||
@@ -85,6 +96,10 @@ jobs:
|
||||
if: steps.cache_pg_15.outputs.cache-hit != 'true'
|
||||
run: make postgres-v15 -j$(nproc)
|
||||
|
||||
- name: Build postgres v16
|
||||
if: steps.cache_pg_16.outputs.cache-hit != 'true'
|
||||
run: make postgres-v16 -j$(nproc)
|
||||
|
||||
- name: Build neon extensions
|
||||
run: make neon-pg-ext -j$(nproc)
|
||||
|
||||
|
||||
.gitmodules (vendored, 4 changes)
@@ -6,3 +6,7 @@
path = vendor/postgres-v15
url = https://github.com/neondatabase/postgres.git
branch = REL_15_STABLE_neon
[submodule "vendor/postgres-v16"]
path = vendor/postgres-v16
url = https://github.com/neondatabase/postgres.git
branch = REL_16_STABLE_neon
@@ -12,6 +12,7 @@ WORKDIR /home/nonroot
|
||||
|
||||
COPY --chown=nonroot vendor/postgres-v14 vendor/postgres-v14
|
||||
COPY --chown=nonroot vendor/postgres-v15 vendor/postgres-v15
|
||||
COPY --chown=nonroot vendor/postgres-v16 vendor/postgres-v16
|
||||
COPY --chown=nonroot pgxn pgxn
|
||||
COPY --chown=nonroot Makefile Makefile
|
||||
COPY --chown=nonroot scripts/ninstall.sh scripts/ninstall.sh
|
||||
@@ -39,6 +40,7 @@ ARG CACHEPOT_BUCKET=neon-github-dev
|
||||
|
||||
COPY --from=pg-build /home/nonroot/pg_install/v14/include/postgresql/server pg_install/v14/include/postgresql/server
|
||||
COPY --from=pg-build /home/nonroot/pg_install/v15/include/postgresql/server pg_install/v15/include/postgresql/server
|
||||
COPY --from=pg-build /home/nonroot/pg_install/v16/include/postgresql/server pg_install/v16/include/postgresql/server
|
||||
COPY --chown=nonroot . .
|
||||
|
||||
# Show build caching stats to check if it was used in the end.
|
||||
@@ -65,6 +67,7 @@ RUN set -e \
|
||||
&& apt install -y \
|
||||
libreadline-dev \
|
||||
libseccomp-dev \
|
||||
libicu67 \
|
||||
openssl \
|
||||
ca-certificates \
|
||||
&& rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/* \
|
||||
@@ -81,6 +84,7 @@ COPY --from=build --chown=neon:neon /home/nonroot/target/release/neon_local
|
||||
|
||||
COPY --from=pg-build /home/nonroot/pg_install/v14 /usr/local/v14/
|
||||
COPY --from=pg-build /home/nonroot/pg_install/v15 /usr/local/v15/
|
||||
COPY --from=pg-build /home/nonroot/pg_install/v16 /usr/local/v16/
|
||||
COPY --from=pg-build /home/nonroot/postgres_install.tar.gz /data/
|
||||
|
||||
# By default, pageserver uses `.neon/` working directory in WORKDIR, so create one and fill it with the dummy config.
|
||||
|
||||
@@ -371,12 +371,21 @@ RUN wget https://github.com/okbob/plpgsql_check/archive/refs/tags/v2.4.0.tar.gz
|
||||
FROM build-deps AS timescaledb-pg-build
|
||||
COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
|
||||
|
||||
ARG PG_VERSION
|
||||
ENV PATH "/usr/local/pgsql/bin:$PATH"
|
||||
|
||||
RUN apt-get update && \
|
||||
RUN case "${PG_VERSION}" in \
|
||||
"v14" | "v15") \
|
||||
export TIMESCALEDB_VERSION=2.10.1 \
|
||||
export TIMESCALEDB_CHECKSUM=6fca72a6ed0f6d32d2b3523951ede73dc5f9b0077b38450a029a5f411fdb8c73 \
|
||||
;; \
|
||||
*) \
|
||||
echo "TimescaleDB not supported on this PostgreSQL version. See https://github.com/timescale/timescaledb/issues/5752" && exit 0;; \
|
||||
esac && \
|
||||
apt-get update && \
|
||||
apt-get install -y cmake && \
|
||||
wget https://github.com/timescale/timescaledb/archive/refs/tags/2.10.1.tar.gz -O timescaledb.tar.gz && \
|
||||
echo "6fca72a6ed0f6d32d2b3523951ede73dc5f9b0077b38450a029a5f411fdb8c73 timescaledb.tar.gz" | sha256sum --check && \
|
||||
wget https://github.com/timescale/timescaledb/archive/refs/tags/${TIMESCALEDB_VERSION}.tar.gz -O timescaledb.tar.gz && \
|
||||
echo "${TIMESCALEDB_CHECKSUM} timescaledb.tar.gz" | sha256sum --check && \
|
||||
mkdir timescaledb-src && cd timescaledb-src && tar xvzf ../timescaledb.tar.gz --strip-components=1 -C . && \
|
||||
./bootstrap -DSEND_TELEMETRY_DEFAULT:BOOL=OFF -DUSE_TELEMETRY:BOOL=OFF -DAPACHE_ONLY:BOOL=ON -DCMAKE_BUILD_TYPE=Release && \
|
||||
cd build && \
|
||||
@@ -405,6 +414,10 @@ RUN case "${PG_VERSION}" in \
|
||||
export PG_HINT_PLAN_VERSION=15_1_5_0 \
|
||||
export PG_HINT_PLAN_CHECKSUM=564cbbf4820973ffece63fbf76e3c0af62c4ab23543142c7caaa682bc48918be \
|
||||
;; \
|
||||
"v16") \
|
||||
export PG_HINT_PLAN_VERSION=16_1_6_0 \
|
||||
export PG_HINT_PLAN_CHECKSUM=ce6a8040c78012000f5da7240caf6a971401412f41d33f930f09291e6c304b99 \
|
||||
;; \
|
||||
*) \
|
||||
echo "Export the valid PG_HINT_PLAN_VERSION variable" && exit 1 \
|
||||
;; \
|
||||
@@ -551,8 +564,16 @@ FROM build-deps AS pg-embedding-pg-build
|
||||
COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
|
||||
|
||||
ENV PATH "/usr/local/pgsql/bin/:$PATH"
|
||||
RUN wget https://github.com/neondatabase/pg_embedding/archive/refs/tags/0.3.5.tar.gz -O pg_embedding.tar.gz && \
|
||||
echo "0e95b27b8b6196e2cf0a0c9ec143fe2219b82e54c5bb4ee064e76398cbe69ae9 pg_embedding.tar.gz" | sha256sum --check && \
|
||||
RUN case "${PG_VERSION}" in \
|
||||
"v14" | "v15") \
|
||||
export PG_EMBEDDING_VERSION=0.3.5 \
|
||||
export PG_EMBEDDING_CHECKSUM=0e95b27b8b6196e2cf0a0c9ec143fe2219b82e54c5bb4ee064e76398cbe69ae9 \
|
||||
;; \
|
||||
*) \
|
||||
echo "pg_embedding not supported on this PostgreSQL version. Use pgvector instead." && exit 0;; \
|
||||
esac && \
|
||||
wget https://github.com/neondatabase/pg_embedding/archive/refs/tags/${PG_EMBEDDING_VERSION}.tar.gz -O pg_embedding.tar.gz && \
|
||||
echo "${PG_EMBEDDING_CHECKSUM} pg_embedding.tar.gz" | sha256sum --check && \
|
||||
mkdir pg_embedding-src && cd pg_embedding-src && tar xvzf ../pg_embedding.tar.gz --strip-components=1 -C . && \
|
||||
make -j $(getconf _NPROCESSORS_ONLN) && \
|
||||
make -j $(getconf _NPROCESSORS_ONLN) install && \
|
||||
@@ -584,6 +605,10 @@ RUN wget https://gitlab.com/dalibo/postgresql_anonymizer/-/archive/1.1.0/postgre
|
||||
# Layer "rust extensions"
|
||||
# This layer is used to build `pgx` deps
|
||||
#
|
||||
# FIXME: This needs to be updated to latest version of 'pgrx' (it was renamed from
|
||||
# 'pgx' to 'pgrx') for PostgreSQL 16. And that in turn requires bumping the pgx
|
||||
# dependency on all the rust extension that depend on it, too.
|
||||
#
|
||||
#########################################################################################
|
||||
FROM build-deps AS rust-extensions-build
|
||||
COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
|
||||
@@ -598,7 +623,17 @@ USER nonroot
|
||||
WORKDIR /home/nonroot
|
||||
ARG PG_VERSION
|
||||
|
||||
RUN curl -sSO https://static.rust-lang.org/rustup/dist/$(uname -m)-unknown-linux-gnu/rustup-init && \
|
||||
RUN case "${PG_VERSION}" in \
|
||||
"v14" | "v15") \
|
||||
;; \
|
||||
"v16") \
|
||||
echo "TODO: Not yet supported for PostgreSQL 16. Need to update pgrx dependencies" && exit 0 \
|
||||
;; \
|
||||
*) \
|
||||
echo "unexpected PostgreSQL version ${PG_VERSION}" && exit 1 \
|
||||
;; \
|
||||
esac && \
|
||||
curl -sSO https://static.rust-lang.org/rustup/dist/$(uname -m)-unknown-linux-gnu/rustup-init && \
|
||||
chmod +x rustup-init && \
|
||||
./rustup-init -y --no-modify-path --profile minimal --default-toolchain stable && \
|
||||
rm rustup-init && \
|
||||
@@ -615,10 +650,21 @@ USER root
|
||||
#########################################################################################
|
||||
|
||||
FROM rust-extensions-build AS pg-jsonschema-pg-build
|
||||
ARG PG_VERSION
|
||||
|
||||
# caeab60d70b2fd3ae421ec66466a3abbb37b7ee6 made on 06/03/2023
|
||||
# there is no release tag yet, but we need it due to the superuser fix in the control file, switch to git tag after release >= 0.1.5
|
||||
RUN wget https://github.com/supabase/pg_jsonschema/archive/caeab60d70b2fd3ae421ec66466a3abbb37b7ee6.tar.gz -O pg_jsonschema.tar.gz && \
|
||||
RUN case "${PG_VERSION}" in \
|
||||
"v14" | "v15") \
|
||||
;; \
|
||||
"v16") \
|
||||
echo "TODO: Not yet supported for PostgreSQL 16. Need to update pgrx dependencies" && exit 0 \
|
||||
;; \
|
||||
*) \
|
||||
echo "unexpected PostgreSQL version \"${PG_VERSION}\"" && exit 1 \
|
||||
;; \
|
||||
esac && \
|
||||
wget https://github.com/supabase/pg_jsonschema/archive/caeab60d70b2fd3ae421ec66466a3abbb37b7ee6.tar.gz -O pg_jsonschema.tar.gz && \
|
||||
echo "54129ce2e7ee7a585648dbb4cef6d73f795d94fe72f248ac01119992518469a4 pg_jsonschema.tar.gz" | sha256sum --check && \
|
||||
mkdir pg_jsonschema-src && cd pg_jsonschema-src && tar xvzf ../pg_jsonschema.tar.gz --strip-components=1 -C . && \
|
||||
sed -i 's/pgx = "0.7.1"/pgx = { version = "0.7.3", features = [ "unsafe-postgres" ] }/g' Cargo.toml && \
|
||||
@@ -633,12 +679,23 @@ RUN wget https://github.com/supabase/pg_jsonschema/archive/caeab60d70b2fd3ae421e
|
||||
#########################################################################################
|
||||
|
||||
FROM rust-extensions-build AS pg-graphql-pg-build
|
||||
ARG PG_VERSION
|
||||
|
||||
# b4988843647450a153439be367168ed09971af85 made on 22/02/2023 (from remove-pgx-contrib-spiext branch)
|
||||
# Currently pgx version bump to >= 0.7.2 causes "call to unsafe function" compliation errors in
|
||||
# pgx-contrib-spiext. There is a branch that removes that dependency, so use it. It is on the
|
||||
# same 1.1 version we've used before.
|
||||
RUN wget https://github.com/yrashk/pg_graphql/archive/b4988843647450a153439be367168ed09971af85.tar.gz -O pg_graphql.tar.gz && \
|
||||
RUN case "${PG_VERSION}" in \
|
||||
"v14" | "v15") \
|
||||
;; \
|
||||
"v16") \
|
||||
echo "TODO: Not yet supported for PostgreSQL 16. Need to update pgrx dependencies" && exit 0 \
|
||||
;; \
|
||||
*) \
|
||||
echo "unexpected PostgreSQL version" && exit 1 \
|
||||
;; \
|
||||
esac && \
|
||||
wget https://github.com/yrashk/pg_graphql/archive/b4988843647450a153439be367168ed09971af85.tar.gz -O pg_graphql.tar.gz && \
|
||||
echo "0c7b0e746441b2ec24187d0e03555faf935c2159e2839bddd14df6dafbc8c9bd pg_graphql.tar.gz" | sha256sum --check && \
|
||||
mkdir pg_graphql-src && cd pg_graphql-src && tar xvzf ../pg_graphql.tar.gz --strip-components=1 -C . && \
|
||||
sed -i 's/pgx = "~0.7.1"/pgx = { version = "0.7.3", features = [ "unsafe-postgres" ] }/g' Cargo.toml && \
|
||||
@@ -656,9 +713,20 @@ RUN wget https://github.com/yrashk/pg_graphql/archive/b4988843647450a153439be367
|
||||
#########################################################################################
|
||||
|
||||
FROM rust-extensions-build AS pg-tiktoken-pg-build
|
||||
ARG PG_VERSION
|
||||
|
||||
# 801f84f08c6881c8aa30f405fafbf00eec386a72 made on 10/03/2023
|
||||
RUN wget https://github.com/kelvich/pg_tiktoken/archive/801f84f08c6881c8aa30f405fafbf00eec386a72.tar.gz -O pg_tiktoken.tar.gz && \
|
||||
RUN case "${PG_VERSION}" in \
|
||||
"v14" | "v15") \
|
||||
;; \
|
||||
"v16") \
|
||||
echo "TODO: Not yet supported for PostgreSQL 16. Need to update pgrx dependencies" && exit 0 \
|
||||
;; \
|
||||
*) \
|
||||
echo "unexpected PostgreSQL version" && exit 1 \
|
||||
;; \
|
||||
esac && \
|
||||
wget https://github.com/kelvich/pg_tiktoken/archive/801f84f08c6881c8aa30f405fafbf00eec386a72.tar.gz -O pg_tiktoken.tar.gz && \
|
||||
echo "52f60ac800993a49aa8c609961842b611b6b1949717b69ce2ec9117117e16e4a pg_tiktoken.tar.gz" | sha256sum --check && \
|
||||
mkdir pg_tiktoken-src && cd pg_tiktoken-src && tar xvzf ../pg_tiktoken.tar.gz --strip-components=1 -C . && \
|
||||
cargo pgx install --release && \
|
||||
@@ -672,8 +740,19 @@ RUN wget https://github.com/kelvich/pg_tiktoken/archive/801f84f08c6881c8aa30f405
|
||||
#########################################################################################
|
||||
|
||||
FROM rust-extensions-build AS pg-pgx-ulid-build
|
||||
ARG PG_VERSION
|
||||
|
||||
RUN wget https://github.com/pksunkara/pgx_ulid/archive/refs/tags/v0.1.0.tar.gz -O pgx_ulid.tar.gz && \
|
||||
RUN case "${PG_VERSION}" in \
|
||||
"v14" | "v15") \
|
||||
;; \
|
||||
"v16") \
|
||||
echo "TODO: Not yet supported for PostgreSQL 16. Need to update pgrx dependencies" && exit 0 \
|
||||
;; \
|
||||
*) \
|
||||
echo "unexpected PostgreSQL version" && exit 1 \
|
||||
;; \
|
||||
esac && \
|
||||
wget https://github.com/pksunkara/pgx_ulid/archive/refs/tags/v0.1.0.tar.gz -O pgx_ulid.tar.gz && \
|
||||
echo "908b7358e6f846e87db508ae5349fb56a88ee6305519074b12f3d5b0ff09f791 pgx_ulid.tar.gz" | sha256sum --check && \
|
||||
mkdir pgx_ulid-src && cd pgx_ulid-src && tar xvzf ../pgx_ulid.tar.gz --strip-components=1 -C . && \
|
||||
sed -i 's/pgx = "=0.7.3"/pgx = { version = "0.7.3", features = [ "unsafe-postgres" ] }/g' Cargo.toml && \
|
||||
|
||||
Makefile (40 changes)
@@ -29,6 +29,7 @@ else ifeq ($(UNAME_S),Darwin)
|
||||
# It can be configured with OPENSSL_PREFIX variable
|
||||
OPENSSL_PREFIX ?= $(shell brew --prefix openssl@3)
|
||||
PG_CONFIGURE_OPTS += --with-includes=$(OPENSSL_PREFIX)/include --with-libraries=$(OPENSSL_PREFIX)/lib
|
||||
PG_CONFIGURE_OPTS += PKG_CONFIG_PATH=$(shell brew --prefix icu4c)/lib/pkgconfig
|
||||
# macOS already has bison and flex in the system, but they are old and result in postgres-v14 target failure
|
||||
# brew formulae are keg-only and not symlinked into HOMEBREW_PREFIX, force their usage
|
||||
EXTRA_PATH_OVERRIDES += $(shell brew --prefix bison)/bin/:$(shell brew --prefix flex)/bin/:
|
||||
@@ -83,6 +84,8 @@ $(POSTGRES_INSTALL_DIR)/build/%/config.status:
|
||||
# I'm not sure why it wouldn't work, but this is the only place (apart from
|
||||
# the "build-all-versions" entry points) where direct mention of PostgreSQL
|
||||
# versions is used.
|
||||
.PHONY: postgres-configure-v16
|
||||
postgres-configure-v16: $(POSTGRES_INSTALL_DIR)/build/v16/config.status
|
||||
.PHONY: postgres-configure-v15
|
||||
postgres-configure-v15: $(POSTGRES_INSTALL_DIR)/build/v15/config.status
|
||||
.PHONY: postgres-configure-v14
|
||||
@@ -118,6 +121,10 @@ postgres-clean-%:
|
||||
$(MAKE) -C $(POSTGRES_INSTALL_DIR)/build/$*/contrib/pageinspect clean
|
||||
$(MAKE) -C $(POSTGRES_INSTALL_DIR)/build/$*/src/interfaces/libpq clean
|
||||
|
||||
.PHONY: postgres-check-%
|
||||
postgres-check-%: postgres-%
|
||||
$(MAKE) -C $(POSTGRES_INSTALL_DIR)/build/$* MAKELEVEL=0 check
|
||||
|
||||
.PHONY: neon-pg-ext-%
|
||||
neon-pg-ext-%: postgres-%
|
||||
+@echo "Compiling neon $*"
|
||||
@@ -130,6 +137,11 @@ neon-pg-ext-%: postgres-%
|
||||
$(MAKE) PG_CONFIG=$(POSTGRES_INSTALL_DIR)/$*/bin/pg_config CFLAGS='$(PG_CFLAGS) $(COPT)' \
|
||||
-C $(POSTGRES_INSTALL_DIR)/build/neon-walredo-$* \
|
||||
-f $(ROOT_PROJECT_DIR)/pgxn/neon_walredo/Makefile install
|
||||
+@echo "Compiling neon_rmgr $*"
|
||||
mkdir -p $(POSTGRES_INSTALL_DIR)/build/neon-rmgr-$*
|
||||
$(MAKE) PG_CONFIG=$(POSTGRES_INSTALL_DIR)/$*/bin/pg_config CFLAGS='$(PG_CFLAGS) $(COPT)' \
|
||||
-C $(POSTGRES_INSTALL_DIR)/build/neon-rmgr-$* \
|
||||
-f $(ROOT_PROJECT_DIR)/pgxn/neon_rmgr/Makefile install
|
||||
+@echo "Compiling neon_test_utils $*"
|
||||
mkdir -p $(POSTGRES_INSTALL_DIR)/build/neon-test-utils-$*
|
||||
$(MAKE) PG_CONFIG=$(POSTGRES_INSTALL_DIR)/$*/bin/pg_config CFLAGS='$(PG_CFLAGS) $(COPT)' \
|
||||
@@ -140,6 +152,13 @@ neon-pg-ext-%: postgres-%
|
||||
$(MAKE) PG_CONFIG=$(POSTGRES_INSTALL_DIR)/$*/bin/pg_config CFLAGS='$(PG_CFLAGS) $(COPT)' \
|
||||
-C $(POSTGRES_INSTALL_DIR)/build/neon-utils-$* \
|
||||
-f $(ROOT_PROJECT_DIR)/pgxn/neon_utils/Makefile install
|
||||
|
||||
# pg_embedding was temporarily released as hnsw from this repo, when we only
|
||||
# supported PostgreSQL 14 and 15
|
||||
neon-pg-ext-v14: neon-pg-ext-hnsw-v14
|
||||
neon-pg-ext-v15: neon-pg-ext-hnsw-v15
|
||||
|
||||
neon-pg-ext-hnsw-%: postgres-headers-% postgres-%
|
||||
+@echo "Compiling hnsw $*"
|
||||
mkdir -p $(POSTGRES_INSTALL_DIR)/build/hnsw-$*
|
||||
$(MAKE) PG_CONFIG=$(POSTGRES_INSTALL_DIR)/$*/bin/pg_config CFLAGS='$(PG_CFLAGS) $(COPT)' \
|
||||
@@ -167,28 +186,39 @@ neon-pg-ext-clean-%:
|
||||
.PHONY: neon-pg-ext
|
||||
neon-pg-ext: \
|
||||
neon-pg-ext-v14 \
|
||||
neon-pg-ext-v15
|
||||
neon-pg-ext-v15 \
|
||||
neon-pg-ext-v16
|
||||
|
||||
.PHONY: neon-pg-ext-clean
|
||||
neon-pg-ext-clean: \
|
||||
neon-pg-ext-clean-v14 \
|
||||
neon-pg-ext-clean-v15
|
||||
neon-pg-ext-clean-v15 \
|
||||
neon-pg-ext-clean-v16
|
||||
|
||||
# shorthand to build all Postgres versions
|
||||
.PHONY: postgres
|
||||
postgres: \
|
||||
postgres-v14 \
|
||||
postgres-v15
|
||||
postgres-v15 \
|
||||
postgres-v16
|
||||
|
||||
.PHONY: postgres-headers
|
||||
postgres-headers: \
|
||||
postgres-headers-v14 \
|
||||
postgres-headers-v15
|
||||
postgres-headers-v15 \
|
||||
postgres-headers-v16
|
||||
|
||||
.PHONY: postgres-clean
|
||||
postgres-clean: \
|
||||
postgres-clean-v14 \
|
||||
postgres-clean-v15
|
||||
postgres-clean-v15 \
|
||||
postgres-clean-v16
|
||||
|
||||
.PHONY: postgres-check
|
||||
postgres-check: \
|
||||
postgres-check-v14 \
|
||||
postgres-check-v15 \
|
||||
postgres-check-v16
|
||||
|
||||
# This doesn't remove the effects of 'configure'.
|
||||
.PHONY: clean
|
||||
|
||||
@@ -55,7 +55,7 @@ curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh
1. Install XCode and dependencies
```
xcode-select --install
brew install protobuf openssl flex bison
brew install protobuf openssl flex bison icu4c pkg-config

# add openssl to PATH, required for ed25519 keys generation in neon_local
echo 'export PATH="$(brew --prefix openssl)/bin:$PATH"' >> ~/.zshrc
@@ -107,19 +107,25 @@ fn get_pg_config(argument: &str, pgbin: &str) -> String {

pub fn get_pg_version(pgbin: &str) -> String {
// pg_config --version returns a (platform specific) human readable string
// such as "PostgreSQL 15.4". We parse this to v14/v15
// such as "PostgreSQL 15.4". We parse this to v14/v15/v16 etc.
let human_version = get_pg_config("--version", pgbin);
return parse_pg_version(&human_version).to_string();
}

fn parse_pg_version(human_version: &str) -> &str {
match Regex::new(r"(?<major>\d+)\.(?<minor>\d+)")
// Normal releases have version strings like "PostgreSQL 15.4". But there
// are also pre-release versions like "PostgreSQL 17devel" or "PostgreSQL
// 16beta2" or "PostgreSQL 17rc1". And with the --with-extra-version
// configure option, you can tack any string to the version number,
// e.g. "PostgreSQL 15.4foobar".
match Regex::new(r"^PostgreSQL (?<major>\d+).+")
.unwrap()
.captures(human_version)
{
Some(captures) if captures.len() == 3 => match &captures["major"] {
Some(captures) if captures.len() == 2 => match &captures["major"] {
"14" => return "v14",
"15" => return "v15",
"16" => return "v16",
_ => {}
},
_ => {}
@@ -146,6 +152,11 @@ mod tests {
parse_pg_version("PostgreSQL 14.9 (Debian 14.9-1.pgdg120+1"),
"v14"
);

assert_eq!(parse_pg_version("PostgreSQL 16devel"), "v16");
assert_eq!(parse_pg_version("PostgreSQL 16beta1"), "v16");
assert_eq!(parse_pg_version("PostgreSQL 16rc2"), "v16");
assert_eq!(parse_pg_version("PostgreSQL 16extra"), "v16");
}
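For context, a self-contained sketch of how the new version parsing behaves. It mirrors the regex and mapping added above; the panic arms and main() are added only for illustration, since the original function's fallthrough is not shown in this hunk:

```rust
use regex::Regex; // regex 1.9+ for the (?<name>) capture syntax

// Only the major version matters, so "PostgreSQL 16devel", "16beta1",
// "16rc2" and "16.0" all map to "v16".
fn parse_pg_version(human_version: &str) -> &str {
    match Regex::new(r"^PostgreSQL (?<major>\d+).+")
        .unwrap()
        .captures(human_version)
    {
        // group 0 plus the named "major" group => captures.len() == 2
        Some(captures) if captures.len() == 2 => match &captures["major"] {
            "14" => "v14",
            "15" => "v15",
            "16" => "v16",
            _ => panic!("unsupported PostgreSQL major version"),
        },
        _ => panic!("unrecognized pg_config --version output"),
    }
}

fn main() {
    assert_eq!(parse_pg_version("PostgreSQL 16beta1"), "v16");
    assert_eq!(parse_pg_version("PostgreSQL 15.4 (Debian 15.4-1)"), "v15");
}
```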
#[test]
@@ -182,26 +182,18 @@ impl LocalEnv {
pub fn pg_distrib_dir(&self, pg_version: u32) -> anyhow::Result<PathBuf> {
let path = self.pg_distrib_dir.clone();

#[allow(clippy::manual_range_patterns)]
match pg_version {
14 => Ok(path.join(format!("v{pg_version}"))),
15 => Ok(path.join(format!("v{pg_version}"))),
14 | 15 | 16 => Ok(path.join(format!("v{pg_version}"))),
_ => bail!("Unsupported postgres version: {}", pg_version),
}
}

pub fn pg_bin_dir(&self, pg_version: u32) -> anyhow::Result<PathBuf> {
match pg_version {
14 => Ok(self.pg_distrib_dir(pg_version)?.join("bin")),
15 => Ok(self.pg_distrib_dir(pg_version)?.join("bin")),
_ => bail!("Unsupported postgres version: {}", pg_version),
}
Ok(self.pg_distrib_dir(pg_version)?.join("bin"))
}
pub fn pg_lib_dir(&self, pg_version: u32) -> anyhow::Result<PathBuf> {
match pg_version {
14 => Ok(self.pg_distrib_dir(pg_version)?.join("lib")),
15 => Ok(self.pg_distrib_dir(pg_version)?.join("lib")),
_ => bail!("Unsupported postgres version: {}", pg_version),
}
Ok(self.pg_distrib_dir(pg_version)?.join("lib"))
}

pub fn pageserver_bin(&self) -> PathBuf {
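A stripped-down sketch of the resulting shape of these helpers after the change; the LocalEnv struct and error handling are reduced to the minimum needed to compile, and only the names and logic follow the diff:

```rust
use anyhow::bail;
use std::path::PathBuf;

struct LocalEnv {
    pg_distrib_dir: PathBuf,
}

impl LocalEnv {
    // Version validation happens once here...
    fn pg_distrib_dir(&self, pg_version: u32) -> anyhow::Result<PathBuf> {
        match pg_version {
            14 | 15 | 16 => Ok(self.pg_distrib_dir.join(format!("v{pg_version}"))),
            _ => bail!("Unsupported postgres version: {}", pg_version),
        }
    }

    // ...so bin/ and lib/ no longer need their own per-version match arms.
    fn pg_bin_dir(&self, pg_version: u32) -> anyhow::Result<PathBuf> {
        Ok(self.pg_distrib_dir(pg_version)?.join("bin"))
    }

    fn pg_lib_dir(&self, pg_version: u32) -> anyhow::Result<PathBuf> {
        Ok(self.pg_distrib_dir(pg_version)?.join("lib"))
    }
}

fn main() -> anyhow::Result<()> {
    let env = LocalEnv { pg_distrib_dir: PathBuf::from("pg_install") };
    assert_eq!(env.pg_bin_dir(16)?, PathBuf::from("pg_install/v16/bin"));
    assert!(env.pg_bin_dir(13).is_err());
    Ok(())
}
```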
@@ -10,9 +10,11 @@ should be auto-generated too, but that's a TODO.
The PostgreSQL on-disk file format is not portable across different
CPU architectures and operating systems. It is also subject to change
in each major PostgreSQL version. Currently, this module supports
PostgreSQL v14 and v15: bindings and code that depends on them are version-specific.
This code is organized in modules: `postgres_ffi::v14` and `postgres_ffi::v15`
Version independend code is explicitly exported into shared `postgres_ffi`.
PostgreSQL v14, v15 and v16: bindings and code that depends on them are
version-specific.
This code is organized in modules `postgres_ffi::v14`, `postgres_ffi::v15` and
`postgres_ffi::v16`. Version independent code is explicitly exported into
shared `postgres_ffi`.

TODO: Currently, there is also some code that deals with WAL records
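As a small illustration of that layout, version-specific constants added in this commit are reached through the per-version modules, while version-independent helpers are re-exported at the crate root. The import paths below follow how the diff itself refers to these items, but the snippet is only a usage sketch:

```rust
// Version-specific: the same symbol can have different values per major version.
use postgres_ffi::v15::bindings::SIZEOF_RELMAPFILE as RELMAP_V15; // 512
use postgres_ffi::v16::bindings::SIZEOF_RELMAPFILE as RELMAP_V16; // 524

// Version-independent helpers are exported from the shared crate root.
use postgres_ffi::bkpimage_is_compressed;

fn main() -> anyhow::Result<()> {
    assert_ne!(RELMAP_V15, RELMAP_V16);
    // Dispatches to the right per-version flag definitions internally.
    let compressed = bkpimage_is_compressed(0x04, 16)?;
    println!("compressed: {compressed}");
    Ok(())
}
```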
@@ -56,7 +56,7 @@ fn main() -> anyhow::Result<()> {
PathBuf::from("pg_install")
};

for pg_version in &["v14", "v15"] {
for pg_version in &["v14", "v15", "v16"] {
let mut pg_install_dir_versioned = pg_install_dir.join(pg_version);
if pg_install_dir_versioned.is_relative() {
let cwd = env::current_dir().context("Failed to get current_dir")?;
@@ -125,6 +125,7 @@ fn main() -> anyhow::Result<()> {
.allowlist_var("PG_CONTROLFILEDATA_OFFSETOF_CRC")
.allowlist_type("PageHeaderData")
.allowlist_type("DBState")
.allowlist_type("RelMapFile")
// Because structs are used for serialization, tell bindgen to emit
// explicit padding fields.
.explicit_padding(true)

@@ -51,11 +51,59 @@ macro_rules! for_all_postgres_versions {
($macro:tt) => {
$macro!(v14);
$macro!(v15);
$macro!(v16);
};
}

for_all_postgres_versions! { postgres_ffi }

/// dispatch_pgversion
///
/// Run a code block in a context where the postgres_ffi bindings for a
/// specific (supported) PostgreSQL version are `use`-ed in scope under the pgv
/// identifier.
/// If the provided pg_version is not supported, we panic!(), unless the
/// optional third argument was provided (in which case that code will provide
/// the default handling instead).
///
/// Use like
///
/// dispatch_pgversion!(my_pgversion, { pgv::constants::XLOG_DBASE_CREATE })
/// dispatch_pgversion!(my_pgversion, pgv::constants::XLOG_DBASE_CREATE)
///
/// Other uses are for macro-internal purposes only and strictly unsupported.
///
#[macro_export]
macro_rules! dispatch_pgversion {
($version:expr, $code:expr) => {
dispatch_pgversion!($version, $code, panic!("Unknown PostgreSQL version {}", $version))
};
($version:expr, $code:expr, $invalid_pgver_handling:expr) => {
dispatch_pgversion!(
$version => $code,
default = $invalid_pgver_handling,
pgversions = [
14 : v14,
15 : v15,
16 : v16,
]
)
};
($pgversion:expr => $code:expr,
default = $default:expr,
pgversions = [$($sv:literal : $vsv:ident),+ $(,)?]) => {
match ($pgversion) {
$($sv => {
use $crate::$vsv as pgv;
$code
},)+
_ => {
$default
}
}
};
}

pub mod pg_constants;
pub mod relfile_utils;
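The doc comment above gives the canonical usage. As a slightly expanded, hedged sketch of both the panicking two-argument form and the three-argument form with a caller-supplied fallback: the constant name comes from the per-version constant files in this diff, while the surrounding function is illustrative only.

```rust
use postgres_ffi::dispatch_pgversion;

// The same record type can carry a different info value in different majors:
// XLOG_DBASE_DROP is 0x10 on v14 but 0x20 on v15 and v16 (see the per-version
// constant files later in this diff).
fn xlog_dbase_drop_opcode(pg_version: u32) -> anyhow::Result<u8> {
    let opcode = dispatch_pgversion!(
        pg_version,
        pgv::bindings::XLOG_DBASE_DROP,
        anyhow::bail!("Unsupported postgres version: {}", pg_version)
    );
    Ok(opcode)
}

fn main() -> anyhow::Result<()> {
    assert_eq!(xlog_dbase_drop_opcode(14)?, 0x10);
    assert_eq!(xlog_dbase_drop_opcode(16)?, 0x20);
    assert!(xlog_dbase_drop_opcode(13).is_err());
    Ok(())
}
```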
@@ -90,13 +138,7 @@ pub use v14::xlog_utils::XLogFileName;
|
||||
pub use v14::bindings::DBState_DB_SHUTDOWNED;
|
||||
|
||||
pub fn bkpimage_is_compressed(bimg_info: u8, version: u32) -> anyhow::Result<bool> {
|
||||
match version {
|
||||
14 => Ok(bimg_info & v14::bindings::BKPIMAGE_IS_COMPRESSED != 0),
|
||||
15 => Ok(bimg_info & v15::bindings::BKPIMAGE_COMPRESS_PGLZ != 0
|
||||
|| bimg_info & v15::bindings::BKPIMAGE_COMPRESS_LZ4 != 0
|
||||
|| bimg_info & v15::bindings::BKPIMAGE_COMPRESS_ZSTD != 0),
|
||||
_ => anyhow::bail!("Unknown version {}", version),
|
||||
}
|
||||
dispatch_pgversion!(version, Ok(pgv::bindings::bkpimg_is_compressed(bimg_info)))
|
||||
}
|
||||
|
||||
pub fn generate_wal_segment(
|
||||
@@ -107,11 +149,11 @@ pub fn generate_wal_segment(
|
||||
) -> Result<Bytes, SerializeError> {
|
||||
assert_eq!(segno, lsn.segment_number(WAL_SEGMENT_SIZE));
|
||||
|
||||
match pg_version {
|
||||
14 => v14::xlog_utils::generate_wal_segment(segno, system_id, lsn),
|
||||
15 => v15::xlog_utils::generate_wal_segment(segno, system_id, lsn),
|
||||
_ => Err(SerializeError::BadInput),
|
||||
}
|
||||
dispatch_pgversion!(
|
||||
pg_version,
|
||||
pgv::xlog_utils::generate_wal_segment(segno, system_id, lsn),
|
||||
Err(SerializeError::BadInput)
|
||||
)
|
||||
}
|
||||
|
||||
pub fn generate_pg_control(
|
||||
@@ -120,11 +162,11 @@ pub fn generate_pg_control(
|
||||
lsn: Lsn,
|
||||
pg_version: u32,
|
||||
) -> anyhow::Result<(Bytes, u64)> {
|
||||
match pg_version {
|
||||
14 => v14::xlog_utils::generate_pg_control(pg_control_bytes, checkpoint_bytes, lsn),
|
||||
15 => v15::xlog_utils::generate_pg_control(pg_control_bytes, checkpoint_bytes, lsn),
|
||||
_ => anyhow::bail!("Unknown version {}", pg_version),
|
||||
}
|
||||
dispatch_pgversion!(
|
||||
pg_version,
|
||||
pgv::xlog_utils::generate_pg_control(pg_control_bytes, checkpoint_bytes, lsn),
|
||||
anyhow::bail!("Unknown version {}", pg_version)
|
||||
)
|
||||
}
|
||||
|
||||
// PG timeline is always 1, changing it doesn't have any useful meaning in Neon.
|
||||
@@ -196,8 +238,6 @@ pub fn fsm_logical_to_physical(addr: BlockNumber) -> BlockNumber {
|
||||
}
|
||||
|
||||
pub mod waldecoder {
|
||||
|
||||
use crate::{v14, v15};
|
||||
use bytes::{Buf, Bytes, BytesMut};
|
||||
use std::num::NonZeroU32;
|
||||
use thiserror::Error;
|
||||
@@ -248,22 +288,17 @@ pub mod waldecoder {
|
||||
}
|
||||
|
||||
pub fn poll_decode(&mut self) -> Result<Option<(Lsn, Bytes)>, WalDecodeError> {
|
||||
match self.pg_version {
|
||||
// This is a trick to support both versions simultaneously.
|
||||
// See WalStreamDecoderHandler comments.
|
||||
14 => {
|
||||
use self::v14::waldecoder_handler::WalStreamDecoderHandler;
|
||||
dispatch_pgversion!(
|
||||
self.pg_version,
|
||||
{
|
||||
use pgv::waldecoder_handler::WalStreamDecoderHandler;
|
||||
self.poll_decode_internal()
|
||||
}
|
||||
15 => {
|
||||
use self::v15::waldecoder_handler::WalStreamDecoderHandler;
|
||||
self.poll_decode_internal()
|
||||
}
|
||||
_ => Err(WalDecodeError {
|
||||
},
|
||||
Err(WalDecodeError {
|
||||
msg: format!("Unknown version {}", self.pg_version),
|
||||
lsn: self.lsn,
|
||||
}),
|
||||
}
|
||||
})
|
||||
)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -163,6 +163,20 @@ pub const RM_HEAP2_ID: u8 = 9;
pub const RM_HEAP_ID: u8 = 10;
pub const RM_LOGICALMSG_ID: u8 = 21;

// from neon_rmgr.h
pub const RM_NEON_ID: u8 = 134;

pub const XLOG_NEON_HEAP_INIT_PAGE: u8 = 0x80;

pub const XLOG_NEON_HEAP_INSERT: u8 = 0x00;
pub const XLOG_NEON_HEAP_DELETE: u8 = 0x10;
pub const XLOG_NEON_HEAP_UPDATE: u8 = 0x20;
pub const XLOG_NEON_HEAP_HOT_UPDATE: u8 = 0x30;
pub const XLOG_NEON_HEAP_LOCK: u8 = 0x40;
pub const XLOG_NEON_HEAP_MULTI_INSERT: u8 = 0x50;

pub const XLOG_NEON_HEAP_VISIBLE: u8 = 0x40;

// from xlogreader.h
pub const XLR_INFO_MASK: u8 = 0x0F;
pub const XLR_RMGR_INFO_MASK: u8 = 0xF0;
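A brief sketch of how these constants combine when decoding a record's xl_info byte: the low nibble is reserved for xlog-internal flags, the high nibble carries the resource manager's opcode, and XLOG_NEON_HEAP_INIT_PAGE is an extra flag bit that can be OR-ed onto the opcode. The decode helper below is illustrative, not code from the repository:

```rust
const XLR_RMGR_INFO_MASK: u8 = 0xF0;
const XLOG_NEON_HEAP_INIT_PAGE: u8 = 0x80;
const XLOG_NEON_HEAP_INSERT: u8 = 0x00;
const XLOG_NEON_HEAP_MULTI_INSERT: u8 = 0x50;

// Roughly how ingest code separates the opcode from the INIT_PAGE flag.
fn decode_neon_heap_info(xl_info: u8) -> (u8, bool) {
    let opcode = xl_info & XLR_RMGR_INFO_MASK & !XLOG_NEON_HEAP_INIT_PAGE;
    let init_page = (xl_info & XLOG_NEON_HEAP_INIT_PAGE) != 0;
    (opcode, init_page)
}

fn main() {
    // MULTI_INSERT with the INIT_PAGE flag set.
    let (op, init) = decode_neon_heap_info(XLOG_NEON_HEAP_MULTI_INSERT | XLOG_NEON_HEAP_INIT_PAGE);
    assert_eq!(op, XLOG_NEON_HEAP_MULTI_INSERT);
    assert!(init);
    // Plain INSERT, no flag.
    assert_eq!(decode_neon_heap_info(XLOG_NEON_HEAP_INSERT), (XLOG_NEON_HEAP_INSERT, false));
}
```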
@@ -3,3 +3,8 @@ pub const XLOG_DBASE_DROP: u8 = 0x10;

pub const BKPIMAGE_IS_COMPRESSED: u8 = 0x02; /* page image is compressed */
pub const BKPIMAGE_APPLY: u8 = 0x04; /* page image should be restored during replay */
pub const SIZEOF_RELMAPFILE: usize = 512; /* sizeof(RelMapFile) in relmapper.c */

pub fn bkpimg_is_compressed(bimg_info: u8) -> bool {
(bimg_info & BKPIMAGE_IS_COMPRESSED) != 0
}
@@ -1,10 +1,18 @@
pub const XACT_XINFO_HAS_DROPPED_STATS: u32 = 1u32 << 8;

pub const XLOG_DBASE_CREATE_FILE_COPY: u8 = 0x00;
pub const XLOG_DBASE_CREATE_WAL_LOG: u8 = 0x00;
pub const XLOG_DBASE_CREATE_WAL_LOG: u8 = 0x10;
pub const XLOG_DBASE_DROP: u8 = 0x20;

pub const BKPIMAGE_APPLY: u8 = 0x02; /* page image should be restored during replay */
pub const BKPIMAGE_COMPRESS_PGLZ: u8 = 0x04; /* page image is compressed */
pub const BKPIMAGE_COMPRESS_LZ4: u8 = 0x08; /* page image is compressed */
pub const BKPIMAGE_COMPRESS_ZSTD: u8 = 0x10; /* page image is compressed */

pub const SIZEOF_RELMAPFILE: usize = 512; /* sizeof(RelMapFile) in relmapper.c */

pub fn bkpimg_is_compressed(bimg_info: u8) -> bool {
const ANY_COMPRESS_FLAG: u8 = BKPIMAGE_COMPRESS_PGLZ | BKPIMAGE_COMPRESS_LZ4 | BKPIMAGE_COMPRESS_ZSTD;

(bimg_info & ANY_COMPRESS_FLAG) != 0
}
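To tie this to the shared bkpimage_is_compressed() helper changed earlier in this diff, a hedged usage sketch; the flag values come from the per-version files above, and only the calls shown are assumed to exist:

```rust
use postgres_ffi::bkpimage_is_compressed;

fn main() -> anyhow::Result<()> {
    // 0x02 is BKPIMAGE_IS_COMPRESSED on v14, but BKPIMAGE_APPLY on v15/v16,
    // so the same bimg_info byte means different things per version.
    assert!(bkpimage_is_compressed(0x02, 14)?);
    assert!(!bkpimage_is_compressed(0x02, 15)?);
    // On v15/v16 any of the PGLZ/LZ4/ZSTD flags counts as compressed.
    assert!(bkpimage_is_compressed(0x08, 16)?);
    Ok(())
}
```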
libs/postgres_ffi/src/pg_constants_v16.rs (new file, 18 lines)
@@ -0,0 +1,18 @@
pub const XACT_XINFO_HAS_DROPPED_STATS: u32 = 1u32 << 8;

pub const XLOG_DBASE_CREATE_FILE_COPY: u8 = 0x00;
pub const XLOG_DBASE_CREATE_WAL_LOG: u8 = 0x10;
pub const XLOG_DBASE_DROP: u8 = 0x20;

pub const BKPIMAGE_APPLY: u8 = 0x02; /* page image should be restored during replay */
pub const BKPIMAGE_COMPRESS_PGLZ: u8 = 0x04; /* page image is compressed */
pub const BKPIMAGE_COMPRESS_LZ4: u8 = 0x08; /* page image is compressed */
pub const BKPIMAGE_COMPRESS_ZSTD: u8 = 0x10; /* page image is compressed */

pub const SIZEOF_RELMAPFILE: usize = 524; /* sizeof(RelMapFile) in relmapper.c */

pub fn bkpimg_is_compressed(bimg_info: u8) -> bool {
const ANY_COMPRESS_FLAG: u8 = BKPIMAGE_COMPRESS_PGLZ | BKPIMAGE_COMPRESS_LZ4 | BKPIMAGE_COMPRESS_ZSTD;

(bimg_info & ANY_COMPRESS_FLAG) != 0
}
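One practical consequence of this new file: the relation-map file is 524 bytes on v16 rather than 512, and the basebackup code later in this diff checks the image length against the per-version constant. A hedged sketch of that check in isolation (the helper function is illustrative, not repository code):

```rust
use postgres_ffi::dispatch_pgversion;

// Validate a relmapper file image against the size expected for this version.
fn check_relmap_image(img: &[u8], pg_version: u32) -> anyhow::Result<()> {
    let expected = dispatch_pgversion!(pg_version, pgv::bindings::SIZEOF_RELMAPFILE);
    anyhow::ensure!(
        img.len() == expected,
        "unexpected relmap file size: got {}, expected {}",
        img.len(),
        expected
    );
    Ok(())
}

fn main() -> anyhow::Result<()> {
    check_relmap_image(&[0u8; 512], 15)?; // ok on v15
    assert!(check_relmap_image(&[0u8; 512], 16).is_err()); // v16 expects 524 bytes
    Ok(())
}
```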
@@ -49,9 +49,9 @@ impl Conf {
pub fn pg_distrib_dir(&self) -> anyhow::Result<PathBuf> {
let path = self.pg_distrib_dir.clone();

#[allow(clippy::manual_range_patterns)]
match self.pg_version {
14 => Ok(path.join(format!("v{}", self.pg_version))),
15 => Ok(path.join(format!("v{}", self.pg_version))),
14 | 15 | 16 => Ok(path.join(format!("v{}", self.pg_version))),
_ => bail!("Unsupported postgres version: {}", self.pg_version),
}
}
@@ -250,11 +250,18 @@ fn craft_internal<C: postgres::GenericClient>(
|
||||
let (mut intermediate_lsns, last_lsn) = f(client, initial_lsn)?;
|
||||
let last_lsn = match last_lsn {
|
||||
None => client.pg_current_wal_insert_lsn()?,
|
||||
Some(last_lsn) => match last_lsn.cmp(&client.pg_current_wal_insert_lsn()?) {
|
||||
Ordering::Less => bail!("Some records were inserted after the crafted WAL"),
|
||||
Ordering::Equal => last_lsn,
|
||||
Ordering::Greater => bail!("Reported LSN is greater than insert_lsn"),
|
||||
},
|
||||
Some(last_lsn) => {
|
||||
let insert_lsn = client.pg_current_wal_insert_lsn()?;
|
||||
match last_lsn.cmp(&insert_lsn) {
|
||||
Ordering::Less => bail!(
|
||||
"Some records were inserted after the crafted WAL: {} vs {}",
|
||||
last_lsn,
|
||||
insert_lsn
|
||||
),
|
||||
Ordering::Equal => last_lsn,
|
||||
Ordering::Greater => bail!("Reported LSN is greater than insert_lsn"),
|
||||
}
|
||||
}
|
||||
};
|
||||
if !intermediate_lsns.starts_with(&[initial_lsn]) {
|
||||
intermediate_lsns.insert(0, initial_lsn);
|
||||
@@ -363,8 +370,9 @@ impl Crafter for LastWalRecordXlogSwitchEndsOnPageBoundary {
|
||||
);
|
||||
ensure!(
|
||||
u64::from(after_xlog_switch) as usize % XLOG_BLCKSZ == XLOG_SIZE_OF_XLOG_SHORT_PHD,
|
||||
"XLOG_SWITCH message ended not on page boundary: {}",
|
||||
after_xlog_switch
|
||||
"XLOG_SWITCH message ended not on page boundary: {}, offset = {}",
|
||||
after_xlog_switch,
|
||||
u64::from(after_xlog_switch) as usize % XLOG_BLCKSZ
|
||||
);
|
||||
Ok((vec![before_xlog_switch, after_xlog_switch], next_segment))
|
||||
}
|
||||
|
||||
@@ -9,11 +9,12 @@ PORT=$4
|
||||
SYSID=$(od -A n -j 24 -N 8 -t d8 "$WAL_PATH"/000000010000000000000002* | cut -c 3-)
|
||||
rm -fr "$DATA_DIR"
|
||||
env -i LD_LIBRARY_PATH="$PG_BIN"/../lib "$PG_BIN"/initdb -E utf8 -U cloud_admin -D "$DATA_DIR" --sysid="$SYSID"
|
||||
echo port="$PORT" >> "$DATA_DIR"/postgresql.conf
|
||||
echo "port=$PORT" >> "$DATA_DIR"/postgresql.conf
|
||||
echo "shared_preload_libraries='\$libdir/neon_rmgr.so'" >> "$DATA_DIR"/postgresql.conf
|
||||
REDO_POS=0x$("$PG_BIN"/pg_controldata -D "$DATA_DIR" | grep -F "REDO location"| cut -c 42-)
|
||||
declare -i WAL_SIZE=$REDO_POS+114
|
||||
"$PG_BIN"/pg_ctl -D "$DATA_DIR" -l logfile start
|
||||
"$PG_BIN"/pg_ctl -D "$DATA_DIR" -l logfile stop -m immediate
|
||||
"$PG_BIN"/pg_ctl -D "$DATA_DIR" -l "$DATA_DIR/logfile.log" start
|
||||
"$PG_BIN"/pg_ctl -D "$DATA_DIR" -l "$DATA_DIR/logfile.log" stop -m immediate
|
||||
cp "$DATA_DIR"/pg_wal/000000010000000000000001 .
|
||||
cp "$WAL_PATH"/* "$DATA_DIR"/pg_wal/
|
||||
for partial in "$DATA_DIR"/pg_wal/*.partial ; do mv "$partial" "${partial%.partial}" ; done
|
||||
|
||||
@@ -25,6 +25,7 @@ use crate::context::RequestContext;
use crate::tenant::Timeline;
use pageserver_api::reltag::{RelTag, SlruKind};

use postgres_ffi::dispatch_pgversion;
use postgres_ffi::pg_constants::{DEFAULTTABLESPACE_OID, GLOBALTABLESPACE_OID};
use postgres_ffi::pg_constants::{PGDATA_SPECIAL_FILES, PGDATA_SUBDIRS, PG_HBA};
use postgres_ffi::relfile_utils::{INIT_FORKNUM, MAIN_FORKNUM};
@@ -323,14 +324,25 @@ where
.timeline
.get_relmap_file(spcnode, dbnode, self.lsn, self.ctx)
.await?;
ensure!(img.len() == 512);

ensure!(
img.len()
== dispatch_pgversion!(
self.timeline.pg_version,
pgv::bindings::SIZEOF_RELMAPFILE
)
);

Some(img)
} else {
None
};

if spcnode == GLOBALTABLESPACE_OID {
let pg_version_str = self.timeline.pg_version.to_string();
let pg_version_str = match self.timeline.pg_version {
14 | 15 => self.timeline.pg_version.to_string(),
ver => format!("{ver}\x0A"),
};
let header = new_tar_header("PG_VERSION", pg_version_str.len() as u64)?;
self.ar.append(&header, pg_version_str.as_bytes()).await?;

@@ -374,7 +386,10 @@ where
if let Some(img) = relmap_img {
let dst_path = format!("base/{}/PG_VERSION", dbnode);

let pg_version_str = self.timeline.pg_version.to_string();
let pg_version_str = match self.timeline.pg_version {
14 | 15 => self.timeline.pg_version.to_string(),
ver => format!("{ver}\x0A"),
};
let header = new_tar_header(&dst_path, pg_version_str.len() as u64)?;
self.ar.append(&header, pg_version_str.as_bytes()).await?;
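The PG_VERSION change above is subtle: from v16 onward the file content gets an explicit trailing newline (\x0A), while v14 and v15 keep the bare number. A small sketch of just that formatting decision, with the helper name invented for illustration:

```rust
// Mirrors the match added above: older majors write "14"/"15", newer ones "16\n".
fn pg_version_file_contents(pg_version: u32) -> String {
    match pg_version {
        14 | 15 => pg_version.to_string(),
        ver => format!("{ver}\x0A"), // \x0A is '\n'
    }
}

fn main() {
    assert_eq!(pg_version_file_contents(15), "15");
    assert_eq!(pg_version_file_contents(16), "16\n");
    // The tar header's size field has to match, which is why the real code
    // derives it from pg_version_str.len().
    assert_eq!(pg_version_file_contents(16).len(), 3);
}
```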
@@ -668,26 +668,18 @@ impl PageServerConf {
|
||||
pub fn pg_distrib_dir(&self, pg_version: u32) -> anyhow::Result<PathBuf> {
|
||||
let path = self.pg_distrib_dir.clone();
|
||||
|
||||
#[allow(clippy::manual_range_patterns)]
|
||||
match pg_version {
|
||||
14 => Ok(path.join(format!("v{pg_version}"))),
|
||||
15 => Ok(path.join(format!("v{pg_version}"))),
|
||||
14 | 15 | 16 => Ok(path.join(format!("v{pg_version}"))),
|
||||
_ => bail!("Unsupported postgres version: {}", pg_version),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn pg_bin_dir(&self, pg_version: u32) -> anyhow::Result<PathBuf> {
|
||||
match pg_version {
|
||||
14 => Ok(self.pg_distrib_dir(pg_version)?.join("bin")),
|
||||
15 => Ok(self.pg_distrib_dir(pg_version)?.join("bin")),
|
||||
_ => bail!("Unsupported postgres version: {}", pg_version),
|
||||
}
|
||||
Ok(self.pg_distrib_dir(pg_version)?.join("bin"))
|
||||
}
|
||||
pub fn pg_lib_dir(&self, pg_version: u32) -> anyhow::Result<PathBuf> {
|
||||
match pg_version {
|
||||
14 => Ok(self.pg_distrib_dir(pg_version)?.join("lib")),
|
||||
15 => Ok(self.pg_distrib_dir(pg_version)?.join("lib")),
|
||||
_ => bail!("Unsupported postgres version: {}", pg_version),
|
||||
}
|
||||
Ok(self.pg_distrib_dir(pg_version)?.join("lib"))
|
||||
}
|
||||
|
||||
/// Parse a configuration file (pageserver.toml) into a PageServerConf struct,
|
||||
|
||||
@@ -25,7 +25,7 @@ use postgres_ffi::v14::nonrelfile_utils::clogpage_precedes;
use postgres_ffi::v14::nonrelfile_utils::slru_may_delete_clogsegment;
use postgres_ffi::{fsm_logical_to_physical, page_is_new, page_set_lsn};

use anyhow::{Context, Result};
use anyhow::{bail, Context, Result};
use bytes::{Buf, Bytes, BytesMut};
use tracing::*;

@@ -106,6 +106,10 @@ impl<'a> WalIngest<'a> {
self.ingest_heapam_record(&mut buf, modification, decoded, ctx)
.await?;
}
if decoded.xl_rmid == pg_constants::RM_NEON_ID {
self.ingest_neonrmgr_record(&mut buf, modification, decoded, ctx)
.await?;
}
// Handle other special record types
if decoded.xl_rmid == pg_constants::RM_SMGR_ID
&& (decoded.xl_info & pg_constants::XLR_RMGR_INFO_MASK)
@@ -172,6 +176,32 @@ impl<'a> WalIngest<'a> {
.await?;
}
}
} else if self.timeline.pg_version == 16 {
if (decoded.xl_info & pg_constants::XLR_RMGR_INFO_MASK)
== postgres_ffi::v16::bindings::XLOG_DBASE_CREATE_WAL_LOG
{
debug!("XLOG_DBASE_CREATE_WAL_LOG: noop");
} else if (decoded.xl_info & pg_constants::XLR_RMGR_INFO_MASK)
== postgres_ffi::v16::bindings::XLOG_DBASE_CREATE_FILE_COPY
{
// The XLOG record was renamed between v14 and v15,
// but the record format is the same.
// So we can reuse XlCreateDatabase here.
debug!("XLOG_DBASE_CREATE_FILE_COPY");
let createdb = XlCreateDatabase::decode(&mut buf);
self.ingest_xlog_dbase_create(modification, &createdb, ctx)
.await?;
} else if (decoded.xl_info & pg_constants::XLR_RMGR_INFO_MASK)
== postgres_ffi::v16::bindings::XLOG_DBASE_DROP
{
let dropdb = XlDropDatabase::decode(&mut buf);
for tablespace_id in dropdb.tablespace_ids {
trace!("Drop db {}, {}", tablespace_id, dropdb.db_id);
modification
.drop_dbdir(tablespace_id, dropdb.db_id, ctx)
.await?;
}
}
}
} else if decoded.xl_rmid == pg_constants::RM_TBLSPC_ID {
trace!("XLOG_TBLSPC_CREATE/DROP is not handled yet");
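The v16 branch above routes database-level records by comparing the masked info bits against the v16 opcode constants. A compact, self-contained sketch of just that routing decision; record decoding and timeline mutation are stubbed out, and only the constants and masking come from the diff:

```rust
const XLR_RMGR_INFO_MASK: u8 = 0xF0;
// v16 values from pg_constants_v16.rs in this diff.
const XLOG_DBASE_CREATE_FILE_COPY: u8 = 0x00;
const XLOG_DBASE_CREATE_WAL_LOG: u8 = 0x10;
const XLOG_DBASE_DROP: u8 = 0x20;

#[derive(Debug, PartialEq)]
enum DbaseAction {
    Noop,          // WAL_LOG-style create: the pages arrive as ordinary WAL records later
    CreateViaCopy, // FILE_COPY-style create: reuse the older-format XlCreateDatabase record
    Drop,
    Unknown,
}

fn route_dbase_record_v16(xl_info: u8) -> DbaseAction {
    match xl_info & XLR_RMGR_INFO_MASK {
        XLOG_DBASE_CREATE_WAL_LOG => DbaseAction::Noop,
        XLOG_DBASE_CREATE_FILE_COPY => DbaseAction::CreateViaCopy,
        XLOG_DBASE_DROP => DbaseAction::Drop,
        _ => DbaseAction::Unknown,
    }
}

fn main() {
    assert_eq!(route_dbase_record_v16(0x10), DbaseAction::Noop);
    assert_eq!(route_dbase_record_v16(0x20), DbaseAction::Drop);
}
```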
@@ -414,57 +444,346 @@ impl<'a> WalIngest<'a> {
|
||||
// need to clear the corresponding bits in the visibility map.
|
||||
let mut new_heap_blkno: Option<u32> = None;
|
||||
let mut old_heap_blkno: Option<u32> = None;
|
||||
if decoded.xl_rmid == pg_constants::RM_HEAP_ID {
|
||||
let info = decoded.xl_info & pg_constants::XLOG_HEAP_OPMASK;
|
||||
if info == pg_constants::XLOG_HEAP_INSERT {
|
||||
let xlrec = XlHeapInsert::decode(buf);
|
||||
assert_eq!(0, buf.remaining());
|
||||
if (xlrec.flags & pg_constants::XLH_INSERT_ALL_VISIBLE_CLEARED) != 0 {
|
||||
new_heap_blkno = Some(decoded.blocks[0].blkno);
|
||||
}
|
||||
} else if info == pg_constants::XLOG_HEAP_DELETE {
|
||||
let xlrec = XlHeapDelete::decode(buf);
|
||||
assert_eq!(0, buf.remaining());
|
||||
if (xlrec.flags & pg_constants::XLH_DELETE_ALL_VISIBLE_CLEARED) != 0 {
|
||||
new_heap_blkno = Some(decoded.blocks[0].blkno);
|
||||
}
|
||||
} else if info == pg_constants::XLOG_HEAP_UPDATE
|
||||
|| info == pg_constants::XLOG_HEAP_HOT_UPDATE
|
||||
{
|
||||
let xlrec = XlHeapUpdate::decode(buf);
|
||||
// the size of tuple data is inferred from the size of the record.
|
||||
// we can't validate the remaining number of bytes without parsing
|
||||
// the tuple data.
|
||||
if (xlrec.flags & pg_constants::XLH_UPDATE_OLD_ALL_VISIBLE_CLEARED) != 0 {
|
||||
old_heap_blkno = Some(decoded.blocks[0].blkno);
|
||||
}
|
||||
if (xlrec.flags & pg_constants::XLH_UPDATE_NEW_ALL_VISIBLE_CLEARED) != 0 {
|
||||
// PostgreSQL only uses XLH_UPDATE_NEW_ALL_VISIBLE_CLEARED on a
|
||||
// non-HOT update where the new tuple goes to different page than
|
||||
// the old one. Otherwise, only XLH_UPDATE_OLD_ALL_VISIBLE_CLEARED is
|
||||
// set.
|
||||
new_heap_blkno = Some(decoded.blocks[1].blkno);
|
||||
|
||||
match self.timeline.pg_version {
|
||||
14 => {
|
||||
if decoded.xl_rmid == pg_constants::RM_HEAP_ID {
|
||||
let info = decoded.xl_info & pg_constants::XLOG_HEAP_OPMASK;
|
||||
|
||||
if info == pg_constants::XLOG_HEAP_INSERT {
|
||||
let xlrec = v14::XlHeapInsert::decode(buf);
|
||||
assert_eq!(0, buf.remaining());
|
||||
if (xlrec.flags & pg_constants::XLH_INSERT_ALL_VISIBLE_CLEARED) != 0 {
|
||||
new_heap_blkno = Some(decoded.blocks[0].blkno);
|
||||
}
|
||||
} else if info == pg_constants::XLOG_HEAP_DELETE {
|
||||
let xlrec = v14::XlHeapDelete::decode(buf);
|
||||
assert_eq!(0, buf.remaining());
|
||||
if (xlrec.flags & pg_constants::XLH_DELETE_ALL_VISIBLE_CLEARED) != 0 {
|
||||
new_heap_blkno = Some(decoded.blocks[0].blkno);
|
||||
}
|
||||
} else if info == pg_constants::XLOG_HEAP_UPDATE
|
||||
|| info == pg_constants::XLOG_HEAP_HOT_UPDATE
|
||||
{
|
||||
let xlrec = v14::XlHeapUpdate::decode(buf);
|
||||
// the size of tuple data is inferred from the size of the record.
|
||||
// we can't validate the remaining number of bytes without parsing
|
||||
// the tuple data.
|
||||
if (xlrec.flags & pg_constants::XLH_UPDATE_OLD_ALL_VISIBLE_CLEARED) != 0 {
|
||||
old_heap_blkno = Some(decoded.blocks[0].blkno);
|
||||
}
|
||||
if (xlrec.flags & pg_constants::XLH_UPDATE_NEW_ALL_VISIBLE_CLEARED) != 0 {
|
||||
// PostgreSQL only uses XLH_UPDATE_NEW_ALL_VISIBLE_CLEARED on a
|
||||
// non-HOT update where the new tuple goes to different page than
|
||||
// the old one. Otherwise, only XLH_UPDATE_OLD_ALL_VISIBLE_CLEARED is
|
||||
// set.
|
||||
new_heap_blkno = Some(decoded.blocks[1].blkno);
|
||||
}
|
||||
}
|
||||
} else if decoded.xl_rmid == pg_constants::RM_HEAP2_ID {
|
||||
let info = decoded.xl_info & pg_constants::XLOG_HEAP_OPMASK;
|
||||
if info == pg_constants::XLOG_HEAP2_MULTI_INSERT {
|
||||
let xlrec = v14::XlHeapMultiInsert::decode(buf);
|
||||
|
||||
let offset_array_len =
|
||||
if decoded.xl_info & pg_constants::XLOG_HEAP_INIT_PAGE > 0 {
|
||||
// the offsets array is omitted if XLOG_HEAP_INIT_PAGE is set
|
||||
0
|
||||
} else {
|
||||
std::mem::size_of::<u16>() * xlrec.ntuples as usize
|
||||
};
|
||||
assert_eq!(offset_array_len, buf.remaining());
|
||||
|
||||
if (xlrec.flags & pg_constants::XLH_INSERT_ALL_VISIBLE_CLEARED) != 0 {
|
||||
new_heap_blkno = Some(decoded.blocks[0].blkno);
|
||||
}
|
||||
}
|
||||
} else {
|
||||
bail!("Unknown RMGR {} for Heap decoding", decoded.xl_rmid);
|
||||
}
|
||||
}
|
||||
} else if decoded.xl_rmid == pg_constants::RM_HEAP2_ID {
|
||||
let info = decoded.xl_info & pg_constants::XLOG_HEAP_OPMASK;
|
||||
if info == pg_constants::XLOG_HEAP2_MULTI_INSERT {
|
||||
let xlrec = XlHeapMultiInsert::decode(buf);
|
||||
15 => {
|
||||
if decoded.xl_rmid == pg_constants::RM_HEAP_ID {
|
||||
let info = decoded.xl_info & pg_constants::XLOG_HEAP_OPMASK;
|
||||
|
||||
let offset_array_len = if decoded.xl_info & pg_constants::XLOG_HEAP_INIT_PAGE > 0 {
|
||||
// the offsets array is omitted if XLOG_HEAP_INIT_PAGE is set
|
||||
0
|
||||
if info == pg_constants::XLOG_HEAP_INSERT {
|
||||
let xlrec = v15::XlHeapInsert::decode(buf);
|
||||
assert_eq!(0, buf.remaining());
|
||||
if (xlrec.flags & pg_constants::XLH_INSERT_ALL_VISIBLE_CLEARED) != 0 {
|
||||
new_heap_blkno = Some(decoded.blocks[0].blkno);
|
||||
}
|
||||
} else if info == pg_constants::XLOG_HEAP_DELETE {
|
||||
let xlrec = v15::XlHeapDelete::decode(buf);
|
||||
assert_eq!(0, buf.remaining());
|
||||
if (xlrec.flags & pg_constants::XLH_DELETE_ALL_VISIBLE_CLEARED) != 0 {
|
||||
new_heap_blkno = Some(decoded.blocks[0].blkno);
|
||||
}
|
||||
} else if info == pg_constants::XLOG_HEAP_UPDATE
|
||||
|| info == pg_constants::XLOG_HEAP_HOT_UPDATE
|
||||
{
|
||||
let xlrec = v15::XlHeapUpdate::decode(buf);
|
||||
// the size of tuple data is inferred from the size of the record.
|
||||
// we can't validate the remaining number of bytes without parsing
|
||||
// the tuple data.
|
||||
if (xlrec.flags & pg_constants::XLH_UPDATE_OLD_ALL_VISIBLE_CLEARED) != 0 {
|
||||
old_heap_blkno = Some(decoded.blocks[0].blkno);
|
||||
}
|
||||
if (xlrec.flags & pg_constants::XLH_UPDATE_NEW_ALL_VISIBLE_CLEARED) != 0 {
|
||||
// PostgreSQL only uses XLH_UPDATE_NEW_ALL_VISIBLE_CLEARED on a
|
||||
// non-HOT update where the new tuple goes to different page than
|
||||
// the old one. Otherwise, only XLH_UPDATE_OLD_ALL_VISIBLE_CLEARED is
|
||||
// set.
|
||||
new_heap_blkno = Some(decoded.blocks[1].blkno);
|
||||
}
|
||||
}
|
||||
} else if decoded.xl_rmid == pg_constants::RM_HEAP2_ID {
|
||||
let info = decoded.xl_info & pg_constants::XLOG_HEAP_OPMASK;
|
||||
if info == pg_constants::XLOG_HEAP2_MULTI_INSERT {
|
||||
let xlrec = v15::XlHeapMultiInsert::decode(buf);
|
||||
|
||||
let offset_array_len =
|
||||
if decoded.xl_info & pg_constants::XLOG_HEAP_INIT_PAGE > 0 {
|
||||
// the offsets array is omitted if XLOG_HEAP_INIT_PAGE is set
|
||||
0
|
||||
} else {
|
||||
std::mem::size_of::<u16>() * xlrec.ntuples as usize
|
||||
};
|
||||
assert_eq!(offset_array_len, buf.remaining());
|
||||
|
||||
if (xlrec.flags & pg_constants::XLH_INSERT_ALL_VISIBLE_CLEARED) != 0 {
|
||||
new_heap_blkno = Some(decoded.blocks[0].blkno);
|
||||
}
|
||||
}
|
||||
} else {
|
||||
std::mem::size_of::<u16>() * xlrec.ntuples as usize
|
||||
};
|
||||
assert_eq!(offset_array_len, buf.remaining());
|
||||
bail!("Unknown RMGR {} for Heap decoding", decoded.xl_rmid);
|
||||
}
|
||||
}
|
||||
16 => {
|
||||
if decoded.xl_rmid == pg_constants::RM_HEAP_ID {
|
||||
let info = decoded.xl_info & pg_constants::XLOG_HEAP_OPMASK;
|
||||
|
||||
if (xlrec.flags & pg_constants::XLH_INSERT_ALL_VISIBLE_CLEARED) != 0 {
|
||||
new_heap_blkno = Some(decoded.blocks[0].blkno);
|
||||
if info == pg_constants::XLOG_HEAP_INSERT {
|
||||
let xlrec = v16::XlHeapInsert::decode(buf);
|
||||
assert_eq!(0, buf.remaining());
|
||||
if (xlrec.flags & pg_constants::XLH_INSERT_ALL_VISIBLE_CLEARED) != 0 {
|
||||
new_heap_blkno = Some(decoded.blocks[0].blkno);
|
||||
}
|
||||
} else if info == pg_constants::XLOG_HEAP_DELETE {
|
||||
let xlrec = v16::XlHeapDelete::decode(buf);
|
||||
assert_eq!(0, buf.remaining());
|
||||
if (xlrec.flags & pg_constants::XLH_DELETE_ALL_VISIBLE_CLEARED) != 0 {
|
||||
new_heap_blkno = Some(decoded.blocks[0].blkno);
|
||||
}
|
||||
} else if info == pg_constants::XLOG_HEAP_UPDATE
|
||||
|| info == pg_constants::XLOG_HEAP_HOT_UPDATE
|
||||
{
|
||||
let xlrec = v16::XlHeapUpdate::decode(buf);
|
||||
// the size of tuple data is inferred from the size of the record.
|
||||
// we can't validate the remaining number of bytes without parsing
|
||||
// the tuple data.
|
||||
if (xlrec.flags & pg_constants::XLH_UPDATE_OLD_ALL_VISIBLE_CLEARED) != 0 {
|
||||
old_heap_blkno = Some(decoded.blocks[0].blkno);
|
||||
}
|
||||
if (xlrec.flags & pg_constants::XLH_UPDATE_NEW_ALL_VISIBLE_CLEARED) != 0 {
|
||||
// PostgreSQL only uses XLH_UPDATE_NEW_ALL_VISIBLE_CLEARED on a
|
||||
// non-HOT update where the new tuple goes to different page than
|
||||
// the old one. Otherwise, only XLH_UPDATE_OLD_ALL_VISIBLE_CLEARED is
|
||||
// set.
|
||||
new_heap_blkno = Some(decoded.blocks[1].blkno);
|
||||
}
|
||||
}
|
||||
} else if decoded.xl_rmid == pg_constants::RM_HEAP2_ID {
|
||||
let info = decoded.xl_info & pg_constants::XLOG_HEAP_OPMASK;
|
||||
if info == pg_constants::XLOG_HEAP2_MULTI_INSERT {
|
||||
let xlrec = v16::XlHeapMultiInsert::decode(buf);
|
||||
|
||||
let offset_array_len =
|
||||
if decoded.xl_info & pg_constants::XLOG_HEAP_INIT_PAGE > 0 {
|
||||
// the offsets array is omitted if XLOG_HEAP_INIT_PAGE is set
|
||||
0
|
||||
} else {
|
||||
std::mem::size_of::<u16>() * xlrec.ntuples as usize
|
||||
};
|
||||
assert_eq!(offset_array_len, buf.remaining());
|
||||
|
||||
if (xlrec.flags & pg_constants::XLH_INSERT_ALL_VISIBLE_CLEARED) != 0 {
|
||||
new_heap_blkno = Some(decoded.blocks[0].blkno);
|
||||
}
|
||||
}
|
||||
} else {
|
||||
bail!("Unknown RMGR {} for Heap decoding", decoded.xl_rmid);
|
||||
}
|
||||
}
|
||||
_ => {}
|
||||
}
|
||||
// FIXME: What about XLOG_HEAP_LOCK and XLOG_HEAP2_LOCK_UPDATED?
|
||||
|
||||
// Clear the VM bits if required.
|
||||
if new_heap_blkno.is_some() || old_heap_blkno.is_some() {
|
||||
let vm_rel = RelTag {
|
||||
forknum: VISIBILITYMAP_FORKNUM,
|
||||
spcnode: decoded.blocks[0].rnode_spcnode,
|
||||
dbnode: decoded.blocks[0].rnode_dbnode,
|
||||
relnode: decoded.blocks[0].rnode_relnode,
|
||||
};
|
||||
|
||||
let mut new_vm_blk = new_heap_blkno.map(pg_constants::HEAPBLK_TO_MAPBLOCK);
|
||||
let mut old_vm_blk = old_heap_blkno.map(pg_constants::HEAPBLK_TO_MAPBLOCK);
|
||||
|
||||
// Sometimes, Postgres seems to create heap WAL records with the
|
||||
// ALL_VISIBLE_CLEARED flag set, even though the bit in the VM page is
|
||||
// not set. In fact, it's possible that the VM page does not exist at all.
|
||||
// In that case, we don't want to store a record to clear the VM bit;
|
||||
// replaying it would fail to find the previous image of the page, because
|
||||
// it doesn't exist. So check if the VM page(s) exist, and skip the WAL
|
||||
// record if it doesn't.
|
||||
let vm_size = self.get_relsize(vm_rel, modification.lsn, ctx).await?;
|
||||
if let Some(blknum) = new_vm_blk {
|
||||
if blknum >= vm_size {
|
||||
new_vm_blk = None;
|
||||
}
|
||||
}
|
||||
if let Some(blknum) = old_vm_blk {
|
||||
if blknum >= vm_size {
|
||||
old_vm_blk = None;
|
||||
}
|
||||
}
|
||||
|
||||
if new_vm_blk.is_some() || old_vm_blk.is_some() {
|
||||
if new_vm_blk == old_vm_blk {
|
||||
// An UPDATE record that needs to clear the bits for both old and the
|
||||
// new page, both of which reside on the same VM page.
|
||||
self.put_rel_wal_record(
|
||||
modification,
|
||||
vm_rel,
|
||||
new_vm_blk.unwrap(),
|
||||
NeonWalRecord::ClearVisibilityMapFlags {
|
||||
new_heap_blkno,
|
||||
old_heap_blkno,
|
||||
flags: pg_constants::VISIBILITYMAP_VALID_BITS,
|
||||
},
|
||||
ctx,
|
||||
)
|
||||
.await?;
|
||||
} else {
|
||||
// Clear VM bits for one heap page, or for two pages that reside on
|
||||
// different VM pages.
|
||||
if let Some(new_vm_blk) = new_vm_blk {
|
||||
self.put_rel_wal_record(
|
||||
modification,
|
||||
vm_rel,
|
||||
new_vm_blk,
|
||||
NeonWalRecord::ClearVisibilityMapFlags {
|
||||
new_heap_blkno,
|
||||
old_heap_blkno: None,
|
||||
flags: pg_constants::VISIBILITYMAP_VALID_BITS,
|
||||
},
|
||||
ctx,
|
||||
)
|
||||
.await?;
|
||||
}
|
||||
if let Some(old_vm_blk) = old_vm_blk {
|
||||
self.put_rel_wal_record(
|
||||
modification,
|
||||
vm_rel,
|
||||
old_vm_blk,
|
||||
NeonWalRecord::ClearVisibilityMapFlags {
|
||||
new_heap_blkno: None,
|
||||
old_heap_blkno,
|
||||
flags: pg_constants::VISIBILITYMAP_VALID_BITS,
|
||||
},
|
||||
ctx,
|
||||
)
|
||||
.await?;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
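// A minimal sketch, not the actual pg_constants implementation, of the
// heap-block -> visibility-map-block translation relied on above: the VM keeps
// two bits per heap block, so one VM page covers (usable page bytes) * 4 heap
// blocks. The 24-byte page header is an assumed value for illustration.
fn heapblk_to_mapblock(heap_blkno: u32) -> u32 {
    const BLCKSZ: u32 = 8192;
    const VM_PAGE_HEADER_SIZE: u32 = 24; // assumption: MAXALIGN'd PageHeaderData
    const HEAPBLOCKS_PER_BYTE: u32 = 4; // 8 bits per byte / 2 VM bits per heap block
    const HEAPBLOCKS_PER_PAGE: u32 = (BLCKSZ - VM_PAGE_HEADER_SIZE) * HEAPBLOCKS_PER_BYTE;

    // With these numbers one VM page covers 32672 heap blocks, so e.g. heap
    // block 40000 maps to VM block 1.
    heap_blkno / HEAPBLOCKS_PER_PAGE
}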
|
||||
async fn ingest_neonrmgr_record(
|
||||
&mut self,
|
||||
buf: &mut Bytes,
|
||||
modification: &mut DatadirModification<'_>,
|
||||
decoded: &mut DecodedWALRecord,
|
||||
ctx: &RequestContext,
|
||||
) -> anyhow::Result<()> {
|
||||
// Handle VM bit updates that are implicitly part of heap records.
|
||||
|
||||
// First, look at the record to determine which VM bits need
|
||||
// to be cleared. If either of these variables is set, we
|
||||
// need to clear the corresponding bits in the visibility map.
|
||||
let mut new_heap_blkno: Option<u32> = None;
|
||||
let mut old_heap_blkno: Option<u32> = None;
|
||||
assert_eq!(decoded.xl_rmid, pg_constants::RM_NEON_ID);
|
||||
|
||||
match self.timeline.pg_version {
|
||||
16 => {
|
||||
let info = decoded.xl_info & pg_constants::XLOG_HEAP_OPMASK;
|
||||
|
||||
match info {
|
||||
pg_constants::XLOG_NEON_HEAP_INSERT => {
|
||||
let xlrec = v16::rm_neon::XlNeonHeapInsert::decode(buf);
|
||||
assert_eq!(0, buf.remaining());
|
||||
if (xlrec.flags & pg_constants::XLH_INSERT_ALL_VISIBLE_CLEARED) != 0 {
|
||||
new_heap_blkno = Some(decoded.blocks[0].blkno);
|
||||
}
|
||||
}
|
||||
pg_constants::XLOG_NEON_HEAP_DELETE => {
|
||||
let xlrec = v16::rm_neon::XlNeonHeapDelete::decode(buf);
|
||||
assert_eq!(0, buf.remaining());
|
||||
if (xlrec.flags & pg_constants::XLH_DELETE_ALL_VISIBLE_CLEARED) != 0 {
|
||||
new_heap_blkno = Some(decoded.blocks[0].blkno);
|
||||
}
|
||||
}
|
||||
pg_constants::XLOG_NEON_HEAP_UPDATE
|
||||
| pg_constants::XLOG_NEON_HEAP_HOT_UPDATE => {
|
||||
let xlrec = v16::rm_neon::XlNeonHeapUpdate::decode(buf);
|
||||
// the size of tuple data is inferred from the size of the record.
|
||||
// we can't validate the remaining number of bytes without parsing
|
||||
// the tuple data.
|
||||
if (xlrec.flags & pg_constants::XLH_UPDATE_OLD_ALL_VISIBLE_CLEARED) != 0 {
|
||||
old_heap_blkno = Some(decoded.blocks[0].blkno);
|
||||
}
|
||||
if (xlrec.flags & pg_constants::XLH_UPDATE_NEW_ALL_VISIBLE_CLEARED) != 0 {
|
||||
// PostgreSQL only uses XLH_UPDATE_NEW_ALL_VISIBLE_CLEARED on a
|
||||
// non-HOT update where the new tuple goes to different page than
|
||||
// the old one. Otherwise, only XLH_UPDATE_OLD_ALL_VISIBLE_CLEARED is
|
||||
// set.
|
||||
new_heap_blkno = Some(decoded.blocks[1].blkno);
|
||||
}
|
||||
}
|
||||
pg_constants::XLOG_NEON_HEAP_MULTI_INSERT => {
|
||||
let xlrec = v16::rm_neon::XlNeonHeapMultiInsert::decode(buf);
|
||||
|
||||
let offset_array_len =
|
||||
if decoded.xl_info & pg_constants::XLOG_HEAP_INIT_PAGE > 0 {
|
||||
// the offsets array is omitted if XLOG_HEAP_INIT_PAGE is set
|
||||
0
|
||||
} else {
|
||||
std::mem::size_of::<u16>() * xlrec.ntuples as usize
|
||||
};
|
||||
assert_eq!(offset_array_len, buf.remaining());
|
||||
|
||||
if (xlrec.flags & pg_constants::XLH_INSERT_ALL_VISIBLE_CLEARED) != 0 {
|
||||
new_heap_blkno = Some(decoded.blocks[0].blkno);
|
||||
}
|
||||
}
|
||||
pg_constants::XLOG_NEON_HEAP_LOCK => {
|
||||
/* XLOG_NEON_HEAP_LOCK doesn't need special care */
|
||||
}
|
||||
info => bail!("Unknown WAL record type for Neon RMGR: {}", info),
|
||||
}
|
||||
}
|
||||
_ => bail!(
|
||||
"Neon RMGR has no known compatibility with PostgreSQL version {}",
|
||||
self.timeline.pg_version
|
||||
),
|
||||
}
|
||||
|
||||
// FIXME: What about XLOG_NEON_HEAP_LOCK?
|
||||
|
||||
// Clear the VM bits if required.
|
||||
if new_heap_blkno.is_some() || old_heap_blkno.is_some() {
|
||||
|
||||
@@ -4,9 +4,10 @@
|
||||
|
||||
use anyhow::Result;
|
||||
use bytes::{Buf, Bytes};
|
||||
use postgres_ffi::dispatch_pgversion;
|
||||
use postgres_ffi::pg_constants;
|
||||
use postgres_ffi::BLCKSZ;
|
||||
use postgres_ffi::{BlockNumber, OffsetNumber, TimestampTz};
|
||||
use postgres_ffi::{BlockNumber, TimestampTz};
|
||||
use postgres_ffi::{MultiXactId, MultiXactOffset, MultiXactStatus, Oid, TransactionId};
|
||||
use postgres_ffi::{XLogRecord, XLOG_SIZE_OF_XLOG_RECORD};
|
||||
use serde::{Deserialize, Serialize};
|
||||
@@ -76,9 +77,12 @@ pub struct DecodedBkpBlock {
|
||||
pub flags: u8,
|
||||
|
||||
/* Information on full-page image, if any */
|
||||
pub has_image: bool, /* has image, even for consistency checking */
|
||||
pub apply_image: bool, /* has image that should be restored */
|
||||
pub will_init: bool, /* record doesn't need previous page version to apply */
|
||||
pub has_image: bool,
|
||||
/* has image, even for consistency checking */
|
||||
pub apply_image: bool,
|
||||
/* has image that should be restored */
|
||||
pub will_init: bool,
|
||||
/* record doesn't need previous page version to apply */
|
||||
//char *bkp_image;
|
||||
pub hole_offset: u16,
|
||||
pub hole_length: u16,
|
||||
@@ -134,6 +138,237 @@ impl XlRelmapUpdate {
|
||||
}
|
||||
}
|
||||
|
||||
pub mod v14 {
|
||||
use bytes::{Buf, Bytes};
|
||||
use postgres_ffi::{OffsetNumber, TransactionId};
|
||||
|
||||
#[repr(C)]
|
||||
#[derive(Debug)]
|
||||
pub struct XlHeapInsert {
|
||||
pub offnum: OffsetNumber,
|
||||
pub flags: u8,
|
||||
}
|
||||
|
||||
impl XlHeapInsert {
|
||||
pub fn decode(buf: &mut Bytes) -> XlHeapInsert {
|
||||
XlHeapInsert {
|
||||
offnum: buf.get_u16_le(),
|
||||
flags: buf.get_u8(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[repr(C)]
|
||||
#[derive(Debug)]
|
||||
pub struct XlHeapMultiInsert {
|
||||
pub flags: u8,
|
||||
pub _padding: u8,
|
||||
pub ntuples: u16,
|
||||
}
|
||||
|
||||
impl XlHeapMultiInsert {
|
||||
pub fn decode(buf: &mut Bytes) -> XlHeapMultiInsert {
|
||||
XlHeapMultiInsert {
|
||||
flags: buf.get_u8(),
|
||||
_padding: buf.get_u8(),
|
||||
ntuples: buf.get_u16_le(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[repr(C)]
|
||||
#[derive(Debug)]
|
||||
pub struct XlHeapDelete {
|
||||
pub xmax: TransactionId,
|
||||
pub offnum: OffsetNumber,
|
||||
pub _padding: u16,
|
||||
pub t_cid: u32,
|
||||
pub infobits_set: u8,
|
||||
pub flags: u8,
|
||||
}
|
||||
|
||||
impl XlHeapDelete {
|
||||
pub fn decode(buf: &mut Bytes) -> XlHeapDelete {
|
||||
XlHeapDelete {
|
||||
xmax: buf.get_u32_le(),
|
||||
offnum: buf.get_u16_le(),
|
||||
_padding: buf.get_u16_le(),
|
||||
t_cid: buf.get_u32_le(),
|
||||
infobits_set: buf.get_u8(),
|
||||
flags: buf.get_u8(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[repr(C)]
|
||||
#[derive(Debug)]
|
||||
pub struct XlHeapUpdate {
|
||||
pub old_xmax: TransactionId,
|
||||
pub old_offnum: OffsetNumber,
|
||||
pub old_infobits_set: u8,
|
||||
pub flags: u8,
|
||||
pub t_cid: u32,
|
||||
pub new_xmax: TransactionId,
|
||||
pub new_offnum: OffsetNumber,
|
||||
}
|
||||
|
||||
impl XlHeapUpdate {
|
||||
pub fn decode(buf: &mut Bytes) -> XlHeapUpdate {
|
||||
XlHeapUpdate {
|
||||
old_xmax: buf.get_u32_le(),
|
||||
old_offnum: buf.get_u16_le(),
|
||||
old_infobits_set: buf.get_u8(),
|
||||
flags: buf.get_u8(),
|
||||
t_cid: buf.get_u32(),
|
||||
new_xmax: buf.get_u32_le(),
|
||||
new_offnum: buf.get_u16_le(),
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
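// The v14 decoders above read fixed-width little-endian fields, so the
// expected payload size follows from the field list: XlHeapDelete, for
// example, consumes 4 + 2 + 2 + 4 + 1 + 1 = 14 bytes. A small self-contained
// sketch checking that invariant with arbitrary zero bytes as input:
mod v14_size_check_sketch {
    use super::v14;
    use bytes::{Buf, Bytes};

    pub fn heap_delete_consumes_14_bytes() {
        // xmax, offnum, _padding, t_cid, infobits_set, flags
        let raw: &'static [u8] = &[0u8; 14];
        let mut buf = Bytes::from_static(raw);
        let rec = v14::XlHeapDelete::decode(&mut buf);
        assert_eq!(buf.remaining(), 0); // same check the ingest code performs after decoding
        assert_eq!(rec.xmax, 0);
        assert_eq!(rec.flags, 0);
    }
}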
|
||||
pub mod v15 {
|
||||
pub use super::v14::{XlHeapDelete, XlHeapInsert, XlHeapMultiInsert, XlHeapUpdate};
|
||||
}
|
||||
|
||||
pub mod v16 {
|
||||
pub use super::v14::{XlHeapInsert, XlHeapMultiInsert};
|
||||
use bytes::{Buf, Bytes};
|
||||
use postgres_ffi::{OffsetNumber, TransactionId};
|
||||
|
||||
pub struct XlHeapDelete {
|
||||
pub xmax: TransactionId,
|
||||
pub offnum: OffsetNumber,
|
||||
pub infobits_set: u8,
|
||||
pub flags: u8,
|
||||
}
|
||||
|
||||
impl XlHeapDelete {
|
||||
pub fn decode(buf: &mut Bytes) -> XlHeapDelete {
|
||||
XlHeapDelete {
|
||||
xmax: buf.get_u32_le(),
|
||||
offnum: buf.get_u16_le(),
|
||||
infobits_set: buf.get_u8(),
|
||||
flags: buf.get_u8(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[repr(C)]
|
||||
#[derive(Debug)]
|
||||
pub struct XlHeapUpdate {
|
||||
pub old_xmax: TransactionId,
|
||||
pub old_offnum: OffsetNumber,
|
||||
pub old_infobits_set: u8,
|
||||
pub flags: u8,
|
||||
pub new_xmax: TransactionId,
|
||||
pub new_offnum: OffsetNumber,
|
||||
}
|
||||
|
||||
impl XlHeapUpdate {
|
||||
pub fn decode(buf: &mut Bytes) -> XlHeapUpdate {
|
||||
XlHeapUpdate {
|
||||
old_xmax: buf.get_u32_le(),
|
||||
old_offnum: buf.get_u16_le(),
|
||||
old_infobits_set: buf.get_u8(),
|
||||
flags: buf.get_u8(),
|
||||
new_xmax: buf.get_u32_le(),
|
||||
new_offnum: buf.get_u16_le(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* Since PG16, we have the Neon RMGR (RM_NEON_ID) to manage Neon-flavored WAL. */
|
||||
pub mod rm_neon {
|
||||
use bytes::{Buf, Bytes};
|
||||
use postgres_ffi::{OffsetNumber, TransactionId};
|
||||
|
||||
#[repr(C)]
|
||||
#[derive(Debug)]
|
||||
pub struct XlNeonHeapInsert {
|
||||
pub offnum: OffsetNumber,
|
||||
pub flags: u8,
|
||||
}
|
||||
|
||||
impl XlNeonHeapInsert {
|
||||
pub fn decode(buf: &mut Bytes) -> XlNeonHeapInsert {
|
||||
XlNeonHeapInsert {
|
||||
offnum: buf.get_u16_le(),
|
||||
flags: buf.get_u8(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[repr(C)]
|
||||
#[derive(Debug)]
|
||||
pub struct XlNeonHeapMultiInsert {
|
||||
pub flags: u8,
|
||||
pub _padding: u8,
|
||||
pub ntuples: u16,
|
||||
pub t_cid: u32,
|
||||
}
|
||||
|
||||
impl XlNeonHeapMultiInsert {
|
||||
pub fn decode(buf: &mut Bytes) -> XlNeonHeapMultiInsert {
|
||||
XlNeonHeapMultiInsert {
|
||||
flags: buf.get_u8(),
|
||||
_padding: buf.get_u8(),
|
||||
ntuples: buf.get_u16_le(),
|
||||
t_cid: buf.get_u32_le(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[repr(C)]
|
||||
#[derive(Debug)]
|
||||
pub struct XlNeonHeapDelete {
|
||||
pub xmax: TransactionId,
|
||||
pub offnum: OffsetNumber,
|
||||
pub infobits_set: u8,
|
||||
pub flags: u8,
|
||||
pub t_cid: u32,
|
||||
}
|
||||
|
||||
impl XlNeonHeapDelete {
|
||||
pub fn decode(buf: &mut Bytes) -> XlNeonHeapDelete {
|
||||
XlNeonHeapDelete {
|
||||
xmax: buf.get_u32_le(),
|
||||
offnum: buf.get_u16_le(),
|
||||
infobits_set: buf.get_u8(),
|
||||
flags: buf.get_u8(),
|
||||
t_cid: buf.get_u32_le(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[repr(C)]
|
||||
#[derive(Debug)]
|
||||
pub struct XlNeonHeapUpdate {
|
||||
pub old_xmax: TransactionId,
|
||||
pub old_offnum: OffsetNumber,
|
||||
pub old_infobits_set: u8,
|
||||
pub flags: u8,
|
||||
pub t_cid: u32,
|
||||
pub new_xmax: TransactionId,
|
||||
pub new_offnum: OffsetNumber,
|
||||
}
|
||||
|
||||
impl XlNeonHeapUpdate {
|
||||
pub fn decode(buf: &mut Bytes) -> XlNeonHeapUpdate {
|
||||
XlNeonHeapUpdate {
|
||||
old_xmax: buf.get_u32_le(),
|
||||
old_offnum: buf.get_u16_le(),
|
||||
old_infobits_set: buf.get_u8(),
|
||||
flags: buf.get_u8(),
|
||||
t_cid: buf.get_u32(),
|
||||
new_xmax: buf.get_u32_le(),
|
||||
new_offnum: buf.get_u16_le(),
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
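// A hedged usage sketch for the Neon RMGR decoders defined above: build a tiny
// little-endian payload by hand and run it through XlNeonHeapInsert::decode.
// The byte values, and treating 0x01 as XLH_INSERT_ALL_VISIBLE_CLEARED, are
// illustrative assumptions rather than values taken from a real WAL stream.
mod rm_neon_decode_sketch {
    use super::v16::rm_neon::XlNeonHeapInsert;
    use bytes::{Buf, Bytes};

    pub fn decode_insert_example() {
        // offnum = 5 (u16, little-endian) followed by flags = 0x01
        let mut buf = Bytes::from_static(&[0x05, 0x00, 0x01]);
        let rec = XlNeonHeapInsert::decode(&mut buf);
        assert_eq!(rec.offnum, 5);
        assert_eq!(rec.flags & 0x01, 0x01);
        assert_eq!(buf.remaining(), 0);
    }
}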
|
||||
#[repr(C)]
|
||||
#[derive(Debug)]
|
||||
pub struct XlSmgrCreate {
|
||||
@@ -223,90 +458,6 @@ impl XlDropDatabase {
|
||||
}
|
||||
}
|
||||
|
||||
#[repr(C)]
|
||||
#[derive(Debug)]
|
||||
pub struct XlHeapInsert {
|
||||
pub offnum: OffsetNumber,
|
||||
pub flags: u8,
|
||||
}
|
||||
|
||||
impl XlHeapInsert {
|
||||
pub fn decode(buf: &mut Bytes) -> XlHeapInsert {
|
||||
XlHeapInsert {
|
||||
offnum: buf.get_u16_le(),
|
||||
flags: buf.get_u8(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[repr(C)]
|
||||
#[derive(Debug)]
|
||||
pub struct XlHeapMultiInsert {
|
||||
pub flags: u8,
|
||||
pub _padding: u8,
|
||||
pub ntuples: u16,
|
||||
}
|
||||
|
||||
impl XlHeapMultiInsert {
|
||||
pub fn decode(buf: &mut Bytes) -> XlHeapMultiInsert {
|
||||
XlHeapMultiInsert {
|
||||
flags: buf.get_u8(),
|
||||
_padding: buf.get_u8(),
|
||||
ntuples: buf.get_u16_le(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[repr(C)]
|
||||
#[derive(Debug)]
|
||||
pub struct XlHeapDelete {
|
||||
pub xmax: TransactionId,
|
||||
pub offnum: OffsetNumber,
|
||||
pub _padding: u16,
|
||||
pub t_cid: u32,
|
||||
pub infobits_set: u8,
|
||||
pub flags: u8,
|
||||
}
|
||||
|
||||
impl XlHeapDelete {
|
||||
pub fn decode(buf: &mut Bytes) -> XlHeapDelete {
|
||||
XlHeapDelete {
|
||||
xmax: buf.get_u32_le(),
|
||||
offnum: buf.get_u16_le(),
|
||||
_padding: buf.get_u16_le(),
|
||||
t_cid: buf.get_u32_le(),
|
||||
infobits_set: buf.get_u8(),
|
||||
flags: buf.get_u8(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[repr(C)]
|
||||
#[derive(Debug)]
|
||||
pub struct XlHeapUpdate {
|
||||
pub old_xmax: TransactionId,
|
||||
pub old_offnum: OffsetNumber,
|
||||
pub old_infobits_set: u8,
|
||||
pub flags: u8,
|
||||
pub t_cid: u32,
|
||||
pub new_xmax: TransactionId,
|
||||
pub new_offnum: OffsetNumber,
|
||||
}
|
||||
|
||||
impl XlHeapUpdate {
|
||||
pub fn decode(buf: &mut Bytes) -> XlHeapUpdate {
|
||||
XlHeapUpdate {
|
||||
old_xmax: buf.get_u32_le(),
|
||||
old_offnum: buf.get_u16_le(),
|
||||
old_infobits_set: buf.get_u8(),
|
||||
flags: buf.get_u8(),
|
||||
t_cid: buf.get_u32(),
|
||||
new_xmax: buf.get_u32_le(),
|
||||
new_offnum: buf.get_u16_le(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
///
|
||||
/// Note: Parsing some fields is missing, because they're not needed.
|
||||
///
|
||||
@@ -321,9 +472,10 @@ pub struct XlXactParsedRecord {
|
||||
pub xact_time: TimestampTz,
|
||||
pub xinfo: u32,
|
||||
|
||||
pub db_id: Oid, /* MyDatabaseId */
|
||||
pub ts_id: Oid, /* MyDatabaseTableSpace */
|
||||
|
||||
pub db_id: Oid,
|
||||
/* MyDatabaseId */
|
||||
pub ts_id: Oid,
|
||||
/* MyDatabaseTableSpace */
|
||||
pub subxacts: Vec<TransactionId>,
|
||||
|
||||
pub xnodes: Vec<RelFileNode>,
|
||||
@@ -455,9 +607,12 @@ impl MultiXactMember {
|
||||
#[repr(C)]
|
||||
#[derive(Debug)]
|
||||
pub struct XlMultiXactCreate {
|
||||
pub mid: MultiXactId, /* new MultiXact's ID */
|
||||
pub moff: MultiXactOffset, /* its starting offset in members file */
|
||||
pub nmembers: u32, /* number of member XIDs */
|
||||
pub mid: MultiXactId,
|
||||
/* new MultiXact's ID */
|
||||
pub moff: MultiXactOffset,
|
||||
/* its starting offset in members file */
|
||||
pub nmembers: u32,
|
||||
/* number of member XIDs */
|
||||
pub members: Vec<MultiXactMember>,
|
||||
}
|
||||
|
||||
@@ -484,7 +639,8 @@ impl XlMultiXactCreate {
|
||||
pub struct XlMultiXactTruncate {
|
||||
pub oldest_multi_db: Oid,
|
||||
/* to-be-truncated range of multixact offsets */
|
||||
pub start_trunc_off: MultiXactId, /* just for completeness' sake */
|
||||
pub start_trunc_off: MultiXactId,
|
||||
/* just for completeness' sake */
|
||||
pub end_trunc_off: MultiXactId,
|
||||
|
||||
/* to-be-truncated range of multixact members */
|
||||
@@ -626,12 +782,10 @@ pub fn decode_wal_record(
|
||||
blk.hole_offset = buf.get_u16_le();
|
||||
blk.bimg_info = buf.get_u8();
|
||||
|
||||
blk.apply_image = if pg_version == 14 {
|
||||
(blk.bimg_info & postgres_ffi::v14::bindings::BKPIMAGE_APPLY) != 0
|
||||
} else {
|
||||
assert_eq!(pg_version, 15);
|
||||
(blk.bimg_info & postgres_ffi::v15::bindings::BKPIMAGE_APPLY) != 0
|
||||
};
|
||||
blk.apply_image = dispatch_pgversion!(
|
||||
pg_version,
|
||||
(blk.bimg_info & pgv::bindings::BKPIMAGE_APPLY) != 0
|
||||
);
|
||||
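// Roughly, dispatch_pgversion! picks the bindings module that matches
// pg_version and evaluates the expression against it. A self-contained sketch
// of the same idea; the BKPIMAGE_APPLY values here are placeholders, not the
// real per-version postgres_ffi bindings.
fn apply_image_flag_sketch(pg_version: u32, bimg_info: u8) -> bool {
    const BKPIMAGE_APPLY_V14: u8 = 0x02; // placeholder value
    const BKPIMAGE_APPLY_V15: u8 = 0x02; // placeholder value
    const BKPIMAGE_APPLY_V16: u8 = 0x02; // placeholder value

    let apply_mask = match pg_version {
        14 => BKPIMAGE_APPLY_V14,
        15 => BKPIMAGE_APPLY_V15,
        16 => BKPIMAGE_APPLY_V16,
        other => panic!("unsupported PostgreSQL version {other}"),
    };
    (bimg_info & apply_mask) != 0
}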
|
||||
let blk_img_is_compressed =
|
||||
postgres_ffi::bkpimage_is_compressed(blk.bimg_info, pg_version)?;
|
||||
|
||||
@@ -19,13 +19,16 @@
|
||||
#include <fcntl.h>
|
||||
|
||||
#include "postgres.h"
|
||||
|
||||
#include "neon_pgversioncompat.h"
|
||||
|
||||
#include "funcapi.h"
|
||||
#include "miscadmin.h"
|
||||
#include "pgstat.h"
|
||||
#include "pagestore_client.h"
|
||||
#include "access/parallel.h"
|
||||
#include "postmaster/bgworker.h"
|
||||
#include "storage/relfilenode.h"
|
||||
#include RELFILEINFO_HDR
|
||||
#include "storage/buf_internals.h"
|
||||
#include "storage/latch.h"
|
||||
#include "storage/ipc.h"
|
||||
@@ -405,7 +408,7 @@ lfc_init(void)
|
||||
* Returns true if page is found in local cache.
|
||||
*/
|
||||
bool
|
||||
lfc_cache_contains(RelFileNode rnode, ForkNumber forkNum, BlockNumber blkno)
|
||||
lfc_cache_contains(NRelFileInfo rinfo, ForkNumber forkNum, BlockNumber blkno)
|
||||
{
|
||||
BufferTag tag;
|
||||
FileCacheEntry* entry;
|
||||
@@ -416,7 +419,7 @@ lfc_cache_contains(RelFileNode rnode, ForkNumber forkNum, BlockNumber blkno)
|
||||
if (lfc_size_limit == 0) /* fast exit if file cache is disabled */
|
||||
return false;
|
||||
|
||||
tag.rnode = rnode;
|
||||
CopyNRelFileInfoToBufTag(tag, rinfo);
|
||||
tag.forkNum = forkNum;
|
||||
tag.blockNum = blkno & ~(BLOCKS_PER_CHUNK-1);
|
||||
hash = get_hash_value(lfc_hash, &tag);
|
||||
@@ -432,7 +435,7 @@ lfc_cache_contains(RelFileNode rnode, ForkNumber forkNum, BlockNumber blkno)
|
||||
* Evict a page (if present) from the local file cache
|
||||
*/
|
||||
void
|
||||
lfc_evict(RelFileNode rnode, ForkNumber forkNum, BlockNumber blkno)
|
||||
lfc_evict(NRelFileInfo rinfo, ForkNumber forkNum, BlockNumber blkno)
|
||||
{
|
||||
BufferTag tag;
|
||||
FileCacheEntry* entry;
|
||||
@@ -443,7 +446,9 @@ lfc_evict(RelFileNode rnode, ForkNumber forkNum, BlockNumber blkno)
|
||||
if (lfc_size_limit == 0) /* fast exit if file cache is disabled */
|
||||
return;
|
||||
|
||||
INIT_BUFFERTAG(tag, rnode, forkNum, (blkno & ~(BLOCKS_PER_CHUNK-1)));
|
||||
CopyNRelFileInfoToBufTag(tag, rinfo);
|
||||
tag.forkNum = forkNum;
|
||||
tag.blockNum = (blkno & ~(BLOCKS_PER_CHUNK - 1));
|
||||
|
||||
hash = get_hash_value(lfc_hash, &tag);
|
||||
|
||||
@@ -501,7 +506,7 @@ lfc_evict(RelFileNode rnode, ForkNumber forkNum, BlockNumber blkno)
|
||||
* In case of error lfc_size_limit is set to zero to disable any further operations with cache.
|
||||
*/
|
||||
bool
|
||||
lfc_read(RelFileNode rnode, ForkNumber forkNum, BlockNumber blkno,
|
||||
lfc_read(NRelFileInfo rinfo, ForkNumber forkNum, BlockNumber blkno,
|
||||
char *buffer)
|
||||
{
|
||||
BufferTag tag;
|
||||
@@ -519,7 +524,7 @@ lfc_read(RelFileNode rnode, ForkNumber forkNum, BlockNumber blkno,
|
||||
if (!lfc_ensure_opened())
|
||||
return false;
|
||||
|
||||
tag.rnode = rnode;
|
||||
CopyNRelFileInfoToBufTag(tag, rinfo);
|
||||
tag.forkNum = forkNum;
|
||||
tag.blockNum = blkno & ~(BLOCKS_PER_CHUNK-1);
|
||||
hash = get_hash_value(lfc_hash, &tag);
|
||||
@@ -568,8 +573,12 @@ lfc_read(RelFileNode rnode, ForkNumber forkNum, BlockNumber blkno,
|
||||
* If cache is full then evict some other page.
|
||||
*/
|
||||
void
|
||||
lfc_write(RelFileNode rnode, ForkNumber forkNum, BlockNumber blkno,
|
||||
lfc_write(NRelFileInfo rinfo, ForkNumber forkNum, BlockNumber blkno,
|
||||
#if PG_MAJORVERSION_NUM < 16
|
||||
char *buffer)
|
||||
#else
|
||||
const void *buffer)
|
||||
#endif
|
||||
{
|
||||
BufferTag tag;
|
||||
FileCacheEntry* entry;
|
||||
@@ -584,9 +593,11 @@ lfc_write(RelFileNode rnode, ForkNumber forkNum, BlockNumber blkno,
|
||||
if (!lfc_ensure_opened())
|
||||
return;
|
||||
|
||||
tag.rnode = rnode;
|
||||
tag.forkNum = forkNum;
|
||||
tag.blockNum = blkno & ~(BLOCKS_PER_CHUNK-1);
|
||||
|
||||
CopyNRelFileInfoToBufTag(tag, rinfo);
|
||||
|
||||
hash = get_hash_value(lfc_hash, &tag);
|
||||
|
||||
LWLockAcquire(lfc_lock, LW_EXCLUSIVE);
|
||||
@@ -718,8 +729,13 @@ local_cache_pages(PG_FUNCTION_ARGS)
|
||||
tupledesc = CreateTemplateTupleDesc(expected_tupledesc->natts);
|
||||
TupleDescInitEntry(tupledesc, (AttrNumber) 1, "pageoffs",
|
||||
INT8OID, -1, 0);
|
||||
#if PG_MAJORVERSION_NUM < 16
|
||||
TupleDescInitEntry(tupledesc, (AttrNumber) 2, "relfilenode",
|
||||
OIDOID, -1, 0);
|
||||
#else
|
||||
TupleDescInitEntry(tupledesc, (AttrNumber) 2, "relfilenumber",
|
||||
OIDOID, -1, 0);
|
||||
#endif
|
||||
TupleDescInitEntry(tupledesc, (AttrNumber) 3, "reltablespace",
|
||||
OIDOID, -1, 0);
|
||||
TupleDescInitEntry(tupledesc, (AttrNumber) 4, "reldatabase",
|
||||
@@ -770,9 +786,9 @@ local_cache_pages(PG_FUNCTION_ARGS)
|
||||
if (entry->bitmap[i >> 5] & (1 << (i & 31)))
|
||||
{
|
||||
fctx->record[n_pages].pageoffs = entry->offset*BLOCKS_PER_CHUNK + i;
|
||||
fctx->record[n_pages].relfilenode = entry->key.rnode.relNode;
|
||||
fctx->record[n_pages].reltablespace = entry->key.rnode.spcNode;
|
||||
fctx->record[n_pages].reldatabase = entry->key.rnode.dbNode;
|
||||
fctx->record[n_pages].relfilenode = NInfoGetRelNumber(BufTagGetNRelFileInfo(entry->key));
|
||||
fctx->record[n_pages].reltablespace = NInfoGetSpcOid(BufTagGetNRelFileInfo(entry->key));
|
||||
fctx->record[n_pages].reldatabase = NInfoGetDbOid(BufTagGetNRelFileInfo(entry->key));
|
||||
fctx->record[n_pages].forknum = entry->key.forkNum;
|
||||
fctx->record[n_pages].blocknum = entry->key.blockNum + i;
|
||||
fctx->record[n_pages].accesscount = entry->access_count;
|
||||
|
||||
@@ -442,7 +442,7 @@ pg_init_libpagestore(void)
|
||||
"Maximal attempts to reconnect to pages server (with 1 second timeout)",
|
||||
NULL,
|
||||
&max_reconnect_attempts,
|
||||
10, 0, INT_MAX,
|
||||
60, 0, INT_MAX,
|
||||
PGC_USERSET,
|
||||
0,
|
||||
NULL, NULL, NULL);
|
||||
|
||||
@@ -33,6 +33,14 @@ void _PG_init(void);
|
||||
void
|
||||
_PG_init(void)
|
||||
{
|
||||
/*
|
||||
* Also load 'neon_rmgr'. This makes it unnecessary to list both 'neon'
|
||||
* and 'neon_rmgr' in shared_preload_libraries.
|
||||
*/
|
||||
#if PG_VERSION_NUM >= 160000
|
||||
load_file("$libdir/neon_rmgr", false);
|
||||
#endif
|
||||
|
||||
pg_init_libpagestore();
|
||||
pg_init_walproposer();
|
||||
|
||||
@@ -40,9 +48,9 @@ _PG_init(void)
|
||||
|
||||
pg_init_extension_server();
|
||||
|
||||
// Important: This must happen after other parts of the extension
|
||||
// are loaded, otherwise any settings to GUCs that were set before
|
||||
// the extension was loaded will be removed.
|
||||
// Important: This must happen after other parts of the extension
|
||||
// are loaded, otherwise any settings to GUCs that were set before
|
||||
// the extension was loaded will be removed.
|
||||
EmitWarningsOnPlaceholders("neon");
|
||||
}
|
||||
|
||||
|
||||
112
pgxn/neon/neon_pgversioncompat.h
Normal file
@@ -0,0 +1,112 @@
|
||||
/*
|
||||
* Compatibility macros to cover up differences between supported PostgreSQL versions,
|
||||
* to help with compiling the same sources for all of them.
|
||||
*/
|
||||
|
||||
#ifndef NEON_PGVERSIONCOMPAT_H
|
||||
#define NEON_PGVERSIONCOMPAT_H
|
||||
|
||||
#define NRelFileInfoBackendIsTemp(rinfo) (rinfo.backend != InvalidBackendId)
|
||||
|
||||
#define RelFileInfoEquals(a, b) ( \
|
||||
NInfoGetSpcOid(a) == NInfoGetSpcOid(b) && \
|
||||
NInfoGetDbOid(a) == NInfoGetDbOid(b) && \
|
||||
NInfoGetRelNumber(a) == NInfoGetRelNumber(b) \
|
||||
)
|
||||
|
||||
/* buftag population & RelFileNode/RelFileLocator rework */
|
||||
#if PG_MAJORVERSION_NUM < 16
|
||||
|
||||
#define InitBufferTag(tag, rfn, fn, bn) INIT_BUFFERTAG(*tag, *rfn, fn, bn)
|
||||
|
||||
#define USE_RELFILENODE
|
||||
|
||||
#define RELFILEINFO_HDR "storage/relfilenode.h"
|
||||
|
||||
#define NRelFileInfo RelFileNode
|
||||
#define NRelFileInfoBackend RelFileNodeBackend
|
||||
#define NRelFileNumber Oid
|
||||
|
||||
#define InfoFromRelation(rel) (rel)->rd_node
|
||||
#define InfoFromSMgrRel(srel) (srel)->smgr_rnode.node
|
||||
#define InfoBFromSMgrRel(srel) (srel)->smgr_rnode
|
||||
#define InfoFromNInfoB(ninfob) ninfob.node
|
||||
|
||||
#define RelFileInfoFmt(rinfo) \
|
||||
(rinfo).spcNode, \
|
||||
(rinfo).dbNode, \
|
||||
(rinfo).relNode
|
||||
|
||||
#define RelFileInfoBackendFmt(ninfob) \
|
||||
(ninfob).backend, \
|
||||
(ninfob).node.spcNode, \
|
||||
(ninfob).node.dbNode, \
|
||||
(ninfob).node.relNode
|
||||
|
||||
#define NInfoGetSpcOid(ninfo) (ninfo).spcNode
|
||||
#define NInfoGetDbOid(ninfo) (ninfo).dbNode
|
||||
#define NInfoGetRelNumber(ninfo) (ninfo).relNode
|
||||
|
||||
#define CopyNRelFileInfoToBufTag(tag, rinfo) \
|
||||
do { \
|
||||
(tag).rnode = (rinfo); \
|
||||
} while (false);
|
||||
|
||||
#define BufTagGetNRelFileInfo(tag) tag.rnode
|
||||
|
||||
#define SMgrRelGetRelInfo(reln) \
|
||||
(reln->smgr_rnode.node)
|
||||
|
||||
#define DropRelationAllLocalBuffers DropRelFileNodeAllLocalBuffers
|
||||
|
||||
#else /* major version >= 16 */
|
||||
|
||||
#define USE_RELFILELOCATOR
|
||||
|
||||
#define BUFFERTAGS_EQUAL(a, b) BufferTagsEqual(&(a), &(b))
|
||||
|
||||
#define RELFILEINFO_HDR "storage/relfilelocator.h"
|
||||
|
||||
#define NRelFileInfo RelFileLocator
|
||||
#define NRelFileInfoBackend RelFileLocatorBackend
|
||||
|
||||
#define InfoFromRelation(rel) (rel)->rd_locator
|
||||
#define InfoFromSMgrRel(srel) (srel)->smgr_rlocator.locator
|
||||
#define InfoBFromSMgrRel(srel) (srel)->smgr_rlocator
|
||||
#define InfoFromNInfoB(ninfob) (ninfob).locator
|
||||
|
||||
#define RelFileInfoFmt(rinfo) \
|
||||
(rinfo).spcOid, \
|
||||
(rinfo).dbOid, \
|
||||
(rinfo).relNumber
|
||||
#define RelFileInfoBackendFmt(ninfob) \
|
||||
(ninfob).backend, \
|
||||
(ninfob).locator.spcOid, \
|
||||
(ninfob).locator.dbOid, \
|
||||
(ninfob).locator.relNumber
|
||||
|
||||
#define NInfoGetSpcOid(ninfo) (ninfo).spcOid
|
||||
#define NInfoGetDbOid(ninfo) (ninfo).dbOid
|
||||
#define NInfoGetRelNumber(ninfo) (ninfo).relNumber
|
||||
|
||||
#define CopyNRelFileInfoToBufTag(tag, rinfo) \
|
||||
do { \
|
||||
(tag).spcOid = (rinfo).spcOid; \
|
||||
(tag).dbOid = (rinfo).dbOid; \
|
||||
(tag).relNumber = (rinfo).relNumber; \
|
||||
} while (false);
|
||||
|
||||
#define BufTagGetNRelFileInfo(tag) \
|
||||
((RelFileLocator) { \
|
||||
.spcOid = (tag).spcOid, \
|
||||
.dbOid = (tag).dbOid, \
|
||||
.relNumber = (tag).relNumber, \
|
||||
})
|
||||
|
||||
#define SMgrRelGetRelInfo(reln) \
|
||||
((reln)->smgr_rlocator)
|
||||
|
||||
#define DropRelationAllLocalBuffers DropRelationAllLocalBuffers
|
||||
#endif
|
||||
|
||||
#endif //NEON_PGVERSIONCOMPAT_H
|
||||
@@ -14,9 +14,10 @@
|
||||
#define pageserver_h
|
||||
|
||||
#include "postgres.h"
|
||||
#include "neon_pgversioncompat.h"
|
||||
|
||||
#include "access/xlogdefs.h"
|
||||
#include "storage/relfilenode.h"
|
||||
#include RELFILEINFO_HDR
|
||||
#include "storage/block.h"
|
||||
#include "storage/smgr.h"
|
||||
#include "lib/stringinfo.h"
|
||||
@@ -71,14 +72,14 @@ typedef struct
|
||||
typedef struct
|
||||
{
|
||||
NeonRequest req;
|
||||
RelFileNode rnode;
|
||||
NRelFileInfo rinfo;
|
||||
ForkNumber forknum;
|
||||
} NeonExistsRequest;
|
||||
|
||||
typedef struct
|
||||
{
|
||||
NeonRequest req;
|
||||
RelFileNode rnode;
|
||||
NRelFileInfo rinfo;
|
||||
ForkNumber forknum;
|
||||
} NeonNblocksRequest;
|
||||
|
||||
@@ -91,7 +92,7 @@ typedef struct
|
||||
typedef struct
|
||||
{
|
||||
NeonRequest req;
|
||||
RelFileNode rnode;
|
||||
NRelFileInfo rinfo;
|
||||
ForkNumber forknum;
|
||||
BlockNumber blkno;
|
||||
} NeonGetPageRequest;
|
||||
@@ -164,7 +165,7 @@ extern char *neon_tenant;
|
||||
extern bool wal_redo;
|
||||
extern int32 max_cluster_size;
|
||||
|
||||
extern const f_smgr *smgr_neon(BackendId backend, RelFileNode rnode);
|
||||
extern const f_smgr *smgr_neon(BackendId backend, NRelFileInfo rinfo);
|
||||
extern void smgr_init_neon(void);
|
||||
extern void readahead_buffer_resize(int newsize, void *extra);
|
||||
|
||||
@@ -175,19 +176,35 @@ extern void neon_open(SMgrRelation reln);
|
||||
extern void neon_close(SMgrRelation reln, ForkNumber forknum);
|
||||
extern void neon_create(SMgrRelation reln, ForkNumber forknum, bool isRedo);
|
||||
extern bool neon_exists(SMgrRelation reln, ForkNumber forknum);
|
||||
extern void neon_unlink(RelFileNodeBackend rnode, ForkNumber forknum, bool isRedo);
|
||||
extern void neon_unlink(NRelFileInfoBackend rnode, ForkNumber forknum, bool isRedo);
|
||||
#if PG_MAJORVERSION_NUM < 16
|
||||
extern void neon_extend(SMgrRelation reln, ForkNumber forknum,
|
||||
BlockNumber blocknum, char *buffer, bool skipFsync);
|
||||
#else
|
||||
extern void neon_extend(SMgrRelation reln, ForkNumber forknum,
|
||||
BlockNumber blocknum, const void *buffer, bool skipFsync);
|
||||
extern void neon_zeroextend(SMgrRelation reln, ForkNumber forknum,
|
||||
BlockNumber blocknum, int nbuffers, bool skipFsync);
|
||||
#endif
|
||||
|
||||
extern bool neon_prefetch(SMgrRelation reln, ForkNumber forknum,
|
||||
BlockNumber blocknum);
|
||||
|
||||
#if PG_MAJORVERSION_NUM < 16
|
||||
extern void neon_read(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum,
|
||||
char *buffer);
|
||||
|
||||
extern void neon_read_at_lsn(RelFileNode rnode, ForkNumber forkNum, BlockNumber blkno,
|
||||
extern PGDLLEXPORT void neon_read_at_lsn(NRelFileInfo rnode, ForkNumber forkNum, BlockNumber blkno,
|
||||
XLogRecPtr request_lsn, bool request_latest, char *buffer);
|
||||
|
||||
extern void neon_write(SMgrRelation reln, ForkNumber forknum,
|
||||
BlockNumber blocknum, char *buffer, bool skipFsync);
|
||||
#else
|
||||
extern void neon_read(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum,
|
||||
void *buffer);
|
||||
extern PGDLLEXPORT void neon_read_at_lsn(NRelFileInfo rnode, ForkNumber forkNum, BlockNumber blkno,
|
||||
XLogRecPtr request_lsn, bool request_latest, void *buffer);
|
||||
extern void neon_write(SMgrRelation reln, ForkNumber forknum,
|
||||
BlockNumber blocknum, const void *buffer, bool skipFsync);
|
||||
#endif
|
||||
extern void neon_writeback(SMgrRelation reln, ForkNumber forknum,
|
||||
BlockNumber blocknum, BlockNumber nblocks);
|
||||
extern BlockNumber neon_nblocks(SMgrRelation reln, ForkNumber forknum);
|
||||
@@ -198,16 +215,22 @@ extern void neon_immedsync(SMgrRelation reln, ForkNumber forknum);
|
||||
|
||||
/* utils for neon relsize cache */
|
||||
extern void relsize_hash_init(void);
|
||||
extern bool get_cached_relsize(RelFileNode rnode, ForkNumber forknum, BlockNumber *size);
|
||||
extern void set_cached_relsize(RelFileNode rnode, ForkNumber forknum, BlockNumber size);
|
||||
extern void update_cached_relsize(RelFileNode rnode, ForkNumber forknum, BlockNumber size);
|
||||
extern void forget_cached_relsize(RelFileNode rnode, ForkNumber forknum);
|
||||
extern bool get_cached_relsize(NRelFileInfo rinfo, ForkNumber forknum, BlockNumber *size);
|
||||
extern void set_cached_relsize(NRelFileInfo rinfo, ForkNumber forknum, BlockNumber size);
|
||||
extern void update_cached_relsize(NRelFileInfo rinfo, ForkNumber forknum, BlockNumber size);
|
||||
extern void forget_cached_relsize(NRelFileInfo rinfo, ForkNumber forknum);
|
||||
|
||||
/* functions for local file cache */
|
||||
extern void lfc_write(RelFileNode rnode, ForkNumber forkNum, BlockNumber blkno, char *buffer);
|
||||
extern bool lfc_read(RelFileNode rnode, ForkNumber forkNum, BlockNumber blkno, char *buffer);
|
||||
extern bool lfc_cache_contains(RelFileNode rnode, ForkNumber forkNum, BlockNumber blkno);
|
||||
extern void lfc_evict(RelFileNode rnode, ForkNumber forkNum, BlockNumber blkno);
|
||||
#if PG_MAJORVERSION_NUM < 16
|
||||
extern void lfc_write(NRelFileInfo rinfo, ForkNumber forkNum, BlockNumber blkno,
|
||||
char *buffer);
|
||||
#else
|
||||
extern void lfc_write(NRelFileInfo rinfo, ForkNumber forkNum, BlockNumber blkno,
|
||||
const void *buffer);
|
||||
#endif
|
||||
extern bool lfc_read(NRelFileInfo rinfo, ForkNumber forkNum, BlockNumber blkno, char *buffer);
|
||||
extern bool lfc_cache_contains(NRelFileInfo rinfo, ForkNumber forkNum, BlockNumber blkno);
|
||||
extern void lfc_evict(NRelFileInfo rinfo, ForkNumber forkNum, BlockNumber blkno);
|
||||
extern void lfc_init(void);
|
||||
|
||||
|
||||
|
||||
@@ -58,7 +58,6 @@
|
||||
#include "postmaster/autovacuum.h"
|
||||
#include "replication/walsender.h"
|
||||
#include "storage/bufmgr.h"
|
||||
#include "storage/relfilenode.h"
|
||||
#include "storage/buf_internals.h"
|
||||
#include "storage/smgr.h"
|
||||
#include "storage/md.h"
|
||||
@@ -86,7 +85,10 @@
|
||||
static char *hexdump_page(char *page);
|
||||
#endif
|
||||
|
||||
#define IS_LOCAL_REL(reln) (reln->smgr_rnode.node.dbNode != 0 && reln->smgr_rnode.node.relNode > FirstNormalObjectId)
|
||||
#define IS_LOCAL_REL(reln) (\
|
||||
NInfoGetDbOid(InfoFromSMgrRel(reln)) != 0 && \
|
||||
NInfoGetRelNumber(InfoFromSMgrRel(reln)) > FirstNormalObjectId \
|
||||
)
|
||||
|
||||
const int SmgrTrace = DEBUG5;
|
||||
|
||||
@@ -160,6 +162,7 @@ typedef enum PrefetchStatus {
|
||||
typedef struct PrefetchRequest {
|
||||
BufferTag buftag; /* must be first entry in the struct */
|
||||
XLogRecPtr effective_request_lsn;
|
||||
XLogRecPtr actual_request_lsn;
|
||||
NeonResponse *response; /* may be null */
|
||||
PrefetchStatus status;
|
||||
uint64 my_ring_index;
|
||||
@@ -255,7 +258,7 @@ static bool prefetch_wait_for(uint64 ring_index);
|
||||
static void prefetch_cleanup_trailing_unused(void);
|
||||
static inline void prefetch_set_unused(uint64 ring_index);
|
||||
|
||||
static XLogRecPtr neon_get_request_lsn(bool *latest, RelFileNode rnode,
|
||||
static XLogRecPtr neon_get_request_lsn(bool *latest, NRelFileInfo rinfo,
|
||||
ForkNumber forknum, BlockNumber blkno);
|
||||
|
||||
static bool
|
||||
@@ -314,6 +317,7 @@ compact_prefetch_buffers(void)
|
||||
target_slot->status = source_slot->status;
|
||||
target_slot->response = source_slot->response;
|
||||
target_slot->effective_request_lsn = source_slot->effective_request_lsn;
|
||||
target_slot->actual_request_lsn = source_slot->actual_request_lsn;
|
||||
target_slot->my_ring_index = empty_ring_index;
|
||||
|
||||
prfh_delete(MyPState->prf_hash, source_slot);
|
||||
@@ -500,6 +504,11 @@ prefetch_wait_for(uint64 ring_index)
|
||||
{
|
||||
entry = GetPrfSlot(MyPState->ring_receive);
|
||||
|
||||
#if PG_MAJORVERSION_NUM >= 16
|
||||
/* ensure the log is actually flushed up to the request point */
|
||||
XLogFlush(entry->actual_request_lsn);
|
||||
#endif
|
||||
|
||||
Assert(entry->status == PRFS_REQUESTED);
|
||||
if (!prefetch_read(entry))
|
||||
return false;
|
||||
@@ -634,7 +643,7 @@ prefetch_do_request(PrefetchRequest *slot, bool *force_latest, XLogRecPtr *force
|
||||
.req.tag = T_NeonGetPageRequest,
|
||||
.req.latest = false,
|
||||
.req.lsn = 0,
|
||||
.rnode = slot->buftag.rnode,
|
||||
.rinfo = BufTagGetNRelFileInfo(slot->buftag),
|
||||
.forknum = slot->buftag.forkNum,
|
||||
.blkno = slot->buftag.blockNum,
|
||||
};
|
||||
@@ -643,13 +652,13 @@ prefetch_do_request(PrefetchRequest *slot, bool *force_latest, XLogRecPtr *force
|
||||
{
|
||||
request.req.lsn = *force_lsn;
|
||||
request.req.latest = *force_latest;
|
||||
slot->effective_request_lsn = *force_lsn;
|
||||
slot->actual_request_lsn = slot->effective_request_lsn = *force_lsn;
|
||||
}
|
||||
else
|
||||
{
|
||||
XLogRecPtr lsn = neon_get_request_lsn(
|
||||
&request.req.latest,
|
||||
slot->buftag.rnode,
|
||||
BufTagGetNRelFileInfo(slot->buftag),
|
||||
slot->buftag.forkNum,
|
||||
slot->buftag.blockNum
|
||||
);
|
||||
@@ -671,7 +680,7 @@ prefetch_do_request(PrefetchRequest *slot, bool *force_latest, XLogRecPtr *force
|
||||
* The best LSN to use for effective_request_lsn would be
|
||||
* XLogCtl->Insert.RedoRecPtr, but that's expensive to access.
|
||||
*/
|
||||
request.req.lsn = lsn;
|
||||
slot->actual_request_lsn = request.req.lsn = lsn;
|
||||
prefetch_lsn = Max(prefetch_lsn, lsn);
|
||||
slot->effective_request_lsn = prefetch_lsn;
|
||||
}
|
||||
@@ -893,9 +902,9 @@ nm_pack_request(NeonRequest * msg)
|
||||
|
||||
pq_sendbyte(&s, msg_req->req.latest);
|
||||
pq_sendint64(&s, msg_req->req.lsn);
|
||||
pq_sendint32(&s, msg_req->rnode.spcNode);
|
||||
pq_sendint32(&s, msg_req->rnode.dbNode);
|
||||
pq_sendint32(&s, msg_req->rnode.relNode);
|
||||
pq_sendint32(&s, NInfoGetSpcOid(msg_req->rinfo));
|
||||
pq_sendint32(&s, NInfoGetDbOid(msg_req->rinfo));
|
||||
pq_sendint32(&s, NInfoGetRelNumber(msg_req->rinfo));
|
||||
pq_sendbyte(&s, msg_req->forknum);
|
||||
|
||||
break;
|
||||
@@ -906,9 +915,9 @@ nm_pack_request(NeonRequest * msg)
|
||||
|
||||
pq_sendbyte(&s, msg_req->req.latest);
|
||||
pq_sendint64(&s, msg_req->req.lsn);
|
||||
pq_sendint32(&s, msg_req->rnode.spcNode);
|
||||
pq_sendint32(&s, msg_req->rnode.dbNode);
|
||||
pq_sendint32(&s, msg_req->rnode.relNode);
|
||||
pq_sendint32(&s, NInfoGetSpcOid(msg_req->rinfo));
|
||||
pq_sendint32(&s, NInfoGetDbOid(msg_req->rinfo));
|
||||
pq_sendint32(&s, NInfoGetRelNumber(msg_req->rinfo));
|
||||
pq_sendbyte(&s, msg_req->forknum);
|
||||
|
||||
break;
|
||||
@@ -929,9 +938,9 @@ nm_pack_request(NeonRequest * msg)
|
||||
|
||||
pq_sendbyte(&s, msg_req->req.latest);
|
||||
pq_sendint64(&s, msg_req->req.lsn);
|
||||
pq_sendint32(&s, msg_req->rnode.spcNode);
|
||||
pq_sendint32(&s, msg_req->rnode.dbNode);
|
||||
pq_sendint32(&s, msg_req->rnode.relNode);
|
||||
pq_sendint32(&s, NInfoGetSpcOid(msg_req->rinfo));
|
||||
pq_sendint32(&s, NInfoGetDbOid(msg_req->rinfo));
|
||||
pq_sendint32(&s, NInfoGetRelNumber(msg_req->rinfo));
|
||||
pq_sendbyte(&s, msg_req->forknum);
|
||||
pq_sendint32(&s, msg_req->blkno);
|
||||
|
||||
@@ -1063,10 +1072,7 @@ nm_to_string(NeonMessage * msg)
|
||||
NeonExistsRequest *msg_req = (NeonExistsRequest *) msg;
|
||||
|
||||
appendStringInfoString(&s, "{\"type\": \"NeonExistsRequest\"");
|
||||
appendStringInfo(&s, ", \"rnode\": \"%u/%u/%u\"",
|
||||
msg_req->rnode.spcNode,
|
||||
msg_req->rnode.dbNode,
|
||||
msg_req->rnode.relNode);
|
||||
appendStringInfo(&s, ", \"rinfo\": \"%u/%u/%u\"", RelFileInfoFmt(msg_req->rinfo));
|
||||
appendStringInfo(&s, ", \"forknum\": %d", msg_req->forknum);
|
||||
appendStringInfo(&s, ", \"lsn\": \"%X/%X\"", LSN_FORMAT_ARGS(msg_req->req.lsn));
|
||||
appendStringInfo(&s, ", \"latest\": %d", msg_req->req.latest);
|
||||
@@ -1079,10 +1085,7 @@ nm_to_string(NeonMessage * msg)
|
||||
NeonNblocksRequest *msg_req = (NeonNblocksRequest *) msg;
|
||||
|
||||
appendStringInfoString(&s, "{\"type\": \"NeonNblocksRequest\"");
|
||||
appendStringInfo(&s, ", \"rnode\": \"%u/%u/%u\"",
|
||||
msg_req->rnode.spcNode,
|
||||
msg_req->rnode.dbNode,
|
||||
msg_req->rnode.relNode);
|
||||
appendStringInfo(&s, ", \"rinfo\": \"%u/%u/%u\"", RelFileInfoFmt(msg_req->rinfo));
|
||||
appendStringInfo(&s, ", \"forknum\": %d", msg_req->forknum);
|
||||
appendStringInfo(&s, ", \"lsn\": \"%X/%X\"", LSN_FORMAT_ARGS(msg_req->req.lsn));
|
||||
appendStringInfo(&s, ", \"latest\": %d", msg_req->req.latest);
|
||||
@@ -1095,10 +1098,7 @@ nm_to_string(NeonMessage * msg)
|
||||
NeonGetPageRequest *msg_req = (NeonGetPageRequest *) msg;
|
||||
|
||||
appendStringInfoString(&s, "{\"type\": \"NeonGetPageRequest\"");
|
||||
appendStringInfo(&s, ", \"rnode\": \"%u/%u/%u\"",
|
||||
msg_req->rnode.spcNode,
|
||||
msg_req->rnode.dbNode,
|
||||
msg_req->rnode.relNode);
|
||||
appendStringInfo(&s, ", \"rinfo\": \"%u/%u/%u\"", RelFileInfoFmt(msg_req->rinfo));
|
||||
appendStringInfo(&s, ", \"forknum\": %d", msg_req->forknum);
|
||||
appendStringInfo(&s, ", \"blkno\": %u", msg_req->blkno);
|
||||
appendStringInfo(&s, ", \"lsn\": \"%X/%X\"", LSN_FORMAT_ARGS(msg_req->req.lsn));
|
||||
@@ -1187,13 +1187,13 @@ nm_to_string(NeonMessage * msg)
|
||||
* directly because it skips the logging if the LSN is new enough.
|
||||
*/
|
||||
static XLogRecPtr
|
||||
log_newpage_copy(RelFileNode *rnode, ForkNumber forkNum, BlockNumber blkno,
|
||||
log_newpage_copy(NRelFileInfo *rinfo, ForkNumber forkNum, BlockNumber blkno,
|
||||
Page page, bool page_std)
|
||||
{
|
||||
PGAlignedBlock copied_buffer;
|
||||
|
||||
memcpy(copied_buffer.data, page, BLCKSZ);
|
||||
return log_newpage(rnode, forkNum, blkno, copied_buffer.data, page_std);
|
||||
return log_newpage(rinfo, forkNum, blkno, copied_buffer.data, page_std);
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -1210,9 +1210,14 @@ PageIsEmptyHeapPage(char *buffer)
|
||||
}
|
||||
|
||||
static void
|
||||
neon_wallog_page(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, char *buffer, bool force)
|
||||
neon_wallog_page(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum,
|
||||
#if PG_MAJORVERSION_NUM < 16
|
||||
char *buffer, bool force)
|
||||
#else
|
||||
const char *buffer, bool force)
|
||||
#endif
|
||||
{
|
||||
XLogRecPtr lsn = PageGetLSN(buffer);
|
||||
XLogRecPtr lsn = PageGetLSN((Page) buffer);
|
||||
|
||||
if (ShutdownRequestPending)
|
||||
return;
|
||||
@@ -1232,15 +1237,14 @@ neon_wallog_page(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, ch
|
||||
/* FSM is never WAL-logged and we don't care. */
|
||||
XLogRecPtr recptr;
|
||||
|
||||
recptr = log_newpage_copy(&reln->smgr_rnode.node, forknum, blocknum, buffer, false);
|
||||
recptr = log_newpage_copy(&InfoFromSMgrRel(reln), forknum, blocknum,
|
||||
(Page) buffer, false);
|
||||
XLogFlush(recptr);
|
||||
lsn = recptr;
|
||||
ereport(SmgrTrace,
|
||||
(errmsg("Page %u of relation %u/%u/%u.%u was force logged. Evicted at lsn=%X/%X",
|
||||
blocknum,
|
||||
reln->smgr_rnode.node.spcNode,
|
||||
reln->smgr_rnode.node.dbNode,
|
||||
reln->smgr_rnode.node.relNode,
|
||||
RelFileInfoFmt(InfoFromSMgrRel(reln)),
|
||||
forknum, LSN_FORMAT_ARGS(lsn))));
|
||||
}
|
||||
else if (lsn == InvalidXLogRecPtr)
|
||||
@@ -1263,24 +1267,20 @@ neon_wallog_page(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, ch
|
||||
* sign: it implies that the page was not WAL-logged, and its contents
|
||||
* will be lost when it's evicted.
|
||||
*/
|
||||
if (PageIsNew(buffer))
|
||||
if (PageIsNew((Page) buffer))
|
||||
{
|
||||
ereport(SmgrTrace,
|
||||
(errmsg("Page %u of relation %u/%u/%u.%u is all-zeros",
|
||||
blocknum,
|
||||
reln->smgr_rnode.node.spcNode,
|
||||
reln->smgr_rnode.node.dbNode,
|
||||
reln->smgr_rnode.node.relNode,
|
||||
RelFileInfoFmt(InfoFromSMgrRel(reln)),
|
||||
forknum)));
|
||||
}
|
||||
else if (PageIsEmptyHeapPage(buffer))
|
||||
else if (PageIsEmptyHeapPage((Page) buffer))
|
||||
{
|
||||
ereport(SmgrTrace,
|
||||
(errmsg("Page %u of relation %u/%u/%u.%u is an empty heap page with no LSN",
|
||||
blocknum,
|
||||
reln->smgr_rnode.node.spcNode,
|
||||
reln->smgr_rnode.node.dbNode,
|
||||
reln->smgr_rnode.node.relNode,
|
||||
RelFileInfoFmt(InfoFromSMgrRel(reln)),
|
||||
forknum)));
|
||||
}
|
||||
else
|
||||
@@ -1288,9 +1288,7 @@ neon_wallog_page(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, ch
|
||||
ereport(PANIC,
|
||||
(errmsg("Page %u of relation %u/%u/%u.%u is evicted with zero LSN",
|
||||
blocknum,
|
||||
reln->smgr_rnode.node.spcNode,
|
||||
reln->smgr_rnode.node.dbNode,
|
||||
reln->smgr_rnode.node.relNode,
|
||||
RelFileInfoFmt(InfoFromSMgrRel(reln)),
|
||||
forknum)));
|
||||
}
|
||||
}
|
||||
@@ -1299,9 +1297,7 @@ neon_wallog_page(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, ch
|
||||
ereport(SmgrTrace,
|
||||
(errmsg("Page %u of relation %u/%u/%u.%u is already wal logged at lsn=%X/%X",
|
||||
blocknum,
|
||||
reln->smgr_rnode.node.spcNode,
|
||||
reln->smgr_rnode.node.dbNode,
|
||||
reln->smgr_rnode.node.relNode,
|
||||
RelFileInfoFmt(InfoFromSMgrRel(reln)),
|
||||
forknum, LSN_FORMAT_ARGS(lsn))));
|
||||
}
|
||||
|
||||
@@ -1309,7 +1305,7 @@ neon_wallog_page(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, ch
|
||||
* Remember the LSN on this page. When we read the page again, we must
|
||||
* read the same or newer version of it.
|
||||
*/
|
||||
SetLastWrittenLSNForBlock(lsn, reln->smgr_rnode.node, forknum, blocknum);
|
||||
SetLastWrittenLSNForBlock(lsn, InfoFromSMgrRel(reln), forknum, blocknum);
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -1379,7 +1375,7 @@ nm_adjust_lsn(XLogRecPtr lsn)
|
||||
* Return LSN for requesting pages and number of blocks from page server
|
||||
*/
|
||||
static XLogRecPtr
|
||||
neon_get_request_lsn(bool *latest, RelFileNode rnode, ForkNumber forknum, BlockNumber blkno)
|
||||
neon_get_request_lsn(bool *latest, NRelFileInfo rinfo, ForkNumber forknum, BlockNumber blkno)
|
||||
{
|
||||
XLogRecPtr lsn;
|
||||
|
||||
@@ -1394,7 +1390,7 @@ neon_get_request_lsn(bool *latest, RelFileNode rnode, ForkNumber forknum, BlockN
|
||||
/*
|
||||
* Get the last written LSN of this page.
|
||||
*/
|
||||
lsn = GetLastWrittenLSN(rnode, forknum, blkno);
|
||||
lsn = GetLastWrittenLSN(rinfo, forknum, blkno);
|
||||
lsn = nm_adjust_lsn(lsn);
|
||||
|
||||
elog(DEBUG1, "neon_get_request_lsn GetXLogReplayRecPtr %X/%X request lsn 0 ",
|
||||
@@ -1416,7 +1412,7 @@ neon_get_request_lsn(bool *latest, RelFileNode rnode, ForkNumber forknum, BlockN
|
||||
* so our request cannot concern those.
|
||||
*/
|
||||
*latest = true;
|
||||
lsn = GetLastWrittenLSN(rnode, forknum, blkno);
|
||||
lsn = GetLastWrittenLSN(rinfo, forknum, blkno);
|
||||
Assert(lsn != InvalidXLogRecPtr);
|
||||
elog(DEBUG1, "neon_get_request_lsn GetLastWrittenLSN lsn %X/%X ",
|
||||
(uint32) ((lsn) >> 32), (uint32) (lsn));
|
||||
@@ -1485,7 +1481,7 @@ neon_exists(SMgrRelation reln, ForkNumber forkNum)
|
||||
elog(ERROR, "unknown relpersistence '%c'", reln->smgr_relpersistence);
|
||||
}
|
||||
|
||||
if (get_cached_relsize(reln->smgr_rnode.node, forkNum, &n_blocks))
|
||||
if (get_cached_relsize(InfoFromSMgrRel(reln), forkNum, &n_blocks))
|
||||
{
|
||||
return true;
|
||||
}
|
||||
@@ -1500,20 +1496,26 @@ neon_exists(SMgrRelation reln, ForkNumber forkNum)
|
||||
*
|
||||
* For now, handle that special case here.
|
||||
*/
|
||||
#if PG_MAJORVERSION_NUM >= 16
|
||||
if (reln->smgr_rlocator.locator.spcOid == 0 &&
|
||||
reln->smgr_rlocator.locator.dbOid == 0 &&
|
||||
reln->smgr_rlocator.locator.relNumber == 0)
|
||||
#else
|
||||
if (reln->smgr_rnode.node.spcNode == 0 &&
|
||||
reln->smgr_rnode.node.dbNode == 0 &&
|
||||
reln->smgr_rnode.node.relNode == 0)
|
||||
#endif
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
request_lsn = neon_get_request_lsn(&latest, reln->smgr_rnode.node, forkNum, REL_METADATA_PSEUDO_BLOCKNO);
|
||||
request_lsn = neon_get_request_lsn(&latest, InfoFromSMgrRel(reln), forkNum, REL_METADATA_PSEUDO_BLOCKNO);
|
||||
{
|
||||
NeonExistsRequest request = {
|
||||
.req.tag = T_NeonExistsRequest,
|
||||
.req.latest = latest,
|
||||
.req.lsn = request_lsn,
|
||||
.rnode = reln->smgr_rnode.node,
|
||||
.rinfo = InfoFromSMgrRel(reln),
|
||||
.forknum = forkNum};
|
||||
|
||||
resp = page_server_request(&request);
|
||||
@@ -1529,9 +1531,7 @@ neon_exists(SMgrRelation reln, ForkNumber forkNum)
|
||||
ereport(ERROR,
|
||||
(errcode(ERRCODE_IO_ERROR),
|
||||
errmsg("could not read relation existence of rel %u/%u/%u.%u from page server at lsn %X/%08X",
|
||||
reln->smgr_rnode.node.spcNode,
|
||||
reln->smgr_rnode.node.dbNode,
|
||||
reln->smgr_rnode.node.relNode,
|
||||
RelFileInfoFmt(InfoFromSMgrRel(reln)),
|
||||
forkNum,
|
||||
(uint32) (request_lsn >> 32), (uint32) request_lsn),
|
||||
errdetail("page server returned error: %s",
|
||||
@@ -1571,9 +1571,7 @@ neon_create(SMgrRelation reln, ForkNumber forkNum, bool isRedo)
|
||||
}
|
||||
|
||||
elog(SmgrTrace, "Create relation %u/%u/%u.%u",
|
||||
reln->smgr_rnode.node.spcNode,
|
||||
reln->smgr_rnode.node.dbNode,
|
||||
reln->smgr_rnode.node.relNode,
|
||||
RelFileInfoFmt(InfoFromSMgrRel(reln)),
|
||||
forkNum);
|
||||
|
||||
/*
|
||||
@@ -1597,12 +1595,12 @@ neon_create(SMgrRelation reln, ForkNumber forkNum, bool isRedo)
|
||||
*/
|
||||
if (isRedo)
|
||||
{
|
||||
update_cached_relsize(reln->smgr_rnode.node, forkNum, 0);
|
||||
get_cached_relsize(reln->smgr_rnode.node, forkNum,
|
||||
update_cached_relsize(InfoFromSMgrRel(reln), forkNum, 0);
|
||||
get_cached_relsize(InfoFromSMgrRel(reln), forkNum,
|
||||
&reln->smgr_cached_nblocks[forkNum]);
|
||||
}
|
||||
else
|
||||
set_cached_relsize(reln->smgr_rnode.node, forkNum, 0);
|
||||
set_cached_relsize(InfoFromSMgrRel(reln), forkNum, 0);
|
||||
|
||||
#ifdef DEBUG_COMPARE_LOCAL
|
||||
if (IS_LOCAL_REL(reln))
|
||||
@@ -1629,17 +1627,17 @@ neon_create(SMgrRelation reln, ForkNumber forkNum, bool isRedo)
|
||||
* we are usually not in a transaction anymore when this is called.
|
||||
*/
|
||||
void
|
||||
neon_unlink(RelFileNodeBackend rnode, ForkNumber forkNum, bool isRedo)
|
||||
neon_unlink(NRelFileInfoBackend rinfo, ForkNumber forkNum, bool isRedo)
|
||||
{
|
||||
/*
|
||||
* Might or might not exist locally, depending on whether it's an unlogged
|
||||
* or permanent relation (or if DEBUG_COMPARE_LOCAL is set). Try to
|
||||
* unlink, it won't do any harm if the file doesn't exist.
|
||||
*/
|
||||
mdunlink(rnode, forkNum, isRedo);
|
||||
if (!RelFileNodeBackendIsTemp(rnode))
|
||||
mdunlink(rinfo, forkNum, isRedo);
|
||||
if (!NRelFileInfoBackendIsTemp(rinfo))
|
||||
{
|
||||
forget_cached_relsize(rnode.node, forkNum);
|
||||
forget_cached_relsize(InfoFromNInfoB(rinfo), forkNum);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1653,8 +1651,13 @@ neon_unlink(RelFileNodeBackend rnode, ForkNumber forkNum, bool isRedo)
|
||||
* causes intervening file space to become filled with zeroes.
|
||||
*/
|
||||
void
|
||||
#if PG_MAJORVERSION_NUM < 16
|
||||
neon_extend(SMgrRelation reln, ForkNumber forkNum, BlockNumber blkno,
|
||||
char *buffer, bool skipFsync)
|
||||
#else
|
||||
neon_extend(SMgrRelation reln, ForkNumber forkNum, BlockNumber blkno,
|
||||
const void *buffer, bool skipFsync)
|
||||
#endif
|
||||
{
|
||||
XLogRecPtr lsn;
|
||||
BlockNumber n_blocks = 0;
|
||||
@@ -1707,17 +1710,15 @@ neon_extend(SMgrRelation reln, ForkNumber forkNum, BlockNumber blkno,
|
||||
neon_wallog_page(reln, forkNum, n_blocks++, buffer, true);
|
||||
|
||||
neon_wallog_page(reln, forkNum, blkno, buffer, false);
|
||||
set_cached_relsize(reln->smgr_rnode.node, forkNum, blkno + 1);
|
||||
set_cached_relsize(InfoFromSMgrRel(reln), forkNum, blkno + 1);
|
||||
|
||||
lsn = PageGetLSN(buffer);
|
||||
lsn = PageGetLSN((Page) buffer);
|
||||
elog(SmgrTrace, "smgrextend called for %u/%u/%u.%u blk %u, page LSN: %X/%08X",
|
||||
reln->smgr_rnode.node.spcNode,
|
||||
reln->smgr_rnode.node.dbNode,
|
||||
reln->smgr_rnode.node.relNode,
|
||||
RelFileInfoFmt(InfoFromSMgrRel(reln)),
|
||||
forkNum, blkno,
|
||||
(uint32) (lsn >> 32), (uint32) lsn);
|
||||
|
||||
lfc_write(reln->smgr_rnode.node, forkNum, blkno, buffer);
|
||||
lfc_write(InfoFromSMgrRel(reln), forkNum, blkno, buffer);
|
||||
|
||||
#ifdef DEBUG_COMPARE_LOCAL
|
||||
if (IS_LOCAL_REL(reln))
|
||||
@@ -1732,11 +1733,98 @@ neon_extend(SMgrRelation reln, ForkNumber forkNum, BlockNumber blkno,
|
||||
if (lsn == InvalidXLogRecPtr)
|
||||
{
|
||||
lsn = GetXLogInsertRecPtr();
|
||||
SetLastWrittenLSNForBlock(lsn, reln->smgr_rnode.node, forkNum, blkno);
|
||||
SetLastWrittenLSNForBlock(lsn, InfoFromSMgrRel(reln), forkNum, blkno);
|
||||
}
|
||||
SetLastWrittenLSNForRelation(lsn, reln->smgr_rnode.node, forkNum);
|
||||
SetLastWrittenLSNForRelation(lsn, InfoFromSMgrRel(reln), forkNum);
|
||||
}
|
||||
|
||||
#if PG_MAJORVERSION_NUM >= 16
|
||||
void
|
||||
neon_zeroextend(SMgrRelation reln, ForkNumber forkNum, BlockNumber blocknum,
|
||||
int nblocks, bool skipFsync)
|
||||
{
|
||||
const PGAlignedBlock buffer = {0};
|
||||
BlockNumber curblocknum = blocknum;
|
||||
int remblocks = nblocks;
|
||||
XLogRecPtr lsn = 0;
|
||||
|
||||
switch (reln->smgr_relpersistence)
|
||||
{
|
||||
case 0:
|
||||
elog(ERROR, "cannot call smgrextend() on rel with unknown persistence");
|
||||
|
||||
case RELPERSISTENCE_PERMANENT:
|
||||
break;
|
||||
|
||||
case RELPERSISTENCE_TEMP:
|
||||
case RELPERSISTENCE_UNLOGGED:
|
||||
mdzeroextend(reln, forkNum, blocknum, nblocks, skipFsync);
|
||||
return;
|
||||
|
||||
default:
|
||||
elog(ERROR, "unknown relpersistence '%c'", reln->smgr_relpersistence);
|
||||
}
|
||||
|
||||
if (max_cluster_size > 0 &&
|
||||
reln->smgr_relpersistence == RELPERSISTENCE_PERMANENT &&
|
||||
!IsAutoVacuumWorkerProcess())
|
||||
{
|
||||
uint64 current_size = GetZenithCurrentClusterSize();
|
||||
|
||||
if (current_size >= ((uint64) max_cluster_size) * 1024 * 1024)
|
||||
ereport(ERROR,
|
||||
(errcode(ERRCODE_DISK_FULL),
|
||||
errmsg("could not extend file because cluster size limit (%d MB) has been exceeded",
|
||||
max_cluster_size),
|
||||
errhint("This limit is defined by neon.max_cluster_size GUC")));
|
||||
}
|
||||
|
||||
/*
|
||||
* If a relation manages to grow to 2^32-1 blocks, refuse to extend it any
|
||||
* more --- we mustn't create a block whose number actually is
|
||||
* InvalidBlockNumber or larger.
|
||||
*/
|
||||
if ((uint64) blocknum + nblocks >= (uint64) InvalidBlockNumber)
|
||||
ereport(ERROR,
|
||||
(errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
|
||||
errmsg("cannot extend file \"%s\" beyond %u blocks",
|
||||
relpath(reln->smgr_rlocator, forkNum),
|
||||
InvalidBlockNumber)));
|
||||
|
||||
/* Don't log any pages if we're not allowed to do so. */
|
||||
if (!XLogInsertAllowed())
|
||||
return;
|
||||
|
||||
while (remblocks > 0)
|
||||
{
|
||||
int count = Min(remblocks, XLR_MAX_BLOCK_ID);
|
||||
|
||||
XLogBeginInsert();
|
||||
|
||||
for (int i = 0; i < count; i++)
|
||||
XLogRegisterBlock(i, &InfoFromSMgrRel(reln), forkNum, blocknum + i,
|
||||
(char *) buffer.data, REGBUF_FORCE_IMAGE | REGBUF_STANDARD);
|
||||
|
||||
lsn = XLogInsert(RM_XLOG_ID, XLOG_FPI);
|
||||
|
||||
for (int i = 0; i < count; i++)
|
||||
{
|
||||
lfc_write(InfoFromSMgrRel(reln), forkNum, blocknum + i, buffer.data);
|
||||
SetLastWrittenLSNForBlock(lsn, InfoFromSMgrRel(reln), forkNum,
|
||||
blocknum + i);
|
||||
}
|
||||
|
||||
blocknum += count;
|
||||
remblocks -= count;
|
||||
}
|
||||
|
||||
Assert(lsn != 0);
|
||||
|
||||
SetLastWrittenLSNForRelation(lsn, InfoFromSMgrRel(reln), forkNum);
|
||||
set_cached_relsize(InfoFromSMgrRel(reln), forkNum, blocknum);
|
||||
}
|
||||
#endif
|
||||
|
||||
/*
|
||||
* neon_open() -- Initialize newly-opened relation.
|
||||
*/
|
||||
@@ -1792,14 +1880,14 @@ neon_prefetch(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum)
|
||||
elog(ERROR, "unknown relpersistence '%c'", reln->smgr_relpersistence);
|
||||
}
|
||||
|
||||
if (lfc_cache_contains(reln->smgr_rnode.node, forknum, blocknum))
|
||||
if (lfc_cache_contains(InfoFromSMgrRel(reln), forknum, blocknum))
|
||||
return false;
|
||||
|
||||
tag = (BufferTag) {
|
||||
.rnode = reln->smgr_rnode.node,
|
||||
.forkNum = forknum,
|
||||
.blockNum = blocknum
|
||||
};
|
||||
CopyNRelFileInfoToBufTag(tag, InfoFromSMgrRel(reln));
|
||||
|
||||
ring_index = prefetch_register_buffer(tag, NULL, NULL);
|
||||
|
||||
@@ -1851,9 +1939,15 @@ neon_writeback(SMgrRelation reln, ForkNumber forknum,
|
||||
* While this function is defined in the neon extension, it is used by neon_test_utils directly.
|
||||
* To avoid breaking tests at runtime, please keep the function signature in sync.
|
||||
*/
|
||||
void
|
||||
neon_read_at_lsn(RelFileNode rnode, ForkNumber forkNum, BlockNumber blkno,
|
||||
#if PG_MAJORVERSION_NUM < 16
|
||||
void PGDLLEXPORT
|
||||
neon_read_at_lsn(NRelFileInfo rinfo, ForkNumber forkNum, BlockNumber blkno,
|
||||
XLogRecPtr request_lsn, bool request_latest, char *buffer)
|
||||
#else
|
||||
void PGDLLEXPORT
|
||||
neon_read_at_lsn(NRelFileInfo rinfo, ForkNumber forkNum, BlockNumber blkno,
|
||||
XLogRecPtr request_lsn, bool request_latest, void *buffer)
|
||||
#endif
|
||||
{
|
||||
NeonResponse *resp;
|
||||
BufferTag buftag;
|
||||
@@ -1862,11 +1956,12 @@ neon_read_at_lsn(RelFileNode rnode, ForkNumber forkNum, BlockNumber blkno,
|
||||
PrefetchRequest *slot;
|
||||
|
||||
buftag = (BufferTag) {
|
||||
.rnode = rnode,
|
||||
.forkNum = forkNum,
|
||||
.blockNum = blkno,
|
||||
};
|
||||
|
||||
CopyNRelFileInfoToBufTag(buftag, rinfo);
|
||||
|
||||
/*
|
||||
* The redo process does not lock pages that it needs to replay but are
|
||||
* not in the shared buffers, so a concurrent process may request the
|
||||
@@ -1957,7 +2052,7 @@ neon_read_at_lsn(RelFileNode rnode, ForkNumber forkNum, BlockNumber blkno,
|
||||
{
|
||||
case T_NeonGetPageResponse:
|
||||
memcpy(buffer, ((NeonGetPageResponse *) resp)->page, BLCKSZ);
|
||||
lfc_write(rnode, forkNum, blkno, buffer);
|
||||
lfc_write(rinfo, forkNum, blkno, buffer);
|
||||
break;
|
||||
|
||||
case T_NeonErrorResponse:
|
||||
@@ -1965,9 +2060,7 @@ neon_read_at_lsn(RelFileNode rnode, ForkNumber forkNum, BlockNumber blkno,
|
||||
(errcode(ERRCODE_IO_ERROR),
|
||||
errmsg("could not read block %u in rel %u/%u/%u.%u from page server at lsn %X/%08X",
|
||||
blkno,
|
||||
rnode.spcNode,
|
||||
rnode.dbNode,
|
||||
rnode.relNode,
|
||||
RelFileInfoFmt(rinfo),
|
||||
forkNum,
|
||||
(uint32) (request_lsn >> 32), (uint32) request_lsn),
|
||||
errdetail("page server returned error: %s",
|
||||
@@ -1987,7 +2080,11 @@ neon_read_at_lsn(RelFileNode rnode, ForkNumber forkNum, BlockNumber blkno,
|
||||
*/
|
||||
void
|
||||
neon_read(SMgrRelation reln, ForkNumber forkNum, BlockNumber blkno,
|
||||
#if PG_MAJORVERSION_NUM < 16
|
||||
char *buffer)
|
||||
#else
|
||||
void *buffer)
|
||||
#endif
|
||||
{
|
||||
bool latest;
|
||||
XLogRecPtr request_lsn;
|
||||
@@ -2010,13 +2107,13 @@ neon_read(SMgrRelation reln, ForkNumber forkNum, BlockNumber blkno,
|
||||
}
|
||||
|
||||
/* Try to read from local file cache */
|
||||
if (lfc_read(reln->smgr_rnode.node, forkNum, blkno, buffer))
|
||||
if (lfc_read(InfoFromSMgrRel(reln), forkNum, blkno, buffer))
|
||||
{
|
||||
return;
|
||||
}
|
||||
|
||||
request_lsn = neon_get_request_lsn(&latest, reln->smgr_rnode.node, forkNum, blkno);
|
||||
neon_read_at_lsn(reln->smgr_rnode.node, forkNum, blkno, request_lsn, latest, buffer);
|
||||
request_lsn = neon_get_request_lsn(&latest, InfoFromSMgrRel(reln), forkNum, blkno);
|
||||
neon_read_at_lsn(InfoFromSMgrRel(reln), forkNum, blkno, request_lsn, latest, buffer);
|
||||
|
||||
#ifdef DEBUG_COMPARE_LOCAL
|
||||
if (forkNum == MAIN_FORKNUM && IS_LOCAL_REL(reln))
|
||||
@@ -2030,27 +2127,23 @@ neon_read(SMgrRelation reln, ForkNumber forkNum, BlockNumber blkno,
|
||||
memcpy(pageserver_masked, buffer, BLCKSZ);
|
||||
memcpy(mdbuf_masked, mdbuf, BLCKSZ);
|
||||
|
||||
if (PageIsNew(mdbuf))
|
||||
if (PageIsNew((Page) mdbuf))
|
||||
{
|
||||
if (!PageIsNew(pageserver_masked))
|
||||
if (!PageIsNew((Page) pageserver_masked))
|
||||
{
|
||||
elog(PANIC, "page is new in MD but not in Page Server at blk %u in rel %u/%u/%u fork %u (request LSN %X/%08X):\n%s\n",
|
||||
blkno,
|
||||
reln->smgr_rnode.node.spcNode,
|
||||
reln->smgr_rnode.node.dbNode,
|
||||
reln->smgr_rnode.node.relNode,
|
||||
RelFileInfoFmt(InfoFromSMgrRel(reln)),
|
||||
forkNum,
|
||||
(uint32) (request_lsn >> 32), (uint32) request_lsn,
|
||||
hexdump_page(buffer));
|
||||
}
|
||||
}
|
||||
else if (PageIsNew(buffer))
|
||||
else if (PageIsNew((Page) buffer))
|
||||
{
|
||||
elog(PANIC, "page is new in Page Server but not in MD at blk %u in rel %u/%u/%u fork %u (request LSN %X/%08X):\n%s\n",
|
||||
blkno,
|
||||
reln->smgr_rnode.node.spcNode,
|
||||
reln->smgr_rnode.node.dbNode,
|
||||
reln->smgr_rnode.node.relNode,
|
||||
RelFileInfoFmt(InfoFromSMgrRel(reln)),
|
||||
forkNum,
|
||||
(uint32) (request_lsn >> 32), (uint32) request_lsn,
|
||||
hexdump_page(mdbuf));
|
||||
@@ -2065,9 +2158,7 @@ neon_read(SMgrRelation reln, ForkNumber forkNum, BlockNumber blkno,
|
||||
{
|
||||
elog(PANIC, "heap buffers differ at blk %u in rel %u/%u/%u fork %u (request LSN %X/%08X):\n------ MD ------\n%s\n------ Page Server ------\n%s\n",
|
||||
blkno,
|
||||
reln->smgr_rnode.node.spcNode,
|
||||
reln->smgr_rnode.node.dbNode,
|
||||
reln->smgr_rnode.node.relNode,
|
||||
RelFileInfoFmt(InfoFromSMgrRel(reln)),
|
||||
forkNum,
|
||||
(uint32) (request_lsn >> 32), (uint32) request_lsn,
|
||||
hexdump_page(mdbuf_masked),
|
||||
@@ -2086,9 +2177,7 @@ neon_read(SMgrRelation reln, ForkNumber forkNum, BlockNumber blkno,
|
||||
{
|
||||
elog(PANIC, "btree buffers differ at blk %u in rel %u/%u/%u fork %u (request LSN %X/%08X):\n------ MD ------\n%s\n------ Page Server ------\n%s\n",
|
||||
blkno,
|
||||
reln->smgr_rnode.node.spcNode,
|
||||
reln->smgr_rnode.node.dbNode,
|
||||
reln->smgr_rnode.node.relNode,
|
||||
RelFileInfoFmt(InfoFromSMgrRel(reln)),
|
||||
forkNum,
|
||||
(uint32) (request_lsn >> 32), (uint32) request_lsn,
|
||||
hexdump_page(mdbuf_masked),
|
||||
@@ -2130,7 +2219,11 @@ hexdump_page(char *page)
|
||||
*/
|
||||
void
|
||||
neon_write(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum,
|
||||
#if PG_MAJORVERSION_NUM < 16
|
||||
char *buffer, bool skipFsync)
|
||||
#else
|
||||
const void *buffer, bool skipFsync)
|
||||
#endif
|
||||
{
|
||||
XLogRecPtr lsn;
|
||||
|
||||
@@ -2168,15 +2261,13 @@ neon_write(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum,
|
||||
|
||||
neon_wallog_page(reln, forknum, blocknum, buffer, false);
|
||||
|
||||
lsn = PageGetLSN(buffer);
|
||||
lsn = PageGetLSN((Page) buffer);
|
||||
elog(SmgrTrace, "smgrwrite called for %u/%u/%u.%u blk %u, page LSN: %X/%08X",
|
||||
reln->smgr_rnode.node.spcNode,
|
||||
reln->smgr_rnode.node.dbNode,
|
||||
reln->smgr_rnode.node.relNode,
|
||||
RelFileInfoFmt(InfoFromSMgrRel(reln)),
|
||||
forknum, blocknum,
|
||||
(uint32) (lsn >> 32), (uint32) lsn);
|
||||
|
||||
lfc_write(reln->smgr_rnode.node, forknum, blocknum, buffer);
|
||||
lfc_write(InfoFromSMgrRel(reln), forknum, blocknum, buffer);
|
||||
|
||||
#ifdef DEBUG_COMPARE_LOCAL
|
||||
if (IS_LOCAL_REL(reln))
|
||||
@@ -2212,23 +2303,21 @@ neon_nblocks(SMgrRelation reln, ForkNumber forknum)
|
||||
elog(ERROR, "unknown relpersistence '%c'", reln->smgr_relpersistence);
|
||||
}
|
||||
|
||||
if (get_cached_relsize(reln->smgr_rnode.node, forknum, &n_blocks))
|
||||
if (get_cached_relsize(InfoFromSMgrRel(reln), forknum, &n_blocks))
|
||||
{
|
||||
elog(SmgrTrace, "cached nblocks for %u/%u/%u.%u: %u blocks",
|
||||
reln->smgr_rnode.node.spcNode,
|
||||
reln->smgr_rnode.node.dbNode,
|
||||
reln->smgr_rnode.node.relNode,
|
||||
RelFileInfoFmt(InfoFromSMgrRel(reln)),
|
||||
forknum, n_blocks);
|
||||
return n_blocks;
|
||||
}
|
||||
|
||||
request_lsn = neon_get_request_lsn(&latest, reln->smgr_rnode.node, forknum, REL_METADATA_PSEUDO_BLOCKNO);
|
||||
request_lsn = neon_get_request_lsn(&latest, InfoFromSMgrRel(reln), forknum, REL_METADATA_PSEUDO_BLOCKNO);
|
||||
{
|
||||
NeonNblocksRequest request = {
|
||||
.req.tag = T_NeonNblocksRequest,
|
||||
.req.latest = latest,
|
||||
.req.lsn = request_lsn,
|
||||
.rnode = reln->smgr_rnode.node,
|
||||
.rinfo = InfoFromSMgrRel(reln),
|
||||
.forknum = forknum,
|
||||
};
|
||||
|
||||
@@ -2245,9 +2334,7 @@ neon_nblocks(SMgrRelation reln, ForkNumber forknum)
|
||||
ereport(ERROR,
|
||||
(errcode(ERRCODE_IO_ERROR),
|
||||
errmsg("could not read relation size of rel %u/%u/%u.%u from page server at lsn %X/%08X",
|
||||
reln->smgr_rnode.node.spcNode,
|
||||
reln->smgr_rnode.node.dbNode,
|
||||
reln->smgr_rnode.node.relNode,
|
||||
RelFileInfoFmt(InfoFromSMgrRel(reln)),
|
||||
forknum,
|
||||
(uint32) (request_lsn >> 32), (uint32) request_lsn),
|
||||
errdetail("page server returned error: %s",
|
||||
@@ -2257,12 +2344,10 @@ neon_nblocks(SMgrRelation reln, ForkNumber forknum)
|
||||
default:
|
||||
elog(ERROR, "unexpected response from page server with tag 0x%02x", resp->tag);
|
||||
}
|
||||
update_cached_relsize(reln->smgr_rnode.node, forknum, n_blocks);
|
||||
update_cached_relsize(InfoFromSMgrRel(reln), forknum, n_blocks);
|
||||
|
||||
elog(SmgrTrace, "neon_nblocks: rel %u/%u/%u fork %u (request LSN %X/%08X): %u blocks",
|
||||
reln->smgr_rnode.node.spcNode,
|
||||
reln->smgr_rnode.node.dbNode,
|
||||
reln->smgr_rnode.node.relNode,
|
||||
RelFileInfoFmt(InfoFromSMgrRel(reln)),
|
||||
forknum,
|
||||
(uint32) (request_lsn >> 32), (uint32) request_lsn,
|
||||
n_blocks);
|
||||
@@ -2281,7 +2366,7 @@ neon_dbsize(Oid dbNode)
|
||||
int64 db_size;
|
||||
XLogRecPtr request_lsn;
|
||||
bool latest;
|
||||
RelFileNode dummy_node = {InvalidOid, InvalidOid, InvalidOid};
|
||||
NRelFileInfo dummy_node = {0};
|
||||
|
||||
request_lsn = neon_get_request_lsn(&latest, dummy_node, MAIN_FORKNUM, REL_METADATA_PSEUDO_BLOCKNO);
|
||||
{
|
||||
@@ -2350,7 +2435,7 @@ neon_truncate(SMgrRelation reln, ForkNumber forknum, BlockNumber nblocks)
|
||||
elog(ERROR, "unknown relpersistence '%c'", reln->smgr_relpersistence);
|
||||
}
|
||||
|
||||
set_cached_relsize(reln->smgr_rnode.node, forknum, nblocks);
|
||||
set_cached_relsize(InfoFromSMgrRel(reln), forknum, nblocks);
|
||||
|
||||
/*
|
||||
* Truncating a relation drops all its buffers from the buffer cache
|
||||
@@ -2378,7 +2463,7 @@ neon_truncate(SMgrRelation reln, ForkNumber forknum, BlockNumber nblocks)
|
||||
* for the extended pages, so there's no harm in leaving behind obsolete
|
||||
* entries for the truncated chunks.
|
||||
*/
|
||||
SetLastWrittenLSNForRelation(lsn, reln->smgr_rnode.node, forknum);
|
||||
SetLastWrittenLSNForRelation(lsn, InfoFromSMgrRel(reln), forknum);
|
||||
|
||||
#ifdef DEBUG_COMPARE_LOCAL
|
||||
if (IS_LOCAL_REL(reln))
|
||||
@@ -2448,9 +2533,7 @@ neon_start_unlogged_build(SMgrRelation reln)
|
||||
|
||||
ereport(SmgrTrace,
|
||||
(errmsg("starting unlogged build of relation %u/%u/%u",
|
||||
reln->smgr_rnode.node.spcNode,
|
||||
reln->smgr_rnode.node.dbNode,
|
||||
reln->smgr_rnode.node.relNode)));
|
||||
RelFileInfoFmt(InfoFromSMgrRel(reln)))));
|
||||
|
||||
switch (reln->smgr_relpersistence)
|
||||
{
|
||||
@@ -2500,9 +2583,7 @@ neon_finish_unlogged_build_phase_1(SMgrRelation reln)
|
||||
|
||||
ereport(SmgrTrace,
|
||||
(errmsg("finishing phase 1 of unlogged build of relation %u/%u/%u",
|
||||
reln->smgr_rnode.node.spcNode,
|
||||
reln->smgr_rnode.node.dbNode,
|
||||
reln->smgr_rnode.node.relNode)));
|
||||
RelFileInfoFmt(InfoFromSMgrRel(reln)))));
|
||||
|
||||
if (unlogged_build_phase == UNLOGGED_BUILD_NOT_PERMANENT)
|
||||
return;
|
||||
@@ -2525,18 +2606,16 @@ neon_finish_unlogged_build_phase_1(SMgrRelation reln)
|
||||
static void
|
||||
neon_end_unlogged_build(SMgrRelation reln)
|
||||
{
|
||||
NRelFileInfoBackend rinfob = InfoBFromSMgrRel(reln);
|
||||
|
||||
Assert(unlogged_build_rel == reln);
|
||||
|
||||
ereport(SmgrTrace,
|
||||
(errmsg("ending unlogged build of relation %u/%u/%u",
|
||||
reln->smgr_rnode.node.spcNode,
|
||||
reln->smgr_rnode.node.dbNode,
|
||||
reln->smgr_rnode.node.relNode)));
|
||||
RelFileInfoFmt(InfoFromNInfoB(rinfob)))));
|
||||
|
||||
if (unlogged_build_phase != UNLOGGED_BUILD_NOT_PERMANENT)
|
||||
{
|
||||
RelFileNodeBackend rnode;
|
||||
|
||||
Assert(unlogged_build_phase == UNLOGGED_BUILD_PHASE_2);
|
||||
Assert(reln->smgr_relpersistence == RELPERSISTENCE_UNLOGGED);
|
||||
|
||||
@@ -2544,19 +2623,17 @@ neon_end_unlogged_build(SMgrRelation reln)
|
||||
reln->smgr_relpersistence = RELPERSISTENCE_PERMANENT;
|
||||
|
||||
/* Remove local copy */
|
||||
rnode = reln->smgr_rnode;
|
||||
rinfob = InfoBFromSMgrRel(reln);
|
||||
for (int forknum = 0; forknum <= MAX_FORKNUM; forknum++)
|
||||
{
|
||||
elog(SmgrTrace, "forgetting cached relsize for %u/%u/%u.%u",
|
||||
rnode.node.spcNode,
|
||||
rnode.node.dbNode,
|
||||
rnode.node.relNode,
|
||||
RelFileInfoFmt(InfoFromNInfoB(rinfob)),
|
||||
forknum);
|
||||
|
||||
forget_cached_relsize(rnode.node, forknum);
|
||||
forget_cached_relsize(InfoFromNInfoB(rinfob), forknum);
|
||||
mdclose(reln, forknum);
|
||||
/* use isRedo == true, so that we drop it immediately */
|
||||
mdunlink(rnode, forknum, true);
|
||||
mdunlink(rinfob, forknum, true);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -2608,6 +2685,9 @@ static const struct f_smgr neon_smgr =
|
||||
.smgr_exists = neon_exists,
|
||||
.smgr_unlink = neon_unlink,
|
||||
.smgr_extend = neon_extend,
|
||||
#if PG_MAJORVERSION_NUM >= 16
|
||||
.smgr_zeroextend = neon_zeroextend,
|
||||
#endif
|
||||
.smgr_prefetch = neon_prefetch,
|
||||
.smgr_read = neon_read,
|
||||
.smgr_write = neon_write,
|
||||
@@ -2622,12 +2702,12 @@ static const struct f_smgr neon_smgr =
|
||||
};
|
||||
|
||||
const f_smgr *
|
||||
smgr_neon(BackendId backend, RelFileNode rnode)
|
||||
smgr_neon(BackendId backend, NRelFileInfo rinfo)
|
||||
{
|
||||
|
||||
/* Don't use page server for temp relations */
|
||||
if (backend != InvalidBackendId)
|
||||
return smgr_standard(backend, rnode);
|
||||
return smgr_standard(backend, rinfo);
|
||||
else
|
||||
return &neon_smgr;
|
||||
}
|
||||
@@ -2681,7 +2761,7 @@ bool
|
||||
neon_redo_read_buffer_filter(XLogReaderState *record, uint8 block_id)
|
||||
{
|
||||
XLogRecPtr end_recptr = record->EndRecPtr;
|
||||
RelFileNode rnode;
|
||||
NRelFileInfo rinfo;
|
||||
ForkNumber forknum;
|
||||
BlockNumber blkno;
|
||||
BufferTag tag;
|
||||
@@ -2695,10 +2775,10 @@ neon_redo_read_buffer_filter(XLogReaderState *record, uint8 block_id)
|
||||
return true;
|
||||
|
||||
#if PG_VERSION_NUM < 150000
|
||||
if (!XLogRecGetBlockTag(record, block_id, &rnode, &forknum, &blkno))
|
||||
if (!XLogRecGetBlockTag(record, block_id, &rinfo, &forknum, &blkno))
|
||||
elog(PANIC, "failed to locate backup block with ID %d", block_id);
|
||||
#else
|
||||
XLogRecGetBlockTag(record, block_id, &rnode, &forknum, &blkno);
|
||||
XLogRecGetBlockTag(record, block_id, &rinfo, &forknum, &blkno);
|
||||
#endif
|
||||
|
||||
/*
|
||||
@@ -2706,10 +2786,13 @@ neon_redo_read_buffer_filter(XLogReaderState *record, uint8 block_id)
|
||||
* regardless of whether the block is stored in shared buffers.
|
||||
* See also this function's top comment.
|
||||
*/
|
||||
if (!OidIsValid(rnode.dbNode))
|
||||
if (!OidIsValid(NInfoGetDbOid(rinfo)))
|
||||
return false;
|
||||
|
||||
INIT_BUFFERTAG(tag, rnode, forknum, blkno);
|
||||
CopyNRelFileInfoToBufTag(tag, rinfo);
|
||||
tag.forkNum = forknum;
|
||||
tag.blockNum = blkno;
|
||||
|
||||
hash = BufTableHashCode(&tag);
|
||||
partitionLock = BufMappingPartitionLock(hash);
|
||||
|
||||
@@ -2725,24 +2808,24 @@ neon_redo_read_buffer_filter(XLogReaderState *record, uint8 block_id)
|
||||
no_redo_needed = buffer < 0;
|
||||
|
||||
/* In both cases, set the lwLSN past this WAL record */
|
||||
SetLastWrittenLSNForBlock(end_recptr, rnode, forknum, blkno);
|
||||
SetLastWrittenLSNForBlock(end_recptr, rinfo, forknum, blkno);
|
||||
|
||||
/* we don't have the buffer in memory, update lwLsn past this record,
|
||||
* also evict the page from the file cache
|
||||
*/
|
||||
if (no_redo_needed)
|
||||
lfc_evict(rnode, forknum, blkno);
|
||||
lfc_evict(rinfo, forknum, blkno);
|
||||
|
||||
|
||||
LWLockRelease(partitionLock);
|
||||
|
||||
/* Extend the relation if we know its size */
|
||||
if (get_cached_relsize(rnode, forknum, &relsize))
|
||||
if (get_cached_relsize(rinfo, forknum, &relsize))
|
||||
{
|
||||
if (relsize < blkno + 1)
|
||||
{
|
||||
update_cached_relsize(rnode, forknum, blkno + 1);
|
||||
SetLastWrittenLSNForRelation(end_recptr, rnode, forknum);
|
||||
update_cached_relsize(rinfo, forknum, blkno + 1);
|
||||
SetLastWrittenLSNForRelation(end_recptr, rinfo, forknum);
|
||||
}
|
||||
}
|
||||
else
|
||||
@@ -2763,7 +2846,7 @@ neon_redo_read_buffer_filter(XLogReaderState *record, uint8 block_id)
|
||||
.latest = false,
|
||||
.tag = T_NeonNblocksRequest,
|
||||
},
|
||||
.rnode = rnode,
|
||||
.rinfo = rinfo,
|
||||
.forknum = forknum,
|
||||
};
|
||||
|
||||
@@ -2774,8 +2857,8 @@ neon_redo_read_buffer_filter(XLogReaderState *record, uint8 block_id)
|
||||
|
||||
Assert(nbresponse->n_blocks > blkno);
|
||||
|
||||
set_cached_relsize(rnode, forknum, nbresponse->n_blocks);
|
||||
SetLastWrittenLSNForRelation(end_recptr, rnode, forknum);
|
||||
set_cached_relsize(rinfo, forknum, nbresponse->n_blocks);
|
||||
SetLastWrittenLSNForRelation(end_recptr, rinfo, forknum);
|
||||
|
||||
elog(SmgrTrace, "Set length to %d", nbresponse->n_blocks);
|
||||
}
|
||||
|
||||
@@ -14,8 +14,10 @@
|
||||
*/
|
||||
#include "postgres.h"
|
||||
|
||||
#include "neon_pgversioncompat.h"
|
||||
|
||||
#include "pagestore_client.h"
|
||||
#include "storage/relfilenode.h"
|
||||
#include RELFILEINFO_HDR
|
||||
#include "storage/smgr.h"
|
||||
#include "storage/lwlock.h"
|
||||
#include "storage/ipc.h"
|
||||
@@ -30,7 +32,7 @@
|
||||
|
||||
typedef struct
|
||||
{
|
||||
RelFileNode rnode;
|
||||
NRelFileInfo rinfo;
|
||||
ForkNumber forknum;
|
||||
} RelTag;
|
||||
|
||||
@@ -75,7 +77,7 @@ neon_smgr_shmem_startup(void)
|
||||
}
|
||||
|
||||
bool
|
||||
get_cached_relsize(RelFileNode rnode, ForkNumber forknum, BlockNumber *size)
|
||||
get_cached_relsize(NRelFileInfo rinfo, ForkNumber forknum, BlockNumber *size)
|
||||
{
|
||||
bool found = false;
|
||||
|
||||
@@ -84,7 +86,7 @@ get_cached_relsize(RelFileNode rnode, ForkNumber forknum, BlockNumber *size)
|
||||
RelTag tag;
|
||||
RelSizeEntry *entry;
|
||||
|
||||
tag.rnode = rnode;
|
||||
tag.rinfo = rinfo;
|
||||
tag.forknum = forknum;
|
||||
LWLockAcquire(relsize_lock, LW_SHARED);
|
||||
entry = hash_search(relsize_hash, &tag, HASH_FIND, NULL);
|
||||
@@ -99,14 +101,14 @@ get_cached_relsize(RelFileNode rnode, ForkNumber forknum, BlockNumber *size)
|
||||
}
|
||||
|
||||
void
|
||||
set_cached_relsize(RelFileNode rnode, ForkNumber forknum, BlockNumber size)
|
||||
set_cached_relsize(NRelFileInfo rinfo, ForkNumber forknum, BlockNumber size)
|
||||
{
|
||||
if (relsize_hash_size > 0)
|
||||
{
|
||||
RelTag tag;
|
||||
RelSizeEntry *entry;
|
||||
|
||||
tag.rnode = rnode;
|
||||
tag.rinfo = rinfo;
|
||||
tag.forknum = forknum;
|
||||
LWLockAcquire(relsize_lock, LW_EXCLUSIVE);
|
||||
entry = hash_search(relsize_hash, &tag, HASH_ENTER, NULL);
|
||||
@@ -116,7 +118,7 @@ set_cached_relsize(RelFileNode rnode, ForkNumber forknum, BlockNumber size)
|
||||
}
|
||||
|
||||
void
|
||||
update_cached_relsize(RelFileNode rnode, ForkNumber forknum, BlockNumber size)
|
||||
update_cached_relsize(NRelFileInfo rinfo, ForkNumber forknum, BlockNumber size)
|
||||
{
|
||||
if (relsize_hash_size > 0)
|
||||
{
|
||||
@@ -124,7 +126,7 @@ update_cached_relsize(RelFileNode rnode, ForkNumber forknum, BlockNumber size)
|
||||
RelSizeEntry *entry;
|
||||
bool found;
|
||||
|
||||
tag.rnode = rnode;
|
||||
tag.rinfo = rinfo;
|
||||
tag.forknum = forknum;
|
||||
LWLockAcquire(relsize_lock, LW_EXCLUSIVE);
|
||||
entry = hash_search(relsize_hash, &tag, HASH_ENTER, &found);
|
||||
@@ -135,13 +137,13 @@ update_cached_relsize(RelFileNode rnode, ForkNumber forknum, BlockNumber size)
|
||||
}
|
||||
|
||||
void
|
||||
forget_cached_relsize(RelFileNode rnode, ForkNumber forknum)
|
||||
forget_cached_relsize(NRelFileInfo rinfo, ForkNumber forknum)
|
||||
{
|
||||
if (relsize_hash_size > 0)
|
||||
{
|
||||
RelTag tag;
|
||||
|
||||
tag.rnode = rnode;
|
||||
tag.rinfo = rinfo;
|
||||
tag.forknum = forknum;
|
||||
LWLockAcquire(relsize_lock, LW_EXCLUSIVE);
|
||||
hash_search(relsize_hash, &tag, HASH_REMOVE, NULL);
|
||||
|
||||
@@ -51,6 +51,9 @@
|
||||
#include "libpq/pqformat.h"
|
||||
#include "replication/slot.h"
|
||||
#include "replication/walreceiver.h"
|
||||
#if PG_VERSION_NUM >= 160000
|
||||
#include "replication/walsender_private.h"
|
||||
#endif
|
||||
#include "postmaster/bgworker.h"
|
||||
#include "postmaster/interrupt.h"
|
||||
#include "postmaster/postmaster.h"
|
||||
@@ -73,10 +76,10 @@
|
||||
|
||||
static bool syncSafekeepers = false;
|
||||
|
||||
char *wal_acceptors_list;
|
||||
int wal_acceptor_reconnect_timeout;
|
||||
int wal_acceptor_connection_timeout;
|
||||
bool am_wal_proposer;
|
||||
char *wal_acceptors_list = "";
|
||||
int wal_acceptor_reconnect_timeout = 1000;
|
||||
int wal_acceptor_connection_timeout = 10000;
|
||||
bool am_wal_proposer = false;
|
||||
|
||||
#define WAL_PROPOSER_SLOT_NAME "wal_proposer_slot"
|
||||
|
||||
@@ -191,7 +194,7 @@ pg_init_walproposer(void)
|
||||
/*
|
||||
* Entry point for `postgres --sync-safekeepers`.
|
||||
*/
|
||||
void
|
||||
PGDLLEXPORT void
|
||||
WalProposerSync(int argc, char *argv[])
|
||||
{
|
||||
struct stat stat_buf;
|
||||
@@ -315,7 +318,7 @@ nwp_shmem_startup_hook(void)
|
||||
/*
|
||||
* WAL proposer bgworker entry point.
|
||||
*/
|
||||
void
|
||||
PGDLLEXPORT void
|
||||
WalProposerMain(Datum main_arg)
|
||||
{
|
||||
#if PG_VERSION_NUM >= 150000
|
||||
@@ -384,40 +387,89 @@ WalProposerPoll(void)
|
||||
while (true)
|
||||
{
|
||||
Safekeeper *sk;
|
||||
int rc;
|
||||
bool wait_timeout;
|
||||
WaitEvent event;
|
||||
TimestampTz now = GetCurrentTimestamp();
|
||||
|
||||
rc = WaitEventSetWait(waitEvents, TimeToReconnect(now),
|
||||
&event, 1, WAIT_EVENT_WAL_SENDER_MAIN);
|
||||
sk = (Safekeeper *) event.user_data;
|
||||
#if PG_MAJORVERSION_NUM >= 16
|
||||
if (WalSndCtl != NULL)
|
||||
ConditionVariablePrepareToSleep(&WalSndCtl->wal_flush_cv);
|
||||
#endif
|
||||
|
||||
/*
|
||||
* If the event contains something that one of our safekeeper states
|
||||
* was waiting for, we'll advance its state.
|
||||
* Wait for a wait event to happen, or timeout:
|
||||
* - Safekeeper socket can become available for READ or WRITE
|
||||
* - Our latch got set, because
|
||||
* * PG15-: We got woken up by a process triggering the WalSender
|
||||
* * PG16+: WalSndCtl->wal_flush_cv was triggered
|
||||
*/
|
||||
if (rc != 0 && (event.events & (WL_SOCKET_READABLE | WL_SOCKET_WRITEABLE)))
|
||||
AdvancePollState(sk, event.events);
|
||||
|
||||
/*
|
||||
* If the timeout expired, attempt to reconnect to any safekeepers
|
||||
* that we dropped
|
||||
*/
|
||||
ReconnectSafekeepers();
|
||||
|
||||
/*
|
||||
* If wait is terminated by latch set (walsenders' latch is set on
|
||||
* each wal flush), then exit loop. (no need for pm death check due to
|
||||
* WL_EXIT_ON_PM_DEATH)
|
||||
*/
|
||||
if (rc != 0 && (event.events & WL_LATCH_SET))
|
||||
if (WaitEventSetWait(waitEvents, TimeToReconnect(now),
|
||||
&event, 1, WAIT_EVENT_WAL_SENDER_MAIN) == 1)
|
||||
{
|
||||
ResetLatch(MyLatch);
|
||||
break;
|
||||
/*
|
||||
* If wait is terminated by latch set (walsenders' latch is set on
|
||||
* each wal flush), then exit loop. (no need for pm death check due to
|
||||
* WL_EXIT_ON_PM_DEATH)
|
||||
*/
|
||||
if (event.events & WL_LATCH_SET)
|
||||
{
|
||||
/* Reset our latch */
|
||||
ResetLatch(MyLatch);
|
||||
|
||||
#if PG_MAJORVERSION_NUM >= 16
|
||||
if (WalSndCtl != NULL)
|
||||
ConditionVariableCancelSleep();
|
||||
#endif
|
||||
break;
|
||||
}
|
||||
|
||||
/*
|
||||
* If the event contains something that one of our safekeeper states
|
||||
* was waiting for, we'll advance its state.
|
||||
*/
|
||||
if (event.events & (WL_SOCKET_MASK))
|
||||
{
|
||||
sk = (Safekeeper *) event.user_data;
|
||||
AdvancePollState(sk, event.events);
|
||||
}
|
||||
else
|
||||
pg_unreachable();
|
||||
}
|
||||
else /* timeout expired */
|
||||
{
|
||||
#if PG_MAJORVERSION_NUM >= 16
|
||||
/* First, cancel sleep - we might do some complex stuff after this */
|
||||
if (WalSndCtl != NULL)
|
||||
ConditionVariableCancelSleep();
|
||||
#endif
|
||||
|
||||
/*
|
||||
* If the timeout expired, attempt to reconnect to any safekeepers
|
||||
* that we dropped
|
||||
*/
|
||||
ReconnectSafekeepers();
|
||||
wait_timeout = true;
|
||||
|
||||
/*
|
||||
* Ensure flushrecptr is set to a recent value. This fixes a case
|
||||
* where we've not been notified of new WAL records when we were
|
||||
* planning on consuming them.
|
||||
*/
|
||||
if (!syncSafekeepers) {
|
||||
XLogRecPtr flushed;
|
||||
|
||||
#if PG_MAJORVERSION_NUM < 15
|
||||
flushed = GetFlushRecPtr();
|
||||
#else
|
||||
flushed = GetFlushRecPtr(NULL);
|
||||
#endif
|
||||
if (flushed != availableLsn)
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
now = GetCurrentTimestamp();
|
||||
if (rc == 0 || TimeToReconnect(now) <= 0) /* timeout expired: poll state */
|
||||
if (wait_timeout || TimeToReconnect(now) <= 0) /* timeout expired: poll state */
|
||||
{
|
||||
TimestampTz now;
|
||||
|
||||
@@ -611,7 +663,8 @@ UpdateEventSet(Safekeeper *sk, uint32 events)
|
||||
ModifyWaitEvent(waitEvents, sk->eventPos, events, NULL);
|
||||
}
|
||||
|
||||
/* Hack: provides a way to remove the event corresponding to an individual walproposer from the set.
|
||||
/*
|
||||
* Hack: provides a way to remove the event corresponding to an individual walproposer from the set.
|
||||
*
|
||||
* Note: Internally, this completely reconstructs the event set. It should be avoided if possible.
|
||||
*/
|
||||
@@ -1408,7 +1461,12 @@ WalProposerRecovery(int donor, TimeLineID timeline, XLogRecPtr startpos, XLogRec
|
||||
elog(FATAL, "could not append password to the safekeeper connection string");
|
||||
}
|
||||
|
||||
#if PG_MAJORVERSION_NUM < 16
|
||||
wrconn = walrcv_connect(conninfo, false, "wal_proposer_recovery", &err);
|
||||
#else
|
||||
wrconn = walrcv_connect(conninfo, false, false, "wal_proposer_recovery", &err);
|
||||
#endif
|
||||
|
||||
if (!wrconn)
|
||||
{
|
||||
ereport(WARNING,
|
||||
@@ -2242,9 +2300,10 @@ HandleSafekeeperResponse(void)
|
||||
if (synced)
|
||||
n_synced++;
|
||||
}
|
||||
|
||||
if (n_synced >= quorum)
|
||||
{
|
||||
/* All safekeepers synced! */
|
||||
/* A quorum of safekeepers has been synced! */
|
||||
|
||||
/*
|
||||
* Send empty message to broadcast latest truncateLsn to all safekeepers.
|
||||
|
||||
@@ -379,8 +379,8 @@ typedef struct Safekeeper
|
||||
AppendResponse appendResponse; /* feedback for master */
|
||||
} Safekeeper;
|
||||
|
||||
extern void WalProposerSync(int argc, char *argv[]);
|
||||
extern void WalProposerMain(Datum main_arg);
|
||||
extern void PGDLLEXPORT WalProposerSync(int argc, char *argv[]);
|
||||
extern void PGDLLEXPORT WalProposerMain(Datum main_arg);
|
||||
extern void WalProposerBroadcast(XLogRecPtr startpos, XLogRecPtr endpos);
|
||||
extern void WalProposerPoll(void);
|
||||
extern void ParsePageserverFeedbackMessage(StringInfo reply_message,
|
||||
|
||||
@@ -25,6 +25,9 @@
|
||||
#include "access/xlogutils.h"
|
||||
#include "access/xlogrecovery.h"
|
||||
#endif
|
||||
#if PG_MAJORVERSION_NUM >= 16
|
||||
#include "utils/guc.h"
|
||||
#endif
|
||||
|
||||
/*
|
||||
* These variables are used similarly to openLogFile/SegNo,
|
||||
@@ -558,11 +561,11 @@ StartProposerReplication(StartReplicationCmd *cmd)
|
||||
static void
|
||||
WalSndLoop(void)
|
||||
{
|
||||
/* Clear any already-pending wakeups */
|
||||
ResetLatch(MyLatch);
|
||||
|
||||
for (;;)
|
||||
{
|
||||
/* Clear any already-pending wakeups */
|
||||
ResetLatch(MyLatch);
|
||||
|
||||
CHECK_FOR_INTERRUPTS();
|
||||
|
||||
XLogBroadcastWalProposer();
|
||||
|
||||
19
pgxn/neon_rmgr/Makefile
Normal file
@@ -0,0 +1,19 @@
|
||||
# pgxn/neon_rmgr/Makefile
|
||||
|
||||
|
||||
MODULE_big = neon_rmgr
|
||||
OBJS = \
|
||||
$(WIN32RES) \
|
||||
neon_rmgr.o \
|
||||
neon_rmgr_decode.o \
|
||||
neon_rmgr_desc.o
|
||||
|
||||
|
||||
EXTENSION = neon_rmgr
|
||||
DATA =
|
||||
PGFILEDESC = "Neon WAL Resource Manager - custom WAL records used to make Neon work (since PG 16)"
|
||||
|
||||
|
||||
PG_CONFIG = pg_config
|
||||
PGXS := $(shell $(PG_CONFIG) --pgxs)
|
||||
include $(PGXS)
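Since the Makefile above is plain PGXS, the module can presumably also be built out of tree by pointing PG_CONFIG at a PostgreSQL 16 installation (for example, make PG_CONFIG=/path/to/v16/bin/pg_config install); inside the Neon repository it is expected to be driven by the existing extension build targets. The /path/to/v16 location is purely illustrative.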
|
||||
886
pgxn/neon_rmgr/neon_rmgr.c
Normal file
@@ -0,0 +1,886 @@
|
||||
#include "postgres.h"
|
||||
#include "fmgr.h"
|
||||
|
||||
#if PG_MAJORVERSION_NUM >= 16
|
||||
#include "access/bufmask.h"
|
||||
#include "access/heapam_xlog.h"
|
||||
#include "access/htup_details.h"
|
||||
#include "access/neon_xlog.h"
|
||||
#include "access/rmgr.h"
|
||||
#include "access/visibilitymap.h"
|
||||
#include "access/xlog_internal.h"
|
||||
#include "access/xlogutils.h"
|
||||
#include "miscadmin.h"
|
||||
#include "storage/buf.h"
|
||||
#include "storage/bufmgr.h"
|
||||
#include "storage/bufpage.h"
|
||||
#include "storage/freespace.h"
|
||||
#include "neon_rmgr.h"
|
||||
|
||||
PG_MODULE_MAGIC;
|
||||
void _PG_init(void);
|
||||
|
||||
static void neon_rm_redo(XLogReaderState *record);
|
||||
static void neon_rm_startup(void);
|
||||
static void neon_rm_cleanup(void);
|
||||
static void neon_rm_mask(char *pagedata, BlockNumber blkno);
|
||||
|
||||
static void redo_neon_heap_insert(XLogReaderState *record);
|
||||
static void redo_neon_heap_delete(XLogReaderState *record);
|
||||
static void redo_neon_heap_update(XLogReaderState *record, bool hot_update);
|
||||
static void redo_neon_heap_lock(XLogReaderState *record);
|
||||
static void redo_neon_heap_multi_insert(XLogReaderState *record);
|
||||
|
||||
const static RmgrData NeonRmgr = {
|
||||
.rm_name = "neon",
|
||||
.rm_redo = neon_rm_redo,
|
||||
.rm_desc = neon_rm_desc,
|
||||
.rm_identify = neon_rm_identify,
|
||||
.rm_startup = neon_rm_startup,
|
||||
.rm_cleanup = neon_rm_cleanup,
|
||||
.rm_mask = neon_rm_mask,
|
||||
.rm_decode = neon_rm_decode,
|
||||
};
|
||||
|
||||
void
|
||||
_PG_init(void)
|
||||
{
|
||||
if (!process_shared_preload_libraries_in_progress)
|
||||
return;
|
||||
|
||||
RegisterCustomRmgr(RM_NEON_ID, &NeonRmgr);
|
||||
}
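A note on the registration above: RegisterCustomRmgr() (available since PostgreSQL 15) may only be called while shared_preload_libraries are being loaded, which is exactly what the process_shared_preload_libraries_in_progress guard enforces; loading the library any later would silently skip registration and leave neon's custom WAL records unreplayable. The exact shared_preload_libraries entry used by Neon's compute image is not shown in this hunk.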
|
||||
|
||||
static void
|
||||
neon_rm_redo(XLogReaderState *record)
|
||||
{
|
||||
uint8 info = XLogRecGetInfo(record) & ~XLR_INFO_MASK;
|
||||
|
||||
switch (info & XLOG_NEON_OPMASK)
|
||||
{
|
||||
case XLOG_NEON_HEAP_INSERT:
|
||||
redo_neon_heap_insert(record);
|
||||
break;
|
||||
case XLOG_NEON_HEAP_DELETE:
|
||||
redo_neon_heap_delete(record);
|
||||
break;
|
||||
case XLOG_NEON_HEAP_UPDATE:
|
||||
redo_neon_heap_update(record, false);
|
||||
break;
|
||||
case XLOG_NEON_HEAP_HOT_UPDATE:
|
||||
redo_neon_heap_update(record, true);
|
||||
break;
|
||||
case XLOG_NEON_HEAP_LOCK:
|
||||
redo_neon_heap_lock(record);
|
||||
break;
|
||||
case XLOG_NEON_HEAP_MULTI_INSERT:
|
||||
redo_neon_heap_multi_insert(record);
|
||||
break;
|
||||
default:
|
||||
elog(PANIC, "neon_rm_redo: unknown op code %u", info);
|
||||
}
|
||||
}
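For context (not part of this file): these redo handlers consume records that a backend would have emitted against RM_NEON_ID. A hedged, schematic sketch of such an insertion follows; the record layout is elided, 'buffer' is an assumed pinned and exclusively locked heap buffer, and SizeOfNeonHeapInsert is assumed to exist in access/neon_xlog.h by analogy with core's SizeOfHeapInsert.

/* Illustrative only: emitting a neon-rmgr heap-insert record. */
xl_neon_heap_insert xlrec;       /* filled in by the caller, layout omitted here */
XLogRecPtr  recptr;

XLogBeginInsert();
XLogRegisterData((char *) &xlrec, SizeOfNeonHeapInsert);   /* assumed size macro */
XLogRegisterBuffer(0, buffer, REGBUF_STANDARD);            /* page being modified */
recptr = XLogInsert(RM_NEON_ID, XLOG_NEON_HEAP_INSERT);
PageSetLSN(BufferGetPage(buffer), recptr);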
|
||||
|
||||
static void
|
||||
neon_rm_startup(void)
|
||||
{
|
||||
/* nothing to do here */
|
||||
}
|
||||
|
||||
static void
|
||||
neon_rm_cleanup(void)
|
||||
{
|
||||
/* nothing to do here */
|
||||
}
|
||||
|
||||
static void
|
||||
neon_rm_mask(char *pagedata, BlockNumber blkno)
|
||||
{
|
||||
Page page = (Page) pagedata;
|
||||
OffsetNumber off;
|
||||
|
||||
mask_page_lsn_and_checksum(page);
|
||||
|
||||
mask_page_hint_bits(page);
|
||||
mask_unused_space(page);
|
||||
|
||||
for (off = 1; off <= PageGetMaxOffsetNumber(page); off++)
|
||||
{
|
||||
ItemId iid = PageGetItemId(page, off);
|
||||
char *page_item;
|
||||
|
||||
page_item = (char *) (page + ItemIdGetOffset(iid));
|
||||
|
||||
if (ItemIdIsNormal(iid))
|
||||
{
|
||||
HeapTupleHeader page_htup = (HeapTupleHeader) page_item;
|
||||
|
||||
/*
|
||||
* If xmin of a tuple is not yet frozen, we should ignore
|
||||
* differences in hint bits, since they can be set without
|
||||
* emitting WAL.
|
||||
*/
|
||||
if (!HeapTupleHeaderXminFrozen(page_htup))
|
||||
page_htup->t_infomask &= ~HEAP_XACT_MASK;
|
||||
else
|
||||
{
|
||||
/* Still we need to mask xmax hint bits. */
|
||||
page_htup->t_infomask &= ~HEAP_XMAX_INVALID;
|
||||
page_htup->t_infomask &= ~HEAP_XMAX_COMMITTED;
|
||||
}
|
||||
|
||||
/*
|
||||
* During replay, we set Command Id to FirstCommandId. Hence, mask
|
||||
* it. See heap_xlog_insert() for details.
|
||||
*/
|
||||
page_htup->t_choice.t_heap.t_field3.t_cid = MASK_MARKER;
|
||||
|
||||
/*
|
||||
* For a speculative tuple, heap_insert() does not set ctid in the
|
||||
* caller-passed heap tuple itself, leaving the ctid field to
|
||||
* contain a speculative token value - a per-backend monotonically
|
||||
* increasing identifier. Besides, it does not WAL-log ctid under
|
||||
* any circumstances.
|
||||
*
|
||||
* During redo, heap_xlog_insert() sets t_ctid to current block
|
||||
* number and self offset number. It doesn't care about any
|
||||
* speculative insertions on the primary. Hence, we set t_ctid to
|
||||
* current block number and self offset number to ignore any
|
||||
* inconsistency.
|
||||
*/
|
||||
if (HeapTupleHeaderIsSpeculative(page_htup))
|
||||
ItemPointerSet(&page_htup->t_ctid, blkno, off);
|
||||
|
||||
/*
|
||||
* NB: Not ignoring ctid changes due to the tuple having moved
|
||||
* (i.e. HeapTupleHeaderIndicatesMovedPartitions), because that's
|
||||
* important information that needs to be in-sync between primary
|
||||
* and standby, and thus is WAL logged.
|
||||
*/
|
||||
}
|
||||
|
||||
/*
|
||||
* Ignore any padding bytes after the tuple, when the length of the
|
||||
* item is not MAXALIGNed.
|
||||
*/
|
||||
if (ItemIdHasStorage(iid))
|
||||
{
|
||||
int len = ItemIdGetLength(iid);
|
||||
int padlen = MAXALIGN(len) - len;
|
||||
|
||||
if (padlen > 0)
|
||||
memset(page_item + len, MASK_MARKER, padlen);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* COPIED FROM heapam.c
|
||||
* Given an "infobits" field from an XLog record, set the correct bits in the
|
||||
* given infomask and infomask2 for the tuple touched by the record.
|
||||
*
|
||||
* (This is the reverse of compute_infobits).
|
||||
*/
|
||||
static void
|
||||
fix_infomask_from_infobits(uint8 infobits, uint16 *infomask, uint16 *infomask2)
|
||||
{
|
||||
*infomask &= ~(HEAP_XMAX_IS_MULTI | HEAP_XMAX_LOCK_ONLY |
|
||||
HEAP_XMAX_KEYSHR_LOCK | HEAP_XMAX_EXCL_LOCK);
|
||||
*infomask2 &= ~HEAP_KEYS_UPDATED;
|
||||
|
||||
if (infobits & XLHL_XMAX_IS_MULTI)
|
||||
*infomask |= HEAP_XMAX_IS_MULTI;
|
||||
if (infobits & XLHL_XMAX_LOCK_ONLY)
|
||||
*infomask |= HEAP_XMAX_LOCK_ONLY;
|
||||
if (infobits & XLHL_XMAX_EXCL_LOCK)
|
||||
*infomask |= HEAP_XMAX_EXCL_LOCK;
|
||||
/* note HEAP_XMAX_SHR_LOCK isn't considered here */
|
||||
if (infobits & XLHL_XMAX_KEYSHR_LOCK)
|
||||
*infomask |= HEAP_XMAX_KEYSHR_LOCK;
|
||||
|
||||
if (infobits & XLHL_KEYS_UPDATED)
|
||||
*infomask2 |= HEAP_KEYS_UPDATED;
|
||||
}
|
||||
|
||||
static void
|
||||
redo_neon_heap_insert(XLogReaderState *record)
|
||||
{
|
||||
XLogRecPtr lsn = record->EndRecPtr;
|
||||
xl_neon_heap_insert *xlrec = (xl_neon_heap_insert *) XLogRecGetData(record);
|
||||
Buffer buffer;
|
||||
Page page;
|
||||
union
|
||||
{
|
||||
HeapTupleHeaderData hdr;
|
||||
char data[MaxHeapTupleSize];
|
||||
} tbuf;
|
||||
HeapTupleHeader htup;
|
||||
xl_neon_heap_header xlhdr;
|
||||
uint32 newlen;
|
||||
Size freespace = 0;
|
||||
RelFileLocator target_locator;
|
||||
BlockNumber blkno;
|
||||
ItemPointerData target_tid;
|
||||
XLogRedoAction action;
|
||||
|
||||
XLogRecGetBlockTag(record, 0, &target_locator, NULL, &blkno);
|
||||
ItemPointerSetBlockNumber(&target_tid, blkno);
|
||||
ItemPointerSetOffsetNumber(&target_tid, xlrec->offnum);
|
||||
|
||||
/*
|
||||
* The visibility map may need to be fixed even if the heap page is
|
||||
* already up-to-date.
|
||||
*/
|
||||
if (xlrec->flags & XLH_INSERT_ALL_VISIBLE_CLEARED)
|
||||
{
|
||||
Relation reln = CreateFakeRelcacheEntry(target_locator);
|
||||
Buffer vmbuffer = InvalidBuffer;
|
||||
|
||||
visibilitymap_pin(reln, blkno, &vmbuffer);
|
||||
visibilitymap_clear(reln, blkno, vmbuffer, VISIBILITYMAP_VALID_BITS);
|
||||
ReleaseBuffer(vmbuffer);
|
||||
FreeFakeRelcacheEntry(reln);
|
||||
}
|
||||
|
||||
/*
|
||||
* If we inserted the first and only tuple on the page, re-initialize the
|
||||
* page from scratch.
|
||||
*/
|
||||
if (XLogRecGetInfo(record) & XLOG_HEAP_INIT_PAGE)
|
||||
{
|
||||
buffer = XLogInitBufferForRedo(record, 0);
|
||||
page = BufferGetPage(buffer);
|
||||
PageInit(page, BufferGetPageSize(buffer), 0);
|
||||
action = BLK_NEEDS_REDO;
|
||||
}
|
||||
else
|
||||
action = XLogReadBufferForRedo(record, 0, &buffer);
|
||||
if (action == BLK_NEEDS_REDO)
|
||||
{
|
||||
Size datalen;
|
||||
char *data;
|
||||
|
||||
page = BufferGetPage(buffer);
|
||||
|
||||
if (PageGetMaxOffsetNumber(page) + 1 < xlrec->offnum)
|
||||
elog(PANIC, "neon_rm_redo: invalid max offset number");
|
||||
|
||||
data = XLogRecGetBlockData(record, 0, &datalen);
|
||||
|
||||
newlen = datalen - SizeOfNeonHeapHeader;
|
||||
Assert(datalen > SizeOfNeonHeapHeader && newlen <= MaxHeapTupleSize);
|
||||
memcpy((char *) &xlhdr, data, SizeOfNeonHeapHeader);
|
||||
data += SizeOfNeonHeapHeader;
|
||||
|
||||
htup = &tbuf.hdr;
|
||||
MemSet((char *) htup, 0, SizeofHeapTupleHeader);
|
||||
/* PG73FORMAT: get bitmap [+ padding] [+ oid] + data */
|
||||
memcpy((char *) htup + SizeofHeapTupleHeader,
|
||||
data,
|
||||
newlen);
|
||||
newlen += SizeofHeapTupleHeader;
|
||||
htup->t_infomask2 = xlhdr.t_infomask2;
|
||||
htup->t_infomask = xlhdr.t_infomask;
|
||||
htup->t_hoff = xlhdr.t_hoff;
|
||||
HeapTupleHeaderSetXmin(htup, XLogRecGetXid(record));
|
||||
HeapTupleHeaderSetCmin(htup, xlhdr.t_cid);
|
||||
htup->t_ctid = target_tid;
|
||||
|
||||
if (PageAddItem(page, (Item) htup, newlen, xlrec->offnum,
|
||||
true, true) == InvalidOffsetNumber)
|
||||
elog(PANIC, "neon_rm_redo: failed to add tuple");
|
||||
|
||||
freespace = PageGetHeapFreeSpace(page); /* needed to update FSM below */
|
||||
|
||||
PageSetLSN(page, lsn);
|
||||
|
||||
if (xlrec->flags & XLH_INSERT_ALL_VISIBLE_CLEARED)
|
||||
PageClearAllVisible(page);
|
||||
|
||||
/* XLH_INSERT_ALL_FROZEN_SET implies that all tuples are visible */
|
||||
if (xlrec->flags & XLH_INSERT_ALL_FROZEN_SET)
|
||||
PageSetAllVisible(page);
|
||||
|
||||
MarkBufferDirty(buffer);
|
||||
}
|
||||
if (BufferIsValid(buffer))
|
||||
UnlockReleaseBuffer(buffer);
|
||||
|
||||
/*
|
||||
* If the page is running low on free space, update the FSM as well.
|
||||
* Arbitrarily, our definition of "low" is less than 20%. We can't do much
|
||||
* better than that without knowing the fill-factor for the table.
|
||||
*
|
||||
* XXX: Don't do this if the page was restored from full page image. We
|
||||
* don't bother to update the FSM in that case, it doesn't need to be
|
||||
* totally accurate anyway.
|
||||
*/
|
||||
if (action == BLK_NEEDS_REDO && freespace < BLCKSZ / 5)
|
||||
XLogRecordPageWithFreeSpace(target_locator, blkno, freespace);
|
||||
}
|
||||
|
||||
static void
|
||||
redo_neon_heap_delete(XLogReaderState *record)
|
||||
{
|
||||
XLogRecPtr lsn = record->EndRecPtr;
|
||||
xl_neon_heap_delete *xlrec = (xl_neon_heap_delete *) XLogRecGetData(record);
|
||||
Buffer buffer;
|
||||
Page page;
|
||||
ItemId lp = NULL;
|
||||
HeapTupleHeader htup;
|
||||
BlockNumber blkno;
|
||||
RelFileLocator target_locator;
|
||||
ItemPointerData target_tid;
|
||||
|
||||
XLogRecGetBlockTag(record, 0, &target_locator, NULL, &blkno);
|
||||
ItemPointerSetBlockNumber(&target_tid, blkno);
|
||||
ItemPointerSetOffsetNumber(&target_tid, xlrec->offnum);
|
||||
|
||||
/*
|
||||
* The visibility map may need to be fixed even if the heap page is
|
||||
* already up-to-date.
|
||||
*/
|
||||
if (xlrec->flags & XLH_DELETE_ALL_VISIBLE_CLEARED)
|
||||
{
|
||||
Relation reln = CreateFakeRelcacheEntry(target_locator);
|
||||
Buffer vmbuffer = InvalidBuffer;
|
||||
|
||||
visibilitymap_pin(reln, blkno, &vmbuffer);
|
||||
visibilitymap_clear(reln, blkno, vmbuffer, VISIBILITYMAP_VALID_BITS);
|
||||
ReleaseBuffer(vmbuffer);
|
||||
FreeFakeRelcacheEntry(reln);
|
||||
}
|
||||
|
||||
if (XLogReadBufferForRedo(record, 0, &buffer) == BLK_NEEDS_REDO)
|
||||
{
|
||||
page = BufferGetPage(buffer);
|
||||
|
||||
if (PageGetMaxOffsetNumber(page) >= xlrec->offnum)
|
||||
lp = PageGetItemId(page, xlrec->offnum);
|
||||
|
||||
if (PageGetMaxOffsetNumber(page) < xlrec->offnum || !ItemIdIsNormal(lp))
|
||||
elog(PANIC, "neon_rm_redo: invalid lp");
|
||||
|
||||
htup = (HeapTupleHeader) PageGetItem(page, lp);
|
||||
|
||||
htup->t_infomask &= ~(HEAP_XMAX_BITS | HEAP_MOVED);
|
||||
htup->t_infomask2 &= ~HEAP_KEYS_UPDATED;
|
||||
HeapTupleHeaderClearHotUpdated(htup);
|
||||
fix_infomask_from_infobits(xlrec->infobits_set,
|
||||
&htup->t_infomask, &htup->t_infomask2);
|
||||
if (!(xlrec->flags & XLH_DELETE_IS_SUPER))
|
||||
HeapTupleHeaderSetXmax(htup, xlrec->xmax);
|
||||
else
|
||||
HeapTupleHeaderSetXmin(htup, InvalidTransactionId);
|
||||
HeapTupleHeaderSetCmax(htup, xlrec->t_cid, false);
|
||||
|
||||
/* Mark the page as a candidate for pruning */
|
||||
PageSetPrunable(page, XLogRecGetXid(record));
|
||||
|
||||
if (xlrec->flags & XLH_DELETE_ALL_VISIBLE_CLEARED)
|
||||
PageClearAllVisible(page);
|
||||
|
||||
/* Make sure t_ctid is set correctly */
|
||||
if (xlrec->flags & XLH_DELETE_IS_PARTITION_MOVE)
|
||||
HeapTupleHeaderSetMovedPartitions(htup);
|
||||
else
|
||||
htup->t_ctid = target_tid;
|
||||
PageSetLSN(page, lsn);
|
||||
MarkBufferDirty(buffer);
|
||||
}
|
||||
if (BufferIsValid(buffer))
|
||||
UnlockReleaseBuffer(buffer);
|
||||
}
|
||||
|
||||
static void
|
||||
redo_neon_heap_update(XLogReaderState *record, bool hot_update)
|
||||
{
|
||||
XLogRecPtr lsn = record->EndRecPtr;
|
||||
xl_neon_heap_update *xlrec = (xl_neon_heap_update *) XLogRecGetData(record);
|
||||
RelFileLocator rlocator;
|
||||
BlockNumber oldblk;
|
||||
BlockNumber newblk;
|
||||
ItemPointerData newtid;
|
||||
Buffer obuffer,
|
||||
nbuffer;
|
||||
Page page;
|
||||
OffsetNumber offnum;
|
||||
ItemId lp = NULL;
|
||||
HeapTupleData oldtup;
|
||||
HeapTupleHeader htup;
|
||||
uint16 prefixlen = 0,
|
||||
suffixlen = 0;
|
||||
char *newp;
|
||||
union
|
||||
{
|
||||
HeapTupleHeaderData hdr;
|
||||
char data[MaxHeapTupleSize];
|
||||
} tbuf;
|
||||
xl_neon_heap_header xlhdr;
|
||||
uint32 newlen;
|
||||
Size freespace = 0;
|
||||
XLogRedoAction oldaction;
|
||||
XLogRedoAction newaction;
|
||||
|
||||
/* initialize to keep the compiler quiet */
|
||||
oldtup.t_data = NULL;
|
||||
oldtup.t_len = 0;
|
||||
|
||||
XLogRecGetBlockTag(record, 0, &rlocator, NULL, &newblk);
|
||||
if (XLogRecGetBlockTagExtended(record, 1, NULL, NULL, &oldblk, NULL))
|
||||
{
|
||||
/* HOT updates are never done across pages */
|
||||
Assert(!hot_update);
|
||||
}
|
||||
else
|
||||
oldblk = newblk;
|
||||
|
||||
ItemPointerSet(&newtid, newblk, xlrec->new_offnum);
|
||||
|
||||
/*
|
||||
* The visibility map may need to be fixed even if the heap page is
|
||||
* already up-to-date.
|
||||
*/
|
||||
if (xlrec->flags & XLH_UPDATE_OLD_ALL_VISIBLE_CLEARED)
|
||||
{
|
||||
Relation reln = CreateFakeRelcacheEntry(rlocator);
|
||||
Buffer vmbuffer = InvalidBuffer;
|
||||
|
||||
visibilitymap_pin(reln, oldblk, &vmbuffer);
|
||||
visibilitymap_clear(reln, oldblk, vmbuffer, VISIBILITYMAP_VALID_BITS);
|
||||
ReleaseBuffer(vmbuffer);
|
||||
FreeFakeRelcacheEntry(reln);
|
||||
}
|
||||
|
||||
/*
|
||||
* In normal operation, it is important to lock the two pages in
|
||||
* page-number order, to avoid possible deadlocks against other update
|
||||
* operations going the other way. However, during WAL replay there can
|
||||
* be no other update happening, so we don't need to worry about that. But
|
||||
* we *do* need to worry that we don't expose an inconsistent state to Hot
|
||||
* Standby queries --- so the original page can't be unlocked before we've
|
||||
* added the new tuple to the new page.
|
||||
*/
|
||||
|
||||
/* Deal with old tuple version */
|
||||
oldaction = XLogReadBufferForRedo(record, (oldblk == newblk) ? 0 : 1,
|
||||
&obuffer);
|
||||
if (oldaction == BLK_NEEDS_REDO)
|
||||
{
|
||||
page = BufferGetPage(obuffer);
|
||||
offnum = xlrec->old_offnum;
|
||||
if (PageGetMaxOffsetNumber(page) >= offnum)
|
||||
lp = PageGetItemId(page, offnum);
|
||||
|
||||
if (PageGetMaxOffsetNumber(page) < offnum || !ItemIdIsNormal(lp))
|
||||
elog(PANIC, "neon_rm_redo: invalid lp");
|
||||
|
||||
htup = (HeapTupleHeader) PageGetItem(page, lp);
|
||||
|
||||
oldtup.t_data = htup;
|
||||
oldtup.t_len = ItemIdGetLength(lp);
|
||||
|
||||
htup->t_infomask &= ~(HEAP_XMAX_BITS | HEAP_MOVED);
|
||||
htup->t_infomask2 &= ~HEAP_KEYS_UPDATED;
|
||||
if (hot_update)
|
||||
HeapTupleHeaderSetHotUpdated(htup);
|
||||
else
|
||||
HeapTupleHeaderClearHotUpdated(htup);
|
||||
fix_infomask_from_infobits(xlrec->old_infobits_set, &htup->t_infomask,
|
||||
&htup->t_infomask2);
|
||||
HeapTupleHeaderSetXmax(htup, xlrec->old_xmax);
|
||||
HeapTupleHeaderSetCmax(htup, xlrec->t_cid, false);
|
||||
/* Set forward chain link in t_ctid */
|
||||
htup->t_ctid = newtid;
|
||||
|
||||
/* Mark the page as a candidate for pruning */
|
||||
PageSetPrunable(page, XLogRecGetXid(record));
|
||||
|
||||
if (xlrec->flags & XLH_UPDATE_OLD_ALL_VISIBLE_CLEARED)
|
||||
PageClearAllVisible(page);
|
||||
|
||||
PageSetLSN(page, lsn);
|
||||
MarkBufferDirty(obuffer);
|
||||
}
|
||||
|
||||
/*
|
||||
* Read the page the new tuple goes into, if different from old.
|
||||
*/
|
||||
if (oldblk == newblk)
|
||||
{
|
||||
nbuffer = obuffer;
|
||||
newaction = oldaction;
|
||||
}
|
||||
else if (XLogRecGetInfo(record) & XLOG_HEAP_INIT_PAGE)
|
||||
{
|
||||
nbuffer = XLogInitBufferForRedo(record, 0);
|
||||
page = (Page) BufferGetPage(nbuffer);
|
||||
PageInit(page, BufferGetPageSize(nbuffer), 0);
|
||||
newaction = BLK_NEEDS_REDO;
|
||||
}
|
||||
else
|
||||
newaction = XLogReadBufferForRedo(record, 0, &nbuffer);
|
||||
|
||||
/*
|
||||
* The visibility map may need to be fixed even if the heap page is
|
||||
* already up-to-date.
|
||||
*/
|
||||
if (xlrec->flags & XLH_UPDATE_NEW_ALL_VISIBLE_CLEARED)
|
||||
{
|
||||
Relation reln = CreateFakeRelcacheEntry(rlocator);
|
||||
Buffer vmbuffer = InvalidBuffer;
|
||||
|
||||
visibilitymap_pin(reln, newblk, &vmbuffer);
|
||||
visibilitymap_clear(reln, newblk, vmbuffer, VISIBILITYMAP_VALID_BITS);
|
||||
ReleaseBuffer(vmbuffer);
|
||||
FreeFakeRelcacheEntry(reln);
|
||||
}
|
||||
|
||||
/* Deal with new tuple */
|
||||
if (newaction == BLK_NEEDS_REDO)
|
||||
{
|
||||
char *recdata;
|
||||
char *recdata_end;
|
||||
Size datalen;
|
||||
Size tuplen;
|
||||
|
||||
recdata = XLogRecGetBlockData(record, 0, &datalen);
|
||||
recdata_end = recdata + datalen;
|
||||
|
||||
page = BufferGetPage(nbuffer);
|
||||
|
||||
offnum = xlrec->new_offnum;
|
||||
if (PageGetMaxOffsetNumber(page) + 1 < offnum)
|
||||
elog(PANIC, "neon_rm_redo: invalid max offset number");
|
||||
|
||||
if (xlrec->flags & XLH_UPDATE_PREFIX_FROM_OLD)
|
||||
{
|
||||
Assert(newblk == oldblk);
|
||||
memcpy(&prefixlen, recdata, sizeof(uint16));
|
||||
recdata += sizeof(uint16);
|
||||
}
|
||||
if (xlrec->flags & XLH_UPDATE_SUFFIX_FROM_OLD)
|
||||
{
|
||||
Assert(newblk == oldblk);
|
||||
memcpy(&suffixlen, recdata, sizeof(uint16));
|
||||
recdata += sizeof(uint16);
|
||||
}
|
||||
|
||||
memcpy((char *) &xlhdr, recdata, SizeOfNeonHeapHeader);
|
||||
recdata += SizeOfNeonHeapHeader;
|
||||
|
||||
tuplen = recdata_end - recdata;
|
||||
Assert(tuplen <= MaxHeapTupleSize);
|
||||
|
||||
htup = &tbuf.hdr;
|
||||
MemSet((char *) htup, 0, SizeofHeapTupleHeader);
|
||||
|
||||
/*
|
||||
* Reconstruct the new tuple using the prefix and/or suffix from the
|
||||
* old tuple, and the data stored in the WAL record.
|
||||
*/
|
||||
newp = (char *) htup + SizeofHeapTupleHeader;
|
||||
if (prefixlen > 0)
|
||||
{
|
||||
int len;
|
||||
|
||||
/* copy bitmap [+ padding] [+ oid] from WAL record */
|
||||
len = xlhdr.t_hoff - SizeofHeapTupleHeader;
|
||||
memcpy(newp, recdata, len);
|
||||
recdata += len;
|
||||
newp += len;
|
||||
|
||||
/* copy prefix from old tuple */
|
||||
memcpy(newp, (char *) oldtup.t_data + oldtup.t_data->t_hoff, prefixlen);
|
||||
newp += prefixlen;
|
||||
|
||||
/* copy new tuple data from WAL record */
|
||||
len = tuplen - (xlhdr.t_hoff - SizeofHeapTupleHeader);
|
||||
memcpy(newp, recdata, len);
|
||||
recdata += len;
|
||||
newp += len;
|
||||
}
|
||||
else
|
||||
{
|
||||
/*
|
||||
* copy bitmap [+ padding] [+ oid] + data from record, all in one
|
||||
* go
|
||||
*/
|
||||
memcpy(newp, recdata, tuplen);
|
||||
recdata += tuplen;
|
||||
newp += tuplen;
|
||||
}
|
||||
Assert(recdata == recdata_end);
|
||||
|
||||
/* copy suffix from old tuple */
|
||||
if (suffixlen > 0)
|
||||
memcpy(newp, (char *) oldtup.t_data + oldtup.t_len - suffixlen, suffixlen);
|
||||
|
||||
newlen = SizeofHeapTupleHeader + tuplen + prefixlen + suffixlen;
|
||||
htup->t_infomask2 = xlhdr.t_infomask2;
|
||||
htup->t_infomask = xlhdr.t_infomask;
|
||||
htup->t_hoff = xlhdr.t_hoff;
|
||||
|
||||
HeapTupleHeaderSetXmin(htup, XLogRecGetXid(record));
|
||||
HeapTupleHeaderSetCmin(htup, xlhdr.t_cid);
|
||||
HeapTupleHeaderSetXmax(htup, xlrec->new_xmax);
|
||||
/* Make sure there is no forward chain link in t_ctid */
|
||||
htup->t_ctid = newtid;
|
||||
|
||||
offnum = PageAddItem(page, (Item) htup, newlen, offnum, true, true);
|
||||
if (offnum == InvalidOffsetNumber)
|
||||
elog(PANIC, "neon_rm_redo: failed to add tuple");
|
||||
|
||||
if (xlrec->flags & XLH_UPDATE_NEW_ALL_VISIBLE_CLEARED)
|
||||
PageClearAllVisible(page);
|
||||
|
||||
freespace = PageGetHeapFreeSpace(page); /* needed to update FSM below */
|
||||
|
||||
PageSetLSN(page, lsn);
|
||||
MarkBufferDirty(nbuffer);
|
||||
}
|
||||
|
||||
if (BufferIsValid(nbuffer) && nbuffer != obuffer)
|
||||
UnlockReleaseBuffer(nbuffer);
|
||||
if (BufferIsValid(obuffer))
|
||||
UnlockReleaseBuffer(obuffer);
|
||||
|
||||
/*
|
||||
* If the new page is running low on free space, update the FSM as well.
|
||||
* Arbitrarily, our definition of "low" is less than 20%. We can't do much
|
||||
* better than that without knowing the fill-factor for the table.
|
||||
*
|
||||
* However, don't update the FSM on HOT updates, because after crash
|
||||
* recovery, either the old or the new tuple will certainly be dead and
|
||||
* prunable. After pruning, the page will have roughly as much free space
|
||||
* as it did before the update, assuming the new tuple is about the same
|
||||
* size as the old one.
|
||||
*
|
||||
* XXX: Don't do this if the page was restored from full page image. We
|
||||
* don't bother to update the FSM in that case, it doesn't need to be
|
||||
* totally accurate anyway.
|
||||
*/
|
||||
if (newaction == BLK_NEEDS_REDO && !hot_update && freespace < BLCKSZ / 5)
|
||||
XLogRecordPageWithFreeSpace(rlocator, newblk, freespace);
|
||||
}
|
||||
|
||||
static void
|
||||
redo_neon_heap_lock(XLogReaderState *record)
|
||||
{
|
||||
XLogRecPtr lsn = record->EndRecPtr;
|
||||
xl_neon_heap_lock *xlrec = (xl_neon_heap_lock *) XLogRecGetData(record);
|
||||
Buffer buffer;
|
||||
Page page;
|
||||
OffsetNumber offnum;
|
||||
ItemId lp = NULL;
|
||||
HeapTupleHeader htup;
|
||||
|
||||
/*
|
||||
* The visibility map may need to be fixed even if the heap page is
|
||||
* already up-to-date.
|
||||
*/
|
||||
if (xlrec->flags & XLH_LOCK_ALL_FROZEN_CLEARED)
|
||||
{
|
||||
RelFileLocator rlocator;
|
||||
Buffer vmbuffer = InvalidBuffer;
|
||||
BlockNumber block;
|
||||
Relation reln;
|
||||
|
||||
XLogRecGetBlockTag(record, 0, &rlocator, NULL, &block);
|
||||
reln = CreateFakeRelcacheEntry(rlocator);
|
||||
|
||||
visibilitymap_pin(reln, block, &vmbuffer);
|
||||
visibilitymap_clear(reln, block, vmbuffer, VISIBILITYMAP_ALL_FROZEN);
|
||||
|
||||
ReleaseBuffer(vmbuffer);
|
||||
FreeFakeRelcacheEntry(reln);
|
||||
}
|
||||
|
||||
if (XLogReadBufferForRedo(record, 0, &buffer) == BLK_NEEDS_REDO)
|
||||
{
|
||||
page = (Page) BufferGetPage(buffer);
|
||||
|
||||
offnum = xlrec->offnum;
|
||||
if (PageGetMaxOffsetNumber(page) >= offnum)
|
||||
lp = PageGetItemId(page, offnum);
|
||||
|
||||
if (PageGetMaxOffsetNumber(page) < offnum || !ItemIdIsNormal(lp))
|
||||
elog(PANIC, "neon_rm_redo: invalid lp");
|
||||
|
||||
htup = (HeapTupleHeader) PageGetItem(page, lp);
|
||||
|
||||
htup->t_infomask &= ~(HEAP_XMAX_BITS | HEAP_MOVED);
|
||||
htup->t_infomask2 &= ~HEAP_KEYS_UPDATED;
|
||||
fix_infomask_from_infobits(xlrec->infobits_set, &htup->t_infomask,
|
||||
&htup->t_infomask2);
|
||||
|
||||
/*
|
||||
         * Clear relevant update flags, but only if the modified infomask says
         * there's no update.
         */
        if (HEAP_XMAX_IS_LOCKED_ONLY(htup->t_infomask))
        {
            HeapTupleHeaderClearHotUpdated(htup);
            /* Make sure there is no forward chain link in t_ctid */
            ItemPointerSet(&htup->t_ctid,
                           BufferGetBlockNumber(buffer),
                           offnum);
        }
        HeapTupleHeaderSetXmax(htup, xlrec->xmax);
        HeapTupleHeaderSetCmax(htup, xlrec->t_cid, false);
        PageSetLSN(page, lsn);
        MarkBufferDirty(buffer);
    }
    if (BufferIsValid(buffer))
        UnlockReleaseBuffer(buffer);
}

static void
redo_neon_heap_multi_insert(XLogReaderState *record)
{
    XLogRecPtr  lsn = record->EndRecPtr;
    xl_neon_heap_multi_insert *xlrec;
    RelFileLocator rlocator;
    BlockNumber blkno;
    Buffer      buffer;
    Page        page;
    union
    {
        HeapTupleHeaderData hdr;
        char        data[MaxHeapTupleSize];
    }           tbuf;
    HeapTupleHeader htup;
    uint32      newlen;
    Size        freespace = 0;
    int         i;
    bool        isinit = (XLogRecGetInfo(record) & XLOG_HEAP_INIT_PAGE) != 0;
    XLogRedoAction action;

    /*
     * Insertion doesn't overwrite MVCC data, so no conflict processing is
     * required.
     */
    xlrec = (xl_neon_heap_multi_insert *) XLogRecGetData(record);

    XLogRecGetBlockTag(record, 0, &rlocator, NULL, &blkno);

    /* check that the mutually exclusive flags are not both set */
    Assert(!((xlrec->flags & XLH_INSERT_ALL_VISIBLE_CLEARED) &&
             (xlrec->flags & XLH_INSERT_ALL_FROZEN_SET)));

    /*
     * The visibility map may need to be fixed even if the heap page is
     * already up-to-date.
     */
    if (xlrec->flags & XLH_INSERT_ALL_VISIBLE_CLEARED)
    {
        Relation    reln = CreateFakeRelcacheEntry(rlocator);
        Buffer      vmbuffer = InvalidBuffer;

        visibilitymap_pin(reln, blkno, &vmbuffer);
        visibilitymap_clear(reln, blkno, vmbuffer, VISIBILITYMAP_VALID_BITS);
        ReleaseBuffer(vmbuffer);
        FreeFakeRelcacheEntry(reln);
    }

    if (isinit)
    {
        buffer = XLogInitBufferForRedo(record, 0);
        page = BufferGetPage(buffer);
        PageInit(page, BufferGetPageSize(buffer), 0);
        action = BLK_NEEDS_REDO;
    }
    else
        action = XLogReadBufferForRedo(record, 0, &buffer);
    if (action == BLK_NEEDS_REDO)
    {
        char       *tupdata;
        char       *endptr;
        Size        len;

        /* Tuples are stored as block data */
        tupdata = XLogRecGetBlockData(record, 0, &len);
        endptr = tupdata + len;

        page = (Page) BufferGetPage(buffer);

        for (i = 0; i < xlrec->ntuples; i++)
        {
            OffsetNumber offnum;
            xl_neon_multi_insert_tuple *xlhdr;

            /*
             * If we're reinitializing the page, the tuples are stored in
             * order from FirstOffsetNumber. Otherwise there's an array of
             * offsets in the WAL record, and the tuples come after that.
             */
            if (isinit)
                offnum = FirstOffsetNumber + i;
            else
                offnum = xlrec->offsets[i];
            if (PageGetMaxOffsetNumber(page) + 1 < offnum)
                elog(PANIC, "neon_rm_redo: invalid max offset number");

            xlhdr = (xl_neon_multi_insert_tuple *) SHORTALIGN(tupdata);
            tupdata = ((char *) xlhdr) + SizeOfNeonMultiInsertTuple;

            newlen = xlhdr->datalen;
            Assert(newlen <= MaxHeapTupleSize);
            htup = &tbuf.hdr;
            MemSet((char *) htup, 0, SizeofHeapTupleHeader);
            /* PG73FORMAT: get bitmap [+ padding] [+ oid] + data */
            memcpy((char *) htup + SizeofHeapTupleHeader,
                   (char *) tupdata,
                   newlen);
            tupdata += newlen;

            newlen += SizeofHeapTupleHeader;
            htup->t_infomask2 = xlhdr->t_infomask2;
            htup->t_infomask = xlhdr->t_infomask;
            htup->t_hoff = xlhdr->t_hoff;
            HeapTupleHeaderSetXmin(htup, XLogRecGetXid(record));
            HeapTupleHeaderSetCmin(htup, xlrec->t_cid);
            ItemPointerSetBlockNumber(&htup->t_ctid, blkno);
            ItemPointerSetOffsetNumber(&htup->t_ctid, offnum);

            offnum = PageAddItem(page, (Item) htup, newlen, offnum, true, true);
            if (offnum == InvalidOffsetNumber)
                elog(PANIC, "neon_rm_redo: failed to add tuple");
        }
        if (tupdata != endptr)
            elog(PANIC, "neon_rm_redo: total tuple length mismatch");

        freespace = PageGetHeapFreeSpace(page); /* needed to update FSM below */

        PageSetLSN(page, lsn);

        if (xlrec->flags & XLH_INSERT_ALL_VISIBLE_CLEARED)
            PageClearAllVisible(page);

        /* XLH_INSERT_ALL_FROZEN_SET implies that all tuples are visible */
        if (xlrec->flags & XLH_INSERT_ALL_FROZEN_SET)
            PageSetAllVisible(page);

        MarkBufferDirty(buffer);
    }
    if (BufferIsValid(buffer))
        UnlockReleaseBuffer(buffer);

    /*
     * If the page is running low on free space, update the FSM as well.
     * Arbitrarily, our definition of "low" is less than 20%. We can't do much
     * better than that without knowing the fill-factor for the table.
     *
     * XXX: Don't do this if the page was restored from full page image. We
     * don't bother to update the FSM in that case, it doesn't need to be
     * totally accurate anyway.
     */
    if (action == BLK_NEEDS_REDO && freespace < BLCKSZ / 5)
        XLogRecordPageWithFreeSpace(rlocator, blkno, freespace);
}

#else
/* safeguard for older PostgreSQL versions */
PG_MODULE_MAGIC;
#endif
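Editorial note, for orientation only (not part of the diff): in PostgreSQL 16 a WAL resource manager like the one above is hooked into the server with RegisterCustomRmgr() from a library loaded via shared_preload_libraries. The sketch below shows the general shape under that assumption; the RmgrId value used here (134) is a placeholder, and the exact callback set and ID that neon_rmgr registers are defined elsewhere in this commit.

/* Illustrative sketch only -- not the commit's actual registration code. */
#include "postgres.h"
#include "access/xlog_internal.h"
#include "neon_rmgr.h"

#define EXAMPLE_NEON_RMGR_ID 134        /* placeholder custom RmgrId (valid range 128..255) */

static const RmgrData example_neon_rmgr = {
    .rm_name = "neon",
    .rm_redo = NULL,                    /* the redo dispatcher shown above would go here */
    .rm_desc = neon_rm_desc,
    .rm_identify = neon_rm_identify,
    .rm_decode = neon_rm_decode,
};

void
_PG_init(void)
{
    /* RegisterCustomRmgr() is only allowed while shared_preload_libraries are loading */
    RegisterCustomRmgr(EXAMPLE_NEON_RMGR_ID, &example_neon_rmgr);
}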
4
pgxn/neon_rmgr/neon_rmgr.control
Normal file
@@ -0,0 +1,4 @@
# neon_rmgr extension
comment = 'Neon WAL Resource Manager - custom WAL records used to make Neon work (since PG 16)'
default_version = '1.0'
module_pathname = '$libdir/neon_rmgr'
13
pgxn/neon_rmgr/neon_rmgr.h
Normal file
@@ -0,0 +1,13 @@
#ifndef NEON_RMGR_H
#define NEON_RMGR_H
#if PG_MAJORVERSION_NUM >= 16
#include "access/xlog_internal.h"
#include "replication/decode.h"
#include "replication/logical.h"

extern void neon_rm_desc(StringInfo buf, XLogReaderState *record);
extern void neon_rm_decode(LogicalDecodingContext *ctx, XLogRecordBuffer *buf);
extern const char *neon_rm_identify(uint8 info);

#endif
#endif //NEON_RMGR_H
404
pgxn/neon_rmgr/neon_rmgr_decode.c
Normal file
@@ -0,0 +1,404 @@
#include "postgres.h"

#if PG_MAJORVERSION_NUM >= 16
#include "access/heapam_xlog.h"
#include "access/neon_xlog.h"
#include "replication/decode.h"
#include "replication/logical.h"
#include "replication/snapbuild.h"

#include "neon_rmgr.h"

/* individual record(group)'s handlers */
static void DecodeNeonInsert(LogicalDecodingContext *ctx, XLogRecordBuffer *buf);
static void DecodeNeonUpdate(LogicalDecodingContext *ctx, XLogRecordBuffer *buf);
static void DecodeNeonDelete(LogicalDecodingContext *ctx, XLogRecordBuffer *buf);
static void DecodeNeonMultiInsert(LogicalDecodingContext *ctx, XLogRecordBuffer *buf);

/* common function to decode tuples */
static void DecodeXLogTuple(char *data, Size len, ReorderBufferTupleBuf *tuple);


void
neon_rm_decode(LogicalDecodingContext *ctx, XLogRecordBuffer *buf)
{
    uint8       info = XLogRecGetInfo(buf->record) & XLOG_NEON_OPMASK;
    TransactionId xid = XLogRecGetXid(buf->record);
    SnapBuild  *builder = ctx->snapshot_builder;

    ReorderBufferProcessXid(ctx->reorder, xid, buf->origptr);

    /*
     * If we don't have snapshot or we are just fast-forwarding, there is no
     * point in decoding data changes.
     */
    if (SnapBuildCurrentState(builder) < SNAPBUILD_FULL_SNAPSHOT ||
        ctx->fast_forward)
        return;

    switch (info)
    {
        case XLOG_NEON_HEAP_INSERT:
            if (SnapBuildProcessChange(builder, xid, buf->origptr))
                DecodeNeonInsert(ctx, buf);
            break;
        case XLOG_NEON_HEAP_DELETE:
            if (SnapBuildProcessChange(builder, xid, buf->origptr))
                DecodeNeonDelete(ctx, buf);
            break;
        case XLOG_NEON_HEAP_UPDATE:
        case XLOG_NEON_HEAP_HOT_UPDATE:
            if (SnapBuildProcessChange(builder, xid, buf->origptr))
                DecodeNeonUpdate(ctx, buf);
            break;
        case XLOG_NEON_HEAP_LOCK:
            break;
        case XLOG_NEON_HEAP_MULTI_INSERT:
            if (SnapBuildProcessChange(builder, xid, buf->origptr))
                DecodeNeonMultiInsert(ctx, buf);
            break;
        default:
            elog(ERROR, "unexpected RM_HEAP_ID record type: %u", info);
            break;
    }
}

static inline bool
FilterByOrigin(LogicalDecodingContext *ctx, RepOriginId origin_id)
{
    if (ctx->callbacks.filter_by_origin_cb == NULL)
        return false;

    return filter_by_origin_cb_wrapper(ctx, origin_id);
}

/*
 * Parse XLOG_HEAP_INSERT (not MULTI_INSERT!) records into tuplebufs.
 *
 * Deletes can contain the new tuple.
 */
static void
DecodeNeonInsert(LogicalDecodingContext *ctx, XLogRecordBuffer *buf)
{
    Size        datalen;
    char       *tupledata;
    Size        tuplelen;
    XLogReaderState *r = buf->record;
    xl_neon_heap_insert *xlrec;
    ReorderBufferChange *change;
    RelFileLocator target_locator;

    xlrec = (xl_neon_heap_insert *) XLogRecGetData(r);

    /*
     * Ignore insert records without new tuples (this does happen when
     * raw_heap_insert marks the TOAST record as HEAP_INSERT_NO_LOGICAL).
     */
    if (!(xlrec->flags & XLH_INSERT_CONTAINS_NEW_TUPLE))
        return;

    /* only interested in our database */
    XLogRecGetBlockTag(r, 0, &target_locator, NULL, NULL);
    if (target_locator.dbOid != ctx->slot->data.database)
        return;

    /* output plugin doesn't look for this origin, no need to queue */
    if (FilterByOrigin(ctx, XLogRecGetOrigin(r)))
        return;

    change = ReorderBufferGetChange(ctx->reorder);
    if (!(xlrec->flags & XLH_INSERT_IS_SPECULATIVE))
        change->action = REORDER_BUFFER_CHANGE_INSERT;
    else
        change->action = REORDER_BUFFER_CHANGE_INTERNAL_SPEC_INSERT;
    change->origin_id = XLogRecGetOrigin(r);

    memcpy(&change->data.tp.rlocator, &target_locator, sizeof(RelFileLocator));

    tupledata = XLogRecGetBlockData(r, 0, &datalen);
    tuplelen = datalen - SizeOfNeonHeapHeader;

    change->data.tp.newtuple =
        ReorderBufferGetTupleBuf(ctx->reorder, tuplelen);

    DecodeXLogTuple(tupledata, datalen, change->data.tp.newtuple);

    change->data.tp.clear_toast_afterwards = true;

    ReorderBufferQueueChange(ctx->reorder, XLogRecGetXid(r), buf->origptr,
                             change,
                             xlrec->flags & XLH_INSERT_ON_TOAST_RELATION);
}

/*
 * Parse XLOG_HEAP_DELETE from wal into proper tuplebufs.
 *
 * Deletes can possibly contain the old primary key.
 */
static void
DecodeNeonDelete(LogicalDecodingContext *ctx, XLogRecordBuffer *buf)
{
    XLogReaderState *r = buf->record;
    xl_neon_heap_delete *xlrec;
    ReorderBufferChange *change;
    RelFileLocator target_locator;

    xlrec = (xl_neon_heap_delete *) XLogRecGetData(r);

    /* only interested in our database */
    XLogRecGetBlockTag(r, 0, &target_locator, NULL, NULL);
    if (target_locator.dbOid != ctx->slot->data.database)
        return;

    /* output plugin doesn't look for this origin, no need to queue */
    if (FilterByOrigin(ctx, XLogRecGetOrigin(r)))
        return;

    change = ReorderBufferGetChange(ctx->reorder);

    if (xlrec->flags & XLH_DELETE_IS_SUPER)
        change->action = REORDER_BUFFER_CHANGE_INTERNAL_SPEC_ABORT;
    else
        change->action = REORDER_BUFFER_CHANGE_DELETE;

    change->origin_id = XLogRecGetOrigin(r);

    memcpy(&change->data.tp.rlocator, &target_locator, sizeof(RelFileLocator));

    /* old primary key stored */
    if (xlrec->flags & XLH_DELETE_CONTAINS_OLD)
    {
        Size        datalen = XLogRecGetDataLen(r) - SizeOfNeonHeapHeader;
        Size        tuplelen = datalen - SizeOfNeonHeapHeader;

        Assert(XLogRecGetDataLen(r) > (SizeOfNeonHeapDelete + SizeOfNeonHeapHeader));

        change->data.tp.oldtuple =
            ReorderBufferGetTupleBuf(ctx->reorder, tuplelen);

        DecodeXLogTuple((char *) xlrec + SizeOfNeonHeapDelete,
                        datalen, change->data.tp.oldtuple);
    }

    change->data.tp.clear_toast_afterwards = true;

    ReorderBufferQueueChange(ctx->reorder, XLogRecGetXid(r), buf->origptr,
                             change, false);
}

/*
 * Parse XLOG_HEAP_UPDATE and XLOG_HEAP_HOT_UPDATE, which have the same layout
 * in the record, from wal into proper tuplebufs.
 *
 * Updates can possibly contain a new tuple and the old primary key.
 */
static void
DecodeNeonUpdate(LogicalDecodingContext *ctx, XLogRecordBuffer *buf)
{
    XLogReaderState *r = buf->record;
    xl_neon_heap_update *xlrec;
    ReorderBufferChange *change;
    char       *data;
    RelFileLocator target_locator;

    xlrec = (xl_neon_heap_update *) XLogRecGetData(r);

    /* only interested in our database */
    XLogRecGetBlockTag(r, 0, &target_locator, NULL, NULL);
    if (target_locator.dbOid != ctx->slot->data.database)
        return;

    /* output plugin doesn't look for this origin, no need to queue */
    if (FilterByOrigin(ctx, XLogRecGetOrigin(r)))
        return;

    change = ReorderBufferGetChange(ctx->reorder);
    change->action = REORDER_BUFFER_CHANGE_UPDATE;
    change->origin_id = XLogRecGetOrigin(r);
    memcpy(&change->data.tp.rlocator, &target_locator, sizeof(RelFileLocator));

    if (xlrec->flags & XLH_UPDATE_CONTAINS_NEW_TUPLE)
    {
        Size        datalen;
        Size        tuplelen;

        data = XLogRecGetBlockData(r, 0, &datalen);

        tuplelen = datalen - SizeOfNeonHeapHeader;

        change->data.tp.newtuple =
            ReorderBufferGetTupleBuf(ctx->reorder, tuplelen);

        DecodeXLogTuple(data, datalen, change->data.tp.newtuple);
    }

    if (xlrec->flags & XLH_UPDATE_CONTAINS_OLD)
    {
        Size        datalen;
        Size        tuplelen;

        /* caution, remaining data in record is not aligned */
        data = XLogRecGetData(r) + SizeOfNeonHeapUpdate;
        datalen = XLogRecGetDataLen(r) - SizeOfNeonHeapUpdate;
        tuplelen = datalen - SizeOfNeonHeapHeader;

        change->data.tp.oldtuple =
            ReorderBufferGetTupleBuf(ctx->reorder, tuplelen);

        DecodeXLogTuple(data, datalen, change->data.tp.oldtuple);
    }

    change->data.tp.clear_toast_afterwards = true;

    ReorderBufferQueueChange(ctx->reorder, XLogRecGetXid(r), buf->origptr,
                             change, false);
}

/*
 * Decode XLOG_HEAP2_MULTI_INSERT_insert record into multiple tuplebufs.
 *
 * Currently MULTI_INSERT will always contain the full tuples.
 */
static void
DecodeNeonMultiInsert(LogicalDecodingContext *ctx, XLogRecordBuffer *buf)
{
    XLogReaderState *r = buf->record;
    xl_neon_heap_multi_insert *xlrec;
    int         i;
    char       *data;
    char       *tupledata;
    Size        tuplelen;
    RelFileLocator rlocator;

    xlrec = (xl_neon_heap_multi_insert *) XLogRecGetData(r);

    /*
     * Ignore insert records without new tuples. This happens when a
     * multi_insert is done on a catalog or on a non-persistent relation.
     */
    if (!(xlrec->flags & XLH_INSERT_CONTAINS_NEW_TUPLE))
        return;

    /* only interested in our database */
    XLogRecGetBlockTag(r, 0, &rlocator, NULL, NULL);
    if (rlocator.dbOid != ctx->slot->data.database)
        return;

    /* output plugin doesn't look for this origin, no need to queue */
    if (FilterByOrigin(ctx, XLogRecGetOrigin(r)))
        return;

    /*
     * We know that this multi_insert isn't for a catalog, so the block should
     * always have data even if a full-page write of it is taken.
     */
    tupledata = XLogRecGetBlockData(r, 0, &tuplelen);
    Assert(tupledata != NULL);

    data = tupledata;
    for (i = 0; i < xlrec->ntuples; i++)
    {
        ReorderBufferChange *change;
        xl_neon_multi_insert_tuple *xlhdr;
        int         datalen;
        ReorderBufferTupleBuf *tuple;
        HeapTupleHeader header;

        change = ReorderBufferGetChange(ctx->reorder);
        change->action = REORDER_BUFFER_CHANGE_INSERT;
        change->origin_id = XLogRecGetOrigin(r);

        memcpy(&change->data.tp.rlocator, &rlocator, sizeof(RelFileLocator));

        xlhdr = (xl_neon_multi_insert_tuple *) SHORTALIGN(data);
        data = ((char *) xlhdr) + SizeOfNeonMultiInsertTuple;
        datalen = xlhdr->datalen;

        change->data.tp.newtuple =
            ReorderBufferGetTupleBuf(ctx->reorder, datalen);

        tuple = change->data.tp.newtuple;
        header = tuple->tuple.t_data;

        /* not a disk based tuple */
        ItemPointerSetInvalid(&tuple->tuple.t_self);

        /*
         * We can only figure this out after reassembling the transactions.
         */
        tuple->tuple.t_tableOid = InvalidOid;

        tuple->tuple.t_len = datalen + SizeofHeapTupleHeader;

        memset(header, 0, SizeofHeapTupleHeader);

        memcpy((char *) tuple->tuple.t_data + SizeofHeapTupleHeader,
               (char *) data,
               datalen);
        header->t_infomask = xlhdr->t_infomask;
        header->t_infomask2 = xlhdr->t_infomask2;
        header->t_hoff = xlhdr->t_hoff;

        /*
         * Reset toast reassembly state only after the last row in the last
         * xl_multi_insert_tuple record emitted by one heap_multi_insert()
         * call.
         */
        if (xlrec->flags & XLH_INSERT_LAST_IN_MULTI &&
            (i + 1) == xlrec->ntuples)
            change->data.tp.clear_toast_afterwards = true;
        else
            change->data.tp.clear_toast_afterwards = false;

        ReorderBufferQueueChange(ctx->reorder, XLogRecGetXid(r),
                                 buf->origptr, change, false);

        /* move to the next xl_neon_multi_insert_tuple entry */
        data += datalen;
    }
    Assert(data == tupledata + tuplelen);
}

/*
 * Read a HeapTuple as WAL logged by heap_insert, heap_update and heap_delete
 * (but not by heap_multi_insert) into a tuplebuf.
 *
 * The size 'len' and the pointer 'data' in the record need to be
 * computed outside as they are record specific.
 */
static void
DecodeXLogTuple(char *data, Size len, ReorderBufferTupleBuf *tuple)
{
    xl_neon_heap_header xlhdr;
    int         datalen = len - SizeOfNeonHeapHeader;
    HeapTupleHeader header;

    Assert(datalen >= 0);

    tuple->tuple.t_len = datalen + SizeofHeapTupleHeader;
    header = tuple->tuple.t_data;

    /* not a disk based tuple */
    ItemPointerSetInvalid(&tuple->tuple.t_self);

    /* we can only figure this out after reassembling the transactions */
    tuple->tuple.t_tableOid = InvalidOid;

    /* data is not stored aligned, copy to aligned storage */
    memcpy((char *) &xlhdr,
           data,
           SizeOfNeonHeapHeader);

    memset(header, 0, SizeofHeapTupleHeader);

    memcpy(((char *) tuple->tuple.t_data) + SizeofHeapTupleHeader,
           data + SizeOfNeonHeapHeader,
           datalen);

    header->t_infomask = xlhdr.t_infomask;
    header->t_infomask2 = xlhdr.t_infomask2;
    header->t_hoff = xlhdr.t_hoff;
}


#endif
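Editorial note (not part of the diff): the changes queued by the decode callbacks above reach a logical decoding output plugin through PostgreSQL's ordinary change callback, exactly like built-in heap records. A minimal sketch of such a consumer, assuming nothing neon-specific, purely for orientation:

/* Illustrative sketch of an output-plugin change callback. */
#include "postgres.h"
#include "replication/logical.h"
#include "replication/output_plugin.h"
#include "replication/reorderbuffer.h"

static void
example_change_cb(LogicalDecodingContext *ctx, ReorderBufferTXN *txn,
                  Relation relation, ReorderBufferChange *change)
{
    switch (change->action)
    {
        case REORDER_BUFFER_CHANGE_INSERT:
            /* change->data.tp.newtuple was filled in by DecodeNeonInsert() above */
            break;
        case REORDER_BUFFER_CHANGE_UPDATE:
        case REORDER_BUFFER_CHANGE_DELETE:
            /* old/new tuples as prepared by DecodeNeonUpdate()/DecodeNeonDelete() */
            break;
        default:
            break;
    }
}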
181
pgxn/neon_rmgr/neon_rmgr_desc.c
Normal file
@@ -0,0 +1,181 @@
#include "postgres.h"
#if PG_MAJORVERSION_NUM >= 16
#include "access/heapam_xlog.h"
#include "access/neon_xlog.h"
#include "access/rmgr.h"
#include "access/rmgrdesc_utils.h"
#include "access/xlog_internal.h"
#include "miscadmin.h"
#include "storage/buf.h"
#include "storage/bufpage.h"

#include "neon_rmgr.h"

/*
 * NOTE: "keyname" argument cannot have trailing spaces or punctuation
 * characters
 */
static void
infobits_desc(StringInfo buf, uint8 infobits, const char *keyname)
{
    appendStringInfo(buf, "%s: [", keyname);

    Assert(buf->data[buf->len - 1] != ' ');

    if (infobits & XLHL_XMAX_IS_MULTI)
        appendStringInfoString(buf, "IS_MULTI, ");
    if (infobits & XLHL_XMAX_LOCK_ONLY)
        appendStringInfoString(buf, "LOCK_ONLY, ");
    if (infobits & XLHL_XMAX_EXCL_LOCK)
        appendStringInfoString(buf, "EXCL_LOCK, ");
    if (infobits & XLHL_XMAX_KEYSHR_LOCK)
        appendStringInfoString(buf, "KEYSHR_LOCK, ");
    if (infobits & XLHL_KEYS_UPDATED)
        appendStringInfoString(buf, "KEYS_UPDATED, ");

    if (buf->data[buf->len - 1] == ' ')
    {
        /* Truncate-away final unneeded ", " */
        Assert(buf->data[buf->len - 2] == ',');
        buf->len -= 2;
        buf->data[buf->len] = '\0';
    }

    appendStringInfoString(buf, "]");
}

static void
truncate_flags_desc(StringInfo buf, uint8 flags)
{
    appendStringInfoString(buf, "flags: [");

    if (flags & XLH_TRUNCATE_CASCADE)
        appendStringInfoString(buf, "CASCADE, ");
    if (flags & XLH_TRUNCATE_RESTART_SEQS)
        appendStringInfoString(buf, "RESTART_SEQS, ");

    if (buf->data[buf->len - 1] == ' ')
    {
        /* Truncate-away final unneeded ", " */
        Assert(buf->data[buf->len - 2] == ',');
        buf->len -= 2;
        buf->data[buf->len] = '\0';
    }

    appendStringInfoString(buf, "]");
}

void
neon_rm_desc(StringInfo buf, XLogReaderState *record)
{
    char       *rec = XLogRecGetData(record);
    uint8       info = XLogRecGetInfo(record) & ~XLR_INFO_MASK;

    info &= XLOG_NEON_OPMASK;

    if (info == XLOG_NEON_HEAP_INSERT)
    {
        xl_neon_heap_insert *xlrec = (xl_neon_heap_insert *) rec;

        appendStringInfo(buf, "off: %u, flags: 0x%02X",
                         xlrec->offnum,
                         xlrec->flags);
    }
    else if (info == XLOG_NEON_HEAP_DELETE)
    {
        xl_neon_heap_delete *xlrec = (xl_neon_heap_delete *) rec;

        appendStringInfo(buf, "xmax: %u, off: %u, ",
                         xlrec->xmax, xlrec->offnum);
        infobits_desc(buf, xlrec->infobits_set, "infobits");
        appendStringInfo(buf, ", flags: 0x%02X", xlrec->flags);
    }
    else if (info == XLOG_NEON_HEAP_UPDATE)
    {
        xl_neon_heap_update *xlrec = (xl_neon_heap_update *) rec;

        appendStringInfo(buf, "old_xmax: %u, old_off: %u, ",
                         xlrec->old_xmax, xlrec->old_offnum);
        infobits_desc(buf, xlrec->old_infobits_set, "old_infobits");
        appendStringInfo(buf, ", flags: 0x%02X, new_xmax: %u, new_off: %u",
                         xlrec->flags, xlrec->new_xmax, xlrec->new_offnum);
    }
    else if (info == XLOG_NEON_HEAP_HOT_UPDATE)
    {
        xl_neon_heap_update *xlrec = (xl_neon_heap_update *) rec;

        appendStringInfo(buf, "old_xmax: %u, old_off: %u, ",
                         xlrec->old_xmax, xlrec->old_offnum);
        infobits_desc(buf, xlrec->old_infobits_set, "old_infobits");
        appendStringInfo(buf, ", flags: 0x%02X, new_xmax: %u, new_off: %u",
                         xlrec->flags, xlrec->new_xmax, xlrec->new_offnum);
    }
    else if (info == XLOG_NEON_HEAP_LOCK)
    {
        xl_neon_heap_lock *xlrec = (xl_neon_heap_lock *) rec;

        appendStringInfo(buf, "xmax: %u, off: %u, ",
                         xlrec->xmax, xlrec->offnum);
        infobits_desc(buf, xlrec->infobits_set, "infobits");
        appendStringInfo(buf, ", flags: 0x%02X", xlrec->flags);
    }
    else if (info == XLOG_NEON_HEAP_MULTI_INSERT)
    {
        xl_neon_heap_multi_insert *xlrec = (xl_neon_heap_multi_insert *) rec;
        bool        isinit = (XLogRecGetInfo(record) & XLOG_NEON_INIT_PAGE) != 0;

        appendStringInfo(buf, "ntuples: %d, flags: 0x%02X", xlrec->ntuples,
                         xlrec->flags);

        if (XLogRecHasBlockData(record, 0) && !isinit)
        {
            appendStringInfoString(buf, ", offsets:");
            array_desc(buf, xlrec->offsets, sizeof(OffsetNumber),
                       xlrec->ntuples, &offset_elem_desc, NULL);
        }
    }
}

const char *
neon_rm_identify(uint8 info)
{
    const char *id = NULL;

    switch (info & ~XLR_INFO_MASK)
    {
        case XLOG_NEON_HEAP_INSERT:
            id = "INSERT";
            break;
        case XLOG_NEON_HEAP_INSERT | XLOG_NEON_INIT_PAGE:
            id = "INSERT+INIT";
            break;
        case XLOG_NEON_HEAP_DELETE:
            id = "DELETE";
            break;
        case XLOG_NEON_HEAP_UPDATE:
            id = "UPDATE";
            break;
        case XLOG_NEON_HEAP_UPDATE | XLOG_NEON_INIT_PAGE:
            id = "UPDATE+INIT";
            break;
        case XLOG_NEON_HEAP_HOT_UPDATE:
            id = "HOT_UPDATE";
            break;
        case XLOG_NEON_HEAP_HOT_UPDATE | XLOG_HEAP_INIT_PAGE:
            id = "HOT_UPDATE+INIT";
            break;
        case XLOG_NEON_HEAP_LOCK:
            id = "LOCK";
            break;
        case XLOG_NEON_HEAP_MULTI_INSERT:
            id = "MULTI_INSERT";
            break;
        case XLOG_NEON_HEAP_MULTI_INSERT | XLOG_NEON_INIT_PAGE:
            id = "MULTI_INSERT+INIT";
            break;
    }

    return id;
}

#endif
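Editorial note (not part of the diff): neon_rm_desc() and neon_rm_identify() are the callbacks that tools such as pg_waldump use to render these records. Based purely on the format strings above, a multi-insert record would print roughly as

    desc: MULTI_INSERT ntuples: 3, flags: 0x02, offsets: ...

where the surrounding rmgr/lsn/length columns are the tool's own framing and are shown here only as an assumed illustration.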
@@ -10,6 +10,8 @@
 */
#include "postgres.h"

#include "../neon/neon_pgversioncompat.h"

#include "access/relation.h"
#include "access/xact.h"
#include "access/xlog.h"
@@ -39,8 +41,13 @@ PG_FUNCTION_INFO_V1(neon_xlogflush);
 * Linkage to functions in neon module.
 * The signature here would need to be updated whenever function parameters change in pagestore_smgr.c
 */
typedef void (*neon_read_at_lsn_type) (RelFileNode rnode, ForkNumber forkNum, BlockNumber blkno,
#if PG_MAJORVERSION_NUM < 16
typedef void (*neon_read_at_lsn_type) (NRelFileInfo rinfo, ForkNumber forkNum, BlockNumber blkno,
                                        XLogRecPtr request_lsn, bool request_latest, char *buffer);
#else
typedef void (*neon_read_at_lsn_type) (NRelFileInfo rinfo, ForkNumber forkNum, BlockNumber blkno,
                                        XLogRecPtr request_lsn, bool request_latest, void *buffer);
#endif

static neon_read_at_lsn_type neon_read_at_lsn_ptr;

@@ -115,7 +122,7 @@ clear_buffer_cache(PG_FUNCTION_ARGS)
    uint32      buf_state;
    Buffer      bufferid;
    bool        isvalid;
    RelFileNode rnode;
    NRelFileInfo rinfo;
    ForkNumber  forknum;
    BlockNumber blocknum;

@@ -128,7 +135,7 @@ clear_buffer_cache(PG_FUNCTION_ARGS)
    else
        isvalid = false;
    bufferid = BufferDescriptorGetBuffer(bufHdr);
    rnode = bufHdr->tag.rnode;
    rinfo = BufTagGetNRelFileInfo(bufHdr->tag);
    forknum = bufHdr->tag.forkNum;
    blocknum = bufHdr->tag.blockNum;

@@ -141,7 +148,7 @@ clear_buffer_cache(PG_FUNCTION_ARGS)
     */
    if (isvalid)
    {
        if (ReadRecentBuffer(rnode, forknum, blocknum, bufferid))
        if (ReadRecentBuffer(rinfo, forknum, blocknum, bufferid))
            ReleaseBuffer(bufferid);
    }
}
@@ -238,7 +245,7 @@ get_raw_page_at_lsn(PG_FUNCTION_ARGS)
    SET_VARSIZE(raw_page, BLCKSZ + VARHDRSZ);
    raw_page_data = VARDATA(raw_page);

    neon_read_at_lsn(rel->rd_node, forknum, blkno, read_lsn, request_latest, raw_page_data);
    neon_read_at_lsn(InfoFromRelation(rel), forknum, blkno, read_lsn, request_latest, raw_page_data);

    relation_close(rel, AccessShareLock);

@@ -267,10 +274,17 @@ get_raw_page_at_lsn_ex(PG_FUNCTION_ARGS)
        PG_RETURN_NULL();

    {
        RelFileNode rnode = {
        NRelFileInfo rinfo = {
#if PG_MAJORVERSION_NUM < 16
            .spcNode = PG_GETARG_OID(0),
            .dbNode = PG_GETARG_OID(1),
            .relNode = PG_GETARG_OID(2)};
            .relNode = PG_GETARG_OID(2)
#else
            .spcOid = PG_GETARG_OID(0),
            .dbOid = PG_GETARG_OID(1),
            .relNumber = PG_GETARG_OID(2)
#endif
        };

        ForkNumber  forknum = PG_GETARG_UINT32(3);

@@ -284,7 +298,7 @@ get_raw_page_at_lsn_ex(PG_FUNCTION_ARGS)
        SET_VARSIZE(raw_page, BLCKSZ + VARHDRSZ);
        raw_page_data = VARDATA(raw_page);

        neon_read_at_lsn(rnode, forknum, blkno, read_lsn, request_latest, raw_page_data);
        neon_read_at_lsn(rinfo, forknum, blkno, read_lsn, request_latest, raw_page_data);
        PG_RETURN_BYTEA_P(raw_page);
    }
}

@@ -18,10 +18,12 @@
 */
#include "postgres.h"

#include "../neon/neon_pgversioncompat.h"

#include "access/xlog.h"
#include "storage/block.h"
#include "storage/buf_internals.h"
#include "storage/relfilenode.h"
#include RELFILEINFO_HDR
#include "storage/smgr.h"

#if PG_VERSION_NUM >= 150000
@@ -43,10 +45,12 @@ static int used_pages;
static int
locate_page(SMgrRelation reln, ForkNumber forknum, BlockNumber blkno)
{
    NRelFileInfo rinfo = InfoFromSMgrRel(reln);

    /* We only hold a small number of pages, so linear search */
    for (int i = 0; i < used_pages; i++)
    {
        if (RelFileNodeEquals(reln->smgr_rnode.node, page_tag[i].rnode)
        if (RelFileInfoEquals(rinfo, BufTagGetNRelFileInfo(page_tag[i]))
            && forknum == page_tag[i].forkNum
            && blkno == page_tag[i].blockNum)
        {
@@ -63,15 +67,26 @@ static void inmem_open(SMgrRelation reln);
static void inmem_close(SMgrRelation reln, ForkNumber forknum);
static void inmem_create(SMgrRelation reln, ForkNumber forknum, bool isRedo);
static bool inmem_exists(SMgrRelation reln, ForkNumber forknum);
static void inmem_unlink(RelFileNodeBackend rnode, ForkNumber forknum, bool isRedo);
static void inmem_extend(SMgrRelation reln, ForkNumber forknum,
                         BlockNumber blocknum, char *buffer, bool skipFsync);
static void inmem_unlink(NRelFileInfoBackend rinfo, ForkNumber forknum, bool isRedo);
static bool inmem_prefetch(SMgrRelation reln, ForkNumber forknum,
                           BlockNumber blocknum);
#if PG_MAJORVERSION_NUM < 16
static void inmem_extend(SMgrRelation reln, ForkNumber forknum,
                         BlockNumber blocknum, char *buffer, bool skipFsync);
static void inmem_read(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum,
                       char *buffer);
static void inmem_write(SMgrRelation reln, ForkNumber forknum,
                        BlockNumber blocknum, char *buffer, bool skipFsync);
#else
static void inmem_extend(SMgrRelation reln, ForkNumber forknum,
                         BlockNumber blocknum, const void *buffer, bool skipFsync);
static void inmem_zeroextend(SMgrRelation reln, ForkNumber forknum,
                             BlockNumber blocknum, int nblocks, bool skipFsync);
static void inmem_read(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum,
                       void *buffer);
static void inmem_write(SMgrRelation reln, ForkNumber forknum,
                        BlockNumber blocknum, const void *buffer, bool skipFsync);
#endif
static void inmem_writeback(SMgrRelation reln, ForkNumber forknum,
                            BlockNumber blocknum, BlockNumber nblocks);
static BlockNumber inmem_nblocks(SMgrRelation reln, ForkNumber forknum);
@@ -95,9 +110,11 @@ inmem_init(void)
static bool
inmem_exists(SMgrRelation reln, ForkNumber forknum)
{
    NRelFileInfo rinfo = InfoFromSMgrRel(reln);

    for (int i = 0; i < used_pages; i++)
    {
        if (RelFileNodeEquals(reln->smgr_rnode.node, page_tag[i].rnode)
        if (RelFileInfoEquals(rinfo, BufTagGetNRelFileInfo(page_tag[i]))
            && forknum == page_tag[i].forkNum)
        {
            return true;
@@ -120,7 +137,7 @@ inmem_create(SMgrRelation reln, ForkNumber forknum, bool isRedo)
 * inmem_unlink() -- Unlink a relation.
 */
static void
inmem_unlink(RelFileNodeBackend rnode, ForkNumber forknum, bool isRedo)
inmem_unlink(NRelFileInfoBackend rinfo, ForkNumber forknum, bool isRedo)
{
}

@@ -135,12 +152,28 @@ inmem_unlink(RelFileNodeBackend rnode, ForkNumber forknum, bool isRedo)
 */
static void
inmem_extend(SMgrRelation reln, ForkNumber forknum, BlockNumber blkno,
#if PG_MAJORVERSION_NUM < 16
             char *buffer, bool skipFsync)
#else
             const void *buffer, bool skipFsync)
#endif
{
    /* same as smgwrite() for us */
    inmem_write(reln, forknum, blkno, buffer, skipFsync);
}

#if PG_MAJORVERSION_NUM >= 16
static void
inmem_zeroextend(SMgrRelation reln, ForkNumber forknum,
                 BlockNumber blocknum, int nblocks, bool skipFsync)
{
    char        buffer[BLCKSZ] = {0};

    for (int i = 0; i < nblocks; i++)
        inmem_extend(reln, forknum, blocknum + i, buffer, skipFsync);
}
#endif

/*
 * inmem_open() -- Initialize newly-opened relation.
 */
@@ -180,7 +213,11 @@ inmem_writeback(SMgrRelation reln, ForkNumber forknum,
 */
static void
inmem_read(SMgrRelation reln, ForkNumber forknum, BlockNumber blkno,
#if PG_MAJORVERSION_NUM < 16
           char *buffer)
#else
           void *buffer)
#endif
{
    int         pg;

@@ -200,7 +237,11 @@ inmem_read(SMgrRelation reln, ForkNumber forknum, BlockNumber blkno,
 */
static void
inmem_write(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum,
#if PG_MAJORVERSION_NUM < 16
            char *buffer, bool skipFsync)
#else
            const void *buffer, bool skipFsync)
#endif
{
    int         pg;

@@ -216,9 +257,7 @@ inmem_write(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum,
     */
    elog(used_pages >= WARN_PAGES ? WARNING : DEBUG1,
         "inmem_write() called for %u/%u/%u.%u blk %u: used_pages %u",
         reln->smgr_rnode.node.spcNode,
         reln->smgr_rnode.node.dbNode,
         reln->smgr_rnode.node.relNode,
         RelFileInfoFmt(InfoFromSMgrRel(reln)),
         forknum,
         blocknum,
         used_pages);
@@ -227,14 +266,13 @@ inmem_write(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum,

        pg = used_pages;
        used_pages++;
        INIT_BUFFERTAG(page_tag[pg], reln->smgr_rnode.node, forknum, blocknum);

        InitBufferTag(&page_tag[pg], &InfoFromSMgrRel(reln), forknum, blocknum);
    }
    else
    {
        elog(DEBUG1, "inmem_write() called for %u/%u/%u.%u blk %u: found at %u",
             reln->smgr_rnode.node.spcNode,
             reln->smgr_rnode.node.dbNode,
             reln->smgr_rnode.node.relNode,
             RelFileInfoFmt(InfoFromSMgrRel(reln)),
             forknum,
             blocknum,
             used_pages);
@@ -287,6 +325,9 @@ static const struct f_smgr inmem_smgr =
    .smgr_exists = inmem_exists,
    .smgr_unlink = inmem_unlink,
    .smgr_extend = inmem_extend,
#if PG_MAJORVERSION_NUM >= 16
    .smgr_zeroextend = inmem_zeroextend,
#endif
    .smgr_prefetch = inmem_prefetch,
    .smgr_read = inmem_read,
    .smgr_write = inmem_write,
@@ -297,11 +338,11 @@ static const struct f_smgr inmem_smgr =
};

const f_smgr *
smgr_inmem(BackendId backend, RelFileNode rnode)
smgr_inmem(BackendId backend, NRelFileInfo rinfo)
{
    Assert(InRecovery);
    if (backend != InvalidBackendId)
        return smgr_standard(backend, rnode);
        return smgr_standard(backend, rinfo);
    else
        return &inmem_smgr;
}

@@ -11,7 +11,7 @@
#ifndef INMEM_SMGR_H
#define INMEM_SMGR_H

extern const f_smgr *smgr_inmem(BackendId backend, RelFileNode rnode);
extern const f_smgr *smgr_inmem(BackendId backend, NRelFileInfo rinfo);
extern void smgr_init_inmem(void);

#endif                          /* INMEM_SMGR_H */

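Editorial note (not part of the diff): the NRelFileInfo, InfoFromSMgrRel and RelFileInfoEquals names used throughout these hunks come from ../neon/neon_pgversioncompat.h, which is not shown in this excerpt. The sketch below is only a guess at what such a version-compat shim typically looks like; the real definitions in the commit may differ.

/* Hypothetical sketch of a version-compat shim -- not the actual header. */
#if PG_MAJORVERSION_NUM < 16
#define NRelFileInfo            RelFileNode
#define RelFileInfoEquals(a, b) RelFileNodeEquals(a, b)
#define InfoFromSMgrRel(reln)   ((reln)->smgr_rnode.node)
#else
#define NRelFileInfo            RelFileLocator
#define RelFileInfoEquals(a, b) RelFileLocatorEquals(a, b)
#define InfoFromSMgrRel(reln)   ((reln)->smgr_rlocator.locator)
#endif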
@@ -43,6 +43,8 @@

#include "postgres.h"

#include "../neon/neon_pgversioncompat.h"

#include <fcntl.h>
#include <limits.h>
#include <signal.h>
@@ -61,9 +63,11 @@
#include <malloc.h>
#endif

#if PG_MAJORVERSION_NUM < 16
#ifndef HAVE_GETRUSAGE
#include "rusagestub.h"
#endif
#endif

#include "access/clog.h"
#include "access/commit_ts.h"
@@ -187,7 +191,7 @@ enter_seccomp_mode(void)
 * backend processes. Some initialization was done in CallExtMain
 * already.
 */
void
PGDLLEXPORT void
WalRedoMain(int argc, char *argv[])
{
    int         firstchar;
@@ -200,7 +204,7 @@ WalRedoMain(int argc, char *argv[])

    /*
     * WAL redo does not need a large number of buffers. And speed of
     * DropRelFileNodeAllLocalBuffers() is proportional to the number of
     * DropRelationAllLocalBuffers() is proportional to the number of
     * buffers. So let's keep it small (default value is 1024)
     */
    num_temp_buffers = 4;
@@ -212,6 +216,12 @@ WalRedoMain(int argc, char *argv[])
    smgr_hook = smgr_inmem;
    smgr_init_hook = smgr_init_inmem;

#if PG_VERSION_NUM >= 160000
    /* make rmgr registry believe we can register the resource manager */
    process_shared_preload_libraries_in_progress = true;
    load_file("$libdir/neon_rmgr", false);
    process_shared_preload_libraries_in_progress = false;
#endif

    /* Initialize MaxBackends (if under postmaster, was done already) */
    MaxConnections = 1;
@@ -300,6 +310,9 @@ WalRedoMain(int argc, char *argv[])
     */
    MemoryContextSwitchTo(MessageContext);
    initStringInfo(&input_message);
#if PG_MAJORVERSION_NUM >= 16
    MyBackendType = B_BACKEND;
#endif

    for (;;)
    {
@@ -534,16 +547,16 @@ CreateFakeSharedMemoryAndSemaphores()

/* Version compatility wrapper for ReadBufferWithoutRelcache */
static inline Buffer
NeonRedoReadBuffer(RelFileNode rnode,
NeonRedoReadBuffer(NRelFileInfo rinfo,
                   ForkNumber forkNum, BlockNumber blockNum,
                   ReadBufferMode mode)
{
#if PG_VERSION_NUM >= 150000
    return ReadBufferWithoutRelcache(rnode, forkNum, blockNum, mode,
    return ReadBufferWithoutRelcache(rinfo, forkNum, blockNum, mode,
                                     NULL, /* no strategy */
                                     true); /* WAL redo is only performed on permanent rels */
#else
    return ReadBufferWithoutRelcache(rnode, forkNum, blockNum, mode,
    return ReadBufferWithoutRelcache(rinfo, forkNum, blockNum, mode,
                                     NULL); /* no strategy */
#endif
}
@@ -647,7 +660,7 @@ ReadRedoCommand(StringInfo inBuf)
static void
BeginRedoForBlock(StringInfo input_message)
{
    RelFileNode rnode;
    NRelFileInfo rinfo;
    ForkNumber  forknum;
    BlockNumber blknum;
    SMgrRelation reln;
@@ -662,22 +675,26 @@ BeginRedoForBlock(StringInfo input_message)
     * BlockNumber
     */
    forknum = pq_getmsgbyte(input_message);
    rnode.spcNode = pq_getmsgint(input_message, 4);
    rnode.dbNode = pq_getmsgint(input_message, 4);
    rnode.relNode = pq_getmsgint(input_message, 4);
#if PG_MAJORVERSION_NUM < 16
    rinfo.spcNode = pq_getmsgint(input_message, 4);
    rinfo.dbNode = pq_getmsgint(input_message, 4);
    rinfo.relNode = pq_getmsgint(input_message, 4);
#else
    rinfo.spcOid = pq_getmsgint(input_message, 4);
    rinfo.dbOid = pq_getmsgint(input_message, 4);
    rinfo.relNumber = pq_getmsgint(input_message, 4);
#endif
    blknum = pq_getmsgint(input_message, 4);
    wal_redo_buffer = InvalidBuffer;

    INIT_BUFFERTAG(target_redo_tag, rnode, forknum, blknum);
    InitBufferTag(&target_redo_tag, &rinfo, forknum, blknum);

    elog(TRACE, "BeginRedoForBlock %u/%u/%u.%d blk %u",
         target_redo_tag.rnode.spcNode,
         target_redo_tag.rnode.dbNode,
         target_redo_tag.rnode.relNode,
         RelFileInfoFmt(rinfo),
         target_redo_tag.forkNum,
         target_redo_tag.blockNum);

    reln = smgropen(rnode, InvalidBackendId, RELPERSISTENCE_PERMANENT);
    reln = smgropen(rinfo, InvalidBackendId, RELPERSISTENCE_PERMANENT);
    if (reln->smgr_cached_nblocks[forknum] == InvalidBlockNumber ||
        reln->smgr_cached_nblocks[forknum] < blknum + 1)
    {
@@ -691,7 +708,7 @@ BeginRedoForBlock(StringInfo input_message)
static void
PushPage(StringInfo input_message)
{
    RelFileNode rnode;
    NRelFileInfo rinfo;
    ForkNumber  forknum;
    BlockNumber blknum;
    const char *content;
@@ -709,13 +726,19 @@ PushPage(StringInfo input_message)
     * 8k page content
     */
    forknum = pq_getmsgbyte(input_message);
    rnode.spcNode = pq_getmsgint(input_message, 4);
    rnode.dbNode = pq_getmsgint(input_message, 4);
    rnode.relNode = pq_getmsgint(input_message, 4);
#if PG_MAJORVERSION_NUM < 16
    rinfo.spcNode = pq_getmsgint(input_message, 4);
    rinfo.dbNode = pq_getmsgint(input_message, 4);
    rinfo.relNode = pq_getmsgint(input_message, 4);
#else
    rinfo.spcOid = pq_getmsgint(input_message, 4);
    rinfo.dbOid = pq_getmsgint(input_message, 4);
    rinfo.relNumber = pq_getmsgint(input_message, 4);
#endif
    blknum = pq_getmsgint(input_message, 4);
    content = pq_getmsgbytes(input_message, BLCKSZ);

    buf = NeonRedoReadBuffer(rnode, forknum, blknum, RBM_ZERO_AND_LOCK);
    buf = NeonRedoReadBuffer(rinfo, forknum, blknum, RBM_ZERO_AND_LOCK);
    wal_redo_buffer = buf;
    page = BufferGetPage(buf);
    memcpy(page, content, BLCKSZ);
@@ -831,7 +854,7 @@ ApplyRecord(StringInfo input_message)
     */
    if (BufferIsInvalid(wal_redo_buffer))
    {
        wal_redo_buffer = NeonRedoReadBuffer(target_redo_tag.rnode,
        wal_redo_buffer = NeonRedoReadBuffer(BufTagGetNRelFileInfo(target_redo_tag),
                                             target_redo_tag.forkNum,
                                             target_redo_tag.blockNum,
                                             RBM_NORMAL);
@@ -878,26 +901,29 @@ static bool
redo_block_filter(XLogReaderState *record, uint8 block_id)
{
    BufferTag   target_tag;
    NRelFileInfo rinfo;

#if PG_VERSION_NUM >= 150000
    XLogRecGetBlockTag(record, block_id,
                       &target_tag.rnode, &target_tag.forkNum, &target_tag.blockNum);
                       &rinfo, &target_tag.forkNum, &target_tag.blockNum);
#else
    if (!XLogRecGetBlockTag(record, block_id,
                            &target_tag.rnode, &target_tag.forkNum, &target_tag.blockNum))
                            &rinfo, &target_tag.forkNum, &target_tag.blockNum))
    {
        /* Caller specified a bogus block_id */
        elog(PANIC, "failed to locate backup block with ID %d", block_id);
    }
#endif
    CopyNRelFileInfoToBufTag(target_tag, rinfo);

    /*
     * Can a WAL redo function ever access a relation other than the one that
     * it modifies? I don't see why it would.
     * Custom RMGRs may be affected by this.
     */
    if (!RelFileNodeEquals(target_tag.rnode, target_redo_tag.rnode))
    if (!RelFileInfoEquals(rinfo, BufTagGetNRelFileInfo(target_redo_tag)))
        elog(WARNING, "REDO accessing unexpected page: %u/%u/%u.%u blk %u",
             target_tag.rnode.spcNode, target_tag.rnode.dbNode, target_tag.rnode.relNode, target_tag.forkNum, target_tag.blockNum);
             RelFileInfoFmt(rinfo), target_tag.forkNum, target_tag.blockNum);

    /*
     * If this block isn't one we are currently restoring, then return 'true'
@@ -914,7 +940,7 @@ redo_block_filter(XLogReaderState *record, uint8 block_id)
static void
GetPage(StringInfo input_message)
{
    RelFileNode rnode;
    NRelFileInfo rinfo;
    ForkNumber  forknum;
    BlockNumber blknum;
    Buffer      buf;
@@ -931,14 +957,20 @@ GetPage(StringInfo input_message)
     * BlockNumber
     */
    forknum = pq_getmsgbyte(input_message);
    rnode.spcNode = pq_getmsgint(input_message, 4);
    rnode.dbNode = pq_getmsgint(input_message, 4);
    rnode.relNode = pq_getmsgint(input_message, 4);
#if PG_MAJORVERSION_NUM < 16
    rinfo.spcNode = pq_getmsgint(input_message, 4);
    rinfo.dbNode = pq_getmsgint(input_message, 4);
    rinfo.relNode = pq_getmsgint(input_message, 4);
#else
    rinfo.spcOid = pq_getmsgint(input_message, 4);
    rinfo.dbOid = pq_getmsgint(input_message, 4);
    rinfo.relNumber = pq_getmsgint(input_message, 4);
#endif
    blknum = pq_getmsgint(input_message, 4);

    /* FIXME: check that we got a BeginRedoForBlock message or this earlier */

    buf = NeonRedoReadBuffer(rnode, forknum, blknum, RBM_NORMAL);
    buf = NeonRedoReadBuffer(rinfo, forknum, blknum, RBM_NORMAL);
    Assert(buf == wal_redo_buffer);
    page = BufferGetPage(buf);
    /* single thread, so don't bother locking the page */
@@ -961,7 +993,7 @@ GetPage(StringInfo input_message)
    } while (tot_written < BLCKSZ);

    ReleaseBuffer(buf);
    DropRelFileNodeAllLocalBuffers(rnode);
    DropRelationAllLocalBuffers(rinfo);
    wal_redo_buffer = InvalidBuffer;

    elog(TRACE, "Page sent back for block %u", blknum);

@@ -11,7 +11,7 @@ use anyhow::{bail, Context, Result};
use bytes::Bytes;
use futures::future::BoxFuture;
use postgres_ffi::v14::xlog_utils::{IsPartialXLogFileName, IsXLogFileName, XLogFromFileName};
use postgres_ffi::{XLogSegNo, PG_TLI};
use postgres_ffi::{dispatch_pgversion, XLogSegNo, PG_TLI};
use remote_storage::RemotePath;
use std::cmp::{max, min};
use std::io::{self, SeekFrom};
@@ -138,19 +138,13 @@ impl PhysicalStorage {
        let write_lsn = if state.commit_lsn == Lsn(0) {
            Lsn(0)
        } else {
            match state.server.pg_version / 10000 {
                14 => postgres_ffi::v14::xlog_utils::find_end_of_wal(
                    &timeline_dir,
                    wal_seg_size,
                    state.commit_lsn,
                )?,
                15 => postgres_ffi::v15::xlog_utils::find_end_of_wal(
                    &timeline_dir,
                    wal_seg_size,
                    state.commit_lsn,
                )?,
                _ => bail!("unsupported postgres version: {}", state.server.pg_version),
            }
            let version = state.server.pg_version / 10000;

            dispatch_pgversion!(
                version,
                pgv::xlog_utils::find_end_of_wal(&timeline_dir, wal_seg_size, state.commit_lsn,)?,
                bail!("unsupported postgres version: {}", version)
            )
        };

        // TODO: do we really know that write_lsn is fully flushed to disk?

@@ -1583,6 +1583,7 @@ class NeonPageserver(PgProtocol):
            ".*took more than expected to complete.*",
            # these can happen during shutdown, but it should not be a reason to fail a test
            ".*completed, took longer than expected.*",
            '.*registered custom resource manager "neon".*',
        ]

    def start(

@@ -17,6 +17,7 @@ This fixture is used to determine which version of Postgres to use for tests.
class PgVersion(str, enum.Enum):
    V14 = "14"
    V15 = "15"
    V16 = "16"
    # Instead of making version an optional parameter in methods, we can use this fake entry
    # to explicitly rely on the default server version (could be different from pg_version fixture value)
    NOT_SET = "<-POSTRGRES VERSION IS NOT SET->"

@@ -0,0 +1,7 @@
{
    "public_extensions": [],
    "library_index": {
        "TODO": "We still need PG16 extensions"
    },
    "extension_data": {}
}
@@ -20,7 +20,7 @@ from fixtures.pageserver.utils import (
    wait_for_last_record_lsn,
    wait_for_upload,
)
from fixtures.pg_version import PgVersion
from fixtures.pg_version import PgVersion, skip_on_postgres
from fixtures.port_distributor import PortDistributor
from fixtures.remote_storage import LocalFsStorage, RemoteStorageKind, RemoteStorageUser
from fixtures.types import Lsn
@@ -151,6 +151,7 @@ def test_create_snapshot(
    shutil.copytree(test_output_dir, compatibility_snapshot_dir)


@skip_on_postgres(PgVersion.V16, reason="TODO: Enable after the first Postgres 16 release")
@check_ondisk_data_compatibility_if_enabled
@pytest.mark.xdist_group("compatibility")
@pytest.mark.order(after="test_create_snapshot")
@@ -208,6 +209,7 @@ def test_backward_compatibility(
    ), "Breaking changes are allowed by ALLOW_BACKWARD_COMPATIBILITY_BREAKAGE, but the test has passed without any breakage"


@skip_on_postgres(PgVersion.V16, reason="TODO: Enable after the first Postgres 16 release")
@check_ondisk_data_compatibility_if_enabled
@pytest.mark.xdist_group("compatibility")
@pytest.mark.order(after="test_create_snapshot")

@@ -9,7 +9,7 @@ from fixtures.log_helper import log
from fixtures.neon_fixtures import (
    NeonEnvBuilder,
)
from fixtures.pg_version import PgVersion
from fixtures.pg_version import PgVersion, skip_on_postgres
from fixtures.remote_storage import (
    RemoteStorageKind,
    S3Storage,
@@ -86,6 +86,7 @@ def upload_files(env):


# Test downloading remote extension.
@skip_on_postgres(PgVersion.V16, reason="TODO: PG16 extension building")
@pytest.mark.parametrize("remote_storage_kind", available_s3_storages())
@pytest.mark.skip(reason="https://github.com/neondatabase/neon/issues/4949")
def test_remote_extensions(
@@ -148,6 +149,7 @@ def test_remote_extensions(


# Test downloading remote library.
@skip_on_postgres(PgVersion.V16, reason="TODO: PG16 extension building")
@pytest.mark.parametrize("remote_storage_kind", available_s3_storages())
@pytest.mark.skip(reason="https://github.com/neondatabase/neon/issues/4949")
def test_remote_library(
@@ -206,6 +208,7 @@ def test_remote_library(
# RemoteStorageKind.REAL_S3 not in available_s3_storages(),
# reason="skipping test because real s3 not enabled",
# )
@skip_on_postgres(PgVersion.V16, reason="TODO: PG16 extension building")
@pytest.mark.skip(reason="https://github.com/neondatabase/neon/issues/4949")
def test_multiple_extensions_one_archive(
    neon_env_builder: NeonEnvBuilder,
@@ -251,7 +254,8 @@ def test_extension_download_after_restart(
    neon_env_builder: NeonEnvBuilder,
    pg_version: PgVersion,
):
    if "15" in pg_version:  # SKIP v15 for now because test set only has extension built for v14
    # TODO: PG15 + PG16 extension building
    if "v14" not in pg_version:  # test set only has extension built for v14
        return None

    neon_env_builder.enable_extensions_remote_storage(RemoteStorageKind.MOCK_S3)

1
vendor/postgres-v16
vendored
Submodule
Submodule vendor/postgres-v16 added at 7a50f139c6
1
vendor/revisions.json
vendored
@@ -1,4 +1,5 @@
{
    "postgres-v16": "7a50f139c6269454ab9260c7a9752874b9089943",
    "postgres-v15": "026d6b093d49e25cec44dd04598152329ceac027",
    "postgres-v14": "5d5cfee12783f0989a9c9fe13bb40b5585812568"
}