mirror of
https://github.com/neondatabase/neon.git
synced 2026-05-18 05:30:37 +00:00
Compare commits
47 Commits
sk/wal-lev
...
parallel-g
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
2359f69278 | ||
|
|
7235377e25 | ||
|
|
6b2bc7f775 | ||
|
|
6c97fc941a | ||
|
|
cb9b26776e | ||
|
|
684329d4d2 | ||
|
|
ed40a045c0 | ||
|
|
3f39327622 | ||
|
|
a50a7e8ac0 | ||
|
|
e28eda7939 | ||
|
|
f564dff0e3 | ||
|
|
d783889a1f | ||
|
|
2655bdbb2e | ||
|
|
b9152f1ef4 | ||
|
|
328ec1ce24 | ||
|
|
dcb79ef08f | ||
|
|
fd99e0fbc4 | ||
|
|
60ac227196 | ||
|
|
4a60051b0d | ||
|
|
24d3ed0952 | ||
|
|
0a87d71294 | ||
|
|
150bddb929 | ||
|
|
2b728bc69e | ||
|
|
5184685ced | ||
|
|
9ae4da4f31 | ||
|
|
aca221ac8b | ||
|
|
d013a2b227 | ||
|
|
3f93c6c6f0 | ||
|
|
53267969d7 | ||
|
|
c4b417ecdb | ||
|
|
1d105727cb | ||
|
|
4787a744c2 | ||
|
|
ac3ccac56c | ||
|
|
638af96c51 | ||
|
|
1e21ca1afe | ||
|
|
46d30bf054 | ||
|
|
d0105cea1f | ||
|
|
e44e4a699b | ||
|
|
223834a420 | ||
|
|
01778e37cc | ||
|
|
03190a2161 | ||
|
|
f87017c04d | ||
|
|
c11cbf0f5c | ||
|
|
f30ef00439 | ||
|
|
dbe5b52494 | ||
|
|
4131a6efae | ||
|
|
03695261fc |
@@ -190,7 +190,7 @@ runs:
|
||||
prefix: latest
|
||||
|
||||
- name: Create Allure report
|
||||
if: always()
|
||||
if: success() || failure()
|
||||
uses: ./.github/actions/allure-report
|
||||
with:
|
||||
action: store
|
||||
|
||||
4
.github/ansible/production.hosts.yaml
vendored
4
.github/ansible/production.hosts.yaml
vendored
@@ -22,6 +22,10 @@ storage:
|
||||
console_region_id: aws-us-west-2
|
||||
zenith-1-ps-3:
|
||||
console_region_id: aws-us-west-2
|
||||
zenith-1-ps-4:
|
||||
console_region_id: aws-us-west-2
|
||||
zenith-1-ps-5:
|
||||
console_region_id: aws-us-west-2
|
||||
|
||||
safekeepers:
|
||||
hosts:
|
||||
|
||||
2
.github/ansible/staging.hosts.yaml
vendored
2
.github/ansible/staging.hosts.yaml
vendored
@@ -3,7 +3,7 @@ storage:
|
||||
bucket_name: zenith-staging-storage-us-east-1
|
||||
bucket_region: us-east-1
|
||||
console_mgmt_base_url: http://console-staging.local
|
||||
etcd_endpoints: zenith-us-stage-etcd.local:2379
|
||||
etcd_endpoints: etcd-0.us-east-2.aws.neon.build:2379
|
||||
pageserver_config_stub:
|
||||
pg_distrib_dir: /usr/local
|
||||
remote_storage:
|
||||
|
||||
13
.github/workflows/benchmarking.yml
vendored
13
.github/workflows/benchmarking.yml
vendored
@@ -144,7 +144,9 @@ jobs:
|
||||
# neon-captest-new: Run pgbench in a freshly created project
|
||||
# neon-captest-reuse: Same, but reusing existing project
|
||||
# neon-captest-prefetch: Same, with prefetching enabled (new project)
|
||||
platform: [ neon-captest-new, neon-captest-reuse, neon-captest-prefetch ]
|
||||
# rds-aurora: Aurora Postgres Serverless v2 with autoscaling from 0.5 to 2 ACUs
|
||||
# rds-postgres: RDS Postgres db.m5.large instance (2 vCPU, 8 GiB) with gp3 EBS storage
|
||||
platform: [ neon-captest-new, neon-captest-reuse, neon-captest-prefetch, rds-postgres ]
|
||||
db_size: [ 10gb ]
|
||||
include:
|
||||
- platform: neon-captest-new
|
||||
@@ -164,7 +166,7 @@ jobs:
|
||||
SAVE_PERF_REPORT: ${{ github.event.inputs.save_perf_report || ( github.ref == 'refs/heads/main' ) }}
|
||||
PLATFORM: ${{ matrix.platform }}
|
||||
|
||||
runs-on: dev
|
||||
runs-on: [ self-hosted, dev, x64 ]
|
||||
container:
|
||||
image: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/rustlegacy:pinned
|
||||
options: --init
|
||||
@@ -207,8 +209,11 @@ jobs:
|
||||
rds-aurora)
|
||||
CONNSTR=${{ secrets.BENCHMARK_RDS_CONNSTR }}
|
||||
;;
|
||||
rds-postgres)
|
||||
CONNSTR=${{ secrets.BENCHMARK_RDS_POSTGRES_CONNSTR }}
|
||||
;;
|
||||
*)
|
||||
echo 2>&1 "Unknown PLATFORM=${PLATFORM}. Allowed only 'neon-captest-reuse', 'neon-captest-new', 'neon-captest-prefetch' or 'rds-aurora'"
|
||||
echo 2>&1 "Unknown PLATFORM=${PLATFORM}. Allowed only 'neon-captest-reuse', 'neon-captest-new', 'neon-captest-prefetch', 'rds-aurora', or 'rds-postgres'"
|
||||
exit 1
|
||||
;;
|
||||
esac
|
||||
@@ -265,7 +270,7 @@ jobs:
|
||||
PERF_TEST_RESULT_CONNSTR: "${{ secrets.PERF_TEST_RESULT_CONNSTR }}"
|
||||
|
||||
- name: Create Allure report
|
||||
if: always()
|
||||
if: success() || failure()
|
||||
uses: ./.github/actions/allure-report
|
||||
with:
|
||||
action: generate
|
||||
|
||||
117
.github/workflows/build_and_test.yml
vendored
117
.github/workflows/build_and_test.yml
vendored
@@ -18,8 +18,8 @@ env:
|
||||
|
||||
jobs:
|
||||
tag:
|
||||
runs-on: dev
|
||||
container: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/base:latest
|
||||
runs-on: [ self-hosted, dev, x64 ]
|
||||
container: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/base:pinned
|
||||
outputs:
|
||||
build-tag: ${{steps.build-tag.outputs.tag}}
|
||||
|
||||
@@ -46,7 +46,7 @@ jobs:
|
||||
id: build-tag
|
||||
|
||||
build-neon:
|
||||
runs-on: dev
|
||||
runs-on: [ self-hosted, dev, x64 ]
|
||||
container:
|
||||
image: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/rust:pinned
|
||||
options: --init
|
||||
@@ -236,7 +236,7 @@ jobs:
|
||||
uses: ./.github/actions/save-coverage-data
|
||||
|
||||
regress-tests:
|
||||
runs-on: dev
|
||||
runs-on: [ self-hosted, dev, x64 ]
|
||||
container:
|
||||
image: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/rust:pinned
|
||||
options: --init
|
||||
@@ -269,7 +269,7 @@ jobs:
|
||||
uses: ./.github/actions/save-coverage-data
|
||||
|
||||
benchmarks:
|
||||
runs-on: dev
|
||||
runs-on: [ self-hosted, dev, x64 ]
|
||||
container:
|
||||
image: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/rust:pinned
|
||||
options: --init
|
||||
@@ -300,12 +300,12 @@ jobs:
|
||||
# while coverage is currently collected for the debug ones
|
||||
|
||||
merge-allure-report:
|
||||
runs-on: dev
|
||||
runs-on: [ self-hosted, dev, x64 ]
|
||||
container:
|
||||
image: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/rust:pinned
|
||||
options: --init
|
||||
needs: [ regress-tests, benchmarks ]
|
||||
if: always()
|
||||
if: success() || failure()
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
@@ -338,7 +338,7 @@ jobs:
|
||||
DATABASE_URL="$TEST_RESULT_CONNSTR" poetry run python3 scripts/ingest_regress_test_result.py --revision ${SHA} --reference ${GITHUB_REF} --build-type ${BUILD_TYPE} --ingest suites.json
|
||||
|
||||
coverage-report:
|
||||
runs-on: dev
|
||||
runs-on: [ self-hosted, dev, x64 ]
|
||||
container:
|
||||
image: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/rust:pinned
|
||||
options: --init
|
||||
@@ -415,7 +415,7 @@ jobs:
|
||||
shell: bash -euxo pipefail {0}
|
||||
|
||||
trigger-e2e-tests:
|
||||
runs-on: dev
|
||||
runs-on: [ self-hosted, dev, x64 ]
|
||||
container:
|
||||
image: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/base:pinned
|
||||
options: --init
|
||||
@@ -460,7 +460,7 @@ jobs:
|
||||
}"
|
||||
|
||||
neon-image:
|
||||
runs-on: dev
|
||||
runs-on: [ self-hosted, dev, x64 ]
|
||||
needs: [ tag ]
|
||||
container: gcr.io/kaniko-project/executor:v1.9.0-debug
|
||||
|
||||
@@ -478,7 +478,7 @@ jobs:
|
||||
run: /kaniko/executor --snapshotMode=redo --cache=true --cache-repo 369495373322.dkr.ecr.eu-central-1.amazonaws.com/cache --snapshotMode=redo --context . --build-arg GIT_VERSION=${{ github.sha }} --destination 369495373322.dkr.ecr.eu-central-1.amazonaws.com/neon:${{needs.tag.outputs.build-tag}}
|
||||
|
||||
compute-tools-image:
|
||||
runs-on: dev
|
||||
runs-on: [ self-hosted, dev, x64 ]
|
||||
needs: [ tag ]
|
||||
container: gcr.io/kaniko-project/executor:v1.9.0-debug
|
||||
|
||||
@@ -492,28 +492,8 @@ jobs:
|
||||
- name: Kaniko build compute tools
|
||||
run: /kaniko/executor --snapshotMode=redo --cache=true --cache-repo 369495373322.dkr.ecr.eu-central-1.amazonaws.com/cache --snapshotMode=redo --context . --build-arg GIT_VERSION=${{ github.sha }} --dockerfile Dockerfile.compute-tools --destination 369495373322.dkr.ecr.eu-central-1.amazonaws.com/compute-tools:${{needs.tag.outputs.build-tag}}
|
||||
|
||||
compute-node-image:
|
||||
runs-on: dev
|
||||
container: gcr.io/kaniko-project/executor:v1.9.0-debug
|
||||
needs: [ tag ]
|
||||
steps:
|
||||
- name: Checkout
|
||||
uses: actions/checkout@v1 # v3 won't work with kaniko
|
||||
with:
|
||||
submodules: true
|
||||
fetch-depth: 0
|
||||
|
||||
- name: Configure ECR login
|
||||
run: echo "{\"credsStore\":\"ecr-login\"}" > /kaniko/.docker/config.json
|
||||
|
||||
# compute-node uses postgres 14, which is default now
|
||||
# cloud repo depends on this image name, thus duplicating it
|
||||
# remove compute-node when cloud repo is updated
|
||||
- name: Kaniko build compute node with extensions v14 (compatibility)
|
||||
run: /kaniko/executor --skip-unused-stages --snapshotMode=redo --cache=true --cache-repo 369495373322.dkr.ecr.eu-central-1.amazonaws.com/cache --snapshotMode=redo --context . --build-arg GIT_VERSION=${{ github.sha }} --dockerfile Dockerfile.compute-node-v14 --destination 369495373322.dkr.ecr.eu-central-1.amazonaws.com/compute-node:${{needs.tag.outputs.build-tag}}
|
||||
|
||||
compute-node-image-v14:
|
||||
runs-on: dev
|
||||
runs-on: [ self-hosted, dev, x64 ]
|
||||
container: gcr.io/kaniko-project/executor:v1.9.0-debug
|
||||
needs: [ tag ]
|
||||
steps:
|
||||
@@ -529,9 +509,8 @@ jobs:
|
||||
- name: Kaniko build compute node with extensions v14
|
||||
run: /kaniko/executor --skip-unused-stages --snapshotMode=redo --cache=true --cache-repo 369495373322.dkr.ecr.eu-central-1.amazonaws.com/cache --context . --build-arg GIT_VERSION=${{ github.sha }} --dockerfile Dockerfile.compute-node-v14 --destination 369495373322.dkr.ecr.eu-central-1.amazonaws.com/compute-node-v14:${{needs.tag.outputs.build-tag}}
|
||||
|
||||
|
||||
compute-node-image-v15:
|
||||
runs-on: dev
|
||||
runs-on: [ self-hosted, dev, x64 ]
|
||||
container: gcr.io/kaniko-project/executor:v1.9.0-debug
|
||||
needs: [ tag ]
|
||||
steps:
|
||||
@@ -547,18 +526,58 @@ jobs:
|
||||
- name: Kaniko build compute node with extensions v15
|
||||
run: /kaniko/executor --skip-unused-stages --snapshotMode=redo --cache=true --cache-repo 369495373322.dkr.ecr.eu-central-1.amazonaws.com/cache --context . --build-arg GIT_VERSION=${{ github.sha }} --dockerfile Dockerfile.compute-node-v15 --destination 369495373322.dkr.ecr.eu-central-1.amazonaws.com/compute-node-v15:${{needs.tag.outputs.build-tag}}
|
||||
|
||||
test-images:
|
||||
needs: [ tag, neon-image, compute-node-image-v14, compute-node-image-v15, compute-tools-image ]
|
||||
runs-on: [ self-hosted, dev, x64 ]
|
||||
|
||||
steps:
|
||||
- name: Checkout
|
||||
uses: actions/checkout@v3
|
||||
with:
|
||||
fetch-depth: 0
|
||||
|
||||
# `neondatabase/neon` contains multiple binaries, all of them use the same input for the version into the same version formatting library.
|
||||
# Pick pageserver as currently the only binary with extra "version" features printed in the string to verify.
|
||||
# Regular pageserver version string looks like
|
||||
# Neon page server git-env:32d14403bd6ab4f4520a94cbfd81a6acef7a526c failpoints: true, features: []
|
||||
# Bad versions might loop like:
|
||||
# Neon page server git-env:local failpoints: true, features: ["testing"]
|
||||
# Ensure that we don't have bad versions.
|
||||
- name: Verify image versions
|
||||
shell: bash # ensure no set -e for better error messages
|
||||
run: |
|
||||
pageserver_version=$(docker run --rm 369495373322.dkr.ecr.eu-central-1.amazonaws.com/neon:${{needs.tag.outputs.build-tag}} "/bin/sh" "-c" "/usr/local/bin/pageserver --version")
|
||||
|
||||
echo "Pageserver version string: $pageserver_version"
|
||||
|
||||
if ! echo "$pageserver_version" | grep -qv 'git-env:local' ; then
|
||||
echo "Pageserver version should not be the default Dockerfile one"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
if ! echo "$pageserver_version" | grep -qv '"testing"' ; then
|
||||
echo "Pageserver version should have no testing feature enabled"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
- name: Verify docker-compose example
|
||||
run: env REPOSITORY=369495373322.dkr.ecr.eu-central-1.amazonaws.com TAG=${{needs.tag.outputs.build-tag}} ./docker-compose/docker_compose_test.sh
|
||||
|
||||
- name: Print logs and clean up
|
||||
if: always()
|
||||
run: |
|
||||
docker compose -f ./docker-compose/docker-compose.yml logs || 0
|
||||
docker compose -f ./docker-compose/docker-compose.yml down
|
||||
|
||||
promote-images:
|
||||
runs-on: dev
|
||||
needs: [ tag, neon-image, compute-node-image, compute-node-image-v14, compute-node-image-v15, compute-tools-image ]
|
||||
runs-on: [ self-hosted, dev, x64 ]
|
||||
needs: [ tag, test-images ]
|
||||
if: github.event_name != 'workflow_dispatch'
|
||||
container: amazon/aws-cli
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
# compute-node uses postgres 14, which is default now
|
||||
# cloud repo depends on this image name, thus duplicating it
|
||||
# remove compute-node when cloud repo is updated
|
||||
name: [ neon, compute-node, compute-node-v14, compute-node-v15, compute-tools ]
|
||||
name: [ neon, compute-node-v14, compute-node-v15, compute-tools ]
|
||||
|
||||
steps:
|
||||
- name: Promote image to latest
|
||||
@@ -567,7 +586,7 @@ jobs:
|
||||
aws ecr put-image --repository-name ${{ matrix.name }} --image-tag latest --image-manifest "$MANIFEST"
|
||||
|
||||
push-docker-hub:
|
||||
runs-on: dev
|
||||
runs-on: [ self-hosted, dev, x64 ]
|
||||
needs: [ promote-images, tag ]
|
||||
container: golang:1.19-bullseye
|
||||
|
||||
@@ -588,9 +607,6 @@ jobs:
|
||||
- name: Pull compute tools image from ECR
|
||||
run: crane pull 369495373322.dkr.ecr.eu-central-1.amazonaws.com/compute-tools:${{needs.tag.outputs.build-tag}} compute-tools
|
||||
|
||||
- name: Pull compute node image from ECR
|
||||
run: crane pull 369495373322.dkr.ecr.eu-central-1.amazonaws.com/compute-node:${{needs.tag.outputs.build-tag}} compute-node
|
||||
|
||||
- name: Pull compute node v14 image from ECR
|
||||
run: crane pull 369495373322.dkr.ecr.eu-central-1.amazonaws.com/compute-node-v14:${{needs.tag.outputs.build-tag}} compute-node-v14
|
||||
|
||||
@@ -607,7 +623,6 @@ jobs:
|
||||
run: |
|
||||
crane copy 369495373322.dkr.ecr.eu-central-1.amazonaws.com/neon:${{needs.tag.outputs.build-tag}} 093970136003.dkr.ecr.eu-central-1.amazonaws.com/neon:latest
|
||||
crane copy 369495373322.dkr.ecr.eu-central-1.amazonaws.com/compute-tools:${{needs.tag.outputs.build-tag}} 093970136003.dkr.ecr.eu-central-1.amazonaws.com/compute-tools:latest
|
||||
crane copy 369495373322.dkr.ecr.eu-central-1.amazonaws.com/compute-node:${{needs.tag.outputs.build-tag}} 093970136003.dkr.ecr.eu-central-1.amazonaws.com/compute-node:latest
|
||||
crane copy 369495373322.dkr.ecr.eu-central-1.amazonaws.com/compute-node-v14:${{needs.tag.outputs.build-tag}} 093970136003.dkr.ecr.eu-central-1.amazonaws.com/compute-node-v14:latest
|
||||
crane copy 369495373322.dkr.ecr.eu-central-1.amazonaws.com/compute-node-v15:${{needs.tag.outputs.build-tag}} 093970136003.dkr.ecr.eu-central-1.amazonaws.com/compute-node-v15:latest
|
||||
|
||||
@@ -623,9 +638,6 @@ jobs:
|
||||
- name: Push compute tools image to Docker Hub
|
||||
run: crane push compute-tools neondatabase/compute-tools:${{needs.tag.outputs.build-tag}}
|
||||
|
||||
- name: Push compute node image to Docker Hub
|
||||
run: crane push compute-node neondatabase/compute-node:${{needs.tag.outputs.build-tag}}
|
||||
|
||||
- name: Push compute node v14 image to Docker Hub
|
||||
run: crane push compute-node-v14 neondatabase/compute-node-v14:${{needs.tag.outputs.build-tag}}
|
||||
|
||||
@@ -642,7 +654,6 @@ jobs:
|
||||
run: |
|
||||
crane tag neondatabase/neon:${{needs.tag.outputs.build-tag}} latest
|
||||
crane tag neondatabase/compute-tools:${{needs.tag.outputs.build-tag}} latest
|
||||
crane tag neondatabase/compute-node:${{needs.tag.outputs.build-tag}} latest
|
||||
crane tag neondatabase/compute-node-v14:${{needs.tag.outputs.build-tag}} latest
|
||||
crane tag neondatabase/compute-node-v15:${{needs.tag.outputs.build-tag}} latest
|
||||
|
||||
@@ -725,7 +736,7 @@ jobs:
|
||||
rm -f neon_install.tar.gz .neon_current_version
|
||||
|
||||
deploy-new:
|
||||
runs-on: dev
|
||||
runs-on: [ self-hosted, dev, x64 ]
|
||||
container: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/ansible:pinned
|
||||
# We need both storage **and** compute images for deploy, because control plane picks the compute version based on the storage version.
|
||||
# If it notices a fresh storage it may bump the compute version. And if compute image failed to build it may break things badly
|
||||
@@ -805,7 +816,7 @@ jobs:
|
||||
rm -f neon_install.tar.gz .neon_current_version
|
||||
|
||||
deploy-proxy:
|
||||
runs-on: dev
|
||||
runs-on: [ self-hosted, dev, x64 ]
|
||||
container: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/base:latest
|
||||
# Compute image isn't strictly required for proxy deploy, but let's still wait for it to run all deploy jobs consistently.
|
||||
needs: [ push-docker-hub, calculate-deploy-targets, tag, regress-tests ]
|
||||
@@ -847,7 +858,7 @@ jobs:
|
||||
helm upgrade ${{ matrix.proxy_job }}-scram neondatabase/neon-proxy --namespace neon-proxy --install -f .github/helm-values/${{ matrix.proxy_config }}-scram.yaml --set image.tag=${DOCKER_TAG} --wait --timeout 15m0s
|
||||
|
||||
deploy-proxy-new:
|
||||
runs-on: dev
|
||||
runs-on: [ self-hosted, dev, x64 ]
|
||||
container: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/ansible:pinned
|
||||
# Compute image isn't strictly required for proxy deploy, but let's still wait for it to run all deploy jobs consistently.
|
||||
needs: [ push-docker-hub, calculate-deploy-targets, tag, regress-tests ]
|
||||
@@ -917,7 +928,7 @@ jobs:
|
||||
helm upgrade neon-proxy-scram neondatabase/neon-proxy --namespace neon-proxy --create-namespace --install -f .github/helm-values/${{ matrix.target_cluster }}.neon-proxy-scram.yaml --set image.tag=${DOCKER_TAG} --wait --timeout 15m0s
|
||||
|
||||
promote-compatibility-data:
|
||||
runs-on: dev
|
||||
runs-on: [ self-hosted, dev, x64 ]
|
||||
container:
|
||||
image: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/rust:pinned
|
||||
options: --init
|
||||
|
||||
2
.github/workflows/codestyle.yml
vendored
2
.github/workflows/codestyle.yml
vendored
@@ -115,7 +115,7 @@ jobs:
|
||||
run: cargo build --locked --all --all-targets
|
||||
|
||||
check-rust-dependencies:
|
||||
runs-on: dev
|
||||
runs-on: [ self-hosted, dev, x64 ]
|
||||
container:
|
||||
image: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/rust:pinned
|
||||
options: --init
|
||||
|
||||
@@ -8,3 +8,4 @@
|
||||
/pgxn/ @neondatabase/compute
|
||||
/proxy/ @neondatabase/control-plane
|
||||
/safekeeper/ @neondatabase/safekeepers
|
||||
/vendor/ @neondatabase/compute
|
||||
|
||||
@@ -25,6 +25,10 @@ members = [
|
||||
# Besides, debug info should not affect the performance.
|
||||
debug = true
|
||||
|
||||
# disable debug symbols for all packages except this one to decrease binaries size
|
||||
[profile.release.package."*"]
|
||||
debug = false
|
||||
|
||||
[profile.release-line-debug]
|
||||
inherits = "release"
|
||||
debug = 1 # true = 2 = all symbols, 1 = line only
|
||||
|
||||
@@ -1,88 +0,0 @@
|
||||
#
|
||||
# Legacy version of the Dockerfile for the compute node.
|
||||
# Used by e2e CI. Building Dockerfile.compute-node will take
|
||||
# unreasonable ammount of time without v2 runners.
|
||||
#
|
||||
# TODO: remove once cloud repo CI is moved to v2 runners.
|
||||
#
|
||||
|
||||
|
||||
# Allow specifiyng different compute-tools tag and image repo, so we are
|
||||
# able to use different images
|
||||
ARG REPOSITORY=369495373322.dkr.ecr.eu-central-1.amazonaws.com
|
||||
ARG IMAGE=compute-tools
|
||||
ARG TAG=latest
|
||||
|
||||
#
|
||||
# Image with pre-built tools
|
||||
#
|
||||
FROM $REPOSITORY/$IMAGE:$TAG AS compute-deps
|
||||
# Only to get ready compute_ctl binary as deppendency
|
||||
|
||||
#
|
||||
# Image with Postgres build deps
|
||||
#
|
||||
FROM debian:bullseye-slim AS build-deps
|
||||
|
||||
RUN apt-get update && apt-get -yq install automake libtool build-essential bison flex libreadline-dev zlib1g-dev libxml2-dev \
|
||||
libcurl4-openssl-dev libossp-uuid-dev
|
||||
|
||||
#
|
||||
# Image with built Postgres
|
||||
#
|
||||
FROM build-deps AS pg-build
|
||||
|
||||
# Add user postgres
|
||||
RUN adduser postgres
|
||||
RUN mkdir /pg && chown postgres:postgres /pg
|
||||
|
||||
# Copy source files
|
||||
# version 14 is default for now
|
||||
COPY ./vendor/postgres-v14 /pg/
|
||||
COPY ./pgxn /pg/
|
||||
|
||||
# Build and install Postgres locally
|
||||
RUN mkdir /pg/compute_build && cd /pg/compute_build && \
|
||||
../configure CFLAGS='-O2 -g3' --prefix=$(pwd)/postgres_bin --enable-debug --with-uuid=ossp && \
|
||||
# Install main binaries and contribs
|
||||
make MAKELEVEL=0 -j $(getconf _NPROCESSORS_ONLN) -s install && \
|
||||
make MAKELEVEL=0 -j $(getconf _NPROCESSORS_ONLN) -s -C contrib/ install && \
|
||||
# Install headers
|
||||
make MAKELEVEL=0 -j $(getconf _NPROCESSORS_ONLN) -s -C src/include install
|
||||
|
||||
# Install neon contrib
|
||||
RUN make MAKELEVEL=0 PG_CONFIG=/pg/compute_build/postgres_bin/bin/pg_config -j $(getconf _NPROCESSORS_ONLN) -C /pg/neon install
|
||||
|
||||
USER postgres
|
||||
WORKDIR /pg
|
||||
|
||||
#
|
||||
# Final compute node image to be exported
|
||||
#
|
||||
FROM debian:bullseye-slim
|
||||
|
||||
# libreadline-dev is required to run psql
|
||||
RUN apt-get update && apt-get -yq install libreadline-dev libossp-uuid-dev
|
||||
|
||||
# Add user postgres
|
||||
RUN mkdir /var/db && useradd -m -d /var/db/postgres postgres && \
|
||||
echo "postgres:test_console_pass" | chpasswd && \
|
||||
mkdir /var/db/postgres/compute && mkdir /var/db/postgres/specs && \
|
||||
chown -R postgres:postgres /var/db/postgres && \
|
||||
chmod 0750 /var/db/postgres/compute
|
||||
|
||||
# Copy ready Postgres binaries
|
||||
COPY --from=pg-build /pg/compute_build/postgres_bin /usr/local
|
||||
|
||||
# Copy binaries from compute-tools
|
||||
COPY --from=compute-deps /usr/local/bin/compute_ctl /usr/local/bin/compute_ctl
|
||||
|
||||
# XXX: temporary symlink for compatibility with old control-plane
|
||||
RUN ln -s /usr/local/bin/compute_ctl /usr/local/bin/zenith_ctl
|
||||
|
||||
# Add postgres shared objects to the search path
|
||||
RUN echo '/usr/local/lib' >> /etc/ld.so.conf && /sbin/ldconfig
|
||||
|
||||
USER postgres
|
||||
|
||||
ENTRYPOINT ["/usr/local/bin/compute_ctl"]
|
||||
28
Makefile
28
Makefile
@@ -20,18 +20,18 @@ else
|
||||
$(error Bad build type '$(BUILD_TYPE)', see Makefile for options)
|
||||
endif
|
||||
|
||||
# Seccomp BPF is only available for Linux
|
||||
UNAME_S := $(shell uname -s)
|
||||
ifeq ($(UNAME_S),Linux)
|
||||
# Seccomp BPF is only available for Linux
|
||||
PG_CONFIGURE_OPTS += --with-libseccomp
|
||||
endif
|
||||
|
||||
# macOS with brew-installed openssl requires explicit paths
|
||||
# It can be configured with OPENSSL_PREFIX variable
|
||||
UNAME_S := $(shell uname -s)
|
||||
ifeq ($(UNAME_S),Darwin)
|
||||
OPENSSL_PREFIX ?= $(shell brew --prefix openssl@3)
|
||||
PG_CONFIGURE_OPTS += --with-includes=$(OPENSSL_PREFIX)/include --with-libraries=$(OPENSSL_PREFIX)/lib
|
||||
else ifeq ($(UNAME_S),Darwin)
|
||||
# macOS with brew-installed openssl requires explicit paths
|
||||
# It can be configured with OPENSSL_PREFIX variable
|
||||
OPENSSL_PREFIX ?= $(shell brew --prefix openssl@3)
|
||||
PG_CONFIGURE_OPTS += --with-includes=$(OPENSSL_PREFIX)/include --with-libraries=$(OPENSSL_PREFIX)/lib
|
||||
# macOS already has bison and flex in the system, but they are old and result in postgres-v14 target failure
|
||||
# brew formulae are keg-only and not symlinked into HOMEBREW_PREFIX, force their usage
|
||||
EXTRA_PATH_OVERRIDES += $(shell brew --prefix bison)/bin/:$(shell brew --prefix flex)/bin/:
|
||||
endif
|
||||
|
||||
# Use -C option so that when PostgreSQL "make install" installs the
|
||||
@@ -73,7 +73,8 @@ $(POSTGRES_INSTALL_DIR)/build/v14/config.status:
|
||||
+@echo "Configuring Postgres v14 build"
|
||||
mkdir -p $(POSTGRES_INSTALL_DIR)/build/v14
|
||||
(cd $(POSTGRES_INSTALL_DIR)/build/v14 && \
|
||||
$(ROOT_PROJECT_DIR)/vendor/postgres-v14/configure CFLAGS='$(PG_CFLAGS)' \
|
||||
env PATH="$(EXTRA_PATH_OVERRIDES):$$PATH" $(ROOT_PROJECT_DIR)/vendor/postgres-v14/configure \
|
||||
CFLAGS='$(PG_CFLAGS)' \
|
||||
$(PG_CONFIGURE_OPTS) \
|
||||
--prefix=$(abspath $(POSTGRES_INSTALL_DIR))/v14 > configure.log)
|
||||
|
||||
@@ -81,7 +82,8 @@ $(POSTGRES_INSTALL_DIR)/build/v15/config.status:
|
||||
+@echo "Configuring Postgres v15 build"
|
||||
mkdir -p $(POSTGRES_INSTALL_DIR)/build/v15
|
||||
(cd $(POSTGRES_INSTALL_DIR)/build/v15 && \
|
||||
$(ROOT_PROJECT_DIR)/vendor/postgres-v15/configure CFLAGS='$(PG_CFLAGS)' \
|
||||
env PATH="$(EXTRA_PATH_OVERRIDES):$$PATH" $(ROOT_PROJECT_DIR)/vendor/postgres-v15/configure \
|
||||
CFLAGS='$(PG_CFLAGS)' \
|
||||
$(PG_CONFIGURE_OPTS) \
|
||||
--prefix=$(abspath $(POSTGRES_INSTALL_DIR))/v15 > configure.log)
|
||||
|
||||
@@ -111,6 +113,8 @@ postgres-v14: postgres-v14-configure \
|
||||
$(MAKE) -C $(POSTGRES_INSTALL_DIR)/build/v14 MAKELEVEL=0 install
|
||||
+@echo "Compiling libpq v14"
|
||||
$(MAKE) -C $(POSTGRES_INSTALL_DIR)/build/v14/src/interfaces/libpq install
|
||||
+@echo "Compiling pg_prewarm v14"
|
||||
$(MAKE) -C $(POSTGRES_INSTALL_DIR)/build/v14/contrib/pg_prewarm install
|
||||
+@echo "Compiling pg_buffercache v14"
|
||||
$(MAKE) -C $(POSTGRES_INSTALL_DIR)/build/v14/contrib/pg_buffercache install
|
||||
+@echo "Compiling pageinspect v14"
|
||||
@@ -123,6 +127,8 @@ postgres-v15: postgres-v15-configure \
|
||||
$(MAKE) -C $(POSTGRES_INSTALL_DIR)/build/v15 MAKELEVEL=0 install
|
||||
+@echo "Compiling libpq v15"
|
||||
$(MAKE) -C $(POSTGRES_INSTALL_DIR)/build/v15/src/interfaces/libpq install
|
||||
+@echo "Compiling pg_prewarm v15"
|
||||
$(MAKE) -C $(POSTGRES_INSTALL_DIR)/build/v15/contrib/pg_prewarm install
|
||||
+@echo "Compiling pg_buffercache v15"
|
||||
$(MAKE) -C $(POSTGRES_INSTALL_DIR)/build/v15/contrib/pg_buffercache install
|
||||
+@echo "Compiling pageinspect v15"
|
||||
|
||||
25
README.md
25
README.md
@@ -53,7 +53,7 @@ curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh
|
||||
1. Install XCode and dependencies
|
||||
```
|
||||
xcode-select --install
|
||||
brew install protobuf etcd openssl
|
||||
brew install protobuf etcd openssl flex bison
|
||||
```
|
||||
|
||||
2. [Install Rust](https://www.rust-lang.org/tools/install)
|
||||
@@ -125,24 +125,23 @@ Python (3.9 or higher), and install python3 packages using `./scripts/pysync` (r
|
||||
# Create repository in .neon with proper paths to binaries and data
|
||||
# Later that would be responsibility of a package install script
|
||||
> ./target/debug/neon_local init
|
||||
Starting pageserver at '127.0.0.1:64000' in '.neon'
|
||||
|
||||
Pageserver started
|
||||
Successfully initialized timeline 7dd0907914ac399ff3be45fb252bfdb7
|
||||
Stopping pageserver gracefully...done!
|
||||
Starting pageserver at '127.0.0.1:64000' in '.neon'.
|
||||
pageserver started, pid: 2545906
|
||||
Successfully initialized timeline de200bd42b49cc1814412c7e592dd6e9
|
||||
Stopped pageserver 1 process with pid 2545906
|
||||
|
||||
# start pageserver and safekeeper
|
||||
> ./target/debug/neon_local start
|
||||
Starting etcd broker using /usr/bin/etcd
|
||||
Starting pageserver at '127.0.0.1:64000' in '.neon'
|
||||
|
||||
Pageserver started
|
||||
Starting safekeeper at '127.0.0.1:5454' in '.neon/safekeepers/sk1'
|
||||
Safekeeper started
|
||||
Starting etcd broker using "/usr/bin/etcd"
|
||||
etcd started, pid: 2545996
|
||||
Starting pageserver at '127.0.0.1:64000' in '.neon'.
|
||||
pageserver started, pid: 2546005
|
||||
Starting safekeeper at '127.0.0.1:5454' in '.neon/safekeepers/sk1'.
|
||||
safekeeper 1 started, pid: 2546041
|
||||
|
||||
# start postgres compute node
|
||||
> ./target/debug/neon_local pg start main
|
||||
Starting new postgres main on timeline de200bd42b49cc1814412c7e592dd6e9 ...
|
||||
Starting new postgres (v14) main on timeline de200bd42b49cc1814412c7e592dd6e9 ...
|
||||
Extracting base backup to create postgres instance: path=.neon/pgdatadirs/tenants/9ef87a5bf0d92544f6fafeeb3239695c/main port=55432
|
||||
Starting postgres node at 'host=127.0.0.1 port=55432 user=cloud_admin dbname=postgres'
|
||||
|
||||
|
||||
@@ -26,8 +26,18 @@ use nix::unistd::Pid;
|
||||
|
||||
use utils::lock_file;
|
||||
|
||||
const RETRIES: u32 = 15;
|
||||
const RETRY_TIMEOUT_MILLIS: u64 = 500;
|
||||
// These constants control the loop used to poll for process start / stop.
|
||||
//
|
||||
// The loop waits for at most 10 seconds, polling every 100 ms.
|
||||
// Once a second, it prints a dot ("."), to give the user an indication that
|
||||
// it's waiting. If the process hasn't started/stopped after 5 seconds,
|
||||
// it prints a notice that it's taking long, but keeps waiting.
|
||||
//
|
||||
const RETRY_UNTIL_SECS: u64 = 10;
|
||||
const RETRIES: u64 = (RETRY_UNTIL_SECS * 1000) / RETRY_INTERVAL_MILLIS;
|
||||
const RETRY_INTERVAL_MILLIS: u64 = 100;
|
||||
const DOT_EVERY_RETRIES: u64 = 10;
|
||||
const NOTICE_AFTER_RETRIES: u64 = 50;
|
||||
|
||||
/// Argument to `start_process`, to indicate whether it should create pidfile or if the process creates
|
||||
/// it itself.
|
||||
@@ -107,16 +117,16 @@ where
|
||||
return Ok(spawned_process);
|
||||
}
|
||||
Ok(false) => {
|
||||
if retries < 5 {
|
||||
if retries == NOTICE_AFTER_RETRIES {
|
||||
// The process is taking a long time to start up. Keep waiting, but
|
||||
// print a message
|
||||
print!("\n{process_name} has not started yet, continuing to wait");
|
||||
}
|
||||
if retries % DOT_EVERY_RETRIES == 0 {
|
||||
print!(".");
|
||||
io::stdout().flush().unwrap();
|
||||
} else {
|
||||
if retries == 5 {
|
||||
println!() // put a line break after dots for second message
|
||||
}
|
||||
println!("{process_name} has not started yet, retrying ({retries})...");
|
||||
}
|
||||
thread::sleep(Duration::from_millis(RETRY_TIMEOUT_MILLIS));
|
||||
thread::sleep(Duration::from_millis(RETRY_INTERVAL_MILLIS));
|
||||
}
|
||||
Err(e) => {
|
||||
println!("{process_name} failed to start: {e:#}");
|
||||
@@ -127,7 +137,8 @@ where
|
||||
}
|
||||
}
|
||||
}
|
||||
anyhow::bail!("{process_name} could not start in {RETRIES} attempts");
|
||||
println!();
|
||||
anyhow::bail!("{process_name} did not start in {RETRY_UNTIL_SECS} seconds");
|
||||
}
|
||||
|
||||
/// Stops the process, using the pid file given. Returns Ok also if the process is already not running.
|
||||
@@ -158,7 +169,7 @@ pub fn stop_process(immediate: bool, process_name: &str, pid_file: &Path) -> any
|
||||
}
|
||||
|
||||
// Wait until process is gone
|
||||
for _ in 0..RETRIES {
|
||||
for retries in 0..RETRIES {
|
||||
match process_has_stopped(pid) {
|
||||
Ok(true) => {
|
||||
println!("\n{process_name} stopped");
|
||||
@@ -170,9 +181,16 @@ pub fn stop_process(immediate: bool, process_name: &str, pid_file: &Path) -> any
|
||||
return Ok(());
|
||||
}
|
||||
Ok(false) => {
|
||||
print!(".");
|
||||
io::stdout().flush().unwrap();
|
||||
thread::sleep(Duration::from_secs(1))
|
||||
if retries == NOTICE_AFTER_RETRIES {
|
||||
// The process is taking a long time to start up. Keep waiting, but
|
||||
// print a message
|
||||
print!("\n{process_name} has not stopped yet, continuing to wait");
|
||||
}
|
||||
if retries % DOT_EVERY_RETRIES == 0 {
|
||||
print!(".");
|
||||
io::stdout().flush().unwrap();
|
||||
}
|
||||
thread::sleep(Duration::from_millis(RETRY_INTERVAL_MILLIS));
|
||||
}
|
||||
Err(e) => {
|
||||
println!("{process_name} with pid {pid} failed to stop: {e:#}");
|
||||
@@ -180,24 +198,21 @@ pub fn stop_process(immediate: bool, process_name: &str, pid_file: &Path) -> any
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
anyhow::bail!("{process_name} with pid {pid} failed to stop in {RETRIES} attempts");
|
||||
println!();
|
||||
anyhow::bail!("{process_name} with pid {pid} did not stop in {RETRY_UNTIL_SECS} seconds");
|
||||
}
|
||||
|
||||
fn fill_rust_env_vars(cmd: &mut Command) -> &mut Command {
|
||||
let mut filled_cmd = cmd.env_clear().env("RUST_BACKTRACE", "1");
|
||||
|
||||
let var = "LLVM_PROFILE_FILE";
|
||||
if let Some(val) = std::env::var_os(var) {
|
||||
filled_cmd = filled_cmd.env(var, val);
|
||||
// Pass through these environment variables to the command
|
||||
for var in ["LLVM_PROFILE_FILE", "FAILPOINTS", "RUST_LOG"] {
|
||||
if let Some(val) = std::env::var_os(var) {
|
||||
filled_cmd = filled_cmd.env(var, val);
|
||||
}
|
||||
}
|
||||
|
||||
const RUST_LOG_KEY: &str = "RUST_LOG";
|
||||
if let Ok(rust_log_value) = std::env::var(RUST_LOG_KEY) {
|
||||
filled_cmd.env(RUST_LOG_KEY, rust_log_value)
|
||||
} else {
|
||||
filled_cmd
|
||||
}
|
||||
filled_cmd
|
||||
}
|
||||
|
||||
fn fill_aws_secrets_vars(mut cmd: &mut Command) -> &mut Command {
|
||||
|
||||
@@ -6,7 +6,7 @@ use crate::{background_process, local_env};
|
||||
|
||||
pub fn start_etcd_process(env: &local_env::LocalEnv) -> anyhow::Result<()> {
|
||||
let etcd_broker = &env.etcd_broker;
|
||||
println!(
|
||||
print!(
|
||||
"Starting etcd broker using {:?}",
|
||||
etcd_broker.etcd_binary_path
|
||||
);
|
||||
|
||||
@@ -237,7 +237,7 @@ impl PageServerNode {
|
||||
datadir: &Path,
|
||||
update_config: bool,
|
||||
) -> anyhow::Result<Child> {
|
||||
println!(
|
||||
print!(
|
||||
"Starting pageserver at '{}' in '{}'",
|
||||
self.pg_connection_config.raw_address(),
|
||||
datadir.display()
|
||||
|
||||
13
docker-compose/compute_wrapper/Dockerfile
Normal file
13
docker-compose/compute_wrapper/Dockerfile
Normal file
@@ -0,0 +1,13 @@
|
||||
ARG REPOSITORY=369495373322.dkr.ecr.eu-central-1.amazonaws.com
|
||||
ARG COMPUTE_IMAGE=compute-node-v14
|
||||
ARG TAG=latest
|
||||
|
||||
FROM $REPOSITORY/${COMPUTE_IMAGE}:$TAG
|
||||
|
||||
USER root
|
||||
RUN apt-get update && \
|
||||
apt-get install -y curl \
|
||||
jq \
|
||||
netcat
|
||||
|
||||
USER postgres
|
||||
@@ -2,6 +2,7 @@ version: '3'
|
||||
|
||||
services:
|
||||
etcd:
|
||||
restart: always
|
||||
image: quay.io/coreos/etcd:v3.5.4
|
||||
ports:
|
||||
- 2379:2379
|
||||
@@ -9,7 +10,7 @@ services:
|
||||
environment:
|
||||
# This signifficantly speeds up etcd and we anyway don't data persistency there.
|
||||
ETCD_UNSAFE_NO_FSYNC: "1"
|
||||
command:
|
||||
command:
|
||||
- "etcd"
|
||||
- "--auto-compaction-mode=revision"
|
||||
- "--auto-compaction-retention=1"
|
||||
@@ -24,6 +25,7 @@ services:
|
||||
- "--quota-backend-bytes=134217728" # 128 MB
|
||||
|
||||
minio:
|
||||
restart: always
|
||||
image: quay.io/minio/minio:RELEASE.2022-10-20T00-55-09Z
|
||||
ports:
|
||||
- 9000:9000
|
||||
@@ -41,7 +43,7 @@ services:
|
||||
entrypoint:
|
||||
- "/bin/sh"
|
||||
- "-c"
|
||||
command:
|
||||
command:
|
||||
- "until (/usr/bin/mc alias set minio http://minio:9000 $$MINIO_ROOT_USER $$MINIO_ROOT_PASSWORD) do
|
||||
echo 'Waiting to start minio...' && sleep 1;
|
||||
done;
|
||||
@@ -51,7 +53,8 @@ services:
|
||||
- minio
|
||||
|
||||
pageserver:
|
||||
image: neondatabase/neon:${TAG:-latest}
|
||||
restart: always
|
||||
image: ${REPOSITORY:-neondatabase}/neon:${TAG:-latest}
|
||||
environment:
|
||||
- BROKER_ENDPOINT='http://etcd:2379'
|
||||
- AWS_ACCESS_KEY_ID=minio
|
||||
@@ -77,7 +80,8 @@ services:
|
||||
- minio_create_buckets
|
||||
|
||||
safekeeper1:
|
||||
image: neondatabase/neon:${TAG:-latest}
|
||||
restart: always
|
||||
image: ${REPOSITORY:-neondatabase}/neon:${TAG:-latest}
|
||||
environment:
|
||||
- SAFEKEEPER_ADVERTISE_URL=safekeeper1:5454
|
||||
- SAFEKEEPER_ID=1
|
||||
@@ -106,7 +110,8 @@ services:
|
||||
- minio_create_buckets
|
||||
|
||||
safekeeper2:
|
||||
image: neondatabase/neon:${TAG:-latest}
|
||||
restart: always
|
||||
image: ${REPOSITORY:-neondatabase}/neon:${TAG:-latest}
|
||||
environment:
|
||||
- SAFEKEEPER_ADVERTISE_URL=safekeeper2:5454
|
||||
- SAFEKEEPER_ID=2
|
||||
@@ -135,7 +140,8 @@ services:
|
||||
- minio_create_buckets
|
||||
|
||||
safekeeper3:
|
||||
image: neondatabase/neon:${TAG:-latest}
|
||||
restart: always
|
||||
image: ${REPOSITORY:-neondatabase}/neon:${TAG:-latest}
|
||||
environment:
|
||||
- SAFEKEEPER_ADVERTISE_URL=safekeeper3:5454
|
||||
- SAFEKEEPER_ID=3
|
||||
@@ -164,18 +170,21 @@ services:
|
||||
- minio_create_buckets
|
||||
|
||||
compute:
|
||||
restart: always
|
||||
build:
|
||||
context: ./image/compute
|
||||
context: ./compute_wrapper/
|
||||
args:
|
||||
- COMPUTE_IMAGE=compute-node-v${PG_VERSION:-14}:${TAG:-latest}
|
||||
- COMPUTE_IMAGE=compute-node-v${PG_VERSION:-14}
|
||||
- TAG=${TAG:-latest}
|
||||
- http_proxy=$http_proxy
|
||||
- https_proxy=$https_proxy
|
||||
environment:
|
||||
- PG_VERSION=${PG_VERSION:-14}
|
||||
#- RUST_BACKTRACE=1
|
||||
# Mount the test files directly, for faster editing cycle.
|
||||
volumes:
|
||||
- ./compute/var/db/postgres/specs/:/var/db/postgres/specs/
|
||||
- ./compute/shell/:/shell/
|
||||
- ./compute_wrapper/var/db/postgres/specs/:/var/db/postgres/specs/
|
||||
- ./compute_wrapper/shell/:/shell/
|
||||
ports:
|
||||
- 55433:55433 # pg protocol handler
|
||||
- 3080:3080 # http endpoints
|
||||
|
||||
60
docker-compose/docker_compose_test.sh
Executable file
60
docker-compose/docker_compose_test.sh
Executable file
@@ -0,0 +1,60 @@
|
||||
#!/bin/bash
|
||||
|
||||
# A basic test to ensure Docker images are built correctly.
|
||||
# Build a wrapper around the compute, start all services and runs a simple SQL query.
|
||||
# Repeats the process for all currenly supported Postgres versions.
|
||||
|
||||
# Implicitly accepts `REPOSITORY` and `TAG` env vars that are passed into the compose file
|
||||
# Their defaults point at DockerHub `neondatabase/neon:latest` image.`,
|
||||
# to verify custom image builds (e.g pre-published ones).
|
||||
|
||||
# XXX: Current does not work on M1 macs due to x86_64 Docker images compiled only, and no seccomp support in M1 Docker emulation layer.
|
||||
|
||||
set -eux -o pipefail
|
||||
|
||||
SCRIPT_DIR="$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )"
|
||||
COMPOSE_FILE=$SCRIPT_DIR/docker-compose.yml
|
||||
|
||||
COMPUTE_CONTAINER_NAME=docker-compose-compute-1
|
||||
SQL="CREATE TABLE t(key int primary key, value text); insert into t values(1,1); select * from t;"
|
||||
PSQL_OPTION="-h localhost -U cloud_admin -p 55433 -c '$SQL' postgres"
|
||||
|
||||
cleanup() {
|
||||
echo "show container information"
|
||||
docker ps
|
||||
docker compose -f $COMPOSE_FILE logs
|
||||
echo "stop containers..."
|
||||
docker compose -f $COMPOSE_FILE down
|
||||
}
|
||||
|
||||
echo "clean up containers if exists"
|
||||
cleanup
|
||||
|
||||
for pg_version in 14 15; do
|
||||
echo "start containers (pg_version=$pg_version)."
|
||||
PG_VERSION=$pg_version docker compose -f $COMPOSE_FILE up --build -d
|
||||
|
||||
echo "wait until the compute is ready. timeout after 60s. "
|
||||
cnt=0
|
||||
while sleep 1; do
|
||||
# check timeout
|
||||
cnt=`expr $cnt + 1`
|
||||
if [ $cnt -gt 60 ]; then
|
||||
echo "timeout before the compute is ready."
|
||||
cleanup
|
||||
exit 1
|
||||
fi
|
||||
|
||||
# check if the compute is ready
|
||||
set +o pipefail
|
||||
result=`docker compose -f $COMPOSE_FILE logs "compute_is_ready" | grep "accepting connections" | wc -l`
|
||||
set -o pipefail
|
||||
if [ $result -eq 1 ]; then
|
||||
echo "OK. The compute is ready to connect."
|
||||
echo "execute simple queries."
|
||||
docker exec $COMPUTE_CONTAINER_NAME /bin/bash -c "psql $PSQL_OPTION"
|
||||
cleanup
|
||||
break
|
||||
fi
|
||||
done
|
||||
done
|
||||
@@ -1,10 +0,0 @@
|
||||
ARG COMPUTE_IMAGE=compute-node-v14:latest
|
||||
FROM neondatabase/${COMPUTE_IMAGE}
|
||||
|
||||
USER root
|
||||
RUN apt-get update && \
|
||||
apt-get install -y curl \
|
||||
jq \
|
||||
netcat
|
||||
|
||||
USER postgres
|
||||
@@ -37,7 +37,7 @@
|
||||
|
||||
- [Source view](./sourcetree.md)
|
||||
- [docker.md](./docker.md) — Docker images and building pipeline.
|
||||
- [Error handling and logging]()
|
||||
- [Error handling and logging](./error-handling.md)
|
||||
- [Testing]()
|
||||
- [Unit testing]()
|
||||
- [Integration testing]()
|
||||
|
||||
198
docs/error-handling.md
Normal file
198
docs/error-handling.md
Normal file
@@ -0,0 +1,198 @@
|
||||
# Error handling and logging
|
||||
|
||||
## Logging errors
|
||||
|
||||
The principle is that errors are logged when they are handled. If you
|
||||
just propagate an error to the caller in a function, you don't need to
|
||||
log it; the caller will. But if you consume an error in a function,
|
||||
you *must* log it (if it needs to be logged at all).
|
||||
|
||||
For example:
|
||||
|
||||
```rust
|
||||
fn read_motd_file() -> std::io::Result<String> {
|
||||
let mut f = File::open("/etc/motd")?;
|
||||
let mut result = String::new();
|
||||
f.read_to_string(&mut result)?;
|
||||
result
|
||||
}
|
||||
```
|
||||
|
||||
Opening or reading the file could fail, but there is no need to log
|
||||
the error here. The function merely propagates the error to the
|
||||
caller, and it is up to the caller to log the error or propagate it
|
||||
further, if the failure is not expected. But if, for example, it is
|
||||
normal that the "/etc/motd" file doesn't exist, the caller can choose
|
||||
to silently ignore the error, or log it as an INFO or DEBUG level
|
||||
message:
|
||||
|
||||
```rust
|
||||
fn get_message_of_the_day() -> String {
|
||||
// Get the motd from /etc/motd, or return the default proverb
|
||||
match read_motd_file() {
|
||||
Ok(motd) => motd,
|
||||
Err(err) => {
|
||||
// It's normal that /etc/motd doesn't exist, but if we fail to
|
||||
// read it for some other reason, that's unexpected. The message
|
||||
// of the day isn't very important though, so we just WARN and
|
||||
// continue with the default in any case.
|
||||
if err.kind() != std::io::ErrorKind::NotFound {
|
||||
tracing::warn!("could not read \"/etc/motd\": {err:?}");
|
||||
}
|
||||
"An old error is always more popular than a new truth. - German proverb"
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
## Error types
|
||||
|
||||
We use the `anyhow` crate widely. It contains many convenient macros
|
||||
like `bail!` and `ensure!` to construct and return errors, and to
|
||||
propagate many kinds of low-level errors, wrapped in `anyhow::Error`.
|
||||
|
||||
A downside of `anyhow::Error` is that the caller cannot distinguish
|
||||
between different error cases. Most errors are propagated all the way
|
||||
to the mgmt API handler function, or the main loop that handles a
|
||||
connection with the compute node, and they are all handled the same
|
||||
way: the error is logged and returned to the client as an HTTP or
|
||||
libpq error.
|
||||
|
||||
But in some cases, we need to distinguish between errors and handle
|
||||
them differently. For example, attaching a tenant to the pageserver
|
||||
could fail either because the tenant has already been attached, or
|
||||
because we could not load its metadata from cloud storage. The first
|
||||
case is more or less expected. The console sends the Attach request to
|
||||
the pageserver, and the pageserver completes the operation, but the
|
||||
network connection might be lost before the console receives the
|
||||
response. The console will retry the operation in that case, but the
|
||||
tenant has already been attached. It is important that the pagserver
|
||||
responds with the HTTP 403 Already Exists error in that case, rather
|
||||
than a generic HTTP 500 Internal Server Error.
|
||||
|
||||
If you need to distinguish between different kinds of errors, create a
|
||||
new `Error` type. The `thiserror` crate is useful for that. But in
|
||||
most cases `anyhow::Error` is good enough.
|
||||
|
||||
## Panics
|
||||
|
||||
Depending on where a panic happens, it can cause the whole pageserver
|
||||
or safekeeper to restart, or just a single tenant. In either case,
|
||||
that is pretty bad and causes an outage. Avoid panics. Never use
|
||||
`unwrap()` or other calls that might panic, to verify inputs from the
|
||||
network or from disk.
|
||||
|
||||
It is acceptable to use functions that might panic, like `unwrap()`, if
|
||||
it is obvious that it cannot panic. For example, if you have just
|
||||
checked that a variable is not None, it is OK to call `unwrap()` on it,
|
||||
but it is still preferable to use `expect("reason")` instead to explain
|
||||
why the function cannot fail.
|
||||
|
||||
`assert!` and `panic!` are reserved for checking clear invariants and
|
||||
very obvious "can't happen" cases. When in doubt, use anyhow `ensure!`
|
||||
or `bail!` instead.
|
||||
|
||||
## Error levels
|
||||
|
||||
`tracing::Level` doesn't provide very clear guidelines on what the
|
||||
different levels mean, or when to use which level. Here is how we use
|
||||
them:
|
||||
|
||||
### Error
|
||||
|
||||
Examples:
|
||||
- could not open file "foobar"
|
||||
- invalid tenant id
|
||||
|
||||
Errors are not expected to happen during normal operation. Incorrect
|
||||
inputs from client can cause ERRORs. For example, if a client tries to
|
||||
call a mgmt API that doesn't exist, or if a compute node sends passes
|
||||
an LSN that has already been garbage collected away.
|
||||
|
||||
These should *not* happen during normal operations. "Normal
|
||||
operations" is not a very precise concept. But for example, disk
|
||||
errors are not expected to happen when the system is working, so those
|
||||
count as Errors. However, if a TCP connection to a compute node is
|
||||
lost, that is not considered an Error, because it doesn't affect the
|
||||
pageserver's or safekeeper's operation in any way, and happens fairly
|
||||
frequently when compute nodes are shut down, or are killed abruptly
|
||||
because of errors in the compute.
|
||||
|
||||
**Errors are monitored, and always need human investigation to determine
|
||||
the cause.**
|
||||
|
||||
Whether something should be logged at ERROR, WARNING or INFO level can
|
||||
depend on the callers and clients. For example, it might be unexpected
|
||||
and a sign of a serious issue if the console calls the
|
||||
"timeline_detail" mgmt API for a timeline that doesn't exist. ERROR
|
||||
would be appropriate in that case. But if the console routinely calls
|
||||
the API after deleting a timeline, to check if the deletion has
|
||||
completed, then it would be totally normal and an INFO or DEBUG level
|
||||
message would be more appropriate. If a message is logged as an ERROR,
|
||||
but it in fact happens frequently in production and never requires any
|
||||
action, it should probably be demoted to an INFO level message.
|
||||
|
||||
### Warn
|
||||
|
||||
Examples:
|
||||
- could not remove temporary file "foobar.temp"
|
||||
- unrecognized file "foobar" in timeline directory
|
||||
|
||||
Warnings are similar to Errors, in that they should not happen
|
||||
when the system is operating normally. The difference between Error and
|
||||
Warning is that an Error means that the operation failed, whereas Warning
|
||||
means that something unexpected happened, but the operation continued anyway.
|
||||
For example, if deleting a file fails because the file already didn't exist,
|
||||
it should be logged as Warning.
|
||||
|
||||
> **Note:** The python regression tests, under `test_regress`, check the
|
||||
> pageserver log after each test for any ERROR and WARN lines. If there are
|
||||
> any ERRORs or WARNs that have not been explicitly listed in the test as
|
||||
> allowed, the test is marked a failed. This is to catch unexpected errors
|
||||
> e.g. in background operations, that don't cause immediate misbehaviour in
|
||||
> the tested functionality.
|
||||
|
||||
### Info
|
||||
|
||||
Info level is used to log useful information when the system is
|
||||
operating normally. Info level is appropriate e.g. for logging state
|
||||
changes, background operations, and network connections.
|
||||
|
||||
Examples:
|
||||
- "system is shutting down"
|
||||
- "tenant was created"
|
||||
- "retrying S3 upload"
|
||||
|
||||
### Debug & Trace
|
||||
|
||||
Debug and Trace level messages are not printed to the log in our normal
|
||||
production configuration, but could be enabled for a specific server or
|
||||
tenant, to aid debugging. (Although we don't actually have that
|
||||
capability as of this writing).
|
||||
|
||||
## Context
|
||||
|
||||
We use logging "spans" to hold context information about the current
|
||||
operation. Almost every operation happens on a particular tenant and
|
||||
timeline, so we enter a span with the "tenant_id" and "timeline_id"
|
||||
very early when processing an incoming API request, for example. All
|
||||
background operations should also run in a span containing at least
|
||||
those two fields, and any other parameters or information that might
|
||||
be useful when debugging an error that might happen when performing
|
||||
the operation.
|
||||
|
||||
TODO: Spans are not captured in the Error when it is created, but when
|
||||
the error is logged. It would be more useful to capture them at Error
|
||||
creation. We should consider using `tracing_error::SpanTrace` to do
|
||||
that.
|
||||
|
||||
## Error message style
|
||||
|
||||
PostgreSQL has a style guide for writing error messages:
|
||||
|
||||
https://www.postgresql.org/docs/current/error-style-guide.html
|
||||
|
||||
Follow that guide when writing error messages in the PostgreSQL
|
||||
extension. We don't follow it strictly in the pageserver and
|
||||
safekeeper, but the advice in the PostgreSQL style guide is generally
|
||||
good, and you can't go wrong by following it.
|
||||
@@ -83,6 +83,16 @@ A subject for future modularization.
|
||||
`/libs/metrics`:
|
||||
Helpers for exposing Prometheus metrics from the server.
|
||||
|
||||
### Adding dependencies
|
||||
When you add a Cargo dependency, you should update hakari manifest by running commands below and committing the updated `Cargo.lock` and `workspace_hack/`. There may be no changes, that's fine.
|
||||
|
||||
```bash
|
||||
cargo hakari generate
|
||||
cargo hakari manage-deps
|
||||
```
|
||||
|
||||
If you don't have hakari installed (`error: no such subcommand: hakari`), install it by running `cargo install cargo-hakari`.
|
||||
|
||||
## Using Python
|
||||
Note that Debian/Ubuntu Python packages are stale, as it commonly happens,
|
||||
so manual installation of dependencies is not recommended.
|
||||
|
||||
@@ -48,6 +48,25 @@ pub mod nonblock;
|
||||
// Default signal handling
|
||||
pub mod signals;
|
||||
|
||||
/// use with fail::cfg("$name", "return(2000)")
|
||||
#[macro_export]
|
||||
macro_rules! failpoint_sleep_millis_async {
|
||||
($name:literal) => {{
|
||||
let should_sleep: Option<std::time::Duration> = (|| {
|
||||
fail::fail_point!($name, |v: Option<_>| {
|
||||
let millis = v.unwrap().parse::<u64>().unwrap();
|
||||
Some(Duration::from_millis(millis))
|
||||
});
|
||||
None
|
||||
})();
|
||||
if let Some(d) = should_sleep {
|
||||
tracing::info!("failpoint {:?}: sleeping for {:?}", $name, d);
|
||||
tokio::time::sleep(d).await;
|
||||
tracing::info!("failpoint {:?}: sleep done", $name);
|
||||
}
|
||||
}};
|
||||
}
|
||||
|
||||
/// This is a shortcut to embed git sha into binaries and avoid copying the same build script to all packages
|
||||
///
|
||||
/// we have several cases:
|
||||
|
||||
@@ -138,7 +138,7 @@ impl FromStr for Lsn {
|
||||
///
|
||||
/// If the input string is missing the '/' character, then use `Lsn::from_hex`
|
||||
fn from_str(s: &str) -> Result<Self, Self::Err> {
|
||||
let mut splitter = s.split('/');
|
||||
let mut splitter = s.trim().split('/');
|
||||
if let (Some(left), Some(right), None) = (splitter.next(), splitter.next(), splitter.next())
|
||||
{
|
||||
let left_num = u32::from_str_radix(left, 16).map_err(|_| LsnParseError)?;
|
||||
@@ -270,6 +270,11 @@ mod tests {
|
||||
);
|
||||
assert_eq!(Lsn::from_hex("0"), Ok(Lsn(0)));
|
||||
assert_eq!(Lsn::from_hex("F12345678AAAA5555"), Err(LsnParseError));
|
||||
|
||||
let expected_lsn = Lsn(0x3C490F8);
|
||||
assert_eq!(" 0/3C490F8".parse(), Ok(expected_lsn));
|
||||
assert_eq!("0/3C490F8 ".parse(), Ok(expected_lsn));
|
||||
assert_eq!(" 0/3C490F8 ".parse(), Ok(expected_lsn));
|
||||
}
|
||||
|
||||
#[test]
|
||||
|
||||
@@ -76,3 +76,7 @@ tempfile = "3.2"
|
||||
[[bench]]
|
||||
name = "bench_layer_map"
|
||||
harness = false
|
||||
|
||||
[[bench]]
|
||||
name = "bench_walredo"
|
||||
harness = false
|
||||
|
||||
453
pageserver/benches/bench_walredo.rs
Normal file
453
pageserver/benches/bench_walredo.rs
Normal file
File diff suppressed because one or more lines are too long
@@ -199,6 +199,20 @@ fn start_pageserver(conf: &'static PageServerConf) -> anyhow::Result<()> {
|
||||
logging::init(conf.log_format)?;
|
||||
info!("version: {}", version());
|
||||
|
||||
// If any failpoints were set from FAILPOINTS environment variable,
|
||||
// print them to the log for debugging purposes
|
||||
let failpoints = fail::list();
|
||||
if !failpoints.is_empty() {
|
||||
info!(
|
||||
"started with failpoints: {}",
|
||||
failpoints
|
||||
.iter()
|
||||
.map(|(name, actions)| format!("{name}={actions}"))
|
||||
.collect::<Vec<String>>()
|
||||
.join(";")
|
||||
)
|
||||
}
|
||||
|
||||
let lock_file_path = conf.workdir.join(PID_FILE_NAME);
|
||||
let lock_file = match lock_file::create_lock_file(&lock_file_path, Pid::this().to_string()) {
|
||||
lock_file::LockCreationResult::Created {
|
||||
|
||||
@@ -614,8 +614,9 @@ impl PageServerConf {
|
||||
PathBuf::from(format!("../tmp_check/test_{test_name}"))
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
pub fn dummy_conf(repo_dir: PathBuf) -> Self {
|
||||
let pg_distrib_dir = PathBuf::from(env!("CARGO_MANIFEST_DIR")).join("../pg_install");
|
||||
|
||||
PageServerConf {
|
||||
id: NodeId(0),
|
||||
wait_lsn_timeout: Duration::from_secs(60),
|
||||
@@ -626,7 +627,7 @@ impl PageServerConf {
|
||||
listen_http_addr: defaults::DEFAULT_HTTP_LISTEN_ADDR.to_string(),
|
||||
superuser: "cloud_admin".to_string(),
|
||||
workdir: repo_dir,
|
||||
pg_distrib_dir: PathBuf::new(),
|
||||
pg_distrib_dir,
|
||||
auth_type: AuthType::Trust,
|
||||
auth_validation_public_key_path: None,
|
||||
remote_storage_config: None,
|
||||
|
||||
@@ -13,6 +13,7 @@ use anyhow::{bail, ensure, Context, Result};
|
||||
use bytes::Buf;
|
||||
use bytes::Bytes;
|
||||
use futures::{Stream, StreamExt};
|
||||
use futures::stream::FuturesOrdered;
|
||||
use pageserver_api::models::{
|
||||
PagestreamBeMessage, PagestreamDbSizeRequest, PagestreamDbSizeResponse,
|
||||
PagestreamErrorResponse, PagestreamExistsRequest, PagestreamExistsResponse,
|
||||
@@ -25,6 +26,7 @@ use std::net::TcpListener;
|
||||
use std::str;
|
||||
use std::str::FromStr;
|
||||
use std::sync::Arc;
|
||||
use std::time::Instant;
|
||||
use tokio::pin;
|
||||
use tokio_util::io::StreamReader;
|
||||
use tokio_util::io::SyncIoBridge;
|
||||
@@ -54,6 +56,9 @@ use crate::CheckpointConfig;
|
||||
use postgres_ffi::pg_constants::DEFAULTTABLESPACE_OID;
|
||||
use postgres_ffi::BLCKSZ;
|
||||
|
||||
/// Number of requests to process in parallel, from a single connection
|
||||
const MAX_INFLIGHT_REQUESTS: usize = 4;
|
||||
|
||||
fn copyin_stream(pgb: &mut PostgresBackend) -> impl Stream<Item = io::Result<Bytes>> + '_ {
|
||||
async_stream::try_stream! {
|
||||
loop {
|
||||
@@ -76,6 +81,12 @@ fn copyin_stream(pgb: &mut PostgresBackend) -> impl Stream<Item = io::Result<Byt
|
||||
FeMessage::CopyData(bytes) => bytes,
|
||||
FeMessage::CopyDone => { break },
|
||||
FeMessage::Sync => continue,
|
||||
FeMessage::Terminate => {
|
||||
let msg = format!("client terminated connection with Terminate message during COPY");
|
||||
pgb.write_message(&BeMessage::ErrorResponse(&msg))?;
|
||||
Err(io::Error::new(io::ErrorKind::ConnectionReset, msg))?;
|
||||
break;
|
||||
}
|
||||
m => {
|
||||
let msg = format!("unexpected message {:?}", m);
|
||||
pgb.write_message(&BeMessage::ErrorResponse(&msg))?;
|
||||
@@ -87,10 +98,10 @@ fn copyin_stream(pgb: &mut PostgresBackend) -> impl Stream<Item = io::Result<Byt
|
||||
yield copy_data_bytes;
|
||||
}
|
||||
Ok(None) => {
|
||||
let msg = "client closed connection";
|
||||
let msg = "client closed connection during COPY";
|
||||
pgb.write_message(&BeMessage::ErrorResponse(msg))?;
|
||||
pgb.flush().await?;
|
||||
Err(io::Error::new(io::ErrorKind::Other, msg))?;
|
||||
Err(io::Error::new(io::ErrorKind::ConnectionReset, msg))?;
|
||||
}
|
||||
Err(e) => {
|
||||
Err(io::Error::new(io::ErrorKind::Other, e))?;
|
||||
@@ -218,6 +229,13 @@ struct PageRequestMetrics {
|
||||
get_db_size: metrics::Histogram,
|
||||
}
|
||||
|
||||
pub enum RequestType {
|
||||
Exists,
|
||||
Nblocks,
|
||||
GetPage,
|
||||
DbSize,
|
||||
}
|
||||
|
||||
impl PageRequestMetrics {
|
||||
fn new(tenant_id: &TenantId, timeline_id: &TimelineId) -> Self {
|
||||
let tenant_id = tenant_id.to_string();
|
||||
@@ -292,66 +310,101 @@ impl PageServerHandler {
|
||||
|
||||
let metrics = PageRequestMetrics::new(&tenant_id, &timeline_id);
|
||||
|
||||
//
|
||||
// Main loop to handle the stream of requests
|
||||
//
|
||||
// We process multiple requests in parallel, by spawning a new Task for each
|
||||
// incoming request.
|
||||
let mut inprogress_requests = FuturesOrdered::new();
|
||||
loop {
|
||||
let msg = tokio::select! {
|
||||
tokio::select! {
|
||||
biased;
|
||||
|
||||
// If we were requested to shut down, stop
|
||||
_ = task_mgr::shutdown_watcher() => {
|
||||
// We were requested to shut down.
|
||||
info!("shutdown request received in page handler");
|
||||
break;
|
||||
}
|
||||
|
||||
msg = pgb.read_message() => { msg }
|
||||
};
|
||||
// When a task completes, send the response to the client
|
||||
completed_task = inprogress_requests.next(), if !inprogress_requests.is_empty() => {
|
||||
let response: Bytes;
|
||||
let request_type: RequestType;
|
||||
let elapsed_sec: f64;
|
||||
(response, request_type, elapsed_sec) = completed_task.unwrap()?;
|
||||
|
||||
let copy_data_bytes = match msg? {
|
||||
Some(FeMessage::CopyData(bytes)) => bytes,
|
||||
Some(m) => {
|
||||
bail!("unexpected message: {m:?} during COPY");
|
||||
pgb.write_message(&BeMessage::CopyData(&response))?;
|
||||
pgb.flush().await?;
|
||||
|
||||
match request_type {
|
||||
RequestType::Exists => metrics.get_rel_exists.observe(elapsed_sec),
|
||||
RequestType::Nblocks => metrics.get_rel_size.observe(elapsed_sec),
|
||||
RequestType::GetPage => metrics.get_page_at_lsn.observe(elapsed_sec),
|
||||
RequestType::DbSize => metrics.get_db_size.observe(elapsed_sec),
|
||||
}
|
||||
|
||||
continue;
|
||||
}
|
||||
None => break, // client disconnected
|
||||
};
|
||||
|
||||
trace!("query: {copy_data_bytes:?}");
|
||||
// When a new request arrives, spawn a task to process it.
|
||||
// If we already have MAX_INFLIGHT_REQUESTS requests in-progress, however,
|
||||
// don't start new ones.
|
||||
msg = pgb.read_message(), if inprogress_requests.len() < MAX_INFLIGHT_REQUESTS => {
|
||||
let copy_data_bytes = match msg? {
|
||||
Some(FeMessage::CopyData(bytes)) => bytes,
|
||||
Some(m) => {
|
||||
bail!("unexpected message: {m:?} during COPY");
|
||||
}
|
||||
None => break, // client disconnected
|
||||
};
|
||||
|
||||
// Trace request if needed
|
||||
if let Some(t) = tracer.as_mut() {
|
||||
t.trace(©_data_bytes)
|
||||
}
|
||||
trace!("query: {copy_data_bytes:?}");
|
||||
|
||||
let neon_fe_msg = PagestreamFeMessage::parse(&mut copy_data_bytes.reader())?;
|
||||
// Trace request if needed
|
||||
if let Some(t) = tracer.as_mut() {
|
||||
t.trace(©_data_bytes)
|
||||
}
|
||||
|
||||
let response = match neon_fe_msg {
|
||||
PagestreamFeMessage::Exists(req) => {
|
||||
let _timer = metrics.get_rel_exists.start_timer();
|
||||
self.handle_get_rel_exists_request(&timeline, &req).await
|
||||
}
|
||||
PagestreamFeMessage::Nblocks(req) => {
|
||||
let _timer = metrics.get_rel_size.start_timer();
|
||||
self.handle_get_nblocks_request(&timeline, &req).await
|
||||
}
|
||||
PagestreamFeMessage::GetPage(req) => {
|
||||
let _timer = metrics.get_page_at_lsn.start_timer();
|
||||
self.handle_get_page_at_lsn_request(&timeline, &req).await
|
||||
}
|
||||
PagestreamFeMessage::DbSize(req) => {
|
||||
let _timer = metrics.get_db_size.start_timer();
|
||||
self.handle_db_size_request(&timeline, &req).await
|
||||
let neon_fe_msg = PagestreamFeMessage::parse(&mut copy_data_bytes.reader())?;
|
||||
|
||||
let timeline = Arc::clone(&timeline);
|
||||
let conf = self.conf;
|
||||
let task = async move {
|
||||
let start_time = Instant::now();
|
||||
let (response, request_type) = match neon_fe_msg {
|
||||
PagestreamFeMessage::Exists(req) => {
|
||||
(Self::handle_get_rel_exists_request(&timeline, &req).await,
|
||||
RequestType::Exists)
|
||||
}
|
||||
PagestreamFeMessage::Nblocks(req) => {
|
||||
(Self::handle_get_nblocks_request(&timeline, &req).await,
|
||||
RequestType::Nblocks)
|
||||
}
|
||||
PagestreamFeMessage::GetPage(req) => {
|
||||
(Self::handle_get_page_at_lsn_request(conf, &timeline, &req).await,
|
||||
RequestType::GetPage)
|
||||
}
|
||||
PagestreamFeMessage::DbSize(req) => {
|
||||
(Self::handle_db_size_request(&timeline, &req).await,
|
||||
RequestType::DbSize)
|
||||
}
|
||||
};
|
||||
|
||||
let response = response.unwrap_or_else(|e| {
|
||||
// print the all details to the log with {:#}, but for the client the
|
||||
// error message is enough
|
||||
error!("error reading relation or page version: {:?}", e);
|
||||
PagestreamBeMessage::Error(PagestreamErrorResponse {
|
||||
message: e.to_string(),
|
||||
})
|
||||
});
|
||||
let response: Bytes = response.serialize();
|
||||
(response, request_type, start_time.elapsed().as_secs_f64())
|
||||
};
|
||||
inprogress_requests.push_back(tokio::spawn(task));
|
||||
continue;
|
||||
}
|
||||
};
|
||||
|
||||
let response = response.unwrap_or_else(|e| {
|
||||
// print the all details to the log with {:#}, but for the client the
|
||||
// error message is enough
|
||||
error!("error reading relation or page version: {:?}", e);
|
||||
PagestreamBeMessage::Error(PagestreamErrorResponse {
|
||||
message: e.to_string(),
|
||||
})
|
||||
});
|
||||
|
||||
pgb.write_message(&BeMessage::CopyData(&response.serialize()))?;
|
||||
pgb.flush().await?;
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
@@ -523,9 +576,8 @@ impl PageServerHandler {
|
||||
Ok(lsn)
|
||||
}
|
||||
|
||||
#[instrument(skip(self, timeline, req), fields(rel = %req.rel, req_lsn = %req.lsn))]
|
||||
#[instrument(skip(timeline, req), fields(rel = %req.rel, req_lsn = %req.lsn))]
|
||||
async fn handle_get_rel_exists_request(
|
||||
&self,
|
||||
timeline: &Timeline,
|
||||
req: &PagestreamExistsRequest,
|
||||
) -> Result<PagestreamBeMessage> {
|
||||
@@ -540,9 +592,8 @@ impl PageServerHandler {
|
||||
}))
|
||||
}
|
||||
|
||||
#[instrument(skip(self, timeline, req), fields(rel = %req.rel, req_lsn = %req.lsn))]
|
||||
#[instrument(skip(timeline, req), fields(rel = %req.rel, req_lsn = %req.lsn))]
|
||||
async fn handle_get_nblocks_request(
|
||||
&self,
|
||||
timeline: &Timeline,
|
||||
req: &PagestreamNblocksRequest,
|
||||
) -> Result<PagestreamBeMessage> {
|
||||
@@ -557,9 +608,8 @@ impl PageServerHandler {
|
||||
}))
|
||||
}
|
||||
|
||||
#[instrument(skip(self, timeline, req), fields(dbnode = %req.dbnode, req_lsn = %req.lsn))]
|
||||
#[instrument(skip(timeline, req), fields(dbnode = %req.dbnode, req_lsn = %req.lsn))]
|
||||
async fn handle_db_size_request(
|
||||
&self,
|
||||
timeline: &Timeline,
|
||||
req: &PagestreamDbSizeRequest,
|
||||
) -> Result<PagestreamBeMessage> {
|
||||
@@ -577,11 +627,11 @@ impl PageServerHandler {
|
||||
}))
|
||||
}
|
||||
|
||||
#[instrument(skip(self, timeline, req), fields(rel = %req.rel, blkno = %req.blkno, req_lsn = %req.lsn))]
|
||||
async fn handle_get_page_at_lsn_request(
|
||||
&self,
|
||||
timeline: &Timeline,
|
||||
req: &PagestreamGetPageRequest,
|
||||
#[instrument(skip(conf, timeline, req), fields(rel = %req.rel, blkno = %req.blkno, req_lsn = %req.lsn))]
|
||||
async fn handle_get_page_at_lsn_request<'a>(
|
||||
conf: &'static PageServerConf,
|
||||
timeline: &'a Timeline,
|
||||
req: &'a PagestreamGetPageRequest,
|
||||
) -> Result<PagestreamBeMessage> {
|
||||
let latest_gc_cutoff_lsn = timeline.get_latest_gc_cutoff_lsn();
|
||||
let lsn = Self::wait_or_get_last_lsn(timeline, req.lsn, req.latest, &latest_gc_cutoff_lsn)
|
||||
@@ -598,7 +648,7 @@ impl PageServerHandler {
|
||||
// FIXME: this profiling now happens at different place than it used to. The
|
||||
// current profiling is based on a thread-local variable, so it doesn't work
|
||||
// across awaits
|
||||
let _profiling_guard = profpoint_start(self.conf, ProfilingConfig::PageRequests);
|
||||
let _profiling_guard = profpoint_start(conf, ProfilingConfig::PageRequests);
|
||||
let page = timeline.get_rel_page_at_lsn(req.rel, req.blkno, lsn, req.latest)?;
|
||||
|
||||
Ok(PagestreamBeMessage::GetPage(PagestreamGetPageResponse {
|
||||
@@ -606,9 +656,8 @@ impl PageServerHandler {
|
||||
}))
|
||||
}
|
||||
|
||||
#[instrument(skip(self, pgb))]
|
||||
#[instrument(skip(pgb))]
|
||||
async fn handle_basebackup_request(
|
||||
&self,
|
||||
pgb: &mut PostgresBackend,
|
||||
tenant_id: TenantId,
|
||||
timeline_id: TimelineId,
|
||||
@@ -741,7 +790,7 @@ impl postgres_backend_async::Handler for PageServerHandler {
|
||||
};
|
||||
|
||||
// Check that the timeline exists
|
||||
self.handle_basebackup_request(pgb, tenant_id, timeline_id, lsn, None, false)
|
||||
Self::handle_basebackup_request(pgb, tenant_id, timeline_id, lsn, None, false)
|
||||
.await?;
|
||||
pgb.write_message(&BeMessage::CommandComplete(b"SELECT 1"))?;
|
||||
}
|
||||
@@ -801,7 +850,7 @@ impl postgres_backend_async::Handler for PageServerHandler {
|
||||
self.check_permission(Some(tenant_id))?;
|
||||
|
||||
// Check that the timeline exists
|
||||
self.handle_basebackup_request(pgb, tenant_id, timeline_id, lsn, prev_lsn, true)
|
||||
Self::handle_basebackup_request(pgb, tenant_id, timeline_id, lsn, prev_lsn, true)
|
||||
.await?;
|
||||
pgb.write_message(&BeMessage::CommandComplete(b"SELECT 1"))?;
|
||||
} else if query_string.starts_with("import basebackup ") {
|
||||
|
||||
@@ -1010,6 +1010,10 @@ impl Tenant {
|
||||
|
||||
let gc_timelines = self.refresh_gc_info_internal(target_timeline_id, horizon, pitr)?;
|
||||
|
||||
utils::failpoint_sleep_millis_async!("gc_iteration_internal_after_getting_gc_timelines");
|
||||
|
||||
info!("starting on {} timelines", gc_timelines.len());
|
||||
|
||||
// Perform GC for each timeline.
|
||||
//
|
||||
// Note that we don't hold the GC lock here because we don't want
|
||||
|
||||
@@ -74,6 +74,7 @@ where
|
||||
};
|
||||
|
||||
dstbuf.clear();
|
||||
dstbuf.reserve(len);
|
||||
|
||||
// Read the payload
|
||||
let mut remain = len;
|
||||
|
||||
@@ -260,8 +260,9 @@ impl Layer for DeltaLayer {
|
||||
|
||||
// Ok, 'offsets' now contains the offsets of all the entries we need to read
|
||||
let mut cursor = file.block_cursor();
|
||||
let mut buf = Vec::new();
|
||||
for (entry_lsn, pos) in offsets {
|
||||
let buf = cursor.read_blob(pos).with_context(|| {
|
||||
cursor.read_blob_into_buf(pos, &mut buf).with_context(|| {
|
||||
format!(
|
||||
"Failed to read blob from virtual file {}",
|
||||
file.file.path.display()
|
||||
|
||||
@@ -61,6 +61,13 @@ use crate::{
|
||||
storage_sync::{self, index::LayerFileMetadata},
|
||||
};
|
||||
|
||||
#[derive(Debug, PartialEq, Eq, Clone, Copy)]
|
||||
enum FlushLoopState {
|
||||
NotStarted,
|
||||
Running,
|
||||
Exited,
|
||||
}
|
||||
|
||||
pub struct Timeline {
|
||||
conf: &'static PageServerConf,
|
||||
tenant_conf: Arc<RwLock<TenantConfOpt>>,
|
||||
@@ -122,7 +129,7 @@ pub struct Timeline {
|
||||
write_lock: Mutex<()>,
|
||||
|
||||
/// Used to avoid multiple `flush_loop` tasks running
|
||||
flush_loop_started: Mutex<bool>,
|
||||
flush_loop_state: Mutex<FlushLoopState>,
|
||||
|
||||
/// layer_flush_start_tx can be used to wake up the layer-flushing task.
|
||||
/// The value is a counter, incremented every time a new flush cycle is requested.
|
||||
@@ -755,7 +762,7 @@ impl Timeline {
|
||||
|
||||
upload_layers: AtomicBool::new(upload_layers),
|
||||
|
||||
flush_loop_started: Mutex::new(false),
|
||||
flush_loop_state: Mutex::new(FlushLoopState::NotStarted),
|
||||
|
||||
layer_flush_start_tx,
|
||||
layer_flush_done_tx,
|
||||
@@ -794,13 +801,23 @@ impl Timeline {
|
||||
}
|
||||
|
||||
pub(super) fn maybe_spawn_flush_loop(self: &Arc<Self>) {
|
||||
let mut flush_loop_started = self.flush_loop_started.lock().unwrap();
|
||||
if *flush_loop_started {
|
||||
info!(
|
||||
"skipping attempt to start flush_loop twice {}/{}",
|
||||
self.tenant_id, self.timeline_id
|
||||
);
|
||||
return;
|
||||
let mut flush_loop_state = self.flush_loop_state.lock().unwrap();
|
||||
match *flush_loop_state {
|
||||
FlushLoopState::NotStarted => (),
|
||||
FlushLoopState::Running => {
|
||||
info!(
|
||||
"skipping attempt to start flush_loop twice {}/{}",
|
||||
self.tenant_id, self.timeline_id
|
||||
);
|
||||
return;
|
||||
}
|
||||
FlushLoopState::Exited => {
|
||||
warn!(
|
||||
"ignoring attempt to restart exited flush_loop {}/{}",
|
||||
self.tenant_id, self.timeline_id
|
||||
);
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
let layer_flush_start_rx = self.layer_flush_start_tx.subscribe();
|
||||
@@ -813,11 +830,16 @@ impl Timeline {
|
||||
Some(self.timeline_id),
|
||||
"layer flush task",
|
||||
false,
|
||||
async move { self_clone.flush_loop(layer_flush_start_rx).await; Ok(()) }
|
||||
async move {
|
||||
self_clone.flush_loop(layer_flush_start_rx).await;
|
||||
let mut flush_loop_state = self_clone.flush_loop_state.lock().unwrap();
|
||||
assert_eq!(*flush_loop_state, FlushLoopState::Running);
|
||||
*flush_loop_state = FlushLoopState::Exited;
|
||||
Ok(()) }
|
||||
.instrument(info_span!(parent: None, "layer flush task", tenant = %self.tenant_id, timeline = %self.timeline_id))
|
||||
);
|
||||
|
||||
*flush_loop_started = true;
|
||||
*flush_loop_state = FlushLoopState::Running;
|
||||
}
|
||||
|
||||
pub(super) fn launch_wal_receiver(self: &Arc<Self>) {
|
||||
@@ -1365,8 +1387,9 @@ impl Timeline {
|
||||
// finished, instead of some other flush that was started earlier.
|
||||
let mut my_flush_request = 0;
|
||||
|
||||
if !&*self.flush_loop_started.lock().unwrap() {
|
||||
anyhow::bail!("cannot flush frozen layers when flush_loop is not running")
|
||||
let flush_loop_state = { *self.flush_loop_state.lock().unwrap() };
|
||||
if flush_loop_state != FlushLoopState::Running {
|
||||
anyhow::bail!("cannot flush frozen layers when flush_loop is not running, state is {flush_loop_state:?}")
|
||||
}
|
||||
|
||||
self.layer_flush_start_tx.send_modify(|counter| {
|
||||
|
||||
@@ -216,7 +216,6 @@ impl TenantConf {
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
pub fn dummy_conf() -> Self {
|
||||
TenantConf {
|
||||
checkpoint_distance: defaults::DEFAULT_CHECKPOINT_DISTANCE,
|
||||
|
||||
@@ -71,9 +71,7 @@ async fn compaction_loop(tenant_id: TenantId) {
|
||||
let mut sleep_duration = tenant.get_compaction_period();
|
||||
if let Err(e) = tenant.compaction_iteration() {
|
||||
sleep_duration = wait_duration;
|
||||
error!("Compaction failed, retrying in {:?}: {e:#}", sleep_duration);
|
||||
#[cfg(feature = "testing")]
|
||||
std::process::abort();
|
||||
error!("Compaction failed, retrying in {:?}: {e:?}", sleep_duration);
|
||||
}
|
||||
|
||||
// Sleep
|
||||
@@ -122,9 +120,7 @@ async fn gc_loop(tenant_id: TenantId) {
|
||||
if let Err(e) = tenant.gc_iteration(None, gc_horizon, tenant.get_pitr_interval(), false).await
|
||||
{
|
||||
sleep_duration = wait_duration;
|
||||
error!("Gc failed, retrying in {:?}: {e:#}", sleep_duration);
|
||||
#[cfg(feature = "testing")]
|
||||
std::process::abort();
|
||||
error!("Gc failed, retrying in {:?}: {e:?}", sleep_duration);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -21,6 +21,7 @@
|
||||
use byteorder::{ByteOrder, LittleEndian};
|
||||
use bytes::{BufMut, Bytes, BytesMut};
|
||||
use nix::poll::*;
|
||||
use once_cell::sync::Lazy;
|
||||
use serde::Serialize;
|
||||
use std::fs::OpenOptions;
|
||||
use std::io::prelude::*;
|
||||
@@ -31,7 +32,8 @@ use std::os::unix::prelude::CommandExt;
|
||||
use std::path::PathBuf;
|
||||
use std::process::Stdio;
|
||||
use std::process::{Child, ChildStderr, ChildStdin, ChildStdout, Command};
|
||||
use std::sync::Mutex;
|
||||
use std::sync::atomic::{AtomicU64, Ordering};
|
||||
use std::sync::{Condvar, Mutex};
|
||||
use std::time::Duration;
|
||||
use std::time::Instant;
|
||||
use std::{fs, io};
|
||||
@@ -57,6 +59,9 @@ use postgres_ffi::v14::nonrelfile_utils::{
|
||||
};
|
||||
use postgres_ffi::BLCKSZ;
|
||||
|
||||
/// Maximum number of WAL redo processes to launch for a single tenant.
|
||||
const MAX_PROCESSES: usize = 4;
|
||||
|
||||
///
|
||||
/// `RelTag` + block number (`blknum`) gives us a unique id of the page in the cluster.
|
||||
///
|
||||
@@ -90,18 +95,32 @@ pub trait WalRedoManager: Send + Sync {
|
||||
) -> Result<Bytes, WalRedoError>;
|
||||
}
|
||||
|
||||
static WAL_REDO_PROCESS_COUNTER: Lazy<AtomicU64> = Lazy::new(|| { AtomicU64::new(0) });
|
||||
|
||||
///
|
||||
/// This is the real implementation that uses a Postgres process to
|
||||
/// perform WAL replay. Only one thread can use the process at a time,
|
||||
/// that is controlled by the Mutex. In the future, we might want to
|
||||
/// launch a pool of processes to allow concurrent replay of multiple
|
||||
/// records.
|
||||
/// This is the real implementation that uses a special Postgres
|
||||
/// process to perform WAL replay. There is a pool of these processes.
|
||||
///
|
||||
pub struct PostgresRedoManager {
|
||||
tenant_id: TenantId,
|
||||
conf: &'static PageServerConf,
|
||||
|
||||
process: Mutex<Option<PostgresRedoProcess>>,
|
||||
/// Pool of processes.
|
||||
process_list: Mutex<ProcessList>,
|
||||
/// Condition variable that can be used to sleep until a process
|
||||
/// becomes available in the pool.
|
||||
condvar: Condvar,
|
||||
}
|
||||
|
||||
// A pool of WAL redo processes
|
||||
#[derive(Default)]
|
||||
struct ProcessList {
|
||||
/// processes that are available for reuse
|
||||
free_processes: Vec<PostgresRedoProcess>,
|
||||
|
||||
/// Total number of processes, including all the processes in
|
||||
/// 'free_processes' list, and any processes that are in use.
|
||||
num_processes: usize,
|
||||
}
|
||||
|
||||
/// Can this request be served by neon redo functions
|
||||
@@ -206,10 +225,42 @@ impl PostgresRedoManager {
|
||||
PostgresRedoManager {
|
||||
tenant_id,
|
||||
conf,
|
||||
process: Mutex::new(None),
|
||||
process_list: Mutex::new(ProcessList::default()),
|
||||
condvar: Condvar::new(),
|
||||
}
|
||||
}
|
||||
|
||||
// Get a handle to a redo process from the pool.
|
||||
fn get_process(&self, pg_version: u32) -> Result<PostgresRedoProcess, WalRedoError> {
|
||||
let mut process_list = self.process_list.lock().unwrap();
|
||||
|
||||
loop {
|
||||
// If there's a free process immediately available, take it.
|
||||
if let Some(process) = process_list.free_processes.pop() {
|
||||
return Ok(process);
|
||||
}
|
||||
|
||||
// All processes are in use. If the pool is at its maximum size
|
||||
// already, wait for a process to become free. Otherwise launch
|
||||
// a new process.
|
||||
if process_list.num_processes >= MAX_PROCESSES {
|
||||
process_list = self.condvar.wait(process_list).unwrap();
|
||||
continue;
|
||||
} else {
|
||||
let process = PostgresRedoProcess::launch(self.conf, self.tenant_id, pg_version)?;
|
||||
process_list.num_processes += 1;
|
||||
return Ok(process);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Launch process pre-emptively. Should not be needed except for benchmarking.
|
||||
pub fn launch_process(&mut self, pg_version: u32) -> anyhow::Result<()> {
|
||||
// get_process launches a process, if no processes were running previously
|
||||
let _ = self.get_process(pg_version)?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
///
|
||||
/// Process one request for WAL redo using wal-redo postgres
|
||||
///
|
||||
@@ -226,15 +277,9 @@ impl PostgresRedoManager {
|
||||
|
||||
let start_time = Instant::now();
|
||||
|
||||
let mut process_guard = self.process.lock().unwrap();
|
||||
let lock_time = Instant::now();
|
||||
let mut process = self.get_process(pg_version)?;
|
||||
|
||||
// launch the WAL redo process on first use
|
||||
if process_guard.is_none() {
|
||||
let p = PostgresRedoProcess::launch(self.conf, self.tenant_id, pg_version)?;
|
||||
*process_guard = Some(p);
|
||||
}
|
||||
let process = process_guard.as_mut().unwrap();
|
||||
let lock_time = Instant::now();
|
||||
|
||||
WAL_REDO_WAIT_TIME.observe(lock_time.duration_since(start_time).as_secs_f64());
|
||||
|
||||
@@ -268,8 +313,9 @@ impl PostgresRedoManager {
|
||||
lsn
|
||||
);
|
||||
|
||||
// If something went wrong, don't try to reuse the process. Kill it, and
|
||||
// next request will launch a new one.
|
||||
// If something went wrong, don't try to reuse the
|
||||
// process. Kill it, and next request will launch a new one.
|
||||
// Otherwise return the process to the pool.
|
||||
if result.is_err() {
|
||||
error!(
|
||||
"error applying {} WAL records ({} bytes) to reconstruct page image at LSN {}",
|
||||
@@ -277,8 +323,14 @@ impl PostgresRedoManager {
|
||||
nbytes,
|
||||
lsn
|
||||
);
|
||||
let process = process_guard.take().unwrap();
|
||||
process.kill();
|
||||
let mut process_list = self.process_list.lock().unwrap();
|
||||
process_list.num_processes -= 1;
|
||||
self.condvar.notify_one();
|
||||
} else {
|
||||
let mut process_list = self.process_list.lock().unwrap();
|
||||
process_list.free_processes.push(process);
|
||||
self.condvar.notify_one();
|
||||
}
|
||||
result
|
||||
}
|
||||
@@ -598,11 +650,10 @@ impl PostgresRedoProcess {
|
||||
tenant_id: TenantId,
|
||||
pg_version: u32,
|
||||
) -> Result<PostgresRedoProcess, Error> {
|
||||
// FIXME: We need a dummy Postgres cluster to run the process in. Currently, we
|
||||
// just create one with constant name. That fails if you try to launch more than
|
||||
// one WAL redo manager concurrently.
|
||||
// We need a dummy Postgres cluster to run the process in.
|
||||
let processno = WAL_REDO_PROCESS_COUNTER.fetch_add(1, Ordering::Relaxed);
|
||||
let datadir = path_with_suffix_extension(
|
||||
conf.tenant_path(&tenant_id).join("wal-redo-datadir"),
|
||||
conf.tenant_path(&tenant_id).join(format!("wal-redo-datadir-{}", processno)),
|
||||
TEMP_FILE_SUFFIX,
|
||||
);
|
||||
|
||||
@@ -740,7 +791,11 @@ impl PostgresRedoProcess {
|
||||
// This could be problematic if there are millions of records to replay,
|
||||
// but in practice the number of records is usually so small that it doesn't
|
||||
// matter, and it's better to keep this code simple.
|
||||
let mut writebuf: Vec<u8> = Vec::new();
|
||||
//
|
||||
// Most requests start with a before-image with BLCKSZ bytes, followed by
|
||||
// by some other WAL records. Start with a buffer that can hold that
|
||||
// comfortably.
|
||||
let mut writebuf: Vec<u8> = Vec::with_capacity((BLCKSZ as usize) * 3);
|
||||
build_begin_redo_for_block_msg(tag, &mut writebuf);
|
||||
if let Some(img) = base_img {
|
||||
build_push_page_msg(tag, &img, &mut writebuf);
|
||||
|
||||
@@ -32,11 +32,6 @@
|
||||
|
||||
#define PageStoreTrace DEBUG5
|
||||
|
||||
#define NEON_TAG "[NEON_SMGR] "
|
||||
#define neon_log(tag, fmt, ...) ereport(tag, \
|
||||
(errmsg(NEON_TAG fmt, ##__VA_ARGS__), \
|
||||
errhidestmt(true), errhidecontext(true)))
|
||||
|
||||
bool connected = false;
|
||||
PGconn *pageserver_conn = NULL;
|
||||
|
||||
@@ -52,6 +47,7 @@ char *page_server_connstring_raw;
|
||||
|
||||
int n_unflushed_requests = 0;
|
||||
int flush_every_n_requests = 8;
|
||||
int readahead_buffer_size = 128;
|
||||
|
||||
static void pageserver_flush(void);
|
||||
|
||||
@@ -96,11 +92,10 @@ pageserver_connect()
|
||||
|
||||
while (PQisBusy(pageserver_conn))
|
||||
{
|
||||
int wc;
|
||||
WaitEvent event;
|
||||
|
||||
/* Sleep until there's something to do */
|
||||
wc = WaitEventSetWait(pageserver_conn_wes, -1L, &event, 1, PG_WAIT_EXTENSION);
|
||||
(void) WaitEventSetWait(pageserver_conn_wes, -1L, &event, 1, PG_WAIT_EXTENSION);
|
||||
ResetLatch(MyLatch);
|
||||
|
||||
CHECK_FOR_INTERRUPTS();
|
||||
@@ -140,11 +135,10 @@ retry:
|
||||
|
||||
if (ret == 0)
|
||||
{
|
||||
int wc;
|
||||
WaitEvent event;
|
||||
|
||||
/* Sleep until there's something to do */
|
||||
wc = WaitEventSetWait(pageserver_conn_wes, -1L, &event, 1, PG_WAIT_EXTENSION);
|
||||
(void) WaitEventSetWait(pageserver_conn_wes, -1L, &event, 1, PG_WAIT_EXTENSION);
|
||||
ResetLatch(MyLatch);
|
||||
|
||||
CHECK_FOR_INTERRUPTS();
|
||||
@@ -238,6 +232,9 @@ pageserver_receive(void)
|
||||
StringInfoData resp_buff;
|
||||
NeonResponse *resp;
|
||||
|
||||
if (!connected)
|
||||
return NULL;
|
||||
|
||||
PG_TRY();
|
||||
{
|
||||
/* read response */
|
||||
@@ -247,7 +244,10 @@ pageserver_receive(void)
|
||||
if (resp_buff.len < 0)
|
||||
{
|
||||
if (resp_buff.len == -1)
|
||||
neon_log(ERROR, "end of COPY");
|
||||
{
|
||||
pageserver_disconnect();
|
||||
return NULL;
|
||||
}
|
||||
else if (resp_buff.len == -2)
|
||||
neon_log(ERROR, "could not read COPY data: %s", PQerrorMessage(pageserver_conn));
|
||||
}
|
||||
@@ -449,9 +449,22 @@ pg_init_libpagestore(void)
|
||||
NULL,
|
||||
&flush_every_n_requests,
|
||||
8, -1, INT_MAX,
|
||||
PGC_SIGHUP,
|
||||
PGC_USERSET,
|
||||
0, /* no flags required */
|
||||
NULL, NULL, NULL);
|
||||
DefineCustomIntVariable("neon.readahead_buffer_size",
|
||||
"number of prefetches to buffer",
|
||||
"This buffer is used to store prefetched data; so "
|
||||
"it is important that this buffer is at least as "
|
||||
"large as the configured value of all tablespaces' "
|
||||
"effective_io_concurrency and maintenance_io_concurrency, "
|
||||
"your sessions' values of these, and the value for "
|
||||
"seqscan_prefetch_buffers.",
|
||||
&readahead_buffer_size,
|
||||
128, 16, 1024,
|
||||
PGC_USERSET,
|
||||
0, /* no flags required */
|
||||
NULL, (GucIntAssignHook) &readahead_buffer_resize, NULL);
|
||||
|
||||
relsize_hash_init();
|
||||
|
||||
|
||||
@@ -49,6 +49,11 @@ typedef struct
|
||||
|
||||
#define messageTag(m) (((const NeonMessage *)(m))->tag)
|
||||
|
||||
#define NEON_TAG "[NEON_SMGR] "
|
||||
#define neon_log(tag, fmt, ...) ereport(tag, \
|
||||
(errmsg(NEON_TAG fmt, ##__VA_ARGS__), \
|
||||
errhidestmt(true), errhidecontext(true)))
|
||||
|
||||
/*
|
||||
* supertype of all the Neon*Request structs below
|
||||
*
|
||||
@@ -150,6 +155,8 @@ extern void prefetch_on_ps_disconnect(void);
|
||||
extern page_server_api * page_server;
|
||||
|
||||
extern char *page_server_connstring;
|
||||
extern int flush_every_n_requests;
|
||||
extern int readahead_buffer_size;
|
||||
extern bool seqscan_prefetch_enabled;
|
||||
extern int seqscan_prefetch_distance;
|
||||
extern char *neon_timeline;
|
||||
@@ -159,6 +166,7 @@ extern int32 max_cluster_size;
|
||||
|
||||
extern const f_smgr *smgr_neon(BackendId backend, RelFileNode rnode);
|
||||
extern void smgr_init_neon(void);
|
||||
extern void readahead_buffer_resize(int newsize, void *extra);
|
||||
|
||||
/* Neon storage manager functionality */
|
||||
|
||||
|
||||
@@ -116,10 +116,10 @@ static UnloggedBuildPhase unlogged_build_phase = UNLOGGED_BUILD_NOT_IN_PROGRESS;
|
||||
*
|
||||
* Prefetch is performed locally by each backend.
|
||||
*
|
||||
* There can be up to READ_BUFFER_SIZE active IO requests registered at any
|
||||
* time. Requests using smgr_prefetch are sent to the pageserver, but we don't
|
||||
* wait on the response. Requests using smgr_read are either read from the
|
||||
* buffer, or (if that's not possible) we wait on the response to arrive -
|
||||
* There can be up to readahead_buffer_size active IO requests registered at
|
||||
* any time. Requests using smgr_prefetch are sent to the pageserver, but we
|
||||
* don't wait on the response. Requests using smgr_read are either read from
|
||||
* the buffer, or (if that's not possible) we wait on the response to arrive -
|
||||
* this also will allow us to receive other prefetched pages.
|
||||
* Each request is immediately written to the output buffer of the pageserver
|
||||
* connection, but may not be flushed if smgr_prefetch is used: pageserver
|
||||
@@ -136,15 +136,25 @@ static UnloggedBuildPhase unlogged_build_phase = UNLOGGED_BUILD_NOT_IN_PROGRESS;
|
||||
* the connection; the responses are stored for later use.
|
||||
*
|
||||
* NOTE: The current implementation of the prefetch system implements a ring
|
||||
* buffer of up to READ_BUFFER_SIZE requests. If there are more _read and
|
||||
* buffer of up to readahead_buffer_size requests. If there are more _read and
|
||||
* _prefetch requests between the initial _prefetch and the _read of a buffer,
|
||||
* the prefetch request will have been dropped from this prefetch buffer, and
|
||||
* your prefetch was wasted.
|
||||
*/
|
||||
|
||||
/* Max amount of tracked buffer reads */
|
||||
#define READ_BUFFER_SIZE 128
|
||||
|
||||
/*
|
||||
* State machine:
|
||||
*
|
||||
* not in hash : in hash
|
||||
* :
|
||||
* UNUSED ------> REQUESTED --> RECEIVED
|
||||
* ^ : | |
|
||||
* | : v |
|
||||
* | : TAG_UNUSED |
|
||||
* | : | |
|
||||
* +----------------+------------+
|
||||
* :
|
||||
*/
|
||||
typedef enum PrefetchStatus {
|
||||
PRFS_UNUSED = 0, /* unused slot */
|
||||
PRFS_REQUESTED, /* request was written to the sendbuffer to PS, but not
|
||||
@@ -192,7 +202,7 @@ typedef struct PrfHashEntry {
|
||||
* It maintains a (ring) buffer of in-flight requests and responses.
|
||||
*
|
||||
* We maintain several indexes into the ring buffer:
|
||||
* ring_unused >= ring_receive >= ring_last >= 0
|
||||
* ring_unused >= ring_flush >= ring_receive >= ring_last >= 0
|
||||
*
|
||||
* ring_unused points to the first unused slot of the buffer
|
||||
* ring_receive is the next request that is to be received
|
||||
@@ -208,6 +218,7 @@ typedef struct PrefetchState {
|
||||
|
||||
/* buffer indexes */
|
||||
uint64 ring_unused; /* first unused slot */
|
||||
uint64 ring_flush; /* next request to flush */
|
||||
uint64 ring_receive; /* next slot that is to receive a response */
|
||||
uint64 ring_last; /* min slot with a response value */
|
||||
|
||||
@@ -218,11 +229,19 @@ typedef struct PrefetchState {
|
||||
|
||||
/* the buffers */
|
||||
prfh_hash *prf_hash;
|
||||
PrefetchRequest prf_buffer[READ_BUFFER_SIZE]; /* prefetch buffers */
|
||||
PrefetchRequest prf_buffer[]; /* prefetch buffers */
|
||||
} PrefetchState;
|
||||
|
||||
PrefetchState *MyPState;
|
||||
|
||||
#define GetPrfSlot(ring_index) ( \
|
||||
( \
|
||||
AssertMacro((ring_index) < MyPState->ring_unused && \
|
||||
(ring_index) >= MyPState->ring_last), \
|
||||
&MyPState->prf_buffer[((ring_index) % readahead_buffer_size)] \
|
||||
) \
|
||||
)
|
||||
|
||||
int n_prefetch_hits = 0;
|
||||
int n_prefetch_misses = 0;
|
||||
int n_prefetch_missed_caches = 0;
|
||||
@@ -232,18 +251,116 @@ XLogRecPtr prefetch_lsn = 0;
|
||||
|
||||
static void consume_prefetch_responses(void);
|
||||
static uint64 prefetch_register_buffer(BufferTag tag, bool *force_latest, XLogRecPtr *force_lsn);
|
||||
static void prefetch_read(PrefetchRequest *slot);
|
||||
static bool prefetch_read(PrefetchRequest *slot);
|
||||
static void prefetch_do_request(PrefetchRequest *slot, bool *force_latest, XLogRecPtr *force_lsn);
|
||||
static void prefetch_wait_for(uint64 ring_index);
|
||||
static bool prefetch_wait_for(uint64 ring_index);
|
||||
static void prefetch_cleanup(void);
|
||||
static inline void prefetch_set_unused(uint64 ring_index, bool hash_cleanup);
|
||||
static inline void prefetch_set_unused(uint64 ring_index);
|
||||
|
||||
static XLogRecPtr neon_get_request_lsn(bool *latest, RelFileNode rnode,
|
||||
ForkNumber forknum, BlockNumber blkno);
|
||||
|
||||
void
|
||||
readahead_buffer_resize(int newsize, void *extra)
|
||||
{
|
||||
uint64 end,
|
||||
nfree = newsize;
|
||||
PrefetchState *newPState;
|
||||
Size newprfs_size = offsetof(PrefetchState, prf_buffer) + (
|
||||
sizeof(PrefetchRequest) * readahead_buffer_size
|
||||
);
|
||||
|
||||
/* don't try to re-initialize if we haven't initialized yet */
|
||||
if (MyPState == NULL)
|
||||
return;
|
||||
|
||||
/*
|
||||
* Make sure that we don't lose track of active prefetch requests by
|
||||
* ensuring we have received all but the last n requests (n = newsize).
|
||||
*/
|
||||
if (MyPState->n_requests_inflight > newsize)
|
||||
prefetch_wait_for(MyPState->ring_unused - newsize);
|
||||
|
||||
/* construct the new PrefetchState, and copy over the memory contexts */
|
||||
newPState = MemoryContextAllocZero(TopMemoryContext, newprfs_size);
|
||||
|
||||
newPState->bufctx = MyPState->bufctx;
|
||||
newPState->errctx = MyPState->errctx;
|
||||
newPState->hashctx = MyPState->hashctx;
|
||||
newPState->prf_hash = prfh_create(MyPState->hashctx, newsize, NULL);
|
||||
newPState->n_unused = newsize;
|
||||
newPState->n_requests_inflight = 0;
|
||||
newPState->n_responses_buffered = 0;
|
||||
newPState->ring_last = newsize;
|
||||
newPState->ring_unused = newsize;
|
||||
newPState->ring_receive = newsize;
|
||||
newPState->ring_flush = newsize;
|
||||
|
||||
/*
|
||||
* Copy over the prefetches.
|
||||
*
|
||||
* We populate the prefetch array from the end; to retain the most recent
|
||||
* prefetches, but this has the benefit of only needing to do one iteration
|
||||
* on the dataset, and trivial compaction.
|
||||
*/
|
||||
for (end = MyPState->ring_unused - 1;
|
||||
end >= MyPState->ring_last && end != UINT64_MAX && nfree != 0;
|
||||
end -= 1)
|
||||
{
|
||||
PrefetchRequest *slot = GetPrfSlot(end);
|
||||
PrefetchRequest *newslot;
|
||||
bool found;
|
||||
|
||||
if (slot->status == PRFS_UNUSED)
|
||||
continue;
|
||||
|
||||
nfree -= 1;
|
||||
|
||||
newslot = &newPState->prf_buffer[nfree];
|
||||
*newslot = *slot;
|
||||
newslot->my_ring_index = nfree;
|
||||
|
||||
prfh_insert(newPState->prf_hash, newslot, &found);
|
||||
|
||||
Assert(!found);
|
||||
|
||||
switch (newslot->status)
|
||||
{
|
||||
case PRFS_UNUSED:
|
||||
pg_unreachable();
|
||||
case PRFS_REQUESTED:
|
||||
newPState->n_requests_inflight += 1;
|
||||
newPState->ring_receive -= 1;
|
||||
newPState->ring_last -= 1;
|
||||
break;
|
||||
case PRFS_RECEIVED:
|
||||
newPState->n_responses_buffered += 1;
|
||||
newPState->ring_last -= 1;
|
||||
break;
|
||||
case PRFS_TAG_REMAINS:
|
||||
newPState->ring_last -= 1;
|
||||
break;
|
||||
}
|
||||
newPState->n_unused -= 1;
|
||||
}
|
||||
|
||||
for (; end >= MyPState->ring_last && end != UINT64_MAX; end -= 1)
|
||||
{
|
||||
prefetch_set_unused(end);
|
||||
}
|
||||
|
||||
prfh_destroy(MyPState->prf_hash);
|
||||
pfree(MyPState);
|
||||
MyPState = newPState;
|
||||
}
|
||||
|
||||
|
||||
|
||||
/*
|
||||
* Make sure that there are no responses still in the buffer.
|
||||
*
|
||||
* NOTE: this function may indirectly update MyPState->pfs_hash; which
|
||||
* invalidates any active pointers into the hash table.
|
||||
*/
|
||||
static void
|
||||
consume_prefetch_responses(void)
|
||||
@@ -255,14 +372,12 @@ consume_prefetch_responses(void)
|
||||
static void
|
||||
prefetch_cleanup(void)
|
||||
{
|
||||
int index;
|
||||
uint64 ring_index;
|
||||
PrefetchRequest *slot;
|
||||
|
||||
while (MyPState->ring_last < MyPState->ring_receive) {
|
||||
ring_index = MyPState->ring_last;
|
||||
index = (ring_index % READ_BUFFER_SIZE);
|
||||
slot = &MyPState->prf_buffer[index];
|
||||
slot = GetPrfSlot(ring_index);
|
||||
|
||||
if (slot->status == PRFS_UNUSED)
|
||||
MyPState->ring_last += 1;
|
||||
@@ -274,23 +389,33 @@ prefetch_cleanup(void)
|
||||
/*
|
||||
* Wait for slot of ring_index to have received its response.
|
||||
* The caller is responsible for making sure the request buffer is flushed.
|
||||
*
|
||||
* NOTE: this function may indirectly update MyPState->pfs_hash; which
|
||||
* invalidates any active pointers into the hash table.
|
||||
*/
|
||||
static void
|
||||
static bool
|
||||
prefetch_wait_for(uint64 ring_index)
|
||||
{
|
||||
int index;
|
||||
PrefetchRequest *entry;
|
||||
|
||||
if (MyPState->ring_flush <= ring_index &&
|
||||
MyPState->ring_unused > MyPState->ring_flush)
|
||||
{
|
||||
page_server->flush();
|
||||
MyPState->ring_flush = MyPState->ring_unused;
|
||||
}
|
||||
|
||||
Assert(MyPState->ring_unused > ring_index);
|
||||
|
||||
while (MyPState->ring_receive <= ring_index)
|
||||
{
|
||||
index = (MyPState->ring_receive % READ_BUFFER_SIZE);
|
||||
entry = &MyPState->prf_buffer[index];
|
||||
entry = GetPrfSlot(MyPState->ring_receive);
|
||||
|
||||
Assert(entry->status == PRFS_REQUESTED);
|
||||
prefetch_read(entry);
|
||||
if (!prefetch_read(entry))
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -298,8 +423,11 @@ prefetch_wait_for(uint64 ring_index)
|
||||
*
|
||||
* The caller is responsible for making sure that the request for this buffer
|
||||
* was flushed to the PageServer.
|
||||
*
|
||||
* NOTE: this function may indirectly update MyPState->pfs_hash; which
|
||||
* invalidates any active pointers into the hash table.
|
||||
*/
|
||||
static void
|
||||
static bool
|
||||
prefetch_read(PrefetchRequest *slot)
|
||||
{
|
||||
NeonResponse *response;
|
||||
@@ -312,15 +440,22 @@ prefetch_read(PrefetchRequest *slot)
|
||||
old = MemoryContextSwitchTo(MyPState->errctx);
|
||||
response = (NeonResponse *) page_server->receive();
|
||||
MemoryContextSwitchTo(old);
|
||||
if (response)
|
||||
{
|
||||
/* update prefetch state */
|
||||
MyPState->n_responses_buffered += 1;
|
||||
MyPState->n_requests_inflight -= 1;
|
||||
MyPState->ring_receive += 1;
|
||||
|
||||
/* update prefetch state */
|
||||
MyPState->n_responses_buffered += 1;
|
||||
MyPState->n_requests_inflight -= 1;
|
||||
MyPState->ring_receive += 1;
|
||||
|
||||
/* update slot state */
|
||||
slot->status = PRFS_RECEIVED;
|
||||
slot->response = response;
|
||||
/* update slot state */
|
||||
slot->status = PRFS_RECEIVED;
|
||||
slot->response = response;
|
||||
return true;
|
||||
}
|
||||
else
|
||||
{
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -332,19 +467,22 @@ prefetch_read(PrefetchRequest *slot)
|
||||
void
|
||||
prefetch_on_ps_disconnect(void)
|
||||
{
|
||||
for (; MyPState->ring_receive < MyPState->ring_unused; MyPState->ring_receive++)
|
||||
MyPState->ring_flush = MyPState->ring_unused;
|
||||
while (MyPState->ring_receive < MyPState->ring_unused)
|
||||
{
|
||||
PrefetchRequest *slot;
|
||||
int index = MyPState->ring_receive % READ_BUFFER_SIZE;
|
||||
uint64 ring_index = MyPState->ring_receive;
|
||||
|
||||
slot = GetPrfSlot(ring_index);
|
||||
|
||||
slot = &MyPState->prf_buffer[index];
|
||||
Assert(slot->status == PRFS_REQUESTED);
|
||||
Assert(slot->my_ring_index == MyPState->ring_receive);
|
||||
Assert(slot->my_ring_index == ring_index);
|
||||
|
||||
/* clean up the request */
|
||||
slot->status = PRFS_TAG_REMAINS;
|
||||
MyPState->n_requests_inflight--;
|
||||
prefetch_set_unused(MyPState->ring_receive, true);
|
||||
MyPState->n_requests_inflight -= 1;
|
||||
MyPState->ring_receive += 1;
|
||||
prefetch_set_unused(ring_index);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -353,21 +491,24 @@ prefetch_on_ps_disconnect(void)
|
||||
*
|
||||
* The slot at ring_index must be a current member of the ring buffer,
|
||||
* and may not be in the PRFS_REQUESTED state.
|
||||
*
|
||||
* NOTE: this function will update MyPState->pfs_hash; which invalidates any
|
||||
* active pointers into the hash table.
|
||||
*/
|
||||
static inline void
|
||||
prefetch_set_unused(uint64 ring_index, bool hash_cleanup)
|
||||
prefetch_set_unused(uint64 ring_index)
|
||||
{
|
||||
PrefetchRequest *slot = &MyPState->prf_buffer[ring_index % READ_BUFFER_SIZE];
|
||||
PrefetchRequest *slot = GetPrfSlot(ring_index);
|
||||
|
||||
Assert(MyPState->ring_last <= ring_index &&
|
||||
MyPState->ring_unused > ring_index);
|
||||
if (ring_index < MyPState->ring_last)
|
||||
return; /* Should already be unused */
|
||||
|
||||
Assert(MyPState->ring_unused > ring_index);
|
||||
|
||||
if (slot->status == PRFS_UNUSED)
|
||||
return;
|
||||
|
||||
Assert(slot->status == PRFS_RECEIVED || slot->status == PRFS_TAG_REMAINS);
|
||||
Assert(ring_index >= MyPState->ring_last &&
|
||||
ring_index < MyPState->ring_unused);
|
||||
|
||||
if (slot->status == PRFS_RECEIVED)
|
||||
{
|
||||
@@ -382,8 +523,7 @@ prefetch_set_unused(uint64 ring_index, bool hash_cleanup)
|
||||
Assert(slot->response == NULL);
|
||||
}
|
||||
|
||||
if (hash_cleanup)
|
||||
prfh_delete(MyPState->prf_hash, slot);
|
||||
prfh_delete(MyPState->prf_hash, slot);
|
||||
|
||||
/* clear all fields */
|
||||
MemSet(slot, 0, sizeof(PrefetchRequest));
|
||||
@@ -397,6 +537,7 @@ prefetch_set_unused(uint64 ring_index, bool hash_cleanup)
|
||||
static void
|
||||
prefetch_do_request(PrefetchRequest *slot, bool *force_latest, XLogRecPtr *force_lsn)
|
||||
{
|
||||
bool found;
|
||||
NeonGetPageRequest request = {
|
||||
.req.tag = T_NeonGetPageRequest,
|
||||
.req.latest = false,
|
||||
@@ -454,6 +595,9 @@ prefetch_do_request(PrefetchRequest *slot, bool *force_latest, XLogRecPtr *force
|
||||
|
||||
/* update slot state */
|
||||
slot->status = PRFS_REQUESTED;
|
||||
|
||||
prfh_insert(MyPState->prf_hash, slot, &found);
|
||||
Assert(!found);
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -464,13 +608,14 @@ prefetch_do_request(PrefetchRequest *slot, bool *force_latest, XLogRecPtr *force
|
||||
* If force_latest and force_lsn are not NULL, those values are sent to the
|
||||
* pageserver. If they are NULL, we utilize the lastWrittenLsn -infrastructure
|
||||
* to fill in these values manually.
|
||||
*
|
||||
* NOTE: this function may indirectly update MyPState->pfs_hash; which
|
||||
* invalidates any active pointers into the hash table.
|
||||
*/
|
||||
|
||||
static uint64
|
||||
prefetch_register_buffer(BufferTag tag, bool *force_latest, XLogRecPtr *force_lsn)
|
||||
{
|
||||
int index;
|
||||
bool found;
|
||||
uint64 ring_index;
|
||||
PrefetchRequest req;
|
||||
PrefetchRequest *slot;
|
||||
@@ -485,28 +630,49 @@ prefetch_register_buffer(BufferTag tag, bool *force_latest, XLogRecPtr *force_ls
|
||||
{
|
||||
slot = entry->slot;
|
||||
ring_index = slot->my_ring_index;
|
||||
index = (ring_index % READ_BUFFER_SIZE);
|
||||
Assert(slot == &MyPState->prf_buffer[index]);
|
||||
Assert(slot == GetPrfSlot(ring_index));
|
||||
|
||||
Assert(slot->status != PRFS_UNUSED);
|
||||
Assert(MyPState->ring_last <= ring_index &&
|
||||
ring_index < MyPState->ring_unused);
|
||||
Assert(BUFFERTAGS_EQUAL(slot->buftag, tag));
|
||||
|
||||
|
||||
/*
|
||||
* If we want a specific lsn, we do not accept requests that were made
|
||||
* with a potentially different LSN.
|
||||
*/
|
||||
if (force_lsn && slot->effective_request_lsn != *force_lsn)
|
||||
if (force_latest && force_lsn)
|
||||
{
|
||||
prefetch_wait_for(ring_index);
|
||||
prefetch_set_unused(ring_index, true);
|
||||
/* if we want the latest version, any effective_request_lsn < request lsn is OK */
|
||||
if (*force_latest)
|
||||
{
|
||||
if (*force_lsn > slot->effective_request_lsn)
|
||||
{
|
||||
prefetch_wait_for(ring_index);
|
||||
prefetch_set_unused(ring_index);
|
||||
entry = NULL;
|
||||
}
|
||||
}
|
||||
/* if we don't want the latest version, only accept requests with the exact same LSN */
|
||||
else
|
||||
{
|
||||
if (*force_lsn != slot->effective_request_lsn)
|
||||
{
|
||||
prefetch_wait_for(ring_index);
|
||||
prefetch_set_unused(ring_index);
|
||||
entry = NULL;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* We received a prefetch for a page that was recently read and
|
||||
* removed from the buffers. Remove that request from the buffers.
|
||||
*/
|
||||
else if (slot->status == PRFS_TAG_REMAINS)
|
||||
{
|
||||
prefetch_set_unused(ring_index, true);
|
||||
prefetch_set_unused(ring_index);
|
||||
entry = NULL;
|
||||
}
|
||||
else
|
||||
{
|
||||
@@ -529,9 +695,10 @@ prefetch_register_buffer(BufferTag tag, bool *force_latest, XLogRecPtr *force_ls
|
||||
* output buffer, and 'not sending' a prefetch request kind of goes
|
||||
* against the principles of prefetching)
|
||||
*/
|
||||
if (MyPState->ring_last + READ_BUFFER_SIZE - 1 == MyPState->ring_unused)
|
||||
if (MyPState->ring_last + readahead_buffer_size - 1 == MyPState->ring_unused)
|
||||
{
|
||||
slot = &MyPState->prf_buffer[(MyPState->ring_last % READ_BUFFER_SIZE)];
|
||||
uint64 cleanup_index = MyPState->ring_last;
|
||||
slot = GetPrfSlot(cleanup_index);
|
||||
|
||||
Assert(slot->status != PRFS_UNUSED);
|
||||
|
||||
@@ -539,13 +706,13 @@ prefetch_register_buffer(BufferTag tag, bool *force_latest, XLogRecPtr *force_ls
|
||||
switch (slot->status)
|
||||
{
|
||||
case PRFS_REQUESTED:
|
||||
Assert(MyPState->ring_receive == MyPState->ring_last);
|
||||
prefetch_wait_for(MyPState->ring_last);
|
||||
prefetch_set_unused(MyPState->ring_last, true);
|
||||
Assert(MyPState->ring_receive == cleanup_index);
|
||||
prefetch_wait_for(cleanup_index);
|
||||
prefetch_set_unused(cleanup_index);
|
||||
break;
|
||||
case PRFS_RECEIVED:
|
||||
case PRFS_TAG_REMAINS:
|
||||
prefetch_set_unused(MyPState->ring_last, true);
|
||||
prefetch_set_unused(cleanup_index);
|
||||
break;
|
||||
default:
|
||||
pg_unreachable();
|
||||
@@ -553,12 +720,11 @@ prefetch_register_buffer(BufferTag tag, bool *force_latest, XLogRecPtr *force_ls
|
||||
}
|
||||
|
||||
/*
|
||||
* The next buffer pointed to by `ring_unused` is now unused, so we can insert
|
||||
* the new request to it.
|
||||
* The next buffer pointed to by `ring_unused` is now definitely empty,
|
||||
* so we can insert the new request to it.
|
||||
*/
|
||||
ring_index = MyPState->ring_unused;
|
||||
index = (ring_index % READ_BUFFER_SIZE);
|
||||
slot = &MyPState->prf_buffer[index];
|
||||
slot = &MyPState->prf_buffer[((ring_index) % readahead_buffer_size)];
|
||||
|
||||
Assert(MyPState->ring_last <= ring_index);
|
||||
|
||||
@@ -571,22 +737,34 @@ prefetch_register_buffer(BufferTag tag, bool *force_latest, XLogRecPtr *force_ls
|
||||
slot->buftag = tag;
|
||||
slot->my_ring_index = ring_index;
|
||||
|
||||
prfh_insert(MyPState->prf_hash, slot, &found);
|
||||
Assert(!found);
|
||||
|
||||
prefetch_do_request(slot, force_latest, force_lsn);
|
||||
Assert(slot->status == PRFS_REQUESTED);
|
||||
Assert(ring_index < MyPState->ring_unused);
|
||||
Assert(MyPState->ring_last <= ring_index &&
|
||||
ring_index < MyPState->ring_unused);
|
||||
|
||||
if (flush_every_n_requests > 0 &&
|
||||
MyPState->ring_unused - MyPState->ring_flush >= flush_every_n_requests)
|
||||
{
|
||||
page_server->flush();
|
||||
MyPState->ring_flush = MyPState->ring_unused;
|
||||
}
|
||||
|
||||
return ring_index;
|
||||
}
|
||||
|
||||
static NeonResponse *
|
||||
page_server_request(void const *req)
|
||||
{
|
||||
page_server->send((NeonRequest *) req);
|
||||
page_server->flush();
|
||||
consume_prefetch_responses();
|
||||
return page_server->receive();
|
||||
NeonResponse* resp;
|
||||
do {
|
||||
page_server->send((NeonRequest *) req);
|
||||
page_server->flush();
|
||||
MyPState->ring_flush = MyPState->ring_unused;
|
||||
consume_prefetch_responses();
|
||||
resp = page_server->receive();
|
||||
} while (resp == NULL);
|
||||
return resp;
|
||||
|
||||
}
|
||||
|
||||
|
||||
@@ -1052,14 +1230,18 @@ neon_wallog_page(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, ch
|
||||
void
|
||||
neon_init(void)
|
||||
{
|
||||
HASHCTL info;
|
||||
Size prfs_size;
|
||||
|
||||
if (MyPState != NULL)
|
||||
return;
|
||||
|
||||
MyPState = MemoryContextAllocZero(TopMemoryContext, sizeof(PrefetchState));
|
||||
prfs_size = offsetof(PrefetchState, prf_buffer) + (
|
||||
sizeof(PrefetchRequest) * readahead_buffer_size
|
||||
);
|
||||
|
||||
MyPState = MemoryContextAllocZero(TopMemoryContext, prfs_size);
|
||||
|
||||
MyPState->n_unused = READ_BUFFER_SIZE;
|
||||
MyPState->n_unused = readahead_buffer_size;
|
||||
|
||||
MyPState->bufctx = SlabContextCreate(TopMemoryContext,
|
||||
"NeonSMGR/prefetch",
|
||||
@@ -1072,11 +1254,8 @@ neon_init(void)
|
||||
"NeonSMGR/prefetch",
|
||||
ALLOCSET_DEFAULT_SIZES);
|
||||
|
||||
info.keysize = sizeof(BufferTag);
|
||||
info.entrysize = sizeof(uint64);
|
||||
|
||||
MyPState->prf_hash = prfh_create(MyPState->hashctx,
|
||||
READ_BUFFER_SIZE, NULL);
|
||||
readahead_buffer_size, NULL);
|
||||
|
||||
#ifdef DEBUG_COMPARE_LOCAL
|
||||
mdinit();
|
||||
@@ -1470,7 +1649,8 @@ neon_close(SMgrRelation reln, ForkNumber forknum)
|
||||
bool
|
||||
neon_prefetch(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum)
|
||||
{
|
||||
uint64 ring_index;
|
||||
BufferTag tag;
|
||||
uint64 ring_index PG_USED_FOR_ASSERTS_ONLY;
|
||||
|
||||
switch (reln->smgr_relpersistence)
|
||||
{
|
||||
@@ -1486,7 +1666,7 @@ neon_prefetch(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum)
|
||||
elog(ERROR, "unknown relpersistence '%c'", reln->smgr_relpersistence);
|
||||
}
|
||||
|
||||
BufferTag tag = (BufferTag) {
|
||||
tag = (BufferTag) {
|
||||
.rnode = reln->smgr_rnode.node,
|
||||
.forkNum = forknum,
|
||||
.blockNum = blocknum
|
||||
@@ -1565,9 +1745,9 @@ neon_read_at_lsn(RelFileNode rnode, ForkNumber forkNum, BlockNumber blkno,
|
||||
|
||||
if (entry != NULL)
|
||||
{
|
||||
if (entry->slot->effective_request_lsn >= prefetch_lsn)
|
||||
slot = entry->slot;
|
||||
if (slot->effective_request_lsn >= request_lsn)
|
||||
{
|
||||
slot = entry->slot;
|
||||
ring_index = slot->my_ring_index;
|
||||
n_prefetch_hits += 1;
|
||||
}
|
||||
@@ -1578,36 +1758,36 @@ neon_read_at_lsn(RelFileNode rnode, ForkNumber forkNum, BlockNumber blkno,
|
||||
* unlikely this happens, but it can happen if prefetch distance is
|
||||
* large enough and a backend didn't consume all prefetch requests.
|
||||
*/
|
||||
if (entry->slot->status == PRFS_REQUESTED)
|
||||
if (slot->status == PRFS_REQUESTED)
|
||||
{
|
||||
page_server->flush();
|
||||
prefetch_wait_for(entry->slot->my_ring_index);
|
||||
prefetch_wait_for(slot->my_ring_index);
|
||||
}
|
||||
/* drop caches */
|
||||
prefetch_set_unused(entry->slot->my_ring_index, true);
|
||||
prefetch_set_unused(slot->my_ring_index);
|
||||
n_prefetch_missed_caches += 1;
|
||||
/* make it look like a prefetch cache miss */
|
||||
entry = NULL;
|
||||
}
|
||||
}
|
||||
|
||||
if (entry == NULL)
|
||||
do
|
||||
{
|
||||
n_prefetch_misses += 1;
|
||||
if (entry == NULL)
|
||||
{
|
||||
n_prefetch_misses += 1;
|
||||
|
||||
ring_index = prefetch_register_buffer(buftag, &request_latest,
|
||||
&request_lsn);
|
||||
slot = &MyPState->prf_buffer[(ring_index % READ_BUFFER_SIZE)];
|
||||
}
|
||||
ring_index = prefetch_register_buffer(buftag, &request_latest,
|
||||
&request_lsn);
|
||||
slot = GetPrfSlot(ring_index);
|
||||
}
|
||||
|
||||
Assert(MyPState->ring_last <= ring_index &&
|
||||
MyPState->ring_unused > ring_index);
|
||||
Assert(slot->my_ring_index == ring_index);
|
||||
Assert(slot->status != PRFS_UNUSED);
|
||||
Assert(&MyPState->prf_buffer[(ring_index % READ_BUFFER_SIZE)] == slot);
|
||||
Assert(slot->my_ring_index == ring_index);
|
||||
Assert(MyPState->ring_last <= ring_index &&
|
||||
MyPState->ring_unused > ring_index);
|
||||
Assert(slot->status != PRFS_UNUSED);
|
||||
Assert(GetPrfSlot(ring_index) == slot);
|
||||
|
||||
page_server->flush();
|
||||
prefetch_wait_for(ring_index);
|
||||
} while (!prefetch_wait_for(ring_index));
|
||||
|
||||
Assert(slot->status == PRFS_RECEIVED);
|
||||
|
||||
@@ -1637,7 +1817,7 @@ neon_read_at_lsn(RelFileNode rnode, ForkNumber forkNum, BlockNumber blkno,
|
||||
}
|
||||
|
||||
/* buffer was used, clean up for later reuse */
|
||||
prefetch_set_unused(ring_index, true);
|
||||
prefetch_set_unused(ring_index);
|
||||
prefetch_cleanup();
|
||||
}
|
||||
|
||||
|
||||
@@ -119,6 +119,7 @@ static TimestampTz last_reconnect_attempt;
|
||||
static WalproposerShmemState * walprop_shared;
|
||||
|
||||
/* Prototypes for private functions */
|
||||
static void WalProposerRegister(void);
|
||||
static void WalProposerInit(XLogRecPtr flushRecPtr, uint64 systemId);
|
||||
static void WalProposerStart(void);
|
||||
static void WalProposerLoop(void);
|
||||
@@ -455,7 +456,7 @@ WalProposerPoll(void)
|
||||
/*
|
||||
* Register a background worker proposing WAL to wal acceptors.
|
||||
*/
|
||||
void
|
||||
static void
|
||||
WalProposerRegister(void)
|
||||
{
|
||||
BackgroundWorker bgw;
|
||||
|
||||
@@ -377,18 +377,18 @@ typedef struct Safekeeper
|
||||
AppendResponse appendResponse; /* feedback for master */
|
||||
} Safekeeper;
|
||||
|
||||
extern PGDLLIMPORT void WalProposerMain(Datum main_arg);
|
||||
void WalProposerBroadcast(XLogRecPtr startpos, XLogRecPtr endpos);
|
||||
void WalProposerPoll(void);
|
||||
void WalProposerRegister(void);
|
||||
void ParseReplicationFeedbackMessage(StringInfo reply_message,
|
||||
ReplicationFeedback * rf);
|
||||
extern void WalProposerSync(int argc, char *argv[]);
|
||||
extern void WalProposerMain(Datum main_arg);
|
||||
extern void WalProposerBroadcast(XLogRecPtr startpos, XLogRecPtr endpos);
|
||||
extern void WalProposerPoll(void);
|
||||
extern void ParseReplicationFeedbackMessage(StringInfo reply_message,
|
||||
ReplicationFeedback *rf);
|
||||
extern void StartProposerReplication(StartReplicationCmd *cmd);
|
||||
|
||||
Size WalproposerShmemSize(void);
|
||||
bool WalproposerShmemInit(void);
|
||||
void replication_feedback_set(ReplicationFeedback * rf);
|
||||
void replication_feedback_get_lsns(XLogRecPtr *writeLsn, XLogRecPtr *flushLsn, XLogRecPtr *applyLsn);
|
||||
extern Size WalproposerShmemSize(void);
|
||||
extern bool WalproposerShmemInit(void);
|
||||
extern void replication_feedback_set(ReplicationFeedback *rf);
|
||||
extern void replication_feedback_get_lsns(XLogRecPtr *writeLsn, XLogRecPtr *flushLsn, XLogRecPtr *applyLsn);
|
||||
|
||||
/* libpqwalproposer hooks & helper type */
|
||||
|
||||
|
||||
@@ -1,51 +0,0 @@
|
||||
#!/bin/bash
|
||||
|
||||
# this is a shortcut script to avoid duplication in CI
|
||||
set -eux -o pipefail
|
||||
|
||||
SCRIPT_DIR="$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )"
|
||||
COMPOSE_FILE=$SCRIPT_DIR/../docker-compose/docker-compose.yml
|
||||
|
||||
COMPUTE_CONTAINER_NAME=dockercompose_compute_1
|
||||
SQL="CREATE TABLE t(key int primary key, value text); insert into t values(1,1); select * from t;"
|
||||
PSQL_OPTION="-h localhost -U cloud_admin -p 55433 -c '$SQL' postgres"
|
||||
|
||||
cleanup() {
|
||||
echo "show container information"
|
||||
docker ps
|
||||
docker-compose -f $COMPOSE_FILE logs
|
||||
echo "stop containers..."
|
||||
docker-compose -f $COMPOSE_FILE down
|
||||
}
|
||||
|
||||
echo "clean up containers if exists"
|
||||
cleanup
|
||||
|
||||
for pg_version in 14 15; do
|
||||
echo "start containers (pg_version=$pg_version)."
|
||||
PG_VERSION=$pg_version TAG=latest docker-compose -f $COMPOSE_FILE up --build -d
|
||||
|
||||
echo "wait until the compute is ready. timeout after 60s. "
|
||||
cnt=0
|
||||
while sleep 1; do
|
||||
# check timeout
|
||||
cnt=`expr $cnt + 1`
|
||||
if [ $cnt -gt 60 ]; then
|
||||
echo "timeout before the compute is ready."
|
||||
cleanup
|
||||
exit 1
|
||||
fi
|
||||
|
||||
# check if the compute is ready
|
||||
set +o pipefail
|
||||
result=`docker-compose -f $COMPOSE_FILE logs "compute_is_ready" | grep "accepting connections" | wc -l`
|
||||
set -o pipefail
|
||||
if [ $result -eq 1 ]; then
|
||||
echo "OK. The compute is ready to connect."
|
||||
echo "execute simple queries."
|
||||
docker exec -it $COMPUTE_CONTAINER_NAME /bin/bash -c "psql $PSQL_OPTION"
|
||||
cleanup
|
||||
break
|
||||
fi
|
||||
done
|
||||
done
|
||||
@@ -784,6 +784,8 @@ class NeonEnvBuilder:
|
||||
|
||||
self.cleanup_remote_storage()
|
||||
|
||||
self.env.pageserver.assert_no_errors()
|
||||
|
||||
|
||||
class NeonEnv:
|
||||
"""
|
||||
@@ -1566,6 +1568,7 @@ class NeonCli(AbstractNeonCli):
|
||||
def pageserver_start(
|
||||
self,
|
||||
overrides: Tuple[str, ...] = (),
|
||||
extra_env_vars: Optional[Dict[str, str]] = None,
|
||||
) -> "subprocess.CompletedProcess[str]":
|
||||
start_args = ["pageserver", "start", *overrides]
|
||||
append_pageserver_param_overrides(
|
||||
@@ -1575,11 +1578,11 @@ class NeonCli(AbstractNeonCli):
|
||||
pageserver_config_override=self.env.pageserver.config_override,
|
||||
)
|
||||
|
||||
s3_env_vars = None
|
||||
if self.env.remote_storage is not None and isinstance(self.env.remote_storage, S3Storage):
|
||||
s3_env_vars = self.env.remote_storage.access_env_vars()
|
||||
extra_env_vars = (extra_env_vars or {}) | s3_env_vars
|
||||
|
||||
return self.raw_cli(start_args, extra_env_vars=s3_env_vars)
|
||||
return self.raw_cli(start_args, extra_env_vars=extra_env_vars)
|
||||
|
||||
def pageserver_stop(self, immediate=False) -> "subprocess.CompletedProcess[str]":
|
||||
cmd = ["pageserver", "stop"]
|
||||
@@ -1723,7 +1726,48 @@ class NeonPageserver(PgProtocol):
|
||||
self.config_override = config_override
|
||||
self.version = env.get_pageserver_version()
|
||||
|
||||
def start(self, overrides: Tuple[str, ...] = ()) -> "NeonPageserver":
|
||||
# After a test finishes, we will scrape the log to see if there are any
|
||||
# unexpected error messages. If your test expects an error, add it to
|
||||
# 'allowed_errors' in the test with something like:
|
||||
#
|
||||
# env.pageserver.allowed_errors.append(".*could not open garage door.*")
|
||||
#
|
||||
# The entries in the list are regular experessions.
|
||||
self.allowed_errors = [
|
||||
# All tests print these, when starting up or shutting down
|
||||
".*wal receiver task finished with an error: walreceiver connection handling failure.*",
|
||||
".*Shutdown task error: walreceiver connection handling failure.*",
|
||||
".*Etcd client error: grpc request error: status: Unavailable.*",
|
||||
".*query handler for .* failed: Connection reset by peer.*",
|
||||
".*serving compute connection task.*exited with error: Broken pipe.*",
|
||||
".*Connection aborted: error communicating with the server: Broken pipe.*",
|
||||
".*Connection aborted: error communicating with the server: Transport endpoint is not connected.*",
|
||||
".*Connection aborted: error communicating with the server: Connection reset by peer.*",
|
||||
".*kill_and_wait_impl.*: wait successful.*",
|
||||
".*end streaming to Some.*",
|
||||
# safekeeper connection can fail with this, in the window between timeline creation
|
||||
# and streaming start
|
||||
".*Failed to process query for timeline .*: state uninitialized, no data to read.*",
|
||||
# Tests related to authentication and authorization print these
|
||||
".*Error processing HTTP request: Forbidden",
|
||||
# intentional failpoints
|
||||
".*failpoint ",
|
||||
# FIXME: there is a race condition between GC and detach, see
|
||||
# https://github.com/neondatabase/neon/issues/2442
|
||||
".*could not remove ephemeral file.*No such file or directory.*",
|
||||
# FIXME: These need investigation
|
||||
".*gc_loop.*Failed to get a tenant .* Tenant .* not found in the local state.*",
|
||||
".*compaction_loop.*Failed to get a tenant .* Tenant .* not found in the local state.*",
|
||||
".*manual_gc.*is_shutdown_requested\\(\\) called in an unexpected task or thread.*",
|
||||
".*tenant_list: timeline is not found in remote index while it is present in the tenants registry.*",
|
||||
".*Removing intermediate uninit mark file.*",
|
||||
]
|
||||
|
||||
def start(
|
||||
self,
|
||||
overrides: Tuple[str, ...] = (),
|
||||
extra_env_vars: Optional[Dict[str, str]] = None,
|
||||
) -> "NeonPageserver":
|
||||
"""
|
||||
Start the page server.
|
||||
`overrides` allows to add some config to this pageserver start.
|
||||
@@ -1731,7 +1775,7 @@ class NeonPageserver(PgProtocol):
|
||||
"""
|
||||
assert self.running is False
|
||||
|
||||
self.env.neon_cli.pageserver_start(overrides=overrides)
|
||||
self.env.neon_cli.pageserver_start(overrides=overrides, extra_env_vars=extra_env_vars)
|
||||
self.running = True
|
||||
return self
|
||||
|
||||
@@ -1771,6 +1815,26 @@ class NeonPageserver(PgProtocol):
|
||||
is_testing_enabled_or_skip=self.is_testing_enabled_or_skip,
|
||||
)
|
||||
|
||||
def assert_no_errors(self):
|
||||
logfile = open(os.path.join(self.env.repo_dir, "pageserver.log"), "r")
|
||||
|
||||
error_or_warn = re.compile("ERROR|WARN")
|
||||
errors = []
|
||||
while True:
|
||||
line = logfile.readline()
|
||||
if not line:
|
||||
break
|
||||
|
||||
if error_or_warn.search(line):
|
||||
# It's an ERROR or WARN. Is it in the allow-list?
|
||||
for a in self.allowed_errors:
|
||||
if re.match(a, line):
|
||||
break
|
||||
else:
|
||||
errors.append(line)
|
||||
|
||||
assert not errors
|
||||
|
||||
|
||||
def append_pageserver_param_overrides(
|
||||
params_to_update: List[str],
|
||||
@@ -2014,9 +2078,9 @@ class NeonProxy(PgProtocol):
|
||||
self,
|
||||
proxy_port: int,
|
||||
http_port: int,
|
||||
mgmt_port: int,
|
||||
neon_binpath: Path,
|
||||
auth_endpoint=None,
|
||||
mgmt_port=None,
|
||||
):
|
||||
super().__init__(dsn=auth_endpoint, port=proxy_port)
|
||||
self.host = "127.0.0.1"
|
||||
@@ -2040,6 +2104,7 @@ class NeonProxy(PgProtocol):
|
||||
str(self.neon_binpath / "proxy"),
|
||||
*["--http", f"{self.host}:{self.http_port}"],
|
||||
*["--proxy", f"{self.host}:{self.proxy_port}"],
|
||||
*["--mgmt", f"{self.host}:{self.mgmt_port}"],
|
||||
*["--auth-backend", "postgres"],
|
||||
*["--auth-endpoint", self.auth_endpoint],
|
||||
]
|
||||
@@ -2116,11 +2181,13 @@ def static_proxy(
|
||||
auth_endpoint = f"postgres://proxy:password@{host}:{port}/{dbname}"
|
||||
|
||||
proxy_port = port_distributor.get_port()
|
||||
mgmt_port = port_distributor.get_port()
|
||||
http_port = port_distributor.get_port()
|
||||
|
||||
with NeonProxy(
|
||||
proxy_port=proxy_port,
|
||||
http_port=http_port,
|
||||
mgmt_port=mgmt_port,
|
||||
neon_binpath=neon_binpath,
|
||||
auth_endpoint=auth_endpoint,
|
||||
) as proxy:
|
||||
|
||||
@@ -4,6 +4,7 @@ from typing import List
|
||||
|
||||
from fixtures.benchmark_fixture import PgBenchRunResult
|
||||
from fixtures.compare_fixtures import NeonCompare
|
||||
from fixtures.neon_fixtures import fork_at_current_lsn
|
||||
from performance.test_perf_pgbench import utc_now_timestamp
|
||||
|
||||
# -----------------------------------------------------------------------
|
||||
@@ -43,7 +44,8 @@ def test_compare_child_and_root_pgbench_perf(neon_compare: NeonCompare):
|
||||
pg_root = env.postgres.create_start("root")
|
||||
pg_bin.run_capture(["pgbench", "-i", pg_root.connstr(), "-s10"])
|
||||
|
||||
env.neon_cli.create_branch("child", "root")
|
||||
fork_at_current_lsn(env, pg_root, "child", "root")
|
||||
|
||||
pg_child = env.postgres.create_start("child")
|
||||
|
||||
run_pgbench_on_branch("root", ["pgbench", "-c10", "-T10", pg_root.connstr()])
|
||||
|
||||
@@ -6,6 +6,7 @@ import pytest
|
||||
from fixtures.benchmark_fixture import MetricReport
|
||||
from fixtures.compare_fixtures import PgCompare
|
||||
from fixtures.log_helper import log
|
||||
from pytest_lazyfixture import lazy_fixture # type: ignore
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
@@ -20,11 +21,19 @@ from fixtures.log_helper import log
|
||||
pytest.param(10000000, 1, 4),
|
||||
],
|
||||
)
|
||||
def test_seqscans(neon_with_baseline: PgCompare, rows: int, iters: int, workers: int):
|
||||
env = neon_with_baseline
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"env",
|
||||
[
|
||||
# Run on all envs
|
||||
pytest.param(lazy_fixture("neon_compare"), id="neon"),
|
||||
pytest.param(lazy_fixture("vanilla_compare"), id="vanilla"),
|
||||
pytest.param(lazy_fixture("remote_compare"), id="remote", marks=pytest.mark.remote_cluster),
|
||||
],
|
||||
)
|
||||
def test_seqscans(env: PgCompare, rows: int, iters: int, workers: int):
|
||||
with closing(env.pg.connect()) as conn:
|
||||
with conn.cursor() as cur:
|
||||
cur.execute("drop table if exists t;")
|
||||
cur.execute("create table t (i integer);")
|
||||
cur.execute(f"insert into t values (generate_series(1,{rows}));")
|
||||
|
||||
|
||||
@@ -116,6 +116,13 @@ def test_branch_creation_before_gc(neon_simple_env: NeonEnv):
|
||||
env = neon_simple_env
|
||||
pageserver_http_client = env.pageserver.http_client()
|
||||
|
||||
env.pageserver.allowed_errors.extend(
|
||||
[
|
||||
".*invalid branch start lsn: less than latest GC cutoff.*",
|
||||
".*invalid branch start lsn: less than planned GC cutoff.*",
|
||||
]
|
||||
)
|
||||
|
||||
# Disable background GC but set the `pitr_interval` to be small, so GC can delete something
|
||||
tenant, _ = env.neon_cli.create_tenant(
|
||||
conf={
|
||||
|
||||
@@ -13,6 +13,9 @@ def test_branch_behind(neon_env_builder: NeonEnvBuilder):
|
||||
neon_env_builder.pageserver_config_override = "tenant_config={pitr_interval = '0 sec'}"
|
||||
env = neon_env_builder.init_start()
|
||||
|
||||
env.pageserver.allowed_errors.append(".*invalid branch start lsn.*")
|
||||
env.pageserver.allowed_errors.append(".*invalid start lsn .* for ancestor timeline.*")
|
||||
|
||||
# Branch at the point where only 100 rows were inserted
|
||||
env.neon_cli.create_branch("test_branch_behind")
|
||||
pgmain = env.postgres.create_start("test_branch_behind")
|
||||
|
||||
@@ -11,10 +11,17 @@ from fixtures.types import TenantId, TimelineId
|
||||
# Test restarting page server, while safekeeper and compute node keep
|
||||
# running.
|
||||
def test_broken_timeline(neon_env_builder: NeonEnvBuilder):
|
||||
# One safekeeper is enough for this test.
|
||||
neon_env_builder.num_safekeepers = 3
|
||||
env = neon_env_builder.init_start()
|
||||
|
||||
env.pageserver.allowed_errors.extend(
|
||||
[
|
||||
".*No timelines to attach received.*",
|
||||
".*Failed to process timeline dir contents.*",
|
||||
".*Failed to load delta layer.*",
|
||||
".*Timeline .* was not found.*",
|
||||
]
|
||||
)
|
||||
|
||||
tenant_timelines: List[Tuple[TenantId, TimelineId, Postgres]] = []
|
||||
|
||||
for n in range(4):
|
||||
@@ -72,23 +79,24 @@ def test_broken_timeline(neon_env_builder: NeonEnvBuilder):
|
||||
# First timeline would not get loaded into pageserver due to corrupt metadata file
|
||||
with pytest.raises(Exception, match=f"Timeline {tenant1}/{timeline1} was not found") as err:
|
||||
pg1.start()
|
||||
log.info(f"compute startup failed eagerly for timeline with corrupt metadata: {err}")
|
||||
log.info(
|
||||
f"As expected, compute startup failed eagerly for timeline with corrupt metadata: {err}"
|
||||
)
|
||||
|
||||
# Second timeline has no ancestors, only the metadata file and no layer files
|
||||
# We don't have the remote storage enabled, which means timeline is in an incorrect state,
|
||||
# it's not loaded at all
|
||||
with pytest.raises(Exception, match=f"Timeline {tenant2}/{timeline2} was not found") as err:
|
||||
pg2.start()
|
||||
log.info(f"compute startup failed eagerly for timeline with corrupt metadata: {err}")
|
||||
log.info(f"As expected, compute startup failed for timeline with missing layers: {err}")
|
||||
|
||||
# Yet other timelines will fail when their layers will be queried during basebackup: we don't check layer file contents on startup, when loading the timeline
|
||||
for n in range(3, 4):
|
||||
(bad_tenant, bad_timeline, pg) = tenant_timelines[n]
|
||||
with pytest.raises(Exception, match="extracting base backup failed") as err:
|
||||
pg.start()
|
||||
log.info(
|
||||
f"compute startup failed lazily for timeline {bad_tenant}/{bad_timeline} with corrupt layers, during basebackup preparation: {err}"
|
||||
)
|
||||
# Third timeline will also fail during basebackup, because the layer file is corrupt.
|
||||
# (We don't check layer file contents on startup, when loading the timeline)
|
||||
with pytest.raises(Exception, match="Failed to load delta layer") as err:
|
||||
pg3.start()
|
||||
log.info(
|
||||
f"As expected, compute startup failed for timeline {tenant3}/{timeline3} with corrupt layers: {err}"
|
||||
)
|
||||
|
||||
|
||||
def test_create_multiple_timelines_parallel(neon_simple_env: NeonEnv):
|
||||
@@ -111,6 +119,13 @@ def test_timeline_init_break_before_checkpoint(neon_simple_env: NeonEnv):
|
||||
env = neon_simple_env
|
||||
pageserver_http = env.pageserver.http_client()
|
||||
|
||||
env.pageserver.allowed_errors.extend(
|
||||
[
|
||||
".*Failed to process timeline dir contents.*Timeline has no ancestor and no layer files.*",
|
||||
".*Timeline got dropped without initializing, cleaning its files.*",
|
||||
]
|
||||
)
|
||||
|
||||
tenant_id, _ = env.neon_cli.create_tenant()
|
||||
|
||||
timelines_dir = env.repo_dir / "tenants" / str(tenant_id) / "timelines"
|
||||
|
||||
@@ -2,7 +2,7 @@ import os
|
||||
import shutil
|
||||
import subprocess
|
||||
from pathlib import Path
|
||||
from typing import Any
|
||||
from typing import Any, Optional
|
||||
|
||||
import pytest
|
||||
import toml # TODO: replace with tomllib for Python >= 3.11
|
||||
@@ -50,6 +50,12 @@ def test_create_snapshot(neon_env_builder: NeonEnvBuilder, pg_bin: PgBin, test_o
|
||||
|
||||
env = neon_env_builder.init_start()
|
||||
pg = env.postgres.create_start("main")
|
||||
|
||||
# FIXME: Is this expected?
|
||||
env.pageserver.allowed_errors.append(
|
||||
".*init_tenant_mgr: marking .* as locally complete, while it doesnt exist in remote index.*"
|
||||
)
|
||||
|
||||
pg_bin.run(["pgbench", "--initialize", "--scale=10", pg.connstr()])
|
||||
pg_bin.run(["pgbench", "--time=60", "--progress=2", pg.connstr()])
|
||||
pg_bin.run(["pg_dumpall", f"--dbname={pg.connstr()}", f"--file={test_output_dir / 'dump.sql'}"])
|
||||
@@ -154,6 +160,7 @@ def test_forward_compatibility(
|
||||
from_dir=compatibility_snapshot_dir,
|
||||
to_dir=test_output_dir / "compatibility_snapshot",
|
||||
port_distributor=port_distributor,
|
||||
pg_distrib_dir=compatibility_postgres_distrib_dir,
|
||||
)
|
||||
|
||||
breaking_changes_allowed = (
|
||||
@@ -183,7 +190,12 @@ def test_forward_compatibility(
|
||||
), "Breaking changes are allowed by ALLOW_FORWARD_COMPATIBILITY_BREAKAGE, but the test has passed without any breakage"
|
||||
|
||||
|
||||
def prepare_snapshot(from_dir: Path, to_dir: Path, port_distributor: PortDistributor):
|
||||
def prepare_snapshot(
|
||||
from_dir: Path,
|
||||
to_dir: Path,
|
||||
port_distributor: PortDistributor,
|
||||
pg_distrib_dir: Optional[Path] = None,
|
||||
):
|
||||
assert from_dir.exists(), f"Snapshot '{from_dir}' doesn't exist"
|
||||
assert (from_dir / "repo").exists(), f"Snapshot '{from_dir}' doesn't contain a repo directory"
|
||||
assert (from_dir / "dump.sql").exists(), f"Snapshot '{from_dir}' doesn't contain a dump.sql"
|
||||
@@ -208,7 +220,7 @@ def prepare_snapshot(from_dir: Path, to_dir: Path, port_distributor: PortDistrib
|
||||
# Update paths and ports in config files
|
||||
pageserver_toml = repo_dir / "pageserver.toml"
|
||||
pageserver_config = toml.load(pageserver_toml)
|
||||
pageserver_config["remote_storage"]["local_path"] = repo_dir / "local_fs_remote_storage"
|
||||
pageserver_config["remote_storage"]["local_path"] = str(repo_dir / "local_fs_remote_storage")
|
||||
pageserver_config["listen_http_addr"] = port_distributor.replace_with_new_port(
|
||||
pageserver_config["listen_http_addr"]
|
||||
)
|
||||
@@ -219,6 +231,9 @@ def prepare_snapshot(from_dir: Path, to_dir: Path, port_distributor: PortDistrib
|
||||
port_distributor.replace_with_new_port(ep) for ep in pageserver_config["broker_endpoints"]
|
||||
]
|
||||
|
||||
if pg_distrib_dir:
|
||||
pageserver_config["pg_distrib_dir"] = str(pg_distrib_dir)
|
||||
|
||||
with pageserver_toml.open("w") as f:
|
||||
toml.dump(pageserver_config, f)
|
||||
|
||||
@@ -238,7 +253,10 @@ def prepare_snapshot(from_dir: Path, to_dir: Path, port_distributor: PortDistrib
|
||||
sk["http_port"] = port_distributor.replace_with_new_port(sk["http_port"])
|
||||
sk["pg_port"] = port_distributor.replace_with_new_port(sk["pg_port"])
|
||||
|
||||
with (snapshot_config_toml).open("w") as f:
|
||||
if pg_distrib_dir:
|
||||
snapshot_config["pg_distrib_dir"] = str(pg_distrib_dir)
|
||||
|
||||
with snapshot_config_toml.open("w") as f:
|
||||
toml.dump(snapshot_config, f)
|
||||
|
||||
# Ensure that snapshot doesn't contain references to the original path
|
||||
|
||||
@@ -179,7 +179,16 @@ def test_sync_safekeepers_logs(neon_env_builder: NeonEnvBuilder, pg_bin: PgBin):
|
||||
# run compute_ctl and wait for 10s
|
||||
try:
|
||||
ctl.raw_cli(
|
||||
["--connstr", ps_connstr, "--pgdata", pgdata, "--spec", spec, "--pgbin", pg_bin_path],
|
||||
[
|
||||
"--connstr",
|
||||
"postgres://invalid/",
|
||||
"--pgdata",
|
||||
pgdata,
|
||||
"--spec",
|
||||
spec,
|
||||
"--pgbin",
|
||||
pg_bin_path,
|
||||
],
|
||||
timeout=10,
|
||||
)
|
||||
except TimeoutExpired as exc:
|
||||
|
||||
@@ -1,3 +1,4 @@
|
||||
import pytest
|
||||
from fixtures.neon_fixtures import NeonEnvBuilder, PgBin
|
||||
|
||||
|
||||
@@ -7,8 +8,14 @@ from fixtures.neon_fixtures import NeonEnvBuilder, PgBin
|
||||
# normally restarts after it. Also, there should be GC ERRORs in the log,
|
||||
# but the fixture checks the log for any unexpected ERRORs after every
|
||||
# test anyway, so it doesn't need any special attention here.
|
||||
@pytest.mark.timeout(600)
|
||||
def test_gc_cutoff(neon_env_builder: NeonEnvBuilder, pg_bin: PgBin):
|
||||
env = neon_env_builder.init_start()
|
||||
|
||||
# These warnings are expected, when the pageserver is restarted abruptly
|
||||
env.pageserver.allowed_errors.append(".*found future image layer.*")
|
||||
env.pageserver.allowed_errors.append(".*found future delta layer.*")
|
||||
|
||||
pageserver_http = env.pageserver.http_client()
|
||||
|
||||
# Use aggressive GC and checkpoint settings, so that we also exercise GC during the test
|
||||
@@ -30,10 +37,9 @@ def test_gc_cutoff(neon_env_builder: NeonEnvBuilder, pg_bin: PgBin):
|
||||
|
||||
pageserver_http.configure_failpoints(("after-timeline-gc-removed-layers", "exit"))
|
||||
|
||||
for i in range(5):
|
||||
try:
|
||||
pg_bin.run_capture(["pgbench", "-N", "-c5", "-T100", "-Mprepared", connstr])
|
||||
except Exception:
|
||||
env.pageserver.stop()
|
||||
env.pageserver.start()
|
||||
pageserver_http.configure_failpoints(("after-timeline-gc-removed-layers", "exit"))
|
||||
for _ in range(5):
|
||||
with pytest.raises(Exception):
|
||||
pg_bin.run_capture(["pgbench", "-P1", "-N", "-c5", "-T500", "-Mprepared", connstr])
|
||||
env.pageserver.stop()
|
||||
env.pageserver.start()
|
||||
pageserver_http.configure_failpoints(("after-timeline-gc-removed-layers", "exit"))
|
||||
|
||||
@@ -76,6 +76,26 @@ def test_import_from_vanilla(test_output_dir, pg_bin, vanilla_pg, neon_env_build
|
||||
env = neon_env_builder.init_start()
|
||||
env.pageserver.http_client().tenant_create(tenant)
|
||||
|
||||
env.pageserver.allowed_errors.extend(
|
||||
[
|
||||
".*error importing base backup .*",
|
||||
".*Timeline got dropped without initializing, cleaning its files.*",
|
||||
".*Removing intermediate uninit mark file.*",
|
||||
".*InternalServerError.*timeline not found.*",
|
||||
".*InternalServerError.*Tenant .* not found.*",
|
||||
".*InternalServerError.*Timeline .* not found.*",
|
||||
".*InternalServerError.*Cannot delete timeline which has child timelines.*",
|
||||
]
|
||||
)
|
||||
|
||||
# FIXME: we should clean up pageserver to not print this
|
||||
env.pageserver.allowed_errors.append(".*exited with error: unexpected message type: CopyData.*")
|
||||
|
||||
# FIXME: Is this expected?
|
||||
env.pageserver.allowed_errors.append(
|
||||
".*init_tenant_mgr: marking .* as locally complete, while it doesnt exist in remote index.*"
|
||||
)
|
||||
|
||||
def import_tar(base, wal):
|
||||
env.neon_cli.raw_cli(
|
||||
[
|
||||
@@ -122,6 +142,11 @@ def test_import_from_pageserver_small(pg_bin: PgBin, neon_env_builder: NeonEnvBu
|
||||
neon_env_builder.enable_local_fs_remote_storage()
|
||||
env = neon_env_builder.init_start()
|
||||
|
||||
# FIXME: Is this expected?
|
||||
env.pageserver.allowed_errors.append(
|
||||
".*init_tenant_mgr: marking .* as locally complete, while it doesnt exist in remote index.*"
|
||||
)
|
||||
|
||||
timeline = env.neon_cli.create_branch("test_import_from_pageserver_small")
|
||||
pg = env.postgres.create_start("test_import_from_pageserver_small")
|
||||
|
||||
|
||||
@@ -67,6 +67,10 @@ def test_pageserver_restart(neon_env_builder: NeonEnvBuilder):
|
||||
def test_pageserver_chaos(neon_env_builder: NeonEnvBuilder):
|
||||
env = neon_env_builder.init_start()
|
||||
|
||||
# These warnings are expected, when the pageserver is restarted abruptly
|
||||
env.pageserver.allowed_errors.append(".*found future image layer.*")
|
||||
env.pageserver.allowed_errors.append(".*found future delta layer.*")
|
||||
|
||||
# Use a tiny checkpoint distance, to create a lot of layers quickly.
|
||||
# That allows us to stress the compaction and layer flushing logic more.
|
||||
tenant, _ = env.neon_cli.create_tenant(
|
||||
|
||||
@@ -8,11 +8,11 @@ from fixtures.log_helper import log
|
||||
from fixtures.neon_fixtures import PSQL, NeonProxy, VanillaPostgres
|
||||
|
||||
|
||||
def test_proxy_select_1(static_proxy):
|
||||
def test_proxy_select_1(static_proxy: NeonProxy):
|
||||
static_proxy.safe_psql("select 1", options="project=generic-project-name")
|
||||
|
||||
|
||||
def test_password_hack(static_proxy):
|
||||
def test_password_hack(static_proxy: NeonProxy):
|
||||
user = "borat"
|
||||
password = "password"
|
||||
static_proxy.safe_psql(
|
||||
@@ -24,7 +24,7 @@ def test_password_hack(static_proxy):
|
||||
static_proxy.safe_psql("select 1", sslsni=0, user=user, password=magic)
|
||||
|
||||
# Must also check that invalid magic won't be accepted.
|
||||
with pytest.raises(psycopg2.errors.OperationalError):
|
||||
with pytest.raises(psycopg2.OperationalError):
|
||||
magic = "broken"
|
||||
static_proxy.safe_psql("select 1", sslsni=0, user=user, password=magic)
|
||||
|
||||
@@ -135,7 +135,7 @@ async def test_psql_session_id(vanilla_pg: VanillaPostgres, link_proxy: NeonProx
|
||||
|
||||
|
||||
# Pass extra options to the server.
|
||||
def test_proxy_options(static_proxy):
|
||||
def test_proxy_options(static_proxy: NeonProxy):
|
||||
with static_proxy.connect(options="project=irrelevant -cproxytest.option=value") as conn:
|
||||
with conn.cursor() as cur:
|
||||
cur.execute("SHOW proxytest.option")
|
||||
|
||||
@@ -143,6 +143,8 @@ def test_read_validation_neg(neon_simple_env: NeonEnv):
|
||||
env = neon_simple_env
|
||||
env.neon_cli.create_branch("test_read_validation_neg", "empty")
|
||||
|
||||
env.pageserver.allowed_errors.append(".*invalid LSN\\(0\\) in request.*")
|
||||
|
||||
pg = env.postgres.create_start("test_read_validation_neg")
|
||||
log.info("postgres is running on 'test_read_validation_neg' branch")
|
||||
|
||||
|
||||
@@ -17,6 +17,8 @@ def test_readonly_node(neon_simple_env: NeonEnv):
|
||||
pgmain = env.postgres.create_start("test_readonly_node")
|
||||
log.info("postgres is running on 'test_readonly_node' branch")
|
||||
|
||||
env.pageserver.allowed_errors.append(".*basebackup .* failed: invalid basebackup lsn.*")
|
||||
|
||||
main_pg_conn = pgmain.connect()
|
||||
main_cur = main_pg_conn.cursor()
|
||||
|
||||
|
||||
@@ -17,6 +17,10 @@ def test_pageserver_recovery(neon_env_builder: NeonEnvBuilder):
|
||||
|
||||
neon_env_builder.start()
|
||||
|
||||
# These warnings are expected, when the pageserver is restarted abruptly
|
||||
env.pageserver.allowed_errors.append(".*found future delta layer.*")
|
||||
env.pageserver.allowed_errors.append(".*found future image layer.*")
|
||||
|
||||
# Create a branch for us
|
||||
env.neon_cli.create_branch("test_pageserver_recovery", "main")
|
||||
|
||||
|
||||
@@ -56,6 +56,17 @@ def test_remote_storage_backup_and_restore(
|
||||
|
||||
##### First start, insert secret data and upload it to the remote storage
|
||||
env = neon_env_builder.init_start()
|
||||
|
||||
# FIXME: Is this expected?
|
||||
env.pageserver.allowed_errors.append(
|
||||
".*marking .* as locally complete, while it doesnt exist in remote index.*"
|
||||
)
|
||||
env.pageserver.allowed_errors.append(".*No timelines to attach received.*")
|
||||
|
||||
env.pageserver.allowed_errors.append(".*Tenant download is already in progress.*")
|
||||
env.pageserver.allowed_errors.append(".*Failed to get local tenant state.*")
|
||||
env.pageserver.allowed_errors.append(".*No metadata file found in the timeline directory.*")
|
||||
|
||||
pageserver_http = env.pageserver.http_client()
|
||||
pg = env.postgres.create_start("main")
|
||||
|
||||
|
||||
@@ -1,3 +1,4 @@
|
||||
import time
|
||||
from threading import Thread
|
||||
|
||||
import pytest
|
||||
@@ -11,15 +12,30 @@ def do_gc_target(
|
||||
):
|
||||
"""Hack to unblock main, see https://github.com/neondatabase/neon/issues/2211"""
|
||||
try:
|
||||
log.info("sending gc http request")
|
||||
pageserver_http.timeline_gc(tenant_id, timeline_id, 0)
|
||||
except Exception as e:
|
||||
log.error("do_gc failed: %s", e)
|
||||
finally:
|
||||
log.info("gc http thread returning")
|
||||
|
||||
|
||||
@pytest.mark.skip(
|
||||
reason="""
|
||||
Commit 'make test_tenant_detach_smoke fail reproducibly' adds failpoint to make this test fail reproducibly.
|
||||
Fix in https://github.com/neondatabase/neon/pull/2851 will come as part of
|
||||
https://github.com/neondatabase/neon/pull/2785 .
|
||||
"""
|
||||
)
|
||||
def test_tenant_detach_smoke(neon_env_builder: NeonEnvBuilder):
|
||||
env = neon_env_builder.init_start()
|
||||
pageserver_http = env.pageserver.http_client()
|
||||
|
||||
env.pageserver.allowed_errors.append(".*NotFound\\(Tenant .* not found in the local state")
|
||||
# FIXME: we have a race condition between GC and detach. GC might fail with this
|
||||
# error. Similar to https://github.com/neondatabase/neon/issues/2671
|
||||
env.pageserver.allowed_errors.append(".*InternalServerError\\(No such file or directory.*")
|
||||
|
||||
# first check for non existing tenant
|
||||
tenant_id = TenantId.generate()
|
||||
with pytest.raises(
|
||||
@@ -28,6 +44,9 @@ def test_tenant_detach_smoke(neon_env_builder: NeonEnvBuilder):
|
||||
):
|
||||
pageserver_http.tenant_detach(tenant_id)
|
||||
|
||||
# the error will be printed to the log too
|
||||
env.pageserver.allowed_errors.append(".*Tenant not found for id.*")
|
||||
|
||||
# create new nenant
|
||||
tenant_id, timeline_id = env.neon_cli.create_tenant()
|
||||
|
||||
@@ -43,32 +62,34 @@ def test_tenant_detach_smoke(neon_env_builder: NeonEnvBuilder):
|
||||
]
|
||||
)
|
||||
|
||||
# gc should not try to even start
|
||||
# gc should not try to even start on a timeline that doesn't exist
|
||||
with pytest.raises(
|
||||
expected_exception=PageserverApiException, match="gc target timeline does not exist"
|
||||
):
|
||||
bogus_timeline_id = TimelineId.generate()
|
||||
pageserver_http.timeline_gc(tenant_id, bogus_timeline_id, 0)
|
||||
|
||||
# try to concurrently run gc and detach
|
||||
# the error will be printed to the log too
|
||||
env.pageserver.allowed_errors.append(".*gc target timeline does not exist.*")
|
||||
|
||||
# Detach while running manual GC.
|
||||
# It should wait for manual GC to finish (right now it doesn't that's why this test fails sometimes)
|
||||
pageserver_http.configure_failpoints(
|
||||
("gc_iteration_internal_after_getting_gc_timelines", "return(2000)")
|
||||
)
|
||||
gc_thread = Thread(target=lambda: do_gc_target(pageserver_http, tenant_id, timeline_id))
|
||||
gc_thread.start()
|
||||
time.sleep(1)
|
||||
# By now the gc task is spawned but in sleep for another second due to the failpoint.
|
||||
|
||||
last_error = None
|
||||
for i in range(3):
|
||||
try:
|
||||
pageserver_http.tenant_detach(tenant_id)
|
||||
except Exception as e:
|
||||
last_error = e
|
||||
log.error(f"try {i} error detaching tenant: {e}")
|
||||
continue
|
||||
else:
|
||||
break
|
||||
# else is called if the loop finished without reaching "break"
|
||||
else:
|
||||
pytest.fail(f"could not detach tenant: {last_error}")
|
||||
log.info("detaching tenant")
|
||||
pageserver_http.tenant_detach(tenant_id)
|
||||
log.info("tenant detached without error")
|
||||
|
||||
log.info("wait for gc thread to return")
|
||||
gc_thread.join(timeout=10)
|
||||
assert not gc_thread.is_alive()
|
||||
log.info("gc thread returned")
|
||||
|
||||
# check that nothing is left on disk for deleted tenant
|
||||
assert not (env.repo_dir / "tenants" / str(tenant_id)).exists()
|
||||
|
||||
@@ -259,6 +259,11 @@ def test_tenant_relocation(
|
||||
|
||||
env = neon_env_builder.init_start()
|
||||
|
||||
# FIXME: Is this expected?
|
||||
env.pageserver.allowed_errors.append(
|
||||
".*init_tenant_mgr: marking .* as locally complete, while it doesnt exist in remote index.*"
|
||||
)
|
||||
|
||||
# create folder for remote storage mock
|
||||
remote_storage_mock_path = env.repo_dir / "local_fs_remote_storage"
|
||||
|
||||
|
||||
@@ -166,6 +166,10 @@ def test_get_tenant_size_with_multiple_branches(neon_env_builder: NeonEnvBuilder
|
||||
|
||||
env = neon_env_builder.init_start()
|
||||
|
||||
# FIXME: we have a race condition between GC and delete timeline. GC might fail with this
|
||||
# error. Similar to https://github.com/neondatabase/neon/issues/2671
|
||||
env.pageserver.allowed_errors.append(".*InternalServerError\\(No such file or directory.*")
|
||||
|
||||
tenant_id = env.initial_tenant
|
||||
main_branch_name, main_timeline_id = env.neon_cli.list_timelines(tenant_id)[0]
|
||||
|
||||
@@ -263,6 +267,8 @@ def test_get_tenant_size_with_multiple_branches(neon_env_builder: NeonEnvBuilder
|
||||
except PageserverApiException as e:
|
||||
# compaction is ok but just retry if this fails; related to #2442
|
||||
if "cannot lock compaction critical section" in str(e):
|
||||
# also ignore it in the log
|
||||
env.pageserver.allowed_errors.append(".*cannot lock compaction critical section.*")
|
||||
time.sleep(1)
|
||||
continue
|
||||
raise
|
||||
|
||||
@@ -25,6 +25,13 @@ def test_tenant_creation_fails(neon_simple_env: NeonEnv):
|
||||
)
|
||||
initial_tenant_dirs = [d for d in tenants_dir.iterdir()]
|
||||
|
||||
neon_simple_env.pageserver.allowed_errors.extend(
|
||||
[
|
||||
".*Failed to create directory structure for tenant .*, cleaning tmp data.*",
|
||||
".*Failed to fsync removed temporary tenant directory .*",
|
||||
]
|
||||
)
|
||||
|
||||
pageserver_http = neon_simple_env.pageserver.http_client()
|
||||
pageserver_http.configure_failpoints(("tenant-creation-before-tmp-rename", "return"))
|
||||
with pytest.raises(Exception, match="tenant-creation-before-tmp-rename"):
|
||||
@@ -206,6 +213,13 @@ def test_pageserver_with_empty_tenants(
|
||||
)
|
||||
|
||||
env = neon_env_builder.init_start()
|
||||
|
||||
env.pageserver.allowed_errors.append(
|
||||
".*marking .* as locally complete, while it doesnt exist in remote index.*"
|
||||
)
|
||||
env.pageserver.allowed_errors.append(".*Tenant .* has no timelines directory.*")
|
||||
env.pageserver.allowed_errors.append(".*No timelines to attach received.*")
|
||||
|
||||
client = env.pageserver.http_client()
|
||||
|
||||
tenant_without_timelines_dir = env.initial_tenant
|
||||
|
||||
@@ -66,6 +66,11 @@ def test_tenants_many(neon_env_builder: NeonEnvBuilder, remote_storage_kind: Rem
|
||||
|
||||
env = neon_env_builder.init_start()
|
||||
|
||||
# FIXME: Is this expected?
|
||||
env.pageserver.allowed_errors.append(
|
||||
".*init_tenant_mgr: marking .* as locally complete, while it doesnt exist in remote index.*"
|
||||
)
|
||||
|
||||
tenants_pgs: List[Tuple[TenantId, Postgres]] = []
|
||||
|
||||
for _ in range(1, 5):
|
||||
@@ -117,6 +122,13 @@ def test_tenants_attached_after_download(
|
||||
|
||||
##### First start, insert secret data and upload it to the remote storage
|
||||
env = neon_env_builder.init_start()
|
||||
|
||||
# FIXME: Are these expected?
|
||||
env.pageserver.allowed_errors.append(".*No timelines to attach received.*")
|
||||
env.pageserver.allowed_errors.append(
|
||||
".*marking .* as locally complete, while it doesnt exist in remote index.*"
|
||||
)
|
||||
|
||||
pageserver_http = env.pageserver.http_client()
|
||||
pg = env.postgres.create_start("main")
|
||||
|
||||
@@ -209,6 +221,16 @@ def test_tenant_upgrades_index_json_from_v0(
|
||||
# launch pageserver, populate the default tenants timeline, wait for it to be uploaded,
|
||||
# then go ahead and modify the "remote" version as if it was downgraded, needing upgrade
|
||||
env = neon_env_builder.init_start()
|
||||
|
||||
# FIXME: Are these expected?
|
||||
env.pageserver.allowed_errors.append(
|
||||
".*init_tenant_mgr: marking .* as locally complete, while it doesnt exist in remote index.*"
|
||||
)
|
||||
env.pageserver.allowed_errors.append(".*No timelines to attach received.*")
|
||||
env.pageserver.allowed_errors.append(
|
||||
".*Failed to get local tenant state: Tenant .* not found in the local state.*"
|
||||
)
|
||||
|
||||
pageserver_http = env.pageserver.http_client()
|
||||
pg = env.postgres.create_start("main")
|
||||
|
||||
@@ -315,6 +337,20 @@ def test_tenant_redownloads_truncated_file_on_startup(
|
||||
)
|
||||
|
||||
env = neon_env_builder.init_start()
|
||||
|
||||
env.pageserver.allowed_errors.append(
|
||||
".*Redownloading locally existing .* due to size mismatch.*"
|
||||
)
|
||||
env.pageserver.allowed_errors.append(
|
||||
".*Downloaded layer exists already but layer file metadata mismatches.*"
|
||||
)
|
||||
|
||||
# FIXME: Are these expected?
|
||||
env.pageserver.allowed_errors.append(
|
||||
".*init_tenant_mgr: marking .* as locally complete, while it doesnt exist in remote index.*"
|
||||
)
|
||||
env.pageserver.allowed_errors.append(".*No timelines to attach received.*")
|
||||
|
||||
pageserver_http = env.pageserver.http_client()
|
||||
pg = env.postgres.create_start("main")
|
||||
|
||||
|
||||
@@ -7,6 +7,11 @@ from fixtures.utils import wait_until
|
||||
def test_timeline_delete(neon_simple_env: NeonEnv):
|
||||
env = neon_simple_env
|
||||
|
||||
env.pageserver.allowed_errors.append(".*Timeline .* was not found.*")
|
||||
env.pageserver.allowed_errors.append(".*timeline not found.*")
|
||||
env.pageserver.allowed_errors.append(".*Cannot delete timeline which has child timelines.*")
|
||||
env.pageserver.allowed_errors.append(".*Tenant .* not found in the local state.*")
|
||||
|
||||
ps_http = env.pageserver.http_client()
|
||||
|
||||
# first try to delete non existing timeline
|
||||
|
||||
@@ -263,6 +263,12 @@ def test_broker(neon_env_builder: NeonEnvBuilder):
|
||||
env = neon_env_builder.init_start()
|
||||
|
||||
env.neon_cli.create_branch("test_broker", "main")
|
||||
|
||||
# FIXME: Is this expected?
|
||||
env.pageserver.allowed_errors.append(
|
||||
".*init_tenant_mgr: marking .* as locally complete, while it doesnt exist in remote index.*"
|
||||
)
|
||||
|
||||
pg = env.postgres.create_start("test_broker")
|
||||
pg.safe_psql("CREATE TABLE t(key int primary key, value text)")
|
||||
|
||||
@@ -306,6 +312,11 @@ def test_wal_removal(neon_env_builder: NeonEnvBuilder, auth_enabled: bool):
|
||||
neon_env_builder.auth_enabled = auth_enabled
|
||||
env = neon_env_builder.init_start()
|
||||
|
||||
# FIXME: Is this expected?
|
||||
env.pageserver.allowed_errors.append(
|
||||
".*init_tenant_mgr: marking .* as locally complete, while it doesnt exist in remote index.*"
|
||||
)
|
||||
|
||||
env.neon_cli.create_branch("test_safekeepers_wal_removal")
|
||||
pg = env.postgres.create_start("test_safekeepers_wal_removal")
|
||||
|
||||
@@ -538,6 +549,7 @@ def test_s3_wal_replay(neon_env_builder: NeonEnvBuilder, remote_storage_kind: Re
|
||||
)
|
||||
|
||||
pg.stop_and_destroy()
|
||||
ps_cli.timeline_delete(tenant_id, timeline_id)
|
||||
|
||||
# Also delete and manually create timeline on safekeepers -- this tests
|
||||
# scenario of manual recovery on different set of safekeepers.
|
||||
@@ -562,7 +574,6 @@ def test_s3_wal_replay(neon_env_builder: NeonEnvBuilder, remote_storage_kind: Re
|
||||
shutil.copy(f_partial_saved, f_partial_path)
|
||||
|
||||
# recreate timeline on pageserver from scratch
|
||||
ps_cli.timeline_delete(tenant_id, timeline_id)
|
||||
ps_cli.timeline_create(tenant_id, timeline_id)
|
||||
|
||||
wait_lsn_timeout = 60 * 3
|
||||
@@ -1081,6 +1092,14 @@ def test_delete_force(neon_env_builder: NeonEnvBuilder, auth_enabled: bool):
|
||||
neon_env_builder.auth_enabled = auth_enabled
|
||||
env = neon_env_builder.init_start()
|
||||
|
||||
# FIXME: are these expected?
|
||||
env.pageserver.allowed_errors.extend(
|
||||
[
|
||||
".*Failed to process query for timeline .*: Timeline .* was not found in global map.*",
|
||||
".*end streaming to Some.*",
|
||||
]
|
||||
)
|
||||
|
||||
# Create two tenants: one will be deleted, other should be preserved.
|
||||
tenant_id = env.initial_tenant
|
||||
timeline_id_1 = env.neon_cli.create_branch("br1") # Active, delete explicitly
|
||||
|
||||
@@ -22,6 +22,8 @@ def assert_child_processes(pageserver_pid, wal_redo_present=False, defunct_prese
|
||||
# as a zombie process.
|
||||
def test_walredo_not_left_behind_on_detach(neon_env_builder: NeonEnvBuilder):
|
||||
env = neon_env_builder.init_start()
|
||||
# We intentionally test for a non-existent tenant.
|
||||
env.pageserver.allowed_errors.append(".*Tenant not found.*")
|
||||
pageserver_http = env.pageserver.http_client()
|
||||
|
||||
pagserver_pid = int((env.repo_dir / "pageserver.pid").read_text())
|
||||
|
||||
2
vendor/postgres-v14
vendored
2
vendor/postgres-v14
vendored
Submodule vendor/postgres-v14 updated: e56b812dd8...360ff1c637
2
vendor/postgres-v15
vendored
2
vendor/postgres-v15
vendored
Submodule vendor/postgres-v15 updated: 39e3d745b3...d31b3f7c6d
Reference in New Issue
Block a user