Compare commits

..

3 Commits

Author SHA1 Message Date
Bojan Serafimov
612f1f2dba wip 2022-11-15 23:08:22 -05:00
Bojan Serafimov
accbf4a313 Merge branch 'main' into perf-summary 2022-11-15 13:10:20 -05:00
Bojan Serafimov
ea0207c4b7 WIP add perf test summary 2022-02-24 12:46:53 -05:00
85 changed files with 798 additions and 2889 deletions

View File

@@ -1,3 +1,5 @@
zenith_install.tar.gz
.zenith_current_version
neon_install.tar.gz
.neon_current_version

View File

@@ -22,10 +22,6 @@ storage:
console_region_id: aws-us-west-2
zenith-1-ps-3:
console_region_id: aws-us-west-2
zenith-1-ps-4:
console_region_id: aws-us-west-2
zenith-1-ps-5:
console_region_id: aws-us-west-2
safekeepers:
hosts:

View File

@@ -1,33 +0,0 @@
storage:
vars:
bucket_name: neon-dev-storage-eu-west-1
bucket_region: eu-west-1
console_mgmt_base_url: http://console-staging.local
etcd_endpoints: etcd-0.eu-west-1.aws.neon.build:2379
pageserver_config_stub:
pg_distrib_dir: /usr/local
remote_storage:
bucket_name: "{{ bucket_name }}"
bucket_region: "{{ bucket_region }}"
prefix_in_bucket: "pageserver/v1"
safekeeper_s3_prefix: safekeeper/v1/wal
hostname_suffix: ""
remote_user: ssm-user
ansible_aws_ssm_region: eu-west-1
ansible_aws_ssm_bucket_name: neon-dev-storage-eu-west-1
console_region_id: aws-eu-west-1
children:
pageservers:
hosts:
pageserver-0.eu-west-1.aws.neon.build:
ansible_host: i-01d496c5041c7f34c
safekeepers:
hosts:
safekeeper-0.eu-west-1.aws.neon.build:
ansible_host: i-05226ef85722831bf
safekeeper-1.eu-west-1.aws.neon.build:
ansible_host: i-06969ee1bf2958bfc
safekeeper-2.eu-west-1.aws.neon.build:
ansible_host: i-087892e9625984a0b

View File

@@ -3,7 +3,7 @@ storage:
bucket_name: zenith-staging-storage-us-east-1
bucket_region: us-east-1
console_mgmt_base_url: http://console-staging.local
etcd_endpoints: etcd-0.us-east-2.aws.neon.build:2379
etcd_endpoints: zenith-us-stage-etcd.local:2379
pageserver_config_stub:
pg_distrib_dir: /usr/local
remote_storage:

View File

@@ -22,8 +22,6 @@ storage:
hosts:
pageserver-0.us-east-2.aws.neon.build:
ansible_host: i-0c3e70929edb5d691
pageserver-1.us-east-2.aws.neon.build:
ansible_host: i-0565a8b4008aa3f40
safekeepers:
hosts:

View File

@@ -1,31 +0,0 @@
# Helm chart values for neon-proxy-scram.
# This is a YAML-formatted file.
image:
repository: neondatabase/neon
settings:
authBackend: "console"
authEndpoint: "http://console-staging.local/management/api/v2"
domain: "*.eu-west-1.aws.neon.build"
# -- Additional labels for neon-proxy pods
podLabels:
zenith_service: proxy-scram
zenith_env: dev
zenith_region: eu-west-1
zenith_region_slug: eu-west-1
exposedService:
annotations:
service.beta.kubernetes.io/aws-load-balancer-type: external
service.beta.kubernetes.io/aws-load-balancer-nlb-target-type: ip
service.beta.kubernetes.io/aws-load-balancer-scheme: internet-facing
external-dns.alpha.kubernetes.io/hostname: eu-west-1.aws.neon.build
#metrics:
# enabled: true
# serviceMonitor:
# enabled: true
# selector:
# release: kube-prometheus-stack

View File

@@ -144,9 +144,7 @@ jobs:
# neon-captest-new: Run pgbench in a freshly created project
# neon-captest-reuse: Same, but reusing existing project
# neon-captest-prefetch: Same, with prefetching enabled (new project)
# rds-aurora: Aurora Postgres Serverless v2 with autoscaling from 0.5 to 2 ACUs
# rds-postgres: RDS Postgres db.m5.large instance (2 vCPU, 8 GiB) with gp3 EBS storage
platform: [ neon-captest-new, neon-captest-reuse, neon-captest-prefetch, rds-postgres ]
platform: [ neon-captest-new, neon-captest-reuse, neon-captest-prefetch ]
db_size: [ 10gb ]
include:
- platform: neon-captest-new
@@ -166,7 +164,7 @@ jobs:
SAVE_PERF_REPORT: ${{ github.event.inputs.save_perf_report || ( github.ref == 'refs/heads/main' ) }}
PLATFORM: ${{ matrix.platform }}
runs-on: [ self-hosted, dev, x64 ]
runs-on: dev
container:
image: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/rustlegacy:pinned
options: --init
@@ -209,11 +207,8 @@ jobs:
rds-aurora)
CONNSTR=${{ secrets.BENCHMARK_RDS_CONNSTR }}
;;
rds-postgres)
CONNSTR=${{ secrets.BENCHMARK_RDS_POSTGRES_CONNSTR }}
;;
*)
echo 2>&1 "Unknown PLATFORM=${PLATFORM}. Allowed only 'neon-captest-reuse', 'neon-captest-new', 'neon-captest-prefetch', 'rds-aurora', or 'rds-postgres'"
echo 2>&1 "Unknown PLATFORM=${PLATFORM}. Allowed only 'neon-captest-reuse', 'neon-captest-new', 'neon-captest-prefetch' or 'rds-aurora'"
exit 1
;;
esac

View File

@@ -18,8 +18,8 @@ env:
jobs:
tag:
runs-on: [ self-hosted, dev, x64 ]
container: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/base:pinned
runs-on: dev
container: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/base:latest
outputs:
build-tag: ${{steps.build-tag.outputs.tag}}
@@ -46,7 +46,7 @@ jobs:
id: build-tag
build-neon:
runs-on: [ self-hosted, dev, x64 ]
runs-on: dev
container:
image: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/rust:pinned
options: --init
@@ -236,7 +236,7 @@ jobs:
uses: ./.github/actions/save-coverage-data
regress-tests:
runs-on: [ self-hosted, dev, x64 ]
runs-on: dev
container:
image: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/rust:pinned
options: --init
@@ -269,7 +269,7 @@ jobs:
uses: ./.github/actions/save-coverage-data
benchmarks:
runs-on: [ self-hosted, dev, x64 ]
runs-on: dev
container:
image: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/rust:pinned
options: --init
@@ -300,7 +300,7 @@ jobs:
# while coverage is currently collected for the debug ones
merge-allure-report:
runs-on: [ self-hosted, dev, x64 ]
runs-on: dev
container:
image: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/rust:pinned
options: --init
@@ -338,7 +338,7 @@ jobs:
DATABASE_URL="$TEST_RESULT_CONNSTR" poetry run python3 scripts/ingest_regress_test_result.py --revision ${SHA} --reference ${GITHUB_REF} --build-type ${BUILD_TYPE} --ingest suites.json
coverage-report:
runs-on: [ self-hosted, dev, x64 ]
runs-on: dev
container:
image: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/rust:pinned
options: --init
@@ -415,7 +415,7 @@ jobs:
shell: bash -euxo pipefail {0}
trigger-e2e-tests:
runs-on: [ self-hosted, dev, x64 ]
runs-on: dev
container:
image: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/base:pinned
options: --init
@@ -460,7 +460,7 @@ jobs:
}"
neon-image:
runs-on: [ self-hosted, dev, x64 ]
runs-on: dev
needs: [ tag ]
container: gcr.io/kaniko-project/executor:v1.9.0-debug
@@ -478,7 +478,7 @@ jobs:
run: /kaniko/executor --snapshotMode=redo --cache=true --cache-repo 369495373322.dkr.ecr.eu-central-1.amazonaws.com/cache --snapshotMode=redo --context . --build-arg GIT_VERSION=${{ github.sha }} --destination 369495373322.dkr.ecr.eu-central-1.amazonaws.com/neon:${{needs.tag.outputs.build-tag}}
compute-tools-image:
runs-on: [ self-hosted, dev, x64 ]
runs-on: dev
needs: [ tag ]
container: gcr.io/kaniko-project/executor:v1.9.0-debug
@@ -493,7 +493,7 @@ jobs:
run: /kaniko/executor --snapshotMode=redo --cache=true --cache-repo 369495373322.dkr.ecr.eu-central-1.amazonaws.com/cache --snapshotMode=redo --context . --build-arg GIT_VERSION=${{ github.sha }} --dockerfile Dockerfile.compute-tools --destination 369495373322.dkr.ecr.eu-central-1.amazonaws.com/compute-tools:${{needs.tag.outputs.build-tag}}
compute-node-image-v14:
runs-on: [ self-hosted, dev, x64 ]
runs-on: dev
container: gcr.io/kaniko-project/executor:v1.9.0-debug
needs: [ tag ]
steps:
@@ -510,7 +510,7 @@ jobs:
run: /kaniko/executor --skip-unused-stages --snapshotMode=redo --cache=true --cache-repo 369495373322.dkr.ecr.eu-central-1.amazonaws.com/cache --context . --build-arg GIT_VERSION=${{ github.sha }} --dockerfile Dockerfile.compute-node-v14 --destination 369495373322.dkr.ecr.eu-central-1.amazonaws.com/compute-node-v14:${{needs.tag.outputs.build-tag}}
compute-node-image-v15:
runs-on: [ self-hosted, dev, x64 ]
runs-on: dev
container: gcr.io/kaniko-project/executor:v1.9.0-debug
needs: [ tag ]
steps:
@@ -528,7 +528,7 @@ jobs:
test-images:
needs: [ tag, neon-image, compute-node-image-v14, compute-node-image-v15, compute-tools-image ]
runs-on: [ self-hosted, dev, x64 ]
runs-on: dev
steps:
- name: Checkout
@@ -570,7 +570,7 @@ jobs:
docker compose -f ./docker-compose/docker-compose.yml down
promote-images:
runs-on: [ self-hosted, dev, x64 ]
runs-on: dev
needs: [ tag, test-images ]
if: github.event_name != 'workflow_dispatch'
container: amazon/aws-cli
@@ -586,7 +586,7 @@ jobs:
aws ecr put-image --repository-name ${{ matrix.name }} --image-tag latest --image-manifest "$MANIFEST"
push-docker-hub:
runs-on: [ self-hosted, dev, x64 ]
runs-on: dev
needs: [ promote-images, tag ]
container: golang:1.19-bullseye
@@ -736,7 +736,7 @@ jobs:
rm -f neon_install.tar.gz .neon_current_version
deploy-new:
runs-on: [ self-hosted, dev, x64 ]
runs-on: dev
container: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/ansible:pinned
# We need both storage **and** compute images for deploy, because control plane picks the compute version based on the storage version.
# If it notices a fresh storage it may bump the compute version. And if compute image failed to build it may break things badly
@@ -761,6 +761,7 @@ jobs:
run: |
export DOCKER_TAG=${{needs.tag.outputs.build-tag}}
cd "$(pwd)/.github/ansible"
if [[ "$GITHUB_REF_NAME" == "main" ]]; then
./get_binaries.sh
elif [[ "$GITHUB_REF_NAME" == "release" ]]; then
@@ -769,38 +770,6 @@ jobs:
echo "GITHUB_REF_NAME (value '$GITHUB_REF_NAME') is not set to either 'main' or 'release'"
exit 1
fi
ansible-galaxy collection install sivel.toiletwater
ansible-playbook deploy.yaml -i staging.${{ matrix.target_region }}.hosts.yaml -e @ssm_config -e CONSOLE_API_TOKEN=${{secrets.NEON_STAGING_API_KEY}}
rm -f neon_install.tar.gz .neon_current_version
deploy-pr-test-new:
runs-on: [ self-hosted, dev, x64 ]
container: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/ansible:pinned
# We need both storage **and** compute images for deploy, because control plane picks the compute version based on the storage version.
# If it notices a fresh storage it may bump the compute version. And if compute image failed to build it may break things badly
needs: [ push-docker-hub, tag, regress-tests ]
if: |
contains(github.event.pull_request.labels.*.name, 'deploy-test-storage') &&
github.event_name != 'workflow_dispatch'
defaults:
run:
shell: bash
strategy:
matrix:
target_region: [ eu-west-1 ]
steps:
- name: Checkout
uses: actions/checkout@v3
with:
submodules: true
fetch-depth: 0
- name: Redeploy
run: |
export DOCKER_TAG=${{needs.tag.outputs.build-tag}}
cd "$(pwd)/.github/ansible"
./get_binaries.sh
ansible-galaxy collection install sivel.toiletwater
ansible-playbook deploy.yaml -i staging.${{ matrix.target_region }}.hosts.yaml -e @ssm_config -e CONSOLE_API_TOKEN=${{secrets.NEON_STAGING_API_KEY}}
@@ -811,7 +780,7 @@ jobs:
container: 093970136003.dkr.ecr.eu-central-1.amazonaws.com/ansible:latest
# We need both storage **and** compute images for deploy, because control plane picks the compute version based on the storage version.
# If it notices a fresh storage it may bump the compute version. And if compute image failed to build it may break things badly
needs: [ push-docker-hub, tag, regress-tests ]
needs: [ push-docker-hub, calculate-deploy-targets, tag, regress-tests ]
if: |
(github.ref_name == 'release') &&
github.event_name != 'workflow_dispatch'
@@ -847,7 +816,7 @@ jobs:
rm -f neon_install.tar.gz .neon_current_version
deploy-proxy:
runs-on: [ self-hosted, dev, x64 ]
runs-on: dev
container: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/base:latest
# Compute image isn't strictly required for proxy deploy, but let's still wait for it to run all deploy jobs consistently.
needs: [ push-docker-hub, calculate-deploy-targets, tag, regress-tests ]
@@ -889,10 +858,10 @@ jobs:
helm upgrade ${{ matrix.proxy_job }}-scram neondatabase/neon-proxy --namespace neon-proxy --install -f .github/helm-values/${{ matrix.proxy_config }}-scram.yaml --set image.tag=${DOCKER_TAG} --wait --timeout 15m0s
deploy-proxy-new:
runs-on: [ self-hosted, dev, x64 ]
runs-on: dev
container: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/ansible:pinned
# Compute image isn't strictly required for proxy deploy, but let's still wait for it to run all deploy jobs consistently.
needs: [ push-docker-hub, tag, regress-tests ]
needs: [ push-docker-hub, calculate-deploy-targets, tag, regress-tests ]
if: |
(github.ref_name == 'main') &&
github.event_name != 'workflow_dispatch'
@@ -904,8 +873,6 @@ jobs:
include:
- target_region: us-east-2
target_cluster: dev-us-east-2-beta
- target_region: eu-west-1
target_cluster: dev-eu-west-1-zeta
steps:
- name: Checkout
uses: actions/checkout@v3
@@ -927,7 +894,7 @@ jobs:
runs-on: prod
container: 093970136003.dkr.ecr.eu-central-1.amazonaws.com/ansible:latest
# Compute image isn't strictly required for proxy deploy, but let's still wait for it to run all deploy jobs consistently.
needs: [ push-docker-hub, tag, regress-tests ]
needs: [ push-docker-hub, calculate-deploy-targets, tag, regress-tests ]
if: |
(github.ref_name == 'release') &&
github.event_name != 'workflow_dispatch'
@@ -961,7 +928,7 @@ jobs:
helm upgrade neon-proxy-scram neondatabase/neon-proxy --namespace neon-proxy --create-namespace --install -f .github/helm-values/${{ matrix.target_cluster }}.neon-proxy-scram.yaml --set image.tag=${DOCKER_TAG} --wait --timeout 15m0s
promote-compatibility-data:
runs-on: [ self-hosted, dev, x64 ]
runs-on: dev
container:
image: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/rust:pinned
options: --init

View File

@@ -115,7 +115,7 @@ jobs:
run: cargo build --locked --all --all-targets
check-rust-dependencies:
runs-on: [ self-hosted, dev, x64 ]
runs-on: dev
container:
image: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/rust:pinned
options: --init

View File

@@ -8,4 +8,3 @@
/pgxn/ @neondatabase/compute
/proxy/ @neondatabase/control-plane
/safekeeper/ @neondatabase/safekeepers
/vendor/ @neondatabase/compute

8
Cargo.lock generated
View File

@@ -2255,14 +2255,6 @@ version = "2.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "478c572c3d73181ff3c2539045f6eb99e5491218eae919370993b890cdbdd98e"
[[package]]
name = "persistent_range_query"
version = "0.1.0"
dependencies = [
"rand",
"workspace_hack",
]
[[package]]
name = "petgraph"
version = "0.6.2"

View File

@@ -20,18 +20,18 @@ else
$(error Bad build type '$(BUILD_TYPE)', see Makefile for options)
endif
# Seccomp BPF is only available for Linux
UNAME_S := $(shell uname -s)
ifeq ($(UNAME_S),Linux)
# Seccomp BPF is only available for Linux
PG_CONFIGURE_OPTS += --with-libseccomp
else ifeq ($(UNAME_S),Darwin)
# macOS with brew-installed openssl requires explicit paths
# It can be configured with OPENSSL_PREFIX variable
OPENSSL_PREFIX ?= $(shell brew --prefix openssl@3)
PG_CONFIGURE_OPTS += --with-includes=$(OPENSSL_PREFIX)/include --with-libraries=$(OPENSSL_PREFIX)/lib
# macOS already has bison and flex in the system, but they are old and result in postgres-v14 target failure
# brew formulae are keg-only and not symlinked into HOMEBREW_PREFIX, force their usage
EXTRA_PATH_OVERRIDES += $(shell brew --prefix bison)/bin/:$(shell brew --prefix flex)/bin/:
endif
# macOS with brew-installed openssl requires explicit paths
# It can be configured with OPENSSL_PREFIX variable
UNAME_S := $(shell uname -s)
ifeq ($(UNAME_S),Darwin)
OPENSSL_PREFIX ?= $(shell brew --prefix openssl@3)
PG_CONFIGURE_OPTS += --with-includes=$(OPENSSL_PREFIX)/include --with-libraries=$(OPENSSL_PREFIX)/lib
endif
# Use -C option so that when PostgreSQL "make install" installs the
@@ -73,8 +73,7 @@ $(POSTGRES_INSTALL_DIR)/build/v14/config.status:
+@echo "Configuring Postgres v14 build"
mkdir -p $(POSTGRES_INSTALL_DIR)/build/v14
(cd $(POSTGRES_INSTALL_DIR)/build/v14 && \
env PATH="$(EXTRA_PATH_OVERRIDES):$$PATH" $(ROOT_PROJECT_DIR)/vendor/postgres-v14/configure \
CFLAGS='$(PG_CFLAGS)' \
$(ROOT_PROJECT_DIR)/vendor/postgres-v14/configure CFLAGS='$(PG_CFLAGS)' \
$(PG_CONFIGURE_OPTS) \
--prefix=$(abspath $(POSTGRES_INSTALL_DIR))/v14 > configure.log)
@@ -82,8 +81,7 @@ $(POSTGRES_INSTALL_DIR)/build/v15/config.status:
+@echo "Configuring Postgres v15 build"
mkdir -p $(POSTGRES_INSTALL_DIR)/build/v15
(cd $(POSTGRES_INSTALL_DIR)/build/v15 && \
env PATH="$(EXTRA_PATH_OVERRIDES):$$PATH" $(ROOT_PROJECT_DIR)/vendor/postgres-v15/configure \
CFLAGS='$(PG_CFLAGS)' \
$(ROOT_PROJECT_DIR)/vendor/postgres-v15/configure CFLAGS='$(PG_CFLAGS)' \
$(PG_CONFIGURE_OPTS) \
--prefix=$(abspath $(POSTGRES_INSTALL_DIR))/v15 > configure.log)
@@ -113,8 +111,6 @@ postgres-v14: postgres-v14-configure \
$(MAKE) -C $(POSTGRES_INSTALL_DIR)/build/v14 MAKELEVEL=0 install
+@echo "Compiling libpq v14"
$(MAKE) -C $(POSTGRES_INSTALL_DIR)/build/v14/src/interfaces/libpq install
+@echo "Compiling pg_prewarm v14"
$(MAKE) -C $(POSTGRES_INSTALL_DIR)/build/v14/contrib/pg_prewarm install
+@echo "Compiling pg_buffercache v14"
$(MAKE) -C $(POSTGRES_INSTALL_DIR)/build/v14/contrib/pg_buffercache install
+@echo "Compiling pageinspect v14"
@@ -127,8 +123,6 @@ postgres-v15: postgres-v15-configure \
$(MAKE) -C $(POSTGRES_INSTALL_DIR)/build/v15 MAKELEVEL=0 install
+@echo "Compiling libpq v15"
$(MAKE) -C $(POSTGRES_INSTALL_DIR)/build/v15/src/interfaces/libpq install
+@echo "Compiling pg_prewarm v15"
$(MAKE) -C $(POSTGRES_INSTALL_DIR)/build/v15/contrib/pg_prewarm install
+@echo "Compiling pg_buffercache v15"
$(MAKE) -C $(POSTGRES_INSTALL_DIR)/build/v15/contrib/pg_buffercache install
+@echo "Compiling pageinspect v15"

View File

@@ -53,7 +53,7 @@ curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh
1. Install XCode and dependencies
```
xcode-select --install
brew install protobuf etcd openssl flex bison
brew install protobuf etcd openssl
```
2. [Install Rust](https://www.rust-lang.org/tools/install)
@@ -125,23 +125,24 @@ Python (3.9 or higher), and install python3 packages using `./scripts/pysync` (r
# Create repository in .neon with proper paths to binaries and data
# Later that would be responsibility of a package install script
> ./target/debug/neon_local init
Starting pageserver at '127.0.0.1:64000' in '.neon'.
pageserver started, pid: 2545906
Successfully initialized timeline de200bd42b49cc1814412c7e592dd6e9
Stopped pageserver 1 process with pid 2545906
Starting pageserver at '127.0.0.1:64000' in '.neon'
Pageserver started
Successfully initialized timeline 7dd0907914ac399ff3be45fb252bfdb7
Stopping pageserver gracefully...done!
# start pageserver and safekeeper
> ./target/debug/neon_local start
Starting etcd broker using "/usr/bin/etcd"
etcd started, pid: 2545996
Starting pageserver at '127.0.0.1:64000' in '.neon'.
pageserver started, pid: 2546005
Starting safekeeper at '127.0.0.1:5454' in '.neon/safekeepers/sk1'.
safekeeper 1 started, pid: 2546041
Starting etcd broker using /usr/bin/etcd
Starting pageserver at '127.0.0.1:64000' in '.neon'
Pageserver started
Starting safekeeper at '127.0.0.1:5454' in '.neon/safekeepers/sk1'
Safekeeper started
# start postgres compute node
> ./target/debug/neon_local pg start main
Starting new postgres (v14) main on timeline de200bd42b49cc1814412c7e592dd6e9 ...
Starting new postgres main on timeline de200bd42b49cc1814412c7e592dd6e9 ...
Extracting base backup to create postgres instance: path=.neon/pgdatadirs/tenants/9ef87a5bf0d92544f6fafeeb3239695c/main port=55432
Starting postgres node at 'host=127.0.0.1 port=55432 user=cloud_admin dbname=postgres'

188
cli-v2-story.md Normal file
View File

@@ -0,0 +1,188 @@
Create a new Zenith repository in the current directory:
~/git-sandbox/zenith (cli-v2)$ ./target/debug/cli init
The files belonging to this database system will be owned by user "heikki".
This user must also own the server process.
The database cluster will be initialized with locale "en_GB.UTF-8".
The default database encoding has accordingly been set to "UTF8".
The default text search configuration will be set to "english".
Data page checksums are disabled.
creating directory tmp ... ok
creating subdirectories ... ok
selecting dynamic shared memory implementation ... posix
selecting default max_connections ... 100
selecting default shared_buffers ... 128MB
selecting default time zone ... Europe/Helsinki
creating configuration files ... ok
running bootstrap script ... ok
performing post-bootstrap initialization ... ok
syncing data to disk ... ok
initdb: warning: enabling "trust" authentication for local connections
You can change this by editing pg_hba.conf or using the option -A, or
--auth-local and --auth-host, the next time you run initdb.
new zenith repository was created in .zenith
Initially, there is only one branch:
~/git-sandbox/zenith (cli-v2)$ ./target/debug/cli branch
main
Start a local Postgres instance on the branch:
~/git-sandbox/zenith (cli-v2)$ ./target/debug/cli start main
Creating data directory from snapshot at 0/15FFB08...
waiting for server to start....2021-04-13 09:27:43.919 EEST [984664] LOG: starting PostgreSQL 14devel on x86_64-pc-linux-gnu, compiled by gcc (Debian 10.2.1-6) 10.2.1 20210110, 64-bit
2021-04-13 09:27:43.920 EEST [984664] LOG: listening on IPv6 address "::1", port 5432
2021-04-13 09:27:43.920 EEST [984664] LOG: listening on IPv4 address "127.0.0.1", port 5432
2021-04-13 09:27:43.927 EEST [984664] LOG: listening on Unix socket "/tmp/.s.PGSQL.5432"
2021-04-13 09:27:43.939 EEST [984665] LOG: database system was interrupted; last known up at 2021-04-13 09:27:33 EEST
2021-04-13 09:27:43.939 EEST [984665] LOG: creating missing WAL directory "pg_wal/archive_status"
2021-04-13 09:27:44.189 EEST [984665] LOG: database system was not properly shut down; automatic recovery in progress
2021-04-13 09:27:44.195 EEST [984665] LOG: invalid record length at 0/15FFB80: wanted 24, got 0
2021-04-13 09:27:44.195 EEST [984665] LOG: redo is not required
2021-04-13 09:27:44.225 EEST [984664] LOG: database system is ready to accept connections
done
server started
Run some commands against it:
~/git-sandbox/zenith (cli-v2)$ psql postgres -c "create table foo (t text);"
CREATE TABLE
~/git-sandbox/zenith (cli-v2)$ psql postgres -c "insert into foo values ('inserted on the main branch');"
INSERT 0 1
~/git-sandbox/zenith (cli-v2)$ psql postgres -c "select * from foo"
t
-----------------------------
inserted on the main branch
(1 row)
Create a new branch called 'experimental'. We create it from the
current end of the 'main' branch, but you could specify a different
LSN as the start point instead.
~/git-sandbox/zenith (cli-v2)$ ./target/debug/cli branch experimental main
branching at end of WAL: 0/161F478
~/git-sandbox/zenith (cli-v2)$ ./target/debug/cli branch
experimental
main
Start another Postgres instance off the 'experimental' branch:
~/git-sandbox/zenith (cli-v2)$ ./target/debug/cli start experimental -- -o -p5433
Creating data directory from snapshot at 0/15FFB08...
waiting for server to start....2021-04-13 09:28:41.874 EEST [984766] LOG: starting PostgreSQL 14devel on x86_64-pc-linux-gnu, compiled by gcc (Debian 10.2.1-6) 10.2.1 20210110, 64-bit
2021-04-13 09:28:41.875 EEST [984766] LOG: listening on IPv6 address "::1", port 5433
2021-04-13 09:28:41.875 EEST [984766] LOG: listening on IPv4 address "127.0.0.1", port 5433
2021-04-13 09:28:41.883 EEST [984766] LOG: listening on Unix socket "/tmp/.s.PGSQL.5433"
2021-04-13 09:28:41.896 EEST [984767] LOG: database system was interrupted; last known up at 2021-04-13 09:27:33 EEST
2021-04-13 09:28:42.265 EEST [984767] LOG: database system was not properly shut down; automatic recovery in progress
2021-04-13 09:28:42.269 EEST [984767] LOG: redo starts at 0/15FFB80
2021-04-13 09:28:42.272 EEST [984767] LOG: invalid record length at 0/161F4B0: wanted 24, got 0
2021-04-13 09:28:42.272 EEST [984767] LOG: redo done at 0/161F478 system usage: CPU: user: 0.00 s, system: 0.00 s, elapsed: 0.00 s
2021-04-13 09:28:42.321 EEST [984766] LOG: database system is ready to accept connections
done
server started
Insert some a row on the 'experimental' branch:
~/git-sandbox/zenith (cli-v2)$ psql postgres -p5433 -c "select * from foo"
t
-----------------------------
inserted on the main branch
(1 row)
~/git-sandbox/zenith (cli-v2)$ psql postgres -p5433 -c "insert into foo values ('inserted on experimental')"
INSERT 0 1
~/git-sandbox/zenith (cli-v2)$ psql postgres -p5433 -c "select * from foo"
t
-----------------------------
inserted on the main branch
inserted on experimental
(2 rows)
See that the other Postgres instance is still running on 'main' branch on port 5432:
~/git-sandbox/zenith (cli-v2)$ psql postgres -p5432 -c "select * from foo"
t
-----------------------------
inserted on the main branch
(1 row)
Everything is stored in the .zenith directory:
~/git-sandbox/zenith (cli-v2)$ ls -l .zenith/
total 12
drwxr-xr-x 4 heikki heikki 4096 Apr 13 09:28 datadirs
drwxr-xr-x 4 heikki heikki 4096 Apr 13 09:27 refs
drwxr-xr-x 4 heikki heikki 4096 Apr 13 09:28 timelines
The 'datadirs' directory contains the datadirs of the running instances:
~/git-sandbox/zenith (cli-v2)$ ls -l .zenith/datadirs/
total 8
drwx------ 18 heikki heikki 4096 Apr 13 09:27 3c0c634c1674079b2c6d4edf7c91523e
drwx------ 18 heikki heikki 4096 Apr 13 09:28 697e3c103d4b1763cd6e82e4ff361d76
~/git-sandbox/zenith (cli-v2)$ ls -l .zenith/datadirs/3c0c634c1674079b2c6d4edf7c91523e/
total 124
drwxr-xr-x 5 heikki heikki 4096 Apr 13 09:27 base
drwxr-xr-x 2 heikki heikki 4096 Apr 13 09:27 global
drwxr-xr-x 2 heikki heikki 4096 Apr 13 09:27 pg_commit_ts
drwxr-xr-x 2 heikki heikki 4096 Apr 13 09:27 pg_dynshmem
-rw------- 1 heikki heikki 4760 Apr 13 09:27 pg_hba.conf
-rw------- 1 heikki heikki 1636 Apr 13 09:27 pg_ident.conf
drwxr-xr-x 4 heikki heikki 4096 Apr 13 09:32 pg_logical
drwxr-xr-x 4 heikki heikki 4096 Apr 13 09:27 pg_multixact
drwxr-xr-x 2 heikki heikki 4096 Apr 13 09:27 pg_notify
drwxr-xr-x 2 heikki heikki 4096 Apr 13 09:27 pg_replslot
drwxr-xr-x 2 heikki heikki 4096 Apr 13 09:27 pg_serial
drwxr-xr-x 2 heikki heikki 4096 Apr 13 09:27 pg_snapshots
drwxr-xr-x 2 heikki heikki 4096 Apr 13 09:27 pg_stat
drwxr-xr-x 2 heikki heikki 4096 Apr 13 09:34 pg_stat_tmp
drwxr-xr-x 2 heikki heikki 4096 Apr 13 09:27 pg_subtrans
drwxr-xr-x 2 heikki heikki 4096 Apr 13 09:27 pg_tblspc
drwxr-xr-x 2 heikki heikki 4096 Apr 13 09:27 pg_twophase
-rw------- 1 heikki heikki 3 Apr 13 09:27 PG_VERSION
lrwxrwxrwx 1 heikki heikki 52 Apr 13 09:27 pg_wal -> ../../timelines/3c0c634c1674079b2c6d4edf7c91523e/wal
drwxr-xr-x 2 heikki heikki 4096 Apr 13 09:27 pg_xact
-rw------- 1 heikki heikki 88 Apr 13 09:27 postgresql.auto.conf
-rw------- 1 heikki heikki 28688 Apr 13 09:27 postgresql.conf
-rw------- 1 heikki heikki 96 Apr 13 09:27 postmaster.opts
-rw------- 1 heikki heikki 149 Apr 13 09:27 postmaster.pid
Note how 'pg_wal' is just a symlink to the 'timelines' directory. The
datadir is ephemeral, you can delete it at any time, and it can be reconstructed
from the snapshots and WAL stored in the 'timelines' directory. So if you push/pull
the repository, the 'datadirs' are not included. (They are like git working trees)
~/git-sandbox/zenith (cli-v2)$ killall -9 postgres
~/git-sandbox/zenith (cli-v2)$ rm -rf .zenith/datadirs/*
~/git-sandbox/zenith (cli-v2)$ ./target/debug/cli start experimental -- -o -p5433
Creating data directory from snapshot at 0/15FFB08...
waiting for server to start....2021-04-13 09:37:05.476 EEST [985340] LOG: starting PostgreSQL 14devel on x86_64-pc-linux-gnu, compiled by gcc (Debian 10.2.1-6) 10.2.1 20210110, 64-bit
2021-04-13 09:37:05.477 EEST [985340] LOG: listening on IPv6 address "::1", port 5433
2021-04-13 09:37:05.477 EEST [985340] LOG: listening on IPv4 address "127.0.0.1", port 5433
2021-04-13 09:37:05.487 EEST [985340] LOG: listening on Unix socket "/tmp/.s.PGSQL.5433"
2021-04-13 09:37:05.498 EEST [985341] LOG: database system was interrupted; last known up at 2021-04-13 09:27:33 EEST
2021-04-13 09:37:05.808 EEST [985341] LOG: database system was not properly shut down; automatic recovery in progress
2021-04-13 09:37:05.813 EEST [985341] LOG: redo starts at 0/15FFB80
2021-04-13 09:37:05.815 EEST [985341] LOG: invalid record length at 0/161F770: wanted 24, got 0
2021-04-13 09:37:05.815 EEST [985341] LOG: redo done at 0/161F738 system usage: CPU: user: 0.00 s, system: 0.00 s, elapsed: 0.00 s
2021-04-13 09:37:05.866 EEST [985340] LOG: database system is ready to accept connections
done
server started
~/git-sandbox/zenith (cli-v2)$ psql postgres -p5433 -c "select * from foo"
t
-----------------------------
inserted on the main branch
inserted on experimental
(2 rows)

View File

@@ -26,18 +26,8 @@ use nix::unistd::Pid;
use utils::lock_file;
// These constants control the loop used to poll for process start / stop.
//
// The loop waits for at most 10 seconds, polling every 100 ms.
// Once a second, it prints a dot ("."), to give the user an indication that
// it's waiting. If the process hasn't started/stopped after 5 seconds,
// it prints a notice that it's taking long, but keeps waiting.
//
const RETRY_UNTIL_SECS: u64 = 10;
const RETRIES: u64 = (RETRY_UNTIL_SECS * 1000) / RETRY_INTERVAL_MILLIS;
const RETRY_INTERVAL_MILLIS: u64 = 100;
const DOT_EVERY_RETRIES: u64 = 10;
const NOTICE_AFTER_RETRIES: u64 = 50;
const RETRIES: u32 = 15;
const RETRY_TIMEOUT_MILLIS: u64 = 500;
/// Argument to `start_process`, to indicate whether it should create pidfile or if the process creates
/// it itself.
@@ -117,16 +107,16 @@ where
return Ok(spawned_process);
}
Ok(false) => {
if retries == NOTICE_AFTER_RETRIES {
// The process is taking a long time to start up. Keep waiting, but
// print a message
print!("\n{process_name} has not started yet, continuing to wait");
}
if retries % DOT_EVERY_RETRIES == 0 {
if retries < 5 {
print!(".");
io::stdout().flush().unwrap();
} else {
if retries == 5 {
println!() // put a line break after dots for second message
}
println!("{process_name} has not started yet, retrying ({retries})...");
}
thread::sleep(Duration::from_millis(RETRY_INTERVAL_MILLIS));
thread::sleep(Duration::from_millis(RETRY_TIMEOUT_MILLIS));
}
Err(e) => {
println!("{process_name} failed to start: {e:#}");
@@ -137,8 +127,7 @@ where
}
}
}
println!();
anyhow::bail!("{process_name} did not start in {RETRY_UNTIL_SECS} seconds");
anyhow::bail!("{process_name} could not start in {RETRIES} attempts");
}
/// Stops the process, using the pid file given. Returns Ok also if the process is already not running.
@@ -169,7 +158,7 @@ pub fn stop_process(immediate: bool, process_name: &str, pid_file: &Path) -> any
}
// Wait until process is gone
for retries in 0..RETRIES {
for _ in 0..RETRIES {
match process_has_stopped(pid) {
Ok(true) => {
println!("\n{process_name} stopped");
@@ -181,16 +170,9 @@ pub fn stop_process(immediate: bool, process_name: &str, pid_file: &Path) -> any
return Ok(());
}
Ok(false) => {
if retries == NOTICE_AFTER_RETRIES {
// The process is taking a long time to start up. Keep waiting, but
// print a message
print!("\n{process_name} has not stopped yet, continuing to wait");
}
if retries % DOT_EVERY_RETRIES == 0 {
print!(".");
io::stdout().flush().unwrap();
}
thread::sleep(Duration::from_millis(RETRY_INTERVAL_MILLIS));
print!(".");
io::stdout().flush().unwrap();
thread::sleep(Duration::from_secs(1))
}
Err(e) => {
println!("{process_name} with pid {pid} failed to stop: {e:#}");
@@ -198,21 +180,24 @@ pub fn stop_process(immediate: bool, process_name: &str, pid_file: &Path) -> any
}
}
}
println!();
anyhow::bail!("{process_name} with pid {pid} did not stop in {RETRY_UNTIL_SECS} seconds");
anyhow::bail!("{process_name} with pid {pid} failed to stop in {RETRIES} attempts");
}
fn fill_rust_env_vars(cmd: &mut Command) -> &mut Command {
let mut filled_cmd = cmd.env_clear().env("RUST_BACKTRACE", "1");
// Pass through these environment variables to the command
for var in ["LLVM_PROFILE_FILE", "FAILPOINTS", "RUST_LOG"] {
if let Some(val) = std::env::var_os(var) {
filled_cmd = filled_cmd.env(var, val);
}
let var = "LLVM_PROFILE_FILE";
if let Some(val) = std::env::var_os(var) {
filled_cmd = filled_cmd.env(var, val);
}
filled_cmd
const RUST_LOG_KEY: &str = "RUST_LOG";
if let Ok(rust_log_value) = std::env::var(RUST_LOG_KEY) {
filled_cmd.env(RUST_LOG_KEY, rust_log_value)
} else {
filled_cmd
}
}
fn fill_aws_secrets_vars(mut cmd: &mut Command) -> &mut Command {

View File

@@ -343,7 +343,7 @@ impl PostgresNode {
// To be able to restore database in case of pageserver node crash, safekeeper should not
// remove WAL beyond this point. Too large lag can cause space exhaustion in safekeepers
// (if they are not able to upload WAL to S3).
conf.append("max_replication_write_lag", "15MB");
conf.append("max_replication_write_lag", "500MB");
conf.append("max_replication_flush_lag", "10GB");
if !self.env.safekeepers.is_empty() {

View File

@@ -6,7 +6,7 @@ use crate::{background_process, local_env};
pub fn start_etcd_process(env: &local_env::LocalEnv) -> anyhow::Result<()> {
let etcd_broker = &env.etcd_broker;
print!(
println!(
"Starting etcd broker using {:?}",
etcd_broker.etcd_binary_path
);

View File

@@ -237,7 +237,7 @@ impl PageServerNode {
datadir: &Path,
update_config: bool,
) -> anyhow::Result<Child> {
print!(
println!(
"Starting pageserver at '{}' in '{}'",
self.pg_connection_config.raw_address(),
datadir.display()

View File

@@ -37,7 +37,7 @@
- [Source view](./sourcetree.md)
- [docker.md](./docker.md) — Docker images and building pipeline.
- [Error handling and logging](./error-handling.md)
- [Error handling and logging]()
- [Testing]()
- [Unit testing]()
- [Integration testing]()

View File

@@ -1,198 +0,0 @@
# Error handling and logging
## Logging errors
The principle is that errors are logged when they are handled. If you
just propagate an error to the caller in a function, you don't need to
log it; the caller will. But if you consume an error in a function,
you *must* log it (if it needs to be logged at all).
For example:
```rust
fn read_motd_file() -> std::io::Result<String> {
let mut f = File::open("/etc/motd")?;
let mut result = String::new();
f.read_to_string(&mut result)?;
result
}
```
Opening or reading the file could fail, but there is no need to log
the error here. The function merely propagates the error to the
caller, and it is up to the caller to log the error or propagate it
further, if the failure is not expected. But if, for example, it is
normal that the "/etc/motd" file doesn't exist, the caller can choose
to silently ignore the error, or log it as an INFO or DEBUG level
message:
```rust
fn get_message_of_the_day() -> String {
// Get the motd from /etc/motd, or return the default proverb
match read_motd_file() {
Ok(motd) => motd,
Err(err) => {
// It's normal that /etc/motd doesn't exist, but if we fail to
// read it for some other reason, that's unexpected. The message
// of the day isn't very important though, so we just WARN and
// continue with the default in any case.
if err.kind() != std::io::ErrorKind::NotFound {
tracing::warn!("could not read \"/etc/motd\": {err:?}");
}
"An old error is always more popular than a new truth. - German proverb"
}
}
}
```
## Error types
We use the `anyhow` crate widely. It contains many convenient macros
like `bail!` and `ensure!` to construct and return errors, and to
propagate many kinds of low-level errors, wrapped in `anyhow::Error`.
A downside of `anyhow::Error` is that the caller cannot distinguish
between different error cases. Most errors are propagated all the way
to the mgmt API handler function, or the main loop that handles a
connection with the compute node, and they are all handled the same
way: the error is logged and returned to the client as an HTTP or
libpq error.
But in some cases, we need to distinguish between errors and handle
them differently. For example, attaching a tenant to the pageserver
could fail either because the tenant has already been attached, or
because we could not load its metadata from cloud storage. The first
case is more or less expected. The console sends the Attach request to
the pageserver, and the pageserver completes the operation, but the
network connection might be lost before the console receives the
response. The console will retry the operation in that case, but the
tenant has already been attached. It is important that the pageserver
responds with the HTTP 409 Conflict (already exists) error in that case, rather
than a generic HTTP 500 Internal Server Error.
If you need to distinguish between different kinds of errors, create a
new `Error` type. The `thiserror` crate is useful for that. But in
most cases `anyhow::Error` is good enough.
## Panics
Depending on where a panic happens, it can cause the whole pageserver
or safekeeper to restart, or just a single tenant. In either case,
that is pretty bad and causes an outage. Avoid panics. Never use
`unwrap()` or other calls that might panic, to verify inputs from the
network or from disk.
It is acceptable to use functions that might panic, like `unwrap()`, if
it is obvious that it cannot panic. For example, if you have just
checked that a variable is not None, it is OK to call `unwrap()` on it,
but it is still preferable to use `expect("reason")` instead to explain
why the function cannot fail.
`assert!` and `panic!` are reserved for checking clear invariants and
very obvious "can't happen" cases. When in doubt, use anyhow `ensure!`
or `bail!` instead.
## Error levels
`tracing::Level` doesn't provide very clear guidelines on what the
different levels mean, or when to use which level. Here is how we use
them:
### Error
Examples:
- could not open file "foobar"
- invalid tenant id
Errors are not expected to happen during normal operation. Incorrect
inputs from client can cause ERRORs. For example, if a client tries to
call a mgmt API that doesn't exist, or if a compute node passes
an LSN that has already been garbage collected away.
These should *not* happen during normal operations. "Normal
operations" is not a very precise concept. But for example, disk
errors are not expected to happen when the system is working, so those
count as Errors. However, if a TCP connection to a compute node is
lost, that is not considered an Error, because it doesn't affect the
pageserver's or safekeeper's operation in any way, and happens fairly
frequently when compute nodes are shut down, or are killed abruptly
because of errors in the compute.
**Errors are monitored, and always need human investigation to determine
the cause.**
Whether something should be logged at ERROR, WARNING or INFO level can
depend on the callers and clients. For example, it might be unexpected
and a sign of a serious issue if the console calls the
"timeline_detail" mgmt API for a timeline that doesn't exist. ERROR
would be appropriate in that case. But if the console routinely calls
the API after deleting a timeline, to check if the deletion has
completed, then it would be totally normal and an INFO or DEBUG level
message would be more appropriate. If a message is logged as an ERROR,
but it in fact happens frequently in production and never requires any
action, it should probably be demoted to an INFO level message.
### Warn
Examples:
- could not remove temporary file "foobar.temp"
- unrecognized file "foobar" in timeline directory
Warnings are similar to Errors, in that they should not happen
when the system is operating normally. The difference between Error and
Warning is that an Error means that the operation failed, whereas Warning
means that something unexpected happened, but the operation continued anyway.
For example, if deleting a file fails because the file already didn't exist,
it should be logged as Warning.
> **Note:** The python regression tests, under `test_regress`, check the
> pageserver log after each test for any ERROR and WARN lines. If there are
> any ERRORs or WARNs that have not been explicitly listed in the test as
allowed, the test is marked as failed. This is to catch unexpected errors
> e.g. in background operations, that don't cause immediate misbehaviour in
> the tested functionality.
### Info
Info level is used to log useful information when the system is
operating normally. Info level is appropriate e.g. for logging state
changes, background operations, and network connections.
Examples:
- "system is shutting down"
- "tenant was created"
- "retrying S3 upload"
### Debug & Trace
Debug and Trace level messages are not printed to the log in our normal
production configuration, but could be enabled for a specific server or
tenant, to aid debugging. (Although we don't actually have that
capability as of this writing).
## Context
We use logging "spans" to hold context information about the current
operation. Almost every operation happens on a particular tenant and
timeline, so we enter a span with the "tenant_id" and "timeline_id"
very early when processing an incoming API request, for example. All
background operations should also run in a span containing at least
those two fields, and any other parameters or information that might
be useful when debugging an error that might happen when performing
the operation.
TODO: Spans are not captured in the Error when it is created, but when
the error is logged. It would be more useful to capture them at Error
creation. We should consider using `tracing_error::SpanTrace` to do
that.
## Error message style
PostgreSQL has a style guide for writing error messages:
https://www.postgresql.org/docs/current/error-style-guide.html
Follow that guide when writing error messages in the PostgreSQL
extension. We don't follow it strictly in the pageserver and
safekeeper, but the advice in the PostgreSQL style guide is generally
good, and you can't go wrong by following it.

View File

@@ -83,16 +83,6 @@ A subject for future modularization.
`/libs/metrics`:
Helpers for exposing Prometheus metrics from the server.
### Adding dependencies
When you add a Cargo dependency, you should update hakari manifest by running commands below and committing the updated `Cargo.lock` and `workspace_hack/`. There may be no changes, that's fine.
```bash
cargo hakari generate
cargo hakari manage-deps
```
If you don't have hakari installed (`error: no such subcommand: hakari`), install it by running `cargo install cargo-hakari`.
## Using Python
Note that Debian/Ubuntu Python packages are stale, as it commonly happens,
so manual installation of dependencies is not recommended.

View File

@@ -1,12 +0,0 @@
[package]
name = "persistent_range_query"
version = "0.1.0"
edition = "2021"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[dependencies]
workspace_hack = { version = "0.1", path = "../../workspace_hack" }
[dev-dependencies]
rand = "0.8.3"

View File

@@ -1,78 +0,0 @@
use std::ops::Range;
pub mod naive;
pub mod ops;
pub mod segment_tree;
/// Should be a monoid:
/// * Identity element: for all a: combine(new_for_empty_range(), a) = combine(a, new_for_empty_range()) = a
/// * Associativity: for all a, b, c: combine(combine(a, b), c) == combine(a, combine(b, c))
pub trait RangeQueryResult<Key>: Sized + Clone {
    // Clone is equivalent to combine with an empty range.
    /// Returns the identity element (the result for an empty range).
    fn new_for_empty_range() -> Self;
    // Contract: left_range.end == right_range.start
    // left_range.start == left_range.end == right_range.start == right_range.end is still possible
    /// Combines the results of two adjacent ranges into the result for the
    /// concatenated range.
    fn combine(
        left: &Self,
        left_range: &Range<Key>,
        right: &Self,
        right_range: &Range<Key>,
    ) -> Self;
    /// In-place version of `combine`: `left` becomes the result for the
    /// concatenated range. Same contract on the ranges as `combine`.
    fn add(left: &mut Self, left_range: &Range<Key>, right: &Self, right_range: &Range<Key>);
}
/// Produces the initial `Result` for an arbitrary key range on demand, so
/// storages can defer materializing ranges that were never touched.
pub trait LazyRangeInitializer<Result: RangeQueryResult<Key>, Key> {
    /// Returns the initial query result for `range`.
    fn get(&self, range: &Range<Key>) -> Result;
}
/// Should be a monoid:
/// * Identity element: for all op: compose(no_op(), op) == compose(op, no_op()) == op
/// * Associativity: for all op_1, op_2, op_3: compose(compose(op_1, op_2), op_3) == compose(op_1, compose(op_2, op_3))
///
/// Should left act on Result:
/// * Identity operation: for all r: no_op().apply(r) == r
/// * Compatibility: for all op_1, op_2, r: op_1.apply(op_2.apply(r)) == compose(op_1, op_2).apply(r)
pub trait RangeModification<Key> {
    /// The query-result type this modification acts on.
    type Result: RangeQueryResult<Key>;
    /// Returns the identity modification (changes nothing).
    fn no_op() -> Self;
    /// True if applying `self` never changes any result.
    fn is_no_op(&self) -> bool;
    /// True if applying `self` completely overwrites a result, making the
    /// previous contents of the range irrelevant.
    fn is_reinitialization(&self) -> bool;
    /// Applies the modification to `result`, which covers `range`.
    fn apply(&self, result: &mut Self::Result, range: &Range<Key>);
    /// Updates `earlier` to be equivalent to applying the old `earlier`
    /// first and then `later`.
    fn compose(later: &Self, earlier: &mut Self);
}
/// Read-only access to some version of the stored vector.
pub trait VecReadableVersion<Modification: RangeModification<Key>, Key> {
    /// Returns the aggregated result over `keys`.
    fn get(&self, keys: &Range<Key>) -> Modification::Result;
}
// TODO: use trait alias when stabilized
/// A clonable, readable snapshot of the vector.
pub trait VecFrozenVersion<Modification: RangeModification<Key>, Key>:
    Clone + VecReadableVersion<Modification, Key>
{
}
impl<
        T: Clone + VecReadableVersion<Modification, Key>,
        Modification: RangeModification<Key>,
        Key,
    > VecFrozenVersion<Modification, Key> for T
{
}
/// A versioned vector supporting range modifications and range queries,
/// with snapshots taken via `freeze`.
pub trait PersistentVecStorage<
    Modification: RangeModification<Key>,
    Initializer: LazyRangeInitializer<Modification::Result, Key>,
    Key,
>: VecReadableVersion<Modification, Key>
{
    /// Creates a storage covering `all_keys`, initialized by `initializer`.
    fn new(all_keys: Range<Key>, initializer: Initializer) -> Self;
    /// The snapshot type returned by `freeze`.
    type FrozenVersion: VecFrozenVersion<Modification, Key>;
    /// Applies `modification` to every element in `keys`.
    fn modify(&mut self, keys: &Range<Key>, modification: &Modification);
    /// Returns a read-only snapshot of the current contents.
    fn freeze(&mut self) -> Self::FrozenVersion;
}

View File

@@ -1,115 +0,0 @@
use crate::{
LazyRangeInitializer, PersistentVecStorage, RangeModification, RangeQueryResult,
VecReadableVersion,
};
use std::marker::PhantomData;
use std::ops::Range;
use std::rc::Rc;
/// An immutable snapshot of a `NaiveVecStorage`: the full element vector is
/// copied once at freeze time and then shared via `Rc`.
pub struct NaiveFrozenVersion<Modification: RangeModification<Key>, Key> {
    all_keys: Range<Key>,
    values: Rc<Box<Vec<Modification::Result>>>,
}
/// Keys that map one-to-one onto `0..n` indices of a plain vector.
pub trait IndexableKey: Clone {
    /// Index of `key` within `all_keys`.
    fn index(all_keys: &Range<Self>, key: &Self) -> usize;
    /// The single-element key range stored at `index`.
    fn element_range(all_keys: &Range<Self>, index: usize) -> Range<Self>;
}
}
/// Aggregates the results of the elements covered by `keys`, scanning them
/// left to right and folding each one into an accumulator.
fn get<Modification: RangeModification<Key>, Key: IndexableKey>(
    all_keys: &Range<Key>,
    values: &Vec<Modification::Result>,
    keys: &Range<Key>,
) -> Modification::Result {
    let first = IndexableKey::index(all_keys, &keys.start);
    let last = IndexableKey::index(all_keys, &keys.end);
    // `acc_range` always describes the key span that `acc` currently covers;
    // it starts out empty at the left edge of the query.
    let mut acc = Modification::Result::new_for_empty_range();
    let mut acc_range = keys.start.clone()..keys.start.clone();
    for index in first..last {
        let element_range = IndexableKey::element_range(all_keys, index);
        Modification::Result::add(&mut acc, &acc_range, &values[index], &element_range);
        acc_range.end = element_range.end;
    }
    acc
}
// Range queries against the frozen snapshot.
impl<Modification: RangeModification<Key>, Key: IndexableKey> VecReadableVersion<Modification, Key>
    for NaiveFrozenVersion<Modification, Key>
{
    fn get(&self, keys: &Range<Key>) -> Modification::Result {
        get::<Modification, Key>(&self.all_keys, &self.values, keys)
    }
}
// Manual implementation of `Clone` because `derive` requires `Modification: Clone`
impl<Modification: RangeModification<Key>, Key: Clone> Clone
    for NaiveFrozenVersion<Modification, Key>
{
    fn clone(&self) -> Self {
        // Cheap: only the `Rc` handle to the shared vector is cloned.
        Self {
            all_keys: self.all_keys.clone(),
            values: self.values.clone(),
        }
    }
}
// TODO: is it at all possible to store previous versions in this struct,
// without any Rc<>?
/// Reference implementation: one `Result` per element in a flat vector;
/// queries and modifications scan the requested range element by element.
pub struct NaiveVecStorage<
    Modification: RangeModification<Key>,
    Initializer: LazyRangeInitializer<Modification::Result, Key>,
    Key: IndexableKey,
> {
    all_keys: Range<Key>,
    last_version: Vec<Modification::Result>,
    _initializer: PhantomData<Initializer>,
}
impl<
        Modification: RangeModification<Key>,
        Initializer: LazyRangeInitializer<Modification::Result, Key>,
        Key: IndexableKey,
    > VecReadableVersion<Modification, Key> for NaiveVecStorage<Modification, Initializer, Key>
{
    fn get(&self, keys: &Range<Key>) -> Modification::Result {
        get::<Modification, Key>(&self.all_keys, &self.last_version, keys)
    }
}
impl<
        Modification: RangeModification<Key>,
        Initializer: LazyRangeInitializer<Modification::Result, Key>,
        Key: IndexableKey,
    > PersistentVecStorage<Modification, Initializer, Key>
    for NaiveVecStorage<Modification, Initializer, Key>
{
    /// Eagerly initializes every element from `initializer`.
    fn new(all_keys: Range<Key>, initializer: Initializer) -> Self {
        let mut values = Vec::with_capacity(IndexableKey::index(&all_keys, &all_keys.end));
        for index in 0..values.capacity() {
            values.push(initializer.get(&IndexableKey::element_range(&all_keys, index)));
        }
        NaiveVecStorage {
            all_keys,
            last_version: values,
            _initializer: PhantomData,
        }
    }
    type FrozenVersion = NaiveFrozenVersion<Modification, Key>;
    /// Applies `modification` element-by-element over `keys`.
    fn modify(&mut self, keys: &Range<Key>, modification: &Modification) {
        for index in IndexableKey::index(&self.all_keys, &keys.start)
            ..IndexableKey::index(&self.all_keys, &keys.end)
        {
            let element_range = IndexableKey::element_range(&self.all_keys, index);
            modification.apply(&mut self.last_version[index], &element_range);
        }
    }
    /// O(n) snapshot: copies the whole vector into a shared allocation.
    fn freeze(&mut self) -> Self::FrozenVersion {
        NaiveFrozenVersion::<Modification, Key> {
            all_keys: self.all_keys.clone(),
            values: Rc::new(Box::new(self.last_version.clone())),
        }
    }
}

View File

@@ -1,14 +0,0 @@
pub mod rsq;
/// Initializer that fills every element of a fresh range with the same value.
#[derive(Copy, Clone, Debug)]
pub struct SameElementsInitializer<T> {
    initial_element_value: T,
}
impl<T> SameElementsInitializer<T> {
    /// Wraps `initial_element_value` as the per-element initial value.
    pub fn new(initial_element_value: T) -> Self {
        Self { initial_element_value }
    }
}

View File

@@ -1,118 +0,0 @@
//! # Range Sum Query
use crate::ops::SameElementsInitializer;
use crate::{LazyRangeInitializer, RangeModification, RangeQueryResult};
use std::borrow::Borrow;
use std::ops::{Add, AddAssign, Range};
// TODO: commutative Add
/// Aggregated sum over a key range.
#[derive(Clone, Copy, Debug)]
pub struct SumResult<T> {
    sum: T,
}
impl<T> SumResult<T> {
    /// The sum over the range this result was computed for.
    pub fn sum(&self) -> &T {
        &self.sum
    }
}
impl<T: Clone + for<'a> AddAssign<&'a T> + From<u8>, Key> RangeQueryResult<Key> for SumResult<T>
where
    for<'a> &'a T: Add<&'a T, Output = T>,
{
    /// The empty range sums to zero (`0u8.into()`).
    fn new_for_empty_range() -> Self {
        SumResult { sum: 0.into() }
    }
    fn combine(
        left: &Self,
        _left_range: &Range<Key>,
        right: &Self,
        _right_range: &Range<Key>,
    ) -> Self {
        SumResult {
            sum: &left.sum + &right.sum,
        }
    }
    fn add(left: &mut Self, _left_range: &Range<Key>, right: &Self, _right_range: &Range<Key>) {
        left.sum += &right.sum
    }
}
/// Types whose sum over a range of identical elements can be computed
/// directly from one element value and the range.
pub trait SumOfSameElements<Key> {
    fn sum(initial_element_value: &Self, keys: &Range<Key>) -> Self;
}
impl<T: SumOfSameElements<Key>, TB: Borrow<T>, Key> LazyRangeInitializer<SumResult<T>, Key>
    for SameElementsInitializer<TB>
where
    SumResult<T>: RangeQueryResult<Key>,
{
    fn get(&self, range: &Range<Key>) -> SumResult<T> {
        SumResult {
            sum: SumOfSameElements::sum(self.initial_element_value.borrow(), range),
        }
    }
}
/// Modification for sums: do nothing, add to, or overwrite every element.
#[derive(Copy, Clone, Debug)]
pub enum AddAssignModification<T> {
    None,
    Add(T),
    Assign(T),
}
// How a range of sums is modified: add a value to every element, or
// overwrite every element with a value.
impl<T: Clone + for<'a> AddAssign<&'a T>, Key> RangeModification<Key> for AddAssignModification<T>
where
    SumResult<T>: RangeQueryResult<Key>,
    for<'a> SameElementsInitializer<&'a T>: LazyRangeInitializer<SumResult<T>, Key>,
{
    type Result = SumResult<T>;
    fn no_op() -> Self {
        AddAssignModification::None
    }
    fn is_no_op(&self) -> bool {
        // `matches!` instead of the hand-written match-to-bool.
        matches!(self, AddAssignModification::None)
    }
    fn is_reinitialization(&self) -> bool {
        // Assigning makes the previous contents of the range irrelevant.
        matches!(self, AddAssignModification::Assign(_))
    }
    /// Applies the modification to the cached sum for `range`.
    fn apply(&self, result: &mut SumResult<T>, range: &Range<Key>) {
        use AddAssignModification::*;
        match self {
            None => {}
            Add(x) | Assign(x) => {
                // Sum of `x` repeated over every element of `range`.
                let to_add = SameElementsInitializer::new(x).get(range).sum;
                if let Assign(_) = self {
                    result.sum = to_add;
                } else {
                    result.sum += &to_add;
                }
            }
        }
    }
    /// Folds `later` into `earlier` so that `earlier` alone has the effect
    /// of old-`earlier` followed by `later`.
    fn compose(later: &Self, earlier: &mut Self) {
        use AddAssignModification::*;
        match (later, earlier) {
            (_, e @ None) => *e = later.clone(),
            (None, _) => {}
            // A later assignment wipes out whatever happened before it.
            (Assign(_), e) => *e = later.clone(),
            (Add(x), Add(y)) => *y += x,
            (Add(x), Assign(value)) => *value += x,
        }
    }
}

View File

@@ -1,255 +0,0 @@
//! # Segment Tree
//! It is a competitive programming folklore data structure. Do not confuse with the interval tree.
use crate::{LazyRangeInitializer, PersistentVecStorage, RangeQueryResult, VecReadableVersion};
use std::ops::Range;
use std::rc::Rc;
/// Keys whose ranges can be split in two, so the tree can recurse.
pub trait MidpointableKey: Clone + Ord + Sized {
    /// Returns the point at which `range` is split into two sub-ranges.
    fn midpoint(range: &Range<Self>) -> Self;
}
// The tree additionally needs `Clone` modifications, because pending
// modifications are stored inside (possibly shared) nodes.
pub trait RangeModification<Key>: Clone + crate::RangeModification<Key> {}
// TODO: use trait alias when stabilized
impl<T: Clone + crate::RangeModification<Key>, Key> RangeModification<Key> for T {}
/// One tree node: the aggregated `result` for the node's whole range, plus a
/// modification queued for lazy propagation to the children.
#[derive(Debug)]
struct Node<Modification: RangeModification<Key>, Key> {
    result: Modification::Result,
    modify_children: Modification,
    left: Option<Rc<Self>>,
    right: Option<Rc<Self>>,
}
// Manual implementation because we don't need `Key: Clone` for this, unlike with `derive`.
impl<Modification: RangeModification<Key>, Key> Clone for Node<Modification, Key> {
    fn clone(&self) -> Self {
        Node {
            result: self.result.clone(),
            modify_children: self.modify_children.clone(),
            left: self.left.clone(),
            right: self.right.clone(),
        }
    }
}
impl<Modification: RangeModification<Key>, Key> Node<Modification, Key> {
    /// A childless node covering `range`, with its aggregate taken from
    /// `initializer`.
    fn new<Initializer: LazyRangeInitializer<Modification::Result, Key>>(
        range: &Range<Key>,
        initializer: &Initializer,
    ) -> Self {
        Node {
            result: initializer.get(range),
            modify_children: Modification::no_op(),
            left: None,
            right: None,
        }
    }
    /// Applies `modification` to this node's aggregate and queues it for the
    /// children (lazy propagation).
    pub fn apply(&mut self, modification: &Modification, range: &Range<Key>) {
        modification.apply(&mut self.result, range);
        Modification::compose(modification, &mut self.modify_children);
        if self.modify_children.is_reinitialization() {
            // The children's contents are now irrelevant; drop them so they
            // are rebuilt from the initializer on demand.
            self.left = None;
            self.right = None;
        }
    }
    /// Materializes both children (creating them from `initializer` if
    /// missing) and pushes the pending modification down to them.
    pub fn force_children<Initializer: LazyRangeInitializer<Modification::Result, Key>>(
        &mut self,
        initializer: &Initializer,
        range_left: &Range<Key>,
        range_right: &Range<Key>,
    ) {
        // `Rc::make_mut` clones a shared child before mutating it, which is
        // what keeps previously frozen versions intact.
        let left = Rc::make_mut(
            self.left
                .get_or_insert_with(|| Rc::new(Node::new(&range_left, initializer))),
        );
        let right = Rc::make_mut(
            self.right
                .get_or_insert_with(|| Rc::new(Node::new(&range_right, initializer))),
        );
        left.apply(&self.modify_children, &range_left);
        right.apply(&self.modify_children, &range_right);
        self.modify_children = Modification::no_op();
    }
    /// Recomputes this node's aggregate from its (already up-to-date)
    /// children. Requires no pending modification and both children present.
    pub fn recalculate_from_children(&mut self, range_left: &Range<Key>, range_right: &Range<Key>) {
        assert!(self.modify_children.is_no_op());
        assert!(self.left.is_some());
        assert!(self.right.is_some());
        self.result = Modification::Result::combine(
            &self.left.as_ref().unwrap().result,
            &range_left,
            &self.right.as_ref().unwrap().result,
            &range_right,
        );
    }
}
/// Splits `range` at its midpoint into two adjacent sub-ranges.
fn split_range<Key: MidpointableKey>(range: &Range<Key>) -> (Range<Key>, Range<Key>) {
    let mid = MidpointableKey::midpoint(range);
    (range.start.clone()..mid.clone(), mid..range.end.clone())
}
/// A version of the segment tree: a root node plus the metadata needed to
/// interpret it. Cloning is cheap (only `Rc` handles are cloned).
pub struct PersistentSegmentTreeVersion<
    Modification: RangeModification<Key>,
    Initializer: LazyRangeInitializer<Modification::Result, Key>,
    Key: Clone,
> {
    root: Rc<Node<Modification, Key>>,
    all_keys: Range<Key>,
    initializer: Rc<Initializer>,
}
// Manual implementation because `derive` would additionally require
// `Modification: Clone` and `Initializer: Clone`.
impl<
        Modification: RangeModification<Key>,
        Initializer: LazyRangeInitializer<Modification::Result, Key>,
        Key: Clone,
    > Clone for PersistentSegmentTreeVersion<Modification, Initializer, Key>
{
    fn clone(&self) -> Self {
        Self {
            root: self.root.clone(),
            all_keys: self.all_keys.clone(),
            initializer: self.initializer.clone(),
        }
    }
}
/// Aggregates the query result for `keys` within the subtree at `node`,
/// which covers `node_keys`.
fn get<
    Modification: RangeModification<Key>,
    Initializer: LazyRangeInitializer<Modification::Result, Key>,
    Key: MidpointableKey,
>(
    node: &mut Rc<Node<Modification, Key>>,
    node_keys: &Range<Key>,
    initializer: &Initializer,
    keys: &Range<Key>,
) -> Modification::Result {
    if node_keys.end <= keys.start || keys.end <= node_keys.start {
        // Disjoint: contributes the identity element.
        return Modification::Result::new_for_empty_range();
    }
    if keys.start <= node_keys.start && node_keys.end <= keys.end {
        // Fully covered: the node's cached aggregate is the answer.
        return node.result.clone();
    }
    // Partial overlap: descend. `make_mut` may clone a shared node so lazy
    // pushes don't disturb frozen versions.
    let node = Rc::make_mut(node);
    let (left_keys, right_keys) = split_range(node_keys);
    node.force_children(initializer, &left_keys, &right_keys);
    let mut result = get(node.left.as_mut().unwrap(), &left_keys, initializer, keys);
    Modification::Result::add(
        &mut result,
        &left_keys,
        &get(node.right.as_mut().unwrap(), &right_keys, initializer, keys),
        &right_keys,
    );
    result
}
/// Applies `modification` to the intersection of `keys` with the subtree at
/// `node` (covering `node_keys`), keeping cached aggregates up to date.
fn modify<
    Modification: RangeModification<Key>,
    Initializer: LazyRangeInitializer<Modification::Result, Key>,
    Key: MidpointableKey,
>(
    node: &mut Rc<Node<Modification, Key>>,
    node_keys: &Range<Key>,
    initializer: &Initializer,
    keys: &Range<Key>,
    modification: &Modification,
) {
    if modification.is_no_op() || node_keys.end <= keys.start || keys.end <= node_keys.start {
        return;
    }
    let node = Rc::make_mut(node);
    if keys.start <= node_keys.start && node_keys.end <= keys.end {
        // Fully covered: apply lazily at this node and stop descending.
        node.apply(modification, node_keys);
        return;
    }
    let (left_keys, right_keys) = split_range(node_keys);
    node.force_children(initializer, &left_keys, &right_keys);
    modify(
        node.left.as_mut().unwrap(),
        &left_keys,
        initializer,
        keys,
        &modification,
    );
    modify(
        node.right.as_mut().unwrap(),
        &right_keys,
        initializer,
        keys,
        &modification,
    );
    node.recalculate_from_children(&left_keys, &right_keys);
}
impl<
        Modification: RangeModification<Key>,
        Initializer: LazyRangeInitializer<Modification::Result, Key>,
        Key: MidpointableKey,
    > VecReadableVersion<Modification, Key>
    for PersistentSegmentTreeVersion<Modification, Initializer, Key>
{
    fn get(&self, keys: &Range<Key>) -> Modification::Result {
        // Works on a clone of the root handle, so this version stays
        // logically immutable even though `get` forces children internally.
        get(
            &mut self.root.clone(), // TODO: do not always force a branch
            &self.all_keys,
            self.initializer.as_ref(),
            keys,
        )
    }
}
/// The mutable head of the tree; wraps the current version.
pub struct PersistentSegmentTree<
    Modification: RangeModification<Key>,
    Initializer: LazyRangeInitializer<Modification::Result, Key>,
    Key: MidpointableKey,
>(PersistentSegmentTreeVersion<Modification, Initializer, Key>);
impl<
        Modification: RangeModification<Key>,
        Initializer: LazyRangeInitializer<Modification::Result, Key>,
        Key: MidpointableKey,
    > VecReadableVersion<Modification, Key>
    for PersistentSegmentTree<Modification, Initializer, Key>
{
    fn get(&self, keys: &Range<Key>) -> Modification::Result {
        // Delegates to the current version.
        self.0.get(keys)
    }
}
impl<
        Modification: RangeModification<Key>,
        Initializer: LazyRangeInitializer<Modification::Result, Key>,
        Key: MidpointableKey,
    > PersistentVecStorage<Modification, Initializer, Key>
    for PersistentSegmentTree<Modification, Initializer, Key>
{
    /// Creates a tree over `all_keys` consisting of a single lazily
    /// initialized root node.
    fn new(all_keys: Range<Key>, initializer: Initializer) -> Self {
        PersistentSegmentTree(PersistentSegmentTreeVersion {
            root: Rc::new(Node::new(&all_keys, &initializer)),
            // Field-init shorthand instead of the redundant `all_keys: all_keys`.
            all_keys,
            initializer: Rc::new(initializer),
        })
    }
    type FrozenVersion = PersistentSegmentTreeVersion<Modification, Initializer, Key>;
    /// Applies `modification` to every element in `keys`.
    fn modify(&mut self, keys: &Range<Key>, modification: &Modification) {
        modify(
            &mut self.0.root, // TODO: do not always force a branch
            &self.0.all_keys,
            self.0.initializer.as_ref(),
            keys,
            modification,
        )
    }
    /// Cheap snapshot: clones the version handle (`Rc` bumps only).
    fn freeze(&mut self) -> Self::FrozenVersion {
        self.0.clone()
    }
}

View File

@@ -1,295 +0,0 @@
use persistent_range_query::naive::{IndexableKey, NaiveVecStorage};
use persistent_range_query::ops::SameElementsInitializer;
use persistent_range_query::segment_tree::{MidpointableKey, PersistentSegmentTree};
use persistent_range_query::{
LazyRangeInitializer, PersistentVecStorage, RangeModification, RangeQueryResult,
VecReadableVersion,
};
use std::cmp::Ordering;
use std::ops::Range;
/// A page number; the key type of the layer map under test.
#[derive(Clone, Copy, Debug, Eq, PartialEq, Ord, PartialOrd)]
struct PageIndex(u32);
/// Layers are identified by name in this test.
type LayerId = String;
impl IndexableKey for PageIndex {
    fn index(all_keys: &Range<Self>, key: &Self) -> usize {
        (key.0 as usize) - (all_keys.start.0 as usize)
    }
    fn element_range(all_keys: &Range<Self>, index: usize) -> Range<Self> {
        PageIndex(all_keys.start.0 + index as u32)..PageIndex(all_keys.start.0 + index as u32 + 1)
    }
}
impl MidpointableKey for PageIndex {
    fn midpoint(range: &Range<Self>) -> Self {
        PageIndex(range.start.0 + (range.end.0 - range.start.0) / 2)
    }
}
/// Per-range aggregate maintained by the layer-map test.
#[derive(Clone, Debug, Eq, PartialEq)]
struct LayerMapInformation {
    // Only make sense for a range of length 1.
    last_layer: Option<LayerId>,
    last_image_layer: Option<LayerId>,
    // Work for all ranges
    max_delta_layers: (usize, Range<PageIndex>),
}
impl LayerMapInformation {
    /// The latest layer and the latest image layer (single-page ranges only).
    fn last_layers(&self) -> (&Option<LayerId>, &Option<LayerId>) {
        (&self.last_layer, &self.last_image_layer)
    }
    /// The maximum delta-layer count and a range where it is attained.
    fn max_delta_layers(&self) -> &(usize, Range<PageIndex>) {
        &self.max_delta_layers
    }
}
/// Concatenates two page ranges when they are adjacent; otherwise falls back
/// to the non-empty one (or to `left` when neither matches).
fn merge_ranges(left: &Range<PageIndex>, right: &Range<PageIndex>) -> Range<PageIndex> {
    match (left.is_empty(), right.is_empty()) {
        (true, _) => right.clone(),
        (_, true) => left.clone(),
        _ if left.end == right.start => left.start..right.end,
        _ => left.clone(),
    }
}
impl RangeQueryResult<PageIndex> for LayerMapInformation {
    fn new_for_empty_range() -> Self {
        LayerMapInformation {
            last_layer: None,
            last_image_layer: None,
            max_delta_layers: (0, PageIndex(0)..PageIndex(0)),
        }
    }
    fn combine(
        left: &Self,
        _left_range: &Range<PageIndex>,
        right: &Self,
        _right_range: &Range<PageIndex>,
    ) -> Self {
        // Note that either range may be empty.
        LayerMapInformation {
            // `last_*` fields prefer the left side when both are present.
            last_layer: left
                .last_layer
                .as_ref()
                .or_else(|| right.last_layer.as_ref())
                .cloned(),
            last_image_layer: left
                .last_image_layer
                .as_ref()
                .or_else(|| right.last_image_layer.as_ref())
                .cloned(),
            // The larger count wins; on a tie, adjacent witness ranges are
            // merged into one.
            max_delta_layers: match left.max_delta_layers.0.cmp(&right.max_delta_layers.0) {
                Ordering::Less => right.max_delta_layers.clone(),
                Ordering::Greater => left.max_delta_layers.clone(),
                Ordering::Equal => (
                    left.max_delta_layers.0,
                    merge_ranges(&left.max_delta_layers.1, &right.max_delta_layers.1),
                ),
            },
        }
    }
    fn add(
        left: &mut Self,
        left_range: &Range<PageIndex>,
        right: &Self,
        right_range: &Range<PageIndex>,
    ) {
        *left = Self::combine(&left, left_range, right, right_range);
    }
}
/// Pending "add N delta layers" operation; remembers the newest layer name.
#[derive(Clone, Debug)]
struct AddDeltaLayers {
    last_layer: LayerId,
    count: usize,
}
/// A modification of the layer map: add an image layer, delta layers, or both.
#[derive(Clone, Debug)]
struct LayerMapModification {
    add_image_layer: Option<LayerId>,
    add_delta_layers: Option<AddDeltaLayers>,
}
impl LayerMapModification {
    /// Modification that adds a single image layer named `layer`.
    fn add_image_layer(layer: impl Into<LayerId>) -> Self {
        LayerMapModification {
            add_image_layer: Some(layer.into()),
            add_delta_layers: None,
        }
    }
    /// Modification that adds a single delta layer named `layer`.
    fn add_delta_layer(layer: impl Into<LayerId>) -> Self {
        LayerMapModification {
            add_image_layer: None,
            add_delta_layers: Some(AddDeltaLayers {
                last_layer: layer.into(),
                count: 1,
            }),
        }
    }
}
impl RangeModification<PageIndex> for LayerMapModification {
    type Result = LayerMapInformation;
    fn no_op() -> Self {
        LayerMapModification {
            add_image_layer: None,
            add_delta_layers: None,
        }
    }
    fn is_no_op(&self) -> bool {
        self.add_image_layer.is_none() && self.add_delta_layers.is_none()
    }
    fn is_reinitialization(&self) -> bool {
        // An image layer resets the delta-layer count for the whole range.
        self.add_image_layer.is_some()
    }
    fn apply(&self, result: &mut Self::Result, range: &Range<PageIndex>) {
        if let Some(layer) = &self.add_image_layer {
            result.last_layer = Some(layer.clone());
            result.last_image_layer = Some(layer.clone());
            result.max_delta_layers = (0, range.clone());
        }
        // Delta layers are applied after the image layer, if both are set.
        if let Some(AddDeltaLayers { last_layer, count }) = &self.add_delta_layers {
            result.last_layer = Some(last_layer.clone());
            result.max_delta_layers.0 += count;
        }
    }
    fn compose(later: &Self, earlier: &mut Self) {
        if later.add_image_layer.is_some() {
            // The later image layer makes the earlier modification irrelevant.
            *earlier = later.clone();
            return;
        }
        if let Some(AddDeltaLayers { last_layer, count }) = &later.add_delta_layers {
            let res = earlier.add_delta_layers.get_or_insert(AddDeltaLayers {
                last_layer: LayerId::default(),
                count: 0,
            });
            res.last_layer = last_layer.clone();
            res.count += count;
        }
    }
}
// Fresh ranges start with no layers at all.
impl LazyRangeInitializer<LayerMapInformation, PageIndex> for SameElementsInitializer<()> {
    fn get(&self, range: &Range<PageIndex>) -> LayerMapInformation {
        LayerMapInformation {
            last_layer: None,
            last_image_layer: None,
            max_delta_layers: (0, range.clone()),
        }
    }
}
// Exercises a PersistentVecStorage implementation with a layer-map workload:
// two overlapping image layers, two overlapping delta layers, and a frozen
// snapshot taken in between that must be unaffected by later modifications.
fn test_layer_map<
    S: PersistentVecStorage<LayerMapModification, SameElementsInitializer<()>, PageIndex>,
>() {
    let mut s = S::new(
        PageIndex(0)..PageIndex(100),
        SameElementsInitializer::new(()),
    );
    s.modify(
        &(PageIndex(0)..PageIndex(70)),
        &LayerMapModification::add_image_layer("Img0..70"),
    );
    s.modify(
        &(PageIndex(50)..PageIndex(100)),
        &LayerMapModification::add_image_layer("Img50..100"),
    );
    s.modify(
        &(PageIndex(10)..PageIndex(60)),
        &LayerMapModification::add_delta_layer("Delta10..60"),
    );
    // Snapshot before the second delta layer; queried again below to check
    // that the frozen version did not observe "Delta20..80".
    let s_before_last_delta = s.freeze();
    s.modify(
        &(PageIndex(20)..PageIndex(80)),
        &LayerMapModification::add_delta_layer("Delta20..80"),
    );
    // Probe single pages: (last layer, last image layer) at each point.
    assert_eq!(
        s.get(&(PageIndex(5)..PageIndex(6))).last_layers(),
        (&Some("Img0..70".to_owned()), &Some("Img0..70".to_owned()))
    );
    assert_eq!(
        s.get(&(PageIndex(15)..PageIndex(16))).last_layers(),
        (
            &Some("Delta10..60".to_owned()),
            &Some("Img0..70".to_owned())
        )
    );
    assert_eq!(
        s.get(&(PageIndex(25)..PageIndex(26))).last_layers(),
        (
            &Some("Delta20..80".to_owned()),
            &Some("Img0..70".to_owned())
        )
    );
    assert_eq!(
        s.get(&(PageIndex(65)..PageIndex(66))).last_layers(),
        (
            &Some("Delta20..80".to_owned()),
            &Some("Img50..100".to_owned())
        )
    );
    assert_eq!(
        s.get(&(PageIndex(95)..PageIndex(96))).last_layers(),
        (
            &Some("Img50..100".to_owned()),
            &Some("Img50..100".to_owned())
        )
    );
    // Maximum delta-layer count plus the sub-range where it is attained.
    assert_eq!(
        s.get(&(PageIndex(0)..PageIndex(100))).max_delta_layers(),
        &(2, PageIndex(20)..PageIndex(60)),
    );
    assert_eq!(
        *s_before_last_delta
            .get(&(PageIndex(0)..PageIndex(100)))
            .max_delta_layers(),
        (1, PageIndex(10)..PageIndex(60)),
    );
    assert_eq!(
        *s.get(&(PageIndex(10)..PageIndex(30))).max_delta_layers(),
        (2, PageIndex(20)..PageIndex(30))
    );
    assert_eq!(
        *s.get(&(PageIndex(10)..PageIndex(20))).max_delta_layers(),
        (1, PageIndex(10)..PageIndex(20))
    );
    assert_eq!(
        *s.get(&(PageIndex(70)..PageIndex(80))).max_delta_layers(),
        (1, PageIndex(70)..PageIndex(80))
    );
    // The frozen snapshot never saw any delta layer over 70..80.
    assert_eq!(
        *s_before_last_delta
            .get(&(PageIndex(70)..PageIndex(80)))
            .max_delta_layers(),
        (0, PageIndex(70)..PageIndex(80))
    );
}
// Runs the shared layer-map scenario against the naive reference storage.
#[test]
fn test_naive() {
    test_layer_map::<NaiveVecStorage<_, _, _>>();
}
// Runs the shared layer-map scenario against the persistent segment tree.
#[test]
fn test_segment_tree() {
    test_layer_map::<PersistentSegmentTree<_, _, _>>();
}

View File

@@ -1,116 +0,0 @@
use persistent_range_query::naive::*;
use persistent_range_query::ops::rsq::AddAssignModification::Add;
use persistent_range_query::ops::rsq::*;
use persistent_range_query::ops::SameElementsInitializer;
use persistent_range_query::segment_tree::{MidpointableKey, PersistentSegmentTree};
use persistent_range_query::{PersistentVecStorage, VecReadableVersion};
use rand::{Rng, SeedableRng};
use std::ops::Range;
// Test key type: a u16 newtype used as the index domain of the storages.
#[derive(Copy, Clone, Debug, Eq, PartialEq, Ord, PartialOrd)]
struct K(u16);
impl IndexableKey for K {
    /// Zero-based offset of `key` from the start of `all_keys`.
    fn index(all_keys: &Range<Self>, key: &Self) -> usize {
        usize::from(key.0) - usize::from(all_keys.start.0)
    }
    /// The half-open, single-element range covering position `index`.
    fn element_range(all_keys: &Range<Self>, index: usize) -> Range<Self> {
        let start = all_keys.start.0 + index as u16;
        K(start)..K(start + 1)
    }
}
impl SumOfSameElements<K> for i32 {
    /// Sum of a constant element over every key in the half-open range:
    /// the element value times the number of keys.
    fn sum(initial_element_value: &Self, keys: &Range<K>) -> Self {
        let key_count = (keys.end.0 - keys.start.0) as Self;
        initial_element_value * key_count
    }
}
impl MidpointableKey for K {
    /// Key halfway through `range` (rounded down); used to split tree nodes.
    fn midpoint(range: &Range<Self>) -> Self {
        let half_span = (range.end.0 - range.start.0) / 2;
        K(range.start.0 + half_span)
    }
}
// Shared scenario for range-sum storages: mixes Add and Assign range
// updates and checks sums on both the live version and a frozen snapshot.
fn test_storage<
    S: PersistentVecStorage<AddAssignModification<i32>, SameElementsInitializer<i32>, K>,
>() {
    let mut s = S::new(K(0)..K(12), SameElementsInitializer::new(0i32));
    assert_eq!(*s.get(&(K(0)..K(12))).sum(), 0);
    s.modify(&(K(2)..K(5)), &AddAssignModification::Add(3));
    assert_eq!(*s.get(&(K(0)..K(12))).sum(), 3 + 3 + 3);
    // Snapshot before the Assign; it must keep returning the old sums.
    let s_old = s.freeze();
    s.modify(&(K(3)..K(6)), &AddAssignModification::Assign(10));
    assert_eq!(*s.get(&(K(0)..K(12))).sum(), 3 + 10 + 10 + 10);
    s.modify(&(K(4)..K(7)), &AddAssignModification::Add(2));
    assert_eq!(*s.get(&(K(0)..K(12))).sum(), 3 + 10 + 12 + 12 + 2);
    assert_eq!(*s.get(&(K(4)..K(6))).sum(), 12 + 12);
    // The frozen version still sees only the original Add(3) at key 4.
    assert_eq!(*s_old.get(&(K(4)..K(6))).sum(), 3);
}
// Runs the shared range-sum scenario against the naive reference storage.
#[test]
fn test_naive() {
    test_storage::<NaiveVecStorage<_, _, _>>();
}
// Runs the shared range-sum scenario against the persistent segment tree.
#[test]
fn test_segment_tree() {
    test_storage::<PersistentSegmentTree<_, _, _>>();
}
// Randomized cross-check: applies identical random Add operations to the
// naive storage and the segment tree and compares range sums throughout.
// The RNG is seeded, so failures are reproducible.
#[test]
fn test_stress() {
    const LEN: u16 = 17_238;
    const OPERATIONS: i32 = 20_000;
    let mut rng = rand::rngs::StdRng::seed_from_u64(0);
    let mut naive: NaiveVecStorage<AddAssignModification<i32>, _, _> =
        NaiveVecStorage::new(K(0)..K(LEN), SameElementsInitializer::new(2i32));
    let mut segm_tree: PersistentSegmentTree<AddAssignModification<i32>, _, _> =
        PersistentSegmentTree::new(K(0)..K(LEN), SameElementsInitializer::new(2i32));
    // Random half-open range with both ends drawn independently from
    // [0, LEN); swapped if drawn out of order.
    fn gen_range(rng: &mut impl Rng) -> Range<K> {
        let l: u16 = rng.gen_range(0..LEN);
        let r: u16 = rng.gen_range(0..LEN);
        if l <= r {
            K(l)..K(r)
        } else {
            K(r)..K(l)
        }
    }
    // Two rounds: each freezes both storages, hammers the live versions,
    // then verifies the frozen checksum is unchanged.
    for _ in 0..2 {
        let checksum_range = gen_range(&mut rng);
        let checksum_before: i32 = *naive.get(&checksum_range).sum();
        assert_eq!(checksum_before, *segm_tree.get(&checksum_range).sum());
        let naive_before = naive.freeze();
        let segm_tree_before = segm_tree.freeze();
        assert_eq!(checksum_before, *naive_before.get(&checksum_range).sum());
        assert_eq!(checksum_before, *segm_tree.get(&checksum_range).sum());
        for _ in 0..OPERATIONS {
            {
                // Read check: both storages agree on a random range sum.
                let range = gen_range(&mut rng);
                assert_eq!(naive.get(&range).sum(), segm_tree.get(&range).sum());
            }
            {
                // Write: apply the same random Add to both storages.
                let range = gen_range(&mut rng);
                let val = rng.gen_range(-10i32..=10i32);
                let op = Add(val);
                naive.modify(&range, &op);
                segm_tree.modify(&range, &op);
            }
        }
        // Frozen versions must still return the pre-modification checksum.
        assert_eq!(checksum_before, *naive_before.get(&checksum_range).sum());
        assert_eq!(
            checksum_before,
            *segm_tree_before.get(&checksum_range).sum()
        );
    }
}

View File

@@ -33,8 +33,8 @@ pub struct Segment {
/// Logical size before this state
start_size: u64,
/// Logical size at this state. Can be None in the last Segment of a branch.
pub end_size: Option<u64>,
/// Logical size at this state
pub end_size: u64,
/// Indices to [`Storage::segments`]
///
@@ -115,7 +115,7 @@ impl<K: std::hash::Hash + Eq + 'static> Storage<K> {
start_lsn: 0,
end_lsn: 0,
start_size: 0,
end_size: Some(0),
end_size: 0,
children_after: Vec::new(),
};
@@ -125,39 +125,6 @@ impl<K: std::hash::Hash + Eq + 'static> Storage<K> {
}
}
/// Advances the branch with a new point, at given LSN.
pub fn insert_point<Q: ?Sized>(
&mut self,
branch: &Q,
op: Cow<'static, str>,
lsn: u64,
size: Option<u64>,
) where
K: std::borrow::Borrow<Q>,
Q: std::hash::Hash + Eq,
{
let lastseg_id = *self.branches.get(branch).unwrap();
let newseg_id = self.segments.len();
let lastseg = &mut self.segments[lastseg_id];
assert!(lsn > lastseg.end_lsn);
let newseg = Segment {
op,
parent: Some(lastseg_id),
start_lsn: lastseg.end_lsn,
end_lsn: lsn,
start_size: lastseg.end_size.unwrap(),
end_size: size,
children_after: Vec::new(),
needed: false,
};
lastseg.children_after.push(newseg_id);
self.segments.push(newseg);
*self.branches.get_mut(branch).expect("read already") = newseg_id;
}
/// Advances the branch with the named operation, by the relative LSN and logical size bytes.
pub fn modify_branch<Q: ?Sized>(
&mut self,
@@ -178,8 +145,8 @@ impl<K: std::hash::Hash + Eq + 'static> Storage<K> {
parent: Some(lastseg_id),
start_lsn: lastseg.end_lsn,
end_lsn: lastseg.end_lsn + lsn_bytes,
start_size: lastseg.end_size.unwrap(),
end_size: Some((lastseg.end_size.unwrap() as i64 + size_bytes) as u64),
start_size: lastseg.end_size,
end_size: (lastseg.end_size as i64 + size_bytes) as u64,
children_after: Vec::new(),
needed: false,
};
@@ -354,7 +321,7 @@ impl<K: std::hash::Hash + Eq + 'static> Storage<K> {
Some(SegmentSize {
seg_id,
method: SnapshotAfter,
this_size: seg.end_size.unwrap(),
this_size: seg.end_size,
children,
})
} else {

View File

@@ -174,7 +174,7 @@ fn graphviz_recurse(segments: &[Segment], node: &SegmentSize) {
let seg_id = node.seg_id;
let seg = segments.get(seg_id).unwrap();
let lsn = seg.end_lsn;
let size = seg.end_size.unwrap_or(0);
let size = seg.end_size;
let method = node.method;
println!(" {{");
@@ -226,7 +226,7 @@ fn graphviz_recurse(segments: &[Segment], node: &SegmentSize) {
print!(
" label=\"{} / {}\"",
next.end_lsn - seg.end_lsn,
(next.end_size.unwrap_or(0) as i128 - seg.end_size.unwrap_or(0) as i128)
(next.end_size as i128 - seg.end_size as i128)
);
} else {
print!(" label=\"{}: {}\"", next.op, next.end_lsn - seg.end_lsn);

View File

@@ -48,25 +48,6 @@ pub mod nonblock;
// Default signal handling
pub mod signals;
/// use with fail::cfg("$name", "return(2000)")
#[macro_export]
macro_rules! failpoint_sleep_millis_async {
($name:literal) => {{
let should_sleep: Option<std::time::Duration> = (|| {
fail::fail_point!($name, |v: Option<_>| {
let millis = v.unwrap().parse::<u64>().unwrap();
Some(Duration::from_millis(millis))
});
None
})();
if let Some(d) = should_sleep {
tracing::info!("failpoint {:?}: sleeping for {:?}", $name, d);
tokio::time::sleep(d).await;
tracing::info!("failpoint {:?}: sleep done", $name);
}
}};
}
/// This is a shortcut to embed git sha into binaries and avoid copying the same build script to all packages
///
/// we have several cases:

View File

@@ -138,7 +138,7 @@ impl FromStr for Lsn {
///
/// If the input string is missing the '/' character, then use `Lsn::from_hex`
fn from_str(s: &str) -> Result<Self, Self::Err> {
let mut splitter = s.trim().split('/');
let mut splitter = s.split('/');
if let (Some(left), Some(right), None) = (splitter.next(), splitter.next(), splitter.next())
{
let left_num = u32::from_str_radix(left, 16).map_err(|_| LsnParseError)?;
@@ -270,11 +270,6 @@ mod tests {
);
assert_eq!(Lsn::from_hex("0"), Ok(Lsn(0)));
assert_eq!(Lsn::from_hex("F12345678AAAA5555"), Err(LsnParseError));
let expected_lsn = Lsn(0x3C490F8);
assert_eq!(" 0/3C490F8".parse(), Ok(expected_lsn));
assert_eq!("0/3C490F8 ".parse(), Ok(expected_lsn));
assert_eq!(" 0/3C490F8 ".parse(), Ok(expected_lsn));
}
#[test]

View File

@@ -76,7 +76,3 @@ tempfile = "3.2"
[[bench]]
name = "bench_layer_map"
harness = false
[[bench]]
name = "bench_walredo"
harness = false

File diff suppressed because one or more lines are too long

View File

@@ -199,20 +199,6 @@ fn start_pageserver(conf: &'static PageServerConf) -> anyhow::Result<()> {
logging::init(conf.log_format)?;
info!("version: {}", version());
// If any failpoints were set from FAILPOINTS environment variable,
// print them to the log for debugging purposes
let failpoints = fail::list();
if !failpoints.is_empty() {
info!(
"started with failpoints: {}",
failpoints
.iter()
.map(|(name, actions)| format!("{name}={actions}"))
.collect::<Vec<String>>()
.join(";")
)
}
let lock_file_path = conf.workdir.join(PID_FILE_NAME);
let lock_file = match lock_file::create_lock_file(&lock_file_path, Pid::this().to_string()) {
lock_file::LockCreationResult::Created {

View File

@@ -614,9 +614,8 @@ impl PageServerConf {
PathBuf::from(format!("../tmp_check/test_{test_name}"))
}
#[cfg(test)]
pub fn dummy_conf(repo_dir: PathBuf) -> Self {
let pg_distrib_dir = PathBuf::from(env!("CARGO_MANIFEST_DIR")).join("../pg_install");
PageServerConf {
id: NodeId(0),
wait_lsn_timeout: Duration::from_secs(60),
@@ -627,7 +626,7 @@ impl PageServerConf {
listen_http_addr: defaults::DEFAULT_HTTP_LISTEN_ADDR.to_string(),
superuser: "cloud_admin".to_string(),
workdir: repo_dir,
pg_distrib_dir,
pg_distrib_dir: PathBuf::new(),
auth_type: AuthType::Trust,
auth_validation_public_key_path: None,
remote_storage_config: None,

View File

@@ -667,7 +667,6 @@ components:
- disk_consistent_lsn
- awaits_download
- state
- latest_gc_cutoff_lsn
properties:
timeline_id:
type: string
@@ -712,9 +711,6 @@ components:
type: boolean
state:
type: string
latest_gc_cutoff_lsn:
type: string
format: hex
# These 'local' and 'remote' fields just duplicate some of the fields
# above. They are kept for backwards-compatibility. They can be removed,

View File

@@ -76,12 +76,6 @@ fn copyin_stream(pgb: &mut PostgresBackend) -> impl Stream<Item = io::Result<Byt
FeMessage::CopyData(bytes) => bytes,
FeMessage::CopyDone => { break },
FeMessage::Sync => continue,
FeMessage::Terminate => {
let msg = format!("client terminated connection with Terminate message during COPY");
pgb.write_message(&BeMessage::ErrorResponse(&msg))?;
Err(io::Error::new(io::ErrorKind::ConnectionReset, msg))?;
break;
}
m => {
let msg = format!("unexpected message {:?}", m);
pgb.write_message(&BeMessage::ErrorResponse(&msg))?;
@@ -93,10 +87,10 @@ fn copyin_stream(pgb: &mut PostgresBackend) -> impl Stream<Item = io::Result<Byt
yield copy_data_bytes;
}
Ok(None) => {
let msg = "client closed connection during COPY";
let msg = "client closed connection";
pgb.write_message(&BeMessage::ErrorResponse(msg))?;
pgb.flush().await?;
Err(io::Error::new(io::ErrorKind::ConnectionReset, msg))?;
Err(io::Error::new(io::ErrorKind::Other, msg))?;
}
Err(e) => {
Err(io::Error::new(io::ErrorKind::Other, e))?;

View File

@@ -461,7 +461,14 @@ impl Tenant {
.context("Cannot branch off the timeline that's not present in pageserver")?;
if let Some(lsn) = ancestor_start_lsn.as_mut() {
// Wait for the WAL to arrive and be processed on the parent branch up
// to the requested branch point. The repository code itself doesn't
// require it, but if we start to receive WAL on the new timeline,
// decoding the new WAL might need to look up previous pages, relation
// sizes etc. and that would get confused if the previous page versions
// are not in the repository yet.
*lsn = lsn.align();
ancestor_timeline.wait_lsn(*lsn).await?;
let ancestor_ancestor_lsn = ancestor_timeline.get_ancestor_lsn();
if ancestor_ancestor_lsn > *lsn {
@@ -473,14 +480,6 @@ impl Tenant {
ancestor_ancestor_lsn,
);
}
// Wait for the WAL to arrive and be processed on the parent branch up
// to the requested branch point. The repository code itself doesn't
// require it, but if we start to receive WAL on the new timeline,
// decoding the new WAL might need to look up previous pages, relation
// sizes etc. and that would get confused if the previous page versions
// are not in the repository yet.
ancestor_timeline.wait_lsn(*lsn).await?;
}
self.branch_timeline(ancestor_timeline_id, new_timeline_id, ancestor_start_lsn)?
@@ -1011,10 +1010,6 @@ impl Tenant {
let gc_timelines = self.refresh_gc_info_internal(target_timeline_id, horizon, pitr)?;
utils::failpoint_sleep_millis_async!("gc_iteration_internal_after_getting_gc_timelines");
info!("starting on {} timelines", gc_timelines.len());
// Perform GC for each timeline.
//
// Note that we don't hold the GC lock here because we don't want

View File

@@ -183,19 +183,6 @@ pub(super) async fn gather_inputs(
}
}
// all timelines also have an end point if they have made any progress
if last_record_lsn > timeline.get_ancestor_lsn()
&& !interesting_lsns
.iter()
.any(|(lsn, _)| lsn == &last_record_lsn)
{
updates.push(Update {
lsn: last_record_lsn,
command: Command::EndOfBranch,
timeline_id: timeline.timeline_id,
});
}
timeline_inputs.insert(
timeline.timeline_id,
TimelineInputs {
@@ -283,22 +270,48 @@ impl ModelInputs {
// impossible to always determine the a one main branch.
let mut storage = tenant_size_model::Storage::<Option<TimelineId>>::new(None);
// tracking these not to require modifying the current implementation of the size model,
// which works in relative LSNs and sizes.
let mut last_state: HashMap<TimelineId, (Lsn, u64)> = HashMap::new();
for update in &self.updates {
let Update {
lsn,
command: op,
timeline_id,
} = update;
let Lsn(now) = *lsn;
match op {
Command::Update(sz) => {
storage.insert_point(&Some(*timeline_id), "".into(), now, Some(*sz));
}
Command::EndOfBranch => {
storage.insert_point(&Some(*timeline_id), "".into(), now, None);
let latest = last_state.get_mut(timeline_id).ok_or_else(|| {
anyhow::anyhow!(
"ordering-mismatch: there must had been a previous state for {timeline_id}"
)
})?;
let lsn_bytes = {
let Lsn(now) = lsn;
let Lsn(prev) = latest.0;
debug_assert!(prev <= *now, "self.updates should had been sorted");
now - prev
};
let size_diff =
i64::try_from(*sz as i128 - latest.1 as i128).with_context(|| {
format!("size difference i64 overflow for {timeline_id}")
})?;
storage.modify_branch(&Some(*timeline_id), "".into(), lsn_bytes, size_diff);
*latest = (*lsn, *sz);
}
Command::BranchFrom(parent) => {
storage.branch(parent, Some(*timeline_id));
let size = parent
.as_ref()
.and_then(|id| last_state.get(id))
.map(|x| x.1)
.unwrap_or(0);
last_state.insert(*timeline_id, (*lsn, size));
}
}
}
@@ -307,7 +320,10 @@ impl ModelInputs {
}
}
/// A point of interest in the tree of branches
/// Single size model update.
///
/// Sizing model works with relative increments over latest branch state.
/// Updates are absolute, so additional state needs to be tracked when applying.
#[serde_with::serde_as]
#[derive(
Debug, PartialEq, PartialOrd, Eq, Ord, Clone, Copy, serde::Serialize, serde::Deserialize,
@@ -326,7 +342,6 @@ struct Update {
enum Command {
Update(u64),
BranchFrom(#[serde_as(as = "Option<serde_with::DisplayFromStr>")] Option<TimelineId>),
EndOfBranch,
}
impl std::fmt::Debug for Command {
@@ -336,7 +351,6 @@ impl std::fmt::Debug for Command {
match self {
Self::Update(arg0) => write!(f, "Update({arg0})"),
Self::BranchFrom(arg0) => write!(f, "BranchFrom({arg0:?})"),
Self::EndOfBranch => write!(f, "EndOfBranch"),
}
}
}

View File

@@ -61,13 +61,6 @@ use crate::{
storage_sync::{self, index::LayerFileMetadata},
};
#[derive(Debug, PartialEq, Eq, Clone, Copy)]
enum FlushLoopState {
NotStarted,
Running,
Exited,
}
pub struct Timeline {
conf: &'static PageServerConf,
tenant_conf: Arc<RwLock<TenantConfOpt>>,
@@ -129,7 +122,7 @@ pub struct Timeline {
write_lock: Mutex<()>,
/// Used to avoid multiple `flush_loop` tasks running
flush_loop_state: Mutex<FlushLoopState>,
flush_loop_started: Mutex<bool>,
/// layer_flush_start_tx can be used to wake up the layer-flushing task.
/// The value is a counter, incremented every time a new flush cycle is requested.
@@ -762,7 +755,7 @@ impl Timeline {
upload_layers: AtomicBool::new(upload_layers),
flush_loop_state: Mutex::new(FlushLoopState::NotStarted),
flush_loop_started: Mutex::new(false),
layer_flush_start_tx,
layer_flush_done_tx,
@@ -801,23 +794,13 @@ impl Timeline {
}
pub(super) fn maybe_spawn_flush_loop(self: &Arc<Self>) {
let mut flush_loop_state = self.flush_loop_state.lock().unwrap();
match *flush_loop_state {
FlushLoopState::NotStarted => (),
FlushLoopState::Running => {
info!(
"skipping attempt to start flush_loop twice {}/{}",
self.tenant_id, self.timeline_id
);
return;
}
FlushLoopState::Exited => {
warn!(
"ignoring attempt to restart exited flush_loop {}/{}",
self.tenant_id, self.timeline_id
);
return;
}
let mut flush_loop_started = self.flush_loop_started.lock().unwrap();
if *flush_loop_started {
info!(
"skipping attempt to start flush_loop twice {}/{}",
self.tenant_id, self.timeline_id
);
return;
}
let layer_flush_start_rx = self.layer_flush_start_tx.subscribe();
@@ -830,16 +813,11 @@ impl Timeline {
Some(self.timeline_id),
"layer flush task",
false,
async move {
self_clone.flush_loop(layer_flush_start_rx).await;
let mut flush_loop_state = self_clone.flush_loop_state.lock().unwrap();
assert_eq!(*flush_loop_state, FlushLoopState::Running);
*flush_loop_state = FlushLoopState::Exited;
Ok(()) }
async move { self_clone.flush_loop(layer_flush_start_rx).await; Ok(()) }
.instrument(info_span!(parent: None, "layer flush task", tenant = %self.tenant_id, timeline = %self.timeline_id))
);
*flush_loop_state = FlushLoopState::Running;
*flush_loop_started = true;
}
pub(super) fn launch_wal_receiver(self: &Arc<Self>) {
@@ -1387,9 +1365,8 @@ impl Timeline {
// finished, instead of some other flush that was started earlier.
let mut my_flush_request = 0;
let flush_loop_state = { *self.flush_loop_state.lock().unwrap() };
if flush_loop_state != FlushLoopState::Running {
anyhow::bail!("cannot flush frozen layers when flush_loop is not running, state is {flush_loop_state:?}")
if !&*self.flush_loop_started.lock().unwrap() {
anyhow::bail!("cannot flush frozen layers when flush_loop is not running")
}
self.layer_flush_start_tx.send_modify(|counter| {

View File

@@ -216,6 +216,7 @@ impl TenantConf {
}
}
#[cfg(test)]
pub fn dummy_conf() -> Self {
TenantConf {
checkpoint_distance: defaults::DEFAULT_CHECKPOINT_DISTANCE,

View File

@@ -71,7 +71,9 @@ async fn compaction_loop(tenant_id: TenantId) {
let mut sleep_duration = tenant.get_compaction_period();
if let Err(e) = tenant.compaction_iteration() {
sleep_duration = wait_duration;
error!("Compaction failed, retrying in {:?}: {e:?}", sleep_duration);
error!("Compaction failed, retrying in {:?}: {e:#}", sleep_duration);
#[cfg(feature = "testing")]
std::process::abort();
}
// Sleep
@@ -120,7 +122,9 @@ async fn gc_loop(tenant_id: TenantId) {
if let Err(e) = tenant.gc_iteration(None, gc_horizon, tenant.get_pitr_interval(), false).await
{
sleep_duration = wait_duration;
error!("Gc failed, retrying in {:?}: {e:?}", sleep_duration);
error!("Gc failed, retrying in {:?}: {e:#}", sleep_duration);
#[cfg(feature = "testing")]
std::process::abort();
}
}

View File

@@ -210,16 +210,6 @@ impl PostgresRedoManager {
}
}
/// Launch process pre-emptively. Should not be needed except for benchmarking.
pub fn launch_process(&mut self, pg_version: u32) -> anyhow::Result<()> {
let inner = self.process.get_mut().unwrap();
if inner.is_none() {
let p = PostgresRedoProcess::launch(self.conf, self.tenant_id, pg_version)?;
*inner = Some(p);
}
Ok(())
}
///
/// Process one request for WAL redo using wal-redo postgres
///

View File

@@ -32,6 +32,11 @@
#define PageStoreTrace DEBUG5
#define NEON_TAG "[NEON_SMGR] "
#define neon_log(tag, fmt, ...) ereport(tag, \
(errmsg(NEON_TAG fmt, ##__VA_ARGS__), \
errhidestmt(true), errhidecontext(true)))
bool connected = false;
PGconn *pageserver_conn = NULL;
@@ -92,10 +97,11 @@ pageserver_connect()
while (PQisBusy(pageserver_conn))
{
int wc;
WaitEvent event;
/* Sleep until there's something to do */
(void) WaitEventSetWait(pageserver_conn_wes, -1L, &event, 1, PG_WAIT_EXTENSION);
wc = WaitEventSetWait(pageserver_conn_wes, -1L, &event, 1, PG_WAIT_EXTENSION);
ResetLatch(MyLatch);
CHECK_FOR_INTERRUPTS();
@@ -135,10 +141,11 @@ retry:
if (ret == 0)
{
int wc;
WaitEvent event;
/* Sleep until there's something to do */
(void) WaitEventSetWait(pageserver_conn_wes, -1L, &event, 1, PG_WAIT_EXTENSION);
wc = WaitEventSetWait(pageserver_conn_wes, -1L, &event, 1, PG_WAIT_EXTENSION);
ResetLatch(MyLatch);
CHECK_FOR_INTERRUPTS();
@@ -232,9 +239,6 @@ pageserver_receive(void)
StringInfoData resp_buff;
NeonResponse *resp;
if (!connected)
return NULL;
PG_TRY();
{
/* read response */
@@ -244,10 +248,7 @@ pageserver_receive(void)
if (resp_buff.len < 0)
{
if (resp_buff.len == -1)
{
pageserver_disconnect();
return NULL;
}
neon_log(ERROR, "end of COPY");
else if (resp_buff.len == -2)
neon_log(ERROR, "could not read COPY data: %s", PQerrorMessage(pageserver_conn));
}

View File

@@ -49,11 +49,6 @@ typedef struct
#define messageTag(m) (((const NeonMessage *)(m))->tag)
#define NEON_TAG "[NEON_SMGR] "
#define neon_log(tag, fmt, ...) ereport(tag, \
(errmsg(NEON_TAG fmt, ##__VA_ARGS__), \
errhidestmt(true), errhidecontext(true)))
/*
* supertype of all the Neon*Request structs below
*

View File

@@ -251,9 +251,9 @@ XLogRecPtr prefetch_lsn = 0;
static void consume_prefetch_responses(void);
static uint64 prefetch_register_buffer(BufferTag tag, bool *force_latest, XLogRecPtr *force_lsn);
static bool prefetch_read(PrefetchRequest *slot);
static void prefetch_read(PrefetchRequest *slot);
static void prefetch_do_request(PrefetchRequest *slot, bool *force_latest, XLogRecPtr *force_lsn);
static bool prefetch_wait_for(uint64 ring_index);
static void prefetch_wait_for(uint64 ring_index);
static void prefetch_cleanup(void);
static inline void prefetch_set_unused(uint64 ring_index);
@@ -393,7 +393,7 @@ prefetch_cleanup(void)
* NOTE: this function may indirectly update MyPState->pfs_hash; which
* invalidates any active pointers into the hash table.
*/
static bool
static void
prefetch_wait_for(uint64 ring_index)
{
PrefetchRequest *entry;
@@ -412,10 +412,8 @@ prefetch_wait_for(uint64 ring_index)
entry = GetPrfSlot(MyPState->ring_receive);
Assert(entry->status == PRFS_REQUESTED);
if (!prefetch_read(entry))
return false;
prefetch_read(entry);
}
return true;
}
/*
@@ -427,7 +425,7 @@ prefetch_wait_for(uint64 ring_index)
* NOTE: this function may indirectly update MyPState->pfs_hash; which
* invalidates any active pointers into the hash table.
*/
static bool
static void
prefetch_read(PrefetchRequest *slot)
{
NeonResponse *response;
@@ -440,22 +438,15 @@ prefetch_read(PrefetchRequest *slot)
old = MemoryContextSwitchTo(MyPState->errctx);
response = (NeonResponse *) page_server->receive();
MemoryContextSwitchTo(old);
if (response)
{
/* update prefetch state */
MyPState->n_responses_buffered += 1;
MyPState->n_requests_inflight -= 1;
MyPState->ring_receive += 1;
/* update prefetch state */
MyPState->n_responses_buffered += 1;
MyPState->n_requests_inflight -= 1;
MyPState->ring_receive += 1;
/* update slot state */
slot->status = PRFS_RECEIVED;
slot->response = response;
return true;
}
else
{
return false;
}
/* update slot state */
slot->status = PRFS_RECEIVED;
slot->response = response;
}
/*
@@ -755,16 +746,11 @@ prefetch_register_buffer(BufferTag tag, bool *force_latest, XLogRecPtr *force_ls
static NeonResponse *
page_server_request(void const *req)
{
NeonResponse* resp;
do {
page_server->send((NeonRequest *) req);
page_server->flush();
MyPState->ring_flush = MyPState->ring_unused;
consume_prefetch_responses();
resp = page_server->receive();
} while (resp == NULL);
return resp;
page_server->send((NeonRequest *) req);
page_server->flush();
MyPState->ring_flush = MyPState->ring_unused;
consume_prefetch_responses();
return page_server->receive();
}
@@ -1649,8 +1635,7 @@ neon_close(SMgrRelation reln, ForkNumber forknum)
bool
neon_prefetch(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum)
{
BufferTag tag;
uint64 ring_index PG_USED_FOR_ASSERTS_ONLY;
uint64 ring_index PG_USED_FOR_ASSERTS_ONLY;
switch (reln->smgr_relpersistence)
{
@@ -1666,7 +1651,7 @@ neon_prefetch(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum)
elog(ERROR, "unknown relpersistence '%c'", reln->smgr_relpersistence);
}
tag = (BufferTag) {
BufferTag tag = (BufferTag) {
.rnode = reln->smgr_rnode.node,
.forkNum = forknum,
.blockNum = blocknum
@@ -1770,24 +1755,22 @@ neon_read_at_lsn(RelFileNode rnode, ForkNumber forkNum, BlockNumber blkno,
}
}
do
if (entry == NULL)
{
if (entry == NULL)
{
n_prefetch_misses += 1;
n_prefetch_misses += 1;
ring_index = prefetch_register_buffer(buftag, &request_latest,
&request_lsn);
slot = GetPrfSlot(ring_index);
}
ring_index = prefetch_register_buffer(buftag, &request_latest,
&request_lsn);
slot = GetPrfSlot(ring_index);
}
Assert(slot->my_ring_index == ring_index);
Assert(MyPState->ring_last <= ring_index &&
MyPState->ring_unused > ring_index);
Assert(slot->status != PRFS_UNUSED);
Assert(GetPrfSlot(ring_index) == slot);
Assert(slot->my_ring_index == ring_index);
Assert(MyPState->ring_last <= ring_index &&
MyPState->ring_unused > ring_index);
Assert(slot->status != PRFS_UNUSED);
Assert(GetPrfSlot(ring_index) == slot);
} while (!prefetch_wait_for(ring_index));
prefetch_wait_for(ring_index);
Assert(slot->status == PRFS_RECEIVED);

View File

@@ -119,7 +119,6 @@ static TimestampTz last_reconnect_attempt;
static WalproposerShmemState * walprop_shared;
/* Prototypes for private functions */
static void WalProposerRegister(void);
static void WalProposerInit(XLogRecPtr flushRecPtr, uint64 systemId);
static void WalProposerStart(void);
static void WalProposerLoop(void);
@@ -456,7 +455,7 @@ WalProposerPoll(void)
/*
* Register a background worker proposing WAL to wal acceptors.
*/
static void
void
WalProposerRegister(void)
{
BackgroundWorker bgw;

View File

@@ -377,18 +377,18 @@ typedef struct Safekeeper
AppendResponse appendResponse; /* feedback for master */
} Safekeeper;
extern void WalProposerSync(int argc, char *argv[]);
extern void WalProposerMain(Datum main_arg);
extern void WalProposerBroadcast(XLogRecPtr startpos, XLogRecPtr endpos);
extern void WalProposerPoll(void);
extern void ParseReplicationFeedbackMessage(StringInfo reply_message,
ReplicationFeedback *rf);
extern PGDLLIMPORT void WalProposerMain(Datum main_arg);
void WalProposerBroadcast(XLogRecPtr startpos, XLogRecPtr endpos);
void WalProposerPoll(void);
void WalProposerRegister(void);
void ParseReplicationFeedbackMessage(StringInfo reply_message,
ReplicationFeedback * rf);
extern void StartProposerReplication(StartReplicationCmd *cmd);
extern Size WalproposerShmemSize(void);
extern bool WalproposerShmemInit(void);
extern void replication_feedback_set(ReplicationFeedback *rf);
extern void replication_feedback_get_lsns(XLogRecPtr *writeLsn, XLogRecPtr *flushLsn, XLogRecPtr *applyLsn);
Size WalproposerShmemSize(void);
bool WalproposerShmemInit(void);
void replication_feedback_set(ReplicationFeedback * rf);
void replication_feedback_get_lsns(XLogRecPtr *writeLsn, XLogRecPtr *flushLsn, XLogRecPtr *applyLsn);
/* libpqwalproposer hooks & helper type */

View File

@@ -1,7 +1,7 @@
//! Client authentication mechanisms.
pub mod backend;
pub use backend::{BackendType, ConsoleReqExtra};
pub use backend::{BackendType, ConsoleReqExtra, DatabaseInfo};
mod credentials;
pub use credentials::ClientCredentials;

View File

@@ -12,6 +12,7 @@ use crate::{
waiters::{self, Waiter, Waiters},
};
use once_cell::sync::Lazy;
use serde::{Deserialize, Serialize};
use std::borrow::Cow;
use tokio::io::{AsyncRead, AsyncWrite};
use tracing::{info, warn};
@@ -35,6 +36,45 @@ pub fn notify(psql_session_id: &str, msg: mgmt::ComputeReady) -> Result<(), wait
CPLANE_WAITERS.notify(psql_session_id, msg)
}
/// Compute node connection params provided by the cloud.
/// Note how it implements serde traits, since we receive it over the wire.
#[derive(Serialize, Deserialize, Default)]
pub struct DatabaseInfo {
pub host: String,
pub port: u16,
pub dbname: String,
pub user: String,
pub password: Option<String>,
}
// Manually implement debug to omit personal and sensitive info.
impl std::fmt::Debug for DatabaseInfo {
fn fmt(&self, fmt: &mut std::fmt::Formatter) -> std::fmt::Result {
fmt.debug_struct("DatabaseInfo")
.field("host", &self.host)
.field("port", &self.port)
.finish_non_exhaustive()
}
}
impl From<DatabaseInfo> for tokio_postgres::Config {
fn from(db_info: DatabaseInfo) -> Self {
let mut config = tokio_postgres::Config::new();
config
.host(&db_info.host)
.port(db_info.port)
.dbname(&db_info.dbname)
.user(&db_info.user);
if let Some(password) = db_info.password {
config.password(password);
}
config
}
}
/// Extra query params we'd like to pass to the console.
pub struct ConsoleReqExtra<'a> {
/// A unique identifier for a connection.
@@ -118,107 +158,54 @@ impl<'a, T, E> BackendType<'a, Result<T, E>> {
}
}
/// A product of successful authentication.
pub struct AuthSuccess<T> {
/// Did we send [`pq_proto::BeMessage::AuthenticationOk`] to client?
pub reported_auth_ok: bool,
/// Something to be considered a positive result.
pub value: T,
}
impl<T> AuthSuccess<T> {
/// Very similar to [`std::option::Option::map`].
/// Maps [`AuthSuccess<T>`] to [`AuthSuccess<R>`] by applying
/// a function to a contained value.
pub fn map<R>(self, f: impl FnOnce(T) -> R) -> AuthSuccess<R> {
AuthSuccess {
reported_auth_ok: self.reported_auth_ok,
value: f(self.value),
}
}
}
/// Info for establishing a connection to a compute node.
/// This is what we get after auth succeeded, but not before!
pub struct NodeInfo {
/// Project from [`auth::ClientCredentials`].
pub project: String,
/// Compute node connection params.
pub config: compute::ConnCfg,
}
impl BackendType<'_, ClientCredentials<'_>> {
/// Do something special if user didn't provide the `project` parameter.
async fn try_password_hack(
&mut self,
extra: &ConsoleReqExtra<'_>,
client: &mut stream::PqStream<impl AsyncRead + AsyncWrite + Unpin + Send>,
) -> auth::Result<Option<AuthSuccess<NodeInfo>>> {
use BackendType::*;
// If there's no project so far, that entails that client doesn't
// support SNI or other means of passing the project name.
// We now expect to see a very specific payload in the place of password.
let fetch_magic_payload = async {
warn!("project name not specified, resorting to the password hack auth flow");
let payload = AuthFlow::new(client)
.begin(auth::PasswordHack)
.await?
.authenticate()
.await?;
info!(project = &payload.project, "received missing parameter");
auth::Result::Ok(payload)
};
// TODO: find a proper way to merge those very similar blocks.
let (mut config, payload) = match self {
Console(endpoint, creds) if creds.project.is_none() => {
let payload = fetch_magic_payload.await?;
let mut creds = creds.as_ref();
creds.project = Some(payload.project.as_str().into());
let config = console::Api::new(endpoint, extra, &creds)
.wake_compute()
.await?;
(config, payload)
}
Postgres(endpoint, creds) if creds.project.is_none() => {
let payload = fetch_magic_payload.await?;
let mut creds = creds.as_ref();
creds.project = Some(payload.project.as_str().into());
let config = postgres::Api::new(endpoint, &creds).wake_compute().await?;
(config, payload)
}
_ => return Ok(None),
};
config.password(payload.password);
Ok(Some(AuthSuccess {
reported_auth_ok: false,
value: NodeInfo {
project: payload.project,
config,
},
}))
}
/// Authenticate the client via the requested backend, possibly using credentials.
pub async fn authenticate(
mut self,
extra: &ConsoleReqExtra<'_>,
client: &mut stream::PqStream<impl AsyncRead + AsyncWrite + Unpin + Send>,
) -> auth::Result<AuthSuccess<NodeInfo>> {
) -> super::Result<compute::NodeInfo> {
use BackendType::*;
// Handle cases when `project` is missing in `creds`.
// TODO: type safety: return `creds` with irrefutable `project`.
if let Some(res) = self.try_password_hack(extra, client).await? {
info!("user successfully authenticated (using the password hack)");
return Ok(res);
if let Console(_, creds) | Postgres(_, creds) = &mut self {
// If there's no project so far, that entails that client doesn't
// support SNI or other means of passing the project name.
// We now expect to see a very specific payload in the place of password.
if creds.project().is_none() {
warn!("project name not specified, resorting to the password hack auth flow");
let payload = AuthFlow::new(client)
.begin(auth::PasswordHack)
.await?
.authenticate()
.await?;
// Finally we may finish the initialization of `creds`.
// TODO: add missing type safety to ClientCredentials.
info!(project = &payload.project, "received missing parameter");
creds.project = Some(payload.project.into());
let mut config = match &self {
Console(endpoint, creds) => {
console::Api::new(endpoint, extra, creds)
.wake_compute()
.await?
}
Postgres(endpoint, creds) => {
postgres::Api::new(endpoint, creds).wake_compute().await?
}
_ => unreachable!("see the patterns above"),
};
// We should use a password from payload as well.
config.password(payload.password);
info!("user successfully authenticated (using the password hack)");
return Ok(compute::NodeInfo {
reported_auth_ok: false,
config,
});
}
}
let res = match self {
@@ -228,34 +215,22 @@ impl BackendType<'_, ClientCredentials<'_>> {
project = creds.project(),
"performing authentication using the console"
);
assert!(creds.project.is_some());
console::Api::new(&endpoint, extra, &creds)
.handle_user(client)
.await?
.map(|config| NodeInfo {
project: creds.project.unwrap().into_owned(),
config,
})
.await
}
Postgres(endpoint, creds) => {
info!("performing mock authentication using a local postgres instance");
assert!(creds.project.is_some());
postgres::Api::new(&endpoint, &creds)
.handle_user(client)
.await?
.map(|config| NodeInfo {
project: creds.project.unwrap().into_owned(),
config,
})
.await
}
// NOTE: this auth backend doesn't use client credentials.
Link(url) => {
info!("performing link authentication");
link::handle_user(&url, client).await?
link::handle_user(&url, client).await
}
};
}?;
info!("user successfully authenticated");
Ok(res)

View File

@@ -1,9 +1,9 @@
//! Cloud API V2.
use super::{AuthSuccess, ConsoleReqExtra};
use super::ConsoleReqExtra;
use crate::{
auth::{self, AuthFlow, ClientCredentials},
compute,
compute::{self, ComputeConnCfg},
error::{io_error, UserFacingError},
http, scram,
stream::PqStream,
@@ -128,7 +128,7 @@ impl<'a> Api<'a> {
pub(super) async fn handle_user(
self,
client: &mut PqStream<impl AsyncRead + AsyncWrite + Unpin + Send>,
) -> auth::Result<AuthSuccess<compute::ConnCfg>> {
) -> auth::Result<compute::NodeInfo> {
handle_user(client, &self, Self::get_auth_info, Self::wake_compute).await
}
@@ -164,7 +164,7 @@ impl<'a> Api<'a> {
}
/// Wake up the compute node and return the corresponding connection info.
pub(super) async fn wake_compute(&self) -> Result<compute::ConnCfg, WakeComputeError> {
pub(super) async fn wake_compute(&self) -> Result<ComputeConnCfg, WakeComputeError> {
let request_id = uuid::Uuid::new_v4().to_string();
let req = self
.endpoint
@@ -195,7 +195,7 @@ impl<'a> Api<'a> {
Some(x) => x,
};
let mut config = compute::ConnCfg::new();
let mut config = ComputeConnCfg::new();
config
.host(host)
.port(port)
@@ -213,10 +213,10 @@ pub(super) async fn handle_user<'a, Endpoint, GetAuthInfo, WakeCompute>(
endpoint: &'a Endpoint,
get_auth_info: impl FnOnce(&'a Endpoint) -> GetAuthInfo,
wake_compute: impl FnOnce(&'a Endpoint) -> WakeCompute,
) -> auth::Result<AuthSuccess<compute::ConnCfg>>
) -> auth::Result<compute::NodeInfo>
where
GetAuthInfo: Future<Output = Result<AuthInfo, GetAuthInfoError>>,
WakeCompute: Future<Output = Result<compute::ConnCfg, WakeComputeError>>,
WakeCompute: Future<Output = Result<ComputeConnCfg, WakeComputeError>>,
{
info!("fetching user's authentication info");
let auth_info = get_auth_info(endpoint).await?;
@@ -243,9 +243,9 @@ where
config.auth_keys(tokio_postgres::config::AuthKeys::ScramSha256(keys));
}
Ok(AuthSuccess {
Ok(compute::NodeInfo {
reported_auth_ok: false,
value: config,
config,
})
}

View File

@@ -1,4 +1,3 @@
use super::{AuthSuccess, NodeInfo};
use crate::{auth, compute, error::UserFacingError, stream::PqStream, waiters};
use pq_proto::{BeMessage as Be, BeParameterStatusMessage};
use thiserror::Error;
@@ -50,7 +49,7 @@ pub fn new_psql_session_id() -> String {
pub async fn handle_user(
link_uri: &reqwest::Url,
client: &mut PqStream<impl AsyncRead + AsyncWrite + Unpin>,
) -> auth::Result<AuthSuccess<NodeInfo>> {
) -> auth::Result<compute::NodeInfo> {
let psql_session_id = new_psql_session_id();
let span = info_span!("link", psql_session_id = &psql_session_id);
let greeting = hello_message(link_uri, &psql_session_id);
@@ -72,22 +71,8 @@ pub async fn handle_user(
client.write_message_noflush(&Be::NoticeResponse("Connecting to database."))?;
let mut config = compute::ConnCfg::new();
config
.host(&db_info.host)
.port(db_info.port)
.dbname(&db_info.dbname)
.user(&db_info.user);
if let Some(password) = db_info.password {
config.password(password);
}
Ok(AuthSuccess {
Ok(compute::NodeInfo {
reported_auth_ok: true,
value: NodeInfo {
project: db_info.project,
config,
},
config: db_info.into(),
})
}

View File

@@ -1,12 +1,12 @@
//! Local mock of Cloud API V2.
use super::{
console::{self, AuthInfo, GetAuthInfoError, TransportError, WakeComputeError},
AuthSuccess,
};
use crate::{
auth::{self, ClientCredentials},
compute,
auth::{
self,
backend::console::{self, AuthInfo, GetAuthInfoError, TransportError, WakeComputeError},
ClientCredentials,
},
compute::{self, ComputeConnCfg},
error::io_error,
scram,
stream::PqStream,
@@ -37,7 +37,7 @@ impl<'a> Api<'a> {
pub(super) async fn handle_user(
self,
client: &mut PqStream<impl AsyncRead + AsyncWrite + Unpin + Send>,
) -> auth::Result<AuthSuccess<compute::ConnCfg>> {
) -> auth::Result<compute::NodeInfo> {
// We reuse user handling logic from a production module.
console::handle_user(client, &self, Self::get_auth_info, Self::wake_compute).await
}
@@ -82,8 +82,8 @@ impl<'a> Api<'a> {
}
/// We don't need to wake anything locally, so we just return the connection info.
pub(super) async fn wake_compute(&self) -> Result<compute::ConnCfg, WakeComputeError> {
let mut config = compute::ConnCfg::new();
pub(super) async fn wake_compute(&self) -> Result<ComputeConnCfg, WakeComputeError> {
let mut config = ComputeConnCfg::new();
config
.host(self.endpoint.host_str().unwrap_or("localhost"))
.port(self.endpoint.port().unwrap_or(5432))

View File

@@ -36,23 +36,11 @@ pub struct ClientCredentials<'a> {
}
impl ClientCredentials<'_> {
#[inline]
pub fn project(&self) -> Option<&str> {
self.project.as_deref()
}
}
impl<'a> ClientCredentials<'a> {
#[inline]
pub fn as_ref(&'a self) -> ClientCredentials<'a> {
Self {
user: self.user,
dbname: self.dbname,
project: self.project().map(Cow::Borrowed),
}
}
}
impl<'a> ClientCredentials<'a> {
pub fn parse(
params: &'a StartupMessageParams,

View File

@@ -40,36 +40,17 @@ impl UserFacingError for ConnectionError {
/// A pair of `ClientKey` & `ServerKey` for `SCRAM-SHA-256`.
pub type ScramKeys = tokio_postgres::config::ScramKeys<32>;
/// A config for establishing a connection to compute node.
/// Eventually, `tokio_postgres` will be replaced with something better.
/// Newtype allows us to implement methods on top of it.
#[repr(transparent)]
pub struct ConnCfg(pub tokio_postgres::Config);
pub type ComputeConnCfg = tokio_postgres::Config;
impl ConnCfg {
/// Construct a new connection config.
pub fn new() -> Self {
Self(tokio_postgres::Config::new())
}
/// Various compute node info for establishing connection etc.
pub struct NodeInfo {
/// Did we send [`pq_proto::BeMessage::AuthenticationOk`]?
pub reported_auth_ok: bool,
/// Compute node connection params.
pub config: tokio_postgres::Config,
}
impl std::ops::Deref for ConnCfg {
type Target = tokio_postgres::Config;
fn deref(&self) -> &Self::Target {
&self.0
}
}
/// For now, let's make it easier to setup the config.
impl std::ops::DerefMut for ConnCfg {
fn deref_mut(&mut self) -> &mut Self::Target {
&mut self.0
}
}
impl ConnCfg {
/// Establish a raw TCP connection to the compute node.
impl NodeInfo {
async fn connect_raw(&self) -> io::Result<(SocketAddr, TcpStream)> {
use tokio_postgres::config::Host;
@@ -87,8 +68,8 @@ impl ConnCfg {
// because it has no means for extracting the underlying socket which we
// require for our business.
let mut connection_error = None;
let ports = self.0.get_ports();
let hosts = self.0.get_hosts();
let ports = self.config.get_ports();
let hosts = self.config.get_hosts();
// the ports array is supposed to have 0 entries, 1 entry, or as many entries as in the hosts array
if ports.len() > 1 && ports.len() != hosts.len() {
return Err(io::Error::new(
@@ -96,7 +77,7 @@ impl ConnCfg {
format!(
"couldn't connect: bad compute config, \
ports and hosts entries' count does not match: {:?}",
self.0
self.config
),
));
}
@@ -122,7 +103,7 @@ impl ConnCfg {
Err(connection_error.unwrap_or_else(|| {
io::Error::new(
io::ErrorKind::Other,
format!("couldn't connect: bad compute config: {:?}", self.0),
format!("couldn't connect: bad compute config: {:?}", self.config),
)
}))
}
@@ -135,7 +116,7 @@ pub struct PostgresConnection {
pub version: String,
}
impl ConnCfg {
impl NodeInfo {
/// Connect to a corresponding compute node.
pub async fn connect(
mut self,
@@ -149,21 +130,21 @@ impl ConnCfg {
.intersperse(" ") // TODO: use impl from std once it's stabilized
.collect();
self.0.options(&options);
self.config.options(&options);
}
if let Some(app_name) = params.get("application_name") {
self.0.application_name(app_name);
self.config.application_name(app_name);
}
if let Some(replication) = params.get("replication") {
use tokio_postgres::config::ReplicationMode;
match replication {
"true" | "on" | "yes" | "1" => {
self.0.replication_mode(ReplicationMode::Physical);
self.config.replication_mode(ReplicationMode::Physical);
}
"database" => {
self.0.replication_mode(ReplicationMode::Logical);
self.config.replication_mode(ReplicationMode::Logical);
}
_other => {}
}
@@ -179,7 +160,7 @@ impl ConnCfg {
.map_err(|_| ConnectionError::FailedToConnectToCompute)?;
// TODO: establish a secure connection to the DB
let (client, conn) = self.0.connect_raw(&mut stream, NoTls).await?;
let (client, conn) = self.config.connect_raw(&mut stream, NoTls).await?;
let version = conn
.parameter("server_version")
.ok_or(ConnectionError::FailedToFetchPgVersion)?

View File

@@ -6,11 +6,16 @@ use std::{
net::{TcpListener, TcpStream},
thread,
};
use tracing::{error, info, info_span};
use tracing::{error, info};
use utils::postgres_backend::{self, AuthType, PostgresBackend};
/// Console management API listener thread.
/// It spawns console response handlers needed for the link auth.
/// TODO: move all of that to auth-backend/link.rs when we ditch legacy-console backend
///
/// Main proxy listener loop.
///
/// Listens for connections, and launches a new handler thread for each.
///
pub fn thread_main(listener: TcpListener) -> anyhow::Result<()> {
scopeguard::defer! {
info!("mgmt has shut down");
@@ -19,7 +24,6 @@ pub fn thread_main(listener: TcpListener) -> anyhow::Result<()> {
listener
.set_nonblocking(false)
.context("failed to set listener to blocking")?;
loop {
let (socket, peer_addr) = listener.accept().context("failed to accept a new client")?;
info!("accepted connection from {peer_addr}");
@@ -27,19 +31,9 @@ pub fn thread_main(listener: TcpListener) -> anyhow::Result<()> {
.set_nodelay(true)
.context("failed to set client socket option")?;
// TODO: replace with async tasks.
thread::spawn(move || {
let tid = std::thread::current().id();
let span = info_span!("mgmt", thread = format_args!("{tid:?}"));
let _enter = span.enter();
info!("started a new console management API thread");
scopeguard::defer! {
info!("console management API thread is about to finish");
}
if let Err(e) = handle_connection(socket) {
error!("thread failed with an error: {e}");
if let Err(err) = handle_connection(socket) {
error!("{err}");
}
});
}
@@ -50,21 +44,44 @@ fn handle_connection(socket: TcpStream) -> anyhow::Result<()> {
pgbackend.run(&mut MgmtHandler)
}
/// Known as `kickResponse` in the console.
#[derive(Debug, Deserialize)]
struct MgmtHandler;
/// Serialized examples:
// {
// "session_id": "71d6d03e6d93d99a",
// "result": {
// "Success": {
// "host": "127.0.0.1",
// "port": 5432,
// "dbname": "stas",
// "user": "stas",
// "password": "mypass"
// }
// }
// }
// {
// "session_id": "71d6d03e6d93d99a",
// "result": {
// "Failure": "oops"
// }
// }
//
// // to test manually by sending a query to mgmt interface:
// psql -h 127.0.0.1 -p 9999 -c '{"session_id":"4f10dde522e14739","result":{"Success":{"host":"127.0.0.1","port":5432,"dbname":"stas","user":"stas","password":"stas"}}}'
#[derive(Deserialize)]
struct PsqlSessionResponse {
session_id: String,
result: PsqlSessionResult,
}
#[derive(Debug, Deserialize)]
#[derive(Deserialize)]
enum PsqlSessionResult {
Success(DatabaseInfo),
Success(auth::DatabaseInfo),
Failure(String),
}
/// A message received by `mgmt` when a compute node is ready.
pub type ComputeReady = Result<DatabaseInfo, String>;
pub type ComputeReady = Result<auth::DatabaseInfo, String>;
impl PsqlSessionResult {
fn into_compute_ready(self) -> ComputeReady {
@@ -75,51 +92,25 @@ impl PsqlSessionResult {
}
}
/// Compute node connection params provided by the console.
/// This struct and its parents are mgmt API implementation
/// detail and thus should remain in this module.
// TODO: restore deserialization tests from git history.
#[derive(Deserialize)]
pub struct DatabaseInfo {
pub host: String,
pub port: u16,
pub dbname: String,
pub user: String,
/// Console always provides a password, but it might
/// be inconvenient for debug with local PG instance.
pub password: Option<String>,
pub project: String,
}
// Manually implement debug to omit sensitive info.
impl std::fmt::Debug for DatabaseInfo {
fn fmt(&self, fmt: &mut std::fmt::Formatter) -> std::fmt::Result {
fmt.debug_struct("DatabaseInfo")
.field("host", &self.host)
.field("port", &self.port)
.field("dbname", &self.dbname)
.field("user", &self.user)
.finish_non_exhaustive()
}
}
// TODO: replace with an http-based protocol.
struct MgmtHandler;
impl postgres_backend::Handler for MgmtHandler {
fn process_query(&mut self, pgb: &mut PostgresBackend, query: &str) -> anyhow::Result<()> {
try_process_query(pgb, query).map_err(|e| {
error!("failed to process response: {e:?}");
e
})
fn process_query(
&mut self,
pgb: &mut PostgresBackend,
query_string: &str,
) -> anyhow::Result<()> {
let res = try_process_query(pgb, query_string);
// intercept and log error message
if res.is_err() {
error!("mgmt query failed: {res:?}");
}
res
}
}
fn try_process_query(pgb: &mut PostgresBackend, query: &str) -> anyhow::Result<()> {
let resp: PsqlSessionResponse = serde_json::from_str(query)?;
fn try_process_query(pgb: &mut PostgresBackend, query_string: &str) -> anyhow::Result<()> {
info!("got mgmt query [redacted]"); // Content contains password, don't print it
let span = info_span!("event", session_id = resp.session_id);
let _enter = span.enter();
info!("got response: {:?}", resp.result);
let resp: PsqlSessionResponse = serde_json::from_str(query_string)?;
match auth::backend::notify(&resp.session_id, resp.result.into_compute_ready()) {
Ok(()) => {
@@ -128,50 +119,9 @@ fn try_process_query(pgb: &mut PostgresBackend, query: &str) -> anyhow::Result<(
.write_message(&BeMessage::CommandComplete(b"SELECT 1"))?;
}
Err(e) => {
error!("failed to deliver response to per-client task");
pgb.write_message(&BeMessage::ErrorResponse(&e.to_string()))?;
}
}
Ok(())
}
#[cfg(test)]
mod tests {
use super::*;
use serde_json::json;
#[test]
fn parse_db_info() -> anyhow::Result<()> {
// with password
let _: DatabaseInfo = serde_json::from_value(json!({
"host": "localhost",
"port": 5432,
"dbname": "postgres",
"user": "john_doe",
"password": "password",
"project": "hello_world",
}))?;
// without password
let _: DatabaseInfo = serde_json::from_value(json!({
"host": "localhost",
"port": 5432,
"dbname": "postgres",
"user": "john_doe",
"project": "hello_world",
}))?;
// new field (forward compatibility)
let _: DatabaseInfo = serde_json::from_value(json!({
"host": "localhost",
"port": 5432,
"dbname": "postgres",
"user": "john_doe",
"project": "hello_world",
"N.E.W": "forward compatibility check",
}))?;
Ok(())
}
}

View File

@@ -4,7 +4,7 @@ use crate::config::{ProxyConfig, TlsConfig};
use crate::stream::{MeasuredStream, PqStream, Stream};
use anyhow::{bail, Context};
use futures::TryFutureExt;
use metrics::{register_int_counter, register_int_counter_vec, IntCounter, IntCounterVec};
use metrics::{register_int_counter, IntCounter};
use once_cell::sync::Lazy;
use pq_proto::{BeMessage as Be, *};
use std::sync::Arc;
@@ -30,16 +30,10 @@ static NUM_CONNECTIONS_CLOSED_COUNTER: Lazy<IntCounter> = Lazy::new(|| {
.unwrap()
});
static NUM_BYTES_PROXIED_COUNTER: Lazy<IntCounterVec> = Lazy::new(|| {
register_int_counter_vec!(
"proxy_io_bytes_per_client",
"Number of bytes sent/received between client and backend.",
&[
// Received (rx) / sent (tx).
"direction",
// Proxy can keep calling it `project` internally.
"endpoint_id"
]
static NUM_BYTES_PROXIED_COUNTER: Lazy<IntCounter> = Lazy::new(|| {
register_int_counter!(
"proxy_io_bytes_total",
"Number of bytes sent/received between any client and backend."
)
.unwrap()
});
@@ -236,17 +230,16 @@ impl<S: AsyncRead + AsyncWrite + Unpin + Send> Client<'_, S> {
application_name: params.get("application_name"),
};
let auth_result = async {
// `&mut stream` doesn't let us merge those 2 lines.
let res = creds.authenticate(&extra, &mut stream).await;
async { res }.or_else(|e| stream.throw_error(e)).await
}
.instrument(info_span!("auth"))
.await?;
// Authenticate and connect to a compute node.
let auth = creds
.authenticate(&extra, &mut stream)
.instrument(info_span!("auth"))
.await;
let node = async { auth }.or_else(|e| stream.throw_error(e)).await?;
let reported_auth_ok = node.reported_auth_ok;
let node = auth_result.value;
let (db, cancel_closure) = node
.config
.connect(params)
.or_else(|e| stream.throw_error(e))
.await?;
@@ -254,9 +247,7 @@ impl<S: AsyncRead + AsyncWrite + Unpin + Send> Client<'_, S> {
let cancel_key_data = session.enable_query_cancellation(cancel_closure);
// Report authentication success if we haven't done this already.
// Note that we do this only (for the most part) after we've connected
// to a compute (see above) which performs its own authentication.
if !auth_result.reported_auth_ok {
if !reported_auth_ok {
stream
.write_message_noflush(&Be::AuthenticationOk)?
.write_message_noflush(&BeParameterStatusMessage::encoding())?;
@@ -270,23 +261,17 @@ impl<S: AsyncRead + AsyncWrite + Unpin + Send> Client<'_, S> {
.write_message(&BeMessage::ReadyForQuery)
.await?;
// TODO: add more identifiers.
let metric_id = node.project;
let m_sent = NUM_BYTES_PROXIED_COUNTER.with_label_values(&["tx", &metric_id]);
let mut client = MeasuredStream::new(stream.into_inner(), |cnt| {
// Number of bytes we sent to the client (outbound).
m_sent.inc_by(cnt as u64);
});
let m_recv = NUM_BYTES_PROXIED_COUNTER.with_label_values(&["rx", &metric_id]);
let mut db = MeasuredStream::new(db.stream, |cnt| {
// Number of bytes the client sent to the compute node (inbound).
m_recv.inc_by(cnt as u64);
});
/// This function will be called for writes to either direction.
fn inc_proxied(cnt: usize) {
// Consider inventing something more sophisticated
// if this ever becomes a bottleneck (cacheline bouncing).
NUM_BYTES_PROXIED_COUNTER.inc_by(cnt as u64);
}
// Starting from here we only proxy the client's traffic.
info!("performing the proxy pass...");
let mut db = MeasuredStream::new(db.stream, inc_proxied);
let mut client = MeasuredStream::new(stream.into_inner(), inc_proxied);
let _ = tokio::io::copy_bidirectional(&mut client, &mut db).await?;
Ok(())

View File

@@ -440,35 +440,68 @@ def pytest_terminal_summary(
terminalreporter.section("Benchmark results", "-")
result = []
for test_report in terminalreporter.stats.get("passed", []):
result_entry = []
# TODO group by test report
reports = {
report.head_line: report
for report in terminalreporter.stats.get("passed", [])
}
for _, recorded_property in test_report.user_properties:
terminalreporter.write(
"{}.{}: ".format(test_report.head_line, recorded_property["name"])
)
unit = recorded_property["unit"]
value = recorded_property["value"]
if unit == "MB":
terminalreporter.write("{0:,.0f}".format(value), green=True)
elif unit in ("s", "ms") and isinstance(value, float):
terminalreporter.write("{0:,.3f}".format(value), green=True)
elif isinstance(value, float):
terminalreporter.write("{0:,.4f}".format(value), green=True)
else:
terminalreporter.write(str(value), green=True)
terminalreporter.line(" {}".format(unit))
results = []
for name, report in reports.items():
# terminalreporter.write(f"{name}", green=True)
# terminalreporter.line("")
if "[neon" in name:
vanilla_report = reports.get(name.replace("[neon", "[vanilla"))
if vanilla_report:
for key, prop in report.user_properties:
if prop["unit"] == "s":
neon_value = prop["value"]
vanilla_value = dict(vanilla_report.user_properties)[key]["value"]
try:
ratio = float(neon_value) / vanilla_value
except ZeroDivisionError:
ratio = 99999
result_entry.append(recorded_property)
results.append((ratio, name.replace("[neon", "[neon/vanilla"), prop["name"]))
result.append(
{
"suit": test_report.nodeid,
"total_duration": test_report.duration,
"data": result_entry,
}
)
results.sort(reverse=True)
for ratio, test, prop in results:
terminalreporter.write("{}.{}: ".format(test, prop))
terminalreporter.write("{0:,.3f}".format(ratio), green=True)
terminalreporter.line("")
# result = []
# for test_report in terminalreporter.stats.get("passed", []):
# result_entry = []
# durations = [
# prop
# for _, prop in test_report.user_properties
# if prop["unit"] == "s"
# ]
# for _, recorded_property in test_report.user_properties:
# terminalreporter.write("{}.{}: ".format(test_report.head_line,
# recorded_property["name"]))
# unit = recorded_property["unit"]
# value = recorded_property["value"]
# if unit == "MB":
# terminalreporter.write("{0:,.0f}".format(value), green=True)
# elif unit in ("s", "ms") and isinstance(value, float):
# terminalreporter.write("{0:,.3f}".format(value), green=True)
# elif isinstance(value, float):
# terminalreporter.write("{0:,.4f}".format(value), green=True)
# else:
# terminalreporter.write(str(value), green=True)
# terminalreporter.line(" {}".format(unit))
# result_entry.append(recorded_property)
# result.append({
# "suit": test_report.nodeid,
# "total_duration": test_report.duration,
# "data": result_entry,
# })
out_dir = config.getoption("out_dir")
if out_dir is None:

View File

@@ -784,8 +784,6 @@ class NeonEnvBuilder:
self.cleanup_remote_storage()
self.env.pageserver.assert_no_errors()
class NeonEnv:
"""
@@ -1568,7 +1566,6 @@ class NeonCli(AbstractNeonCli):
def pageserver_start(
self,
overrides: Tuple[str, ...] = (),
extra_env_vars: Optional[Dict[str, str]] = None,
) -> "subprocess.CompletedProcess[str]":
start_args = ["pageserver", "start", *overrides]
append_pageserver_param_overrides(
@@ -1578,11 +1575,11 @@ class NeonCli(AbstractNeonCli):
pageserver_config_override=self.env.pageserver.config_override,
)
s3_env_vars = None
if self.env.remote_storage is not None and isinstance(self.env.remote_storage, S3Storage):
s3_env_vars = self.env.remote_storage.access_env_vars()
extra_env_vars = (extra_env_vars or {}) | s3_env_vars
return self.raw_cli(start_args, extra_env_vars=extra_env_vars)
return self.raw_cli(start_args, extra_env_vars=s3_env_vars)
def pageserver_stop(self, immediate=False) -> "subprocess.CompletedProcess[str]":
cmd = ["pageserver", "stop"]
@@ -1726,50 +1723,7 @@ class NeonPageserver(PgProtocol):
self.config_override = config_override
self.version = env.get_pageserver_version()
# After a test finishes, we will scrape the log to see if there are any
# unexpected error messages. If your test expects an error, add it to
# 'allowed_errors' in the test with something like:
#
# env.pageserver.allowed_errors.append(".*could not open garage door.*")
#
# The entries in the list are regular experessions.
self.allowed_errors = [
# All tests print these, when starting up or shutting down
".*wal receiver task finished with an error: walreceiver connection handling failure.*",
".*Shutdown task error: walreceiver connection handling failure.*",
".*Etcd client error: grpc request error: status: Unavailable.*",
".*query handler for .* failed: Connection reset by peer.*",
".*serving compute connection task.*exited with error: Broken pipe.*",
".*Connection aborted: error communicating with the server: Broken pipe.*",
".*Connection aborted: error communicating with the server: Transport endpoint is not connected.*",
".*Connection aborted: error communicating with the server: Connection reset by peer.*",
".*kill_and_wait_impl.*: wait successful.*",
".*end streaming to Some.*",
# safekeeper connection can fail with this, in the window between timeline creation
# and streaming start
".*Failed to process query for timeline .*: state uninitialized, no data to read.*",
# Tests related to authentication and authorization print these
".*Error processing HTTP request: Forbidden",
# intentional failpoints
".*failpoint ",
# FIXME: there is a race condition between GC and detach, see
# https://github.com/neondatabase/neon/issues/2442
".*could not remove ephemeral file.*No such file or directory.*",
# FIXME: These need investigation
".*gc_loop.*Failed to get a tenant .* Tenant .* not found in the local state.*",
".*compaction_loop.*Failed to get a tenant .* Tenant .* not found in the local state.*",
".*manual_gc.*is_shutdown_requested\\(\\) called in an unexpected task or thread.*",
".*tenant_list: timeline is not found in remote index while it is present in the tenants registry.*",
".*Removing intermediate uninit mark file.*",
# FIXME: known race condition in TaskHandle: https://github.com/neondatabase/neon/issues/2885
".*sender is dropped while join handle is still alive.*",
]
def start(
self,
overrides: Tuple[str, ...] = (),
extra_env_vars: Optional[Dict[str, str]] = None,
) -> "NeonPageserver":
def start(self, overrides: Tuple[str, ...] = ()) -> "NeonPageserver":
"""
Start the page server.
`overrides` allows to add some config to this pageserver start.
@@ -1777,7 +1731,7 @@ class NeonPageserver(PgProtocol):
"""
assert self.running is False
self.env.neon_cli.pageserver_start(overrides=overrides, extra_env_vars=extra_env_vars)
self.env.neon_cli.pageserver_start(overrides=overrides)
self.running = True
return self
@@ -1817,26 +1771,6 @@ class NeonPageserver(PgProtocol):
is_testing_enabled_or_skip=self.is_testing_enabled_or_skip,
)
def assert_no_errors(self):
logfile = open(os.path.join(self.env.repo_dir, "pageserver.log"), "r")
error_or_warn = re.compile("ERROR|WARN")
errors = []
while True:
line = logfile.readline()
if not line:
break
if error_or_warn.search(line):
# It's an ERROR or WARN. Is it in the allow-list?
for a in self.allowed_errors:
if re.match(a, line):
break
else:
errors.append(line)
assert not errors
def append_pageserver_param_overrides(
params_to_update: List[str],
@@ -2080,9 +2014,9 @@ class NeonProxy(PgProtocol):
self,
proxy_port: int,
http_port: int,
mgmt_port: int,
neon_binpath: Path,
auth_endpoint=None,
mgmt_port=None,
):
super().__init__(dsn=auth_endpoint, port=proxy_port)
self.host = "127.0.0.1"
@@ -2096,8 +2030,7 @@ class NeonProxy(PgProtocol):
def start(self):
"""
Starts a proxy with option '--auth-backend postgres' and a postgres instance
already provided though '--auth-endpoint <postgress-instance>'."
Starts a proxy with option '--auth-backend postgres' and a postgres instance already provided though '--auth-endpoint <postgress-instance>'."
"""
assert self._popen is None
assert self.auth_endpoint is not None
@@ -2107,7 +2040,6 @@ class NeonProxy(PgProtocol):
str(self.neon_binpath / "proxy"),
*["--http", f"{self.host}:{self.http_port}"],
*["--proxy", f"{self.host}:{self.proxy_port}"],
*["--mgmt", f"{self.host}:{self.mgmt_port}"],
*["--auth-backend", "postgres"],
*["--auth-endpoint", self.auth_endpoint],
]
@@ -2184,13 +2116,11 @@ def static_proxy(
auth_endpoint = f"postgres://proxy:password@{host}:{port}/{dbname}"
proxy_port = port_distributor.get_port()
mgmt_port = port_distributor.get_port()
http_port = port_distributor.get_port()
with NeonProxy(
proxy_port=proxy_port,
http_port=http_port,
mgmt_port=mgmt_port,
neon_binpath=neon_binpath,
auth_endpoint=auth_endpoint,
) as proxy:
@@ -2731,6 +2661,8 @@ def test_output_dir(request: FixtureRequest, top_output_dir: Path) -> Iterator[P
yield test_dir
shutil.rmtree(test_dir)
allure_attach_from_dir(test_dir)

View File

@@ -6,7 +6,6 @@ import pytest
from fixtures.benchmark_fixture import MetricReport
from fixtures.compare_fixtures import PgCompare
from fixtures.log_helper import log
from pytest_lazyfixture import lazy_fixture # type: ignore
@pytest.mark.parametrize(
@@ -21,24 +20,11 @@ from pytest_lazyfixture import lazy_fixture # type: ignore
pytest.param(10000000, 1, 4),
],
)
@pytest.mark.parametrize(
"env, scale",
[
# Run on all envs. Use 50x larger table on remote cluster to make sure
# it doesn't fit in shared buffers, which are larger on remote than local.
pytest.param(lazy_fixture("neon_compare"), 1, id="neon"),
pytest.param(lazy_fixture("vanilla_compare"), 1, id="vanilla"),
pytest.param(
lazy_fixture("remote_compare"), 50, id="remote", marks=pytest.mark.remote_cluster
),
],
)
def test_seqscans(env: PgCompare, scale: int, rows: int, iters: int, workers: int):
rows = scale * rows
def test_seqscans(neon_with_baseline: PgCompare, rows: int, iters: int, workers: int):
env = neon_with_baseline
with closing(env.pg.connect()) as conn:
with conn.cursor() as cur:
cur.execute("drop table if exists t;")
cur.execute("create table t (i integer);")
cur.execute(f"insert into t values (generate_series(1,{rows}));")

View File

@@ -116,13 +116,6 @@ def test_branch_creation_before_gc(neon_simple_env: NeonEnv):
env = neon_simple_env
pageserver_http_client = env.pageserver.http_client()
env.pageserver.allowed_errors.extend(
[
".*invalid branch start lsn: less than latest GC cutoff.*",
".*invalid branch start lsn: less than planned GC cutoff.*",
]
)
# Disable background GC but set the `pitr_interval` to be small, so GC can delete something
tenant, _ = env.neon_cli.create_tenant(
conf={

View File

@@ -13,9 +13,6 @@ def test_branch_behind(neon_env_builder: NeonEnvBuilder):
neon_env_builder.pageserver_config_override = "tenant_config={pitr_interval = '0 sec'}"
env = neon_env_builder.init_start()
env.pageserver.allowed_errors.append(".*invalid branch start lsn.*")
env.pageserver.allowed_errors.append(".*invalid start lsn .* for ancestor timeline.*")
# Branch at the point where only 100 rows were inserted
env.neon_cli.create_branch("test_branch_behind")
pgmain = env.postgres.create_start("test_branch_behind")

View File

@@ -11,17 +11,10 @@ from fixtures.types import TenantId, TimelineId
# Test restarting page server, while safekeeper and compute node keep
# running.
def test_broken_timeline(neon_env_builder: NeonEnvBuilder):
# One safekeeper is enough for this test.
neon_env_builder.num_safekeepers = 3
env = neon_env_builder.init_start()
env.pageserver.allowed_errors.extend(
[
".*No timelines to attach received.*",
".*Failed to process timeline dir contents.*",
".*Failed to load delta layer.*",
".*Timeline .* was not found.*",
]
)
tenant_timelines: List[Tuple[TenantId, TimelineId, Postgres]] = []
for n in range(4):
@@ -79,24 +72,23 @@ def test_broken_timeline(neon_env_builder: NeonEnvBuilder):
# First timeline would not get loaded into pageserver due to corrupt metadata file
with pytest.raises(Exception, match=f"Timeline {tenant1}/{timeline1} was not found") as err:
pg1.start()
log.info(
f"As expected, compute startup failed eagerly for timeline with corrupt metadata: {err}"
)
log.info(f"compute startup failed eagerly for timeline with corrupt metadata: {err}")
# Second timeline has no ancestors, only the metadata file and no layer files
# We don't have the remote storage enabled, which means timeline is in an incorrect state,
# it's not loaded at all
with pytest.raises(Exception, match=f"Timeline {tenant2}/{timeline2} was not found") as err:
pg2.start()
log.info(f"As expected, compute startup failed for timeline with missing layers: {err}")
log.info(f"compute startup failed eagerly for timeline with corrupt metadata: {err}")
# Third timeline will also fail during basebackup, because the layer file is corrupt.
# (We don't check layer file contents on startup, when loading the timeline)
with pytest.raises(Exception, match="Failed to load delta layer") as err:
pg3.start()
log.info(
f"As expected, compute startup failed for timeline {tenant3}/{timeline3} with corrupt layers: {err}"
)
# Yet other timelines will fail when their layers will be queried during basebackup: we don't check layer file contents on startup, when loading the timeline
for n in range(3, 4):
(bad_tenant, bad_timeline, pg) = tenant_timelines[n]
with pytest.raises(Exception, match="extracting base backup failed") as err:
pg.start()
log.info(
f"compute startup failed lazily for timeline {bad_tenant}/{bad_timeline} with corrupt layers, during basebackup preparation: {err}"
)
def test_create_multiple_timelines_parallel(neon_simple_env: NeonEnv):
@@ -119,13 +111,6 @@ def test_timeline_init_break_before_checkpoint(neon_simple_env: NeonEnv):
env = neon_simple_env
pageserver_http = env.pageserver.http_client()
env.pageserver.allowed_errors.extend(
[
".*Failed to process timeline dir contents.*Timeline has no ancestor and no layer files.*",
".*Timeline got dropped without initializing, cleaning its files.*",
]
)
tenant_id, _ = env.neon_cli.create_tenant()
timelines_dir = env.repo_dir / "tenants" / str(tenant_id) / "timelines"

View File

@@ -2,7 +2,7 @@ import os
import shutil
import subprocess
from pathlib import Path
from typing import Any, Optional
from typing import Any
import pytest
import toml # TODO: replace with tomllib for Python >= 3.11
@@ -50,12 +50,6 @@ def test_create_snapshot(neon_env_builder: NeonEnvBuilder, pg_bin: PgBin, test_o
env = neon_env_builder.init_start()
pg = env.postgres.create_start("main")
# FIXME: Is this expected?
env.pageserver.allowed_errors.append(
".*init_tenant_mgr: marking .* as locally complete, while it doesnt exist in remote index.*"
)
pg_bin.run(["pgbench", "--initialize", "--scale=10", pg.connstr()])
pg_bin.run(["pgbench", "--time=60", "--progress=2", pg.connstr()])
pg_bin.run(["pg_dumpall", f"--dbname={pg.connstr()}", f"--file={test_output_dir / 'dump.sql'}"])
@@ -160,7 +154,6 @@ def test_forward_compatibility(
from_dir=compatibility_snapshot_dir,
to_dir=test_output_dir / "compatibility_snapshot",
port_distributor=port_distributor,
pg_distrib_dir=compatibility_postgres_distrib_dir,
)
breaking_changes_allowed = (
@@ -190,12 +183,7 @@ def test_forward_compatibility(
), "Breaking changes are allowed by ALLOW_FORWARD_COMPATIBILITY_BREAKAGE, but the test has passed without any breakage"
def prepare_snapshot(
from_dir: Path,
to_dir: Path,
port_distributor: PortDistributor,
pg_distrib_dir: Optional[Path] = None,
):
def prepare_snapshot(from_dir: Path, to_dir: Path, port_distributor: PortDistributor):
assert from_dir.exists(), f"Snapshot '{from_dir}' doesn't exist"
assert (from_dir / "repo").exists(), f"Snapshot '{from_dir}' doesn't contain a repo directory"
assert (from_dir / "dump.sql").exists(), f"Snapshot '{from_dir}' doesn't contain a dump.sql"
@@ -220,7 +208,7 @@ def prepare_snapshot(
# Update paths and ports in config files
pageserver_toml = repo_dir / "pageserver.toml"
pageserver_config = toml.load(pageserver_toml)
pageserver_config["remote_storage"]["local_path"] = str(repo_dir / "local_fs_remote_storage")
pageserver_config["remote_storage"]["local_path"] = repo_dir / "local_fs_remote_storage"
pageserver_config["listen_http_addr"] = port_distributor.replace_with_new_port(
pageserver_config["listen_http_addr"]
)
@@ -231,9 +219,6 @@ def prepare_snapshot(
port_distributor.replace_with_new_port(ep) for ep in pageserver_config["broker_endpoints"]
]
if pg_distrib_dir:
pageserver_config["pg_distrib_dir"] = str(pg_distrib_dir)
with pageserver_toml.open("w") as f:
toml.dump(pageserver_config, f)
@@ -253,10 +238,7 @@ def prepare_snapshot(
sk["http_port"] = port_distributor.replace_with_new_port(sk["http_port"])
sk["pg_port"] = port_distributor.replace_with_new_port(sk["pg_port"])
if pg_distrib_dir:
snapshot_config["pg_distrib_dir"] = str(pg_distrib_dir)
with snapshot_config_toml.open("w") as f:
with (snapshot_config_toml).open("w") as f:
toml.dump(snapshot_config, f)
# Ensure that snapshot doesn't contain references to the original path

View File

@@ -179,16 +179,7 @@ def test_sync_safekeepers_logs(neon_env_builder: NeonEnvBuilder, pg_bin: PgBin):
# run compute_ctl and wait for 10s
try:
ctl.raw_cli(
[
"--connstr",
"postgres://invalid/",
"--pgdata",
pgdata,
"--spec",
spec,
"--pgbin",
pg_bin_path,
],
["--connstr", ps_connstr, "--pgdata", pgdata, "--spec", spec, "--pgbin", pg_bin_path],
timeout=10,
)
except TimeoutExpired as exc:

View File

@@ -1,4 +1,3 @@
import pytest
from fixtures.neon_fixtures import NeonEnvBuilder, PgBin
@@ -8,14 +7,8 @@ from fixtures.neon_fixtures import NeonEnvBuilder, PgBin
# normally restarts after it. Also, there should be GC ERRORs in the log,
# but the fixture checks the log for any unexpected ERRORs after every
# test anyway, so it doesn't need any special attention here.
@pytest.mark.timeout(600)
def test_gc_cutoff(neon_env_builder: NeonEnvBuilder, pg_bin: PgBin):
env = neon_env_builder.init_start()
# These warnings are expected, when the pageserver is restarted abruptly
env.pageserver.allowed_errors.append(".*found future image layer.*")
env.pageserver.allowed_errors.append(".*found future delta layer.*")
pageserver_http = env.pageserver.http_client()
# Use aggressive GC and checkpoint settings, so that we also exercise GC during the test
@@ -37,9 +30,10 @@ def test_gc_cutoff(neon_env_builder: NeonEnvBuilder, pg_bin: PgBin):
pageserver_http.configure_failpoints(("after-timeline-gc-removed-layers", "exit"))
for _ in range(5):
with pytest.raises(Exception):
pg_bin.run_capture(["pgbench", "-P1", "-N", "-c5", "-T500", "-Mprepared", connstr])
env.pageserver.stop()
env.pageserver.start()
pageserver_http.configure_failpoints(("after-timeline-gc-removed-layers", "exit"))
for i in range(5):
try:
pg_bin.run_capture(["pgbench", "-N", "-c5", "-T100", "-Mprepared", connstr])
except Exception:
env.pageserver.stop()
env.pageserver.start()
pageserver_http.configure_failpoints(("after-timeline-gc-removed-layers", "exit"))

View File

@@ -76,26 +76,6 @@ def test_import_from_vanilla(test_output_dir, pg_bin, vanilla_pg, neon_env_build
env = neon_env_builder.init_start()
env.pageserver.http_client().tenant_create(tenant)
env.pageserver.allowed_errors.extend(
[
".*error importing base backup .*",
".*Timeline got dropped without initializing, cleaning its files.*",
".*Removing intermediate uninit mark file.*",
".*InternalServerError.*timeline not found.*",
".*InternalServerError.*Tenant .* not found.*",
".*InternalServerError.*Timeline .* not found.*",
".*InternalServerError.*Cannot delete timeline which has child timelines.*",
]
)
# FIXME: we should clean up pageserver to not print this
env.pageserver.allowed_errors.append(".*exited with error: unexpected message type: CopyData.*")
# FIXME: Is this expected?
env.pageserver.allowed_errors.append(
".*init_tenant_mgr: marking .* as locally complete, while it doesnt exist in remote index.*"
)
def import_tar(base, wal):
env.neon_cli.raw_cli(
[
@@ -142,11 +122,6 @@ def test_import_from_pageserver_small(pg_bin: PgBin, neon_env_builder: NeonEnvBu
neon_env_builder.enable_local_fs_remote_storage()
env = neon_env_builder.init_start()
# FIXME: Is this expected?
env.pageserver.allowed_errors.append(
".*init_tenant_mgr: marking .* as locally complete, while it doesnt exist in remote index.*"
)
timeline = env.neon_cli.create_branch("test_import_from_pageserver_small")
pg = env.postgres.create_start("test_import_from_pageserver_small")

View File

@@ -67,10 +67,6 @@ def test_pageserver_restart(neon_env_builder: NeonEnvBuilder):
def test_pageserver_chaos(neon_env_builder: NeonEnvBuilder):
env = neon_env_builder.init_start()
# These warnings are expected, when the pageserver is restarted abruptly
env.pageserver.allowed_errors.append(".*found future image layer.*")
env.pageserver.allowed_errors.append(".*found future delta layer.*")
# Use a tiny checkpoint distance, to create a lot of layers quickly.
# That allows us to stress the compaction and layer flushing logic more.
tenant, _ = env.neon_cli.create_tenant(

View File

@@ -1,4 +1,5 @@
import json
import subprocess
from urllib.parse import urlparse
import psycopg2
@@ -7,11 +8,11 @@ from fixtures.log_helper import log
from fixtures.neon_fixtures import PSQL, NeonProxy, VanillaPostgres
def test_proxy_select_1(static_proxy: NeonProxy):
def test_proxy_select_1(static_proxy):
static_proxy.safe_psql("select 1", options="project=generic-project-name")
def test_password_hack(static_proxy: NeonProxy):
def test_password_hack(static_proxy):
user = "borat"
password = "password"
static_proxy.safe_psql(
@@ -23,75 +24,118 @@ def test_password_hack(static_proxy: NeonProxy):
static_proxy.safe_psql("select 1", sslsni=0, user=user, password=magic)
# Must also check that invalid magic won't be accepted.
with pytest.raises(psycopg2.OperationalError):
with pytest.raises(psycopg2.errors.OperationalError):
magic = "broken"
static_proxy.safe_psql("select 1", sslsni=0, user=user, password=magic)
def get_session_id(uri_prefix, uri_line):
def get_session_id_from_uri_line(uri_prefix, uri_line):
assert uri_prefix in uri_line
url_parts = urlparse(uri_line)
psql_session_id = url_parts.path[1:]
assert psql_session_id.isalnum(), "session_id should only contain alphanumeric chars"
assert psql_session_id.isalnum(), "session_id should only contain alphanumeric chars."
link_auth_uri_prefix = uri_line[: -len(url_parts.path)]
# invariant: the prefix must match the uri_prefix.
assert (
link_auth_uri_prefix == uri_prefix
), f"Line='{uri_line}' should contain a http auth link of form '{uri_prefix}/<psql_session_id>'."
# invariant: the entire link_auth_uri should be on its own line, module spaces.
assert " ".join(uri_line.split(" ")) == f"{uri_prefix}/{psql_session_id}"
return psql_session_id
async def find_auth_link(link_auth_uri_prefix, proc):
for _ in range(100):
line = (await proc.stderr.readline()).decode("utf-8").strip()
log.info(f"psql line: {line}")
if link_auth_uri_prefix in line:
log.info(f"SUCCESS, found auth url: {line}")
return line
async def activate_link_auth(local_vanilla_pg, link_proxy, psql_session_id):
def create_and_send_db_info(local_vanilla_pg, psql_session_id, mgmt_port):
pg_user = "proxy"
pg_password = "password"
log.info("creating a new user for link auth test")
local_vanilla_pg.start()
local_vanilla_pg.safe_psql(f"create user {pg_user} with login superuser")
query = f"create user {pg_user} with login superuser password '{pg_password}'"
local_vanilla_pg.safe_psql(query)
db_info = json.dumps(
{
"session_id": psql_session_id,
"result": {
"Success": {
"host": local_vanilla_pg.default_options["host"],
"port": local_vanilla_pg.default_options["port"],
"dbname": local_vanilla_pg.default_options["dbname"],
"user": pg_user,
"project": "irrelevant",
}
},
}
)
port = local_vanilla_pg.default_options["port"]
host = local_vanilla_pg.default_options["host"]
dbname = local_vanilla_pg.default_options["dbname"]
log.info("sending session activation message")
psql = await PSQL(host=link_proxy.host, port=link_proxy.mgmt_port).run(db_info)
out = (await psql.stdout.read()).decode("utf-8").strip()
assert out == "ok"
db_info_dict = {
"session_id": psql_session_id,
"result": {
"Success": {
"host": host,
"port": port,
"dbname": dbname,
"user": pg_user,
"password": pg_password,
}
},
}
db_info_str = json.dumps(db_info_dict)
cmd_args = [
"psql",
"-h",
"127.0.0.1", # localhost
"-p",
f"{mgmt_port}",
"-c",
db_info_str,
]
log.info(f"Sending to proxy the user and db info: {' '.join(cmd_args)}")
p = subprocess.Popen(cmd_args, stdout=subprocess.PIPE)
out, err = p.communicate()
assert "ok" in str(out)
async def get_uri_line_from_process_welcome_notice(link_auth_uri_prefix, proc):
"""
Returns the line from the welcome notice from proc containing link_auth_uri_prefix.
:param link_auth_uri_prefix: the uri prefix used to indicate the line of interest
:param proc: the process to read the welcome message from.
:return: a line containing the full link authentication uri.
"""
max_num_lines_of_welcome_message = 15
for attempt in range(max_num_lines_of_welcome_message):
raw_line = await proc.stderr.readline()
line = raw_line.decode("utf-8").strip()
if link_auth_uri_prefix in line:
return line
assert False, f"did not find line containing '{link_auth_uri_prefix}'"
@pytest.mark.asyncio
async def test_psql_session_id(vanilla_pg: VanillaPostgres, link_proxy: NeonProxy):
psql = await PSQL(host=link_proxy.host, port=link_proxy.proxy_port).run("select 42")
"""
Test copied and modified from: test_project_psql_link_auth test from cloud/tests_e2e/tests/test_project.py
Step 1. establish connection to the proxy
Step 2. retrieve session_id:
Step 2.1: read welcome message
Step 2.2: parse session_id
Step 3. create a vanilla_pg and send user and db info via command line (using Popen) a psql query via mgmt port to proxy.
Step 4. assert that select 1 has been executed correctly.
"""
psql = PSQL(
host=link_proxy.host,
port=link_proxy.proxy_port,
)
proc = await psql.run("select 42")
uri_prefix = link_proxy.link_auth_uri_prefix
link = await find_auth_link(uri_prefix, psql)
line_str = await get_uri_line_from_process_welcome_notice(uri_prefix, proc)
psql_session_id = get_session_id(uri_prefix, link)
await activate_link_auth(vanilla_pg, link_proxy, psql_session_id)
psql_session_id = get_session_id_from_uri_line(uri_prefix, line_str)
log.info(f"Parsed psql_session_id='{psql_session_id}' from Neon welcome message.")
assert psql.stdout is not None
out = (await psql.stdout.read()).decode("utf-8").strip()
create_and_send_db_info(vanilla_pg, psql_session_id, link_proxy.mgmt_port)
assert proc.stdout is not None
out = (await proc.stdout.read()).decode("utf-8").strip()
assert out == "42"
# Pass extra options to the server.
def test_proxy_options(static_proxy: NeonProxy):
def test_proxy_options(static_proxy):
with static_proxy.connect(options="project=irrelevant -cproxytest.option=value") as conn:
with conn.cursor() as cur:
cur.execute("SHOW proxytest.option")

View File

@@ -143,8 +143,6 @@ def test_read_validation_neg(neon_simple_env: NeonEnv):
env = neon_simple_env
env.neon_cli.create_branch("test_read_validation_neg", "empty")
env.pageserver.allowed_errors.append(".*invalid LSN\\(0\\) in request.*")
pg = env.postgres.create_start("test_read_validation_neg")
log.info("postgres is running on 'test_read_validation_neg' branch")

View File

@@ -17,8 +17,6 @@ def test_readonly_node(neon_simple_env: NeonEnv):
pgmain = env.postgres.create_start("test_readonly_node")
log.info("postgres is running on 'test_readonly_node' branch")
env.pageserver.allowed_errors.append(".*basebackup .* failed: invalid basebackup lsn.*")
main_pg_conn = pgmain.connect()
main_cur = main_pg_conn.cursor()

View File

@@ -17,10 +17,6 @@ def test_pageserver_recovery(neon_env_builder: NeonEnvBuilder):
neon_env_builder.start()
# These warnings are expected, when the pageserver is restarted abruptly
env.pageserver.allowed_errors.append(".*found future delta layer.*")
env.pageserver.allowed_errors.append(".*found future image layer.*")
# Create a branch for us
env.neon_cli.create_branch("test_pageserver_recovery", "main")

View File

@@ -56,17 +56,6 @@ def test_remote_storage_backup_and_restore(
##### First start, insert secret data and upload it to the remote storage
env = neon_env_builder.init_start()
# FIXME: Is this expected?
env.pageserver.allowed_errors.append(
".*marking .* as locally complete, while it doesnt exist in remote index.*"
)
env.pageserver.allowed_errors.append(".*No timelines to attach received.*")
env.pageserver.allowed_errors.append(".*Tenant download is already in progress.*")
env.pageserver.allowed_errors.append(".*Failed to get local tenant state.*")
env.pageserver.allowed_errors.append(".*No metadata file found in the timeline directory.*")
pageserver_http = env.pageserver.http_client()
pg = env.postgres.create_start("main")

View File

@@ -1,4 +1,3 @@
import time
from threading import Thread
import pytest
@@ -12,30 +11,15 @@ def do_gc_target(
):
"""Hack to unblock main, see https://github.com/neondatabase/neon/issues/2211"""
try:
log.info("sending gc http request")
pageserver_http.timeline_gc(tenant_id, timeline_id, 0)
except Exception as e:
log.error("do_gc failed: %s", e)
finally:
log.info("gc http thread returning")
@pytest.mark.skip(
reason="""
Commit 'make test_tenant_detach_smoke fail reproducibly' adds failpoint to make this test fail reproducibly.
Fix in https://github.com/neondatabase/neon/pull/2851 will come as part of
https://github.com/neondatabase/neon/pull/2785 .
"""
)
def test_tenant_detach_smoke(neon_env_builder: NeonEnvBuilder):
env = neon_env_builder.init_start()
pageserver_http = env.pageserver.http_client()
env.pageserver.allowed_errors.append(".*NotFound\\(Tenant .* not found in the local state")
# FIXME: we have a race condition between GC and detach. GC might fail with this
# error. Similar to https://github.com/neondatabase/neon/issues/2671
env.pageserver.allowed_errors.append(".*InternalServerError\\(No such file or directory.*")
# first check for non existing tenant
tenant_id = TenantId.generate()
with pytest.raises(
@@ -44,9 +28,6 @@ def test_tenant_detach_smoke(neon_env_builder: NeonEnvBuilder):
):
pageserver_http.tenant_detach(tenant_id)
# the error will be printed to the log too
env.pageserver.allowed_errors.append(".*Tenant not found for id.*")
# create new nenant
tenant_id, timeline_id = env.neon_cli.create_tenant()
@@ -62,34 +43,32 @@ def test_tenant_detach_smoke(neon_env_builder: NeonEnvBuilder):
]
)
# gc should not try to even start on a timeline that doesn't exist
# gc should not try to even start
with pytest.raises(
expected_exception=PageserverApiException, match="gc target timeline does not exist"
):
bogus_timeline_id = TimelineId.generate()
pageserver_http.timeline_gc(tenant_id, bogus_timeline_id, 0)
# the error will be printed to the log too
env.pageserver.allowed_errors.append(".*gc target timeline does not exist.*")
# Detach while running manual GC.
# It should wait for manual GC to finish (right now it doesn't that's why this test fails sometimes)
pageserver_http.configure_failpoints(
("gc_iteration_internal_after_getting_gc_timelines", "return(2000)")
)
# try to concurrently run gc and detach
gc_thread = Thread(target=lambda: do_gc_target(pageserver_http, tenant_id, timeline_id))
gc_thread.start()
time.sleep(1)
# By now the gc task is spawned but in sleep for another second due to the failpoint.
log.info("detaching tenant")
pageserver_http.tenant_detach(tenant_id)
log.info("tenant detached without error")
last_error = None
for i in range(3):
try:
pageserver_http.tenant_detach(tenant_id)
except Exception as e:
last_error = e
log.error(f"try {i} error detaching tenant: {e}")
continue
else:
break
# else is called if the loop finished without reaching "break"
else:
pytest.fail(f"could not detach tenant: {last_error}")
log.info("wait for gc thread to return")
gc_thread.join(timeout=10)
assert not gc_thread.is_alive()
log.info("gc thread returned")
# check that nothing is left on disk for deleted tenant
assert not (env.repo_dir / "tenants" / str(tenant_id)).exists()

View File

@@ -259,11 +259,6 @@ def test_tenant_relocation(
env = neon_env_builder.init_start()
# FIXME: Is this expected?
env.pageserver.allowed_errors.append(
".*init_tenant_mgr: marking .* as locally complete, while it doesnt exist in remote index.*"
)
# create folder for remote storage mock
remote_storage_mock_path = env.repo_dir / "local_fs_remote_storage"

View File

@@ -166,10 +166,6 @@ def test_get_tenant_size_with_multiple_branches(neon_env_builder: NeonEnvBuilder
env = neon_env_builder.init_start()
# FIXME: we have a race condition between GC and delete timeline. GC might fail with this
# error. Similar to https://github.com/neondatabase/neon/issues/2671
env.pageserver.allowed_errors.append(".*InternalServerError\\(No such file or directory.*")
tenant_id = env.initial_tenant
main_branch_name, main_timeline_id = env.neon_cli.list_timelines(tenant_id)[0]
@@ -192,8 +188,10 @@ def test_get_tenant_size_with_multiple_branches(neon_env_builder: NeonEnvBuilder
"first-branch", main_branch_name, tenant_id
)
# unsure why this happens, the size difference is more than a page alignment
size_after_first_branch = http_client.tenant_size(tenant_id)
assert size_after_first_branch == size_at_branch
assert size_after_first_branch > size_at_branch
assert size_after_first_branch - size_at_branch == gc_horizon
first_branch_pg = env.postgres.create_start("first-branch", tenant_id=tenant_id)
@@ -219,7 +217,7 @@ def test_get_tenant_size_with_multiple_branches(neon_env_builder: NeonEnvBuilder
"second-branch", main_branch_name, tenant_id
)
size_after_second_branch = http_client.tenant_size(tenant_id)
assert size_after_second_branch == size_after_continuing_on_main
assert size_after_second_branch > size_after_continuing_on_main
second_branch_pg = env.postgres.create_start("second-branch", tenant_id=tenant_id)
@@ -265,8 +263,6 @@ def test_get_tenant_size_with_multiple_branches(neon_env_builder: NeonEnvBuilder
except PageserverApiException as e:
# compaction is ok but just retry if this fails; related to #2442
if "cannot lock compaction critical section" in str(e):
# also ignore it in the log
env.pageserver.allowed_errors.append(".*cannot lock compaction critical section.*")
time.sleep(1)
continue
raise

View File

@@ -25,13 +25,6 @@ def test_tenant_creation_fails(neon_simple_env: NeonEnv):
)
initial_tenant_dirs = [d for d in tenants_dir.iterdir()]
neon_simple_env.pageserver.allowed_errors.extend(
[
".*Failed to create directory structure for tenant .*, cleaning tmp data.*",
".*Failed to fsync removed temporary tenant directory .*",
]
)
pageserver_http = neon_simple_env.pageserver.http_client()
pageserver_http.configure_failpoints(("tenant-creation-before-tmp-rename", "return"))
with pytest.raises(Exception, match="tenant-creation-before-tmp-rename"):
@@ -213,13 +206,6 @@ def test_pageserver_with_empty_tenants(
)
env = neon_env_builder.init_start()
env.pageserver.allowed_errors.append(
".*marking .* as locally complete, while it doesnt exist in remote index.*"
)
env.pageserver.allowed_errors.append(".*Tenant .* has no timelines directory.*")
env.pageserver.allowed_errors.append(".*No timelines to attach received.*")
client = env.pageserver.http_client()
tenant_without_timelines_dir = env.initial_tenant

View File

@@ -66,11 +66,6 @@ def test_tenants_many(neon_env_builder: NeonEnvBuilder, remote_storage_kind: Rem
env = neon_env_builder.init_start()
# FIXME: Is this expected?
env.pageserver.allowed_errors.append(
".*init_tenant_mgr: marking .* as locally complete, while it doesnt exist in remote index.*"
)
tenants_pgs: List[Tuple[TenantId, Postgres]] = []
for _ in range(1, 5):
@@ -122,13 +117,6 @@ def test_tenants_attached_after_download(
##### First start, insert secret data and upload it to the remote storage
env = neon_env_builder.init_start()
# FIXME: Are these expected?
env.pageserver.allowed_errors.append(".*No timelines to attach received.*")
env.pageserver.allowed_errors.append(
".*marking .* as locally complete, while it doesnt exist in remote index.*"
)
pageserver_http = env.pageserver.http_client()
pg = env.postgres.create_start("main")
@@ -221,16 +209,6 @@ def test_tenant_upgrades_index_json_from_v0(
# launch pageserver, populate the default tenants timeline, wait for it to be uploaded,
# then go ahead and modify the "remote" version as if it was downgraded, needing upgrade
env = neon_env_builder.init_start()
# FIXME: Are these expected?
env.pageserver.allowed_errors.append(
".*init_tenant_mgr: marking .* as locally complete, while it doesnt exist in remote index.*"
)
env.pageserver.allowed_errors.append(".*No timelines to attach received.*")
env.pageserver.allowed_errors.append(
".*Failed to get local tenant state: Tenant .* not found in the local state.*"
)
pageserver_http = env.pageserver.http_client()
pg = env.postgres.create_start("main")
@@ -337,20 +315,6 @@ def test_tenant_redownloads_truncated_file_on_startup(
)
env = neon_env_builder.init_start()
env.pageserver.allowed_errors.append(
".*Redownloading locally existing .* due to size mismatch.*"
)
env.pageserver.allowed_errors.append(
".*Downloaded layer exists already but layer file metadata mismatches.*"
)
# FIXME: Are these expected?
env.pageserver.allowed_errors.append(
".*init_tenant_mgr: marking .* as locally complete, while it doesnt exist in remote index.*"
)
env.pageserver.allowed_errors.append(".*No timelines to attach received.*")
pageserver_http = env.pageserver.http_client()
pg = env.postgres.create_start("main")

View File

@@ -7,11 +7,6 @@ from fixtures.utils import wait_until
def test_timeline_delete(neon_simple_env: NeonEnv):
env = neon_simple_env
env.pageserver.allowed_errors.append(".*Timeline .* was not found.*")
env.pageserver.allowed_errors.append(".*timeline not found.*")
env.pageserver.allowed_errors.append(".*Cannot delete timeline which has child timelines.*")
env.pageserver.allowed_errors.append(".*Tenant .* not found in the local state.*")
ps_http = env.pageserver.http_client()
# first try to delete non existing timeline

View File

@@ -263,12 +263,6 @@ def test_broker(neon_env_builder: NeonEnvBuilder):
env = neon_env_builder.init_start()
env.neon_cli.create_branch("test_broker", "main")
# FIXME: Is this expected?
env.pageserver.allowed_errors.append(
".*init_tenant_mgr: marking .* as locally complete, while it doesnt exist in remote index.*"
)
pg = env.postgres.create_start("test_broker")
pg.safe_psql("CREATE TABLE t(key int primary key, value text)")
@@ -312,11 +306,6 @@ def test_wal_removal(neon_env_builder: NeonEnvBuilder, auth_enabled: bool):
neon_env_builder.auth_enabled = auth_enabled
env = neon_env_builder.init_start()
# FIXME: Is this expected?
env.pageserver.allowed_errors.append(
".*init_tenant_mgr: marking .* as locally complete, while it doesnt exist in remote index.*"
)
env.neon_cli.create_branch("test_safekeepers_wal_removal")
pg = env.postgres.create_start("test_safekeepers_wal_removal")
@@ -549,7 +538,6 @@ def test_s3_wal_replay(neon_env_builder: NeonEnvBuilder, remote_storage_kind: Re
)
pg.stop_and_destroy()
ps_cli.timeline_delete(tenant_id, timeline_id)
# Also delete and manually create timeline on safekeepers -- this tests
# scenario of manual recovery on different set of safekeepers.
@@ -574,6 +562,7 @@ def test_s3_wal_replay(neon_env_builder: NeonEnvBuilder, remote_storage_kind: Re
shutil.copy(f_partial_saved, f_partial_path)
# recreate timeline on pageserver from scratch
ps_cli.timeline_delete(tenant_id, timeline_id)
ps_cli.timeline_create(tenant_id, timeline_id)
wait_lsn_timeout = 60 * 3
@@ -1092,14 +1081,6 @@ def test_delete_force(neon_env_builder: NeonEnvBuilder, auth_enabled: bool):
neon_env_builder.auth_enabled = auth_enabled
env = neon_env_builder.init_start()
# FIXME: are these expected?
env.pageserver.allowed_errors.extend(
[
".*Failed to process query for timeline .*: Timeline .* was not found in global map.*",
".*end streaming to Some.*",
]
)
# Create two tenants: one will be deleted, other should be preserved.
tenant_id = env.initial_tenant
timeline_id_1 = env.neon_cli.create_branch("br1") # Active, delete explicitly

View File

@@ -22,8 +22,6 @@ def assert_child_processes(pageserver_pid, wal_redo_present=False, defunct_prese
# as a zombie process.
def test_walredo_not_left_behind_on_detach(neon_env_builder: NeonEnvBuilder):
env = neon_env_builder.init_start()
# We intentionally test for a non-existent tenant.
env.pageserver.allowed_errors.append(".*Tenant not found.*")
pageserver_http = env.pageserver.http_client()
pagserver_pid = int((env.repo_dir / "pageserver.pid").read_text())