Mirror of https://github.com/neondatabase/neon.git, synced 2026-01-19 11:22:56 +00:00

Compare commits: persistent... → test-prefe... (1 commit)

Commit c431a305aa

.github/ansible/.gitignore (vendored): 2 changed lines

@@ -1,3 +1,5 @@
zenith_install.tar.gz
.zenith_current_version
neon_install.tar.gz
.neon_current_version

.github/ansible/staging.eu-west-1.hosts.yaml (vendored): 33 changed lines (file deleted)

@@ -1,33 +0,0 @@
storage:
  vars:
    bucket_name: neon-dev-storage-eu-west-1
    bucket_region: eu-west-1
    console_mgmt_base_url: http://console-staging.local
    etcd_endpoints: etcd-0.eu-west-1.aws.neon.build:2379
    pageserver_config_stub:
      pg_distrib_dir: /usr/local
      remote_storage:
        bucket_name: "{{ bucket_name }}"
        bucket_region: "{{ bucket_region }}"
        prefix_in_bucket: "pageserver/v1"
    safekeeper_s3_prefix: safekeeper/v1/wal
    hostname_suffix: ""
    remote_user: ssm-user
    ansible_aws_ssm_region: eu-west-1
    ansible_aws_ssm_bucket_name: neon-dev-storage-eu-west-1
    console_region_id: aws-eu-west-1

  children:
    pageservers:
      hosts:
        pageserver-0.eu-west-1.aws.neon.build:
          ansible_host: i-01d496c5041c7f34c

    safekeepers:
      hosts:
        safekeeper-0.eu-west-1.aws.neon.build:
          ansible_host: i-05226ef85722831bf
        safekeeper-1.eu-west-1.aws.neon.build:
          ansible_host: i-06969ee1bf2958bfc
        safekeeper-2.eu-west-1.aws.neon.build:
          ansible_host: i-087892e9625984a0b

.github/ansible/staging.us-east-2.hosts.yaml (vendored): 2 changed lines

@@ -22,8 +22,6 @@ storage:
      hosts:
        pageserver-0.us-east-2.aws.neon.build:
          ansible_host: i-0c3e70929edb5d691
        pageserver-1.us-east-2.aws.neon.build:
          ansible_host: i-0565a8b4008aa3f40

    safekeepers:
      hosts:

@@ -1,31 +0,0 @@
# Helm chart values for neon-proxy-scram.
# This is a YAML-formatted file.

image:
  repository: neondatabase/neon

settings:
  authBackend: "console"
  authEndpoint: "http://console-staging.local/management/api/v2"
  domain: "*.eu-west-1.aws.neon.build"

# -- Additional labels for neon-proxy pods
podLabels:
  zenith_service: proxy-scram
  zenith_env: dev
  zenith_region: eu-west-1
  zenith_region_slug: eu-west-1

exposedService:
  annotations:
    service.beta.kubernetes.io/aws-load-balancer-type: external
    service.beta.kubernetes.io/aws-load-balancer-nlb-target-type: ip
    service.beta.kubernetes.io/aws-load-balancer-scheme: internet-facing
    external-dns.alpha.kubernetes.io/hostname: eu-west-1.aws.neon.build

#metrics:
#  enabled: true
#  serviceMonitor:
#    enabled: true
#    selector:
#      release: kube-prometheus-stack

.github/workflows/benchmarking.yml (vendored): 9 changed lines

@@ -144,9 +144,7 @@ jobs:
        # neon-captest-new: Run pgbench in a freshly created project
        # neon-captest-reuse: Same, but reusing existing project
        # neon-captest-prefetch: Same, with prefetching enabled (new project)
        # rds-aurora: Aurora Postgres Serverless v2 with autoscaling from 0.5 to 2 ACUs
        # rds-postgres: RDS Postgres db.m5.large instance (2 vCPU, 8 GiB) with gp3 EBS storage
        platform: [ neon-captest-new, neon-captest-reuse, neon-captest-prefetch, rds-postgres ]
        platform: [ neon-captest-new, neon-captest-reuse, neon-captest-prefetch ]
        db_size: [ 10gb ]
        include:
          - platform: neon-captest-new

@@ -209,11 +207,8 @@ jobs:
            rds-aurora)
              CONNSTR=${{ secrets.BENCHMARK_RDS_CONNSTR }}
              ;;
            rds-postgres)
              CONNSTR=${{ secrets.BENCHMARK_RDS_POSTGRES_CONNSTR }}
              ;;
            *)
              echo 2>&1 "Unknown PLATFORM=${PLATFORM}. Allowed only 'neon-captest-reuse', 'neon-captest-new', 'neon-captest-prefetch', 'rds-aurora', or 'rds-postgres'"
              echo 2>&1 "Unknown PLATFORM=${PLATFORM}. Allowed only 'neon-captest-reuse', 'neon-captest-new', 'neon-captest-prefetch' or 'rds-aurora'"
              exit 1
              ;;
          esac

.github/workflows/build_and_test.yml (vendored): 41 changed lines

@@ -761,6 +761,7 @@ jobs:
      run: |
        export DOCKER_TAG=${{needs.tag.outputs.build-tag}}
        cd "$(pwd)/.github/ansible"

        if [[ "$GITHUB_REF_NAME" == "main" ]]; then
          ./get_binaries.sh
        elif [[ "$GITHUB_REF_NAME" == "release" ]]; then

@@ -769,38 +770,6 @@ jobs:
          echo "GITHUB_REF_NAME (value '$GITHUB_REF_NAME') is not set to either 'main' or 'release'"
          exit 1
        fi
        ansible-galaxy collection install sivel.toiletwater
        ansible-playbook deploy.yaml -i staging.${{ matrix.target_region }}.hosts.yaml -e @ssm_config -e CONSOLE_API_TOKEN=${{secrets.NEON_STAGING_API_KEY}}
        rm -f neon_install.tar.gz .neon_current_version

  deploy-pr-test-new:
    runs-on: [ self-hosted, dev, x64 ]
    container: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/ansible:pinned
    # We need both storage **and** compute images for deploy, because control plane picks the compute version based on the storage version.
    # If it notices a fresh storage it may bump the compute version. And if compute image failed to build it may break things badly
    needs: [ push-docker-hub, tag, regress-tests ]
    if: |
      contains(github.event.pull_request.labels.*.name, 'deploy-test-storage') &&
      github.event_name != 'workflow_dispatch'
    defaults:
      run:
        shell: bash
    strategy:
      matrix:
        target_region: [ eu-west-1 ]
    steps:
      - name: Checkout
        uses: actions/checkout@v3
        with:
          submodules: true
          fetch-depth: 0

      - name: Redeploy
        run: |
          export DOCKER_TAG=${{needs.tag.outputs.build-tag}}
          cd "$(pwd)/.github/ansible"

          ./get_binaries.sh

          ansible-galaxy collection install sivel.toiletwater
          ansible-playbook deploy.yaml -i staging.${{ matrix.target_region }}.hosts.yaml -e @ssm_config -e CONSOLE_API_TOKEN=${{secrets.NEON_STAGING_API_KEY}}

@@ -811,7 +780,7 @@ jobs:
    container: 093970136003.dkr.ecr.eu-central-1.amazonaws.com/ansible:latest
    # We need both storage **and** compute images for deploy, because control plane picks the compute version based on the storage version.
    # If it notices a fresh storage it may bump the compute version. And if compute image failed to build it may break things badly
    needs: [ push-docker-hub, tag, regress-tests ]
    needs: [ push-docker-hub, calculate-deploy-targets, tag, regress-tests ]
    if: |
      (github.ref_name == 'release') &&
      github.event_name != 'workflow_dispatch'

@@ -892,7 +861,7 @@ jobs:
    runs-on: [ self-hosted, dev, x64 ]
    container: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/ansible:pinned
    # Compute image isn't strictly required for proxy deploy, but let's still wait for it to run all deploy jobs consistently.
    needs: [ push-docker-hub, tag, regress-tests ]
    needs: [ push-docker-hub, calculate-deploy-targets, tag, regress-tests ]
    if: |
      (github.ref_name == 'main') &&
      github.event_name != 'workflow_dispatch'

@@ -904,8 +873,6 @@ jobs:
        include:
          - target_region: us-east-2
            target_cluster: dev-us-east-2-beta
          - target_region: eu-west-1
            target_cluster: dev-eu-west-1-zeta
    steps:
      - name: Checkout
        uses: actions/checkout@v3

@@ -927,7 +894,7 @@ jobs:
    runs-on: prod
    container: 093970136003.dkr.ecr.eu-central-1.amazonaws.com/ansible:latest
    # Compute image isn't strictly required for proxy deploy, but let's still wait for it to run all deploy jobs consistently.
    needs: [ push-docker-hub, tag, regress-tests ]
    needs: [ push-docker-hub, calculate-deploy-targets, tag, regress-tests ]
    if: |
      (github.ref_name == 'release') &&
      github.event_name != 'workflow_dispatch'

@@ -8,4 +8,3 @@
/pgxn/ @neondatabase/compute
/proxy/ @neondatabase/control-plane
/safekeeper/ @neondatabase/safekeepers
/vendor/ @neondatabase/compute

Cargo.lock (generated): 8 changed lines

@@ -2255,14 +2255,6 @@ version = "2.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "478c572c3d73181ff3c2539045f6eb99e5491218eae919370993b890cdbdd98e"

[[package]]
name = "persistent_range_query"
version = "0.1.0"
dependencies = [
 "rand",
 "workspace_hack",
]

[[package]]
name = "petgraph"
version = "0.6.2"

Makefile: 28 changed lines

@@ -20,18 +20,18 @@ else
$(error Bad build type '$(BUILD_TYPE)', see Makefile for options)
endif

# Seccomp BPF is only available for Linux
UNAME_S := $(shell uname -s)
ifeq ($(UNAME_S),Linux)
# Seccomp BPF is only available for Linux
PG_CONFIGURE_OPTS += --with-libseccomp
else ifeq ($(UNAME_S),Darwin)
# macOS with brew-installed openssl requires explicit paths
# It can be configured with OPENSSL_PREFIX variable
OPENSSL_PREFIX ?= $(shell brew --prefix openssl@3)
PG_CONFIGURE_OPTS += --with-includes=$(OPENSSL_PREFIX)/include --with-libraries=$(OPENSSL_PREFIX)/lib
# macOS already has bison and flex in the system, but they are old and result in postgres-v14 target failure
# brew formulae are keg-only and not symlinked into HOMEBREW_PREFIX, force their usage
EXTRA_PATH_OVERRIDES += $(shell brew --prefix bison)/bin/:$(shell brew --prefix flex)/bin/:
endif

# macOS with brew-installed openssl requires explicit paths
# It can be configured with OPENSSL_PREFIX variable
UNAME_S := $(shell uname -s)
ifeq ($(UNAME_S),Darwin)
OPENSSL_PREFIX ?= $(shell brew --prefix openssl@3)
PG_CONFIGURE_OPTS += --with-includes=$(OPENSSL_PREFIX)/include --with-libraries=$(OPENSSL_PREFIX)/lib
endif

# Use -C option so that when PostgreSQL "make install" installs the

@@ -73,8 +73,7 @@ $(POSTGRES_INSTALL_DIR)/build/v14/config.status:
	+@echo "Configuring Postgres v14 build"
	mkdir -p $(POSTGRES_INSTALL_DIR)/build/v14
	(cd $(POSTGRES_INSTALL_DIR)/build/v14 && \
	env PATH="$(EXTRA_PATH_OVERRIDES):$$PATH" $(ROOT_PROJECT_DIR)/vendor/postgres-v14/configure \
	CFLAGS='$(PG_CFLAGS)' \
	$(ROOT_PROJECT_DIR)/vendor/postgres-v14/configure CFLAGS='$(PG_CFLAGS)' \
	$(PG_CONFIGURE_OPTS) \
	--prefix=$(abspath $(POSTGRES_INSTALL_DIR))/v14 > configure.log)

@@ -82,8 +81,7 @@ $(POSTGRES_INSTALL_DIR)/build/v15/config.status:
	+@echo "Configuring Postgres v15 build"
	mkdir -p $(POSTGRES_INSTALL_DIR)/build/v15
	(cd $(POSTGRES_INSTALL_DIR)/build/v15 && \
	env PATH="$(EXTRA_PATH_OVERRIDES):$$PATH" $(ROOT_PROJECT_DIR)/vendor/postgres-v15/configure \
	CFLAGS='$(PG_CFLAGS)' \
	$(ROOT_PROJECT_DIR)/vendor/postgres-v15/configure CFLAGS='$(PG_CFLAGS)' \
	$(PG_CONFIGURE_OPTS) \
	--prefix=$(abspath $(POSTGRES_INSTALL_DIR))/v15 > configure.log)

@@ -113,8 +111,6 @@ postgres-v14: postgres-v14-configure \
	$(MAKE) -C $(POSTGRES_INSTALL_DIR)/build/v14 MAKELEVEL=0 install
	+@echo "Compiling libpq v14"
	$(MAKE) -C $(POSTGRES_INSTALL_DIR)/build/v14/src/interfaces/libpq install
	+@echo "Compiling pg_prewarm v14"
	$(MAKE) -C $(POSTGRES_INSTALL_DIR)/build/v14/contrib/pg_prewarm install
	+@echo "Compiling pg_buffercache v14"
	$(MAKE) -C $(POSTGRES_INSTALL_DIR)/build/v14/contrib/pg_buffercache install
	+@echo "Compiling pageinspect v14"

@@ -127,8 +123,6 @@ postgres-v15: postgres-v15-configure \
	$(MAKE) -C $(POSTGRES_INSTALL_DIR)/build/v15 MAKELEVEL=0 install
	+@echo "Compiling libpq v15"
	$(MAKE) -C $(POSTGRES_INSTALL_DIR)/build/v15/src/interfaces/libpq install
	+@echo "Compiling pg_prewarm v15"
	$(MAKE) -C $(POSTGRES_INSTALL_DIR)/build/v15/contrib/pg_prewarm install
	+@echo "Compiling pg_buffercache v15"
	$(MAKE) -C $(POSTGRES_INSTALL_DIR)/build/v15/contrib/pg_buffercache install
	+@echo "Compiling pageinspect v15"

README.md: 25 changed lines

@@ -53,7 +53,7 @@ curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh
1. Install XCode and dependencies
```
xcode-select --install
brew install protobuf etcd openssl flex bison
brew install protobuf etcd openssl
```

2. [Install Rust](https://www.rust-lang.org/tools/install)

@@ -125,23 +125,24 @@ Python (3.9 or higher), and install python3 packages using `./scripts/pysync` (r
# Create repository in .neon with proper paths to binaries and data
# Later that would be responsibility of a package install script
> ./target/debug/neon_local init
Starting pageserver at '127.0.0.1:64000' in '.neon'.
pageserver started, pid: 2545906
Successfully initialized timeline de200bd42b49cc1814412c7e592dd6e9
Stopped pageserver 1 process with pid 2545906
Starting pageserver at '127.0.0.1:64000' in '.neon'

Pageserver started
Successfully initialized timeline 7dd0907914ac399ff3be45fb252bfdb7
Stopping pageserver gracefully...done!

# start pageserver and safekeeper
> ./target/debug/neon_local start
Starting etcd broker using "/usr/bin/etcd"
etcd started, pid: 2545996
Starting pageserver at '127.0.0.1:64000' in '.neon'.
pageserver started, pid: 2546005
Starting safekeeper at '127.0.0.1:5454' in '.neon/safekeepers/sk1'.
safekeeper 1 started, pid: 2546041
Starting etcd broker using /usr/bin/etcd
Starting pageserver at '127.0.0.1:64000' in '.neon'

Pageserver started
Starting safekeeper at '127.0.0.1:5454' in '.neon/safekeepers/sk1'
Safekeeper started

# start postgres compute node
> ./target/debug/neon_local pg start main
Starting new postgres (v14) main on timeline de200bd42b49cc1814412c7e592dd6e9 ...
Starting new postgres main on timeline de200bd42b49cc1814412c7e592dd6e9 ...
Extracting base backup to create postgres instance: path=.neon/pgdatadirs/tenants/9ef87a5bf0d92544f6fafeeb3239695c/main port=55432
Starting postgres node at 'host=127.0.0.1 port=55432 user=cloud_admin dbname=postgres'

cli-v2-story.md (new file): 188 lines

@@ -0,0 +1,188 @@
Create a new Zenith repository in the current directory:

~/git-sandbox/zenith (cli-v2)$ ./target/debug/cli init
The files belonging to this database system will be owned by user "heikki".
This user must also own the server process.

The database cluster will be initialized with locale "en_GB.UTF-8".
The default database encoding has accordingly been set to "UTF8".
The default text search configuration will be set to "english".

Data page checksums are disabled.

creating directory tmp ... ok
creating subdirectories ... ok
selecting dynamic shared memory implementation ... posix
selecting default max_connections ... 100
selecting default shared_buffers ... 128MB
selecting default time zone ... Europe/Helsinki
creating configuration files ... ok
running bootstrap script ... ok
performing post-bootstrap initialization ... ok
syncing data to disk ... ok

initdb: warning: enabling "trust" authentication for local connections
You can change this by editing pg_hba.conf or using the option -A, or
--auth-local and --auth-host, the next time you run initdb.
new zenith repository was created in .zenith

Initially, there is only one branch:

~/git-sandbox/zenith (cli-v2)$ ./target/debug/cli branch
main

Start a local Postgres instance on the branch:

~/git-sandbox/zenith (cli-v2)$ ./target/debug/cli start main
Creating data directory from snapshot at 0/15FFB08...
waiting for server to start....2021-04-13 09:27:43.919 EEST [984664] LOG: starting PostgreSQL 14devel on x86_64-pc-linux-gnu, compiled by gcc (Debian 10.2.1-6) 10.2.1 20210110, 64-bit
2021-04-13 09:27:43.920 EEST [984664] LOG: listening on IPv6 address "::1", port 5432
2021-04-13 09:27:43.920 EEST [984664] LOG: listening on IPv4 address "127.0.0.1", port 5432
2021-04-13 09:27:43.927 EEST [984664] LOG: listening on Unix socket "/tmp/.s.PGSQL.5432"
2021-04-13 09:27:43.939 EEST [984665] LOG: database system was interrupted; last known up at 2021-04-13 09:27:33 EEST
2021-04-13 09:27:43.939 EEST [984665] LOG: creating missing WAL directory "pg_wal/archive_status"
2021-04-13 09:27:44.189 EEST [984665] LOG: database system was not properly shut down; automatic recovery in progress
2021-04-13 09:27:44.195 EEST [984665] LOG: invalid record length at 0/15FFB80: wanted 24, got 0
2021-04-13 09:27:44.195 EEST [984665] LOG: redo is not required
2021-04-13 09:27:44.225 EEST [984664] LOG: database system is ready to accept connections
done
server started

Run some commands against it:

~/git-sandbox/zenith (cli-v2)$ psql postgres -c "create table foo (t text);"
CREATE TABLE
~/git-sandbox/zenith (cli-v2)$ psql postgres -c "insert into foo values ('inserted on the main branch');"
INSERT 0 1
~/git-sandbox/zenith (cli-v2)$ psql postgres -c "select * from foo"
              t
-----------------------------
 inserted on the main branch
(1 row)

Create a new branch called 'experimental'. We create it from the
current end of the 'main' branch, but you could specify a different
LSN as the start point instead.

~/git-sandbox/zenith (cli-v2)$ ./target/debug/cli branch experimental main
branching at end of WAL: 0/161F478

~/git-sandbox/zenith (cli-v2)$ ./target/debug/cli branch
experimental
main

Start another Postgres instance off the 'experimental' branch:

~/git-sandbox/zenith (cli-v2)$ ./target/debug/cli start experimental -- -o -p5433
Creating data directory from snapshot at 0/15FFB08...
waiting for server to start....2021-04-13 09:28:41.874 EEST [984766] LOG: starting PostgreSQL 14devel on x86_64-pc-linux-gnu, compiled by gcc (Debian 10.2.1-6) 10.2.1 20210110, 64-bit
2021-04-13 09:28:41.875 EEST [984766] LOG: listening on IPv6 address "::1", port 5433
2021-04-13 09:28:41.875 EEST [984766] LOG: listening on IPv4 address "127.0.0.1", port 5433
2021-04-13 09:28:41.883 EEST [984766] LOG: listening on Unix socket "/tmp/.s.PGSQL.5433"
2021-04-13 09:28:41.896 EEST [984767] LOG: database system was interrupted; last known up at 2021-04-13 09:27:33 EEST
2021-04-13 09:28:42.265 EEST [984767] LOG: database system was not properly shut down; automatic recovery in progress
2021-04-13 09:28:42.269 EEST [984767] LOG: redo starts at 0/15FFB80
2021-04-13 09:28:42.272 EEST [984767] LOG: invalid record length at 0/161F4B0: wanted 24, got 0
2021-04-13 09:28:42.272 EEST [984767] LOG: redo done at 0/161F478 system usage: CPU: user: 0.00 s, system: 0.00 s, elapsed: 0.00 s
2021-04-13 09:28:42.321 EEST [984766] LOG: database system is ready to accept connections
done
server started

Insert a row on the 'experimental' branch:

~/git-sandbox/zenith (cli-v2)$ psql postgres -p5433 -c "select * from foo"
              t
-----------------------------
 inserted on the main branch
(1 row)

~/git-sandbox/zenith (cli-v2)$ psql postgres -p5433 -c "insert into foo values ('inserted on experimental')"
INSERT 0 1
~/git-sandbox/zenith (cli-v2)$ psql postgres -p5433 -c "select * from foo"
              t
-----------------------------
 inserted on the main branch
 inserted on experimental
(2 rows)

See that the other Postgres instance is still running on the 'main' branch on port 5432:

~/git-sandbox/zenith (cli-v2)$ psql postgres -p5432 -c "select * from foo"
              t
-----------------------------
 inserted on the main branch
(1 row)

Everything is stored in the .zenith directory:

~/git-sandbox/zenith (cli-v2)$ ls -l .zenith/
total 12
drwxr-xr-x 4 heikki heikki 4096 Apr 13 09:28 datadirs
drwxr-xr-x 4 heikki heikki 4096 Apr 13 09:27 refs
drwxr-xr-x 4 heikki heikki 4096 Apr 13 09:28 timelines

The 'datadirs' directory contains the datadirs of the running instances:

~/git-sandbox/zenith (cli-v2)$ ls -l .zenith/datadirs/
total 8
drwx------ 18 heikki heikki 4096 Apr 13 09:27 3c0c634c1674079b2c6d4edf7c91523e
drwx------ 18 heikki heikki 4096 Apr 13 09:28 697e3c103d4b1763cd6e82e4ff361d76
~/git-sandbox/zenith (cli-v2)$ ls -l .zenith/datadirs/3c0c634c1674079b2c6d4edf7c91523e/
total 124
drwxr-xr-x 5 heikki heikki  4096 Apr 13 09:27 base
drwxr-xr-x 2 heikki heikki  4096 Apr 13 09:27 global
drwxr-xr-x 2 heikki heikki  4096 Apr 13 09:27 pg_commit_ts
drwxr-xr-x 2 heikki heikki  4096 Apr 13 09:27 pg_dynshmem
-rw------- 1 heikki heikki  4760 Apr 13 09:27 pg_hba.conf
-rw------- 1 heikki heikki  1636 Apr 13 09:27 pg_ident.conf
drwxr-xr-x 4 heikki heikki  4096 Apr 13 09:32 pg_logical
drwxr-xr-x 4 heikki heikki  4096 Apr 13 09:27 pg_multixact
drwxr-xr-x 2 heikki heikki  4096 Apr 13 09:27 pg_notify
drwxr-xr-x 2 heikki heikki  4096 Apr 13 09:27 pg_replslot
drwxr-xr-x 2 heikki heikki  4096 Apr 13 09:27 pg_serial
drwxr-xr-x 2 heikki heikki  4096 Apr 13 09:27 pg_snapshots
drwxr-xr-x 2 heikki heikki  4096 Apr 13 09:27 pg_stat
drwxr-xr-x 2 heikki heikki  4096 Apr 13 09:34 pg_stat_tmp
drwxr-xr-x 2 heikki heikki  4096 Apr 13 09:27 pg_subtrans
drwxr-xr-x 2 heikki heikki  4096 Apr 13 09:27 pg_tblspc
drwxr-xr-x 2 heikki heikki  4096 Apr 13 09:27 pg_twophase
-rw------- 1 heikki heikki     3 Apr 13 09:27 PG_VERSION
lrwxrwxrwx 1 heikki heikki    52 Apr 13 09:27 pg_wal -> ../../timelines/3c0c634c1674079b2c6d4edf7c91523e/wal
drwxr-xr-x 2 heikki heikki  4096 Apr 13 09:27 pg_xact
-rw------- 1 heikki heikki    88 Apr 13 09:27 postgresql.auto.conf
-rw------- 1 heikki heikki 28688 Apr 13 09:27 postgresql.conf
-rw------- 1 heikki heikki    96 Apr 13 09:27 postmaster.opts
-rw------- 1 heikki heikki   149 Apr 13 09:27 postmaster.pid

Note how 'pg_wal' is just a symlink to the 'timelines' directory. The
datadir is ephemeral: you can delete it at any time, and it can be reconstructed
from the snapshots and WAL stored in the 'timelines' directory. So if you push/pull
the repository, the 'datadirs' are not included. (They are like git working trees)

~/git-sandbox/zenith (cli-v2)$ killall -9 postgres
~/git-sandbox/zenith (cli-v2)$ rm -rf .zenith/datadirs/*
~/git-sandbox/zenith (cli-v2)$ ./target/debug/cli start experimental -- -o -p5433
Creating data directory from snapshot at 0/15FFB08...
waiting for server to start....2021-04-13 09:37:05.476 EEST [985340] LOG: starting PostgreSQL 14devel on x86_64-pc-linux-gnu, compiled by gcc (Debian 10.2.1-6) 10.2.1 20210110, 64-bit
2021-04-13 09:37:05.477 EEST [985340] LOG: listening on IPv6 address "::1", port 5433
2021-04-13 09:37:05.477 EEST [985340] LOG: listening on IPv4 address "127.0.0.1", port 5433
2021-04-13 09:37:05.487 EEST [985340] LOG: listening on Unix socket "/tmp/.s.PGSQL.5433"
2021-04-13 09:37:05.498 EEST [985341] LOG: database system was interrupted; last known up at 2021-04-13 09:27:33 EEST
2021-04-13 09:37:05.808 EEST [985341] LOG: database system was not properly shut down; automatic recovery in progress
2021-04-13 09:37:05.813 EEST [985341] LOG: redo starts at 0/15FFB80
2021-04-13 09:37:05.815 EEST [985341] LOG: invalid record length at 0/161F770: wanted 24, got 0
2021-04-13 09:37:05.815 EEST [985341] LOG: redo done at 0/161F738 system usage: CPU: user: 0.00 s, system: 0.00 s, elapsed: 0.00 s
2021-04-13 09:37:05.866 EEST [985340] LOG: database system is ready to accept connections
done
server started
~/git-sandbox/zenith (cli-v2)$ psql postgres -p5433 -c "select * from foo"
              t
-----------------------------
 inserted on the main branch
 inserted on experimental
(2 rows)

@@ -26,18 +26,8 @@ use nix::unistd::Pid;

use utils::lock_file;

// These constants control the loop used to poll for process start / stop.
//
// The loop waits for at most 10 seconds, polling every 100 ms.
// Once a second, it prints a dot ("."), to give the user an indication that
// it's waiting. If the process hasn't started/stopped after 5 seconds,
// it prints a notice that it's taking long, but keeps waiting.
//
const RETRY_UNTIL_SECS: u64 = 10;
const RETRIES: u64 = (RETRY_UNTIL_SECS * 1000) / RETRY_INTERVAL_MILLIS;
const RETRY_INTERVAL_MILLIS: u64 = 100;
const DOT_EVERY_RETRIES: u64 = 10;
const NOTICE_AFTER_RETRIES: u64 = 50;
const RETRIES: u32 = 15;
const RETRY_TIMEOUT_MILLIS: u64 = 500;

/// Argument to `start_process`, to indicate whether it should create pidfile or if the process creates
/// it itself.
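For orientation, the removed constants fit together as follows: 100 polls at 100 ms each give the 10-second budget, a dot every 10 polls prints once per second, and the notice fires at poll 50, i.e. after 5 seconds. A minimal sketch of that loop shape (the `wait_until` helper is hypothetical, not the actual `start_process` code):

```rust
use std::io::{self, Write};
use std::thread;
use std::time::Duration;

const RETRY_UNTIL_SECS: u64 = 10;
const RETRY_INTERVAL_MILLIS: u64 = 100;
const RETRIES: u64 = (RETRY_UNTIL_SECS * 1000) / RETRY_INTERVAL_MILLIS; // = 100
const DOT_EVERY_RETRIES: u64 = 10; // once per second
const NOTICE_AFTER_RETRIES: u64 = 50; // after 5 seconds

/// Polls `condition` until it holds or the 10-second budget runs out.
fn wait_until(mut condition: impl FnMut() -> bool) -> bool {
    for retries in 0..RETRIES {
        if condition() {
            return true;
        }
        if retries == NOTICE_AFTER_RETRIES {
            // Taking a long time; tell the user, but keep waiting.
            print!("\nstill waiting, continuing to wait");
        }
        if retries % DOT_EVERY_RETRIES == 0 {
            print!(".");
            io::stdout().flush().unwrap();
        }
        thread::sleep(Duration::from_millis(RETRY_INTERVAL_MILLIS));
    }
    false
}
```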

@@ -117,16 +107,16 @@ where
                return Ok(spawned_process);
            }
            Ok(false) => {
                if retries == NOTICE_AFTER_RETRIES {
                    // The process is taking a long time to start up. Keep waiting, but
                    // print a message
                    print!("\n{process_name} has not started yet, continuing to wait");
                }
                if retries % DOT_EVERY_RETRIES == 0 {
                if retries < 5 {
                    print!(".");
                    io::stdout().flush().unwrap();
                } else {
                    if retries == 5 {
                        println!() // put a line break after dots for second message
                    }
                    println!("{process_name} has not started yet, retrying ({retries})...");
                }
                thread::sleep(Duration::from_millis(RETRY_INTERVAL_MILLIS));
                thread::sleep(Duration::from_millis(RETRY_TIMEOUT_MILLIS));
            }
            Err(e) => {
                println!("{process_name} failed to start: {e:#}");

@@ -137,8 +127,7 @@ where
            }
        }
    }
    println!();
    anyhow::bail!("{process_name} did not start in {RETRY_UNTIL_SECS} seconds");
    anyhow::bail!("{process_name} could not start in {RETRIES} attempts");
}

/// Stops the process, using the pid file given. Returns Ok also if the process is already not running.

@@ -169,7 +158,7 @@ pub fn stop_process(immediate: bool, process_name: &str, pid_file: &Path) -> any
    }

    // Wait until process is gone
    for retries in 0..RETRIES {
    for _ in 0..RETRIES {
        match process_has_stopped(pid) {
            Ok(true) => {
                println!("\n{process_name} stopped");

@@ -181,16 +170,9 @@ pub fn stop_process(immediate: bool, process_name: &str, pid_file: &Path) -> any
                return Ok(());
            }
            Ok(false) => {
                if retries == NOTICE_AFTER_RETRIES {
                    // The process is taking a long time to start up. Keep waiting, but
                    // print a message
                    print!("\n{process_name} has not stopped yet, continuing to wait");
                }
                if retries % DOT_EVERY_RETRIES == 0 {
                    print!(".");
                    io::stdout().flush().unwrap();
                }
                thread::sleep(Duration::from_millis(RETRY_INTERVAL_MILLIS));
                print!(".");
                io::stdout().flush().unwrap();
                thread::sleep(Duration::from_secs(1))
            }
            Err(e) => {
                println!("{process_name} with pid {pid} failed to stop: {e:#}");

@@ -198,21 +180,24 @@ pub fn stop_process(immediate: bool, process_name: &str, pid_file: &Path) -> any
            }
        }
    }
    println!();
    anyhow::bail!("{process_name} with pid {pid} did not stop in {RETRY_UNTIL_SECS} seconds");

    anyhow::bail!("{process_name} with pid {pid} failed to stop in {RETRIES} attempts");
}

fn fill_rust_env_vars(cmd: &mut Command) -> &mut Command {
    let mut filled_cmd = cmd.env_clear().env("RUST_BACKTRACE", "1");

    // Pass through these environment variables to the command
    for var in ["LLVM_PROFILE_FILE", "FAILPOINTS", "RUST_LOG"] {
        if let Some(val) = std::env::var_os(var) {
            filled_cmd = filled_cmd.env(var, val);
        }
    let var = "LLVM_PROFILE_FILE";
    if let Some(val) = std::env::var_os(var) {
        filled_cmd = filled_cmd.env(var, val);
    }

    filled_cmd
    const RUST_LOG_KEY: &str = "RUST_LOG";
    if let Ok(rust_log_value) = std::env::var(RUST_LOG_KEY) {
        filled_cmd.env(RUST_LOG_KEY, rust_log_value)
    } else {
        filled_cmd
    }
}

fn fill_aws_secrets_vars(mut cmd: &mut Command) -> &mut Command {

@@ -343,7 +343,7 @@ impl PostgresNode {
        // To be able to restore database in case of pageserver node crash, safekeeper should not
        // remove WAL beyond this point. Too large lag can cause space exhaustion in safekeepers
        // (if they are not able to upload WAL to S3).
        conf.append("max_replication_write_lag", "15MB");
        conf.append("max_replication_write_lag", "500MB");
        conf.append("max_replication_flush_lag", "10GB");

        if !self.env.safekeepers.is_empty() {

@@ -6,7 +6,7 @@ use crate::{background_process, local_env};

pub fn start_etcd_process(env: &local_env::LocalEnv) -> anyhow::Result<()> {
    let etcd_broker = &env.etcd_broker;
    print!(
    println!(
        "Starting etcd broker using {:?}",
        etcd_broker.etcd_binary_path
    );

@@ -237,7 +237,7 @@ impl PageServerNode {
        datadir: &Path,
        update_config: bool,
    ) -> anyhow::Result<Child> {
        print!(
        println!(
            "Starting pageserver at '{}' in '{}'",
            self.pg_connection_config.raw_address(),
            datadir.display()

@@ -83,16 +83,6 @@ A subject for future modularization.
`/libs/metrics`:
Helpers for exposing Prometheus metrics from the server.

### Adding dependencies
When you add a Cargo dependency, you should update the hakari manifest by running the commands below and committing the updated `Cargo.lock` and `workspace_hack/`. There may be no changes; that's fine.

```bash
cargo hakari generate
cargo hakari manage-deps
```

If you don't have hakari installed (`error: no such subcommand: hakari`), install it by running `cargo install cargo-hakari`.

## Using Python
Note that Debian/Ubuntu Python packages are stale, as it commonly happens,
so manual installation of dependencies is not recommended.

@@ -1,12 +0,0 @@
[package]
name = "persistent_range_query"
version = "0.1.0"
edition = "2021"

# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html

[dependencies]
workspace_hack = { version = "0.1", path = "../../workspace_hack" }

[dev-dependencies]
rand = "0.8.3"

@@ -1,78 +0,0 @@
use std::ops::Range;

pub mod naive;
pub mod ops;
pub mod segment_tree;

/// Should be a monoid:
/// * Identity element: for all a: combine(new_for_empty_range(), a) = combine(a, new_for_empty_range()) = a
/// * Associativity: for all a, b, c: combine(combine(a, b), c) == combine(a, combine(b, c))
pub trait RangeQueryResult<Key>: Sized + Clone {
    // Clone is equivalent to combine with an empty range.

    fn new_for_empty_range() -> Self;

    // Contract: left_range.end == right_range.start
    // left_range.start == left_range.end == right_range.start == right_range.end is still possible
    fn combine(
        left: &Self,
        left_range: &Range<Key>,
        right: &Self,
        right_range: &Range<Key>,
    ) -> Self;

    fn add(left: &mut Self, left_range: &Range<Key>, right: &Self, right_range: &Range<Key>);
}

pub trait LazyRangeInitializer<Result: RangeQueryResult<Key>, Key> {
    fn get(&self, range: &Range<Key>) -> Result;
}

/// Should be a monoid:
/// * Identity element: for all op: compose(no_op(), op) == compose(op, no_op()) == op
/// * Associativity: for all op_1, op_2, op_3: compose(compose(op_1, op_2), op_3) == compose(op_1, compose(op_2, op_3))
///
/// Should act on Result from the left:
/// * Identity operation: for all r: no_op().apply(r) == r
/// * Compatibility: for all op_1, op_2, r: op_1.apply(op_2.apply(r)) == compose(op_1, op_2).apply(r)
pub trait RangeModification<Key> {
    type Result: RangeQueryResult<Key>;

    fn no_op() -> Self;
    fn is_no_op(&self) -> bool;
    fn is_reinitialization(&self) -> bool;
    fn apply(&self, result: &mut Self::Result, range: &Range<Key>);
    fn compose(later: &Self, earlier: &mut Self);
}

pub trait VecReadableVersion<Modification: RangeModification<Key>, Key> {
    fn get(&self, keys: &Range<Key>) -> Modification::Result;
}

// TODO: use trait alias when stabilized
pub trait VecFrozenVersion<Modification: RangeModification<Key>, Key>:
    Clone + VecReadableVersion<Modification, Key>
{
}

impl<
        T: Clone + VecReadableVersion<Modification, Key>,
        Modification: RangeModification<Key>,
        Key,
    > VecFrozenVersion<Modification, Key> for T
{
}

pub trait PersistentVecStorage<
    Modification: RangeModification<Key>,
    Initializer: LazyRangeInitializer<Modification::Result, Key>,
    Key,
>: VecReadableVersion<Modification, Key>
{
    fn new(all_keys: Range<Key>, initializer: Initializer) -> Self;

    type FrozenVersion: VecFrozenVersion<Modification, Key>;

    fn modify(&mut self, keys: &Range<Key>, modification: &Modification);
    fn freeze(&mut self) -> Self::FrozenVersion;
}
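To make the monoid contract above concrete, here is a minimal illustrative implementer (not part of the crate): a result that merely counts how many elements a range covers, for which the identity and associativity laws hold trivially.

```rust
use persistent_range_query::RangeQueryResult;
use std::ops::Range;

/// Illustrative only: counts the elements covered by a range.
#[derive(Clone, Debug, PartialEq)]
struct CountResult {
    count: usize,
}

impl<Key> RangeQueryResult<Key> for CountResult {
    // Identity element: combining with an empty range adds 0.
    fn new_for_empty_range() -> Self {
        CountResult { count: 0 }
    }

    // Associative: (a + b) + c == a + (b + c) for counts.
    fn combine(
        left: &Self,
        _left_range: &Range<Key>,
        right: &Self,
        _right_range: &Range<Key>,
    ) -> Self {
        CountResult {
            count: left.count + right.count,
        }
    }

    fn add(left: &mut Self, _left_range: &Range<Key>, right: &Self, _right_range: &Range<Key>) {
        left.count += right.count;
    }
}
```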

@@ -1,115 +0,0 @@
use crate::{
    LazyRangeInitializer, PersistentVecStorage, RangeModification, RangeQueryResult,
    VecReadableVersion,
};
use std::marker::PhantomData;
use std::ops::Range;
use std::rc::Rc;

pub struct NaiveFrozenVersion<Modification: RangeModification<Key>, Key> {
    all_keys: Range<Key>,
    values: Rc<Box<Vec<Modification::Result>>>,
}

pub trait IndexableKey: Clone {
    fn index(all_keys: &Range<Self>, key: &Self) -> usize;
    fn element_range(all_keys: &Range<Self>, index: usize) -> Range<Self>;
}

fn get<Modification: RangeModification<Key>, Key: IndexableKey>(
    all_keys: &Range<Key>,
    values: &Vec<Modification::Result>,
    keys: &Range<Key>,
) -> Modification::Result {
    let mut result = Modification::Result::new_for_empty_range();
    let mut result_range = keys.start.clone()..keys.start.clone();
    for index in
        IndexableKey::index(&all_keys, &keys.start)..IndexableKey::index(&all_keys, &keys.end)
    {
        let element_range = IndexableKey::element_range(&all_keys, index);
        Modification::Result::add(&mut result, &result_range, &values[index], &element_range);
        result_range.end = element_range.end;
    }
    result
}

impl<Modification: RangeModification<Key>, Key: IndexableKey> VecReadableVersion<Modification, Key>
    for NaiveFrozenVersion<Modification, Key>
{
    fn get(&self, keys: &Range<Key>) -> Modification::Result {
        get::<Modification, Key>(&self.all_keys, &self.values, keys)
    }
}

// Manual implementation of `Clone` because `derive` requires `Modification: Clone`
impl<Modification: RangeModification<Key>, Key: Clone> Clone
    for NaiveFrozenVersion<Modification, Key>
{
    fn clone(&self) -> Self {
        Self {
            all_keys: self.all_keys.clone(),
            values: self.values.clone(),
        }
    }
}

// TODO: is it at all possible to store previous versions in this struct,
// without any Rc<>?
pub struct NaiveVecStorage<
    Modification: RangeModification<Key>,
    Initializer: LazyRangeInitializer<Modification::Result, Key>,
    Key: IndexableKey,
> {
    all_keys: Range<Key>,
    last_version: Vec<Modification::Result>,
    _initializer: PhantomData<Initializer>,
}

impl<
        Modification: RangeModification<Key>,
        Initializer: LazyRangeInitializer<Modification::Result, Key>,
        Key: IndexableKey,
    > VecReadableVersion<Modification, Key> for NaiveVecStorage<Modification, Initializer, Key>
{
    fn get(&self, keys: &Range<Key>) -> Modification::Result {
        get::<Modification, Key>(&self.all_keys, &self.last_version, keys)
    }
}

impl<
        Modification: RangeModification<Key>,
        Initializer: LazyRangeInitializer<Modification::Result, Key>,
        Key: IndexableKey,
    > PersistentVecStorage<Modification, Initializer, Key>
    for NaiveVecStorage<Modification, Initializer, Key>
{
    fn new(all_keys: Range<Key>, initializer: Initializer) -> Self {
        let mut values = Vec::with_capacity(IndexableKey::index(&all_keys, &all_keys.end));
        for index in 0..values.capacity() {
            values.push(initializer.get(&IndexableKey::element_range(&all_keys, index)));
        }
        NaiveVecStorage {
            all_keys,
            last_version: values,
            _initializer: PhantomData,
        }
    }

    type FrozenVersion = NaiveFrozenVersion<Modification, Key>;

    fn modify(&mut self, keys: &Range<Key>, modification: &Modification) {
        for index in IndexableKey::index(&self.all_keys, &keys.start)
            ..IndexableKey::index(&self.all_keys, &keys.end)
        {
            let element_range = IndexableKey::element_range(&self.all_keys, index);
            modification.apply(&mut self.last_version[index], &element_range);
        }
    }

    fn freeze(&mut self) -> Self::FrozenVersion {
        NaiveFrozenVersion::<Modification, Key> {
            all_keys: self.all_keys.clone(),
            values: Rc::new(Box::new(self.last_version.clone())),
        }
    }
}

@@ -1,14 +0,0 @@
pub mod rsq;

#[derive(Copy, Clone, Debug)]
pub struct SameElementsInitializer<T> {
    initial_element_value: T,
}

impl<T> SameElementsInitializer<T> {
    pub fn new(initial_element_value: T) -> Self {
        SameElementsInitializer {
            initial_element_value,
        }
    }
}

@@ -1,118 +0,0 @@
//! # Range Sum Query

use crate::ops::SameElementsInitializer;
use crate::{LazyRangeInitializer, RangeModification, RangeQueryResult};
use std::borrow::Borrow;
use std::ops::{Add, AddAssign, Range};

// TODO: commutative Add

#[derive(Clone, Copy, Debug)]
pub struct SumResult<T> {
    sum: T,
}

impl<T> SumResult<T> {
    pub fn sum(&self) -> &T {
        &self.sum
    }
}

impl<T: Clone + for<'a> AddAssign<&'a T> + From<u8>, Key> RangeQueryResult<Key> for SumResult<T>
where
    for<'a> &'a T: Add<&'a T, Output = T>,
{
    fn new_for_empty_range() -> Self {
        SumResult { sum: 0.into() }
    }

    fn combine(
        left: &Self,
        _left_range: &Range<Key>,
        right: &Self,
        _right_range: &Range<Key>,
    ) -> Self {
        SumResult {
            sum: &left.sum + &right.sum,
        }
    }

    fn add(left: &mut Self, _left_range: &Range<Key>, right: &Self, _right_range: &Range<Key>) {
        left.sum += &right.sum
    }
}

pub trait SumOfSameElements<Key> {
    fn sum(initial_element_value: &Self, keys: &Range<Key>) -> Self;
}

impl<T: SumOfSameElements<Key>, TB: Borrow<T>, Key> LazyRangeInitializer<SumResult<T>, Key>
    for SameElementsInitializer<TB>
where
    SumResult<T>: RangeQueryResult<Key>,
{
    fn get(&self, range: &Range<Key>) -> SumResult<T> {
        SumResult {
            sum: SumOfSameElements::sum(self.initial_element_value.borrow(), range),
        }
    }
}

#[derive(Copy, Clone, Debug)]
pub enum AddAssignModification<T> {
    None,
    Add(T),
    Assign(T),
}

impl<T: Clone + for<'a> AddAssign<&'a T>, Key> RangeModification<Key> for AddAssignModification<T>
where
    SumResult<T>: RangeQueryResult<Key>,
    for<'a> SameElementsInitializer<&'a T>: LazyRangeInitializer<SumResult<T>, Key>,
{
    type Result = SumResult<T>;

    fn no_op() -> Self {
        AddAssignModification::None
    }

    fn is_no_op(&self) -> bool {
        match self {
            AddAssignModification::None => true,
            _ => false,
        }
    }

    fn is_reinitialization(&self) -> bool {
        match self {
            AddAssignModification::Assign(_) => true,
            _ => false,
        }
    }

    fn apply(&self, result: &mut SumResult<T>, range: &Range<Key>) {
        use AddAssignModification::*;
        match self {
            None => {}
            Add(x) | Assign(x) => {
                let to_add = SameElementsInitializer::new(x).get(range).sum;
                if let Assign(_) = self {
                    result.sum = to_add;
                } else {
                    result.sum += &to_add;
                }
            }
        }
    }

    fn compose(later: &Self, earlier: &mut Self) {
        use AddAssignModification::*;
        match (later, earlier) {
            (_, e @ None) => *e = later.clone(),
            (None, _) => {}
            (Assign(_), e) => *e = later.clone(),
            (Add(x), Add(y)) => *y += x,
            (Add(x), Assign(value)) => *value += x,
        }
    }
}
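Putting the pieces together, a sketch of how this machinery was meant to be used (the `Idx` key type and its `SumOfSameElements` impl are illustrative, not from the crate):

```rust
use persistent_range_query::naive::{IndexableKey, NaiveVecStorage};
use persistent_range_query::ops::rsq::{AddAssignModification, SumOfSameElements};
use persistent_range_query::ops::SameElementsInitializer;
use persistent_range_query::{PersistentVecStorage, VecReadableVersion};
use std::ops::Range;

// Illustrative key type: a plain index into a contiguous array.
#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Debug)]
struct Idx(u32);

impl IndexableKey for Idx {
    fn index(all_keys: &Range<Self>, key: &Self) -> usize {
        (key.0 - all_keys.start.0) as usize
    }
    fn element_range(all_keys: &Range<Self>, index: usize) -> Range<Self> {
        Idx(all_keys.start.0 + index as u32)..Idx(all_keys.start.0 + index as u32 + 1)
    }
}

// The sum of a constant over a range is the constant times the range length.
impl SumOfSameElements<Idx> for i64 {
    fn sum(initial: &Self, keys: &Range<Idx>) -> Self {
        *initial * (keys.end.0 - keys.start.0) as i64
    }
}

fn main() {
    // Ten elements, all initially 0.
    let mut s: NaiveVecStorage<AddAssignModification<i64>, _, Idx> =
        NaiveVecStorage::new(Idx(0)..Idx(10), SameElementsInitializer::new(0i64));
    s.modify(&(Idx(0)..Idx(10)), &AddAssignModification::Assign(1)); // all ones
    let v1 = s.freeze(); // snapshot before the next change
    s.modify(&(Idx(3)..Idx(6)), &AddAssignModification::Add(5));
    assert_eq!(*s.get(&(Idx(0)..Idx(10))).sum(), 10 + 3 * 5);
    assert_eq!(*v1.get(&(Idx(0)..Idx(10))).sum(), 10); // frozen version unchanged
}
```

Since `PersistentSegmentTree` (below) implements the same `PersistentVecStorage` trait, it would be a drop-in replacement here once `Idx` also implemented `MidpointableKey`.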

@@ -1,255 +0,0 @@
//! # Segment Tree
//! It is a competitive programming folklore data structure. Do not confuse with the interval tree.

use crate::{LazyRangeInitializer, PersistentVecStorage, RangeQueryResult, VecReadableVersion};
use std::ops::Range;
use std::rc::Rc;

pub trait MidpointableKey: Clone + Ord + Sized {
    fn midpoint(range: &Range<Self>) -> Self;
}

pub trait RangeModification<Key>: Clone + crate::RangeModification<Key> {}

// TODO: use trait alias when stabilized
impl<T: Clone + crate::RangeModification<Key>, Key> RangeModification<Key> for T {}

#[derive(Debug)]
struct Node<Modification: RangeModification<Key>, Key> {
    result: Modification::Result,
    modify_children: Modification,
    left: Option<Rc<Self>>,
    right: Option<Rc<Self>>,
}

// Manual implementation because we don't need `Key: Clone` for this, unlike with `derive`.
impl<Modification: RangeModification<Key>, Key> Clone for Node<Modification, Key> {
    fn clone(&self) -> Self {
        Node {
            result: self.result.clone(),
            modify_children: self.modify_children.clone(),
            left: self.left.clone(),
            right: self.right.clone(),
        }
    }
}

impl<Modification: RangeModification<Key>, Key> Node<Modification, Key> {
    fn new<Initializer: LazyRangeInitializer<Modification::Result, Key>>(
        range: &Range<Key>,
        initializer: &Initializer,
    ) -> Self {
        Node {
            result: initializer.get(range),
            modify_children: Modification::no_op(),
            left: None,
            right: None,
        }
    }

    pub fn apply(&mut self, modification: &Modification, range: &Range<Key>) {
        modification.apply(&mut self.result, range);
        Modification::compose(modification, &mut self.modify_children);
        if self.modify_children.is_reinitialization() {
            self.left = None;
            self.right = None;
        }
    }

    pub fn force_children<Initializer: LazyRangeInitializer<Modification::Result, Key>>(
        &mut self,
        initializer: &Initializer,
        range_left: &Range<Key>,
        range_right: &Range<Key>,
    ) {
        let left = Rc::make_mut(
            self.left
                .get_or_insert_with(|| Rc::new(Node::new(&range_left, initializer))),
        );
        let right = Rc::make_mut(
            self.right
                .get_or_insert_with(|| Rc::new(Node::new(&range_right, initializer))),
        );
        left.apply(&self.modify_children, &range_left);
        right.apply(&self.modify_children, &range_right);
        self.modify_children = Modification::no_op();
    }

    pub fn recalculate_from_children(&mut self, range_left: &Range<Key>, range_right: &Range<Key>) {
        assert!(self.modify_children.is_no_op());
        assert!(self.left.is_some());
        assert!(self.right.is_some());
        self.result = Modification::Result::combine(
            &self.left.as_ref().unwrap().result,
            &range_left,
            &self.right.as_ref().unwrap().result,
            &range_right,
        );
    }
}

fn split_range<Key: MidpointableKey>(range: &Range<Key>) -> (Range<Key>, Range<Key>) {
    let range_left = range.start.clone()..MidpointableKey::midpoint(range);
    let range_right = range_left.end.clone()..range.end.clone();
    (range_left, range_right)
}

pub struct PersistentSegmentTreeVersion<
    Modification: RangeModification<Key>,
    Initializer: LazyRangeInitializer<Modification::Result, Key>,
    Key: Clone,
> {
    root: Rc<Node<Modification, Key>>,
    all_keys: Range<Key>,
    initializer: Rc<Initializer>,
}

// Manual implementation because we don't need `Key: Clone` for this, unlike with `derive`.
impl<
        Modification: RangeModification<Key>,
        Initializer: LazyRangeInitializer<Modification::Result, Key>,
        Key: Clone,
    > Clone for PersistentSegmentTreeVersion<Modification, Initializer, Key>
{
    fn clone(&self) -> Self {
        Self {
            root: self.root.clone(),
            all_keys: self.all_keys.clone(),
            initializer: self.initializer.clone(),
        }
    }
}

fn get<
    Modification: RangeModification<Key>,
    Initializer: LazyRangeInitializer<Modification::Result, Key>,
    Key: MidpointableKey,
>(
    node: &mut Rc<Node<Modification, Key>>,
    node_keys: &Range<Key>,
    initializer: &Initializer,
    keys: &Range<Key>,
) -> Modification::Result {
    if node_keys.end <= keys.start || keys.end <= node_keys.start {
        return Modification::Result::new_for_empty_range();
    }
    if keys.start <= node_keys.start && node_keys.end <= keys.end {
        return node.result.clone();
    }
    let node = Rc::make_mut(node);
    let (left_keys, right_keys) = split_range(node_keys);
    node.force_children(initializer, &left_keys, &right_keys);
    let mut result = get(node.left.as_mut().unwrap(), &left_keys, initializer, keys);
    Modification::Result::add(
        &mut result,
        &left_keys,
        &get(node.right.as_mut().unwrap(), &right_keys, initializer, keys),
        &right_keys,
    );
    result
}

fn modify<
    Modification: RangeModification<Key>,
    Initializer: LazyRangeInitializer<Modification::Result, Key>,
    Key: MidpointableKey,
>(
    node: &mut Rc<Node<Modification, Key>>,
    node_keys: &Range<Key>,
    initializer: &Initializer,
    keys: &Range<Key>,
    modification: &Modification,
) {
    if modification.is_no_op() || node_keys.end <= keys.start || keys.end <= node_keys.start {
        return;
    }
    let node = Rc::make_mut(node);
    if keys.start <= node_keys.start && node_keys.end <= keys.end {
        node.apply(modification, node_keys);
        return;
    }
    let (left_keys, right_keys) = split_range(node_keys);
    node.force_children(initializer, &left_keys, &right_keys);
    modify(
        node.left.as_mut().unwrap(),
        &left_keys,
        initializer,
        keys,
        &modification,
    );
    modify(
        node.right.as_mut().unwrap(),
        &right_keys,
        initializer,
        keys,
        &modification,
    );
    node.recalculate_from_children(&left_keys, &right_keys);
}

impl<
        Modification: RangeModification<Key>,
        Initializer: LazyRangeInitializer<Modification::Result, Key>,
        Key: MidpointableKey,
    > VecReadableVersion<Modification, Key>
    for PersistentSegmentTreeVersion<Modification, Initializer, Key>
{
    fn get(&self, keys: &Range<Key>) -> Modification::Result {
        get(
            &mut self.root.clone(), // TODO: do not always force a branch
            &self.all_keys,
            self.initializer.as_ref(),
            keys,
        )
    }
}

pub struct PersistentSegmentTree<
    Modification: RangeModification<Key>,
    Initializer: LazyRangeInitializer<Modification::Result, Key>,
    Key: MidpointableKey,
>(PersistentSegmentTreeVersion<Modification, Initializer, Key>);

impl<
        Modification: RangeModification<Key>,
        Initializer: LazyRangeInitializer<Modification::Result, Key>,
        Key: MidpointableKey,
    > VecReadableVersion<Modification, Key>
    for PersistentSegmentTree<Modification, Initializer, Key>
{
    fn get(&self, keys: &Range<Key>) -> Modification::Result {
        self.0.get(keys)
    }
}

impl<
        Modification: RangeModification<Key>,
        Initializer: LazyRangeInitializer<Modification::Result, Key>,
        Key: MidpointableKey,
    > PersistentVecStorage<Modification, Initializer, Key>
    for PersistentSegmentTree<Modification, Initializer, Key>
{
    fn new(all_keys: Range<Key>, initializer: Initializer) -> Self {
        PersistentSegmentTree(PersistentSegmentTreeVersion {
            root: Rc::new(Node::new(&all_keys, &initializer)),
            all_keys: all_keys,
            initializer: Rc::new(initializer),
        })
    }

    type FrozenVersion = PersistentSegmentTreeVersion<Modification, Initializer, Key>;

    fn modify(&mut self, keys: &Range<Key>, modification: &Modification) {
        modify(
            &mut self.0.root, // TODO: do not always force a branch
            &self.0.all_keys,
            self.0.initializer.as_ref(),
            keys,
            modification,
        )
    }

    fn freeze(&mut self) -> Self::FrozenVersion {
        self.0.clone()
    }
}
@@ -1,295 +0,0 @@
|
||||
use persistent_range_query::naive::{IndexableKey, NaiveVecStorage};
|
||||
use persistent_range_query::ops::SameElementsInitializer;
|
||||
use persistent_range_query::segment_tree::{MidpointableKey, PersistentSegmentTree};
|
||||
use persistent_range_query::{
|
||||
LazyRangeInitializer, PersistentVecStorage, RangeModification, RangeQueryResult,
|
||||
VecReadableVersion,
|
||||
};
|
||||
use std::cmp::Ordering;
|
||||
use std::ops::Range;
|
||||
|
||||
#[derive(Clone, Copy, Debug, Eq, PartialEq, Ord, PartialOrd)]
|
||||
struct PageIndex(u32);
|
||||
type LayerId = String;
|
||||
|
||||
impl IndexableKey for PageIndex {
|
||||
fn index(all_keys: &Range<Self>, key: &Self) -> usize {
|
||||
(key.0 as usize) - (all_keys.start.0 as usize)
|
||||
}
|
||||
|
||||
fn element_range(all_keys: &Range<Self>, index: usize) -> Range<Self> {
|
||||
PageIndex(all_keys.start.0 + index as u32)..PageIndex(all_keys.start.0 + index as u32 + 1)
|
||||
}
|
||||
}
|
||||
|
||||
impl MidpointableKey for PageIndex {
|
||||
fn midpoint(range: &Range<Self>) -> Self {
|
||||
PageIndex(range.start.0 + (range.end.0 - range.start.0) / 2)
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Clone, Debug, Eq, PartialEq)]
|
||||
struct LayerMapInformation {
|
||||
// Only make sense for a range of length 1.
|
||||
last_layer: Option<LayerId>,
|
||||
last_image_layer: Option<LayerId>,
|
||||
// Work for all ranges
|
||||
max_delta_layers: (usize, Range<PageIndex>),
|
||||
}
|
||||
|
||||
impl LayerMapInformation {
|
||||
fn last_layers(&self) -> (&Option<LayerId>, &Option<LayerId>) {
|
||||
(&self.last_layer, &self.last_image_layer)
|
||||
}
|
||||
|
||||
fn max_delta_layers(&self) -> &(usize, Range<PageIndex>) {
|
||||
&self.max_delta_layers
|
||||
}
|
||||
}
|
||||
|
||||
fn merge_ranges(left: &Range<PageIndex>, right: &Range<PageIndex>) -> Range<PageIndex> {
|
||||
if left.is_empty() {
|
||||
right.clone()
|
||||
} else if right.is_empty() {
|
||||
left.clone()
|
||||
} else if left.end == right.start {
|
||||
left.start..right.end
|
||||
} else {
|
||||
left.clone()
|
||||
}
|
||||
}

impl RangeQueryResult<PageIndex> for LayerMapInformation {
    fn new_for_empty_range() -> Self {
        LayerMapInformation {
            last_layer: None,
            last_image_layer: None,
            max_delta_layers: (0, PageIndex(0)..PageIndex(0)),
        }
    }

    fn combine(
        left: &Self,
        _left_range: &Range<PageIndex>,
        right: &Self,
        _right_range: &Range<PageIndex>,
    ) -> Self {
        // Note that either range may be empty.
        LayerMapInformation {
            last_layer: left
                .last_layer
                .as_ref()
                .or_else(|| right.last_layer.as_ref())
                .cloned(),
            last_image_layer: left
                .last_image_layer
                .as_ref()
                .or_else(|| right.last_image_layer.as_ref())
                .cloned(),
            max_delta_layers: match left.max_delta_layers.0.cmp(&right.max_delta_layers.0) {
                Ordering::Less => right.max_delta_layers.clone(),
                Ordering::Greater => left.max_delta_layers.clone(),
                Ordering::Equal => (
                    left.max_delta_layers.0,
                    merge_ranges(&left.max_delta_layers.1, &right.max_delta_layers.1),
                ),
            },
        }
    }

    fn add(
        left: &mut Self,
        left_range: &Range<PageIndex>,
        right: &Self,
        right_range: &Range<PageIndex>,
    ) {
        *left = Self::combine(&left, left_range, right, right_range);
    }
}

#[derive(Clone, Debug)]
struct AddDeltaLayers {
    last_layer: LayerId,
    count: usize,
}

#[derive(Clone, Debug)]
struct LayerMapModification {
    add_image_layer: Option<LayerId>,
    add_delta_layers: Option<AddDeltaLayers>,
}

impl LayerMapModification {
    fn add_image_layer(layer: impl Into<LayerId>) -> Self {
        LayerMapModification {
            add_image_layer: Some(layer.into()),
            add_delta_layers: None,
        }
    }

    fn add_delta_layer(layer: impl Into<LayerId>) -> Self {
        LayerMapModification {
            add_image_layer: None,
            add_delta_layers: Some(AddDeltaLayers {
                last_layer: layer.into(),
                count: 1,
            }),
        }
    }
}

impl RangeModification<PageIndex> for LayerMapModification {
    type Result = LayerMapInformation;

    fn no_op() -> Self {
        LayerMapModification {
            add_image_layer: None,
            add_delta_layers: None,
        }
    }

    fn is_no_op(&self) -> bool {
        self.add_image_layer.is_none() && self.add_delta_layers.is_none()
    }

    fn is_reinitialization(&self) -> bool {
        self.add_image_layer.is_some()
    }

    fn apply(&self, result: &mut Self::Result, range: &Range<PageIndex>) {
        if let Some(layer) = &self.add_image_layer {
            result.last_layer = Some(layer.clone());
            result.last_image_layer = Some(layer.clone());
            result.max_delta_layers = (0, range.clone());
        }
        if let Some(AddDeltaLayers { last_layer, count }) = &self.add_delta_layers {
            result.last_layer = Some(last_layer.clone());
            result.max_delta_layers.0 += count;
        }
    }

    fn compose(later: &Self, earlier: &mut Self) {
        if later.add_image_layer.is_some() {
            *earlier = later.clone();
            return;
        }
        if let Some(AddDeltaLayers { last_layer, count }) = &later.add_delta_layers {
            let res = earlier.add_delta_layers.get_or_insert(AddDeltaLayers {
                last_layer: LayerId::default(),
                count: 0,
            });
            res.last_layer = last_layer.clone();
            res.count += count;
        }
    }
}

impl LazyRangeInitializer<LayerMapInformation, PageIndex> for SameElementsInitializer<()> {
    fn get(&self, range: &Range<PageIndex>) -> LayerMapInformation {
        LayerMapInformation {
            last_layer: None,
            last_image_layer: None,
            max_delta_layers: (0, range.clone()),
        }
    }
}

fn test_layer_map<
    S: PersistentVecStorage<LayerMapModification, SameElementsInitializer<()>, PageIndex>,
>() {
    let mut s = S::new(
        PageIndex(0)..PageIndex(100),
        SameElementsInitializer::new(()),
    );
    s.modify(
        &(PageIndex(0)..PageIndex(70)),
        &LayerMapModification::add_image_layer("Img0..70"),
    );
    s.modify(
        &(PageIndex(50)..PageIndex(100)),
        &LayerMapModification::add_image_layer("Img50..100"),
    );
    s.modify(
        &(PageIndex(10)..PageIndex(60)),
        &LayerMapModification::add_delta_layer("Delta10..60"),
    );
    let s_before_last_delta = s.freeze();
    s.modify(
        &(PageIndex(20)..PageIndex(80)),
        &LayerMapModification::add_delta_layer("Delta20..80"),
    );

    assert_eq!(
        s.get(&(PageIndex(5)..PageIndex(6))).last_layers(),
        (&Some("Img0..70".to_owned()), &Some("Img0..70".to_owned()))
    );
    assert_eq!(
        s.get(&(PageIndex(15)..PageIndex(16))).last_layers(),
        (
            &Some("Delta10..60".to_owned()),
            &Some("Img0..70".to_owned())
        )
    );
    assert_eq!(
        s.get(&(PageIndex(25)..PageIndex(26))).last_layers(),
        (
            &Some("Delta20..80".to_owned()),
            &Some("Img0..70".to_owned())
        )
    );
    assert_eq!(
        s.get(&(PageIndex(65)..PageIndex(66))).last_layers(),
        (
            &Some("Delta20..80".to_owned()),
            &Some("Img50..100".to_owned())
        )
    );
    assert_eq!(
        s.get(&(PageIndex(95)..PageIndex(96))).last_layers(),
        (
            &Some("Img50..100".to_owned()),
            &Some("Img50..100".to_owned())
        )
    );

    assert_eq!(
        s.get(&(PageIndex(0)..PageIndex(100))).max_delta_layers(),
        &(2, PageIndex(20)..PageIndex(60)),
    );
    assert_eq!(
        *s_before_last_delta
            .get(&(PageIndex(0)..PageIndex(100)))
            .max_delta_layers(),
        (1, PageIndex(10)..PageIndex(60)),
    );

    assert_eq!(
        *s.get(&(PageIndex(10)..PageIndex(30))).max_delta_layers(),
        (2, PageIndex(20)..PageIndex(30))
    );
    assert_eq!(
        *s.get(&(PageIndex(10)..PageIndex(20))).max_delta_layers(),
        (1, PageIndex(10)..PageIndex(20))
    );

    assert_eq!(
        *s.get(&(PageIndex(70)..PageIndex(80))).max_delta_layers(),
        (1, PageIndex(70)..PageIndex(80))
    );
    assert_eq!(
        *s_before_last_delta
            .get(&(PageIndex(70)..PageIndex(80)))
            .max_delta_layers(),
        (0, PageIndex(70)..PageIndex(80))
    );
}

#[test]
fn test_naive() {
    test_layer_map::<NaiveVecStorage<_, _, _>>();
}

#[test]
fn test_segment_tree() {
    test_layer_map::<PersistentSegmentTree<_, _, _>>();
}
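
// Sketch (hypothetical test, same API as above): the core property exercised
// by test_layer_map is that freeze() yields an immutable snapshot while the
// original storage keeps accepting modifications.
#[test]
fn freeze_is_immutable_sketch() {
    let mut s: PersistentSegmentTree<LayerMapModification, _, _> = PersistentSegmentTree::new(
        PageIndex(0)..PageIndex(10),
        SameElementsInitializer::new(()),
    );
    let before = s.freeze();
    s.modify(
        &(PageIndex(0)..PageIndex(10)),
        &LayerMapModification::add_delta_layer("D"),
    );
    assert_eq!(before.get(&(PageIndex(0)..PageIndex(10))).max_delta_layers().0, 0);
    assert_eq!(s.get(&(PageIndex(0)..PageIndex(10))).max_delta_layers().0, 1);
}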
@@ -1,116 +0,0 @@
use persistent_range_query::naive::*;
use persistent_range_query::ops::rsq::AddAssignModification::Add;
use persistent_range_query::ops::rsq::*;
use persistent_range_query::ops::SameElementsInitializer;
use persistent_range_query::segment_tree::{MidpointableKey, PersistentSegmentTree};
use persistent_range_query::{PersistentVecStorage, VecReadableVersion};
use rand::{Rng, SeedableRng};
use std::ops::Range;

#[derive(Copy, Clone, Debug, Eq, PartialEq, Ord, PartialOrd)]
struct K(u16);

impl IndexableKey for K {
    fn index(all_keys: &Range<Self>, key: &Self) -> usize {
        (key.0 as usize) - (all_keys.start.0 as usize)
    }

    fn element_range(all_keys: &Range<Self>, index: usize) -> Range<Self> {
        K(all_keys.start.0 + index as u16)..K(all_keys.start.0 + index as u16 + 1)
    }
}

impl SumOfSameElements<K> for i32 {
    fn sum(initial_element_value: &Self, keys: &Range<K>) -> Self {
        initial_element_value * (keys.end.0 - keys.start.0) as Self
    }
}

impl MidpointableKey for K {
    fn midpoint(range: &Range<Self>) -> Self {
        K(range.start.0 + (range.end.0 - range.start.0) / 2)
    }
}
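
// Illustration (not from the repo): midpoint() is presumably what the segment
// tree uses to split a range in two, so for K it is plain integer halving.
#[test]
fn midpoint_examples() {
    assert_eq!(K::midpoint(&(K(0)..K(12))), K(6));
    assert_eq!(K::midpoint(&(K(3)..K(4))), K(3));
}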

fn test_storage<
    S: PersistentVecStorage<AddAssignModification<i32>, SameElementsInitializer<i32>, K>,
>() {
    let mut s = S::new(K(0)..K(12), SameElementsInitializer::new(0i32));
    assert_eq!(*s.get(&(K(0)..K(12))).sum(), 0);

    s.modify(&(K(2)..K(5)), &AddAssignModification::Add(3));
    assert_eq!(*s.get(&(K(0)..K(12))).sum(), 3 + 3 + 3);
    let s_old = s.freeze();

    s.modify(&(K(3)..K(6)), &AddAssignModification::Assign(10));
    assert_eq!(*s.get(&(K(0)..K(12))).sum(), 3 + 10 + 10 + 10);

    s.modify(&(K(4)..K(7)), &AddAssignModification::Add(2));
    assert_eq!(*s.get(&(K(0)..K(12))).sum(), 3 + 10 + 12 + 12 + 2);

    assert_eq!(*s.get(&(K(4)..K(6))).sum(), 12 + 12);
    assert_eq!(*s_old.get(&(K(4)..K(6))).sum(), 3);
}

#[test]
fn test_naive() {
    test_storage::<NaiveVecStorage<_, _, _>>();
}

#[test]
fn test_segment_tree() {
    test_storage::<PersistentSegmentTree<_, _, _>>();
}

#[test]
fn test_stress() {
    const LEN: u16 = 17_238;
    const OPERATIONS: i32 = 20_000;

    let mut rng = rand::rngs::StdRng::seed_from_u64(0);
    let mut naive: NaiveVecStorage<AddAssignModification<i32>, _, _> =
        NaiveVecStorage::new(K(0)..K(LEN), SameElementsInitializer::new(2i32));
    let mut segm_tree: PersistentSegmentTree<AddAssignModification<i32>, _, _> =
        PersistentSegmentTree::new(K(0)..K(LEN), SameElementsInitializer::new(2i32));

    fn gen_range(rng: &mut impl Rng) -> Range<K> {
        let l: u16 = rng.gen_range(0..LEN);
        let r: u16 = rng.gen_range(0..LEN);
        if l <= r {
            K(l)..K(r)
        } else {
            K(r)..K(l)
        }
    }

    for _ in 0..2 {
        let checksum_range = gen_range(&mut rng);
        let checksum_before: i32 = *naive.get(&checksum_range).sum();
        assert_eq!(checksum_before, *segm_tree.get(&checksum_range).sum());

        let naive_before = naive.freeze();
        let segm_tree_before = segm_tree.freeze();
        assert_eq!(checksum_before, *naive_before.get(&checksum_range).sum());
        assert_eq!(checksum_before, *segm_tree.get(&checksum_range).sum());

        for _ in 0..OPERATIONS {
            {
                let range = gen_range(&mut rng);
                assert_eq!(naive.get(&range).sum(), segm_tree.get(&range).sum());
            }
            {
                let range = gen_range(&mut rng);
                let val = rng.gen_range(-10i32..=10i32);
                let op = Add(val);
                naive.modify(&range, &op);
                segm_tree.modify(&range, &op);
            }
        }

        assert_eq!(checksum_before, *naive_before.get(&checksum_range).sum());
        assert_eq!(
            checksum_before,
            *segm_tree_before.get(&checksum_range).sum()
        );
    }
}
@@ -33,8 +33,8 @@ pub struct Segment {
    /// Logical size before this state
    start_size: u64,

    /// Logical size at this state. Can be None in the last Segment of a branch.
    pub end_size: Option<u64>,
    /// Logical size at this state
    pub end_size: u64,

    /// Indices to [`Storage::segments`]
    ///
@@ -115,7 +115,7 @@ impl<K: std::hash::Hash + Eq + 'static> Storage<K> {
            start_lsn: 0,
            end_lsn: 0,
            start_size: 0,
            end_size: Some(0),
            end_size: 0,
            children_after: Vec::new(),
        };

@@ -125,39 +125,6 @@ impl<K: std::hash::Hash + Eq + 'static> Storage<K> {
        }
    }

    /// Advances the branch with a new point, at given LSN.
    pub fn insert_point<Q: ?Sized>(
        &mut self,
        branch: &Q,
        op: Cow<'static, str>,
        lsn: u64,
        size: Option<u64>,
    ) where
        K: std::borrow::Borrow<Q>,
        Q: std::hash::Hash + Eq,
    {
        let lastseg_id = *self.branches.get(branch).unwrap();
        let newseg_id = self.segments.len();
        let lastseg = &mut self.segments[lastseg_id];

        assert!(lsn > lastseg.end_lsn);

        let newseg = Segment {
            op,
            parent: Some(lastseg_id),
            start_lsn: lastseg.end_lsn,
            end_lsn: lsn,
            start_size: lastseg.end_size.unwrap(),
            end_size: size,
            children_after: Vec::new(),
            needed: false,
        };
        lastseg.children_after.push(newseg_id);

        self.segments.push(newseg);
        *self.branches.get_mut(branch).expect("read already") = newseg_id;
    }
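
    // Hypothetical call sequence (illustrative, not repo code): LSNs must
    // strictly advance on a branch, and a `None` size leaves the end point open.
    //
    //     storage.insert_point(&branch, "checkpoint".into(), 0x10, Some(1024));
    //     storage.insert_point(&branch, "insert".into(), 0x20, Some(4096));
    //     // Would trip the assert above, since 0x15 <= 0x20:
    //     // storage.insert_point(&branch, "stale".into(), 0x15, None);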

    /// Advances the branch with the named operation, by the relative LSN and logical size bytes.
    pub fn modify_branch<Q: ?Sized>(
        &mut self,
@@ -178,8 +145,8 @@ impl<K: std::hash::Hash + Eq + 'static> Storage<K> {
            parent: Some(lastseg_id),
            start_lsn: lastseg.end_lsn,
            end_lsn: lastseg.end_lsn + lsn_bytes,
            start_size: lastseg.end_size.unwrap(),
            end_size: Some((lastseg.end_size.unwrap() as i64 + size_bytes) as u64),
            start_size: lastseg.end_size,
            end_size: (lastseg.end_size as i64 + size_bytes) as u64,
            children_after: Vec::new(),
            needed: false,
        };
@@ -354,7 +321,7 @@ impl<K: std::hash::Hash + Eq + 'static> Storage<K> {
            Some(SegmentSize {
                seg_id,
                method: SnapshotAfter,
                this_size: seg.end_size.unwrap(),
                this_size: seg.end_size,
                children,
            })
        } else {

@@ -174,7 +174,7 @@ fn graphviz_recurse(segments: &[Segment], node: &SegmentSize) {
    let seg_id = node.seg_id;
    let seg = segments.get(seg_id).unwrap();
    let lsn = seg.end_lsn;
    let size = seg.end_size.unwrap_or(0);
    let size = seg.end_size;
    let method = node.method;

    println!(" {{");
@@ -226,7 +226,7 @@ fn graphviz_recurse(segments: &[Segment], node: &SegmentSize) {
            print!(
                " label=\"{} / {}\"",
                next.end_lsn - seg.end_lsn,
                (next.end_size.unwrap_or(0) as i128 - seg.end_size.unwrap_or(0) as i128)
                (next.end_size as i128 - seg.end_size as i128)
            );
        } else {
            print!(" label=\"{}: {}\"", next.op, next.end_lsn - seg.end_lsn);

@@ -48,25 +48,6 @@ pub mod nonblock;
// Default signal handling
pub mod signals;

/// use with fail::cfg("$name", "return(2000)")
#[macro_export]
macro_rules! failpoint_sleep_millis_async {
    ($name:literal) => {{
        let should_sleep: Option<std::time::Duration> = (|| {
            fail::fail_point!($name, |v: Option<_>| {
                let millis = v.unwrap().parse::<u64>().unwrap();
                Some(Duration::from_millis(millis))
            });
            None
        })();
        if let Some(d) = should_sleep {
            tracing::info!("failpoint {:?}: sleeping for {:?}", $name, d);
            tokio::time::sleep(d).await;
            tracing::info!("failpoint {:?}: sleep done", $name);
        }
    }};
}
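
// Usage sketch (hypothetical failpoint name), following the doc comment above:
// arm the failpoint with the fail crate, then any task passing through the
// macro sleeps for the configured number of milliseconds.
//
//     fail::cfg("my-failpoint", "return(2000)").unwrap();
//     // ... later, inside an async fn:
//     utils::failpoint_sleep_millis_async!("my-failpoint"); // sleeps ~2s when armed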

/// This is a shortcut to embed git sha into binaries and avoid copying the same build script to all packages
///
/// we have several cases:

@@ -199,20 +199,6 @@ fn start_pageserver(conf: &'static PageServerConf) -> anyhow::Result<()> {
    logging::init(conf.log_format)?;
    info!("version: {}", version());

    // If any failpoints were set from FAILPOINTS environment variable,
    // print them to the log for debugging purposes
    let failpoints = fail::list();
    if !failpoints.is_empty() {
        info!(
            "started with failpoints: {}",
            failpoints
                .iter()
                .map(|(name, actions)| format!("{name}={actions}"))
                .collect::<Vec<String>>()
                .join(";")
        )
    }

    let lock_file_path = conf.workdir.join(PID_FILE_NAME);
    let lock_file = match lock_file::create_lock_file(&lock_file_path, Pid::this().to_string()) {
        lock_file::LockCreationResult::Created {

@@ -667,7 +667,6 @@ components:
        - disk_consistent_lsn
        - awaits_download
        - state
        - latest_gc_cutoff_lsn
      properties:
        timeline_id:
          type: string
@@ -712,9 +711,6 @@ components:
          type: boolean
        state:
          type: string
        latest_gc_cutoff_lsn:
          type: string
          format: hex

        # These 'local' and 'remote' fields just duplicate some of the fields
        # above. They are kept for backwards-compatibility. They can be removed,

@@ -461,7 +461,14 @@ impl Tenant {
            .context("Cannot branch off the timeline that's not present in pageserver")?;

        if let Some(lsn) = ancestor_start_lsn.as_mut() {
            // Wait for the WAL to arrive and be processed on the parent branch up
            // to the requested branch point. The repository code itself doesn't
            // require it, but if we start to receive WAL on the new timeline,
            // decoding the new WAL might need to look up previous pages, relation
            // sizes etc. and that would get confused if the previous page versions
            // are not in the repository yet.
            *lsn = lsn.align();
            ancestor_timeline.wait_lsn(*lsn).await?;

            let ancestor_ancestor_lsn = ancestor_timeline.get_ancestor_lsn();
            if ancestor_ancestor_lsn > *lsn {
@@ -473,14 +480,6 @@ impl Tenant {
                    ancestor_ancestor_lsn,
                );
            }

            // Wait for the WAL to arrive and be processed on the parent branch up
            // to the requested branch point. The repository code itself doesn't
            // require it, but if we start to receive WAL on the new timeline,
            // decoding the new WAL might need to look up previous pages, relation
            // sizes etc. and that would get confused if the previous page versions
            // are not in the repository yet.
            ancestor_timeline.wait_lsn(*lsn).await?;
        }

        self.branch_timeline(ancestor_timeline_id, new_timeline_id, ancestor_start_lsn)?
@@ -1011,10 +1010,6 @@ impl Tenant {

        let gc_timelines = self.refresh_gc_info_internal(target_timeline_id, horizon, pitr)?;

        utils::failpoint_sleep_millis_async!("gc_iteration_internal_after_getting_gc_timelines");

        info!("starting on {} timelines", gc_timelines.len());

        // Perform GC for each timeline.
        //
        // Note that we don't hold the GC lock here because we don't want

@@ -183,19 +183,6 @@ pub(super) async fn gather_inputs(
        }
    }

    // all timelines also have an end point if they have made any progress
    if last_record_lsn > timeline.get_ancestor_lsn()
        && !interesting_lsns
            .iter()
            .any(|(lsn, _)| lsn == &last_record_lsn)
    {
        updates.push(Update {
            lsn: last_record_lsn,
            command: Command::EndOfBranch,
            timeline_id: timeline.timeline_id,
        });
    }

    timeline_inputs.insert(
        timeline.timeline_id,
        TimelineInputs {
@@ -283,22 +270,48 @@ impl ModelInputs {
        // impossible to always determine a single main branch.
        let mut storage = tenant_size_model::Storage::<Option<TimelineId>>::new(None);

        // Track these so we don't have to modify the current implementation of the
        // size model, which works in relative LSNs and sizes.
        let mut last_state: HashMap<TimelineId, (Lsn, u64)> = HashMap::new();

        for update in &self.updates {
            let Update {
                lsn,
                command: op,
                timeline_id,
            } = update;
            let Lsn(now) = *lsn;
            match op {
                Command::Update(sz) => {
                    storage.insert_point(&Some(*timeline_id), "".into(), now, Some(*sz));
                }
                Command::EndOfBranch => {
                    storage.insert_point(&Some(*timeline_id), "".into(), now, None);
                    let latest = last_state.get_mut(timeline_id).ok_or_else(|| {
                        anyhow::anyhow!(
                            "ordering-mismatch: there must have been a previous state for {timeline_id}"
                        )
                    })?;

                    let lsn_bytes = {
                        let Lsn(now) = lsn;
                        let Lsn(prev) = latest.0;
                        debug_assert!(prev <= *now, "self.updates should have been sorted");
                        now - prev
                    };

                    let size_diff =
                        i64::try_from(*sz as i128 - latest.1 as i128).with_context(|| {
                            format!("size difference i64 overflow for {timeline_id}")
                        })?;

                    storage.modify_branch(&Some(*timeline_id), "".into(), lsn_bytes, size_diff);
                    *latest = (*lsn, *sz);
                }
                Command::BranchFrom(parent) => {
                    storage.branch(parent, Some(*timeline_id));

                    let size = parent
                        .as_ref()
                        .and_then(|id| last_state.get(id))
                        .map(|x| x.1)
                        .unwrap_or(0);
                    last_state.insert(*timeline_id, (*lsn, size));
                }
            }
        }
@@ -307,7 +320,10 @@ impl ModelInputs {
    }
}

/// A point of interest in the tree of branches
/// Single size model update.
///
/// The sizing model works with relative increments over the latest branch state.
/// Updates are absolute, so additional state needs to be tracked when applying them.
#[serde_with::serde_as]
#[derive(
    Debug, PartialEq, PartialOrd, Eq, Ord, Clone, Copy, serde::Serialize, serde::Deserialize,
@@ -326,7 +342,6 @@ struct Update {
enum Command {
    Update(u64),
    BranchFrom(#[serde_as(as = "Option<serde_with::DisplayFromStr>")] Option<TimelineId>),
    EndOfBranch,
}

impl std::fmt::Debug for Command {
@@ -336,7 +351,6 @@ impl std::fmt::Debug for Command {
        match self {
            Self::Update(arg0) => write!(f, "Update({arg0})"),
            Self::BranchFrom(arg0) => write!(f, "BranchFrom({arg0:?})"),
            Self::EndOfBranch => write!(f, "EndOfBranch"),
        }
    }
}
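
// Worked example (made-up numbers) of the absolute-to-relative conversion that
// the `last_state` map above enables:
//
//     let (prev_lsn, prev_size) = (0x10u64, 1000u64); // last_state entry
//     let (now_lsn, now_size) = (0x18u64, 1500u64);   // incoming absolute Update
//     let lsn_bytes = now_lsn - prev_lsn;                 // 8
//     let size_diff = now_size as i64 - prev_size as i64; // +500
//     // These become the relative arguments to modify_branch().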

@@ -61,13 +61,6 @@ use crate::{
    storage_sync::{self, index::LayerFileMetadata},
};

#[derive(Debug, PartialEq, Eq, Clone, Copy)]
enum FlushLoopState {
    NotStarted,
    Running,
    Exited,
}
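
// Sketch (not in the diff): the intended lifecycle of the enum above is one-way,
//
//     NotStarted --maybe_spawn_flush_loop()--> Running --task exit--> Exited
//
// so only a never-started loop may be spawned:
fn flush_loop_can_spawn(state: FlushLoopState) -> bool {
    matches!(state, FlushLoopState::NotStarted)
}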

pub struct Timeline {
    conf: &'static PageServerConf,
    tenant_conf: Arc<RwLock<TenantConfOpt>>,
@@ -129,7 +122,7 @@ pub struct Timeline {
    write_lock: Mutex<()>,

    /// Used to avoid multiple `flush_loop` tasks running
    flush_loop_state: Mutex<FlushLoopState>,
    flush_loop_started: Mutex<bool>,

    /// layer_flush_start_tx can be used to wake up the layer-flushing task.
    /// The value is a counter, incremented every time a new flush cycle is requested.
@@ -762,7 +755,7 @@ impl Timeline {

            upload_layers: AtomicBool::new(upload_layers),

            flush_loop_state: Mutex::new(FlushLoopState::NotStarted),
            flush_loop_started: Mutex::new(false),

            layer_flush_start_tx,
            layer_flush_done_tx,
@@ -801,23 +794,13 @@ impl Timeline {
    }

    pub(super) fn maybe_spawn_flush_loop(self: &Arc<Self>) {
        let mut flush_loop_state = self.flush_loop_state.lock().unwrap();
        match *flush_loop_state {
            FlushLoopState::NotStarted => (),
            FlushLoopState::Running => {
                info!(
                    "skipping attempt to start flush_loop twice {}/{}",
                    self.tenant_id, self.timeline_id
                );
                return;
            }
            FlushLoopState::Exited => {
                warn!(
                    "ignoring attempt to restart exited flush_loop {}/{}",
                    self.tenant_id, self.timeline_id
                );
                return;
            }
        let mut flush_loop_started = self.flush_loop_started.lock().unwrap();
        if *flush_loop_started {
            info!(
                "skipping attempt to start flush_loop twice {}/{}",
                self.tenant_id, self.timeline_id
            );
            return;
        }

        let layer_flush_start_rx = self.layer_flush_start_tx.subscribe();
@@ -830,16 +813,11 @@ impl Timeline {
            Some(self.timeline_id),
            "layer flush task",
            false,
            async move {
                self_clone.flush_loop(layer_flush_start_rx).await;
                let mut flush_loop_state = self_clone.flush_loop_state.lock().unwrap();
                assert_eq!(*flush_loop_state, FlushLoopState::Running);
                *flush_loop_state = FlushLoopState::Exited;
                Ok(()) }
            async move { self_clone.flush_loop(layer_flush_start_rx).await; Ok(()) }
            .instrument(info_span!(parent: None, "layer flush task", tenant = %self.tenant_id, timeline = %self.timeline_id))
        );

        *flush_loop_state = FlushLoopState::Running;
        *flush_loop_started = true;
    }

    pub(super) fn launch_wal_receiver(self: &Arc<Self>) {
@@ -1387,9 +1365,8 @@ impl Timeline {
        // finished, instead of some other flush that was started earlier.
        let mut my_flush_request = 0;

        let flush_loop_state = { *self.flush_loop_state.lock().unwrap() };
        if flush_loop_state != FlushLoopState::Running {
            anyhow::bail!("cannot flush frozen layers when flush_loop is not running, state is {flush_loop_state:?}")
        if !&*self.flush_loop_started.lock().unwrap() {
            anyhow::bail!("cannot flush frozen layers when flush_loop is not running")
        }

        self.layer_flush_start_tx.send_modify(|counter| {

@@ -71,7 +71,7 @@ async fn compaction_loop(tenant_id: TenantId) {
    let mut sleep_duration = tenant.get_compaction_period();
    if let Err(e) = tenant.compaction_iteration() {
        sleep_duration = wait_duration;
        error!("Compaction failed, retrying in {:?}: {e:?}", sleep_duration);
        error!("Compaction failed, retrying in {:?}: {e:#}", sleep_duration);
    }

    // Sleep
@@ -120,7 +120,7 @@ async fn gc_loop(tenant_id: TenantId) {
    if let Err(e) = tenant.gc_iteration(None, gc_horizon, tenant.get_pitr_interval(), false).await
    {
        sleep_duration = wait_duration;
        error!("Gc failed, retrying in {:?}: {e:?}", sleep_duration);
        error!("Gc failed, retrying in {:?}: {e:#}", sleep_duration);
    }
}


@@ -32,6 +32,11 @@

#define PageStoreTrace DEBUG5

#define NEON_TAG "[NEON_SMGR] "
#define neon_log(tag, fmt, ...) ereport(tag, \
		(errmsg(NEON_TAG fmt, ##__VA_ARGS__), \
		 errhidestmt(true), errhidecontext(true)))
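
/*
 * Usage sketch (illustrative, not part of the diff): the macro prefixes every
 * message with NEON_TAG and suppresses statement/context noise, e.g.:
 *
 *     neon_log(LOG, "connecting to pageserver");
 *     neon_log(ERROR, "end of COPY");   (ERROR-level ereport() does not return)
 */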

bool		connected = false;
PGconn	   *pageserver_conn = NULL;

@@ -92,10 +97,11 @@ pageserver_connect()

	while (PQisBusy(pageserver_conn))
	{
		int			wc;
		WaitEvent	event;

		/* Sleep until there's something to do */
		(void) WaitEventSetWait(pageserver_conn_wes, -1L, &event, 1, PG_WAIT_EXTENSION);
		wc = WaitEventSetWait(pageserver_conn_wes, -1L, &event, 1, PG_WAIT_EXTENSION);
		ResetLatch(MyLatch);

		CHECK_FOR_INTERRUPTS();
@@ -135,10 +141,11 @@ retry:

	if (ret == 0)
	{
		int			wc;
		WaitEvent	event;

		/* Sleep until there's something to do */
		(void) WaitEventSetWait(pageserver_conn_wes, -1L, &event, 1, PG_WAIT_EXTENSION);
		wc = WaitEventSetWait(pageserver_conn_wes, -1L, &event, 1, PG_WAIT_EXTENSION);
		ResetLatch(MyLatch);

		CHECK_FOR_INTERRUPTS();
@@ -232,9 +239,6 @@ pageserver_receive(void)
	StringInfoData resp_buff;
	NeonResponse *resp;

	if (!connected)
		return NULL;

	PG_TRY();
	{
		/* read response */
@@ -244,10 +248,7 @@ pageserver_receive(void)
		if (resp_buff.len < 0)
		{
			if (resp_buff.len == -1)
			{
				pageserver_disconnect();
				return NULL;
			}
				neon_log(ERROR, "end of COPY");
			else if (resp_buff.len == -2)
				neon_log(ERROR, "could not read COPY data: %s", PQerrorMessage(pageserver_conn));
		}

@@ -49,11 +49,6 @@ typedef struct

#define messageTag(m) (((const NeonMessage *)(m))->tag)

#define NEON_TAG "[NEON_SMGR] "
#define neon_log(tag, fmt, ...) ereport(tag, \
		(errmsg(NEON_TAG fmt, ##__VA_ARGS__), \
		 errhidestmt(true), errhidecontext(true)))

/*
 * supertype of all the Neon*Request structs below
 *


@@ -251,9 +251,9 @@ XLogRecPtr	prefetch_lsn = 0;

static void consume_prefetch_responses(void);
static uint64 prefetch_register_buffer(BufferTag tag, bool *force_latest, XLogRecPtr *force_lsn);
static bool prefetch_read(PrefetchRequest *slot);
static void prefetch_read(PrefetchRequest *slot);
static void prefetch_do_request(PrefetchRequest *slot, bool *force_latest, XLogRecPtr *force_lsn);
static bool prefetch_wait_for(uint64 ring_index);
static void prefetch_wait_for(uint64 ring_index);
static void prefetch_cleanup(void);
static inline void prefetch_set_unused(uint64 ring_index);

@@ -393,7 +393,7 @@ prefetch_cleanup(void)
 * NOTE: this function may indirectly update MyPState->pfs_hash; which
 * invalidates any active pointers into the hash table.
 */
static bool
static void
prefetch_wait_for(uint64 ring_index)
{
	PrefetchRequest *entry;
@@ -412,10 +412,8 @@ prefetch_wait_for(uint64 ring_index)
		entry = GetPrfSlot(MyPState->ring_receive);

		Assert(entry->status == PRFS_REQUESTED);
		if (!prefetch_read(entry))
			return false;
		prefetch_read(entry);
	}
	return true;
}

/*
@@ -427,7 +425,7 @@ prefetch_wait_for(uint64 ring_index)
 * NOTE: this function may indirectly update MyPState->pfs_hash; which
 * invalidates any active pointers into the hash table.
 */
static bool
static void
prefetch_read(PrefetchRequest *slot)
{
	NeonResponse *response;
@@ -440,22 +438,15 @@ prefetch_read(PrefetchRequest *slot)
	old = MemoryContextSwitchTo(MyPState->errctx);
	response = (NeonResponse *) page_server->receive();
	MemoryContextSwitchTo(old);
	if (response)
	{
		/* update prefetch state */
		MyPState->n_responses_buffered += 1;
		MyPState->n_requests_inflight -= 1;
		MyPState->ring_receive += 1;

	/* update prefetch state */
	MyPState->n_responses_buffered += 1;
	MyPState->n_requests_inflight -= 1;
	MyPState->ring_receive += 1;

		/* update slot state */
		slot->status = PRFS_RECEIVED;
		slot->response = response;
		return true;
	}
	else
	{
		return false;
	}
	/* update slot state */
	slot->status = PRFS_RECEIVED;
	slot->response = response;
}

/*
@@ -755,16 +746,11 @@ prefetch_register_buffer(BufferTag tag, bool *force_latest, XLogRecPtr *force_ls
static NeonResponse *
page_server_request(void const *req)
{
	NeonResponse *resp;
	do {
		page_server->send((NeonRequest *) req);
		page_server->flush();
		MyPState->ring_flush = MyPState->ring_unused;
		consume_prefetch_responses();
		resp = page_server->receive();
	} while (resp == NULL);
	return resp;

	page_server->send((NeonRequest *) req);
	page_server->flush();
	MyPState->ring_flush = MyPState->ring_unused;
	consume_prefetch_responses();
	return page_server->receive();
}


@@ -1649,8 +1635,7 @@ neon_close(SMgrRelation reln, ForkNumber forknum)
bool
neon_prefetch(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum)
{
	BufferTag	tag;
	uint64		ring_index PG_USED_FOR_ASSERTS_ONLY;
	uint64		ring_index PG_USED_FOR_ASSERTS_ONLY;

	switch (reln->smgr_relpersistence)
	{
@@ -1666,7 +1651,7 @@ neon_prefetch(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum)
			elog(ERROR, "unknown relpersistence '%c'", reln->smgr_relpersistence);
	}

	tag = (BufferTag) {
	BufferTag	tag = (BufferTag) {
		.rnode = reln->smgr_rnode.node,
		.forkNum = forknum,
		.blockNum = blocknum
@@ -1770,24 +1755,22 @@ neon_read_at_lsn(RelFileNode rnode, ForkNumber forkNum, BlockNumber blkno,
		}
	}

	do
	if (entry == NULL)
	{
		if (entry == NULL)
		{
			n_prefetch_misses += 1;
		n_prefetch_misses += 1;

			ring_index = prefetch_register_buffer(buftag, &request_latest,
												  &request_lsn);
			slot = GetPrfSlot(ring_index);
		}
		ring_index = prefetch_register_buffer(buftag, &request_latest,
											  &request_lsn);
		slot = GetPrfSlot(ring_index);
	}

		Assert(slot->my_ring_index == ring_index);
		Assert(MyPState->ring_last <= ring_index &&
			   MyPState->ring_unused > ring_index);
		Assert(slot->status != PRFS_UNUSED);
		Assert(GetPrfSlot(ring_index) == slot);
	Assert(slot->my_ring_index == ring_index);
	Assert(MyPState->ring_last <= ring_index &&
		   MyPState->ring_unused > ring_index);
	Assert(slot->status != PRFS_UNUSED);
	Assert(GetPrfSlot(ring_index) == slot);

	} while (!prefetch_wait_for(ring_index));
	prefetch_wait_for(ring_index);

	Assert(slot->status == PRFS_RECEIVED);


@@ -119,7 +119,6 @@ static TimestampTz last_reconnect_attempt;
static WalproposerShmemState * walprop_shared;

/* Prototypes for private functions */
static void WalProposerRegister(void);
static void WalProposerInit(XLogRecPtr flushRecPtr, uint64 systemId);
static void WalProposerStart(void);
static void WalProposerLoop(void);
@@ -456,7 +455,7 @@ WalProposerPoll(void)
/*
 * Register a background worker proposing WAL to wal acceptors.
 */
static void
void
WalProposerRegister(void)
{
	BackgroundWorker bgw;

@@ -377,18 +377,18 @@ typedef struct Safekeeper
	AppendResponse appendResponse;	/* feedback for master */
} Safekeeper;

extern void WalProposerSync(int argc, char *argv[]);
extern void WalProposerMain(Datum main_arg);
extern void WalProposerBroadcast(XLogRecPtr startpos, XLogRecPtr endpos);
extern void WalProposerPoll(void);
extern void ParseReplicationFeedbackMessage(StringInfo reply_message,
											ReplicationFeedback *rf);
extern PGDLLIMPORT void WalProposerMain(Datum main_arg);
void		WalProposerBroadcast(XLogRecPtr startpos, XLogRecPtr endpos);
void		WalProposerPoll(void);
void		WalProposerRegister(void);
void		ParseReplicationFeedbackMessage(StringInfo reply_message,
											ReplicationFeedback * rf);
extern void StartProposerReplication(StartReplicationCmd *cmd);

extern Size WalproposerShmemSize(void);
extern bool WalproposerShmemInit(void);
extern void replication_feedback_set(ReplicationFeedback *rf);
extern void replication_feedback_get_lsns(XLogRecPtr *writeLsn, XLogRecPtr *flushLsn, XLogRecPtr *applyLsn);
Size		WalproposerShmemSize(void);
bool		WalproposerShmemInit(void);
void		replication_feedback_set(ReplicationFeedback * rf);
void		replication_feedback_get_lsns(XLogRecPtr *writeLsn, XLogRecPtr *flushLsn, XLogRecPtr *applyLsn);

/* libpqwalproposer hooks & helper type */


@@ -1,7 +1,7 @@
//! Client authentication mechanisms.

pub mod backend;
pub use backend::{BackendType, ConsoleReqExtra};
pub use backend::{BackendType, ConsoleReqExtra, DatabaseInfo};

mod credentials;
pub use credentials::ClientCredentials;

@@ -12,6 +12,7 @@ use crate::{
    waiters::{self, Waiter, Waiters},
};
use once_cell::sync::Lazy;
use serde::{Deserialize, Serialize};
use std::borrow::Cow;
use tokio::io::{AsyncRead, AsyncWrite};
use tracing::{info, warn};
@@ -35,6 +36,45 @@ pub fn notify(psql_session_id: &str, msg: mgmt::ComputeReady) -> Result<(), wait
    CPLANE_WAITERS.notify(psql_session_id, msg)
}

/// Compute node connection params provided by the cloud.
/// Note how it implements serde traits, since we receive it over the wire.
#[derive(Serialize, Deserialize, Default)]
pub struct DatabaseInfo {
    pub host: String,
    pub port: u16,
    pub dbname: String,
    pub user: String,
    pub password: Option<String>,
}

// Manually implement debug to omit personal and sensitive info.
impl std::fmt::Debug for DatabaseInfo {
    fn fmt(&self, fmt: &mut std::fmt::Formatter) -> std::fmt::Result {
        fmt.debug_struct("DatabaseInfo")
            .field("host", &self.host)
            .field("port", &self.port)
            .finish_non_exhaustive()
    }
}

impl From<DatabaseInfo> for tokio_postgres::Config {
    fn from(db_info: DatabaseInfo) -> Self {
        let mut config = tokio_postgres::Config::new();

        config
            .host(&db_info.host)
            .port(db_info.port)
            .dbname(&db_info.dbname)
            .user(&db_info.user);

        if let Some(password) = db_info.password {
            config.password(password);
        }

        config
    }
}

/// Extra query params we'd like to pass to the console.
pub struct ConsoleReqExtra<'a> {
    /// A unique identifier for a connection.
@@ -118,107 +158,54 @@ impl<'a, T, E> BackendType<'a, Result<T, E>> {
    }
}

/// A product of successful authentication.
pub struct AuthSuccess<T> {
    /// Did we send [`pq_proto::BeMessage::AuthenticationOk`] to client?
    pub reported_auth_ok: bool,
    /// Something to be considered a positive result.
    pub value: T,
}

impl<T> AuthSuccess<T> {
    /// Very similar to [`std::option::Option::map`].
    /// Maps [`AuthSuccess<T>`] to [`AuthSuccess<R>`] by applying
    /// a function to a contained value.
    pub fn map<R>(self, f: impl FnOnce(T) -> R) -> AuthSuccess<R> {
        AuthSuccess {
            reported_auth_ok: self.reported_auth_ok,
            value: f(self.value),
        }
    }
}

/// Info for establishing a connection to a compute node.
/// This is what we get after auth succeeded, but not before!
pub struct NodeInfo {
    /// Project from [`auth::ClientCredentials`].
    pub project: String,
    /// Compute node connection params.
    pub config: compute::ConnCfg,
}

impl BackendType<'_, ClientCredentials<'_>> {
    /// Do something special if user didn't provide the `project` parameter.
    async fn try_password_hack(
        &mut self,
        extra: &ConsoleReqExtra<'_>,
        client: &mut stream::PqStream<impl AsyncRead + AsyncWrite + Unpin + Send>,
    ) -> auth::Result<Option<AuthSuccess<NodeInfo>>> {
        use BackendType::*;

        // If there's no project so far, that entails that the client doesn't
        // support SNI or other means of passing the project name.
        // We now expect to see a very specific payload in the place of password.
        let fetch_magic_payload = async {
            warn!("project name not specified, resorting to the password hack auth flow");
            let payload = AuthFlow::new(client)
                .begin(auth::PasswordHack)
                .await?
                .authenticate()
                .await?;

            info!(project = &payload.project, "received missing parameter");
            auth::Result::Ok(payload)
        };

        // TODO: find a proper way to merge those very similar blocks.
        let (mut config, payload) = match self {
            Console(endpoint, creds) if creds.project.is_none() => {
                let payload = fetch_magic_payload.await?;

                let mut creds = creds.as_ref();
                creds.project = Some(payload.project.as_str().into());
                let config = console::Api::new(endpoint, extra, &creds)
                    .wake_compute()
                    .await?;

                (config, payload)
            }
            Postgres(endpoint, creds) if creds.project.is_none() => {
                let payload = fetch_magic_payload.await?;

                let mut creds = creds.as_ref();
                creds.project = Some(payload.project.as_str().into());
                let config = postgres::Api::new(endpoint, &creds).wake_compute().await?;

                (config, payload)
            }
            _ => return Ok(None),
        };

        config.password(payload.password);
        Ok(Some(AuthSuccess {
            reported_auth_ok: false,
            value: NodeInfo {
                project: payload.project,
                config,
            },
        }))
    }

    /// Authenticate the client via the requested backend, possibly using credentials.
    pub async fn authenticate(
        mut self,
        extra: &ConsoleReqExtra<'_>,
        client: &mut stream::PqStream<impl AsyncRead + AsyncWrite + Unpin + Send>,
    ) -> auth::Result<AuthSuccess<NodeInfo>> {
    ) -> super::Result<compute::NodeInfo> {
        use BackendType::*;

        // Handle cases when `project` is missing in `creds`.
        // TODO: type safety: return `creds` with irrefutable `project`.
        if let Some(res) = self.try_password_hack(extra, client).await? {
            info!("user successfully authenticated (using the password hack)");
            return Ok(res);
        if let Console(_, creds) | Postgres(_, creds) = &mut self {
            // If there's no project so far, that entails that the client doesn't
            // support SNI or other means of passing the project name.
            // We now expect to see a very specific payload in the place of password.
            if creds.project().is_none() {
                warn!("project name not specified, resorting to the password hack auth flow");

                let payload = AuthFlow::new(client)
                    .begin(auth::PasswordHack)
                    .await?
                    .authenticate()
                    .await?;

                // Finally we may finish the initialization of `creds`.
                // TODO: add missing type safety to ClientCredentials.
                info!(project = &payload.project, "received missing parameter");
                creds.project = Some(payload.project.into());

                let mut config = match &self {
                    Console(endpoint, creds) => {
                        console::Api::new(endpoint, extra, creds)
                            .wake_compute()
                            .await?
                    }
                    Postgres(endpoint, creds) => {
                        postgres::Api::new(endpoint, creds).wake_compute().await?
                    }
                    _ => unreachable!("see the patterns above"),
                };

                // We should use a password from payload as well.
                config.password(payload.password);

                info!("user successfully authenticated (using the password hack)");
                return Ok(compute::NodeInfo {
                    reported_auth_ok: false,
                    config,
                });
            }
        }

        let res = match self {
@@ -228,34 +215,22 @@ impl BackendType<'_, ClientCredentials<'_>> {
                    project = creds.project(),
                    "performing authentication using the console"
                );

                assert!(creds.project.is_some());
                console::Api::new(&endpoint, extra, &creds)
                    .handle_user(client)
                    .await?
                    .map(|config| NodeInfo {
                        project: creds.project.unwrap().into_owned(),
                        config,
                    })
                    .await
            }
            Postgres(endpoint, creds) => {
                info!("performing mock authentication using a local postgres instance");

                assert!(creds.project.is_some());
                postgres::Api::new(&endpoint, &creds)
                    .handle_user(client)
                    .await?
                    .map(|config| NodeInfo {
                        project: creds.project.unwrap().into_owned(),
                        config,
                    })
                    .await
            }
            // NOTE: this auth backend doesn't use client credentials.
            Link(url) => {
                info!("performing link authentication");
                link::handle_user(&url, client).await?
                link::handle_user(&url, client).await
            }
        };
        }?;

        info!("user successfully authenticated");
        Ok(res)

@@ -1,9 +1,9 @@
//! Cloud API V2.

use super::{AuthSuccess, ConsoleReqExtra};
use super::ConsoleReqExtra;
use crate::{
    auth::{self, AuthFlow, ClientCredentials},
    compute,
    compute::{self, ComputeConnCfg},
    error::{io_error, UserFacingError},
    http, scram,
    stream::PqStream,
@@ -128,7 +128,7 @@ impl<'a> Api<'a> {
    pub(super) async fn handle_user(
        self,
        client: &mut PqStream<impl AsyncRead + AsyncWrite + Unpin + Send>,
    ) -> auth::Result<AuthSuccess<compute::ConnCfg>> {
    ) -> auth::Result<compute::NodeInfo> {
        handle_user(client, &self, Self::get_auth_info, Self::wake_compute).await
    }

@@ -164,7 +164,7 @@ impl<'a> Api<'a> {
    }

    /// Wake up the compute node and return the corresponding connection info.
    pub(super) async fn wake_compute(&self) -> Result<compute::ConnCfg, WakeComputeError> {
    pub(super) async fn wake_compute(&self) -> Result<ComputeConnCfg, WakeComputeError> {
        let request_id = uuid::Uuid::new_v4().to_string();
        let req = self
            .endpoint
@@ -195,7 +195,7 @@ impl<'a> Api<'a> {
            Some(x) => x,
        };

        let mut config = compute::ConnCfg::new();
        let mut config = ComputeConnCfg::new();
        config
            .host(host)
            .port(port)
@@ -213,10 +213,10 @@ pub(super) async fn handle_user<'a, Endpoint, GetAuthInfo, WakeCompute>(
    endpoint: &'a Endpoint,
    get_auth_info: impl FnOnce(&'a Endpoint) -> GetAuthInfo,
    wake_compute: impl FnOnce(&'a Endpoint) -> WakeCompute,
) -> auth::Result<AuthSuccess<compute::ConnCfg>>
) -> auth::Result<compute::NodeInfo>
where
    GetAuthInfo: Future<Output = Result<AuthInfo, GetAuthInfoError>>,
    WakeCompute: Future<Output = Result<compute::ConnCfg, WakeComputeError>>,
    WakeCompute: Future<Output = Result<ComputeConnCfg, WakeComputeError>>,
{
    info!("fetching user's authentication info");
    let auth_info = get_auth_info(endpoint).await?;
@@ -243,9 +243,9 @@ where
        config.auth_keys(tokio_postgres::config::AuthKeys::ScramSha256(keys));
    }

    Ok(AuthSuccess {
    Ok(compute::NodeInfo {
        reported_auth_ok: false,
        value: config,
        config,
    })
}


@@ -1,4 +1,3 @@
use super::{AuthSuccess, NodeInfo};
use crate::{auth, compute, error::UserFacingError, stream::PqStream, waiters};
use pq_proto::{BeMessage as Be, BeParameterStatusMessage};
use thiserror::Error;
@@ -50,7 +49,7 @@ pub fn new_psql_session_id() -> String {
pub async fn handle_user(
    link_uri: &reqwest::Url,
    client: &mut PqStream<impl AsyncRead + AsyncWrite + Unpin>,
) -> auth::Result<AuthSuccess<NodeInfo>> {
) -> auth::Result<compute::NodeInfo> {
    let psql_session_id = new_psql_session_id();
    let span = info_span!("link", psql_session_id = &psql_session_id);
    let greeting = hello_message(link_uri, &psql_session_id);
@@ -72,22 +71,8 @@ pub async fn handle_user(

    client.write_message_noflush(&Be::NoticeResponse("Connecting to database."))?;

    let mut config = compute::ConnCfg::new();
    config
        .host(&db_info.host)
        .port(db_info.port)
        .dbname(&db_info.dbname)
        .user(&db_info.user);

    if let Some(password) = db_info.password {
        config.password(password);
    }

    Ok(AuthSuccess {
    Ok(compute::NodeInfo {
        reported_auth_ok: true,
        value: NodeInfo {
            project: db_info.project,
            config,
        },
        config: db_info.into(),
    })
}

@@ -1,12 +1,12 @@
//! Local mock of Cloud API V2.

use super::{
    console::{self, AuthInfo, GetAuthInfoError, TransportError, WakeComputeError},
    AuthSuccess,
};
use crate::{
    auth::{self, ClientCredentials},
    compute,
    auth::{
        self,
        backend::console::{self, AuthInfo, GetAuthInfoError, TransportError, WakeComputeError},
        ClientCredentials,
    },
    compute::{self, ComputeConnCfg},
    error::io_error,
    scram,
    stream::PqStream,
@@ -37,7 +37,7 @@ impl<'a> Api<'a> {
    pub(super) async fn handle_user(
        self,
        client: &mut PqStream<impl AsyncRead + AsyncWrite + Unpin + Send>,
    ) -> auth::Result<AuthSuccess<compute::ConnCfg>> {
    ) -> auth::Result<compute::NodeInfo> {
        // We reuse user handling logic from a production module.
        console::handle_user(client, &self, Self::get_auth_info, Self::wake_compute).await
    }
@@ -82,8 +82,8 @@ impl<'a> Api<'a> {
    }

    /// We don't need to wake anything locally, so we just return the connection info.
    pub(super) async fn wake_compute(&self) -> Result<compute::ConnCfg, WakeComputeError> {
        let mut config = compute::ConnCfg::new();
    pub(super) async fn wake_compute(&self) -> Result<ComputeConnCfg, WakeComputeError> {
        let mut config = ComputeConnCfg::new();
        config
            .host(self.endpoint.host_str().unwrap_or("localhost"))
            .port(self.endpoint.port().unwrap_or(5432))

@@ -36,23 +36,11 @@ pub struct ClientCredentials<'a> {
}

impl ClientCredentials<'_> {
    #[inline]
    pub fn project(&self) -> Option<&str> {
        self.project.as_deref()
    }
}

impl<'a> ClientCredentials<'a> {
    #[inline]
    pub fn as_ref(&'a self) -> ClientCredentials<'a> {
        Self {
            user: self.user,
            dbname: self.dbname,
            project: self.project().map(Cow::Borrowed),
        }
    }
}

impl<'a> ClientCredentials<'a> {
    pub fn parse(
        params: &'a StartupMessageParams,

@@ -40,36 +40,17 @@ impl UserFacingError for ConnectionError {
/// A pair of `ClientKey` & `ServerKey` for `SCRAM-SHA-256`.
pub type ScramKeys = tokio_postgres::config::ScramKeys<32>;

/// A config for establishing a connection to compute node.
/// Eventually, `tokio_postgres` will be replaced with something better.
/// Newtype allows us to implement methods on top of it.
#[repr(transparent)]
pub struct ConnCfg(pub tokio_postgres::Config);
pub type ComputeConnCfg = tokio_postgres::Config;

impl ConnCfg {
    /// Construct a new connection config.
    pub fn new() -> Self {
        Self(tokio_postgres::Config::new())
    }
/// Various compute node info for establishing connection etc.
pub struct NodeInfo {
    /// Did we send [`pq_proto::BeMessage::AuthenticationOk`]?
    pub reported_auth_ok: bool,
    /// Compute node connection params.
    pub config: tokio_postgres::Config,
}

impl std::ops::Deref for ConnCfg {
    type Target = tokio_postgres::Config;

    fn deref(&self) -> &Self::Target {
        &self.0
    }
}

/// For now, let's make it easier to set up the config.
impl std::ops::DerefMut for ConnCfg {
    fn deref_mut(&mut self) -> &mut Self::Target {
        &mut self.0
    }
}

impl ConnCfg {
    /// Establish a raw TCP connection to the compute node.
impl NodeInfo {
    async fn connect_raw(&self) -> io::Result<(SocketAddr, TcpStream)> {
        use tokio_postgres::config::Host;

@@ -87,8 +68,8 @@ impl ConnCfg {
        // because it has no means for extracting the underlying socket which we
        // require for our business.
        let mut connection_error = None;
        let ports = self.0.get_ports();
        let hosts = self.0.get_hosts();
        let ports = self.config.get_ports();
        let hosts = self.config.get_hosts();
        // the ports array is supposed to have 0 entries, 1 entry, or as many entries as in the hosts array
        if ports.len() > 1 && ports.len() != hosts.len() {
            return Err(io::Error::new(
@@ -96,7 +77,7 @@ impl ConnCfg {
                format!(
                    "couldn't connect: bad compute config, \
                     ports and hosts entries' count does not match: {:?}",
                    self.0
                    self.config
                ),
            ));
        }
@@ -122,7 +103,7 @@ impl ConnCfg {
        Err(connection_error.unwrap_or_else(|| {
            io::Error::new(
                io::ErrorKind::Other,
                format!("couldn't connect: bad compute config: {:?}", self.0),
                format!("couldn't connect: bad compute config: {:?}", self.config),
            )
        }))
    }
@@ -135,7 +116,7 @@ pub struct PostgresConnection {
    pub version: String,
}

impl ConnCfg {
impl NodeInfo {
    /// Connect to a corresponding compute node.
    pub async fn connect(
        mut self,
@@ -149,21 +130,21 @@ impl ConnCfg {
            .intersperse(" ") // TODO: use impl from std once it's stabilized
            .collect();

        self.0.options(&options);
        self.config.options(&options);
    }

    if let Some(app_name) = params.get("application_name") {
        self.0.application_name(app_name);
        self.config.application_name(app_name);
    }

    if let Some(replication) = params.get("replication") {
        use tokio_postgres::config::ReplicationMode;
        match replication {
            "true" | "on" | "yes" | "1" => {
                self.0.replication_mode(ReplicationMode::Physical);
                self.config.replication_mode(ReplicationMode::Physical);
            }
            "database" => {
                self.0.replication_mode(ReplicationMode::Logical);
                self.config.replication_mode(ReplicationMode::Logical);
            }
            _other => {}
        }
@@ -179,7 +160,7 @@ impl ConnCfg {
        .map_err(|_| ConnectionError::FailedToConnectToCompute)?;

    // TODO: establish a secure connection to the DB
    let (client, conn) = self.0.connect_raw(&mut stream, NoTls).await?;
    let (client, conn) = self.config.connect_raw(&mut stream, NoTls).await?;
    let version = conn
        .parameter("server_version")
        .ok_or(ConnectionError::FailedToFetchPgVersion)?

@@ -6,11 +6,16 @@ use std::{
    net::{TcpListener, TcpStream},
    thread,
};
use tracing::{error, info, info_span};
use tracing::{error, info};
use utils::postgres_backend::{self, AuthType, PostgresBackend};

/// Console management API listener thread.
/// It spawns console response handlers needed for the link auth.
/// TODO: move all of that to auth-backend/link.rs when we ditch legacy-console backend

///
/// Main proxy listener loop.
///
/// Listens for connections, and launches a new handler thread for each.
///
pub fn thread_main(listener: TcpListener) -> anyhow::Result<()> {
    scopeguard::defer! {
        info!("mgmt has shut down");
@@ -19,7 +24,6 @@ pub fn thread_main(listener: TcpListener) -> anyhow::Result<()> {
    listener
        .set_nonblocking(false)
        .context("failed to set listener to blocking")?;

    loop {
        let (socket, peer_addr) = listener.accept().context("failed to accept a new client")?;
        info!("accepted connection from {peer_addr}");
@@ -27,19 +31,9 @@ pub fn thread_main(listener: TcpListener) -> anyhow::Result<()> {
            .set_nodelay(true)
            .context("failed to set client socket option")?;

        // TODO: replace with async tasks.
        thread::spawn(move || {
            let tid = std::thread::current().id();
            let span = info_span!("mgmt", thread = format_args!("{tid:?}"));
            let _enter = span.enter();

            info!("started a new console management API thread");
            scopeguard::defer! {
                info!("console management API thread is about to finish");
            }

            if let Err(e) = handle_connection(socket) {
                error!("thread failed with an error: {e}");
            if let Err(err) = handle_connection(socket) {
                error!("{err}");
            }
        });
    }
@@ -50,21 +44,44 @@ fn handle_connection(socket: TcpStream) -> anyhow::Result<()> {
    pgbackend.run(&mut MgmtHandler)
}

/// Known as `kickResponse` in the console.
#[derive(Debug, Deserialize)]
struct MgmtHandler;

/// Serialized examples:
// {
//   "session_id": "71d6d03e6d93d99a",
//   "result": {
//     "Success": {
//       "host": "127.0.0.1",
//       "port": 5432,
//       "dbname": "stas",
//       "user": "stas",
//       "password": "mypass"
//     }
//   }
// }
// {
//   "session_id": "71d6d03e6d93d99a",
//   "result": {
//     "Failure": "oops"
//   }
// }
//
// // to test manually by sending a query to mgmt interface:
// psql -h 127.0.0.1 -p 9999 -c '{"session_id":"4f10dde522e14739","result":{"Success":{"host":"127.0.0.1","port":5432,"dbname":"stas","user":"stas","password":"stas"}}}'
#[derive(Deserialize)]
struct PsqlSessionResponse {
    session_id: String,
    result: PsqlSessionResult,
}

#[derive(Debug, Deserialize)]
#[derive(Deserialize)]
enum PsqlSessionResult {
    Success(DatabaseInfo),
    Success(auth::DatabaseInfo),
    Failure(String),
}

/// A message received by `mgmt` when a compute node is ready.
pub type ComputeReady = Result<DatabaseInfo, String>;
pub type ComputeReady = Result<auth::DatabaseInfo, String>;

impl PsqlSessionResult {
    fn into_compute_ready(self) -> ComputeReady {
@@ -75,51 +92,25 @@ impl PsqlSessionResult {
    }
}

/// Compute node connection params provided by the console.
/// This struct and its parents are mgmt API implementation
/// detail and thus should remain in this module.
// TODO: restore deserialization tests from git history.
#[derive(Deserialize)]
pub struct DatabaseInfo {
    pub host: String,
    pub port: u16,
    pub dbname: String,
    pub user: String,
    /// Console always provides a password, but it might
    /// be inconvenient for debug with local PG instance.
    pub password: Option<String>,
    pub project: String,
}

// Manually implement debug to omit sensitive info.
impl std::fmt::Debug for DatabaseInfo {
    fn fmt(&self, fmt: &mut std::fmt::Formatter) -> std::fmt::Result {
        fmt.debug_struct("DatabaseInfo")
            .field("host", &self.host)
            .field("port", &self.port)
            .field("dbname", &self.dbname)
            .field("user", &self.user)
            .finish_non_exhaustive()
    }
}
|
||||
// TODO: replace with an http-based protocol.
|
||||
struct MgmtHandler;
|
||||
impl postgres_backend::Handler for MgmtHandler {
|
||||
fn process_query(&mut self, pgb: &mut PostgresBackend, query: &str) -> anyhow::Result<()> {
|
||||
try_process_query(pgb, query).map_err(|e| {
|
||||
error!("failed to process response: {e:?}");
|
||||
e
|
||||
})
|
||||
fn process_query(
|
||||
&mut self,
|
||||
pgb: &mut PostgresBackend,
|
||||
query_string: &str,
|
||||
) -> anyhow::Result<()> {
|
||||
let res = try_process_query(pgb, query_string);
|
||||
// intercept and log error message
|
||||
if res.is_err() {
|
||||
error!("mgmt query failed: {res:?}");
|
||||
}
|
||||
res
|
||||
}
|
||||
}
|
||||
|
||||
fn try_process_query(pgb: &mut PostgresBackend, query: &str) -> anyhow::Result<()> {
|
||||
let resp: PsqlSessionResponse = serde_json::from_str(query)?;
|
||||
fn try_process_query(pgb: &mut PostgresBackend, query_string: &str) -> anyhow::Result<()> {
|
||||
info!("got mgmt query [redacted]"); // Content contains password, don't print it
|
||||
|
||||
let span = info_span!("event", session_id = resp.session_id);
|
||||
let _enter = span.enter();
|
||||
info!("got response: {:?}", resp.result);
|
||||
let resp: PsqlSessionResponse = serde_json::from_str(query_string)?;
|
||||
|
||||
match auth::backend::notify(&resp.session_id, resp.result.into_compute_ready()) {
|
||||
Ok(()) => {
|
||||
@@ -128,50 +119,9 @@ fn try_process_query(pgb: &mut PostgresBackend, query: &str) -> anyhow::Result<(
|
||||
.write_message(&BeMessage::CommandComplete(b"SELECT 1"))?;
|
||||
}
|
||||
Err(e) => {
|
||||
error!("failed to deliver response to per-client task");
|
||||
pgb.write_message(&BeMessage::ErrorResponse(&e.to_string()))?;
|
||||
}
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}

#[cfg(test)]
mod tests {
    use super::*;
    use serde_json::json;

    #[test]
    fn parse_db_info() -> anyhow::Result<()> {
        // with password
        let _: DatabaseInfo = serde_json::from_value(json!({
            "host": "localhost",
            "port": 5432,
            "dbname": "postgres",
            "user": "john_doe",
            "password": "password",
            "project": "hello_world",
        }))?;

        // without password
        let _: DatabaseInfo = serde_json::from_value(json!({
            "host": "localhost",
            "port": 5432,
            "dbname": "postgres",
            "user": "john_doe",
            "project": "hello_world",
        }))?;

        // new field (forward compatibility)
        let _: DatabaseInfo = serde_json::from_value(json!({
            "host": "localhost",
            "port": 5432,
            "dbname": "postgres",
            "user": "john_doe",
            "project": "hello_world",
            "N.E.W": "forward compatibility check",
        }))?;

        Ok(())
    }
}

@@ -4,7 +4,7 @@ use crate::config::{ProxyConfig, TlsConfig};
use crate::stream::{MeasuredStream, PqStream, Stream};
use anyhow::{bail, Context};
use futures::TryFutureExt;
use metrics::{register_int_counter, register_int_counter_vec, IntCounter, IntCounterVec};
use metrics::{register_int_counter, IntCounter};
use once_cell::sync::Lazy;
use pq_proto::{BeMessage as Be, *};
use std::sync::Arc;
@@ -30,16 +30,10 @@ static NUM_CONNECTIONS_CLOSED_COUNTER: Lazy<IntCounter> = Lazy::new(|| {
    .unwrap()
});

static NUM_BYTES_PROXIED_COUNTER: Lazy<IntCounterVec> = Lazy::new(|| {
    register_int_counter_vec!(
        "proxy_io_bytes_per_client",
        "Number of bytes sent/received between client and backend.",
        &[
            // Received (rx) / sent (tx).
            "direction",
            // Proxy can keep calling it `project` internally.
            "endpoint_id"
        ]
static NUM_BYTES_PROXIED_COUNTER: Lazy<IntCounter> = Lazy::new(|| {
    register_int_counter!(
        "proxy_io_bytes_total",
        "Number of bytes sent/received between any client and backend."
    )
    .unwrap()
});
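With the label-less counter, per-endpoint traffic is no longer distinguishable; the single `proxy_io_bytes_total` series counts both directions for all clients. A throwaway Python reader for the Prometheus text format, assuming the proxy serves metrics at `/metrics` on its HTTP address (an assumption on our part, not something this diff shows):

from urllib.request import urlopen

def read_counter(http_addr: str, name: str) -> float:
    # Scan the Prometheus text exposition for a single sample line.
    body = urlopen(f"http://{http_addr}/metrics").read().decode("utf-8")
    for line in body.splitlines():
        if line.startswith(name + " "):
            return float(line.split()[-1])
    raise KeyError(name)

# e.g. read_counter("127.0.0.1:7001", "proxy_io_bytes_total")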
@@ -236,17 +230,16 @@ impl<S: AsyncRead + AsyncWrite + Unpin + Send> Client<'_, S> {
    application_name: params.get("application_name"),
};

let auth_result = async {
    // `&mut stream` doesn't let us merge those 2 lines.
    let res = creds.authenticate(&extra, &mut stream).await;
    async { res }.or_else(|e| stream.throw_error(e)).await
}
.instrument(info_span!("auth"))
.await?;
// Authenticate and connect to a compute node.
let auth = creds
    .authenticate(&extra, &mut stream)
    .instrument(info_span!("auth"))
    .await;

let node = async { auth }.or_else(|e| stream.throw_error(e)).await?;
let reported_auth_ok = node.reported_auth_ok;

let node = auth_result.value;
let (db, cancel_closure) = node
    .config
    .connect(params)
    .or_else(|e| stream.throw_error(e))
    .await?;
@@ -254,9 +247,7 @@ impl<S: AsyncRead + AsyncWrite + Unpin + Send> Client<'_, S> {
let cancel_key_data = session.enable_query_cancellation(cancel_closure);

// Report authentication success if we haven't done this already.
// Note that we do this only (for the most part) after we've connected
// to a compute (see above) which performs its own authentication.
if !auth_result.reported_auth_ok {
if !reported_auth_ok {
    stream
        .write_message_noflush(&Be::AuthenticationOk)?
        .write_message_noflush(&BeParameterStatusMessage::encoding())?;
@@ -270,23 +261,17 @@ impl<S: AsyncRead + AsyncWrite + Unpin + Send> Client<'_, S> {
    .write_message(&BeMessage::ReadyForQuery)
    .await?;

// TODO: add more identifiers.
let metric_id = node.project;

let m_sent = NUM_BYTES_PROXIED_COUNTER.with_label_values(&["tx", &metric_id]);
let mut client = MeasuredStream::new(stream.into_inner(), |cnt| {
    // Number of bytes we sent to the client (outbound).
    m_sent.inc_by(cnt as u64);
});

let m_recv = NUM_BYTES_PROXIED_COUNTER.with_label_values(&["rx", &metric_id]);
let mut db = MeasuredStream::new(db.stream, |cnt| {
    // Number of bytes the client sent to the compute node (inbound).
    m_recv.inc_by(cnt as u64);
});
/// This function will be called for writes to either direction.
fn inc_proxied(cnt: usize) {
    // Consider inventing something more sophisticated
    // if this ever becomes a bottleneck (cacheline bouncing).
    NUM_BYTES_PROXIED_COUNTER.inc_by(cnt as u64);
}

// Starting from here we only proxy the client's traffic.
info!("performing the proxy pass...");
let mut db = MeasuredStream::new(db.stream, inc_proxied);
let mut client = MeasuredStream::new(stream.into_inner(), inc_proxied);
let _ = tokio::io::copy_bidirectional(&mut client, &mut db).await?;

Ok(())
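After the handshake, the proxy pass is just a bidirectional byte copy with a counter bumped on every write; `copy_bidirectional` shuttles both directions on one task. A loose asyncio rendition of the same shape (a sketch under our own names, not the proxy's code):

import asyncio

bytes_proxied = 0

def inc_proxied(cnt: int) -> None:
    global bytes_proxied
    bytes_proxied += cnt  # single counter for both directions

async def pump(src: asyncio.StreamReader, dst: asyncio.StreamWriter) -> None:
    while data := await src.read(8192):
        inc_proxied(len(data))
        dst.write(data)
        await dst.drain()
    dst.close()

async def proxy_pass(client_r, client_w, db_r, db_w) -> None:
    # Copy both directions concurrently, like tokio::io::copy_bidirectional.
    await asyncio.gather(pump(client_r, db_w), pump(db_r, client_w))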

@@ -1568,7 +1568,6 @@ class NeonCli(AbstractNeonCli):
    def pageserver_start(
        self,
        overrides: Tuple[str, ...] = (),
        extra_env_vars: Optional[Dict[str, str]] = None,
    ) -> "subprocess.CompletedProcess[str]":
        start_args = ["pageserver", "start", *overrides]
        append_pageserver_param_overrides(
@@ -1578,11 +1577,11 @@ class NeonCli(AbstractNeonCli):
            pageserver_config_override=self.env.pageserver.config_override,
        )

        s3_env_vars = None
        if self.env.remote_storage is not None and isinstance(self.env.remote_storage, S3Storage):
            s3_env_vars = self.env.remote_storage.access_env_vars()
            extra_env_vars = (extra_env_vars or {}) | s3_env_vars

        return self.raw_cli(start_args, extra_env_vars=extra_env_vars)
        return self.raw_cli(start_args, extra_env_vars=s3_env_vars)
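The dropped merge relied on PEP 584 dict union: `|` builds a new dict and the right-hand operand wins on duplicate keys, so the S3 credentials override anything the caller passed. For example (values illustrative):

extra_env_vars = {"RUST_LOG": "debug", "AWS_REGION": "stale-value"}
s3_env_vars = {"AWS_REGION": "us-east-2", "AWS_ACCESS_KEY_ID": "..."}

merged = (extra_env_vars or {}) | s3_env_vars
# {'RUST_LOG': 'debug', 'AWS_REGION': 'us-east-2', 'AWS_ACCESS_KEY_ID': '...'}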

    def pageserver_stop(self, immediate=False) -> "subprocess.CompletedProcess[str]":
        cmd = ["pageserver", "stop"]
@@ -1761,15 +1760,9 @@ class NeonPageserver(PgProtocol):
        ".*manual_gc.*is_shutdown_requested\\(\\) called in an unexpected task or thread.*",
        ".*tenant_list: timeline is not found in remote index while it is present in the tenants registry.*",
        ".*Removing intermediate uninit mark file.*",
        # FIXME: known race condition in TaskHandle: https://github.com/neondatabase/neon/issues/2885
        ".*sender is dropped while join handle is still alive.*",
    ]

    def start(
        self,
        overrides: Tuple[str, ...] = (),
        extra_env_vars: Optional[Dict[str, str]] = None,
    ) -> "NeonPageserver":
    def start(self, overrides: Tuple[str, ...] = ()) -> "NeonPageserver":
        """
        Start the page server.
        `overrides` allows adding some config to this pageserver start.
@@ -1777,7 +1770,7 @@ class NeonPageserver(PgProtocol):
        """
        assert self.running is False

        self.env.neon_cli.pageserver_start(overrides=overrides, extra_env_vars=extra_env_vars)
        self.env.neon_cli.pageserver_start(overrides=overrides)
        self.running = True
        return self

@@ -2080,9 +2073,9 @@ class NeonProxy(PgProtocol):
        self,
        proxy_port: int,
        http_port: int,
        mgmt_port: int,
        neon_binpath: Path,
        auth_endpoint=None,
        mgmt_port=None,
    ):
        super().__init__(dsn=auth_endpoint, port=proxy_port)
        self.host = "127.0.0.1"
@@ -2096,8 +2089,7 @@

    def start(self):
        """
        Starts a proxy with option '--auth-backend postgres' and a postgres instance
        already provided through '--auth-endpoint <postgres-instance>'.
        Starts a proxy with option '--auth-backend postgres' and a postgres instance already provided through '--auth-endpoint <postgres-instance>'.
        """
        assert self._popen is None
        assert self.auth_endpoint is not None
@@ -2107,7 +2099,6 @@
            str(self.neon_binpath / "proxy"),
            *["--http", f"{self.host}:{self.http_port}"],
            *["--proxy", f"{self.host}:{self.proxy_port}"],
            *["--mgmt", f"{self.host}:{self.mgmt_port}"],
            *["--auth-backend", "postgres"],
            *["--auth-endpoint", self.auth_endpoint],
        ]
@@ -2184,13 +2175,11 @@ def static_proxy(
    auth_endpoint = f"postgres://proxy:password@{host}:{port}/{dbname}"

    proxy_port = port_distributor.get_port()
    mgmt_port = port_distributor.get_port()
    http_port = port_distributor.get_port()

    with NeonProxy(
        proxy_port=proxy_port,
        http_port=http_port,
        mgmt_port=mgmt_port,
        neon_binpath=neon_binpath,
        auth_endpoint=auth_endpoint,
    ) as proxy:

@@ -6,7 +6,6 @@ import pytest
from fixtures.benchmark_fixture import MetricReport
from fixtures.compare_fixtures import PgCompare
from fixtures.log_helper import log
from pytest_lazyfixture import lazy_fixture # type: ignore


@pytest.mark.parametrize(
@@ -21,24 +20,16 @@ from pytest_lazyfixture import lazy_fixture # type: ignore
        pytest.param(10000000, 1, 4),
    ],
)
@pytest.mark.parametrize(
    "env, scale",
    [
        # Run on all envs. Use 50x larger table on remote cluster to make sure
        # it doesn't fit in shared buffers, which are larger on remote than local.
        pytest.param(lazy_fixture("neon_compare"), 1, id="neon"),
        pytest.param(lazy_fixture("vanilla_compare"), 1, id="vanilla"),
        pytest.param(
            lazy_fixture("remote_compare"), 50, id="remote", marks=pytest.mark.remote_cluster
        ),
    ],
)
def test_seqscans(env: PgCompare, scale: int, rows: int, iters: int, workers: int):
    rows = scale * rows
def test_seqscans(neon_with_baseline: PgCompare, rows: int, iters: int, workers: int):
    env = neon_with_baseline

    with closing(env.pg.connect()) as conn:
        with conn.cursor() as cur:
            cur.execute("drop table if exists t;")

            if True:
                cur.execute("set enable_seqscan_prefetch = on;")
                cur.execute("set seqscan_prefetch_buffers = 10;")

            cur.execute("create table t (i integer);")
            cur.execute(f"insert into t values (generate_series(1,{rows}));")


@@ -8,7 +8,6 @@ from fixtures.neon_fixtures import NeonEnvBuilder, PgBin
# normally restarts after it. Also, there should be GC ERRORs in the log,
# but the fixture checks the log for any unexpected ERRORs after every
# test anyway, so it doesn't need any special attention here.
@pytest.mark.timeout(600)
def test_gc_cutoff(neon_env_builder: NeonEnvBuilder, pg_bin: PgBin):
    env = neon_env_builder.init_start()

@@ -39,7 +38,7 @@ def test_gc_cutoff(neon_env_builder: NeonEnvBuilder, pg_bin: PgBin):

    for _ in range(5):
        with pytest.raises(Exception):
            pg_bin.run_capture(["pgbench", "-P1", "-N", "-c5", "-T500", "-Mprepared", connstr])
            pg_bin.run_capture(["pgbench", "-N", "-c5", "-T100", "-Mprepared", connstr])
        env.pageserver.stop()
        env.pageserver.start()
        pageserver_http.configure_failpoints(("after-timeline-gc-removed-layers", "exit"))

@@ -1,4 +1,5 @@
import json
import subprocess
from urllib.parse import urlparse

import psycopg2
@@ -7,11 +8,11 @@ from fixtures.log_helper import log
from fixtures.neon_fixtures import PSQL, NeonProxy, VanillaPostgres


def test_proxy_select_1(static_proxy: NeonProxy):
def test_proxy_select_1(static_proxy):
    static_proxy.safe_psql("select 1", options="project=generic-project-name")


def test_password_hack(static_proxy: NeonProxy):
def test_password_hack(static_proxy):
    user = "borat"
    password = "password"
    static_proxy.safe_psql(
@@ -23,75 +24,118 @@ def test_password_hack(static_proxy: NeonProxy):
    static_proxy.safe_psql("select 1", sslsni=0, user=user, password=magic)

    # Must also check that invalid magic won't be accepted.
    with pytest.raises(psycopg2.OperationalError):
    with pytest.raises(psycopg2.errors.OperationalError):
        magic = "broken"
        static_proxy.safe_psql("select 1", sslsni=0, user=user, password=magic)


def get_session_id(uri_prefix, uri_line):
def get_session_id_from_uri_line(uri_prefix, uri_line):
    assert uri_prefix in uri_line

    url_parts = urlparse(uri_line)
    psql_session_id = url_parts.path[1:]
    assert psql_session_id.isalnum(), "session_id should only contain alphanumeric chars"
    assert psql_session_id.isalnum(), "session_id should only contain alphanumeric chars."
    link_auth_uri_prefix = uri_line[: -len(url_parts.path)]
    # invariant: the prefix must match the uri_prefix.
    assert (
        link_auth_uri_prefix == uri_prefix
    ), f"Line='{uri_line}' should contain a http auth link of form '{uri_prefix}/<psql_session_id>'."
    # invariant: the entire link_auth_uri should be on its own line, modulo spaces.
    assert "".join(uri_line.split(" ")) == f"{uri_prefix}/{psql_session_id}"

    return psql_session_id
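A quick usage check of the parser above; the host and session id here are made up:

line = "http://127.0.0.1:3000/4f10dde522e14739"
prefix = "http://127.0.0.1:3000"
assert get_session_id_from_uri_line(prefix, line) == "4f10dde522e14739"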


async def find_auth_link(link_auth_uri_prefix, proc):
    for _ in range(100):
        line = (await proc.stderr.readline()).decode("utf-8").strip()
        log.info(f"psql line: {line}")
        if link_auth_uri_prefix in line:
            log.info(f"SUCCESS, found auth url: {line}")
            return line


async def activate_link_auth(local_vanilla_pg, link_proxy, psql_session_id):
def create_and_send_db_info(local_vanilla_pg, psql_session_id, mgmt_port):
    pg_user = "proxy"
    pg_password = "password"

    log.info("creating a new user for link auth test")
    local_vanilla_pg.start()
    local_vanilla_pg.safe_psql(f"create user {pg_user} with login superuser")
    query = f"create user {pg_user} with login superuser password '{pg_password}'"
    local_vanilla_pg.safe_psql(query)

    db_info = json.dumps(
        {
            "session_id": psql_session_id,
            "result": {
                "Success": {
                    "host": local_vanilla_pg.default_options["host"],
                    "port": local_vanilla_pg.default_options["port"],
                    "dbname": local_vanilla_pg.default_options["dbname"],
                    "user": pg_user,
                    "project": "irrelevant",
                }
            },
        }
    )
    port = local_vanilla_pg.default_options["port"]
    host = local_vanilla_pg.default_options["host"]
    dbname = local_vanilla_pg.default_options["dbname"]

    log.info("sending session activation message")
    psql = await PSQL(host=link_proxy.host, port=link_proxy.mgmt_port).run(db_info)
    out = (await psql.stdout.read()).decode("utf-8").strip()
    assert out == "ok"
    db_info_dict = {
        "session_id": psql_session_id,
        "result": {
            "Success": {
                "host": host,
                "port": port,
                "dbname": dbname,
                "user": pg_user,
                "password": pg_password,
            }
        },
    }
    db_info_str = json.dumps(db_info_dict)
    cmd_args = [
        "psql",
        "-h",
        "127.0.0.1",  # localhost
        "-p",
        f"{mgmt_port}",
        "-c",
        db_info_str,
    ]

    log.info(f"Sending to proxy the user and db info: {' '.join(cmd_args)}")
    p = subprocess.Popen(cmd_args, stdout=subprocess.PIPE)
    out, err = p.communicate()
    assert "ok" in str(out)


async def get_uri_line_from_process_welcome_notice(link_auth_uri_prefix, proc):
    """
    Returns the line from the welcome notice from proc containing link_auth_uri_prefix.
    :param link_auth_uri_prefix: the uri prefix used to indicate the line of interest
    :param proc: the process to read the welcome message from.
    :return: a line containing the full link authentication uri.
    """
    max_num_lines_of_welcome_message = 15
    for attempt in range(max_num_lines_of_welcome_message):
        raw_line = await proc.stderr.readline()
        line = raw_line.decode("utf-8").strip()
        if link_auth_uri_prefix in line:
            return line
    assert False, f"did not find a line containing '{link_auth_uri_prefix}'"


@pytest.mark.asyncio
async def test_psql_session_id(vanilla_pg: VanillaPostgres, link_proxy: NeonProxy):
    psql = await PSQL(host=link_proxy.host, port=link_proxy.proxy_port).run("select 42")
    """
    Test copied and modified from the test_project_psql_link_auth test in cloud/tests_e2e/tests/test_project.py.
    Step 1. Establish a connection to the proxy.
    Step 2. Retrieve the session_id:
    Step 2.1: read the welcome message.
    Step 2.2: parse the session_id.
    Step 3. Create a vanilla_pg and send the user and db info to the proxy as a psql query on the mgmt port (using Popen).
    Step 4. Assert that the select has been executed correctly.
    """

    psql = PSQL(
        host=link_proxy.host,
        port=link_proxy.proxy_port,
    )
    proc = await psql.run("select 42")

    uri_prefix = link_proxy.link_auth_uri_prefix
    link = await find_auth_link(uri_prefix, psql)
    line_str = await get_uri_line_from_process_welcome_notice(uri_prefix, proc)

    psql_session_id = get_session_id(uri_prefix, link)
    await activate_link_auth(vanilla_pg, link_proxy, psql_session_id)
    psql_session_id = get_session_id_from_uri_line(uri_prefix, line_str)
    log.info(f"Parsed psql_session_id='{psql_session_id}' from Neon welcome message.")

    assert psql.stdout is not None
    out = (await psql.stdout.read()).decode("utf-8").strip()
    create_and_send_db_info(vanilla_pg, psql_session_id, link_proxy.mgmt_port)

    assert proc.stdout is not None
    out = (await proc.stdout.read()).decode("utf-8").strip()
    assert out == "42"


# Pass extra options to the server.
def test_proxy_options(static_proxy: NeonProxy):
def test_proxy_options(static_proxy):
    with static_proxy.connect(options="project=irrelevant -cproxytest.option=value") as conn:
        with conn.cursor() as cur:
            cur.execute("SHOW proxytest.option")

@@ -1,4 +1,3 @@
import time
from threading import Thread

import pytest
@@ -12,21 +11,11 @@ def do_gc_target(
):
    """Hack to unblock main, see https://github.com/neondatabase/neon/issues/2211"""
    try:
        log.info("sending gc http request")
        pageserver_http.timeline_gc(tenant_id, timeline_id, 0)
    except Exception as e:
        log.error("do_gc failed: %s", e)
    finally:
        log.info("gc http thread returning")


@pytest.mark.skip(
    reason="""
    The commit 'make test_tenant_detach_smoke fail reproducibly' adds a failpoint to make this test fail reproducibly.
    The fix in https://github.com/neondatabase/neon/pull/2851 will come as part of
    https://github.com/neondatabase/neon/pull/2785 .
    """
)
def test_tenant_detach_smoke(neon_env_builder: NeonEnvBuilder):
    env = neon_env_builder.init_start()
    pageserver_http = env.pageserver.http_client()
@@ -62,7 +51,7 @@ def test_tenant_detach_smoke(neon_env_builder: NeonEnvBuilder):
        ]
    )

    # gc should not even try to start on a timeline that doesn't exist
    # gc should not even try to start
    with pytest.raises(
        expected_exception=PageserverApiException, match="gc target timeline does not exist"
    ):
@@ -72,24 +61,25 @@ def test_tenant_detach_smoke(neon_env_builder: NeonEnvBuilder):
    # the error will be printed to the log too
    env.pageserver.allowed_errors.append(".*gc target timeline does not exist.*")

    # Detach while manual GC is running.
    # Detach should wait for manual GC to finish (right now it doesn't, which is why this test sometimes fails).
    pageserver_http.configure_failpoints(
        ("gc_iteration_internal_after_getting_gc_timelines", "return(2000)")
    )
    # try to run gc and detach concurrently
    gc_thread = Thread(target=lambda: do_gc_target(pageserver_http, tenant_id, timeline_id))
    gc_thread.start()
    time.sleep(1)
    # By now the gc task has been spawned, but it sleeps for another second due to the failpoint.

    log.info("detaching tenant")
    pageserver_http.tenant_detach(tenant_id)
    log.info("tenant detached without error")
    last_error = None
    for i in range(3):
        try:
            pageserver_http.tenant_detach(tenant_id)
        except Exception as e:
            last_error = e
            log.error(f"try {i} error detaching tenant: {e}")
            continue
        else:
            break
    # the for loop's else clause runs only if the loop finished without reaching "break"
    else:
        pytest.fail(f"could not detach tenant: {last_error}")
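The retry above leans on Python's for/else, which trips people up: the `else` arm runs only when the loop exhausts all its iterations without hitting `break`. In miniature:

for attempt in range(3):
    if attempt == 1:  # pretend the second try succeeded
        break
else:
    raise RuntimeError("all retries failed")  # skipped here, since we broke out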

    log.info("wait for gc thread to return")
    gc_thread.join(timeout=10)
    assert not gc_thread.is_alive()
    log.info("gc thread returned")

    # check that nothing is left on disk for the deleted tenant
    assert not (env.repo_dir / "tenants" / str(tenant_id)).exists()

@@ -166,10 +166,6 @@ def test_get_tenant_size_with_multiple_branches(neon_env_builder: NeonEnvBuilder

    env = neon_env_builder.init_start()

    # FIXME: we have a race condition between GC and delete timeline. GC might fail with this
    # error. Similar to https://github.com/neondatabase/neon/issues/2671
    env.pageserver.allowed_errors.append(".*InternalServerError\\(No such file or directory.*")

    tenant_id = env.initial_tenant
    main_branch_name, main_timeline_id = env.neon_cli.list_timelines(tenant_id)[0]

@@ -192,8 +188,10 @@ def test_get_tenant_size_with_multiple_branches(neon_env_builder: NeonEnvBuilder
        "first-branch", main_branch_name, tenant_id
    )

    # unsure why this happens; the size difference is more than a page alignment
    size_after_first_branch = http_client.tenant_size(tenant_id)
    assert size_after_first_branch == size_at_branch
    assert size_after_first_branch > size_at_branch
    assert size_after_first_branch - size_at_branch == gc_horizon

    first_branch_pg = env.postgres.create_start("first-branch", tenant_id=tenant_id)

@@ -219,7 +217,7 @@ def test_get_tenant_size_with_multiple_branches(neon_env_builder: NeonEnvBuilder
        "second-branch", main_branch_name, tenant_id
    )
    size_after_second_branch = http_client.tenant_size(tenant_id)
    assert size_after_second_branch == size_after_continuing_on_main
    assert size_after_second_branch > size_after_continuing_on_main

    second_branch_pg = env.postgres.create_start("second-branch", tenant_id=tenant_id)

@@ -265,8 +263,6 @@ def test_get_tenant_size_with_multiple_branches(neon_env_builder: NeonEnvBuilder
    except PageserverApiException as e:
        # compaction is ok; just retry if this fails; related to #2442
        if "cannot lock compaction critical section" in str(e):
            # also ignore it in the log
            env.pageserver.allowed_errors.append(".*cannot lock compaction critical section.*")
            time.sleep(1)
            continue
        raise
2
vendor/postgres-v14
vendored
Submodule vendor/postgres-v14 updated: da50d99db5...cd0693e2be
2
vendor/postgres-v15
vendored
Submodule vendor/postgres-v15 updated: 780c3f8e35...1bf5e3f53c