mirror of
https://github.com/neondatabase/neon.git
synced 2026-02-08 13:10:37 +00:00
Compare commits
43 Commits
test-prefe
...
persistent
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
8261455019 | ||
|
|
aad88d6c39 | ||
|
|
6188315b51 | ||
|
|
3a4b932d8a | ||
|
|
cc2b3c986c | ||
|
|
c250c2664b | ||
|
|
e5550a01b0 | ||
|
|
45617ceaef | ||
|
|
29b39301fe | ||
|
|
b01a93be60 | ||
|
|
4c68d019e3 | ||
|
|
85f0975c5a | ||
|
|
1af087449a | ||
|
|
37625c4433 | ||
|
|
e9f4ca5972 | ||
|
|
4bf3087aed | ||
|
|
9470bc9fe0 | ||
|
|
86e483f87b | ||
|
|
f50d0ec0c9 | ||
|
|
74ec36a1bf | ||
|
|
a63ebb6446 | ||
|
|
a5b898a31c | ||
|
|
c6f095a821 | ||
|
|
6b2bc7f775 | ||
|
|
6c97fc941a | ||
|
|
cb9b26776e | ||
|
|
684329d4d2 | ||
|
|
ed40a045c0 | ||
|
|
3f39327622 | ||
|
|
a50a7e8ac0 | ||
|
|
e28eda7939 | ||
|
|
f564dff0e3 | ||
|
|
d783889a1f | ||
|
|
2655bdbb2e | ||
|
|
b9152f1ef4 | ||
|
|
328ec1ce24 | ||
|
|
dcb79ef08f | ||
|
|
fd99e0fbc4 | ||
|
|
60ac227196 | ||
|
|
4a60051b0d | ||
|
|
24d3ed0952 | ||
|
|
0a87d71294 | ||
|
|
150bddb929 |
2
.github/ansible/.gitignore
vendored
2
.github/ansible/.gitignore
vendored
@@ -1,5 +1,3 @@
|
|||||||
zenith_install.tar.gz
|
|
||||||
.zenith_current_version
|
|
||||||
neon_install.tar.gz
|
neon_install.tar.gz
|
||||||
.neon_current_version
|
.neon_current_version
|
||||||
|
|
||||||
|
|||||||
33
.github/ansible/staging.eu-west-1.hosts.yaml
vendored
Normal file
33
.github/ansible/staging.eu-west-1.hosts.yaml
vendored
Normal file
@@ -0,0 +1,33 @@
|
|||||||
|
storage:
|
||||||
|
vars:
|
||||||
|
bucket_name: neon-dev-storage-eu-west-1
|
||||||
|
bucket_region: eu-west-1
|
||||||
|
console_mgmt_base_url: http://console-staging.local
|
||||||
|
etcd_endpoints: etcd-0.eu-west-1.aws.neon.build:2379
|
||||||
|
pageserver_config_stub:
|
||||||
|
pg_distrib_dir: /usr/local
|
||||||
|
remote_storage:
|
||||||
|
bucket_name: "{{ bucket_name }}"
|
||||||
|
bucket_region: "{{ bucket_region }}"
|
||||||
|
prefix_in_bucket: "pageserver/v1"
|
||||||
|
safekeeper_s3_prefix: safekeeper/v1/wal
|
||||||
|
hostname_suffix: ""
|
||||||
|
remote_user: ssm-user
|
||||||
|
ansible_aws_ssm_region: eu-west-1
|
||||||
|
ansible_aws_ssm_bucket_name: neon-dev-storage-eu-west-1
|
||||||
|
console_region_id: aws-eu-west-1
|
||||||
|
|
||||||
|
children:
|
||||||
|
pageservers:
|
||||||
|
hosts:
|
||||||
|
pageserver-0.eu-west-1.aws.neon.build:
|
||||||
|
ansible_host: i-01d496c5041c7f34c
|
||||||
|
|
||||||
|
safekeepers:
|
||||||
|
hosts:
|
||||||
|
safekeeper-0.eu-west-1.aws.neon.build:
|
||||||
|
ansible_host: i-05226ef85722831bf
|
||||||
|
safekeeper-1.eu-west-1.aws.neon.build:
|
||||||
|
ansible_host: i-06969ee1bf2958bfc
|
||||||
|
safekeeper-2.eu-west-1.aws.neon.build:
|
||||||
|
ansible_host: i-087892e9625984a0b
|
||||||
2
.github/ansible/staging.us-east-2.hosts.yaml
vendored
2
.github/ansible/staging.us-east-2.hosts.yaml
vendored
@@ -22,6 +22,8 @@ storage:
|
|||||||
hosts:
|
hosts:
|
||||||
pageserver-0.us-east-2.aws.neon.build:
|
pageserver-0.us-east-2.aws.neon.build:
|
||||||
ansible_host: i-0c3e70929edb5d691
|
ansible_host: i-0c3e70929edb5d691
|
||||||
|
pageserver-1.us-east-2.aws.neon.build:
|
||||||
|
ansible_host: i-0565a8b4008aa3f40
|
||||||
|
|
||||||
safekeepers:
|
safekeepers:
|
||||||
hosts:
|
hosts:
|
||||||
|
|||||||
31
.github/helm-values/dev-eu-west-1-zeta.neon-proxy-scram.yaml
vendored
Normal file
31
.github/helm-values/dev-eu-west-1-zeta.neon-proxy-scram.yaml
vendored
Normal file
@@ -0,0 +1,31 @@
|
|||||||
|
# Helm chart values for neon-proxy-scram.
|
||||||
|
# This is a YAML-formatted file.
|
||||||
|
|
||||||
|
image:
|
||||||
|
repository: neondatabase/neon
|
||||||
|
|
||||||
|
settings:
|
||||||
|
authBackend: "console"
|
||||||
|
authEndpoint: "http://console-staging.local/management/api/v2"
|
||||||
|
domain: "*.eu-west-1.aws.neon.build"
|
||||||
|
|
||||||
|
# -- Additional labels for neon-proxy pods
|
||||||
|
podLabels:
|
||||||
|
zenith_service: proxy-scram
|
||||||
|
zenith_env: dev
|
||||||
|
zenith_region: eu-west-1
|
||||||
|
zenith_region_slug: eu-west-1
|
||||||
|
|
||||||
|
exposedService:
|
||||||
|
annotations:
|
||||||
|
service.beta.kubernetes.io/aws-load-balancer-type: external
|
||||||
|
service.beta.kubernetes.io/aws-load-balancer-nlb-target-type: ip
|
||||||
|
service.beta.kubernetes.io/aws-load-balancer-scheme: internet-facing
|
||||||
|
external-dns.alpha.kubernetes.io/hostname: eu-west-1.aws.neon.build
|
||||||
|
|
||||||
|
#metrics:
|
||||||
|
# enabled: true
|
||||||
|
# serviceMonitor:
|
||||||
|
# enabled: true
|
||||||
|
# selector:
|
||||||
|
# release: kube-prometheus-stack
|
||||||
9
.github/workflows/benchmarking.yml
vendored
9
.github/workflows/benchmarking.yml
vendored
@@ -144,7 +144,9 @@ jobs:
|
|||||||
# neon-captest-new: Run pgbench in a freshly created project
|
# neon-captest-new: Run pgbench in a freshly created project
|
||||||
# neon-captest-reuse: Same, but reusing existing project
|
# neon-captest-reuse: Same, but reusing existing project
|
||||||
# neon-captest-prefetch: Same, with prefetching enabled (new project)
|
# neon-captest-prefetch: Same, with prefetching enabled (new project)
|
||||||
platform: [ neon-captest-new, neon-captest-reuse, neon-captest-prefetch ]
|
# rds-aurora: Aurora Postgres Serverless v2 with autoscaling from 0.5 to 2 ACUs
|
||||||
|
# rds-postgres: RDS Postgres db.m5.large instance (2 vCPU, 8 GiB) with gp3 EBS storage
|
||||||
|
platform: [ neon-captest-new, neon-captest-reuse, neon-captest-prefetch, rds-postgres ]
|
||||||
db_size: [ 10gb ]
|
db_size: [ 10gb ]
|
||||||
include:
|
include:
|
||||||
- platform: neon-captest-new
|
- platform: neon-captest-new
|
||||||
@@ -207,8 +209,11 @@ jobs:
|
|||||||
rds-aurora)
|
rds-aurora)
|
||||||
CONNSTR=${{ secrets.BENCHMARK_RDS_CONNSTR }}
|
CONNSTR=${{ secrets.BENCHMARK_RDS_CONNSTR }}
|
||||||
;;
|
;;
|
||||||
|
rds-postgres)
|
||||||
|
CONNSTR=${{ secrets.BENCHMARK_RDS_POSTGRES_CONNSTR }}
|
||||||
|
;;
|
||||||
*)
|
*)
|
||||||
echo 2>&1 "Unknown PLATFORM=${PLATFORM}. Allowed only 'neon-captest-reuse', 'neon-captest-new', 'neon-captest-prefetch' or 'rds-aurora'"
|
echo 2>&1 "Unknown PLATFORM=${PLATFORM}. Allowed only 'neon-captest-reuse', 'neon-captest-new', 'neon-captest-prefetch', 'rds-aurora', or 'rds-postgres'"
|
||||||
exit 1
|
exit 1
|
||||||
;;
|
;;
|
||||||
esac
|
esac
|
||||||
|
|||||||
41
.github/workflows/build_and_test.yml
vendored
41
.github/workflows/build_and_test.yml
vendored
@@ -761,7 +761,6 @@ jobs:
|
|||||||
run: |
|
run: |
|
||||||
export DOCKER_TAG=${{needs.tag.outputs.build-tag}}
|
export DOCKER_TAG=${{needs.tag.outputs.build-tag}}
|
||||||
cd "$(pwd)/.github/ansible"
|
cd "$(pwd)/.github/ansible"
|
||||||
|
|
||||||
if [[ "$GITHUB_REF_NAME" == "main" ]]; then
|
if [[ "$GITHUB_REF_NAME" == "main" ]]; then
|
||||||
./get_binaries.sh
|
./get_binaries.sh
|
||||||
elif [[ "$GITHUB_REF_NAME" == "release" ]]; then
|
elif [[ "$GITHUB_REF_NAME" == "release" ]]; then
|
||||||
@@ -770,6 +769,38 @@ jobs:
|
|||||||
echo "GITHUB_REF_NAME (value '$GITHUB_REF_NAME') is not set to either 'main' or 'release'"
|
echo "GITHUB_REF_NAME (value '$GITHUB_REF_NAME') is not set to either 'main' or 'release'"
|
||||||
exit 1
|
exit 1
|
||||||
fi
|
fi
|
||||||
|
ansible-galaxy collection install sivel.toiletwater
|
||||||
|
ansible-playbook deploy.yaml -i staging.${{ matrix.target_region }}.hosts.yaml -e @ssm_config -e CONSOLE_API_TOKEN=${{secrets.NEON_STAGING_API_KEY}}
|
||||||
|
rm -f neon_install.tar.gz .neon_current_version
|
||||||
|
|
||||||
|
deploy-pr-test-new:
|
||||||
|
runs-on: [ self-hosted, dev, x64 ]
|
||||||
|
container: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/ansible:pinned
|
||||||
|
# We need both storage **and** compute images for deploy, because control plane picks the compute version based on the storage version.
|
||||||
|
# If it notices a fresh storage it may bump the compute version. And if compute image failed to build it may break things badly
|
||||||
|
needs: [ push-docker-hub, tag, regress-tests ]
|
||||||
|
if: |
|
||||||
|
contains(github.event.pull_request.labels.*.name, 'deploy-test-storage') &&
|
||||||
|
github.event_name != 'workflow_dispatch'
|
||||||
|
defaults:
|
||||||
|
run:
|
||||||
|
shell: bash
|
||||||
|
strategy:
|
||||||
|
matrix:
|
||||||
|
target_region: [ eu-west-1 ]
|
||||||
|
steps:
|
||||||
|
- name: Checkout
|
||||||
|
uses: actions/checkout@v3
|
||||||
|
with:
|
||||||
|
submodules: true
|
||||||
|
fetch-depth: 0
|
||||||
|
|
||||||
|
- name: Redeploy
|
||||||
|
run: |
|
||||||
|
export DOCKER_TAG=${{needs.tag.outputs.build-tag}}
|
||||||
|
cd "$(pwd)/.github/ansible"
|
||||||
|
|
||||||
|
./get_binaries.sh
|
||||||
|
|
||||||
ansible-galaxy collection install sivel.toiletwater
|
ansible-galaxy collection install sivel.toiletwater
|
||||||
ansible-playbook deploy.yaml -i staging.${{ matrix.target_region }}.hosts.yaml -e @ssm_config -e CONSOLE_API_TOKEN=${{secrets.NEON_STAGING_API_KEY}}
|
ansible-playbook deploy.yaml -i staging.${{ matrix.target_region }}.hosts.yaml -e @ssm_config -e CONSOLE_API_TOKEN=${{secrets.NEON_STAGING_API_KEY}}
|
||||||
@@ -780,7 +811,7 @@ jobs:
|
|||||||
container: 093970136003.dkr.ecr.eu-central-1.amazonaws.com/ansible:latest
|
container: 093970136003.dkr.ecr.eu-central-1.amazonaws.com/ansible:latest
|
||||||
# We need both storage **and** compute images for deploy, because control plane picks the compute version based on the storage version.
|
# We need both storage **and** compute images for deploy, because control plane picks the compute version based on the storage version.
|
||||||
# If it notices a fresh storage it may bump the compute version. And if compute image failed to build it may break things badly
|
# If it notices a fresh storage it may bump the compute version. And if compute image failed to build it may break things badly
|
||||||
needs: [ push-docker-hub, calculate-deploy-targets, tag, regress-tests ]
|
needs: [ push-docker-hub, tag, regress-tests ]
|
||||||
if: |
|
if: |
|
||||||
(github.ref_name == 'release') &&
|
(github.ref_name == 'release') &&
|
||||||
github.event_name != 'workflow_dispatch'
|
github.event_name != 'workflow_dispatch'
|
||||||
@@ -861,7 +892,7 @@ jobs:
|
|||||||
runs-on: [ self-hosted, dev, x64 ]
|
runs-on: [ self-hosted, dev, x64 ]
|
||||||
container: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/ansible:pinned
|
container: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/ansible:pinned
|
||||||
# Compute image isn't strictly required for proxy deploy, but let's still wait for it to run all deploy jobs consistently.
|
# Compute image isn't strictly required for proxy deploy, but let's still wait for it to run all deploy jobs consistently.
|
||||||
needs: [ push-docker-hub, calculate-deploy-targets, tag, regress-tests ]
|
needs: [ push-docker-hub, tag, regress-tests ]
|
||||||
if: |
|
if: |
|
||||||
(github.ref_name == 'main') &&
|
(github.ref_name == 'main') &&
|
||||||
github.event_name != 'workflow_dispatch'
|
github.event_name != 'workflow_dispatch'
|
||||||
@@ -873,6 +904,8 @@ jobs:
|
|||||||
include:
|
include:
|
||||||
- target_region: us-east-2
|
- target_region: us-east-2
|
||||||
target_cluster: dev-us-east-2-beta
|
target_cluster: dev-us-east-2-beta
|
||||||
|
- target_region: eu-west-1
|
||||||
|
target_cluster: dev-eu-west-1-zeta
|
||||||
steps:
|
steps:
|
||||||
- name: Checkout
|
- name: Checkout
|
||||||
uses: actions/checkout@v3
|
uses: actions/checkout@v3
|
||||||
@@ -894,7 +927,7 @@ jobs:
|
|||||||
runs-on: prod
|
runs-on: prod
|
||||||
container: 093970136003.dkr.ecr.eu-central-1.amazonaws.com/ansible:latest
|
container: 093970136003.dkr.ecr.eu-central-1.amazonaws.com/ansible:latest
|
||||||
# Compute image isn't strictly required for proxy deploy, but let's still wait for it to run all deploy jobs consistently.
|
# Compute image isn't strictly required for proxy deploy, but let's still wait for it to run all deploy jobs consistently.
|
||||||
needs: [ push-docker-hub, calculate-deploy-targets, tag, regress-tests ]
|
needs: [ push-docker-hub, tag, regress-tests ]
|
||||||
if: |
|
if: |
|
||||||
(github.ref_name == 'release') &&
|
(github.ref_name == 'release') &&
|
||||||
github.event_name != 'workflow_dispatch'
|
github.event_name != 'workflow_dispatch'
|
||||||
|
|||||||
@@ -8,3 +8,4 @@
|
|||||||
/pgxn/ @neondatabase/compute
|
/pgxn/ @neondatabase/compute
|
||||||
/proxy/ @neondatabase/control-plane
|
/proxy/ @neondatabase/control-plane
|
||||||
/safekeeper/ @neondatabase/safekeepers
|
/safekeeper/ @neondatabase/safekeepers
|
||||||
|
/vendor/ @neondatabase/compute
|
||||||
|
|||||||
8
Cargo.lock
generated
8
Cargo.lock
generated
@@ -2255,6 +2255,14 @@ version = "2.2.0"
|
|||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "478c572c3d73181ff3c2539045f6eb99e5491218eae919370993b890cdbdd98e"
|
checksum = "478c572c3d73181ff3c2539045f6eb99e5491218eae919370993b890cdbdd98e"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "persistent_range_query"
|
||||||
|
version = "0.1.0"
|
||||||
|
dependencies = [
|
||||||
|
"rand",
|
||||||
|
"workspace_hack",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "petgraph"
|
name = "petgraph"
|
||||||
version = "0.6.2"
|
version = "0.6.2"
|
||||||
|
|||||||
28
Makefile
28
Makefile
@@ -20,18 +20,18 @@ else
|
|||||||
$(error Bad build type '$(BUILD_TYPE)', see Makefile for options)
|
$(error Bad build type '$(BUILD_TYPE)', see Makefile for options)
|
||||||
endif
|
endif
|
||||||
|
|
||||||
# Seccomp BPF is only available for Linux
|
|
||||||
UNAME_S := $(shell uname -s)
|
UNAME_S := $(shell uname -s)
|
||||||
ifeq ($(UNAME_S),Linux)
|
ifeq ($(UNAME_S),Linux)
|
||||||
|
# Seccomp BPF is only available for Linux
|
||||||
PG_CONFIGURE_OPTS += --with-libseccomp
|
PG_CONFIGURE_OPTS += --with-libseccomp
|
||||||
endif
|
else ifeq ($(UNAME_S),Darwin)
|
||||||
|
# macOS with brew-installed openssl requires explicit paths
|
||||||
# macOS with brew-installed openssl requires explicit paths
|
# It can be configured with OPENSSL_PREFIX variable
|
||||||
# It can be configured with OPENSSL_PREFIX variable
|
OPENSSL_PREFIX ?= $(shell brew --prefix openssl@3)
|
||||||
UNAME_S := $(shell uname -s)
|
PG_CONFIGURE_OPTS += --with-includes=$(OPENSSL_PREFIX)/include --with-libraries=$(OPENSSL_PREFIX)/lib
|
||||||
ifeq ($(UNAME_S),Darwin)
|
# macOS already has bison and flex in the system, but they are old and result in postgres-v14 target failure
|
||||||
OPENSSL_PREFIX ?= $(shell brew --prefix openssl@3)
|
# brew formulae are keg-only and not symlinked into HOMEBREW_PREFIX, force their usage
|
||||||
PG_CONFIGURE_OPTS += --with-includes=$(OPENSSL_PREFIX)/include --with-libraries=$(OPENSSL_PREFIX)/lib
|
EXTRA_PATH_OVERRIDES += $(shell brew --prefix bison)/bin/:$(shell brew --prefix flex)/bin/:
|
||||||
endif
|
endif
|
||||||
|
|
||||||
# Use -C option so that when PostgreSQL "make install" installs the
|
# Use -C option so that when PostgreSQL "make install" installs the
|
||||||
@@ -73,7 +73,8 @@ $(POSTGRES_INSTALL_DIR)/build/v14/config.status:
|
|||||||
+@echo "Configuring Postgres v14 build"
|
+@echo "Configuring Postgres v14 build"
|
||||||
mkdir -p $(POSTGRES_INSTALL_DIR)/build/v14
|
mkdir -p $(POSTGRES_INSTALL_DIR)/build/v14
|
||||||
(cd $(POSTGRES_INSTALL_DIR)/build/v14 && \
|
(cd $(POSTGRES_INSTALL_DIR)/build/v14 && \
|
||||||
$(ROOT_PROJECT_DIR)/vendor/postgres-v14/configure CFLAGS='$(PG_CFLAGS)' \
|
env PATH="$(EXTRA_PATH_OVERRIDES):$$PATH" $(ROOT_PROJECT_DIR)/vendor/postgres-v14/configure \
|
||||||
|
CFLAGS='$(PG_CFLAGS)' \
|
||||||
$(PG_CONFIGURE_OPTS) \
|
$(PG_CONFIGURE_OPTS) \
|
||||||
--prefix=$(abspath $(POSTGRES_INSTALL_DIR))/v14 > configure.log)
|
--prefix=$(abspath $(POSTGRES_INSTALL_DIR))/v14 > configure.log)
|
||||||
|
|
||||||
@@ -81,7 +82,8 @@ $(POSTGRES_INSTALL_DIR)/build/v15/config.status:
|
|||||||
+@echo "Configuring Postgres v15 build"
|
+@echo "Configuring Postgres v15 build"
|
||||||
mkdir -p $(POSTGRES_INSTALL_DIR)/build/v15
|
mkdir -p $(POSTGRES_INSTALL_DIR)/build/v15
|
||||||
(cd $(POSTGRES_INSTALL_DIR)/build/v15 && \
|
(cd $(POSTGRES_INSTALL_DIR)/build/v15 && \
|
||||||
$(ROOT_PROJECT_DIR)/vendor/postgres-v15/configure CFLAGS='$(PG_CFLAGS)' \
|
env PATH="$(EXTRA_PATH_OVERRIDES):$$PATH" $(ROOT_PROJECT_DIR)/vendor/postgres-v15/configure \
|
||||||
|
CFLAGS='$(PG_CFLAGS)' \
|
||||||
$(PG_CONFIGURE_OPTS) \
|
$(PG_CONFIGURE_OPTS) \
|
||||||
--prefix=$(abspath $(POSTGRES_INSTALL_DIR))/v15 > configure.log)
|
--prefix=$(abspath $(POSTGRES_INSTALL_DIR))/v15 > configure.log)
|
||||||
|
|
||||||
@@ -111,6 +113,8 @@ postgres-v14: postgres-v14-configure \
|
|||||||
$(MAKE) -C $(POSTGRES_INSTALL_DIR)/build/v14 MAKELEVEL=0 install
|
$(MAKE) -C $(POSTGRES_INSTALL_DIR)/build/v14 MAKELEVEL=0 install
|
||||||
+@echo "Compiling libpq v14"
|
+@echo "Compiling libpq v14"
|
||||||
$(MAKE) -C $(POSTGRES_INSTALL_DIR)/build/v14/src/interfaces/libpq install
|
$(MAKE) -C $(POSTGRES_INSTALL_DIR)/build/v14/src/interfaces/libpq install
|
||||||
|
+@echo "Compiling pg_prewarm v14"
|
||||||
|
$(MAKE) -C $(POSTGRES_INSTALL_DIR)/build/v14/contrib/pg_prewarm install
|
||||||
+@echo "Compiling pg_buffercache v14"
|
+@echo "Compiling pg_buffercache v14"
|
||||||
$(MAKE) -C $(POSTGRES_INSTALL_DIR)/build/v14/contrib/pg_buffercache install
|
$(MAKE) -C $(POSTGRES_INSTALL_DIR)/build/v14/contrib/pg_buffercache install
|
||||||
+@echo "Compiling pageinspect v14"
|
+@echo "Compiling pageinspect v14"
|
||||||
@@ -123,6 +127,8 @@ postgres-v15: postgres-v15-configure \
|
|||||||
$(MAKE) -C $(POSTGRES_INSTALL_DIR)/build/v15 MAKELEVEL=0 install
|
$(MAKE) -C $(POSTGRES_INSTALL_DIR)/build/v15 MAKELEVEL=0 install
|
||||||
+@echo "Compiling libpq v15"
|
+@echo "Compiling libpq v15"
|
||||||
$(MAKE) -C $(POSTGRES_INSTALL_DIR)/build/v15/src/interfaces/libpq install
|
$(MAKE) -C $(POSTGRES_INSTALL_DIR)/build/v15/src/interfaces/libpq install
|
||||||
|
+@echo "Compiling pg_prewarm v15"
|
||||||
|
$(MAKE) -C $(POSTGRES_INSTALL_DIR)/build/v15/contrib/pg_prewarm install
|
||||||
+@echo "Compiling pg_buffercache v15"
|
+@echo "Compiling pg_buffercache v15"
|
||||||
$(MAKE) -C $(POSTGRES_INSTALL_DIR)/build/v15/contrib/pg_buffercache install
|
$(MAKE) -C $(POSTGRES_INSTALL_DIR)/build/v15/contrib/pg_buffercache install
|
||||||
+@echo "Compiling pageinspect v15"
|
+@echo "Compiling pageinspect v15"
|
||||||
|
|||||||
25
README.md
25
README.md
@@ -53,7 +53,7 @@ curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh
|
|||||||
1. Install XCode and dependencies
|
1. Install XCode and dependencies
|
||||||
```
|
```
|
||||||
xcode-select --install
|
xcode-select --install
|
||||||
brew install protobuf etcd openssl
|
brew install protobuf etcd openssl flex bison
|
||||||
```
|
```
|
||||||
|
|
||||||
2. [Install Rust](https://www.rust-lang.org/tools/install)
|
2. [Install Rust](https://www.rust-lang.org/tools/install)
|
||||||
@@ -125,24 +125,23 @@ Python (3.9 or higher), and install python3 packages using `./scripts/pysync` (r
|
|||||||
# Create repository in .neon with proper paths to binaries and data
|
# Create repository in .neon with proper paths to binaries and data
|
||||||
# Later that would be responsibility of a package install script
|
# Later that would be responsibility of a package install script
|
||||||
> ./target/debug/neon_local init
|
> ./target/debug/neon_local init
|
||||||
Starting pageserver at '127.0.0.1:64000' in '.neon'
|
Starting pageserver at '127.0.0.1:64000' in '.neon'.
|
||||||
|
pageserver started, pid: 2545906
|
||||||
Pageserver started
|
Successfully initialized timeline de200bd42b49cc1814412c7e592dd6e9
|
||||||
Successfully initialized timeline 7dd0907914ac399ff3be45fb252bfdb7
|
Stopped pageserver 1 process with pid 2545906
|
||||||
Stopping pageserver gracefully...done!
|
|
||||||
|
|
||||||
# start pageserver and safekeeper
|
# start pageserver and safekeeper
|
||||||
> ./target/debug/neon_local start
|
> ./target/debug/neon_local start
|
||||||
Starting etcd broker using /usr/bin/etcd
|
Starting etcd broker using "/usr/bin/etcd"
|
||||||
Starting pageserver at '127.0.0.1:64000' in '.neon'
|
etcd started, pid: 2545996
|
||||||
|
Starting pageserver at '127.0.0.1:64000' in '.neon'.
|
||||||
Pageserver started
|
pageserver started, pid: 2546005
|
||||||
Starting safekeeper at '127.0.0.1:5454' in '.neon/safekeepers/sk1'
|
Starting safekeeper at '127.0.0.1:5454' in '.neon/safekeepers/sk1'.
|
||||||
Safekeeper started
|
safekeeper 1 started, pid: 2546041
|
||||||
|
|
||||||
# start postgres compute node
|
# start postgres compute node
|
||||||
> ./target/debug/neon_local pg start main
|
> ./target/debug/neon_local pg start main
|
||||||
Starting new postgres main on timeline de200bd42b49cc1814412c7e592dd6e9 ...
|
Starting new postgres (v14) main on timeline de200bd42b49cc1814412c7e592dd6e9 ...
|
||||||
Extracting base backup to create postgres instance: path=.neon/pgdatadirs/tenants/9ef87a5bf0d92544f6fafeeb3239695c/main port=55432
|
Extracting base backup to create postgres instance: path=.neon/pgdatadirs/tenants/9ef87a5bf0d92544f6fafeeb3239695c/main port=55432
|
||||||
Starting postgres node at 'host=127.0.0.1 port=55432 user=cloud_admin dbname=postgres'
|
Starting postgres node at 'host=127.0.0.1 port=55432 user=cloud_admin dbname=postgres'
|
||||||
|
|
||||||
|
|||||||
188
cli-v2-story.md
188
cli-v2-story.md
@@ -1,188 +0,0 @@
|
|||||||
Create a new Zenith repository in the current directory:
|
|
||||||
|
|
||||||
~/git-sandbox/zenith (cli-v2)$ ./target/debug/cli init
|
|
||||||
The files belonging to this database system will be owned by user "heikki".
|
|
||||||
This user must also own the server process.
|
|
||||||
|
|
||||||
The database cluster will be initialized with locale "en_GB.UTF-8".
|
|
||||||
The default database encoding has accordingly been set to "UTF8".
|
|
||||||
The default text search configuration will be set to "english".
|
|
||||||
|
|
||||||
Data page checksums are disabled.
|
|
||||||
|
|
||||||
creating directory tmp ... ok
|
|
||||||
creating subdirectories ... ok
|
|
||||||
selecting dynamic shared memory implementation ... posix
|
|
||||||
selecting default max_connections ... 100
|
|
||||||
selecting default shared_buffers ... 128MB
|
|
||||||
selecting default time zone ... Europe/Helsinki
|
|
||||||
creating configuration files ... ok
|
|
||||||
running bootstrap script ... ok
|
|
||||||
performing post-bootstrap initialization ... ok
|
|
||||||
syncing data to disk ... ok
|
|
||||||
|
|
||||||
initdb: warning: enabling "trust" authentication for local connections
|
|
||||||
You can change this by editing pg_hba.conf or using the option -A, or
|
|
||||||
--auth-local and --auth-host, the next time you run initdb.
|
|
||||||
new zenith repository was created in .zenith
|
|
||||||
|
|
||||||
Initially, there is only one branch:
|
|
||||||
|
|
||||||
~/git-sandbox/zenith (cli-v2)$ ./target/debug/cli branch
|
|
||||||
main
|
|
||||||
|
|
||||||
Start a local Postgres instance on the branch:
|
|
||||||
|
|
||||||
~/git-sandbox/zenith (cli-v2)$ ./target/debug/cli start main
|
|
||||||
Creating data directory from snapshot at 0/15FFB08...
|
|
||||||
waiting for server to start....2021-04-13 09:27:43.919 EEST [984664] LOG: starting PostgreSQL 14devel on x86_64-pc-linux-gnu, compiled by gcc (Debian 10.2.1-6) 10.2.1 20210110, 64-bit
|
|
||||||
2021-04-13 09:27:43.920 EEST [984664] LOG: listening on IPv6 address "::1", port 5432
|
|
||||||
2021-04-13 09:27:43.920 EEST [984664] LOG: listening on IPv4 address "127.0.0.1", port 5432
|
|
||||||
2021-04-13 09:27:43.927 EEST [984664] LOG: listening on Unix socket "/tmp/.s.PGSQL.5432"
|
|
||||||
2021-04-13 09:27:43.939 EEST [984665] LOG: database system was interrupted; last known up at 2021-04-13 09:27:33 EEST
|
|
||||||
2021-04-13 09:27:43.939 EEST [984665] LOG: creating missing WAL directory "pg_wal/archive_status"
|
|
||||||
2021-04-13 09:27:44.189 EEST [984665] LOG: database system was not properly shut down; automatic recovery in progress
|
|
||||||
2021-04-13 09:27:44.195 EEST [984665] LOG: invalid record length at 0/15FFB80: wanted 24, got 0
|
|
||||||
2021-04-13 09:27:44.195 EEST [984665] LOG: redo is not required
|
|
||||||
2021-04-13 09:27:44.225 EEST [984664] LOG: database system is ready to accept connections
|
|
||||||
done
|
|
||||||
server started
|
|
||||||
|
|
||||||
Run some commands against it:
|
|
||||||
|
|
||||||
~/git-sandbox/zenith (cli-v2)$ psql postgres -c "create table foo (t text);"
|
|
||||||
CREATE TABLE
|
|
||||||
~/git-sandbox/zenith (cli-v2)$ psql postgres -c "insert into foo values ('inserted on the main branch');"
|
|
||||||
INSERT 0 1
|
|
||||||
~/git-sandbox/zenith (cli-v2)$ psql postgres -c "select * from foo"
|
|
||||||
t
|
|
||||||
-----------------------------
|
|
||||||
inserted on the main branch
|
|
||||||
(1 row)
|
|
||||||
|
|
||||||
Create a new branch called 'experimental'. We create it from the
|
|
||||||
current end of the 'main' branch, but you could specify a different
|
|
||||||
LSN as the start point instead.
|
|
||||||
|
|
||||||
~/git-sandbox/zenith (cli-v2)$ ./target/debug/cli branch experimental main
|
|
||||||
branching at end of WAL: 0/161F478
|
|
||||||
|
|
||||||
~/git-sandbox/zenith (cli-v2)$ ./target/debug/cli branch
|
|
||||||
experimental
|
|
||||||
main
|
|
||||||
|
|
||||||
Start another Postgres instance off the 'experimental' branch:
|
|
||||||
|
|
||||||
~/git-sandbox/zenith (cli-v2)$ ./target/debug/cli start experimental -- -o -p5433
|
|
||||||
Creating data directory from snapshot at 0/15FFB08...
|
|
||||||
waiting for server to start....2021-04-13 09:28:41.874 EEST [984766] LOG: starting PostgreSQL 14devel on x86_64-pc-linux-gnu, compiled by gcc (Debian 10.2.1-6) 10.2.1 20210110, 64-bit
|
|
||||||
2021-04-13 09:28:41.875 EEST [984766] LOG: listening on IPv6 address "::1", port 5433
|
|
||||||
2021-04-13 09:28:41.875 EEST [984766] LOG: listening on IPv4 address "127.0.0.1", port 5433
|
|
||||||
2021-04-13 09:28:41.883 EEST [984766] LOG: listening on Unix socket "/tmp/.s.PGSQL.5433"
|
|
||||||
2021-04-13 09:28:41.896 EEST [984767] LOG: database system was interrupted; last known up at 2021-04-13 09:27:33 EEST
|
|
||||||
2021-04-13 09:28:42.265 EEST [984767] LOG: database system was not properly shut down; automatic recovery in progress
|
|
||||||
2021-04-13 09:28:42.269 EEST [984767] LOG: redo starts at 0/15FFB80
|
|
||||||
2021-04-13 09:28:42.272 EEST [984767] LOG: invalid record length at 0/161F4B0: wanted 24, got 0
|
|
||||||
2021-04-13 09:28:42.272 EEST [984767] LOG: redo done at 0/161F478 system usage: CPU: user: 0.00 s, system: 0.00 s, elapsed: 0.00 s
|
|
||||||
2021-04-13 09:28:42.321 EEST [984766] LOG: database system is ready to accept connections
|
|
||||||
done
|
|
||||||
server started
|
|
||||||
|
|
||||||
Insert some a row on the 'experimental' branch:
|
|
||||||
|
|
||||||
~/git-sandbox/zenith (cli-v2)$ psql postgres -p5433 -c "select * from foo"
|
|
||||||
t
|
|
||||||
-----------------------------
|
|
||||||
inserted on the main branch
|
|
||||||
(1 row)
|
|
||||||
|
|
||||||
~/git-sandbox/zenith (cli-v2)$ psql postgres -p5433 -c "insert into foo values ('inserted on experimental')"
|
|
||||||
INSERT 0 1
|
|
||||||
~/git-sandbox/zenith (cli-v2)$ psql postgres -p5433 -c "select * from foo"
|
|
||||||
t
|
|
||||||
-----------------------------
|
|
||||||
inserted on the main branch
|
|
||||||
inserted on experimental
|
|
||||||
(2 rows)
|
|
||||||
|
|
||||||
See that the other Postgres instance is still running on 'main' branch on port 5432:
|
|
||||||
|
|
||||||
|
|
||||||
~/git-sandbox/zenith (cli-v2)$ psql postgres -p5432 -c "select * from foo"
|
|
||||||
t
|
|
||||||
-----------------------------
|
|
||||||
inserted on the main branch
|
|
||||||
(1 row)
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
Everything is stored in the .zenith directory:
|
|
||||||
|
|
||||||
~/git-sandbox/zenith (cli-v2)$ ls -l .zenith/
|
|
||||||
total 12
|
|
||||||
drwxr-xr-x 4 heikki heikki 4096 Apr 13 09:28 datadirs
|
|
||||||
drwxr-xr-x 4 heikki heikki 4096 Apr 13 09:27 refs
|
|
||||||
drwxr-xr-x 4 heikki heikki 4096 Apr 13 09:28 timelines
|
|
||||||
|
|
||||||
The 'datadirs' directory contains the datadirs of the running instances:
|
|
||||||
|
|
||||||
~/git-sandbox/zenith (cli-v2)$ ls -l .zenith/datadirs/
|
|
||||||
total 8
|
|
||||||
drwx------ 18 heikki heikki 4096 Apr 13 09:27 3c0c634c1674079b2c6d4edf7c91523e
|
|
||||||
drwx------ 18 heikki heikki 4096 Apr 13 09:28 697e3c103d4b1763cd6e82e4ff361d76
|
|
||||||
~/git-sandbox/zenith (cli-v2)$ ls -l .zenith/datadirs/3c0c634c1674079b2c6d4edf7c91523e/
|
|
||||||
total 124
|
|
||||||
drwxr-xr-x 5 heikki heikki 4096 Apr 13 09:27 base
|
|
||||||
drwxr-xr-x 2 heikki heikki 4096 Apr 13 09:27 global
|
|
||||||
drwxr-xr-x 2 heikki heikki 4096 Apr 13 09:27 pg_commit_ts
|
|
||||||
drwxr-xr-x 2 heikki heikki 4096 Apr 13 09:27 pg_dynshmem
|
|
||||||
-rw------- 1 heikki heikki 4760 Apr 13 09:27 pg_hba.conf
|
|
||||||
-rw------- 1 heikki heikki 1636 Apr 13 09:27 pg_ident.conf
|
|
||||||
drwxr-xr-x 4 heikki heikki 4096 Apr 13 09:32 pg_logical
|
|
||||||
drwxr-xr-x 4 heikki heikki 4096 Apr 13 09:27 pg_multixact
|
|
||||||
drwxr-xr-x 2 heikki heikki 4096 Apr 13 09:27 pg_notify
|
|
||||||
drwxr-xr-x 2 heikki heikki 4096 Apr 13 09:27 pg_replslot
|
|
||||||
drwxr-xr-x 2 heikki heikki 4096 Apr 13 09:27 pg_serial
|
|
||||||
drwxr-xr-x 2 heikki heikki 4096 Apr 13 09:27 pg_snapshots
|
|
||||||
drwxr-xr-x 2 heikki heikki 4096 Apr 13 09:27 pg_stat
|
|
||||||
drwxr-xr-x 2 heikki heikki 4096 Apr 13 09:34 pg_stat_tmp
|
|
||||||
drwxr-xr-x 2 heikki heikki 4096 Apr 13 09:27 pg_subtrans
|
|
||||||
drwxr-xr-x 2 heikki heikki 4096 Apr 13 09:27 pg_tblspc
|
|
||||||
drwxr-xr-x 2 heikki heikki 4096 Apr 13 09:27 pg_twophase
|
|
||||||
-rw------- 1 heikki heikki 3 Apr 13 09:27 PG_VERSION
|
|
||||||
lrwxrwxrwx 1 heikki heikki 52 Apr 13 09:27 pg_wal -> ../../timelines/3c0c634c1674079b2c6d4edf7c91523e/wal
|
|
||||||
drwxr-xr-x 2 heikki heikki 4096 Apr 13 09:27 pg_xact
|
|
||||||
-rw------- 1 heikki heikki 88 Apr 13 09:27 postgresql.auto.conf
|
|
||||||
-rw------- 1 heikki heikki 28688 Apr 13 09:27 postgresql.conf
|
|
||||||
-rw------- 1 heikki heikki 96 Apr 13 09:27 postmaster.opts
|
|
||||||
-rw------- 1 heikki heikki 149 Apr 13 09:27 postmaster.pid
|
|
||||||
|
|
||||||
Note how 'pg_wal' is just a symlink to the 'timelines' directory. The
|
|
||||||
datadir is ephemeral, you can delete it at any time, and it can be reconstructed
|
|
||||||
from the snapshots and WAL stored in the 'timelines' directory. So if you push/pull
|
|
||||||
the repository, the 'datadirs' are not included. (They are like git working trees)
|
|
||||||
|
|
||||||
~/git-sandbox/zenith (cli-v2)$ killall -9 postgres
|
|
||||||
~/git-sandbox/zenith (cli-v2)$ rm -rf .zenith/datadirs/*
|
|
||||||
~/git-sandbox/zenith (cli-v2)$ ./target/debug/cli start experimental -- -o -p5433
|
|
||||||
Creating data directory from snapshot at 0/15FFB08...
|
|
||||||
waiting for server to start....2021-04-13 09:37:05.476 EEST [985340] LOG: starting PostgreSQL 14devel on x86_64-pc-linux-gnu, compiled by gcc (Debian 10.2.1-6) 10.2.1 20210110, 64-bit
|
|
||||||
2021-04-13 09:37:05.477 EEST [985340] LOG: listening on IPv6 address "::1", port 5433
|
|
||||||
2021-04-13 09:37:05.477 EEST [985340] LOG: listening on IPv4 address "127.0.0.1", port 5433
|
|
||||||
2021-04-13 09:37:05.487 EEST [985340] LOG: listening on Unix socket "/tmp/.s.PGSQL.5433"
|
|
||||||
2021-04-13 09:37:05.498 EEST [985341] LOG: database system was interrupted; last known up at 2021-04-13 09:27:33 EEST
|
|
||||||
2021-04-13 09:37:05.808 EEST [985341] LOG: database system was not properly shut down; automatic recovery in progress
|
|
||||||
2021-04-13 09:37:05.813 EEST [985341] LOG: redo starts at 0/15FFB80
|
|
||||||
2021-04-13 09:37:05.815 EEST [985341] LOG: invalid record length at 0/161F770: wanted 24, got 0
|
|
||||||
2021-04-13 09:37:05.815 EEST [985341] LOG: redo done at 0/161F738 system usage: CPU: user: 0.00 s, system: 0.00 s, elapsed: 0.00 s
|
|
||||||
2021-04-13 09:37:05.866 EEST [985340] LOG: database system is ready to accept connections
|
|
||||||
done
|
|
||||||
server started
|
|
||||||
~/git-sandbox/zenith (cli-v2)$ psql postgres -p5433 -c "select * from foo"
|
|
||||||
t
|
|
||||||
-----------------------------
|
|
||||||
inserted on the main branch
|
|
||||||
inserted on experimental
|
|
||||||
(2 rows)
|
|
||||||
|
|
||||||
@@ -26,8 +26,18 @@ use nix::unistd::Pid;
|
|||||||
|
|
||||||
use utils::lock_file;
|
use utils::lock_file;
|
||||||
|
|
||||||
const RETRIES: u32 = 15;
|
// These constants control the loop used to poll for process start / stop.
|
||||||
const RETRY_TIMEOUT_MILLIS: u64 = 500;
|
//
|
||||||
|
// The loop waits for at most 10 seconds, polling every 100 ms.
|
||||||
|
// Once a second, it prints a dot ("."), to give the user an indication that
|
||||||
|
// it's waiting. If the process hasn't started/stopped after 5 seconds,
|
||||||
|
// it prints a notice that it's taking long, but keeps waiting.
|
||||||
|
//
|
||||||
|
const RETRY_UNTIL_SECS: u64 = 10;
|
||||||
|
const RETRIES: u64 = (RETRY_UNTIL_SECS * 1000) / RETRY_INTERVAL_MILLIS;
|
||||||
|
const RETRY_INTERVAL_MILLIS: u64 = 100;
|
||||||
|
const DOT_EVERY_RETRIES: u64 = 10;
|
||||||
|
const NOTICE_AFTER_RETRIES: u64 = 50;
|
||||||
|
|
||||||
/// Argument to `start_process`, to indicate whether it should create pidfile or if the process creates
|
/// Argument to `start_process`, to indicate whether it should create pidfile or if the process creates
|
||||||
/// it itself.
|
/// it itself.
|
||||||
@@ -107,16 +117,16 @@ where
|
|||||||
return Ok(spawned_process);
|
return Ok(spawned_process);
|
||||||
}
|
}
|
||||||
Ok(false) => {
|
Ok(false) => {
|
||||||
if retries < 5 {
|
if retries == NOTICE_AFTER_RETRIES {
|
||||||
|
// The process is taking a long time to start up. Keep waiting, but
|
||||||
|
// print a message
|
||||||
|
print!("\n{process_name} has not started yet, continuing to wait");
|
||||||
|
}
|
||||||
|
if retries % DOT_EVERY_RETRIES == 0 {
|
||||||
print!(".");
|
print!(".");
|
||||||
io::stdout().flush().unwrap();
|
io::stdout().flush().unwrap();
|
||||||
} else {
|
|
||||||
if retries == 5 {
|
|
||||||
println!() // put a line break after dots for second message
|
|
||||||
}
|
|
||||||
println!("{process_name} has not started yet, retrying ({retries})...");
|
|
||||||
}
|
}
|
||||||
thread::sleep(Duration::from_millis(RETRY_TIMEOUT_MILLIS));
|
thread::sleep(Duration::from_millis(RETRY_INTERVAL_MILLIS));
|
||||||
}
|
}
|
||||||
Err(e) => {
|
Err(e) => {
|
||||||
println!("{process_name} failed to start: {e:#}");
|
println!("{process_name} failed to start: {e:#}");
|
||||||
@@ -127,7 +137,8 @@ where
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
anyhow::bail!("{process_name} could not start in {RETRIES} attempts");
|
println!();
|
||||||
|
anyhow::bail!("{process_name} did not start in {RETRY_UNTIL_SECS} seconds");
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Stops the process, using the pid file given. Returns Ok also if the process is already not running.
|
/// Stops the process, using the pid file given. Returns Ok also if the process is already not running.
|
||||||
@@ -158,7 +169,7 @@ pub fn stop_process(immediate: bool, process_name: &str, pid_file: &Path) -> any
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Wait until process is gone
|
// Wait until process is gone
|
||||||
for _ in 0..RETRIES {
|
for retries in 0..RETRIES {
|
||||||
match process_has_stopped(pid) {
|
match process_has_stopped(pid) {
|
||||||
Ok(true) => {
|
Ok(true) => {
|
||||||
println!("\n{process_name} stopped");
|
println!("\n{process_name} stopped");
|
||||||
@@ -170,9 +181,16 @@ pub fn stop_process(immediate: bool, process_name: &str, pid_file: &Path) -> any
|
|||||||
return Ok(());
|
return Ok(());
|
||||||
}
|
}
|
||||||
Ok(false) => {
|
Ok(false) => {
|
||||||
print!(".");
|
if retries == NOTICE_AFTER_RETRIES {
|
||||||
io::stdout().flush().unwrap();
|
// The process is taking a long time to shut down. Keep waiting, but
|
||||||
thread::sleep(Duration::from_secs(1))
|
// print a message
|
||||||
|
print!("\n{process_name} has not stopped yet, continuing to wait");
|
||||||
|
}
|
||||||
|
if retries % DOT_EVERY_RETRIES == 0 {
|
||||||
|
print!(".");
|
||||||
|
io::stdout().flush().unwrap();
|
||||||
|
}
|
||||||
|
thread::sleep(Duration::from_millis(RETRY_INTERVAL_MILLIS));
|
||||||
}
|
}
|
||||||
Err(e) => {
|
Err(e) => {
|
||||||
println!("{process_name} with pid {pid} failed to stop: {e:#}");
|
println!("{process_name} with pid {pid} failed to stop: {e:#}");
|
||||||
@@ -180,24 +198,21 @@ pub fn stop_process(immediate: bool, process_name: &str, pid_file: &Path) -> any
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
println!();
|
||||||
anyhow::bail!("{process_name} with pid {pid} failed to stop in {RETRIES} attempts");
|
anyhow::bail!("{process_name} with pid {pid} did not stop in {RETRY_UNTIL_SECS} seconds");
|
||||||
}
|
}
|
||||||
|
|
||||||
fn fill_rust_env_vars(cmd: &mut Command) -> &mut Command {
|
fn fill_rust_env_vars(cmd: &mut Command) -> &mut Command {
|
||||||
let mut filled_cmd = cmd.env_clear().env("RUST_BACKTRACE", "1");
|
let mut filled_cmd = cmd.env_clear().env("RUST_BACKTRACE", "1");
|
||||||
|
|
||||||
let var = "LLVM_PROFILE_FILE";
|
// Pass through these environment variables to the command
|
||||||
if let Some(val) = std::env::var_os(var) {
|
for var in ["LLVM_PROFILE_FILE", "FAILPOINTS", "RUST_LOG"] {
|
||||||
filled_cmd = filled_cmd.env(var, val);
|
if let Some(val) = std::env::var_os(var) {
|
||||||
|
filled_cmd = filled_cmd.env(var, val);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
const RUST_LOG_KEY: &str = "RUST_LOG";
|
filled_cmd
|
||||||
if let Ok(rust_log_value) = std::env::var(RUST_LOG_KEY) {
|
|
||||||
filled_cmd.env(RUST_LOG_KEY, rust_log_value)
|
|
||||||
} else {
|
|
||||||
filled_cmd
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
fn fill_aws_secrets_vars(mut cmd: &mut Command) -> &mut Command {
|
fn fill_aws_secrets_vars(mut cmd: &mut Command) -> &mut Command {
|
||||||
|
|||||||
@@ -343,7 +343,7 @@ impl PostgresNode {
|
|||||||
// To be able to restore database in case of pageserver node crash, safekeeper should not
|
// To be able to restore database in case of pageserver node crash, safekeeper should not
|
||||||
// remove WAL beyond this point. Too large lag can cause space exhaustion in safekeepers
|
// remove WAL beyond this point. Too large lag can cause space exhaustion in safekeepers
|
||||||
// (if they are not able to upload WAL to S3).
|
// (if they are not able to upload WAL to S3).
|
||||||
conf.append("max_replication_write_lag", "500MB");
|
conf.append("max_replication_write_lag", "15MB");
|
||||||
conf.append("max_replication_flush_lag", "10GB");
|
conf.append("max_replication_flush_lag", "10GB");
|
||||||
|
|
||||||
if !self.env.safekeepers.is_empty() {
|
if !self.env.safekeepers.is_empty() {
|
||||||
|
|||||||
@@ -6,7 +6,7 @@ use crate::{background_process, local_env};
|
|||||||
|
|
||||||
pub fn start_etcd_process(env: &local_env::LocalEnv) -> anyhow::Result<()> {
|
pub fn start_etcd_process(env: &local_env::LocalEnv) -> anyhow::Result<()> {
|
||||||
let etcd_broker = &env.etcd_broker;
|
let etcd_broker = &env.etcd_broker;
|
||||||
println!(
|
print!(
|
||||||
"Starting etcd broker using {:?}",
|
"Starting etcd broker using {:?}",
|
||||||
etcd_broker.etcd_binary_path
|
etcd_broker.etcd_binary_path
|
||||||
);
|
);
|
||||||
|
|||||||
@@ -237,7 +237,7 @@ impl PageServerNode {
|
|||||||
datadir: &Path,
|
datadir: &Path,
|
||||||
update_config: bool,
|
update_config: bool,
|
||||||
) -> anyhow::Result<Child> {
|
) -> anyhow::Result<Child> {
|
||||||
println!(
|
print!(
|
||||||
"Starting pageserver at '{}' in '{}'",
|
"Starting pageserver at '{}' in '{}'",
|
||||||
self.pg_connection_config.raw_address(),
|
self.pg_connection_config.raw_address(),
|
||||||
datadir.display()
|
datadir.display()
|
||||||
|
|||||||
@@ -83,6 +83,16 @@ A subject for future modularization.
|
|||||||
`/libs/metrics`:
|
`/libs/metrics`:
|
||||||
Helpers for exposing Prometheus metrics from the server.
|
Helpers for exposing Prometheus metrics from the server.
|
||||||
|
|
||||||
|
### Adding dependencies
|
||||||
|
When you add a Cargo dependency, update the hakari manifest by running the commands below, then commit the updated `Cargo.lock` and `workspace_hack/`. If the commands produce no changes, that's fine.
|
||||||
|
|
||||||
|
```bash
|
||||||
|
cargo hakari generate
|
||||||
|
cargo hakari manage-deps
|
||||||
|
```
|
||||||
|
|
||||||
|
If you don't have hakari installed (`error: no such subcommand: hakari`), install it by running `cargo install cargo-hakari`.
|
||||||
|
|
||||||
## Using Python
|
## Using Python
|
||||||
Note that Debian/Ubuntu Python packages are stale, as it commonly happens,
|
Note that Debian/Ubuntu Python packages are stale, as it commonly happens,
|
||||||
so manual installation of dependencies is not recommended.
|
so manual installation of dependencies is not recommended.
|
||||||
|
|||||||
12
libs/persistent_range_query/Cargo.toml
Normal file
12
libs/persistent_range_query/Cargo.toml
Normal file
@@ -0,0 +1,12 @@
|
|||||||
|
[package]
|
||||||
|
name = "persistent_range_query"
|
||||||
|
version = "0.1.0"
|
||||||
|
edition = "2021"
|
||||||
|
|
||||||
|
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
|
||||||
|
|
||||||
|
[dependencies]
|
||||||
|
workspace_hack = { version = "0.1", path = "../../workspace_hack" }
|
||||||
|
|
||||||
|
[dev-dependencies]
|
||||||
|
rand = "0.8.3"
|
||||||
78
libs/persistent_range_query/src/lib.rs
Normal file
78
libs/persistent_range_query/src/lib.rs
Normal file
@@ -0,0 +1,78 @@
|
|||||||
|
use std::ops::Range;
|
||||||
|
|
||||||
|
pub mod naive;
|
||||||
|
pub mod ops;
|
||||||
|
pub mod segment_tree;
|
||||||
|
|
||||||
|
/// Should be a monoid:
|
||||||
|
/// * Identity element: for all a: combine(new_for_empty_range(), a) = combine(a, new_for_empty_range()) = a
|
||||||
|
/// * Associativity: for all a, b, c: combine(combine(a, b), c) == combine(a, combine(b, c))
|
||||||
|
pub trait RangeQueryResult<Key>: Sized + Clone {
|
||||||
|
// Clone is equivalent to combine with an empty range.
|
||||||
|
|
||||||
|
fn new_for_empty_range() -> Self;
|
||||||
|
|
||||||
|
// Contract: left_range.end == right_range.start
|
||||||
|
// left_range.start == left_range.end == right_range.start == right_range.end is still possible
|
||||||
|
fn combine(
|
||||||
|
left: &Self,
|
||||||
|
left_range: &Range<Key>,
|
||||||
|
right: &Self,
|
||||||
|
right_range: &Range<Key>,
|
||||||
|
) -> Self;
|
||||||
|
|
||||||
|
fn add(left: &mut Self, left_range: &Range<Key>, right: &Self, right_range: &Range<Key>);
|
||||||
|
}
|
||||||
|
|
||||||
|
pub trait LazyRangeInitializer<Result: RangeQueryResult<Key>, Key> {
|
||||||
|
fn get(&self, range: &Range<Key>) -> Result;
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Should be a monoid:
|
||||||
|
/// * Identity element: for all op: compose(no_op(), op) == compose(op, no_op()) == op
|
||||||
|
/// * Associativity: for all op_1, op_2, op_3: compose(compose(op_1, op_2), op_3) == compose(op_1, compose(op_2, op_3))
|
||||||
|
///
|
||||||
|
/// Should left act on Result:
|
||||||
|
/// * Identity operation: for all r: no_op().apply(r) == r
|
||||||
|
/// * Compatibility: for all op_1, op_2, r: op_1.apply(op_2.apply(r)) == compose(op_1, op_2).apply(r)
|
||||||
|
pub trait RangeModification<Key> {
|
||||||
|
type Result: RangeQueryResult<Key>;
|
||||||
|
|
||||||
|
fn no_op() -> Self;
|
||||||
|
fn is_no_op(&self) -> bool;
|
||||||
|
fn is_reinitialization(&self) -> bool;
|
||||||
|
fn apply(&self, result: &mut Self::Result, range: &Range<Key>);
|
||||||
|
fn compose(later: &Self, earlier: &mut Self);
|
||||||
|
}
|
||||||
|
|
||||||
|
pub trait VecReadableVersion<Modification: RangeModification<Key>, Key> {
|
||||||
|
fn get(&self, keys: &Range<Key>) -> Modification::Result;
|
||||||
|
}
|
||||||
|
|
||||||
|
// TODO: use trait alias when stabilized
|
||||||
|
pub trait VecFrozenVersion<Modification: RangeModification<Key>, Key>:
|
||||||
|
Clone + VecReadableVersion<Modification, Key>
|
||||||
|
{
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<
|
||||||
|
T: Clone + VecReadableVersion<Modification, Key>,
|
||||||
|
Modification: RangeModification<Key>,
|
||||||
|
Key,
|
||||||
|
> VecFrozenVersion<Modification, Key> for T
|
||||||
|
{
|
||||||
|
}
|
||||||
|
|
||||||
|
pub trait PersistentVecStorage<
|
||||||
|
Modification: RangeModification<Key>,
|
||||||
|
Initializer: LazyRangeInitializer<Modification::Result, Key>,
|
||||||
|
Key,
|
||||||
|
>: VecReadableVersion<Modification, Key>
|
||||||
|
{
|
||||||
|
fn new(all_keys: Range<Key>, initializer: Initializer) -> Self;
|
||||||
|
|
||||||
|
type FrozenVersion: VecFrozenVersion<Modification, Key>;
|
||||||
|
|
||||||
|
fn modify(&mut self, keys: &Range<Key>, modification: &Modification);
|
||||||
|
fn freeze(&mut self) -> Self::FrozenVersion;
|
||||||
|
}
|
||||||
115
libs/persistent_range_query/src/naive.rs
Normal file
115
libs/persistent_range_query/src/naive.rs
Normal file
@@ -0,0 +1,115 @@
|
|||||||
|
use crate::{
|
||||||
|
LazyRangeInitializer, PersistentVecStorage, RangeModification, RangeQueryResult,
|
||||||
|
VecReadableVersion,
|
||||||
|
};
|
||||||
|
use std::marker::PhantomData;
|
||||||
|
use std::ops::Range;
|
||||||
|
use std::rc::Rc;
|
||||||
|
|
||||||
|
pub struct NaiveFrozenVersion<Modification: RangeModification<Key>, Key> {
|
||||||
|
all_keys: Range<Key>,
|
||||||
|
values: Rc<Box<Vec<Modification::Result>>>,
|
||||||
|
}
|
||||||
|
|
||||||
|
pub trait IndexableKey: Clone {
|
||||||
|
fn index(all_keys: &Range<Self>, key: &Self) -> usize;
|
||||||
|
fn element_range(all_keys: &Range<Self>, index: usize) -> Range<Self>;
|
||||||
|
}
|
||||||
|
|
||||||
|
fn get<Modification: RangeModification<Key>, Key: IndexableKey>(
|
||||||
|
all_keys: &Range<Key>,
|
||||||
|
values: &Vec<Modification::Result>,
|
||||||
|
keys: &Range<Key>,
|
||||||
|
) -> Modification::Result {
|
||||||
|
let mut result = Modification::Result::new_for_empty_range();
|
||||||
|
let mut result_range = keys.start.clone()..keys.start.clone();
|
||||||
|
for index in
|
||||||
|
IndexableKey::index(&all_keys, &keys.start)..IndexableKey::index(&all_keys, &keys.end)
|
||||||
|
{
|
||||||
|
let element_range = IndexableKey::element_range(&all_keys, index);
|
||||||
|
Modification::Result::add(&mut result, &result_range, &values[index], &element_range);
|
||||||
|
result_range.end = element_range.end;
|
||||||
|
}
|
||||||
|
result
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<Modification: RangeModification<Key>, Key: IndexableKey> VecReadableVersion<Modification, Key>
|
||||||
|
for NaiveFrozenVersion<Modification, Key>
|
||||||
|
{
|
||||||
|
fn get(&self, keys: &Range<Key>) -> Modification::Result {
|
||||||
|
get::<Modification, Key>(&self.all_keys, &self.values, keys)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Manual implementation of `Clone` because `derive` requires `Modification: Clone`
|
||||||
|
impl<Modification: RangeModification<Key>, Key: Clone> Clone
|
||||||
|
for NaiveFrozenVersion<Modification, Key>
|
||||||
|
{
|
||||||
|
fn clone(&self) -> Self {
|
||||||
|
Self {
|
||||||
|
all_keys: self.all_keys.clone(),
|
||||||
|
values: self.values.clone(),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// TODO: is it at all possible to store previous versions in this struct,
|
||||||
|
// without any Rc<>?
|
||||||
|
pub struct NaiveVecStorage<
|
||||||
|
Modification: RangeModification<Key>,
|
||||||
|
Initializer: LazyRangeInitializer<Modification::Result, Key>,
|
||||||
|
Key: IndexableKey,
|
||||||
|
> {
|
||||||
|
all_keys: Range<Key>,
|
||||||
|
last_version: Vec<Modification::Result>,
|
||||||
|
_initializer: PhantomData<Initializer>,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<
|
||||||
|
Modification: RangeModification<Key>,
|
||||||
|
Initializer: LazyRangeInitializer<Modification::Result, Key>,
|
||||||
|
Key: IndexableKey,
|
||||||
|
> VecReadableVersion<Modification, Key> for NaiveVecStorage<Modification, Initializer, Key>
|
||||||
|
{
|
||||||
|
fn get(&self, keys: &Range<Key>) -> Modification::Result {
|
||||||
|
get::<Modification, Key>(&self.all_keys, &self.last_version, keys)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<
|
||||||
|
Modification: RangeModification<Key>,
|
||||||
|
Initializer: LazyRangeInitializer<Modification::Result, Key>,
|
||||||
|
Key: IndexableKey,
|
||||||
|
> PersistentVecStorage<Modification, Initializer, Key>
|
||||||
|
for NaiveVecStorage<Modification, Initializer, Key>
|
||||||
|
{
|
||||||
|
fn new(all_keys: Range<Key>, initializer: Initializer) -> Self {
|
||||||
|
let mut values = Vec::with_capacity(IndexableKey::index(&all_keys, &all_keys.end));
|
||||||
|
for index in 0..values.capacity() {
|
||||||
|
values.push(initializer.get(&IndexableKey::element_range(&all_keys, index)));
|
||||||
|
}
|
||||||
|
NaiveVecStorage {
|
||||||
|
all_keys,
|
||||||
|
last_version: values,
|
||||||
|
_initializer: PhantomData,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
type FrozenVersion = NaiveFrozenVersion<Modification, Key>;
|
||||||
|
|
||||||
|
fn modify(&mut self, keys: &Range<Key>, modification: &Modification) {
|
||||||
|
for index in IndexableKey::index(&self.all_keys, &keys.start)
|
||||||
|
..IndexableKey::index(&self.all_keys, &keys.end)
|
||||||
|
{
|
||||||
|
let element_range = IndexableKey::element_range(&self.all_keys, index);
|
||||||
|
modification.apply(&mut self.last_version[index], &element_range);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn freeze(&mut self) -> Self::FrozenVersion {
|
||||||
|
NaiveFrozenVersion::<Modification, Key> {
|
||||||
|
all_keys: self.all_keys.clone(),
|
||||||
|
values: Rc::new(Box::new(self.last_version.clone())),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
14
libs/persistent_range_query/src/ops/mod.rs
Normal file
14
libs/persistent_range_query/src/ops/mod.rs
Normal file
@@ -0,0 +1,14 @@
|
|||||||
|
pub mod rsq;
|
||||||
|
|
||||||
|
#[derive(Copy, Clone, Debug)]
|
||||||
|
pub struct SameElementsInitializer<T> {
|
||||||
|
initial_element_value: T,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<T> SameElementsInitializer<T> {
|
||||||
|
pub fn new(initial_element_value: T) -> Self {
|
||||||
|
SameElementsInitializer {
|
||||||
|
initial_element_value,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
118
libs/persistent_range_query/src/ops/rsq.rs
Normal file
118
libs/persistent_range_query/src/ops/rsq.rs
Normal file
@@ -0,0 +1,118 @@
|
|||||||
|
//! # Range Sum Query
|
||||||
|
|
||||||
|
use crate::ops::SameElementsInitializer;
|
||||||
|
use crate::{LazyRangeInitializer, RangeModification, RangeQueryResult};
|
||||||
|
use std::borrow::Borrow;
|
||||||
|
use std::ops::{Add, AddAssign, Range};
|
||||||
|
|
||||||
|
// TODO: commutative Add
|
||||||
|
|
||||||
|
#[derive(Clone, Copy, Debug)]
|
||||||
|
pub struct SumResult<T> {
|
||||||
|
sum: T,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<T> SumResult<T> {
|
||||||
|
pub fn sum(&self) -> &T {
|
||||||
|
&self.sum
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<T: Clone + for<'a> AddAssign<&'a T> + From<u8>, Key> RangeQueryResult<Key> for SumResult<T>
|
||||||
|
where
|
||||||
|
for<'a> &'a T: Add<&'a T, Output = T>,
|
||||||
|
{
|
||||||
|
fn new_for_empty_range() -> Self {
|
||||||
|
SumResult { sum: 0.into() }
|
||||||
|
}
|
||||||
|
|
||||||
|
fn combine(
|
||||||
|
left: &Self,
|
||||||
|
_left_range: &Range<Key>,
|
||||||
|
right: &Self,
|
||||||
|
_right_range: &Range<Key>,
|
||||||
|
) -> Self {
|
||||||
|
SumResult {
|
||||||
|
sum: &left.sum + &right.sum,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn add(left: &mut Self, _left_range: &Range<Key>, right: &Self, _right_range: &Range<Key>) {
|
||||||
|
left.sum += &right.sum
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub trait SumOfSameElements<Key> {
|
||||||
|
fn sum(initial_element_value: &Self, keys: &Range<Key>) -> Self;
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<T: SumOfSameElements<Key>, TB: Borrow<T>, Key> LazyRangeInitializer<SumResult<T>, Key>
|
||||||
|
for SameElementsInitializer<TB>
|
||||||
|
where
|
||||||
|
SumResult<T>: RangeQueryResult<Key>,
|
||||||
|
{
|
||||||
|
fn get(&self, range: &Range<Key>) -> SumResult<T> {
|
||||||
|
SumResult {
|
||||||
|
sum: SumOfSameElements::sum(self.initial_element_value.borrow(), range),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Copy, Clone, Debug)]
|
||||||
|
pub enum AddAssignModification<T> {
|
||||||
|
None,
|
||||||
|
Add(T),
|
||||||
|
Assign(T),
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<T: Clone + for<'a> AddAssign<&'a T>, Key> RangeModification<Key> for AddAssignModification<T>
|
||||||
|
where
|
||||||
|
SumResult<T>: RangeQueryResult<Key>,
|
||||||
|
for<'a> SameElementsInitializer<&'a T>: LazyRangeInitializer<SumResult<T>, Key>,
|
||||||
|
{
|
||||||
|
type Result = SumResult<T>;
|
||||||
|
|
||||||
|
fn no_op() -> Self {
|
||||||
|
AddAssignModification::None
|
||||||
|
}
|
||||||
|
|
||||||
|
fn is_no_op(&self) -> bool {
|
||||||
|
match self {
|
||||||
|
AddAssignModification::None => true,
|
||||||
|
_ => false,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn is_reinitialization(&self) -> bool {
|
||||||
|
match self {
|
||||||
|
AddAssignModification::Assign(_) => true,
|
||||||
|
_ => false,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn apply(&self, result: &mut SumResult<T>, range: &Range<Key>) {
|
||||||
|
use AddAssignModification::*;
|
||||||
|
match self {
|
||||||
|
None => {}
|
||||||
|
Add(x) | Assign(x) => {
|
||||||
|
let to_add = SameElementsInitializer::new(x).get(range).sum;
|
||||||
|
if let Assign(_) = self {
|
||||||
|
result.sum = to_add;
|
||||||
|
} else {
|
||||||
|
result.sum += &to_add;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn compose(later: &Self, earlier: &mut Self) {
|
||||||
|
use AddAssignModification::*;
|
||||||
|
match (later, earlier) {
|
||||||
|
(_, e @ None) => *e = later.clone(),
|
||||||
|
(None, _) => {}
|
||||||
|
(Assign(_), e) => *e = later.clone(),
|
||||||
|
(Add(x), Add(y)) => *y += x,
|
||||||
|
(Add(x), Assign(value)) => *value += x,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
255
libs/persistent_range_query/src/segment_tree.rs
Normal file
255
libs/persistent_range_query/src/segment_tree.rs
Normal file
@@ -0,0 +1,255 @@
|
|||||||
|
//! # Segment Tree
|
||||||
|
//! It is a competitive programming folklore data structure. Do not confuse it with the interval tree.
|
||||||
|
|
||||||
|
use crate::{LazyRangeInitializer, PersistentVecStorage, RangeQueryResult, VecReadableVersion};
|
||||||
|
use std::ops::Range;
|
||||||
|
use std::rc::Rc;
|
||||||
|
|
||||||
|
pub trait MidpointableKey: Clone + Ord + Sized {
|
||||||
|
fn midpoint(range: &Range<Self>) -> Self;
|
||||||
|
}
|
||||||
|
|
||||||
|
pub trait RangeModification<Key>: Clone + crate::RangeModification<Key> {}
|
||||||
|
|
||||||
|
// TODO: use trait alias when stabilized
|
||||||
|
impl<T: Clone + crate::RangeModification<Key>, Key> RangeModification<Key> for T {}
|
||||||
|
|
||||||
|
#[derive(Debug)]
|
||||||
|
struct Node<Modification: RangeModification<Key>, Key> {
|
||||||
|
result: Modification::Result,
|
||||||
|
modify_children: Modification,
|
||||||
|
left: Option<Rc<Self>>,
|
||||||
|
right: Option<Rc<Self>>,
|
||||||
|
}
|
||||||
|
|
||||||
|
// Manual implementation because we don't need `Key: Clone` for this, unlike with `derive`.
|
||||||
|
impl<Modification: RangeModification<Key>, Key> Clone for Node<Modification, Key> {
|
||||||
|
fn clone(&self) -> Self {
|
||||||
|
Node {
|
||||||
|
result: self.result.clone(),
|
||||||
|
modify_children: self.modify_children.clone(),
|
||||||
|
left: self.left.clone(),
|
||||||
|
right: self.right.clone(),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<Modification: RangeModification<Key>, Key> Node<Modification, Key> {
|
||||||
|
fn new<Initializer: LazyRangeInitializer<Modification::Result, Key>>(
|
||||||
|
range: &Range<Key>,
|
||||||
|
initializer: &Initializer,
|
||||||
|
) -> Self {
|
||||||
|
Node {
|
||||||
|
result: initializer.get(range),
|
||||||
|
modify_children: Modification::no_op(),
|
||||||
|
left: None,
|
||||||
|
right: None,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn apply(&mut self, modification: &Modification, range: &Range<Key>) {
|
||||||
|
modification.apply(&mut self.result, range);
|
||||||
|
Modification::compose(modification, &mut self.modify_children);
|
||||||
|
if self.modify_children.is_reinitialization() {
|
||||||
|
self.left = None;
|
||||||
|
self.right = None;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn force_children<Initializer: LazyRangeInitializer<Modification::Result, Key>>(
|
||||||
|
&mut self,
|
||||||
|
initializer: &Initializer,
|
||||||
|
range_left: &Range<Key>,
|
||||||
|
range_right: &Range<Key>,
|
||||||
|
) {
|
||||||
|
let left = Rc::make_mut(
|
||||||
|
self.left
|
||||||
|
.get_or_insert_with(|| Rc::new(Node::new(&range_left, initializer))),
|
||||||
|
);
|
||||||
|
let right = Rc::make_mut(
|
||||||
|
self.right
|
||||||
|
.get_or_insert_with(|| Rc::new(Node::new(&range_right, initializer))),
|
||||||
|
);
|
||||||
|
left.apply(&self.modify_children, &range_left);
|
||||||
|
right.apply(&self.modify_children, &range_right);
|
||||||
|
self.modify_children = Modification::no_op();
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn recalculate_from_children(&mut self, range_left: &Range<Key>, range_right: &Range<Key>) {
|
||||||
|
assert!(self.modify_children.is_no_op());
|
||||||
|
assert!(self.left.is_some());
|
||||||
|
assert!(self.right.is_some());
|
||||||
|
self.result = Modification::Result::combine(
|
||||||
|
&self.left.as_ref().unwrap().result,
|
||||||
|
&range_left,
|
||||||
|
&self.right.as_ref().unwrap().result,
|
||||||
|
&range_right,
|
||||||
|
);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn split_range<Key: MidpointableKey>(range: &Range<Key>) -> (Range<Key>, Range<Key>) {
|
||||||
|
let range_left = range.start.clone()..MidpointableKey::midpoint(range);
|
||||||
|
let range_right = range_left.end.clone()..range.end.clone();
|
||||||
|
(range_left, range_right)
|
||||||
|
}
|
||||||
|
|
||||||
|
pub struct PersistentSegmentTreeVersion<
|
||||||
|
Modification: RangeModification<Key>,
|
||||||
|
Initializer: LazyRangeInitializer<Modification::Result, Key>,
|
||||||
|
Key: Clone,
|
||||||
|
> {
|
||||||
|
root: Rc<Node<Modification, Key>>,
|
||||||
|
all_keys: Range<Key>,
|
||||||
|
initializer: Rc<Initializer>,
|
||||||
|
}
|
||||||
|
|
||||||
|
// Manual implementation because we don't need `Key: Clone` for this, unlike with `derive`.
|
||||||
|
impl<
|
||||||
|
Modification: RangeModification<Key>,
|
||||||
|
Initializer: LazyRangeInitializer<Modification::Result, Key>,
|
||||||
|
Key: Clone,
|
||||||
|
> Clone for PersistentSegmentTreeVersion<Modification, Initializer, Key>
|
||||||
|
{
|
||||||
|
fn clone(&self) -> Self {
|
||||||
|
Self {
|
||||||
|
root: self.root.clone(),
|
||||||
|
all_keys: self.all_keys.clone(),
|
||||||
|
initializer: self.initializer.clone(),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn get<
|
||||||
|
Modification: RangeModification<Key>,
|
||||||
|
Initializer: LazyRangeInitializer<Modification::Result, Key>,
|
||||||
|
Key: MidpointableKey,
|
||||||
|
>(
|
||||||
|
node: &mut Rc<Node<Modification, Key>>,
|
||||||
|
node_keys: &Range<Key>,
|
||||||
|
initializer: &Initializer,
|
||||||
|
keys: &Range<Key>,
|
||||||
|
) -> Modification::Result {
|
||||||
|
if node_keys.end <= keys.start || keys.end <= node_keys.start {
|
||||||
|
return Modification::Result::new_for_empty_range();
|
||||||
|
}
|
||||||
|
if keys.start <= node_keys.start && node_keys.end <= keys.end {
|
||||||
|
return node.result.clone();
|
||||||
|
}
|
||||||
|
let node = Rc::make_mut(node);
|
||||||
|
let (left_keys, right_keys) = split_range(node_keys);
|
||||||
|
node.force_children(initializer, &left_keys, &right_keys);
|
||||||
|
let mut result = get(node.left.as_mut().unwrap(), &left_keys, initializer, keys);
|
||||||
|
Modification::Result::add(
|
||||||
|
&mut result,
|
||||||
|
&left_keys,
|
||||||
|
&get(node.right.as_mut().unwrap(), &right_keys, initializer, keys),
|
||||||
|
&right_keys,
|
||||||
|
);
|
||||||
|
result
|
||||||
|
}
|
||||||
|
|
||||||
|
fn modify<
|
||||||
|
Modification: RangeModification<Key>,
|
||||||
|
Initializer: LazyRangeInitializer<Modification::Result, Key>,
|
||||||
|
Key: MidpointableKey,
|
||||||
|
>(
|
||||||
|
node: &mut Rc<Node<Modification, Key>>,
|
||||||
|
node_keys: &Range<Key>,
|
||||||
|
initializer: &Initializer,
|
||||||
|
keys: &Range<Key>,
|
||||||
|
modification: &Modification,
|
||||||
|
) {
|
||||||
|
if modification.is_no_op() || node_keys.end <= keys.start || keys.end <= node_keys.start {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
let node = Rc::make_mut(node);
|
||||||
|
if keys.start <= node_keys.start && node_keys.end <= keys.end {
|
||||||
|
node.apply(modification, node_keys);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
let (left_keys, right_keys) = split_range(node_keys);
|
||||||
|
node.force_children(initializer, &left_keys, &right_keys);
|
||||||
|
modify(
|
||||||
|
node.left.as_mut().unwrap(),
|
||||||
|
&left_keys,
|
||||||
|
initializer,
|
||||||
|
keys,
|
||||||
|
&modification,
|
||||||
|
);
|
||||||
|
modify(
|
||||||
|
node.right.as_mut().unwrap(),
|
||||||
|
&right_keys,
|
||||||
|
initializer,
|
||||||
|
keys,
|
||||||
|
&modification,
|
||||||
|
);
|
||||||
|
node.recalculate_from_children(&left_keys, &right_keys);
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<
|
||||||
|
Modification: RangeModification<Key>,
|
||||||
|
Initializer: LazyRangeInitializer<Modification::Result, Key>,
|
||||||
|
Key: MidpointableKey,
|
||||||
|
> VecReadableVersion<Modification, Key>
|
||||||
|
for PersistentSegmentTreeVersion<Modification, Initializer, Key>
|
||||||
|
{
|
||||||
|
fn get(&self, keys: &Range<Key>) -> Modification::Result {
|
||||||
|
get(
|
||||||
|
&mut self.root.clone(), // TODO: do not always force a branch
|
||||||
|
&self.all_keys,
|
||||||
|
self.initializer.as_ref(),
|
||||||
|
keys,
|
||||||
|
)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub struct PersistentSegmentTree<
|
||||||
|
Modification: RangeModification<Key>,
|
||||||
|
Initializer: LazyRangeInitializer<Modification::Result, Key>,
|
||||||
|
Key: MidpointableKey,
|
||||||
|
>(PersistentSegmentTreeVersion<Modification, Initializer, Key>);
|
||||||
|
|
||||||
|
impl<
|
||||||
|
Modification: RangeModification<Key>,
|
||||||
|
Initializer: LazyRangeInitializer<Modification::Result, Key>,
|
||||||
|
Key: MidpointableKey,
|
||||||
|
> VecReadableVersion<Modification, Key>
|
||||||
|
for PersistentSegmentTree<Modification, Initializer, Key>
|
||||||
|
{
|
||||||
|
fn get(&self, keys: &Range<Key>) -> Modification::Result {
|
||||||
|
self.0.get(keys)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<
|
||||||
|
Modification: RangeModification<Key>,
|
||||||
|
Initializer: LazyRangeInitializer<Modification::Result, Key>,
|
||||||
|
Key: MidpointableKey,
|
||||||
|
> PersistentVecStorage<Modification, Initializer, Key>
|
||||||
|
for PersistentSegmentTree<Modification, Initializer, Key>
|
||||||
|
{
|
||||||
|
fn new(all_keys: Range<Key>, initializer: Initializer) -> Self {
|
||||||
|
PersistentSegmentTree(PersistentSegmentTreeVersion {
|
||||||
|
root: Rc::new(Node::new(&all_keys, &initializer)),
|
||||||
|
all_keys: all_keys,
|
||||||
|
initializer: Rc::new(initializer),
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
type FrozenVersion = PersistentSegmentTreeVersion<Modification, Initializer, Key>;
|
||||||
|
|
||||||
|
fn modify(&mut self, keys: &Range<Key>, modification: &Modification) {
|
||||||
|
modify(
|
||||||
|
&mut self.0.root, // TODO: do not always force a branch
|
||||||
|
&self.0.all_keys,
|
||||||
|
self.0.initializer.as_ref(),
|
||||||
|
keys,
|
||||||
|
modification,
|
||||||
|
)
|
||||||
|
}
|
||||||
|
|
||||||
|
fn freeze(&mut self) -> Self::FrozenVersion {
|
||||||
|
self.0.clone()
|
||||||
|
}
|
||||||
|
}
|
||||||
295
libs/persistent_range_query/tests/layer_map_test.rs
Normal file
295
libs/persistent_range_query/tests/layer_map_test.rs
Normal file
@@ -0,0 +1,295 @@
|
|||||||
|
use persistent_range_query::naive::{IndexableKey, NaiveVecStorage};
|
||||||
|
use persistent_range_query::ops::SameElementsInitializer;
|
||||||
|
use persistent_range_query::segment_tree::{MidpointableKey, PersistentSegmentTree};
|
||||||
|
use persistent_range_query::{
|
||||||
|
LazyRangeInitializer, PersistentVecStorage, RangeModification, RangeQueryResult,
|
||||||
|
VecReadableVersion,
|
||||||
|
};
|
||||||
|
use std::cmp::Ordering;
|
||||||
|
use std::ops::Range;
|
||||||
|
|
||||||
|
/// Key type used by this test: the index of a single page.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
struct PageIndex(u32);

/// Layers are identified by name in this test.
type LayerId = String;
|
||||||
|
|
||||||
|
impl IndexableKey for PageIndex {
|
||||||
|
fn index(all_keys: &Range<Self>, key: &Self) -> usize {
|
||||||
|
(key.0 as usize) - (all_keys.start.0 as usize)
|
||||||
|
}
|
||||||
|
|
||||||
|
fn element_range(all_keys: &Range<Self>, index: usize) -> Range<Self> {
|
||||||
|
PageIndex(all_keys.start.0 + index as u32)..PageIndex(all_keys.start.0 + index as u32 + 1)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl MidpointableKey for PageIndex {
|
||||||
|
fn midpoint(range: &Range<Self>) -> Self {
|
||||||
|
PageIndex(range.start.0 + (range.end.0 - range.start.0) / 2)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Clone, Debug, Eq, PartialEq)]
|
||||||
|
struct LayerMapInformation {
|
||||||
|
// Only make sense for a range of length 1.
|
||||||
|
last_layer: Option<LayerId>,
|
||||||
|
last_image_layer: Option<LayerId>,
|
||||||
|
// Work for all ranges
|
||||||
|
max_delta_layers: (usize, Range<PageIndex>),
|
||||||
|
}
|
||||||
|
|
||||||
|
impl LayerMapInformation {
|
||||||
|
fn last_layers(&self) -> (&Option<LayerId>, &Option<LayerId>) {
|
||||||
|
(&self.last_layer, &self.last_image_layer)
|
||||||
|
}
|
||||||
|
|
||||||
|
fn max_delta_layers(&self) -> &(usize, Range<PageIndex>) {
|
||||||
|
&self.max_delta_layers
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Joins two page ranges where possible.
///
/// An empty `left` yields `right`. If both are non-empty and `left` ends
/// exactly where `right` starts, the result spans both. In every other case
/// (empty `right`, or ranges that do not touch) `left` is returned unchanged.
fn merge_ranges(left: &Range<PageIndex>, right: &Range<PageIndex>) -> Range<PageIndex> {
    if left.is_empty() {
        return right.clone();
    }
    if !right.is_empty() && left.end == right.start {
        return left.start..right.end;
    }
    left.clone()
}
|
||||||
|
|
||||||
|
impl RangeQueryResult<PageIndex> for LayerMapInformation {
|
||||||
|
fn new_for_empty_range() -> Self {
|
||||||
|
LayerMapInformation {
|
||||||
|
last_layer: None,
|
||||||
|
last_image_layer: None,
|
||||||
|
max_delta_layers: (0, PageIndex(0)..PageIndex(0)),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn combine(
|
||||||
|
left: &Self,
|
||||||
|
_left_range: &Range<PageIndex>,
|
||||||
|
right: &Self,
|
||||||
|
_right_range: &Range<PageIndex>,
|
||||||
|
) -> Self {
|
||||||
|
// Note that either range may be empty.
|
||||||
|
LayerMapInformation {
|
||||||
|
last_layer: left
|
||||||
|
.last_layer
|
||||||
|
.as_ref()
|
||||||
|
.or_else(|| right.last_layer.as_ref())
|
||||||
|
.cloned(),
|
||||||
|
last_image_layer: left
|
||||||
|
.last_image_layer
|
||||||
|
.as_ref()
|
||||||
|
.or_else(|| right.last_image_layer.as_ref())
|
||||||
|
.cloned(),
|
||||||
|
max_delta_layers: match left.max_delta_layers.0.cmp(&right.max_delta_layers.0) {
|
||||||
|
Ordering::Less => right.max_delta_layers.clone(),
|
||||||
|
Ordering::Greater => left.max_delta_layers.clone(),
|
||||||
|
Ordering::Equal => (
|
||||||
|
left.max_delta_layers.0,
|
||||||
|
merge_ranges(&left.max_delta_layers.1, &right.max_delta_layers.1),
|
||||||
|
),
|
||||||
|
},
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn add(
|
||||||
|
left: &mut Self,
|
||||||
|
left_range: &Range<PageIndex>,
|
||||||
|
right: &Self,
|
||||||
|
right_range: &Range<PageIndex>,
|
||||||
|
) {
|
||||||
|
*left = Self::combine(&left, left_range, right, right_range);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Clone, Debug)]
|
||||||
|
struct AddDeltaLayers {
|
||||||
|
last_layer: LayerId,
|
||||||
|
count: usize,
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Clone, Debug)]
|
||||||
|
struct LayerMapModification {
|
||||||
|
add_image_layer: Option<LayerId>,
|
||||||
|
add_delta_layers: Option<AddDeltaLayers>,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl LayerMapModification {
|
||||||
|
fn add_image_layer(layer: impl Into<LayerId>) -> Self {
|
||||||
|
LayerMapModification {
|
||||||
|
add_image_layer: Some(layer.into()),
|
||||||
|
add_delta_layers: None,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn add_delta_layer(layer: impl Into<LayerId>) -> Self {
|
||||||
|
LayerMapModification {
|
||||||
|
add_image_layer: None,
|
||||||
|
add_delta_layers: Some(AddDeltaLayers {
|
||||||
|
last_layer: layer.into(),
|
||||||
|
count: 1,
|
||||||
|
}),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl RangeModification<PageIndex> for LayerMapModification {
|
||||||
|
type Result = LayerMapInformation;
|
||||||
|
|
||||||
|
fn no_op() -> Self {
|
||||||
|
LayerMapModification {
|
||||||
|
add_image_layer: None,
|
||||||
|
add_delta_layers: None,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn is_no_op(&self) -> bool {
|
||||||
|
self.add_image_layer.is_none() && self.add_delta_layers.is_none()
|
||||||
|
}
|
||||||
|
|
||||||
|
fn is_reinitialization(&self) -> bool {
|
||||||
|
self.add_image_layer.is_some()
|
||||||
|
}
|
||||||
|
|
||||||
|
fn apply(&self, result: &mut Self::Result, range: &Range<PageIndex>) {
|
||||||
|
if let Some(layer) = &self.add_image_layer {
|
||||||
|
result.last_layer = Some(layer.clone());
|
||||||
|
result.last_image_layer = Some(layer.clone());
|
||||||
|
result.max_delta_layers = (0, range.clone());
|
||||||
|
}
|
||||||
|
if let Some(AddDeltaLayers { last_layer, count }) = &self.add_delta_layers {
|
||||||
|
result.last_layer = Some(last_layer.clone());
|
||||||
|
result.max_delta_layers.0 += count;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn compose(later: &Self, earlier: &mut Self) {
|
||||||
|
if later.add_image_layer.is_some() {
|
||||||
|
*earlier = later.clone();
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
if let Some(AddDeltaLayers { last_layer, count }) = &later.add_delta_layers {
|
||||||
|
let res = earlier.add_delta_layers.get_or_insert(AddDeltaLayers {
|
||||||
|
last_layer: LayerId::default(),
|
||||||
|
count: 0,
|
||||||
|
});
|
||||||
|
res.last_layer = last_layer.clone();
|
||||||
|
res.count += count;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl LazyRangeInitializer<LayerMapInformation, PageIndex> for SameElementsInitializer<()> {
|
||||||
|
fn get(&self, range: &Range<PageIndex>) -> LayerMapInformation {
|
||||||
|
LayerMapInformation {
|
||||||
|
last_layer: None,
|
||||||
|
last_image_layer: None,
|
||||||
|
max_delta_layers: (0, range.clone()),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn test_layer_map<
|
||||||
|
S: PersistentVecStorage<LayerMapModification, SameElementsInitializer<()>, PageIndex>,
|
||||||
|
>() {
|
||||||
|
let mut s = S::new(
|
||||||
|
PageIndex(0)..PageIndex(100),
|
||||||
|
SameElementsInitializer::new(()),
|
||||||
|
);
|
||||||
|
s.modify(
|
||||||
|
&(PageIndex(0)..PageIndex(70)),
|
||||||
|
&LayerMapModification::add_image_layer("Img0..70"),
|
||||||
|
);
|
||||||
|
s.modify(
|
||||||
|
&(PageIndex(50)..PageIndex(100)),
|
||||||
|
&LayerMapModification::add_image_layer("Img50..100"),
|
||||||
|
);
|
||||||
|
s.modify(
|
||||||
|
&(PageIndex(10)..PageIndex(60)),
|
||||||
|
&LayerMapModification::add_delta_layer("Delta10..60"),
|
||||||
|
);
|
||||||
|
let s_before_last_delta = s.freeze();
|
||||||
|
s.modify(
|
||||||
|
&(PageIndex(20)..PageIndex(80)),
|
||||||
|
&LayerMapModification::add_delta_layer("Delta20..80"),
|
||||||
|
);
|
||||||
|
|
||||||
|
assert_eq!(
|
||||||
|
s.get(&(PageIndex(5)..PageIndex(6))).last_layers(),
|
||||||
|
(&Some("Img0..70".to_owned()), &Some("Img0..70".to_owned()))
|
||||||
|
);
|
||||||
|
assert_eq!(
|
||||||
|
s.get(&(PageIndex(15)..PageIndex(16))).last_layers(),
|
||||||
|
(
|
||||||
|
&Some("Delta10..60".to_owned()),
|
||||||
|
&Some("Img0..70".to_owned())
|
||||||
|
)
|
||||||
|
);
|
||||||
|
assert_eq!(
|
||||||
|
s.get(&(PageIndex(25)..PageIndex(26))).last_layers(),
|
||||||
|
(
|
||||||
|
&Some("Delta20..80".to_owned()),
|
||||||
|
&Some("Img0..70".to_owned())
|
||||||
|
)
|
||||||
|
);
|
||||||
|
assert_eq!(
|
||||||
|
s.get(&(PageIndex(65)..PageIndex(66))).last_layers(),
|
||||||
|
(
|
||||||
|
&Some("Delta20..80".to_owned()),
|
||||||
|
&Some("Img50..100".to_owned())
|
||||||
|
)
|
||||||
|
);
|
||||||
|
assert_eq!(
|
||||||
|
s.get(&(PageIndex(95)..PageIndex(96))).last_layers(),
|
||||||
|
(
|
||||||
|
&Some("Img50..100".to_owned()),
|
||||||
|
&Some("Img50..100".to_owned())
|
||||||
|
)
|
||||||
|
);
|
||||||
|
|
||||||
|
assert_eq!(
|
||||||
|
s.get(&(PageIndex(0)..PageIndex(100))).max_delta_layers(),
|
||||||
|
&(2, PageIndex(20)..PageIndex(60)),
|
||||||
|
);
|
||||||
|
assert_eq!(
|
||||||
|
*s_before_last_delta
|
||||||
|
.get(&(PageIndex(0)..PageIndex(100)))
|
||||||
|
.max_delta_layers(),
|
||||||
|
(1, PageIndex(10)..PageIndex(60)),
|
||||||
|
);
|
||||||
|
|
||||||
|
assert_eq!(
|
||||||
|
*s.get(&(PageIndex(10)..PageIndex(30))).max_delta_layers(),
|
||||||
|
(2, PageIndex(20)..PageIndex(30))
|
||||||
|
);
|
||||||
|
assert_eq!(
|
||||||
|
*s.get(&(PageIndex(10)..PageIndex(20))).max_delta_layers(),
|
||||||
|
(1, PageIndex(10)..PageIndex(20))
|
||||||
|
);
|
||||||
|
|
||||||
|
assert_eq!(
|
||||||
|
*s.get(&(PageIndex(70)..PageIndex(80))).max_delta_layers(),
|
||||||
|
(1, PageIndex(70)..PageIndex(80))
|
||||||
|
);
|
||||||
|
assert_eq!(
|
||||||
|
*s_before_last_delta
|
||||||
|
.get(&(PageIndex(70)..PageIndex(80)))
|
||||||
|
.max_delta_layers(),
|
||||||
|
(0, PageIndex(70)..PageIndex(80))
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Layer-map scenario on the naive storage.
#[test]
fn test_naive() {
    test_layer_map::<NaiveVecStorage<_, _, _>>()
}
|
||||||
|
|
||||||
|
/// Layer-map scenario on the persistent segment tree.
#[test]
fn test_segment_tree() {
    test_layer_map::<PersistentSegmentTree<_, _, _>>()
}
|
||||||
116
libs/persistent_range_query/tests/rsq_test.rs
Normal file
116
libs/persistent_range_query/tests/rsq_test.rs
Normal file
@@ -0,0 +1,116 @@
|
|||||||
|
use persistent_range_query::naive::*;
|
||||||
|
use persistent_range_query::ops::rsq::AddAssignModification::Add;
|
||||||
|
use persistent_range_query::ops::rsq::*;
|
||||||
|
use persistent_range_query::ops::SameElementsInitializer;
|
||||||
|
use persistent_range_query::segment_tree::{MidpointableKey, PersistentSegmentTree};
|
||||||
|
use persistent_range_query::{PersistentVecStorage, VecReadableVersion};
|
||||||
|
use rand::{Rng, SeedableRng};
|
||||||
|
use std::ops::Range;
|
||||||
|
|
||||||
|
/// Key type used by the range-sum tests.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
struct K(u16);
|
||||||
|
|
||||||
|
impl IndexableKey for K {
|
||||||
|
fn index(all_keys: &Range<Self>, key: &Self) -> usize {
|
||||||
|
(key.0 as usize) - (all_keys.start.0 as usize)
|
||||||
|
}
|
||||||
|
|
||||||
|
fn element_range(all_keys: &Range<Self>, index: usize) -> Range<Self> {
|
||||||
|
K(all_keys.start.0 + index as u16)..K(all_keys.start.0 + index as u16 + 1)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl SumOfSameElements<K> for i32 {
|
||||||
|
fn sum(initial_element_value: &Self, keys: &Range<K>) -> Self {
|
||||||
|
initial_element_value * (keys.end.0 - keys.start.0) as Self
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl MidpointableKey for K {
|
||||||
|
fn midpoint(range: &Range<Self>) -> Self {
|
||||||
|
K(range.start.0 + (range.end.0 - range.start.0) / 2)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn test_storage<
|
||||||
|
S: PersistentVecStorage<AddAssignModification<i32>, SameElementsInitializer<i32>, K>,
|
||||||
|
>() {
|
||||||
|
let mut s = S::new(K(0)..K(12), SameElementsInitializer::new(0i32));
|
||||||
|
assert_eq!(*s.get(&(K(0)..K(12))).sum(), 0);
|
||||||
|
|
||||||
|
s.modify(&(K(2)..K(5)), &AddAssignModification::Add(3));
|
||||||
|
assert_eq!(*s.get(&(K(0)..K(12))).sum(), 3 + 3 + 3);
|
||||||
|
let s_old = s.freeze();
|
||||||
|
|
||||||
|
s.modify(&(K(3)..K(6)), &AddAssignModification::Assign(10));
|
||||||
|
assert_eq!(*s.get(&(K(0)..K(12))).sum(), 3 + 10 + 10 + 10);
|
||||||
|
|
||||||
|
s.modify(&(K(4)..K(7)), &AddAssignModification::Add(2));
|
||||||
|
assert_eq!(*s.get(&(K(0)..K(12))).sum(), 3 + 10 + 12 + 12 + 2);
|
||||||
|
|
||||||
|
assert_eq!(*s.get(&(K(4)..K(6))).sum(), 12 + 12);
|
||||||
|
assert_eq!(*s_old.get(&(K(4)..K(6))).sum(), 3);
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Add/assign scenario on the naive storage.
#[test]
fn test_naive() {
    test_storage::<NaiveVecStorage<_, _, _>>()
}
|
||||||
|
|
||||||
|
/// Add/assign scenario on the persistent segment tree.
#[test]
fn test_segment_tree() {
    test_storage::<PersistentSegmentTree<_, _, _>>()
}
|
||||||
|
|
||||||
|
/// Randomized cross-check of the segment tree against the naive storage.
///
/// Both storages receive the same seeded sequence of range additions and are
/// compared on random range sums before every modification. In each of the two
/// rounds a version of each storage is frozen first, and the frozen versions
/// must still report the pre-round checksum once the round is over.
#[test]
fn test_stress() {
    const LEN: u16 = 17_238;
    const OPERATIONS: i32 = 20_000;

    // Fixed seed, so every run sees the same sequence of operations.
    let mut rng = rand::rngs::StdRng::seed_from_u64(0);
    let mut naive: NaiveVecStorage<AddAssignModification<i32>, _, _> =
        NaiveVecStorage::new(K(0)..K(LEN), SameElementsInitializer::new(2i32));
    let mut segm_tree: PersistentSegmentTree<AddAssignModification<i32>, _, _> =
        PersistentSegmentTree::new(K(0)..K(LEN), SameElementsInitializer::new(2i32));

    // Draws two endpoints in `0..LEN` and orders them; the range is empty
    // when both draws are equal.
    fn gen_range(rng: &mut impl Rng) -> Range<K> {
        let l: u16 = rng.gen_range(0..LEN);
        let r: u16 = rng.gen_range(0..LEN);
        if l <= r {
            K(l)..K(r)
        } else {
            K(r)..K(l)
        }
    }

    for _ in 0..2 {
        let checksum_range = gen_range(&mut rng);
        let checksum_before: i32 = *naive.get(&checksum_range).sum();
        assert_eq!(checksum_before, *segm_tree.get(&checksum_range).sum());

        let naive_before = naive.freeze();
        let segm_tree_before = segm_tree.freeze();
        // Freshly frozen versions must agree with the live checksum. The
        // segment-tree check has to read the frozen snapshot; the live tree
        // was already compared above.
        assert_eq!(checksum_before, *naive_before.get(&checksum_range).sum());
        assert_eq!(
            checksum_before,
            *segm_tree_before.get(&checksum_range).sum()
        );

        for _ in 0..OPERATIONS {
            {
                let range = gen_range(&mut rng);
                assert_eq!(naive.get(&range).sum(), segm_tree.get(&range).sum());
            }
            {
                let range = gen_range(&mut rng);
                let val = rng.gen_range(-10i32..=10i32);
                let op = Add(val);
                naive.modify(&range, &op);
                segm_tree.modify(&range, &op);
            }
        }

        // The frozen versions must be unaffected by the modifications above.
        assert_eq!(checksum_before, *naive_before.get(&checksum_range).sum());
        assert_eq!(
            checksum_before,
            *segm_tree_before.get(&checksum_range).sum()
        );
    }
}
|
||||||
@@ -33,8 +33,8 @@ pub struct Segment {
|
|||||||
/// Logical size before this state
|
/// Logical size before this state
|
||||||
start_size: u64,
|
start_size: u64,
|
||||||
|
|
||||||
/// Logical size at this state
|
/// Logical size at this state. Can be None in the last Segment of a branch.
|
||||||
pub end_size: u64,
|
pub end_size: Option<u64>,
|
||||||
|
|
||||||
/// Indices to [`Storage::segments`]
|
/// Indices to [`Storage::segments`]
|
||||||
///
|
///
|
||||||
@@ -115,7 +115,7 @@ impl<K: std::hash::Hash + Eq + 'static> Storage<K> {
|
|||||||
start_lsn: 0,
|
start_lsn: 0,
|
||||||
end_lsn: 0,
|
end_lsn: 0,
|
||||||
start_size: 0,
|
start_size: 0,
|
||||||
end_size: 0,
|
end_size: Some(0),
|
||||||
children_after: Vec::new(),
|
children_after: Vec::new(),
|
||||||
};
|
};
|
||||||
|
|
||||||
@@ -125,6 +125,39 @@ impl<K: std::hash::Hash + Eq + 'static> Storage<K> {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Advances the branch with a new point, at given LSN.
|
||||||
|
pub fn insert_point<Q: ?Sized>(
|
||||||
|
&mut self,
|
||||||
|
branch: &Q,
|
||||||
|
op: Cow<'static, str>,
|
||||||
|
lsn: u64,
|
||||||
|
size: Option<u64>,
|
||||||
|
) where
|
||||||
|
K: std::borrow::Borrow<Q>,
|
||||||
|
Q: std::hash::Hash + Eq,
|
||||||
|
{
|
||||||
|
let lastseg_id = *self.branches.get(branch).unwrap();
|
||||||
|
let newseg_id = self.segments.len();
|
||||||
|
let lastseg = &mut self.segments[lastseg_id];
|
||||||
|
|
||||||
|
assert!(lsn > lastseg.end_lsn);
|
||||||
|
|
||||||
|
let newseg = Segment {
|
||||||
|
op,
|
||||||
|
parent: Some(lastseg_id),
|
||||||
|
start_lsn: lastseg.end_lsn,
|
||||||
|
end_lsn: lsn,
|
||||||
|
start_size: lastseg.end_size.unwrap(),
|
||||||
|
end_size: size,
|
||||||
|
children_after: Vec::new(),
|
||||||
|
needed: false,
|
||||||
|
};
|
||||||
|
lastseg.children_after.push(newseg_id);
|
||||||
|
|
||||||
|
self.segments.push(newseg);
|
||||||
|
*self.branches.get_mut(branch).expect("read already") = newseg_id;
|
||||||
|
}
|
||||||
|
|
||||||
/// Advances the branch with the named operation, by the relative LSN and logical size bytes.
|
/// Advances the branch with the named operation, by the relative LSN and logical size bytes.
|
||||||
pub fn modify_branch<Q: ?Sized>(
|
pub fn modify_branch<Q: ?Sized>(
|
||||||
&mut self,
|
&mut self,
|
||||||
@@ -145,8 +178,8 @@ impl<K: std::hash::Hash + Eq + 'static> Storage<K> {
|
|||||||
parent: Some(lastseg_id),
|
parent: Some(lastseg_id),
|
||||||
start_lsn: lastseg.end_lsn,
|
start_lsn: lastseg.end_lsn,
|
||||||
end_lsn: lastseg.end_lsn + lsn_bytes,
|
end_lsn: lastseg.end_lsn + lsn_bytes,
|
||||||
start_size: lastseg.end_size,
|
start_size: lastseg.end_size.unwrap(),
|
||||||
end_size: (lastseg.end_size as i64 + size_bytes) as u64,
|
end_size: Some((lastseg.end_size.unwrap() as i64 + size_bytes) as u64),
|
||||||
children_after: Vec::new(),
|
children_after: Vec::new(),
|
||||||
needed: false,
|
needed: false,
|
||||||
};
|
};
|
||||||
@@ -321,7 +354,7 @@ impl<K: std::hash::Hash + Eq + 'static> Storage<K> {
|
|||||||
Some(SegmentSize {
|
Some(SegmentSize {
|
||||||
seg_id,
|
seg_id,
|
||||||
method: SnapshotAfter,
|
method: SnapshotAfter,
|
||||||
this_size: seg.end_size,
|
this_size: seg.end_size.unwrap(),
|
||||||
children,
|
children,
|
||||||
})
|
})
|
||||||
} else {
|
} else {
|
||||||
|
|||||||
@@ -174,7 +174,7 @@ fn graphviz_recurse(segments: &[Segment], node: &SegmentSize) {
|
|||||||
let seg_id = node.seg_id;
|
let seg_id = node.seg_id;
|
||||||
let seg = segments.get(seg_id).unwrap();
|
let seg = segments.get(seg_id).unwrap();
|
||||||
let lsn = seg.end_lsn;
|
let lsn = seg.end_lsn;
|
||||||
let size = seg.end_size;
|
let size = seg.end_size.unwrap_or(0);
|
||||||
let method = node.method;
|
let method = node.method;
|
||||||
|
|
||||||
println!(" {{");
|
println!(" {{");
|
||||||
@@ -226,7 +226,7 @@ fn graphviz_recurse(segments: &[Segment], node: &SegmentSize) {
|
|||||||
print!(
|
print!(
|
||||||
" label=\"{} / {}\"",
|
" label=\"{} / {}\"",
|
||||||
next.end_lsn - seg.end_lsn,
|
next.end_lsn - seg.end_lsn,
|
||||||
(next.end_size as i128 - seg.end_size as i128)
|
(next.end_size.unwrap_or(0) as i128 - seg.end_size.unwrap_or(0) as i128)
|
||||||
);
|
);
|
||||||
} else {
|
} else {
|
||||||
print!(" label=\"{}: {}\"", next.op, next.end_lsn - seg.end_lsn);
|
print!(" label=\"{}: {}\"", next.op, next.end_lsn - seg.end_lsn);
|
||||||
|
|||||||
@@ -48,6 +48,25 @@ pub mod nonblock;
|
|||||||
// Default signal handling
|
// Default signal handling
|
||||||
pub mod signals;
|
pub mod signals;
|
||||||
|
|
||||||
|
/// use with fail::cfg("$name", "return(2000)")
///
/// Async sleep driven by a failpoint. When the failpoint `$name` is configured
/// with a `return(<millis>)` action, the expansion logs, sleeps for that many
/// milliseconds with `tokio::time::sleep`, and logs again. Otherwise it does
/// nothing. It must be expanded inside an async context because it `.await`s.
///
/// Panics if the failpoint fires without an argument, or with one that does
/// not parse as `u64`.
#[macro_export]
macro_rules! failpoint_sleep_millis_async {
    ($name:literal) => {{
        // The failpoint is evaluated inside an immediately-invoked closure
        // whose fallback value is `None`; the sleep itself happens outside it.
        let should_sleep: Option<std::time::Duration> = (|| {
            fail::fail_point!($name, |v: Option<_>| {
                let millis = v.unwrap().parse::<u64>().unwrap();
                // Fully qualified: paths in a `macro_rules!` expansion resolve
                // at the call site, which may not have `Duration` imported.
                Some(std::time::Duration::from_millis(millis))
            });
            None
        })();
        if let Some(d) = should_sleep {
            tracing::info!("failpoint {:?}: sleeping for {:?}", $name, d);
            tokio::time::sleep(d).await;
            tracing::info!("failpoint {:?}: sleep done", $name);
        }
    }};
}
|
||||||
|
|
||||||
/// This is a shortcut to embed git sha into binaries and avoid copying the same build script to all packages
|
/// This is a shortcut to embed git sha into binaries and avoid copying the same build script to all packages
|
||||||
///
|
///
|
||||||
/// we have several cases:
|
/// we have several cases:
|
||||||
|
|||||||
@@ -199,6 +199,20 @@ fn start_pageserver(conf: &'static PageServerConf) -> anyhow::Result<()> {
|
|||||||
logging::init(conf.log_format)?;
|
logging::init(conf.log_format)?;
|
||||||
info!("version: {}", version());
|
info!("version: {}", version());
|
||||||
|
|
||||||
|
// If any failpoints were set from FAILPOINTS environment variable,
|
||||||
|
// print them to the log for debugging purposes
|
||||||
|
let failpoints = fail::list();
|
||||||
|
if !failpoints.is_empty() {
|
||||||
|
info!(
|
||||||
|
"started with failpoints: {}",
|
||||||
|
failpoints
|
||||||
|
.iter()
|
||||||
|
.map(|(name, actions)| format!("{name}={actions}"))
|
||||||
|
.collect::<Vec<String>>()
|
||||||
|
.join(";")
|
||||||
|
)
|
||||||
|
}
|
||||||
|
|
||||||
let lock_file_path = conf.workdir.join(PID_FILE_NAME);
|
let lock_file_path = conf.workdir.join(PID_FILE_NAME);
|
||||||
let lock_file = match lock_file::create_lock_file(&lock_file_path, Pid::this().to_string()) {
|
let lock_file = match lock_file::create_lock_file(&lock_file_path, Pid::this().to_string()) {
|
||||||
lock_file::LockCreationResult::Created {
|
lock_file::LockCreationResult::Created {
|
||||||
|
|||||||
@@ -667,6 +667,7 @@ components:
|
|||||||
- disk_consistent_lsn
|
- disk_consistent_lsn
|
||||||
- awaits_download
|
- awaits_download
|
||||||
- state
|
- state
|
||||||
|
- latest_gc_cutoff_lsn
|
||||||
properties:
|
properties:
|
||||||
timeline_id:
|
timeline_id:
|
||||||
type: string
|
type: string
|
||||||
@@ -711,6 +712,9 @@ components:
|
|||||||
type: boolean
|
type: boolean
|
||||||
state:
|
state:
|
||||||
type: string
|
type: string
|
||||||
|
latest_gc_cutoff_lsn:
|
||||||
|
type: string
|
||||||
|
format: hex
|
||||||
|
|
||||||
# These 'local' and 'remote' fields just duplicate some of the fields
|
# These 'local' and 'remote' fields just duplicate some of the fields
|
||||||
# above. They are kept for backwards-compatibility. They can be removed,
|
# above. They are kept for backwards-compatibility. They can be removed,
|
||||||
|
|||||||
@@ -461,14 +461,7 @@ impl Tenant {
|
|||||||
.context("Cannot branch off the timeline that's not present in pageserver")?;
|
.context("Cannot branch off the timeline that's not present in pageserver")?;
|
||||||
|
|
||||||
if let Some(lsn) = ancestor_start_lsn.as_mut() {
|
if let Some(lsn) = ancestor_start_lsn.as_mut() {
|
||||||
// Wait for the WAL to arrive and be processed on the parent branch up
|
|
||||||
// to the requested branch point. The repository code itself doesn't
|
|
||||||
// require it, but if we start to receive WAL on the new timeline,
|
|
||||||
// decoding the new WAL might need to look up previous pages, relation
|
|
||||||
// sizes etc. and that would get confused if the previous page versions
|
|
||||||
// are not in the repository yet.
|
|
||||||
*lsn = lsn.align();
|
*lsn = lsn.align();
|
||||||
ancestor_timeline.wait_lsn(*lsn).await?;
|
|
||||||
|
|
||||||
let ancestor_ancestor_lsn = ancestor_timeline.get_ancestor_lsn();
|
let ancestor_ancestor_lsn = ancestor_timeline.get_ancestor_lsn();
|
||||||
if ancestor_ancestor_lsn > *lsn {
|
if ancestor_ancestor_lsn > *lsn {
|
||||||
@@ -480,6 +473,14 @@ impl Tenant {
|
|||||||
ancestor_ancestor_lsn,
|
ancestor_ancestor_lsn,
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Wait for the WAL to arrive and be processed on the parent branch up
|
||||||
|
// to the requested branch point. The repository code itself doesn't
|
||||||
|
// require it, but if we start to receive WAL on the new timeline,
|
||||||
|
// decoding the new WAL might need to look up previous pages, relation
|
||||||
|
// sizes etc. and that would get confused if the previous page versions
|
||||||
|
// are not in the repository yet.
|
||||||
|
ancestor_timeline.wait_lsn(*lsn).await?;
|
||||||
}
|
}
|
||||||
|
|
||||||
self.branch_timeline(ancestor_timeline_id, new_timeline_id, ancestor_start_lsn)?
|
self.branch_timeline(ancestor_timeline_id, new_timeline_id, ancestor_start_lsn)?
|
||||||
@@ -1010,6 +1011,10 @@ impl Tenant {
|
|||||||
|
|
||||||
let gc_timelines = self.refresh_gc_info_internal(target_timeline_id, horizon, pitr)?;
|
let gc_timelines = self.refresh_gc_info_internal(target_timeline_id, horizon, pitr)?;
|
||||||
|
|
||||||
|
utils::failpoint_sleep_millis_async!("gc_iteration_internal_after_getting_gc_timelines");
|
||||||
|
|
||||||
|
info!("starting on {} timelines", gc_timelines.len());
|
||||||
|
|
||||||
// Perform GC for each timeline.
|
// Perform GC for each timeline.
|
||||||
//
|
//
|
||||||
// Note that we don't hold the GC lock here because we don't want
|
// Note that we don't hold the GC lock here because we don't want
|
||||||
|
|||||||
@@ -183,6 +183,19 @@ pub(super) async fn gather_inputs(
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// all timelines also have an end point if they have made any progress
|
||||||
|
if last_record_lsn > timeline.get_ancestor_lsn()
|
||||||
|
&& !interesting_lsns
|
||||||
|
.iter()
|
||||||
|
.any(|(lsn, _)| lsn == &last_record_lsn)
|
||||||
|
{
|
||||||
|
updates.push(Update {
|
||||||
|
lsn: last_record_lsn,
|
||||||
|
command: Command::EndOfBranch,
|
||||||
|
timeline_id: timeline.timeline_id,
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
timeline_inputs.insert(
|
timeline_inputs.insert(
|
||||||
timeline.timeline_id,
|
timeline.timeline_id,
|
||||||
TimelineInputs {
|
TimelineInputs {
|
||||||
@@ -270,48 +283,22 @@ impl ModelInputs {
|
|||||||
// impossible to always determine the a one main branch.
|
// impossible to always determine the a one main branch.
|
||||||
let mut storage = tenant_size_model::Storage::<Option<TimelineId>>::new(None);
|
let mut storage = tenant_size_model::Storage::<Option<TimelineId>>::new(None);
|
||||||
|
|
||||||
// tracking these not to require modifying the current implementation of the size model,
|
|
||||||
// which works in relative LSNs and sizes.
|
|
||||||
let mut last_state: HashMap<TimelineId, (Lsn, u64)> = HashMap::new();
|
|
||||||
|
|
||||||
for update in &self.updates {
|
for update in &self.updates {
|
||||||
let Update {
|
let Update {
|
||||||
lsn,
|
lsn,
|
||||||
command: op,
|
command: op,
|
||||||
timeline_id,
|
timeline_id,
|
||||||
} = update;
|
} = update;
|
||||||
|
let Lsn(now) = *lsn;
|
||||||
match op {
|
match op {
|
||||||
Command::Update(sz) => {
|
Command::Update(sz) => {
|
||||||
let latest = last_state.get_mut(timeline_id).ok_or_else(|| {
|
storage.insert_point(&Some(*timeline_id), "".into(), now, Some(*sz));
|
||||||
anyhow::anyhow!(
|
}
|
||||||
"ordering-mismatch: there must had been a previous state for {timeline_id}"
|
Command::EndOfBranch => {
|
||||||
)
|
storage.insert_point(&Some(*timeline_id), "".into(), now, None);
|
||||||
})?;
|
|
||||||
|
|
||||||
let lsn_bytes = {
|
|
||||||
let Lsn(now) = lsn;
|
|
||||||
let Lsn(prev) = latest.0;
|
|
||||||
debug_assert!(prev <= *now, "self.updates should had been sorted");
|
|
||||||
now - prev
|
|
||||||
};
|
|
||||||
|
|
||||||
let size_diff =
|
|
||||||
i64::try_from(*sz as i128 - latest.1 as i128).with_context(|| {
|
|
||||||
format!("size difference i64 overflow for {timeline_id}")
|
|
||||||
})?;
|
|
||||||
|
|
||||||
storage.modify_branch(&Some(*timeline_id), "".into(), lsn_bytes, size_diff);
|
|
||||||
*latest = (*lsn, *sz);
|
|
||||||
}
|
}
|
||||||
Command::BranchFrom(parent) => {
|
Command::BranchFrom(parent) => {
|
||||||
storage.branch(parent, Some(*timeline_id));
|
storage.branch(parent, Some(*timeline_id));
|
||||||
|
|
||||||
let size = parent
|
|
||||||
.as_ref()
|
|
||||||
.and_then(|id| last_state.get(id))
|
|
||||||
.map(|x| x.1)
|
|
||||||
.unwrap_or(0);
|
|
||||||
last_state.insert(*timeline_id, (*lsn, size));
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -320,10 +307,7 @@ impl ModelInputs {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Single size model update.
|
/// A point of interest in the tree of branches
|
||||||
///
|
|
||||||
/// Sizing model works with relative increments over latest branch state.
|
|
||||||
/// Updates are absolute, so additional state needs to be tracked when applying.
|
|
||||||
#[serde_with::serde_as]
|
#[serde_with::serde_as]
|
||||||
#[derive(
|
#[derive(
|
||||||
Debug, PartialEq, PartialOrd, Eq, Ord, Clone, Copy, serde::Serialize, serde::Deserialize,
|
Debug, PartialEq, PartialOrd, Eq, Ord, Clone, Copy, serde::Serialize, serde::Deserialize,
|
||||||
@@ -342,6 +326,7 @@ struct Update {
|
|||||||
enum Command {
|
enum Command {
|
||||||
Update(u64),
|
Update(u64),
|
||||||
BranchFrom(#[serde_as(as = "Option<serde_with::DisplayFromStr>")] Option<TimelineId>),
|
BranchFrom(#[serde_as(as = "Option<serde_with::DisplayFromStr>")] Option<TimelineId>),
|
||||||
|
EndOfBranch,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl std::fmt::Debug for Command {
|
impl std::fmt::Debug for Command {
|
||||||
@@ -351,6 +336,7 @@ impl std::fmt::Debug for Command {
|
|||||||
match self {
|
match self {
|
||||||
Self::Update(arg0) => write!(f, "Update({arg0})"),
|
Self::Update(arg0) => write!(f, "Update({arg0})"),
|
||||||
Self::BranchFrom(arg0) => write!(f, "BranchFrom({arg0:?})"),
|
Self::BranchFrom(arg0) => write!(f, "BranchFrom({arg0:?})"),
|
||||||
|
Self::EndOfBranch => write!(f, "EndOfBranch"),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -61,6 +61,13 @@ use crate::{
|
|||||||
storage_sync::{self, index::LayerFileMetadata},
|
storage_sync::{self, index::LayerFileMetadata},
|
||||||
};
|
};
|
||||||
|
|
||||||
|
/// Lifecycle state of a timeline's layer flush loop task.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
enum FlushLoopState {
    /// The flush loop has not been spawned yet.
    NotStarted,
    /// The flush loop task has been spawned.
    Running,
    /// The flush loop has returned.
    Exited,
}
|
||||||
|
|
||||||
pub struct Timeline {
|
pub struct Timeline {
|
||||||
conf: &'static PageServerConf,
|
conf: &'static PageServerConf,
|
||||||
tenant_conf: Arc<RwLock<TenantConfOpt>>,
|
tenant_conf: Arc<RwLock<TenantConfOpt>>,
|
||||||
@@ -122,7 +129,7 @@ pub struct Timeline {
|
|||||||
write_lock: Mutex<()>,
|
write_lock: Mutex<()>,
|
||||||
|
|
||||||
/// Used to avoid multiple `flush_loop` tasks running
|
/// Used to avoid multiple `flush_loop` tasks running
|
||||||
flush_loop_started: Mutex<bool>,
|
flush_loop_state: Mutex<FlushLoopState>,
|
||||||
|
|
||||||
/// layer_flush_start_tx can be used to wake up the layer-flushing task.
|
/// layer_flush_start_tx can be used to wake up the layer-flushing task.
|
||||||
/// The value is a counter, incremented every time a new flush cycle is requested.
|
/// The value is a counter, incremented every time a new flush cycle is requested.
|
||||||
@@ -755,7 +762,7 @@ impl Timeline {
|
|||||||
|
|
||||||
upload_layers: AtomicBool::new(upload_layers),
|
upload_layers: AtomicBool::new(upload_layers),
|
||||||
|
|
||||||
flush_loop_started: Mutex::new(false),
|
flush_loop_state: Mutex::new(FlushLoopState::NotStarted),
|
||||||
|
|
||||||
layer_flush_start_tx,
|
layer_flush_start_tx,
|
||||||
layer_flush_done_tx,
|
layer_flush_done_tx,
|
||||||
@@ -794,13 +801,23 @@ impl Timeline {
|
|||||||
}
|
}
|
||||||
|
|
||||||
pub(super) fn maybe_spawn_flush_loop(self: &Arc<Self>) {
|
pub(super) fn maybe_spawn_flush_loop(self: &Arc<Self>) {
|
||||||
let mut flush_loop_started = self.flush_loop_started.lock().unwrap();
|
let mut flush_loop_state = self.flush_loop_state.lock().unwrap();
|
||||||
if *flush_loop_started {
|
match *flush_loop_state {
|
||||||
info!(
|
FlushLoopState::NotStarted => (),
|
||||||
"skipping attempt to start flush_loop twice {}/{}",
|
FlushLoopState::Running => {
|
||||||
self.tenant_id, self.timeline_id
|
info!(
|
||||||
);
|
"skipping attempt to start flush_loop twice {}/{}",
|
||||||
return;
|
self.tenant_id, self.timeline_id
|
||||||
|
);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
FlushLoopState::Exited => {
|
||||||
|
warn!(
|
||||||
|
"ignoring attempt to restart exited flush_loop {}/{}",
|
||||||
|
self.tenant_id, self.timeline_id
|
||||||
|
);
|
||||||
|
return;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
let layer_flush_start_rx = self.layer_flush_start_tx.subscribe();
|
let layer_flush_start_rx = self.layer_flush_start_tx.subscribe();
|
||||||
@@ -813,11 +830,16 @@ impl Timeline {
|
|||||||
Some(self.timeline_id),
|
Some(self.timeline_id),
|
||||||
"layer flush task",
|
"layer flush task",
|
||||||
false,
|
false,
|
||||||
async move { self_clone.flush_loop(layer_flush_start_rx).await; Ok(()) }
|
async move {
|
||||||
|
self_clone.flush_loop(layer_flush_start_rx).await;
|
||||||
|
let mut flush_loop_state = self_clone.flush_loop_state.lock().unwrap();
|
||||||
|
assert_eq!(*flush_loop_state, FlushLoopState::Running);
|
||||||
|
*flush_loop_state = FlushLoopState::Exited;
|
||||||
|
Ok(()) }
|
||||||
.instrument(info_span!(parent: None, "layer flush task", tenant = %self.tenant_id, timeline = %self.timeline_id))
|
.instrument(info_span!(parent: None, "layer flush task", tenant = %self.tenant_id, timeline = %self.timeline_id))
|
||||||
);
|
);
|
||||||
|
|
||||||
*flush_loop_started = true;
|
*flush_loop_state = FlushLoopState::Running;
|
||||||
}
|
}
|
||||||
|
|
||||||
pub(super) fn launch_wal_receiver(self: &Arc<Self>) {
|
pub(super) fn launch_wal_receiver(self: &Arc<Self>) {
|
||||||
@@ -1365,8 +1387,9 @@ impl Timeline {
|
|||||||
// finished, instead of some other flush that was started earlier.
|
// finished, instead of some other flush that was started earlier.
|
||||||
let mut my_flush_request = 0;
|
let mut my_flush_request = 0;
|
||||||
|
|
||||||
if !&*self.flush_loop_started.lock().unwrap() {
|
let flush_loop_state = { *self.flush_loop_state.lock().unwrap() };
|
||||||
anyhow::bail!("cannot flush frozen layers when flush_loop is not running")
|
if flush_loop_state != FlushLoopState::Running {
|
||||||
|
anyhow::bail!("cannot flush frozen layers when flush_loop is not running, state is {flush_loop_state:?}")
|
||||||
}
|
}
|
||||||
|
|
||||||
self.layer_flush_start_tx.send_modify(|counter| {
|
self.layer_flush_start_tx.send_modify(|counter| {
|
||||||
|
|||||||
@@ -71,7 +71,7 @@ async fn compaction_loop(tenant_id: TenantId) {
|
|||||||
let mut sleep_duration = tenant.get_compaction_period();
|
let mut sleep_duration = tenant.get_compaction_period();
|
||||||
if let Err(e) = tenant.compaction_iteration() {
|
if let Err(e) = tenant.compaction_iteration() {
|
||||||
sleep_duration = wait_duration;
|
sleep_duration = wait_duration;
|
||||||
error!("Compaction failed, retrying in {:?}: {e:#}", sleep_duration);
|
error!("Compaction failed, retrying in {:?}: {e:?}", sleep_duration);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Sleep
|
// Sleep
|
||||||
@@ -120,7 +120,7 @@ async fn gc_loop(tenant_id: TenantId) {
|
|||||||
if let Err(e) = tenant.gc_iteration(None, gc_horizon, tenant.get_pitr_interval(), false).await
|
if let Err(e) = tenant.gc_iteration(None, gc_horizon, tenant.get_pitr_interval(), false).await
|
||||||
{
|
{
|
||||||
sleep_duration = wait_duration;
|
sleep_duration = wait_duration;
|
||||||
error!("Gc failed, retrying in {:?}: {e:#}", sleep_duration);
|
error!("Gc failed, retrying in {:?}: {e:?}", sleep_duration);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -32,11 +32,6 @@
|
|||||||
|
|
||||||
#define PageStoreTrace DEBUG5
|
#define PageStoreTrace DEBUG5
|
||||||
|
|
||||||
#define NEON_TAG "[NEON_SMGR] "
|
|
||||||
#define neon_log(tag, fmt, ...) ereport(tag, \
|
|
||||||
(errmsg(NEON_TAG fmt, ##__VA_ARGS__), \
|
|
||||||
errhidestmt(true), errhidecontext(true)))
|
|
||||||
|
|
||||||
bool connected = false;
|
bool connected = false;
|
||||||
PGconn *pageserver_conn = NULL;
|
PGconn *pageserver_conn = NULL;
|
||||||
|
|
||||||
@@ -97,11 +92,10 @@ pageserver_connect()
|
|||||||
|
|
||||||
while (PQisBusy(pageserver_conn))
|
while (PQisBusy(pageserver_conn))
|
||||||
{
|
{
|
||||||
int wc;
|
|
||||||
WaitEvent event;
|
WaitEvent event;
|
||||||
|
|
||||||
/* Sleep until there's something to do */
|
/* Sleep until there's something to do */
|
||||||
wc = WaitEventSetWait(pageserver_conn_wes, -1L, &event, 1, PG_WAIT_EXTENSION);
|
(void) WaitEventSetWait(pageserver_conn_wes, -1L, &event, 1, PG_WAIT_EXTENSION);
|
||||||
ResetLatch(MyLatch);
|
ResetLatch(MyLatch);
|
||||||
|
|
||||||
CHECK_FOR_INTERRUPTS();
|
CHECK_FOR_INTERRUPTS();
|
||||||
@@ -141,11 +135,10 @@ retry:
|
|||||||
|
|
||||||
if (ret == 0)
|
if (ret == 0)
|
||||||
{
|
{
|
||||||
int wc;
|
|
||||||
WaitEvent event;
|
WaitEvent event;
|
||||||
|
|
||||||
/* Sleep until there's something to do */
|
/* Sleep until there's something to do */
|
||||||
wc = WaitEventSetWait(pageserver_conn_wes, -1L, &event, 1, PG_WAIT_EXTENSION);
|
(void) WaitEventSetWait(pageserver_conn_wes, -1L, &event, 1, PG_WAIT_EXTENSION);
|
||||||
ResetLatch(MyLatch);
|
ResetLatch(MyLatch);
|
||||||
|
|
||||||
CHECK_FOR_INTERRUPTS();
|
CHECK_FOR_INTERRUPTS();
|
||||||
@@ -239,6 +232,9 @@ pageserver_receive(void)
|
|||||||
StringInfoData resp_buff;
|
StringInfoData resp_buff;
|
||||||
NeonResponse *resp;
|
NeonResponse *resp;
|
||||||
|
|
||||||
|
if (!connected)
|
||||||
|
return NULL;
|
||||||
|
|
||||||
PG_TRY();
|
PG_TRY();
|
||||||
{
|
{
|
||||||
/* read response */
|
/* read response */
|
||||||
@@ -248,7 +244,10 @@ pageserver_receive(void)
|
|||||||
if (resp_buff.len < 0)
|
if (resp_buff.len < 0)
|
||||||
{
|
{
|
||||||
if (resp_buff.len == -1)
|
if (resp_buff.len == -1)
|
||||||
neon_log(ERROR, "end of COPY");
|
{
|
||||||
|
pageserver_disconnect();
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
else if (resp_buff.len == -2)
|
else if (resp_buff.len == -2)
|
||||||
neon_log(ERROR, "could not read COPY data: %s", PQerrorMessage(pageserver_conn));
|
neon_log(ERROR, "could not read COPY data: %s", PQerrorMessage(pageserver_conn));
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -49,6 +49,11 @@ typedef struct
|
|||||||
|
|
||||||
#define messageTag(m) (((const NeonMessage *)(m))->tag)
|
#define messageTag(m) (((const NeonMessage *)(m))->tag)
|
||||||
|
|
||||||
|
#define NEON_TAG "[NEON_SMGR] "
|
||||||
|
#define neon_log(tag, fmt, ...) ereport(tag, \
|
||||||
|
(errmsg(NEON_TAG fmt, ##__VA_ARGS__), \
|
||||||
|
errhidestmt(true), errhidecontext(true)))
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* supertype of all the Neon*Request structs below
|
* supertype of all the Neon*Request structs below
|
||||||
*
|
*
|
||||||
|
|||||||
@@ -251,9 +251,9 @@ XLogRecPtr prefetch_lsn = 0;
|
|||||||
|
|
||||||
static void consume_prefetch_responses(void);
|
static void consume_prefetch_responses(void);
|
||||||
static uint64 prefetch_register_buffer(BufferTag tag, bool *force_latest, XLogRecPtr *force_lsn);
|
static uint64 prefetch_register_buffer(BufferTag tag, bool *force_latest, XLogRecPtr *force_lsn);
|
||||||
static void prefetch_read(PrefetchRequest *slot);
|
static bool prefetch_read(PrefetchRequest *slot);
|
||||||
static void prefetch_do_request(PrefetchRequest *slot, bool *force_latest, XLogRecPtr *force_lsn);
|
static void prefetch_do_request(PrefetchRequest *slot, bool *force_latest, XLogRecPtr *force_lsn);
|
||||||
static void prefetch_wait_for(uint64 ring_index);
|
static bool prefetch_wait_for(uint64 ring_index);
|
||||||
static void prefetch_cleanup(void);
|
static void prefetch_cleanup(void);
|
||||||
static inline void prefetch_set_unused(uint64 ring_index);
|
static inline void prefetch_set_unused(uint64 ring_index);
|
||||||
|
|
||||||
@@ -393,7 +393,7 @@ prefetch_cleanup(void)
|
|||||||
* NOTE: this function may indirectly update MyPState->pfs_hash; which
|
* NOTE: this function may indirectly update MyPState->pfs_hash; which
|
||||||
* invalidates any active pointers into the hash table.
|
* invalidates any active pointers into the hash table.
|
||||||
*/
|
*/
|
||||||
static void
|
static bool
|
||||||
prefetch_wait_for(uint64 ring_index)
|
prefetch_wait_for(uint64 ring_index)
|
||||||
{
|
{
|
||||||
PrefetchRequest *entry;
|
PrefetchRequest *entry;
|
||||||
@@ -412,8 +412,10 @@ prefetch_wait_for(uint64 ring_index)
|
|||||||
entry = GetPrfSlot(MyPState->ring_receive);
|
entry = GetPrfSlot(MyPState->ring_receive);
|
||||||
|
|
||||||
Assert(entry->status == PRFS_REQUESTED);
|
Assert(entry->status == PRFS_REQUESTED);
|
||||||
prefetch_read(entry);
|
if (!prefetch_read(entry))
|
||||||
|
return false;
|
||||||
}
|
}
|
||||||
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
@@ -425,7 +427,7 @@ prefetch_wait_for(uint64 ring_index)
|
|||||||
* NOTE: this function may indirectly update MyPState->pfs_hash; which
|
* NOTE: this function may indirectly update MyPState->pfs_hash; which
|
||||||
* invalidates any active pointers into the hash table.
|
* invalidates any active pointers into the hash table.
|
||||||
*/
|
*/
|
||||||
static void
|
static bool
|
||||||
prefetch_read(PrefetchRequest *slot)
|
prefetch_read(PrefetchRequest *slot)
|
||||||
{
|
{
|
||||||
NeonResponse *response;
|
NeonResponse *response;
|
||||||
@@ -438,15 +440,22 @@ prefetch_read(PrefetchRequest *slot)
|
|||||||
old = MemoryContextSwitchTo(MyPState->errctx);
|
old = MemoryContextSwitchTo(MyPState->errctx);
|
||||||
response = (NeonResponse *) page_server->receive();
|
response = (NeonResponse *) page_server->receive();
|
||||||
MemoryContextSwitchTo(old);
|
MemoryContextSwitchTo(old);
|
||||||
|
if (response)
|
||||||
/* update prefetch state */
|
{
|
||||||
MyPState->n_responses_buffered += 1;
|
/* update prefetch state */
|
||||||
MyPState->n_requests_inflight -= 1;
|
MyPState->n_responses_buffered += 1;
|
||||||
MyPState->ring_receive += 1;
|
MyPState->n_requests_inflight -= 1;
|
||||||
|
MyPState->ring_receive += 1;
|
||||||
|
|
||||||
/* update slot state */
|
/* update slot state */
|
||||||
slot->status = PRFS_RECEIVED;
|
slot->status = PRFS_RECEIVED;
|
||||||
slot->response = response;
|
slot->response = response;
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
return false;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
@@ -746,11 +755,16 @@ prefetch_register_buffer(BufferTag tag, bool *force_latest, XLogRecPtr *force_ls
|
|||||||
static NeonResponse *
|
static NeonResponse *
|
||||||
page_server_request(void const *req)
|
page_server_request(void const *req)
|
||||||
{
|
{
|
||||||
page_server->send((NeonRequest *) req);
|
NeonResponse* resp;
|
||||||
page_server->flush();
|
do {
|
||||||
MyPState->ring_flush = MyPState->ring_unused;
|
page_server->send((NeonRequest *) req);
|
||||||
consume_prefetch_responses();
|
page_server->flush();
|
||||||
return page_server->receive();
|
MyPState->ring_flush = MyPState->ring_unused;
|
||||||
|
consume_prefetch_responses();
|
||||||
|
resp = page_server->receive();
|
||||||
|
} while (resp == NULL);
|
||||||
|
return resp;
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@@ -1635,7 +1649,8 @@ neon_close(SMgrRelation reln, ForkNumber forknum)
|
|||||||
bool
|
bool
|
||||||
neon_prefetch(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum)
|
neon_prefetch(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum)
|
||||||
{
|
{
|
||||||
uint64 ring_index PG_USED_FOR_ASSERTS_ONLY;
|
BufferTag tag;
|
||||||
|
uint64 ring_index PG_USED_FOR_ASSERTS_ONLY;
|
||||||
|
|
||||||
switch (reln->smgr_relpersistence)
|
switch (reln->smgr_relpersistence)
|
||||||
{
|
{
|
||||||
@@ -1651,7 +1666,7 @@ neon_prefetch(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum)
|
|||||||
elog(ERROR, "unknown relpersistence '%c'", reln->smgr_relpersistence);
|
elog(ERROR, "unknown relpersistence '%c'", reln->smgr_relpersistence);
|
||||||
}
|
}
|
||||||
|
|
||||||
BufferTag tag = (BufferTag) {
|
tag = (BufferTag) {
|
||||||
.rnode = reln->smgr_rnode.node,
|
.rnode = reln->smgr_rnode.node,
|
||||||
.forkNum = forknum,
|
.forkNum = forknum,
|
||||||
.blockNum = blocknum
|
.blockNum = blocknum
|
||||||
@@ -1755,22 +1770,24 @@ neon_read_at_lsn(RelFileNode rnode, ForkNumber forkNum, BlockNumber blkno,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (entry == NULL)
|
do
|
||||||
{
|
{
|
||||||
n_prefetch_misses += 1;
|
if (entry == NULL)
|
||||||
|
{
|
||||||
|
n_prefetch_misses += 1;
|
||||||
|
|
||||||
ring_index = prefetch_register_buffer(buftag, &request_latest,
|
ring_index = prefetch_register_buffer(buftag, &request_latest,
|
||||||
&request_lsn);
|
&request_lsn);
|
||||||
slot = GetPrfSlot(ring_index);
|
slot = GetPrfSlot(ring_index);
|
||||||
}
|
}
|
||||||
|
|
||||||
Assert(slot->my_ring_index == ring_index);
|
Assert(slot->my_ring_index == ring_index);
|
||||||
Assert(MyPState->ring_last <= ring_index &&
|
Assert(MyPState->ring_last <= ring_index &&
|
||||||
MyPState->ring_unused > ring_index);
|
MyPState->ring_unused > ring_index);
|
||||||
Assert(slot->status != PRFS_UNUSED);
|
Assert(slot->status != PRFS_UNUSED);
|
||||||
Assert(GetPrfSlot(ring_index) == slot);
|
Assert(GetPrfSlot(ring_index) == slot);
|
||||||
|
|
||||||
prefetch_wait_for(ring_index);
|
} while (!prefetch_wait_for(ring_index));
|
||||||
|
|
||||||
Assert(slot->status == PRFS_RECEIVED);
|
Assert(slot->status == PRFS_RECEIVED);
|
||||||
|
|
||||||
|
|||||||
@@ -119,6 +119,7 @@ static TimestampTz last_reconnect_attempt;
|
|||||||
static WalproposerShmemState * walprop_shared;
|
static WalproposerShmemState * walprop_shared;
|
||||||
|
|
||||||
/* Prototypes for private functions */
|
/* Prototypes for private functions */
|
||||||
|
static void WalProposerRegister(void);
|
||||||
static void WalProposerInit(XLogRecPtr flushRecPtr, uint64 systemId);
|
static void WalProposerInit(XLogRecPtr flushRecPtr, uint64 systemId);
|
||||||
static void WalProposerStart(void);
|
static void WalProposerStart(void);
|
||||||
static void WalProposerLoop(void);
|
static void WalProposerLoop(void);
|
||||||
@@ -455,7 +456,7 @@ WalProposerPoll(void)
|
|||||||
/*
|
/*
|
||||||
* Register a background worker proposing WAL to wal acceptors.
|
* Register a background worker proposing WAL to wal acceptors.
|
||||||
*/
|
*/
|
||||||
void
|
static void
|
||||||
WalProposerRegister(void)
|
WalProposerRegister(void)
|
||||||
{
|
{
|
||||||
BackgroundWorker bgw;
|
BackgroundWorker bgw;
|
||||||
|
|||||||
@@ -377,18 +377,18 @@ typedef struct Safekeeper
|
|||||||
AppendResponse appendResponse; /* feedback for master */
|
AppendResponse appendResponse; /* feedback for master */
|
||||||
} Safekeeper;
|
} Safekeeper;
|
||||||
|
|
||||||
extern PGDLLIMPORT void WalProposerMain(Datum main_arg);
|
extern void WalProposerSync(int argc, char *argv[]);
|
||||||
void WalProposerBroadcast(XLogRecPtr startpos, XLogRecPtr endpos);
|
extern void WalProposerMain(Datum main_arg);
|
||||||
void WalProposerPoll(void);
|
extern void WalProposerBroadcast(XLogRecPtr startpos, XLogRecPtr endpos);
|
||||||
void WalProposerRegister(void);
|
extern void WalProposerPoll(void);
|
||||||
void ParseReplicationFeedbackMessage(StringInfo reply_message,
|
extern void ParseReplicationFeedbackMessage(StringInfo reply_message,
|
||||||
ReplicationFeedback * rf);
|
ReplicationFeedback *rf);
|
||||||
extern void StartProposerReplication(StartReplicationCmd *cmd);
|
extern void StartProposerReplication(StartReplicationCmd *cmd);
|
||||||
|
|
||||||
Size WalproposerShmemSize(void);
|
extern Size WalproposerShmemSize(void);
|
||||||
bool WalproposerShmemInit(void);
|
extern bool WalproposerShmemInit(void);
|
||||||
void replication_feedback_set(ReplicationFeedback * rf);
|
extern void replication_feedback_set(ReplicationFeedback *rf);
|
||||||
void replication_feedback_get_lsns(XLogRecPtr *writeLsn, XLogRecPtr *flushLsn, XLogRecPtr *applyLsn);
|
extern void replication_feedback_get_lsns(XLogRecPtr *writeLsn, XLogRecPtr *flushLsn, XLogRecPtr *applyLsn);
|
||||||
|
|
||||||
/* libpqwalproposer hooks & helper type */
|
/* libpqwalproposer hooks & helper type */
|
||||||
|
|
||||||
|
|||||||
@@ -1,7 +1,7 @@
|
|||||||
//! Client authentication mechanisms.
|
//! Client authentication mechanisms.
|
||||||
|
|
||||||
pub mod backend;
|
pub mod backend;
|
||||||
pub use backend::{BackendType, ConsoleReqExtra, DatabaseInfo};
|
pub use backend::{BackendType, ConsoleReqExtra};
|
||||||
|
|
||||||
mod credentials;
|
mod credentials;
|
||||||
pub use credentials::ClientCredentials;
|
pub use credentials::ClientCredentials;
|
||||||
|
|||||||
@@ -12,7 +12,6 @@ use crate::{
|
|||||||
waiters::{self, Waiter, Waiters},
|
waiters::{self, Waiter, Waiters},
|
||||||
};
|
};
|
||||||
use once_cell::sync::Lazy;
|
use once_cell::sync::Lazy;
|
||||||
use serde::{Deserialize, Serialize};
|
|
||||||
use std::borrow::Cow;
|
use std::borrow::Cow;
|
||||||
use tokio::io::{AsyncRead, AsyncWrite};
|
use tokio::io::{AsyncRead, AsyncWrite};
|
||||||
use tracing::{info, warn};
|
use tracing::{info, warn};
|
||||||
@@ -36,45 +35,6 @@ pub fn notify(psql_session_id: &str, msg: mgmt::ComputeReady) -> Result<(), wait
|
|||||||
CPLANE_WAITERS.notify(psql_session_id, msg)
|
CPLANE_WAITERS.notify(psql_session_id, msg)
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Compute node connection params provided by the cloud.
|
|
||||||
/// Note how it implements serde traits, since we receive it over the wire.
|
|
||||||
#[derive(Serialize, Deserialize, Default)]
|
|
||||||
pub struct DatabaseInfo {
|
|
||||||
pub host: String,
|
|
||||||
pub port: u16,
|
|
||||||
pub dbname: String,
|
|
||||||
pub user: String,
|
|
||||||
pub password: Option<String>,
|
|
||||||
}
|
|
||||||
|
|
||||||
// Manually implement debug to omit personal and sensitive info.
|
|
||||||
impl std::fmt::Debug for DatabaseInfo {
|
|
||||||
fn fmt(&self, fmt: &mut std::fmt::Formatter) -> std::fmt::Result {
|
|
||||||
fmt.debug_struct("DatabaseInfo")
|
|
||||||
.field("host", &self.host)
|
|
||||||
.field("port", &self.port)
|
|
||||||
.finish_non_exhaustive()
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
impl From<DatabaseInfo> for tokio_postgres::Config {
|
|
||||||
fn from(db_info: DatabaseInfo) -> Self {
|
|
||||||
let mut config = tokio_postgres::Config::new();
|
|
||||||
|
|
||||||
config
|
|
||||||
.host(&db_info.host)
|
|
||||||
.port(db_info.port)
|
|
||||||
.dbname(&db_info.dbname)
|
|
||||||
.user(&db_info.user);
|
|
||||||
|
|
||||||
if let Some(password) = db_info.password {
|
|
||||||
config.password(password);
|
|
||||||
}
|
|
||||||
|
|
||||||
config
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Extra query params we'd like to pass to the console.
|
/// Extra query params we'd like to pass to the console.
|
||||||
pub struct ConsoleReqExtra<'a> {
|
pub struct ConsoleReqExtra<'a> {
|
||||||
/// A unique identifier for a connection.
|
/// A unique identifier for a connection.
|
||||||
@@ -158,54 +118,107 @@ impl<'a, T, E> BackendType<'a, Result<T, E>> {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// A product of successful authentication.
|
||||||
|
pub struct AuthSuccess<T> {
|
||||||
|
/// Did we send [`pq_proto::BeMessage::AuthenticationOk`] to client?
|
||||||
|
pub reported_auth_ok: bool,
|
||||||
|
/// Something to be considered a positive result.
|
||||||
|
pub value: T,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<T> AuthSuccess<T> {
|
||||||
|
/// Very similar to [`std::option::Option::map`].
|
||||||
|
/// Maps [`AuthSuccess<T>`] to [`AuthSuccess<R>`] by applying
|
||||||
|
/// a function to a contained value.
|
||||||
|
pub fn map<R>(self, f: impl FnOnce(T) -> R) -> AuthSuccess<R> {
|
||||||
|
AuthSuccess {
|
||||||
|
reported_auth_ok: self.reported_auth_ok,
|
||||||
|
value: f(self.value),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Info for establishing a connection to a compute node.
|
||||||
|
/// This is what we get after auth succeeded, but not before!
|
||||||
|
pub struct NodeInfo {
|
||||||
|
/// Project from [`auth::ClientCredentials`].
|
||||||
|
pub project: String,
|
||||||
|
/// Compute node connection params.
|
||||||
|
pub config: compute::ConnCfg,
|
||||||
|
}
|
||||||
|
|
||||||
impl BackendType<'_, ClientCredentials<'_>> {
|
impl BackendType<'_, ClientCredentials<'_>> {
|
||||||
|
/// Do something special if user didn't provide the `project` parameter.
|
||||||
|
async fn try_password_hack(
|
||||||
|
&mut self,
|
||||||
|
extra: &ConsoleReqExtra<'_>,
|
||||||
|
client: &mut stream::PqStream<impl AsyncRead + AsyncWrite + Unpin + Send>,
|
||||||
|
) -> auth::Result<Option<AuthSuccess<NodeInfo>>> {
|
||||||
|
use BackendType::*;
|
||||||
|
|
||||||
|
// If there's no project so far, that entails that client doesn't
|
||||||
|
// support SNI or other means of passing the project name.
|
||||||
|
// We now expect to see a very specific payload in the place of password.
|
||||||
|
let fetch_magic_payload = async {
|
||||||
|
warn!("project name not specified, resorting to the password hack auth flow");
|
||||||
|
let payload = AuthFlow::new(client)
|
||||||
|
.begin(auth::PasswordHack)
|
||||||
|
.await?
|
||||||
|
.authenticate()
|
||||||
|
.await?;
|
||||||
|
|
||||||
|
info!(project = &payload.project, "received missing parameter");
|
||||||
|
auth::Result::Ok(payload)
|
||||||
|
};
|
||||||
|
|
||||||
|
// TODO: find a proper way to merge those very similar blocks.
|
||||||
|
let (mut config, payload) = match self {
|
||||||
|
Console(endpoint, creds) if creds.project.is_none() => {
|
||||||
|
let payload = fetch_magic_payload.await?;
|
||||||
|
|
||||||
|
let mut creds = creds.as_ref();
|
||||||
|
creds.project = Some(payload.project.as_str().into());
|
||||||
|
let config = console::Api::new(endpoint, extra, &creds)
|
||||||
|
.wake_compute()
|
||||||
|
.await?;
|
||||||
|
|
||||||
|
(config, payload)
|
||||||
|
}
|
||||||
|
Postgres(endpoint, creds) if creds.project.is_none() => {
|
||||||
|
let payload = fetch_magic_payload.await?;
|
||||||
|
|
||||||
|
let mut creds = creds.as_ref();
|
||||||
|
creds.project = Some(payload.project.as_str().into());
|
||||||
|
let config = postgres::Api::new(endpoint, &creds).wake_compute().await?;
|
||||||
|
|
||||||
|
(config, payload)
|
||||||
|
}
|
||||||
|
_ => return Ok(None),
|
||||||
|
};
|
||||||
|
|
||||||
|
config.password(payload.password);
|
||||||
|
Ok(Some(AuthSuccess {
|
||||||
|
reported_auth_ok: false,
|
||||||
|
value: NodeInfo {
|
||||||
|
project: payload.project,
|
||||||
|
config,
|
||||||
|
},
|
||||||
|
}))
|
||||||
|
}
|
||||||
|
|
||||||
/// Authenticate the client via the requested backend, possibly using credentials.
|
/// Authenticate the client via the requested backend, possibly using credentials.
|
||||||
pub async fn authenticate(
|
pub async fn authenticate(
|
||||||
mut self,
|
mut self,
|
||||||
extra: &ConsoleReqExtra<'_>,
|
extra: &ConsoleReqExtra<'_>,
|
||||||
client: &mut stream::PqStream<impl AsyncRead + AsyncWrite + Unpin + Send>,
|
client: &mut stream::PqStream<impl AsyncRead + AsyncWrite + Unpin + Send>,
|
||||||
) -> super::Result<compute::NodeInfo> {
|
) -> auth::Result<AuthSuccess<NodeInfo>> {
|
||||||
use BackendType::*;
|
use BackendType::*;
|
||||||
|
|
||||||
if let Console(_, creds) | Postgres(_, creds) = &mut self {
|
// Handle cases when `project` is missing in `creds`.
|
||||||
// If there's no project so far, that entails that client doesn't
|
// TODO: type safety: return `creds` with irrefutable `project`.
|
||||||
// support SNI or other means of passing the project name.
|
if let Some(res) = self.try_password_hack(extra, client).await? {
|
||||||
// We now expect to see a very specific payload in the place of password.
|
info!("user successfully authenticated (using the password hack)");
|
||||||
if creds.project().is_none() {
|
return Ok(res);
|
||||||
warn!("project name not specified, resorting to the password hack auth flow");
|
|
||||||
|
|
||||||
let payload = AuthFlow::new(client)
|
|
||||||
.begin(auth::PasswordHack)
|
|
||||||
.await?
|
|
||||||
.authenticate()
|
|
||||||
.await?;
|
|
||||||
|
|
||||||
// Finally we may finish the initialization of `creds`.
|
|
||||||
// TODO: add missing type safety to ClientCredentials.
|
|
||||||
info!(project = &payload.project, "received missing parameter");
|
|
||||||
creds.project = Some(payload.project.into());
|
|
||||||
|
|
||||||
let mut config = match &self {
|
|
||||||
Console(endpoint, creds) => {
|
|
||||||
console::Api::new(endpoint, extra, creds)
|
|
||||||
.wake_compute()
|
|
||||||
.await?
|
|
||||||
}
|
|
||||||
Postgres(endpoint, creds) => {
|
|
||||||
postgres::Api::new(endpoint, creds).wake_compute().await?
|
|
||||||
}
|
|
||||||
_ => unreachable!("see the patterns above"),
|
|
||||||
};
|
|
||||||
|
|
||||||
// We should use a password from payload as well.
|
|
||||||
config.password(payload.password);
|
|
||||||
|
|
||||||
info!("user successfully authenticated (using the password hack)");
|
|
||||||
return Ok(compute::NodeInfo {
|
|
||||||
reported_auth_ok: false,
|
|
||||||
config,
|
|
||||||
});
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
let res = match self {
|
let res = match self {
|
||||||
@@ -215,22 +228,34 @@ impl BackendType<'_, ClientCredentials<'_>> {
|
|||||||
project = creds.project(),
|
project = creds.project(),
|
||||||
"performing authentication using the console"
|
"performing authentication using the console"
|
||||||
);
|
);
|
||||||
|
|
||||||
|
assert!(creds.project.is_some());
|
||||||
console::Api::new(&endpoint, extra, &creds)
|
console::Api::new(&endpoint, extra, &creds)
|
||||||
.handle_user(client)
|
.handle_user(client)
|
||||||
.await
|
.await?
|
||||||
|
.map(|config| NodeInfo {
|
||||||
|
project: creds.project.unwrap().into_owned(),
|
||||||
|
config,
|
||||||
|
})
|
||||||
}
|
}
|
||||||
Postgres(endpoint, creds) => {
|
Postgres(endpoint, creds) => {
|
||||||
info!("performing mock authentication using a local postgres instance");
|
info!("performing mock authentication using a local postgres instance");
|
||||||
|
|
||||||
|
assert!(creds.project.is_some());
|
||||||
postgres::Api::new(&endpoint, &creds)
|
postgres::Api::new(&endpoint, &creds)
|
||||||
.handle_user(client)
|
.handle_user(client)
|
||||||
.await
|
.await?
|
||||||
|
.map(|config| NodeInfo {
|
||||||
|
project: creds.project.unwrap().into_owned(),
|
||||||
|
config,
|
||||||
|
})
|
||||||
}
|
}
|
||||||
// NOTE: this auth backend doesn't use client credentials.
|
// NOTE: this auth backend doesn't use client credentials.
|
||||||
Link(url) => {
|
Link(url) => {
|
||||||
info!("performing link authentication");
|
info!("performing link authentication");
|
||||||
link::handle_user(&url, client).await
|
link::handle_user(&url, client).await?
|
||||||
}
|
}
|
||||||
}?;
|
};
|
||||||
|
|
||||||
info!("user successfully authenticated");
|
info!("user successfully authenticated");
|
||||||
Ok(res)
|
Ok(res)
|
||||||
|
|||||||
@@ -1,9 +1,9 @@
|
|||||||
//! Cloud API V2.
|
//! Cloud API V2.
|
||||||
|
|
||||||
use super::ConsoleReqExtra;
|
use super::{AuthSuccess, ConsoleReqExtra};
|
||||||
use crate::{
|
use crate::{
|
||||||
auth::{self, AuthFlow, ClientCredentials},
|
auth::{self, AuthFlow, ClientCredentials},
|
||||||
compute::{self, ComputeConnCfg},
|
compute,
|
||||||
error::{io_error, UserFacingError},
|
error::{io_error, UserFacingError},
|
||||||
http, scram,
|
http, scram,
|
||||||
stream::PqStream,
|
stream::PqStream,
|
||||||
@@ -128,7 +128,7 @@ impl<'a> Api<'a> {
|
|||||||
pub(super) async fn handle_user(
|
pub(super) async fn handle_user(
|
||||||
self,
|
self,
|
||||||
client: &mut PqStream<impl AsyncRead + AsyncWrite + Unpin + Send>,
|
client: &mut PqStream<impl AsyncRead + AsyncWrite + Unpin + Send>,
|
||||||
) -> auth::Result<compute::NodeInfo> {
|
) -> auth::Result<AuthSuccess<compute::ConnCfg>> {
|
||||||
handle_user(client, &self, Self::get_auth_info, Self::wake_compute).await
|
handle_user(client, &self, Self::get_auth_info, Self::wake_compute).await
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -164,7 +164,7 @@ impl<'a> Api<'a> {
|
|||||||
}
|
}
|
||||||
|
|
||||||
/// Wake up the compute node and return the corresponding connection info.
|
/// Wake up the compute node and return the corresponding connection info.
|
||||||
pub(super) async fn wake_compute(&self) -> Result<ComputeConnCfg, WakeComputeError> {
|
pub(super) async fn wake_compute(&self) -> Result<compute::ConnCfg, WakeComputeError> {
|
||||||
let request_id = uuid::Uuid::new_v4().to_string();
|
let request_id = uuid::Uuid::new_v4().to_string();
|
||||||
let req = self
|
let req = self
|
||||||
.endpoint
|
.endpoint
|
||||||
@@ -195,7 +195,7 @@ impl<'a> Api<'a> {
|
|||||||
Some(x) => x,
|
Some(x) => x,
|
||||||
};
|
};
|
||||||
|
|
||||||
let mut config = ComputeConnCfg::new();
|
let mut config = compute::ConnCfg::new();
|
||||||
config
|
config
|
||||||
.host(host)
|
.host(host)
|
||||||
.port(port)
|
.port(port)
|
||||||
@@ -213,10 +213,10 @@ pub(super) async fn handle_user<'a, Endpoint, GetAuthInfo, WakeCompute>(
|
|||||||
endpoint: &'a Endpoint,
|
endpoint: &'a Endpoint,
|
||||||
get_auth_info: impl FnOnce(&'a Endpoint) -> GetAuthInfo,
|
get_auth_info: impl FnOnce(&'a Endpoint) -> GetAuthInfo,
|
||||||
wake_compute: impl FnOnce(&'a Endpoint) -> WakeCompute,
|
wake_compute: impl FnOnce(&'a Endpoint) -> WakeCompute,
|
||||||
) -> auth::Result<compute::NodeInfo>
|
) -> auth::Result<AuthSuccess<compute::ConnCfg>>
|
||||||
where
|
where
|
||||||
GetAuthInfo: Future<Output = Result<AuthInfo, GetAuthInfoError>>,
|
GetAuthInfo: Future<Output = Result<AuthInfo, GetAuthInfoError>>,
|
||||||
WakeCompute: Future<Output = Result<ComputeConnCfg, WakeComputeError>>,
|
WakeCompute: Future<Output = Result<compute::ConnCfg, WakeComputeError>>,
|
||||||
{
|
{
|
||||||
info!("fetching user's authentication info");
|
info!("fetching user's authentication info");
|
||||||
let auth_info = get_auth_info(endpoint).await?;
|
let auth_info = get_auth_info(endpoint).await?;
|
||||||
@@ -243,9 +243,9 @@ where
|
|||||||
config.auth_keys(tokio_postgres::config::AuthKeys::ScramSha256(keys));
|
config.auth_keys(tokio_postgres::config::AuthKeys::ScramSha256(keys));
|
||||||
}
|
}
|
||||||
|
|
||||||
Ok(compute::NodeInfo {
|
Ok(AuthSuccess {
|
||||||
reported_auth_ok: false,
|
reported_auth_ok: false,
|
||||||
config,
|
value: config,
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -1,3 +1,4 @@
|
|||||||
|
use super::{AuthSuccess, NodeInfo};
|
||||||
use crate::{auth, compute, error::UserFacingError, stream::PqStream, waiters};
|
use crate::{auth, compute, error::UserFacingError, stream::PqStream, waiters};
|
||||||
use pq_proto::{BeMessage as Be, BeParameterStatusMessage};
|
use pq_proto::{BeMessage as Be, BeParameterStatusMessage};
|
||||||
use thiserror::Error;
|
use thiserror::Error;
|
||||||
@@ -49,7 +50,7 @@ pub fn new_psql_session_id() -> String {
|
|||||||
pub async fn handle_user(
|
pub async fn handle_user(
|
||||||
link_uri: &reqwest::Url,
|
link_uri: &reqwest::Url,
|
||||||
client: &mut PqStream<impl AsyncRead + AsyncWrite + Unpin>,
|
client: &mut PqStream<impl AsyncRead + AsyncWrite + Unpin>,
|
||||||
) -> auth::Result<compute::NodeInfo> {
|
) -> auth::Result<AuthSuccess<NodeInfo>> {
|
||||||
let psql_session_id = new_psql_session_id();
|
let psql_session_id = new_psql_session_id();
|
||||||
let span = info_span!("link", psql_session_id = &psql_session_id);
|
let span = info_span!("link", psql_session_id = &psql_session_id);
|
||||||
let greeting = hello_message(link_uri, &psql_session_id);
|
let greeting = hello_message(link_uri, &psql_session_id);
|
||||||
@@ -71,8 +72,22 @@ pub async fn handle_user(
|
|||||||
|
|
||||||
client.write_message_noflush(&Be::NoticeResponse("Connecting to database."))?;
|
client.write_message_noflush(&Be::NoticeResponse("Connecting to database."))?;
|
||||||
|
|
||||||
Ok(compute::NodeInfo {
|
let mut config = compute::ConnCfg::new();
|
||||||
|
config
|
||||||
|
.host(&db_info.host)
|
||||||
|
.port(db_info.port)
|
||||||
|
.dbname(&db_info.dbname)
|
||||||
|
.user(&db_info.user);
|
||||||
|
|
||||||
|
if let Some(password) = db_info.password {
|
||||||
|
config.password(password);
|
||||||
|
}
|
||||||
|
|
||||||
|
Ok(AuthSuccess {
|
||||||
reported_auth_ok: true,
|
reported_auth_ok: true,
|
||||||
config: db_info.into(),
|
value: NodeInfo {
|
||||||
|
project: db_info.project,
|
||||||
|
config,
|
||||||
|
},
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -1,12 +1,12 @@
|
|||||||
//! Local mock of Cloud API V2.
|
//! Local mock of Cloud API V2.
|
||||||
|
|
||||||
|
use super::{
|
||||||
|
console::{self, AuthInfo, GetAuthInfoError, TransportError, WakeComputeError},
|
||||||
|
AuthSuccess,
|
||||||
|
};
|
||||||
use crate::{
|
use crate::{
|
||||||
auth::{
|
auth::{self, ClientCredentials},
|
||||||
self,
|
compute,
|
||||||
backend::console::{self, AuthInfo, GetAuthInfoError, TransportError, WakeComputeError},
|
|
||||||
ClientCredentials,
|
|
||||||
},
|
|
||||||
compute::{self, ComputeConnCfg},
|
|
||||||
error::io_error,
|
error::io_error,
|
||||||
scram,
|
scram,
|
||||||
stream::PqStream,
|
stream::PqStream,
|
||||||
@@ -37,7 +37,7 @@ impl<'a> Api<'a> {
|
|||||||
pub(super) async fn handle_user(
|
pub(super) async fn handle_user(
|
||||||
self,
|
self,
|
||||||
client: &mut PqStream<impl AsyncRead + AsyncWrite + Unpin + Send>,
|
client: &mut PqStream<impl AsyncRead + AsyncWrite + Unpin + Send>,
|
||||||
) -> auth::Result<compute::NodeInfo> {
|
) -> auth::Result<AuthSuccess<compute::ConnCfg>> {
|
||||||
// We reuse user handling logic from a production module.
|
// We reuse user handling logic from a production module.
|
||||||
console::handle_user(client, &self, Self::get_auth_info, Self::wake_compute).await
|
console::handle_user(client, &self, Self::get_auth_info, Self::wake_compute).await
|
||||||
}
|
}
|
||||||
@@ -82,8 +82,8 @@ impl<'a> Api<'a> {
|
|||||||
}
|
}
|
||||||
|
|
||||||
/// We don't need to wake anything locally, so we just return the connection info.
|
/// We don't need to wake anything locally, so we just return the connection info.
|
||||||
pub(super) async fn wake_compute(&self) -> Result<ComputeConnCfg, WakeComputeError> {
|
pub(super) async fn wake_compute(&self) -> Result<compute::ConnCfg, WakeComputeError> {
|
||||||
let mut config = ComputeConnCfg::new();
|
let mut config = compute::ConnCfg::new();
|
||||||
config
|
config
|
||||||
.host(self.endpoint.host_str().unwrap_or("localhost"))
|
.host(self.endpoint.host_str().unwrap_or("localhost"))
|
||||||
.port(self.endpoint.port().unwrap_or(5432))
|
.port(self.endpoint.port().unwrap_or(5432))
|
||||||
|
|||||||
@@ -36,11 +36,23 @@ pub struct ClientCredentials<'a> {
|
|||||||
}
|
}
|
||||||
|
|
||||||
impl ClientCredentials<'_> {
|
impl ClientCredentials<'_> {
|
||||||
|
#[inline]
|
||||||
pub fn project(&self) -> Option<&str> {
|
pub fn project(&self) -> Option<&str> {
|
||||||
self.project.as_deref()
|
self.project.as_deref()
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
impl<'a> ClientCredentials<'a> {
|
||||||
|
#[inline]
|
||||||
|
pub fn as_ref(&'a self) -> ClientCredentials<'a> {
|
||||||
|
Self {
|
||||||
|
user: self.user,
|
||||||
|
dbname: self.dbname,
|
||||||
|
project: self.project().map(Cow::Borrowed),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
impl<'a> ClientCredentials<'a> {
|
impl<'a> ClientCredentials<'a> {
|
||||||
pub fn parse(
|
pub fn parse(
|
||||||
params: &'a StartupMessageParams,
|
params: &'a StartupMessageParams,
|
||||||
|
|||||||
@@ -40,17 +40,36 @@ impl UserFacingError for ConnectionError {
|
|||||||
/// A pair of `ClientKey` & `ServerKey` for `SCRAM-SHA-256`.
|
/// A pair of `ClientKey` & `ServerKey` for `SCRAM-SHA-256`.
|
||||||
pub type ScramKeys = tokio_postgres::config::ScramKeys<32>;
|
pub type ScramKeys = tokio_postgres::config::ScramKeys<32>;
|
||||||
|
|
||||||
pub type ComputeConnCfg = tokio_postgres::Config;
|
/// A config for establishing a connection to compute node.
|
||||||
|
/// Eventually, `tokio_postgres` will be replaced with something better.
|
||||||
|
/// Newtype allows us to implement methods on top of it.
|
||||||
|
#[repr(transparent)]
|
||||||
|
pub struct ConnCfg(pub tokio_postgres::Config);
|
||||||
|
|
||||||
/// Various compute node info for establishing connection etc.
|
impl ConnCfg {
|
||||||
pub struct NodeInfo {
|
/// Construct a new connection config.
|
||||||
/// Did we send [`pq_proto::BeMessage::AuthenticationOk`]?
|
pub fn new() -> Self {
|
||||||
pub reported_auth_ok: bool,
|
Self(tokio_postgres::Config::new())
|
||||||
/// Compute node connection params.
|
}
|
||||||
pub config: tokio_postgres::Config,
|
|
||||||
}
|
}
|
||||||
|
|
||||||
impl NodeInfo {
|
impl std::ops::Deref for ConnCfg {
|
||||||
|
type Target = tokio_postgres::Config;
|
||||||
|
|
||||||
|
fn deref(&self) -> &Self::Target {
|
||||||
|
&self.0
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// For now, let's make it easier to setup the config.
|
||||||
|
impl std::ops::DerefMut for ConnCfg {
|
||||||
|
fn deref_mut(&mut self) -> &mut Self::Target {
|
||||||
|
&mut self.0
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl ConnCfg {
|
||||||
|
/// Establish a raw TCP connection to the compute node.
|
||||||
async fn connect_raw(&self) -> io::Result<(SocketAddr, TcpStream)> {
|
async fn connect_raw(&self) -> io::Result<(SocketAddr, TcpStream)> {
|
||||||
use tokio_postgres::config::Host;
|
use tokio_postgres::config::Host;
|
||||||
|
|
||||||
@@ -68,8 +87,8 @@ impl NodeInfo {
|
|||||||
// because it has no means for extracting the underlying socket which we
|
// because it has no means for extracting the underlying socket which we
|
||||||
// require for our business.
|
// require for our business.
|
||||||
let mut connection_error = None;
|
let mut connection_error = None;
|
||||||
let ports = self.config.get_ports();
|
let ports = self.0.get_ports();
|
||||||
let hosts = self.config.get_hosts();
|
let hosts = self.0.get_hosts();
|
||||||
// the ports array is supposed to have 0 entries, 1 entry, or as many entries as in the hosts array
|
// the ports array is supposed to have 0 entries, 1 entry, or as many entries as in the hosts array
|
||||||
if ports.len() > 1 && ports.len() != hosts.len() {
|
if ports.len() > 1 && ports.len() != hosts.len() {
|
||||||
return Err(io::Error::new(
|
return Err(io::Error::new(
|
||||||
@@ -77,7 +96,7 @@ impl NodeInfo {
|
|||||||
format!(
|
format!(
|
||||||
"couldn't connect: bad compute config, \
|
"couldn't connect: bad compute config, \
|
||||||
ports and hosts entries' count does not match: {:?}",
|
ports and hosts entries' count does not match: {:?}",
|
||||||
self.config
|
self.0
|
||||||
),
|
),
|
||||||
));
|
));
|
||||||
}
|
}
|
||||||
@@ -103,7 +122,7 @@ impl NodeInfo {
|
|||||||
Err(connection_error.unwrap_or_else(|| {
|
Err(connection_error.unwrap_or_else(|| {
|
||||||
io::Error::new(
|
io::Error::new(
|
||||||
io::ErrorKind::Other,
|
io::ErrorKind::Other,
|
||||||
format!("couldn't connect: bad compute config: {:?}", self.config),
|
format!("couldn't connect: bad compute config: {:?}", self.0),
|
||||||
)
|
)
|
||||||
}))
|
}))
|
||||||
}
|
}
|
||||||
@@ -116,7 +135,7 @@ pub struct PostgresConnection {
|
|||||||
pub version: String,
|
pub version: String,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl NodeInfo {
|
impl ConnCfg {
|
||||||
/// Connect to a corresponding compute node.
|
/// Connect to a corresponding compute node.
|
||||||
pub async fn connect(
|
pub async fn connect(
|
||||||
mut self,
|
mut self,
|
||||||
@@ -130,21 +149,21 @@ impl NodeInfo {
|
|||||||
.intersperse(" ") // TODO: use impl from std once it's stabilized
|
.intersperse(" ") // TODO: use impl from std once it's stabilized
|
||||||
.collect();
|
.collect();
|
||||||
|
|
||||||
self.config.options(&options);
|
self.0.options(&options);
|
||||||
}
|
}
|
||||||
|
|
||||||
if let Some(app_name) = params.get("application_name") {
|
if let Some(app_name) = params.get("application_name") {
|
||||||
self.config.application_name(app_name);
|
self.0.application_name(app_name);
|
||||||
}
|
}
|
||||||
|
|
||||||
if let Some(replication) = params.get("replication") {
|
if let Some(replication) = params.get("replication") {
|
||||||
use tokio_postgres::config::ReplicationMode;
|
use tokio_postgres::config::ReplicationMode;
|
||||||
match replication {
|
match replication {
|
||||||
"true" | "on" | "yes" | "1" => {
|
"true" | "on" | "yes" | "1" => {
|
||||||
self.config.replication_mode(ReplicationMode::Physical);
|
self.0.replication_mode(ReplicationMode::Physical);
|
||||||
}
|
}
|
||||||
"database" => {
|
"database" => {
|
||||||
self.config.replication_mode(ReplicationMode::Logical);
|
self.0.replication_mode(ReplicationMode::Logical);
|
||||||
}
|
}
|
||||||
_other => {}
|
_other => {}
|
||||||
}
|
}
|
||||||
@@ -160,7 +179,7 @@ impl NodeInfo {
|
|||||||
.map_err(|_| ConnectionError::FailedToConnectToCompute)?;
|
.map_err(|_| ConnectionError::FailedToConnectToCompute)?;
|
||||||
|
|
||||||
// TODO: establish a secure connection to the DB
|
// TODO: establish a secure connection to the DB
|
||||||
let (client, conn) = self.config.connect_raw(&mut stream, NoTls).await?;
|
let (client, conn) = self.0.connect_raw(&mut stream, NoTls).await?;
|
||||||
let version = conn
|
let version = conn
|
||||||
.parameter("server_version")
|
.parameter("server_version")
|
||||||
.ok_or(ConnectionError::FailedToFetchPgVersion)?
|
.ok_or(ConnectionError::FailedToFetchPgVersion)?
|
||||||
|
|||||||
@@ -6,16 +6,11 @@ use std::{
|
|||||||
net::{TcpListener, TcpStream},
|
net::{TcpListener, TcpStream},
|
||||||
thread,
|
thread,
|
||||||
};
|
};
|
||||||
use tracing::{error, info};
|
use tracing::{error, info, info_span};
|
||||||
use utils::postgres_backend::{self, AuthType, PostgresBackend};
|
use utils::postgres_backend::{self, AuthType, PostgresBackend};
|
||||||
|
|
||||||
/// TODO: move all of that to auth-backend/link.rs when we ditch legacy-console backend
|
/// Console management API listener thread.
|
||||||
|
/// It spawns console response handlers needed for the link auth.
|
||||||
///
|
|
||||||
/// Main proxy listener loop.
|
|
||||||
///
|
|
||||||
/// Listens for connections, and launches a new handler thread for each.
|
|
||||||
///
|
|
||||||
pub fn thread_main(listener: TcpListener) -> anyhow::Result<()> {
|
pub fn thread_main(listener: TcpListener) -> anyhow::Result<()> {
|
||||||
scopeguard::defer! {
|
scopeguard::defer! {
|
||||||
info!("mgmt has shut down");
|
info!("mgmt has shut down");
|
||||||
@@ -24,6 +19,7 @@ pub fn thread_main(listener: TcpListener) -> anyhow::Result<()> {
|
|||||||
listener
|
listener
|
||||||
.set_nonblocking(false)
|
.set_nonblocking(false)
|
||||||
.context("failed to set listener to blocking")?;
|
.context("failed to set listener to blocking")?;
|
||||||
|
|
||||||
loop {
|
loop {
|
||||||
let (socket, peer_addr) = listener.accept().context("failed to accept a new client")?;
|
let (socket, peer_addr) = listener.accept().context("failed to accept a new client")?;
|
||||||
info!("accepted connection from {peer_addr}");
|
info!("accepted connection from {peer_addr}");
|
||||||
@@ -31,9 +27,19 @@ pub fn thread_main(listener: TcpListener) -> anyhow::Result<()> {
|
|||||||
.set_nodelay(true)
|
.set_nodelay(true)
|
||||||
.context("failed to set client socket option")?;
|
.context("failed to set client socket option")?;
|
||||||
|
|
||||||
|
// TODO: replace with async tasks.
|
||||||
thread::spawn(move || {
|
thread::spawn(move || {
|
||||||
if let Err(err) = handle_connection(socket) {
|
let tid = std::thread::current().id();
|
||||||
error!("{err}");
|
let span = info_span!("mgmt", thread = format_args!("{tid:?}"));
|
||||||
|
let _enter = span.enter();
|
||||||
|
|
||||||
|
info!("started a new console management API thread");
|
||||||
|
scopeguard::defer! {
|
||||||
|
info!("console management API thread is about to finish");
|
||||||
|
}
|
||||||
|
|
||||||
|
if let Err(e) = handle_connection(socket) {
|
||||||
|
error!("thread failed with an error: {e}");
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
@@ -44,44 +50,21 @@ fn handle_connection(socket: TcpStream) -> anyhow::Result<()> {
|
|||||||
pgbackend.run(&mut MgmtHandler)
|
pgbackend.run(&mut MgmtHandler)
|
||||||
}
|
}
|
||||||
|
|
||||||
struct MgmtHandler;
|
/// Known as `kickResponse` in the console.
|
||||||
|
#[derive(Debug, Deserialize)]
|
||||||
/// Serialized examples:
|
|
||||||
// {
|
|
||||||
// "session_id": "71d6d03e6d93d99a",
|
|
||||||
// "result": {
|
|
||||||
// "Success": {
|
|
||||||
// "host": "127.0.0.1",
|
|
||||||
// "port": 5432,
|
|
||||||
// "dbname": "stas",
|
|
||||||
// "user": "stas",
|
|
||||||
// "password": "mypass"
|
|
||||||
// }
|
|
||||||
// }
|
|
||||||
// }
|
|
||||||
// {
|
|
||||||
// "session_id": "71d6d03e6d93d99a",
|
|
||||||
// "result": {
|
|
||||||
// "Failure": "oops"
|
|
||||||
// }
|
|
||||||
// }
|
|
||||||
//
|
|
||||||
// // to test manually by sending a query to mgmt interface:
|
|
||||||
// psql -h 127.0.0.1 -p 9999 -c '{"session_id":"4f10dde522e14739","result":{"Success":{"host":"127.0.0.1","port":5432,"dbname":"stas","user":"stas","password":"stas"}}}'
|
|
||||||
#[derive(Deserialize)]
|
|
||||||
struct PsqlSessionResponse {
|
struct PsqlSessionResponse {
|
||||||
session_id: String,
|
session_id: String,
|
||||||
result: PsqlSessionResult,
|
result: PsqlSessionResult,
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Deserialize)]
|
#[derive(Debug, Deserialize)]
|
||||||
enum PsqlSessionResult {
|
enum PsqlSessionResult {
|
||||||
Success(auth::DatabaseInfo),
|
Success(DatabaseInfo),
|
||||||
Failure(String),
|
Failure(String),
|
||||||
}
|
}
|
||||||
|
|
||||||
/// A message received by `mgmt` when a compute node is ready.
|
/// A message received by `mgmt` when a compute node is ready.
|
||||||
pub type ComputeReady = Result<auth::DatabaseInfo, String>;
|
pub type ComputeReady = Result<DatabaseInfo, String>;
|
||||||
|
|
||||||
impl PsqlSessionResult {
|
impl PsqlSessionResult {
|
||||||
fn into_compute_ready(self) -> ComputeReady {
|
fn into_compute_ready(self) -> ComputeReady {
|
||||||
@@ -92,25 +75,51 @@ impl PsqlSessionResult {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl postgres_backend::Handler for MgmtHandler {
|
/// Compute node connection params provided by the console.
|
||||||
fn process_query(
|
/// This struct and its parents are mgmt API implementation
|
||||||
&mut self,
|
/// detail and thus should remain in this module.
|
||||||
pgb: &mut PostgresBackend,
|
// TODO: restore deserialization tests from git history.
|
||||||
query_string: &str,
|
#[derive(Deserialize)]
|
||||||
) -> anyhow::Result<()> {
|
pub struct DatabaseInfo {
|
||||||
let res = try_process_query(pgb, query_string);
|
pub host: String,
|
||||||
// intercept and log error message
|
pub port: u16,
|
||||||
if res.is_err() {
|
pub dbname: String,
|
||||||
error!("mgmt query failed: {res:?}");
|
pub user: String,
|
||||||
}
|
/// Console always provides a password, but it might
|
||||||
res
|
/// be inconvenient for debug with local PG instance.
|
||||||
|
pub password: Option<String>,
|
||||||
|
pub project: String,
|
||||||
|
}
|
||||||
|
|
||||||
|
// Manually implement debug to omit sensitive info.
|
||||||
|
impl std::fmt::Debug for DatabaseInfo {
|
||||||
|
fn fmt(&self, fmt: &mut std::fmt::Formatter) -> std::fmt::Result {
|
||||||
|
fmt.debug_struct("DatabaseInfo")
|
||||||
|
.field("host", &self.host)
|
||||||
|
.field("port", &self.port)
|
||||||
|
.field("dbname", &self.dbname)
|
||||||
|
.field("user", &self.user)
|
||||||
|
.finish_non_exhaustive()
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
fn try_process_query(pgb: &mut PostgresBackend, query_string: &str) -> anyhow::Result<()> {
|
// TODO: replace with an http-based protocol.
|
||||||
info!("got mgmt query [redacted]"); // Content contains password, don't print it
|
struct MgmtHandler;
|
||||||
|
impl postgres_backend::Handler for MgmtHandler {
|
||||||
|
fn process_query(&mut self, pgb: &mut PostgresBackend, query: &str) -> anyhow::Result<()> {
|
||||||
|
try_process_query(pgb, query).map_err(|e| {
|
||||||
|
error!("failed to process response: {e:?}");
|
||||||
|
e
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
let resp: PsqlSessionResponse = serde_json::from_str(query_string)?;
|
fn try_process_query(pgb: &mut PostgresBackend, query: &str) -> anyhow::Result<()> {
|
||||||
|
let resp: PsqlSessionResponse = serde_json::from_str(query)?;
|
||||||
|
|
||||||
|
let span = info_span!("event", session_id = resp.session_id);
|
||||||
|
let _enter = span.enter();
|
||||||
|
info!("got response: {:?}", resp.result);
|
||||||
|
|
||||||
match auth::backend::notify(&resp.session_id, resp.result.into_compute_ready()) {
|
match auth::backend::notify(&resp.session_id, resp.result.into_compute_ready()) {
|
||||||
Ok(()) => {
|
Ok(()) => {
|
||||||
@@ -119,9 +128,50 @@ fn try_process_query(pgb: &mut PostgresBackend, query_string: &str) -> anyhow::R
|
|||||||
.write_message(&BeMessage::CommandComplete(b"SELECT 1"))?;
|
.write_message(&BeMessage::CommandComplete(b"SELECT 1"))?;
|
||||||
}
|
}
|
||||||
Err(e) => {
|
Err(e) => {
|
||||||
|
error!("failed to deliver response to per-client task");
|
||||||
pgb.write_message(&BeMessage::ErrorResponse(&e.to_string()))?;
|
pgb.write_message(&BeMessage::ErrorResponse(&e.to_string()))?;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[cfg(test)]
|
||||||
|
mod tests {
|
||||||
|
use super::*;
|
||||||
|
use serde_json::json;
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn parse_db_info() -> anyhow::Result<()> {
|
||||||
|
// with password
|
||||||
|
let _: DatabaseInfo = serde_json::from_value(json!({
|
||||||
|
"host": "localhost",
|
||||||
|
"port": 5432,
|
||||||
|
"dbname": "postgres",
|
||||||
|
"user": "john_doe",
|
||||||
|
"password": "password",
|
||||||
|
"project": "hello_world",
|
||||||
|
}))?;
|
||||||
|
|
||||||
|
// without password
|
||||||
|
let _: DatabaseInfo = serde_json::from_value(json!({
|
||||||
|
"host": "localhost",
|
||||||
|
"port": 5432,
|
||||||
|
"dbname": "postgres",
|
||||||
|
"user": "john_doe",
|
||||||
|
"project": "hello_world",
|
||||||
|
}))?;
|
||||||
|
|
||||||
|
// new field (forward compatibility)
|
||||||
|
let _: DatabaseInfo = serde_json::from_value(json!({
|
||||||
|
"host": "localhost",
|
||||||
|
"port": 5432,
|
||||||
|
"dbname": "postgres",
|
||||||
|
"user": "john_doe",
|
||||||
|
"project": "hello_world",
|
||||||
|
"N.E.W": "forward compatibility check",
|
||||||
|
}))?;
|
||||||
|
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|||||||
@@ -4,7 +4,7 @@ use crate::config::{ProxyConfig, TlsConfig};
|
|||||||
use crate::stream::{MeasuredStream, PqStream, Stream};
|
use crate::stream::{MeasuredStream, PqStream, Stream};
|
||||||
use anyhow::{bail, Context};
|
use anyhow::{bail, Context};
|
||||||
use futures::TryFutureExt;
|
use futures::TryFutureExt;
|
||||||
use metrics::{register_int_counter, IntCounter};
|
use metrics::{register_int_counter, register_int_counter_vec, IntCounter, IntCounterVec};
|
||||||
use once_cell::sync::Lazy;
|
use once_cell::sync::Lazy;
|
||||||
use pq_proto::{BeMessage as Be, *};
|
use pq_proto::{BeMessage as Be, *};
|
||||||
use std::sync::Arc;
|
use std::sync::Arc;
|
||||||
@@ -30,10 +30,16 @@ static NUM_CONNECTIONS_CLOSED_COUNTER: Lazy<IntCounter> = Lazy::new(|| {
|
|||||||
.unwrap()
|
.unwrap()
|
||||||
});
|
});
|
||||||
|
|
||||||
static NUM_BYTES_PROXIED_COUNTER: Lazy<IntCounter> = Lazy::new(|| {
|
static NUM_BYTES_PROXIED_COUNTER: Lazy<IntCounterVec> = Lazy::new(|| {
|
||||||
register_int_counter!(
|
register_int_counter_vec!(
|
||||||
"proxy_io_bytes_total",
|
"proxy_io_bytes_per_client",
|
||||||
"Number of bytes sent/received between any client and backend."
|
"Number of bytes sent/received between client and backend.",
|
||||||
|
&[
|
||||||
|
// Received (rx) / sent (tx).
|
||||||
|
"direction",
|
||||||
|
// Proxy can keep calling it `project` internally.
|
||||||
|
"endpoint_id"
|
||||||
|
]
|
||||||
)
|
)
|
||||||
.unwrap()
|
.unwrap()
|
||||||
});
|
});
|
||||||
@@ -230,16 +236,17 @@ impl<S: AsyncRead + AsyncWrite + Unpin + Send> Client<'_, S> {
|
|||||||
application_name: params.get("application_name"),
|
application_name: params.get("application_name"),
|
||||||
};
|
};
|
||||||
|
|
||||||
// Authenticate and connect to a compute node.
|
let auth_result = async {
|
||||||
let auth = creds
|
// `&mut stream` doesn't let us merge those 2 lines.
|
||||||
.authenticate(&extra, &mut stream)
|
let res = creds.authenticate(&extra, &mut stream).await;
|
||||||
.instrument(info_span!("auth"))
|
async { res }.or_else(|e| stream.throw_error(e)).await
|
||||||
.await;
|
}
|
||||||
|
.instrument(info_span!("auth"))
|
||||||
let node = async { auth }.or_else(|e| stream.throw_error(e)).await?;
|
.await?;
|
||||||
let reported_auth_ok = node.reported_auth_ok;
|
|
||||||
|
|
||||||
|
let node = auth_result.value;
|
||||||
let (db, cancel_closure) = node
|
let (db, cancel_closure) = node
|
||||||
|
.config
|
||||||
.connect(params)
|
.connect(params)
|
||||||
.or_else(|e| stream.throw_error(e))
|
.or_else(|e| stream.throw_error(e))
|
||||||
.await?;
|
.await?;
|
||||||
@@ -247,7 +254,9 @@ impl<S: AsyncRead + AsyncWrite + Unpin + Send> Client<'_, S> {
|
|||||||
let cancel_key_data = session.enable_query_cancellation(cancel_closure);
|
let cancel_key_data = session.enable_query_cancellation(cancel_closure);
|
||||||
|
|
||||||
// Report authentication success if we haven't done this already.
|
// Report authentication success if we haven't done this already.
|
||||||
if !reported_auth_ok {
|
// Note that we do this only (for the most part) after we've connected
|
||||||
|
// to a compute (see above) which performs its own authentication.
|
||||||
|
if !auth_result.reported_auth_ok {
|
||||||
stream
|
stream
|
||||||
.write_message_noflush(&Be::AuthenticationOk)?
|
.write_message_noflush(&Be::AuthenticationOk)?
|
||||||
.write_message_noflush(&BeParameterStatusMessage::encoding())?;
|
.write_message_noflush(&BeParameterStatusMessage::encoding())?;
|
||||||
@@ -261,17 +270,23 @@ impl<S: AsyncRead + AsyncWrite + Unpin + Send> Client<'_, S> {
|
|||||||
.write_message(&BeMessage::ReadyForQuery)
|
.write_message(&BeMessage::ReadyForQuery)
|
||||||
.await?;
|
.await?;
|
||||||
|
|
||||||
/// This function will be called for writes to either direction.
|
// TODO: add more identifiers.
|
||||||
fn inc_proxied(cnt: usize) {
|
let metric_id = node.project;
|
||||||
// Consider inventing something more sophisticated
|
|
||||||
// if this ever becomes a bottleneck (cacheline bouncing).
|
let m_sent = NUM_BYTES_PROXIED_COUNTER.with_label_values(&["tx", &metric_id]);
|
||||||
NUM_BYTES_PROXIED_COUNTER.inc_by(cnt as u64);
|
let mut client = MeasuredStream::new(stream.into_inner(), |cnt| {
|
||||||
}
|
// Number of bytes we sent to the client (outbound).
|
||||||
|
m_sent.inc_by(cnt as u64);
|
||||||
|
});
|
||||||
|
|
||||||
|
let m_recv = NUM_BYTES_PROXIED_COUNTER.with_label_values(&["rx", &metric_id]);
|
||||||
|
let mut db = MeasuredStream::new(db.stream, |cnt| {
|
||||||
|
// Number of bytes the client sent to the compute node (inbound).
|
||||||
|
m_recv.inc_by(cnt as u64);
|
||||||
|
});
|
||||||
|
|
||||||
// Starting from here we only proxy the client's traffic.
|
// Starting from here we only proxy the client's traffic.
|
||||||
info!("performing the proxy pass...");
|
info!("performing the proxy pass...");
|
||||||
let mut db = MeasuredStream::new(db.stream, inc_proxied);
|
|
||||||
let mut client = MeasuredStream::new(stream.into_inner(), inc_proxied);
|
|
||||||
let _ = tokio::io::copy_bidirectional(&mut client, &mut db).await?;
|
let _ = tokio::io::copy_bidirectional(&mut client, &mut db).await?;
|
||||||
|
|
||||||
Ok(())
|
Ok(())
|
||||||
|
|||||||
@@ -1568,6 +1568,7 @@ class NeonCli(AbstractNeonCli):
|
|||||||
def pageserver_start(
|
def pageserver_start(
|
||||||
self,
|
self,
|
||||||
overrides: Tuple[str, ...] = (),
|
overrides: Tuple[str, ...] = (),
|
||||||
|
extra_env_vars: Optional[Dict[str, str]] = None,
|
||||||
) -> "subprocess.CompletedProcess[str]":
|
) -> "subprocess.CompletedProcess[str]":
|
||||||
start_args = ["pageserver", "start", *overrides]
|
start_args = ["pageserver", "start", *overrides]
|
||||||
append_pageserver_param_overrides(
|
append_pageserver_param_overrides(
|
||||||
@@ -1577,11 +1578,11 @@ class NeonCli(AbstractNeonCli):
|
|||||||
pageserver_config_override=self.env.pageserver.config_override,
|
pageserver_config_override=self.env.pageserver.config_override,
|
||||||
)
|
)
|
||||||
|
|
||||||
s3_env_vars = None
|
|
||||||
if self.env.remote_storage is not None and isinstance(self.env.remote_storage, S3Storage):
|
if self.env.remote_storage is not None and isinstance(self.env.remote_storage, S3Storage):
|
||||||
s3_env_vars = self.env.remote_storage.access_env_vars()
|
s3_env_vars = self.env.remote_storage.access_env_vars()
|
||||||
|
extra_env_vars = (extra_env_vars or {}) | s3_env_vars
|
||||||
|
|
||||||
return self.raw_cli(start_args, extra_env_vars=s3_env_vars)
|
return self.raw_cli(start_args, extra_env_vars=extra_env_vars)
|
||||||
|
|
||||||
def pageserver_stop(self, immediate=False) -> "subprocess.CompletedProcess[str]":
|
def pageserver_stop(self, immediate=False) -> "subprocess.CompletedProcess[str]":
|
||||||
cmd = ["pageserver", "stop"]
|
cmd = ["pageserver", "stop"]
|
||||||
@@ -1760,9 +1761,15 @@ class NeonPageserver(PgProtocol):
|
|||||||
".*manual_gc.*is_shutdown_requested\\(\\) called in an unexpected task or thread.*",
|
".*manual_gc.*is_shutdown_requested\\(\\) called in an unexpected task or thread.*",
|
||||||
".*tenant_list: timeline is not found in remote index while it is present in the tenants registry.*",
|
".*tenant_list: timeline is not found in remote index while it is present in the tenants registry.*",
|
||||||
".*Removing intermediate uninit mark file.*",
|
".*Removing intermediate uninit mark file.*",
|
||||||
|
# FIXME: known race condition in TaskHandle: https://github.com/neondatabase/neon/issues/2885
|
||||||
|
".*sender is dropped while join handle is still alive.*",
|
||||||
]
|
]
|
||||||
|
|
||||||
def start(self, overrides: Tuple[str, ...] = ()) -> "NeonPageserver":
|
def start(
|
||||||
|
self,
|
||||||
|
overrides: Tuple[str, ...] = (),
|
||||||
|
extra_env_vars: Optional[Dict[str, str]] = None,
|
||||||
|
) -> "NeonPageserver":
|
||||||
"""
|
"""
|
||||||
Start the page server.
|
Start the page server.
|
||||||
`overrides` allows to add some config to this pageserver start.
|
`overrides` allows to add some config to this pageserver start.
|
||||||
@@ -1770,7 +1777,7 @@ class NeonPageserver(PgProtocol):
|
|||||||
"""
|
"""
|
||||||
assert self.running is False
|
assert self.running is False
|
||||||
|
|
||||||
self.env.neon_cli.pageserver_start(overrides=overrides)
|
self.env.neon_cli.pageserver_start(overrides=overrides, extra_env_vars=extra_env_vars)
|
||||||
self.running = True
|
self.running = True
|
||||||
return self
|
return self
|
||||||
|
|
||||||
@@ -2073,9 +2080,9 @@ class NeonProxy(PgProtocol):
|
|||||||
self,
|
self,
|
||||||
proxy_port: int,
|
proxy_port: int,
|
||||||
http_port: int,
|
http_port: int,
|
||||||
|
mgmt_port: int,
|
||||||
neon_binpath: Path,
|
neon_binpath: Path,
|
||||||
auth_endpoint=None,
|
auth_endpoint=None,
|
||||||
mgmt_port=None,
|
|
||||||
):
|
):
|
||||||
super().__init__(dsn=auth_endpoint, port=proxy_port)
|
super().__init__(dsn=auth_endpoint, port=proxy_port)
|
||||||
self.host = "127.0.0.1"
|
self.host = "127.0.0.1"
|
||||||
@@ -2089,7 +2096,8 @@ class NeonProxy(PgProtocol):
|
|||||||
|
|
||||||
def start(self):
|
def start(self):
|
||||||
"""
|
"""
|
||||||
Starts a proxy with option '--auth-backend postgres' and a postgres instance already provided though '--auth-endpoint <postgress-instance>'."
|
Starts a proxy with option '--auth-backend postgres' and a postgres instance
|
||||||
|
already provided though '--auth-endpoint <postgress-instance>'."
|
||||||
"""
|
"""
|
||||||
assert self._popen is None
|
assert self._popen is None
|
||||||
assert self.auth_endpoint is not None
|
assert self.auth_endpoint is not None
|
||||||
@@ -2099,6 +2107,7 @@ class NeonProxy(PgProtocol):
|
|||||||
str(self.neon_binpath / "proxy"),
|
str(self.neon_binpath / "proxy"),
|
||||||
*["--http", f"{self.host}:{self.http_port}"],
|
*["--http", f"{self.host}:{self.http_port}"],
|
||||||
*["--proxy", f"{self.host}:{self.proxy_port}"],
|
*["--proxy", f"{self.host}:{self.proxy_port}"],
|
||||||
|
*["--mgmt", f"{self.host}:{self.mgmt_port}"],
|
||||||
*["--auth-backend", "postgres"],
|
*["--auth-backend", "postgres"],
|
||||||
*["--auth-endpoint", self.auth_endpoint],
|
*["--auth-endpoint", self.auth_endpoint],
|
||||||
]
|
]
|
||||||
@@ -2175,11 +2184,13 @@ def static_proxy(
|
|||||||
auth_endpoint = f"postgres://proxy:password@{host}:{port}/{dbname}"
|
auth_endpoint = f"postgres://proxy:password@{host}:{port}/{dbname}"
|
||||||
|
|
||||||
proxy_port = port_distributor.get_port()
|
proxy_port = port_distributor.get_port()
|
||||||
|
mgmt_port = port_distributor.get_port()
|
||||||
http_port = port_distributor.get_port()
|
http_port = port_distributor.get_port()
|
||||||
|
|
||||||
with NeonProxy(
|
with NeonProxy(
|
||||||
proxy_port=proxy_port,
|
proxy_port=proxy_port,
|
||||||
http_port=http_port,
|
http_port=http_port,
|
||||||
|
mgmt_port=mgmt_port,
|
||||||
neon_binpath=neon_binpath,
|
neon_binpath=neon_binpath,
|
||||||
auth_endpoint=auth_endpoint,
|
auth_endpoint=auth_endpoint,
|
||||||
) as proxy:
|
) as proxy:
|
||||||
|
|||||||
@@ -6,6 +6,7 @@ import pytest
|
|||||||
from fixtures.benchmark_fixture import MetricReport
|
from fixtures.benchmark_fixture import MetricReport
|
||||||
from fixtures.compare_fixtures import PgCompare
|
from fixtures.compare_fixtures import PgCompare
|
||||||
from fixtures.log_helper import log
|
from fixtures.log_helper import log
|
||||||
|
from pytest_lazyfixture import lazy_fixture # type: ignore
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.parametrize(
|
@pytest.mark.parametrize(
|
||||||
@@ -20,16 +21,24 @@ from fixtures.log_helper import log
|
|||||||
pytest.param(10000000, 1, 4),
|
pytest.param(10000000, 1, 4),
|
||||||
],
|
],
|
||||||
)
|
)
|
||||||
def test_seqscans(neon_with_baseline: PgCompare, rows: int, iters: int, workers: int):
|
@pytest.mark.parametrize(
|
||||||
env = neon_with_baseline
|
"env, scale",
|
||||||
|
[
|
||||||
|
# Run on all envs. Use 50x larger table on remote cluster to make sure
|
||||||
|
# it doesn't fit in shared buffers, which are larger on remote than local.
|
||||||
|
pytest.param(lazy_fixture("neon_compare"), 1, id="neon"),
|
||||||
|
pytest.param(lazy_fixture("vanilla_compare"), 1, id="vanilla"),
|
||||||
|
pytest.param(
|
||||||
|
lazy_fixture("remote_compare"), 50, id="remote", marks=pytest.mark.remote_cluster
|
||||||
|
),
|
||||||
|
],
|
||||||
|
)
|
||||||
|
def test_seqscans(env: PgCompare, scale: int, rows: int, iters: int, workers: int):
|
||||||
|
rows = scale * rows
|
||||||
|
|
||||||
with closing(env.pg.connect()) as conn:
|
with closing(env.pg.connect()) as conn:
|
||||||
with conn.cursor() as cur:
|
with conn.cursor() as cur:
|
||||||
|
cur.execute("drop table if exists t;")
|
||||||
if True:
|
|
||||||
cur.execute("set enable_seqscan_prefetch = on;")
|
|
||||||
cur.execute("set seqscan_prefetch_buffers = 10;")
|
|
||||||
|
|
||||||
cur.execute("create table t (i integer);")
|
cur.execute("create table t (i integer);")
|
||||||
cur.execute(f"insert into t values (generate_series(1,{rows}));")
|
cur.execute(f"insert into t values (generate_series(1,{rows}));")
|
||||||
|
|
||||||
|
|||||||
@@ -8,6 +8,7 @@ from fixtures.neon_fixtures import NeonEnvBuilder, PgBin
|
|||||||
# normally restarts after it. Also, there should be GC ERRORs in the log,
|
# normally restarts after it. Also, there should be GC ERRORs in the log,
|
||||||
# but the fixture checks the log for any unexpected ERRORs after every
|
# but the fixture checks the log for any unexpected ERRORs after every
|
||||||
# test anyway, so it doesn't need any special attention here.
|
# test anyway, so it doesn't need any special attention here.
|
||||||
|
@pytest.mark.timeout(600)
|
||||||
def test_gc_cutoff(neon_env_builder: NeonEnvBuilder, pg_bin: PgBin):
|
def test_gc_cutoff(neon_env_builder: NeonEnvBuilder, pg_bin: PgBin):
|
||||||
env = neon_env_builder.init_start()
|
env = neon_env_builder.init_start()
|
||||||
|
|
||||||
@@ -38,7 +39,7 @@ def test_gc_cutoff(neon_env_builder: NeonEnvBuilder, pg_bin: PgBin):
|
|||||||
|
|
||||||
for _ in range(5):
|
for _ in range(5):
|
||||||
with pytest.raises(Exception):
|
with pytest.raises(Exception):
|
||||||
pg_bin.run_capture(["pgbench", "-N", "-c5", "-T100", "-Mprepared", connstr])
|
pg_bin.run_capture(["pgbench", "-P1", "-N", "-c5", "-T500", "-Mprepared", connstr])
|
||||||
env.pageserver.stop()
|
env.pageserver.stop()
|
||||||
env.pageserver.start()
|
env.pageserver.start()
|
||||||
pageserver_http.configure_failpoints(("after-timeline-gc-removed-layers", "exit"))
|
pageserver_http.configure_failpoints(("after-timeline-gc-removed-layers", "exit"))
|
||||||
|
|||||||
@@ -1,5 +1,4 @@
|
|||||||
import json
|
import json
|
||||||
import subprocess
|
|
||||||
from urllib.parse import urlparse
|
from urllib.parse import urlparse
|
||||||
|
|
||||||
import psycopg2
|
import psycopg2
|
||||||
@@ -8,11 +7,11 @@ from fixtures.log_helper import log
|
|||||||
from fixtures.neon_fixtures import PSQL, NeonProxy, VanillaPostgres
|
from fixtures.neon_fixtures import PSQL, NeonProxy, VanillaPostgres
|
||||||
|
|
||||||
|
|
||||||
def test_proxy_select_1(static_proxy):
|
def test_proxy_select_1(static_proxy: NeonProxy):
|
||||||
static_proxy.safe_psql("select 1", options="project=generic-project-name")
|
static_proxy.safe_psql("select 1", options="project=generic-project-name")
|
||||||
|
|
||||||
|
|
||||||
def test_password_hack(static_proxy):
|
def test_password_hack(static_proxy: NeonProxy):
|
||||||
user = "borat"
|
user = "borat"
|
||||||
password = "password"
|
password = "password"
|
||||||
static_proxy.safe_psql(
|
static_proxy.safe_psql(
|
||||||
@@ -24,118 +23,75 @@ def test_password_hack(static_proxy):
|
|||||||
static_proxy.safe_psql("select 1", sslsni=0, user=user, password=magic)
|
static_proxy.safe_psql("select 1", sslsni=0, user=user, password=magic)
|
||||||
|
|
||||||
# Must also check that invalid magic won't be accepted.
|
# Must also check that invalid magic won't be accepted.
|
||||||
with pytest.raises(psycopg2.errors.OperationalError):
|
with pytest.raises(psycopg2.OperationalError):
|
||||||
magic = "broken"
|
magic = "broken"
|
||||||
static_proxy.safe_psql("select 1", sslsni=0, user=user, password=magic)
|
static_proxy.safe_psql("select 1", sslsni=0, user=user, password=magic)
|
||||||
|
|
||||||
|
|
||||||
def get_session_id_from_uri_line(uri_prefix, uri_line):
|
def get_session_id(uri_prefix, uri_line):
|
||||||
assert uri_prefix in uri_line
|
assert uri_prefix in uri_line
|
||||||
|
|
||||||
url_parts = urlparse(uri_line)
|
url_parts = urlparse(uri_line)
|
||||||
psql_session_id = url_parts.path[1:]
|
psql_session_id = url_parts.path[1:]
|
||||||
assert psql_session_id.isalnum(), "session_id should only contain alphanumeric chars."
|
assert psql_session_id.isalnum(), "session_id should only contain alphanumeric chars"
|
||||||
link_auth_uri_prefix = uri_line[: -len(url_parts.path)]
|
|
||||||
# invariant: the prefix must match the uri_prefix.
|
|
||||||
assert (
|
|
||||||
link_auth_uri_prefix == uri_prefix
|
|
||||||
), f"Line='{uri_line}' should contain a http auth link of form '{uri_prefix}/<psql_session_id>'."
|
|
||||||
# invariant: the entire link_auth_uri should be on its own line, module spaces.
|
|
||||||
assert " ".join(uri_line.split(" ")) == f"{uri_prefix}/{psql_session_id}"
|
|
||||||
|
|
||||||
return psql_session_id
|
return psql_session_id
|
||||||
|
|
||||||
|
|
||||||
def create_and_send_db_info(local_vanilla_pg, psql_session_id, mgmt_port):
|
async def find_auth_link(link_auth_uri_prefix, proc):
|
||||||
pg_user = "proxy"
|
for _ in range(100):
|
||||||
pg_password = "password"
|
line = (await proc.stderr.readline()).decode("utf-8").strip()
|
||||||
|
log.info(f"psql line: {line}")
|
||||||
local_vanilla_pg.start()
|
|
||||||
query = f"create user {pg_user} with login superuser password '{pg_password}'"
|
|
||||||
local_vanilla_pg.safe_psql(query)
|
|
||||||
|
|
||||||
port = local_vanilla_pg.default_options["port"]
|
|
||||||
host = local_vanilla_pg.default_options["host"]
|
|
||||||
dbname = local_vanilla_pg.default_options["dbname"]
|
|
||||||
|
|
||||||
db_info_dict = {
|
|
||||||
"session_id": psql_session_id,
|
|
||||||
"result": {
|
|
||||||
"Success": {
|
|
||||||
"host": host,
|
|
||||||
"port": port,
|
|
||||||
"dbname": dbname,
|
|
||||||
"user": pg_user,
|
|
||||||
"password": pg_password,
|
|
||||||
}
|
|
||||||
},
|
|
||||||
}
|
|
||||||
db_info_str = json.dumps(db_info_dict)
|
|
||||||
cmd_args = [
|
|
||||||
"psql",
|
|
||||||
"-h",
|
|
||||||
"127.0.0.1", # localhost
|
|
||||||
"-p",
|
|
||||||
f"{mgmt_port}",
|
|
||||||
"-c",
|
|
||||||
db_info_str,
|
|
||||||
]
|
|
||||||
|
|
||||||
log.info(f"Sending to proxy the user and db info: {' '.join(cmd_args)}")
|
|
||||||
p = subprocess.Popen(cmd_args, stdout=subprocess.PIPE)
|
|
||||||
out, err = p.communicate()
|
|
||||||
assert "ok" in str(out)
|
|
||||||
|
|
||||||
|
|
||||||
async def get_uri_line_from_process_welcome_notice(link_auth_uri_prefix, proc):
|
|
||||||
"""
|
|
||||||
Returns the line from the welcome notice from proc containing link_auth_uri_prefix.
|
|
||||||
:param link_auth_uri_prefix: the uri prefix used to indicate the line of interest
|
|
||||||
:param proc: the process to read the welcome message from.
|
|
||||||
:return: a line containing the full link authentication uri.
|
|
||||||
"""
|
|
||||||
max_num_lines_of_welcome_message = 15
|
|
||||||
for attempt in range(max_num_lines_of_welcome_message):
|
|
||||||
raw_line = await proc.stderr.readline()
|
|
||||||
line = raw_line.decode("utf-8").strip()
|
|
||||||
if link_auth_uri_prefix in line:
|
if link_auth_uri_prefix in line:
|
||||||
|
log.info(f"SUCCESS, found auth url: {line}")
|
||||||
return line
|
return line
|
||||||
assert False, f"did not find line containing '{link_auth_uri_prefix}'"
|
|
||||||
|
|
||||||
|
async def activate_link_auth(local_vanilla_pg, link_proxy, psql_session_id):
|
||||||
|
pg_user = "proxy"
|
||||||
|
|
||||||
|
log.info("creating a new user for link auth test")
|
||||||
|
local_vanilla_pg.start()
|
||||||
|
local_vanilla_pg.safe_psql(f"create user {pg_user} with login superuser")
|
||||||
|
|
||||||
|
db_info = json.dumps(
|
||||||
|
{
|
||||||
|
"session_id": psql_session_id,
|
||||||
|
"result": {
|
||||||
|
"Success": {
|
||||||
|
"host": local_vanilla_pg.default_options["host"],
|
||||||
|
"port": local_vanilla_pg.default_options["port"],
|
||||||
|
"dbname": local_vanilla_pg.default_options["dbname"],
|
||||||
|
"user": pg_user,
|
||||||
|
"project": "irrelevant",
|
||||||
|
}
|
||||||
|
},
|
||||||
|
}
|
||||||
|
)
|
||||||
|
|
||||||
|
log.info("sending session activation message")
|
||||||
|
psql = await PSQL(host=link_proxy.host, port=link_proxy.mgmt_port).run(db_info)
|
||||||
|
out = (await psql.stdout.read()).decode("utf-8").strip()
|
||||||
|
assert out == "ok"
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.asyncio
|
@pytest.mark.asyncio
|
||||||
async def test_psql_session_id(vanilla_pg: VanillaPostgres, link_proxy: NeonProxy):
|
async def test_psql_session_id(vanilla_pg: VanillaPostgres, link_proxy: NeonProxy):
|
||||||
"""
|
psql = await PSQL(host=link_proxy.host, port=link_proxy.proxy_port).run("select 42")
|
||||||
Test copied and modified from: test_project_psql_link_auth test from cloud/tests_e2e/tests/test_project.py
|
|
||||||
Step 1. establish connection to the proxy
|
|
||||||
Step 2. retrieve session_id:
|
|
||||||
Step 2.1: read welcome message
|
|
||||||
Step 2.2: parse session_id
|
|
||||||
Step 3. create a vanilla_pg and send user and db info via command line (using Popen) a psql query via mgmt port to proxy.
|
|
||||||
Step 4. assert that select 1 has been executed correctly.
|
|
||||||
"""
|
|
||||||
|
|
||||||
psql = PSQL(
|
|
||||||
host=link_proxy.host,
|
|
||||||
port=link_proxy.proxy_port,
|
|
||||||
)
|
|
||||||
proc = await psql.run("select 42")
|
|
||||||
|
|
||||||
uri_prefix = link_proxy.link_auth_uri_prefix
|
uri_prefix = link_proxy.link_auth_uri_prefix
|
||||||
line_str = await get_uri_line_from_process_welcome_notice(uri_prefix, proc)
|
link = await find_auth_link(uri_prefix, psql)
|
||||||
|
|
||||||
psql_session_id = get_session_id_from_uri_line(uri_prefix, line_str)
|
psql_session_id = get_session_id(uri_prefix, link)
|
||||||
log.info(f"Parsed psql_session_id='{psql_session_id}' from Neon welcome message.")
|
await activate_link_auth(vanilla_pg, link_proxy, psql_session_id)
|
||||||
|
|
||||||
create_and_send_db_info(vanilla_pg, psql_session_id, link_proxy.mgmt_port)
|
assert psql.stdout is not None
|
||||||
|
out = (await psql.stdout.read()).decode("utf-8").strip()
|
||||||
assert proc.stdout is not None
|
|
||||||
out = (await proc.stdout.read()).decode("utf-8").strip()
|
|
||||||
assert out == "42"
|
assert out == "42"
|
||||||
|
|
||||||
|
|
||||||
# Pass extra options to the server.
|
# Pass extra options to the server.
|
||||||
def test_proxy_options(static_proxy):
|
def test_proxy_options(static_proxy: NeonProxy):
|
||||||
with static_proxy.connect(options="project=irrelevant -cproxytest.option=value") as conn:
|
with static_proxy.connect(options="project=irrelevant -cproxytest.option=value") as conn:
|
||||||
with conn.cursor() as cur:
|
with conn.cursor() as cur:
|
||||||
cur.execute("SHOW proxytest.option")
|
cur.execute("SHOW proxytest.option")
|
||||||
|
|||||||
@@ -1,3 +1,4 @@
|
|||||||
|
import time
|
||||||
from threading import Thread
|
from threading import Thread
|
||||||
|
|
||||||
import pytest
|
import pytest
|
||||||
@@ -11,11 +12,21 @@ def do_gc_target(
|
|||||||
):
|
):
|
||||||
"""Hack to unblock main, see https://github.com/neondatabase/neon/issues/2211"""
|
"""Hack to unblock main, see https://github.com/neondatabase/neon/issues/2211"""
|
||||||
try:
|
try:
|
||||||
|
log.info("sending gc http request")
|
||||||
pageserver_http.timeline_gc(tenant_id, timeline_id, 0)
|
pageserver_http.timeline_gc(tenant_id, timeline_id, 0)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
log.error("do_gc failed: %s", e)
|
log.error("do_gc failed: %s", e)
|
||||||
|
finally:
|
||||||
|
log.info("gc http thread returning")
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.skip(
|
||||||
|
reason="""
|
||||||
|
Commit 'make test_tenant_detach_smoke fail reproducibly' adds failpoint to make this test fail reproducibly.
|
||||||
|
Fix in https://github.com/neondatabase/neon/pull/2851 will come as part of
|
||||||
|
https://github.com/neondatabase/neon/pull/2785 .
|
||||||
|
"""
|
||||||
|
)
|
||||||
def test_tenant_detach_smoke(neon_env_builder: NeonEnvBuilder):
|
def test_tenant_detach_smoke(neon_env_builder: NeonEnvBuilder):
|
||||||
env = neon_env_builder.init_start()
|
env = neon_env_builder.init_start()
|
||||||
pageserver_http = env.pageserver.http_client()
|
pageserver_http = env.pageserver.http_client()
|
||||||
@@ -51,7 +62,7 @@ def test_tenant_detach_smoke(neon_env_builder: NeonEnvBuilder):
|
|||||||
]
|
]
|
||||||
)
|
)
|
||||||
|
|
||||||
# gc should not try to even start
|
# gc should not try to even start on a timeline that doesn't exist
|
||||||
with pytest.raises(
|
with pytest.raises(
|
||||||
expected_exception=PageserverApiException, match="gc target timeline does not exist"
|
expected_exception=PageserverApiException, match="gc target timeline does not exist"
|
||||||
):
|
):
|
||||||
@@ -61,25 +72,24 @@ def test_tenant_detach_smoke(neon_env_builder: NeonEnvBuilder):
|
|||||||
# the error will be printed to the log too
|
# the error will be printed to the log too
|
||||||
env.pageserver.allowed_errors.append(".*gc target timeline does not exist.*")
|
env.pageserver.allowed_errors.append(".*gc target timeline does not exist.*")
|
||||||
|
|
||||||
# try to concurrently run gc and detach
|
# Detach while running manual GC.
|
||||||
|
# It should wait for manual GC to finish (right now it doesn't that's why this test fails sometimes)
|
||||||
|
pageserver_http.configure_failpoints(
|
||||||
|
("gc_iteration_internal_after_getting_gc_timelines", "return(2000)")
|
||||||
|
)
|
||||||
gc_thread = Thread(target=lambda: do_gc_target(pageserver_http, tenant_id, timeline_id))
|
gc_thread = Thread(target=lambda: do_gc_target(pageserver_http, tenant_id, timeline_id))
|
||||||
gc_thread.start()
|
gc_thread.start()
|
||||||
|
time.sleep(1)
|
||||||
|
# By now the gc task is spawned but in sleep for another second due to the failpoint.
|
||||||
|
|
||||||
last_error = None
|
log.info("detaching tenant")
|
||||||
for i in range(3):
|
pageserver_http.tenant_detach(tenant_id)
|
||||||
try:
|
log.info("tenant detached without error")
|
||||||
pageserver_http.tenant_detach(tenant_id)
|
|
||||||
except Exception as e:
|
|
||||||
last_error = e
|
|
||||||
log.error(f"try {i} error detaching tenant: {e}")
|
|
||||||
continue
|
|
||||||
else:
|
|
||||||
break
|
|
||||||
# else is called if the loop finished without reaching "break"
|
|
||||||
else:
|
|
||||||
pytest.fail(f"could not detach tenant: {last_error}")
|
|
||||||
|
|
||||||
|
log.info("wait for gc thread to return")
|
||||||
gc_thread.join(timeout=10)
|
gc_thread.join(timeout=10)
|
||||||
|
assert not gc_thread.is_alive()
|
||||||
|
log.info("gc thread returned")
|
||||||
|
|
||||||
# check that nothing is left on disk for deleted tenant
|
# check that nothing is left on disk for deleted tenant
|
||||||
assert not (env.repo_dir / "tenants" / str(tenant_id)).exists()
|
assert not (env.repo_dir / "tenants" / str(tenant_id)).exists()
|
||||||
|
|||||||
@@ -166,6 +166,10 @@ def test_get_tenant_size_with_multiple_branches(neon_env_builder: NeonEnvBuilder
|
|||||||
|
|
||||||
env = neon_env_builder.init_start()
|
env = neon_env_builder.init_start()
|
||||||
|
|
||||||
|
# FIXME: we have a race condition between GC and delete timeline. GC might fail with this
|
||||||
|
# error. Similar to https://github.com/neondatabase/neon/issues/2671
|
||||||
|
env.pageserver.allowed_errors.append(".*InternalServerError\\(No such file or directory.*")
|
||||||
|
|
||||||
tenant_id = env.initial_tenant
|
tenant_id = env.initial_tenant
|
||||||
main_branch_name, main_timeline_id = env.neon_cli.list_timelines(tenant_id)[0]
|
main_branch_name, main_timeline_id = env.neon_cli.list_timelines(tenant_id)[0]
|
||||||
|
|
||||||
@@ -188,10 +192,8 @@ def test_get_tenant_size_with_multiple_branches(neon_env_builder: NeonEnvBuilder
|
|||||||
"first-branch", main_branch_name, tenant_id
|
"first-branch", main_branch_name, tenant_id
|
||||||
)
|
)
|
||||||
|
|
||||||
# unsure why this happens, the size difference is more than a page alignment
|
|
||||||
size_after_first_branch = http_client.tenant_size(tenant_id)
|
size_after_first_branch = http_client.tenant_size(tenant_id)
|
||||||
assert size_after_first_branch > size_at_branch
|
assert size_after_first_branch == size_at_branch
|
||||||
assert size_after_first_branch - size_at_branch == gc_horizon
|
|
||||||
|
|
||||||
first_branch_pg = env.postgres.create_start("first-branch", tenant_id=tenant_id)
|
first_branch_pg = env.postgres.create_start("first-branch", tenant_id=tenant_id)
|
||||||
|
|
||||||
@@ -217,7 +219,7 @@ def test_get_tenant_size_with_multiple_branches(neon_env_builder: NeonEnvBuilder
|
|||||||
"second-branch", main_branch_name, tenant_id
|
"second-branch", main_branch_name, tenant_id
|
||||||
)
|
)
|
||||||
size_after_second_branch = http_client.tenant_size(tenant_id)
|
size_after_second_branch = http_client.tenant_size(tenant_id)
|
||||||
assert size_after_second_branch > size_after_continuing_on_main
|
assert size_after_second_branch == size_after_continuing_on_main
|
||||||
|
|
||||||
second_branch_pg = env.postgres.create_start("second-branch", tenant_id=tenant_id)
|
second_branch_pg = env.postgres.create_start("second-branch", tenant_id=tenant_id)
|
||||||
|
|
||||||
@@ -263,6 +265,8 @@ def test_get_tenant_size_with_multiple_branches(neon_env_builder: NeonEnvBuilder
|
|||||||
except PageserverApiException as e:
|
except PageserverApiException as e:
|
||||||
# compaction is ok but just retry if this fails; related to #2442
|
# compaction is ok but just retry if this fails; related to #2442
|
||||||
if "cannot lock compaction critical section" in str(e):
|
if "cannot lock compaction critical section" in str(e):
|
||||||
|
# also ignore it in the log
|
||||||
|
env.pageserver.allowed_errors.append(".*cannot lock compaction critical section.*")
|
||||||
time.sleep(1)
|
time.sleep(1)
|
||||||
continue
|
continue
|
||||||
raise
|
raise
|
||||||
|
|||||||
2
vendor/postgres-v14
vendored
2
vendor/postgres-v14
vendored
Submodule vendor/postgres-v14 updated: cd0693e2be...da50d99db5
2
vendor/postgres-v15
vendored
2
vendor/postgres-v15
vendored
Submodule vendor/postgres-v15 updated: 1bf5e3f53c...780c3f8e35
Reference in New Issue
Block a user