Mirror of https://github.com/neondatabase/neon.git, synced 2026-02-11 22:50:37 +00:00

Compare commits: bojan/remo...proxy-heal (4 commits)

| Author | SHA1 | Date |
|---|---|---|
| | 7342088fcc | |
| | 2e4307cfe5 | |
| | fceb428878 | |
| | 1e717b5414 | |
@@ -1,10 +0,0 @@
[defaults]
localhost_warning = False
host_key_checking = False
timeout = 30

[ssh_connection]
ssh_args = -F ./ansible.ssh.cfg
scp_if_ssh = True
pipelining = True
@@ -1,11 +0,0 @@
Host tele.zenith.tech
    User admin
    Port 3023
    StrictHostKeyChecking no
    UserKnownHostsFile /dev/null

Host * !tele.zenith.tech
    User admin
    StrictHostKeyChecking no
    UserKnownHostsFile /dev/null
    ProxyJump tele.zenith.tech
@@ -1,174 +0,0 @@
- name: Upload Zenith binaries
  hosts: pageservers:safekeepers
  gather_facts: False
  remote_user: admin
  vars:
    force_deploy: false

  tasks:

  - name: get latest version of Zenith binaries
    ignore_errors: true
    register: current_version_file
    set_fact:
      current_version: "{{ lookup('file', '.zenith_current_version') | trim }}"
    tags:
    - pageserver
    - safekeeper

  - name: set zero value for current_version
    when: current_version_file is failed
    set_fact:
      current_version: "0"
    tags:
    - pageserver
    - safekeeper

  - name: get deployed version from content of remote file
    ignore_errors: true
    ansible.builtin.slurp:
      src: /usr/local/.zenith_current_version
    register: remote_version_file
    tags:
    - pageserver
    - safekeeper

  - name: decode remote file content
    when: remote_version_file is succeeded
    set_fact:
      remote_version: "{{ remote_version_file['content'] | b64decode | trim }}"
    tags:
    - pageserver
    - safekeeper

  - name: set zero value for remote_version
    when: remote_version_file is failed
    set_fact:
      remote_version: "0"
    tags:
    - pageserver
    - safekeeper

  - name: inform about versions
    debug: msg="Version to deploy - {{ current_version }}, version on storage node - {{ remote_version }}"
    tags:
    - pageserver
    - safekeeper

  - name: upload and extract Zenith binaries to /usr/local
    when: current_version > remote_version or force_deploy
    ansible.builtin.unarchive:
      owner: root
      group: root
      src: zenith_install.tar.gz
      dest: /usr/local
    become: true
    tags:
    - pageserver
    - safekeeper
    - binaries
    - putbinaries

- name: Deploy pageserver
  hosts: pageservers
  gather_facts: False
  remote_user: admin
  vars:
    force_deploy: false

  tasks:
  - name: init pageserver
    when: current_version > remote_version or force_deploy
    shell:
      cmd: sudo -u pageserver /usr/local/bin/pageserver -c "pg_distrib_dir='/usr/local'" --init -D /storage/pageserver/data
    args:
      creates: "/storage/pageserver/data/tenants"
    environment:
      ZENITH_REPO_DIR: "/storage/pageserver/data"
      LD_LIBRARY_PATH: "/usr/local/lib"
    become: true
    tags:
    - pageserver

  - name: upload systemd service definition
    when: current_version > remote_version or force_deploy
    ansible.builtin.template:
      src: systemd/pageserver.service
      dest: /etc/systemd/system/pageserver.service
      owner: root
      group: root
      mode: '0644'
    become: true
    tags:
    - pageserver

  - name: start systemd service
    when: current_version > remote_version or force_deploy
    ansible.builtin.systemd:
      daemon_reload: yes
      name: pageserver
      enabled: yes
      state: restarted
    become: true
    tags:
    - pageserver

  - name: post version to console
    when: (current_version > remote_version or force_deploy) and console_mgmt_base_url is defined
    shell:
      cmd: |
        INSTANCE_ID=$(curl -s http://169.254.169.254/latest/meta-data/instance-id)
        curl -sfS -d '{"version": {{ current_version }} }' -X POST {{ console_mgmt_base_url }}/api/v1/pageservers/$INSTANCE_ID
    tags:
    - pageserver

- name: Deploy safekeeper
  hosts: safekeepers
  gather_facts: False
  remote_user: admin
  vars:
    force_deploy: false

  tasks:

  # in the future safekeepers should discover pageservers by themselves,
  # but currently use the first pageserver that was discovered
  - name: set first pageserver var for safekeepers
    when: current_version > remote_version or force_deploy
    set_fact:
      first_pageserver: "{{ hostvars[groups['pageservers'][0]]['inventory_hostname'] }}"
    tags:
    - safekeeper

  - name: upload systemd service definition
    when: current_version > remote_version or force_deploy
    ansible.builtin.template:
      src: systemd/safekeeper.service
      dest: /etc/systemd/system/safekeeper.service
      owner: root
      group: root
      mode: '0644'
    become: true
    tags:
    - safekeeper

  - name: start systemd service
    when: current_version > remote_version or force_deploy
    ansible.builtin.systemd:
      daemon_reload: yes
      name: safekeeper
      enabled: yes
      state: restarted
    become: true
    tags:
    - safekeeper

  - name: post version to console
    when: (current_version > remote_version or force_deploy) and console_mgmt_base_url is defined
    shell:
      cmd: |
        INSTANCE_ID=$(curl -s http://169.254.169.254/latest/meta-data/instance-id)
        curl -sfS -d '{"version": {{ current_version }} }' -X POST {{ console_mgmt_base_url }}/api/v1/safekeepers/$INSTANCE_ID
    tags:
    - safekeeper
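For reference, a minimal sketch of how this playbook is driven; the inventory names, the force_deploy variable and the console_mgmt_base_url extra-var all appear elsewhere in this diff, and the exact CI invocation is in .circleci/config.yml below:

# run the upload/deploy plays against the staging inventory
ansible-playbook deploy.yaml -i staging.hosts

# force a re-deploy even if the remote version is not older,
# and report the deployed version to the console API
ansible-playbook deploy.yaml -i production.hosts \
    -e force_deploy=true \
    -e console_mgmt_base_url=http://console-release.local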
@@ -1,52 +0,0 @@
#!/bin/bash

set -e

RELEASE=${RELEASE:-false}

# look at Docker Hub for the latest tag of the zenith docker image
if [ "${RELEASE}" = "true" ]; then
    echo "search latest release tag"
    VERSION=$(curl -s https://registry.hub.docker.com/v1/repositories/zenithdb/zenith/tags |jq -r -S '.[].name' | grep release | sed 's/release-//g' | tail -1)
    if [ -z "${VERSION}" ]; then
        echo "no docker tags found, exiting..."
        exit 1
    else
        TAG="release-${VERSION}"
    fi
else
    echo "search latest dev tag"
    VERSION=$(curl -s https://registry.hub.docker.com/v1/repositories/zenithdb/zenith/tags |jq -r -S '.[].name' | grep -v release | tail -1)
    if [ -z "${VERSION}" ]; then
        echo "no docker tags found, exiting..."
        exit 1
    else
        TAG="${VERSION}"
    fi
fi

echo "found ${VERSION}"

# do initial cleanup
rm -rf zenith_install postgres_install.tar.gz zenith_install.tar.gz .zenith_current_version
mkdir zenith_install

# retrieve binaries from the docker image
echo "getting binaries from docker image"
docker pull --quiet zenithdb/zenith:${TAG}
ID=$(docker create zenithdb/zenith:${TAG})
docker cp ${ID}:/data/postgres_install.tar.gz .
tar -xzf postgres_install.tar.gz -C zenith_install
docker cp ${ID}:/usr/local/bin/pageserver zenith_install/bin/
docker cp ${ID}:/usr/local/bin/safekeeper zenith_install/bin/
docker cp ${ID}:/usr/local/bin/proxy zenith_install/bin/
docker cp ${ID}:/usr/local/bin/postgres zenith_install/bin/
docker rm -vf ${ID}

# store version to file (for ansible playbooks) and create binaries tarball
echo ${VERSION} > zenith_install/.zenith_current_version
echo ${VERSION} > .zenith_current_version
tar -czf zenith_install.tar.gz -C zenith_install .

# do final cleanup
rm -rf zenith_install postgres_install.tar.gz
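A short usage sketch; the RELEASE variable and produced artifacts come from the script above, and the CI invocations appear in the config.yml changes below:

# fetch the latest dev build and produce zenith_install.tar.gz and .zenith_current_version
./get_binaries.sh

# fetch the latest release-tagged build instead
RELEASE=true ./get_binaries.sh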
@@ -1,7 +0,0 @@
[pageservers]
zenith-1-ps-1

[safekeepers]
zenith-1-sk-1
zenith-1-sk-2
zenith-1-sk-3
@@ -1,7 +0,0 @@
[pageservers]
zenith-us-stage-ps-1

[safekeepers]
zenith-us-stage-sk-1
zenith-us-stage-sk-2
zenith-us-stage-sk-3
@@ -1,18 +0,0 @@
[Unit]
Description=Zenith pageserver
After=network.target auditd.service

[Service]
Type=simple
User=pageserver
Environment=RUST_BACKTRACE=1 ZENITH_REPO_DIR=/storage/pageserver LD_LIBRARY_PATH=/usr/local/lib
ExecStart=/usr/local/bin/pageserver -c "pg_distrib_dir='/usr/local'" -c "listen_pg_addr='0.0.0.0:6400'" -c "listen_http_addr='0.0.0.0:9898'" -D /storage/pageserver/data
ExecReload=/bin/kill -HUP $MAINPID
KillMode=mixed
KillSignal=SIGINT
Restart=on-failure
TimeoutSec=10
LimitNOFILE=30000000

[Install]
WantedBy=multi-user.target
@@ -1,18 +0,0 @@
[Unit]
Description=Zenith safekeeper
After=network.target auditd.service

[Service]
Type=simple
User=safekeeper
Environment=RUST_BACKTRACE=1 ZENITH_REPO_DIR=/storage/safekeeper/data LD_LIBRARY_PATH=/usr/local/lib
ExecStart=/usr/local/bin/safekeeper -l {{ inventory_hostname }}.local:6500 --listen-http {{ inventory_hostname }}.local:7676 -p {{ first_pageserver }}:6400 -D /storage/safekeeper/data
ExecReload=/bin/kill -HUP $MAINPID
KillMode=mixed
KillSignal=SIGINT
Restart=on-failure
TimeoutSec=10
LimitNOFILE=30000000

[Install]
WantedBy=multi-user.target
@@ -1,28 +1,29 @@
version: 2.1

executors:
  zenith-xlarge-executor:
  zenith-build-executor:
    resource_class: xlarge
    docker:
      # NB: when changed, do not forget to update rust image tag in all Dockerfiles
      - image: zimg/rust:1.56
  zenith-executor:
      - image: cimg/rust:1.56.1
  zenith-python-executor:
    docker:
      - image: zimg/rust:1.56
      - image: cimg/python:3.7.10 # Oldest available 3.7 with Ubuntu 20.04 (for GLIBC and Rust) at CircleCI

jobs:
  check-codestyle-rust:
    executor: zenith-xlarge-executor
    executor: zenith-build-executor
    steps:
      - checkout
      - run:
          name: rustfmt
          when: always
          command: cargo fmt --all -- --check
          command: |
            cargo fmt --all -- --check

  # A job to build postgres
  build-postgres:
    executor: zenith-xlarge-executor
    executor: zenith-build-executor
    parameters:
      build_type:
        type: enum
@@ -37,7 +38,8 @@ jobs:
      # Note this works even though the submodule hasn't been checked out yet.
      - run:
          name: Get postgres cache key
          command: git rev-parse HEAD:vendor/postgres > /tmp/cache-key-postgres
          command: |
            git rev-parse HEAD:vendor/postgres > /tmp/cache-key-postgres

      - restore_cache:
          name: Restore postgres cache
@@ -45,6 +47,15 @@ jobs:
            # Restore ONLY if the rev key matches exactly
            - v04-postgres-cache-<< parameters.build_type >>-{{ checksum "/tmp/cache-key-postgres" }}

      # FIXME We could cache our own docker container, instead of installing packages every time.
      - run:
          name: apt install dependencies
          command: |
            if [ ! -e tmp_install/bin/postgres ]; then
              sudo apt update
              sudo apt install build-essential libreadline-dev zlib1g-dev flex bison libseccomp-dev
            fi

      # Build postgres if the restore_cache didn't find a build.
      # `make` can't figure out whether the cache is valid, since
      # it only compares file timestamps.
@@ -54,8 +65,7 @@ jobs:
            if [ ! -e tmp_install/bin/postgres ]; then
              # "depth 1" saves some time by not cloning the whole repo
              git submodule update --init --depth 1
              # bail out on any warnings
              COPT='-Werror' mold -run make postgres -j$(nproc)
              make postgres -j8
            fi

      - save_cache:
@@ -66,7 +76,7 @@ jobs:

  # A job to build zenith rust code
  build-zenith:
    executor: zenith-xlarge-executor
    executor: zenith-build-executor
    parameters:
      build_type:
        type: enum
@@ -74,6 +84,12 @@ jobs:
    environment:
      BUILD_TYPE: << parameters.build_type >>
    steps:
      - run:
          name: apt install dependencies
          command: |
            sudo apt update
            sudo apt install libssl-dev clang

      # Checkout the git repo (without submodules)
      - checkout

@@ -111,7 +127,7 @@ jobs:
            fi

            export CARGO_INCREMENTAL=0
            "${cov_prefix[@]}" mold -run cargo build $CARGO_FLAGS --bins --tests
            "${cov_prefix[@]}" cargo build $CARGO_FLAGS --bins --tests

      - save_cache:
          name: Save rust cache
@@ -195,14 +211,6 @@ jobs:
          command: |
            cp -a tmp_install /tmp/zenith/pg_install

      - run:
          name: Merge coverage data
          command: |
            # This will speed up workspace uploads
            if [[ $BUILD_TYPE == "debug" ]]; then
              scripts/coverage "--profraw-prefix=$CIRCLE_JOB" --dir=/tmp/zenith/coverage merge
            fi

      # Save the rust binaries and coverage data for other jobs in this workflow.
      - persist_to_workspace:
          root: /tmp/zenith
@@ -210,30 +218,23 @@ jobs:
            - "*"

  check-codestyle-python:
    executor: zenith-executor
    executor: zenith-python-executor
    steps:
      - checkout
      - restore_cache:
          keys:
            - v1-python-deps-{{ checksum "poetry.lock" }}
      - run:
          name: Install deps
          command: ./scripts/pysync
      - save_cache:
          key: v1-python-deps-{{ checksum "poetry.lock" }}
          paths:
            - /home/circleci/.cache/pypoetry/virtualenvs
          command: pipenv --python 3.7 install --dev
      - run:
          name: Run yapf to ensure code format
          when: always
          command: poetry run yapf --recursive --diff .
          command: pipenv run yapf --recursive --diff .
      - run:
          name: Run mypy to check types
          when: always
          command: poetry run mypy .
          command: pipenv run mypy .

  run-pytest:
    executor: zenith-executor
    executor: zenith-python-executor
    parameters:
      # pytest args to specify the tests to run.
      #
@@ -272,16 +273,9 @@ jobs:
          condition: << parameters.needs_postgres_source >>
          steps:
            - run: git submodule update --init --depth 1
      - restore_cache:
          keys:
            - v1-python-deps-{{ checksum "poetry.lock" }}
      - run:
          name: Install deps
          command: ./scripts/pysync
      - save_cache:
          key: v1-python-deps-{{ checksum "poetry.lock" }}
          paths:
            - /home/circleci/.cache/pypoetry/virtualenvs
          command: pipenv --python 3.7 install
      - run:
          name: Run pytest
          # pytest doesn't output test logs in real time, so CI job may fail with
@@ -298,7 +292,6 @@ jobs:
            - PLATFORM: zenith-local-ci
          command: |
            PERF_REPORT_DIR="$(realpath test_runner/perf-report-local)"
            rm -rf $PERF_REPORT_DIR

            TEST_SELECTION="test_runner/<< parameters.test_selection >>"
            EXTRA_PARAMS="<< parameters.extra_params >>"
@@ -334,7 +327,7 @@ jobs:
            # -n4 uses four processes to run tests via pytest-xdist
            # -s is not used to prevent pytest from capturing output, because tests are running
            # in parallel and logs are mixed between different tests
            "${cov_prefix[@]}" ./scripts/pytest \
            "${cov_prefix[@]}" pipenv run pytest \
              --junitxml=$TEST_OUTPUT/junit.xml \
              --tb=short \
              --verbose \
@@ -343,6 +336,7 @@ jobs:

            if << parameters.save_perf_report >>; then
              if [[ $CIRCLE_BRANCH == "main" ]]; then
                # TODO: reuse scripts/git-upload
                export REPORT_FROM="$PERF_REPORT_DIR"
                export REPORT_TO=local
                scripts/generate_and_push_perf_report.sh
@@ -363,13 +357,6 @@ jobs:
      # The store_test_results step tells CircleCI where to find the junit.xml file.
      - store_test_results:
          path: /tmp/test_output
      - run:
          name: Merge coverage data
          command: |
            # This will speed up workspace uploads
            if [[ $BUILD_TYPE == "debug" ]]; then
              scripts/coverage "--profraw-prefix=$CIRCLE_JOB" --dir=/tmp/zenith/coverage merge
            fi
      # Save coverage data (if any)
      - persist_to_workspace:
          root: /tmp/zenith
@@ -377,7 +364,7 @@ jobs:
            - "*"

  coverage-report:
    executor: zenith-xlarge-executor
    executor: zenith-build-executor
    steps:
      - attach_workspace:
          at: /tmp/zenith
@@ -389,6 +376,12 @@ jobs:
            # there's no way to clean out old packages, so the cache grows every time something
            # changes.
            - v04-rust-cache-deps-debug-{{ checksum "Cargo.lock" }}
      - run:
          name: Install llvm-tools
          command: |
            # TODO: install a proper symbol demangler, e.g. rustfilt
            # TODO: we should embed this into a docker image
            rustup component add llvm-tools-preview
      - run:
          name: Build coverage report
          command: |
@@ -471,78 +464,46 @@ jobs:
            docker build -t zenithdb/compute-node:latest vendor/postgres && docker push zenithdb/compute-node:latest
            docker tag zenithdb/compute-node:latest zenithdb/compute-node:${DOCKER_TAG} && docker push zenithdb/compute-node:${DOCKER_TAG}

  # Build production zenithdb/zenith:release image and push it to Docker hub
  docker-image-release:
    docker:
      - image: cimg/base:2021.04
    steps:
      - checkout
      - setup_remote_docker:
          docker_layer_caching: true
      - run:
          name: Init postgres submodule
          command: git submodule update --init --depth 1
      - run:
          name: Build and push Docker image
          command: |
            echo $DOCKER_PWD | docker login -u $DOCKER_LOGIN --password-stdin
            DOCKER_TAG="release-$(git log --oneline|wc -l)"
            docker build --build-arg GIT_VERSION=$CIRCLE_SHA1 -t zenithdb/zenith:release . && docker push zenithdb/zenith:release
            docker tag zenithdb/zenith:release zenithdb/zenith:${DOCKER_TAG} && docker push zenithdb/zenith:${DOCKER_TAG}

  # Build production zenithdb/compute-node:release image and push it to Docker hub
  docker-image-compute-release:
    docker:
      - image: cimg/base:2021.04
    steps:
      - checkout
      - setup_remote_docker:
          docker_layer_caching: true
      # Build zenithdb/compute-tools:release image and push it to Docker hub
      # TODO: this should probably also use versioned tag, not just :latest.
      # XXX: but should it? We build and use it only locally now.
      - run:
          name: Build and push compute-tools Docker image
          command: |
            echo $DOCKER_PWD | docker login -u $DOCKER_LOGIN --password-stdin
            docker build -t zenithdb/compute-tools:release -f Dockerfile.compute-tools .
            docker push zenithdb/compute-tools:release
      - run:
          name: Init postgres submodule
          command: git submodule update --init --depth 1
      - run:
          name: Build and push compute-node Docker image
          command: |
            echo $DOCKER_PWD | docker login -u $DOCKER_LOGIN --password-stdin
            DOCKER_TAG="release-$(git log --oneline|wc -l)"
            docker build -t zenithdb/compute-node:release vendor/postgres && docker push zenithdb/compute-node:release
            docker tag zenithdb/compute-node:release zenithdb/compute-node:${DOCKER_TAG} && docker push zenithdb/compute-node:${DOCKER_TAG}

  deploy-staging:
    docker:
      - image: cimg/python:3.10
    steps:
      - checkout
      - setup_remote_docker
      - run:
          name: Get Zenith binaries
          command: |
            rm -rf zenith_install postgres_install.tar.gz zenith_install.tar.gz
            mkdir zenith_install
            DOCKER_TAG=$(git log --oneline|wc -l)
            docker pull --quiet zenithdb/zenith:${DOCKER_TAG}
            ID=$(docker create zenithdb/zenith:${DOCKER_TAG})
            docker cp $ID:/data/postgres_install.tar.gz .
            tar -xzf postgres_install.tar.gz -C zenith_install && rm postgres_install.tar.gz
            docker cp $ID:/usr/local/bin/pageserver zenith_install/bin/
            docker cp $ID:/usr/local/bin/safekeeper zenith_install/bin/
            docker cp $ID:/usr/local/bin/proxy zenith_install/bin/
            docker cp $ID:/usr/local/bin/postgres zenith_install/bin/
            docker rm -v $ID
            echo ${DOCKER_TAG} | tee zenith_install/.zenith_current_version
            tar -czf zenith_install.tar.gz -C zenith_install .
            ls -la zenith_install.tar.gz
      - run:
          name: Setup ansible
          command: |
            pip install --progress-bar off --user ansible boto3
            ansible-galaxy collection install amazon.aws
      - run:
          name: Redeploy
          name: Apply re-deploy playbook
          environment:
            ANSIBLE_HOST_KEY_CHECKING: false
          command: |
            cd "$(pwd)/.circleci/ansible"

            ./get_binaries.sh

            echo "${TELEPORT_SSH_KEY}" | tr -d '\n'| base64 --decode >ssh-key
            echo "${TELEPORT_SSH_CERT}" | tr -d '\n'| base64 --decode >ssh-key-cert.pub
            chmod 0600 ssh-key
            ssh-add ssh-key
            rm -f ssh-key ssh-key-cert.pub

            ansible-playbook deploy.yaml -i staging.hosts
            rm -f zenith_install.tar.gz .zenith_current_version
            echo "${STAGING_SSH_KEY}" | base64 --decode | ssh-add -
            export AWS_REGION=${STAGING_AWS_REGION}
            export AWS_ACCESS_KEY_ID=${STAGING_AWS_ACCESS_KEY_ID}
            export AWS_SECRET_ACCESS_KEY=${STAGING_AWS_SECRET_ACCESS_KEY}
            ansible-playbook .circleci/storage-redeploy.playbook.yml
            rm -f zenith_install.tar.gz

  deploy-staging-proxy:
    docker:
@@ -565,57 +526,7 @@ jobs:
          name: Re-deploy proxy
          command: |
            DOCKER_TAG=$(git log --oneline|wc -l)
            helm upgrade zenith-proxy zenithdb/zenith-proxy --install -f .circleci/helm-values/staging.proxy.yaml --set image.tag=${DOCKER_TAG} --wait

  deploy-release:
    docker:
      - image: cimg/python:3.10
    steps:
      - checkout
      - setup_remote_docker
      - run:
          name: Setup ansible
          command: |
            pip install --progress-bar off --user ansible boto3
      - run:
          name: Redeploy
          command: |
            cd "$(pwd)/.circleci/ansible"

            RELEASE=true ./get_binaries.sh

            echo "${TELEPORT_SSH_KEY}" | tr -d '\n'| base64 --decode >ssh-key
            echo "${TELEPORT_SSH_CERT}" | tr -d '\n'| base64 --decode >ssh-key-cert.pub
            chmod 0600 ssh-key
            ssh-add ssh-key
            rm -f ssh-key ssh-key-cert.pub

            ansible-playbook deploy.yaml -i production.hosts -e console_mgmt_base_url=http://console-release.local
            rm -f zenith_install.tar.gz .zenith_current_version

  deploy-release-proxy:
    docker:
      - image: cimg/base:2021.04
    environment:
      KUBECONFIG: .kubeconfig
    steps:
      - checkout
      - run:
          name: Store kubeconfig file
          command: |
            echo "${PRODUCTION_KUBECONFIG_DATA}" | base64 --decode > ${KUBECONFIG}
            chmod 0600 ${KUBECONFIG}
      - run:
          name: Setup helm v3
          command: |
            curl -s https://raw.githubusercontent.com/helm/helm/main/scripts/get-helm-3 | bash
            helm repo add zenithdb https://zenithdb.github.io/helm-charts
      - run:
          name: Re-deploy proxy
          command: |
            DOCKER_TAG="release-$(git log --oneline|wc -l)"
            helm upgrade zenith-proxy zenithdb/zenith-proxy --install -f .circleci/helm-values/production.proxy.yaml --set image.tag=${DOCKER_TAG} --wait
            helm upgrade zenith-proxy zenithdb/zenith-proxy --install -f .circleci/proxy.staging.yaml --set image.tag=${DOCKER_TAG} --wait

  # Trigger a new remote CI job
  remote-ci-trigger:
@@ -680,7 +591,6 @@ workflows:
          - build-postgres-<< matrix.build_type >>
      - run-pytest:
          name: pg_regress-tests-<< matrix.build_type >>
          context: PERF_TEST_RESULT_CONNSTR
          matrix:
            parameters:
              build_type: ["debug", "release"]
@@ -698,7 +608,6 @@ workflows:
          - build-zenith-<< matrix.build_type >>
      - run-pytest:
          name: benchmarks
          context: PERF_TEST_RESULT_CONNSTR
          build_type: release
          test_selection: performance
          run_in_parallel: false
@@ -751,47 +660,6 @@ workflows:
            - main
          requires:
            - docker-image

      - docker-image-release:
          # Context gives an ability to login
          context: Docker Hub
          # Build image only for commits to main
          filters:
            branches:
              only:
                - release
          requires:
            - pg_regress-tests-release
            - other-tests-release
      - docker-image-compute-release:
          # Context gives an ability to login
          context: Docker Hub
          # Build image only for commits to main
          filters:
            branches:
              only:
                - release
          requires:
            - pg_regress-tests-release
            - other-tests-release
      - deploy-release:
          # Context gives an ability to login
          context: Docker Hub
          # deploy only for commits to main
          filters:
            branches:
              only:
                - release
          requires:
            - docker-image-release
      - deploy-release-proxy:
          # deploy only for commits to main
          filters:
            branches:
              only:
                - release
          requires:
            - docker-image-release
      - remote-ci-trigger:
          # Context passes credentials for gh api
          context: CI_ACCESS_TOKEN
@@ -1,35 +0,0 @@
# Helm chart values for zenith-proxy.
# This is a YAML-formatted file.

settings:
  authEndpoint: "https://console.zenith.tech/authenticate_proxy_request/"
  uri: "https://console.zenith.tech/psql_session/"

# -- Additional labels for zenith-proxy pods
podLabels:
  zenith_service: proxy
  zenith_env: production
  zenith_region: us-west-2
  zenith_region_slug: oregon

service:
  annotations:
    service.beta.kubernetes.io/aws-load-balancer-type: external
    service.beta.kubernetes.io/aws-load-balancer-nlb-target-type: ip
    service.beta.kubernetes.io/aws-load-balancer-scheme: internal
    external-dns.alpha.kubernetes.io/hostname: proxy-release.local
  type: LoadBalancer

exposedService:
  annotations:
    service.beta.kubernetes.io/aws-load-balancer-type: external
    service.beta.kubernetes.io/aws-load-balancer-nlb-target-type: ip
    service.beta.kubernetes.io/aws-load-balancer-scheme: internet-facing
    external-dns.alpha.kubernetes.io/hostname: start.zenith.tech

metrics:
  enabled: true
  serviceMonitor:
    enabled: true
    selector:
      release: kube-prometheus-stack
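For reference, these chart values are consumed by the deploy-release-proxy job shown earlier in config.yml; a minimal sketch of that helm invocation, with an illustrative tag value:

helm repo add zenithdb https://zenithdb.github.io/helm-charts
helm upgrade zenith-proxy zenithdb/zenith-proxy --install \
    -f .circleci/helm-values/production.proxy.yaml \
    --set image.tag=release-1234 --wait    # "release-1234" is a placeholder tag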
@@ -5,13 +5,6 @@ settings:
  authEndpoint: "https://console.stage.zenith.tech/authenticate_proxy_request/"
  uri: "https://console.stage.zenith.tech/psql_session/"

# -- Additional labels for zenith-proxy pods
podLabels:
  zenith_service: proxy
  zenith_env: staging
  zenith_region: us-east-1
  zenith_region_slug: virginia

exposedService:
  annotations:
    service.beta.kubernetes.io/aws-load-balancer-type: external
@@ -24,4 +17,4 @@ metrics:
  serviceMonitor:
    enabled: true
    selector:
      release: kube-prometheus-stack
      prometheus: zenith
.circleci/storage-redeploy.playbook.yml (Normal file, 138 lines)
@@ -0,0 +1,138 @@
- name: discover storage nodes
  hosts: localhost
  connection: local
  gather_facts: False

  tasks:

  - name: discover safekeepers
    no_log: true
    ec2_instance_info:
      filters:
        "tag:zenith_env": "staging"
        "tag:zenith_service": "safekeeper"
    register: ec2_safekeepers

  - name: discover pageservers
    no_log: true
    ec2_instance_info:
      filters:
        "tag:zenith_env": "staging"
        "tag:zenith_service": "pageserver"
    register: ec2_pageservers

  - name: add safekeepers to host group
    no_log: true
    add_host:
      name: safekeeper-{{ ansible_loop.index }}
      ansible_host: "{{ item.public_ip_address }}"
      groups:
      - storage
      - safekeepers
    with_items: "{{ ec2_safekeepers.instances }}"
    loop_control:
      extended: yes

  - name: add pageservers to host group
    no_log: true
    add_host:
      name: pageserver-{{ ansible_loop.index }}
      ansible_host: "{{ item.public_ip_address }}"
      groups:
      - storage
      - pageservers
    with_items: "{{ ec2_pageservers.instances }}"
    loop_control:
      extended: yes

- name: Retrieve versions
  hosts: storage
  gather_facts: False
  remote_user: admin

  tasks:

  - name: Get current version of binaries
    set_fact:
      current_version: "{{ lookup('file', '../zenith_install/.zenith_current_version') }}"

  - name: Check that file with version exists on host
    stat:
      path: /usr/local/.zenith_current_version
    register: version_file

  - name: Try to get current version from the host
    when: version_file.stat.exists
    ansible.builtin.fetch:
      src: /usr/local/.zenith_current_version
      dest: .remote_version.{{ inventory_hostname }}
      fail_on_missing: no
      flat: yes

  - name: Store remote version to variable
    when: version_file.stat.exists
    set_fact:
      remote_version: "{{ lookup('file', '.remote_version.{{ inventory_hostname }}') }}"

  - name: Store default value of remote version in case the remote version file is not found
    when: not version_file.stat.exists
    set_fact:
      remote_version: "000"

- name: Extract Zenith binaries
  hosts: storage
  gather_facts: False
  remote_user: admin

  tasks:

  - name: Inform about version conflict
    when: current_version <= remote_version
    debug: msg="Current version {{ current_version }} LE than remote {{ remote_version }}"

  - name: Extract Zenith binaries to /usr/local
    when: current_version > remote_version
    ansible.builtin.unarchive:
      src: ../zenith_install.tar.gz
      dest: /usr/local
    become: true

- name: Restart safekeepers
  hosts: safekeepers
  gather_facts: False
  remote_user: admin

  tasks:

  - name: Inform about version conflict
    when: current_version <= remote_version
    debug: msg="Current version {{ current_version }} LE than remote {{ remote_version }}"

  - name: Restart systemd service
    when: current_version > remote_version
    ansible.builtin.systemd:
      daemon_reload: yes
      name: safekeeper
      enabled: yes
      state: restarted
    become: true

- name: Restart pageservers
  hosts: pageservers
  gather_facts: False
  remote_user: admin

  tasks:

  - name: Inform about version conflict
    when: current_version <= remote_version
    debug: msg="Current version {{ current_version }} LE than remote {{ remote_version }}"

  - name: Restart systemd service
    when: current_version > remote_version
    ansible.builtin.systemd:
      daemon_reload: yes
      name: pageserver
      enabled: yes
      state: restarted
    become: true
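A hedged sketch of how this playbook is invoked, mirroring the deploy-staging job in the config.yml changes above; the AWS variables are needed by the ec2_instance_info discovery tasks:

export AWS_REGION=${STAGING_AWS_REGION}
export AWS_ACCESS_KEY_ID=${STAGING_AWS_ACCESS_KEY_ID}
export AWS_SECRET_ACCESS_KEY=${STAGING_AWS_SECRET_ACCESS_KEY}
ansible-playbook .circleci/storage-redeploy.playbook.yml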
.github/workflows/benchmarking.yml (vendored, 22 lines)
@@ -3,7 +3,7 @@ name: benchmarking
on:
  # uncomment to run on push for debugging your PR
  # push:
  #  branches: [ your branch ]
  #  branches: [ mybranch ]
  schedule:
    # * is a special character in YAML so you have to quote this string
    # ┌───────────── minute (0 - 59)
@@ -36,20 +36,20 @@ jobs:
      # see https://github.com/actions/setup-python/issues/162
      # and probably https://github.com/actions/setup-python/issues/162#issuecomment-865387976 in particular
      # so the simplest solution to me is to use already installed system python and spin virtualenvs for job runs.
      # there is Python 3.7.10 already installed on the machine so use it to install poetry and then use poetry's virtualenvs
      - name: Install poetry & deps
      # there is Python 3.7.10 already installed on the machine so use it to install pipenv and then use pipenv's virtualenvs
      - name: Install pipenv & deps
        run: |
          python3 -m pip install --upgrade poetry wheel
          # since pip/poetry caches are reused there shouldn't be any troubles with install every time
          ./scripts/pysync
          python3 -m pip install --upgrade pipenv wheel
          # since pip/pipenv caches are reused there shouldn't be any troubles with install every time
          pipenv install

      - name: Show versions
        run: |
          echo Python
          python3 --version
          poetry run python3 --version
          pipenv run python3 --version
          echo Pipenv
          poetry --version
          pipenv --version
          echo Pgbench
          $PG_BIN/pgbench --version

@@ -89,15 +89,11 @@ jobs:
          BENCHMARK_CONNSTR: "${{ secrets.BENCHMARK_STAGING_CONNSTR }}"
          REMOTE_ENV: "1" # indicate to test harness that we do not have zenith binaries locally
        run: |
          # just to be sure that no data was cached on self hosted runner
          # since it might generate duplicates when calling ingest_perf_test_result.py
          rm -rf perf-report-staging
          mkdir -p perf-report-staging
          ./scripts/pytest test_runner/performance/ -v -m "remote_cluster" --skip-interfering-proc-check --out-dir perf-report-staging
          pipenv run pytest test_runner/performance/ -v -m "remote_cluster" --skip-interfering-proc-check --out-dir perf-report-staging

      - name: Submit result
        env:
          VIP_VAP_ACCESS_TOKEN: "${{ secrets.VIP_VAP_ACCESS_TOKEN }}"
          PERF_TEST_RESULT_CONNSTR: "${{ secrets.PERF_TEST_RESULT_CONNSTR }}"
        run: |
          REPORT_FROM=$(realpath perf-report-staging) REPORT_TO=staging scripts/generate_and_push_perf_report.sh
Cargo.lock (generated, 784 lines): file diff suppressed because it is too large.
@@ -16,8 +16,3 @@ members = [
# This is useful for profiling and, to some extent, debug.
# Besides, debug info should not affect the performance.
debug = true

# This is only needed for proxy's tests
# TODO: we should probably fork tokio-postgres-rustls instead
[patch.crates-io]
tokio-postgres = { git = "https://github.com/zenithdb/rust-postgres.git", rev="2949d98df52587d562986aad155dd4e889e408b7" }
@@ -6,7 +6,7 @@
# Build Postgres separately --- this layer will be rebuilt only if one of
# mentioned paths will get any changes.
#
FROM zimg/rust:1.56 AS pg-build
FROM zenithdb/build:buster AS pg-build
WORKDIR /zenith
COPY ./vendor/postgres vendor/postgres
COPY ./Makefile Makefile
@@ -20,7 +20,7 @@ RUN rm -rf postgres_install/build
# TODO: build cargo deps as separate layer. We used cargo-chef before but that was
# net time waste in a lot of cases. Copying Cargo.lock with empty lib.rs should do the work.
#
FROM zimg/rust:1.56 AS build
FROM zenithdb/build:buster AS build

ARG GIT_VERSION
RUN if [ -z "$GIT_VERSION" ]; then echo "GIT_VERSION is required, use build_arg to pass it"; exit 1; fi
@@ -34,7 +34,7 @@ RUN GIT_VERSION=$GIT_VERSION cargo build --release
#
# Copy binaries to resulting image.
#
FROM debian:bullseye-slim
FROM debian:buster-slim
WORKDIR /data

RUN apt-get update && apt-get -yq install libreadline-dev libseccomp-dev openssl ca-certificates && \
Dockerfile.build (Normal file, 16 lines)
@@ -0,0 +1,16 @@
#
# Image with all the required dependencies to build https://github.com/zenithdb/zenith
# and Postgres from https://github.com/zenithdb/postgres
# Also includes some rust development and build tools.
# NB: keep in sync with rust image version in .circleci/config.yml
#
FROM rust:1.56.1-slim-buster
WORKDIR /zenith

# Install postgres and zenith build dependencies
# clang is for rocksdb
RUN apt-get update && apt-get -yq install automake libtool build-essential bison flex libreadline-dev zlib1g-dev libxml2-dev \
    libseccomp-dev pkg-config libssl-dev clang

# Install rust tools
RUN rustup component add clippy && cargo install cargo-audit
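A hedged sketch of how this image is presumably built; the zenithdb/build:buster tag comes from the FROM lines in the Dockerfile diff above, but the actual build command is not part of this diff:

# build the shared build image locally; tag assumed from the Dockerfile's FROM lines
docker build -f Dockerfile.build -t zenithdb/build:buster .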
Pipfile (Normal file, 30 lines)
@@ -0,0 +1,30 @@
[[source]]
url = "https://pypi.python.org/simple"
verify_ssl = true
name = "pypi"

[packages]
pytest = ">=6.0.0"
typing-extensions = "*"
pyjwt = {extras = ["crypto"], version = "*"}
requests = "*"
pytest-xdist = "*"
asyncpg = "*"
cached-property = "*"
psycopg2-binary = "*"
jinja2 = "*"

[dev-packages]
# Behavior may change slightly between versions. These are run continuously,
# so we pin exact versions to avoid surprising breaks. Update if comfortable.
yapf = "==0.31.0"
mypy = "==0.910"
# Non-pinned packages follow.
pipenv = "*"
flake8 = "*"
types-requests = "*"
types-psycopg2 = "*"

[requires]
# we need at least 3.7, but pipenv doesn't let us say this directly
python_version = "3"
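For reference, the commands CI runs against this Pipfile, taken from the config.yml and benchmarking.yml changes above:

pipenv --python 3.7 install --dev   # install runtime and dev packages
pipenv run yapf --recursive --diff .
pipenv run mypy .
pipenv run pytest --tb=short --verbose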
Pipfile.lock (generated, Normal file, 652 lines)
@@ -0,0 +1,652 @@
|
||||
{
|
||||
"_meta": {
|
||||
"hash": {
|
||||
"sha256": "c309cb963a7b07ae3d30e9cbf08b495f77bdecc0e5356fc89d133c4fbcb65b2b"
|
||||
},
|
||||
"pipfile-spec": 6,
|
||||
"requires": {
|
||||
"python_version": "3"
|
||||
},
|
||||
"sources": [
|
||||
{
|
||||
"name": "pypi",
|
||||
"url": "https://pypi.python.org/simple",
|
||||
"verify_ssl": true
|
||||
}
|
||||
]
|
||||
},
|
||||
"default": {
|
||||
"asyncpg": {
|
||||
"hashes": [
|
||||
"sha256:129d501f3d30616afd51eb8d3142ef51ba05374256bd5834cec3ef4956a9b317",
|
||||
"sha256:29ef6ae0a617fc13cc2ac5dc8e9b367bb83cba220614b437af9b67766f4b6b20",
|
||||
"sha256:41704c561d354bef01353835a7846e5606faabbeb846214dfcf666cf53319f18",
|
||||
"sha256:556b0e92e2b75dc028b3c4bc9bd5162ddf0053b856437cf1f04c97f9c6837d03",
|
||||
"sha256:8ff5073d4b654e34bd5eaadc01dc4d68b8a9609084d835acd364cd934190a08d",
|
||||
"sha256:a458fc69051fbb67d995fdda46d75a012b5d6200f91e17d23d4751482640ed4c",
|
||||
"sha256:a7095890c96ba36f9f668eb552bb020dddb44f8e73e932f8573efc613ee83843",
|
||||
"sha256:a738f4807c853623d3f93f0fea11f61be6b0e5ca16ea8aeb42c2c7ee742aa853",
|
||||
"sha256:c4fc0205fe4ddd5aeb3dfdc0f7bafd43411181e1f5650189608e5971cceacff1",
|
||||
"sha256:dd2fa063c3344823487d9ddccb40802f02622ddf8bf8a6cc53885ee7a2c1c0c6",
|
||||
"sha256:ddffcb85227bf39cd1bedd4603e0082b243cf3b14ced64dce506a15b05232b83",
|
||||
"sha256:e36c6806883786b19551bb70a4882561f31135dc8105a59662e0376cf5b2cbc5",
|
||||
"sha256:eed43abc6ccf1dc02e0d0efc06ce46a411362f3358847c6b0ec9a43426f91ece"
|
||||
],
|
||||
"index": "pypi",
|
||||
"version": "==0.24.0"
|
||||
},
|
||||
"attrs": {
|
||||
"hashes": [
|
||||
"sha256:149e90d6d8ac20db7a955ad60cf0e6881a3f20d37096140088356da6c716b0b1",
|
||||
"sha256:ef6aaac3ca6cd92904cdd0d83f629a15f18053ec84e6432106f7a4d04ae4f5fb"
|
||||
],
|
||||
"markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4'",
|
||||
"version": "==21.2.0"
|
||||
},
|
||||
"cached-property": {
|
||||
"hashes": [
|
||||
"sha256:9fa5755838eecbb2d234c3aa390bd80fbd3ac6b6869109bfc1b499f7bd89a130",
|
||||
"sha256:df4f613cf7ad9a588cc381aaf4a512d26265ecebd5eb9e1ba12f1319eb85a6a0"
|
||||
],
|
||||
"index": "pypi",
|
||||
"version": "==1.5.2"
|
||||
},
|
||||
"certifi": {
|
||||
"hashes": [
|
||||
"sha256:78884e7c1d4b00ce3cea67b44566851c4343c120abd683433ce934a68ea58872",
|
||||
"sha256:d62a0163eb4c2344ac042ab2bdf75399a71a2d8c7d47eac2e2ee91b9d6339569"
|
||||
],
|
||||
"version": "==2021.10.8"
|
||||
},
|
||||
"cffi": {
|
||||
"hashes": [
|
||||
"sha256:00c878c90cb53ccfaae6b8bc18ad05d2036553e6d9d1d9dbcf323bbe83854ca3",
|
||||
"sha256:0104fb5ae2391d46a4cb082abdd5c69ea4eab79d8d44eaaf79f1b1fd806ee4c2",
|
||||
"sha256:06c48159c1abed75c2e721b1715c379fa3200c7784271b3c46df01383b593636",
|
||||
"sha256:0808014eb713677ec1292301ea4c81ad277b6cdf2fdd90fd540af98c0b101d20",
|
||||
"sha256:10dffb601ccfb65262a27233ac273d552ddc4d8ae1bf93b21c94b8511bffe728",
|
||||
"sha256:14cd121ea63ecdae71efa69c15c5543a4b5fbcd0bbe2aad864baca0063cecf27",
|
||||
"sha256:17771976e82e9f94976180f76468546834d22a7cc404b17c22df2a2c81db0c66",
|
||||
"sha256:181dee03b1170ff1969489acf1c26533710231c58f95534e3edac87fff06c443",
|
||||
"sha256:23cfe892bd5dd8941608f93348c0737e369e51c100d03718f108bf1add7bd6d0",
|
||||
"sha256:263cc3d821c4ab2213cbe8cd8b355a7f72a8324577dc865ef98487c1aeee2bc7",
|
||||
"sha256:2756c88cbb94231c7a147402476be2c4df2f6078099a6f4a480d239a8817ae39",
|
||||
"sha256:27c219baf94952ae9d50ec19651a687b826792055353d07648a5695413e0c605",
|
||||
"sha256:2a23af14f408d53d5e6cd4e3d9a24ff9e05906ad574822a10563efcef137979a",
|
||||
"sha256:31fb708d9d7c3f49a60f04cf5b119aeefe5644daba1cd2a0fe389b674fd1de37",
|
||||
"sha256:3415c89f9204ee60cd09b235810be700e993e343a408693e80ce7f6a40108029",
|
||||
"sha256:3773c4d81e6e818df2efbc7dd77325ca0dcb688116050fb2b3011218eda36139",
|
||||
"sha256:3b96a311ac60a3f6be21d2572e46ce67f09abcf4d09344c49274eb9e0bf345fc",
|
||||
"sha256:3f7d084648d77af029acb79a0ff49a0ad7e9d09057a9bf46596dac9514dc07df",
|
||||
"sha256:41d45de54cd277a7878919867c0f08b0cf817605e4eb94093e7516505d3c8d14",
|
||||
"sha256:4238e6dab5d6a8ba812de994bbb0a79bddbdf80994e4ce802b6f6f3142fcc880",
|
||||
"sha256:45db3a33139e9c8f7c09234b5784a5e33d31fd6907800b316decad50af323ff2",
|
||||
"sha256:45e8636704eacc432a206ac7345a5d3d2c62d95a507ec70d62f23cd91770482a",
|
||||
"sha256:4958391dbd6249d7ad855b9ca88fae690783a6be9e86df65865058ed81fc860e",
|
||||
"sha256:4a306fa632e8f0928956a41fa8e1d6243c71e7eb59ffbd165fc0b41e316b2474",
|
||||
"sha256:57e9ac9ccc3101fac9d6014fba037473e4358ef4e89f8e181f8951a2c0162024",
|
||||
"sha256:59888172256cac5629e60e72e86598027aca6bf01fa2465bdb676d37636573e8",
|
||||
"sha256:5e069f72d497312b24fcc02073d70cb989045d1c91cbd53979366077959933e0",
|
||||
"sha256:64d4ec9f448dfe041705426000cc13e34e6e5bb13736e9fd62e34a0b0c41566e",
|
||||
"sha256:6dc2737a3674b3e344847c8686cf29e500584ccad76204efea14f451d4cc669a",
|
||||
"sha256:74fdfdbfdc48d3f47148976f49fab3251e550a8720bebc99bf1483f5bfb5db3e",
|
||||
"sha256:75e4024375654472cc27e91cbe9eaa08567f7fbdf822638be2814ce059f58032",
|
||||
"sha256:786902fb9ba7433aae840e0ed609f45c7bcd4e225ebb9c753aa39725bb3e6ad6",
|
||||
"sha256:8b6c2ea03845c9f501ed1313e78de148cd3f6cad741a75d43a29b43da27f2e1e",
|
||||
"sha256:91d77d2a782be4274da750752bb1650a97bfd8f291022b379bb8e01c66b4e96b",
|
||||
"sha256:91ec59c33514b7c7559a6acda53bbfe1b283949c34fe7440bcf917f96ac0723e",
|
||||
"sha256:920f0d66a896c2d99f0adbb391f990a84091179542c205fa53ce5787aff87954",
|
||||
"sha256:a5263e363c27b653a90078143adb3d076c1a748ec9ecc78ea2fb916f9b861962",
|
||||
"sha256:abb9a20a72ac4e0fdb50dae135ba5e77880518e742077ced47eb1499e29a443c",
|
||||
"sha256:c2051981a968d7de9dd2d7b87bcb9c939c74a34626a6e2f8181455dd49ed69e4",
|
||||
"sha256:c21c9e3896c23007803a875460fb786118f0cdd4434359577ea25eb556e34c55",
|
||||
"sha256:c2502a1a03b6312837279c8c1bd3ebedf6c12c4228ddbad40912d671ccc8a962",
|
||||
"sha256:d4d692a89c5cf08a8557fdeb329b82e7bf609aadfaed6c0d79f5a449a3c7c023",
|
||||
"sha256:da5db4e883f1ce37f55c667e5c0de439df76ac4cb55964655906306918e7363c",
|
||||
"sha256:e7022a66d9b55e93e1a845d8c9eba2a1bebd4966cd8bfc25d9cd07d515b33fa6",
|
||||
"sha256:ef1f279350da2c586a69d32fc8733092fd32cc8ac95139a00377841f59a3f8d8",
|
||||
"sha256:f54a64f8b0c8ff0b64d18aa76675262e1700f3995182267998c31ae974fbc382",
|
||||
"sha256:f5c7150ad32ba43a07c4479f40241756145a1f03b43480e058cfd862bf5041c7",
|
||||
"sha256:f6f824dc3bce0edab5f427efcfb1d63ee75b6fcb7282900ccaf925be84efb0fc",
|
||||
"sha256:fd8a250edc26254fe5b33be00402e6d287f562b6a5b2152dec302fa15bb3e997",
|
||||
"sha256:ffaa5c925128e29efbde7301d8ecaf35c8c60ffbcd6a1ffd3a552177c8e5e796"
|
||||
],
|
||||
"version": "==1.15.0"
|
||||
},
|
||||
"charset-normalizer": {
|
||||
"hashes": [
|
||||
"sha256:e019de665e2bcf9c2b64e2e5aa025fa991da8720daa3c1138cadd2fd1856aed0",
|
||||
"sha256:f7af805c321bfa1ce6714c51f254e0d5bb5e5834039bc17db7ebe3a4cec9492b"
|
||||
],
|
||||
"markers": "python_version >= '3'",
|
||||
"version": "==2.0.7"
|
||||
},
|
||||
"cryptography": {
|
||||
"hashes": [
|
||||
"sha256:07bb7fbfb5de0980590ddfc7f13081520def06dc9ed214000ad4372fb4e3c7f6",
|
||||
"sha256:18d90f4711bf63e2fb21e8c8e51ed8189438e6b35a6d996201ebd98a26abbbe6",
|
||||
"sha256:1ed82abf16df40a60942a8c211251ae72858b25b7421ce2497c2eb7a1cee817c",
|
||||
"sha256:22a38e96118a4ce3b97509443feace1d1011d0571fae81fc3ad35f25ba3ea999",
|
||||
"sha256:2d69645f535f4b2c722cfb07a8eab916265545b3475fdb34e0be2f4ee8b0b15e",
|
||||
"sha256:4a2d0e0acc20ede0f06ef7aa58546eee96d2592c00f450c9acb89c5879b61992",
|
||||
"sha256:54b2605e5475944e2213258e0ab8696f4f357a31371e538ef21e8d61c843c28d",
|
||||
"sha256:7075b304cd567694dc692ffc9747f3e9cb393cc4aa4fb7b9f3abd6f5c4e43588",
|
||||
"sha256:7b7ceeff114c31f285528ba8b390d3e9cfa2da17b56f11d366769a807f17cbaa",
|
||||
"sha256:7eba2cebca600a7806b893cb1d541a6e910afa87e97acf2021a22b32da1df52d",
|
||||
"sha256:928185a6d1ccdb816e883f56ebe92e975a262d31cc536429041921f8cb5a62fd",
|
||||
"sha256:9933f28f70d0517686bd7de36166dda42094eac49415459d9bdf5e7df3e0086d",
|
||||
"sha256:a688ebcd08250eab5bb5bca318cc05a8c66de5e4171a65ca51db6bd753ff8953",
|
||||
"sha256:abb5a361d2585bb95012a19ed9b2c8f412c5d723a9836418fab7aaa0243e67d2",
|
||||
"sha256:c10c797ac89c746e488d2ee92bd4abd593615694ee17b2500578b63cad6b93a8",
|
||||
"sha256:ced40344e811d6abba00295ced98c01aecf0c2de39481792d87af4fa58b7b4d6",
|
||||
"sha256:d57e0cdc1b44b6cdf8af1d01807db06886f10177469312fbde8f44ccbb284bc9",
|
||||
"sha256:d99915d6ab265c22873f1b4d6ea5ef462ef797b4140be4c9d8b179915e0985c6",
|
||||
"sha256:eb80e8a1f91e4b7ef8b33041591e6d89b2b8e122d787e87eeb2b08da71bb16ad",
|
||||
"sha256:ebeddd119f526bcf323a89f853afb12e225902a24d29b55fe18dd6fcb2838a76"
|
||||
],
|
||||
"version": "==35.0.0"
|
||||
},
|
||||
"execnet": {
|
||||
"hashes": [
|
||||
"sha256:8f694f3ba9cc92cab508b152dcfe322153975c29bda272e2fd7f3f00f36e47c5",
|
||||
"sha256:a295f7cc774947aac58dde7fdc85f4aa00c42adf5d8f5468fc630c1acf30a142"
|
||||
],
|
||||
"markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4'",
|
||||
"version": "==1.9.0"
|
||||
},
|
||||
"idna": {
|
||||
"hashes": [
|
||||
"sha256:84d9dd047ffa80596e0f246e2eab0b391788b0503584e8945f2368256d2735ff",
|
||||
"sha256:9d643ff0a55b762d5cdb124b8eaa99c66322e2157b69160bc32796e824360e6d"
|
||||
],
|
||||
"markers": "python_version >= '3'",
|
||||
"version": "==3.3"
|
||||
},
|
||||
"importlib-metadata": {
|
||||
"hashes": [
|
||||
"sha256:b618b6d2d5ffa2f16add5697cf57a46c76a56229b0ed1c438322e4e95645bd15",
|
||||
"sha256:f284b3e11256ad1e5d03ab86bb2ccd6f5339688ff17a4d797a0fe7df326f23b1"
|
||||
],
|
||||
"markers": "python_version < '3.8'",
|
||||
"version": "==4.8.1"
|
||||
},
|
||||
"iniconfig": {
|
||||
"hashes": [
|
||||
"sha256:011e24c64b7f47f6ebd835bb12a743f2fbe9a26d4cecaa7f53bc4f35ee9da8b3",
|
||||
"sha256:bc3af051d7d14b2ee5ef9969666def0cd1a000e121eaea580d4a313df4b37f32"
|
||||
],
|
||||
"version": "==1.1.1"
|
||||
},
|
||||
"jinja2": {
|
||||
"hashes": [
|
||||
"sha256:827a0e32839ab1600d4eb1c4c33ec5a8edfbc5cb42dafa13b81f182f97784b45",
|
||||
"sha256:8569982d3f0889eed11dd620c706d39b60c36d6d25843961f33f77fb6bc6b20c"
|
||||
],
|
||||
"index": "pypi",
|
||||
"version": "==3.0.2"
|
||||
},
|
||||
"markupsafe": {
|
||||
"hashes": [
|
||||
"sha256:01a9b8ea66f1658938f65b93a85ebe8bc016e6769611be228d797c9d998dd298",
|
||||
"sha256:023cb26ec21ece8dc3907c0e8320058b2e0cb3c55cf9564da612bc325bed5e64",
|
||||
"sha256:0446679737af14f45767963a1a9ef7620189912317d095f2d9ffa183a4d25d2b",
|
||||
"sha256:04635854b943835a6ea959e948d19dcd311762c5c0c6e1f0e16ee57022669194",
|
||||
"sha256:0717a7390a68be14b8c793ba258e075c6f4ca819f15edfc2a3a027c823718567",
|
||||
"sha256:0955295dd5eec6cb6cc2fe1698f4c6d84af2e92de33fbcac4111913cd100a6ff",
|
||||
"sha256:0d4b31cc67ab36e3392bbf3862cfbadac3db12bdd8b02a2731f509ed5b829724",
|
||||
"sha256:10f82115e21dc0dfec9ab5c0223652f7197feb168c940f3ef61563fc2d6beb74",
|
||||
"sha256:168cd0a3642de83558a5153c8bd34f175a9a6e7f6dc6384b9655d2697312a646",
|
||||
"sha256:1d609f577dc6e1aa17d746f8bd3c31aa4d258f4070d61b2aa5c4166c1539de35",
|
||||
"sha256:1f2ade76b9903f39aa442b4aadd2177decb66525062db244b35d71d0ee8599b6",
|
||||
"sha256:20dca64a3ef2d6e4d5d615a3fd418ad3bde77a47ec8a23d984a12b5b4c74491a",
|
||||
"sha256:2a7d351cbd8cfeb19ca00de495e224dea7e7d919659c2841bbb7f420ad03e2d6",
|
||||
"sha256:2d7d807855b419fc2ed3e631034685db6079889a1f01d5d9dac950f764da3dad",
|
||||
"sha256:2ef54abee730b502252bcdf31b10dacb0a416229b72c18b19e24a4509f273d26",
|
||||
"sha256:36bc903cbb393720fad60fc28c10de6acf10dc6cc883f3e24ee4012371399a38",
|
||||
"sha256:37205cac2a79194e3750b0af2a5720d95f786a55ce7df90c3af697bfa100eaac",
|
||||
"sha256:3c112550557578c26af18a1ccc9e090bfe03832ae994343cfdacd287db6a6ae7",
|
||||
"sha256:3dd007d54ee88b46be476e293f48c85048603f5f516008bee124ddd891398ed6",
|
||||
"sha256:4296f2b1ce8c86a6aea78613c34bb1a672ea0e3de9c6ba08a960efe0b0a09047",
|
||||
"sha256:47ab1e7b91c098ab893b828deafa1203de86d0bc6ab587b160f78fe6c4011f75",
|
||||
"sha256:49e3ceeabbfb9d66c3aef5af3a60cc43b85c33df25ce03d0031a608b0a8b2e3f",
|
||||
"sha256:4dc8f9fb58f7364b63fd9f85013b780ef83c11857ae79f2feda41e270468dd9b",
|
||||
"sha256:4efca8f86c54b22348a5467704e3fec767b2db12fc39c6d963168ab1d3fc9135",
|
||||
"sha256:53edb4da6925ad13c07b6d26c2a852bd81e364f95301c66e930ab2aef5b5ddd8",
|
||||
"sha256:5855f8438a7d1d458206a2466bf82b0f104a3724bf96a1c781ab731e4201731a",
|
||||
"sha256:594c67807fb16238b30c44bdf74f36c02cdf22d1c8cda91ef8a0ed8dabf5620a",
|
||||
"sha256:5b6d930f030f8ed98e3e6c98ffa0652bdb82601e7a016ec2ab5d7ff23baa78d1",
|
||||
"sha256:5bb28c636d87e840583ee3adeb78172efc47c8b26127267f54a9c0ec251d41a9",
|
||||
"sha256:60bf42e36abfaf9aff1f50f52644b336d4f0a3fd6d8a60ca0d054ac9f713a864",
|
||||
"sha256:611d1ad9a4288cf3e3c16014564df047fe08410e628f89805e475368bd304914",
|
||||
"sha256:6300b8454aa6930a24b9618fbb54b5a68135092bc666f7b06901f897fa5c2fee",
|
||||
"sha256:63f3268ba69ace99cab4e3e3b5840b03340efed0948ab8f78d2fd87ee5442a4f",
|
||||
"sha256:6557b31b5e2c9ddf0de32a691f2312a32f77cd7681d8af66c2692efdbef84c18",
|
||||
"sha256:693ce3f9e70a6cf7d2fb9e6c9d8b204b6b39897a2c4a1aa65728d5ac97dcc1d8",
|
||||
"sha256:6a7fae0dd14cf60ad5ff42baa2e95727c3d81ded453457771d02b7d2b3f9c0c2",
|
||||
"sha256:6c4ca60fa24e85fe25b912b01e62cb969d69a23a5d5867682dd3e80b5b02581d",
|
||||
"sha256:6fcf051089389abe060c9cd7caa212c707e58153afa2c649f00346ce6d260f1b",
|
||||
"sha256:7d91275b0245b1da4d4cfa07e0faedd5b0812efc15b702576d103293e252af1b",
|
||||
"sha256:89c687013cb1cd489a0f0ac24febe8c7a666e6e221b783e53ac50ebf68e45d86",
|
||||
"sha256:8d206346619592c6200148b01a2142798c989edcb9c896f9ac9722a99d4e77e6",
|
||||
"sha256:905fec760bd2fa1388bb5b489ee8ee5f7291d692638ea5f67982d968366bef9f",
|
||||
"sha256:97383d78eb34da7e1fa37dd273c20ad4320929af65d156e35a5e2d89566d9dfb",
|
||||
"sha256:984d76483eb32f1bcb536dc27e4ad56bba4baa70be32fa87152832cdd9db0833",
|
||||
"sha256:99df47edb6bda1249d3e80fdabb1dab8c08ef3975f69aed437cb69d0a5de1e28",
|
||||
"sha256:9f02365d4e99430a12647f09b6cc8bab61a6564363f313126f775eb4f6ef798e",
|
||||
"sha256:a30e67a65b53ea0a5e62fe23682cfe22712e01f453b95233b25502f7c61cb415",
|
||||
"sha256:ab3ef638ace319fa26553db0624c4699e31a28bb2a835c5faca8f8acf6a5a902",
|
||||
"sha256:aca6377c0cb8a8253e493c6b451565ac77e98c2951c45f913e0b52facdcff83f",
|
||||
"sha256:add36cb2dbb8b736611303cd3bfcee00afd96471b09cda130da3581cbdc56a6d",
|
||||
"sha256:b2f4bf27480f5e5e8ce285a8c8fd176c0b03e93dcc6646477d4630e83440c6a9",
|
||||
"sha256:b7f2d075102dc8c794cbde1947378051c4e5180d52d276987b8d28a3bd58c17d",
|
||||
"sha256:baa1a4e8f868845af802979fcdbf0bb11f94f1cb7ced4c4b8a351bb60d108145",
|
||||
"sha256:be98f628055368795d818ebf93da628541e10b75b41c559fdf36d104c5787066",
|
||||
"sha256:bf5d821ffabf0ef3533c39c518f3357b171a1651c1ff6827325e4489b0e46c3c",
|
||||
"sha256:c47adbc92fc1bb2b3274c4b3a43ae0e4573d9fbff4f54cd484555edbf030baf1",
|
||||
"sha256:cdfba22ea2f0029c9261a4bd07e830a8da012291fbe44dc794e488b6c9bb353a",
|
||||
"sha256:d6c7ebd4e944c85e2c3421e612a7057a2f48d478d79e61800d81468a8d842207",
|
||||
"sha256:d7f9850398e85aba693bb640262d3611788b1f29a79f0c93c565694658f4071f",
|
||||
"sha256:d8446c54dc28c01e5a2dbac5a25f071f6653e6e40f3a8818e8b45d790fe6ef53",
|
||||
"sha256:deb993cacb280823246a026e3b2d81c493c53de6acfd5e6bfe31ab3402bb37dd",
|
||||
"sha256:e0f138900af21926a02425cf736db95be9f4af72ba1bb21453432a07f6082134",
|
||||
"sha256:e9936f0b261d4df76ad22f8fee3ae83b60d7c3e871292cd42f40b81b70afae85",
|
||||
"sha256:f0567c4dc99f264f49fe27da5f735f414c4e7e7dd850cfd8e69f0862d7c74ea9",
|
||||
"sha256:f5653a225f31e113b152e56f154ccbe59eeb1c7487b39b9d9f9cdb58e6c79dc5",
|
||||
"sha256:f826e31d18b516f653fe296d967d700fddad5901ae07c622bb3705955e1faa94",
|
||||
"sha256:f8ba0e8349a38d3001fae7eadded3f6606f0da5d748ee53cc1dab1d6527b9509",
|
||||
"sha256:f9081981fe268bd86831e5c75f7de206ef275defcb82bc70740ae6dc507aee51",
|
||||
"sha256:fa130dd50c57d53368c9d59395cb5526eda596d3ffe36666cd81a44d56e48872"
|
||||
],
|
||||
"markers": "python_version >= '3.6'",
|
||||
"version": "==2.0.1"
|
||||
},
|
||||
"packaging": {
|
||||
"hashes": [
|
||||
"sha256:096d689d78ca690e4cd8a89568ba06d07ca097e3306a4381635073ca91479966",
|
||||
"sha256:14317396d1e8cdb122989b916fa2c7e9ca8e2be9e8060a6eff75b6b7b4d8a7e0"
|
||||
],
|
||||
"markers": "python_version >= '3.6'",
|
||||
"version": "==21.2"
|
||||
},
|
||||
"pluggy": {
|
||||
"hashes": [
|
||||
"sha256:4224373bacce55f955a878bf9cfa763c1e360858e330072059e10bad68531159",
|
||||
"sha256:74134bbf457f031a36d68416e1509f34bd5ccc019f0bcc952c7b909d06b37bd3"
|
||||
],
|
||||
"markers": "python_version >= '3.6'",
|
||||
"version": "==1.0.0"
|
||||
},
|
||||
"psycopg2-binary": {
|
||||
"hashes": [
|
||||
"sha256:0b7dae87f0b729922e06f85f667de7bf16455d411971b2043bbd9577af9d1975",
|
||||
"sha256:0f2e04bd2a2ab54fa44ee67fe2d002bb90cee1c0f1cc0ebc3148af7b02034cbd",
|
||||
"sha256:123c3fb684e9abfc47218d3784c7b4c47c8587951ea4dd5bc38b6636ac57f616",
|
||||
"sha256:1473c0215b0613dd938db54a653f68251a45a78b05f6fc21af4326f40e8360a2",
|
||||
"sha256:14db1752acdd2187d99cb2ca0a1a6dfe57fc65c3281e0f20e597aac8d2a5bd90",
|
||||
"sha256:1e3a362790edc0a365385b1ac4cc0acc429a0c0d662d829a50b6ce743ae61b5a",
|
||||
"sha256:1e85b74cbbb3056e3656f1cc4781294df03383127a8114cbc6531e8b8367bf1e",
|
||||
"sha256:20f1ab44d8c352074e2d7ca67dc00843067788791be373e67a0911998787ce7d",
|
||||
"sha256:24b0b6688b9f31a911f2361fe818492650795c9e5d3a1bc647acbd7440142a4f",
|
||||
"sha256:2f62c207d1740b0bde5c4e949f857b044818f734a3d57f1d0d0edc65050532ed",
|
||||
"sha256:3242b9619de955ab44581a03a64bdd7d5e470cc4183e8fcadd85ab9d3756ce7a",
|
||||
"sha256:35c4310f8febe41f442d3c65066ca93cccefd75013df3d8c736c5b93ec288140",
|
||||
"sha256:4235f9d5ddcab0b8dbd723dca56ea2922b485ea00e1dafacf33b0c7e840b3d32",
|
||||
"sha256:542875f62bc56e91c6eac05a0deadeae20e1730be4c6334d8f04c944fcd99759",
|
||||
"sha256:5ced67f1e34e1a450cdb48eb53ca73b60aa0af21c46b9b35ac3e581cf9f00e31",
|
||||
"sha256:661509f51531ec125e52357a489ea3806640d0ca37d9dada461ffc69ee1e7b6e",
|
||||
"sha256:7360647ea04db2e7dff1648d1da825c8cf68dc5fbd80b8fb5b3ee9f068dcd21a",
|
||||
"sha256:736b8797b58febabb85494142c627bd182b50d2a7ec65322983e71065ad3034c",
|
||||
"sha256:8c13d72ed6af7fd2c8acbd95661cf9477f94e381fce0792c04981a8283b52917",
|
||||
"sha256:988b47ac70d204aed01589ed342303da7c4d84b56c2f4c4b8b00deda123372bf",
|
||||
"sha256:995fc41ebda5a7a663a254a1dcac52638c3e847f48307b5416ee373da15075d7",
|
||||
"sha256:a36c7eb6152ba5467fb264d73844877be8b0847874d4822b7cf2d3c0cb8cdcb0",
|
||||
"sha256:aed4a9a7e3221b3e252c39d0bf794c438dc5453bc2963e8befe9d4cd324dff72",
|
||||
"sha256:aef9aee84ec78af51107181d02fe8773b100b01c5dfde351184ad9223eab3698",
|
||||
"sha256:b0221ca5a9837e040ebf61f48899926b5783668b7807419e4adae8175a31f773",
|
||||
"sha256:b4d7679a08fea64573c969f6994a2631908bb2c0e69a7235648642f3d2e39a68",
|
||||
"sha256:c250a7ec489b652c892e4f0a5d122cc14c3780f9f643e1a326754aedf82d9a76",
|
||||
"sha256:ca86db5b561b894f9e5f115d6a159fff2a2570a652e07889d8a383b5fae66eb4",
|
||||
"sha256:cfc523edecddaef56f6740d7de1ce24a2fdf94fd5e704091856a201872e37f9f",
|
||||
"sha256:d92272c7c16e105788efe2cfa5d680f07e34e0c29b03c1908f8636f55d5f915a",
|
||||
"sha256:da113b70f6ec40e7d81b43d1b139b9db6a05727ab8be1ee559f3a69854a69d34",
|
||||
"sha256:f6fac64a38f6768e7bc7b035b9e10d8a538a9fadce06b983fb3e6fa55ac5f5ce",
|
||||
"sha256:f8559617b1fcf59a9aedba2c9838b5b6aa211ffedecabca412b92a1ff75aac1a",
|
||||
"sha256:fbb42a541b1093385a2d8c7eec94d26d30437d0e77c1d25dae1dcc46741a385e"
|
||||
],
|
||||
"index": "pypi",
|
||||
"version": "==2.9.1"
|
||||
},
|
||||
"py": {
|
||||
"hashes": [
|
||||
"sha256:21b81bda15b66ef5e1a777a21c4dcd9c20ad3efd0b3f817e7a809035269e1bd3",
|
||||
"sha256:3b80836aa6d1feeaa108e046da6423ab8f6ceda6468545ae8d02d9d58d18818a"
|
||||
],
|
||||
"markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'",
|
||||
"version": "==1.10.0"
|
||||
},
|
||||
"pycparser": {
|
||||
"hashes": [
|
||||
"sha256:2d475327684562c3a96cc71adf7dc8c4f0565175cf86b6d7a404ff4c771f15f0",
|
||||
"sha256:7582ad22678f0fcd81102833f60ef8d0e57288b6b5fb00323d101be910e35705"
|
||||
],
|
||||
"markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'",
|
||||
"version": "==2.20"
|
||||
},
|
||||
"pyjwt": {
|
||||
"extras": [
|
||||
"crypto"
|
||||
],
|
||||
"hashes": [
|
||||
"sha256:b888b4d56f06f6dcd777210c334e69c737be74755d3e5e9ee3fe67dc18a0ee41",
|
||||
"sha256:e0c4bb8d9f0af0c7f5b1ec4c5036309617d03d56932877f2f7a0beeb5318322f"
|
||||
],
|
||||
"index": "pypi",
|
||||
"version": "==2.3.0"
|
||||
},
|
||||
"pyparsing": {
|
||||
"hashes": [
|
||||
"sha256:c203ec8783bf771a155b207279b9bccb8dea02d8f0c9e5f8ead507bc3246ecc1",
|
||||
"sha256:ef9d7589ef3c200abe66653d3f1ab1033c3c419ae9b9bdb1240a85b024efc88b"
|
||||
],
|
||||
"markers": "python_version >= '2.6' and python_version not in '3.0, 3.1, 3.2, 3.3'",
|
||||
"version": "==2.4.7"
|
||||
},
|
||||
"pytest": {
|
||||
"hashes": [
|
||||
"sha256:131b36680866a76e6781d13f101efb86cf674ebb9762eb70d3082b6f29889e89",
|
||||
"sha256:7310f8d27bc79ced999e760ca304d69f6ba6c6649c0b60fb0e04a4a77cacc134"
|
||||
],
|
||||
"index": "pypi",
|
||||
"version": "==6.2.5"
|
||||
},
|
||||
"pytest-forked": {
|
||||
"hashes": [
|
||||
"sha256:6aa9ac7e00ad1a539c41bec6d21011332de671e938c7637378ec9710204e37ca",
|
||||
"sha256:dc4147784048e70ef5d437951728825a131b81714b398d5d52f17c7c144d8815"
|
||||
],
|
||||
"markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4'",
|
||||
"version": "==1.3.0"
|
||||
},
|
||||
"pytest-xdist": {
|
||||
"hashes": [
|
||||
"sha256:7b61ebb46997a0820a263553179d6d1e25a8c50d8a8620cd1aa1e20e3be99168",
|
||||
"sha256:89b330316f7fc475f999c81b577c2b926c9569f3d397ae432c0c2e2496d61ff9"
|
||||
],
|
||||
"index": "pypi",
|
||||
"version": "==2.4.0"
|
||||
},
|
||||
"requests": {
|
||||
"hashes": [
|
||||
"sha256:6c1246513ecd5ecd4528a0906f910e8f0f9c6b8ec72030dc9fd154dc1a6efd24",
|
||||
"sha256:b8aa58f8cf793ffd8782d3d8cb19e66ef36f7aba4353eec859e74678b01b07a7"
|
||||
],
|
||||
"index": "pypi",
|
||||
"version": "==2.26.0"
|
||||
},
|
||||
"toml": {
|
||||
"hashes": [
|
||||
"sha256:806143ae5bfb6a3c6e736a764057db0e6a0e05e338b5630894a5f779cabb4f9b",
|
||||
"sha256:b3bda1d108d5dd99f4a20d24d9c348e91c4db7ab1b749200bded2f839ccbe68f"
|
||||
],
|
||||
"markers": "python_version >= '2.6' and python_version not in '3.0, 3.1, 3.2, 3.3'",
|
||||
"version": "==0.10.2"
|
||||
},
|
||||
"typing-extensions": {
|
||||
"hashes": [
|
||||
"sha256:49f75d16ff11f1cd258e1b988ccff82a3ca5570217d7ad8c5f48205dd99a677e",
|
||||
"sha256:d8226d10bc02a29bcc81df19a26e56a9647f8b0a6d4a83924139f4a8b01f17b7",
|
||||
"sha256:f1d25edafde516b146ecd0613dabcc61409817af4766fbbcfb8d1ad4ec441a34"
|
||||
],
|
||||
"index": "pypi",
|
||||
"version": "==3.10.0.2"
|
||||
},
|
||||
"urllib3": {
|
||||
"hashes": [
|
||||
"sha256:4987c65554f7a2dbf30c18fd48778ef124af6fab771a377103da0585e2336ece",
|
||||
"sha256:c4fdf4019605b6e5423637e01bc9fe4daef873709a7973e195ceba0a62bbc844"
|
||||
],
|
||||
"markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4' and python_version < '4'",
|
||||
"version": "==1.26.7"
|
||||
},
|
||||
"zipp": {
|
||||
"hashes": [
|
||||
"sha256:71c644c5369f4a6e07636f0aa966270449561fcea2e3d6747b8d23efaa9d7832",
|
||||
"sha256:9fe5ea21568a0a70e50f273397638d39b03353731e6cbbb3fd8502a33fec40bc"
|
||||
],
|
||||
"markers": "python_version >= '3.6'",
|
||||
"version": "==3.6.0"
|
||||
}
|
||||
},
|
||||
"develop": {
|
||||
"backports.entry-points-selectable": {
|
||||
"hashes": [
|
||||
"sha256:988468260ec1c196dab6ae1149260e2f5472c9110334e5d51adcb77867361f6a",
|
||||
"sha256:a6d9a871cde5e15b4c4a53e3d43ba890cc6861ec1332c9c2428c92f977192acc"
|
||||
],
|
||||
"markers": "python_version >= '2.7'",
|
||||
"version": "==1.1.0"
|
||||
},
|
||||
"certifi": {
|
||||
"hashes": [
|
||||
"sha256:78884e7c1d4b00ce3cea67b44566851c4343c120abd683433ce934a68ea58872",
|
||||
"sha256:d62a0163eb4c2344ac042ab2bdf75399a71a2d8c7d47eac2e2ee91b9d6339569"
|
||||
],
|
||||
"version": "==2021.10.8"
|
||||
},
|
||||
"distlib": {
|
||||
"hashes": [
|
||||
"sha256:c8b54e8454e5bf6237cc84c20e8264c3e991e824ef27e8f1e81049867d861e31",
|
||||
"sha256:d982d0751ff6eaaab5e2ec8e691d949ee80eddf01a62eaa96ddb11531fe16b05"
|
||||
],
|
||||
"version": "==0.3.3"
|
||||
},
|
||||
"filelock": {
|
||||
"hashes": [
|
||||
"sha256:7afc856f74fa7006a289fd10fa840e1eebd8bbff6bffb69c26c54a0512ea8cf8",
|
||||
"sha256:bb2a1c717df74c48a2d00ed625e5a66f8572a3a30baacb7657add1d7bac4097b"
|
||||
],
|
||||
"markers": "python_version >= '3.6'",
|
||||
"version": "==3.3.2"
|
||||
},
|
||||
"flake8": {
|
||||
"hashes": [
|
||||
"sha256:479b1304f72536a55948cb40a32dce8bb0ffe3501e26eaf292c7e60eb5e0428d",
|
||||
"sha256:806e034dda44114815e23c16ef92f95c91e4c71100ff52813adf7132a6ad870d"
|
||||
],
|
||||
"index": "pypi",
|
||||
"version": "==4.0.1"
|
||||
},
|
||||
"importlib-metadata": {
|
||||
"hashes": [
|
||||
"sha256:b618b6d2d5ffa2f16add5697cf57a46c76a56229b0ed1c438322e4e95645bd15",
|
||||
"sha256:f284b3e11256ad1e5d03ab86bb2ccd6f5339688ff17a4d797a0fe7df326f23b1"
|
||||
],
|
||||
"markers": "python_version < '3.8'",
|
||||
"version": "==4.8.1"
|
||||
},
|
||||
"mccabe": {
|
||||
"hashes": [
|
||||
"sha256:ab8a6258860da4b6677da4bd2fe5dc2c659cff31b3ee4f7f5d64e79735b80d42",
|
||||
"sha256:dd8d182285a0fe56bace7f45b5e7d1a6ebcbf524e8f3bd87eb0f125271b8831f"
|
||||
],
|
||||
"version": "==0.6.1"
|
||||
},
|
||||
"mypy": {
|
||||
"hashes": [
|
||||
"sha256:088cd9c7904b4ad80bec811053272986611b84221835e079be5bcad029e79dd9",
|
||||
"sha256:0aadfb2d3935988ec3815952e44058a3100499f5be5b28c34ac9d79f002a4a9a",
|
||||
"sha256:119bed3832d961f3a880787bf621634ba042cb8dc850a7429f643508eeac97b9",
|
||||
"sha256:1a85e280d4d217150ce8cb1a6dddffd14e753a4e0c3cf90baabb32cefa41b59e",
|
||||
"sha256:3c4b8ca36877fc75339253721f69603a9c7fdb5d4d5a95a1a1b899d8b86a4de2",
|
||||
"sha256:3e382b29f8e0ccf19a2df2b29a167591245df90c0b5a2542249873b5c1d78212",
|
||||
"sha256:42c266ced41b65ed40a282c575705325fa7991af370036d3f134518336636f5b",
|
||||
"sha256:53fd2eb27a8ee2892614370896956af2ff61254c275aaee4c230ae771cadd885",
|
||||
"sha256:704098302473cb31a218f1775a873b376b30b4c18229421e9e9dc8916fd16150",
|
||||
"sha256:7df1ead20c81371ccd6091fa3e2878559b5c4d4caadaf1a484cf88d93ca06703",
|
||||
"sha256:866c41f28cee548475f146aa4d39a51cf3b6a84246969f3759cb3e9c742fc072",
|
||||
"sha256:a155d80ea6cee511a3694b108c4494a39f42de11ee4e61e72bc424c490e46457",
|
||||
"sha256:adaeee09bfde366d2c13fe6093a7df5df83c9a2ba98638c7d76b010694db760e",
|
||||
"sha256:b6fb13123aeef4a3abbcfd7e71773ff3ff1526a7d3dc538f3929a49b42be03f0",
|
||||
"sha256:b94e4b785e304a04ea0828759172a15add27088520dc7e49ceade7834275bedb",
|
||||
"sha256:c0df2d30ed496a08de5daed2a9ea807d07c21ae0ab23acf541ab88c24b26ab97",
|
||||
"sha256:c6c2602dffb74867498f86e6129fd52a2770c48b7cd3ece77ada4fa38f94eba8",
|
||||
"sha256:ceb6e0a6e27fb364fb3853389607cf7eb3a126ad335790fa1e14ed02fba50811",
|
||||
"sha256:d9dd839eb0dc1bbe866a288ba3c1afc33a202015d2ad83b31e875b5905a079b6",
|
||||
"sha256:e4dab234478e3bd3ce83bac4193b2ecd9cf94e720ddd95ce69840273bf44f6de",
|
||||
"sha256:ec4e0cd079db280b6bdabdc807047ff3e199f334050db5cbb91ba3e959a67504",
|
||||
"sha256:ecd2c3fe726758037234c93df7e98deb257fd15c24c9180dacf1ef829da5f921",
|
||||
"sha256:ef565033fa5a958e62796867b1df10c40263ea9ded87164d67572834e57a174d"
|
||||
],
|
||||
"index": "pypi",
|
||||
"version": "==0.910"
|
||||
},
|
||||
"mypy-extensions": {
|
||||
"hashes": [
|
||||
"sha256:090fedd75945a69ae91ce1303b5824f428daf5a028d2f6ab8a299250a846f15d",
|
||||
"sha256:2d82818f5bb3e369420cb3c4060a7970edba416647068eb4c5343488a6c604a8"
|
||||
],
|
||||
"version": "==0.4.3"
|
||||
},
|
||||
"pipenv": {
|
||||
"hashes": [
|
||||
"sha256:05958fadcd70b2de6a27542fcd2bd72dd5c59c6d35307fdac3e06361fb06e30e",
|
||||
"sha256:d180f5be4775c552fd5e69ae18a9d6099d9dafb462efe54f11c72cb5f4d5e977"
|
||||
],
|
||||
"index": "pypi",
|
||||
"version": "==2021.5.29"
|
||||
},
|
||||
"platformdirs": {
|
||||
"hashes": [
|
||||
"sha256:367a5e80b3d04d2428ffa76d33f124cf11e8fff2acdaa9b43d545f5c7d661ef2",
|
||||
"sha256:8868bbe3c3c80d42f20156f22e7131d2fb321f5bc86a2a345375c6481a67021d"
|
||||
],
|
||||
"markers": "python_version >= '3.6'",
|
||||
"version": "==2.4.0"
|
||||
},
|
||||
"pycodestyle": {
|
||||
"hashes": [
|
||||
"sha256:720f8b39dde8b293825e7ff02c475f3077124006db4f440dcbc9a20b76548a20",
|
||||
"sha256:eddd5847ef438ea1c7870ca7eb78a9d47ce0cdb4851a5523949f2601d0cbbe7f"
|
||||
],
|
||||
"markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4'",
|
||||
"version": "==2.8.0"
|
||||
},
|
||||
"pyflakes": {
|
||||
"hashes": [
|
||||
"sha256:05a85c2872edf37a4ed30b0cce2f6093e1d0581f8c19d7393122da7e25b2b24c",
|
||||
"sha256:3bb3a3f256f4b7968c9c788781e4ff07dce46bdf12339dcda61053375426ee2e"
|
||||
],
|
||||
"markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'",
|
||||
"version": "==2.4.0"
|
||||
},
|
||||
"six": {
|
||||
"hashes": [
|
||||
"sha256:1e61c37477a1626458e36f7b1d82aa5c9b094fa4802892072e49de9c60c4c926",
|
||||
"sha256:8abb2f1d86890a2dfb989f9a77cfcfd3e47c2a354b01111771326f8aa26e0254"
|
||||
],
|
||||
"markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'",
|
||||
"version": "==1.16.0"
|
||||
},
|
||||
"toml": {
|
||||
"hashes": [
|
||||
"sha256:806143ae5bfb6a3c6e736a764057db0e6a0e05e338b5630894a5f779cabb4f9b",
|
||||
"sha256:b3bda1d108d5dd99f4a20d24d9c348e91c4db7ab1b749200bded2f839ccbe68f"
|
||||
],
|
||||
"markers": "python_version >= '2.6' and python_version not in '3.0, 3.1, 3.2, 3.3'",
|
||||
"version": "==0.10.2"
|
||||
},
|
||||
"typed-ast": {
|
||||
"hashes": [
|
||||
"sha256:01ae5f73431d21eead5015997ab41afa53aa1fbe252f9da060be5dad2c730ace",
|
||||
"sha256:067a74454df670dcaa4e59349a2e5c81e567d8d65458d480a5b3dfecec08c5ff",
|
||||
"sha256:0fb71b8c643187d7492c1f8352f2c15b4c4af3f6338f21681d3681b3dc31a266",
|
||||
"sha256:1b3ead4a96c9101bef08f9f7d1217c096f31667617b58de957f690c92378b528",
|
||||
"sha256:2068531575a125b87a41802130fa7e29f26c09a2833fea68d9a40cf33902eba6",
|
||||
"sha256:209596a4ec71d990d71d5e0d312ac935d86930e6eecff6ccc7007fe54d703808",
|
||||
"sha256:2c726c276d09fc5c414693a2de063f521052d9ea7c240ce553316f70656c84d4",
|
||||
"sha256:398e44cd480f4d2b7ee8d98385ca104e35c81525dd98c519acff1b79bdaac363",
|
||||
"sha256:52b1eb8c83f178ab787f3a4283f68258525f8d70f778a2f6dd54d3b5e5fb4341",
|
||||
"sha256:5feca99c17af94057417d744607b82dd0a664fd5e4ca98061480fd8b14b18d04",
|
||||
"sha256:7538e495704e2ccda9b234b82423a4038f324f3a10c43bc088a1636180f11a41",
|
||||
"sha256:760ad187b1041a154f0e4d0f6aae3e40fdb51d6de16e5c99aedadd9246450e9e",
|
||||
"sha256:777a26c84bea6cd934422ac2e3b78863a37017618b6e5c08f92ef69853e765d3",
|
||||
"sha256:95431a26309a21874005845c21118c83991c63ea800dd44843e42a916aec5899",
|
||||
"sha256:9ad2c92ec681e02baf81fdfa056fe0d818645efa9af1f1cd5fd6f1bd2bdfd805",
|
||||
"sha256:9c6d1a54552b5330bc657b7ef0eae25d00ba7ffe85d9ea8ae6540d2197a3788c",
|
||||
"sha256:aee0c1256be6c07bd3e1263ff920c325b59849dc95392a05f258bb9b259cf39c",
|
||||
"sha256:af3d4a73793725138d6b334d9d247ce7e5f084d96284ed23f22ee626a7b88e39",
|
||||
"sha256:b36b4f3920103a25e1d5d024d155c504080959582b928e91cb608a65c3a49e1a",
|
||||
"sha256:b9574c6f03f685070d859e75c7f9eeca02d6933273b5e69572e5ff9d5e3931c3",
|
||||
"sha256:bff6ad71c81b3bba8fa35f0f1921fb24ff4476235a6e94a26ada2e54370e6da7",
|
||||
"sha256:c190f0899e9f9f8b6b7863debfb739abcb21a5c054f911ca3596d12b8a4c4c7f",
|
||||
"sha256:c907f561b1e83e93fad565bac5ba9c22d96a54e7ea0267c708bffe863cbe4075",
|
||||
"sha256:cae53c389825d3b46fb37538441f75d6aecc4174f615d048321b716df2757fb0",
|
||||
"sha256:dd4a21253f42b8d2b48410cb31fe501d32f8b9fbeb1f55063ad102fe9c425e40",
|
||||
"sha256:dde816ca9dac1d9c01dd504ea5967821606f02e510438120091b84e852367428",
|
||||
"sha256:f2362f3cb0f3172c42938946dbc5b7843c2a28aec307c49100c8b38764eb6927",
|
||||
"sha256:f328adcfebed9f11301eaedfa48e15bdece9b519fb27e6a8c01aa52a17ec31b3",
|
||||
"sha256:f8afcf15cc511ada719a88e013cec87c11aff7b91f019295eb4530f96fe5ef2f",
|
||||
"sha256:fb1bbeac803adea29cedd70781399c99138358c26d05fcbd23c13016b7f5ec65"
|
||||
],
|
||||
"markers": "python_version < '3.8'",
|
||||
"version": "==1.4.3"
|
||||
},
|
||||
"types-psycopg2": {
|
||||
"hashes": [
|
||||
"sha256:77ed80f2668582654623e04fb3d741ecce93effcc39c929d7e02f4a917a538ce",
|
||||
"sha256:98a6e0e9580cd7eb4bd4d20f7c7063d154b2589a2b90c0ce4e3ca6085cde77c6"
|
||||
],
|
||||
"index": "pypi",
|
||||
"version": "==2.9.1"
|
||||
},
|
||||
"types-requests": {
|
||||
"hashes": [
|
||||
"sha256:b279284e51f668e38ee12d9665e4d789089f532dc2a0be4a1508ca0efd98ba9e",
|
||||
"sha256:ba1d108d512e294b6080c37f6ae7cb2a2abf527560e2b671d1786c1fc46b541a"
|
||||
],
|
||||
"index": "pypi",
|
||||
"version": "==2.25.11"
|
||||
},
|
||||
"typing-extensions": {
|
||||
"hashes": [
|
||||
"sha256:49f75d16ff11f1cd258e1b988ccff82a3ca5570217d7ad8c5f48205dd99a677e",
|
||||
"sha256:d8226d10bc02a29bcc81df19a26e56a9647f8b0a6d4a83924139f4a8b01f17b7",
|
||||
"sha256:f1d25edafde516b146ecd0613dabcc61409817af4766fbbcfb8d1ad4ec441a34"
|
||||
],
|
||||
"index": "pypi",
|
||||
"version": "==3.10.0.2"
|
||||
},
|
||||
"virtualenv": {
|
||||
"hashes": [
|
||||
"sha256:4b02e52a624336eece99c96e3ab7111f469c24ba226a53ec474e8e787b365814",
|
||||
"sha256:576d05b46eace16a9c348085f7d0dc8ef28713a2cabaa1cf0aea41e8f12c9218"
|
||||
],
|
||||
"markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4'",
|
||||
"version": "==20.10.0"
|
||||
},
|
||||
"virtualenv-clone": {
|
||||
"hashes": [
|
||||
"sha256:418ee935c36152f8f153c79824bb93eaf6f0f7984bae31d3f48f350b9183501a",
|
||||
"sha256:44d5263bceed0bac3e1424d64f798095233b64def1c5689afa43dc3223caf5b0"
|
||||
],
|
||||
"markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'",
|
||||
"version": "==0.5.7"
|
||||
},
|
||||
"yapf": {
|
||||
"hashes": [
|
||||
"sha256:408fb9a2b254c302f49db83c59f9aa0b4b0fd0ec25be3a5c51181327922ff63d",
|
||||
"sha256:e3a234ba8455fe201eaa649cdac872d590089a18b661e39bbac7020978dd9c2e"
|
||||
],
|
||||
"index": "pypi",
|
||||
"version": "==0.31.0"
|
||||
},
|
||||
"zipp": {
|
||||
"hashes": [
|
||||
"sha256:71c644c5369f4a6e07636f0aa966270449561fcea2e3d6747b8d23efaa9d7832",
|
||||
"sha256:9fe5ea21568a0a70e50f273397638d39b03353731e6cbbb3fd8502a33fec40bc"
|
||||
],
|
||||
"markers": "python_version >= '3.6'",
|
||||
"version": "==3.6.0"
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -28,12 +28,12 @@ apt install build-essential libtool libreadline-dev zlib1g-dev flex bison libsec
libssl-dev clang pkg-config libpq-dev
```

[Rust] 1.56.1 or later is also required.
[Rust] 1.55 or later is also required.

To run the `psql` client, install the `postgresql-client` package or modify `PATH` and `LD_LIBRARY_PATH` to include `tmp_install/bin` and `tmp_install/lib`, respectively.

To run the integration tests or Python scripts (not required to use the code), install
Python (3.7 or higher), and install python3 packages using `./scripts/pysync` (requires poetry) in the project directory.
Python (3.7 or higher), and install python3 packages using `pipenv install` in the project directory.

2. Build zenith and patched postgres
```sh
@@ -128,7 +128,8 @@ INSERT 0 1
```sh
git clone --recursive https://github.com/zenithdb/zenith.git
make # builds also postgres and installs it to ./tmp_install
./scripts/pytest
cd test_runner
pipenv run pytest
```

## Documentation
@@ -1,14 +1,17 @@
[package]
name = "compute_tools"
version = "0.1.0"
edition = "2021"
authors = ["Alexey Kondratov <kondratov.aleksey@gmail.com>"]
edition = "2018"

# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html

[dependencies]
libc = "0.2"
anyhow = "1.0"
chrono = "0.4"
clap = "3.0"
env_logger = "0.9"
clap = "2.33"
env_logger = "0.8"
hyper = { version = "0.14", features = ["full"] }
log = { version = "0.4", features = ["std", "serde"] }
postgres = { git = "https://github.com/zenithdb/rust-postgres.git", rev="9eb0dbfbeb6a6c1b79099b9f7ae4a8c021877858" }

@@ -34,7 +34,7 @@ use std::sync::{Arc, RwLock};

use anyhow::{Context, Result};
use chrono::Utc;
use clap::Arg;
use libc::{prctl, PR_SET_PDEATHSIG, SIGINT};
use log::info;
use postgres::{Client, NoTls};
@@ -155,6 +155,20 @@ fn run_compute(state: &Arc<RwLock<ComputeState>>) -> Result<ExitStatus> {
|
||||
}
|
||||
|
||||
fn main() -> Result<()> {
|
||||
// During configuration we are starting Postgres as a child process. If we
|
||||
// fail we do not want to leave it running. PR_SET_PDEATHSIG sets the signal
|
||||
// that will be sent to the child process when the parent dies. NB: this is
|
||||
// cleared for the child of a fork(). SIGINT means fast shutdown for Postgres.
|
||||
// This does not matter much for Docker, where `zenith_ctl` is an entrypoint,
|
||||
// so the whole container will exit if it exits. But could be useful when
|
||||
// `zenith_ctl` is used in e.g. systemd.
|
||||
// XXX: this appears to just don't work. When `main` exits, the child process
|
||||
// `postgres` is re-assigned to a new parent (`/lib/systemd/systemd --user`
|
||||
// in my case).
|
||||
unsafe {
|
||||
prctl(PR_SET_PDEATHSIG, SIGINT);
|
||||
}
|
||||
|
||||
// TODO: re-use `zenith_utils::logging` later
|
||||
init_logger(DEFAULT_LOG_LEVEL)?;
|
||||
|
||||
@@ -163,34 +177,34 @@ fn main() -> Result<()> {
|
||||
let matches = clap::App::new("zenith_ctl")
|
||||
.version(version.unwrap_or("unknown"))
|
||||
.arg(
|
||||
Arg::new("connstr")
|
||||
.short('C')
|
||||
clap::Arg::with_name("connstr")
|
||||
.short("C")
|
||||
.long("connstr")
|
||||
.value_name("DATABASE_URL")
|
||||
.required(true),
|
||||
)
|
||||
.arg(
|
||||
Arg::new("pgdata")
|
||||
.short('D')
|
||||
clap::Arg::with_name("pgdata")
|
||||
.short("D")
|
||||
.long("pgdata")
|
||||
.value_name("DATADIR")
|
||||
.required(true),
|
||||
)
|
||||
.arg(
|
||||
Arg::new("pgbin")
|
||||
.short('b')
|
||||
clap::Arg::with_name("pgbin")
|
||||
.short("b")
|
||||
.long("pgbin")
|
||||
.value_name("POSTGRES_PATH"),
|
||||
)
|
||||
.arg(
|
||||
Arg::new("spec")
|
||||
.short('s')
|
||||
clap::Arg::with_name("spec")
|
||||
.short("s")
|
||||
.long("spec")
|
||||
.value_name("SPEC_JSON"),
|
||||
)
|
||||
.arg(
|
||||
Arg::new("spec-path")
|
||||
.short('S')
|
||||
clap::Arg::with_name("spec-path")
|
||||
.short("S")
|
||||
.long("spec-path")
|
||||
.value_name("SPEC_PATH"),
|
||||
)
|
||||
|
||||
@@ -5,7 +5,7 @@ use std::process::Command;
use std::str::FromStr;
use std::{fs, thread, time};

use anyhow::{bail, Result};
use anyhow::{anyhow, Result};
use postgres::{Client, Transaction};
use serde::Deserialize;
@@ -171,7 +171,7 @@ impl PgQuote for PgIdent {
/// always quotes provided string with `""` and escapes every `"`. Not idempotent,
/// i.e. if string is already escaped it will be escaped again.
fn quote(&self) -> String {
let result = format!("\"{}\"", self.replace('"', "\"\""));
let result = format!("\"{}\"", self.replace("\"", "\"\""));
result
}
}
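
As an illustration of the quoting rule this hunk touches, here is a minimal standalone sketch (a hypothetical `quote_ident` helper, not the actual `PgQuote` trait): the identifier is wrapped in `"` and every embedded `"` is doubled, and the operation is deliberately not idempotent.

```rust
// Hypothetical sketch of the escaping rule from the hunk above: the identifier
// is wrapped in double quotes and every embedded `"` is doubled.
fn quote_ident(ident: &str) -> String {
    format!("\"{}\"", ident.replace('"', "\"\""))
}

fn main() {
    assert_eq!(quote_ident("db_owner"), "\"db_owner\"");
    // Not idempotent: quoting an already-quoted string escapes it again.
    assert_eq!(quote_ident("\"weird\""), "\"\"\"weird\"\"\"");
    println!("ok");
}
```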
@@ -226,7 +226,7 @@ pub fn wait_for_postgres(port: &str, pgdata: &Path) -> Result<()> {
// but postgres starts listening almost immediately, even if it is not really
// ready to accept connections).
if slept >= POSTGRES_WAIT_TIMEOUT {
bail!("timed out while waiting for Postgres to start");
return Err(anyhow!("timed out while waiting for Postgres to start"));
}

if pid_path.exists() {
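
The only change in this hunk is the error-return style. For reference, the two forms are equivalent: `bail!(msg)` is shorthand for `return Err(anyhow!(msg))`. A minimal sketch, assuming only the `anyhow` crate:

```rust
use anyhow::{anyhow, bail, Result};

// `bail!` constructs the error and returns early.
fn with_bail(timed_out: bool) -> Result<()> {
    if timed_out {
        bail!("timed out while waiting for Postgres to start");
    }
    Ok(())
}

// The longhand form used on the other side of the hunk.
fn with_return_err(timed_out: bool) -> Result<()> {
    if timed_out {
        return Err(anyhow!("timed out while waiting for Postgres to start"));
    }
    Ok(())
}

fn main() {
    assert!(with_bail(true).is_err());
    assert!(with_return_err(true).is_err());
}
```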
@@ -215,7 +215,7 @@ pub fn handle_databases(spec: &ClusterSpec, client: &mut Client) -> Result<()> {
if let Some(r) = pg_db {
// XXX: db owner name is returned as quoted string from Postgres,
// when quoting is needed.
let new_owner = if r.owner.starts_with('"') {
let new_owner = if r.owner.starts_with('\"') {
db.owner.quote()
} else {
db.owner.clone()
@@ -1,11 +1,14 @@
[package]
name = "control_plane"
version = "0.1.0"
edition = "2021"
authors = ["Stas Kelvich <stas@zenith.tech>"]
edition = "2018"

# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html

[dependencies]
tar = "0.4.33"
postgres = { git = "https://github.com/zenithdb/rust-postgres.git", rev="2949d98df52587d562986aad155dd4e889e408b7" }
postgres = { git = "https://github.com/zenithdb/rust-postgres.git", rev="9eb0dbfbeb6a6c1b79099b9f7ae4a8c021877858" }
serde = { version = "1.0", features = ["derive"] }
toml = "0.5"
lazy_static = "1.4"
@@ -1,20 +1,20 @@
|
||||
# Page server and three safekeepers.
|
||||
[pageserver]
|
||||
listen_pg_addr = '127.0.0.1:64000'
|
||||
listen_http_addr = '127.0.0.1:9898'
|
||||
listen_pg_addr = 'localhost:64000'
|
||||
listen_http_addr = 'localhost:9898'
|
||||
auth_type = 'Trust'
|
||||
|
||||
[[safekeepers]]
|
||||
id = 1
|
||||
name = 'sk1'
|
||||
pg_port = 5454
|
||||
http_port = 7676
|
||||
|
||||
[[safekeepers]]
|
||||
id = 2
|
||||
name = 'sk2'
|
||||
pg_port = 5455
|
||||
http_port = 7677
|
||||
|
||||
[[safekeepers]]
|
||||
id = 3
|
||||
name = 'sk3'
|
||||
pg_port = 5456
|
||||
http_port = 7678
|
||||
|
||||
@@ -1,11 +1,11 @@
|
||||
# Minimal zenith environment with one safekeeper. This is equivalent to the built-in
|
||||
# defaults that you get with no --config
|
||||
[pageserver]
|
||||
listen_pg_addr = '127.0.0.1:64000'
|
||||
listen_http_addr = '127.0.0.1:9898'
|
||||
listen_pg_addr = 'localhost:64000'
|
||||
listen_http_addr = 'localhost:9898'
|
||||
auth_type = 'Trust'
|
||||
|
||||
[[safekeepers]]
|
||||
id = 1
|
||||
name = 'single'
|
||||
pg_port = 5454
|
||||
http_port = 7676
|
||||
|
||||
@@ -82,11 +82,15 @@ impl ComputeControlPlane {
|
||||
let mut strings = s.split('@');
|
||||
let name = strings.next().unwrap();
|
||||
|
||||
let lsn = strings
|
||||
.next()
|
||||
.map(Lsn::from_str)
|
||||
.transpose()
|
||||
.context("invalid LSN in point-in-time specification")?;
|
||||
let lsn: Option<Lsn>;
|
||||
if let Some(lsnstr) = strings.next() {
|
||||
lsn = Some(
|
||||
Lsn::from_str(lsnstr)
|
||||
.with_context(|| "invalid LSN in point-in-time specification")?,
|
||||
);
|
||||
} else {
|
||||
lsn = None
|
||||
}
|
||||
|
||||
// Resolve the timeline ID, given the human-readable branch name
|
||||
let timeline_id = self
|
||||
@@ -249,16 +253,16 @@ impl PostgresNode {
let mut client = self
.pageserver
.page_server_psql_client()
.context("connecting to page server failed")?;
.with_context(|| "connecting to page server failed")?;

let copyreader = client
.copy_out(sql.as_str())
.context("page server 'basebackup' command failed")?;
.with_context(|| "page server 'basebackup' command failed")?;

// Read the archive directly from the `CopyOutReader`
tar::Archive::new(copyreader)
.unpack(&self.pgdata())
.context("extracting base backup failed")?;
.with_context(|| "extracting base backup failed")?;

Ok(())
}
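
This hunk swaps `.context(...)` for `.with_context(|| ...)` on the same call sites. A small sketch of the difference, assuming only the `anyhow` crate: `.context` evaluates its message eagerly, while `.with_context` builds it lazily on the error path, which matters when the message is formatted.

```rust
use anyhow::{Context, Result};

// Eager message: fine for a static string, as in the hunk above.
fn read_eager(path: &str) -> Result<String> {
    std::fs::read_to_string(path).context("failed to read file")
}

// Lazy message: the closure only runs on the error path, so the
// formatting cost is skipped when the call succeeds.
fn read_lazy(path: &str) -> Result<String> {
    std::fs::read_to_string(path).with_context(|| format!("failed to read file '{}'", path))
}

fn main() {
    assert!(read_eager("/definitely/missing").is_err());
    assert!(read_lazy("/definitely/missing").is_err());
}
```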
@@ -334,26 +338,14 @@ impl PostgresNode {
|
||||
if let Some(lsn) = self.lsn {
|
||||
conf.append("recovery_target_lsn", &lsn.to_string());
|
||||
}
|
||||
|
||||
conf.append_line("");
|
||||
// Configure backpressure
|
||||
// - Replication write lag depends on how fast the walreceiver can process incoming WAL.
|
||||
// This lag determines latency of get_page_at_lsn. Speed of applying WAL is about 10MB/sec,
|
||||
// so to avoid expiration of 1 minute timeout, this lag should not be larger than 600MB.
|
||||
// Actually latency should be much smaller (better if < 1sec). But we assume that recently
|
||||
// updates pages are not requested from pageserver.
|
||||
// - Replication flush lag depends on speed of persisting data by checkpointer (creation of
|
||||
// delta/image layers) and advancing disk_consistent_lsn. Safekeepers are able to
|
||||
// remove/archive WAL only beyond disk_consistent_lsn. Too large a lag can cause long
|
||||
// recovery time (in case of pageserver crash) and disk space overflow at safekeepers.
|
||||
// - Replication apply lag depends on speed of uploading changes to S3 by uploader thread.
|
||||
// To be able to restore database in case of pageserver node crash, safekeeper should not
|
||||
// remove WAL beyond this point. Too large lag can cause space exhaustion in safekeepers
|
||||
// (if they are not able to upload WAL to S3).
|
||||
conf.append("max_replication_write_lag", "500MB");
|
||||
conf.append("max_replication_flush_lag", "10GB");
|
||||
|
||||
if !self.env.safekeepers.is_empty() {
|
||||
// Configure backpressure
|
||||
// In setup with safekeepers apply_lag depends on
|
||||
// speed of data checkpointing on pageserver (see disk_consistent_lsn).
|
||||
conf.append("max_replication_apply_lag", "1500MB");
|
||||
|
||||
// Configure the node to connect to the safekeepers
|
||||
conf.append("synchronous_standby_names", "walproposer");
|
||||
|
||||
@@ -366,6 +358,11 @@ impl PostgresNode {
|
||||
.join(",");
|
||||
conf.append("wal_acceptors", &wal_acceptors);
|
||||
} else {
|
||||
// Configure backpressure
|
||||
// In setup without safekeepers, flush_lag depends on
|
||||
// speed of of data checkpointing on pageserver (see disk_consistent_lsn)
|
||||
conf.append("max_replication_flush_lag", "1500MB");
|
||||
|
||||
// We only use setup without safekeepers for tests,
|
||||
// and don't care about data durability on pageserver,
|
||||
// so set more relaxed synchronous_commit.
|
||||
@@ -446,7 +443,7 @@ impl PostgresNode {
|
||||
if let Some(token) = auth_token {
|
||||
cmd.env("ZENITH_AUTH_TOKEN", token);
|
||||
}
|
||||
let pg_ctl = cmd.status().context("pg_ctl failed")?;
|
||||
let pg_ctl = cmd.status().with_context(|| "pg_ctl failed")?;
|
||||
|
||||
if !pg_ctl.success() {
|
||||
anyhow::bail!("pg_ctl failed");
|
||||
|
||||
@@ -12,9 +12,7 @@ use std::path::{Path, PathBuf};
|
||||
use std::process::{Command, Stdio};
|
||||
use zenith_utils::auth::{encode_from_key_file, Claims, Scope};
|
||||
use zenith_utils::postgres_backend::AuthType;
|
||||
use zenith_utils::zid::{HexZTenantId, ZNodeId, ZTenantId};
|
||||
|
||||
use crate::safekeeper::SafekeeperNode;
|
||||
use zenith_utils::zid::{opt_display_serde, ZTenantId};
|
||||
|
||||
//
|
||||
// This data structures represents zenith CLI config
|
||||
@@ -47,8 +45,9 @@ pub struct LocalEnv {
|
||||
|
||||
// Default tenant ID to use with the 'zenith' command line utility, when
|
||||
// --tenantid is not explicitly specified.
|
||||
#[serde(with = "opt_display_serde")]
|
||||
#[serde(default)]
|
||||
pub default_tenantid: Option<HexZTenantId>,
|
||||
pub default_tenantid: Option<ZTenantId>,
|
||||
|
||||
// used to issue tokens during e.g pg start
|
||||
#[serde(default)]
|
||||
@@ -63,8 +62,6 @@ pub struct LocalEnv {
|
||||
#[derive(Serialize, Deserialize, Clone, Debug)]
|
||||
#[serde(default)]
|
||||
pub struct PageServerConf {
|
||||
// node id
|
||||
pub id: ZNodeId,
|
||||
// Pageserver connection settings
|
||||
pub listen_pg_addr: String,
|
||||
pub listen_http_addr: String,
|
||||
@@ -79,7 +76,6 @@ pub struct PageServerConf {
|
||||
impl Default for PageServerConf {
|
||||
fn default() -> Self {
|
||||
Self {
|
||||
id: ZNodeId(0),
|
||||
listen_pg_addr: String::new(),
|
||||
listen_http_addr: String::new(),
|
||||
auth_type: AuthType::Trust,
|
||||
@@ -91,7 +87,7 @@ impl Default for PageServerConf {
|
||||
#[derive(Serialize, Deserialize, Clone, Debug)]
|
||||
#[serde(default)]
|
||||
pub struct SafekeeperConf {
|
||||
pub id: ZNodeId,
|
||||
pub name: String,
|
||||
pub pg_port: u16,
|
||||
pub http_port: u16,
|
||||
pub sync: bool,
|
||||
@@ -100,7 +96,7 @@ pub struct SafekeeperConf {
|
||||
impl Default for SafekeeperConf {
|
||||
fn default() -> Self {
|
||||
Self {
|
||||
id: ZNodeId(0),
|
||||
name: String::new(),
|
||||
pg_port: 0,
|
||||
http_port: 0,
|
||||
sync: true,
|
||||
@@ -140,8 +136,8 @@ impl LocalEnv {
|
||||
self.base_data_dir.clone()
|
||||
}
|
||||
|
||||
pub fn safekeeper_data_dir(&self, data_dir_name: &str) -> PathBuf {
|
||||
self.base_data_dir.join("safekeepers").join(data_dir_name)
|
||||
pub fn safekeeper_data_dir(&self, node_name: &str) -> PathBuf {
|
||||
self.base_data_dir.join("safekeepers").join(node_name)
|
||||
}
|
||||
|
||||
/// Create a LocalEnv from a config file.
|
||||
@@ -184,7 +180,7 @@ impl LocalEnv {
|
||||
|
||||
// If no initial tenant ID was given, generate it.
|
||||
if env.default_tenantid.is_none() {
|
||||
env.default_tenantid = Some(HexZTenantId::from(ZTenantId::generate()));
|
||||
env.default_tenantid = Some(ZTenantId::generate());
|
||||
}
|
||||
|
||||
env.base_data_dir = base_path();
|
||||
@@ -255,7 +251,7 @@ impl LocalEnv {
|
||||
.arg("2048")
|
||||
.stdout(Stdio::null())
|
||||
.output()
|
||||
.context("failed to generate auth private key")?;
|
||||
.with_context(|| "failed to generate auth private key")?;
|
||||
if !keygen_output.status.success() {
|
||||
bail!(
|
||||
"openssl failed: '{}'",
|
||||
@@ -274,7 +270,7 @@ impl LocalEnv {
|
||||
.args(&["-out", public_key_path.to_str().unwrap()])
|
||||
.stdout(Stdio::null())
|
||||
.output()
|
||||
.context("failed to generate auth private key")?;
|
||||
.with_context(|| "failed to generate auth private key")?;
|
||||
if !keygen_output.status.success() {
|
||||
bail!(
|
||||
"openssl failed: '{}'",
|
||||
@@ -289,7 +285,7 @@ impl LocalEnv {
|
||||
fs::create_dir_all(self.pg_data_dirs_path())?;
|
||||
|
||||
for safekeeper in &self.safekeepers {
|
||||
fs::create_dir_all(SafekeeperNode::datadir_path_by_id(self, safekeeper.id))?;
|
||||
fs::create_dir_all(self.safekeeper_data_dir(&safekeeper.name))?;
|
||||
}
|
||||
|
||||
let mut conf_content = String::new();
|
||||
|
||||
@@ -4,7 +4,7 @@
|
||||
/// NOTE: This doesn't implement the full, correct postgresql.conf syntax. Just
|
||||
/// enough to extract a few settings we need in Zenith, assuming you don't do
|
||||
/// funny stuff like include-directives or funny escaping.
|
||||
use anyhow::{bail, Context, Result};
|
||||
use anyhow::{anyhow, bail, Context, Result};
|
||||
use lazy_static::lazy_static;
|
||||
use regex::Regex;
|
||||
use std::collections::HashMap;
|
||||
@@ -78,7 +78,7 @@ impl PostgresConf {
|
||||
<T as FromStr>::Err: std::error::Error + Send + Sync + 'static,
|
||||
{
|
||||
self.get(field_name)
|
||||
.with_context(|| format!("could not find '{}' option {}", field_name, context))?
|
||||
.ok_or_else(|| anyhow!("could not find '{}' option {}", field_name, context))?
|
||||
.parse::<T>()
|
||||
.with_context(|| format!("could not parse '{}' option {}", field_name, context))
|
||||
}
|
||||
|
||||
@@ -15,7 +15,6 @@ use reqwest::blocking::{Client, RequestBuilder, Response};
|
||||
use reqwest::{IntoUrl, Method};
|
||||
use thiserror::Error;
|
||||
use zenith_utils::http::error::HttpErrorBody;
|
||||
use zenith_utils::zid::ZNodeId;
|
||||
|
||||
use crate::local_env::{LocalEnv, SafekeeperConf};
|
||||
use crate::storage::PageServerNode;
|
||||
@@ -62,7 +61,7 @@ impl ResponseErrorMessageExt for Response {
|
||||
//
|
||||
#[derive(Debug)]
|
||||
pub struct SafekeeperNode {
|
||||
pub id: ZNodeId,
|
||||
pub name: String,
|
||||
|
||||
pub conf: SafekeeperConf,
|
||||
|
||||
@@ -78,15 +77,15 @@ impl SafekeeperNode {
|
||||
pub fn from_env(env: &LocalEnv, conf: &SafekeeperConf) -> SafekeeperNode {
|
||||
let pageserver = Arc::new(PageServerNode::from_env(env));
|
||||
|
||||
println!("initializing for sk {} for {}", conf.id, conf.http_port);
|
||||
println!("initializing for {} for {}", conf.name, conf.http_port);
|
||||
|
||||
SafekeeperNode {
|
||||
id: conf.id,
|
||||
name: conf.name.clone(),
|
||||
conf: conf.clone(),
|
||||
pg_connection_config: Self::safekeeper_connection_config(conf.pg_port),
|
||||
env: env.clone(),
|
||||
http_client: Client::new(),
|
||||
http_base_url: format!("http://127.0.0.1:{}/v1", conf.http_port),
|
||||
http_base_url: format!("http://localhost:{}/v1", conf.http_port),
|
||||
pageserver,
|
||||
}
|
||||
}
|
||||
@@ -94,17 +93,13 @@ impl SafekeeperNode {
|
||||
/// Construct libpq connection string for connecting to this safekeeper.
|
||||
fn safekeeper_connection_config(port: u16) -> Config {
|
||||
// TODO safekeeper authentication not implemented yet
|
||||
format!("postgresql://no_user@127.0.0.1:{}/no_db", port)
|
||||
format!("postgresql://no_user@localhost:{}/no_db", port)
|
||||
.parse()
|
||||
.unwrap()
|
||||
}
|
||||
|
||||
pub fn datadir_path_by_id(env: &LocalEnv, sk_id: ZNodeId) -> PathBuf {
|
||||
env.safekeeper_data_dir(format!("sk{}", sk_id).as_ref())
|
||||
}
|
||||
|
||||
pub fn datadir_path(&self) -> PathBuf {
|
||||
SafekeeperNode::datadir_path_by_id(&self.env, self.id)
|
||||
self.env.safekeeper_data_dir(&self.name)
|
||||
}
|
||||
|
||||
pub fn pid_file(&self) -> PathBuf {
|
||||
@@ -119,13 +114,12 @@ impl SafekeeperNode {
|
||||
);
|
||||
io::stdout().flush().unwrap();
|
||||
|
||||
let listen_pg = format!("127.0.0.1:{}", self.conf.pg_port);
|
||||
let listen_http = format!("127.0.0.1:{}", self.conf.http_port);
|
||||
let listen_pg = format!("localhost:{}", self.conf.pg_port);
|
||||
let listen_http = format!("localhost:{}", self.conf.http_port);
|
||||
|
||||
let mut cmd = Command::new(self.env.safekeeper_bin()?);
|
||||
fill_rust_env_vars(
|
||||
cmd.args(&["-D", self.datadir_path().to_str().unwrap()])
|
||||
.args(&["--id", self.id.to_string().as_ref()])
|
||||
.args(&["--listen-pg", &listen_pg])
|
||||
.args(&["--listen-http", &listen_http])
|
||||
.args(&["--recall", "1 second"])
|
||||
@@ -189,7 +183,7 @@ impl SafekeeperNode {
|
||||
pub fn stop(&self, immediate: bool) -> anyhow::Result<()> {
|
||||
let pid_file = self.pid_file();
|
||||
if !pid_file.exists() {
|
||||
println!("Safekeeper {} is already stopped", self.id);
|
||||
println!("Safekeeper {} is already stopped", self.name);
|
||||
return Ok(());
|
||||
}
|
||||
let pid = read_pidfile(&pid_file)?;
|
||||
|
||||
@@ -103,8 +103,6 @@ impl PageServerNode {
|
||||
) -> anyhow::Result<()> {
|
||||
let mut cmd = Command::new(self.env.pageserver_bin()?);
|
||||
|
||||
let id = format!("id={}", self.env.pageserver.id);
|
||||
|
||||
// FIXME: the paths should be shell-escaped to handle paths with spaces, quotas etc.
|
||||
let base_data_dir_param = self.env.base_data_dir.display().to_string();
|
||||
let pg_distrib_dir_param =
|
||||
@@ -124,7 +122,6 @@ impl PageServerNode {
|
||||
args.extend(["-c", &authg_type_param]);
|
||||
args.extend(["-c", &listen_http_addr_param]);
|
||||
args.extend(["-c", &listen_pg_addr_param]);
|
||||
args.extend(["-c", &id]);
|
||||
|
||||
for config_override in config_overrides {
|
||||
args.extend(["-c", config_override]);
|
||||
|
||||
@@ -4,7 +4,7 @@ set -eux
if [ "$1" = 'pageserver' ]; then
if [ ! -d "/data/tenants" ]; then
echo "Initializing pageserver data directory"
pageserver --init -D /data -c "pg_distrib_dir='/usr/local'" -c "id=10"
pageserver --init -D /data -c "pg_distrib_dir='/usr/local'"
fi
echo "Staring pageserver at 0.0.0.0:6400"
pageserver -c "listen_pg_addr='0.0.0.0:6400'" -c "listen_http_addr='0.0.0.0:9898'" -D /data
@@ -7,14 +7,32 @@ Currently we build two main images:
- [zenithdb/zenith](https://hub.docker.com/repository/docker/zenithdb/zenith) — image with pre-built `pageserver`, `safekeeper` and `proxy` binaries and all the required runtime dependencies. Built from [/Dockerfile](/Dockerfile).
- [zenithdb/compute-node](https://hub.docker.com/repository/docker/zenithdb/compute-node) — compute node image with pre-built Postgres binaries from [zenithdb/postgres](https://github.com/zenithdb/postgres).

And additional intermediate images:
And two intermediate images used either to reduce build time or to deliver some additional binary tools from other repos:

- [zenithdb/build](https://hub.docker.com/repository/docker/zenithdb/build) — image with all the dependencies required to build Zenith and compute node images. This image is based on `rust:slim-buster`, so it also has a proper `rust` environment. Built from [/Dockerfile.build](/Dockerfile.build).
- [zenithdb/compute-tools](https://hub.docker.com/repository/docker/zenithdb/compute-tools) — compute node configuration management tools.

## Building pipeline

1. Image `zenithdb/compute-tools` is re-built automatically.

2. Image `zenithdb/compute-node` is built independently in the [zenithdb/postgres](https://github.com/zenithdb/postgres) repo.
2. Image `zenithdb/build` is built manually. If you want to introduce any new compile time dependencies to Zenith or compute node you have to update this image as well, build it and push to Docker Hub.

3. Image `zenithdb/zenith` is built in this repo after a successful `release` tests run and pushed to Docker Hub automatically.
Build:
```sh
docker build -t zenithdb/build:buster -f Dockerfile.build .
```

Login:
```sh
docker login
```

Push to Docker Hub:
```sh
docker push zenithdb/build:buster
```

3. Image `zenithdb/compute-node` is built independently in the [zenithdb/postgres](https://github.com/zenithdb/postgres) repo.

4. Image `zenithdb/zenith` is built in this repo after a successful `release` tests run and pushed to Docker Hub automatically.
@@ -2,16 +2,6 @@

### Authentication

### Backpresssure

Backpressure is used to limit the lag between pageserver and compute node or WAL service.

If compute node or WAL service run far ahead of Page Server,
the time of serving page requests increases. This may lead to timeout errors.

To tune backpressure limits use `max_replication_write_lag`, `max_replication_flush_lag` and `max_replication_apply_lag` settings.
When lag between current LSN (pg_current_wal_flush_lsn() at compute node) and minimal write/flush/apply position of replica exceeds the limit
backends performing writes are blocked until the replica is caught up.
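
For illustration, a minimal sketch of how these three limits might be emitted into the compute node's configuration, mirroring the `conf.append` pattern seen in the `compute.rs` hunk earlier in this comparison; `Conf` is a stand-in for the real config builder, and the values are the ones used elsewhere in this diff:

```rust
// Stand-in for the real postgresql.conf builder used by the compute node.
struct Conf(Vec<String>);

impl Conf {
    fn append(&mut self, name: &str, value: &str) {
        // Each backpressure limit becomes a plain `name = 'value'` GUC line.
        self.0.push(format!("{} = '{}'", name, value));
    }
}

fn main() {
    let mut conf = Conf(Vec::new());
    conf.append("max_replication_write_lag", "500MB");
    conf.append("max_replication_flush_lag", "10GB");
    conf.append("max_replication_apply_lag", "1500MB");
    println!("{}", conf.0.join("\n"));
}
```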
### Base image (page image)

### Basebackup
@@ -86,37 +76,7 @@ The layer map tracks what layers exist for all the relishes in a timeline.
Zenith repository implementation that keeps data in layers.
### LSN

The Log Sequence Number (LSN) is a unique identifier of the WAL record[] in the WAL log.
The insert position is a byte offset into the logs, increasing monotonically with each new record.
Internally, an LSN is a 64-bit integer, representing a byte position in the write-ahead log stream.
It is printed as two hexadecimal numbers of up to 8 digits each, separated by a slash.
Check also [PostgreSQL doc about pg_lsn type](https://www.postgresql.org/docs/devel/datatype-pg-lsn.html)
Values can be compared to calculate the volume of WAL data that separates them, so they are used to measure the progress of replication and recovery.

In postgres and Zenith lsns are used to describe certain points in WAL handling.

PostgreSQL LSNs and functions to monitor them:
* `pg_current_wal_insert_lsn()` - Returns the current write-ahead log insert location.
* `pg_current_wal_lsn()` - Returns the current write-ahead log write location.
* `pg_current_wal_flush_lsn()` - Returns the current write-ahead log flush location.
* `pg_last_wal_receive_lsn()` - Returns the last write-ahead log location that has been received and synced to disk by streaming replication. While streaming replication is in progress this will increase monotonically.
* `pg_last_wal_replay_lsn ()` - Returns the last write-ahead log location that has been replayed during recovery. If recovery is still in progress this will increase monotonically.
[source PostgreSQL documentation](https://www.postgresql.org/docs/devel/functions-admin.html):

Zenith safekeeper LSNs. For more check [walkeeper/README_PROTO.md](/walkeeper/README_PROTO.md)
* `CommitLSN`: position in WAL confirmed by quorum safekeepers.
* `RestartLSN`: position in WAL confirmed by all safekeepers.
* `FlushLSN`: part of WAL persisted to the disk by safekeeper.
* `VCL`: the largerst LSN for which we can guarantee availablity of all prior records.

Zenith pageserver LSNs:
* `last_record_lsn` - the end of last processed WAL record.
* `disk_consistent_lsn` - data is known to be fully flushed and fsync'd to local disk on pageserver up to this LSN.
* `remote_consistent_lsn` - The last LSN that is synced to remote storage and is guaranteed to survive pageserver crash.
TODO: use this name consistently in remote storage code. Now `disk_consistent_lsn` is used and meaning depends on the context.
* `ancestor_lsn` - LSN of the branch point (the LSN at which this branch was created)

TODO: add table that describes mapping between PostgreSQL (compute), safekeeper and pageserver LSNs.
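
A tiny sketch of the textual form described above, using a hypothetical `format_lsn` helper rather than the real `Lsn` type: the 64-bit position is printed as two hexadecimal halves separated by a slash.

```rust
// Hypothetical helper mirroring the LSN display convention described above.
fn format_lsn(lsn: u64) -> String {
    format!("{:X}/{:X}", lsn >> 32, lsn & 0xFFFF_FFFF)
}

fn main() {
    // 0x16B374D848 is an arbitrary example position.
    assert_eq!(format_lsn(0x16B374D848), "16/B374D848");
    println!("{}", format_lsn(0x16B374D848));
}
```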
### Page (block)

The basic structure used to store relation data. All pages are of the same size.
@@ -1,22 +0,0 @@
## Pageserver tenant migration

### Overview

This feature allows to migrate a timeline from one pageserver to another by utilizing remote storage capability.

### Migration process

Pageserver implements two new http handlers: timeline attach and timeline detach.
Timeline migration is performed in a following way:
1. Timeline attach is called on a target pageserver. This asks pageserver to download latest checkpoint uploaded to s3.
2. For now it is necessary to manually initialize replication stream via callmemaybe call so target pageserver initializes replication from safekeeper (it is desired to avoid this and initialize replication directly in attach handler, but this requires some refactoring (probably [#997](https://github.com/zenithdb/zenith/issues/997)/[#1049](https://github.com/zenithdb/zenith/issues/1049))
3. Replication state can be tracked via timeline detail pageserver call.
4. Compute node should be restarted with new pageserver connection string. Issue with multiple compute nodes for one timeline is handled on the safekeeper consensus level. So this is not a problem here.Currently responsibility for rescheduling the compute with updated config lies on external coordinator (console).
5. Timeline is detached from old pageserver. On disk data is removed.


### Implementation details

Now safekeeper needs to track which pageserver it is replicating to. This introduces complications into replication code:
* We need to distinguish different pageservers (now this is done by connection string which is imperfect and is covered here: https://github.com/zenithdb/zenith/issues/1105). Callmemaybe subscription management also needs to track that (this is already implemented).
* We need to track which pageserver is the primary. This is needed to avoid reconnections to non primary pageservers. Because we shouldn't reconnect to them when they decide to stop their walreceiver. I e this can appear when there is a load on the compute and we are trying to detach timeline from old pageserver. In this case callmemaybe will try to reconnect to it because replication termination condition is not met (page server with active compute could never catch up to the latest lsn, so there is always some wal tail)
@@ -87,29 +87,31 @@ so manual installation of dependencies is not recommended.
A single virtual environment with all dependencies is described in the single `Pipfile`.

### Prerequisites
- Install Python 3.7 (the minimal supported version) or greater.
- Our setup with poetry should work with newer python versions too. So feel free to open an issue with a `c/test-runner` label if something doesnt work as expected.
- If you have some trouble with other version you can resolve it by installing Python 3.7 separately, via pyenv or via system package manager e.g.:
- Install Python 3.7 (the minimal supported version)
- Later version (e.g. 3.8) is ok if you don't write Python code
- You can install Python 3.7 separately, e.g.:
```bash
# In Ubuntu
sudo add-apt-repository ppa:deadsnakes/ppa
sudo apt update
sudo apt install python3.7
```
- Install `poetry`
- Exact version of `poetry` is not important, see installation instructions available at poetry's [website](https://python-poetry.org/docs/#installation)`.
- Install dependencies via `./scripts/pysync`. Note that CI uses Python 3.7 so if you have different version some linting tools can yield different result locally vs in the CI.
- Install `pipenv`
- Exact version of `pipenv` is not important, you can use Debian/Ubuntu package `pipenv`.
- Install dependencies via either
* `pipenv --python 3.7 install --dev` if you will write Python code, or
* `pipenv install` if you only want to run Python scripts and don't have Python 3.7.

Run `poetry shell` to activate the virtual environment.
Alternatively, use `poetry run` to run a single command in the venv, e.g. `poetry run pytest`.
Run `pipenv shell` to activate the virtual environment.
Alternatively, use `pipenv run` to run a single command in the venv, e.g. `pipenv run pytest`.

### Obligatory checks
We force code formatting via `yapf` and type hints via `mypy`.
Run the following commands in the repository's root (next to `setup.cfg`):

```bash
poetry run yapf -ri . # All code is reformatted
poetry run mypy . # Ensure there are no typing errors
pipenv run yapf -ri . # All code is reformatted
pipenv run mypy . # Ensure there are no typing errors
```

**WARNING**: do not run `mypy` from a directory other than the root of the repository.
@@ -121,6 +123,17 @@ Also consider:
* Adding more type hints to your code to avoid `Any`.

### Changing dependencies
To add new package or change an existing one you can use `poetry add` or `poetry update` or edit `pyproject.toml` manually. Do not forget to run `poetry lock` in the latter case.
You have to update `Pipfile.lock` if you have changed `Pipfile`:

More details are available in poetry's [documentation](https://python-poetry.org/docs/).
```bash
pipenv --python 3.7 install --dev # Re-create venv for Python 3.7 and install recent pipenv inside
pipenv run pipenv --version # Should be at least 2021.5.29
pipenv run pipenv lock # Regenerate Pipfile.lock
```

As the minimal supported version is Python 3.7 and we use it in CI,
you have to use a Python 3.7 environment when updating `Pipfile.lock`.
Otherwise some back-compatibility packages will be missing.

It is also important to run recent `pipenv`.
Older versions remove markers from `Pipfile.lock`.
@@ -1,7 +1,8 @@
|
||||
[package]
|
||||
name = "pageserver"
|
||||
version = "0.1.0"
|
||||
edition = "2021"
|
||||
authors = ["Stas Kelvich <stas@zenith.tech>"]
|
||||
edition = "2018"
|
||||
|
||||
[dependencies]
|
||||
bookfile = { git = "https://github.com/zenithdb/bookfile.git", branch="generic-readext" }
|
||||
@@ -14,14 +15,15 @@ futures = "0.3.13"
|
||||
hyper = "0.14"
|
||||
lazy_static = "1.4.0"
|
||||
log = "0.4.14"
|
||||
clap = "3.0"
|
||||
clap = "2.33.0"
|
||||
daemonize = "0.4.1"
|
||||
tokio = { version = "1.11", features = ["process", "sync", "macros", "fs", "rt", "io-util", "time"] }
|
||||
postgres-types = { git = "https://github.com/zenithdb/rust-postgres.git", rev="2949d98df52587d562986aad155dd4e889e408b7" }
|
||||
postgres-protocol = { git = "https://github.com/zenithdb/rust-postgres.git", rev="2949d98df52587d562986aad155dd4e889e408b7" }
|
||||
postgres = { git = "https://github.com/zenithdb/rust-postgres.git", rev="2949d98df52587d562986aad155dd4e889e408b7" }
|
||||
tokio-postgres = { git = "https://github.com/zenithdb/rust-postgres.git", rev="2949d98df52587d562986aad155dd4e889e408b7" }
|
||||
postgres-types = { git = "https://github.com/zenithdb/rust-postgres.git", rev="9eb0dbfbeb6a6c1b79099b9f7ae4a8c021877858" }
|
||||
postgres-protocol = { git = "https://github.com/zenithdb/rust-postgres.git", rev="9eb0dbfbeb6a6c1b79099b9f7ae4a8c021877858" }
|
||||
postgres = { git = "https://github.com/zenithdb/rust-postgres.git", rev="9eb0dbfbeb6a6c1b79099b9f7ae4a8c021877858" }
|
||||
tokio-postgres = { git = "https://github.com/zenithdb/rust-postgres.git", rev="9eb0dbfbeb6a6c1b79099b9f7ae4a8c021877858" }
|
||||
tokio-stream = "0.1.8"
|
||||
routerify = "2"
|
||||
anyhow = { version = "1.0", features = ["backtrace"] }
|
||||
crc32c = "0.6.0"
|
||||
thiserror = "1.0"
|
||||
@@ -30,7 +32,7 @@ tar = "0.4.33"
|
||||
humantime = "2.1.0"
|
||||
serde = { version = "1.0", features = ["derive"] }
|
||||
serde_json = "1"
|
||||
toml_edit = { version = "0.13", features = ["easy"] }
|
||||
toml_edit = { version = "0.12", features = ["easy"] }
|
||||
scopeguard = "1.1.0"
|
||||
async-trait = "0.1"
|
||||
const_format = "0.2.21"
|
||||
@@ -40,8 +42,8 @@ signal-hook = "0.3.10"
|
||||
url = "2"
|
||||
nix = "0.23"
|
||||
once_cell = "1.8.0"
|
||||
parking_lot = "0.11.2"
|
||||
crossbeam-utils = "0.8.5"
|
||||
fail = "0.5.0"
|
||||
|
||||
rust-s3 = { version = "0.28", default-features = false, features = ["no-verify-ssl", "tokio-rustls-tls"] }
|
||||
async-compression = {version = "0.3", features = ["zstd", "tokio"]}
|
||||
|
||||
@@ -13,7 +13,7 @@ fn main() -> Result<()> {
|
||||
.about("Dump contents of one layer file, for debugging")
|
||||
.version(GIT_VERSION)
|
||||
.arg(
|
||||
Arg::new("path")
|
||||
Arg::with_name("path")
|
||||
.help("Path to file to dump")
|
||||
.required(true)
|
||||
.index(1),
|
||||
|
||||
@@ -27,27 +27,27 @@ fn main() -> Result<()> {
|
||||
.about("Materializes WAL stream to pages and serves them to the postgres")
|
||||
.version(GIT_VERSION)
|
||||
.arg(
|
||||
Arg::new("daemonize")
|
||||
.short('d')
|
||||
Arg::with_name("daemonize")
|
||||
.short("d")
|
||||
.long("daemonize")
|
||||
.takes_value(false)
|
||||
.help("Run in the background"),
|
||||
)
|
||||
.arg(
|
||||
Arg::new("init")
|
||||
Arg::with_name("init")
|
||||
.long("init")
|
||||
.takes_value(false)
|
||||
.help("Initialize pageserver repo"),
|
||||
)
|
||||
.arg(
|
||||
Arg::new("workdir")
|
||||
.short('D')
|
||||
Arg::with_name("workdir")
|
||||
.short("D")
|
||||
.long("workdir")
|
||||
.takes_value(true)
|
||||
.help("Working directory for the pageserver"),
|
||||
)
|
||||
.arg(
|
||||
Arg::new("create-tenant")
|
||||
Arg::with_name("create-tenant")
|
||||
.long("create-tenant")
|
||||
.takes_value(true)
|
||||
.help("Create tenant during init")
|
||||
@@ -55,13 +55,13 @@ fn main() -> Result<()> {
|
||||
)
|
||||
// See `settings.md` for more details on the extra configuration patameters pageserver can process
|
||||
.arg(
|
||||
Arg::new("config-override")
|
||||
.short('c')
|
||||
Arg::with_name("config-override")
|
||||
.short("c")
|
||||
.takes_value(true)
|
||||
.number_of_values(1)
|
||||
.multiple_occurrences(true)
|
||||
.multiple(true)
|
||||
.help("Additional configuration overrides of the ones from the toml config file (or new ones to add there).
|
||||
Any option has to be a valid toml document, example: `-c=\"foo='hey'\"` `-c=\"foo={value=1}\"`"),
|
||||
Any option has to be a valid toml document, example: `-c \"foo='hey'\"` `-c \"foo={value=1}\"`"),
|
||||
)
|
||||
.get_matches();
|
||||
|
||||
@@ -115,14 +115,7 @@ fn main() -> Result<()> {
|
||||
option_line
|
||||
)
|
||||
})?;
|
||||
|
||||
for (key, item) in doc.iter() {
|
||||
if key == "id" {
|
||||
anyhow::ensure!(
|
||||
init,
|
||||
"node id can only be set during pageserver init and cannot be overridden"
|
||||
);
|
||||
}
|
||||
toml.insert(key, item.clone());
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,334 +0,0 @@
|
||||
//! A CLI helper to deal with remote storage (S3, usually) blobs as archives.
|
||||
//! See [`compression`] for more details about the archives.
|
||||
|
||||
use std::{collections::BTreeSet, path::Path};
|
||||
|
||||
use anyhow::{bail, ensure, Context};
|
||||
use clap::{App, Arg};
|
||||
use pageserver::{
|
||||
layered_repository::metadata::{TimelineMetadata, METADATA_FILE_NAME},
|
||||
remote_storage::compression,
|
||||
};
|
||||
use tokio::{fs, io};
|
||||
use zenith_utils::GIT_VERSION;
|
||||
|
||||
const LIST_SUBCOMMAND: &str = "list";
|
||||
const ARCHIVE_ARG_NAME: &str = "archive";
|
||||
|
||||
const EXTRACT_SUBCOMMAND: &str = "extract";
|
||||
const TARGET_DIRECTORY_ARG_NAME: &str = "target_directory";
|
||||
|
||||
const CREATE_SUBCOMMAND: &str = "create";
|
||||
const SOURCE_DIRECTORY_ARG_NAME: &str = "source_directory";
|
||||
|
||||
#[tokio::main(flavor = "current_thread")]
|
||||
async fn main() -> anyhow::Result<()> {
|
||||
let arg_matches = App::new("pageserver zst blob [un]compressor utility")
|
||||
.version(GIT_VERSION)
|
||||
.subcommands(vec![
|
||||
App::new(LIST_SUBCOMMAND)
|
||||
.about("List the archive contents")
|
||||
.arg(
|
||||
Arg::new(ARCHIVE_ARG_NAME)
|
||||
.required(true)
|
||||
.takes_value(true)
|
||||
.help("An archive to list the contents of"),
|
||||
),
|
||||
App::new(EXTRACT_SUBCOMMAND)
|
||||
.about("Extracts the archive into the directory")
|
||||
.arg(
|
||||
Arg::new(ARCHIVE_ARG_NAME)
|
||||
.required(true)
|
||||
.takes_value(true)
|
||||
.help("An archive to extract"),
|
||||
)
|
||||
.arg(
|
||||
Arg::new(TARGET_DIRECTORY_ARG_NAME)
|
||||
.required(false)
|
||||
.takes_value(true)
|
||||
.help("A directory to extract the archive into. Optional, will use the current directory if not specified"),
|
||||
),
|
||||
App::new(CREATE_SUBCOMMAND)
|
||||
.about("Creates an archive with the contents of a directory (only the first level files are taken, metadata file has to be present in the same directory)")
|
||||
.arg(
|
||||
Arg::new(SOURCE_DIRECTORY_ARG_NAME)
|
||||
.required(true)
|
||||
.takes_value(true)
|
||||
.help("A directory to use for creating the archive"),
|
||||
)
|
||||
.arg(
|
||||
Arg::new(TARGET_DIRECTORY_ARG_NAME)
|
||||
.required(false)
|
||||
.takes_value(true)
|
||||
.help("A directory to create the archive in. Optional, will use the current directory if not specified"),
|
||||
),
|
||||
])
|
||||
.get_matches();
|
||||
|
||||
let subcommand_name = match arg_matches.subcommand_name() {
|
||||
Some(name) => name,
|
||||
None => bail!("No subcommand specified"),
|
||||
};
|
||||
|
||||
let subcommand_matches = match arg_matches.subcommand_matches(subcommand_name) {
|
||||
Some(matches) => matches,
|
||||
None => bail!(
|
||||
"No subcommand arguments were recognized for subcommand '{}'",
|
||||
subcommand_name
|
||||
),
|
||||
};
|
||||
|
||||
let target_dir = Path::new(
|
||||
subcommand_matches
|
||||
.value_of(TARGET_DIRECTORY_ARG_NAME)
|
||||
.unwrap_or("./"),
|
||||
);
|
||||
|
||||
match subcommand_name {
|
||||
LIST_SUBCOMMAND => {
|
||||
let archive = match subcommand_matches.value_of(ARCHIVE_ARG_NAME) {
|
||||
Some(archive) => Path::new(archive),
|
||||
None => bail!("No '{}' argument is specified", ARCHIVE_ARG_NAME),
|
||||
};
|
||||
list_archive(archive).await
|
||||
}
|
||||
EXTRACT_SUBCOMMAND => {
|
||||
let archive = match subcommand_matches.value_of(ARCHIVE_ARG_NAME) {
|
||||
Some(archive) => Path::new(archive),
|
||||
None => bail!("No '{}' argument is specified", ARCHIVE_ARG_NAME),
|
||||
};
|
||||
extract_archive(archive, target_dir).await
|
||||
}
|
||||
CREATE_SUBCOMMAND => {
|
||||
let source_dir = match subcommand_matches.value_of(SOURCE_DIRECTORY_ARG_NAME) {
|
||||
Some(source) => Path::new(source),
|
||||
None => bail!("No '{}' argument is specified", SOURCE_DIRECTORY_ARG_NAME),
|
||||
};
|
||||
create_archive(source_dir, target_dir).await
|
||||
}
|
||||
unknown => bail!("Unknown subcommand {}", unknown),
|
||||
}
|
||||
}
|
||||
|
||||
async fn list_archive(archive: &Path) -> anyhow::Result<()> {
|
||||
let archive = archive.canonicalize().with_context(|| {
|
||||
format!(
|
||||
"Failed to get the absolute path for the archive path '{}'",
|
||||
archive.display()
|
||||
)
|
||||
})?;
|
||||
ensure!(
|
||||
archive.is_file(),
|
||||
"Path '{}' is not an archive file",
|
||||
archive.display()
|
||||
);
|
||||
println!("Listing an archive at path '{}'", archive.display());
|
||||
let archive_name = match archive.file_name().and_then(|name| name.to_str()) {
|
||||
Some(name) => name,
|
||||
None => bail!(
|
||||
"Failed to get the archive name from the path '{}'",
|
||||
archive.display()
|
||||
),
|
||||
};
|
||||
|
||||
let archive_bytes = fs::read(&archive)
|
||||
.await
|
||||
.context("Failed to read the archive bytes")?;
|
||||
|
||||
let header = compression::read_archive_header(archive_name, &mut archive_bytes.as_slice())
|
||||
.await
|
||||
.context("Failed to read the archive header")?;
|
||||
|
||||
let empty_path = Path::new("");
|
||||
println!("-------------------------------");
|
||||
|
||||
let longest_path_in_archive = header
|
||||
.files
|
||||
.iter()
|
||||
.filter_map(|file| Some(file.subpath.as_path(empty_path).to_str()?.len()))
|
||||
.max()
|
||||
.unwrap_or_default()
|
||||
.max(METADATA_FILE_NAME.len());
|
||||
|
||||
for regular_file in &header.files {
|
||||
println!(
|
||||
"File: {:width$} uncompressed size: {} bytes",
|
||||
regular_file.subpath.as_path(empty_path).display(),
|
||||
regular_file.size,
|
||||
width = longest_path_in_archive,
|
||||
)
|
||||
}
|
||||
println!(
|
||||
"File: {:width$} uncompressed size: {} bytes",
|
||||
METADATA_FILE_NAME,
|
||||
header.metadata_file_size,
|
||||
width = longest_path_in_archive,
|
||||
);
|
||||
println!("-------------------------------");
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
async fn extract_archive(archive: &Path, target_dir: &Path) -> anyhow::Result<()> {
|
||||
let archive = archive.canonicalize().with_context(|| {
|
||||
format!(
|
||||
"Failed to get the absolute path for the archive path '{}'",
|
||||
archive.display()
|
||||
)
|
||||
})?;
|
||||
ensure!(
|
||||
archive.is_file(),
|
||||
"Path '{}' is not an archive file",
|
||||
archive.display()
|
||||
);
|
||||
let archive_name = match archive.file_name().and_then(|name| name.to_str()) {
|
||||
Some(name) => name,
|
||||
None => bail!(
|
||||
"Failed to get the archive name from the path '{}'",
|
||||
archive.display()
|
||||
),
|
||||
};
|
||||
|
||||
if !target_dir.exists() {
|
||||
fs::create_dir_all(target_dir).await.with_context(|| {
|
||||
format!(
|
||||
"Failed to create the target dir at path '{}'",
|
||||
target_dir.display()
|
||||
)
|
||||
})?;
|
||||
}
|
||||
let target_dir = target_dir.canonicalize().with_context(|| {
|
||||
format!(
|
||||
"Failed to get the absolute path for the target dir path '{}'",
|
||||
target_dir.display()
|
||||
)
|
||||
})?;
|
||||
ensure!(
|
||||
target_dir.is_dir(),
|
||||
"Path '{}' is not a directory",
|
||||
target_dir.display()
|
||||
);
|
||||
let mut dir_contents = fs::read_dir(&target_dir)
|
||||
.await
|
||||
.context("Failed to list the target directory contents")?;
|
||||
let dir_entry = dir_contents
|
||||
.next_entry()
|
||||
.await
|
||||
.context("Failed to list the target directory contents")?;
|
||||
ensure!(
|
||||
dir_entry.is_none(),
|
||||
"Target directory '{}' is not empty",
|
||||
target_dir.display()
|
||||
);
|
||||
|
||||
println!(
|
||||
"Extracting an archive at path '{}' into directory '{}'",
|
||||
archive.display(),
|
||||
target_dir.display()
|
||||
);
|
||||
|
||||
let mut archive_file = fs::File::open(&archive).await.with_context(|| {
|
||||
format!(
|
||||
"Failed to get the archive name from the path '{}'",
|
||||
archive.display()
|
||||
)
|
||||
})?;
|
||||
let header = compression::read_archive_header(archive_name, &mut archive_file)
|
||||
.await
|
||||
.context("Failed to read the archive header")?;
|
||||
compression::uncompress_with_header(&BTreeSet::new(), &target_dir, header, &mut archive_file)
|
||||
.await
|
||||
.context("Failed to extract the archive")
|
||||
}
|
||||
|
||||
async fn create_archive(source_dir: &Path, target_dir: &Path) -> anyhow::Result<()> {
|
||||
let source_dir = source_dir.canonicalize().with_context(|| {
|
||||
format!(
|
||||
"Failed to get the absolute path for the source dir path '{}'",
|
||||
source_dir.display()
|
||||
)
|
||||
})?;
|
||||
ensure!(
|
||||
source_dir.is_dir(),
|
||||
"Path '{}' is not a directory",
|
||||
source_dir.display()
|
||||
);
|
||||
|
||||
if !target_dir.exists() {
|
||||
fs::create_dir_all(target_dir).await.with_context(|| {
|
||||
format!(
|
||||
"Failed to create the target dir at path '{}'",
|
||||
target_dir.display()
|
||||
)
|
||||
})?;
|
||||
}
|
||||
let target_dir = target_dir.canonicalize().with_context(|| {
|
||||
format!(
|
||||
"Failed to get the absolute path for the target dir path '{}'",
|
||||
target_dir.display()
|
||||
)
|
||||
})?;
|
||||
ensure!(
|
||||
target_dir.is_dir(),
|
||||
"Path '{}' is not a directory",
|
||||
target_dir.display()
|
||||
);
|
||||
|
||||
println!(
|
||||
"Compressing directory '{}' and creating resulting archive in directory '{}'",
|
||||
source_dir.display(),
|
||||
target_dir.display()
|
||||
);
|
||||
|
||||
let mut metadata_file_contents = None;
|
||||
let mut files_co_archive = Vec::new();
|
||||
|
||||
let mut source_dir_contents = fs::read_dir(&source_dir)
|
||||
.await
|
||||
.context("Failed to read the source directory contents")?;
|
||||
|
||||
while let Some(source_dir_entry) = source_dir_contents
|
||||
.next_entry()
|
||||
.await
|
||||
.context("Failed to read a source dir entry")?
|
||||
{
|
||||
let entry_path = source_dir_entry.path();
|
||||
if entry_path.is_file() {
|
||||
if entry_path.file_name().and_then(|name| name.to_str()) == Some(METADATA_FILE_NAME) {
|
||||
let metadata_bytes = fs::read(entry_path)
|
||||
.await
|
||||
.context("Failed to read metata file bytes in the source dir")?;
|
||||
metadata_file_contents = Some(
|
||||
TimelineMetadata::from_bytes(&metadata_bytes)
|
||||
.context("Failed to parse metata file contents in the source dir")?,
|
||||
);
|
||||
} else {
|
||||
files_co_archive.push(entry_path);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
let metadata = match metadata_file_contents {
|
||||
Some(metadata) => metadata,
|
||||
None => bail!(
|
||||
"No metadata file found in the source dir '{}', cannot create the archive",
|
||||
source_dir.display()
|
||||
),
|
||||
};
|
||||
|
||||
let _ = compression::archive_files_as_stream(
|
||||
&source_dir,
|
||||
files_co_archive.iter(),
|
||||
&metadata,
|
||||
move |mut archive_streamer, archive_name| async move {
|
||||
let archive_target = target_dir.join(&archive_name);
|
||||
let mut archive_file = fs::File::create(&archive_target).await?;
|
||||
io::copy(&mut archive_streamer, &mut archive_file).await?;
|
||||
Ok(archive_target)
|
||||
},
|
||||
)
|
||||
.await
|
||||
.context("Failed to create an archive")?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
@@ -14,20 +14,20 @@ fn main() -> Result<()> {
.about("Dump or update metadata file")
.version(GIT_VERSION)
.arg(
Arg::new("path")
Arg::with_name("path")
.help("Path to metadata file")
.required(true),
)
.arg(
Arg::new("disk_lsn")
.short('d')
Arg::with_name("disk_lsn")
.short("d")
.long("disk_lsn")
.takes_value(true)
.help("Replace disk constistent lsn"),
)
.arg(
Arg::new("prev_lsn")
.short('p')
Arg::with_name("prev_lsn")
.short("p")
.long("prev_lsn")
.takes_value(true)
.help("Previous record LSN"),

@@ -4,7 +4,7 @@
// TODO: move all paths construction to conf impl
//

use anyhow::{bail, Context, Result};
use anyhow::{anyhow, bail, Context, Result};
use postgres_ffi::ControlFileData;
use serde::{Deserialize, Serialize};
use std::{
@@ -16,9 +16,10 @@ use std::{
};
use tracing::*;

use zenith_utils::crashsafe_dir;
use zenith_utils::logging;
use zenith_utils::lsn::Lsn;
use zenith_utils::zid::{ZTenantId, ZTimelineId};
use zenith_utils::{crashsafe_dir, logging};

use crate::walredo::WalRedoManager;
use crate::CheckpointConfig;
@@ -117,7 +118,7 @@ pub fn init_pageserver(conf: &'static PageServerConf, create_tenant: Option<&str
if let Some(tenantid) = create_tenant {
let tenantid = ZTenantId::from_str(tenantid)?;
println!("initializing tenantid {}", tenantid);
create_repo(conf, tenantid, dummy_redo_mgr).context("failed to create repo")?;
create_repo(conf, tenantid, dummy_redo_mgr).with_context(|| "failed to create repo")?;
}
crashsafe_dir::create_dir_all(conf.tenants_path())?;

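Several hunks in this file flip between `.context(...)` and `.with_context(|| ...)`. A small sketch of the difference (eager versus lazily formatted message), using an illustrative file-reading example rather than the repo's own calls:

use anyhow::{Context, Result};

fn first_line(path: &str) -> Result<String> {
    // context: the message is built unconditionally, fine for a static string.
    let raw = std::fs::read_to_string(path).context("failed to read file")?;
    // with_context: the closure runs only on the error path, so formatting costs nothing on success.
    let line = raw
        .lines()
        .next()
        .with_context(|| format!("file '{}' is empty", path))?;
    Ok(line.to_string())
}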
@@ -196,7 +197,7 @@ fn run_initdb(conf: &'static PageServerConf, initdbpath: &Path) -> Result<()> {
.env("DYLD_LIBRARY_PATH", conf.pg_lib_dir().to_str().unwrap())
.stdout(Stdio::null())
.output()
.context("failed to execute initdb")?;
.with_context(|| "failed to execute initdb")?;
if !initdb_output.status.success() {
anyhow::bail!(
"initdb failed: '{}'",
@@ -307,7 +308,7 @@ pub(crate) fn create_branch(
let timeline = repo
.get_timeline(startpoint.timelineid)?
.local_timeline()
.context("Cannot branch off the timeline that's not present locally")?;
.ok_or_else(|| anyhow!("Cannot branch off the timeline that's not present locally"))?;
if startpoint.lsn == Lsn(0) {
// Find end of WAL on the old timeline
let end_of_wal = timeline.get_last_record_lsn();
@@ -323,13 +324,12 @@ pub(crate) fn create_branch(
timeline.wait_lsn(startpoint.lsn)?;
}
startpoint.lsn = startpoint.lsn.align();
if timeline.get_ancestor_lsn() > startpoint.lsn {
// can we safely just branch from the ancestor instead?
if timeline.get_start_lsn() > startpoint.lsn {
anyhow::bail!(
"invalid startpoint {} for the branch {}: less than timeline ancestor lsn {:?}",
"invalid startpoint {} for the branch {}: less than timeline start {}",
startpoint.lsn,
branchname,
timeline.get_ancestor_lsn()
timeline.get_start_lsn()
);
}

@@ -383,11 +383,14 @@ fn parse_point_in_time(
let mut strings = s.split('@');
let name = strings.next().unwrap();

let lsn = strings
.next()
.map(Lsn::from_str)
.transpose()
.context("invalid LSN in point-in-time specification")?;
let lsn: Option<Lsn>;
if let Some(lsnstr) = strings.next() {
lsn = Some(
Lsn::from_str(lsnstr).with_context(|| "invalid LSN in point-in-time specification")?,
);
} else {
lsn = None
}

// Check if it's a tag
if lsn.is_none() {

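The parse_point_in_time hunk shows two equivalent ways to parse an optional trailing LSN. A hedged sketch of the combinator form, with u64 standing in for the project's Lsn type:

use anyhow::{Context, Result};
use std::str::FromStr;

fn parse_point(s: &str) -> Result<(String, Option<u64>)> {
    let mut parts = s.split('@');
    let name = parts.next().unwrap().to_string(); // split always yields at least one piece

    // Combinator form: Option<&str> -> Option<Result<u64, _>> -> Result<Option<u64>, _>.
    let lsn = parts
        .next()
        .map(u64::from_str)
        .transpose()
        .context("invalid LSN in point-in-time specification")?;

    // The explicit if-let form on the other side of the hunk produces the same value.
    Ok((name, lsn))
}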
@@ -4,11 +4,11 @@
//! file, or on the command line.
//! See also `settings.md` for better description on every parameter.

use anyhow::{bail, ensure, Context, Result};
use anyhow::{anyhow, bail, ensure, Context, Result};
use toml_edit;
use toml_edit::{Document, Item};
use zenith_utils::postgres_backend::AuthType;
use zenith_utils::zid::{ZNodeId, ZTenantId, ZTimelineId};
use zenith_utils::zid::{ZTenantId, ZTimelineId};

use std::convert::TryInto;
use std::env;
@@ -36,9 +36,6 @@ pub mod defaults {
|
||||
pub const DEFAULT_GC_HORIZON: u64 = 64 * 1024 * 1024;
|
||||
pub const DEFAULT_GC_PERIOD: &str = "100 s";
|
||||
|
||||
pub const DEFAULT_WAIT_LSN_TIMEOUT: &str = "60 s";
|
||||
pub const DEFAULT_WAL_REDO_TIMEOUT: &str = "60 s";
|
||||
|
||||
pub const DEFAULT_SUPERUSER: &str = "zenith_admin";
|
||||
pub const DEFAULT_REMOTE_STORAGE_MAX_CONCURRENT_SYNC: usize = 100;
|
||||
pub const DEFAULT_REMOTE_STORAGE_MAX_SYNC_ERRORS: u32 = 10;
|
||||
@@ -62,9 +59,6 @@ pub mod defaults {
|
||||
#gc_period = '{DEFAULT_GC_PERIOD}'
|
||||
#gc_horizon = {DEFAULT_GC_HORIZON}
|
||||
|
||||
#wait_lsn_timeout = '{DEFAULT_WAIT_LSN_TIMEOUT}'
|
||||
#wal_redo_timeout = '{DEFAULT_WAL_REDO_TIMEOUT}'
|
||||
|
||||
#max_file_descriptors = {DEFAULT_MAX_FILE_DESCRIPTORS}
|
||||
|
||||
# initial superuser role name to use when creating a new tenant
|
||||
@@ -78,10 +72,6 @@ pub mod defaults {
|
||||
|
||||
#[derive(Debug, Clone, PartialEq, Eq)]
|
||||
pub struct PageServerConf {
|
||||
// Identifier of that particular pageserver so e g safekeepers
|
||||
// can safely distinguish different pageservers
|
||||
pub id: ZNodeId,
|
||||
|
||||
/// Example (default): 127.0.0.1:64000
|
||||
pub listen_pg_addr: String,
|
||||
/// Example (default): 127.0.0.1:9898
|
||||
@@ -95,12 +85,6 @@ pub struct PageServerConf {
|
||||
|
||||
pub gc_horizon: u64,
|
||||
pub gc_period: Duration,
|
||||
|
||||
// Timeout when waiting for WAL receiver to catch up to an LSN given in a GetPage@LSN call.
|
||||
pub wait_lsn_timeout: Duration,
|
||||
// How long to wait for WAL redo to complete.
|
||||
pub wal_redo_timeout: Duration,
|
||||
|
||||
pub superuser: String,
|
||||
|
||||
pub page_cache_size: usize,
|
||||
@@ -122,206 +106,6 @@ pub struct PageServerConf {
|
||||
pub remote_storage_config: Option<RemoteStorageConfig>,
|
||||
}
|
||||
|
||||
// use dedicated enum for builder to better indicate the intention
|
||||
// and avoid possible confusion with nested options
|
||||
pub enum BuilderValue<T> {
|
||||
Set(T),
|
||||
NotSet,
|
||||
}
|
||||
|
||||
impl<T> BuilderValue<T> {
|
||||
pub fn ok_or<E>(self, err: E) -> Result<T, E> {
|
||||
match self {
|
||||
Self::Set(v) => Ok(v),
|
||||
Self::NotSet => Err(err),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
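A hedged, self-contained sketch of how the BuilderValue enum above is meant to be used: unlike a plain Option, it can tell a setting that was never provided apart from one explicitly set to None. The enum is restated here only so the example compiles on its own; the settings and error strings are illustrative:

pub enum BuilderValue<T> {
    Set(T),
    NotSet,
}

impl<T> BuilderValue<T> {
    pub fn ok_or<E>(self, err: E) -> Result<T, E> {
        match self {
            Self::Set(v) => Ok(v),
            Self::NotSet => Err(err),
        }
    }
}

fn build_example() -> anyhow::Result<()> {
    // Explicitly configured to "no remote storage": Set(None), not NotSet.
    let remote_storage: BuilderValue<Option<String>> = BuilderValue::Set(None);
    // A mandatory setting the config file never mentioned.
    let node_id: BuilderValue<u64> = BuilderValue::NotSet;

    let _storage = remote_storage.ok_or(anyhow::anyhow!("missing remote_storage_config"))?;
    let _id = node_id.ok_or(anyhow::anyhow!("missing id"))?; // fails with "missing id"
    Ok(())
}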
// needed to simplify config construction
|
||||
struct PageServerConfigBuilder {
|
||||
listen_pg_addr: BuilderValue<String>,
|
||||
|
||||
listen_http_addr: BuilderValue<String>,
|
||||
|
||||
checkpoint_distance: BuilderValue<u64>,
|
||||
checkpoint_period: BuilderValue<Duration>,
|
||||
|
||||
gc_horizon: BuilderValue<u64>,
|
||||
gc_period: BuilderValue<Duration>,
|
||||
|
||||
wait_lsn_timeout: BuilderValue<Duration>,
|
||||
wal_redo_timeout: BuilderValue<Duration>,
|
||||
|
||||
superuser: BuilderValue<String>,
|
||||
|
||||
page_cache_size: BuilderValue<usize>,
|
||||
max_file_descriptors: BuilderValue<usize>,
|
||||
|
||||
workdir: BuilderValue<PathBuf>,
|
||||
|
||||
pg_distrib_dir: BuilderValue<PathBuf>,
|
||||
|
||||
auth_type: BuilderValue<AuthType>,
|
||||
|
||||
//
|
||||
auth_validation_public_key_path: BuilderValue<Option<PathBuf>>,
|
||||
remote_storage_config: BuilderValue<Option<RemoteStorageConfig>>,
|
||||
|
||||
id: BuilderValue<ZNodeId>,
|
||||
}
|
||||
|
||||
impl Default for PageServerConfigBuilder {
|
||||
fn default() -> Self {
|
||||
use self::BuilderValue::*;
|
||||
use defaults::*;
|
||||
Self {
|
||||
listen_pg_addr: Set(DEFAULT_PG_LISTEN_ADDR.to_string()),
|
||||
listen_http_addr: Set(DEFAULT_HTTP_LISTEN_ADDR.to_string()),
|
||||
checkpoint_distance: Set(DEFAULT_CHECKPOINT_DISTANCE),
|
||||
checkpoint_period: Set(humantime::parse_duration(DEFAULT_CHECKPOINT_PERIOD)
|
||||
.expect("cannot parse default checkpoint period")),
|
||||
gc_horizon: Set(DEFAULT_GC_HORIZON),
|
||||
gc_period: Set(humantime::parse_duration(DEFAULT_GC_PERIOD)
|
||||
.expect("cannot parse default gc period")),
|
||||
wait_lsn_timeout: Set(humantime::parse_duration(DEFAULT_WAIT_LSN_TIMEOUT)
|
||||
.expect("cannot parse default wait lsn timeout")),
|
||||
wal_redo_timeout: Set(humantime::parse_duration(DEFAULT_WAL_REDO_TIMEOUT)
|
||||
.expect("cannot parse default wal redo timeout")),
|
||||
superuser: Set(DEFAULT_SUPERUSER.to_string()),
|
||||
page_cache_size: Set(DEFAULT_PAGE_CACHE_SIZE),
|
||||
max_file_descriptors: Set(DEFAULT_MAX_FILE_DESCRIPTORS),
|
||||
workdir: Set(PathBuf::new()),
|
||||
pg_distrib_dir: Set(env::current_dir()
|
||||
.expect("cannot access current directory")
|
||||
.join("tmp_install")),
|
||||
auth_type: Set(AuthType::Trust),
|
||||
auth_validation_public_key_path: Set(None),
|
||||
remote_storage_config: Set(None),
|
||||
id: NotSet,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl PageServerConfigBuilder {
|
||||
pub fn listen_pg_addr(&mut self, listen_pg_addr: String) {
|
||||
self.listen_pg_addr = BuilderValue::Set(listen_pg_addr)
|
||||
}
|
||||
|
||||
pub fn listen_http_addr(&mut self, listen_http_addr: String) {
|
||||
self.listen_http_addr = BuilderValue::Set(listen_http_addr)
|
||||
}
|
||||
|
||||
pub fn checkpoint_distance(&mut self, checkpoint_distance: u64) {
|
||||
self.checkpoint_distance = BuilderValue::Set(checkpoint_distance)
|
||||
}
|
||||
|
||||
pub fn checkpoint_period(&mut self, checkpoint_period: Duration) {
|
||||
self.checkpoint_period = BuilderValue::Set(checkpoint_period)
|
||||
}
|
||||
|
||||
pub fn gc_horizon(&mut self, gc_horizon: u64) {
|
||||
self.gc_horizon = BuilderValue::Set(gc_horizon)
|
||||
}
|
||||
|
||||
pub fn gc_period(&mut self, gc_period: Duration) {
|
||||
self.gc_period = BuilderValue::Set(gc_period)
|
||||
}
|
||||
|
||||
pub fn wait_lsn_timeout(&mut self, wait_lsn_timeout: Duration) {
|
||||
self.wait_lsn_timeout = BuilderValue::Set(wait_lsn_timeout)
|
||||
}
|
||||
|
||||
pub fn wal_redo_timeout(&mut self, wal_redo_timeout: Duration) {
|
||||
self.wal_redo_timeout = BuilderValue::Set(wal_redo_timeout)
|
||||
}
|
||||
|
||||
pub fn superuser(&mut self, superuser: String) {
|
||||
self.superuser = BuilderValue::Set(superuser)
|
||||
}
|
||||
|
||||
pub fn page_cache_size(&mut self, page_cache_size: usize) {
|
||||
self.page_cache_size = BuilderValue::Set(page_cache_size)
|
||||
}
|
||||
|
||||
pub fn max_file_descriptors(&mut self, max_file_descriptors: usize) {
|
||||
self.max_file_descriptors = BuilderValue::Set(max_file_descriptors)
|
||||
}
|
||||
|
||||
pub fn workdir(&mut self, workdir: PathBuf) {
|
||||
self.workdir = BuilderValue::Set(workdir)
|
||||
}
|
||||
|
||||
pub fn pg_distrib_dir(&mut self, pg_distrib_dir: PathBuf) {
|
||||
self.pg_distrib_dir = BuilderValue::Set(pg_distrib_dir)
|
||||
}
|
||||
|
||||
pub fn auth_type(&mut self, auth_type: AuthType) {
|
||||
self.auth_type = BuilderValue::Set(auth_type)
|
||||
}
|
||||
|
||||
pub fn auth_validation_public_key_path(
|
||||
&mut self,
|
||||
auth_validation_public_key_path: Option<PathBuf>,
|
||||
) {
|
||||
self.auth_validation_public_key_path = BuilderValue::Set(auth_validation_public_key_path)
|
||||
}
|
||||
|
||||
pub fn remote_storage_config(&mut self, remote_storage_config: Option<RemoteStorageConfig>) {
|
||||
self.remote_storage_config = BuilderValue::Set(remote_storage_config)
|
||||
}
|
||||
|
||||
pub fn id(&mut self, node_id: ZNodeId) {
|
||||
self.id = BuilderValue::Set(node_id)
|
||||
}
|
||||
|
||||
pub fn build(self) -> Result<PageServerConf> {
|
||||
Ok(PageServerConf {
|
||||
listen_pg_addr: self
|
||||
.listen_pg_addr
|
||||
.ok_or(anyhow::anyhow!("missing listen_pg_addr"))?,
|
||||
listen_http_addr: self
|
||||
.listen_http_addr
|
||||
.ok_or(anyhow::anyhow!("missing listen_http_addr"))?,
|
||||
checkpoint_distance: self
|
||||
.checkpoint_distance
|
||||
.ok_or(anyhow::anyhow!("missing checkpoint_distance"))?,
|
||||
checkpoint_period: self
|
||||
.checkpoint_period
|
||||
.ok_or(anyhow::anyhow!("missing checkpoint_period"))?,
|
||||
gc_horizon: self
|
||||
.gc_horizon
|
||||
.ok_or(anyhow::anyhow!("missing gc_horizon"))?,
|
||||
gc_period: self.gc_period.ok_or(anyhow::anyhow!("missing gc_period"))?,
|
||||
wait_lsn_timeout: self
|
||||
.wait_lsn_timeout
|
||||
.ok_or(anyhow::anyhow!("missing wait_lsn_timeout"))?,
|
||||
wal_redo_timeout: self
|
||||
.wal_redo_timeout
|
||||
.ok_or(anyhow::anyhow!("missing wal_redo_timeout"))?,
|
||||
superuser: self.superuser.ok_or(anyhow::anyhow!("missing superuser"))?,
|
||||
page_cache_size: self
|
||||
.page_cache_size
|
||||
.ok_or(anyhow::anyhow!("missing page_cache_size"))?,
|
||||
max_file_descriptors: self
|
||||
.max_file_descriptors
|
||||
.ok_or(anyhow::anyhow!("missing max_file_descriptors"))?,
|
||||
workdir: self.workdir.ok_or(anyhow::anyhow!("missing workdir"))?,
|
||||
pg_distrib_dir: self
|
||||
.pg_distrib_dir
|
||||
.ok_or(anyhow::anyhow!("missing pg_distrib_dir"))?,
|
||||
auth_type: self.auth_type.ok_or(anyhow::anyhow!("missing auth_type"))?,
|
||||
auth_validation_public_key_path: self
|
||||
.auth_validation_public_key_path
|
||||
.ok_or(anyhow::anyhow!("missing auth_validation_public_key_path"))?,
|
||||
remote_storage_config: self
|
||||
.remote_storage_config
|
||||
.ok_or(anyhow::anyhow!("missing remote_storage_config"))?,
|
||||
id: self.id.ok_or(anyhow::anyhow!("missing id"))?,
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
/// External backup storage configuration, enough for creating a client for that storage.
|
||||
#[derive(Debug, Clone, PartialEq, Eq)]
|
||||
pub struct RemoteStorageConfig {
|
||||
@@ -360,13 +144,6 @@ pub struct S3Config {
|
||||
pub access_key_id: Option<String>,
|
||||
/// "Password" to use when connecting to bucket.
|
||||
pub secret_access_key: Option<String>,
|
||||
/// A base URL to send S3 requests to.
|
||||
/// By default, the endpoint is derived from a region name, assuming it's
|
||||
/// an AWS S3 region name, erroring on wrong region name.
|
||||
/// Endpoint provides a way to support other S3 flavors and their regions.
|
||||
///
|
||||
/// Example: `http://127.0.0.1:5000`
|
||||
pub endpoint: Option<String>,
|
||||
}
|
||||
|
||||
impl std::fmt::Debug for S3Config {
|
||||
@@ -437,41 +214,57 @@ impl PageServerConf {
|
||||
///
|
||||
/// This leaves any options not present in the file in the built-in defaults.
|
||||
pub fn parse_and_validate(toml: &Document, workdir: &Path) -> Result<Self> {
|
||||
let mut builder = PageServerConfigBuilder::default();
|
||||
builder.workdir(workdir.to_owned());
|
||||
use defaults::*;
|
||||
|
||||
let mut conf = PageServerConf {
|
||||
workdir: workdir.to_path_buf(),
|
||||
|
||||
listen_pg_addr: DEFAULT_PG_LISTEN_ADDR.to_string(),
|
||||
listen_http_addr: DEFAULT_HTTP_LISTEN_ADDR.to_string(),
|
||||
checkpoint_distance: DEFAULT_CHECKPOINT_DISTANCE,
|
||||
checkpoint_period: humantime::parse_duration(DEFAULT_CHECKPOINT_PERIOD)?,
|
||||
gc_horizon: DEFAULT_GC_HORIZON,
|
||||
gc_period: humantime::parse_duration(DEFAULT_GC_PERIOD)?,
|
||||
page_cache_size: DEFAULT_PAGE_CACHE_SIZE,
|
||||
max_file_descriptors: DEFAULT_MAX_FILE_DESCRIPTORS,
|
||||
|
||||
pg_distrib_dir: PathBuf::new(),
|
||||
auth_validation_public_key_path: None,
|
||||
auth_type: AuthType::Trust,
|
||||
|
||||
remote_storage_config: None,
|
||||
|
||||
superuser: DEFAULT_SUPERUSER.to_string(),
|
||||
};
|
||||
|
||||
for (key, item) in toml.iter() {
|
||||
match key {
|
||||
"listen_pg_addr" => builder.listen_pg_addr(parse_toml_string(key, item)?),
|
||||
"listen_http_addr" => builder.listen_http_addr(parse_toml_string(key, item)?),
|
||||
"checkpoint_distance" => builder.checkpoint_distance(parse_toml_u64(key, item)?),
|
||||
"checkpoint_period" => builder.checkpoint_period(parse_toml_duration(key, item)?),
|
||||
"gc_horizon" => builder.gc_horizon(parse_toml_u64(key, item)?),
|
||||
"gc_period" => builder.gc_period(parse_toml_duration(key, item)?),
|
||||
"wait_lsn_timeout" => builder.wait_lsn_timeout(parse_toml_duration(key, item)?),
|
||||
"wal_redo_timeout" => builder.wal_redo_timeout(parse_toml_duration(key, item)?),
|
||||
"initial_superuser_name" => builder.superuser(parse_toml_string(key, item)?),
|
||||
"page_cache_size" => builder.page_cache_size(parse_toml_u64(key, item)? as usize),
|
||||
"listen_pg_addr" => conf.listen_pg_addr = parse_toml_string(key, item)?,
|
||||
"listen_http_addr" => conf.listen_http_addr = parse_toml_string(key, item)?,
|
||||
"checkpoint_distance" => conf.checkpoint_distance = parse_toml_u64(key, item)?,
|
||||
"checkpoint_period" => conf.checkpoint_period = parse_toml_duration(key, item)?,
|
||||
"gc_horizon" => conf.gc_horizon = parse_toml_u64(key, item)?,
|
||||
"gc_period" => conf.gc_period = parse_toml_duration(key, item)?,
|
||||
"initial_superuser_name" => conf.superuser = parse_toml_string(key, item)?,
|
||||
"page_cache_size" => conf.page_cache_size = parse_toml_u64(key, item)? as usize,
|
||||
"max_file_descriptors" => {
|
||||
builder.max_file_descriptors(parse_toml_u64(key, item)? as usize)
|
||||
conf.max_file_descriptors = parse_toml_u64(key, item)? as usize
|
||||
}
|
||||
"pg_distrib_dir" => {
|
||||
builder.pg_distrib_dir(PathBuf::from(parse_toml_string(key, item)?))
|
||||
conf.pg_distrib_dir = PathBuf::from(parse_toml_string(key, item)?)
|
||||
}
|
||||
"auth_validation_public_key_path" => builder.auth_validation_public_key_path(Some(
|
||||
PathBuf::from(parse_toml_string(key, item)?),
|
||||
)),
|
||||
"auth_type" => builder.auth_type(parse_toml_auth_type(key, item)?),
|
||||
"auth_validation_public_key_path" => {
|
||||
conf.auth_validation_public_key_path =
|
||||
Some(PathBuf::from(parse_toml_string(key, item)?))
|
||||
}
|
||||
"auth_type" => conf.auth_type = parse_toml_auth_type(key, item)?,
|
||||
"remote_storage" => {
|
||||
builder.remote_storage_config(Some(Self::parse_remote_storage_config(item)?))
|
||||
conf.remote_storage_config = Some(Self::parse_remote_storage_config(item)?)
|
||||
}
|
||||
"id" => builder.id(ZNodeId(parse_toml_u64(key, item)?)),
|
||||
_ => bail!("unrecognized pageserver option '{}'", key),
|
||||
}
|
||||
}
|
||||
|
||||
let mut conf = builder.build().context("invalid config")?;
|
||||
|
||||
if conf.auth_type == AuthType::ZenithJWT {
|
||||
let auth_validation_public_key_path = conf
|
||||
.auth_validation_public_key_path
|
||||
@@ -485,6 +278,9 @@ impl PageServerConf {
|
||||
);
|
||||
}
|
||||
|
||||
if conf.pg_distrib_dir == PathBuf::new() {
|
||||
conf.pg_distrib_dir = env::current_dir()?.join("tmp_install")
|
||||
};
|
||||
if !conf.pg_distrib_dir.join("bin/postgres").exists() {
|
||||
bail!(
|
||||
"Can't find postgres binary at {}",
|
||||
@@ -510,7 +306,9 @@ impl PageServerConf {
|
||||
})
|
||||
.ok()
|
||||
.and_then(NonZeroUsize::new)
|
||||
.context("'max_concurrent_sync' must be a non-zero positive integer")?
|
||||
.ok_or_else(|| {
|
||||
anyhow!("'max_concurrent_sync' must be a non-zero positive integer")
|
||||
})?
|
||||
} else {
|
||||
NonZeroUsize::new(defaults::DEFAULT_REMOTE_STORAGE_MAX_CONCURRENT_SYNC).unwrap()
|
||||
};
|
||||
@@ -523,7 +321,7 @@ impl PageServerConf {
|
||||
})
|
||||
.ok()
|
||||
.and_then(NonZeroU32::new)
|
||||
.context("'max_sync_errors' must be a non-zero positive integer")?
|
||||
.ok_or_else(|| anyhow!("'max_sync_errors' must be a non-zero positive integer"))?
|
||||
} else {
|
||||
NonZeroU32::new(defaults::DEFAULT_REMOTE_STORAGE_MAX_SYNC_ERRORS).unwrap()
|
||||
};
|
||||
@@ -553,10 +351,6 @@ impl PageServerConf {
|
||||
.get("prefix_in_bucket")
|
||||
.map(|prefix_in_bucket| parse_toml_string("prefix_in_bucket", prefix_in_bucket))
|
||||
.transpose()?,
|
||||
endpoint: toml
|
||||
.get("endpoint")
|
||||
.map(|endpoint| parse_toml_string("endpoint", endpoint))
|
||||
.transpose()?,
|
||||
}),
|
||||
(Some(local_path), None, None) => RemoteStorageKind::LocalFs(PathBuf::from(
|
||||
parse_toml_string("local_path", local_path)?,
|
||||
@@ -579,13 +373,10 @@ impl PageServerConf {
|
||||
#[cfg(test)]
|
||||
pub fn dummy_conf(repo_dir: PathBuf) -> Self {
|
||||
PageServerConf {
|
||||
id: ZNodeId(0),
|
||||
checkpoint_distance: defaults::DEFAULT_CHECKPOINT_DISTANCE,
|
||||
checkpoint_period: Duration::from_secs(10),
|
||||
gc_horizon: defaults::DEFAULT_GC_HORIZON,
|
||||
gc_period: Duration::from_secs(10),
|
||||
wait_lsn_timeout: Duration::from_secs(60),
|
||||
wal_redo_timeout: Duration::from_secs(60),
|
||||
page_cache_size: defaults::DEFAULT_PAGE_CACHE_SIZE,
|
||||
max_file_descriptors: defaults::DEFAULT_MAX_FILE_DESCRIPTORS,
|
||||
listen_pg_addr: defaults::DEFAULT_PG_LISTEN_ADDR.to_string(),
|
||||
@@ -605,7 +396,7 @@ impl PageServerConf {
|
||||
fn parse_toml_string(name: &str, item: &Item) -> Result<String> {
|
||||
let s = item
|
||||
.as_str()
|
||||
.with_context(|| format!("configure option {} is not a string", name))?;
|
||||
.ok_or_else(|| anyhow!("configure option {} is not a string", name))?;
|
||||
Ok(s.to_string())
|
||||
}
|
||||
|
||||
@@ -614,7 +405,7 @@ fn parse_toml_u64(name: &str, item: &Item) -> Result<u64> {
|
||||
// for our use, though.
|
||||
let i: i64 = item
|
||||
.as_integer()
|
||||
.with_context(|| format!("configure option {} is not an integer", name))?;
|
||||
.ok_or_else(|| anyhow!("configure option {} is not an integer", name))?;
|
||||
if i < 0 {
|
||||
bail!("configure option {} cannot be negative", name);
|
||||
}
|
||||
@@ -624,7 +415,7 @@ fn parse_toml_u64(name: &str, item: &Item) -> Result<u64> {
|
||||
fn parse_toml_duration(name: &str, item: &Item) -> Result<Duration> {
|
||||
let s = item
|
||||
.as_str()
|
||||
.with_context(|| format!("configure option {} is not a string", name))?;
|
||||
.ok_or_else(|| anyhow!("configure option {} is not a string", name))?;
|
||||
|
||||
Ok(humantime::parse_duration(s)?)
|
||||
}
|
||||
@@ -632,7 +423,7 @@ fn parse_toml_duration(name: &str, item: &Item) -> Result<Duration> {
|
||||
fn parse_toml_auth_type(name: &str, item: &Item) -> Result<AuthType> {
|
||||
let v = item
|
||||
.as_str()
|
||||
.with_context(|| format!("configure option {} is not a string", name))?;
|
||||
.ok_or_else(|| anyhow!("configure option {} is not a string", name))?;
|
||||
AuthType::from_str(v)
|
||||
}
|
||||
|
||||
@@ -656,24 +447,20 @@ checkpoint_period = '111 s'
|
||||
gc_period = '222 s'
|
||||
gc_horizon = 222
|
||||
|
||||
wait_lsn_timeout = '111 s'
|
||||
wal_redo_timeout = '111 s'
|
||||
|
||||
page_cache_size = 444
|
||||
max_file_descriptors = 333
|
||||
|
||||
# initial superuser role name to use when creating a new tenant
|
||||
initial_superuser_name = 'zzzz'
|
||||
id = 10
|
||||
|
||||
"#;
|
||||
"#;
|
||||
|
||||
#[test]
|
||||
fn parse_defaults() -> anyhow::Result<()> {
|
||||
let tempdir = tempdir()?;
|
||||
let (workdir, pg_distrib_dir) = prepare_fs(&tempdir)?;
|
||||
// we have to create dummy pathes to overcome the validation errors
|
||||
let config_string = format!("pg_distrib_dir='{}'\nid=10", pg_distrib_dir.display());
|
||||
let config_string = format!("pg_distrib_dir='{}'", pg_distrib_dir.display());
|
||||
let toml = config_string.parse()?;
|
||||
|
||||
let parsed_config =
|
||||
@@ -684,15 +471,12 @@ id = 10
|
||||
assert_eq!(
|
||||
parsed_config,
|
||||
PageServerConf {
|
||||
id: ZNodeId(10),
|
||||
listen_pg_addr: defaults::DEFAULT_PG_LISTEN_ADDR.to_string(),
|
||||
listen_http_addr: defaults::DEFAULT_HTTP_LISTEN_ADDR.to_string(),
|
||||
checkpoint_distance: defaults::DEFAULT_CHECKPOINT_DISTANCE,
|
||||
checkpoint_period: humantime::parse_duration(defaults::DEFAULT_CHECKPOINT_PERIOD)?,
|
||||
gc_horizon: defaults::DEFAULT_GC_HORIZON,
|
||||
gc_period: humantime::parse_duration(defaults::DEFAULT_GC_PERIOD)?,
|
||||
wait_lsn_timeout: humantime::parse_duration(defaults::DEFAULT_WAIT_LSN_TIMEOUT)?,
|
||||
wal_redo_timeout: humantime::parse_duration(defaults::DEFAULT_WAL_REDO_TIMEOUT)?,
|
||||
superuser: defaults::DEFAULT_SUPERUSER.to_string(),
|
||||
page_cache_size: defaults::DEFAULT_PAGE_CACHE_SIZE,
|
||||
max_file_descriptors: defaults::DEFAULT_MAX_FILE_DESCRIPTORS,
|
||||
@@ -728,15 +512,12 @@ id = 10
|
||||
assert_eq!(
|
||||
parsed_config,
|
||||
PageServerConf {
|
||||
id: ZNodeId(10),
|
||||
listen_pg_addr: "127.0.0.1:64000".to_string(),
|
||||
listen_http_addr: "127.0.0.1:9898".to_string(),
|
||||
checkpoint_distance: 111,
|
||||
checkpoint_period: Duration::from_secs(111),
|
||||
gc_horizon: 222,
|
||||
gc_period: Duration::from_secs(222),
|
||||
wait_lsn_timeout: Duration::from_secs(111),
|
||||
wal_redo_timeout: Duration::from_secs(111),
|
||||
superuser: "zzzz".to_string(),
|
||||
page_cache_size: 444,
|
||||
max_file_descriptors: 333,
|
||||
@@ -818,7 +599,6 @@ pg_distrib_dir='{}'
|
||||
let prefix_in_bucket = "test_prefix".to_string();
|
||||
let access_key_id = "SOMEKEYAAAAASADSAH*#".to_string();
|
||||
let secret_access_key = "SOMEsEcReTsd292v".to_string();
|
||||
let endpoint = "http://localhost:5000".to_string();
|
||||
let max_concurrent_sync = NonZeroUsize::new(111).unwrap();
|
||||
let max_sync_errors = NonZeroU32::new(222).unwrap();
|
||||
|
||||
@@ -831,13 +611,12 @@ bucket_name = '{}'
|
||||
bucket_region = '{}'
|
||||
prefix_in_bucket = '{}'
|
||||
access_key_id = '{}'
|
||||
secret_access_key = '{}'
|
||||
endpoint = '{}'"#,
|
||||
max_concurrent_sync, max_sync_errors, bucket_name, bucket_region, prefix_in_bucket, access_key_id, secret_access_key, endpoint
|
||||
secret_access_key = '{}'"#,
|
||||
max_concurrent_sync, max_sync_errors, bucket_name, bucket_region, prefix_in_bucket, access_key_id, secret_access_key
|
||||
),
|
||||
format!(
|
||||
"remote_storage={{max_concurrent_sync={}, max_sync_errors={}, bucket_name='{}', bucket_region='{}', prefix_in_bucket='{}', access_key_id='{}', secret_access_key='{}', endpoint='{}'}}",
|
||||
max_concurrent_sync, max_sync_errors, bucket_name, bucket_region, prefix_in_bucket, access_key_id, secret_access_key, endpoint
|
||||
"remote_storage={{max_concurrent_sync={}, max_sync_errors={}, bucket_name='{}', bucket_region='{}', prefix_in_bucket='{}', access_key_id='{}', secret_access_key='{}'}}",
|
||||
max_concurrent_sync, max_sync_errors, bucket_name, bucket_region, prefix_in_bucket, access_key_id, secret_access_key
|
||||
),
|
||||
];
|
||||
|
||||
@@ -871,8 +650,7 @@ pg_distrib_dir='{}'
|
||||
bucket_region: bucket_region.clone(),
|
||||
access_key_id: Some(access_key_id.clone()),
|
||||
secret_access_key: Some(secret_access_key.clone()),
|
||||
prefix_in_bucket: Some(prefix_in_bucket.clone()),
|
||||
endpoint: Some(endpoint.clone())
|
||||
prefix_in_bucket: Some(prefix_in_bucket.clone())
|
||||
}),
|
||||
},
|
||||
"Remote storage config should correctly parse the S3 config"
|
||||
|
||||
@@ -1,7 +1,6 @@
use serde::{Deserialize, Serialize};

use crate::ZTenantId;
use zenith_utils::zid::ZNodeId;

#[derive(Serialize, Deserialize)]
pub struct BranchCreateRequest {
@@ -16,8 +15,3 @@ pub struct TenantCreateRequest {
#[serde(with = "hex")]
pub tenant_id: ZTenantId,
}

#[derive(Serialize)]
pub struct StatusResponse {
pub id: ZNodeId,
}

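A minimal sketch of what the Serialize derive on StatusResponse above provides, assuming serde_json as the serializer and a plain u64 standing in for the project's ZNodeId:

use serde::Serialize;

#[derive(Serialize)]
struct StatusResponse {
    id: u64, // stands in for ZNodeId here
}

fn main() -> anyhow::Result<()> {
    let body = serde_json::to_string(&StatusResponse { id: 10 })?;
    assert_eq!(body, r#"{"id":10}"#);
    println!("{body}");
    Ok(())
}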
@@ -17,11 +17,6 @@ paths:
application/json:
schema:
type: object
required:
- id
properties:
id:
type: integer
/v1/timeline/{tenant_id}:
parameters:
- name: tenant_id
@@ -239,7 +234,9 @@ paths:
content:
application/json:
schema:
$ref: "#/components/schemas/BranchInfo"
type: array
items:
$ref: "#/components/schemas/BranchInfo"
"400":
description: Malformed branch create request
content:
@@ -373,15 +370,12 @@ components:
format: hex
ancestor_id:
type: string
format: hex
ancestor_lsn:
type: string
current_logical_size:
type: integer
current_logical_size_non_incremental:
type: integer
latest_valid_lsn:
type: integer
TimelineInfo:
type: object
required:

@@ -1,8 +1,10 @@
|
||||
use std::sync::Arc;
|
||||
|
||||
use anyhow::{Context, Result};
|
||||
use hyper::header;
|
||||
use hyper::StatusCode;
|
||||
use hyper::{Body, Request, Response, Uri};
|
||||
use routerify::{ext::RequestExt, RouterBuilder};
|
||||
use serde::Serialize;
|
||||
use tracing::*;
|
||||
use zenith_utils::auth::JwtAuth;
|
||||
@@ -17,16 +19,12 @@ use zenith_utils::http::{
|
||||
request::get_request_param,
|
||||
request::parse_request_param,
|
||||
};
|
||||
use zenith_utils::http::{RequestExt, RouterBuilder};
|
||||
use zenith_utils::lsn::Lsn;
|
||||
use zenith_utils::zid::HexZTimelineId;
|
||||
use zenith_utils::zid::ZTimelineId;
|
||||
use zenith_utils::zid::{opt_display_serde, ZTimelineId};
|
||||
|
||||
use super::models::BranchCreateRequest;
|
||||
use super::models::StatusResponse;
|
||||
use super::models::TenantCreateRequest;
|
||||
use crate::branches::BranchInfo;
|
||||
use crate::repository::RepositoryTimeline;
|
||||
use crate::repository::TimelineSyncState;
|
||||
use crate::{branches, config::PageServerConf, tenant_mgr, ZTenantId};
|
||||
|
||||
@@ -65,12 +63,12 @@ fn get_config(request: &Request<Body>) -> &'static PageServerConf {
}

// healthcheck handler
async fn status_handler(request: Request<Body>) -> Result<Response<Body>, ApiError> {
let config = get_config(&request);
Ok(json_response(
StatusCode::OK,
StatusResponse { id: config.id },
)?)
async fn status_handler(_: Request<Body>) -> Result<Response<Body>, ApiError> {
Ok(Response::builder()
.status(StatusCode::OK)
.header(header::CONTENT_TYPE, "application/json")
.body(Body::from("{}"))
.map_err(ApiError::from_err)?)
}

async fn branch_create_handler(mut request: Request<Body>) -> Result<Response<Body>, ApiError> {
|
||||
@@ -199,9 +197,11 @@ enum TimelineInfo {
timeline_id: ZTimelineId,
#[serde(with = "hex")]
tenant_id: ZTenantId,
ancestor_timeline_id: Option<HexZTimelineId>,
#[serde(with = "opt_display_serde")]
ancestor_timeline_id: Option<ZTimelineId>,
last_record_lsn: Lsn,
prev_record_lsn: Lsn,
start_lsn: Lsn,
disk_consistent_lsn: Lsn,
timeline_state: Option<TimelineSyncState>,
},
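A hedged sketch of what a module like `opt_display_serde` plausibly provides for the field above: serializing an Option<T> through Display and deserializing it through FromStr. This is an illustration of the pattern, not the project's actual implementation:

use serde::{de, Deserialize, Deserializer, Serializer};
use std::{fmt::Display, str::FromStr};

pub fn serialize<T, S>(value: &Option<T>, serializer: S) -> Result<S::Ok, S::Error>
where
    T: Display,
    S: Serializer,
{
    match value {
        Some(v) => serializer.serialize_some(&v.to_string()),
        None => serializer.serialize_none(),
    }
}

pub fn deserialize<'de, T, D>(deserializer: D) -> Result<Option<T>, D::Error>
where
    T: FromStr,
    T::Err: Display,
    D: Deserializer<'de>,
{
    Option::<String>::deserialize(deserializer)?
        .map(|s| s.parse::<T>().map_err(de::Error::custom))
        .transpose()
}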
@@ -232,12 +232,11 @@ async fn timeline_detail_handler(request: Request<Body>) -> Result<Response<Body
|
||||
Some(timeline) => TimelineInfo::Local {
|
||||
timeline_id,
|
||||
tenant_id,
|
||||
ancestor_timeline_id: timeline
|
||||
.get_ancestor_timeline_id()
|
||||
.map(HexZTimelineId::from),
|
||||
ancestor_timeline_id: timeline.get_ancestor_timeline_id(),
|
||||
disk_consistent_lsn: timeline.get_disk_consistent_lsn(),
|
||||
last_record_lsn: timeline.get_last_record_lsn(),
|
||||
prev_record_lsn: timeline.get_prev_record_lsn(),
|
||||
start_lsn: timeline.get_start_lsn(),
|
||||
timeline_state: repo.get_timeline_state(timeline_id),
|
||||
},
|
||||
})
|
||||
@@ -248,58 +247,6 @@ async fn timeline_detail_handler(request: Request<Body>) -> Result<Response<Body
|
||||
Ok(json_response(StatusCode::OK, response_data)?)
|
||||
}
|
||||
|
||||
async fn timeline_attach_handler(request: Request<Body>) -> Result<Response<Body>, ApiError> {
|
||||
let tenant_id: ZTenantId = parse_request_param(&request, "tenant_id")?;
|
||||
check_permission(&request, Some(tenant_id))?;
|
||||
|
||||
let timeline_id: ZTimelineId = parse_request_param(&request, "timeline_id")?;
|
||||
|
||||
tokio::task::spawn_blocking(move || {
|
||||
let _enter =
|
||||
info_span!("timeline_attach_handler", tenant = %tenant_id, timeline = %timeline_id)
|
||||
.entered();
|
||||
let repo = tenant_mgr::get_repository_for_tenant(tenant_id)?;
|
||||
match repo.get_timeline(timeline_id)? {
|
||||
RepositoryTimeline::Local(_) => {
|
||||
anyhow::bail!("Timeline with id {} is already local", timeline_id)
|
||||
}
|
||||
RepositoryTimeline::Remote {
|
||||
id: _,
|
||||
disk_consistent_lsn: _,
|
||||
} => {
|
||||
// FIXME (rodionov) get timeline already schedules timeline for download, and duplicate tasks can cause errors
|
||||
// first should be fixed in https://github.com/zenithdb/zenith/issues/997
|
||||
// TODO (rodionov) change timeline state to awaits download (incapsulate it somewhere in the repo)
|
||||
// TODO (rodionov) can we safely request replication on the timeline before sync is completed? (can be implemented on top of the #997)
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
})
|
||||
.await
|
||||
.map_err(ApiError::from_err)??;
|
||||
|
||||
Ok(json_response(StatusCode::ACCEPTED, ())?)
|
||||
}
|
||||
|
||||
async fn timeline_detach_handler(request: Request<Body>) -> Result<Response<Body>, ApiError> {
|
||||
let tenant_id: ZTenantId = parse_request_param(&request, "tenant_id")?;
|
||||
check_permission(&request, Some(tenant_id))?;
|
||||
|
||||
let timeline_id: ZTimelineId = parse_request_param(&request, "timeline_id")?;
|
||||
|
||||
tokio::task::spawn_blocking(move || {
|
||||
let _enter =
|
||||
info_span!("timeline_detach_handler", tenant = %tenant_id, timeline = %timeline_id)
|
||||
.entered();
|
||||
let repo = tenant_mgr::get_repository_for_tenant(tenant_id)?;
|
||||
repo.detach_timeline(timeline_id)
|
||||
})
|
||||
.await
|
||||
.map_err(ApiError::from_err)??;
|
||||
|
||||
Ok(json_response(StatusCode::OK, ())?)
|
||||
}
|
||||
|
||||
async fn tenant_list_handler(request: Request<Body>) -> Result<Response<Body>, ApiError> {
|
||||
// check for management permission
|
||||
check_permission(&request, None)?;
|
||||
@@ -320,13 +267,13 @@ async fn tenant_create_handler(mut request: Request<Body>) -> Result<Response<Bo
|
||||
|
||||
let request_data: TenantCreateRequest = json_request(&mut request).await?;
|
||||
|
||||
tokio::task::spawn_blocking(move || {
|
||||
let response_data = tokio::task::spawn_blocking(move || {
|
||||
let _enter = info_span!("tenant_create", tenant = %request_data.tenant_id).entered();
|
||||
tenant_mgr::create_repository_for_tenant(get_config(&request), request_data.tenant_id)
|
||||
})
|
||||
.await
|
||||
.map_err(ApiError::from_err)??;
|
||||
Ok(json_response(StatusCode::CREATED, ())?)
|
||||
Ok(json_response(StatusCode::CREATED, response_data)?)
|
||||
}
|
||||
|
||||
async fn handler_404(_: Request<Body>) -> Result<Response<Body>, ApiError> {
|
||||
@@ -361,14 +308,6 @@ pub fn make_router(
|
||||
"/v1/timeline/:tenant_id/:timeline_id",
|
||||
timeline_detail_handler,
|
||||
)
|
||||
.post(
|
||||
"/v1/timeline/:tenant_id/:timeline_id/attach",
|
||||
timeline_attach_handler,
|
||||
)
|
||||
.post(
|
||||
"/v1/timeline/:tenant_id/:timeline_id/detach",
|
||||
timeline_detach_handler,
|
||||
)
|
||||
.get("/v1/branch/:tenant_id", branch_list_handler)
|
||||
.get("/v1/branch/:tenant_id/:branch_name", branch_detail_handler)
|
||||
.post("/v1/branch", branch_create_handler)
|
||||
|
||||
@@ -7,7 +7,7 @@ use std::fs::File;
|
||||
use std::io::{Read, Seek, SeekFrom};
|
||||
use std::path::{Path, PathBuf};
|
||||
|
||||
use anyhow::{bail, ensure, Context, Result};
|
||||
use anyhow::{anyhow, bail, ensure, Result};
|
||||
use bytes::Bytes;
|
||||
use tracing::*;
|
||||
|
||||
@@ -126,7 +126,7 @@ pub fn import_timeline_from_postgres_datadir(
|
||||
writer.advance_last_record_lsn(lsn);
|
||||
|
||||
// We expect the Postgres server to be shut down cleanly.
|
||||
let pg_control = pg_control.context("pg_control file not found")?;
|
||||
let pg_control = pg_control.ok_or_else(|| anyhow!("pg_control file not found"))?;
|
||||
ensure!(
|
||||
pg_control.state == DBState_DB_SHUTDOWNED,
|
||||
"Postgres cluster was not shut down cleanly"
|
||||
|
||||
@@ -11,7 +11,7 @@
|
||||
//! parent timeline, and the last LSN that has been written to disk.
|
||||
//!
|
||||
|
||||
use anyhow::{bail, ensure, Context, Result};
|
||||
use anyhow::{anyhow, bail, ensure, Context, Result};
|
||||
use bookfile::Book;
|
||||
use bytes::Bytes;
|
||||
use lazy_static::lazy_static;
|
||||
@@ -28,8 +28,8 @@ use std::io::Write;
|
||||
use std::ops::{Bound::Included, Deref};
|
||||
use std::path::{Path, PathBuf};
|
||||
use std::sync::atomic::{self, AtomicBool, AtomicUsize};
|
||||
use std::sync::{Arc, Mutex, MutexGuard, RwLock, RwLockReadGuard};
|
||||
use std::time::Instant;
|
||||
use std::sync::{Arc, Mutex, MutexGuard};
|
||||
use std::time::{Duration, Instant};
|
||||
|
||||
use self::metadata::{metadata_path, TimelineMetadata, METADATA_FILE_NAME};
|
||||
use crate::config::PageServerConf;
|
||||
@@ -64,12 +64,14 @@ mod inmemory_layer;
|
||||
mod interval_tree;
|
||||
mod layer_map;
|
||||
pub mod metadata;
|
||||
mod page_versions;
|
||||
mod par_fsync;
|
||||
mod storage_layer;
|
||||
|
||||
use delta_layer::DeltaLayer;
|
||||
use ephemeral_file::is_ephemeral_file;
|
||||
use filename::{DeltaFileName, ImageFileName};
|
||||
use global_layer_map::{LayerId, GLOBAL_LAYER_MAP};
|
||||
use image_layer::ImageLayer;
|
||||
use inmemory_layer::InMemoryLayer;
|
||||
use layer_map::LayerMap;
|
||||
@@ -82,6 +84,9 @@ pub use crate::layered_repository::ephemeral_file::writeback as writeback_epheme
|
||||
|
||||
static ZERO_PAGE: Bytes = Bytes::from_static(&[0u8; 8192]);
|
||||
|
||||
// Timeout when waiting for WAL receiver to catch up to an LSN given in a GetPage@LSN call.
|
||||
static TIMEOUT: Duration = Duration::from_secs(60);
|
||||
|
||||
// Metrics collected on operations on the storage repository.
|
||||
lazy_static! {
|
||||
static ref STORAGE_TIME: HistogramVec = register_histogram_vec!(
|
||||
@@ -162,7 +167,7 @@ impl Repository for LayeredRepository {
|
||||
// Create the timeline directory, and write initial metadata to file.
|
||||
crashsafe_dir::create_dir_all(self.conf.timeline_path(&timelineid, &self.tenantid))?;
|
||||
|
||||
let metadata = TimelineMetadata::new(Lsn(0), None, None, Lsn(0), initdb_lsn, initdb_lsn);
|
||||
let metadata = TimelineMetadata::new(Lsn(0), None, None, Lsn(0), Lsn(0), initdb_lsn);
|
||||
Self::save_metadata(self.conf, timelineid, self.tenantid, &metadata, true)?;
|
||||
|
||||
let timeline = LayeredTimeline::new(
|
||||
@@ -196,10 +201,9 @@ impl Repository for LayeredRepository {
|
||||
bail!("Cannot branch off the timeline {} that's not local", src)
|
||||
}
|
||||
};
|
||||
let latest_gc_cutoff_lsn = src_timeline.get_latest_gc_cutoff_lsn();
|
||||
|
||||
src_timeline
|
||||
.check_lsn_is_in_scope(start_lsn, &latest_gc_cutoff_lsn)
|
||||
.check_lsn_is_in_scope(start_lsn)
|
||||
.context("invalid branch start lsn")?;
|
||||
|
||||
let RecordLsn {
|
||||
@@ -227,7 +231,7 @@ impl Repository for LayeredRepository {
|
||||
dst_prev,
|
||||
Some(src),
|
||||
start_lsn,
|
||||
*src_timeline.latest_gc_cutoff_lsn.read().unwrap(),
|
||||
src_timeline.latest_gc_cutoff_lsn.load(),
|
||||
src_timeline.initdb_lsn,
|
||||
);
|
||||
crashsafe_dir::create_dir_all(self.conf.timeline_path(&dst, &self.tenantid))?;
|
||||
@@ -281,46 +285,7 @@ impl Repository for LayeredRepository {
|
||||
Ok(())
|
||||
}
|
||||
|
||||
// Detaches the timeline from the repository.
|
||||
fn detach_timeline(&self, timeline_id: ZTimelineId) -> Result<()> {
|
||||
let mut timelines = self.timelines.lock().unwrap();
|
||||
match timelines.entry(timeline_id) {
|
||||
Entry::Vacant(_) => {
|
||||
bail!("cannot detach non existing timeline");
|
||||
}
|
||||
Entry::Occupied(mut entry) => {
|
||||
let timeline_entry = entry.get_mut();
|
||||
|
||||
let timeline = match timeline_entry {
|
||||
LayeredTimelineEntry::Remote { .. } => {
|
||||
bail!("cannot detach remote timeline {}", timeline_id);
|
||||
}
|
||||
LayeredTimelineEntry::Local(timeline) => timeline,
|
||||
};
|
||||
|
||||
// TODO (rodionov) keep local state in timeline itself (refactoring related to https://github.com/zenithdb/zenith/issues/997 and #1104)
|
||||
|
||||
// FIXME this is local disk consistent lsn, need to keep the latest succesfully uploaded checkpoint lsn in timeline (metadata?)
|
||||
// https://github.com/zenithdb/zenith/issues/1104
|
||||
let remote_disk_consistent_lsn = timeline.disk_consistent_lsn.load();
|
||||
// reference to timeline is dropped here
|
||||
entry.insert(LayeredTimelineEntry::Remote {
|
||||
id: timeline_id,
|
||||
disk_consistent_lsn: remote_disk_consistent_lsn,
|
||||
});
|
||||
}
|
||||
};
|
||||
// Release the lock to shutdown and remove the files without holding it
|
||||
drop(timelines);
|
||||
// shutdown the timeline (this shuts down the walreceiver)
|
||||
thread_mgr::shutdown_threads(None, Some(self.tenantid), Some(timeline_id));
|
||||
|
||||
// remove timeline files (maybe avoid this for ease of debugging if something goes wrong)
|
||||
fs::remove_dir_all(self.conf.timeline_path(&timeline_id, &self.tenantid))?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
// TODO this method currentlly does not do anything to prevent (or react to) state updates between a sync task schedule and a sync task end (that causes this update).
|
||||
// TODO this method currently does not do anything to prevent (or react to) state updates between a sync task schedule and a sync task end (that causes this update).
|
||||
// Sync task is enqueued and can error and be rescheduled, so some significant time may pass between the events.
|
||||
//
|
||||
/// Reacts on the timeline sync state change, changing pageserver's memory state for this timeline (unload or load of the timeline files).
|
||||
@@ -329,10 +294,6 @@ impl Repository for LayeredRepository {
|
||||
timeline_id: ZTimelineId,
|
||||
new_state: TimelineSyncState,
|
||||
) -> Result<()> {
|
||||
debug!(
|
||||
"set_timeline_state: timeline_id: {}, new_state: {:?}",
|
||||
timeline_id, new_state
|
||||
);
|
||||
let mut timelines_accessor = self.timelines.lock().unwrap();
|
||||
|
||||
match new_state {
|
||||
@@ -353,7 +314,6 @@ impl Repository for LayeredRepository {
|
||||
},
|
||||
),
|
||||
};
|
||||
// NOTE we do not delete local data in case timeline became cloud only, this is performed in detach_timeline
|
||||
drop(timelines_accessor);
|
||||
|
||||
Ok(())
|
||||
@@ -607,7 +567,7 @@ impl LayeredRepository {
|
||||
}
|
||||
}
|
||||
|
||||
// Now collect info about branchpoints
|
||||
//Now collect info about branchpoints
|
||||
let mut all_branchpoints: BTreeSet<(ZTimelineId, Lsn)> = BTreeSet::new();
|
||||
for &timelineid in &timelineids {
|
||||
let timeline = match self.get_or_init_timeline(timelineid, &mut timelines)? {
|
||||
@@ -691,6 +651,7 @@ impl LayeredRepository {
|
||||
timeline.checkpoint(CheckpointConfig::Forced)?;
|
||||
info!("timeline {} checkpoint_before_gc done", timelineid);
|
||||
}
|
||||
|
||||
let result = timeline.gc_timeline(branchpoints, cutoff)?;
|
||||
|
||||
totals += result;
|
||||
@@ -779,7 +740,7 @@ pub struct LayeredTimeline {
|
||||
checkpoint_cs: Mutex<()>,
|
||||
|
||||
// Needed to ensure that we can't create a branch at a point that was already garbage collected
|
||||
latest_gc_cutoff_lsn: RwLock<Lsn>,
|
||||
latest_gc_cutoff_lsn: AtomicLsn,
|
||||
|
||||
// It may change across major versions so for simplicity
|
||||
// keep it after running initdb for a timeline.
|
||||
@@ -812,7 +773,7 @@ impl Timeline for LayeredTimeline {
|
||||
);
|
||||
|
||||
self.last_record_lsn
|
||||
.wait_for_timeout(lsn, self.conf.wait_lsn_timeout)
|
||||
.wait_for_timeout(lsn, TIMEOUT)
|
||||
.with_context(|| {
|
||||
format!(
|
||||
"Timed out while waiting for WAL record at LSN {} to arrive, last_record_lsn {} disk consistent LSN={}",
|
||||
@@ -823,10 +784,6 @@ impl Timeline for LayeredTimeline {
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn get_latest_gc_cutoff_lsn(&self) -> RwLockReadGuard<Lsn> {
|
||||
self.latest_gc_cutoff_lsn.read().unwrap()
|
||||
}
|
||||
|
||||
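The hunks around here swap latest_gc_cutoff_lsn from RwLock<Lsn> to an AtomicLsn and drop the read-guard accessor above. A hedged sketch of what an AtomicLsn-style wrapper over AtomicU64 can look like (`from` and `load` appear in the hunks; `store` and the Lsn definition here are assumptions for a self-contained example):

use std::sync::atomic::{AtomicU64, Ordering};

#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
pub struct Lsn(pub u64);

pub struct AtomicLsn(AtomicU64);

impl AtomicLsn {
    pub fn load(&self) -> Lsn {
        Lsn(self.0.load(Ordering::Acquire))
    }
    pub fn store(&self, lsn: Lsn) {
        self.0.store(lsn.0, Ordering::Release)
    }
}

impl From<Lsn> for AtomicLsn {
    fn from(lsn: Lsn) -> Self {
        AtomicLsn(AtomicU64::new(lsn.0))
    }
}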
/// Look up given page version.
|
||||
fn get_page_at_lsn(&self, rel: RelishTag, rel_blknum: BlockNumber, lsn: Lsn) -> Result<Bytes> {
|
||||
if !rel.is_blocky() && rel_blknum != 0 {
|
||||
@@ -837,6 +794,14 @@ impl Timeline for LayeredTimeline {
|
||||
);
|
||||
}
|
||||
debug_assert!(lsn <= self.get_last_record_lsn());
|
||||
let latest_gc_cutoff_lsn = self.latest_gc_cutoff_lsn.load();
|
||||
// error instead of assert to simplify testing
|
||||
ensure!(
|
||||
lsn >= latest_gc_cutoff_lsn,
|
||||
"tried to request a page version that was garbage collected. requested at {} gc cutoff {}",
|
||||
lsn, latest_gc_cutoff_lsn
|
||||
);
|
||||
|
||||
let (seg, seg_blknum) = SegmentTag::from_blknum(rel, rel_blknum);
|
||||
|
||||
if let Some((layer, lsn)) = self.get_layer_for_read(seg, lsn)? {
|
||||
@@ -893,11 +858,12 @@ impl Timeline for LayeredTimeline {
|
||||
|
||||
let seg = SegmentTag { rel, segno: 0 };
|
||||
|
||||
let result = if let Some((layer, lsn)) = self.get_layer_for_read(seg, lsn)? {
|
||||
layer.get_seg_exists(lsn)?
|
||||
let result;
|
||||
if let Some((layer, lsn)) = self.get_layer_for_read(seg, lsn)? {
|
||||
result = layer.get_seg_exists(lsn)?;
|
||||
} else {
|
||||
false
|
||||
};
|
||||
result = false;
|
||||
}
|
||||
|
||||
trace!("get_rel_exists: {} at {} -> {}", rel, lsn, result);
|
||||
Ok(result)
|
||||
@@ -1006,16 +972,21 @@ impl Timeline for LayeredTimeline {
|
||||
///
|
||||
/// Validate lsn against initdb_lsn and latest_gc_cutoff_lsn.
|
||||
///
|
||||
fn check_lsn_is_in_scope(
|
||||
&self,
|
||||
lsn: Lsn,
|
||||
latest_gc_cutoff_lsn: &RwLockReadGuard<Lsn>,
|
||||
) -> Result<()> {
|
||||
fn check_lsn_is_in_scope(&self, lsn: Lsn) -> Result<()> {
|
||||
let initdb_lsn = self.initdb_lsn;
|
||||
ensure!(
|
||||
lsn >= **latest_gc_cutoff_lsn,
|
||||
lsn >= initdb_lsn,
|
||||
"LSN {} is earlier than initdb lsn {}",
|
||||
lsn,
|
||||
initdb_lsn,
|
||||
);
|
||||
|
||||
let latest_gc_cutoff_lsn = self.latest_gc_cutoff_lsn.load();
|
||||
ensure!(
|
||||
lsn >= latest_gc_cutoff_lsn,
|
||||
"LSN {} is earlier than latest GC horizon {} (we might've already garbage collected needed data)",
|
||||
lsn,
|
||||
**latest_gc_cutoff_lsn,
|
||||
latest_gc_cutoff_lsn,
|
||||
);
|
||||
Ok(())
|
||||
}
|
||||
@@ -1032,6 +1003,14 @@ impl Timeline for LayeredTimeline {
|
||||
self.last_record_lsn.load()
|
||||
}
|
||||
|
||||
fn get_start_lsn(&self) -> Lsn {
|
||||
self.ancestor_timeline
|
||||
.as_ref()
|
||||
.and_then(|ancestor_entry| ancestor_entry.local_or_schedule_download(self.tenantid))
|
||||
.map(Timeline::get_start_lsn)
|
||||
.unwrap_or(self.ancestor_lsn)
|
||||
}
|
||||
|
||||
fn get_current_logical_size(&self) -> usize {
|
||||
self.current_logical_size.load(atomic::Ordering::Acquire) as usize
|
||||
}
|
||||
@@ -1121,7 +1100,7 @@ impl LayeredTimeline {
|
||||
write_lock: Mutex::new(()),
|
||||
checkpoint_cs: Mutex::new(()),
|
||||
|
||||
latest_gc_cutoff_lsn: RwLock::new(metadata.latest_gc_cutoff_lsn()),
|
||||
latest_gc_cutoff_lsn: AtomicLsn::from(metadata.latest_gc_cutoff_lsn()),
|
||||
initdb_lsn: metadata.initdb_lsn(),
|
||||
}
|
||||
}
|
||||
@@ -1147,8 +1126,8 @@ impl LayeredTimeline {
|
||||
// create an ImageLayer struct for each image file.
|
||||
if imgfilename.lsn > disk_consistent_lsn {
|
||||
warn!(
|
||||
"found future image layer {} on timeline {} disk_consistent_lsn is {}",
|
||||
imgfilename, self.timelineid, disk_consistent_lsn
|
||||
"found future image layer {} on timeline {}",
|
||||
imgfilename, self.timelineid
|
||||
);
|
||||
|
||||
rename_to_backup(direntry.path())?;
|
||||
@@ -1171,8 +1150,8 @@ impl LayeredTimeline {
|
||||
// before crash.
|
||||
if deltafilename.end_lsn > disk_consistent_lsn + 1 {
|
||||
warn!(
|
||||
"found future delta layer {} on timeline {} disk_consistent_lsn is {}",
|
||||
deltafilename, self.timelineid, disk_consistent_lsn
|
||||
"found future delta layer {} on timeline {}",
|
||||
deltafilename, self.timelineid
|
||||
);
|
||||
|
||||
rename_to_backup(direntry.path())?;
|
||||
@@ -1368,7 +1347,7 @@ impl LayeredTimeline {
|
||||
self.tenantid,
|
||||
seg,
|
||||
lsn,
|
||||
last_record_lsn,
|
||||
lsn,
|
||||
)?;
|
||||
} else {
|
||||
return Ok(open_layer);
|
||||
@@ -1411,7 +1390,7 @@ impl LayeredTimeline {
|
||||
self.timelineid,
|
||||
self.tenantid,
|
||||
start_lsn,
|
||||
last_record_lsn,
|
||||
lsn,
|
||||
)?;
|
||||
} else {
|
||||
// New relation.
|
||||
@@ -1422,14 +1401,8 @@ impl LayeredTimeline {
|
||||
lsn
|
||||
);
|
||||
|
||||
layer = InMemoryLayer::create(
|
||||
self.conf,
|
||||
self.timelineid,
|
||||
self.tenantid,
|
||||
seg,
|
||||
lsn,
|
||||
last_record_lsn,
|
||||
)?;
|
||||
layer =
|
||||
InMemoryLayer::create(self.conf, self.timelineid, self.tenantid, seg, lsn, lsn)?;
|
||||
}
|
||||
|
||||
let layer_rc: Arc<InMemoryLayer> = Arc::new(layer);
|
||||
@@ -1446,7 +1419,7 @@ impl LayeredTimeline {
|
||||
// Prevent concurrent checkpoints
|
||||
let _checkpoint_cs = self.checkpoint_cs.lock().unwrap();
|
||||
|
||||
let write_guard = self.write_lock.lock().unwrap();
|
||||
let mut write_guard = self.write_lock.lock().unwrap();
|
||||
let mut layers = self.layers.lock().unwrap();
|
||||
|
||||
// Bump the generation number in the layer map, so that we can distinguish
|
||||
@@ -1472,17 +1445,11 @@ impl LayeredTimeline {
|
||||
let mut disk_consistent_lsn = last_record_lsn;
|
||||
|
||||
let mut layer_paths = Vec::new();
|
||||
let mut freeze_end_lsn = Lsn(0);
|
||||
let mut evicted_layers = Vec::new();
|
||||
|
||||
//
|
||||
// Determine which layers we need to evict and calculate max(latest_lsn)
|
||||
// among those layers.
|
||||
//
|
||||
while let Some((oldest_layer_id, oldest_layer, oldest_generation)) =
|
||||
layers.peek_oldest_open()
|
||||
{
|
||||
let oldest_lsn = oldest_layer.get_oldest_lsn();
|
||||
let oldest_pending_lsn = oldest_layer.get_oldest_pending_lsn();
|
||||
|
||||
// Does this layer need freezing?
|
||||
//
|
||||
// Write out all in-memory layers that contain WAL older than CHECKPOINT_DISTANCE.
|
||||
@@ -1491,60 +1458,28 @@ impl LayeredTimeline {
|
||||
// when we started. We don't want to process layers inserted after we started, to
|
||||
// avoid getting into an infinite loop trying to re-process entries that we
|
||||
// inserted ourselves.
|
||||
//
|
||||
// Once we have decided to write out at least one layer, we must also write out
|
||||
// any other layers that contain WAL older than the end LSN of the layers we have
|
||||
// already decided to write out. In other words, we must write out all layers
|
||||
// whose [oldest_lsn, latest_lsn) range overlaps with any of the other layers
|
||||
// that we are writing out. Otherwise, when we advance 'disk_consistent_lsn', it's
|
||||
// ambiguous whether those layers are already durable on disk or not. For example,
|
||||
// imagine that there are two layers in memory that contain page versions in the
|
||||
// following LSN ranges:
|
||||
//
|
||||
// A: 100-150
|
||||
// B: 110-200
|
||||
//
|
||||
// If we flush layer A, we must also flush layer B, because they overlap. If we
|
||||
// flushed only A, and advanced 'disk_consistent_lsn' to 150, we would break the
|
||||
// rule that all WAL older than 'disk_consistent_lsn' are durable on disk, because
|
||||
// B contains some WAL older than 150. On the other hand, if we flushed out A and
|
||||
// advanced 'disk_consistent_lsn' only up to 110, after crash and restart we would
|
||||
// delete the first layer because its end LSN is larger than 110. If we changed
|
||||
// the deletion logic to not delete it, then we would start streaming at 110, and
|
||||
// process again the WAL records in the range 110-150 that are already in layer A,
|
||||
// and the WAL processing code does not cope with that. We solve that dilemma by
|
||||
// insisting that if we write out the first layer, we also write out the second
|
||||
// layer, and advance disk_consistent_lsn all the way up to 200.
|
||||
//
|
||||
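The rule spelled out in the comment above amounts to a closure computation: once one layer is chosen for flushing, every open layer that still contains WAL older than the chosen end LSN must be flushed too, and disk_consistent_lsn may only advance to the maximum end of that set. A small self-contained sketch of that closure over plain (u64, u64) ranges, not the pageserver's real types:

    /// Given in-memory layer ranges [oldest_lsn, latest_lsn) and one seed layer
    /// that must be flushed, return every layer that has to be flushed with it
    /// plus the LSN that disk_consistent_lsn may advance to. Plain u64s stand in
    /// for Lsn.
    fn flush_closure(ranges: &[(u64, u64)], seed: usize) -> (Vec<usize>, u64) {
        let mut chosen = vec![seed];
        let mut flush_end = ranges[seed].1;
        loop {
            let mut grew = false;
            for (i, &(start, end)) in ranges.iter().enumerate() {
                // Must also be flushed if it contains WAL older than the
                // current flush end.
                if !chosen.contains(&i) && start < flush_end {
                    chosen.push(i);
                    flush_end = flush_end.max(end);
                    grew = true;
                }
            }
            if !grew {
                break;
            }
        }
        chosen.sort_unstable();
        (chosen, flush_end)
    }

    fn main() {
        // The example from the comment: A = 100..150, B = 110..200.
        let ranges = [(100, 150), (110, 200)];
        let (layers, lsn) = flush_closure(&ranges, 0);
        assert_eq!(layers, vec![0, 1]); // flushing A forces flushing B
        assert_eq!(lsn, 200);           // disk_consistent_lsn can go to 200
    }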
let distance = last_record_lsn.widening_sub(oldest_lsn);
|
||||
if (distance < 0
|
||||
let distance = last_record_lsn.widening_sub(oldest_pending_lsn);
|
||||
if distance < 0
|
||||
|| distance < checkpoint_distance.into()
|
||||
|| oldest_generation == current_generation)
|
||||
&& oldest_lsn >= freeze_end_lsn
|
||||
// this layer intersects with an evicted layer and so also needs to be evicted
|
||||
|| oldest_generation == current_generation
|
||||
{
|
||||
info!(
|
||||
"the oldest layer is now {} which is {} bytes behind last_record_lsn",
|
||||
oldest_layer.filename().display(),
|
||||
distance
|
||||
);
|
||||
disk_consistent_lsn = oldest_lsn;
|
||||
disk_consistent_lsn = oldest_pending_lsn;
|
||||
break;
|
||||
}
|
||||
let latest_lsn = oldest_layer.get_latest_lsn();
|
||||
if latest_lsn > freeze_end_lsn {
|
||||
freeze_end_lsn = latest_lsn; // calculate max of latest_lsn of the layers we're about to evict
|
||||
}
|
||||
layers.remove_open(oldest_layer_id);
|
||||
evicted_layers.push((oldest_layer_id, oldest_layer));
|
||||
}
|
||||
|
||||
// Freeze evicted layers
|
||||
for (_evicted_layer_id, evicted_layer) in evicted_layers.iter() {
|
||||
// Mark the layer as no longer accepting writes and record the end_lsn.
|
||||
// This happens in-place, no new layers are created now.
|
||||
evicted_layer.freeze(freeze_end_lsn);
|
||||
layers.insert_historic(evicted_layer.clone());
|
||||
drop(layers);
|
||||
drop(write_guard);
|
||||
|
||||
let mut this_layer_paths = self.evict_layer(oldest_layer_id, reconstruct_pages)?;
|
||||
layer_paths.append(&mut this_layer_paths);
|
||||
|
||||
write_guard = self.write_lock.lock().unwrap();
|
||||
layers = self.layers.lock().unwrap();
|
||||
}
|
||||
|
||||
// Call unload() on all frozen layers, to release memory.
|
||||
@@ -1557,14 +1492,6 @@ impl LayeredTimeline {
|
||||
drop(layers);
|
||||
drop(write_guard);
|
||||
|
||||
// Create delta/image layers for evicted layers
|
||||
for (_evicted_layer_id, evicted_layer) in evicted_layers.iter() {
|
||||
let mut this_layer_paths =
|
||||
self.evict_layer(evicted_layer.clone(), reconstruct_pages)?;
|
||||
layer_paths.append(&mut this_layer_paths);
|
||||
}
|
||||
|
||||
// Sync layers
|
||||
if !layer_paths.is_empty() {
|
||||
// We must fsync the timeline dir to ensure the directory entries for
|
||||
// new layer files are durable
|
||||
@@ -1605,7 +1532,7 @@ impl LayeredTimeline {
|
||||
ondisk_prev_record_lsn,
|
||||
ancestor_timelineid,
|
||||
self.ancestor_lsn,
|
||||
*self.latest_gc_cutoff_lsn.read().unwrap(),
|
||||
self.latest_gc_cutoff_lsn.load(),
|
||||
self.initdb_lsn,
|
||||
);
|
||||
|
||||
@@ -1632,29 +1559,52 @@ impl LayeredTimeline {
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn evict_layer(
|
||||
&self,
|
||||
layer: Arc<InMemoryLayer>,
|
||||
reconstruct_pages: bool,
|
||||
) -> Result<Vec<PathBuf>> {
|
||||
let new_historics = layer.write_to_disk(self, reconstruct_pages)?;
|
||||
fn evict_layer(&self, layer_id: LayerId, reconstruct_pages: bool) -> Result<Vec<PathBuf>> {
|
||||
// Mark the layer as no longer accepting writes and record the end_lsn.
|
||||
// This happens in-place, no new layers are created now.
|
||||
// We call `get_last_record_lsn` again, which may be different from the
|
||||
// original load, as we may have released the write lock since then.
|
||||
|
||||
let mut layer_paths = Vec::new();
|
||||
let _write_guard = self.write_lock.lock().unwrap();
|
||||
let mut write_guard = self.write_lock.lock().unwrap();
|
||||
let mut layers = self.layers.lock().unwrap();
|
||||
|
||||
// Finally, replace the frozen in-memory layer with the new on-disk layers
|
||||
layers.remove_historic(layer);
|
||||
let mut layer_paths = Vec::new();
|
||||
|
||||
// Add the historics to the LayerMap
|
||||
for delta_layer in new_historics.delta_layers {
|
||||
layer_paths.push(delta_layer.path());
|
||||
layers.insert_historic(Arc::new(delta_layer));
|
||||
}
|
||||
for image_layer in new_historics.image_layers {
|
||||
layer_paths.push(image_layer.path());
|
||||
layers.insert_historic(Arc::new(image_layer));
|
||||
let global_layer_map = GLOBAL_LAYER_MAP.read().unwrap();
|
||||
if let Some(oldest_layer) = global_layer_map.get(&layer_id) {
|
||||
drop(global_layer_map);
|
||||
oldest_layer.freeze(self.get_last_record_lsn());
|
||||
|
||||
// The layer is no longer open, update the layer map to reflect this.
|
||||
// We will replace it with on-disk historics below.
|
||||
layers.remove_open(layer_id);
|
||||
layers.insert_historic(oldest_layer.clone());
|
||||
|
||||
// Write the now-frozen layer to disk. That could take a while, so release the lock while we do it
|
||||
drop(layers);
|
||||
drop(write_guard);
|
||||
|
||||
let new_historics = oldest_layer.write_to_disk(self, reconstruct_pages)?;
|
||||
|
||||
write_guard = self.write_lock.lock().unwrap();
|
||||
layers = self.layers.lock().unwrap();
|
||||
|
||||
// Finally, replace the frozen in-memory layer with the new on-disk layers
|
||||
layers.remove_historic(oldest_layer);
|
||||
|
||||
// Add the historics to the LayerMap
|
||||
for delta_layer in new_historics.delta_layers {
|
||||
layer_paths.push(delta_layer.path());
|
||||
layers.insert_historic(Arc::new(delta_layer));
|
||||
}
|
||||
for image_layer in new_historics.image_layers {
|
||||
layer_paths.push(image_layer.path());
|
||||
layers.insert_historic(Arc::new(image_layer));
|
||||
}
|
||||
}
|
||||
drop(layers);
|
||||
drop(write_guard);
|
||||
|
||||
Ok(layer_paths)
|
||||
}
|
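The reworked `evict_layer` follows a common pattern: take the locks to update the layer map, drop them before the slow `write_to_disk` call, then reacquire them to publish the result. A stripped-down sketch of that pattern over a plain `Mutex<Vec<String>>`, with no pageserver types:

    use std::sync::Mutex;
    use std::thread;
    use std::time::Duration;

    struct Map {
        layers: Mutex<Vec<String>>,
    }

    impl Map {
        /// Same shape as evict_layer: mutate under the lock, release it while
        /// doing slow I/O, then reacquire it to install the result.
        fn evict(&self, name: &str) {
            let mut layers = self.layers.lock().unwrap();
            layers.retain(|l| l != name); // remove the in-memory entry
            drop(layers);                 // don't hold the lock across slow work

            // Stand-in for write_to_disk(): slow, lock-free work.
            thread::sleep(Duration::from_millis(10));
            let on_disk = format!("{}.ondisk", name);

            let mut layers = self.layers.lock().unwrap();
            layers.push(on_disk);         // publish the new on-disk layer
        }
    }

    fn main() {
        let map = Map { layers: Mutex::new(vec!["open-layer".to_string()]) };
        map.evict("open-layer");
        assert_eq!(map.layers.lock().unwrap()[0], "open-layer.ondisk");
    }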
||||
|
||||
@@ -1683,14 +1633,12 @@ impl LayeredTimeline {
|
||||
pub fn gc_timeline(&self, retain_lsns: Vec<Lsn>, cutoff: Lsn) -> Result<GcResult> {
|
||||
let now = Instant::now();
|
||||
let mut result: GcResult = Default::default();
|
||||
let disk_consistent_lsn = self.get_disk_consistent_lsn();
|
||||
let _checkpoint_cs = self.checkpoint_cs.lock().unwrap();
|
||||
|
||||
let _enter = info_span!("garbage collection", timeline = %self.timelineid, tenant = %self.tenantid, cutoff = %cutoff).entered();
|
||||
|
||||
// We need to ensure that no one branches at a point before latest_gc_cutoff_lsn.
|
||||
// See branch_timeline() for details.
|
||||
*self.latest_gc_cutoff_lsn.write().unwrap() = cutoff;
|
||||
self.latest_gc_cutoff_lsn.store(cutoff);
|
||||
|
||||
info!("GC starting");
|
||||
|
||||
@@ -1770,12 +1718,7 @@ impl LayeredTimeline {
|
||||
}
|
||||
|
||||
// 3. Is there a later on-disk layer for this relation?
|
||||
if !l.is_dropped()
|
||||
&& !layers.newer_image_layer_exists(
|
||||
l.get_seg_tag(),
|
||||
l.get_end_lsn(),
|
||||
disk_consistent_lsn,
|
||||
)
|
||||
if !l.is_dropped() && !layers.newer_image_layer_exists(l.get_seg_tag(), l.get_end_lsn())
|
||||
{
|
||||
info!(
|
||||
"keeping {} {}-{} because it is the latest layer",
|
||||
@@ -1939,21 +1882,22 @@ impl LayeredTimeline {
|
||||
// for redo.
|
||||
let rel = seg.rel;
|
||||
let rel_blknum = seg.segno * RELISH_SEG_SIZE + seg_blknum;
|
||||
let cached_page_img = match self.lookup_cached_page(&rel, rel_blknum, lsn) {
|
||||
let (cached_lsn_opt, cached_page_opt) = match self.lookup_cached_page(&rel, rel_blknum, lsn)
|
||||
{
|
||||
Some((cached_lsn, cached_img)) => {
|
||||
match cached_lsn.cmp(&lsn) {
|
||||
cmp::Ordering::Less => {} // there might be WAL between cached_lsn and lsn, we need to check
|
||||
cmp::Ordering::Equal => return Ok(cached_img), // exact LSN match, return the image
|
||||
cmp::Ordering::Greater => panic!(), // the returned lsn should never be after the requested lsn
|
||||
}
|
||||
Some((cached_lsn, cached_img))
|
||||
(Some(cached_lsn), Some((cached_lsn, cached_img)))
|
||||
}
|
||||
None => None,
|
||||
None => (None, None),
|
||||
};
|
||||
|
||||
let mut data = PageReconstructData {
|
||||
records: Vec::new(),
|
||||
page_img: cached_page_img,
|
||||
page_img: None,
|
||||
};
|
||||
|
||||
// Holds an Arc reference to 'layer_ref' when iterating in the loop below.
|
||||
@@ -1966,14 +1910,15 @@ impl LayeredTimeline {
|
||||
let mut curr_lsn = lsn;
|
||||
loop {
|
||||
let result = layer_ref
|
||||
.get_page_reconstruct_data(seg_blknum, curr_lsn, &mut data)
|
||||
.get_page_reconstruct_data(seg_blknum, curr_lsn, cached_lsn_opt, &mut data)
|
||||
.with_context(|| {
|
||||
format!(
|
||||
"Failed to get reconstruct data {} {:?} {} {}",
|
||||
"Failed to get reconstruct data {} {:?} {} {} {:?}",
|
||||
layer_ref.get_seg_tag(),
|
||||
layer_ref.filename(),
|
||||
seg_blknum,
|
||||
curr_lsn,
|
||||
cached_lsn_opt,
|
||||
)
|
||||
})?;
|
||||
match result {
|
||||
@@ -2020,6 +1965,16 @@ impl LayeredTimeline {
|
||||
lsn,
|
||||
);
|
||||
}
|
||||
PageReconstructResult::Cached => {
|
||||
let (cached_lsn, cached_img) = cached_page_opt.unwrap();
|
||||
assert!(data.page_img.is_none());
|
||||
if let Some((first_rec_lsn, first_rec)) = data.records.first() {
|
||||
assert!(&cached_lsn < first_rec_lsn);
|
||||
assert!(!first_rec.will_init());
|
||||
}
|
||||
data.page_img = Some(cached_img);
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -2041,12 +1996,12 @@ impl LayeredTimeline {
|
||||
|
||||
// If we have a page image, and no WAL, we're all set
|
||||
if data.records.is_empty() {
|
||||
if let Some((img_lsn, img)) = &data.page_img {
|
||||
if let Some(img) = &data.page_img {
|
||||
trace!(
|
||||
"found page image for blk {} in {} at {}, no WAL redo required",
|
||||
rel_blknum,
|
||||
rel,
|
||||
img_lsn
|
||||
request_lsn
|
||||
);
|
||||
Ok(img.clone())
|
||||
} else {
|
||||
@@ -2073,13 +2028,11 @@ impl LayeredTimeline {
|
||||
);
|
||||
Ok(ZERO_PAGE.clone())
|
||||
} else {
|
||||
let base_img = if let Some((_lsn, img)) = data.page_img {
|
||||
if data.page_img.is_some() {
|
||||
trace!("found {} WAL records and a base image for blk {} in {} at {}, performing WAL redo", data.records.len(), rel_blknum, rel, request_lsn);
|
||||
Some(img)
|
||||
} else {
|
||||
trace!("found {} WAL records that will init the page for blk {} in {} at {}, performing WAL redo", data.records.len(), rel_blknum, rel, request_lsn);
|
||||
None
|
||||
};
|
||||
}
|
||||
|
||||
let last_rec_lsn = data.records.last().unwrap().0;
|
||||
|
||||
@@ -2087,7 +2040,7 @@ impl LayeredTimeline {
|
||||
rel,
|
||||
rel_blknum,
|
||||
request_lsn,
|
||||
base_img,
|
||||
data.page_img.clone(),
|
||||
data.records,
|
||||
)?;
|
||||
|
||||
@@ -2218,10 +2171,11 @@ impl<'a> TimelineWriter for LayeredTimelineWriter<'a> {
|
||||
let oldsize = self
|
||||
.tl
|
||||
.get_relish_size(rel, self.tl.get_last_record_lsn())?
|
||||
.with_context(|| {
|
||||
format!(
|
||||
.ok_or_else(|| {
|
||||
anyhow!(
|
||||
"attempted to truncate non-existent relish {} at {}",
|
||||
rel, lsn
|
||||
rel,
|
||||
lsn
|
||||
)
|
||||
})?;
|
||||
|
||||
@@ -2344,159 +2298,8 @@ fn rename_to_backup(path: PathBuf) -> anyhow::Result<()> {
|
||||
}
|
||||
}
|
||||
|
||||
bail!("couldn't find an unused backup number for {:?}", path)
|
||||
}
|
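`rename_to_backup` has to find an unused `<name>.<num>.old` suffix before renaming a "future" layer file out of the way; the error above fires only when every candidate number is taken. A hedged sketch of that search, assuming a bounded number of attempts (the real bound isn't shown in this hunk):

    use anyhow::{anyhow, Result};
    use std::path::{Path, PathBuf};

    /// Pick the first <file>.<num>.old path that doesn't exist yet.
    /// The bound of 100 attempts is an assumption for this sketch.
    fn backup_path(path: &Path) -> Result<PathBuf> {
        let name = path
            .file_name()
            .ok_or_else(|| anyhow!("path {:?} has no file name", path))?
            .to_string_lossy();
        for i in 0u32..100 {
            let candidate = path.with_file_name(format!("{}.{}.old", name, i));
            if !candidate.exists() {
                return Ok(candidate);
            }
        }
        Err(anyhow!("couldn't find an unused backup number for {:?}", path))
    }

    fn main() -> Result<()> {
        let p = backup_path(Path::new("/tmp/pg_control_0_0000000000008001"))?;
        println!("would rename to {}", p.display());
        Ok(())
    }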
||||
|
||||
///
|
||||
/// Tests that are specific to the layered storage format.
|
||||
///
|
||||
/// There are more unit tests in repository.rs that work through the
|
||||
/// Repository interface and are expected to work regardless of the
|
||||
/// file format and directory layout. The tests here are more low-level.
|
||||
///
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
use crate::repository::repo_harness::*;
|
||||
|
||||
#[test]
|
||||
fn corrupt_metadata() -> Result<()> {
|
||||
const TEST_NAME: &str = "corrupt_metadata";
|
||||
let harness = RepoHarness::create(TEST_NAME)?;
|
||||
let repo = harness.load();
|
||||
|
||||
repo.create_empty_timeline(TIMELINE_ID, Lsn(0))?;
|
||||
drop(repo);
|
||||
|
||||
let metadata_path = harness.timeline_path(&TIMELINE_ID).join(METADATA_FILE_NAME);
|
||||
|
||||
assert!(metadata_path.is_file());
|
||||
|
||||
let mut metadata_bytes = std::fs::read(&metadata_path)?;
|
||||
assert_eq!(metadata_bytes.len(), 512);
|
||||
metadata_bytes[512 - 4 - 2] ^= 1;
|
||||
std::fs::write(metadata_path, metadata_bytes)?;
|
||||
|
||||
let new_repo = harness.load();
|
||||
let err = new_repo.get_timeline(TIMELINE_ID).err().unwrap();
|
||||
assert_eq!(err.to_string(), "failed to load metadata");
|
||||
assert_eq!(
|
||||
err.source().unwrap().to_string(),
|
||||
"metadata checksum mismatch"
|
||||
);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
///
|
||||
/// Test the logic in 'load_layer_map' that removes layer files that are
|
||||
/// newer than 'disk_consistent_lsn'.
|
||||
///
|
||||
#[test]
|
||||
fn future_layerfiles() -> Result<()> {
|
||||
const TEST_NAME: &str = "future_layerfiles";
|
||||
let harness = RepoHarness::create(TEST_NAME)?;
|
||||
let repo = harness.load();
|
||||
|
||||
// Create a timeline with disk_consistent_lsn = 8000
|
||||
let tline = repo.create_empty_timeline(TIMELINE_ID, Lsn(0x8000))?;
|
||||
let writer = tline.writer();
|
||||
writer.advance_last_record_lsn(Lsn(0x8000));
|
||||
drop(writer);
|
||||
repo.checkpoint_iteration(CheckpointConfig::Forced)?;
|
||||
drop(repo);
|
||||
|
||||
let timeline_path = harness.timeline_path(&TIMELINE_ID);
|
||||
|
||||
let make_empty_file = |filename: &str| -> std::io::Result<()> {
|
||||
let path = timeline_path.join(filename);
|
||||
|
||||
assert!(!path.exists());
|
||||
std::fs::write(&path, &[])?;
|
||||
|
||||
Ok(())
|
||||
};
|
||||
|
||||
// Helper function to check that a relation file exists, and a corresponding
|
||||
// <filename>.0.old file does not.
|
||||
let assert_exists = |filename: &str| {
|
||||
let path = timeline_path.join(filename);
|
||||
assert!(path.exists(), "file {} was removed", filename);
|
||||
|
||||
// Check that there is no .old file
|
||||
let backup_path = timeline_path.join(format!("{}.0.old", filename));
|
||||
assert!(
|
||||
!backup_path.exists(),
|
||||
"unexpected backup file {}",
|
||||
backup_path.display()
|
||||
);
|
||||
};
|
||||
|
||||
// Helper function to check that a relation file does *not* exist, and a corresponding
|
||||
// <filename>.<num>.old file does.
|
||||
let assert_is_renamed = |filename: &str, num: u32| {
|
||||
let path = timeline_path.join(filename);
|
||||
assert!(
|
||||
!path.exists(),
|
||||
"file {} was not removed as expected",
|
||||
filename
|
||||
);
|
||||
|
||||
let backup_path = timeline_path.join(format!("{}.{}.old", filename, num));
|
||||
assert!(
|
||||
backup_path.exists(),
|
||||
"backup file {} was not created",
|
||||
backup_path.display()
|
||||
);
|
||||
};
|
||||
|
||||
// These files are considered to be in the future and will be renamed out
|
||||
// of the way
|
||||
let future_filenames = vec![
|
||||
format!("pg_control_0_{:016X}", 0x8001),
|
||||
format!("pg_control_0_{:016X}_{:016X}", 0x8001, 0x8008),
|
||||
];
|
||||
// But these are not:
|
||||
let past_filenames = vec![
|
||||
format!("pg_control_0_{:016X}", 0x8000),
|
||||
format!("pg_control_0_{:016X}_{:016X}", 0x7000, 0x8001),
|
||||
];
|
||||
|
||||
for filename in future_filenames.iter().chain(past_filenames.iter()) {
|
||||
make_empty_file(filename)?;
|
||||
}
|
||||
|
||||
// Load the timeline. This will cause the files in the "future" to be renamed
|
||||
// away.
|
||||
let new_repo = harness.load();
|
||||
new_repo.get_timeline(TIMELINE_ID).unwrap();
|
||||
drop(new_repo);
|
||||
|
||||
for filename in future_filenames.iter() {
|
||||
assert_is_renamed(filename, 0);
|
||||
}
|
||||
for filename in past_filenames.iter() {
|
||||
assert_exists(filename);
|
||||
}
|
||||
|
||||
// Create the future files again, and load again. They should be renamed to
|
||||
// *.1.old this time.
|
||||
for filename in future_filenames.iter() {
|
||||
make_empty_file(filename)?;
|
||||
}
|
||||
|
||||
let new_repo = harness.load();
|
||||
new_repo.get_timeline(TIMELINE_ID).unwrap();
|
||||
drop(new_repo);
|
||||
|
||||
for filename in future_filenames.iter() {
|
||||
assert_is_renamed(filename, 0);
|
||||
assert_is_renamed(filename, 1);
|
||||
}
|
||||
for filename in past_filenames.iter() {
|
||||
assert_exists(filename);
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
Err(anyhow!(
|
||||
"couldn't find an unused backup number for {:?}",
|
||||
path
|
||||
))
|
||||
}
|
||||
|
||||
@@ -169,7 +169,7 @@ impl DeltaLayerInner {
|
||||
if let Some((_entry_lsn, entry)) = slice.last() {
|
||||
Ok(*entry)
|
||||
} else {
|
||||
bail!("could not find seg size in delta layer")
|
||||
Err(anyhow::anyhow!("could not find seg size in delta layer"))
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -208,15 +208,16 @@ impl Layer for DeltaLayer {
|
||||
&self,
|
||||
blknum: SegmentBlk,
|
||||
lsn: Lsn,
|
||||
cached_img_lsn: Option<Lsn>,
|
||||
reconstruct_data: &mut PageReconstructData,
|
||||
) -> Result<PageReconstructResult> {
|
||||
let mut need_image = true;
|
||||
|
||||
assert!((0..RELISH_SEG_SIZE).contains(&blknum));
|
||||
|
||||
match &reconstruct_data.page_img {
|
||||
Some((cached_lsn, _)) if &self.end_lsn <= cached_lsn => {
|
||||
return Ok(PageReconstructResult::Complete)
|
||||
match &cached_img_lsn {
|
||||
Some(cached_lsn) if &self.end_lsn <= cached_lsn => {
|
||||
return Ok(PageReconstructResult::Cached)
|
||||
}
|
||||
_ => {}
|
||||
}
|
||||
@@ -239,9 +240,9 @@ impl Layer for DeltaLayer {
|
||||
.iter()
|
||||
.rev();
|
||||
for ((_blknum, pv_lsn), blob_range) in iter {
|
||||
match &reconstruct_data.page_img {
|
||||
Some((cached_lsn, _)) if pv_lsn <= cached_lsn => {
|
||||
return Ok(PageReconstructResult::Complete)
|
||||
match &cached_img_lsn {
|
||||
Some(cached_lsn) if pv_lsn <= cached_lsn => {
|
||||
return Ok(PageReconstructResult::Cached)
|
||||
}
|
||||
_ => {}
|
||||
}
|
||||
@@ -251,7 +252,7 @@ impl Layer for DeltaLayer {
|
||||
match pv {
|
||||
PageVersion::Page(img) => {
|
||||
// Found a page image, return it
|
||||
reconstruct_data.page_img = Some((*pv_lsn, img));
|
||||
reconstruct_data.page_img = Some(img);
|
||||
need_image = false;
|
||||
break;
|
||||
}
|
||||
|
||||
@@ -175,10 +175,7 @@ impl Write for EphemeralFile {
|
||||
}
|
||||
|
||||
fn flush(&mut self) -> Result<(), std::io::Error> {
|
||||
// we don't need to flush data:
|
||||
// * we either write input bytes or not, not keeping any intermediate data buffered
|
||||
// * Rust's unix `File` `flush` impl does not flush anything either, returning `Ok(())`
|
||||
Ok(())
|
||||
todo!()
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -145,15 +145,14 @@ impl Layer for ImageLayer {
|
||||
&self,
|
||||
blknum: SegmentBlk,
|
||||
lsn: Lsn,
|
||||
cached_img_lsn: Option<Lsn>,
|
||||
reconstruct_data: &mut PageReconstructData,
|
||||
) -> Result<PageReconstructResult> {
|
||||
assert!((0..RELISH_SEG_SIZE).contains(&blknum));
|
||||
assert!(lsn >= self.lsn);
|
||||
|
||||
match reconstruct_data.page_img {
|
||||
Some((cached_lsn, _)) if self.lsn <= cached_lsn => {
|
||||
return Ok(PageReconstructResult::Complete)
|
||||
}
|
||||
match cached_img_lsn {
|
||||
Some(cached_lsn) if self.lsn <= cached_lsn => return Ok(PageReconstructResult::Cached),
|
||||
_ => {}
|
||||
}
|
||||
|
||||
@@ -174,14 +173,7 @@ impl Layer for ImageLayer {
|
||||
.as_ref()
|
||||
.unwrap()
|
||||
.chapter_reader(BLOCKY_IMAGES_CHAPTER)?;
|
||||
|
||||
chapter.read_exact_at(&mut buf, offset).with_context(|| {
|
||||
format!(
|
||||
"failed to read page from data file {} at offset {}",
|
||||
self.filename().display(),
|
||||
offset
|
||||
)
|
||||
})?;
|
||||
chapter.read_exact_at(&mut buf, offset)?;
|
||||
|
||||
buf
|
||||
}
|
||||
@@ -196,7 +188,7 @@ impl Layer for ImageLayer {
|
||||
}
|
||||
};
|
||||
|
||||
reconstruct_data.page_img = Some((self.lsn, Bytes::from(buf)));
|
||||
reconstruct_data.page_img = Some(Bytes::from(buf));
|
||||
Ok(PageReconstructResult::Complete)
|
||||
}
|
||||
|
||||
|
||||
@@ -20,15 +20,13 @@ use crate::{ZTenantId, ZTimelineId};
|
||||
use anyhow::{ensure, Result};
|
||||
use bytes::Bytes;
|
||||
use log::*;
|
||||
use std::collections::HashMap;
|
||||
use std::io::Seek;
|
||||
use std::os::unix::fs::FileExt;
|
||||
use std::path::PathBuf;
|
||||
use std::sync::{Arc, RwLock};
|
||||
use zenith_utils::bin_ser::BeSer;
|
||||
use zenith_utils::lsn::Lsn;
|
||||
use zenith_utils::vec_map::VecMap;
|
||||
|
||||
use super::page_versions::PageVersions;
|
||||
|
||||
pub struct InMemoryLayer {
|
||||
conf: &'static PageServerConf,
|
||||
tenantid: ZTenantId,
|
||||
@@ -41,20 +39,8 @@ pub struct InMemoryLayer {
|
||||
///
|
||||
start_lsn: Lsn,
|
||||
|
||||
///
|
||||
/// LSN of the oldest page version stored in this layer.
|
||||
///
|
||||
/// This is different from 'start_lsn' in that we enforce that the 'start_lsn'
|
||||
/// of a layer always matches the 'end_lsn' of its predecessor, even if there
|
||||
/// are no page versions until a later LSN. That way you can detect any
|
||||
/// missing layer files more easily. 'oldest_lsn' is the first page version
|
||||
/// actually stored in this layer. In the range between 'start_lsn' and
|
||||
/// 'oldest_lsn', there are no changes to the segment.
|
||||
/// 'oldest_lsn' is used to adjust 'disk_consistent_lsn' and that is why it should
|
||||
/// point to the beginning of WAL record. This is the other difference with 'start_lsn'
|
||||
/// which points to end of WAL record. This is why 'oldest_lsn' can be smaller than 'start_lsn'.
|
||||
///
|
||||
oldest_lsn: Lsn,
|
||||
/// LSN of the oldest page version stored in this layer
|
||||
oldest_pending_lsn: Lsn,
|
||||
|
||||
/// The above fields never change. The parts that do change are in 'inner',
|
||||
/// and protected by mutex.
|
||||
@@ -73,15 +59,11 @@ pub struct InMemoryLayerInner {
|
||||
/// The drop LSN is recorded in [`end_lsn`].
|
||||
dropped: bool,
|
||||
|
||||
/// The PageVersion structs are stored in a serialized format in this file.
|
||||
/// Each serialized PageVersion is preceded by a 'u32' length field.
|
||||
/// 'page_versions' map stores offsets into this file.
|
||||
file: EphemeralFile,
|
||||
|
||||
/// Metadata about all versions of all pages in the layer is kept
|
||||
/// here. Indexed by block number and LSN. The value is an offset
|
||||
/// into the ephemeral file where the page version is stored.
|
||||
page_versions: HashMap<SegmentBlk, VecMap<Lsn, u64>>,
|
||||
///
|
||||
/// All versions of all pages in the layer are kept here.
|
||||
/// Indexed by block number and LSN.
|
||||
///
|
||||
page_versions: PageVersions,
|
||||
|
||||
///
|
||||
/// `seg_sizes` tracks the size of the segment at different points in time.
|
||||
@@ -91,14 +73,6 @@ pub struct InMemoryLayerInner {
|
||||
/// a non-blocky rel, 'seg_sizes' is not used and is always empty.
|
||||
///
|
||||
seg_sizes: VecMap<Lsn, SegmentBlk>,
|
||||
|
||||
///
|
||||
/// LSN of the newest page version stored in this layer.
|
||||
///
|
||||
/// The difference between 'end_lsn' and 'latest_lsn' is the same as between
|
||||
/// 'start_lsn' and 'oldest_lsn'. See comments in 'oldest_lsn'.
|
||||
///
|
||||
latest_lsn: Lsn,
|
||||
}
|
||||
|
||||
impl InMemoryLayerInner {
|
||||
@@ -117,50 +91,6 @@ impl InMemoryLayerInner {
|
||||
panic!("could not find seg size in in-memory layer");
|
||||
}
|
||||
}
|
||||
|
||||
///
|
||||
/// Read a page version from the ephemeral file.
|
||||
///
|
||||
fn read_pv(&self, off: u64) -> Result<PageVersion> {
|
||||
let mut buf = Vec::new();
|
||||
self.read_pv_bytes(off, &mut buf)?;
|
||||
Ok(PageVersion::des(&buf)?)
|
||||
}
|
||||
|
||||
///
|
||||
/// Read a page version from the ephemeral file, as raw bytes, at
|
||||
/// the given offset. The bytes are read into 'buf', which is
|
||||
/// expanded if necessary. Returns the size of the page version.
|
||||
///
|
||||
fn read_pv_bytes(&self, off: u64, buf: &mut Vec<u8>) -> Result<usize> {
|
||||
// read length
|
||||
let mut lenbuf = [0u8; 4];
|
||||
self.file.read_exact_at(&mut lenbuf, off)?;
|
||||
let len = u32::from_ne_bytes(lenbuf) as usize;
|
||||
|
||||
if buf.len() < len {
|
||||
buf.resize(len, 0);
|
||||
}
|
||||
self.file.read_exact_at(&mut buf[0..len], off + 4)?;
|
||||
Ok(len)
|
||||
}
|
||||
|
||||
fn write_pv(&mut self, pv: &PageVersion) -> Result<u64> {
|
||||
// remember starting position
|
||||
let pos = self.file.stream_position()?;
|
||||
|
||||
// make room for the 'length' field by writing zeros as a placeholder.
|
||||
self.file.seek(std::io::SeekFrom::Start(pos + 4)).unwrap();
|
||||
|
||||
pv.ser_into(&mut self.file).unwrap();
|
||||
|
||||
// write the 'length' field.
|
||||
let len = self.file.stream_position()? - pos - 4;
|
||||
let lenbuf = u32::to_ne_bytes(len as u32);
|
||||
self.file.write_all_at(&lenbuf, pos)?;
|
||||
|
||||
Ok(pos)
|
||||
}
|
||||
}
|
||||
|
||||
impl Layer for InMemoryLayer {
|
||||
@@ -170,11 +100,12 @@ impl Layer for InMemoryLayer {
|
||||
fn filename(&self) -> PathBuf {
|
||||
let inner = self.inner.read().unwrap();
|
||||
|
||||
let end_lsn = if let Some(drop_lsn) = inner.end_lsn {
|
||||
drop_lsn
|
||||
let end_lsn;
|
||||
if let Some(drop_lsn) = inner.end_lsn {
|
||||
end_lsn = drop_lsn;
|
||||
} else {
|
||||
Lsn(u64::MAX)
|
||||
};
|
||||
end_lsn = Lsn(u64::MAX);
|
||||
}
|
||||
|
||||
let delta_filename = DeltaFileName {
|
||||
seg: self.seg,
|
||||
@@ -223,6 +154,7 @@ impl Layer for InMemoryLayer {
|
||||
&self,
|
||||
blknum: SegmentBlk,
|
||||
lsn: Lsn,
|
||||
cached_img_lsn: Option<Lsn>,
|
||||
reconstruct_data: &mut PageReconstructData,
|
||||
) -> Result<PageReconstructResult> {
|
||||
let mut need_image = true;
|
||||
@@ -233,31 +165,33 @@ impl Layer for InMemoryLayer {
|
||||
let inner = self.inner.read().unwrap();
|
||||
|
||||
// Scan the page versions backwards, starting from `lsn`.
|
||||
if let Some(vec_map) = inner.page_versions.get(&blknum) {
|
||||
let slice = vec_map.slice_range(..=lsn);
|
||||
for (entry_lsn, pos) in slice.iter().rev() {
|
||||
match &reconstruct_data.page_img {
|
||||
Some((cached_lsn, _)) if entry_lsn <= cached_lsn => {
|
||||
return Ok(PageReconstructResult::Complete)
|
||||
}
|
||||
_ => {}
|
||||
let iter = inner
|
||||
.page_versions
|
||||
.get_block_lsn_range(blknum, ..=lsn)
|
||||
.iter()
|
||||
.rev();
|
||||
for (entry_lsn, pos) in iter {
|
||||
match &cached_img_lsn {
|
||||
Some(cached_lsn) if entry_lsn <= cached_lsn => {
|
||||
return Ok(PageReconstructResult::Cached)
|
||||
}
|
||||
_ => {}
|
||||
}
|
||||
|
||||
let pv = inner.read_pv(*pos)?;
|
||||
match pv {
|
||||
PageVersion::Page(img) => {
|
||||
reconstruct_data.page_img = Some((*entry_lsn, img));
|
||||
let pv = inner.page_versions.read_pv(*pos)?;
|
||||
match pv {
|
||||
PageVersion::Page(img) => {
|
||||
reconstruct_data.page_img = Some(img);
|
||||
need_image = false;
|
||||
break;
|
||||
}
|
||||
PageVersion::Wal(rec) => {
|
||||
reconstruct_data.records.push((*entry_lsn, rec.clone()));
|
||||
if rec.will_init() {
|
||||
// This WAL record initializes the page, so no need to go further back
|
||||
need_image = false;
|
||||
break;
|
||||
}
|
||||
PageVersion::Wal(rec) => {
|
||||
reconstruct_data.records.push((*entry_lsn, rec.clone()));
|
||||
if rec.will_init() {
|
||||
// This WAL record initializes the page, so no need to go further back
|
||||
need_image = false;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -363,22 +297,14 @@ impl Layer for InMemoryLayer {
|
||||
println!("seg_sizes {}: {}", k, v);
|
||||
}
|
||||
|
||||
// List the blocks in order
|
||||
let mut page_versions: Vec<(&SegmentBlk, &VecMap<Lsn, u64>)> =
|
||||
inner.page_versions.iter().collect();
|
||||
page_versions.sort_by_key(|k| k.0);
|
||||
for (blknum, lsn, pos) in inner.page_versions.ordered_page_version_iter(None) {
|
||||
let pv = inner.page_versions.read_pv(pos)?;
|
||||
let pv_description = match pv {
|
||||
PageVersion::Page(_img) => "page",
|
||||
PageVersion::Wal(_rec) => "wal",
|
||||
};
|
||||
|
||||
for (blknum, versions) in page_versions {
|
||||
for (lsn, off) in versions.as_slice() {
|
||||
let pv = inner.read_pv(*off);
|
||||
let pv_description = match pv {
|
||||
Ok(PageVersion::Page(_img)) => "page",
|
||||
Ok(PageVersion::Wal(_rec)) => "wal",
|
||||
Err(_err) => "INVALID",
|
||||
};
|
||||
|
||||
println!("blk {} at {}: {}\n", blknum, lsn, pv_description);
|
||||
}
|
||||
println!("blk {} at {}: {}\n", blknum, lsn, pv_description);
|
||||
}
|
||||
|
||||
Ok(())
|
||||
@@ -393,13 +319,8 @@ pub struct LayersOnDisk {
|
||||
|
||||
impl InMemoryLayer {
|
||||
/// Return the oldest page version that's stored in this layer
|
||||
pub fn get_oldest_lsn(&self) -> Lsn {
|
||||
self.oldest_lsn
|
||||
}
|
||||
|
||||
pub fn get_latest_lsn(&self) -> Lsn {
|
||||
let inner = self.inner.read().unwrap();
|
||||
inner.latest_lsn
|
||||
pub fn get_oldest_pending_lsn(&self) -> Lsn {
|
||||
self.oldest_pending_lsn
|
||||
}
|
||||
|
||||
///
|
||||
@@ -411,7 +332,7 @@ impl InMemoryLayer {
|
||||
tenantid: ZTenantId,
|
||||
seg: SegmentTag,
|
||||
start_lsn: Lsn,
|
||||
oldest_lsn: Lsn,
|
||||
oldest_pending_lsn: Lsn,
|
||||
) -> Result<InMemoryLayer> {
|
||||
trace!(
|
||||
"initializing new empty InMemoryLayer for writing {} on timeline {} at {}",
|
||||
@@ -434,15 +355,13 @@ impl InMemoryLayer {
|
||||
tenantid,
|
||||
seg,
|
||||
start_lsn,
|
||||
oldest_lsn,
|
||||
oldest_pending_lsn,
|
||||
incremental: false,
|
||||
inner: RwLock::new(InMemoryLayerInner {
|
||||
end_lsn: None,
|
||||
dropped: false,
|
||||
file,
|
||||
page_versions: HashMap::new(),
|
||||
page_versions: PageVersions::new(file),
|
||||
seg_sizes,
|
||||
latest_lsn: oldest_lsn,
|
||||
}),
|
||||
})
|
||||
}
|
||||
@@ -479,21 +398,15 @@ impl InMemoryLayer {
|
||||
let mut inner = self.inner.write().unwrap();
|
||||
|
||||
inner.assert_writeable();
|
||||
assert!(lsn >= inner.latest_lsn);
|
||||
inner.latest_lsn = lsn;
|
||||
|
||||
// Write the page version to the file, and remember its offset in 'page_versions'
|
||||
{
|
||||
let off = inner.write_pv(&pv)?;
|
||||
let vec_map = inner.page_versions.entry(blknum).or_default();
|
||||
let old = vec_map.append_or_update_last(lsn, off).unwrap().0;
|
||||
if old.is_some() {
|
||||
// We already had an entry for this LSN. That's odd..
|
||||
warn!(
|
||||
"Page version of rel {} blk {} at {} already exists",
|
||||
self.seg.rel, blknum, lsn
|
||||
);
|
||||
}
|
||||
let old = inner.page_versions.append_or_update_last(blknum, lsn, pv)?;
|
||||
|
||||
if old.is_some() {
|
||||
// We already had an entry for this LSN. That's odd..
|
||||
warn!(
|
||||
"Page version of rel {} blk {} at {} already exists",
|
||||
self.seg.rel, blknum, lsn
|
||||
);
|
||||
}
|
||||
|
||||
// Also update the relation size, if this extended the relation.
|
||||
@@ -527,19 +440,16 @@ impl InMemoryLayer {
|
||||
gapblknum,
|
||||
blknum
|
||||
);
|
||||
let old = inner
|
||||
.page_versions
|
||||
.append_or_update_last(gapblknum, lsn, zeropv)?;
|
||||
// We already had an entry for this LSN. That's odd..
|
||||
|
||||
// Write the page version to the file, and remember its offset in
|
||||
// 'page_versions'
|
||||
{
|
||||
let off = inner.write_pv(&zeropv)?;
|
||||
let vec_map = inner.page_versions.entry(gapblknum).or_default();
|
||||
let old = vec_map.append_or_update_last(lsn, off).unwrap().0;
|
||||
if old.is_some() {
|
||||
warn!(
|
||||
"Page version of seg {} blk {} at {} already exists",
|
||||
self.seg, gapblknum, lsn
|
||||
);
|
||||
}
|
||||
if old.is_some() {
|
||||
warn!(
|
||||
"Page version of seg {} blk {} at {} already exists",
|
||||
self.seg, blknum, lsn
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -599,11 +509,12 @@ impl InMemoryLayer {
|
||||
timelineid: ZTimelineId,
|
||||
tenantid: ZTenantId,
|
||||
start_lsn: Lsn,
|
||||
oldest_lsn: Lsn,
|
||||
oldest_pending_lsn: Lsn,
|
||||
) -> Result<InMemoryLayer> {
|
||||
let seg = src.get_seg_tag();
|
||||
|
||||
assert!(oldest_lsn.is_aligned());
|
||||
assert!(oldest_pending_lsn.is_aligned());
|
||||
assert!(oldest_pending_lsn >= start_lsn);
|
||||
|
||||
trace!(
|
||||
"initializing new InMemoryLayer for writing {} on timeline {} at {}",
|
||||
@@ -627,15 +538,13 @@ impl InMemoryLayer {
|
||||
tenantid,
|
||||
seg,
|
||||
start_lsn,
|
||||
oldest_lsn,
|
||||
oldest_pending_lsn,
|
||||
incremental: true,
|
||||
inner: RwLock::new(InMemoryLayerInner {
|
||||
end_lsn: None,
|
||||
dropped: false,
|
||||
file,
|
||||
page_versions: HashMap::new(),
|
||||
page_versions: PageVersions::new(file),
|
||||
seg_sizes,
|
||||
latest_lsn: oldest_lsn,
|
||||
}),
|
||||
})
|
||||
}
|
||||
@@ -662,10 +571,8 @@ impl InMemoryLayer {
|
||||
assert!(lsn <= &end_lsn, "{:?} {:?}", lsn, end_lsn);
|
||||
}
|
||||
|
||||
for (_blk, vec_map) in inner.page_versions.iter() {
|
||||
for (lsn, _pos) in vec_map.as_slice() {
|
||||
assert!(*lsn <= end_lsn);
|
||||
}
|
||||
for (_blk, lsn, _pv) in inner.page_versions.ordered_page_version_iter(None) {
|
||||
assert!(lsn <= end_lsn);
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -743,19 +650,15 @@ impl InMemoryLayer {
|
||||
self.is_dropped(),
|
||||
)?;
|
||||
|
||||
// Write all page versions, in block + LSN order
|
||||
// Write all page versions
|
||||
let mut buf: Vec<u8> = Vec::new();
|
||||
|
||||
let pv_iter = inner.page_versions.iter();
|
||||
let mut pages: Vec<(&SegmentBlk, &VecMap<Lsn, u64>)> = pv_iter.collect();
|
||||
pages.sort_by_key(|(blknum, _vec_map)| *blknum);
|
||||
for (blknum, vec_map) in pages {
|
||||
for (lsn, pos) in vec_map.as_slice() {
|
||||
if *lsn < delta_end_lsn {
|
||||
let len = inner.read_pv_bytes(*pos, &mut buf)?;
|
||||
delta_layer_writer.put_page_version(*blknum, *lsn, &buf[..len])?;
|
||||
}
|
||||
}
|
||||
let page_versions_iter = inner
|
||||
.page_versions
|
||||
.ordered_page_version_iter(Some(delta_end_lsn));
|
||||
for (blknum, lsn, pos) in page_versions_iter {
|
||||
let len = inner.page_versions.read_pv_bytes(pos, &mut buf)?;
|
||||
delta_layer_writer.put_page_version(blknum, lsn, &buf[..len])?;
|
||||
}
|
||||
|
||||
// Create seg_sizes
|
||||
|
||||
@@ -40,7 +40,7 @@ pub struct LayerMap {
|
||||
/// All the layers keyed by segment tag
|
||||
segs: HashMap<SegmentTag, SegEntry>,
|
||||
|
||||
/// All in-memory layers, ordered by 'oldest_lsn' and generation
|
||||
/// All in-memory layers, ordered by 'oldest_pending_lsn' and generation
|
||||
/// of each layer. This allows easy access to the in-memory layer that
|
||||
/// contains the oldest WAL record.
|
||||
open_layers: BinaryHeap<OpenLayerEntry>,
|
||||
@@ -83,16 +83,16 @@ impl LayerMap {
|
||||
|
||||
let layer_id = segentry.update_open(Arc::clone(&layer));
|
||||
|
||||
let oldest_lsn = layer.get_oldest_lsn();
|
||||
let oldest_pending_lsn = layer.get_oldest_pending_lsn();
|
||||
|
||||
// After a crash and restart, 'oldest_lsn' of the oldest in-memory
|
||||
// After a crash and restart, 'oldest_pending_lsn' of the oldest in-memory
|
||||
// layer becomes the WAL streaming starting point, so it better not point
|
||||
// in the middle of a WAL record.
|
||||
assert!(oldest_lsn.is_aligned());
|
||||
assert!(oldest_pending_lsn.is_aligned());
|
||||
|
||||
// Also add it to the binary heap
|
||||
let open_layer_entry = OpenLayerEntry {
|
||||
oldest_lsn: layer.get_oldest_lsn(),
|
||||
oldest_pending_lsn: layer.get_oldest_pending_lsn(),
|
||||
layer_id,
|
||||
generation: self.current_generation,
|
||||
};
|
||||
@@ -191,15 +191,9 @@ impl LayerMap {
|
||||
///
|
||||
/// This is used for garbage collection, to determine if an old layer can
|
||||
/// be deleted.
|
||||
/// We ignore segments newer than disk_consistent_lsn because they will be removed at restart
|
||||
pub fn newer_image_layer_exists(
|
||||
&self,
|
||||
seg: SegmentTag,
|
||||
lsn: Lsn,
|
||||
disk_consistent_lsn: Lsn,
|
||||
) -> bool {
|
||||
pub fn newer_image_layer_exists(&self, seg: SegmentTag, lsn: Lsn) -> bool {
|
||||
if let Some(segentry) = self.segs.get(&seg) {
|
||||
segentry.newer_image_layer_exists(lsn, disk_consistent_lsn)
|
||||
segentry.newer_image_layer_exists(lsn)
|
||||
} else {
|
||||
false
|
||||
}
|
||||
@@ -317,18 +311,13 @@ impl SegEntry {
|
||||
self.historic.search(lsn)
|
||||
}
|
||||
|
||||
pub fn newer_image_layer_exists(&self, lsn: Lsn, disk_consistent_lsn: Lsn) -> bool {
|
||||
pub fn newer_image_layer_exists(&self, lsn: Lsn) -> bool {
|
||||
// We only check on-disk layers, because
|
||||
// in-memory layers are not durable
|
||||
|
||||
// The end-LSN is exclusive, while disk_consistent_lsn is
|
||||
// inclusive. For example, if disk_consistent_lsn is 100, it is
|
||||
// OK for a delta layer to have end LSN 101, but if the end LSN
|
||||
// is 102, then it might not have been fully flushed to disk
|
||||
// before crash.
|
||||
self.historic
|
||||
.iter_newer(lsn)
|
||||
.any(|layer| !layer.is_incremental() && layer.get_end_lsn() <= disk_consistent_lsn + 1)
|
||||
.any(|layer| !layer.is_incremental())
|
||||
}
|
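The bound dropped here hinged on end LSNs being exclusive while disk_consistent_lsn is inclusive, hence the `+ 1` in the removed code. A tiny standalone check of that arithmetic with plain u64s standing in for Lsn:

    /// Old-style durability check from the removed code path: an image/delta
    /// layer whose exclusive end LSN is at most disk_consistent_lsn + 1 was
    /// considered fully flushed.
    fn layer_is_durable(end_lsn: u64, disk_consistent_lsn: u64) -> bool {
        end_lsn <= disk_consistent_lsn + 1
    }

    fn main() {
        // The example from the removed comment: disk_consistent_lsn = 100.
        assert!(layer_is_durable(101, 100));  // end LSN 101 is OK
        assert!(!layer_is_durable(102, 100)); // end LSN 102 might not be flushed
    }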
||||
|
||||
// Set new open layer for a SegEntry.
|
||||
@@ -352,23 +341,23 @@ impl SegEntry {
|
||||
}
|
||||
|
||||
/// Entry held in LayerMap::open_layers, with boilerplate comparison routines
|
||||
/// to implement a min-heap ordered by 'oldest_lsn' and 'generation'
|
||||
/// to implement a min-heap ordered by 'oldest_pending_lsn' and 'generation'
|
||||
///
|
||||
/// The generation number associated with each entry can be used to distinguish
|
||||
/// recently-added entries (i.e after last call to increment_generation()) from older
|
||||
/// entries with the same 'oldest_lsn'.
|
||||
/// entries with the same 'oldest_pending_lsn'.
|
||||
struct OpenLayerEntry {
|
||||
oldest_lsn: Lsn, // copy of layer.get_oldest_lsn()
|
||||
oldest_pending_lsn: Lsn, // copy of layer.get_oldest_pending_lsn()
|
||||
generation: u64,
|
||||
layer_id: LayerId,
|
||||
}
|
||||
impl Ord for OpenLayerEntry {
|
||||
fn cmp(&self, other: &Self) -> Ordering {
|
||||
// BinaryHeap is a max-heap, and we want a min-heap. Reverse the ordering here
|
||||
// to get that. Entries with identical oldest_lsn are ordered by generation
|
||||
// to get that. Entries with identical oldest_pending_lsn are ordered by generation
|
||||
other
|
||||
.oldest_lsn
|
||||
.cmp(&self.oldest_lsn)
|
||||
.oldest_pending_lsn
|
||||
.cmp(&self.oldest_pending_lsn)
|
||||
.then_with(|| other.generation.cmp(&self.generation))
|
||||
}
|
||||
}
|
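Rust's `std::collections::BinaryHeap` is a max-heap, so `OpenLayerEntry` reverses the comparison to make `peek()`/`pop()` return the smallest `oldest_pending_lsn` first. A self-contained sketch of the same trick on a bare (lsn, generation) pair:

    use std::cmp::Ordering;
    use std::collections::BinaryHeap;

    #[derive(PartialEq, Eq)]
    struct Entry {
        oldest_pending_lsn: u64,
        generation: u64,
    }

    impl Ord for Entry {
        fn cmp(&self, other: &Self) -> Ordering {
            // Reverse both fields so the max-heap behaves like a min-heap,
            // exactly as OpenLayerEntry does above.
            other
                .oldest_pending_lsn
                .cmp(&self.oldest_pending_lsn)
                .then_with(|| other.generation.cmp(&self.generation))
        }
    }

    impl PartialOrd for Entry {
        fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
            Some(self.cmp(other))
        }
    }

    fn main() {
        let mut heap = BinaryHeap::new();
        heap.push(Entry { oldest_pending_lsn: 300, generation: 1 });
        heap.push(Entry { oldest_pending_lsn: 100, generation: 2 });
        heap.push(Entry { oldest_pending_lsn: 200, generation: 1 });
        // pop() now yields the oldest pending LSN first.
        assert_eq!(heap.pop().unwrap().oldest_pending_lsn, 100);
        assert_eq!(heap.pop().unwrap().oldest_pending_lsn, 200);
    }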
||||
@@ -437,7 +426,7 @@ mod tests {
|
||||
conf: &'static PageServerConf,
|
||||
segno: u32,
|
||||
start_lsn: Lsn,
|
||||
oldest_lsn: Lsn,
|
||||
oldest_pending_lsn: Lsn,
|
||||
) -> Arc<InMemoryLayer> {
|
||||
Arc::new(
|
||||
InMemoryLayer::create(
|
||||
@@ -449,7 +438,7 @@ mod tests {
|
||||
segno,
|
||||
},
|
||||
start_lsn,
|
||||
oldest_lsn,
|
||||
oldest_pending_lsn,
|
||||
)
|
||||
.unwrap(),
|
||||
)
|
||||
|
||||
pageserver/src/layered_repository/page_versions.rs (new file, 268 lines)
@@ -0,0 +1,268 @@
|
||||
//!
|
||||
//! Data structure to ingest incoming WAL into an append-only file.
|
||||
//!
|
||||
//! - The file is considered temporary, and will be discarded on crash
|
||||
//! - based on a B-tree
|
||||
//!
|
||||
|
||||
use std::os::unix::fs::FileExt;
|
||||
use std::{collections::HashMap, ops::RangeBounds, slice};
|
||||
|
||||
use anyhow::Result;
|
||||
|
||||
use std::cmp::min;
|
||||
use std::io::Seek;
|
||||
|
||||
use zenith_utils::{lsn::Lsn, vec_map::VecMap};
|
||||
|
||||
use super::storage_layer::PageVersion;
|
||||
use crate::layered_repository::ephemeral_file::EphemeralFile;
|
||||
|
||||
use zenith_utils::bin_ser::BeSer;
|
||||
|
||||
const EMPTY_SLICE: &[(Lsn, u64)] = &[];
|
||||
|
||||
pub struct PageVersions {
|
||||
map: HashMap<u32, VecMap<Lsn, u64>>,
|
||||
|
||||
/// The PageVersion structs are stored in a serialized format in this file.
|
||||
/// Each serialized PageVersion is preceded by a 'u32' length field.
|
||||
/// The 'map' stores offsets into this file.
|
||||
file: EphemeralFile,
|
||||
}
|
||||
|
||||
impl PageVersions {
|
||||
pub fn new(file: EphemeralFile) -> PageVersions {
|
||||
PageVersions {
|
||||
map: HashMap::new(),
|
||||
file,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn append_or_update_last(
|
||||
&mut self,
|
||||
blknum: u32,
|
||||
lsn: Lsn,
|
||||
page_version: PageVersion,
|
||||
) -> Result<Option<u64>> {
|
||||
// remember starting position
|
||||
let pos = self.file.stream_position()?;
|
||||
|
||||
// make room for the 'length' field by writing zeros as a placeholder.
|
||||
self.file.seek(std::io::SeekFrom::Start(pos + 4)).unwrap();
|
||||
|
||||
page_version.ser_into(&mut self.file).unwrap();
|
||||
|
||||
// write the 'length' field.
|
||||
let len = self.file.stream_position()? - pos - 4;
|
||||
let lenbuf = u32::to_ne_bytes(len as u32);
|
||||
self.file.write_all_at(&lenbuf, pos)?;
|
||||
|
||||
let map = self.map.entry(blknum).or_insert_with(VecMap::default);
|
||||
Ok(map.append_or_update_last(lsn, pos as u64).unwrap().0)
|
||||
}
|
||||
|
||||
/// Get all [`PageVersion`]s in a block
|
||||
fn get_block_slice(&self, blknum: u32) -> &[(Lsn, u64)] {
|
||||
self.map
|
||||
.get(&blknum)
|
||||
.map(VecMap::as_slice)
|
||||
.unwrap_or(EMPTY_SLICE)
|
||||
}
|
||||
|
||||
/// Get a range of [`PageVersion`]s in a block
|
||||
pub fn get_block_lsn_range<R: RangeBounds<Lsn>>(&self, blknum: u32, range: R) -> &[(Lsn, u64)] {
|
||||
self.map
|
||||
.get(&blknum)
|
||||
.map(|vec_map| vec_map.slice_range(range))
|
||||
.unwrap_or(EMPTY_SLICE)
|
||||
}
|
||||
|
||||
/// Iterate through [`PageVersion`]s in (block, lsn) order.
|
||||
/// If a [`cutoff_lsn`] is set, only show versions with `lsn < cutoff_lsn`
|
||||
pub fn ordered_page_version_iter(&self, cutoff_lsn: Option<Lsn>) -> OrderedPageVersionIter<'_> {
|
||||
let mut ordered_blocks: Vec<u32> = self.map.keys().cloned().collect();
|
||||
ordered_blocks.sort_unstable();
|
||||
|
||||
let slice = ordered_blocks
|
||||
.first()
|
||||
.map(|&blknum| self.get_block_slice(blknum))
|
||||
.unwrap_or(EMPTY_SLICE);
|
||||
|
||||
OrderedPageVersionIter {
|
||||
page_versions: self,
|
||||
ordered_blocks,
|
||||
cur_block_idx: 0,
|
||||
cutoff_lsn,
|
||||
cur_slice_iter: slice.iter(),
|
||||
}
|
||||
}
|
||||
|
||||
///
|
||||
/// Read a page version.
|
||||
///
|
||||
pub fn read_pv(&self, off: u64) -> Result<PageVersion> {
|
||||
let mut buf = Vec::new();
|
||||
self.read_pv_bytes(off, &mut buf)?;
|
||||
Ok(PageVersion::des(&buf)?)
|
||||
}
|
||||
|
||||
///
|
||||
/// Read a page version, as raw bytes, at the given offset. The bytes
|
||||
/// are read into 'buf', which is expanded if necessary. Returns the
|
||||
/// size of the page version.
|
||||
///
|
||||
pub fn read_pv_bytes(&self, off: u64, buf: &mut Vec<u8>) -> Result<usize> {
|
||||
// read length
|
||||
let mut lenbuf = [0u8; 4];
|
||||
self.file.read_exact_at(&mut lenbuf, off)?;
|
||||
let len = u32::from_ne_bytes(lenbuf) as usize;
|
||||
|
||||
// Resize the buffer to fit the data, if needed.
|
||||
//
|
||||
// We don't shrink the buffer if it's larger than necessary. That avoids
|
||||
// repeatedly shrinking and expanding when you reuse the same buffer to
|
||||
// read multiple page versions. Expanding a Vec requires initializing the
|
||||
// new bytes, which is a waste of time because we're immediately overwriting
|
||||
// it, but there's no way to avoid it without resorting to unsafe code.
|
||||
if buf.len() < len {
|
||||
buf.resize(len, 0);
|
||||
}
|
||||
self.file.read_exact_at(&mut buf[0..len], off + 4)?;
|
||||
|
||||
Ok(len)
|
||||
}
|
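The on-disk framing here is simple: each record in the ephemeral file is a native-endian `u32` length followed by the serialized `PageVersion`, and the in-memory map stores the offset of the length field. A self-contained sketch of that framing over a plain `Vec<u8>`, with no EphemeralFile or serialization types:

    /// Append one length-prefixed record and return its offset, mirroring
    /// append_or_update_last's file layout (u32 length, then the payload).
    fn append_record(file: &mut Vec<u8>, payload: &[u8]) -> u64 {
        let off = file.len() as u64;
        file.extend_from_slice(&(payload.len() as u32).to_ne_bytes());
        file.extend_from_slice(payload);
        off
    }

    /// Read a record back from the given offset, mirroring read_pv_bytes.
    fn read_record(file: &[u8], off: u64) -> &[u8] {
        let off = off as usize;
        let len = u32::from_ne_bytes(file[off..off + 4].try_into().unwrap()) as usize;
        &file[off + 4..off + 4 + len]
    }

    fn main() {
        let mut file = Vec::new();
        let a = append_record(&mut file, b"page image bytes");
        let b = append_record(&mut file, b"wal record bytes");
        assert_eq!(read_record(&file, a), b"page image bytes");
        assert_eq!(read_record(&file, b), b"wal record bytes");
    }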
||||
}
|
||||
|
||||
pub struct PageVersionReader<'a> {
|
||||
file: &'a EphemeralFile,
|
||||
pos: u64,
|
||||
end_pos: u64,
|
||||
}
|
||||
|
||||
impl<'a> std::io::Read for PageVersionReader<'a> {
|
||||
fn read(&mut self, buf: &mut [u8]) -> Result<usize, std::io::Error> {
|
||||
let len = min(buf.len(), (self.end_pos - self.pos) as usize);
|
||||
let n = self.file.read_at(&mut buf[..len], self.pos)?;
|
||||
self.pos += n as u64;
|
||||
Ok(n)
|
||||
}
|
||||
}
|
||||
|
||||
pub struct OrderedPageVersionIter<'a> {
|
||||
page_versions: &'a PageVersions,
|
||||
|
||||
ordered_blocks: Vec<u32>,
|
||||
cur_block_idx: usize,
|
||||
|
||||
cutoff_lsn: Option<Lsn>,
|
||||
|
||||
cur_slice_iter: slice::Iter<'a, (Lsn, u64)>,
|
||||
}
|
||||
|
||||
impl OrderedPageVersionIter<'_> {
|
||||
fn is_lsn_before_cutoff(&self, lsn: &Lsn) -> bool {
|
||||
if let Some(cutoff_lsn) = self.cutoff_lsn.as_ref() {
|
||||
lsn < cutoff_lsn
|
||||
} else {
|
||||
true
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a> Iterator for OrderedPageVersionIter<'a> {
|
||||
type Item = (u32, Lsn, u64);
|
||||
|
||||
fn next(&mut self) -> Option<Self::Item> {
|
||||
loop {
|
||||
if let Some((lsn, pos)) = self.cur_slice_iter.next() {
|
||||
if self.is_lsn_before_cutoff(lsn) {
|
||||
let blknum = self.ordered_blocks[self.cur_block_idx];
|
||||
return Some((blknum, *lsn, *pos));
|
||||
}
|
||||
}
|
||||
|
||||
let next_block_idx = self.cur_block_idx + 1;
|
||||
let blknum: u32 = *self.ordered_blocks.get(next_block_idx)?;
|
||||
self.cur_block_idx = next_block_idx;
|
||||
self.cur_slice_iter = self.page_versions.get_block_slice(blknum).iter();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use bytes::Bytes;
|
||||
|
||||
use super::*;
|
||||
use crate::config::PageServerConf;
|
||||
use std::fs;
|
||||
use std::str::FromStr;
|
||||
use zenith_utils::zid::{ZTenantId, ZTimelineId};
|
||||
|
||||
fn repo_harness(test_name: &str) -> Result<(&'static PageServerConf, ZTenantId, ZTimelineId)> {
|
||||
let repo_dir = PageServerConf::test_repo_dir(test_name);
|
||||
let _ = fs::remove_dir_all(&repo_dir);
|
||||
let conf = PageServerConf::dummy_conf(repo_dir);
|
||||
// Make a static copy of the config. This can never be free'd, but that's
|
||||
// OK in a test.
|
||||
let conf: &'static PageServerConf = Box::leak(Box::new(conf));
|
||||
|
||||
let tenantid = ZTenantId::from_str("11000000000000000000000000000000").unwrap();
|
||||
let timelineid = ZTimelineId::from_str("22000000000000000000000000000000").unwrap();
|
||||
fs::create_dir_all(conf.timeline_path(&timelineid, &tenantid))?;
|
||||
|
||||
Ok((conf, tenantid, timelineid))
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_ordered_iter() -> Result<()> {
|
||||
let (conf, tenantid, timelineid) = repo_harness("test_ordered_iter")?;
|
||||
|
||||
let file = EphemeralFile::create(conf, tenantid, timelineid)?;
|
||||
|
||||
let mut page_versions = PageVersions::new(file);
|
||||
|
||||
const BLOCKS: u32 = 1000;
|
||||
const LSNS: u64 = 50;
|
||||
|
||||
let empty_page = Bytes::from_static(&[0u8; 8192]);
|
||||
let empty_page_version = PageVersion::Page(empty_page);
|
||||
|
||||
for blknum in 0..BLOCKS {
|
||||
for lsn in 0..LSNS {
|
||||
let old = page_versions.append_or_update_last(
|
||||
blknum,
|
||||
Lsn(lsn),
|
||||
empty_page_version.clone(),
|
||||
)?;
|
||||
assert!(old.is_none());
|
||||
}
|
||||
}
|
||||
|
||||
let mut iter = page_versions.ordered_page_version_iter(None);
|
||||
for blknum in 0..BLOCKS {
|
||||
for lsn in 0..LSNS {
|
||||
let (actual_blknum, actual_lsn, _pv) = iter.next().unwrap();
|
||||
assert_eq!(actual_blknum, blknum);
|
||||
assert_eq!(Lsn(lsn), actual_lsn);
|
||||
}
|
||||
}
|
||||
assert!(iter.next().is_none());
|
||||
assert!(iter.next().is_none()); // should be robust against excessive next() calls
|
||||
|
||||
const CUTOFF_LSN: Lsn = Lsn(30);
|
||||
let mut iter = page_versions.ordered_page_version_iter(Some(CUTOFF_LSN));
|
||||
for blknum in 0..BLOCKS {
|
||||
for lsn in 0..CUTOFF_LSN.0 {
|
||||
let (actual_blknum, actual_lsn, _pv) = iter.next().unwrap();
|
||||
assert_eq!(actual_blknum, blknum);
|
||||
assert_eq!(Lsn(lsn), actual_lsn);
|
||||
}
|
||||
}
|
||||
assert!(iter.next().is_none());
|
||||
assert!(iter.next().is_none()); // should be robust against excessive next() calls
|
||||
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
@@ -71,26 +71,15 @@ pub enum PageVersion {
|
||||
}
|
||||
|
||||
///
|
||||
/// Struct used to communicate across calls to 'get_page_reconstruct_data'.
|
||||
/// Data needed to reconstruct a page version
|
||||
///
|
||||
/// Before first call to get_page_reconstruct_data, you can fill in 'page_img'
|
||||
/// if you have an older cached version of the page available. That can save
|
||||
/// work in 'get_page_reconstruct_data', as it can stop searching for page
|
||||
/// versions when all the WAL records going back to the cached image have been
|
||||
/// collected.
|
||||
///
|
||||
/// When get_page_reconstruct_data returns Complete, 'page_img' is set to an
|
||||
/// image of the page, or the oldest WAL record in 'records' is a will_init-type
|
||||
/// record that initializes the page without requiring a previous image.
|
||||
///
|
||||
/// If 'get_page_reconstruct_data' returns Continue, some 'records' may have
|
||||
/// been collected, but there are more records outside the current layer. Pass
|
||||
/// the same PageReconstructData struct in the next 'get_page_reconstruct_data'
|
||||
/// call, to collect more records.
|
||||
/// 'page_img' is the old base image of the page to start the WAL replay with.
|
||||
/// It can be None, if the first WAL record initializes the page (will_init)
|
||||
/// 'records' contains the records to apply over the base image.
|
||||
///
|
||||
pub struct PageReconstructData {
|
||||
pub records: Vec<(Lsn, ZenithWalRecord)>,
|
||||
pub page_img: Option<(Lsn, Bytes)>,
|
||||
pub page_img: Option<Bytes>,
|
||||
}
|
||||
|
||||
/// Return value from Layer::get_page_reconstruct_data
|
||||
@@ -104,6 +93,8 @@ pub enum PageReconstructResult {
|
||||
/// the returned LSN. This is usually considered an error, but might be OK
|
||||
/// in some circumstances.
|
||||
Missing(Lsn),
|
||||
/// Use the cached image at `cached_img_lsn` as the base image
|
||||
Cached,
|
||||
}
|
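A caller walks layers from newest to oldest and dispatches on this result: Complete means the collected image and records suffice, Continue means descend to the predecessor layer, the new Cached variant means fall back to the materialized page that was passed in as `cached_img_lsn`, and Missing means nothing was found. The dispatch loop below is schematic, with stand-in types rather than the real Layer trait, and the Continue payload is an assumption of this sketch:

    // Stand-in result type matching the variants described above.
    enum ReconstructResult {
        Complete,
        Continue(u64), // ask the predecessor layer, starting at this LSN
        Cached,
        Missing(u64),
    }

    // Hypothetical per-layer lookup; in the pageserver this is
    // Layer::get_page_reconstruct_data on successive ancestor layers.
    fn lookup(layer: usize, _lsn: u64) -> ReconstructResult {
        if layer == 0 { ReconstructResult::Cached } else { ReconstructResult::Continue(100) }
    }

    fn reconstruct(mut layer: usize, mut lsn: u64) -> Result<&'static str, String> {
        loop {
            match lookup(layer, lsn) {
                ReconstructResult::Complete => return Ok("use collected image + records"),
                ReconstructResult::Cached => return Ok("use the cached page as base image"),
                ReconstructResult::Missing(at) => return Err(format!("no data at {}", at)),
                ReconstructResult::Continue(cont_lsn) => {
                    // Descend to the predecessor layer and keep collecting records.
                    if layer == 0 {
                        return Err("ran out of layers".to_string());
                    }
                    layer -= 1;
                    lsn = cont_lsn;
                }
            }
        }
    }

    fn main() {
        assert_eq!(reconstruct(2, 500).unwrap(), "use the cached page as base image");
    }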
||||
|
||||
///
|
||||
@@ -147,16 +138,19 @@ pub trait Layer: Send + Sync {
|
||||
/// It is up to the caller to collect more data from previous layer and
|
||||
/// perform WAL redo, if necessary.
|
||||
///
|
||||
/// `cached_img_lsn` should be set to a cached page image's lsn < `lsn`.
|
||||
/// This function will only return data after `cached_img_lsn`.
|
||||
///
|
||||
/// See PageReconstructResult for possible return values. The collected data
|
||||
/// is appended to reconstruct_data; the caller should pass an empty struct
|
||||
/// on first call, or a struct with a cached older image of the page if one
|
||||
/// is available. If this returns PageReconstructResult::Continue, look up
|
||||
/// the predecessor layer and call again with the same 'reconstruct_data' to
|
||||
/// collect more data.
|
||||
/// on first call. If this returns PageReconstructResult::Continue, look up
|
||||
/// the predecessor layer and call again with the same 'reconstruct_data'
|
||||
/// to collect more data.
|
||||
fn get_page_reconstruct_data(
|
||||
&self,
|
||||
blknum: SegmentBlk,
|
||||
lsn: Lsn,
|
||||
cached_img_lsn: Option<Lsn>,
|
||||
reconstruct_data: &mut PageReconstructData,
|
||||
) -> Result<PageReconstructResult>;
|
||||
|
||||
|
||||
@@ -10,7 +10,7 @@
|
||||
// *callmemaybe <zenith timelineid> $url* -- ask pageserver to start walreceiver on $url
|
||||
//
|
||||
|
||||
use anyhow::{bail, ensure, Context, Result};
|
||||
use anyhow::{anyhow, bail, ensure, Context, Result};
|
||||
use bytes::{Buf, BufMut, Bytes, BytesMut};
|
||||
use lazy_static::lazy_static;
|
||||
use regex::Regex;
|
||||
@@ -18,7 +18,7 @@ use std::io;
|
||||
use std::net::TcpListener;
|
||||
use std::str;
|
||||
use std::str::FromStr;
|
||||
use std::sync::{Arc, RwLockReadGuard};
|
||||
use std::sync::Arc;
|
||||
use tracing::*;
|
||||
use zenith_metrics::{register_histogram_vec, HistogramVec};
|
||||
use zenith_utils::auth::{self, JwtAuth};
|
||||
@@ -27,10 +27,13 @@ use zenith_utils::lsn::Lsn;
|
||||
use zenith_utils::postgres_backend::is_socket_read_timed_out;
|
||||
use zenith_utils::postgres_backend::PostgresBackend;
|
||||
use zenith_utils::postgres_backend::{self, AuthType};
|
||||
use zenith_utils::pq_proto::{BeMessage, FeMessage, RowDescriptor, SINGLE_COL_ROWDESC};
|
||||
use zenith_utils::pq_proto::{
|
||||
BeMessage, FeMessage, RowDescriptor, HELLO_WORLD_ROW, SINGLE_COL_ROWDESC,
|
||||
};
|
||||
use zenith_utils::zid::{ZTenantId, ZTimelineId};
|
||||
|
||||
use crate::basebackup;
|
||||
use crate::branches;
|
||||
use crate::config::PageServerConf;
|
||||
use crate::relish::*;
|
||||
use crate::repository::Timeline;
|
||||
@@ -395,12 +398,7 @@ impl PageServerHandler {
    /// In either case, if the page server hasn't received the WAL up to the
    /// requested LSN yet, we will wait for it to arrive. The return value is
    /// the LSN that should be used to look up the page versions.
    fn wait_or_get_last_lsn(
        timeline: &dyn Timeline,
        mut lsn: Lsn,
        latest: bool,
        latest_gc_cutoff_lsn: &RwLockReadGuard<Lsn>,
    ) -> Result<Lsn> {
    fn wait_or_get_last_lsn(timeline: &dyn Timeline, lsn: Lsn, latest: bool) -> Result<Lsn> {
        if latest {
            // Latest page version was requested. If LSN is given, it is a hint
            // to the page server that there have been no modifications to the
@@ -421,26 +419,22 @@ impl PageServerHandler {
            // walsender completes the authentication and starts streaming the
            // WAL.
            if lsn <= last_record_lsn {
                lsn = last_record_lsn;
                Ok(last_record_lsn)
            } else {
                timeline.wait_lsn(lsn)?;
                // Since we waited for 'lsn' to arrive, that is now the last
                // record LSN. (Or close enough for our purposes; the
                // last-record LSN can advance immediately after we return
                // anyway)
                Ok(lsn)
            }
        } else {
            if lsn == Lsn(0) {
                bail!("invalid LSN(0) in request");
            }
            timeline.wait_lsn(lsn)?;
            Ok(lsn)
        }
        ensure!(
            lsn >= **latest_gc_cutoff_lsn,
            "tried to request a page version that was garbage collected. requested at {} gc cutoff {}",
            lsn, **latest_gc_cutoff_lsn
        );
        Ok(lsn)
    }

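To make the `latest` flag concrete, a short caller-side sketch follows; the request LSNs are invented for illustration and the four-argument variant shown in this diff is assumed.

// Editorial sketch, not part of the diff.
let latest_gc_cutoff_lsn = timeline.get_latest_gc_cutoff_lsn();

// latest == true: serve the newest version; the request LSN is only a hint,
// so the returned LSN may be the current last-record LSN instead.
let read_lsn = Self::wait_or_get_last_lsn(timeline, Lsn(0x2000), true, &latest_gc_cutoff_lsn)?;

// latest == false: serve exactly this historic version; Lsn(0) is rejected and
// anything below the GC cutoff fails the ensure!() above.
let read_lsn = Self::wait_or_get_last_lsn(timeline, Lsn(0x1000), false, &latest_gc_cutoff_lsn)?;
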
fn handle_get_rel_exists_request(
|
||||
@@ -451,8 +445,7 @@ impl PageServerHandler {
|
||||
let _enter = info_span!("get_rel_exists", rel = %req.rel, req_lsn = %req.lsn).entered();
|
||||
|
||||
let tag = RelishTag::Relation(req.rel);
|
||||
let latest_gc_cutoff_lsn = timeline.get_latest_gc_cutoff_lsn();
|
||||
let lsn = Self::wait_or_get_last_lsn(timeline, req.lsn, req.latest, &latest_gc_cutoff_lsn)?;
|
||||
let lsn = Self::wait_or_get_last_lsn(timeline, req.lsn, req.latest)?;
|
||||
|
||||
let exists = timeline.get_rel_exists(tag, lsn)?;
|
||||
|
||||
@@ -468,8 +461,7 @@ impl PageServerHandler {
|
||||
) -> Result<PagestreamBeMessage> {
|
||||
let _enter = info_span!("get_nblocks", rel = %req.rel, req_lsn = %req.lsn).entered();
|
||||
let tag = RelishTag::Relation(req.rel);
|
||||
let latest_gc_cutoff_lsn = timeline.get_latest_gc_cutoff_lsn();
|
||||
let lsn = Self::wait_or_get_last_lsn(timeline, req.lsn, req.latest, &latest_gc_cutoff_lsn)?;
|
||||
let lsn = Self::wait_or_get_last_lsn(timeline, req.lsn, req.latest)?;
|
||||
|
||||
let n_blocks = timeline.get_relish_size(tag, lsn)?;
|
||||
|
||||
@@ -490,16 +482,8 @@ impl PageServerHandler {
|
||||
let _enter = info_span!("get_page", rel = %req.rel, blkno = &req.blkno, req_lsn = %req.lsn)
|
||||
.entered();
|
||||
let tag = RelishTag::Relation(req.rel);
|
||||
let latest_gc_cutoff_lsn = timeline.get_latest_gc_cutoff_lsn();
|
||||
let lsn = Self::wait_or_get_last_lsn(timeline, req.lsn, req.latest, &latest_gc_cutoff_lsn)?;
|
||||
/*
|
||||
// Add a 1s delay to some requests. The delayed causes the requests to
|
||||
// hit the race condition from github issue #1047 more easily.
|
||||
use rand::Rng;
|
||||
if rand::thread_rng().gen::<u8>() < 5 {
|
||||
std::thread::sleep(std::time::Duration::from_millis(1000));
|
||||
}
|
||||
*/
|
||||
let lsn = Self::wait_or_get_last_lsn(timeline, req.lsn, req.latest)?;
|
||||
|
||||
let page = timeline.get_page_at_lsn(tag, req.blkno, lsn)?;
|
||||
|
||||
Ok(PagestreamBeMessage::GetPage(PagestreamGetPageResponse {
|
||||
@@ -520,10 +504,9 @@ impl PageServerHandler {
|
||||
// check that the timeline exists
|
||||
let timeline = tenant_mgr::get_timeline_for_tenant(tenantid, timelineid)
|
||||
.context("Cannot handle basebackup request for a remote timeline")?;
|
||||
let latest_gc_cutoff_lsn = timeline.get_latest_gc_cutoff_lsn();
|
||||
if let Some(lsn) = lsn {
|
||||
timeline
|
||||
.check_lsn_is_in_scope(lsn, &latest_gc_cutoff_lsn)
|
||||
.check_lsn_is_in_scope(lsn)
|
||||
.context("invalid basebackup lsn")?;
|
||||
}
|
||||
|
||||
@@ -641,7 +624,7 @@ impl postgres_backend::Handler for PageServerHandler {
|
||||
let re = Regex::new(r"^callmemaybe ([[:xdigit:]]+) ([[:xdigit:]]+) (.*)$").unwrap();
|
||||
let caps = re
|
||||
.captures(query_string)
|
||||
.with_context(|| format!("invalid callmemaybe: '{}'", query_string))?;
|
||||
.ok_or_else(|| anyhow!("invalid callmemaybe: '{}'", query_string))?;
|
||||
|
||||
let tenantid = ZTenantId::from_str(caps.get(1).unwrap().as_str())?;
|
||||
let timelineid = ZTimelineId::from_str(caps.get(2).unwrap().as_str())?;
|
||||
@@ -659,21 +642,79 @@ impl postgres_backend::Handler for PageServerHandler {
|
||||
walreceiver::launch_wal_receiver(self.conf, tenantid, timelineid, &connstr)?;
|
||||
|
||||
pgb.write_message_noflush(&BeMessage::CommandComplete(b"SELECT 1"))?;
|
||||
} else if query_string.starts_with("branch_create ") {
|
||||
let err = || anyhow!("invalid branch_create: '{}'", query_string);
|
||||
|
||||
// branch_create <tenantid> <branchname> <startpoint>
|
||||
// TODO lazy static
|
||||
// TODO: escaping, to allow branch names with spaces
|
||||
let re = Regex::new(r"^branch_create ([[:xdigit:]]+) (\S+) ([^\r\n\s;]+)[\r\n\s;]*;?$")
|
||||
.unwrap();
|
||||
let caps = re.captures(query_string).ok_or_else(err)?;
|
||||
|
||||
let tenantid = ZTenantId::from_str(caps.get(1).unwrap().as_str())?;
|
||||
let branchname = caps.get(2).ok_or_else(err)?.as_str().to_owned();
|
||||
let startpoint_str = caps.get(3).ok_or_else(err)?.as_str().to_owned();
|
||||
|
||||
self.check_permission(Some(tenantid))?;
|
||||
|
||||
let _enter =
|
||||
info_span!("branch_create", name = %branchname, tenant = %tenantid).entered();
|
||||
|
||||
let branch =
|
||||
branches::create_branch(self.conf, &branchname, &startpoint_str, &tenantid)?;
|
||||
let branch = serde_json::to_vec(&branch)?;
|
||||
|
||||
pgb.write_message_noflush(&SINGLE_COL_ROWDESC)?
|
||||
.write_message_noflush(&BeMessage::DataRow(&[Some(&branch)]))?
|
||||
.write_message_noflush(&BeMessage::CommandComplete(b"SELECT 1"))?;
|
||||
} else if query_string.starts_with("branch_list ") {
|
||||
// branch_list <zenith tenantid as hex string>
|
||||
let re = Regex::new(r"^branch_list ([[:xdigit:]]+)$").unwrap();
|
||||
let caps = re
|
||||
.captures(query_string)
|
||||
.ok_or_else(|| anyhow!("invalid branch_list: '{}'", query_string))?;
|
||||
|
||||
let tenantid = ZTenantId::from_str(caps.get(1).unwrap().as_str())?;
|
||||
|
||||
// since these handlers for tenant/branch commands are deprecated (in favor of http based ones)
|
||||
// just use false in place of include non incremental logical size
|
||||
let branches = crate::branches::get_branches(self.conf, &tenantid, false)?;
|
||||
let branches_buf = serde_json::to_vec(&branches)?;
|
||||
|
||||
pgb.write_message_noflush(&SINGLE_COL_ROWDESC)?
|
||||
.write_message_noflush(&BeMessage::DataRow(&[Some(&branches_buf)]))?
|
||||
.write_message_noflush(&BeMessage::CommandComplete(b"SELECT 1"))?;
|
||||
} else if query_string.starts_with("tenant_list") {
|
||||
let tenants = crate::tenant_mgr::list_tenants()?;
|
||||
let tenants_buf = serde_json::to_vec(&tenants)?;
|
||||
|
||||
pgb.write_message_noflush(&SINGLE_COL_ROWDESC)?
|
||||
.write_message_noflush(&BeMessage::DataRow(&[Some(&tenants_buf)]))?
|
||||
.write_message_noflush(&BeMessage::CommandComplete(b"SELECT 1"))?;
|
||||
} else if query_string.starts_with("tenant_create") {
|
||||
let err = || anyhow!("invalid tenant_create: '{}'", query_string);
|
||||
|
||||
// tenant_create <tenantid>
|
||||
let re = Regex::new(r"^tenant_create ([[:xdigit:]]+)$").unwrap();
|
||||
let caps = re.captures(query_string).ok_or_else(err)?;
|
||||
|
||||
self.check_permission(None)?;
|
||||
|
||||
let tenantid = ZTenantId::from_str(caps.get(1).unwrap().as_str())?;
|
||||
|
||||
tenant_mgr::create_repository_for_tenant(self.conf, tenantid)?;
|
||||
|
||||
pgb.write_message_noflush(&SINGLE_COL_ROWDESC)?
|
||||
.write_message_noflush(&BeMessage::CommandComplete(b"SELECT 1"))?;
|
||||
} else if query_string.starts_with("status") {
|
||||
pgb.write_message_noflush(&SINGLE_COL_ROWDESC)?
|
||||
.write_message_noflush(&HELLO_WORLD_ROW)?
|
||||
.write_message_noflush(&BeMessage::CommandComplete(b"SELECT 1"))?;
|
||||
        } else if query_string.to_ascii_lowercase().starts_with("set ") {
            // important because psycopg2 executes "SET datestyle TO 'ISO'"
            // on connect
            pgb.write_message_noflush(&BeMessage::CommandComplete(b"SELECT 1"))?;
        } else if query_string.starts_with("failpoints ") {
            let (_, failpoints) = query_string.split_at("failpoints ".len());
            for failpoint in failpoints.split(';') {
                if let Some((name, actions)) = failpoint.split_once('=') {
                    info!("cfg failpoint: {} {}", name, actions);
                    fail::cfg(name, actions).unwrap();
                } else {
                    bail!("Invalid failpoints format");
                }
            }
            pgb.write_message_noflush(&BeMessage::CommandComplete(b"SELECT 1"))?;
        } else if query_string.starts_with("do_gc ") {
            // Run GC immediately on given timeline.
            // FIXME: This is just for tests. See test_runner/batch_others/test_gc.py.
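The `failpoints` command takes a semicolon-separated list of `name=actions` pairs, each handed to `fail::cfg`. A standalone sketch of that format (the failpoint names below are invented for illustration):

// Editorial sketch: mirrors the parsing above on a made-up command string.
let query_string = "failpoints before-checkpoint=return;slow-upload=off";
let (_, failpoints) = query_string.split_at("failpoints ".len());
for failpoint in failpoints.split(';') {
    let (name, actions) = failpoint
        .split_once('=')
        .expect("each entry must look like name=actions");
    println!("would configure failpoint {} with action {}", name, actions);
}
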
@@ -687,7 +728,7 @@ impl postgres_backend::Handler for PageServerHandler {
|
||||
|
||||
let caps = re
|
||||
.captures(query_string)
|
||||
.with_context(|| format!("invalid do_gc: '{}'", query_string))?;
|
||||
.ok_or_else(|| anyhow!("invalid do_gc: '{}'", query_string))?;
|
||||
|
||||
let tenantid = ZTenantId::from_str(caps.get(1).unwrap().as_str())?;
|
||||
let timelineid = ZTimelineId::from_str(caps.get(2).unwrap().as_str())?;
|
||||
@@ -771,7 +812,7 @@ impl postgres_backend::Handler for PageServerHandler {
|
||||
|
||||
let caps = re
|
||||
.captures(query_string)
|
||||
.with_context(|| format!("invalid checkpoint command: '{}'", query_string))?;
|
||||
.ok_or_else(|| anyhow!("invalid checkpoint command: '{}'", query_string))?;
|
||||
|
||||
let tenantid = ZTenantId::from_str(caps.get(1).unwrap().as_str())?;
|
||||
let timelineid = ZTimelineId::from_str(caps.get(2).unwrap().as_str())?;
|
||||
|
||||
@@ -94,7 +94,7 @@ use std::{
|
||||
use anyhow::{bail, Context};
|
||||
use tokio::io;
|
||||
use tracing::{error, info};
|
||||
use zenith_utils::zid::{ZTenantId, ZTenantTimelineId, ZTimelineId};
|
||||
use zenith_utils::zid::{ZTenantId, ZTimelineId};
|
||||
|
||||
pub use self::storage_sync::{schedule_timeline_checkpoint_upload, schedule_timeline_download};
|
||||
use self::{local_fs::LocalFs, rust_s3::S3};
|
||||
@@ -104,7 +104,16 @@ use crate::{
|
||||
repository::TimelineSyncState,
|
||||
};
|
||||

pub use storage_sync::compression;
/// Any timeline has its own id and its own tenant it belongs to,
/// the sync processes group timelines by both for simplicity.
#[derive(Debug, PartialEq, Eq, PartialOrd, Ord, Clone, Copy, Hash)]
pub struct TimelineSyncId(ZTenantId, ZTimelineId);

impl std::fmt::Display for TimelineSyncId {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        write!(f, "(tenant: {}, timeline: {})", self.0, self.1)
    }
}

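A small usage sketch of the tuple-struct id and its Display impl; the zeroed-looking ids are placeholders, real ones come from the tenant/timeline registries.

// Editorial sketch, not repository code.
use std::str::FromStr;
let tenant_id = ZTenantId::from_str("11223344556677881122334455667788")?;
let timeline_id = ZTimelineId::from_str("88776655443322118877665544332211")?;
let sync_id = TimelineSyncId(tenant_id, timeline_id);
// Uses the Display impl above: "(tenant: 1122..., timeline: 8877...)"
println!("syncing {}", sync_id);
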
/// A structure to combine all synchronization data to share with pageserver after a successful sync loop initialization.
/// Successful initialization includes the case when the sync loop is not started, in which case the startup data is still returned,
@@ -129,27 +138,20 @@ pub fn start_local_timeline_sync(
|
||||
|
||||
match &config.remote_storage_config {
|
||||
Some(storage_config) => match &storage_config.storage {
|
||||
RemoteStorageKind::LocalFs(root) => {
|
||||
info!("Using fs root '{}' as a remote storage", root.display());
|
||||
storage_sync::spawn_storage_sync_thread(
|
||||
config,
|
||||
local_timeline_files,
|
||||
LocalFs::new(root.clone(), &config.workdir)?,
|
||||
storage_config.max_concurrent_sync,
|
||||
storage_config.max_sync_errors,
|
||||
)
|
||||
},
|
||||
RemoteStorageKind::AwsS3(s3_config) => {
|
||||
info!("Using s3 bucket '{}' in region '{}' as a remote storage, prefix in bucket: '{:?}', bucket endpoint: '{:?}'",
|
||||
s3_config.bucket_name, s3_config.bucket_region, s3_config.prefix_in_bucket, s3_config.endpoint);
|
||||
storage_sync::spawn_storage_sync_thread(
|
||||
config,
|
||||
local_timeline_files,
|
||||
S3::new(s3_config, &config.workdir)?,
|
||||
storage_config.max_concurrent_sync,
|
||||
storage_config.max_sync_errors,
|
||||
)
|
||||
},
|
||||
RemoteStorageKind::LocalFs(root) => storage_sync::spawn_storage_sync_thread(
|
||||
config,
|
||||
local_timeline_files,
|
||||
LocalFs::new(root.clone(), &config.workdir)?,
|
||||
storage_config.max_concurrent_sync,
|
||||
storage_config.max_sync_errors,
|
||||
),
|
||||
RemoteStorageKind::AwsS3(s3_config) => storage_sync::spawn_storage_sync_thread(
|
||||
config,
|
||||
local_timeline_files,
|
||||
S3::new(s3_config, &config.workdir)?,
|
||||
storage_config.max_concurrent_sync,
|
||||
storage_config.max_sync_errors,
|
||||
),
|
||||
}
|
||||
.context("Failed to spawn the storage sync thread"),
|
||||
None => {
|
||||
@@ -158,7 +160,7 @@ pub fn start_local_timeline_sync(
|
||||
ZTenantId,
|
||||
HashMap<ZTimelineId, TimelineSyncState>,
|
||||
> = HashMap::new();
|
||||
for (ZTenantTimelineId{tenant_id, timeline_id}, (timeline_metadata, _)) in
|
||||
for (TimelineSyncId(tenant_id, timeline_id), (timeline_metadata, _)) in
|
||||
local_timeline_files
|
||||
{
|
||||
initial_timeline_states
|
||||
@@ -178,7 +180,7 @@ pub fn start_local_timeline_sync(
|
||||
|
||||
fn local_tenant_timeline_files(
|
||||
config: &'static PageServerConf,
|
||||
) -> anyhow::Result<HashMap<ZTenantTimelineId, (TimelineMetadata, Vec<PathBuf>)>> {
|
||||
) -> anyhow::Result<HashMap<TimelineSyncId, (TimelineMetadata, Vec<PathBuf>)>> {
|
||||
let mut local_tenant_timeline_files = HashMap::new();
|
||||
let tenants_dir = config.tenants_path();
|
||||
for tenants_dir_entry in fs::read_dir(&tenants_dir)
|
||||
@@ -213,9 +215,8 @@ fn local_tenant_timeline_files(
|
||||
fn collect_timelines_for_tenant(
|
||||
config: &'static PageServerConf,
|
||||
tenant_path: &Path,
|
||||
) -> anyhow::Result<HashMap<ZTenantTimelineId, (TimelineMetadata, Vec<PathBuf>)>> {
|
||||
let mut timelines: HashMap<ZTenantTimelineId, (TimelineMetadata, Vec<PathBuf>)> =
|
||||
HashMap::new();
|
||||
) -> anyhow::Result<HashMap<TimelineSyncId, (TimelineMetadata, Vec<PathBuf>)>> {
|
||||
let mut timelines: HashMap<TimelineSyncId, (TimelineMetadata, Vec<PathBuf>)> = HashMap::new();
|
||||
let tenant_id = tenant_path
|
||||
.file_name()
|
||||
.and_then(ffi::OsStr::to_str)
|
||||
@@ -236,10 +237,7 @@ fn collect_timelines_for_tenant(
|
||||
match collect_timeline_files(&timeline_path) {
|
||||
Ok((timeline_id, metadata, timeline_files)) => {
|
||||
timelines.insert(
|
||||
ZTenantTimelineId {
|
||||
tenant_id,
|
||||
timeline_id,
|
||||
},
|
||||
TimelineSyncId(tenant_id, timeline_id),
|
||||
(metadata, timeline_files),
|
||||
);
|
||||
}
|
||||
|
||||
@@ -70,3 +70,8 @@ on the timeline download, missing remote branch files are downloaded.

A branch is a per-tenant entity, yet the current implementation requires synchronizing a timeline first to get the branch files locally.
Currently, there's no other way to know about the remote branch files, nor are the file contents verified and updated.

* no IT tests

Automated S3 testing is currently lacking, since there is no convenient way to enable backups during the tests.
After that is fixed, benchmark runs should also be carried out to find bottlenecks.

@@ -73,7 +73,7 @@ impl RemoteStorage for LocalFs {
|
||||
}
|
||||
|
||||
async fn list(&self) -> anyhow::Result<Vec<Self::StoragePath>> {
|
||||
get_all_files(&self.root).await
|
||||
Ok(get_all_files(&self.root).await?.into_iter().collect())
|
||||
}
|
||||
|
||||
async fn upload(
|
||||
|
||||
@@ -9,7 +9,6 @@ use std::path::{Path, PathBuf};
|
||||
use anyhow::Context;
|
||||
use s3::{bucket::Bucket, creds::Credentials, region::Region};
|
||||
use tokio::io::{self, AsyncWriteExt};
|
||||
use tracing::debug;
|
||||
|
||||
use crate::{
|
||||
config::S3Config,
|
||||
@@ -59,21 +58,10 @@ pub struct S3 {
|
||||
impl S3 {
|
||||
/// Creates the storage, errors if incorrect AWS S3 configuration provided.
|
||||
pub fn new(aws_config: &S3Config, pageserver_workdir: &'static Path) -> anyhow::Result<Self> {
|
||||
debug!(
|
||||
"Creating s3 remote storage around bucket {}",
|
||||
aws_config.bucket_name
|
||||
);
|
||||
let region = match aws_config.endpoint.clone() {
|
||||
Some(endpoint) => Region::Custom {
|
||||
endpoint,
|
||||
region: aws_config.bucket_region.clone(),
|
||||
},
|
||||
None => aws_config
|
||||
.bucket_region
|
||||
.parse::<Region>()
|
||||
.context("Failed to parse the s3 region from config")?,
|
||||
};
|
||||
|
||||
let region = aws_config
|
||||
.bucket_region
|
||||
.parse::<Region>()
|
||||
.context("Failed to parse the s3 region from config")?;
|
||||
let credentials = Credentials::new(
|
||||
aws_config.access_key_id.as_deref(),
|
||||
aws_config.secret_access_key.as_deref(),
|
||||
|
||||
@@ -70,8 +70,7 @@
//!
//! When pageserver signals shutdown, the current sync task gets finished and the loop exits.

/// Expose the module for a binary CLI tool that deals with the corresponding blobs.
pub mod compression;
mod compression;
mod download;
pub mod index;
mod upload;
@@ -106,7 +105,7 @@ use self::{
|
||||
},
|
||||
upload::upload_timeline_checkpoint,
|
||||
};
|
||||
use super::{RemoteStorage, SyncStartupData, ZTenantTimelineId};
|
||||
use super::{RemoteStorage, SyncStartupData, TimelineSyncId};
|
||||
use crate::{
|
||||
config::PageServerConf, layered_repository::metadata::TimelineMetadata,
|
||||
remote_storage::storage_sync::compression::read_archive_header, repository::TimelineSyncState,
|
||||
@@ -243,13 +242,13 @@ mod sync_queue {
/// Limited by the number of retries; after a certain threshold the failing task gets evicted and the timeline disabled.
#[derive(Debug, PartialEq, Eq, PartialOrd, Ord, Clone)]
pub struct SyncTask {
    sync_id: ZTenantTimelineId,
    sync_id: TimelineSyncId,
    retries: u32,
    kind: SyncKind,
}

impl SyncTask {
    fn new(sync_id: ZTenantTimelineId, retries: u32, kind: SyncKind) -> Self {
    fn new(sync_id: TimelineSyncId, retries: u32, kind: SyncKind) -> Self {
        Self {
            sync_id,
            retries,
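Scheduling boils down to wrapping an id into a `SyncTask` and pushing it onto the queue, roughly as in the `schedule_timeline_download` hunk further below. The sketch assumes `files_to_skip` is the only `TimelineDownload` field, which may not hold.

// Editorial sketch of what the scheduling helpers do; retries start at zero and
// are presumably bumped by the sync loop when a failing task is requeued.
sync_queue::push(SyncTask::new(
    TimelineSyncId(tenant_id, timeline_id),
    0,
    SyncKind::Download(TimelineDownload {
        files_to_skip: Arc::new(BTreeSet::new()),
    }),
));
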
@@ -308,10 +307,7 @@ pub fn schedule_timeline_checkpoint_upload(
|
||||
}
|
||||
|
||||
if !sync_queue::push(SyncTask::new(
|
||||
ZTenantTimelineId {
|
||||
tenant_id,
|
||||
timeline_id,
|
||||
},
|
||||
TimelineSyncId(tenant_id, timeline_id),
|
||||
0,
|
||||
SyncKind::Upload(NewCheckpoint { layers, metadata }),
|
||||
)) {
|
||||
@@ -337,15 +333,8 @@ pub fn schedule_timeline_checkpoint_upload(
|
||||
///
|
||||
/// Ensure that the loop is started otherwise the task is never processed.
|
||||
pub fn schedule_timeline_download(tenant_id: ZTenantId, timeline_id: ZTimelineId) {
|
||||
debug!(
|
||||
"Scheduling timeline download for tenant {}, timeline {}",
|
||||
tenant_id, timeline_id
|
||||
);
|
||||
sync_queue::push(SyncTask::new(
|
||||
ZTenantTimelineId {
|
||||
tenant_id,
|
||||
timeline_id,
|
||||
},
|
||||
TimelineSyncId(tenant_id, timeline_id),
|
||||
0,
|
||||
SyncKind::Download(TimelineDownload {
|
||||
files_to_skip: Arc::new(BTreeSet::new()),
|
||||
@@ -361,7 +350,7 @@ pub(super) fn spawn_storage_sync_thread<
|
||||
S: RemoteStorage<StoragePath = P> + Send + Sync + 'static,
|
||||
>(
|
||||
conf: &'static PageServerConf,
|
||||
local_timeline_files: HashMap<ZTenantTimelineId, (TimelineMetadata, Vec<PathBuf>)>,
|
||||
local_timeline_files: HashMap<TimelineSyncId, (TimelineMetadata, Vec<PathBuf>)>,
|
||||
storage: S,
|
||||
max_concurrent_sync: NonZeroUsize,
|
||||
max_sync_errors: NonZeroU32,
|
||||
@@ -517,7 +506,7 @@ async fn loop_step<
|
||||
Err(e) => {
|
||||
error!(
|
||||
"Failed to process storage sync task for tenant {}, timeline {}: {:?}",
|
||||
sync_id.tenant_id, sync_id.timeline_id, e
|
||||
sync_id.0, sync_id.1, e
|
||||
);
|
||||
None
|
||||
}
|
||||
@@ -531,10 +520,7 @@ async fn loop_step<
|
||||
while let Some((sync_id, state_update)) = task_batch.next().await {
|
||||
debug!("Finished storage sync task for sync id {}", sync_id);
|
||||
if let Some(state_update) = state_update {
|
||||
let ZTenantTimelineId {
|
||||
tenant_id,
|
||||
timeline_id,
|
||||
} = sync_id;
|
||||
let TimelineSyncId(tenant_id, timeline_id) = sync_id;
|
||||
new_timeline_states
|
||||
.entry(tenant_id)
|
||||
.or_default()
|
||||
@@ -628,7 +614,7 @@ async fn process_task<
|
||||
|
||||
fn schedule_first_sync_tasks(
|
||||
index: &RemoteTimelineIndex,
|
||||
local_timeline_files: HashMap<ZTenantTimelineId, (TimelineMetadata, Vec<PathBuf>)>,
|
||||
local_timeline_files: HashMap<TimelineSyncId, (TimelineMetadata, Vec<PathBuf>)>,
|
||||
) -> HashMap<ZTenantId, HashMap<ZTimelineId, TimelineSyncState>> {
|
||||
let mut initial_timeline_statuses: HashMap<ZTenantId, HashMap<ZTimelineId, TimelineSyncState>> =
|
||||
HashMap::new();
|
||||
@@ -639,10 +625,7 @@ fn schedule_first_sync_tasks(
|
||||
for (sync_id, (local_metadata, local_files)) in local_timeline_files {
|
||||
let local_disk_consistent_lsn = local_metadata.disk_consistent_lsn();
|
||||
|
||||
let ZTenantTimelineId {
|
||||
tenant_id,
|
||||
timeline_id,
|
||||
} = sync_id;
|
||||
let TimelineSyncId(tenant_id, timeline_id) = sync_id;
|
||||
match index.timeline_entry(&sync_id) {
|
||||
Some(index_entry) => {
|
||||
let timeline_status = compare_local_and_remote_timeline(
|
||||
@@ -685,10 +668,10 @@ fn schedule_first_sync_tasks(
|
||||
}
|
||||
}
|
||||
|
||||
let unprocessed_remote_ids = |remote_id: &ZTenantTimelineId| {
|
||||
let unprocessed_remote_ids = |remote_id: &TimelineSyncId| {
|
||||
initial_timeline_statuses
|
||||
.get(&remote_id.tenant_id)
|
||||
.and_then(|timelines| timelines.get(&remote_id.timeline_id))
|
||||
.get(&remote_id.0)
|
||||
.and_then(|timelines| timelines.get(&remote_id.1))
|
||||
.is_none()
|
||||
};
|
||||
for unprocessed_remote_id in index
|
||||
@@ -696,10 +679,7 @@ fn schedule_first_sync_tasks(
|
||||
.filter(unprocessed_remote_ids)
|
||||
.collect::<Vec<_>>()
|
||||
{
|
||||
let ZTenantTimelineId {
|
||||
tenant_id: cloud_only_tenant_id,
|
||||
timeline_id: cloud_only_timeline_id,
|
||||
} = unprocessed_remote_id;
|
||||
let TimelineSyncId(cloud_only_tenant_id, cloud_only_timeline_id) = unprocessed_remote_id;
|
||||
match index
|
||||
.timeline_entry(&unprocessed_remote_id)
|
||||
.and_then(TimelineIndexEntry::disk_consistent_lsn)
|
||||
@@ -728,7 +708,7 @@ fn schedule_first_sync_tasks(
|
||||
|
||||
fn compare_local_and_remote_timeline(
|
||||
new_sync_tasks: &mut VecDeque<SyncTask>,
|
||||
sync_id: ZTenantTimelineId,
|
||||
sync_id: TimelineSyncId,
|
||||
local_metadata: TimelineMetadata,
|
||||
local_files: Vec<PathBuf>,
|
||||
remote_entry: &TimelineIndexEntry,
|
||||
@@ -785,7 +765,7 @@ async fn update_index_description<
|
||||
>(
|
||||
(storage, index): &(S, RwLock<RemoteTimelineIndex>),
|
||||
timeline_dir: &Path,
|
||||
id: ZTenantTimelineId,
|
||||
id: TimelineSyncId,
|
||||
) -> anyhow::Result<RemoteTimeline> {
|
||||
let mut index_write = index.write().await;
|
||||
let full_index = match index_write.timeline_entry(&id) {
|
||||
@@ -808,7 +788,7 @@ async fn update_index_description<
|
||||
Ok((archive_id, header_size, header)) => full_index.update_archive_contents(archive_id.0, header, header_size),
|
||||
Err((e, archive_id)) => bail!(
|
||||
"Failed to download archive header for tenant {}, timeline {}, archive for Lsn {}: {}",
|
||||
id.tenant_id, id.timeline_id, archive_id.0,
|
||||
id.0, id.1, archive_id.0,
|
||||
e
|
||||
),
|
||||
}
|
||||
@@ -886,7 +866,7 @@ mod test_utils {
|
||||
timeline_id: ZTimelineId,
|
||||
new_upload: NewCheckpoint,
|
||||
) {
|
||||
let sync_id = ZTenantTimelineId::new(harness.tenant_id, timeline_id);
|
||||
let sync_id = TimelineSyncId(harness.tenant_id, timeline_id);
|
||||
upload_timeline_checkpoint(
|
||||
harness.conf,
|
||||
Arc::clone(&remote_assets),
|
||||
@@ -942,7 +922,7 @@ mod test_utils {
|
||||
|
||||
pub async fn expect_timeline(
|
||||
index: &RwLock<RemoteTimelineIndex>,
|
||||
sync_id: ZTenantTimelineId,
|
||||
sync_id: TimelineSyncId,
|
||||
) -> RemoteTimeline {
|
||||
if let Some(TimelineIndexEntry::Full(remote_timeline)) =
|
||||
index.read().await.timeline_entry(&sync_id)
|
||||
@@ -977,18 +957,18 @@ mod test_utils {
|
||||
let mut expected_timeline_entries = BTreeMap::new();
|
||||
for sync_id in actual_sync_ids {
|
||||
actual_branches.insert(
|
||||
sync_id.tenant_id,
|
||||
sync_id.1,
|
||||
index_read
|
||||
.branch_files(sync_id.tenant_id)
|
||||
.branch_files(sync_id.0)
|
||||
.into_iter()
|
||||
.flat_map(|branch_paths| branch_paths.iter())
|
||||
.cloned()
|
||||
.collect::<BTreeSet<_>>(),
|
||||
);
|
||||
expected_branches.insert(
|
||||
sync_id.tenant_id,
|
||||
sync_id.1,
|
||||
expected_index_with_descriptions
|
||||
.branch_files(sync_id.tenant_id)
|
||||
.branch_files(sync_id.0)
|
||||
.into_iter()
|
||||
.flat_map(|branch_paths| branch_paths.iter())
|
||||
.cloned()
|
||||
|
||||
@@ -34,7 +34,7 @@ use std::{
|
||||
sync::Arc,
|
||||
};
|
||||
|
||||
use anyhow::{bail, ensure, Context};
|
||||
use anyhow::{anyhow, bail, ensure, Context};
|
||||
use async_compression::tokio::bufread::{ZstdDecoder, ZstdEncoder};
|
||||
use serde::{Deserialize, Serialize};
|
||||
use tokio::{
|
||||
@@ -211,18 +211,16 @@ pub async fn read_archive_header<A: io::AsyncRead + Send + Sync + Unpin>(
pub fn parse_archive_name(archive_path: &Path) -> anyhow::Result<(Lsn, u64)> {
    let archive_name = archive_path
        .file_name()
        .with_context(|| format!("Archive '{}' has no file name", archive_path.display()))?
        .ok_or_else(|| anyhow!("Archive '{}' has no file name", archive_path.display()))?
        .to_string_lossy();
    let (lsn_str, header_size_str) =
        archive_name
            .rsplit_once(ARCHIVE_EXTENSION)
            .with_context(|| {
                format!(
                    "Archive '{}' has incorrect extension, expected to contain '{}'",
                    archive_path.display(),
                    ARCHIVE_EXTENSION
                )
            })?;
        archive_name.rsplit_once(ARCHIVE_EXTENSION).ok_or_else(|| {
            anyhow!(
                "Archive '{}' has incorrect extension, expected to contain '{}'",
                archive_path.display(),
                ARCHIVE_EXTENSION
            )
        })?;
    let disk_consistent_lsn = Lsn::from_hex(lsn_str).with_context(|| {
        format!(
            "Archive '{}' has an invalid disk consistent lsn in its extension",
@@ -248,7 +246,7 @@ fn archive_name(disk_consistent_lsn: Lsn, header_size: u64) -> String {
    archive_name
}

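The naming convention these two functions agree on is `<disk_consistent_lsn as hex><ARCHIVE_EXTENSION><header_size>`. A hedged round-trip sketch; the extension literal and the exact hex formatting below are placeholders, not the real constants.

// Editorial sketch of the archive naming convention used by archive_name() and
// parse_archive_name(). ARCHIVE_EXTENSION's real value lives in this module.
const ARCHIVE_EXTENSION: &str = ".zst_archive_"; // placeholder value
let disk_consistent_lsn = Lsn(0x0000_0169_9D58_0000);
let header_size: u64 = 4096;
let name = format!("{:X}{}{}", disk_consistent_lsn.0, ARCHIVE_EXTENSION, header_size);
// parse_archive_name(Path::new(&name)) should recover (disk_consistent_lsn, header_size)
// by splitting on ARCHIVE_EXTENSION from the right and parsing the hex LSN.
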
pub async fn uncompress_with_header(
|
||||
async fn uncompress_with_header(
|
||||
files_to_skip: &BTreeSet<PathBuf>,
|
||||
destination_dir: &Path,
|
||||
header: ArchiveHeader,
|
||||
@@ -376,7 +374,7 @@ async fn write_archive_contents(
|
||||
}
|
||||
let metadata_bytes_written = io::copy(&mut metadata_bytes.as_slice(), &mut archive_input)
|
||||
.await
|
||||
.context("Failed to add metadata into the archive")?;
|
||||
.with_context(|| "Failed to add metadata into the archive")?;
|
||||
ensure!(
|
||||
header.metadata_file_size == metadata_bytes_written,
|
||||
"Metadata file was written to the archive incompletely",
|
||||
|
||||
@@ -3,7 +3,7 @@
|
||||
|
||||
use std::{borrow::Cow, collections::BTreeSet, path::PathBuf, sync::Arc};
|
||||
|
||||
use anyhow::{ensure, Context};
|
||||
use anyhow::{anyhow, ensure, Context};
|
||||
use futures::{stream::FuturesUnordered, StreamExt};
|
||||
use tokio::{fs, sync::RwLock};
|
||||
use tracing::{debug, error, trace, warn};
|
||||
@@ -17,7 +17,7 @@ use crate::{
|
||||
compression, index::TimelineIndexEntry, sync_queue, tenant_branch_files,
|
||||
update_index_description, SyncKind, SyncTask,
|
||||
},
|
||||
RemoteStorage, ZTenantTimelineId,
|
||||
RemoteStorage, TimelineSyncId,
|
||||
},
|
||||
};
|
||||
|
||||
@@ -52,16 +52,13 @@ pub(super) async fn download_timeline<
|
||||
>(
|
||||
conf: &'static PageServerConf,
|
||||
remote_assets: Arc<(S, RwLock<RemoteTimelineIndex>)>,
|
||||
sync_id: ZTenantTimelineId,
|
||||
sync_id: TimelineSyncId,
|
||||
mut download: TimelineDownload,
|
||||
retries: u32,
|
||||
) -> DownloadedTimeline {
|
||||
debug!("Downloading layers for sync id {}", sync_id);
|
||||
|
||||
let ZTenantTimelineId {
|
||||
tenant_id,
|
||||
timeline_id,
|
||||
} = sync_id;
|
||||
let TimelineSyncId(tenant_id, timeline_id) = sync_id;
|
||||
let index_read = remote_assets.1.read().await;
|
||||
let remote_timeline = match index_read.timeline_entry(&sync_id) {
|
||||
None => {
|
||||
@@ -113,8 +110,7 @@ pub(super) async fn download_timeline<
|
||||
}
|
||||
};
|
||||
|
||||
if let Err(e) = download_missing_branches(conf, remote_assets.as_ref(), sync_id.tenant_id).await
|
||||
{
|
||||
if let Err(e) = download_missing_branches(conf, remote_assets.as_ref(), sync_id.0).await {
|
||||
error!(
|
||||
"Failed to download missing branches for sync id {}: {:?}",
|
||||
sync_id, e
|
||||
@@ -184,10 +180,7 @@ async fn try_download_archive<
|
||||
S: RemoteStorage<StoragePath = P> + Send + Sync + 'static,
|
||||
>(
|
||||
conf: &'static PageServerConf,
|
||||
ZTenantTimelineId {
|
||||
tenant_id,
|
||||
timeline_id,
|
||||
}: ZTenantTimelineId,
|
||||
TimelineSyncId(tenant_id, timeline_id): TimelineSyncId,
|
||||
remote_assets: Arc<(S, RwLock<RemoteTimelineIndex>)>,
|
||||
remote_timeline: &RemoteTimeline,
|
||||
archive_id: ArchiveId,
|
||||
@@ -196,7 +189,7 @@ async fn try_download_archive<
|
||||
debug!("Downloading archive {:?}", archive_id);
|
||||
let archive_to_download = remote_timeline
|
||||
.archive_data(archive_id)
|
||||
.with_context(|| format!("Archive {:?} not found in remote storage", archive_id))?;
|
||||
.ok_or_else(|| anyhow!("Archive {:?} not found in remote storage", archive_id))?;
|
||||
let (archive_header, header_size) = remote_timeline
|
||||
.restore_header(archive_id)
|
||||
.context("Failed to restore header when downloading an archive")?;
|
||||
@@ -350,7 +343,7 @@ mod tests {
|
||||
#[tokio::test]
|
||||
async fn test_download_timeline() -> anyhow::Result<()> {
|
||||
let repo_harness = RepoHarness::create("test_download_timeline")?;
|
||||
let sync_id = ZTenantTimelineId::new(repo_harness.tenant_id, TIMELINE_ID);
|
||||
let sync_id = TimelineSyncId(repo_harness.tenant_id, TIMELINE_ID);
|
||||
let storage = LocalFs::new(tempdir()?.path().to_owned(), &repo_harness.conf.workdir)?;
|
||||
let index = RwLock::new(RemoteTimelineIndex::try_parse_descriptions_from_paths(
|
||||
repo_harness.conf,
|
||||
|
||||
@@ -9,7 +9,7 @@ use std::{
|
||||
path::{Path, PathBuf},
|
||||
};
|
||||
|
||||
use anyhow::{bail, ensure, Context};
|
||||
use anyhow::{anyhow, bail, ensure, Context};
|
||||
use serde::{Deserialize, Serialize};
|
||||
use tracing::debug;
|
||||
use zenith_utils::{
|
||||
@@ -22,7 +22,7 @@ use crate::{
|
||||
layered_repository::TIMELINES_SEGMENT_NAME,
|
||||
remote_storage::{
|
||||
storage_sync::compression::{parse_archive_name, FileEntry},
|
||||
ZTenantTimelineId,
|
||||
TimelineSyncId,
|
||||
},
|
||||
};
|
||||
|
||||
@@ -53,7 +53,7 @@ impl RelativePath {
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct RemoteTimelineIndex {
|
||||
branch_files: HashMap<ZTenantId, HashSet<RelativePath>>,
|
||||
timeline_files: HashMap<ZTenantTimelineId, TimelineIndexEntry>,
|
||||
timeline_files: HashMap<TimelineSyncId, TimelineIndexEntry>,
|
||||
}
|
||||
|
||||
impl RemoteTimelineIndex {
|
||||
@@ -80,22 +80,19 @@ impl RemoteTimelineIndex {
|
||||
index
|
||||
}
|
||||
|
||||
pub fn timeline_entry(&self, id: &ZTenantTimelineId) -> Option<&TimelineIndexEntry> {
|
||||
pub fn timeline_entry(&self, id: &TimelineSyncId) -> Option<&TimelineIndexEntry> {
|
||||
self.timeline_files.get(id)
|
||||
}
|
||||
|
||||
pub fn timeline_entry_mut(
|
||||
&mut self,
|
||||
id: &ZTenantTimelineId,
|
||||
) -> Option<&mut TimelineIndexEntry> {
|
||||
pub fn timeline_entry_mut(&mut self, id: &TimelineSyncId) -> Option<&mut TimelineIndexEntry> {
|
||||
self.timeline_files.get_mut(id)
|
||||
}
|
||||
|
||||
pub fn add_timeline_entry(&mut self, id: ZTenantTimelineId, entry: TimelineIndexEntry) {
|
||||
pub fn add_timeline_entry(&mut self, id: TimelineSyncId, entry: TimelineIndexEntry) {
|
||||
self.timeline_files.insert(id, entry);
|
||||
}
|
||||
|
||||
pub fn all_sync_ids(&self) -> impl Iterator<Item = ZTenantTimelineId> + '_ {
|
||||
pub fn all_sync_ids(&self) -> impl Iterator<Item = TimelineSyncId> + '_ {
|
||||
self.timeline_files.keys().copied()
|
||||
}
|
||||
|
||||
@@ -217,7 +214,7 @@ impl RemoteTimeline {
|
||||
let archive = self
|
||||
.checkpoint_archives
|
||||
.get(&archive_id)
|
||||
.with_context(|| format!("Archive {:?} not found", archive_id))?;
|
||||
.ok_or_else(|| anyhow!("Archive {:?} not found", archive_id))?;
|
||||
|
||||
let mut header_files = Vec::with_capacity(archive.files.len());
|
||||
for (expected_archive_position, archive_file) in archive.files.iter().enumerate() {
|
||||
@@ -229,10 +226,11 @@ impl RemoteTimeline {
|
||||
archive_id,
|
||||
);
|
||||
|
||||
let timeline_file = self.timeline_files.get(archive_file).with_context(|| {
|
||||
format!(
|
||||
let timeline_file = self.timeline_files.get(archive_file).ok_or_else(|| {
|
||||
anyhow!(
|
||||
"File with id {:?} not found for archive {:?}",
|
||||
archive_file, archive_id
|
||||
archive_file,
|
||||
archive_id
|
||||
)
|
||||
})?;
|
||||
header_files.push(timeline_file.clone());
|
||||
@@ -301,7 +299,7 @@ fn try_parse_index_entry(
|
||||
})?
|
||||
.iter()
|
||||
.next()
|
||||
.with_context(|| format!("Found no tenant id in path '{}'", path.display()))?
|
||||
.ok_or_else(|| anyhow!("Found no tenant id in path '{}'", path.display()))?
|
||||
.to_string_lossy()
|
||||
.parse::<ZTenantId>()
|
||||
.with_context(|| format!("Failed to parse tenant id from path '{}'", path.display()))?;
|
||||
@@ -323,8 +321,8 @@ fn try_parse_index_entry(
|
||||
let mut segments = timelines_subpath.iter();
|
||||
let timeline_id = segments
|
||||
.next()
|
||||
.with_context(|| {
|
||||
format!(
|
||||
.ok_or_else(|| {
|
||||
anyhow!(
|
||||
"{} directory of tenant {} (path '{}') is not an index entry",
|
||||
TIMELINES_SEGMENT_NAME,
|
||||
tenant_id,
|
||||
@@ -347,14 +345,11 @@ fn try_parse_index_entry(
|
||||
|
||||
let archive_name = path
|
||||
.file_name()
|
||||
.with_context(|| format!("Archive '{}' has no file name", path.display()))?
|
||||
.ok_or_else(|| anyhow!("Archive '{}' has no file name", path.display()))?
|
||||
.to_string_lossy()
|
||||
.to_string();
|
||||
|
||||
let sync_id = ZTenantTimelineId {
|
||||
tenant_id,
|
||||
timeline_id,
|
||||
};
|
||||
let sync_id = TimelineSyncId(tenant_id, timeline_id);
|
||||
let timeline_index_entry = index
|
||||
.timeline_files
|
||||
.entry(sync_id)
|
||||
|
||||
@@ -17,7 +17,7 @@ use crate::{
|
||||
index::{RemoteTimeline, TimelineIndexEntry},
|
||||
sync_queue, tenant_branch_files, update_index_description, SyncKind, SyncTask,
|
||||
},
|
||||
RemoteStorage, ZTenantTimelineId,
|
||||
RemoteStorage, TimelineSyncId,
|
||||
},
|
||||
};
|
||||
|
||||
@@ -36,13 +36,12 @@ pub(super) async fn upload_timeline_checkpoint<
|
||||
>(
|
||||
config: &'static PageServerConf,
|
||||
remote_assets: Arc<(S, RwLock<RemoteTimelineIndex>)>,
|
||||
sync_id: ZTenantTimelineId,
|
||||
sync_id: TimelineSyncId,
|
||||
new_checkpoint: NewCheckpoint,
|
||||
retries: u32,
|
||||
) -> Option<bool> {
|
||||
debug!("Uploading checkpoint for sync id {}", sync_id);
|
||||
if let Err(e) = upload_missing_branches(config, remote_assets.as_ref(), sync_id.tenant_id).await
|
||||
{
|
||||
if let Err(e) = upload_missing_branches(config, remote_assets.as_ref(), sync_id.0).await {
|
||||
error!(
|
||||
"Failed to upload missing branches for sync id {}: {:?}",
|
||||
sync_id, e
|
||||
@@ -58,10 +57,7 @@ pub(super) async fn upload_timeline_checkpoint<
|
||||
|
||||
let index = &remote_assets.1;
|
||||
|
||||
let ZTenantTimelineId {
|
||||
tenant_id,
|
||||
timeline_id,
|
||||
} = sync_id;
|
||||
let TimelineSyncId(tenant_id, timeline_id) = sync_id;
|
||||
let timeline_dir = config.timeline_path(&timeline_id, &tenant_id);
|
||||
|
||||
let index_read = index.read().await;
|
||||
@@ -155,14 +151,11 @@ async fn try_upload_checkpoint<
|
||||
>(
|
||||
config: &'static PageServerConf,
|
||||
remote_assets: Arc<(S, RwLock<RemoteTimelineIndex>)>,
|
||||
sync_id: ZTenantTimelineId,
|
||||
sync_id: TimelineSyncId,
|
||||
new_checkpoint: &NewCheckpoint,
|
||||
files_to_skip: BTreeSet<PathBuf>,
|
||||
) -> anyhow::Result<(ArchiveHeader, u64)> {
|
||||
let ZTenantTimelineId {
|
||||
tenant_id,
|
||||
timeline_id,
|
||||
} = sync_id;
|
||||
let TimelineSyncId(tenant_id, timeline_id) = sync_id;
|
||||
let timeline_dir = config.timeline_path(&timeline_id, &tenant_id);
|
||||
|
||||
let files_to_upload = new_checkpoint
|
||||
@@ -295,7 +288,7 @@ mod tests {
|
||||
#[tokio::test]
|
||||
async fn reupload_timeline() -> anyhow::Result<()> {
|
||||
let repo_harness = RepoHarness::create("reupload_timeline")?;
|
||||
let sync_id = ZTenantTimelineId::new(repo_harness.tenant_id, TIMELINE_ID);
|
||||
let sync_id = TimelineSyncId(repo_harness.tenant_id, TIMELINE_ID);
|
||||
let storage = LocalFs::new(tempdir()?.path().to_owned(), &repo_harness.conf.workdir)?;
|
||||
let index = RwLock::new(RemoteTimelineIndex::try_parse_descriptions_from_paths(
|
||||
repo_harness.conf,
|
||||
@@ -491,7 +484,7 @@ mod tests {
|
||||
#[tokio::test]
|
||||
async fn reupload_timeline_rejected() -> anyhow::Result<()> {
|
||||
let repo_harness = RepoHarness::create("reupload_timeline_rejected")?;
|
||||
let sync_id = ZTenantTimelineId::new(repo_harness.tenant_id, TIMELINE_ID);
|
||||
let sync_id = TimelineSyncId(repo_harness.tenant_id, TIMELINE_ID);
|
||||
let storage = LocalFs::new(tempdir()?.path().to_owned(), &repo_harness.conf.workdir)?;
|
||||
let index = RwLock::new(RemoteTimelineIndex::try_parse_descriptions_from_paths(
|
||||
repo_harness.conf,
|
||||
|
||||
@@ -7,7 +7,7 @@ use postgres_ffi::{MultiXactId, MultiXactOffset, TransactionId};
|
||||
use serde::{Deserialize, Serialize};
|
||||
use std::collections::HashSet;
|
||||
use std::ops::{AddAssign, Deref};
|
||||
use std::sync::{Arc, RwLockReadGuard};
|
||||
use std::sync::Arc;
|
||||
use std::time::Duration;
|
||||
use zenith_utils::lsn::{Lsn, RecordLsn};
|
||||
use zenith_utils::zid::ZTimelineId;
|
||||
@@ -19,8 +19,6 @@ pub type BlockNumber = u32;
/// A repository corresponds to one .zenith directory. One repository holds multiple
/// timelines, forked off from the same initial call to 'initdb'.
pub trait Repository: Send + Sync {
    fn detach_timeline(&self, timeline_id: ZTimelineId) -> Result<()>;

    /// Updates timeline based on the new sync state, received from the remote storage synchronization.
    /// See [`crate::remote_storage`] for more details about the synchronization.
    fn set_timeline_state(
@@ -184,9 +182,6 @@ pub trait Timeline: Send + Sync {
    ///
    fn wait_lsn(&self, lsn: Lsn) -> Result<()>;

    /// Lock and get timeline's GC cutoff
    fn get_latest_gc_cutoff_lsn(&self) -> RwLockReadGuard<Lsn>;

    /// Look up given page version.
    fn get_page_at_lsn(&self, tag: RelishTag, blknum: BlockNumber, lsn: Lsn) -> Result<Bytes>;

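Taken together these methods form the read path used by the page service. A compressed sketch; the rel tag and LSN are placeholders, and the two-argument `check_lsn_is_in_scope` from this diff is assumed.

// Editorial sketch of a point-in-time page read against the Timeline trait.
let lsn = Lsn(0x2000);
timeline.wait_lsn(lsn)?; // block until WAL up to `lsn` has been ingested
let latest_gc_cutoff_lsn = timeline.get_latest_gc_cutoff_lsn();
timeline.check_lsn_is_in_scope(lsn, &latest_gc_cutoff_lsn)?; // refuse already-GC'ed versions
let page = timeline.get_page_at_lsn(RelishTag::Relation(rel_tag), 0, lsn)?;
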
@@ -220,12 +215,10 @@ pub trait Timeline: Send + Sync {
|
||||
|
||||
/// Atomically get both last and prev.
|
||||
fn get_last_record_rlsn(&self) -> RecordLsn;
|
||||
|
||||
/// Get last or prev record separately. Same as get_last_record_rlsn().last/prev.
|
||||
fn get_last_record_lsn(&self) -> Lsn;
|
||||
|
||||
fn get_prev_record_lsn(&self) -> Lsn;
|
||||
|
||||
fn get_start_lsn(&self) -> Lsn;
|
||||
fn get_disk_consistent_lsn(&self) -> Lsn;
|
||||
|
||||
/// Mutate the timeline with a [`TimelineWriter`].
|
||||
@@ -240,11 +233,7 @@ pub trait Timeline: Send + Sync {
|
||||
|
||||
///
|
||||
/// Check that it is valid to request operations with that lsn.
|
||||
fn check_lsn_is_in_scope(
|
||||
&self,
|
||||
lsn: Lsn,
|
||||
latest_gc_cutoff_lsn: &RwLockReadGuard<Lsn>,
|
||||
) -> Result<()>;
|
||||
fn check_lsn_is_in_scope(&self, lsn: Lsn) -> Result<()>;
|
||||
|
||||
/// Retrieve current logical size of the timeline
|
||||
///
|
||||
@@ -253,7 +242,7 @@ pub trait Timeline: Send + Sync {
    fn get_current_logical_size(&self) -> usize;

    /// Does the same as get_current_logical_size but counted on demand.
    /// Used in tests to ensure that incremental and non incremental variants match.
    /// Used in tests to ensure thet incremental and non incremental variants match.
    fn get_current_logical_size_non_incremental(&self, lsn: Lsn) -> Result<usize>;

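The consistency check the doc comment alludes to would look roughly like this in a test, assuming a harness that yields a timeline with some ingested WAL.

// Editorial sketch: the incremental counter and the on-demand recomputation
// should agree when measured at the same LSN.
let lsn = timeline.get_last_record_lsn();
let incremental = timeline.get_current_logical_size();
let non_incremental = timeline.get_current_logical_size_non_incremental(lsn)?;
assert_eq!(incremental, non_incremental);
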
/// An escape hatch to allow "casting" a generic Timeline to LayeredTimeline.
|
||||
@@ -306,12 +295,8 @@ pub enum ZenithWalRecord {
|
||||
/// Native PostgreSQL WAL record
|
||||
Postgres { will_init: bool, rec: Bytes },
|
||||
|
||||
/// Clear bits in heap visibility map. ('flags' is bitmap of bits to clear)
|
||||
ClearVisibilityMapFlags {
|
||||
new_heap_blkno: Option<u32>,
|
||||
old_heap_blkno: Option<u32>,
|
||||
flags: u8,
|
||||
},
|
||||
/// Set bits in heap visibility map. (heap blkno, flag bits to clear)
|
||||
ClearVisibilityMapFlags { heap_blkno: u32, flags: u8 },
|
||||
/// Mark transaction IDs as committed on a CLOG page
|
||||
ClogSetCommitted { xids: Vec<TransactionId> },
|
||||
/// Mark transaction IDs as aborted on a CLOG page
|
||||
@@ -447,6 +432,8 @@ pub mod repo_harness {
|
||||
#[allow(clippy::bool_assert_comparison)]
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use crate::layered_repository::metadata::METADATA_FILE_NAME;
|
||||
|
||||
use super::repo_harness::*;
|
||||
use super::*;
|
||||
use postgres_ffi::{pg_constants, xlog_utils::SIZEOF_CHECKPOINT};
|
||||
@@ -744,8 +731,8 @@ mod tests {
|
||||
|
||||
let mut lsn = 0x10;
|
||||
for blknum in 0..pg_constants::RELSEG_SIZE + 1 {
|
||||
lsn += 0x10;
|
||||
let img = TEST_IMG(&format!("foo blk {} at {}", blknum, Lsn(lsn)));
|
||||
lsn += 0x10;
|
||||
writer.put_page_image(TESTREL_A, blknum as BlockNumber, Lsn(lsn), img)?;
|
||||
}
|
||||
writer.advance_last_record_lsn(Lsn(lsn));
|
||||
@@ -998,7 +985,7 @@ mod tests {
|
||||
.source()
|
||||
.unwrap()
|
||||
.to_string()
|
||||
.contains("is earlier than latest GC horizon"));
|
||||
.contains("is earlier than initdb lsn"));
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1015,11 +1002,12 @@ mod tests {
|
||||
make_some_layers(&tline, Lsn(0x20))?;
|
||||
|
||||
repo.gc_iteration(Some(TIMELINE_ID), 0x10, false)?;
|
||||
let latest_gc_cutoff_lsn = tline.get_latest_gc_cutoff_lsn();
|
||||
assert!(*latest_gc_cutoff_lsn > Lsn(0x25));
|
||||
|
||||
match tline.get_page_at_lsn(TESTREL_A, 0, Lsn(0x25)) {
|
||||
Ok(_) => panic!("request for page should have failed"),
|
||||
Err(err) => assert!(err.to_string().contains("not found at")),
|
||||
Err(err) => assert!(err
|
||||
.to_string()
|
||||
.contains("tried to request a page version that was garbage collected")),
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
@@ -1130,4 +1118,141 @@ mod tests {
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn corrupt_metadata() -> Result<()> {
|
||||
const TEST_NAME: &str = "corrupt_metadata";
|
||||
let harness = RepoHarness::create(TEST_NAME)?;
|
||||
let repo = harness.load();
|
||||
|
||||
repo.create_empty_timeline(TIMELINE_ID, Lsn(0))?;
|
||||
drop(repo);
|
||||
|
||||
let metadata_path = harness.timeline_path(&TIMELINE_ID).join(METADATA_FILE_NAME);
|
||||
|
||||
assert!(metadata_path.is_file());
|
||||
|
||||
let mut metadata_bytes = std::fs::read(&metadata_path)?;
|
||||
assert_eq!(metadata_bytes.len(), 512);
|
||||
metadata_bytes[512 - 4 - 2] ^= 1;
|
||||
std::fs::write(metadata_path, metadata_bytes)?;
|
||||
|
||||
let new_repo = harness.load();
|
||||
let err = new_repo.get_timeline(TIMELINE_ID).err().unwrap();
|
||||
assert_eq!(err.to_string(), "failed to load metadata");
|
||||
assert_eq!(
|
||||
err.source().unwrap().to_string(),
|
||||
"metadata checksum mismatch"
|
||||
);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn future_layerfiles() -> Result<()> {
|
||||
const TEST_NAME: &str = "future_layerfiles";
|
||||
let harness = RepoHarness::create(TEST_NAME)?;
|
||||
let repo = harness.load();
|
||||
|
||||
// Create a timeline with disk_consistent_lsn = 8000
|
||||
let tline = repo.create_empty_timeline(TIMELINE_ID, Lsn(0x8000))?;
|
||||
let writer = tline.writer();
|
||||
writer.advance_last_record_lsn(Lsn(0x8000));
|
||||
drop(writer);
|
||||
repo.checkpoint_iteration(CheckpointConfig::Forced)?;
|
||||
drop(repo);
|
||||
|
||||
let timeline_path = harness.timeline_path(&TIMELINE_ID);
|
||||
|
||||
let make_empty_file = |filename: &str| -> std::io::Result<()> {
|
||||
let path = timeline_path.join(filename);
|
||||
|
||||
assert!(!path.exists());
|
||||
std::fs::write(&path, &[])?;
|
||||
|
||||
Ok(())
|
||||
};
|
||||
|
||||
// Helper function to check that a relation file exists, and a corresponding
|
||||
// <filename>.0.old file does not.
|
||||
let assert_exists = |filename: &str| {
|
||||
let path = timeline_path.join(filename);
|
||||
assert!(path.exists(), "file {} was removed", filename);
|
||||
|
||||
// Check that there is no .old file
|
||||
let backup_path = timeline_path.join(format!("{}.0.old", filename));
|
||||
assert!(
|
||||
!backup_path.exists(),
|
||||
"unexpected backup file {}",
|
||||
backup_path.display()
|
||||
);
|
||||
};
|
||||
|
||||
// Helper function to check that a relation file does *not* exists, and a corresponding
|
||||
// <filename>.<num>.old file does.
|
||||
let assert_is_renamed = |filename: &str, num: u32| {
|
||||
let path = timeline_path.join(filename);
|
||||
assert!(
|
||||
!path.exists(),
|
||||
"file {} was not removed as expected",
|
||||
filename
|
||||
);
|
||||
|
||||
let backup_path = timeline_path.join(format!("{}.{}.old", filename, num));
|
||||
assert!(
|
||||
backup_path.exists(),
|
||||
"backup file {} was not created",
|
||||
backup_path.display()
|
||||
);
|
||||
};
|
||||
|
||||
// These files are considered to be in the future and will be renamed out
|
||||
// of the way
|
||||
let future_filenames = vec![
|
||||
format!("pg_control_0_{:016X}", 0x8001),
|
||||
format!("pg_control_0_{:016X}_{:016X}", 0x8001, 0x8008),
|
||||
];
|
||||
// But these are not:
|
||||
let past_filenames = vec![
|
||||
format!("pg_control_0_{:016X}", 0x8000),
|
||||
format!("pg_control_0_{:016X}_{:016X}", 0x7000, 0x8001),
|
||||
];
|
||||
|
||||
for filename in future_filenames.iter().chain(past_filenames.iter()) {
|
||||
make_empty_file(filename)?;
|
||||
}
|
||||
|
||||
// Load the timeline. This will cause the files in the "future" to be renamed
|
||||
// away.
|
||||
let new_repo = harness.load();
|
||||
new_repo.get_timeline(TIMELINE_ID).unwrap();
|
||||
drop(new_repo);
|
||||
|
||||
for filename in future_filenames.iter() {
|
||||
assert_is_renamed(filename, 0);
|
||||
}
|
||||
for filename in past_filenames.iter() {
|
||||
assert_exists(filename);
|
||||
}
|
||||
|
||||
// Create the future files again, and load again. They should be renamed to
|
||||
// *.1.old this time.
|
||||
for filename in future_filenames.iter() {
|
||||
make_empty_file(filename)?;
|
||||
}
|
||||
|
||||
let new_repo = harness.load();
|
||||
new_repo.get_timeline(TIMELINE_ID).unwrap();
|
||||
drop(new_repo);
|
||||
|
||||
for filename in future_filenames.iter() {
|
||||
assert_is_renamed(filename, 0);
|
||||
assert_is_renamed(filename, 1);
|
||||
}
|
||||
for filename in past_filenames.iter() {
|
||||
assert_exists(filename);
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
@@ -9,7 +9,7 @@ use crate::thread_mgr;
|
||||
use crate::thread_mgr::ThreadKind;
|
||||
use crate::walredo::PostgresRedoManager;
|
||||
use crate::CheckpointConfig;
|
||||
use anyhow::{bail, Context, Result};
|
||||
use anyhow::{anyhow, bail, Context, Result};
|
||||
use lazy_static::lazy_static;
|
||||
use log::*;
|
||||
use serde::{Deserialize, Serialize};
|
||||
@@ -73,7 +73,6 @@ pub fn set_timeline_states(
|
||||
let mut m = access_tenants();
|
||||
for (tenant_id, timeline_states) in timeline_states {
|
||||
let tenant = m.entry(tenant_id).or_insert_with(|| {
|
||||
// TODO (rodionov) reuse one of the initialisation routines
|
||||
// Set up a WAL redo manager, for applying WAL records.
|
||||
let walredo_mgr = PostgresRedoManager::new(conf, tenant_id);
|
||||
|
||||
@@ -209,7 +208,7 @@ pub fn activate_tenant(conf: &'static PageServerConf, tenantid: ZTenantId) -> Re
|
||||
let mut m = access_tenants();
|
||||
let tenant = m
|
||||
.get_mut(&tenantid)
|
||||
.with_context(|| format!("Tenant not found for id {}", tenantid))?;
|
||||
.ok_or_else(|| anyhow!("Tenant not found for id {}", tenantid))?;
|
||||
|
||||
info!("activating tenant {}", tenantid);
|
||||
|
||||
@@ -252,7 +251,7 @@ pub fn get_repository_for_tenant(tenantid: ZTenantId) -> Result<Arc<dyn Reposito
|
||||
let m = access_tenants();
|
||||
let tenant = m
|
||||
.get(&tenantid)
|
||||
.with_context(|| format!("Tenant not found for tenant {}", tenantid))?;
|
||||
.ok_or_else(|| anyhow!("Tenant not found for tenant {}", tenantid))?;
|
||||
|
||||
Ok(Arc::clone(&tenant.repo))
|
||||
}
|
||||
@@ -264,7 +263,7 @@ pub fn get_timeline_for_tenant(
|
||||
get_repository_for_tenant(tenantid)?
|
||||
.get_timeline(timelineid)?
|
||||
.local_timeline()
|
||||
.with_context(|| format!("cannot fetch timeline {}", timelineid))
|
||||
.ok_or_else(|| anyhow!("cannot fetch timeline {}", timelineid))
|
||||
}
|
||||
|
||||
#[derive(Serialize, Deserialize, Clone)]
|
||||
|
||||
@@ -332,11 +332,8 @@ impl VirtualFile {
        // TODO: We could downgrade the locks to read mode before calling
        // 'func', to allow a little bit more concurrency, but the standard
        // library RwLock doesn't allow downgrading without releasing the lock,
        // and that doesn't seem worth the trouble.
        //
        // XXX: `parking_lot::RwLock` can enable such downgrades, yet its implementation is fair and
        // may deadlock on subsequent read calls.
        // Simply replacing all `RwLock` in the project causes deadlocks, so use it sparingly.
        // and that doesn't seem worth the trouble. (parking_lot RwLock would
        // allow it)
        let result = STORAGE_IO_TIME
            .with_label_values(&[op, &self.tenantid, &self.timelineid])
            .observe_closure_duration(|| func(&file));

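For reference, the lock downgrade the comment is talking about looks like this with `parking_lot`; standalone illustration, not repository code.

// Editorial sketch: atomically turn a write guard into a read guard, which the
// standard-library RwLock cannot do without releasing the lock in between.
use parking_lot::{RwLock, RwLockWriteGuard};

fn downgrade_demo() {
    let lock = RwLock::new(0u32);
    let mut write_guard = lock.write();
    *write_guard += 1;
    let read_guard = RwLockWriteGuard::downgrade(write_guard);
    assert_eq!(*read_guard, 1);
}
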
@@ -349,25 +349,49 @@ impl WalIngest {
|
||||
decoded: &mut DecodedWALRecord,
|
||||
) -> Result<()> {
|
||||
// Handle VM bit updates that are implicitly part of heap records.
|
||||
|
||||
// First, look at the record to determine which VM bits need
|
||||
// to be cleared. If either of these variables is set, we
|
||||
// need to clear the corresponding bits in the visibility map.
|
||||
let mut new_heap_blkno: Option<u32> = None;
|
||||
let mut old_heap_blkno: Option<u32> = None;
|
||||
if decoded.xl_rmid == pg_constants::RM_HEAP_ID {
|
||||
let info = decoded.xl_info & pg_constants::XLOG_HEAP_OPMASK;
|
||||
if info == pg_constants::XLOG_HEAP_INSERT {
|
||||
let xlrec = XlHeapInsert::decode(buf);
|
||||
assert_eq!(0, buf.remaining());
|
||||
if (xlrec.flags & pg_constants::XLH_INSERT_ALL_VISIBLE_CLEARED) != 0 {
|
||||
new_heap_blkno = Some(decoded.blocks[0].blkno);
|
||||
if (xlrec.flags
|
||||
& (pg_constants::XLH_INSERT_ALL_VISIBLE_CLEARED
|
||||
| pg_constants::XLH_INSERT_ALL_FROZEN_SET))
|
||||
!= 0
|
||||
{
|
||||
timeline.put_wal_record(
|
||||
lsn,
|
||||
RelishTag::Relation(RelTag {
|
||||
forknum: pg_constants::VISIBILITYMAP_FORKNUM,
|
||||
spcnode: decoded.blocks[0].rnode_spcnode,
|
||||
dbnode: decoded.blocks[0].rnode_dbnode,
|
||||
relnode: decoded.blocks[0].rnode_relnode,
|
||||
}),
|
||||
decoded.blocks[0].blkno / pg_constants::HEAPBLOCKS_PER_PAGE as u32,
|
||||
ZenithWalRecord::ClearVisibilityMapFlags {
|
||||
heap_blkno: decoded.blocks[0].blkno,
|
||||
flags: pg_constants::VISIBILITYMAP_VALID_BITS,
|
||||
},
|
||||
)?;
|
||||
}
|
||||
} else if info == pg_constants::XLOG_HEAP_DELETE {
|
||||
let xlrec = XlHeapDelete::decode(buf);
|
||||
assert_eq!(0, buf.remaining());
|
||||
if (xlrec.flags & pg_constants::XLH_DELETE_ALL_VISIBLE_CLEARED) != 0 {
|
||||
new_heap_blkno = Some(decoded.blocks[0].blkno);
|
||||
timeline.put_wal_record(
|
||||
lsn,
|
||||
RelishTag::Relation(RelTag {
|
||||
forknum: pg_constants::VISIBILITYMAP_FORKNUM,
|
||||
spcnode: decoded.blocks[0].rnode_spcnode,
|
||||
dbnode: decoded.blocks[0].rnode_dbnode,
|
||||
relnode: decoded.blocks[0].rnode_relnode,
|
||||
}),
|
||||
decoded.blocks[0].blkno / pg_constants::HEAPBLOCKS_PER_PAGE as u32,
|
||||
ZenithWalRecord::ClearVisibilityMapFlags {
|
||||
heap_blkno: decoded.blocks[0].blkno,
|
||||
flags: pg_constants::VISIBILITYMAP_VALID_BITS,
|
||||
},
|
||||
)?;
|
||||
}
|
||||
} else if info == pg_constants::XLOG_HEAP_UPDATE
|
||||
|| info == pg_constants::XLOG_HEAP_HOT_UPDATE
|
||||
@@ -376,15 +400,39 @@ impl WalIngest {
|
||||
// the size of tuple data is inferred from the size of the record.
|
||||
// we can't validate the remaining number of bytes without parsing
|
||||
// the tuple data.
|
||||
if (xlrec.flags & pg_constants::XLH_UPDATE_OLD_ALL_VISIBLE_CLEARED) != 0 {
|
||||
old_heap_blkno = Some(decoded.blocks[0].blkno);
|
||||
}
|
||||
if (xlrec.flags & pg_constants::XLH_UPDATE_NEW_ALL_VISIBLE_CLEARED) != 0 {
|
||||
// PostgreSQL only uses XLH_UPDATE_NEW_ALL_VISIBLE_CLEARED on a
|
||||
// non-HOT update where the new tuple goes to different page than
|
||||
// the old one. Otherwise, only XLH_UPDATE_OLD_ALL_VISIBLE_CLEARED is
|
||||
// set.
|
||||
new_heap_blkno = Some(decoded.blocks[1].blkno);
|
||||
timeline.put_wal_record(
|
||||
lsn,
|
||||
RelishTag::Relation(RelTag {
|
||||
forknum: pg_constants::VISIBILITYMAP_FORKNUM,
|
||||
spcnode: decoded.blocks[0].rnode_spcnode,
|
||||
dbnode: decoded.blocks[0].rnode_dbnode,
|
||||
relnode: decoded.blocks[0].rnode_relnode,
|
||||
}),
|
||||
decoded.blocks[0].blkno / pg_constants::HEAPBLOCKS_PER_PAGE as u32,
|
||||
ZenithWalRecord::ClearVisibilityMapFlags {
|
||||
heap_blkno: decoded.blocks[0].blkno,
|
||||
flags: pg_constants::VISIBILITYMAP_VALID_BITS,
|
||||
},
|
||||
)?;
|
||||
}
|
||||
if (xlrec.flags & pg_constants::XLH_UPDATE_OLD_ALL_VISIBLE_CLEARED) != 0
|
||||
&& decoded.blocks.len() > 1
|
||||
{
|
||||
timeline.put_wal_record(
|
||||
lsn,
|
||||
RelishTag::Relation(RelTag {
|
||||
forknum: pg_constants::VISIBILITYMAP_FORKNUM,
|
||||
spcnode: decoded.blocks[1].rnode_spcnode,
|
||||
dbnode: decoded.blocks[1].rnode_dbnode,
|
||||
relnode: decoded.blocks[1].rnode_relnode,
|
||||
}),
|
||||
decoded.blocks[1].blkno / pg_constants::HEAPBLOCKS_PER_PAGE as u32,
|
||||
ZenithWalRecord::ClearVisibilityMapFlags {
|
||||
heap_blkno: decoded.blocks[1].blkno,
|
||||
flags: pg_constants::VISIBILITYMAP_VALID_BITS,
|
||||
},
|
||||
)?;
|
||||
}
|
||||
}
|
||||
} else if decoded.xl_rmid == pg_constants::RM_HEAP2_ID {
|
||||
@@ -400,67 +448,32 @@ impl WalIngest {
|
||||
};
|
||||
assert_eq!(offset_array_len, buf.remaining());
|
||||
|
||||
if (xlrec.flags & pg_constants::XLH_INSERT_ALL_VISIBLE_CLEARED) != 0 {
|
||||
new_heap_blkno = Some(decoded.blocks[0].blkno);
|
||||
// FIXME: why also ALL_FROZEN_SET?
|
||||
if (xlrec.flags
|
||||
& (pg_constants::XLH_INSERT_ALL_VISIBLE_CLEARED
|
||||
| pg_constants::XLH_INSERT_ALL_FROZEN_SET))
|
||||
!= 0
|
||||
{
|
||||
timeline.put_wal_record(
|
||||
lsn,
|
||||
RelishTag::Relation(RelTag {
|
||||
forknum: pg_constants::VISIBILITYMAP_FORKNUM,
|
||||
spcnode: decoded.blocks[0].rnode_spcnode,
|
||||
dbnode: decoded.blocks[0].rnode_dbnode,
|
||||
relnode: decoded.blocks[0].rnode_relnode,
|
||||
}),
|
||||
decoded.blocks[0].blkno / pg_constants::HEAPBLOCKS_PER_PAGE as u32,
|
||||
ZenithWalRecord::ClearVisibilityMapFlags {
|
||||
heap_blkno: decoded.blocks[0].blkno,
|
||||
flags: pg_constants::VISIBILITYMAP_VALID_BITS,
|
||||
},
|
||||
)?;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
        // FIXME: What about XLOG_HEAP_LOCK and XLOG_HEAP2_LOCK_UPDATED?

        // Clear the VM bits if required.
        if new_heap_blkno.is_some() || old_heap_blkno.is_some() {
            let vm_relish = RelishTag::Relation(RelTag {
                forknum: pg_constants::VISIBILITYMAP_FORKNUM,
                spcnode: decoded.blocks[0].rnode_spcnode,
                dbnode: decoded.blocks[0].rnode_dbnode,
                relnode: decoded.blocks[0].rnode_relnode,
            });

            let new_vm_blk = new_heap_blkno.map(pg_constants::HEAPBLK_TO_MAPBLOCK);
            let old_vm_blk = old_heap_blkno.map(pg_constants::HEAPBLK_TO_MAPBLOCK);
            if new_vm_blk == old_vm_blk {
                // An UPDATE record that needs to clear the bits for both old and the
                // new page, both of which reside on the same VM page.
timeline.put_wal_record(
|
||||
lsn,
|
||||
vm_relish,
|
||||
new_vm_blk.unwrap(),
|
||||
ZenithWalRecord::ClearVisibilityMapFlags {
|
||||
new_heap_blkno,
|
||||
old_heap_blkno,
|
||||
flags: pg_constants::VISIBILITYMAP_VALID_BITS,
|
||||
},
|
||||
)?;
|
||||
} else {
|
||||
// Clear VM bits for one heap page, or for two pages that reside on
|
||||
// different VM pages.
|
||||
if let Some(new_vm_blk) = new_vm_blk {
|
||||
timeline.put_wal_record(
|
||||
lsn,
|
||||
vm_relish,
|
||||
new_vm_blk,
|
||||
ZenithWalRecord::ClearVisibilityMapFlags {
|
||||
new_heap_blkno,
|
||||
old_heap_blkno: None,
|
||||
flags: pg_constants::VISIBILITYMAP_VALID_BITS,
|
||||
},
|
||||
)?;
|
||||
}
|
||||
if let Some(old_vm_blk) = old_vm_blk {
|
||||
timeline.put_wal_record(
|
||||
lsn,
|
||||
vm_relish,
|
||||
old_vm_blk,
|
||||
ZenithWalRecord::ClearVisibilityMapFlags {
|
||||
new_heap_blkno: None,
|
||||
old_heap_blkno,
|
||||
flags: pg_constants::VISIBILITYMAP_VALID_BITS,
|
||||
},
|
||||
)?;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
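
The visibility-map handling above (both in the UPDATE branch and in the shared "Clear the VM bits" block) relies on how a heap block number maps to a VM block, byte, and bit offset. The sketch below mirrors PostgreSQL's visibility-map layout with illustrative constants (8 KB pages, a MAXALIGN'd 24-byte page header, 2 VM bits per heap block); it is an assumption-based illustration, not the pg_constants definitions themselves.

// Hedged sketch of the heap-block -> VM location arithmetic behind
// HEAPBLK_TO_MAPBLOCK / HEAPBLK_TO_MAPBYTE / HEAPBLK_TO_OFFSET.
const BLCKSZ: u32 = 8192;
const MAXALIGN_SIZE_OF_PAGE_HEADER_DATA: u32 = 24;
const BITS_PER_HEAPBLOCK: u32 = 2; // ALL_VISIBLE + ALL_FROZEN bits
const HEAPBLOCKS_PER_BYTE: u32 = 8 / BITS_PER_HEAPBLOCK; // 4
const HEAPBLOCKS_PER_PAGE: u32 =
    (BLCKSZ - MAXALIGN_SIZE_OF_PAGE_HEADER_DATA) * HEAPBLOCKS_PER_BYTE;

fn heapblk_to_mapblock(heap_blkno: u32) -> u32 {
    heap_blkno / HEAPBLOCKS_PER_PAGE
}

fn heapblk_to_mapbyte(heap_blkno: u32) -> u32 {
    (heap_blkno % HEAPBLOCKS_PER_PAGE) / HEAPBLOCKS_PER_BYTE
}

fn heapblk_to_offset(heap_blkno: u32) -> u32 {
    (heap_blkno % HEAPBLOCKS_PER_BYTE) * BITS_PER_HEAPBLOCK
}

Two heap blocks that land on the same VM block produce a single ClearVisibilityMapFlags record (the new_vm_blk == old_vm_blk branch above); blocks on different VM pages get one record each.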
|
||||
|
||||
|
||||
@@ -11,16 +11,14 @@ use crate::thread_mgr;
|
||||
use crate::thread_mgr::ThreadKind;
|
||||
use crate::walingest::WalIngest;
|
||||
use anyhow::{bail, Context, Error, Result};
|
||||
use bytes::BytesMut;
|
||||
use fail::fail_point;
|
||||
use lazy_static::lazy_static;
|
||||
use parking_lot::Mutex;
|
||||
use postgres_ffi::waldecoder::*;
|
||||
use postgres_protocol::message::backend::ReplicationMessage;
|
||||
use postgres_types::PgLsn;
|
||||
use std::cell::Cell;
|
||||
use std::collections::HashMap;
|
||||
use std::str::FromStr;
|
||||
use std::sync::Mutex;
|
||||
use std::thread_local;
|
||||
use std::time::SystemTime;
|
||||
use tokio::pin;
|
||||
@@ -29,7 +27,6 @@ use tokio_postgres::{Client, NoTls, SimpleQueryMessage, SimpleQueryRow};
|
||||
use tokio_stream::StreamExt;
|
||||
use tracing::*;
|
||||
use zenith_utils::lsn::Lsn;
|
||||
use zenith_utils::pq_proto::ZenithFeedback;
|
||||
use zenith_utils::zid::ZTenantId;
|
||||
use zenith_utils::zid::ZTimelineId;
|
||||
|
||||
@@ -53,7 +50,7 @@ thread_local! {
|
||||
}
|
||||
|
||||
fn drop_wal_receiver(tenantid: ZTenantId, timelineid: ZTimelineId) {
|
||||
let mut receivers = WAL_RECEIVERS.lock().unwrap();
|
||||
let mut receivers = WAL_RECEIVERS.lock();
|
||||
receivers.remove(&(tenantid, timelineid));
|
||||
}
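
This hunk and the ones that follow drop the .unwrap() because WAL_RECEIVERS now uses parking_lot::Mutex, whose lock() cannot be poisoned and returns the guard directly. A minimal sketch of the difference, using an assumed map type purely for illustration:

use std::collections::HashMap;

fn lock_styles() {
    // std::sync::Mutex: lock() returns a Result because a panic while
    // holding the lock poisons it.
    let std_map = std::sync::Mutex::new(HashMap::<u32, String>::new());
    std_map.lock().unwrap().insert(1, "std".into());

    // parking_lot::Mutex: no poisoning, so lock() yields the guard directly.
    let pl_map = parking_lot::Mutex::new(HashMap::<u32, String>::new());
    pl_map.lock().insert(2, "parking_lot".into());
}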
|
||||
|
||||
@@ -64,11 +61,10 @@ pub fn launch_wal_receiver(
|
||||
timelineid: ZTimelineId,
|
||||
wal_producer_connstr: &str,
|
||||
) -> Result<()> {
|
||||
let mut receivers = WAL_RECEIVERS.lock().unwrap();
|
||||
let mut receivers = WAL_RECEIVERS.lock();
|
||||
|
||||
match receivers.get_mut(&(tenantid, timelineid)) {
|
||||
Some(receiver) => {
|
||||
info!("wal receiver already running, updating connection string");
|
||||
receiver.wal_producer_connstr = wal_producer_connstr.into();
|
||||
}
|
||||
None => {
|
||||
@@ -97,7 +93,7 @@ pub fn launch_wal_receiver(
|
||||
|
||||
// Look up current WAL producer connection string in the hash table
|
||||
fn get_wal_producer_connstr(tenantid: ZTenantId, timelineid: ZTimelineId) -> String {
|
||||
let receivers = WAL_RECEIVERS.lock().unwrap();
|
||||
let receivers = WAL_RECEIVERS.lock();
|
||||
|
||||
receivers
|
||||
.get(&(tenantid, timelineid))
|
||||
@@ -162,7 +158,7 @@ fn walreceiver_main(
|
||||
// This is from tokio-postgres docs, but it is a bit weird in our case because we extensively use block_on
|
||||
runtime.spawn(async move {
|
||||
if let Err(e) = connection.await {
|
||||
error!("connection error: {}", e);
|
||||
eprintln!("connection error: {}", e);
|
||||
}
|
||||
});
|
||||
|
||||
@@ -256,8 +252,6 @@ fn walreceiver_main(
|
||||
let writer = timeline.writer();
|
||||
walingest.ingest_record(writer.as_ref(), recdata, lsn)?;
|
||||
|
||||
fail_point!("walreceiver-after-ingest");
|
||||
|
||||
last_rec_lsn = lsn;
|
||||
}
|
||||
|
||||
@@ -292,6 +286,7 @@ fn walreceiver_main(
|
||||
};
|
||||
|
||||
if let Some(last_lsn) = status_update {
|
||||
let last_lsn = PgLsn::from(u64::from(last_lsn));
|
||||
let timeline_synced_disk_consistent_lsn =
|
||||
tenant_mgr::get_repository_for_tenant(tenantid)?
|
||||
.get_timeline_state(timelineid)
|
||||
@@ -299,32 +294,18 @@ fn walreceiver_main(
|
||||
.unwrap_or(Lsn(0));
|
||||
|
||||
// The last LSN we processed. It is not guaranteed to survive pageserver crash.
|
||||
let write_lsn = u64::from(last_lsn);
|
||||
let write_lsn = last_lsn;
|
||||
// `disk_consistent_lsn` is the LSN at which page server guarantees local persistence of all received data
|
||||
let flush_lsn = u64::from(timeline.get_disk_consistent_lsn());
|
||||
let flush_lsn = PgLsn::from(u64::from(timeline.get_disk_consistent_lsn()));
|
||||
// The last LSN that is synced to remote storage and is guaranteed to survive pageserver crash
|
||||
// Used by safekeepers to remove WAL preceding `remote_consistent_lsn`.
|
||||
let apply_lsn = u64::from(timeline_synced_disk_consistent_lsn);
|
||||
let apply_lsn = PgLsn::from(u64::from(timeline_synced_disk_consistent_lsn));
|
||||
let ts = SystemTime::now();
|
||||
|
||||
// Send zenith feedback message.
|
||||
// Regular standby_status_update fields are put into this message.
|
||||
let zenith_status_update = ZenithFeedback {
|
||||
current_timeline_size: timeline.get_current_logical_size() as u64,
|
||||
ps_writelsn: write_lsn,
|
||||
ps_flushlsn: flush_lsn,
|
||||
ps_applylsn: apply_lsn,
|
||||
ps_replytime: ts,
|
||||
};
|
||||
|
||||
debug!("zenith_status_update {:?}", zenith_status_update);
|
||||
|
||||
let mut data = BytesMut::new();
|
||||
zenith_status_update.serialize(&mut data)?;
|
||||
const NO_REPLY: u8 = 0;
|
||||
runtime.block_on(
|
||||
physical_stream
|
||||
.as_mut()
|
||||
.zenith_status_update(data.len() as u64, &data),
|
||||
.standby_status_update(write_lsn, flush_lsn, apply_lsn, ts, NO_REPLY),
|
||||
)?;
|
||||
}
|
||||
}
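
The hunk above switches from the custom ZenithFeedback message to the regular standby_status_update, which takes PgLsn values instead of raw u64s, hence the PgLsn::from(u64::from(...)) wrappers. A minimal sketch of the conversion, for reference (helper names are illustrative):

use postgres_types::PgLsn;

// Round-trip between the pageserver's u64 LSN representation and PgLsn.
fn to_pg_lsn(raw: u64) -> PgLsn {
    PgLsn::from(raw)
}

fn to_raw(lsn: PgLsn) -> u64 {
    u64::from(lsn)
}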
|
||||
|
||||
@@ -268,11 +268,12 @@ impl XlXactParsedRecord {
|
||||
let info = xl_info & pg_constants::XLOG_XACT_OPMASK;
|
||||
// The record starts with time of commit/abort
|
||||
let xact_time = buf.get_i64_le();
|
||||
let xinfo = if xl_info & pg_constants::XLOG_XACT_HAS_INFO != 0 {
|
||||
buf.get_u32_le()
|
||||
let xinfo;
|
||||
if xl_info & pg_constants::XLOG_XACT_HAS_INFO != 0 {
|
||||
xinfo = buf.get_u32_le();
|
||||
} else {
|
||||
0
|
||||
};
|
||||
xinfo = 0;
|
||||
}
|
||||
let db_id;
|
||||
let ts_id;
|
||||
if xinfo & pg_constants::XACT_XINFO_HAS_DBINFO != 0 {
|
||||
@@ -501,6 +502,7 @@ pub fn decode_wal_record(record: Bytes) -> DecodedWALRecord {
|
||||
0..=pg_constants::XLR_MAX_BLOCK_ID => {
|
||||
/* XLogRecordBlockHeader */
|
||||
let mut blk = DecodedBkpBlock::new();
|
||||
let fork_flags: u8;
|
||||
|
||||
if block_id <= max_block_id {
|
||||
// TODO
|
||||
@@ -513,7 +515,7 @@ pub fn decode_wal_record(record: Bytes) -> DecodedWALRecord {
|
||||
}
|
||||
max_block_id = block_id;
|
||||
|
||||
let fork_flags: u8 = buf.get_u8();
|
||||
fork_flags = buf.get_u8();
|
||||
blk.forknum = fork_flags & pg_constants::BKPBLOCK_FORK_MASK;
|
||||
blk.flags = fork_flags;
|
||||
blk.has_image = (fork_flags & pg_constants::BKPBLOCK_HAS_IMAGE) != 0;
|
||||
|
||||
@@ -102,6 +102,8 @@ impl crate::walredo::WalRedoManager for DummyRedoManager {
|
||||
}
|
||||
}
|
||||
|
||||
static TIMEOUT: Duration = Duration::from_secs(20);
|
||||
|
||||
// Metrics collected on WAL redo operations
|
||||
//
|
||||
// We collect the time spent in actual WAL redo ('redo'), and time waiting
|
||||
@@ -219,14 +221,7 @@ impl WalRedoManager for PostgresRedoManager {
|
||||
let result = if batch_zenith {
|
||||
self.apply_batch_zenith(rel, blknum, lsn, img, &records[batch_start..i])
|
||||
} else {
|
||||
self.apply_batch_postgres(
|
||||
rel,
|
||||
blknum,
|
||||
lsn,
|
||||
img,
|
||||
&records[batch_start..i],
|
||||
self.conf.wal_redo_timeout,
|
||||
)
|
||||
self.apply_batch_postgres(rel, blknum, lsn, img, &records[batch_start..i])
|
||||
};
|
||||
img = Some(result?);
|
||||
|
||||
@@ -238,14 +233,7 @@ impl WalRedoManager for PostgresRedoManager {
|
||||
if batch_zenith {
|
||||
self.apply_batch_zenith(rel, blknum, lsn, img, &records[batch_start..])
|
||||
} else {
|
||||
self.apply_batch_postgres(
|
||||
rel,
|
||||
blknum,
|
||||
lsn,
|
||||
img,
|
||||
&records[batch_start..],
|
||||
self.conf.wal_redo_timeout,
|
||||
)
|
||||
self.apply_batch_postgres(rel, blknum, lsn, img, &records[batch_start..])
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -273,7 +261,6 @@ impl PostgresRedoManager {
|
||||
lsn: Lsn,
|
||||
base_img: Option<Bytes>,
|
||||
records: &[(Lsn, ZenithWalRecord)],
|
||||
wal_redo_timeout: Duration,
|
||||
) -> Result<Bytes, WalRedoError> {
|
||||
let start_time = Instant::now();
|
||||
|
||||
@@ -294,7 +281,7 @@ impl PostgresRedoManager {
|
||||
let result = if let RelishTag::Relation(rel) = rel {
|
||||
// Relational WAL records are applied using wal-redo-postgres
|
||||
let buf_tag = BufferTag { rel, blknum };
|
||||
apply_result = process.apply_wal_records(buf_tag, base_img, records, wal_redo_timeout);
|
||||
apply_result = process.apply_wal_records(buf_tag, base_img, records);
|
||||
|
||||
apply_result.map_err(WalRedoError::IoError)
|
||||
} else {
|
||||
@@ -376,44 +363,25 @@ impl PostgresRedoManager {
|
||||
will_init: _,
|
||||
rec: _,
|
||||
} => panic!("tried to pass postgres wal record to zenith WAL redo"),
|
||||
ZenithWalRecord::ClearVisibilityMapFlags {
|
||||
new_heap_blkno,
|
||||
old_heap_blkno,
|
||||
flags,
|
||||
} => {
|
||||
// sanity check that this is modifying the correct relish
|
||||
ZenithWalRecord::ClearVisibilityMapFlags { heap_blkno, flags } => {
|
||||
// Calculate the VM block and offset that corresponds to the heap block.
|
||||
let map_block = pg_constants::HEAPBLK_TO_MAPBLOCK(*heap_blkno);
|
||||
let map_byte = pg_constants::HEAPBLK_TO_MAPBYTE(*heap_blkno);
|
||||
let map_offset = pg_constants::HEAPBLK_TO_OFFSET(*heap_blkno);
|
||||
|
||||
// Check that we're modifying the correct VM block.
|
||||
assert!(
|
||||
check_forknum(&rel, pg_constants::VISIBILITYMAP_FORKNUM),
|
||||
"ClearVisibilityMapFlags record on unexpected rel {:?}",
|
||||
rel
|
||||
);
|
||||
if let Some(heap_blkno) = *new_heap_blkno {
|
||||
// Calculate the VM block and offset that corresponds to the heap block.
|
||||
let map_block = pg_constants::HEAPBLK_TO_MAPBLOCK(heap_blkno);
|
||||
let map_byte = pg_constants::HEAPBLK_TO_MAPBYTE(heap_blkno);
|
||||
let map_offset = pg_constants::HEAPBLK_TO_OFFSET(heap_blkno);
|
||||
assert!(map_block == blknum);
|
||||
|
||||
// Check that we're modifying the correct VM block.
|
||||
assert!(map_block == blknum);
|
||||
// equivalent to PageGetContents(page)
|
||||
let map = &mut page[pg_constants::MAXALIGN_SIZE_OF_PAGE_HEADER_DATA..];
|
||||
|
||||
// equivalent to PageGetContents(page)
|
||||
let map = &mut page[pg_constants::MAXALIGN_SIZE_OF_PAGE_HEADER_DATA..];
|
||||
|
||||
map[map_byte as usize] &= !(flags << map_offset);
|
||||
}
|
||||
|
||||
// Repeat for 'old_heap_blkno', if any
|
||||
if let Some(heap_blkno) = *old_heap_blkno {
|
||||
let map_block = pg_constants::HEAPBLK_TO_MAPBLOCK(heap_blkno);
|
||||
let map_byte = pg_constants::HEAPBLK_TO_MAPBYTE(heap_blkno);
|
||||
let map_offset = pg_constants::HEAPBLK_TO_OFFSET(heap_blkno);
|
||||
|
||||
assert!(map_block == blknum);
|
||||
|
||||
let map = &mut page[pg_constants::MAXALIGN_SIZE_OF_PAGE_HEADER_DATA..];
|
||||
|
||||
map[map_byte as usize] &= !(flags << map_offset);
|
||||
}
|
||||
let mask: u8 = flags << map_offset;
|
||||
map[map_byte as usize] &= !mask;
|
||||
}
|
||||
// Non-relational WAL records are handled here, with custom code that has the
|
||||
// same effects as the corresponding Postgres WAL redo function.
|
||||
@@ -616,7 +584,6 @@ impl PostgresRedoProcess {
|
||||
tag: BufferTag,
|
||||
base_img: Option<Bytes>,
|
||||
records: &[(Lsn, ZenithWalRecord)],
|
||||
wal_redo_timeout: Duration,
|
||||
) -> Result<Bytes, std::io::Error> {
|
||||
// Serialize all the messages to send the WAL redo process first.
|
||||
//
|
||||
@@ -667,7 +634,7 @@ impl PostgresRedoProcess {
|
||||
// If we have more data to write, wake up if 'stdin' becomes writeable or
|
||||
// we have data to read. Otherwise only wake up if there's data to read.
|
||||
let nfds = if nwrite < writebuf.len() { 3 } else { 2 };
|
||||
let n = nix::poll::poll(&mut pollfds[0..nfds], wal_redo_timeout.as_millis() as i32)?;
|
||||
let n = nix::poll::poll(&mut pollfds[0..nfds], TIMEOUT.as_millis() as i32)?;
|
||||
|
||||
if n == 0 {
|
||||
return Err(Error::new(ErrorKind::Other, "WAL redo timed out"));
|
||||
|
||||
poetry.lock (generated, 2080 lines changed)
File diff suppressed because one or more lines are too long
@@ -1,7 +1,10 @@
|
||||
[package]
|
||||
name = "postgres_ffi"
|
||||
version = "0.1.0"
|
||||
edition = "2021"
|
||||
authors = ["Heikki Linnakangas <heikki@zenith.tech>"]
|
||||
edition = "2018"
|
||||
|
||||
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
|
||||
|
||||
[dependencies]
|
||||
chrono = "0.4.19"
|
||||
|
||||
@@ -51,13 +51,6 @@ pub type TimeLineID = u32;
|
||||
pub type TimestampTz = i64;
|
||||
pub type XLogSegNo = u64;
|
||||
|
||||
/// Interval at which the metadata file is checkpointed. We must store the metadata file
/// to enforce the invariant that checkpoint.nextXid is larger than any XID in the WAL.
/// But flushing the checkpoint file for every transaction would be too expensive, so
/// XID_CHECKPOINT_INTERVAL is used to forward-align nextXid, performing a metadata
/// checkpoint only once per XID_CHECKPOINT_INTERVAL transactions.
/// XID_CHECKPOINT_INTERVAL should not be larger than BLCKSZ*CLOG_XACTS_PER_BYTE
/// in order to let the CLOG_TRUNCATE mechanism correctly extend the CLOG.
const XID_CHECKPOINT_INTERVAL: u32 = 1024;
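
Because XID_CHECKPOINT_INTERVAL is a power of two, the forward alignment in update_next_xid below is plain bit arithmetic: add (interval - 1), then mask off the low bits. A small illustrative sketch (not the pageserver's code):

// Mirrors the const above, repeated so the sketch is self-contained.
const XID_CHECKPOINT_INTERVAL: u32 = 1024;

/// Round `xid` up to the next multiple of XID_CHECKPOINT_INTERVAL,
/// wrapping at u32::MAX the way PostgreSQL XIDs do.
fn align_up(xid: u32) -> u32 {
    xid.wrapping_add(XID_CHECKPOINT_INTERVAL - 1) & !(XID_CHECKPOINT_INTERVAL - 1)
}

fn main() {
    assert_eq!(align_up(100), 1024);
    assert_eq!(align_up(1024), 1024);
    assert_eq!(align_up(1025), 2048);
}

Note that update_next_xid first bumps the input to xid + 1 before aligning, which is why the test below expects nextXid to become 2048 when 1024 is passed in.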
|
||||
|
||||
#[allow(non_snake_case)]
|
||||
@@ -407,13 +400,9 @@ impl CheckPoint {
|
||||
///
|
||||
/// Returns 'true' if the XID was updated.
|
||||
pub fn update_next_xid(&mut self, xid: u32) -> bool {
// nextXid should now be greater than any XID in WAL, so increment the provided XID and check for wraparound.
let mut new_xid = std::cmp::max(xid + 1, pg_constants::FIRST_NORMAL_TRANSACTION_ID);
// To reduce the number of metadata checkpoints, we forward-align the XID on XID_CHECKPOINT_INTERVAL.
// XID_CHECKPOINT_INTERVAL should not be larger than BLCKSZ*CLOG_XACTS_PER_BYTE.
new_xid =
new_xid.wrapping_add(XID_CHECKPOINT_INTERVAL - 1) & !(XID_CHECKPOINT_INTERVAL - 1);
let xid = xid.wrapping_add(XID_CHECKPOINT_INTERVAL - 1) & !(XID_CHECKPOINT_INTERVAL - 1);
let full_xid = self.nextXid.value;
let new_xid = std::cmp::max(xid + 1, pg_constants::FIRST_NORMAL_TRANSACTION_ID);
let old_xid = full_xid as u32;
if new_xid.wrapping_sub(old_xid) as i32 > 0 {
let mut epoch = full_xid >> 32;
|
||||
@@ -531,34 +520,4 @@ mod tests {
|
||||
println!("wal_end={}, tli={}", wal_end, tli);
|
||||
assert_eq!(wal_end, waldump_wal_end);
|
||||
}
|
||||
|
||||
/// Check the math in update_next_xid
|
||||
///
|
||||
/// NOTE: These checks are sensitive to the value of XID_CHECKPOINT_INTERVAL,
|
||||
/// currently 1024.
|
||||
#[test]
|
||||
pub fn test_update_next_xid() {
|
||||
let checkpoint_buf = [0u8; std::mem::size_of::<CheckPoint>()];
|
||||
let mut checkpoint = CheckPoint::decode(&checkpoint_buf).unwrap();
|
||||
|
||||
checkpoint.nextXid = FullTransactionId { value: 10 };
|
||||
assert_eq!(checkpoint.nextXid.value, 10);
|
||||
|
||||
// The input XID gets rounded up to the next XID_CHECKPOINT_INTERVAL
|
||||
// boundary
|
||||
checkpoint.update_next_xid(100);
|
||||
assert_eq!(checkpoint.nextXid.value, 1024);
|
||||
|
||||
// No change
|
||||
checkpoint.update_next_xid(500);
|
||||
assert_eq!(checkpoint.nextXid.value, 1024);
|
||||
checkpoint.update_next_xid(1023);
|
||||
assert_eq!(checkpoint.nextXid.value, 1024);
|
||||
|
||||
// The function returns the *next* XID, given the highest XID seen so
|
||||
// far. So when we pass 1024, the nextXid gets bumped up to the next
|
||||
// XID_CHECKPOINT_INTERVAL boundary.
|
||||
checkpoint.update_next_xid(1024);
|
||||
assert_eq!(checkpoint.nextXid.value, 2048);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -38,7 +38,7 @@ def rustfmt(fix_inplace: bool = False, no_color: bool = False) -> str:
|
||||
|
||||
|
||||
def yapf(fix_inplace: bool) -> str:
|
||||
cmd = "poetry run yapf --recursive"
|
||||
cmd = "pipenv run yapf --recursive"
|
||||
if fix_inplace:
|
||||
cmd += " --in-place"
|
||||
else:
|
||||
@@ -47,7 +47,7 @@ def yapf(fix_inplace: bool) -> str:
|
||||
|
||||
|
||||
def mypy() -> str:
|
||||
return "poetry run mypy"
|
||||
return "pipenv run mypy"
|
||||
|
||||
|
||||
def get_commit_files() -> List[str]:
|
||||
@@ -72,7 +72,7 @@ def check(name: str, suffix: str, cmd: str, changed_files: List[str], no_color:
|
||||
print("Please inspect the output below and run make fmt to fix automatically.")
|
||||
if suffix == ".py":
|
||||
print("If the output is empty, ensure that you've installed Python tooling by\n"
|
||||
"running './scripts/pysync' in the current directory (no root needed)")
|
||||
"running 'pipenv install --dev' in the current directory (no root needed)")
|
||||
print()
|
||||
print(res.stdout.decode())
|
||||
exit(1)
|
||||
|
||||
@@ -1,33 +1,28 @@
|
||||
[package]
|
||||
name = "proxy"
|
||||
version = "0.1.0"
|
||||
edition = "2021"
|
||||
authors = ["Stas Kelvich <stas.kelvich@gmail.com>"]
|
||||
edition = "2018"
|
||||
|
||||
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
|
||||
|
||||
[dependencies]
|
||||
anyhow = "1.0"
|
||||
bytes = { version = "1.0.1", features = ['serde'] }
|
||||
clap = "3.0"
|
||||
futures = "0.3.13"
|
||||
hashbrown = "0.11.2"
|
||||
hex = "0.4.3"
|
||||
hyper = "0.14"
|
||||
lazy_static = "1.4.0"
|
||||
md5 = "0.7.0"
|
||||
parking_lot = "0.11.2"
|
||||
pin-project-lite = "0.2.7"
|
||||
rand = "0.8.3"
|
||||
reqwest = { version = "0.11", default-features = false, features = ["blocking", "json", "rustls-tls"] }
|
||||
rustls = "0.19.1"
|
||||
scopeguard = "1.1.0"
|
||||
hex = "0.4.3"
|
||||
hyper = "0.14"
|
||||
routerify = "2"
|
||||
parking_lot = "0.11.2"
|
||||
serde = "1"
|
||||
serde_json = "1"
|
||||
tokio = { version = "1.11", features = ["macros"] }
|
||||
tokio-postgres = { git = "https://github.com/zenithdb/rust-postgres.git", rev="2949d98df52587d562986aad155dd4e889e408b7" }
|
||||
tokio-rustls = "0.22.0"
|
||||
tokio-postgres = { git = "https://github.com/zenithdb/rust-postgres.git", rev="9eb0dbfbeb6a6c1b79099b9f7ae4a8c021877858" }
|
||||
clap = "2.33.0"
|
||||
rustls = "0.19.1"
|
||||
reqwest = { version = "0.11", default-features = false, features = ["blocking", "json", "rustls-tls"] }
|
||||
|
||||
zenith_utils = { path = "../zenith_utils" }
|
||||
zenith_metrics = { path = "../zenith_metrics" }
|
||||
|
||||
[dev-dependencies]
|
||||
tokio-postgres-rustls = "0.8.0"
|
||||
rcgen = "0.8.14"
|
||||
|
||||
@@ -1,169 +0,0 @@
|
||||
use crate::compute::DatabaseInfo;
|
||||
use crate::config::ProxyConfig;
|
||||
use crate::cplane_api::{self, CPlaneApi};
|
||||
use crate::stream::PqStream;
|
||||
use anyhow::{anyhow, bail, Context};
|
||||
use std::collections::HashMap;
|
||||
use tokio::io::{AsyncRead, AsyncWrite};
|
||||
use zenith_utils::pq_proto::{BeMessage as Be, BeParameterStatusMessage, FeMessage as Fe};
|
||||
|
||||
/// Various client credentials which we use for authentication.
|
||||
#[derive(Debug, PartialEq, Eq)]
|
||||
pub struct ClientCredentials {
|
||||
pub user: String,
|
||||
pub dbname: String,
|
||||
}
|
||||
|
||||
impl TryFrom<HashMap<String, String>> for ClientCredentials {
|
||||
type Error = anyhow::Error;
|
||||
|
||||
fn try_from(mut value: HashMap<String, String>) -> Result<Self, Self::Error> {
|
||||
let mut get_param = |key| {
|
||||
value
|
||||
.remove(key)
|
||||
.with_context(|| format!("{} is missing in startup packet", key))
|
||||
};
|
||||
|
||||
let user = get_param("user")?;
|
||||
let db = get_param("database")?;
|
||||
|
||||
Ok(Self { user, dbname: db })
|
||||
}
|
||||
}
|
||||
|
||||
impl ClientCredentials {
|
||||
/// Use credentials to authenticate the user.
|
||||
pub async fn authenticate(
|
||||
self,
|
||||
config: &ProxyConfig,
|
||||
client: &mut PqStream<impl AsyncRead + AsyncWrite + Unpin>,
|
||||
) -> anyhow::Result<DatabaseInfo> {
|
||||
use crate::config::ClientAuthMethod::*;
|
||||
use crate::config::RouterConfig::*;
|
||||
let db_info = match &config.router_config {
|
||||
Static { host, port } => handle_static(host.clone(), *port, client, self).await,
|
||||
Dynamic(Mixed) => {
|
||||
if self.user.ends_with("@zenith") {
|
||||
handle_existing_user(config, client, self).await
|
||||
} else {
|
||||
handle_new_user(config, client).await
|
||||
}
|
||||
}
|
||||
Dynamic(Password) => handle_existing_user(config, client, self).await,
|
||||
Dynamic(Link) => handle_new_user(config, client).await,
|
||||
};
|
||||
|
||||
db_info.context("failed to authenticate client")
|
||||
}
|
||||
}
|
||||
|
||||
fn new_psql_session_id() -> String {
|
||||
hex::encode(rand::random::<[u8; 8]>())
|
||||
}
|
||||
|
||||
async fn handle_static(
|
||||
host: String,
|
||||
port: u16,
|
||||
client: &mut PqStream<impl AsyncRead + AsyncWrite + Unpin>,
|
||||
creds: ClientCredentials,
|
||||
) -> anyhow::Result<DatabaseInfo> {
|
||||
client
|
||||
.write_message(&Be::AuthenticationCleartextPassword)
|
||||
.await?;
|
||||
|
||||
// Read client's password bytes
|
||||
let msg = match client.read_message().await? {
|
||||
Fe::PasswordMessage(msg) => msg,
|
||||
bad => bail!("unexpected message type: {:?}", bad),
|
||||
};
|
||||
|
||||
let cleartext_password = std::str::from_utf8(&msg)?.split('\0').next().unwrap();
|
||||
|
||||
let db_info = DatabaseInfo {
|
||||
host,
|
||||
port,
|
||||
dbname: creds.dbname.clone(),
|
||||
user: creds.user.clone(),
|
||||
password: Some(cleartext_password.into()),
|
||||
};
|
||||
|
||||
client
|
||||
.write_message_noflush(&Be::AuthenticationOk)?
|
||||
.write_message_noflush(&BeParameterStatusMessage::encoding())?;
|
||||
|
||||
Ok(db_info)
|
||||
}
|
||||
|
||||
async fn handle_existing_user(
|
||||
config: &ProxyConfig,
|
||||
client: &mut PqStream<impl AsyncRead + AsyncWrite + Unpin>,
|
||||
creds: ClientCredentials,
|
||||
) -> anyhow::Result<DatabaseInfo> {
|
||||
let psql_session_id = new_psql_session_id();
|
||||
let md5_salt = rand::random();
|
||||
|
||||
client
|
||||
.write_message(&Be::AuthenticationMD5Password(&md5_salt))
|
||||
.await?;
|
||||
|
||||
// Read client's password hash
|
||||
let msg = match client.read_message().await? {
|
||||
Fe::PasswordMessage(msg) => msg,
|
||||
bad => bail!("unexpected message type: {:?}", bad),
|
||||
};
|
||||
|
||||
let (_trailing_null, md5_response) = msg
|
||||
.split_last()
|
||||
.ok_or_else(|| anyhow!("unexpected password message"))?;
|
||||
|
||||
let cplane = CPlaneApi::new(&config.auth_endpoint);
|
||||
let db_info = cplane
|
||||
.authenticate_proxy_request(creds, md5_response, &md5_salt, &psql_session_id)
|
||||
.await?;
|
||||
|
||||
client
|
||||
.write_message_noflush(&Be::AuthenticationOk)?
|
||||
.write_message_noflush(&BeParameterStatusMessage::encoding())?;
|
||||
|
||||
Ok(db_info)
|
||||
}
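
For context on md5_response: with PostgreSQL MD5 authentication the client sends the string "md5" followed by hex(md5(hex(md5(password || user)) || salt)). The proxy merely forwards that blob to the control plane, but a hedged sketch of how a client computes it (the md5 crate is already a dependency; this helper is illustrative, not proxy code):

/// Compute the response a Postgres client sends for AuthenticationMD5Password,
/// given its credentials and the server-provided 4-byte salt.
fn md5_auth_response(user: &str, password: &str, salt: &[u8; 4]) -> String {
    // Inner hash: md5(password || username), hex-encoded.
    let inner = format!("{:x}", md5::compute(format!("{}{}", password, user)));
    // Outer hash: md5(inner_hex || salt), prefixed with "md5".
    let mut outer_input = inner.into_bytes();
    outer_input.extend_from_slice(salt);
    format!("md5{:x}", md5::compute(outer_input))
}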
|
||||
|
||||
async fn handle_new_user(
|
||||
config: &ProxyConfig,
|
||||
client: &mut PqStream<impl AsyncRead + AsyncWrite + Unpin>,
|
||||
) -> anyhow::Result<DatabaseInfo> {
|
||||
let psql_session_id = new_psql_session_id();
|
||||
let greeting = hello_message(&config.redirect_uri, &psql_session_id);
|
||||
|
||||
let db_info = cplane_api::with_waiter(psql_session_id, |waiter| async {
|
||||
// Give user a URL to spawn a new database
|
||||
client
|
||||
.write_message_noflush(&Be::AuthenticationOk)?
|
||||
.write_message_noflush(&BeParameterStatusMessage::encoding())?
|
||||
.write_message(&Be::NoticeResponse(greeting))
|
||||
.await?;
|
||||
|
||||
// Wait for web console response
|
||||
waiter.await?.map_err(|e| anyhow!(e))
|
||||
})
|
||||
.await?;
|
||||
|
||||
client.write_message_noflush(&Be::NoticeResponse("Connecting to database.".into()))?;
|
||||
|
||||
Ok(db_info)
|
||||
}
|
||||
|
||||
fn hello_message(redirect_uri: &str, session_id: &str) -> String {
|
||||
format!(
|
||||
concat![
|
||||
"☀️ Welcome to Zenith!\n",
|
||||
"To proceed with database creation, open the following link:\n\n",
|
||||
" {redirect_uri}{session_id}\n\n",
|
||||
"It needs to be done once and we will send you '.pgpass' file,\n",
|
||||
"which will allow you to access or create ",
|
||||
"databases without opening your web browser."
|
||||
],
|
||||
redirect_uri = redirect_uri,
|
||||
session_id = session_id,
|
||||
)
|
||||
}
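
As a usage note for this (now removed) auth module: ClientCredentials is built directly from the startup-packet parameter map, so driving it looks roughly like the sketch below; the literal values are made up for illustration.

use std::collections::HashMap;
use std::convert::TryFrom; // only needed on edition 2018

fn example() -> anyhow::Result<()> {
    let mut params = HashMap::new();
    params.insert("user".to_string(), "alice@zenith".to_string());
    params.insert("database".to_string(), "main".to_string());

    // A missing "user" or "database" key becomes a descriptive error.
    let creds = ClientCredentials::try_from(params)?;
    assert_eq!(creds.user, "alice@zenith");
    assert_eq!(creds.dbname, "main");
    Ok(())
}

With the Mixed auth method above, the "@zenith" suffix on the user name is what routes a connection to handle_existing_user rather than handle_new_user.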
|
||||
@@ -1,106 +0,0 @@
|
||||
use anyhow::{anyhow, Context};
|
||||
use hashbrown::HashMap;
|
||||
use parking_lot::Mutex;
|
||||
use std::net::SocketAddr;
|
||||
use tokio::net::TcpStream;
|
||||
use tokio_postgres::{CancelToken, NoTls};
|
||||
use zenith_utils::pq_proto::CancelKeyData;
|
||||
|
||||
/// Enables serving CancelRequests.
|
||||
#[derive(Default)]
|
||||
pub struct CancelMap(Mutex<HashMap<CancelKeyData, Option<CancelClosure>>>);
|
||||
|
||||
impl CancelMap {
|
||||
/// Cancel a running query for the corresponding connection.
|
||||
pub async fn cancel_session(&self, key: CancelKeyData) -> anyhow::Result<()> {
|
||||
let cancel_closure = self
|
||||
.0
|
||||
.lock()
|
||||
.get(&key)
|
||||
.and_then(|x| x.clone())
|
||||
.with_context(|| format!("unknown session: {:?}", key))?;
|
||||
|
||||
cancel_closure.try_cancel_query().await
|
||||
}
|
||||
|
||||
/// Run async action within an ephemeral session identified by [`CancelKeyData`].
|
||||
pub async fn with_session<'a, F, R, V>(&'a self, f: F) -> anyhow::Result<V>
|
||||
where
|
||||
F: FnOnce(Session<'a>) -> R,
|
||||
R: std::future::Future<Output = anyhow::Result<V>>,
|
||||
{
|
||||
// HACK: We'd rather get the real backend_pid but tokio_postgres doesn't
|
||||
// expose it and we don't want to do another roundtrip to query
|
||||
// for it. The client will be able to notice that this is not the
|
||||
// actual backend_pid, but backend_pid is not used for anything
|
||||
// so it doesn't matter.
|
||||
let key = rand::random();
|
||||
|
||||
// Random key collisions are unlikely to happen here, but they're still possible,
|
||||
// which is why we have to take care not to rewrite an existing key.
|
||||
self.0
|
||||
.lock()
|
||||
.try_insert(key, None)
|
||||
.map_err(|_| anyhow!("session already exists: {:?}", key))?;
|
||||
|
||||
// This will guarantee that the session gets dropped
|
||||
// as soon as the future is finished.
|
||||
scopeguard::defer! {
|
||||
self.0.lock().remove(&key);
|
||||
}
|
||||
|
||||
let session = Session::new(key, self);
|
||||
f(session).await
|
||||
}
|
||||
}
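
A sketch of how a caller drives with_session: register an ephemeral key, hand the Session to the connection handler, and rely on the scopeguard above to drop the entry once the future finishes. The body below is a simplified stand-in for the real handler:

async fn run_with_cancellation(
    cancel_map: &CancelMap,
    cancel_closure: CancelClosure,
) -> anyhow::Result<()> {
    cancel_map
        .with_session(|session| async move {
            // Once the backend connection exists, publish its cancel token
            // under this session's key so CancelRequests can reach it.
            let cancel_key_data = session.enable_cancellation(cancel_closure);
            println!("session registered: {:?}", cancel_key_data);
            Ok(())
        })
        .await
}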
|
||||
|
||||
/// This should've been a [`std::future::Future`], but
|
||||
/// it's impossible to name a type of an unboxed future
|
||||
/// (we'd need something like `#![feature(type_alias_impl_trait)]`).
|
||||
#[derive(Clone)]
|
||||
pub struct CancelClosure {
|
||||
socket_addr: SocketAddr,
|
||||
cancel_token: CancelToken,
|
||||
}
|
||||
|
||||
impl CancelClosure {
|
||||
pub fn new(socket_addr: SocketAddr, cancel_token: CancelToken) -> Self {
|
||||
Self {
|
||||
socket_addr,
|
||||
cancel_token,
|
||||
}
|
||||
}
|
||||
|
||||
/// Cancels the query running on user's compute node.
|
||||
pub async fn try_cancel_query(self) -> anyhow::Result<()> {
|
||||
let socket = TcpStream::connect(self.socket_addr).await?;
|
||||
self.cancel_token.cancel_query_raw(socket, NoTls).await?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
/// Helper for registering query cancellation tokens.
|
||||
pub struct Session<'a> {
|
||||
/// The user-facing key identifying this session.
|
||||
key: CancelKeyData,
|
||||
/// The [`CancelMap`] this session belongs to.
|
||||
cancel_map: &'a CancelMap,
|
||||
}
|
||||
|
||||
impl<'a> Session<'a> {
|
||||
fn new(key: CancelKeyData, cancel_map: &'a CancelMap) -> Self {
|
||||
Self { key, cancel_map }
|
||||
}
|
||||
|
||||
/// Store the cancel token for the given session.
|
||||
/// This enables query cancellation in [`crate::proxy::handshake`].
|
||||
pub fn enable_cancellation(self, cancel_closure: CancelClosure) -> CancelKeyData {
|
||||
self.cancel_map
|
||||
.0
|
||||
.lock()
|
||||
.insert(self.key, Some(cancel_closure));
|
||||
|
||||
self.key
|
||||
}
|
||||
}
|
||||
@@ -1,42 +0,0 @@
|
||||
use anyhow::Context;
|
||||
use serde::{Deserialize, Serialize};
|
||||
use std::net::{SocketAddr, ToSocketAddrs};
|
||||
|
||||
/// Compute node connection params.
|
||||
#[derive(Serialize, Deserialize, Debug, Default)]
|
||||
pub struct DatabaseInfo {
|
||||
pub host: String,
|
||||
pub port: u16,
|
||||
pub dbname: String,
|
||||
pub user: String,
|
||||
pub password: Option<String>,
|
||||
}
|
||||
|
||||
impl DatabaseInfo {
|
||||
pub fn socket_addr(&self) -> anyhow::Result<SocketAddr> {
|
||||
let host_port = format!("{}:{}", self.host, self.port);
|
||||
host_port
|
||||
.to_socket_addrs()
|
||||
.with_context(|| format!("cannot resolve {} to SocketAddr", host_port))?
|
||||
.next()
|
||||
.context("cannot resolve at least one SocketAddr")
|
||||
}
|
||||
}
|
||||
|
||||
impl From<DatabaseInfo> for tokio_postgres::Config {
|
||||
fn from(db_info: DatabaseInfo) -> Self {
|
||||
let mut config = tokio_postgres::Config::new();
|
||||
|
||||
config
|
||||
.host(&db_info.host)
|
||||
.port(db_info.port)
|
||||
.dbname(&db_info.dbname)
|
||||
.user(&db_info.user);
|
||||
|
||||
if let Some(password) = db_info.password {
|
||||
config.password(password);
|
||||
}
|
||||
|
||||
config
|
||||
}
|
||||
}
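
A short usage sketch for the conversion above, with made-up connection details; it only shows that the From impl carries every field across, setting the password solely when one is present:

fn build_config() -> tokio_postgres::Config {
    let db_info = DatabaseInfo {
        host: "compute-1.local".to_string(),
        port: 5432,
        dbname: "main".to_string(),
        user: "alice".to_string(),
        password: Some("secret".to_string()),
    };

    // The resulting Config can then be handed to tokio_postgres::connect.
    db_info.into()
}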
|
||||
@@ -1,79 +1,18 @@
|
||||
use crate::auth::ClientCredentials;
|
||||
use crate::compute::DatabaseInfo;
|
||||
use crate::waiters::{Waiter, Waiters};
|
||||
use anyhow::{anyhow, bail};
|
||||
use lazy_static::lazy_static;
|
||||
use anyhow::{anyhow, bail, Context};
|
||||
use serde::{Deserialize, Serialize};
|
||||
use std::net::{SocketAddr, ToSocketAddrs};
|
||||
|
||||
lazy_static! {
|
||||
static ref CPLANE_WAITERS: Waiters<Result<DatabaseInfo, String>> = Default::default();
|
||||
use crate::state::ProxyWaiters;
|
||||
|
||||
#[derive(Serialize, Deserialize, Debug, Default)]
|
||||
pub struct DatabaseInfo {
|
||||
pub host: String,
|
||||
pub port: u16,
|
||||
pub dbname: String,
|
||||
pub user: String,
|
||||
pub password: Option<String>,
|
||||
}
|
||||
|
||||
/// Give caller an opportunity to wait for cplane's reply.
|
||||
pub async fn with_waiter<F, R, T>(psql_session_id: impl Into<String>, f: F) -> anyhow::Result<T>
|
||||
where
|
||||
F: FnOnce(Waiter<'static, Result<DatabaseInfo, String>>) -> R,
|
||||
R: std::future::Future<Output = anyhow::Result<T>>,
|
||||
{
|
||||
let waiter = CPLANE_WAITERS.register(psql_session_id.into())?;
|
||||
f(waiter).await
|
||||
}
|
||||
|
||||
pub fn notify(psql_session_id: &str, msg: Result<DatabaseInfo, String>) -> anyhow::Result<()> {
|
||||
CPLANE_WAITERS.notify(psql_session_id, msg)
|
||||
}
|
||||
|
||||
/// Zenith console API wrapper.
|
||||
pub struct CPlaneApi<'a> {
|
||||
auth_endpoint: &'a str,
|
||||
}
|
||||
|
||||
impl<'a> CPlaneApi<'a> {
|
||||
pub fn new(auth_endpoint: &'a str) -> Self {
|
||||
Self { auth_endpoint }
|
||||
}
|
||||
}
|
||||
|
||||
impl CPlaneApi<'_> {
|
||||
pub async fn authenticate_proxy_request(
|
||||
&self,
|
||||
creds: ClientCredentials,
|
||||
md5_response: &[u8],
|
||||
salt: &[u8; 4],
|
||||
psql_session_id: &str,
|
||||
) -> anyhow::Result<DatabaseInfo> {
|
||||
let mut url = reqwest::Url::parse(self.auth_endpoint)?;
|
||||
url.query_pairs_mut()
|
||||
.append_pair("login", &creds.user)
|
||||
.append_pair("database", &creds.dbname)
|
||||
.append_pair("md5response", std::str::from_utf8(md5_response)?)
|
||||
.append_pair("salt", &hex::encode(salt))
|
||||
.append_pair("psql_session_id", psql_session_id);
|
||||
|
||||
with_waiter(psql_session_id, |waiter| async {
|
||||
println!("cplane request: {}", url);
|
||||
// TODO: leverage `reqwest::Client` to reuse connections
|
||||
let resp = reqwest::get(url).await?;
|
||||
if !resp.status().is_success() {
|
||||
bail!("Auth failed: {}", resp.status())
|
||||
}
|
||||
|
||||
let auth_info: ProxyAuthResponse = serde_json::from_str(resp.text().await?.as_str())?;
|
||||
println!("got auth info: #{:?}", auth_info);
|
||||
|
||||
use ProxyAuthResponse::*;
|
||||
match auth_info {
|
||||
Ready { conn_info } => Ok(conn_info),
|
||||
Error { error } => bail!(error),
|
||||
NotReady { .. } => waiter.await?.map_err(|e| anyhow!(e)),
|
||||
}
|
||||
})
|
||||
.await
|
||||
}
|
||||
}
|
||||
|
||||
// NOTE: the order of constructors is important.
|
||||
// https://serde.rs/enum-representations.html#untagged
|
||||
#[derive(Serialize, Deserialize, Debug)]
|
||||
#[serde(untagged)]
|
||||
enum ProxyAuthResponse {
|
||||
@@ -82,6 +21,86 @@ enum ProxyAuthResponse {
|
||||
NotReady { ready: bool }, // TODO: get rid of `ready`
|
||||
}
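
To illustrate the note above about constructor order: with #[serde(untagged)], serde tries the variants top to bottom, and since unknown fields are ignored, a payload that happens to satisfy an earlier variant never reaches a later one. A standalone sketch (the JSON shapes are invented, not the actual control-plane payloads):

use serde::Deserialize;

#[derive(Deserialize, Debug)]
#[serde(untagged)]
enum Reply {
    // Listed first: requires the richer shape.
    Done { result: String },
    // Listed last: matches anything that merely has `ready`.
    Pending { ready: bool },
}

fn main() {
    // Carries both fields; with this order it deserializes as Done.
    let done: Reply = serde_json::from_str(r#"{"ready": true, "result": "ok"}"#).unwrap();
    println!("{:?}", done); // Done { result: "ok" }

    // If Pending were declared first, the same JSON would match Pending
    // and `result` would be silently dropped.
    let pending: Reply = serde_json::from_str(r#"{"ready": false}"#).unwrap();
    println!("{:?}", pending); // Pending { ready: false }
}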
|
||||
|
||||
impl DatabaseInfo {
|
||||
pub fn socket_addr(&self) -> anyhow::Result<SocketAddr> {
|
||||
let host_port = format!("{}:{}", self.host, self.port);
|
||||
host_port
|
||||
.to_socket_addrs()
|
||||
.with_context(|| format!("cannot resolve {} to SocketAddr", host_port))?
|
||||
.next()
|
||||
.ok_or_else(|| anyhow!("cannot resolve at least one SocketAddr"))
|
||||
}
|
||||
}
|
||||
|
||||
impl From<DatabaseInfo> for tokio_postgres::Config {
|
||||
fn from(db_info: DatabaseInfo) -> Self {
|
||||
let mut config = tokio_postgres::Config::new();
|
||||
|
||||
config
|
||||
.host(&db_info.host)
|
||||
.port(db_info.port)
|
||||
.dbname(&db_info.dbname)
|
||||
.user(&db_info.user);
|
||||
|
||||
if let Some(password) = db_info.password {
|
||||
config.password(password);
|
||||
}
|
||||
|
||||
config
|
||||
}
|
||||
}
|
||||
|
||||
pub struct CPlaneApi<'a> {
|
||||
auth_endpoint: &'a str,
|
||||
waiters: &'a ProxyWaiters,
|
||||
}
|
||||
|
||||
impl<'a> CPlaneApi<'a> {
|
||||
pub fn new(auth_endpoint: &'a str, waiters: &'a ProxyWaiters) -> Self {
|
||||
Self {
|
||||
auth_endpoint,
|
||||
waiters,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl CPlaneApi<'_> {
|
||||
pub fn authenticate_proxy_request(
|
||||
&self,
|
||||
user: &str,
|
||||
database: &str,
|
||||
md5_response: &[u8],
|
||||
salt: &[u8; 4],
|
||||
psql_session_id: &str,
|
||||
) -> anyhow::Result<DatabaseInfo> {
|
||||
let mut url = reqwest::Url::parse(self.auth_endpoint)?;
|
||||
url.query_pairs_mut()
|
||||
.append_pair("login", user)
|
||||
.append_pair("database", database)
|
||||
.append_pair("md5response", std::str::from_utf8(md5_response)?)
|
||||
.append_pair("salt", &hex::encode(salt))
|
||||
.append_pair("psql_session_id", psql_session_id);
|
||||
|
||||
let waiter = self.waiters.register(psql_session_id.to_owned());
|
||||
|
||||
println!("cplane request: {}", url);
|
||||
let resp = reqwest::blocking::get(url)?;
|
||||
if !resp.status().is_success() {
|
||||
bail!("Auth failed: {}", resp.status())
|
||||
}
|
||||
|
||||
let auth_info: ProxyAuthResponse = serde_json::from_str(resp.text()?.as_str())?;
|
||||
println!("got auth info: #{:?}", auth_info);
|
||||
|
||||
use ProxyAuthResponse::*;
|
||||
match auth_info {
|
||||
Ready { conn_info } => Ok(conn_info),
|
||||
Error { error } => bail!(error),
|
||||
NotReady { .. } => waiter.wait()?.map_err(|e| anyhow!(e)),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
|
||||
@@ -1,30 +1,15 @@
|
||||
use anyhow::anyhow;
|
||||
use hyper::{Body, Request, Response, StatusCode};
|
||||
use std::net::TcpListener;
|
||||
use routerify::RouterBuilder;
|
||||
|
||||
use zenith_utils::http::endpoint;
|
||||
use zenith_utils::http::error::ApiError;
|
||||
use zenith_utils::http::json::json_response;
|
||||
use zenith_utils::http::{RouterBuilder, RouterService};
|
||||
|
||||
async fn status_handler(_: Request<Body>) -> Result<Response<Body>, ApiError> {
|
||||
Ok(json_response(StatusCode::OK, "")?)
|
||||
}
|
||||
|
||||
fn make_router() -> RouterBuilder<hyper::Body, ApiError> {
|
||||
pub fn make_router() -> RouterBuilder<hyper::Body, ApiError> {
|
||||
let router = endpoint::make_router();
|
||||
router.get("/v1/status", status_handler)
|
||||
}
|
||||
|
||||
pub async fn thread_main(http_listener: TcpListener) -> anyhow::Result<()> {
|
||||
scopeguard::defer! {
|
||||
println!("http has shut down");
|
||||
}
|
||||
|
||||
let service = || RouterService::new(make_router().build()?);
|
||||
|
||||
hyper::Server::from_tcp(http_listener)?
|
||||
.serve(service().map_err(|e| anyhow!(e))?)
|
||||
.await?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
@@ -5,162 +5,137 @@
|
||||
/// (control plane API in our case) and can create new databases and accounts
|
||||
/// in somewhat transparent manner (again via communication with control plane API).
|
||||
///
|
||||
use anyhow::{bail, Context};
|
||||
use anyhow::bail;
|
||||
use clap::{App, Arg};
|
||||
use config::ProxyConfig;
|
||||
use futures::FutureExt;
|
||||
use std::future::Future;
|
||||
use tokio::{net::TcpListener, task::JoinError};
|
||||
use zenith_utils::GIT_VERSION;
|
||||
use state::{ProxyConfig, ProxyState};
|
||||
use std::thread;
|
||||
use zenith_utils::http::endpoint;
|
||||
use zenith_utils::{tcp_listener, GIT_VERSION};
|
||||
|
||||
use crate::config::{ClientAuthMethod, RouterConfig};
|
||||
|
||||
mod auth;
|
||||
mod cancellation;
|
||||
mod compute;
|
||||
mod config;
|
||||
mod cplane_api;
|
||||
mod http;
|
||||
mod mgmt;
|
||||
mod proxy;
|
||||
mod stream;
|
||||
mod state;
|
||||
mod waiters;
|
||||
|
||||
/// Flattens Result<Result<T>> into Result<T>.
|
||||
async fn flatten_err(
|
||||
f: impl Future<Output = Result<anyhow::Result<()>, JoinError>>,
|
||||
) -> anyhow::Result<()> {
|
||||
f.map(|r| r.context("join error").and_then(|x| x)).await
|
||||
}
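
flatten_err exists because tokio::spawn yields Result<T, JoinError> wrapping the task's own anyhow::Result; a brief sketch of how main consumes it:

async fn spawn_and_wait() -> anyhow::Result<()> {
    // The spawned task itself returns anyhow::Result<()>.
    let task = tokio::spawn(async { Ok::<(), anyhow::Error>(()) });

    // The JoinHandle resolves to Result<anyhow::Result<()>, JoinError>;
    // flatten_err folds the two layers into one.
    flatten_err(task).await
}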
|
||||
|
||||
#[tokio::main]
|
||||
async fn main() -> anyhow::Result<()> {
|
||||
fn main() -> anyhow::Result<()> {
|
||||
zenith_metrics::set_common_metrics_prefix("zenith_proxy");
|
||||
let arg_matches = App::new("Zenith proxy/router")
|
||||
.version(GIT_VERSION)
|
||||
.arg(
|
||||
Arg::new("proxy")
|
||||
.short('p')
|
||||
Arg::with_name("proxy")
|
||||
.short("p")
|
||||
.long("proxy")
|
||||
.takes_value(true)
|
||||
.help("listen for incoming client connections on ip:port")
|
||||
.default_value("127.0.0.1:4432"),
|
||||
)
|
||||
.arg(
|
||||
Arg::new("auth-method")
|
||||
.long("auth-method")
|
||||
.takes_value(true)
|
||||
.help("Possible values: password | link | mixed")
|
||||
.default_value("mixed"),
|
||||
)
|
||||
.arg(
|
||||
Arg::new("static-router")
|
||||
.short('s')
|
||||
.long("static-router")
|
||||
.takes_value(true)
|
||||
.help("Route all clients to host:port"),
|
||||
)
|
||||
.arg(
|
||||
Arg::new("mgmt")
|
||||
.short('m')
|
||||
Arg::with_name("mgmt")
|
||||
.short("m")
|
||||
.long("mgmt")
|
||||
.takes_value(true)
|
||||
.help("listen for management callback connection on ip:port")
|
||||
.default_value("127.0.0.1:7000"),
|
||||
)
|
||||
.arg(
|
||||
Arg::new("http")
|
||||
.short('h')
|
||||
Arg::with_name("http")
|
||||
.short("h")
|
||||
.long("http")
|
||||
.takes_value(true)
|
||||
.help("listen for incoming http connections (metrics, etc) on ip:port")
|
||||
.default_value("127.0.0.1:7001"),
|
||||
)
|
||||
.arg(
|
||||
Arg::new("uri")
|
||||
.short('u')
|
||||
Arg::with_name("uri")
|
||||
.short("u")
|
||||
.long("uri")
|
||||
.takes_value(true)
|
||||
.help("redirect unauthenticated users to given uri")
|
||||
.default_value("http://localhost:3000/psql_session/"),
|
||||
)
|
||||
.arg(
|
||||
Arg::new("auth-endpoint")
|
||||
.short('a')
|
||||
Arg::with_name("auth-endpoint")
|
||||
.short("a")
|
||||
.long("auth-endpoint")
|
||||
.takes_value(true)
|
||||
.help("API endpoint for authenticating users")
|
||||
.help("redirect unauthenticated users to given uri")
|
||||
.default_value("http://localhost:3000/authenticate_proxy_request/"),
|
||||
)
|
||||
.arg(
|
||||
Arg::new("ssl-key")
|
||||
.short('k')
|
||||
Arg::with_name("ssl-key")
|
||||
.short("k")
|
||||
.long("ssl-key")
|
||||
.takes_value(true)
|
||||
.help("path to SSL key for client postgres connections"),
|
||||
)
|
||||
.arg(
|
||||
Arg::new("ssl-cert")
|
||||
.short('c')
|
||||
Arg::with_name("ssl-cert")
|
||||
.short("c")
|
||||
.long("ssl-cert")
|
||||
.takes_value(true)
|
||||
.help("path to SSL cert for client postgres connections"),
|
||||
)
|
||||
.get_matches();
|
||||
|
||||
let tls_config = match (
|
||||
let ssl_config = match (
|
||||
arg_matches.value_of("ssl-key"),
|
||||
arg_matches.value_of("ssl-cert"),
|
||||
) {
|
||||
(Some(key_path), Some(cert_path)) => Some(config::configure_ssl(key_path, cert_path)?),
|
||||
(Some(key_path), Some(cert_path)) => {
|
||||
Some(crate::state::configure_ssl(key_path, cert_path)?)
|
||||
}
|
||||
(None, None) => None,
|
||||
_ => bail!("either both or neither ssl-key and ssl-cert must be specified"),
|
||||
};
|
||||
|
||||
let auth_method = arg_matches.value_of("auth-method").unwrap().parse()?;
|
||||
let router_config = match arg_matches.value_of("static-router") {
|
||||
None => RouterConfig::Dynamic(auth_method),
|
||||
Some(addr) => {
|
||||
if let ClientAuthMethod::Password = auth_method {
|
||||
let (host, port) = addr.split_once(':').unwrap();
|
||||
RouterConfig::Static {
|
||||
host: host.to_string(),
|
||||
port: port.parse().unwrap(),
|
||||
}
|
||||
} else {
|
||||
bail!("static-router requires --auth-method password")
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
let config: &ProxyConfig = Box::leak(Box::new(ProxyConfig {
|
||||
router_config,
|
||||
let config = ProxyConfig {
|
||||
proxy_address: arg_matches.value_of("proxy").unwrap().parse()?,
|
||||
mgmt_address: arg_matches.value_of("mgmt").unwrap().parse()?,
|
||||
http_address: arg_matches.value_of("http").unwrap().parse()?,
|
||||
redirect_uri: arg_matches.value_of("uri").unwrap().parse()?,
|
||||
auth_endpoint: arg_matches.value_of("auth-endpoint").unwrap().parse()?,
|
||||
tls_config,
|
||||
}));
|
||||
ssl_config,
|
||||
};
|
||||
let state: &ProxyState = Box::leak(Box::new(ProxyState::new(config)));
|
||||
|
||||
println!("Version: {}", GIT_VERSION);
|
||||
|
||||
// Check that we can bind to address before further initialization
|
||||
println!("Starting http on {}", config.http_address);
|
||||
let http_listener = TcpListener::bind(config.http_address).await?.into_std()?;
|
||||
println!("Starting http on {}", state.conf.http_address);
|
||||
let http_listener = tcp_listener::bind(state.conf.http_address)?;
|
||||
|
||||
println!("Starting mgmt on {}", config.mgmt_address);
|
||||
let mgmt_listener = TcpListener::bind(config.mgmt_address).await?.into_std()?;
|
||||
println!("Starting proxy on {}", state.conf.proxy_address);
|
||||
let pageserver_listener = tcp_listener::bind(state.conf.proxy_address)?;
|
||||
|
||||
println!("Starting proxy on {}", config.proxy_address);
|
||||
let proxy_listener = TcpListener::bind(config.proxy_address).await?;
|
||||
println!("Starting mgmt on {}", state.conf.mgmt_address);
|
||||
let mgmt_listener = tcp_listener::bind(state.conf.mgmt_address)?;
|
||||
|
||||
let http = tokio::spawn(http::thread_main(http_listener));
|
||||
let proxy = tokio::spawn(proxy::thread_main(config, proxy_listener));
|
||||
let mgmt = tokio::task::spawn_blocking(move || mgmt::thread_main(mgmt_listener));
|
||||
let threads = [
|
||||
thread::Builder::new()
|
||||
.name("Http thread".into())
|
||||
.spawn(move || {
|
||||
let router = http::make_router();
|
||||
endpoint::serve_thread_main(
|
||||
router,
|
||||
http_listener,
|
||||
std::future::pending(), // never shut down
|
||||
)
|
||||
})?,
|
||||
// Spawn a thread to listen for connections. It will spawn further threads
|
||||
// for each connection.
|
||||
thread::Builder::new()
|
||||
.name("Listener thread".into())
|
||||
.spawn(move || proxy::thread_main(state, pageserver_listener))?,
|
||||
thread::Builder::new()
|
||||
.name("Mgmt thread".into())
|
||||
.spawn(move || mgmt::thread_main(state, mgmt_listener))?,
|
||||
];
|
||||
|
||||
let tasks = [flatten_err(http), flatten_err(proxy), flatten_err(mgmt)];
|
||||
let _: Vec<()> = futures::future::try_join_all(tasks).await?;
|
||||
for t in threads {
|
||||
t.join().unwrap()?;
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
@@ -1,49 +1,44 @@
|
||||
use crate::{compute::DatabaseInfo, cplane_api};
|
||||
use anyhow::Context;
|
||||
use serde::Deserialize;
|
||||
use std::{
|
||||
net::{TcpListener, TcpStream},
|
||||
thread,
|
||||
};
|
||||
|
||||
use serde::Deserialize;
|
||||
use zenith_utils::{
|
||||
postgres_backend::{self, AuthType, PostgresBackend},
|
||||
pq_proto::{BeMessage, SINGLE_COL_ROWDESC},
|
||||
};
|
||||
|
||||
use crate::{cplane_api::DatabaseInfo, ProxyState};
|
||||
|
||||
///
|
||||
/// Main proxy listener loop.
|
||||
///
|
||||
/// Listens for connections, and launches a new handler thread for each.
|
||||
///
|
||||
pub fn thread_main(listener: TcpListener) -> anyhow::Result<()> {
|
||||
scopeguard::defer! {
|
||||
println!("mgmt has shut down");
|
||||
}
|
||||
|
||||
listener
|
||||
.set_nonblocking(false)
|
||||
.context("failed to set listener to blocking")?;
|
||||
pub fn thread_main(state: &'static ProxyState, listener: TcpListener) -> anyhow::Result<()> {
|
||||
loop {
|
||||
let (socket, peer_addr) = listener.accept().context("failed to accept a new client")?;
|
||||
let (socket, peer_addr) = listener.accept()?;
|
||||
println!("accepted connection from {}", peer_addr);
|
||||
socket
|
||||
.set_nodelay(true)
|
||||
.context("failed to set client socket option")?;
|
||||
socket.set_nodelay(true).unwrap();
|
||||
|
||||
thread::spawn(move || {
|
||||
if let Err(err) = handle_connection(socket) {
|
||||
if let Err(err) = handle_connection(state, socket) {
|
||||
println!("error: {}", err);
|
||||
}
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
fn handle_connection(socket: TcpStream) -> anyhow::Result<()> {
|
||||
fn handle_connection(state: &ProxyState, socket: TcpStream) -> anyhow::Result<()> {
|
||||
let mut conn_handler = MgmtHandler { state };
|
||||
let pgbackend = PostgresBackend::new(socket, AuthType::Trust, None, true)?;
|
||||
pgbackend.run(&mut MgmtHandler)
|
||||
pgbackend.run(&mut conn_handler)
|
||||
}
|
||||
|
||||
struct MgmtHandler;
|
||||
struct MgmtHandler<'a> {
|
||||
state: &'a ProxyState,
|
||||
}
|
||||
|
||||
/// Serialized examples:
|
||||
// {
|
||||
@@ -79,13 +74,13 @@ enum PsqlSessionResult {
|
||||
Failure(String),
|
||||
}
|
||||
|
||||
impl postgres_backend::Handler for MgmtHandler {
|
||||
impl postgres_backend::Handler for MgmtHandler<'_> {
|
||||
fn process_query(
|
||||
&mut self,
|
||||
pgb: &mut PostgresBackend,
|
||||
query_string: &str,
|
||||
) -> anyhow::Result<()> {
|
||||
let res = try_process_query(pgb, query_string);
|
||||
let res = try_process_query(self, pgb, query_string);
|
||||
// intercept and log error message
|
||||
if res.is_err() {
|
||||
println!("Mgmt query failed: #{:?}", res);
|
||||
@@ -94,7 +89,11 @@ impl postgres_backend::Handler for MgmtHandler {
|
||||
}
|
||||
}
|
||||
|
||||
fn try_process_query(pgb: &mut PostgresBackend, query_string: &str) -> anyhow::Result<()> {
|
||||
fn try_process_query(
|
||||
mgmt: &mut MgmtHandler,
|
||||
pgb: &mut PostgresBackend,
|
||||
query_string: &str,
|
||||
) -> anyhow::Result<()> {
|
||||
println!("Got mgmt query: '{}'", query_string);
|
||||
|
||||
let resp: PsqlSessionResponse = serde_json::from_str(query_string)?;
|
||||
@@ -105,14 +104,14 @@ fn try_process_query(pgb: &mut PostgresBackend, query_string: &str) -> anyhow::R
|
||||
Failure(message) => Err(message),
|
||||
};
|
||||
|
||||
match cplane_api::notify(&resp.session_id, msg) {
|
||||
match mgmt.state.waiters.notify(&resp.session_id, msg) {
|
||||
Ok(()) => {
|
||||
pgb.write_message_noflush(&SINGLE_COL_ROWDESC)?
|
||||
.write_message_noflush(&BeMessage::DataRow(&[Some(b"ok")]))?
|
||||
.write_message(&BeMessage::CommandComplete(b"SELECT 1"))?;
|
||||
}
|
||||
Err(e) => {
|
||||
pgb.write_message(&BeMessage::ErrorResponse(&e.to_string()))?;
|
||||
pgb.write_message(&BeMessage::ErrorResponse(e.to_string()))?;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -1,332 +1,396 @@
|
||||
use crate::auth;
|
||||
use crate::cancellation::{self, CancelClosure, CancelMap};
|
||||
use crate::compute::DatabaseInfo;
|
||||
use crate::config::{ProxyConfig, TlsConfig};
|
||||
use crate::stream::{MetricsStream, PqStream, Stream};
|
||||
use anyhow::{bail, Context};
|
||||
use crate::cplane_api::{CPlaneApi, DatabaseInfo};
|
||||
use crate::ProxyState;
|
||||
use anyhow::{anyhow, bail};
|
||||
use lazy_static::lazy_static;
|
||||
use std::sync::Arc;
|
||||
use tokio::io::{AsyncRead, AsyncWrite};
|
||||
use tokio::net::TcpStream;
|
||||
use parking_lot::Mutex;
|
||||
use rand::prelude::StdRng;
|
||||
use rand::{Rng, SeedableRng};
|
||||
use std::cell::Cell;
|
||||
use std::collections::HashMap;
|
||||
use std::net::{SocketAddr, TcpStream};
|
||||
use std::{io, thread};
|
||||
use tokio_postgres::NoTls;
|
||||
use zenith_metrics::{new_common_metric_name, register_int_counter, IntCounter};
|
||||
use zenith_utils::pq_proto::{BeMessage as Be, *};
|
||||
use zenith_utils::postgres_backend::{self, PostgresBackend, ProtoState, Stream};
|
||||
use zenith_utils::pq_proto::{BeMessage as Be, FeMessage as Fe, *};
|
||||
use zenith_utils::sock_split::{ReadStream, WriteStream};
|
||||
|
||||
lazy_static! {
|
||||
static ref NUM_CONNECTIONS_ACCEPTED_COUNTER: IntCounter = register_int_counter!(
|
||||
new_common_metric_name("num_connections_accepted"),
|
||||
"Number of TCP client connections accepted."
|
||||
)
|
||||
.unwrap();
|
||||
static ref NUM_CONNECTIONS_CLOSED_COUNTER: IntCounter = register_int_counter!(
|
||||
new_common_metric_name("num_connections_closed"),
|
||||
"Number of TCP client connections closed."
|
||||
)
|
||||
.unwrap();
|
||||
static ref NUM_BYTES_PROXIED_COUNTER: IntCounter = register_int_counter!(
|
||||
new_common_metric_name("num_bytes_proxied"),
|
||||
"Number of bytes sent/received between any client and backend."
|
||||
)
|
||||
.unwrap();
|
||||
struct CancelClosure {
|
||||
socket_addr: SocketAddr,
|
||||
cancel_token: tokio_postgres::CancelToken,
|
||||
}
|
||||
|
||||
async fn log_error<R, F>(future: F) -> F::Output
|
||||
where
|
||||
F: std::future::Future<Output = anyhow::Result<R>>,
|
||||
{
|
||||
future.await.map_err(|err| {
|
||||
println!("error: {}", err);
|
||||
err
|
||||
})
|
||||
}
|
||||
|
||||
pub async fn thread_main(
|
||||
config: &'static ProxyConfig,
|
||||
listener: tokio::net::TcpListener,
|
||||
) -> anyhow::Result<()> {
|
||||
scopeguard::defer! {
|
||||
println!("proxy has shut down");
|
||||
}
|
||||
|
||||
let cancel_map = Arc::new(CancelMap::default());
|
||||
loop {
|
||||
let (socket, peer_addr) = listener.accept().await?;
|
||||
println!("accepted connection from {}", peer_addr);
|
||||
|
||||
let cancel_map = Arc::clone(&cancel_map);
|
||||
tokio::spawn(log_error(async move {
|
||||
socket
|
||||
.set_nodelay(true)
|
||||
.context("failed to set socket option")?;
|
||||
|
||||
handle_client(config, &cancel_map, socket).await
|
||||
}));
|
||||
}
|
||||
}
|
||||
|
||||
async fn handle_client(
|
||||
config: &ProxyConfig,
|
||||
cancel_map: &CancelMap,
|
||||
stream: impl AsyncRead + AsyncWrite + Unpin,
|
||||
) -> anyhow::Result<()> {
|
||||
// The `closed` counter will increase when this future is destroyed.
|
||||
NUM_CONNECTIONS_ACCEPTED_COUNTER.inc();
|
||||
scopeguard::defer! {
|
||||
NUM_CONNECTIONS_CLOSED_COUNTER.inc();
|
||||
}
|
||||
|
||||
let tls = config.tls_config.clone();
|
||||
if let Some((client, creds)) = handshake(stream, tls, cancel_map).await? {
|
||||
cancel_map
|
||||
.with_session(|session| async {
|
||||
connect_client_to_db(config, session, client, creds).await
|
||||
})
|
||||
.await?;
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Handle a connection from one client.
|
||||
/// For better testing experience, `stream` can be
|
||||
/// any object satisfying the traits.
|
||||
async fn handshake<S: AsyncRead + AsyncWrite + Unpin>(
|
||||
stream: S,
|
||||
mut tls: Option<TlsConfig>,
|
||||
cancel_map: &CancelMap,
|
||||
) -> anyhow::Result<Option<(PqStream<Stream<S>>, auth::ClientCredentials)>> {
|
||||
// Client may try upgrading to each protocol only once
|
||||
let (mut tried_ssl, mut tried_gss) = (false, false);
|
||||
|
||||
let mut stream = PqStream::new(Stream::from_raw(stream));
|
||||
loop {
|
||||
let msg = stream.read_startup_packet().await?;
|
||||
println!("got message: {:?}", msg);
|
||||
|
||||
use FeStartupPacket::*;
|
||||
match msg {
|
||||
SslRequest => match stream.get_ref() {
|
||||
Stream::Raw { .. } if !tried_ssl => {
|
||||
tried_ssl = true;
|
||||
|
||||
// We can't perform TLS handshake without a config
|
||||
let enc = tls.is_some();
|
||||
stream.write_message(&Be::EncryptionResponse(enc)).await?;
|
||||
|
||||
if let Some(tls) = tls.take() {
|
||||
// Upgrade raw stream into a secure TLS-backed stream.
|
||||
// NOTE: We've consumed `tls`; this fact will be used later.
|
||||
stream = PqStream::new(stream.into_inner().upgrade(tls).await?);
|
||||
}
|
||||
}
|
||||
_ => bail!("protocol violation"),
|
||||
},
|
||||
GssEncRequest => match stream.get_ref() {
|
||||
Stream::Raw { .. } if !tried_gss => {
|
||||
tried_gss = true;
|
||||
|
||||
// Currently, we don't support GSSAPI
|
||||
stream.write_message(&Be::EncryptionResponse(false)).await?;
|
||||
}
|
||||
_ => bail!("protocol violation"),
|
||||
},
|
||||
StartupMessage { params, .. } => {
|
||||
// Check that the config has been consumed during upgrade
|
||||
// OR we didn't provide it at all (for dev purposes).
|
||||
if tls.is_some() {
|
||||
let msg = "connection is insecure (try using `sslmode=require`)";
|
||||
stream.write_message(&Be::ErrorResponse(msg)).await?;
|
||||
bail!(msg);
|
||||
}
|
||||
|
||||
break Ok(Some((stream, params.try_into()?)));
|
||||
}
|
||||
CancelRequest(cancel_key_data) => {
|
||||
cancel_map.cancel_session(cancel_key_data).await?;
|
||||
|
||||
break Ok(None);
|
||||
}
|
||||
impl CancelClosure {
|
||||
async fn try_cancel_query(&self) {
|
||||
if let Ok(socket) = tokio::net::TcpStream::connect(self.socket_addr).await {
|
||||
// NOTE ignoring the result because:
|
||||
// 1. This is a best effort attempt, the database doesn't have to listen
|
||||
// 2. Being opaque about errors here helps avoid leaking info to unauthenticated user
|
||||
let _ = self.cancel_token.cancel_query_raw(socket, NoTls).await;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
async fn connect_client_to_db(
|
||||
config: &ProxyConfig,
|
||||
session: cancellation::Session<'_>,
|
||||
mut client: PqStream<impl AsyncRead + AsyncWrite + Unpin>,
|
||||
creds: auth::ClientCredentials,
|
||||
lazy_static! {
|
||||
// Enables serving CancelRequests
|
||||
static ref CANCEL_MAP: Mutex<HashMap<CancelKeyData, CancelClosure>> = Mutex::new(HashMap::new());
|
||||
|
||||
// Metrics
|
||||
static ref NUM_CONNECTIONS_ACCEPTED_COUNTER: IntCounter = register_int_counter!(
|
||||
new_common_metric_name("num_connections_accepted"),
|
||||
"Number of TCP client connections accepted."
|
||||
).unwrap();
|
||||
static ref NUM_CONNECTIONS_CLOSED_COUNTER: IntCounter = register_int_counter!(
|
||||
new_common_metric_name("num_connections_closed"),
|
||||
"Number of TCP client connections closed."
|
||||
).unwrap();
|
||||
static ref NUM_CONNECTIONS_FAILED_COUNTER: IntCounter = register_int_counter!(
|
||||
new_common_metric_name("num_connections_failed"),
|
||||
"Number of TCP client connections that closed due to error."
|
||||
).unwrap();
|
||||
static ref NUM_BYTES_PROXIED_COUNTER: IntCounter = register_int_counter!(
|
||||
new_common_metric_name("num_bytes_proxied"),
|
||||
"Number of bytes sent/received between any client and backend."
|
||||
).unwrap();
|
||||
}
|
||||
|
||||
thread_local! {
|
||||
// Used to clean up the CANCEL_MAP. Might not be necessary if we use tokio thread pool in main loop.
|
||||
static THREAD_CANCEL_KEY_DATA: Cell<Option<CancelKeyData>> = Cell::new(None);
|
||||
}
|
||||
|
||||
///
|
||||
/// Main proxy listener loop.
|
||||
///
|
||||
/// Listens for connections, and launches a new handler thread for each.
|
||||
///
|
||||
pub fn thread_main(
|
||||
state: &'static ProxyState,
|
||||
listener: std::net::TcpListener,
|
||||
) -> anyhow::Result<()> {
|
||||
let db_info = creds.authenticate(config, &mut client).await?;
|
||||
let (db, version, cancel_closure) = connect_to_db(db_info).await?;
|
||||
let cancel_key_data = session.enable_cancellation(cancel_closure);
|
||||
loop {
|
||||
let (socket, peer_addr) = listener.accept()?;
|
||||
println!("accepted connection from {}", peer_addr);
|
||||
NUM_CONNECTIONS_ACCEPTED_COUNTER.inc();
|
||||
socket.set_nodelay(true).unwrap();
|
||||
|
||||
client
|
||||
.write_message_noflush(&BeMessage::ParameterStatus(
|
||||
BeParameterStatusMessage::ServerVersion(&version),
|
||||
))?
|
||||
.write_message_noflush(&Be::BackendKeyData(cancel_key_data))?
|
||||
.write_message(&BeMessage::ReadyForQuery)
|
||||
.await?;
|
||||
// TODO Use a threadpool instead. Maybe use tokio's threadpool by
|
||||
// spawning a future into its runtime. Tokio's JoinError should
|
||||
// allow us to handle cleanup properly even if the future panics.
|
||||
thread::Builder::new()
|
||||
.name("Proxy thread".into())
|
||||
.spawn(move || {
|
||||
if let Err(err) = proxy_conn_main(state, socket) {
|
||||
NUM_CONNECTIONS_FAILED_COUNTER.inc();
|
||||
println!("error: {}", err);
|
||||
}
|
||||
|
||||
// This function will be called for writes to either direction.
|
||||
fn inc_proxied(cnt: usize) {
|
||||
// Consider inventing something more sophisticated
|
||||
// if this ever becomes a bottleneck (cacheline bouncing).
|
||||
NUM_BYTES_PROXIED_COUNTER.inc_by(cnt as u64);
|
||||
// Clean up CANCEL_MAP.
|
||||
NUM_CONNECTIONS_CLOSED_COUNTER.inc();
|
||||
THREAD_CANCEL_KEY_DATA.with(|cell| {
|
||||
if let Some(cancel_key_data) = cell.get() {
|
||||
CANCEL_MAP.lock().remove(&cancel_key_data);
|
||||
};
|
||||
});
|
||||
})?;
|
||||
}
|
||||
}
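// A rough sketch of the alternative the TODO above describes: spawn each
// accepted connection as a task on a shared tokio runtime instead of a
// dedicated OS thread. handle_connection is a hypothetical stand-in for the
// real per-connection logic, not a function from this patch.
async fn accept_loop(listener: tokio::net::TcpListener) -> anyhow::Result<()> {
    loop {
        let (socket, peer_addr) = listener.accept().await?;
        println!("accepted connection from {}", peer_addr);
        NUM_CONNECTIONS_ACCEPTED_COUNTER.inc();
        socket.set_nodelay(true)?;

        // A panic or error stays inside the task; the JoinHandle's JoinError
        // could be used for cleanup if that ever becomes necessary.
        tokio::spawn(async move {
            if let Err(err) = handle_connection(socket).await {
                NUM_CONNECTIONS_FAILED_COUNTER.inc();
                println!("error: {}", err);
            }
        });
    }
}

async fn handle_connection(_socket: tokio::net::TcpStream) -> anyhow::Result<()> {
    Ok(()) // stub standing in for the real per-connection proxy logic
}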
|
||||
|
||||
// TODO: clean up fields
|
||||
struct ProxyConnection {
|
||||
state: &'static ProxyState,
|
||||
psql_session_id: String,
|
||||
pgb: PostgresBackend,
|
||||
}
|
||||
|
||||
pub fn proxy_conn_main(state: &'static ProxyState, socket: TcpStream) -> anyhow::Result<()> {
|
||||
let conn = ProxyConnection {
|
||||
state,
|
||||
psql_session_id: hex::encode(rand::random::<[u8; 8]>()),
|
||||
pgb: PostgresBackend::new(
|
||||
socket,
|
||||
postgres_backend::AuthType::MD5,
|
||||
state.conf.ssl_config.clone(),
|
||||
false,
|
||||
)?,
|
||||
};
|
||||
|
||||
let (client, server) = match conn.handle_client()? {
|
||||
Some(x) => x,
|
||||
None => return Ok(()),
|
||||
};
|
||||
|
||||
let server = zenith_utils::sock_split::BidiStream::from_tcp(server);
|
||||
|
||||
let client = match client {
|
||||
Stream::Bidirectional(bidi_stream) => bidi_stream,
|
||||
_ => panic!("invalid stream type"),
|
||||
};
|
||||
|
||||
proxy(client.split(), server.split())
|
||||
}
|
||||
|
||||
impl ProxyConnection {
|
||||
/// Returns Ok(None) when connection was successfully closed.
|
||||
fn handle_client(mut self) -> anyhow::Result<Option<(Stream, TcpStream)>> {
|
||||
let mut authenticate = || {
|
||||
let (username, dbname) = match self.handle_startup()? {
|
||||
Some(x) => x,
|
||||
None => return Ok(None),
|
||||
};
|
||||
|
||||
// Both scenarios here should end up producing database credentials
|
||||
if username.ends_with("@zenith") {
|
||||
self.handle_existing_user(&username, &dbname).map(Some)
|
||||
} else {
|
||||
self.handle_new_user().map(Some)
|
||||
}
|
||||
};
|
||||
|
||||
let conn = match authenticate() {
|
||||
Ok(Some(db_info)) => connect_to_db(db_info),
|
||||
Ok(None) => return Ok(None),
|
||||
Err(e) => {
|
||||
// Report the error to the client
|
||||
self.pgb.write_message(&Be::ErrorResponse(e.to_string()))?;
|
||||
bail!("failed to handle client: {:?}", e);
|
||||
}
|
||||
};
|
||||
|
||||
// We'll get rid of this once migration to async is complete
|
||||
let (pg_version, db_stream) = {
|
||||
let runtime = tokio::runtime::Builder::new_current_thread()
|
||||
.enable_all()
|
||||
.build()?;
|
||||
|
||||
let (pg_version, stream, cancel_key_data) = runtime.block_on(conn)?;
|
||||
self.pgb
|
||||
.write_message(&BeMessage::BackendKeyData(cancel_key_data))?;
|
||||
let stream = stream.into_std()?;
|
||||
stream.set_nonblocking(false)?;
|
||||
|
||||
(pg_version, stream)
|
||||
};
|
||||
|
||||
// Let the client send new requests
|
||||
self.pgb
|
||||
.write_message_noflush(&BeMessage::ParameterStatus(
|
||||
BeParameterStatusMessage::ServerVersion(&pg_version),
|
||||
))?
|
||||
.write_message(&Be::ReadyForQuery)?;
|
||||
|
||||
Ok(Some((self.pgb.into_stream(), db_stream)))
|
||||
}
|
||||
|
||||
let mut db = MetricsStream::new(db, inc_proxied);
|
||||
let mut client = MetricsStream::new(client.into_inner(), inc_proxied);
|
||||
let _ = tokio::io::copy_bidirectional(&mut client, &mut db).await?;
|
||||
/// Returns Ok(None) when connection was successfully closed.
|
||||
fn handle_startup(&mut self) -> anyhow::Result<Option<(String, String)>> {
|
||||
let have_tls = self.pgb.tls_config.is_some();
|
||||
let mut encrypted = false;
|
||||
let mut received_something = false;
|
||||
|
||||
loop {
|
||||
let msg = match self.pgb.read_message()? {
|
||||
Some(Fe::StartupPacket(msg)) => msg,
|
||||
None => {
|
||||
if received_something {
|
||||
bail!("connection is lost");
|
||||
} else {
|
||||
// Probably load balancer health check
|
||||
// TODO check peer_addr to make sure.
|
||||
return Ok(None);
|
||||
}
|
||||
}
|
||||
bad => bail!("unexpected message type: {:?}", bad),
|
||||
};
|
||||
println!("got message: {:?}", msg);
|
||||
received_something = true;
|
||||
|
||||
match msg {
|
||||
FeStartupPacket::GssEncRequest => {
|
||||
self.pgb.write_message(&Be::EncryptionResponse(false))?;
|
||||
}
|
||||
FeStartupPacket::SslRequest => {
|
||||
self.pgb.write_message(&Be::EncryptionResponse(have_tls))?;
|
||||
if have_tls {
|
||||
self.pgb.start_tls()?;
|
||||
encrypted = true;
|
||||
}
|
||||
}
|
||||
FeStartupPacket::StartupMessage { mut params, .. } => {
|
||||
if have_tls && !encrypted {
|
||||
bail!("must connect with TLS");
|
||||
}
|
||||
|
||||
let mut get_param = |key| {
|
||||
params
|
||||
.remove(key)
|
||||
.ok_or_else(|| anyhow!("{} is missing in startup packet", key))
|
||||
};
|
||||
|
||||
return Ok(Some((get_param("user")?, get_param("database")?)));
|
||||
}
|
||||
FeStartupPacket::CancelRequest(cancel_key_data) => {
|
||||
if let Some(cancel_closure) = CANCEL_MAP.lock().get(&cancel_key_data) {
|
||||
let runtime = tokio::runtime::Builder::new_current_thread()
|
||||
.enable_all()
|
||||
.build()
|
||||
.unwrap();
|
||||
runtime.block_on(cancel_closure.try_cancel_query());
|
||||
}
|
||||
return Ok(None);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn handle_existing_user(&mut self, user: &str, db: &str) -> anyhow::Result<DatabaseInfo> {
|
||||
let md5_salt = rand::random::<[u8; 4]>();
|
||||
|
||||
// Ask password
|
||||
self.pgb
|
||||
.write_message(&Be::AuthenticationMD5Password(&md5_salt))?;
|
||||
self.pgb.state = ProtoState::Authentication; // XXX
|
||||
|
||||
// Check password
|
||||
let msg = match self.pgb.read_message()? {
|
||||
Some(Fe::PasswordMessage(msg)) => msg,
|
||||
None => bail!("connection is lost"),
|
||||
bad => bail!("unexpected message type: {:?}", bad),
|
||||
};
|
||||
println!("got message: {:?}", msg);
|
||||
|
||||
let (_trailing_null, md5_response) = msg
|
||||
.split_last()
|
||||
.ok_or_else(|| anyhow!("unexpected password message"))?;
|
||||
|
||||
let cplane = CPlaneApi::new(&self.state.conf.auth_endpoint, &self.state.waiters);
|
||||
let db_info = cplane.authenticate_proxy_request(
|
||||
user,
|
||||
db,
|
||||
md5_response,
|
||||
&md5_salt,
|
||||
&self.psql_session_id,
|
||||
)?;
|
||||
|
||||
self.pgb
|
||||
.write_message_noflush(&Be::AuthenticationOk)?
|
||||
.write_message_noflush(&BeParameterStatusMessage::encoding())?;
|
||||
|
||||
Ok(db_info)
|
||||
}
|
||||
|
||||
fn handle_new_user(&mut self) -> anyhow::Result<DatabaseInfo> {
|
||||
let greeting = hello_message(&self.state.conf.redirect_uri, &self.psql_session_id);
|
||||
|
||||
// First, register this session
|
||||
let waiter = self.state.waiters.register(self.psql_session_id.clone());
|
||||
|
||||
// Give user a URL to spawn a new database
|
||||
self.pgb
|
||||
.write_message_noflush(&Be::AuthenticationOk)?
|
||||
.write_message_noflush(&BeParameterStatusMessage::encoding())?
|
||||
.write_message(&Be::NoticeResponse(greeting))?;
|
||||
|
||||
// Wait for web console response
|
||||
let db_info = waiter.wait()?.map_err(|e| anyhow!(e))?;
|
||||
|
||||
self.pgb
|
||||
.write_message_noflush(&Be::NoticeResponse("Connecting to database.".into()))?;
|
||||
|
||||
Ok(db_info)
|
||||
}
|
||||
}
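// The other half of the link flow is only sketched here: once the web console
// has created the database, some entry point on the proxy side (assumed, not
// shown in this patch) resolves the session by its psql_session_id, which is
// what unblocks waiter.wait() in handle_new_user above.
fn on_console_callback(
    state: &ProxyState,
    psql_session_id: &str,
    db_info: DatabaseInfo,
) -> anyhow::Result<()> {
    state.waiters.notify(psql_session_id, Ok(db_info))
}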
|
||||
|
||||
fn hello_message(redirect_uri: &str, session_id: &str) -> String {
|
||||
format!(
|
||||
concat![
|
||||
"☀️ Welcome to Zenith!\n",
|
||||
"To proceed with database creation, open the following link:\n\n",
|
||||
" {redirect_uri}{session_id}\n\n",
|
||||
"It needs to be done once and we will send you '.pgpass' file,\n",
|
||||
"which will allow you to access or create ",
|
||||
"databases without opening your web browser."
|
||||
],
|
||||
redirect_uri = redirect_uri,
|
||||
session_id = session_id,
|
||||
)
|
||||
}
|
||||
|
||||
/// Create a TCP connection to a postgres database, authenticate with it, and receive the ReadyForQuery message
|
||||
async fn connect_to_db(
|
||||
db_info: DatabaseInfo,
|
||||
) -> anyhow::Result<(String, tokio::net::TcpStream, CancelKeyData)> {
|
||||
// Make raw connection. When connect_raw finishes we've received ReadyForQuery.
|
||||
let socket_addr = db_info.socket_addr()?;
|
||||
let mut socket = tokio::net::TcpStream::connect(socket_addr).await?;
|
||||
let config = tokio_postgres::Config::from(db_info);
|
||||
// NOTE We effectively ignore some ParameterStatus and NoticeResponse
|
||||
// messages here. Not sure if that could break something.
|
||||
let (client, conn) = config.connect_raw(&mut socket, NoTls).await?;
|
||||
|
||||
// Save info for potentially cancelling the query later
|
||||
let mut rng = StdRng::from_entropy();
|
||||
let cancel_key_data = CancelKeyData {
|
||||
// HACK We'd rather get the real backend_pid but tokio_postgres doesn't
|
||||
// expose it and we don't want to do another roundtrip to query
|
||||
// for it. The client will be able to notice that this is not the
|
||||
// actual backend_pid, but backend_pid is not used for anything
|
||||
// so it doesn't matter.
|
||||
backend_pid: rng.gen(),
|
||||
cancel_key: rng.gen(),
|
||||
};
|
||||
let cancel_closure = CancelClosure {
|
||||
socket_addr,
|
||||
cancel_token: client.cancel_token(),
|
||||
};
|
||||
CANCEL_MAP.lock().insert(cancel_key_data, cancel_closure);
|
||||
THREAD_CANCEL_KEY_DATA.with(|cell| {
|
||||
let prev_value = cell.replace(Some(cancel_key_data));
|
||||
assert!(
|
||||
prev_value.is_none(),
|
||||
"THREAD_CANCEL_KEY_DATA was already set"
|
||||
);
|
||||
});
|
||||
|
||||
let version = conn.parameter("server_version").unwrap();
|
||||
Ok((version.into(), socket, cancel_key_data))
|
||||
}
|
||||
|
||||
/// Concurrently proxy both directions of the client and server connections
|
||||
fn proxy(
|
||||
(client_read, client_write): (ReadStream, WriteStream),
|
||||
(server_read, server_write): (ReadStream, WriteStream),
|
||||
) -> anyhow::Result<()> {
|
||||
fn do_proxy(mut reader: impl io::Read, mut writer: WriteStream) -> io::Result<u64> {
|
||||
/// FlushWriter will make sure that every message is sent as soon as possible
|
||||
struct FlushWriter<W>(W);
|
||||
|
||||
impl<W: io::Write> io::Write for FlushWriter<W> {
|
||||
fn write(&mut self, buf: &[u8]) -> io::Result<usize> {
|
||||
// `std::io::copy` is guaranteed to exit if we return an error,
|
||||
// so we can afford to lose `res` in case `flush` fails
|
||||
let res = self.0.write(buf);
|
||||
if res.is_ok() {
|
||||
NUM_BYTES_PROXIED_COUNTER.inc_by(buf.len() as u64);
|
||||
self.flush()?;
|
||||
}
|
||||
res
|
||||
}
|
||||
|
||||
fn flush(&mut self) -> io::Result<()> {
|
||||
self.0.flush()
|
||||
}
|
||||
}
|
||||
|
||||
let res = std::io::copy(&mut reader, &mut FlushWriter(&mut writer));
|
||||
writer.shutdown(std::net::Shutdown::Both)?;
|
||||
res
|
||||
}
|
||||
|
||||
let client_to_server_jh = thread::spawn(move || do_proxy(client_read, server_write));
|
||||
|
||||
do_proxy(server_read, client_write)?;
|
||||
client_to_server_jh.join().unwrap()?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Connect to a corresponding compute node.
|
||||
async fn connect_to_db(
|
||||
db_info: DatabaseInfo,
|
||||
) -> anyhow::Result<(TcpStream, String, CancelClosure)> {
|
||||
// TODO: establish a secure connection to the DB
|
||||
let socket_addr = db_info.socket_addr()?;
|
||||
let mut socket = TcpStream::connect(socket_addr).await?;
|
||||
|
||||
let (client, conn) = tokio_postgres::Config::from(db_info)
|
||||
.connect_raw(&mut socket, NoTls)
|
||||
.await?;
|
||||
|
||||
let version = conn
|
||||
.parameter("server_version")
|
||||
.context("failed to fetch postgres server version")?
|
||||
.into();
|
||||
|
||||
let cancel_closure = CancelClosure::new(socket_addr, client.cancel_token());
|
||||
|
||||
Ok((socket, version, cancel_closure))
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
|
||||
use tokio::io::DuplexStream;
|
||||
use tokio_postgres::config::SslMode;
|
||||
use tokio_postgres::tls::MakeTlsConnect;
|
||||
use tokio_postgres_rustls::MakeRustlsConnect;
|
||||
|
||||
async fn dummy_proxy(
|
||||
client: impl AsyncRead + AsyncWrite + Unpin,
|
||||
tls: Option<TlsConfig>,
|
||||
) -> anyhow::Result<()> {
|
||||
let cancel_map = CancelMap::default();
|
||||
|
||||
// TODO: add some infra + tests for credentials
|
||||
let (mut stream, _creds) = handshake(client, tls, &cancel_map)
|
||||
.await?
|
||||
.context("no stream")?;
|
||||
|
||||
stream
|
||||
.write_message_noflush(&Be::AuthenticationOk)?
|
||||
.write_message_noflush(&BeParameterStatusMessage::encoding())?
|
||||
.write_message(&BeMessage::ReadyForQuery)
|
||||
.await?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn generate_certs(
|
||||
hostname: &str,
|
||||
) -> anyhow::Result<(rustls::Certificate, rustls::Certificate, rustls::PrivateKey)> {
|
||||
let ca = rcgen::Certificate::from_params({
|
||||
let mut params = rcgen::CertificateParams::default();
|
||||
params.is_ca = rcgen::IsCa::Ca(rcgen::BasicConstraints::Unconstrained);
|
||||
params
|
||||
})?;
|
||||
|
||||
let cert = rcgen::generate_simple_self_signed(vec![hostname.into()])?;
|
||||
Ok((
|
||||
rustls::Certificate(ca.serialize_der()?),
|
||||
rustls::Certificate(cert.serialize_der_with_signer(&ca)?),
|
||||
rustls::PrivateKey(cert.serialize_private_key_der()),
|
||||
))
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn handshake_tls_is_enforced_by_proxy() -> anyhow::Result<()> {
|
||||
let (client, server) = tokio::io::duplex(1024);
|
||||
|
||||
let server_config = {
|
||||
let (_ca, cert, key) = generate_certs("localhost")?;
|
||||
|
||||
let mut config = rustls::ServerConfig::new(rustls::NoClientAuth::new());
|
||||
config.set_single_cert(vec![cert], key)?;
|
||||
config
|
||||
};
|
||||
|
||||
let proxy = tokio::spawn(dummy_proxy(client, Some(server_config.into())));
|
||||
|
||||
tokio_postgres::Config::new()
|
||||
.user("john_doe")
|
||||
.dbname("earth")
|
||||
.ssl_mode(SslMode::Disable)
|
||||
.connect_raw(server, NoTls)
|
||||
.await
|
||||
.err() // -> Option<E>
|
||||
.context("client shouldn't be able to connect")?;
|
||||
|
||||
proxy
|
||||
.await?
|
||||
.err() // -> Option<E>
|
||||
.context("server shouldn't accept client")?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn handshake_tls() -> anyhow::Result<()> {
|
||||
let (client, server) = tokio::io::duplex(1024);
|
||||
|
||||
let (ca, cert, key) = generate_certs("localhost")?;
|
||||
|
||||
let server_config = {
|
||||
let mut config = rustls::ServerConfig::new(rustls::NoClientAuth::new());
|
||||
config.set_single_cert(vec![cert], key)?;
|
||||
config
|
||||
};
|
||||
|
||||
let proxy = tokio::spawn(dummy_proxy(client, Some(server_config.into())));
|
||||
|
||||
let client_config = {
|
||||
let mut config = rustls::ClientConfig::new();
|
||||
config.root_store.add(&ca)?;
|
||||
config
|
||||
};
|
||||
|
||||
let mut mk = MakeRustlsConnect::new(client_config);
|
||||
let tls = MakeTlsConnect::<DuplexStream>::make_tls_connect(&mut mk, "localhost")?;
|
||||
|
||||
let (_client, _conn) = tokio_postgres::Config::new()
|
||||
.user("john_doe")
|
||||
.dbname("earth")
|
||||
.ssl_mode(SslMode::Require)
|
||||
.connect_raw(server, tls)
|
||||
.await?;
|
||||
|
||||
proxy.await?
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn handshake_raw() -> anyhow::Result<()> {
|
||||
let (client, server) = tokio::io::duplex(1024);
|
||||
|
||||
let proxy = tokio::spawn(dummy_proxy(client, None));
|
||||
|
||||
let (_client, _conn) = tokio_postgres::Config::new()
|
||||
.user("john_doe")
|
||||
.dbname("earth")
|
||||
.ssl_mode(SslMode::Prefer)
|
||||
.connect_raw(server, NoTls)
|
||||
.await?;
|
||||
|
||||
proxy.await?
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,46 +1,15 @@
|
||||
use crate::cplane_api::DatabaseInfo;
|
||||
use anyhow::{anyhow, ensure, Context};
|
||||
use rustls::{internal::pemfile, NoClientAuth, ProtocolVersion, ServerConfig};
|
||||
use std::net::SocketAddr;
|
||||
use std::str::FromStr;
|
||||
use std::sync::Arc;
|
||||
|
||||
pub type TlsConfig = Arc<ServerConfig>;
|
||||
|
||||
#[non_exhaustive]
|
||||
pub enum ClientAuthMethod {
|
||||
Password,
|
||||
Link,
|
||||
|
||||
/// Use password auth only if username ends with "@zenith"
|
||||
Mixed,
|
||||
}
|
||||
|
||||
pub enum RouterConfig {
|
||||
Static { host: String, port: u16 },
|
||||
Dynamic(ClientAuthMethod),
|
||||
}
|
||||
|
||||
impl FromStr for ClientAuthMethod {
|
||||
type Err = anyhow::Error;
|
||||
|
||||
fn from_str(s: &str) -> anyhow::Result<Self> {
|
||||
use ClientAuthMethod::*;
|
||||
match s {
|
||||
"password" => Ok(Password),
|
||||
"link" => Ok(Link),
|
||||
"mixed" => Ok(Mixed),
|
||||
_ => Err(anyhow::anyhow!("Invlid option for router")),
|
||||
}
|
||||
}
|
||||
}
|
||||
pub type SslConfig = Arc<ServerConfig>;
|
||||
|
||||
pub struct ProxyConfig {
|
||||
/// main entrypoint for users to connect to
|
||||
pub proxy_address: SocketAddr,
|
||||
|
||||
/// method of assigning compute nodes
|
||||
pub router_config: RouterConfig,
|
||||
|
||||
/// internally used for status and prometheus metrics
|
||||
pub http_address: SocketAddr,
|
||||
|
||||
@@ -55,10 +24,26 @@ pub struct ProxyConfig {
|
||||
/// control plane address where we would check auth.
|
||||
pub auth_endpoint: String,
|
||||
|
||||
pub tls_config: Option<TlsConfig>,
|
||||
pub ssl_config: Option<SslConfig>,
|
||||
}
|
||||
|
||||
pub fn configure_ssl(key_path: &str, cert_path: &str) -> anyhow::Result<TlsConfig> {
|
||||
pub type ProxyWaiters = crate::waiters::Waiters<Result<DatabaseInfo, String>>;
|
||||
|
||||
pub struct ProxyState {
|
||||
pub conf: ProxyConfig,
|
||||
pub waiters: ProxyWaiters,
|
||||
}
|
||||
|
||||
impl ProxyState {
|
||||
pub fn new(conf: ProxyConfig) -> Self {
|
||||
Self {
|
||||
conf,
|
||||
waiters: ProxyWaiters::default(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub fn configure_ssl(key_path: &str, cert_path: &str) -> anyhow::Result<SslConfig> {
|
||||
let key = {
|
||||
let key_bytes = std::fs::read(key_path).context("SSL key file")?;
|
||||
let mut keys = pemfile::pkcs8_private_keys(&mut &key_bytes[..])
|
||||
@@ -1,230 +0,0 @@
|
||||
use anyhow::Context;
|
||||
use bytes::BytesMut;
|
||||
use pin_project_lite::pin_project;
|
||||
use rustls::ServerConfig;
|
||||
use std::pin::Pin;
|
||||
use std::sync::Arc;
|
||||
use std::{io, task};
|
||||
use tokio::io::{AsyncRead, AsyncWrite, AsyncWriteExt, ReadBuf};
|
||||
use tokio_rustls::server::TlsStream;
|
||||
use zenith_utils::pq_proto::{BeMessage, FeMessage, FeStartupPacket};
|
||||
|
||||
pin_project! {
|
||||
/// Stream wrapper which implements libpq's protocol.
|
||||
/// NOTE: This object deliberately doesn't implement [`AsyncRead`]
|
||||
/// or [`AsyncWrite`] to prevent subtle errors (e.g. trying
|
||||
/// to pass random malformed bytes through the connection).
|
||||
pub struct PqStream<S> {
|
||||
#[pin]
|
||||
stream: S,
|
||||
buffer: BytesMut,
|
||||
}
|
||||
}
|
||||
|
||||
impl<S> PqStream<S> {
|
||||
/// Construct a new libpq protocol wrapper.
|
||||
pub fn new(stream: S) -> Self {
|
||||
Self {
|
||||
stream,
|
||||
buffer: Default::default(),
|
||||
}
|
||||
}
|
||||
|
||||
/// Extract the underlying stream.
|
||||
pub fn into_inner(self) -> S {
|
||||
self.stream
|
||||
}
|
||||
|
||||
/// Get a reference to the underlying stream.
|
||||
pub fn get_ref(&self) -> &S {
|
||||
&self.stream
|
||||
}
|
||||
}
|
||||
|
||||
impl<S: AsyncRead + Unpin> PqStream<S> {
|
||||
/// Receive [`FeStartupPacket`], which is a first packet sent by a client.
|
||||
pub async fn read_startup_packet(&mut self) -> anyhow::Result<FeStartupPacket> {
|
||||
match FeStartupPacket::read_fut(&mut self.stream).await? {
|
||||
Some(FeMessage::StartupPacket(packet)) => Ok(packet),
|
||||
None => anyhow::bail!("connection is lost"),
|
||||
other => anyhow::bail!("bad message type: {:?}", other),
|
||||
}
|
||||
}
|
||||
|
||||
pub async fn read_message(&mut self) -> anyhow::Result<FeMessage> {
|
||||
FeMessage::read_fut(&mut self.stream)
|
||||
.await?
|
||||
.context("connection is lost")
|
||||
}
|
||||
}
|
||||
|
||||
impl<S: AsyncWrite + Unpin> PqStream<S> {
|
||||
/// Write the message into an internal buffer, but don't flush the underlying stream.
|
||||
pub fn write_message_noflush<'a>(&mut self, message: &BeMessage<'a>) -> io::Result<&mut Self> {
|
||||
BeMessage::write(&mut self.buffer, message)?;
|
||||
Ok(self)
|
||||
}
|
||||
|
||||
/// Write the message into an internal buffer and flush it.
|
||||
pub async fn write_message<'a>(&mut self, message: &BeMessage<'a>) -> io::Result<&mut Self> {
|
||||
self.write_message_noflush(message)?;
|
||||
self.flush().await?;
|
||||
Ok(self)
|
||||
}
|
||||
|
||||
/// Flush the output buffer into the underlying stream.
|
||||
pub async fn flush(&mut self) -> io::Result<&mut Self> {
|
||||
self.stream.write_all(&self.buffer).await?;
|
||||
self.buffer.clear();
|
||||
self.stream.flush().await?;
|
||||
Ok(self)
|
||||
}
|
||||
}
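// Hedged usage sketch of the buffering API above: queue several backend
// messages with write_message_noflush and send them with a single flush, the
// same pattern the handshake code uses when replying to a client. greet is a
// hypothetical helper; the message variants are ones already used in this patch.
async fn greet<S: AsyncWrite + Unpin>(stream: S) -> anyhow::Result<()> {
    let mut pq = PqStream::new(stream);
    pq.write_message_noflush(&BeMessage::AuthenticationOk)?
        .write_message(&BeMessage::ReadyForQuery)
        .await?;
    Ok(())
}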
|
||||
|
||||
pin_project! {
|
||||
/// Wrapper for upgrading raw streams into secure streams.
|
||||
/// NOTE: it should be possible to decompose this object as necessary.
|
||||
#[project = StreamProj]
|
||||
pub enum Stream<S> {
|
||||
/// We always begin with a raw stream,
|
||||
/// which may then be upgraded into a secure stream.
|
||||
Raw { #[pin] raw: S },
|
||||
/// We box [`TlsStream`] since it can be quite large.
|
||||
Tls { #[pin] tls: Box<TlsStream<S>> },
|
||||
}
|
||||
}
|
||||
|
||||
impl<S> Stream<S> {
|
||||
/// Construct a new instance from a raw stream.
|
||||
pub fn from_raw(raw: S) -> Self {
|
||||
Self::Raw { raw }
|
||||
}
|
||||
}
|
||||
|
||||
impl<S: AsyncRead + AsyncWrite + Unpin> Stream<S> {
|
||||
/// If possible, upgrade raw stream into a secure TLS-based stream.
|
||||
pub async fn upgrade(self, cfg: Arc<ServerConfig>) -> anyhow::Result<Self> {
|
||||
match self {
|
||||
Stream::Raw { raw } => {
|
||||
let tls = Box::new(tokio_rustls::TlsAcceptor::from(cfg).accept(raw).await?);
|
||||
Ok(Stream::Tls { tls })
|
||||
}
|
||||
Stream::Tls { .. } => anyhow::bail!("can't upgrade TLS stream"),
|
||||
}
|
||||
}
|
||||
}
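// Hedged usage sketch: roughly how a handshake would react to an SslRequest,
// upgrading the raw stream in place when a server config is available.
// maybe_upgrade is a hypothetical helper, not part of this patch.
async fn maybe_upgrade<S>(
    stream: Stream<S>,
    tls: Option<Arc<ServerConfig>>,
) -> anyhow::Result<Stream<S>>
where
    S: AsyncRead + AsyncWrite + Unpin,
{
    match tls {
        Some(cfg) => stream.upgrade(cfg).await,
        None => Ok(stream),
    }
}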
|
||||
|
||||
impl<S: AsyncRead + AsyncWrite + Unpin> AsyncRead for Stream<S> {
|
||||
fn poll_read(
|
||||
self: Pin<&mut Self>,
|
||||
context: &mut task::Context<'_>,
|
||||
buf: &mut ReadBuf<'_>,
|
||||
) -> task::Poll<io::Result<()>> {
|
||||
use StreamProj::*;
|
||||
match self.project() {
|
||||
Raw { raw } => raw.poll_read(context, buf),
|
||||
Tls { tls } => tls.poll_read(context, buf),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<S: AsyncRead + AsyncWrite + Unpin> AsyncWrite for Stream<S> {
|
||||
fn poll_write(
|
||||
self: Pin<&mut Self>,
|
||||
context: &mut task::Context<'_>,
|
||||
buf: &[u8],
|
||||
) -> task::Poll<io::Result<usize>> {
|
||||
use StreamProj::*;
|
||||
match self.project() {
|
||||
Raw { raw } => raw.poll_write(context, buf),
|
||||
Tls { tls } => tls.poll_write(context, buf),
|
||||
}
|
||||
}
|
||||
|
||||
fn poll_flush(
|
||||
self: Pin<&mut Self>,
|
||||
context: &mut task::Context<'_>,
|
||||
) -> task::Poll<io::Result<()>> {
|
||||
use StreamProj::*;
|
||||
match self.project() {
|
||||
Raw { raw } => raw.poll_flush(context),
|
||||
Tls { tls } => tls.poll_flush(context),
|
||||
}
|
||||
}
|
||||
|
||||
fn poll_shutdown(
|
||||
self: Pin<&mut Self>,
|
||||
context: &mut task::Context<'_>,
|
||||
) -> task::Poll<io::Result<()>> {
|
||||
use StreamProj::*;
|
||||
match self.project() {
|
||||
Raw { raw } => raw.poll_shutdown(context),
|
||||
Tls { tls } => tls.poll_shutdown(context),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pin_project! {
|
||||
/// This stream tracks all writes and calls user provided
|
||||
/// callback when the underlying stream is flushed.
|
||||
pub struct MetricsStream<S, W> {
|
||||
#[pin]
|
||||
stream: S,
|
||||
write_count: usize,
|
||||
inc_write_count: W,
|
||||
}
|
||||
}
|
||||
|
||||
impl<S, W> MetricsStream<S, W> {
|
||||
pub fn new(stream: S, inc_write_count: W) -> Self {
|
||||
Self {
|
||||
stream,
|
||||
write_count: 0,
|
||||
inc_write_count,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<S: AsyncRead + Unpin, W> AsyncRead for MetricsStream<S, W> {
|
||||
fn poll_read(
|
||||
self: Pin<&mut Self>,
|
||||
context: &mut task::Context<'_>,
|
||||
buf: &mut ReadBuf<'_>,
|
||||
) -> task::Poll<io::Result<()>> {
|
||||
self.project().stream.poll_read(context, buf)
|
||||
}
|
||||
}
|
||||
|
||||
impl<S: AsyncWrite + Unpin, W: FnMut(usize)> AsyncWrite for MetricsStream<S, W> {
|
||||
fn poll_write(
|
||||
self: Pin<&mut Self>,
|
||||
context: &mut task::Context<'_>,
|
||||
buf: &[u8],
|
||||
) -> task::Poll<io::Result<usize>> {
|
||||
let this = self.project();
|
||||
this.stream.poll_write(context, buf).map_ok(|cnt| {
|
||||
// Increment the write count.
|
||||
*this.write_count += cnt;
|
||||
cnt
|
||||
})
|
||||
}
|
||||
|
||||
fn poll_flush(
|
||||
self: Pin<&mut Self>,
|
||||
context: &mut task::Context<'_>,
|
||||
) -> task::Poll<io::Result<()>> {
|
||||
let this = self.project();
|
||||
this.stream.poll_flush(context).map_ok(|()| {
|
||||
// Call the user provided callback and reset the write count.
|
||||
(this.inc_write_count)(*this.write_count);
|
||||
*this.write_count = 0;
|
||||
})
|
||||
}
|
||||
|
||||
fn poll_shutdown(
|
||||
self: Pin<&mut Self>,
|
||||
context: &mut task::Context<'_>,
|
||||
) -> task::Poll<io::Result<()>> {
|
||||
self.project().stream.poll_shutdown(context)
|
||||
}
|
||||
}
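// Hedged usage sketch, matching how the pass-through is wired elsewhere in
// this patch: wrap both ends in MetricsStream so every flush reports how many
// bytes went through, then let tokio's copy_bidirectional pump both
// directions until either side closes. run_tunnel is a hypothetical helper.
async fn run_tunnel(
    client: impl AsyncRead + AsyncWrite + Unpin,
    db: impl AsyncRead + AsyncWrite + Unpin,
) -> anyhow::Result<()> {
    fn inc_proxied(cnt: usize) {
        // NUM_BYTES_PROXIED_COUNTER is the counter registered in the proxy
        // module of this patch.
        NUM_BYTES_PROXIED_COUNTER.inc_by(cnt as u64);
    }

    let mut client = MetricsStream::new(client, inc_proxied);
    let mut db = MetricsStream::new(db, inc_proxied);
    let _ = tokio::io::copy_bidirectional(&mut client, &mut db).await?;
    Ok(())
}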
|
||||
@@ -1,12 +1,8 @@
|
||||
use anyhow::{anyhow, Context};
|
||||
use hashbrown::HashMap;
|
||||
use parking_lot::Mutex;
|
||||
use pin_project_lite::pin_project;
|
||||
use std::pin::Pin;
|
||||
use std::task;
|
||||
use tokio::sync::oneshot;
|
||||
use std::collections::HashMap;
|
||||
use std::sync::{mpsc, Mutex};
|
||||
|
||||
pub struct Waiters<T>(pub(self) Mutex<HashMap<String, oneshot::Sender<T>>>);
|
||||
pub struct Waiters<T>(pub(self) Mutex<HashMap<String, mpsc::Sender<T>>>);
|
||||
|
||||
impl<T> Default for Waiters<T> {
|
||||
fn default() -> Self {
|
||||
@@ -15,86 +11,48 @@ impl<T> Default for Waiters<T> {
|
||||
}
|
||||
|
||||
impl<T> Waiters<T> {
|
||||
pub fn register(&self, key: String) -> anyhow::Result<Waiter<T>> {
|
||||
let (tx, rx) = oneshot::channel();
|
||||
pub fn register(&self, key: String) -> Waiter<T> {
|
||||
let (tx, rx) = mpsc::channel();
|
||||
|
||||
self.0
|
||||
.lock()
|
||||
.try_insert(key.clone(), tx)
|
||||
.map_err(|_| anyhow!("waiter already registered"))?;
|
||||
// TODO: use `try_insert` (unstable)
|
||||
let prev = self.0.lock().unwrap().insert(key.clone(), tx);
|
||||
assert!(matches!(prev, None)); // assert_matches! is nightly-only
|
||||
|
||||
Ok(Waiter {
|
||||
Waiter {
|
||||
receiver: rx,
|
||||
guard: DropKey {
|
||||
registry: self,
|
||||
key,
|
||||
},
|
||||
})
|
||||
registry: self,
|
||||
key,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn notify(&self, key: &str, value: T) -> anyhow::Result<()>
|
||||
where
|
||||
T: Send + Sync,
|
||||
T: Send + Sync + 'static,
|
||||
{
|
||||
let tx = self
|
||||
.0
|
||||
.lock()
|
||||
.unwrap()
|
||||
.remove(key)
|
||||
.with_context(|| format!("key {} not found", key))?;
|
||||
|
||||
tx.send(value).map_err(|_| anyhow!("waiter channel hangup"))
|
||||
.ok_or_else(|| anyhow!("key {} not found", key))?;
|
||||
tx.send(value).context("channel hangup")
|
||||
}
|
||||
}
|
||||
|
||||
struct DropKey<'a, T> {
|
||||
key: String,
|
||||
pub struct Waiter<'a, T> {
|
||||
receiver: mpsc::Receiver<T>,
|
||||
registry: &'a Waiters<T>,
|
||||
key: String,
|
||||
}
|
||||
|
||||
impl<'a, T> Drop for DropKey<'a, T> {
|
||||
impl<T> Waiter<'_, T> {
|
||||
pub fn wait(self) -> anyhow::Result<T> {
|
||||
self.receiver.recv().context("channel hangup")
|
||||
}
|
||||
}
|
||||
|
||||
impl<T> Drop for Waiter<'_, T> {
|
||||
fn drop(&mut self) {
|
||||
self.registry.0.lock().remove(&self.key);
|
||||
}
|
||||
}
|
||||
|
||||
pin_project! {
|
||||
pub struct Waiter<'a, T> {
|
||||
#[pin]
|
||||
receiver: oneshot::Receiver<T>,
|
||||
guard: DropKey<'a, T>,
|
||||
}
|
||||
}
|
||||
|
||||
impl<T> std::future::Future for Waiter<'_, T> {
|
||||
type Output = anyhow::Result<T>;
|
||||
|
||||
fn poll(self: Pin<&mut Self>, cx: &mut task::Context<'_>) -> task::Poll<Self::Output> {
|
||||
self.project()
|
||||
.receiver
|
||||
.poll(cx)
|
||||
.map_err(|_| anyhow!("channel hangup"))
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
use std::sync::Arc;
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_waiter() -> anyhow::Result<()> {
|
||||
let waiters = Arc::new(Waiters::default());
|
||||
|
||||
let key = "Key";
|
||||
let waiter = waiters.register(key.to_owned())?;
|
||||
|
||||
let waiters = Arc::clone(&waiters);
|
||||
let notifier = tokio::spawn(async move {
|
||||
waiters.notify(key, Default::default())?;
|
||||
Ok(())
|
||||
});
|
||||
|
||||
let () = waiter.await?;
|
||||
notifier.await?
|
||||
self.registry.0.lock().unwrap().remove(&self.key);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,33 +0,0 @@
|
||||
[tool.poetry]
|
||||
name = "zenith"
|
||||
version = "0.1.0"
|
||||
description = ""
|
||||
authors = []
|
||||
|
||||
[tool.poetry.dependencies]
|
||||
python = "^3.7"
|
||||
pytest = "^6.2.5"
|
||||
psycopg2-binary = "^2.9.1"
|
||||
typing-extensions = "^3.10.0"
|
||||
PyJWT = {version = "^2.1.0", extras = ["crypto"]}
|
||||
requests = "^2.26.0"
|
||||
pytest-xdist = "^2.3.0"
|
||||
asyncpg = "^0.24.0"
|
||||
aiopg = "^1.3.1"
|
||||
cached-property = "^1.5.2"
|
||||
Jinja2 = "^3.0.2"
|
||||
types-requests = "^2.27.7"
|
||||
types-psycopg2 = "^2.9.6"
|
||||
boto3 = "^1.20.40"
|
||||
boto3-stubs = "^1.20.40"
|
||||
moto = {version = "^3.0.0", extras = ["server"]}
|
||||
backoff = "^1.11.1"
|
||||
|
||||
[tool.poetry.dev-dependencies]
|
||||
yapf = "==0.31.0"
|
||||
flake8 = "^3.9.2"
|
||||
mypy = "==0.910"
|
||||
|
||||
[build-system]
|
||||
requires = ["poetry-core>=1.0.0"]
|
||||
build-backend = "poetry.core.masonry.api"
|
||||
@@ -3,8 +3,6 @@ addopts =
|
||||
-m 'not remote_cluster'
|
||||
markers =
|
||||
remote_cluster
|
||||
testpaths =
|
||||
test_runner
|
||||
minversion = 6.0
|
||||
log_format = %(asctime)s.%(msecs)-3d %(levelname)s [%(filename)s:%(lineno)d] %(message)s
|
||||
log_date_format = %Y-%m-%d %H:%M:%S
|
||||
|
||||
scripts/coverage
@@ -14,30 +14,17 @@ from dataclasses import dataclass
|
||||
from pathlib import Path
|
||||
from tempfile import TemporaryDirectory
|
||||
from textwrap import dedent
|
||||
from typing import Any, Dict, Iterator, Iterable, List, Optional
|
||||
from typing import Any, Iterable, List, Optional
|
||||
|
||||
import argparse
|
||||
import hashlib
|
||||
import json
|
||||
import os
|
||||
import shutil
|
||||
import socket
|
||||
import subprocess
|
||||
import sys
|
||||
|
||||
|
||||
def file_mtime_or_zero(path: Path) -> int:
|
||||
try:
|
||||
return path.stat().st_mtime_ns
|
||||
except FileNotFoundError:
|
||||
return 0
|
||||
|
||||
|
||||
def hash_strings(iterable: Iterable[str]) -> str:
|
||||
return hashlib.sha1(''.join(iterable).encode('utf-8')).hexdigest()
|
||||
|
||||
|
||||
def intersperse(sep: Any, iterable: Iterable[Any]) -> Iterator[Any]:
|
||||
def intersperse(sep: Any, iterable: Iterable[Any]):
|
||||
fst = True
|
||||
for item in iterable:
|
||||
if not fst:
|
||||
@@ -46,18 +33,18 @@ def intersperse(sep: Any, iterable: Iterable[Any]) -> Iterator[Any]:
|
||||
yield item
|
||||
|
||||
|
||||
def find_demangler(demangler: Optional[Path] = None) -> Path:
|
||||
def find_demangler(demangler=None):
|
||||
known_tools = ['c++filt', 'rustfilt', 'llvm-cxxfilt']
|
||||
|
||||
if demangler:
|
||||
# Explicit argument has precedence over `known_tools`
|
||||
demanglers = [demangler]
|
||||
else:
|
||||
demanglers = [Path(x) for x in known_tools]
|
||||
demanglers = known_tools
|
||||
|
||||
for exe in demanglers:
|
||||
if shutil.which(exe):
|
||||
return exe
|
||||
for demangler in demanglers:
|
||||
if shutil.which(demangler):
|
||||
return demangler
|
||||
|
||||
raise Exception(' '.join([
|
||||
'Failed to find symbol demangler.',
|
||||
@@ -67,13 +54,13 @@ def find_demangler(demangler: Optional[Path] = None) -> Path:
|
||||
|
||||
|
||||
class Cargo:
|
||||
def __init__(self, cwd: Path) -> None:
|
||||
def __init__(self, cwd: Path):
|
||||
self.cwd = cwd
|
||||
self.target_dir = Path(os.environ.get('CARGO_TARGET_DIR', cwd / 'target')).resolve()
|
||||
self._rustlib_dir: Optional[Path] = None
|
||||
self._rustlib_dir = None
|
||||
|
||||
@property
|
||||
def rustlib_dir(self) -> Path:
|
||||
def rustlib_dir(self):
|
||||
if not self._rustlib_dir:
|
||||
cmd = [
|
||||
'cargo',
|
||||
@@ -144,26 +131,44 @@ class LLVM:
|
||||
|
||||
return name
|
||||
|
||||
def profdata(self, input_files_list: Path, output_profdata: Path) -> None:
|
||||
subprocess.check_call([
|
||||
self.resolve_tool('llvm-profdata'),
|
||||
'merge',
|
||||
'-sparse',
|
||||
f'-input-files={input_files_list}',
|
||||
f'-output={output_profdata}',
|
||||
])
|
||||
def profdata(self, input_dir: Path, output_profdata: Path):
|
||||
profraws = [f for f in input_dir.iterdir() if f.suffix == '.profraw']
|
||||
if not profraws:
|
||||
raise Exception(f'No profraw files found at {input_dir}')
|
||||
|
||||
with open(input_dir / 'profraw.list', 'w') as input_files:
|
||||
profraw_mtime = 0
|
||||
for profraw in profraws:
|
||||
profraw_mtime = max(profraw_mtime, profraw.stat().st_mtime_ns)
|
||||
print(profraw, file=input_files)
|
||||
input_files.flush()
|
||||
|
||||
try:
|
||||
profdata_mtime = output_profdata.stat().st_mtime_ns
|
||||
except FileNotFoundError:
|
||||
profdata_mtime = 0
|
||||
|
||||
# An obvious make-ish optimization
|
||||
if profraw_mtime >= profdata_mtime:
|
||||
subprocess.check_call([
|
||||
self.resolve_tool('llvm-profdata'),
|
||||
'merge',
|
||||
'-sparse',
|
||||
f'-input-files={input_files.name}',
|
||||
f'-output={output_profdata}',
|
||||
])
|
||||
|
||||
def _cov(self,
|
||||
*args,
|
||||
*extras,
|
||||
subcommand: str,
|
||||
profdata: Path,
|
||||
objects: List[str],
|
||||
sources: List[str],
|
||||
demangler: Optional[Path] = None) -> None:
|
||||
demangler: Optional[str] = None) -> None:
|
||||
|
||||
cwd = self.cargo.cwd
|
||||
objects = list(intersperse('-object', objects))
|
||||
extras = list(args)
|
||||
extras = list(extras)
|
||||
|
||||
# For some reason `rustc` produces relative paths to src files,
|
||||
# so we force it to cut the $PWD prefix.
|
||||
@@ -189,7 +194,7 @@ class LLVM:
|
||||
self._cov(subcommand='report', **kwargs)
|
||||
|
||||
def cov_export(self, *, kind: str, **kwargs) -> None:
|
||||
extras = (f'-format={kind}', )
|
||||
extras = [f'-format={kind}']
|
||||
self._cov(subcommand='export', *extras, **kwargs)
|
||||
|
||||
def cov_show(self, *, kind: str, output_dir: Optional[Path] = None, **kwargs) -> None:
|
||||
@@ -201,93 +206,42 @@ class LLVM:
|
||||
|
||||
|
||||
@dataclass
|
||||
class ProfDir:
|
||||
cwd: Path
|
||||
llvm: LLVM
|
||||
|
||||
def __post_init__(self) -> None:
|
||||
self.cwd.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
@property
|
||||
def files(self) -> List[Path]:
|
||||
return [f for f in self.cwd.iterdir() if f.suffix in ('.profraw', '.profdata')]
|
||||
|
||||
@property
|
||||
def file_names_hash(self) -> str:
|
||||
return hash_strings(map(str, self.files))
|
||||
|
||||
def merge(self, output_profdata: Path) -> bool:
|
||||
files = self.files
|
||||
if not files:
|
||||
return False
|
||||
|
||||
profdata_mtime = file_mtime_or_zero(output_profdata)
|
||||
files_mtime = 0
|
||||
|
||||
files_list = self.cwd / 'files.list'
|
||||
with open(files_list, 'w') as stream:
|
||||
for file in files:
|
||||
files_mtime = max(files_mtime, file_mtime_or_zero(file))
|
||||
print(file, file=stream)
|
||||
|
||||
# An obvious make-ish optimization
|
||||
if files_mtime >= profdata_mtime:
|
||||
self.llvm.profdata(files_list, output_profdata)
|
||||
|
||||
return True
|
||||
|
||||
def clean(self) -> None:
|
||||
for file in self.cwd.iterdir():
|
||||
os.remove(file)
|
||||
|
||||
def __truediv__(self, other):
|
||||
return self.cwd / other
|
||||
|
||||
def __str__(self):
|
||||
return str(self.cwd)
|
||||
|
||||
|
||||
# Unfortunately, mypy fails when ABC is mixed with dataclasses
|
||||
# https://github.com/python/mypy/issues/5374#issuecomment-568335302
|
||||
@dataclass
|
||||
class ReportData:
|
||||
class Report(ABC):
|
||||
""" Common properties of a coverage report """
|
||||
|
||||
llvm: LLVM
|
||||
demangler: Path
|
||||
demangler: str
|
||||
profdata: Path
|
||||
objects: List[str]
|
||||
sources: List[str]
|
||||
|
||||
|
||||
class Report(ABC, ReportData):
|
||||
def _common_kwargs(self) -> Dict[str, Any]:
|
||||
def _common_kwargs(self):
|
||||
return dict(profdata=self.profdata,
|
||||
objects=self.objects,
|
||||
sources=self.sources,
|
||||
demangler=self.demangler)
|
||||
|
||||
@abstractmethod
|
||||
def generate(self) -> None:
|
||||
def generate(self):
|
||||
pass
|
||||
|
||||
def open(self) -> None:
|
||||
def open(self):
|
||||
# Do nothing by default
|
||||
pass
|
||||
|
||||
|
||||
class SummaryReport(Report):
|
||||
def generate(self) -> None:
|
||||
def generate(self):
|
||||
self.llvm.cov_report(**self._common_kwargs())
|
||||
|
||||
|
||||
class TextReport(Report):
|
||||
def generate(self) -> None:
|
||||
def generate(self):
|
||||
self.llvm.cov_show(kind='text', **self._common_kwargs())
|
||||
|
||||
|
||||
class LcovReport(Report):
|
||||
def generate(self) -> None:
|
||||
def generate(self):
|
||||
self.llvm.cov_export(kind='lcov', **self._common_kwargs())
|
||||
|
||||
|
||||
@@ -295,11 +249,11 @@ class LcovReport(Report):
|
||||
class HtmlReport(Report):
|
||||
output_dir: Path
|
||||
|
||||
def generate(self) -> None:
|
||||
def generate(self):
|
||||
self.llvm.cov_show(kind='html', output_dir=self.output_dir, **self._common_kwargs())
|
||||
print(f'HTML report is located at `{self.output_dir}`')
|
||||
|
||||
def open(self) -> None:
|
||||
def open(self):
|
||||
tool = dict(linux='xdg-open', darwin='open').get(sys.platform)
|
||||
if not tool:
|
||||
raise Exception(f'Unknown platform {sys.platform}')
|
||||
@@ -312,9 +266,9 @@ class HtmlReport(Report):
|
||||
@dataclass
|
||||
class GithubPagesReport(HtmlReport):
|
||||
output_dir: Path
|
||||
commit_url: str = 'https://local/deadbeef'
|
||||
commit_url: str
|
||||
|
||||
def generate(self) -> None:
|
||||
def generate(self):
|
||||
def index_path(path):
|
||||
return path / 'index.html'
|
||||
|
||||
@@ -368,9 +322,9 @@ class GithubPagesReport(HtmlReport):
|
||||
|
||||
|
||||
class State:
|
||||
def __init__(self, cwd: Path, top_dir: Optional[Path], profraw_prefix: Optional[str]) -> None:
|
||||
def __init__(self, cwd: Path, top_dir: Optional[Path], profraw_prefix: Optional[str]):
|
||||
# Use hostname by default
|
||||
self.profraw_prefix = profraw_prefix or socket.gethostname()
|
||||
profraw_prefix = profraw_prefix or '%h'
|
||||
|
||||
self.cwd = cwd
|
||||
self.cargo = Cargo(self.cwd)
|
||||
@@ -380,18 +334,16 @@ class State:
|
||||
self.report_dir = self.top_dir / 'report'
|
||||
|
||||
# Directory for raw coverage data emitted by executables
|
||||
self.profraw_dir = ProfDir(llvm=self.llvm, cwd=self.top_dir / 'profraw')
|
||||
|
||||
# Directory for processed coverage data
|
||||
self.profdata_dir = ProfDir(llvm=self.llvm, cwd=self.top_dir / 'profdata')
|
||||
self.profraw_dir = self.top_dir / 'profraw'
|
||||
self.profraw_dir.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
# Aggregated coverage data
|
||||
self.final_profdata = self.top_dir / 'coverage.profdata'
|
||||
self.profdata_file = self.top_dir / 'coverage.profdata'
|
||||
|
||||
# Dump all coverage data files into a dedicated directory.
|
||||
# Each filename is parameterized by PID & executable's signature.
|
||||
os.environ['LLVM_PROFILE_FILE'] = str(self.profraw_dir /
|
||||
f'{self.profraw_prefix}-%p-%m.profraw')
|
||||
f'cov-{profraw_prefix}-%p-%m.profraw')
|
||||
|
||||
os.environ['RUSTFLAGS'] = ' '.join([
|
||||
os.environ.get('RUSTFLAGS', ''),
|
||||
@@ -415,41 +367,13 @@ class State:
|
||||
# see: https://github.com/rust-lang/rust/pull/90132
|
||||
os.environ['RUSTC_BOOTSTRAP'] = '1'
|
||||
|
||||
def _merge_profraw(self) -> bool:
|
||||
profdata_path = self.profdata_dir / '-'.join([
|
||||
self.profraw_prefix,
|
||||
f'{self.profdata_dir.file_names_hash}.profdata',
|
||||
])
|
||||
print(f'* Merging profraw files (into {profdata_path.name})')
|
||||
did_merge_profraw = self.profraw_dir.merge(profdata_path)
|
||||
|
||||
# We no longer need those profraws
|
||||
self.profraw_dir.clean()
|
||||
|
||||
return did_merge_profraw
|
||||
|
||||
def _merge_profdata(self) -> bool:
|
||||
self._merge_profraw()
|
||||
print(f'* Merging profdata files (into {self.final_profdata.name})')
|
||||
return self.profdata_dir.merge(self.final_profdata)
|
||||
|
||||
def do_run(self, args) -> None:
|
||||
def do_run(self, args):
|
||||
subprocess.check_call([*args.command, *args.args])
|
||||
|
||||
def do_merge(self, args) -> None:
|
||||
handlers = {
|
||||
'profraw': self._merge_profraw,
|
||||
'profdata': self._merge_profdata,
|
||||
}
|
||||
handlers[args.kind]()
|
||||
|
||||
def do_report(self, args) -> None:
|
||||
def do_report(self, args):
|
||||
if args.all and args.sources:
|
||||
raise Exception('--all should not be used with sources')
|
||||
|
||||
if args.format == 'github' and not args.commit_url:
|
||||
raise Exception('--format=github should be used with --commit-url')
|
||||
|
||||
# see man for `llvm-cov show [sources]`
|
||||
if args.all:
|
||||
sources = []
|
||||
@@ -458,8 +382,8 @@ class State:
|
||||
else:
|
||||
sources = args.sources
|
||||
|
||||
if not self._merge_profdata():
|
||||
raise Exception(f'No coverage data files found at {self.top_dir}')
|
||||
print('* Merging profraw files')
|
||||
self.llvm.profdata(self.profraw_dir, self.profdata_file)
|
||||
|
||||
objects = []
|
||||
if args.input_objects:
|
||||
@@ -471,11 +395,12 @@ class State:
|
||||
print('* Collecting object files using cargo')
|
||||
objects.extend(self.cargo.binaries(args.profile))
|
||||
|
||||
params: Dict[str, Any] = dict(llvm=self.llvm,
|
||||
demangler=find_demangler(args.demangler),
|
||||
profdata=self.final_profdata,
|
||||
objects=objects,
|
||||
sources=sources)
|
||||
params = dict(llvm=self.llvm,
|
||||
demangler=find_demangler(args.demangler),
|
||||
profdata=self.profdata_file,
|
||||
objects=objects,
|
||||
sources=sources)
|
||||
|
||||
formats = {
|
||||
'html':
|
||||
lambda: HtmlReport(**params, output_dir=self.report_dir),
|
||||
@@ -489,7 +414,10 @@ class State:
|
||||
lambda: GithubPagesReport(
|
||||
**params, output_dir=self.report_dir, commit_url=args.commit_url),
|
||||
}
|
||||
report = formats[args.format]()
|
||||
|
||||
report_factory = formats.get(args.format)
if not report_factory:
raise Exception(f'Format `{args.format}` is not supported')
report = report_factory()
|
||||
|
||||
print(f'* Rendering coverage report ({args.format})')
|
||||
report.generate()
|
||||
@@ -498,7 +426,7 @@ class State:
|
||||
print('* Opening the report')
|
||||
report.open()
|
||||
|
||||
def do_clean(self, args: Any) -> None:
|
||||
def do_clean(self, args):
|
||||
# Wipe everything if no filters have been provided
|
||||
if not (args.report or args.prof):
|
||||
shutil.rmtree(self.top_dir, ignore_errors=True)
|
||||
@@ -506,12 +434,10 @@ class State:
|
||||
if args.report:
|
||||
shutil.rmtree(self.report_dir, ignore_errors=True)
|
||||
if args.prof:
|
||||
self.profraw_dir.clean()
|
||||
self.profdata_dir.clean()
|
||||
self.final_profdata.unlink(missing_ok=True)
|
||||
self.profdata_file.unlink(missing_ok=True)
|
||||
|
||||
|
||||
def main() -> None:
|
||||
def main():
|
||||
app = sys.argv[0]
|
||||
example = f"""
|
||||
prerequisites:
|
||||
@@ -520,7 +446,7 @@ prerequisites:
|
||||
|
||||
self-contained example:
|
||||
{app} run make
|
||||
{app} run poetry run pytest test_runner
|
||||
{app} run pipenv run pytest test_runner
|
||||
{app} run cargo test
|
||||
{app} report --open
|
||||
"""
|
||||
@@ -537,12 +463,6 @@ self-contained example:
|
||||
p_run.add_argument('command', nargs=1)
|
||||
p_run.add_argument('args', nargs=argparse.REMAINDER)
|
||||
|
||||
p_merge = commands.add_parser('merge', help='save disk space by merging cov files')
|
||||
p_merge.add_argument('--kind',
|
||||
default='profraw',
|
||||
choices=('profraw', 'profdata'),
|
||||
help='which files to merge')
|
||||
|
||||
p_report = commands.add_parser('report', help='generate a coverage report')
|
||||
p_report.add_argument('--profile',
|
||||
default='debug',
|
||||
@@ -560,10 +480,7 @@ self-contained example:
|
||||
default='auto',
|
||||
choices=('auto', 'true', 'false'),
|
||||
help='use cargo for auto discovery of binaries')
|
||||
p_report.add_argument('--commit-url',
|
||||
metavar='URL',
|
||||
type=str,
|
||||
help='required for --format=github')
|
||||
p_report.add_argument('--commit-url', type=str, help='required for --format=github')
|
||||
p_report.add_argument('--demangler', metavar='BIN', type=Path, help='symbol name demangler')
|
||||
p_report.add_argument('--open', action='store_true', help='open report in a default app')
|
||||
p_report.add_argument('--all', action='store_true', help='show everything, e.g. deps')
|
||||
@@ -576,16 +493,15 @@ self-contained example:
|
||||
args = parser.parse_args()
|
||||
state = State(cwd=Path.cwd(), top_dir=args.dir, profraw_prefix=args.profraw_prefix)
|
||||
|
||||
handlers = {
|
||||
commands = {
|
||||
'run': state.do_run,
|
||||
'merge': state.do_merge,
|
||||
'report': state.do_report,
|
||||
'clean': state.do_clean,
|
||||
}
|
||||
|
||||
handler = handlers.get(args.subparser_name)
|
||||
if handler:
|
||||
handler(args)
|
||||
action = commands.get(args.subparser_name)
|
||||
if action:
|
||||
action(args)
|
||||
else:
|
||||
parser.print_help()
|
||||
|
||||
|
||||
@@ -1,24 +1,27 @@
|
||||
#!/bin/bash
|
||||
|
||||
# this is a shortcut script to avoid duplication in CI
|
||||
|
||||
set -eux -o pipefail
|
||||
|
||||
SCRIPT_DIR="$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )"
|
||||
|
||||
echo "Uploading perf report to zenith pg"
|
||||
# Ingest per-test result data into the Zenith-backed Postgres running in staging, so that Grafana reports can be built on it
|
||||
DATABASE_URL="$PERF_TEST_RESULT_CONNSTR" poetry run python "$SCRIPT_DIR"/ingest_perf_test_result.py --ingest "$REPORT_FROM"
|
||||
git clone https://$VIP_VAP_ACCESS_TOKEN@github.com/zenithdb/zenith-perf-data.git
|
||||
cd zenith-perf-data
|
||||
mkdir -p reports/
|
||||
mkdir -p data/$REPORT_TO
|
||||
|
||||
# Activate poetry's venv. This is needed because git upload does not run in the project dir
# (it uses a temporary directory to store the repository), so poetry cannot find pyproject.toml there
|
||||
# shellcheck source=/dev/null
|
||||
. "$(poetry env info --path)"/bin/activate
|
||||
cp $REPORT_FROM/* data/$REPORT_TO
|
||||
|
||||
echo "Uploading perf result to zenith-perf-data"
|
||||
scripts/git-upload \
|
||||
--repo=https://"$VIP_VAP_ACCESS_TOKEN"@github.com/zenithdb/zenith-perf-data.git \
|
||||
--message="add performance test result for $GITHUB_SHA zenith revision" \
|
||||
--branch=master \
|
||||
copy "$REPORT_FROM" "data/$REPORT_TO" `# COPY FROM TO_RELATIVE`\
|
||||
--merge \
|
||||
--run-cmd "python $SCRIPT_DIR/generate_perf_report_page.py --input-dir data/$REPORT_TO --out reports/$REPORT_TO.html"
|
||||
echo "Generating report"
|
||||
pipenv run python $SCRIPT_DIR/generate_perf_report_page.py --input-dir data/$REPORT_TO --out reports/$REPORT_TO.html
|
||||
echo "Uploading perf result"
|
||||
git add data reports
|
||||
git \
|
||||
-c "user.name=vipvap" \
|
||||
-c "user.email=vipvap@zenith.tech" \
|
||||
commit \
|
||||
--author="vipvap <vipvap@zenith.tech>" \
|
||||
-m "add performance test result for $GITHUB_SHA zenith revision"
|
||||
|
||||
git push https://$VIP_VAP_ACCESS_TOKEN@github.com/zenithdb/zenith-perf-data.git master
|
||||
|
||||
@@ -1,9 +1,7 @@
|
||||
#!/usr/bin/env python3
|
||||
|
||||
from contextlib import contextmanager
|
||||
import shlex
|
||||
from tempfile import TemporaryDirectory
|
||||
from distutils.dir_util import copy_tree
|
||||
from pathlib import Path
|
||||
|
||||
import argparse
|
||||
@@ -11,8 +9,6 @@ import os
|
||||
import shutil
|
||||
import subprocess
|
||||
import sys
|
||||
import textwrap
|
||||
from typing import Optional
|
||||
|
||||
|
||||
def absolute_path(path):
|
||||
@@ -42,21 +38,13 @@ def run(cmd, *args, **kwargs):
|
||||
|
||||
|
||||
class GitRepo:
|
||||
def __init__(self, url, branch: Optional[str] = None):
|
||||
def __init__(self, url):
|
||||
self.url = url
|
||||
self.cwd = TemporaryDirectory()
|
||||
self.branch = branch
|
||||
|
||||
args = [
|
||||
'git',
|
||||
'clone',
|
||||
'--single-branch',
|
||||
]
|
||||
if self.branch:
|
||||
args.extend(['--branch', self.branch])
|
||||
|
||||
subprocess.check_call([
|
||||
*args,
|
||||
'git',
|
||||
'clone',
|
||||
str(url),
|
||||
self.cwd.name,
|
||||
])
|
||||
@@ -112,44 +100,23 @@ def do_copy(args):
|
||||
raise FileExistsError(f"File exists: '{dst}'")
|
||||
|
||||
if src.is_dir():
|
||||
if not args.merge:
|
||||
shutil.rmtree(dst, ignore_errors=True)
|
||||
# distutils is deprecated, but this is a temporary workaround before python version bump
|
||||
# here we need dir_exists_ok=True from shutil.copytree which is available in python 3.8+
|
||||
copy_tree(str(src), str(dst))
|
||||
shutil.rmtree(dst, ignore_errors=True)
|
||||
shutil.copytree(src, dst)
|
||||
else:
|
||||
shutil.copy(src, dst)
|
||||
|
||||
if args.run_cmd:
|
||||
run(shlex.split(args.run_cmd))
|
||||
|
||||
|
||||
def main():
|
||||
parser = argparse.ArgumentParser(description='Git upload tool')
|
||||
parser.add_argument('--repo', type=str, metavar='URL', required=True, help='git repo url')
|
||||
parser.add_argument('--message', type=str, metavar='TEXT', help='commit message')
|
||||
parser.add_argument('--branch', type=str, metavar='TEXT', help='target git repo branch')
|
||||
|
||||
commands = parser.add_subparsers(title='commands', dest='subparser_name')
|
||||
|
||||
p_copy = commands.add_parser(
|
||||
'copy',
|
||||
help='copy file into the repo',
|
||||
formatter_class=argparse.RawTextHelpFormatter,
|
||||
)
|
||||
p_copy = commands.add_parser('copy', help='copy file into the repo')
|
||||
p_copy.add_argument('src', type=absolute_path, help='source path')
|
||||
p_copy.add_argument('dst', type=relative_path, help='relative dest path')
|
||||
p_copy.add_argument('--forbid-overwrite', action='store_true', help='do not allow overwrites')
|
||||
p_copy.add_argument(
|
||||
'--merge',
|
||||
action='store_true',
|
||||
help='when copying a directory do not delete existing data, but add new files')
|
||||
p_copy.add_argument('--run-cmd',
|
||||
help=textwrap.dedent('''\
|
||||
run arbitrary cmd on top of copied files,
|
||||
example usage is static content generation
|
||||
based on current repository state\
|
||||
'''))
|
||||
|
||||
args = parser.parse_args()
|
||||
|
||||
@@ -160,7 +127,7 @@ def main():
|
||||
action = commands.get(args.subparser_name)
|
||||
if action:
|
||||
message = args.message or 'update'
|
||||
GitRepo(args.repo, args.branch).update(message, lambda: action(args))
|
||||
GitRepo(args.repo).update(message, lambda: action(args))
|
||||
else:
|
||||
parser.print_usage()
|
||||
|
||||
|
||||
@@ -1,136 +0,0 @@
|
||||
#!/usr/bin/env python3
|
||||
import argparse
|
||||
from contextlib import contextmanager
|
||||
import json
|
||||
import os
|
||||
import psycopg2
|
||||
import psycopg2.extras
|
||||
from pathlib import Path
|
||||
from datetime import datetime
|
||||
|
||||
CREATE_TABLE = """
|
||||
CREATE TABLE IF NOT EXISTS perf_test_results (
|
||||
id SERIAL PRIMARY KEY,
|
||||
suit TEXT,
|
||||
revision CHAR(40),
|
||||
platform TEXT,
|
||||
metric_name TEXT,
|
||||
metric_value NUMERIC,
|
||||
metric_unit VARCHAR(10),
|
||||
metric_report_type TEXT,
|
||||
recorded_at_timestamp TIMESTAMP WITH TIME ZONE DEFAULT NOW()
|
||||
)
|
||||
"""
|
||||
|
||||
|
||||
def err(msg):
|
||||
print(f'error: {msg}')
|
||||
exit(1)
|
||||
|
||||
|
||||
@contextmanager
|
||||
def get_connection_cursor():
|
||||
connstr = os.getenv('DATABASE_URL')
|
||||
if not connstr:
|
||||
err('DATABASE_URL environment variable is not set')
|
||||
with psycopg2.connect(connstr) as conn:
|
||||
with conn.cursor() as cur:
|
||||
yield cur
|
||||
|
||||
|
||||
def create_table(cur):
|
||||
cur.execute(CREATE_TABLE)
|
||||
|
||||
|
||||
def ingest_perf_test_result(cursor, data_file: Path, recorded_at_timestamp: int) -> int:
run_data = json.loads(data_file.read_text())
|
||||
revision = run_data['revision']
|
||||
platform = run_data['platform']
|
||||
|
||||
run_result = run_data['result']
|
||||
args_list = []
|
||||
|
||||
for suit_result in run_result:
|
||||
suit = suit_result['suit']
|
||||
total_duration = suit_result['total_duration']
|
||||
|
||||
suit_result['data'].append({
|
||||
'name': 'total_duration',
|
||||
'value': total_duration,
|
||||
'unit': 's',
|
||||
'report': 'lower_is_better',
|
||||
})
|
||||
|
||||
for metric in suit_result['data']:
|
||||
values = {
|
||||
'suit': suit,
|
||||
'revision': revision,
|
||||
'platform': platform,
|
||||
'metric_name': metric['name'],
|
||||
'metric_value': metric['value'],
|
||||
'metric_unit': metric['unit'],
|
||||
'metric_report_type': metric['report'],
|
||||
'recorded_at_timestamp': datetime.utcfromtimestamp(recorded_at_timestamp),
|
||||
}
|
||||
args_list.append(values)
|
||||
|
||||
psycopg2.extras.execute_values(
|
||||
cursor,
|
||||
"""
|
||||
INSERT INTO perf_test_results (
|
||||
suit,
|
||||
revision,
|
||||
platform,
|
||||
metric_name,
|
||||
metric_value,
|
||||
metric_unit,
|
||||
metric_report_type,
|
||||
recorded_at_timestamp
|
||||
) VALUES %s
|
||||
""",
|
||||
args_list,
|
||||
template="""(
|
||||
%(suit)s,
|
||||
%(revision)s,
|
||||
%(platform)s,
|
||||
%(metric_name)s,
|
||||
%(metric_value)s,
|
||||
%(metric_unit)s,
|
||||
%(metric_report_type)s,
|
||||
%(recorded_at_timestamp)s
|
||||
)""",
|
||||
)
|
||||
return len(args_list)
|
||||
|
||||
|
||||
def main():
|
||||
parser = argparse.ArgumentParser(description='Perf test result uploader. \
|
||||
Database connection string should be provided via DATABASE_URL environment variable', )
|
||||
parser.add_argument(
|
||||
'--ingest',
|
||||
type=Path,
|
||||
help='Path to perf test result file, or directory with perf test result files')
|
||||
parser.add_argument('--initdb', action='store_true', help='Initialize database')
|
||||
|
||||
args = parser.parse_args()
|
||||
with get_connection_cursor() as cur:
|
||||
if args.initdb:
|
||||
create_table(cur)
|
||||
|
||||
if not args.ingest.exists():
|
||||
err(f'ingest path {args.ingest} does not exist')
|
||||
|
||||
if args.ingest:
|
||||
if args.ingest.is_dir():
|
||||
for item in sorted(args.ingest.iterdir(), key=lambda x: int(x.name.split('_')[0])):
|
||||
recorded_at_timestamp = int(item.name.split('_')[0])
|
||||
ingested = ingest_perf_test_result(cur, item, recorded_at_timestamp)
|
||||
print(f'Ingested {ingested} metric values from {item}')
|
||||
else:
|
||||
recorded_at_timestamp = int(args.ingest.name.split('_')[0])
|
||||
ingested = ingest_perf_test_result(cur, args.ingest, recorded_at_timestamp)
|
||||
print(f'Ingested {ingested} metric values from {args.ingest}')
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
main()
|
||||
@@ -1,7 +0,0 @@
|
||||
#!/usr/bin/env bash
|
||||
|
||||
# This is a helper script for setting up/updating our python environment.
|
||||
# It is intended to be a primary endpoint for all the people who want to
|
||||
# just setup test environment without going into details of python package management
|
||||
|
||||
poetry install --no-root # this installs dev dependencies by default
|
||||
@@ -1,9 +0,0 @@
#!/usr/bin/env bash

# This is a helper script to run pytest without going too deep
# into python dependency management details.

# It may be desirable to create a more sophisticated pytest launcher
# with commonly used options to simplify launching from e.g. CI.

poetry run pytest "${@:1}"
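The comment above leaves room for a richer launcher. As a rough illustration only (not something that exists in the repository), a thin Python wrapper could inject commonly used CI options before delegating to `poetry run pytest`; the file name and the specific flags below are assumptions chosen for the example, not project defaults.

# pytest_ci_launcher.py -- hypothetical name, illustrative sketch only
import subprocess
import sys


def run_pytest(extra_args):
    # The flags below are common pytest options picked for illustration,
    # not options mandated by this repository.
    cmd = [
        "poetry", "run", "pytest",
        "-v",            # verbose output for CI logs
        "--tb=short",    # keep tracebacks compact
    ] + list(extra_args)
    return subprocess.run(cmd).returncode


if __name__ == "__main__":
    sys.exit(run_pytest(sys.argv[1:]))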
@@ -22,24 +22,23 @@ runtime. Currently, there are only two batches:

### Running the tests

There is a wrapper script to invoke pytest: `./scripts/pytest`.
It accepts all the arguments that are accepted by pytest.
Depending on your installation options, pytest might be invoked directly.
Because pytest will search all subdirectories for tests, it's easiest to
run the tests from within the `test_runner` directory.

Test state (postgres data, pageserver state, and log files) will
be stored under a directory `test_output`.

You can run all the tests with:

`./scripts/pytest`
`pipenv run pytest`

If you want to run all the tests in a particular file:

`./scripts/pytest test_pgbench.py`
`pipenv run pytest test_pgbench.py`

If you want to run all tests that have the string "bench" in their names:

`./scripts/pytest -k bench`
`pipenv run pytest -k bench`

Useful environment variables:

@@ -49,17 +48,16 @@ Useful environment variables:
should go.
`TEST_SHARED_FIXTURES`: Try to re-use a single pageserver for all the tests.
`FORCE_MOCK_S3`: inits every test's pageserver with a mock S3 used as a remote storage.
`ZENITH_PAGESERVER_OVERRIDES`: add a `;`-separated set of configs that will be passed as
`--pageserver-config-override=${value}` parameter values when the zenith CLI is invoked
(see the sketch after this section).
`RUST_LOG`: logging configuration to pass into the Zenith CLI

Let stdout, stderr and `INFO` log messages go to the terminal instead of capturing them:
`./scripts/pytest -s --log-cli-level=INFO ...`
`pytest -s --log-cli-level=INFO ...`
(Note that many tests capture subprocess outputs separately, so this may not
show much.)

Exit after the first test failure:
`./scripts/pytest -x ...`
`pytest -x ...`
(There are many more pytest options; run `pytest -h` to see them.)

### Writing a test

@@ -89,7 +87,7 @@ def test_foobar(zenith_env_builder: ZenithEnvBuilder):

    # Now create the environment. This initializes the repository, and starts
    # up the page server and the safekeepers
    env = zenith_env_builder.init_start()
    env = zenith_env_builder.init()

    # Run the test
    ...
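As a rough illustration of the `ZENITH_PAGESERVER_OVERRIDES` mechanism described in the section above (the sketch below is not repository code, and the override values are invented placeholders), each `;`-separated entry becomes one `--pageserver-config-override=...` argument when the zenith CLI is invoked:

import os

# Hypothetical override string; the individual settings are placeholders.
os.environ['ZENITH_PAGESERVER_OVERRIDES'] = "gc_period='10 s';gc_horizon=1048576"

# Each ';'-separated entry is turned into its own CLI flag.
flags = [
    f"--pageserver-config-override={value}"
    for value in os.environ['ZENITH_PAGESERVER_OVERRIDES'].split(';')
]
print(flags)
# ["--pageserver-config-override=gc_period='10 s'",
#  "--pageserver-config-override=gc_horizon=1048576"]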
@@ -1,49 +1,45 @@
from contextlib import closing
from typing import Iterator
from uuid import UUID, uuid4
from uuid import uuid4
import psycopg2
from fixtures.zenith_fixtures import ZenithEnvBuilder, ZenithPageserverApiException
from fixtures.zenith_fixtures import ZenithEnvBuilder
import pytest

pytest_plugins = ("fixtures.zenith_fixtures")


def test_pageserver_auth(zenith_env_builder: ZenithEnvBuilder):
    zenith_env_builder.pageserver_auth_enabled = True
    env = zenith_env_builder.init_start()
    env = zenith_env_builder.init()

    ps = env.pageserver

    tenant_token = env.auth_keys.generate_tenant_token(env.initial_tenant.hex)
    tenant_http_client = env.pageserver.http_client(tenant_token)
    tenant_token = env.auth_keys.generate_tenant_token(env.initial_tenant)
    invalid_tenant_token = env.auth_keys.generate_tenant_token(uuid4().hex)
    invalid_tenant_http_client = env.pageserver.http_client(invalid_tenant_token)

    management_token = env.auth_keys.generate_management_token()
    management_http_client = env.pageserver.http_client(management_token)

    # this does not invoke auth check and only decodes jwt and checks it for validity
    # check both tokens
    ps.safe_psql("set FOO", password=tenant_token)
    ps.safe_psql("set FOO", password=management_token)
    ps.safe_psql("status", password=tenant_token)
    ps.safe_psql("status", password=management_token)

    # tenant can create branches
    tenant_http_client.branch_create(env.initial_tenant, 'new1', 'main')
    ps.safe_psql(f"branch_create {env.initial_tenant} new1 main", password=tenant_token)
    # console can create branches for tenant
    management_http_client.branch_create(env.initial_tenant, 'new2', 'main')
    ps.safe_psql(f"branch_create {env.initial_tenant} new2 main", password=management_token)

    # fail to create branch using token with different tenant_id
    with pytest.raises(ZenithPageserverApiException,
                       match='Forbidden: Tenant id mismatch. Permission denied'):
        invalid_tenant_http_client.branch_create(env.initial_tenant, "new3", "main")
    # fail to create branch using token with different tenantid
    with pytest.raises(psycopg2.DatabaseError, match='Tenant id mismatch. Permission denied'):
        ps.safe_psql(f"branch_create {env.initial_tenant} new2 main", password=invalid_tenant_token)

    # create tenant using management token
    management_http_client.tenant_create(uuid4())
    ps.safe_psql(f"tenant_create {uuid4().hex}", password=management_token)

    # fail to create tenant using tenant token
    with pytest.raises(
            ZenithPageserverApiException,
            match='Forbidden: Attempt to access management api with tenant scope. Permission denied'
    ):
        tenant_http_client.tenant_create(uuid4())
            psycopg2.DatabaseError,
            match='Attempt to access management api with tenant scope. Permission denied'):
        ps.safe_psql(f"tenant_create {uuid4().hex}", password=tenant_token)


@pytest.mark.parametrize('with_wal_acceptors', [False, True])
@@ -51,10 +47,10 @@ def test_compute_auth_to_pageserver(zenith_env_builder: ZenithEnvBuilder, with_w
    zenith_env_builder.pageserver_auth_enabled = True
    if with_wal_acceptors:
        zenith_env_builder.num_safekeepers = 3
    env = zenith_env_builder.init_start()
    env = zenith_env_builder.init()

    branch = f"test_compute_auth_to_pageserver{with_wal_acceptors}"
    env.zenith_cli.create_branch(branch, "main")
    env.zenith_cli(["branch", branch, "main"])

    pg = env.postgres.create_start(branch)
@@ -1,154 +0,0 @@
from contextlib import closing, contextmanager
import psycopg2.extras
from fixtures.zenith_fixtures import ZenithEnvBuilder
from fixtures.log_helper import log
import os
import time
import asyncpg
from fixtures.zenith_fixtures import Postgres
import threading

pytest_plugins = ("fixtures.zenith_fixtures")


@contextmanager
def pg_cur(pg):
    with closing(pg.connect()) as conn:
        with conn.cursor() as cur:
            yield cur


# Periodically check that all backpressure lags are below the configured threshold,
# and fail the assertion if they are not.
# If the check query fails, stop the thread. The main thread should notice that and stop the test.
def check_backpressure(pg: Postgres, stop_event: threading.Event, polling_interval=5):
    log.info("checks started")

    with pg_cur(pg) as cur:
        cur.execute("CREATE EXTENSION zenith")  # TODO move it to zenith_fixtures?

        cur.execute("select pg_size_bytes(current_setting('max_replication_write_lag'))")
        res = cur.fetchone()
        max_replication_write_lag_bytes = res[0]
        log.info(f"max_replication_write_lag: {max_replication_write_lag_bytes} bytes")

        cur.execute("select pg_size_bytes(current_setting('max_replication_flush_lag'))")
        res = cur.fetchone()
        max_replication_flush_lag_bytes = res[0]
        log.info(f"max_replication_flush_lag: {max_replication_flush_lag_bytes} bytes")

        cur.execute("select pg_size_bytes(current_setting('max_replication_apply_lag'))")
        res = cur.fetchone()
        max_replication_apply_lag_bytes = res[0]
        log.info(f"max_replication_apply_lag: {max_replication_apply_lag_bytes} bytes")

    with pg_cur(pg) as cur:
        while not stop_event.is_set():
            try:
                cur.execute('''
                select pg_wal_lsn_diff(pg_current_wal_flush_lsn(), received_lsn) as received_lsn_lag,
                       pg_wal_lsn_diff(pg_current_wal_flush_lsn(), disk_consistent_lsn) as disk_consistent_lsn_lag,
                       pg_wal_lsn_diff(pg_current_wal_flush_lsn(), remote_consistent_lsn) as remote_consistent_lsn_lag,
                       pg_size_pretty(pg_wal_lsn_diff(pg_current_wal_flush_lsn(), received_lsn)),
                       pg_size_pretty(pg_wal_lsn_diff(pg_current_wal_flush_lsn(), disk_consistent_lsn)),
                       pg_size_pretty(pg_wal_lsn_diff(pg_current_wal_flush_lsn(), remote_consistent_lsn))
                from backpressure_lsns();
                ''')

                res = cur.fetchone()
                received_lsn_lag = res[0]
                disk_consistent_lsn_lag = res[1]
                remote_consistent_lsn_lag = res[2]

                log.info(f"received_lsn_lag = {received_lsn_lag} ({res[3]}), "
                         f"disk_consistent_lsn_lag = {disk_consistent_lsn_lag} ({res[4]}), "
                         f"remote_consistent_lsn_lag = {remote_consistent_lsn_lag} ({res[5]})")

                # Since feedback from the pageserver is not immediate, we should allow some lag overflow
                lag_overflow = 5 * 1024 * 1024  # 5MB

                if max_replication_write_lag_bytes > 0:
                    assert received_lsn_lag < max_replication_write_lag_bytes + lag_overflow
                if max_replication_flush_lag_bytes > 0:
                    assert disk_consistent_lsn_lag < max_replication_flush_lag_bytes + lag_overflow
                if max_replication_apply_lag_bytes > 0:
                    assert remote_consistent_lsn_lag < max_replication_apply_lag_bytes + lag_overflow

                time.sleep(polling_interval)

            except Exception as e:
                log.info(f"backpressure check query failed: {e}")
                stop_event.set()

    log.info('check thread stopped')


# This test illustrates how to tune backpressure to control the lag
# between the WAL flushed on the compute node and the WAL digested by the pageserver.
#
# To test it, throttle walreceiver ingest using a failpoint and run a heavy write load.
# If backpressure is disabled or not tuned properly, the query will time out, because the walreceiver cannot keep up.
# If backpressure is enabled and tuned properly, insertion will be throttled, but the query will not time out.
def test_backpressure_received_lsn_lag(zenith_env_builder: ZenithEnvBuilder):
    zenith_env_builder.num_safekeepers = 1
    env = zenith_env_builder.init_start()
    # Create a branch for us
    env.zenith_cli.create_branch("test_backpressure", "main")

    pg = env.postgres.create_start('test_backpressure',
                                   config_lines=['max_replication_write_lag=30MB'])
    log.info("postgres is running on 'test_backpressure' branch")

    # set up the check thread
    check_stop_event = threading.Event()
    check_thread = threading.Thread(target=check_backpressure, args=(pg, check_stop_event))
    check_thread.start()

    # Configure a failpoint to slow down walreceiver ingest
    with closing(env.pageserver.connect()) as psconn:
        with psconn.cursor(cursor_factory=psycopg2.extras.DictCursor) as pscur:
            pscur.execute("failpoints walreceiver-after-ingest=sleep(20)")

    # FIXME
    # Wait for the check thread to start.
    #
    # If the load starts too soon, the check thread cannot authenticate,
    # because it is not able to connect to the database
    # while it is waiting for the replay LSN to arrive.
    time.sleep(2)

    with pg_cur(pg) as cur:
        # Create and initialize the test table
        cur.execute("CREATE TABLE foo(x bigint)")

        inserts_to_do = 2000000
        rows_inserted = 0

        while check_thread.is_alive() and rows_inserted < inserts_to_do:
            try:
                cur.execute("INSERT INTO foo select from generate_series(1, 100000)")
                rows_inserted += 100000
            except Exception as e:
                if check_thread.is_alive():
                    log.info('stopping check thread')
                    check_stop_event.set()
                    check_thread.join()
                    assert False, f"Exception {e} while inserting rows, but WAL lag is within the configured threshold. That means backpressure is not tuned properly."
                else:
                    assert False, f"Exception {e} while inserting rows and WAL lag overflowed the configured threshold. That means backpressure doesn't work."

        log.info(f"inserted {rows_inserted} rows")

    if check_thread.is_alive():
        log.info('stopping check thread')
        check_stop_event.set()
        check_thread.join()
        log.info('check thread stopped')
    else:
        assert False, "WAL lag overflowed the configured threshold. That means backpressure doesn't work."


# TODO test_backpressure_disk_consistent_lsn_lag. Play with the pageserver's checkpoint settings.
# TODO test_backpressure_remote_consistent_lsn_lag
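As a small worked illustration of the threshold check performed by check_backpressure() above (a sketch, not repository code; the numbers are chosen only for exposition), the observed write lag must stay below the configured limit plus the fixed 5 MB allowance for delayed pageserver feedback:

# Mirrors the write-lag assertion in check_backpressure(); values are illustrative.
max_replication_write_lag_bytes = 30 * 1024 * 1024  # pg_size_bytes('30MB'), as configured in the test
lag_overflow = 5 * 1024 * 1024                       # 5MB allowance, as in the test


def write_lag_ok(received_lsn_lag: int) -> bool:
    # The check is only enforced when the limit is set (> 0), as in the test.
    if max_replication_write_lag_bytes > 0:
        return received_lsn_lag < max_replication_write_lag_bytes + lag_overflow
    return True


assert write_lag_ok(10 * 1024 * 1024)        # 10 MB of lag is within bounds
assert not write_lag_ok(40 * 1024 * 1024)    # 40 MB exceeds 30 MB + the 5 MB allowance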