mirror of https://github.com/neondatabase/neon.git (synced 2026-01-20 11:52:56 +00:00)

Compare commits: bojan-psbe...bojan/prox (3 commits: 1c40c26313, a6ace609a7, 29d72e8955)
@@ -440,14 +440,8 @@ jobs:
|
||||
command: |
|
||||
echo $DOCKER_PWD | docker login -u $DOCKER_LOGIN --password-stdin
|
||||
DOCKER_TAG=$(git log --oneline|wc -l)
|
||||
docker build \
|
||||
--pull \
|
||||
--build-arg GIT_VERSION=${CIRCLE_SHA1} \
|
||||
--build-arg AWS_ACCESS_KEY_ID="${CACHEPOT_AWS_ACCESS_KEY_ID}" \
|
||||
--build-arg AWS_SECRET_ACCESS_KEY="${CACHEPOT_AWS_SECRET_ACCESS_KEY}" \
|
||||
--tag zenithdb/zenith:${DOCKER_TAG} --tag zenithdb/zenith:latest .
|
||||
docker push zenithdb/zenith:${DOCKER_TAG}
|
||||
docker push zenithdb/zenith:latest
|
||||
docker build --build-arg GIT_VERSION=$CIRCLE_SHA1 -t zenithdb/zenith:latest . && docker push zenithdb/zenith:latest
|
||||
docker tag zenithdb/zenith:latest zenithdb/zenith:${DOCKER_TAG} && docker push zenithdb/zenith:${DOCKER_TAG}
|
||||
|
||||
# Build zenithdb/compute-node:latest image and push it to Docker hub
|
||||
docker-image-compute:
|
||||
@@ -474,9 +468,8 @@ jobs:
|
||||
command: |
|
||||
echo $DOCKER_PWD | docker login -u $DOCKER_LOGIN --password-stdin
|
||||
DOCKER_TAG=$(git log --oneline|wc -l)
|
||||
docker build --tag zenithdb/compute-node:${DOCKER_TAG} --tag zenithdb/compute-node:latest vendor/postgres
|
||||
docker push zenithdb/compute-node:${DOCKER_TAG}
|
||||
docker push zenithdb/compute-node:latest
|
||||
docker build -t zenithdb/compute-node:latest vendor/postgres && docker push zenithdb/compute-node:latest
|
||||
docker tag zenithdb/compute-node:latest zenithdb/compute-node:${DOCKER_TAG} && docker push zenithdb/compute-node:${DOCKER_TAG}
|
||||
|
||||
# Build production zenithdb/zenith:release image and push it to Docker hub
|
||||
docker-image-release:
|
||||
@@ -494,14 +487,8 @@ jobs:
|
||||
command: |
|
||||
echo $DOCKER_PWD | docker login -u $DOCKER_LOGIN --password-stdin
|
||||
DOCKER_TAG="release-$(git log --oneline|wc -l)"
|
||||
docker build \
|
||||
--pull \
|
||||
--build-arg GIT_VERSION=${CIRCLE_SHA1} \
|
||||
--build-arg AWS_ACCESS_KEY_ID="${CACHEPOT_AWS_ACCESS_KEY_ID}" \
|
||||
--build-arg AWS_SECRET_ACCESS_KEY="${CACHEPOT_AWS_SECRET_ACCESS_KEY}" \
|
||||
--tag zenithdb/zenith:${DOCKER_TAG} --tag zenithdb/zenith:release .
|
||||
docker push zenithdb/zenith:${DOCKER_TAG}
|
||||
docker push zenithdb/zenith:release
|
||||
docker build --build-arg GIT_VERSION=$CIRCLE_SHA1 -t zenithdb/zenith:release . && docker push zenithdb/zenith:release
|
||||
docker tag zenithdb/zenith:release zenithdb/zenith:${DOCKER_TAG} && docker push zenithdb/zenith:${DOCKER_TAG}
|
||||
|
||||
# Build production zenithdb/compute-node:release image and push it to Docker hub
|
||||
docker-image-compute-release:
|
||||
@@ -528,9 +515,8 @@ jobs:
|
||||
command: |
|
||||
echo $DOCKER_PWD | docker login -u $DOCKER_LOGIN --password-stdin
|
||||
DOCKER_TAG="release-$(git log --oneline|wc -l)"
|
||||
docker build --tag zenithdb/compute-node:${DOCKER_TAG} --tag zenithdb/compute-node:release vendor/postgres
|
||||
docker push zenithdb/compute-node:${DOCKER_TAG}
|
||||
docker push zenithdb/compute-node:release
|
||||
docker build -t zenithdb/compute-node:release vendor/postgres && docker push zenithdb/compute-node:release
|
||||
docker tag zenithdb/compute-node:release zenithdb/compute-node:${DOCKER_TAG} && docker push zenithdb/compute-node:${DOCKER_TAG}
|
||||
|
||||
deploy-staging:
|
||||
docker:
|
||||
|
||||
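Note on the tagging scheme above: these CI jobs derive the Docker tag from the total commit count (`git log --oneline|wc -l`), so tags grow monotonically with history. A minimal Rust sketch of the same idea, purely illustrative and not part of this diff (it shells out to `git rev-list --count HEAD`, which yields the same number):

```rust
use std::process::Command;

/// Build a monotonically increasing image tag from the commit count of HEAD.
/// Illustrative only; the CI job does this in shell with `git log --oneline|wc -l`.
fn docker_tag(prefix: &str) -> std::io::Result<String> {
    let out = Command::new("git")
        .args(["rev-list", "--count", "HEAD"])
        .output()?;
    let count = String::from_utf8_lossy(&out.stdout).trim().to_string();
    Ok(format!("{prefix}{count}")) // e.g. "release-1234"
}

fn main() -> std::io::Result<()> {
    println!("DOCKER_TAG={}", docker_tag("release-")?);
    Ok(())
}
```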
.github/workflows/benchmarking.yml (vendored): 2 changed lines
@@ -48,7 +48,7 @@ jobs:
echo Python
python3 --version
poetry run python3 --version
echo Poetry
echo Pipenv
poetry --version
echo Pgbench
$PG_BIN/pgbench --version
Cargo.lock (generated): 1 changed line
@@ -424,7 +424,6 @@ dependencies = [
"thiserror",
"toml",
"url",
"walkeeper",
"workspace_hack",
"zenith_utils",
]
Dockerfile: 84 changed lines
@@ -1,62 +1,62 @@
|
||||
# Build Postgres
|
||||
#
|
||||
#FROM zimg/rust:1.56 AS pg-build
|
||||
FROM zenithdb/build:buster-20220309 AS pg-build
|
||||
WORKDIR /pg
|
||||
|
||||
USER root
|
||||
|
||||
COPY vendor/postgres vendor/postgres
|
||||
COPY Makefile Makefile
|
||||
# Docker image for console integration testing.
|
||||
#
|
||||
|
||||
#
|
||||
# Build Postgres separately --- this layer will be rebuilt only if one of
|
||||
# mentioned paths will get any changes.
|
||||
#
|
||||
FROM zimg/rust:1.56 AS pg-build
|
||||
WORKDIR /zenith
|
||||
COPY ./vendor/postgres vendor/postgres
|
||||
COPY ./Makefile Makefile
|
||||
ENV BUILD_TYPE release
|
||||
RUN set -e \
|
||||
&& make -j $(nproc) -s postgres \
|
||||
&& rm -rf tmp_install/build \
|
||||
&& tar -C tmp_install -czf /postgres_install.tar.gz .
|
||||
RUN make -j $(getconf _NPROCESSORS_ONLN) -s postgres
|
||||
RUN rm -rf postgres_install/build
|
||||
|
||||
#
|
||||
# Build zenith binaries
|
||||
#
|
||||
#FROM zimg/rust:1.56 AS build
|
||||
FROM zenithdb/build:buster-20220309 AS build
|
||||
ARG GIT_VERSION=local
|
||||
# TODO: build cargo deps as separate layer. We used cargo-chef before but that was
|
||||
# net time waste in a lot of cases. Copying Cargo.lock with empty lib.rs should do the work.
|
||||
#
|
||||
FROM zimg/rust:1.56 AS build
|
||||
|
||||
ARG CACHEPOT_BUCKET=zenith-rust-cachepot
|
||||
ARG AWS_ACCESS_KEY_ID
|
||||
ARG AWS_SECRET_ACCESS_KEY
|
||||
#ENV RUSTC_WRAPPER cachepot
|
||||
ENV RUSTC_WRAPPER /usr/local/cargo/bin/cachepot
|
||||
ARG GIT_VERSION
|
||||
RUN if [ -z "$GIT_VERSION" ]; then echo "GIT_VERSION is required, use --build-arg to pass it"; exit 1; fi
|
||||
|
||||
WORKDIR /zenith
|
||||
COPY --from=pg-build /zenith/tmp_install/include/postgresql/server tmp_install/include/postgresql/server
|
||||
|
||||
COPY --from=pg-build /pg/tmp_install/include/postgresql/server tmp_install/include/postgresql/server
|
||||
COPY . .
|
||||
RUN GIT_VERSION=$GIT_VERSION cargo build --release
|
||||
|
||||
RUN cargo build --release
|
||||
|
||||
# Build final image
|
||||
#
|
||||
# Copy binaries to resulting image.
|
||||
#
|
||||
FROM debian:bullseye-slim
|
||||
WORKDIR /data
|
||||
|
||||
RUN set -e \
|
||||
&& apt-get update \
|
||||
&& apt-get install -y \
|
||||
libreadline-dev \
|
||||
libseccomp-dev \
|
||||
openssl \
|
||||
ca-certificates \
|
||||
&& rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/* \
|
||||
&& useradd -d /data zenith \
|
||||
&& chown -R zenith:zenith /data
|
||||
|
||||
COPY --from=build --chown=zenith:zenith /home/circleci/project/target/release/pageserver /usr/local/bin
|
||||
COPY --from=build --chown=zenith:zenith /home/circleci/project/target/release/safekeeper /usr/local/bin
|
||||
COPY --from=build --chown=zenith:zenith /home/circleci/project/target/release/proxy /usr/local/bin
|
||||
|
||||
COPY --from=pg-build /pg/tmp_install/ /usr/local/
|
||||
COPY --from=pg-build /postgres_install.tar.gz /data/
|
||||
RUN apt-get update && apt-get -yq install libreadline-dev libseccomp-dev openssl ca-certificates && \
|
||||
mkdir zenith_install
|
||||
|
||||
COPY --from=build /zenith/target/release/pageserver /usr/local/bin
|
||||
COPY --from=build /zenith/target/release/safekeeper /usr/local/bin
|
||||
COPY --from=build /zenith/target/release/proxy /usr/local/bin
|
||||
COPY --from=pg-build /zenith/tmp_install postgres_install
|
||||
COPY docker-entrypoint.sh /docker-entrypoint.sh
|
||||
|
||||
# Remove build artifacts (~ 500 MB)
|
||||
RUN rm -rf postgres_install/build && \
|
||||
# 'Install' Postgres binaries locally
|
||||
cp -r postgres_install/* /usr/local/ && \
|
||||
# Prepare an archive of Postgres binaries (should be around 11 MB)
|
||||
# and keep it inside container for an ease of deploy pipeline.
|
||||
cd postgres_install && tar -czf /data/postgres_install.tar.gz . && cd .. && \
|
||||
rm -rf postgres_install
|
||||
|
||||
RUN useradd -d /data zenith && chown -R zenith:zenith /data
|
||||
|
||||
VOLUME ["/data"]
|
||||
USER zenith
|
||||
EXPOSE 6400
|
||||
|
||||
@@ -1,23 +0,0 @@
|
||||
FROM rust:1.56.1-slim-buster
|
||||
WORKDIR /home/circleci/project
|
||||
|
||||
RUN set -e \
|
||||
&& apt-get update \
|
||||
&& apt-get -yq install \
|
||||
automake \
|
||||
libtool \
|
||||
build-essential \
|
||||
bison \
|
||||
flex \
|
||||
libreadline-dev \
|
||||
zlib1g-dev \
|
||||
libxml2-dev \
|
||||
libseccomp-dev \
|
||||
pkg-config \
|
||||
libssl-dev \
|
||||
clang
|
||||
|
||||
RUN set -e \
|
||||
&& rustup component add clippy \
|
||||
&& cargo install cargo-audit \
|
||||
&& cargo install --git https://github.com/paritytech/cachepot
|
||||
README.md: 18 changed lines
@@ -57,12 +57,12 @@ pageserver init succeeded
|
||||
Starting pageserver at 'localhost:64000' in '.zenith'
|
||||
Pageserver started
|
||||
initializing for single for 7676
|
||||
Starting safekeeper at '127.0.0.1:5454' in '.zenith/safekeepers/single'
|
||||
Starting safekeeper at 'localhost:5454' in '.zenith/safekeepers/single'
|
||||
Safekeeper started
|
||||
|
||||
# start postgres compute node
|
||||
> ./target/debug/zenith pg start main
|
||||
Starting new postgres main on timeline 5b014a9e41b4b63ce1a1febc04503636 ...
|
||||
Starting new postgres main on main...
|
||||
Extracting base backup to create postgres instance: path=.zenith/pgdatadirs/tenants/c03ba6b7ad4c5e9cf556f059ade44229/main port=55432
|
||||
Starting postgres node at 'host=127.0.0.1 port=55432 user=zenith_admin dbname=postgres'
|
||||
waiting for server to start.... done
|
||||
@@ -70,8 +70,8 @@ server started
|
||||
|
||||
# check list of running postgres instances
|
||||
> ./target/debug/zenith pg list
|
||||
NODE ADDRESS TIMELINES BRANCH NAME LSN STATUS
|
||||
main 127.0.0.1:55432 5b014a9e41b4b63ce1a1febc04503636 main 0/1609610 running
|
||||
BRANCH ADDRESS LSN STATUS
|
||||
main 127.0.0.1:55432 0/1609610 running
|
||||
```
|
||||
|
||||
4. Now it is possible to connect to postgres and run some queries:
|
||||
@@ -91,13 +91,13 @@ postgres=# select * from t;
|
||||
5. And create branches and run postgres on them:
|
||||
```sh
|
||||
# create branch named migration_check
|
||||
> ./target/debug/zenith timeline branch --branch-name migration_check
|
||||
Created timeline '0e9331cad6efbafe6a88dd73ae21a5c9' at Lsn 0/16F5830 for tenant: c03ba6b7ad4c5e9cf556f059ade44229. Ancestor timeline: 'main'
|
||||
> ./target/debug/zenith branch migration_check main
|
||||
Created branch 'migration_check' at 0/1609610
|
||||
|
||||
# check branches tree
|
||||
> ./target/debug/zenith timeline list
|
||||
main [5b014a9e41b4b63ce1a1febc04503636]
|
||||
┗━ @0/1609610: migration_check [0e9331cad6efbafe6a88dd73ae21a5c9]
|
||||
> ./target/debug/zenith branch
|
||||
main
|
||||
┗━ @0/1609610: migration_check
|
||||
|
||||
# start postgres on that branch
|
||||
> ./target/debug/zenith pg start migration_check
|
||||
|
||||
@@ -17,6 +17,5 @@ url = "2.2.2"
|
||||
reqwest = { version = "0.11", default-features = false, features = ["blocking", "json", "rustls-tls"] }
|
||||
|
||||
pageserver = { path = "../pageserver" }
|
||||
walkeeper = { path = "../walkeeper" }
|
||||
zenith_utils = { path = "../zenith_utils" }
|
||||
workspace_hack = { path = "../workspace_hack" }
|
||||
|
||||
@@ -37,7 +37,7 @@ impl ComputeControlPlane {
|
||||
// pgdatadirs
|
||||
// |- tenants
|
||||
// | |- <tenant_id>
|
||||
// | | |- <node name>
|
||||
// | | |- <branch name>
|
||||
pub fn load(env: LocalEnv) -> Result<ComputeControlPlane> {
|
||||
let pageserver = Arc::new(PageServerNode::from_env(&env));
|
||||
|
||||
@@ -52,7 +52,7 @@ impl ComputeControlPlane {
|
||||
.with_context(|| format!("failed to list {}", tenant_dir.path().display()))?
|
||||
{
|
||||
let node = PostgresNode::from_dir_entry(timeline_dir?, &env, &pageserver)?;
|
||||
nodes.insert((node.tenant_id, node.name.clone()), Arc::new(node));
|
||||
nodes.insert((node.tenantid, node.name.clone()), Arc::new(node));
|
||||
}
|
||||
}
|
||||
|
||||
@@ -73,14 +73,40 @@ impl ComputeControlPlane {
|
||||
.unwrap_or(self.base_port)
|
||||
}
|
||||
|
||||
// FIXME: see also parse_point_in_time in branches.rs.
|
||||
fn parse_point_in_time(
|
||||
&self,
|
||||
tenantid: ZTenantId,
|
||||
s: &str,
|
||||
) -> Result<(ZTimelineId, Option<Lsn>)> {
|
||||
let mut strings = s.split('@');
|
||||
let name = strings.next().unwrap();
|
||||
|
||||
let lsn = strings
|
||||
.next()
|
||||
.map(Lsn::from_str)
|
||||
.transpose()
|
||||
.context("invalid LSN in point-in-time specification")?;
|
||||
|
||||
// Resolve the timeline ID, given the human-readable branch name
|
||||
let timeline_id = self
|
||||
.pageserver
|
||||
.branch_get_by_name(&tenantid, name)?
|
||||
.timeline_id;
|
||||
|
||||
Ok((timeline_id, lsn))
|
||||
}
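The `parse_point_in_time` helper above splits a spec such as `main` or `main@0/16F5830` into a branch name and an optional LSN, then resolves the name to a timeline via the pageserver. A standalone sketch of just the splitting step; the simplified `parse_lsn` below stands in for `zenith_utils::lsn::Lsn` and is an assumption for illustration:

```rust
/// Simplified "hi/lo" hex LSN parser, standing in for zenith_utils::lsn::Lsn.
fn parse_lsn(s: &str) -> Result<u64, String> {
    let (hi, lo) = s.split_once('/').ok_or_else(|| format!("bad LSN: {s}"))?;
    let hi = u64::from_str_radix(hi, 16).map_err(|e| e.to_string())?;
    let lo = u64::from_str_radix(lo, 16).map_err(|e| e.to_string())?;
    Ok((hi << 32) | lo)
}

/// Split "name" or "name@LSN" into the branch name and an optional LSN,
/// mirroring the '@' handling in parse_point_in_time.
fn split_spec(spec: &str) -> Result<(&str, Option<u64>), String> {
    let mut parts = spec.splitn(2, '@');
    let name = parts.next().unwrap();
    let lsn = parts.next().map(parse_lsn).transpose()?;
    Ok((name, lsn))
}

fn main() {
    assert_eq!(split_spec("main").unwrap(), ("main", None));
    assert_eq!(split_spec("main@0/16F5830").unwrap(), ("main", Some(0x16F5830)));
}
```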
|
||||
|
||||
pub fn new_node(
|
||||
&mut self,
|
||||
tenant_id: ZTenantId,
|
||||
tenantid: ZTenantId,
|
||||
name: &str,
|
||||
timeline_id: ZTimelineId,
|
||||
lsn: Option<Lsn>,
|
||||
timeline_spec: &str,
|
||||
port: Option<u16>,
|
||||
) -> Result<Arc<PostgresNode>> {
|
||||
// Resolve the human-readable timeline spec into timeline ID and LSN
|
||||
let (timelineid, lsn) = self.parse_point_in_time(tenantid, timeline_spec)?;
|
||||
|
||||
let port = port.unwrap_or_else(|| self.get_port());
|
||||
let node = Arc::new(PostgresNode {
|
||||
name: name.to_owned(),
|
||||
@@ -88,9 +114,9 @@ impl ComputeControlPlane {
|
||||
env: self.env.clone(),
|
||||
pageserver: Arc::clone(&self.pageserver),
|
||||
is_test: false,
|
||||
timeline_id,
|
||||
timelineid,
|
||||
lsn,
|
||||
tenant_id,
|
||||
tenantid,
|
||||
uses_wal_proposer: false,
|
||||
});
|
||||
|
||||
@@ -98,7 +124,7 @@ impl ComputeControlPlane {
|
||||
node.setup_pg_conf(self.env.pageserver.auth_type)?;
|
||||
|
||||
self.nodes
|
||||
.insert((tenant_id, node.name.clone()), Arc::clone(&node));
|
||||
.insert((tenantid, node.name.clone()), Arc::clone(&node));
|
||||
|
||||
Ok(node)
|
||||
}
|
||||
@@ -113,9 +139,9 @@ pub struct PostgresNode {
|
||||
pub env: LocalEnv,
|
||||
pageserver: Arc<PageServerNode>,
|
||||
is_test: bool,
|
||||
pub timeline_id: ZTimelineId,
|
||||
pub timelineid: ZTimelineId,
|
||||
pub lsn: Option<Lsn>, // if it's a read-only node. None for primary
|
||||
pub tenant_id: ZTenantId,
|
||||
pub tenantid: ZTenantId,
|
||||
uses_wal_proposer: bool,
|
||||
}
|
||||
|
||||
@@ -147,8 +173,8 @@ impl PostgresNode {
|
||||
// Read a few options from the config file
|
||||
let context = format!("in config file {}", cfg_path_str);
|
||||
let port: u16 = conf.parse_field("port", &context)?;
|
||||
let timeline_id: ZTimelineId = conf.parse_field("zenith.zenith_timeline", &context)?;
|
||||
let tenant_id: ZTenantId = conf.parse_field("zenith.zenith_tenant", &context)?;
|
||||
let timelineid: ZTimelineId = conf.parse_field("zenith.zenith_timeline", &context)?;
|
||||
let tenantid: ZTenantId = conf.parse_field("zenith.zenith_tenant", &context)?;
|
||||
let uses_wal_proposer = conf.get("wal_acceptors").is_some();
|
||||
|
||||
// parse recovery_target_lsn, if any
|
||||
@@ -162,9 +188,9 @@ impl PostgresNode {
|
||||
env: env.clone(),
|
||||
pageserver: Arc::clone(pageserver),
|
||||
is_test: false,
|
||||
timeline_id,
|
||||
timelineid,
|
||||
lsn: recovery_target_lsn,
|
||||
tenant_id,
|
||||
tenantid,
|
||||
uses_wal_proposer,
|
||||
})
|
||||
}
|
||||
@@ -215,9 +241,9 @@ impl PostgresNode {
|
||||
);
|
||||
|
||||
let sql = if let Some(lsn) = lsn {
|
||||
format!("basebackup {} {} {}", self.tenant_id, self.timeline_id, lsn)
|
||||
format!("basebackup {} {} {}", self.tenantid, self.timelineid, lsn)
|
||||
} else {
|
||||
format!("basebackup {} {}", self.tenant_id, self.timeline_id)
|
||||
format!("basebackup {} {}", self.tenantid, self.timelineid)
|
||||
};
|
||||
|
||||
let mut client = self
|
||||
@@ -303,8 +329,8 @@ impl PostgresNode {
|
||||
conf.append("shared_preload_libraries", "zenith");
|
||||
conf.append_line("");
|
||||
conf.append("zenith.page_server_connstring", &pageserver_connstr);
|
||||
conf.append("zenith.zenith_tenant", &self.tenant_id.to_string());
|
||||
conf.append("zenith.zenith_timeline", &self.timeline_id.to_string());
|
||||
conf.append("zenith.zenith_tenant", &self.tenantid.to_string());
|
||||
conf.append("zenith.zenith_timeline", &self.timelineid.to_string());
|
||||
if let Some(lsn) = self.lsn {
|
||||
conf.append("recovery_target_lsn", &lsn.to_string());
|
||||
}
|
||||
@@ -382,7 +408,7 @@ impl PostgresNode {
|
||||
}
|
||||
|
||||
pub fn pgdata(&self) -> PathBuf {
|
||||
self.env.pg_data_dir(&self.tenant_id, &self.name)
|
||||
self.env.pg_data_dir(&self.tenantid, &self.name)
|
||||
}
|
||||
|
||||
pub fn status(&self) -> &str {
|
||||
|
||||
@@ -3,18 +3,16 @@
|
||||
//! Now it also provides init method which acts like a stub for proper installation
|
||||
//! script which will use local paths.
|
||||
|
||||
use anyhow::{bail, ensure, Context};
|
||||
use anyhow::{bail, Context};
|
||||
use serde::{Deserialize, Serialize};
|
||||
use std::collections::HashMap;
|
||||
use std::env;
|
||||
use std::fmt::Write;
|
||||
use std::fs;
|
||||
use std::path::{Path, PathBuf};
|
||||
use std::process::{Command, Stdio};
|
||||
use zenith_utils::auth::{encode_from_key_file, Claims, Scope};
|
||||
use zenith_utils::postgres_backend::AuthType;
|
||||
use zenith_utils::zid::{
|
||||
HexZTenantId, HexZTimelineId, ZNodeId, ZTenantId, ZTenantTimelineId, ZTimelineId,
|
||||
};
|
||||
use zenith_utils::zid::{HexZTenantId, ZNodeId, ZTenantId};
|
||||
|
||||
use crate::safekeeper::SafekeeperNode;
|
||||
|
||||
@@ -25,7 +23,7 @@ use crate::safekeeper::SafekeeperNode;
|
||||
// to 'zenith init --config=<path>' option. See control_plane/simple.conf for
|
||||
// an example.
|
||||
//
|
||||
#[derive(Serialize, Deserialize, PartialEq, Eq, Clone, Debug)]
|
||||
#[derive(Serialize, Deserialize, Clone, Debug)]
|
||||
pub struct LocalEnv {
|
||||
// Base directory for all the nodes (the pageserver, safekeepers and
|
||||
// compute nodes).
|
||||
@@ -50,7 +48,7 @@ pub struct LocalEnv {
|
||||
// Default tenant ID to use with the 'zenith' command line utility, when
|
||||
// --tenantid is not explicitly specified.
|
||||
#[serde(default)]
|
||||
pub default_tenant_id: Option<HexZTenantId>,
|
||||
pub default_tenantid: Option<HexZTenantId>,
|
||||
|
||||
// used to issue tokens during e.g pg start
|
||||
#[serde(default)]
|
||||
@@ -60,16 +58,9 @@ pub struct LocalEnv {
|
||||
|
||||
#[serde(default)]
|
||||
pub safekeepers: Vec<SafekeeperConf>,
|
||||
|
||||
/// Keep human-readable aliases in memory (and persist them to config), to hide ZId hex strings from the user.
|
||||
#[serde(default)]
|
||||
// A `HashMap<String, HashMap<ZTenantId, ZTimelineId>>` would be more appropriate here,
|
||||
// but deserialization into a generic toml object as `toml::Value::try_from` fails with an error.
|
||||
// https://toml.io/en/v1.0.0 does not contain a concept of "a table inside another table".
|
||||
branch_name_mappings: HashMap<String, Vec<(HexZTenantId, HexZTimelineId)>>,
|
||||
}
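As the comment above notes, the mapping is kept as a `Vec` of `(tenant, timeline)` pairs per branch name because deserializing a nested map through `toml::Value::try_from` fails. A small std-only sketch of a lookup over that shape, with plain `String` IDs standing in for `HexZTenantId`/`HexZTimelineId` (an assumption for illustration):

```rust
use std::collections::HashMap;

/// Branch name -> (tenant_id, timeline_id) pairs, at most one pair per tenant.
type BranchMappings = HashMap<String, Vec<(String, String)>>;

/// Resolve a branch name to a timeline id for one tenant, mirroring the
/// lookup that get_branch_timeline_id performs over this layout.
fn branch_timeline(map: &BranchMappings, branch: &str, tenant: &str) -> Option<String> {
    map.get(branch)?
        .iter()
        .find(|(t, _)| t.as_str() == tenant)
        .map(|(_, timeline)| timeline.clone())
}

fn main() {
    let mut map = BranchMappings::new();
    map.entry("main".to_string())
        .or_default()
        .push(("tenant-a".to_string(), "timeline-1".to_string()));
    assert_eq!(branch_timeline(&map, "main", "tenant-a").as_deref(), Some("timeline-1"));
}
```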
|
||||
|
||||
#[derive(Serialize, Deserialize, PartialEq, Eq, Clone, Debug)]
|
||||
#[derive(Serialize, Deserialize, Clone, Debug)]
|
||||
#[serde(default)]
|
||||
pub struct PageServerConf {
|
||||
// node id
|
||||
@@ -97,7 +88,7 @@ impl Default for PageServerConf {
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Serialize, Deserialize, PartialEq, Eq, Clone, Debug)]
|
||||
#[derive(Serialize, Deserialize, Clone, Debug)]
|
||||
#[serde(default)]
|
||||
pub struct SafekeeperConf {
|
||||
pub id: ZNodeId,
|
||||
@@ -153,72 +144,6 @@ impl LocalEnv {
|
||||
self.base_data_dir.join("safekeepers").join(data_dir_name)
|
||||
}
|
||||
|
||||
pub fn register_branch_mapping(
|
||||
&mut self,
|
||||
branch_name: String,
|
||||
tenant_id: ZTenantId,
|
||||
timeline_id: ZTimelineId,
|
||||
) -> anyhow::Result<()> {
|
||||
let existing_values = self
|
||||
.branch_name_mappings
|
||||
.entry(branch_name.clone())
|
||||
.or_default();
|
||||
|
||||
let tenant_id = HexZTenantId::from(tenant_id);
|
||||
let timeline_id = HexZTimelineId::from(timeline_id);
|
||||
|
||||
let existing_ids = existing_values
|
||||
.iter()
|
||||
.find(|(existing_tenant_id, _)| existing_tenant_id == &tenant_id);
|
||||
|
||||
if let Some((_, old_timeline_id)) = existing_ids {
|
||||
if old_timeline_id == &timeline_id {
|
||||
Ok(())
|
||||
} else {
|
||||
bail!(
|
||||
"branch '{}' is already mapped to timeline {}, cannot map to another timeline {}",
|
||||
branch_name,
|
||||
old_timeline_id,
|
||||
timeline_id
|
||||
);
|
||||
}
|
||||
} else {
|
||||
existing_values.push((tenant_id, timeline_id));
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
pub fn get_branch_timeline_id(
|
||||
&self,
|
||||
branch_name: &str,
|
||||
tenant_id: ZTenantId,
|
||||
) -> Option<ZTimelineId> {
|
||||
let tenant_id = HexZTenantId::from(tenant_id);
|
||||
self.branch_name_mappings
|
||||
.get(branch_name)?
|
||||
.iter()
|
||||
.find(|(mapped_tenant_id, _)| mapped_tenant_id == &tenant_id)
|
||||
.map(|&(_, timeline_id)| timeline_id)
|
||||
.map(ZTimelineId::from)
|
||||
}
|
||||
|
||||
pub fn timeline_name_mappings(&self) -> HashMap<ZTenantTimelineId, String> {
|
||||
self.branch_name_mappings
|
||||
.iter()
|
||||
.flat_map(|(name, tenant_timelines)| {
|
||||
tenant_timelines.iter().map(|&(tenant_id, timeline_id)| {
|
||||
(
|
||||
ZTenantTimelineId::new(
|
||||
ZTenantId::from(tenant_id),
|
||||
ZTimelineId::from(timeline_id),
|
||||
),
|
||||
name.clone(),
|
||||
)
|
||||
})
|
||||
})
|
||||
.collect()
|
||||
}
|
||||
|
||||
/// Create a LocalEnv from a config file.
|
||||
///
|
||||
/// Unlike 'load_config', this function fills in any defaults that are missing
|
||||
@@ -258,8 +183,8 @@ impl LocalEnv {
|
||||
}
|
||||
|
||||
// If no initial tenant ID was given, generate it.
|
||||
if env.default_tenant_id.is_none() {
|
||||
env.default_tenant_id = Some(HexZTenantId::from(ZTenantId::generate()));
|
||||
if env.default_tenantid.is_none() {
|
||||
env.default_tenantid = Some(HexZTenantId::from(ZTenantId::generate()));
|
||||
}
|
||||
|
||||
env.base_data_dir = base_path();
|
||||
@@ -289,39 +214,6 @@ impl LocalEnv {
|
||||
Ok(env)
|
||||
}
|
||||
|
||||
pub fn persist_config(&self, base_path: &Path) -> anyhow::Result<()> {
|
||||
// Currently, the user first passes a config file with 'zenith init --config=<path>'
|
||||
// We read that in, in `create_config`, and fill any missing defaults. Then it's saved
|
||||
// to .zenith/config. TODO: We lose any formatting and comments along the way, which is
|
||||
// a bit sad.
|
||||
let mut conf_content = r#"# This file describes a local deployment of the page server
|
||||
# and safekeeper node. It is read by the 'zenith' command-line
|
||||
# utility.
|
||||
"#
|
||||
.to_string();
|
||||
|
||||
// Convert the LocalEnv to a toml file.
|
||||
//
|
||||
// This could be as simple as this:
|
||||
//
|
||||
// conf_content += &toml::to_string_pretty(env)?;
|
||||
//
|
||||
// But it results in a "values must be emitted before tables". I'm not sure
|
||||
// why, AFAICS the table, i.e. 'safekeepers: Vec<SafekeeperConf>' is last.
|
||||
// Maybe rust reorders the fields to squeeze avoid padding or something?
|
||||
// In any case, converting to toml::Value first, and serializing that, works.
|
||||
// See https://github.com/alexcrichton/toml-rs/issues/142
|
||||
conf_content += &toml::to_string_pretty(&toml::Value::try_from(self)?)?;
|
||||
|
||||
let target_config_path = base_path.join("config");
|
||||
fs::write(&target_config_path, conf_content).with_context(|| {
|
||||
format!(
|
||||
"Failed to write config file into path '{}'",
|
||||
target_config_path.display()
|
||||
)
|
||||
})
|
||||
}
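The comment in `persist_config` above mentions that serializing the struct straight to TOML hits a "values must be emitted before tables" error and that converting to `toml::Value` first sidesteps it. A hedged, standalone sketch of that workaround with a hypothetical config struct (not the real `LocalEnv`):

```rust
use serde::Serialize;

#[derive(Serialize)]
struct Node {
    listen_addr: String,
}

#[derive(Serialize)]
struct Conf {
    // An array of tables followed by plain values is the kind of layout that
    // can trip the direct serializer, depending on field order.
    nodes: Vec<Node>,
    base_port: u16,
}

fn main() -> anyhow::Result<()> {
    let conf = Conf {
        nodes: vec![Node { listen_addr: "127.0.0.1:5454".into() }],
        base_port: 55432,
    };
    // The workaround used above: go through a generic toml::Value, then pretty-print.
    let value = toml::Value::try_from(&conf)?;
    println!("{}", toml::to_string_pretty(&value)?);
    Ok(())
}
```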
|
||||
|
||||
// this function is used only for testing purposes in the CLI, e.g. to generate tokens during init
|
||||
pub fn generate_auth_token(&self, claims: &Claims) -> anyhow::Result<String> {
|
||||
let private_key_path = if self.private_key_path.is_absolute() {
|
||||
@@ -340,15 +232,15 @@ impl LocalEnv {
|
||||
pub fn init(&mut self) -> anyhow::Result<()> {
|
||||
// check if config already exists
|
||||
let base_path = &self.base_data_dir;
|
||||
ensure!(
|
||||
base_path != Path::new(""),
|
||||
"repository base path is missing"
|
||||
);
|
||||
ensure!(
|
||||
!base_path.exists(),
|
||||
"directory '{}' already exists. Perhaps already initialized?",
|
||||
base_path.display()
|
||||
);
|
||||
if base_path == Path::new("") {
|
||||
bail!("repository base path is missing");
|
||||
}
|
||||
if base_path.exists() {
|
||||
bail!(
|
||||
"directory '{}' already exists. Perhaps already initialized?",
|
||||
base_path.to_str().unwrap()
|
||||
);
|
||||
}
|
||||
|
||||
fs::create_dir(&base_path)?;
|
||||
|
||||
@@ -400,7 +292,36 @@ impl LocalEnv {
|
||||
fs::create_dir_all(SafekeeperNode::datadir_path_by_id(self, safekeeper.id))?;
|
||||
}
|
||||
|
||||
self.persist_config(base_path)
|
||||
let mut conf_content = String::new();
|
||||
|
||||
// Currently, the user first passes a config file with 'zenith init --config=<path>'
|
||||
// We read that in, in `create_config`, and fill any missing defaults. Then it's saved
|
||||
// to .zenith/config. TODO: We lose any formatting and comments along the way, which is
|
||||
// a bit sad.
|
||||
write!(
|
||||
&mut conf_content,
|
||||
r#"# This file describes a local deployment of the page server
|
||||
# and safekeeper node. It is read by the 'zenith' command-line
|
||||
# utility.
|
||||
"#
|
||||
)?;
|
||||
|
||||
// Convert the LocalEnv to a toml file.
|
||||
//
|
||||
// This could be as simple as this:
|
||||
//
|
||||
// conf_content += &toml::to_string_pretty(env)?;
|
||||
//
|
||||
// But it results in a "values must be emitted before tables". I'm not sure
|
||||
// why, AFAICS the table, i.e. 'safekeepers: Vec<SafekeeperConf>' is last.
|
||||
// Maybe Rust reorders the fields to avoid padding or something?
|
||||
// In any case, converting to toml::Value first, and serializing that, works.
|
||||
// See https://github.com/alexcrichton/toml-rs/issues/142
|
||||
conf_content += &toml::to_string_pretty(&toml::Value::try_from(&self)?)?;
|
||||
|
||||
fs::write(base_path.join("config"), conf_content)?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -14,9 +14,8 @@ use postgres::Config;
|
||||
use reqwest::blocking::{Client, RequestBuilder, Response};
|
||||
use reqwest::{IntoUrl, Method};
|
||||
use thiserror::Error;
|
||||
use walkeeper::http::models::TimelineCreateRequest;
|
||||
use zenith_utils::http::error::HttpErrorBody;
|
||||
use zenith_utils::zid::{ZNodeId, ZTenantId, ZTimelineId};
|
||||
use zenith_utils::zid::ZNodeId;
|
||||
|
||||
use crate::local_env::{LocalEnv, SafekeeperConf};
|
||||
use crate::storage::PageServerNode;
|
||||
@@ -262,25 +261,4 @@ impl SafekeeperNode {
|
||||
.error_from_body()?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub fn timeline_create(
|
||||
&self,
|
||||
tenant_id: ZTenantId,
|
||||
timeline_id: ZTimelineId,
|
||||
peer_ids: Vec<ZNodeId>,
|
||||
) -> Result<()> {
|
||||
Ok(self
|
||||
.http_request(
|
||||
Method::POST,
|
||||
format!("{}/{}", self.http_base_url, "timeline"),
|
||||
)
|
||||
.json(&TimelineCreateRequest {
|
||||
tenant_id,
|
||||
timeline_id,
|
||||
peer_ids,
|
||||
})
|
||||
.send()?
|
||||
.error_from_body()?
|
||||
.json()?)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,4 +1,3 @@
|
||||
use std::convert::TryFrom;
|
||||
use std::io::Write;
|
||||
use std::net::TcpStream;
|
||||
use std::path::PathBuf;
|
||||
@@ -6,23 +5,22 @@ use std::process::Command;
|
||||
use std::time::Duration;
|
||||
use std::{io, result, thread};
|
||||
|
||||
use anyhow::{bail, Context};
|
||||
use anyhow::bail;
|
||||
use nix::errno::Errno;
|
||||
use nix::sys::signal::{kill, Signal};
|
||||
use nix::unistd::Pid;
|
||||
use pageserver::http::models::{TenantCreateRequest, TimelineCreateRequest, TimelineInfoResponse};
|
||||
use pageserver::timelines::TimelineInfo;
|
||||
use pageserver::http::models::{BranchCreateRequest, TenantCreateRequest};
|
||||
use postgres::{Config, NoTls};
|
||||
use reqwest::blocking::{Client, RequestBuilder, Response};
|
||||
use reqwest::{IntoUrl, Method};
|
||||
use thiserror::Error;
|
||||
use zenith_utils::http::error::HttpErrorBody;
|
||||
use zenith_utils::lsn::Lsn;
|
||||
use zenith_utils::postgres_backend::AuthType;
|
||||
use zenith_utils::zid::{HexZTenantId, HexZTimelineId, ZTenantId, ZTimelineId};
|
||||
use zenith_utils::zid::ZTenantId;
|
||||
|
||||
use crate::local_env::LocalEnv;
|
||||
use crate::{fill_rust_env_vars, read_pidfile};
|
||||
use pageserver::branches::BranchInfo;
|
||||
use pageserver::tenant_mgr::TenantInfo;
|
||||
use zenith_utils::connstring::connection_address;
|
||||
|
||||
@@ -100,10 +98,9 @@ impl PageServerNode {
|
||||
|
||||
pub fn init(
|
||||
&self,
|
||||
create_tenant: Option<ZTenantId>,
|
||||
initial_timeline_id: Option<ZTimelineId>,
|
||||
create_tenant: Option<&str>,
|
||||
config_overrides: &[&str],
|
||||
) -> anyhow::Result<ZTimelineId> {
|
||||
) -> anyhow::Result<()> {
|
||||
let mut cmd = Command::new(self.env.pageserver_bin()?);
|
||||
|
||||
let id = format!("id={}", self.env.pageserver.id);
|
||||
@@ -140,24 +137,19 @@ impl PageServerNode {
|
||||
]);
|
||||
}
|
||||
|
||||
let create_tenant = create_tenant.map(|id| id.to_string());
|
||||
if let Some(tenant_id) = create_tenant.as_deref() {
|
||||
args.extend(["--create-tenant", tenant_id])
|
||||
if let Some(tenantid) = create_tenant {
|
||||
args.extend(["--create-tenant", tenantid])
|
||||
}
|
||||
|
||||
let initial_timeline_id = initial_timeline_id.unwrap_or_else(ZTimelineId::generate);
|
||||
let initial_timeline_id_string = initial_timeline_id.to_string();
|
||||
args.extend(["--initial-timeline-id", &initial_timeline_id_string]);
|
||||
let status = fill_rust_env_vars(cmd.args(args))
|
||||
.status()
|
||||
.expect("pageserver init failed");
|
||||
|
||||
let init_output = fill_rust_env_vars(cmd.args(args))
|
||||
.output()
|
||||
.context("pageserver init failed")?;
|
||||
|
||||
if !init_output.status.success() {
|
||||
if !status.success() {
|
||||
bail!("pageserver init failed");
|
||||
}
|
||||
|
||||
Ok(initial_timeline_id)
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub fn repo_path(&self) -> PathBuf {
|
||||
@@ -318,7 +310,7 @@ impl PageServerNode {
|
||||
}
|
||||
|
||||
pub fn check_status(&self) -> Result<()> {
|
||||
self.http_request(Method::GET, format!("{}/status", self.http_base_url))
|
||||
self.http_request(Method::GET, format!("{}/{}", self.http_base_url, "status"))
|
||||
.send()?
|
||||
.error_from_body()?;
|
||||
Ok(())
|
||||
@@ -326,76 +318,64 @@ impl PageServerNode {
|
||||
|
||||
pub fn tenant_list(&self) -> Result<Vec<TenantInfo>> {
|
||||
Ok(self
|
||||
.http_request(Method::GET, format!("{}/tenant", self.http_base_url))
|
||||
.http_request(Method::GET, format!("{}/{}", self.http_base_url, "tenant"))
|
||||
.send()?
|
||||
.error_from_body()?
|
||||
.json()?)
|
||||
}
|
||||
|
||||
pub fn tenant_create(
|
||||
&self,
|
||||
new_tenant_id: Option<ZTenantId>,
|
||||
) -> anyhow::Result<Option<ZTenantId>> {
|
||||
let tenant_id_string = self
|
||||
.http_request(Method::POST, format!("{}/tenant", self.http_base_url))
|
||||
pub fn tenant_create(&self, tenantid: ZTenantId) -> Result<()> {
|
||||
Ok(self
|
||||
.http_request(Method::POST, format!("{}/{}", self.http_base_url, "tenant"))
|
||||
.json(&TenantCreateRequest {
|
||||
new_tenant_id: new_tenant_id.map(HexZTenantId::from),
|
||||
tenant_id: tenantid,
|
||||
})
|
||||
.send()?
|
||||
.error_from_body()?
|
||||
.json::<Option<String>>()?;
|
||||
|
||||
tenant_id_string
|
||||
.map(|id| {
|
||||
id.parse().with_context(|| {
|
||||
format!(
"Failed to parse tenant creation response as tenant id: {}",
|
||||
id
|
||||
)
|
||||
})
|
||||
})
|
||||
.transpose()
|
||||
.json()?)
|
||||
}
|
||||
|
||||
pub fn timeline_list(&self, tenant_id: &ZTenantId) -> anyhow::Result<Vec<TimelineInfo>> {
|
||||
let timeline_infos: Vec<TimelineInfoResponse> = self
|
||||
pub fn branch_list(&self, tenantid: &ZTenantId) -> Result<Vec<BranchInfo>> {
|
||||
Ok(self
|
||||
.http_request(
|
||||
Method::GET,
|
||||
format!("{}/tenant/{}/timeline", self.http_base_url, tenant_id),
|
||||
format!("{}/branch/{}", self.http_base_url, tenantid),
|
||||
)
|
||||
.send()?
|
||||
.error_from_body()?
|
||||
.json()?;
|
||||
|
||||
timeline_infos
|
||||
.into_iter()
|
||||
.map(TimelineInfo::try_from)
|
||||
.collect()
|
||||
.json()?)
|
||||
}
|
||||
|
||||
pub fn timeline_create(
|
||||
pub fn branch_create(
|
||||
&self,
|
||||
tenant_id: ZTenantId,
|
||||
new_timeline_id: Option<ZTimelineId>,
|
||||
ancestor_start_lsn: Option<Lsn>,
|
||||
ancestor_timeline_id: Option<ZTimelineId>,
|
||||
) -> anyhow::Result<Option<TimelineInfo>> {
|
||||
let timeline_info_response = self
|
||||
.http_request(
|
||||
Method::POST,
|
||||
format!("{}/tenant/{}/timeline", self.http_base_url, tenant_id),
|
||||
)
|
||||
.json(&TimelineCreateRequest {
|
||||
new_timeline_id: new_timeline_id.map(HexZTimelineId::from),
|
||||
ancestor_start_lsn,
|
||||
ancestor_timeline_id: ancestor_timeline_id.map(HexZTimelineId::from),
|
||||
branch_name: &str,
|
||||
startpoint: &str,
|
||||
tenantid: &ZTenantId,
|
||||
) -> Result<BranchInfo> {
|
||||
Ok(self
|
||||
.http_request(Method::POST, format!("{}/branch", self.http_base_url))
|
||||
.json(&BranchCreateRequest {
|
||||
tenant_id: tenantid.to_owned(),
|
||||
name: branch_name.to_owned(),
|
||||
start_point: startpoint.to_owned(),
|
||||
})
|
||||
.send()?
|
||||
.error_from_body()?
|
||||
.json::<Option<TimelineInfoResponse>>()?;
|
||||
.json()?)
|
||||
}
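As a usage note for the client methods above: the README walkthrough (`zenith branch migration_check main`, `zenith branch`) maps onto `branch_create` and `branch_list`. A hedged sketch of that call sequence; the import paths and the error-type conversion are assumptions, not taken from this diff:

```rust
use anyhow::Result;
// Paths assumed for illustration; the real crate layout may differ.
use control_plane::storage::PageServerNode;
use zenith_utils::zid::ZTenantId;

/// Create a branch from the tip of "main", then list all branches for the tenant.
fn branch_demo(pageserver: &PageServerNode, tenantid: &ZTenantId) -> Result<()> {
    let info = pageserver.branch_create("migration_check", "main", tenantid)?;
    println!("created branch {} at {}", info.name, info.latest_valid_lsn);

    for branch in pageserver.branch_list(tenantid)? {
        println!("{} -> timeline {}", branch.name, branch.timeline_id);
    }
    Ok(())
}
```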
|
||||
|
||||
timeline_info_response
|
||||
.map(TimelineInfo::try_from)
|
||||
.transpose()
|
||||
pub fn branch_get_by_name(
|
||||
&self,
|
||||
tenantid: &ZTenantId,
|
||||
branch_name: &str,
|
||||
) -> Result<BranchInfo> {
|
||||
Ok(self
|
||||
.http_request(
|
||||
Method::GET,
|
||||
format!("{}/branch/{}/{}", self.http_base_url, tenantid, branch_name),
|
||||
)
|
||||
.send()?
|
||||
.error_for_status()?
|
||||
.json()?)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -2,14 +2,7 @@
|
||||
|
||||
use std::{env, path::Path, str::FromStr};
|
||||
use tracing::*;
|
||||
use zenith_utils::{
|
||||
auth::JwtAuth,
|
||||
logging,
|
||||
postgres_backend::AuthType,
|
||||
tcp_listener,
|
||||
zid::{ZTenantId, ZTimelineId},
|
||||
GIT_VERSION,
|
||||
};
|
||||
use zenith_utils::{auth::JwtAuth, logging, postgres_backend::AuthType, tcp_listener, GIT_VERSION};
|
||||
|
||||
use anyhow::{bail, Context, Result};
|
||||
|
||||
@@ -17,10 +10,11 @@ use clap::{App, Arg};
|
||||
use daemonize::Daemonize;
|
||||
|
||||
use pageserver::{
|
||||
branches,
|
||||
config::{defaults::*, PageServerConf},
|
||||
http, page_cache, page_service, remote_storage, tenant_mgr, thread_mgr,
|
||||
thread_mgr::ThreadKind,
|
||||
timelines, virtual_file, LOG_FILE_NAME,
|
||||
virtual_file, LOG_FILE_NAME,
|
||||
};
|
||||
use zenith_utils::http::endpoint;
|
||||
use zenith_utils::postgres_backend;
|
||||
@@ -43,7 +37,7 @@ fn main() -> Result<()> {
|
||||
Arg::new("init")
|
||||
.long("init")
|
||||
.takes_value(false)
|
||||
.help("Initialize pageserver service: creates an initial config, tenant and timeline, if specified"),
|
||||
.help("Initialize pageserver repo"),
|
||||
)
|
||||
.arg(
|
||||
Arg::new("workdir")
|
||||
@@ -59,13 +53,6 @@ fn main() -> Result<()> {
|
||||
.help("Create tenant during init")
|
||||
.requires("init"),
|
||||
)
|
||||
.arg(
|
||||
Arg::new("initial-timeline-id")
|
||||
.long("initial-timeline-id")
|
||||
.takes_value(true)
|
||||
.help("Use a specific timeline id during init and tenant creation")
|
||||
.requires("create-tenant"),
|
||||
)
|
||||
// See `settings.md` for more details on the extra configuration parameters the pageserver can process
|
||||
.arg(
|
||||
Arg::new("config-override")
|
||||
@@ -85,16 +72,7 @@ fn main() -> Result<()> {
|
||||
let cfg_file_path = workdir.join("pageserver.toml");
|
||||
|
||||
let init = arg_matches.is_present("init");
|
||||
let create_tenant = arg_matches
|
||||
.value_of("create-tenant")
|
||||
.map(ZTenantId::from_str)
|
||||
.transpose()
|
||||
.context("Failed to parse tenant id from the arguments")?;
|
||||
let initial_timeline_id = arg_matches
|
||||
.value_of("initial-timeline-id")
|
||||
.map(ZTimelineId::from_str)
|
||||
.transpose()
|
||||
.context("Failed to parse timeline id from the arguments")?;
|
||||
let create_tenant = arg_matches.value_of("create-tenant");
|
||||
|
||||
// Set CWD to workdir for non-daemon modes
|
||||
env::set_current_dir(&workdir).with_context(|| {
|
||||
@@ -165,8 +143,7 @@ fn main() -> Result<()> {
|
||||
|
||||
// Create repo and exit if init was requested
|
||||
if init {
|
||||
timelines::init_pageserver(conf, create_tenant, initial_timeline_id)
|
||||
.context("Failed to init pageserver")?;
|
||||
branches::init_pageserver(conf, create_tenant).context("Failed to init pageserver")?;
|
||||
// write the config file
|
||||
std::fs::write(&cfg_file_path, toml.to_string()).with_context(|| {
|
||||
format!(
|
||||
|
||||
@@ -1,221 +0,0 @@
|
||||
//! Pageserver benchmark tool
|
||||
//!
|
||||
//! Usually it's easier to write python perf tests, but here the performance
|
||||
//! of the tester matters, and the API is easier to work with from rust.
|
||||
use std::{collections::HashMap, io::{BufRead, BufReader, Cursor}, net::SocketAddr, ops::AddAssign};
|
||||
use byteorder::ReadBytesExt;
|
||||
use tokio::io::{AsyncReadExt, AsyncWriteExt};
|
||||
use bytes::{BufMut, Bytes, BytesMut};
|
||||
use clap::{App, Arg};
|
||||
use std::fs::File;
|
||||
use zenith_utils::{GIT_VERSION, pq_proto::{BeMessage, BeParameterStatusMessage, FeMessage}};
|
||||
|
||||
use anyhow::Result;
|
||||
|
||||
pub fn read_lines_buffered(file_name: &str) -> impl Iterator<Item = String> {
|
||||
BufReader::new(File::open(file_name).unwrap())
|
||||
.lines()
|
||||
.map(|result| result.unwrap())
|
||||
}
|
||||
|
||||
pub async fn get_page(
|
||||
pagestream: &mut tokio::net::TcpStream,
|
||||
lsn: &Lsn,
|
||||
page: &Page,
|
||||
) -> anyhow::Result<Vec<u8>> {
|
||||
let msg = {
|
||||
let query = {
|
||||
let mut query = BytesMut::new();
|
||||
query.put_u8(2); // Specifies get_page query
|
||||
query.put_u8(0); // Specifies this is not a "latest page" query
|
||||
query.put_u64(lsn.0);
|
||||
page.write(&mut query).await?;
|
||||
query.freeze()
|
||||
};
|
||||
|
||||
let mut buf = BytesMut::new();
|
||||
let copy_msg = BeMessage::CopyData(&query);
|
||||
BeMessage::write(&mut buf, &copy_msg)?;
|
||||
buf.freeze()
|
||||
};
|
||||
|
||||
pagestream.write(&msg).await?;
|
||||
|
||||
let response = match FeMessage::read_fut(pagestream).await? {
|
||||
Some(FeMessage::CopyData(page)) => page,
|
||||
r => panic!("Expected CopyData message, got: {:?}", r),
|
||||
};
|
||||
|
||||
let page = {
|
||||
let mut cursor = Cursor::new(response);
|
||||
let tag = AsyncReadExt::read_u8(&mut cursor).await?;
|
||||
|
||||
match tag {
|
||||
102 => {
|
||||
let mut page = Vec::<u8>::new();
|
||||
cursor.read_to_end(&mut page).await?;
|
||||
dbg!(page.len());
|
||||
if page.len() != 8 * 1024 {
|
||||
panic!("Expected 8kb page, got: {:?}", page.len());
|
||||
}
|
||||
page
|
||||
},
|
||||
103 => {
|
||||
let mut bytes = Vec::<u8>::new();
|
||||
cursor.read_to_end(&mut bytes).await?;
|
||||
let message = String::from_utf8(bytes)?;
|
||||
panic!("Got error message: {}", message);
|
||||
},
|
||||
_ => panic!("Unhandled tag {:?}", tag)
|
||||
}
|
||||
};
|
||||
|
||||
Ok(page)
|
||||
}
|
||||
|
||||
#[derive(Copy, Clone, PartialEq, Eq, Hash, PartialOrd, Ord, Debug)]
|
||||
pub struct Lsn(pub u64);
|
||||
|
||||
#[derive(Copy, Clone, PartialEq, Eq, Hash, PartialOrd, Ord, Debug)]
|
||||
pub struct Page {
|
||||
spcnode: u32,
|
||||
dbnode: u32,
|
||||
relnode: u32,
|
||||
forknum: u8,
|
||||
blkno: u32,
|
||||
}
|
||||
|
||||
impl Page {
|
||||
async fn read<Reader>(buf: &mut Reader) -> Result<Page>
|
||||
where
|
||||
Reader: tokio::io::AsyncRead + Unpin,
|
||||
{
|
||||
let spcnode = buf.read_u32().await?;
|
||||
let dbnode = buf.read_u32().await?;
|
||||
let relnode = buf.read_u32().await?;
|
||||
let forknum = buf.read_u8().await?;
|
||||
let blkno = buf.read_u32().await?;
|
||||
Ok(Page { spcnode, dbnode, relnode, forknum, blkno })
|
||||
}
|
||||
|
||||
async fn write(&self, buf: &mut BytesMut) -> Result<()> {
|
||||
buf.put_u32(self.spcnode);
|
||||
buf.put_u32(self.dbnode);
|
||||
buf.put_u32(self.relnode);
|
||||
buf.put_u8(self.forknum);
|
||||
buf.put_u32(self.blkno);
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
#[tokio::main]
|
||||
async fn main() -> Result<()> {
|
||||
let arg_matches = App::new("LALALA")
|
||||
.about("lalala")
|
||||
.version(GIT_VERSION)
|
||||
.arg(
|
||||
Arg::new("path")
|
||||
.help("Path to file to dump")
|
||||
.required(true)
|
||||
.index(1),
|
||||
)
|
||||
.arg(
|
||||
Arg::new("ps_connstr")
|
||||
.help("Connection string to pageserver")
|
||||
.required(true)
|
||||
.index(2),
|
||||
)
|
||||
.arg(
|
||||
Arg::new("tenant_hex")
|
||||
.help("TODO")
|
||||
.required(true)
|
||||
.index(3),
|
||||
)
|
||||
.arg(
|
||||
Arg::new("timeline")
|
||||
.help("TODO")
|
||||
.required(true)
|
||||
.index(4),
|
||||
)
|
||||
.get_matches();
|
||||
|
||||
let log_file = arg_matches.value_of("path").unwrap();
|
||||
let ps_connstr = arg_matches.value_of("ps_connstr").unwrap();
|
||||
let tenant_hex = arg_matches.value_of("tenant_hex").unwrap();
|
||||
let timeline = arg_matches.value_of("timeline").unwrap();
|
||||
|
||||
// Parse log lines
|
||||
let relevant = read_lines_buffered(log_file)
    .filter_map(|line| line.strip_prefix("wal-at-lsn-modified-page ").map(|x| x.to_string()));
|
||||
let mut lsn_page_pairs = Vec::<(Lsn, Page)>::new();
|
||||
for line in relevant {
|
||||
let (lsn, page) = line.split_once(" ").unwrap();
|
||||
|
||||
let lsn = hex::decode(lsn)?;
|
||||
let lsn = Lsn(AsyncReadExt::read_u64(&mut Cursor::new(lsn)).await?);
|
||||
|
||||
let page = hex::decode(page)?;
|
||||
let page = Page::read(&mut Cursor::new(page)).await?;
|
||||
|
||||
lsn_page_pairs.push((lsn, page))
|
||||
}
|
||||
|
||||
// Organize write info
|
||||
let mut writes_per_entry = HashMap::<Lsn, Vec<Page>>::new();
|
||||
for (lsn, page) in lsn_page_pairs.clone() {
|
||||
writes_per_entry.entry(lsn).or_insert(vec![]).push(page);
|
||||
}
|
||||
|
||||
// Print some stats
|
||||
let mut updates_per_page = HashMap::<Page, usize>::new();
|
||||
for (_, page) in lsn_page_pairs.clone() {
|
||||
updates_per_page.entry(page).or_insert(0).add_assign(1);
|
||||
}
|
||||
let mut updates_per_page: Vec<(&usize, &Page)> = updates_per_page
|
||||
.iter().map(|(k, v)| (v, k)).collect();
|
||||
updates_per_page.sort();
|
||||
updates_per_page.reverse();
|
||||
dbg!(&updates_per_page);
|
||||
|
||||
let hottest_page = updates_per_page[0].1;
|
||||
let first_update = lsn_page_pairs
|
||||
.iter()
|
||||
.filter(|(_lsn, page)| page == hottest_page)
|
||||
.map(|(lsn, _page)| lsn)
|
||||
.min()
|
||||
.unwrap();
|
||||
|
||||
// Get raw TCP connection to the pageserver postgres protocol port
|
||||
let mut socket = tokio::net::TcpStream::connect("localhost:15000").await?;
|
||||
let (client, conn) = tokio_postgres::Config::new()
|
||||
.host("127.0.0.1")
|
||||
.port(15000)
|
||||
.dbname("postgres")
|
||||
.user("zenith_admin")
|
||||
.connect_raw(&mut socket, tokio_postgres::NoTls)
|
||||
.await?;
|
||||
|
||||
// Enter pagestream protocol
|
||||
let init_query = format!("pagestream {} {}", tenant_hex, timeline);
|
||||
tokio::select! {
|
||||
_ = conn => panic!("AAAA"),
|
||||
_ = client.query(init_query.as_str(), &[]) => (),
|
||||
};
|
||||
|
||||
// TODO merge with LSM branch. Nothing to test otherwise, too many images.
|
||||
// - I get error: tried to request a page version that was garbage collected
|
||||
// TODO be mindful of caching, take multiple measurements, use monotonic time.
|
||||
// TODO make harder test case. More writes, fewer images.
|
||||
// TODO concurrent requests: multiple reads, also writes.
|
||||
use std::time::Instant;
|
||||
for (lsn, _pages) in writes_per_entry {
|
||||
if lsn >= *first_update {
|
||||
println!("Running get_page {:?} at {:?}", hottest_page, lsn);
|
||||
let start = Instant::now();
|
||||
let _page = get_page(&mut socket, &lsn, &hottest_page).await?;
|
||||
let duration = start.elapsed();
|
||||
println!("Time: {:?}", duration);
|
||||
}
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
pageserver/src/branches.rs: 427 lines (new file)
@@ -0,0 +1,427 @@
|
||||
//!
|
||||
//! Branch management code
|
||||
//!
|
||||
// TODO: move all paths construction to conf impl
|
||||
//
|
||||
|
||||
use anyhow::{bail, Context, Result};
|
||||
use postgres_ffi::ControlFileData;
|
||||
use serde::{Deserialize, Serialize};
|
||||
use std::{
|
||||
fs,
|
||||
path::Path,
|
||||
process::{Command, Stdio},
|
||||
str::FromStr,
|
||||
sync::Arc,
|
||||
};
|
||||
use tracing::*;
|
||||
|
||||
use zenith_utils::lsn::Lsn;
|
||||
use zenith_utils::zid::{ZTenantId, ZTimelineId};
|
||||
use zenith_utils::{crashsafe_dir, logging};
|
||||
|
||||
use crate::walredo::WalRedoManager;
|
||||
use crate::CheckpointConfig;
|
||||
use crate::{config::PageServerConf, repository::Repository};
|
||||
use crate::{import_datadir, LOG_FILE_NAME};
|
||||
use crate::{repository::RepositoryTimeline, tenant_mgr};
|
||||
|
||||
#[derive(Serialize, Deserialize, Clone)]
|
||||
pub struct BranchInfo {
|
||||
pub name: String,
|
||||
#[serde(with = "hex")]
|
||||
pub timeline_id: ZTimelineId,
|
||||
pub latest_valid_lsn: Lsn,
|
||||
pub ancestor_id: Option<String>,
|
||||
pub ancestor_lsn: Option<String>,
|
||||
pub current_logical_size: usize,
|
||||
pub current_logical_size_non_incremental: Option<usize>,
|
||||
}
|
||||
|
||||
impl BranchInfo {
|
||||
pub fn from_path<T: AsRef<Path>>(
|
||||
path: T,
|
||||
repo: &Arc<dyn Repository>,
|
||||
include_non_incremental_logical_size: bool,
|
||||
) -> Result<Self> {
|
||||
let path = path.as_ref();
|
||||
let name = path.file_name().unwrap().to_string_lossy().to_string();
|
||||
let timeline_id = std::fs::read_to_string(path)
|
||||
.with_context(|| {
|
||||
format!(
|
||||
"Failed to read branch file contents at path '{}'",
|
||||
path.display()
|
||||
)
|
||||
})?
|
||||
.parse::<ZTimelineId>()?;
|
||||
|
||||
let timeline = match repo.get_timeline(timeline_id)? {
|
||||
RepositoryTimeline::Local(local_entry) => local_entry,
|
||||
RepositoryTimeline::Remote { .. } => {
|
||||
bail!("Timeline {} is remote, no branches to display", timeline_id)
|
||||
}
|
||||
};
|
||||
|
||||
// we use ancestor lsn zero if we don't have an ancestor, so turn this into an option based on timeline id
|
||||
let (ancestor_id, ancestor_lsn) = match timeline.get_ancestor_timeline_id() {
|
||||
Some(ancestor_id) => (
|
||||
Some(ancestor_id.to_string()),
|
||||
Some(timeline.get_ancestor_lsn().to_string()),
|
||||
),
|
||||
None => (None, None),
|
||||
};
|
||||
|
||||
// non incremental size calculation can be heavy, so let it be optional
|
||||
// needed for tests to check size calculation
|
||||
let current_logical_size_non_incremental = include_non_incremental_logical_size
|
||||
.then(|| {
|
||||
timeline.get_current_logical_size_non_incremental(timeline.get_last_record_lsn())
|
||||
})
|
||||
.transpose()?;
|
||||
|
||||
Ok(BranchInfo {
|
||||
name,
|
||||
timeline_id,
|
||||
latest_valid_lsn: timeline.get_last_record_lsn(),
|
||||
ancestor_id,
|
||||
ancestor_lsn,
|
||||
current_logical_size: timeline.get_current_logical_size(),
|
||||
current_logical_size_non_incremental,
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Copy)]
|
||||
pub struct PointInTime {
|
||||
pub timelineid: ZTimelineId,
|
||||
pub lsn: Lsn,
|
||||
}
|
||||
|
||||
pub fn init_pageserver(conf: &'static PageServerConf, create_tenant: Option<&str>) -> Result<()> {
|
||||
// Initialize logger
|
||||
// use true as the daemonize parameter, because otherwise we pollute the zenith CLI output with several pages of info messages
|
||||
let _log_file = logging::init(LOG_FILE_NAME, true)?;
|
||||
|
||||
// We don't use the real WAL redo manager, because we don't want to spawn the WAL redo
|
||||
// process during repository initialization.
|
||||
//
|
||||
// FIXME: That caused trouble, because the WAL redo manager spawned a thread that launched
|
||||
// initdb in the background, and it kept running even after the "zenith init" had exited.
|
||||
// In tests, we started the page server immediately after that, so that initdb was still
|
||||
// running in the background, and we failed to run initdb again in the same directory. This
|
||||
// has been solved for the rapid init+start case now, but the general race condition remains
|
||||
// if you restart the server quickly. The WAL redo manager doesn't use a separate thread
|
||||
// anymore, but I think that could still happen.
|
||||
let dummy_redo_mgr = Arc::new(crate::walredo::DummyRedoManager {});
|
||||
|
||||
if let Some(tenantid) = create_tenant {
|
||||
let tenantid = ZTenantId::from_str(tenantid)?;
|
||||
println!("initializing tenantid {}", tenantid);
|
||||
create_repo(conf, tenantid, dummy_redo_mgr).context("failed to create repo")?;
|
||||
}
|
||||
crashsafe_dir::create_dir_all(conf.tenants_path())?;
|
||||
|
||||
println!("pageserver init succeeded");
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub fn create_repo(
|
||||
conf: &'static PageServerConf,
|
||||
tenantid: ZTenantId,
|
||||
wal_redo_manager: Arc<dyn WalRedoManager + Send + Sync>,
|
||||
) -> Result<Arc<dyn Repository>> {
|
||||
let repo_dir = conf.tenant_path(&tenantid);
|
||||
if repo_dir.exists() {
|
||||
bail!("repo for {} already exists", tenantid)
|
||||
}
|
||||
|
||||
// top-level dir may exist if we are creating it through CLI
|
||||
crashsafe_dir::create_dir_all(&repo_dir)
|
||||
.with_context(|| format!("could not create directory {}", repo_dir.display()))?;
|
||||
|
||||
crashsafe_dir::create_dir(conf.timelines_path(&tenantid))?;
|
||||
crashsafe_dir::create_dir_all(conf.branches_path(&tenantid))?;
|
||||
crashsafe_dir::create_dir_all(conf.tags_path(&tenantid))?;
|
||||
|
||||
info!("created directory structure in {}", repo_dir.display());
|
||||
|
||||
// create a new timeline directory
|
||||
let timeline_id = ZTimelineId::generate();
|
||||
let timelinedir = conf.timeline_path(&timeline_id, &tenantid);
|
||||
|
||||
crashsafe_dir::create_dir(&timelinedir)?;
|
||||
|
||||
let repo = Arc::new(crate::layered_repository::LayeredRepository::new(
|
||||
conf,
|
||||
wal_redo_manager,
|
||||
tenantid,
|
||||
conf.remote_storage_config.is_some(),
|
||||
));
|
||||
|
||||
// Load data into pageserver
|
||||
// TODO To implement zenith import we need to
|
||||
// move data loading out of create_repo()
|
||||
bootstrap_timeline(conf, tenantid, timeline_id, repo.as_ref())?;
|
||||
|
||||
Ok(repo)
|
||||
}
|
||||
|
||||
// Returns checkpoint LSN from controlfile
|
||||
fn get_lsn_from_controlfile(path: &Path) -> Result<Lsn> {
|
||||
// Read control file to extract the LSN
|
||||
let controlfile_path = path.join("global").join("pg_control");
|
||||
let controlfile = ControlFileData::decode(&fs::read(controlfile_path)?)?;
|
||||
let lsn = controlfile.checkPoint;
|
||||
|
||||
Ok(Lsn(lsn))
|
||||
}
|
||||
|
||||
// Create the cluster temporarily in 'initdbpath' directory inside the repository
|
||||
// to get bootstrap data for timeline initialization.
|
||||
//
|
||||
fn run_initdb(conf: &'static PageServerConf, initdbpath: &Path) -> Result<()> {
|
||||
info!("running initdb in {}... ", initdbpath.display());
|
||||
|
||||
let initdb_path = conf.pg_bin_dir().join("initdb");
|
||||
let initdb_output = Command::new(initdb_path)
|
||||
.args(&["-D", initdbpath.to_str().unwrap()])
|
||||
.args(&["-U", &conf.superuser])
|
||||
.args(&["-E", "utf8"])
|
||||
.arg("--no-instructions")
|
||||
// This is only used for a temporary installation that is deleted shortly after,
|
||||
// so no need to fsync it
|
||||
.arg("--no-sync")
|
||||
.env_clear()
|
||||
.env("LD_LIBRARY_PATH", conf.pg_lib_dir().to_str().unwrap())
|
||||
.env("DYLD_LIBRARY_PATH", conf.pg_lib_dir().to_str().unwrap())
|
||||
.stdout(Stdio::null())
|
||||
.output()
|
||||
.context("failed to execute initdb")?;
|
||||
if !initdb_output.status.success() {
|
||||
anyhow::bail!(
|
||||
"initdb failed: '{}'",
|
||||
String::from_utf8_lossy(&initdb_output.stderr)
|
||||
);
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
//
|
||||
// - run initdb to init temporary instance and get bootstrap data
|
||||
// - after initialization complete, remove the temp dir.
|
||||
//
|
||||
fn bootstrap_timeline(
|
||||
conf: &'static PageServerConf,
|
||||
tenantid: ZTenantId,
|
||||
tli: ZTimelineId,
|
||||
repo: &dyn Repository,
|
||||
) -> Result<()> {
|
||||
let _enter = info_span!("bootstrapping", timeline = %tli, tenant = %tenantid).entered();
|
||||
|
||||
let initdb_path = conf.tenant_path(&tenantid).join("tmp");
|
||||
|
||||
// Init a temporary repo to get bootstrap data
|
||||
run_initdb(conf, &initdb_path)?;
|
||||
let pgdata_path = initdb_path;
|
||||
|
||||
let lsn = get_lsn_from_controlfile(&pgdata_path)?.align();
|
||||
|
||||
// Import the contents of the data directory at the initial checkpoint
|
||||
// LSN, and any WAL after that.
|
||||
// Initdb lsn will be equal to last_record_lsn which will be set after import.
|
||||
// Because we know it upfront, we avoid an Option or a dummy zero value by passing it to create_empty_timeline.
|
||||
let timeline = repo.create_empty_timeline(tli, lsn)?;
|
||||
import_datadir::import_timeline_from_postgres_datadir(
|
||||
&pgdata_path,
|
||||
timeline.writer().as_ref(),
|
||||
lsn,
|
||||
)?;
|
||||
timeline.checkpoint(CheckpointConfig::Forced)?;
|
||||
|
||||
println!(
|
||||
"created initial timeline {} timeline.lsn {}",
|
||||
tli,
|
||||
timeline.get_last_record_lsn()
|
||||
);
|
||||
|
||||
let data = tli.to_string();
|
||||
fs::write(conf.branch_path("main", &tenantid), data)?;
|
||||
println!("created main branch");
|
||||
|
||||
// Remove temp dir. We don't need it anymore
|
||||
fs::remove_dir_all(pgdata_path)?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub(crate) fn get_branches(
|
||||
conf: &PageServerConf,
|
||||
tenantid: &ZTenantId,
|
||||
include_non_incremental_logical_size: bool,
|
||||
) -> Result<Vec<BranchInfo>> {
|
||||
let repo = tenant_mgr::get_repository_for_tenant(*tenantid)?;
|
||||
|
||||
// Each branch has a corresponding record (text file) in the refs/branches
|
||||
// with timeline_id.
|
||||
let branches_dir = conf.branches_path(tenantid);
|
||||
|
||||
std::fs::read_dir(&branches_dir)
|
||||
.with_context(|| {
|
||||
format!(
|
||||
"Found no branches directory '{}' for tenant {}",
|
||||
branches_dir.display(),
|
||||
tenantid
|
||||
)
|
||||
})?
|
||||
.map(|dir_entry_res| {
|
||||
let dir_entry = dir_entry_res.with_context(|| {
|
||||
format!(
|
||||
"Failed to list branches directory '{}' content for tenant {}",
|
||||
branches_dir.display(),
|
||||
tenantid
|
||||
)
|
||||
})?;
|
||||
BranchInfo::from_path(
|
||||
dir_entry.path(),
|
||||
&repo,
|
||||
include_non_incremental_logical_size,
|
||||
)
|
||||
})
|
||||
.collect()
|
||||
}
|
||||
|
||||
pub(crate) fn create_branch(
|
||||
conf: &PageServerConf,
|
||||
branchname: &str,
|
||||
startpoint_str: &str,
|
||||
tenantid: &ZTenantId,
|
||||
) -> Result<BranchInfo> {
|
||||
let repo = tenant_mgr::get_repository_for_tenant(*tenantid)?;
|
||||
|
||||
if conf.branch_path(branchname, tenantid).exists() {
|
||||
anyhow::bail!("branch {} already exists", branchname);
|
||||
}
|
||||
|
||||
let mut startpoint = parse_point_in_time(conf, startpoint_str, tenantid)?;
|
||||
let timeline = repo
|
||||
.get_timeline(startpoint.timelineid)?
|
||||
.local_timeline()
|
||||
.context("Cannot branch off the timeline that's not present locally")?;
|
||||
if startpoint.lsn == Lsn(0) {
|
||||
// Find end of WAL on the old timeline
|
||||
let end_of_wal = timeline.get_last_record_lsn();
|
||||
info!("branching at end of WAL: {}", end_of_wal);
|
||||
startpoint.lsn = end_of_wal;
|
||||
} else {
|
||||
// Wait for the WAL to arrive and be processed on the parent branch up
|
||||
// to the requested branch point. The repository code itself doesn't
|
||||
// require it, but if we start to receive WAL on the new timeline,
|
||||
// decoding the new WAL might need to look up previous pages, relation
|
||||
// sizes etc. and that would get confused if the previous page versions
|
||||
// are not in the repository yet.
|
||||
timeline.wait_lsn(startpoint.lsn)?;
|
||||
}
|
||||
startpoint.lsn = startpoint.lsn.align();
|
||||
if timeline.get_ancestor_lsn() > startpoint.lsn {
|
||||
// can we safely just branch from the ancestor instead?
|
||||
anyhow::bail!(
|
||||
"invalid startpoint {} for the branch {}: less than timeline ancestor lsn {:?}",
|
||||
startpoint.lsn,
|
||||
branchname,
|
||||
timeline.get_ancestor_lsn()
|
||||
);
|
||||
}
|
||||
|
||||
let new_timeline_id = ZTimelineId::generate();
|
||||
|
||||
// Forward entire timeline creation routine to repository
|
||||
// backend, so it can do all needed initialization
|
||||
repo.branch_timeline(startpoint.timelineid, new_timeline_id, startpoint.lsn)?;
|
||||
|
||||
// Remember the human-readable branch name for the new timeline.
|
||||
// FIXME: there's a race condition, if you create a branch with the same
|
||||
// name concurrently.
|
||||
let data = new_timeline_id.to_string();
|
||||
fs::write(conf.branch_path(branchname, tenantid), data)?;
|
||||
|
||||
Ok(BranchInfo {
|
||||
name: branchname.to_string(),
|
||||
timeline_id: new_timeline_id,
|
||||
latest_valid_lsn: startpoint.lsn,
|
||||
ancestor_id: Some(startpoint.timelineid.to_string()),
|
||||
ancestor_lsn: Some(startpoint.lsn.to_string()),
|
||||
current_logical_size: 0,
|
||||
current_logical_size_non_incremental: Some(0),
|
||||
})
|
||||
}
|
||||
|
||||
//
|
||||
// Parse user-given string that represents a point-in-time.
|
||||
//
|
||||
// We support multiple variants:
|
||||
//
|
||||
// Raw timeline id in hex, meaning the end of that timeline:
|
||||
// bc62e7d612d0e6fe8f99a6dd2f281f9d
|
||||
//
|
||||
// A specific LSN on a timeline:
|
||||
// bc62e7d612d0e6fe8f99a6dd2f281f9d@2/15D3DD8
|
||||
//
|
||||
// Same, with a human-friendly branch name:
|
||||
// main
|
||||
// main@2/15D3DD8
|
||||
//
|
||||
// Human-friendly tag name:
|
||||
// mytag
|
||||
//
|
||||
//
|
||||
fn parse_point_in_time(
|
||||
conf: &PageServerConf,
|
||||
s: &str,
|
||||
tenantid: &ZTenantId,
|
||||
) -> Result<PointInTime> {
|
||||
let mut strings = s.split('@');
|
||||
let name = strings.next().unwrap();
|
||||
|
||||
let lsn = strings
|
||||
.next()
|
||||
.map(Lsn::from_str)
|
||||
.transpose()
|
||||
.context("invalid LSN in point-in-time specification")?;
|
||||
|
||||
// Check if it's a tag
|
||||
if lsn.is_none() {
|
||||
let tagpath = conf.tag_path(name, tenantid);
|
||||
if tagpath.exists() {
|
||||
let pointstr = fs::read_to_string(tagpath)?;
|
||||
|
||||
return parse_point_in_time(conf, &pointstr, tenantid);
|
||||
}
|
||||
}
|
||||
|
||||
// Check if it's a branch
|
||||
// Check if it's branch @ LSN
|
||||
let branchpath = conf.branch_path(name, tenantid);
|
||||
if branchpath.exists() {
|
||||
let pointstr = fs::read_to_string(branchpath)?;
|
||||
|
||||
let mut result = parse_point_in_time(conf, &pointstr, tenantid)?;
|
||||
|
||||
result.lsn = lsn.unwrap_or(Lsn(0));
|
||||
return Ok(result);
|
||||
}
|
||||
|
||||
// Check if it's a timelineid
|
||||
// Check if it's timelineid @ LSN
|
||||
if let Ok(timelineid) = ZTimelineId::from_str(name) {
|
||||
let tlipath = conf.timeline_path(&timelineid, tenantid);
|
||||
if tlipath.exists() {
|
||||
return Ok(PointInTime {
|
||||
timelineid,
|
||||
lsn: lsn.unwrap_or(Lsn(0)),
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
bail!("could not parse point-in-time {}", s);
|
||||
}
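// Illustrative only (not part of the change above; ids and branch/tag names are hypothetical):
// a sketch of how the formats described in the comment above map to results.
//
// let pit = parse_point_in_time(conf, "main", &tenantid)?;                    // branch "main", lsn = Lsn(0), i.e. branch at end of WAL
// let pit = parse_point_in_time(conf, "main@2/15D3DD8", &tenantid)?;          // branch "main" at LSN 2/15D3DD8
// let pit = parse_point_in_time(conf, "bc62e7d612d0e6fe8f99a6dd2f281f9d", &tenantid)?; // raw timeline id, lsn = Lsn(0)
// let pit = parse_point_in_time(conf, "mytag", &tenantid)?;                   // tag, resolved recursively via its tag file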
|
||||
@@ -392,6 +392,22 @@ impl PageServerConf {
|
||||
self.tenants_path().join(tenantid.to_string())
|
||||
}
|
||||
|
||||
pub fn tags_path(&self, tenantid: &ZTenantId) -> PathBuf {
|
||||
self.tenant_path(tenantid).join("refs").join("tags")
|
||||
}
|
||||
|
||||
pub fn tag_path(&self, tag_name: &str, tenantid: &ZTenantId) -> PathBuf {
|
||||
self.tags_path(tenantid).join(tag_name)
|
||||
}
|
||||
|
||||
pub fn branches_path(&self, tenantid: &ZTenantId) -> PathBuf {
|
||||
self.tenant_path(tenantid).join("refs").join("branches")
|
||||
}
|
||||
|
||||
pub fn branch_path(&self, branch_name: &str, tenantid: &ZTenantId) -> PathBuf {
|
||||
self.branches_path(tenantid).join(branch_name)
|
||||
}
|
||||
|
||||
pub fn timelines_path(&self, tenantid: &ZTenantId) -> PathBuf {
|
||||
self.tenant_path(tenantid).join(TIMELINES_SEGMENT_NAME)
|
||||
}
|
||||
@@ -400,6 +416,10 @@ impl PageServerConf {
|
||||
self.timelines_path(tenantid).join(timelineid.to_string())
|
||||
}
|
||||
|
||||
pub fn ancestor_path(&self, timelineid: &ZTimelineId, tenantid: &ZTenantId) -> PathBuf {
|
||||
self.timeline_path(timelineid, tenantid).join("ancestor")
|
||||
}
|
||||
|
||||
//
|
||||
// Postgres distribution paths
|
||||
//
|
||||
|
||||
@@ -1,121 +1,20 @@
|
||||
use crate::timelines::TimelineInfo;
|
||||
use anyhow::{anyhow, bail, Context};
|
||||
use serde::{Deserialize, Serialize};
|
||||
use zenith_utils::{
|
||||
lsn::Lsn,
|
||||
zid::{HexZTenantId, HexZTimelineId, ZNodeId, ZTenantId, ZTimelineId},
|
||||
};
|
||||
|
||||
use crate::ZTenantId;
|
||||
use zenith_utils::zid::ZNodeId;
|
||||
|
||||
#[derive(Serialize, Deserialize)]
|
||||
pub struct TimelineCreateRequest {
|
||||
pub new_timeline_id: Option<HexZTimelineId>,
|
||||
pub ancestor_timeline_id: Option<HexZTimelineId>,
|
||||
pub ancestor_start_lsn: Option<Lsn>,
|
||||
pub struct BranchCreateRequest {
|
||||
#[serde(with = "hex")]
|
||||
pub tenant_id: ZTenantId,
|
||||
pub name: String,
|
||||
pub start_point: String,
|
||||
}
|
||||
|
||||
#[derive(Serialize, Deserialize)]
|
||||
pub struct TenantCreateRequest {
|
||||
pub new_tenant_id: Option<HexZTenantId>,
|
||||
}
|
||||
|
||||
#[derive(Serialize, Deserialize)]
|
||||
pub struct TimelineInfoResponse {
|
||||
pub kind: String,
|
||||
#[serde(with = "hex")]
|
||||
timeline_id: ZTimelineId,
|
||||
#[serde(with = "hex")]
|
||||
tenant_id: ZTenantId,
|
||||
disk_consistent_lsn: String,
|
||||
last_record_lsn: Option<String>,
|
||||
prev_record_lsn: Option<String>,
|
||||
ancestor_timeline_id: Option<HexZTimelineId>,
|
||||
ancestor_lsn: Option<String>,
|
||||
current_logical_size: Option<usize>,
|
||||
current_logical_size_non_incremental: Option<usize>,
|
||||
}
|
||||
|
||||
impl From<TimelineInfo> for TimelineInfoResponse {
|
||||
fn from(other: TimelineInfo) -> Self {
|
||||
match other {
|
||||
TimelineInfo::Local {
|
||||
timeline_id,
|
||||
tenant_id,
|
||||
last_record_lsn,
|
||||
prev_record_lsn,
|
||||
ancestor_timeline_id,
|
||||
ancestor_lsn,
|
||||
disk_consistent_lsn,
|
||||
current_logical_size,
|
||||
current_logical_size_non_incremental,
|
||||
} => TimelineInfoResponse {
|
||||
kind: "Local".to_owned(),
|
||||
timeline_id,
|
||||
tenant_id,
|
||||
disk_consistent_lsn: disk_consistent_lsn.to_string(),
|
||||
last_record_lsn: Some(last_record_lsn.to_string()),
|
||||
prev_record_lsn: Some(prev_record_lsn.to_string()),
|
||||
ancestor_timeline_id: ancestor_timeline_id.map(HexZTimelineId::from),
|
||||
ancestor_lsn: ancestor_lsn.map(|lsn| lsn.to_string()),
|
||||
current_logical_size: Some(current_logical_size),
|
||||
current_logical_size_non_incremental,
|
||||
},
|
||||
TimelineInfo::Remote {
|
||||
timeline_id,
|
||||
tenant_id,
|
||||
disk_consistent_lsn,
|
||||
} => TimelineInfoResponse {
|
||||
kind: "Remote".to_owned(),
|
||||
timeline_id,
|
||||
tenant_id,
|
||||
disk_consistent_lsn: disk_consistent_lsn.to_string(),
|
||||
last_record_lsn: None,
|
||||
prev_record_lsn: None,
|
||||
ancestor_timeline_id: None,
|
||||
ancestor_lsn: None,
|
||||
current_logical_size: None,
|
||||
current_logical_size_non_incremental: None,
|
||||
},
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl TryFrom<TimelineInfoResponse> for TimelineInfo {
|
||||
type Error = anyhow::Error;
|
||||
|
||||
fn try_from(other: TimelineInfoResponse) -> anyhow::Result<Self> {
|
||||
let parse_lsn_hex_string = |lsn_string: String| {
|
||||
lsn_string
|
||||
.parse::<Lsn>()
|
||||
.with_context(|| format!("Failed to parse Lsn as hex string from '{}'", lsn_string))
|
||||
};
|
||||
|
||||
let disk_consistent_lsn = parse_lsn_hex_string(other.disk_consistent_lsn)?;
|
||||
Ok(match other.kind.as_str() {
|
||||
"Local" => TimelineInfo::Local {
|
||||
timeline_id: other.timeline_id,
|
||||
tenant_id: other.tenant_id,
|
||||
last_record_lsn: other
|
||||
.last_record_lsn
|
||||
.ok_or(anyhow!("Local timeline should have last_record_lsn"))
|
||||
.and_then(parse_lsn_hex_string)?,
|
||||
prev_record_lsn: other
|
||||
.prev_record_lsn
|
||||
.ok_or(anyhow!("Local timeline should have prev_record_lsn"))
|
||||
.and_then(parse_lsn_hex_string)?,
|
||||
ancestor_timeline_id: other.ancestor_timeline_id.map(ZTimelineId::from),
|
||||
ancestor_lsn: other.ancestor_lsn.map(parse_lsn_hex_string).transpose()?,
|
||||
disk_consistent_lsn,
|
||||
current_logical_size: other.current_logical_size.ok_or(anyhow!("Local timeline should have current_logical_size"))?,
|
||||
current_logical_size_non_incremental: other.current_logical_size_non_incremental,
|
||||
},
|
||||
"Remote" => TimelineInfo::Remote {
|
||||
timeline_id: other.timeline_id,
|
||||
tenant_id: other.tenant_id,
|
||||
disk_consistent_lsn,
|
||||
},
|
||||
unknown => bail!("Unknown timeline kind: {}", unknown),
|
||||
})
|
||||
}
|
||||
pub tenant_id: ZTenantId,
|
||||
}
|
||||
|
||||
#[derive(Serialize)]
|
||||
|
||||
@@ -22,7 +22,7 @@ paths:
|
||||
properties:
|
||||
id:
|
||||
type: integer
|
||||
/v1/tenant/{tenant_id}/timeline:
|
||||
/v1/timeline/{tenant_id}:
|
||||
parameters:
|
||||
- name: tenant_id
|
||||
in: path
|
||||
@@ -30,22 +30,19 @@ paths:
|
||||
schema:
|
||||
type: string
|
||||
format: hex
|
||||
- name: include-non-incremental-logical-size
|
||||
in: query
|
||||
schema:
|
||||
type: string
|
||||
description: Controls calculation of current_logical_size_non_incremental
|
||||
get:
|
||||
description: Get timelines for tenant
|
||||
description: List tenant timelines
|
||||
responses:
|
||||
"200":
|
||||
description: TimelineInfo
|
||||
description: array of brief timeline descriptions
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
type: array
|
||||
items:
|
||||
$ref: "#/components/schemas/TimelineInfo"
|
||||
# Currently just a timeline id string; once the remote index gets accessed,
# at least a remote/local timeline field will be added
|
||||
type: string
|
||||
"400":
|
||||
description: Error when no tenant id found in path
|
||||
content:
|
||||
@@ -70,7 +67,7 @@ paths:
|
||||
application/json:
|
||||
schema:
|
||||
$ref: "#/components/schemas/Error"
|
||||
/v1/tenant/{tenant_id}/timeline/{timeline_id}:
|
||||
/v1/timeline/{tenant_id}/{timeline_id}:
|
||||
parameters:
|
||||
- name: tenant_id
|
||||
in: path
|
||||
@@ -84,13 +81,8 @@ paths:
|
||||
schema:
|
||||
type: string
|
||||
format: hex
|
||||
- name: include-non-incremental-logical-size
|
||||
in: query
|
||||
schema:
|
||||
type: string
|
||||
description: Controls calculation of current_logical_size_non_incremental
|
||||
get:
|
||||
description: Get info about the timeline
|
||||
description: Get timeline info for tenant's remote timeline
|
||||
responses:
|
||||
"200":
|
||||
description: TimelineInfo
|
||||
@@ -99,7 +91,7 @@ paths:
|
||||
schema:
|
||||
$ref: "#/components/schemas/TimelineInfo"
|
||||
"400":
|
||||
description: Error when no tenant id found in path or no timeline id
|
||||
description: Error when no tenant id found in path or no branch name
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
@@ -122,7 +114,7 @@ paths:
|
||||
application/json:
|
||||
schema:
|
||||
$ref: "#/components/schemas/Error"
|
||||
/v1/tenant/{tenant_id}/timeline/:
|
||||
/v1/branch/{tenant_id}:
|
||||
parameters:
|
||||
- name: tenant_id
|
||||
in: path
|
||||
@@ -130,33 +122,24 @@ paths:
|
||||
schema:
|
||||
type: string
|
||||
format: hex
|
||||
post:
|
||||
description: |
|
||||
Create a timeline. Returns new timeline id on success.\
|
||||
If no new timeline id is specified in parameters, it would be generated. It's an error to recreate the same timeline.
|
||||
requestBody:
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
type: object
|
||||
properties:
|
||||
new_timeline_id:
|
||||
type: string
|
||||
format: hex
|
||||
ancestor_timeline_id:
|
||||
type: string
|
||||
format: hex
|
||||
ancestor_start_lsn:
|
||||
type: string
|
||||
- name: include-non-incremental-logical-size
|
||||
in: query
|
||||
schema:
|
||||
type: string
|
||||
description: Controls calculation of current_logical_size_non_incremental
|
||||
get:
|
||||
description: Get branches for tenant
|
||||
responses:
|
||||
"201":
|
||||
description: TimelineInfo
|
||||
"200":
|
||||
description: BranchInfo
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
$ref: "#/components/schemas/TimelineInfo"
|
||||
type: array
|
||||
items:
|
||||
$ref: "#/components/schemas/BranchInfo"
|
||||
"400":
|
||||
description: Malformed timeline create request
|
||||
description: Error when no tenant id found in path
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
@@ -173,12 +156,108 @@ paths:
|
||||
application/json:
|
||||
schema:
|
||||
$ref: "#/components/schemas/ForbiddenError"
|
||||
"409":
|
||||
description: Timeline already exists, creation skipped
|
||||
"500":
|
||||
description: Generic operation error
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
$ref: "#/components/schemas/AlreadyExistsError"
|
||||
$ref: "#/components/schemas/Error"
|
||||
/v1/branch/{tenant_id}/{branch_name}:
|
||||
parameters:
|
||||
- name: tenant_id
|
||||
in: path
|
||||
required: true
|
||||
schema:
|
||||
type: string
|
||||
format: hex
|
||||
- name: branch_name
|
||||
in: path
|
||||
required: true
|
||||
schema:
|
||||
type: string
|
||||
- name: include-non-incremental-logical-size
|
||||
in: query
|
||||
schema:
|
||||
type: string
|
||||
description: Controls calculation of current_logical_size_non_incremental
|
||||
get:
|
||||
description: Get branches for tenant
|
||||
responses:
|
||||
"200":
|
||||
description: BranchInfo
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
$ref: "#/components/schemas/BranchInfo"
|
||||
"400":
|
||||
description: Error when no tenant id found in path or no branch name
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
$ref: "#/components/schemas/Error"
|
||||
"401":
|
||||
description: Unauthorized Error
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
$ref: "#/components/schemas/UnauthorizedError"
|
||||
"403":
|
||||
description: Forbidden Error
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
$ref: "#/components/schemas/ForbiddenError"
|
||||
"500":
|
||||
description: Generic operation error
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
$ref: "#/components/schemas/Error"
|
||||
/v1/branch/:
|
||||
post:
|
||||
description: Create branch
|
||||
requestBody:
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
type: object
|
||||
required:
|
||||
- "tenant_id"
|
||||
- "name"
|
||||
- "start_point"
|
||||
properties:
|
||||
tenant_id:
|
||||
type: string
|
||||
format: hex
|
||||
name:
|
||||
type: string
|
||||
start_point:
|
||||
type: string
|
||||
responses:
|
||||
"201":
|
||||
description: BranchInfo
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
$ref: "#/components/schemas/BranchInfo"
|
||||
"400":
|
||||
description: Malformed branch create request
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
$ref: "#/components/schemas/Error"
|
||||
"401":
|
||||
description: Unauthorized Error
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
$ref: "#/components/schemas/UnauthorizedError"
|
||||
"403":
|
||||
description: Forbidden Error
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
$ref: "#/components/schemas/ForbiddenError"
|
||||
"500":
|
||||
description: Generic operation error
|
||||
content:
|
||||
@@ -216,26 +295,27 @@ paths:
|
||||
schema:
|
||||
$ref: "#/components/schemas/Error"
|
||||
post:
|
||||
description: |
|
||||
Create a tenant. Returns new tenant id on success.\
|
||||
If no new tenant id is specified in parameters, it would be generated. It's an error to recreate the same tenant.
|
||||
description: Create tenant
|
||||
requestBody:
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
type: object
|
||||
required:
|
||||
- "tenant_id"
|
||||
properties:
|
||||
new_tenant_id:
|
||||
tenant_id:
|
||||
type: string
|
||||
format: hex
|
||||
responses:
|
||||
"201":
|
||||
description: New tenant created successfully
|
||||
description: CREATED
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
type: string
|
||||
format: hex
|
||||
type: array
|
||||
items:
|
||||
type: string
|
||||
"400":
|
||||
description: Malformed tenant create request
|
||||
content:
|
||||
@@ -254,12 +334,6 @@ paths:
|
||||
application/json:
|
||||
schema:
|
||||
$ref: "#/components/schemas/ForbiddenError"
|
||||
"409":
|
||||
description: Tenant already exists, creation skipped
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
$ref: "#/components/schemas/AlreadyExistsError"
|
||||
"500":
|
||||
description: Generic operation error
|
||||
content:
|
||||
@@ -284,11 +358,38 @@ components:
|
||||
type: string
|
||||
state:
|
||||
type: string
|
||||
BranchInfo:
|
||||
type: object
|
||||
required:
|
||||
- name
|
||||
- timeline_id
|
||||
- latest_valid_lsn
|
||||
- current_logical_size
|
||||
properties:
|
||||
name:
|
||||
type: string
|
||||
timeline_id:
|
||||
type: string
|
||||
format: hex
|
||||
ancestor_id:
|
||||
type: string
|
||||
format: hex
|
||||
ancestor_lsn:
|
||||
type: string
|
||||
current_logical_size:
|
||||
type: integer
|
||||
current_logical_size_non_incremental:
|
||||
type: integer
|
||||
latest_valid_lsn:
|
||||
type: integer
|
||||
TimelineInfo:
|
||||
type: object
|
||||
required:
|
||||
- timeline_id
|
||||
- tenant_id
|
||||
- last_record_lsn
|
||||
- prev_record_lsn
|
||||
- start_lsn
|
||||
- disk_consistent_lsn
|
||||
properties:
|
||||
timeline_id:
|
||||
@@ -297,21 +398,19 @@ components:
|
||||
tenant_id:
|
||||
type: string
|
||||
format: hex
|
||||
ancestor_timeline_id:
|
||||
type: string
|
||||
format: hex
|
||||
last_record_lsn:
|
||||
type: string
|
||||
prev_record_lsn:
|
||||
type: string
|
||||
ancestor_timeline_id:
|
||||
type: string
|
||||
format: hex
|
||||
ancestor_lsn:
|
||||
start_lsn:
|
||||
type: string
|
||||
disk_consistent_lsn:
|
||||
type: string
|
||||
current_logical_size:
|
||||
type: integer
|
||||
current_logical_size_non_incremental:
|
||||
type: integer
|
||||
timeline_state:
|
||||
type: string
|
||||
|
||||
Error:
|
||||
type: object
|
||||
@@ -327,13 +426,6 @@ components:
|
||||
properties:
|
||||
msg:
|
||||
type: string
|
||||
AlreadyExistsError:
|
||||
type: object
|
||||
required:
|
||||
- msg
|
||||
properties:
|
||||
msg:
|
||||
type: string
|
||||
ForbiddenError:
|
||||
type: object
|
||||
required:
|
||||
|
||||
@@ -1,8 +1,9 @@
|
||||
use std::sync::Arc;
|
||||
|
||||
use anyhow::Result;
|
||||
use anyhow::{Context, Result};
|
||||
use hyper::StatusCode;
|
||||
use hyper::{Body, Request, Response, Uri};
|
||||
use serde::Serialize;
|
||||
use tracing::*;
|
||||
use zenith_utils::auth::JwtAuth;
|
||||
use zenith_utils::http::endpoint::attach_openapi_ui;
|
||||
@@ -13,17 +14,21 @@ use zenith_utils::http::{
|
||||
endpoint,
|
||||
error::HttpErrorBody,
|
||||
json::{json_request, json_response},
|
||||
request::get_request_param,
|
||||
request::parse_request_param,
|
||||
};
|
||||
use zenith_utils::http::{RequestExt, RouterBuilder};
|
||||
use zenith_utils::zid::{HexZTenantId, ZTimelineId};
|
||||
use zenith_utils::lsn::Lsn;
|
||||
use zenith_utils::zid::HexZTimelineId;
|
||||
use zenith_utils::zid::ZTimelineId;
|
||||
|
||||
use super::models::{
|
||||
StatusResponse, TenantCreateRequest, TimelineCreateRequest, TimelineInfoResponse,
|
||||
};
|
||||
use super::models::BranchCreateRequest;
|
||||
use super::models::StatusResponse;
|
||||
use super::models::TenantCreateRequest;
|
||||
use crate::branches::BranchInfo;
|
||||
use crate::repository::RepositoryTimeline;
|
||||
use crate::timelines::TimelineInfo;
|
||||
use crate::{config::PageServerConf, tenant_mgr, timelines, ZTenantId};
|
||||
use crate::repository::TimelineSyncState;
|
||||
use crate::{branches, config::PageServerConf, tenant_mgr, ZTenantId};
|
||||
|
||||
#[derive(Debug)]
|
||||
struct State {
|
||||
@@ -68,45 +73,23 @@ async fn status_handler(request: Request<Body>) -> Result<Response<Body>, ApiErr
|
||||
)?)
|
||||
}
|
||||
|
||||
async fn timeline_create_handler(mut request: Request<Body>) -> Result<Response<Body>, ApiError> {
|
||||
let tenant_id: ZTenantId = parse_request_param(&request, "tenant_id")?;
|
||||
let request_data: TimelineCreateRequest = json_request(&mut request).await?;
|
||||
async fn branch_create_handler(mut request: Request<Body>) -> Result<Response<Body>, ApiError> {
|
||||
let request_data: BranchCreateRequest = json_request(&mut request).await?;
|
||||
|
||||
check_permission(&request, Some(tenant_id))?;
|
||||
check_permission(&request, Some(request_data.tenant_id))?;
|
||||
|
||||
let new_timeline_info = tokio::task::spawn_blocking(move || {
|
||||
let _enter = info_span!("/timeline_create", tenant = %tenant_id, new_timeline = ?request_data.new_timeline_id, lsn=?request_data.ancestor_start_lsn).entered();
|
||||
timelines::create_timeline(
|
||||
let response_data = tokio::task::spawn_blocking(move || {
|
||||
let _enter = info_span!("/branch_create", name = %request_data.name, tenant = %request_data.tenant_id, startpoint=%request_data.start_point).entered();
|
||||
branches::create_branch(
|
||||
get_config(&request),
|
||||
tenant_id,
|
||||
request_data.new_timeline_id.map(ZTimelineId::from),
|
||||
request_data.ancestor_timeline_id.map(ZTimelineId::from),
|
||||
request_data.ancestor_start_lsn,
|
||||
&request_data.name,
|
||||
&request_data.start_point,
|
||||
&request_data.tenant_id,
|
||||
)
|
||||
})
|
||||
.await
|
||||
.map_err(ApiError::from_err)??;
|
||||
|
||||
Ok(match new_timeline_info {
|
||||
Some(info) => json_response(StatusCode::CREATED, TimelineInfoResponse::from(info))?,
|
||||
None => json_response(StatusCode::CONFLICT, ())?,
|
||||
})
|
||||
}
|
||||
|
||||
async fn timeline_list_handler(request: Request<Body>) -> Result<Response<Body>, ApiError> {
|
||||
let tenant_id: ZTenantId = parse_request_param(&request, "tenant_id")?;
|
||||
check_permission(&request, Some(tenant_id))?;
|
||||
let include_non_incremental_logical_size = get_include_non_incremental_logical_size(&request);
|
||||
let response_data: Vec<TimelineInfoResponse> = tokio::task::spawn_blocking(move || {
|
||||
let _enter = info_span!("timeline_list", tenant = %tenant_id).entered();
|
||||
crate::timelines::get_timelines(tenant_id, include_non_incremental_logical_size)
|
||||
})
|
||||
.await
|
||||
.map_err(ApiError::from_err)??
|
||||
.into_iter()
|
||||
.map(TimelineInfoResponse::from)
|
||||
.collect();
|
||||
Ok(json_response(StatusCode::OK, response_data)?)
|
||||
Ok(json_response(StatusCode::CREATED, response_data)?)
|
||||
}
|
||||
|
||||
// Gate non incremental logical size calculation behind a flag
|
||||
@@ -124,6 +107,112 @@ fn get_include_non_incremental_logical_size(request: &Request<Body>) -> bool {
|
||||
.unwrap_or(false)
|
||||
}
|
||||
|
||||
async fn branch_list_handler(request: Request<Body>) -> Result<Response<Body>, ApiError> {
|
||||
let tenantid: ZTenantId = parse_request_param(&request, "tenant_id")?;
|
||||
|
||||
let include_non_incremental_logical_size = get_include_non_incremental_logical_size(&request);
|
||||
|
||||
check_permission(&request, Some(tenantid))?;
|
||||
|
||||
let response_data = tokio::task::spawn_blocking(move || {
|
||||
let _enter = info_span!("branch_list", tenant = %tenantid).entered();
|
||||
crate::branches::get_branches(
|
||||
get_config(&request),
|
||||
&tenantid,
|
||||
include_non_incremental_logical_size,
|
||||
)
|
||||
})
|
||||
.await
|
||||
.map_err(ApiError::from_err)??;
|
||||
Ok(json_response(StatusCode::OK, response_data)?)
|
||||
}
|
||||
|
||||
async fn branch_detail_handler(request: Request<Body>) -> Result<Response<Body>, ApiError> {
|
||||
let tenantid: ZTenantId = parse_request_param(&request, "tenant_id")?;
|
||||
let branch_name: String = get_request_param(&request, "branch_name")?.to_string();
|
||||
let conf = get_state(&request).conf;
|
||||
let path = conf.branch_path(&branch_name, &tenantid);
|
||||
|
||||
let include_non_incremental_logical_size = get_include_non_incremental_logical_size(&request);
|
||||
|
||||
let response_data = tokio::task::spawn_blocking(move || {
|
||||
let _enter = info_span!("branch_detail", tenant = %tenantid, branch=%branch_name).entered();
|
||||
let repo = tenant_mgr::get_repository_for_tenant(tenantid)?;
|
||||
BranchInfo::from_path(path, &repo, include_non_incremental_logical_size)
|
||||
})
|
||||
.await
|
||||
.map_err(ApiError::from_err)??;
|
||||
|
||||
Ok(json_response(StatusCode::OK, response_data)?)
|
||||
}
|
||||
|
||||
async fn timeline_list_handler(request: Request<Body>) -> Result<Response<Body>, ApiError> {
|
||||
let tenant_id: ZTenantId = parse_request_param(&request, "tenant_id")?;
|
||||
check_permission(&request, Some(tenant_id))?;
|
||||
|
||||
let conf = get_state(&request).conf;
|
||||
let timelines_dir = conf.timelines_path(&tenant_id);
|
||||
|
||||
let mut timelines_dir_contents =
|
||||
tokio::fs::read_dir(&timelines_dir).await.with_context(|| {
|
||||
format!(
|
||||
"Failed to list timelines dir '{}' contents",
|
||||
timelines_dir.display()
|
||||
)
|
||||
})?;
|
||||
|
||||
let mut local_timelines = Vec::new();
|
||||
while let Some(entry) = timelines_dir_contents.next_entry().await.with_context(|| {
|
||||
format!(
|
||||
"Failed to list timelines dir '{}' contents",
|
||||
timelines_dir.display()
|
||||
)
|
||||
})? {
|
||||
let entry_path = entry.path();
|
||||
let entry_type = entry.file_type().await.with_context(|| {
|
||||
format!(
|
||||
"Failed to get file type of timeline dirs' entry '{}'",
|
||||
entry_path.display()
|
||||
)
|
||||
})?;
|
||||
|
||||
if entry_type.is_dir() {
|
||||
match entry.file_name().to_string_lossy().parse::<ZTimelineId>() {
|
||||
Ok(timeline_id) => local_timelines.push(timeline_id.to_string()),
|
||||
Err(e) => error!(
|
||||
"Failed to get parse timeline id from timeline dirs' entry '{}': {}",
|
||||
entry_path.display(),
|
||||
e
|
||||
),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Ok(json_response(StatusCode::OK, local_timelines)?)
|
||||
}
|
||||
|
||||
#[derive(Debug, Serialize)]
|
||||
#[serde(tag = "type")]
|
||||
enum TimelineInfo {
|
||||
Local {
|
||||
#[serde(with = "hex")]
|
||||
timeline_id: ZTimelineId,
|
||||
#[serde(with = "hex")]
|
||||
tenant_id: ZTenantId,
|
||||
ancestor_timeline_id: Option<HexZTimelineId>,
|
||||
last_record_lsn: Lsn,
|
||||
prev_record_lsn: Lsn,
|
||||
disk_consistent_lsn: Lsn,
|
||||
timeline_state: Option<TimelineSyncState>,
|
||||
},
|
||||
Remote {
|
||||
#[serde(with = "hex")]
|
||||
timeline_id: ZTimelineId,
|
||||
#[serde(with = "hex")]
|
||||
tenant_id: ZTenantId,
|
||||
},
|
||||
}
|
||||
|
||||
async fn timeline_detail_handler(request: Request<Body>) -> Result<Response<Body>, ApiError> {
|
||||
let tenant_id: ZTenantId = parse_request_param(&request, "tenant_id")?;
|
||||
check_permission(&request, Some(tenant_id))?;
|
||||
@@ -135,17 +224,26 @@ async fn timeline_detail_handler(request: Request<Body>) -> Result<Response<Body
|
||||
info_span!("timeline_detail_handler", tenant = %tenant_id, timeline = %timeline_id)
|
||||
.entered();
|
||||
let repo = tenant_mgr::get_repository_for_tenant(tenant_id)?;
|
||||
let include_non_incremental_logical_size =
|
||||
get_include_non_incremental_logical_size(&request);
|
||||
Ok::<_, anyhow::Error>(TimelineInfo::from_repo_timeline(
|
||||
tenant_id,
|
||||
repo.get_timeline(timeline_id)?,
|
||||
include_non_incremental_logical_size,
|
||||
))
|
||||
Ok::<_, anyhow::Error>(match repo.get_timeline(timeline_id)?.local_timeline() {
|
||||
None => TimelineInfo::Remote {
|
||||
timeline_id,
|
||||
tenant_id,
|
||||
},
|
||||
Some(timeline) => TimelineInfo::Local {
|
||||
timeline_id,
|
||||
tenant_id,
|
||||
ancestor_timeline_id: timeline
|
||||
.get_ancestor_timeline_id()
|
||||
.map(HexZTimelineId::from),
|
||||
disk_consistent_lsn: timeline.get_disk_consistent_lsn(),
|
||||
last_record_lsn: timeline.get_last_record_lsn(),
|
||||
prev_record_lsn: timeline.get_prev_record_lsn(),
|
||||
timeline_state: repo.get_timeline_state(timeline_id),
|
||||
},
|
||||
})
|
||||
})
|
||||
.await
|
||||
.map_err(ApiError::from_err)?
|
||||
.map(TimelineInfoResponse::from)?;
|
||||
.map_err(ApiError::from_err)??;
|
||||
|
||||
Ok(json_response(StatusCode::OK, response_data)?)
|
||||
}
|
||||
@@ -162,7 +260,7 @@ async fn timeline_attach_handler(request: Request<Body>) -> Result<Response<Body
|
||||
.entered();
|
||||
let repo = tenant_mgr::get_repository_for_tenant(tenant_id)?;
|
||||
match repo.get_timeline(timeline_id)? {
|
||||
RepositoryTimeline::Local { .. } => {
|
||||
RepositoryTimeline::Local(_) => {
|
||||
anyhow::bail!("Timeline with id {} is already local", timeline_id)
|
||||
}
|
||||
RepositoryTimeline::Remote {
|
||||
@@ -222,20 +320,13 @@ async fn tenant_create_handler(mut request: Request<Body>) -> Result<Response<Bo
|
||||
|
||||
let request_data: TenantCreateRequest = json_request(&mut request).await?;
|
||||
|
||||
let new_tenant_id = tokio::task::spawn_blocking(move || {
|
||||
let _enter = info_span!("tenant_create", tenant = ?request_data.new_tenant_id).entered();
|
||||
tenant_mgr::create_tenant_repository(
|
||||
get_config(&request),
|
||||
request_data.new_tenant_id.map(ZTenantId::from),
|
||||
)
|
||||
tokio::task::spawn_blocking(move || {
|
||||
let _enter = info_span!("tenant_create", tenant = %request_data.tenant_id).entered();
|
||||
tenant_mgr::create_repository_for_tenant(get_config(&request), request_data.tenant_id)
|
||||
})
|
||||
.await
|
||||
.map_err(ApiError::from_err)??;
|
||||
|
||||
Ok(match new_tenant_id {
|
||||
Some(id) => json_response(StatusCode::CREATED, HexZTenantId::from(id))?,
|
||||
None => json_response(StatusCode::CONFLICT, ())?,
|
||||
})
|
||||
Ok(json_response(StatusCode::CREATED, ())?)
|
||||
}
|
||||
|
||||
async fn handler_404(_: Request<Body>) -> Result<Response<Body>, ApiError> {
|
||||
@@ -265,21 +356,23 @@ pub fn make_router(
|
||||
router
|
||||
.data(Arc::new(State::new(conf, auth)))
|
||||
.get("/v1/status", status_handler)
|
||||
.get("/v1/tenant", tenant_list_handler)
|
||||
.post("/v1/tenant", tenant_create_handler)
|
||||
.get("/v1/tenant/:tenant_id/timeline", timeline_list_handler)
|
||||
.post("/v1/tenant/:tenant_id/timeline", timeline_create_handler)
|
||||
.get("/v1/timeline/:tenant_id", timeline_list_handler)
|
||||
.get(
|
||||
"/v1/tenant/:tenant_id/timeline/:timeline_id",
|
||||
"/v1/timeline/:tenant_id/:timeline_id",
|
||||
timeline_detail_handler,
|
||||
)
|
||||
.post(
|
||||
"/v1/tenant/:tenant_id/timeline/:timeline_id/attach",
|
||||
"/v1/timeline/:tenant_id/:timeline_id/attach",
|
||||
timeline_attach_handler,
|
||||
)
|
||||
.post(
|
||||
"/v1/tenant/:tenant_id/timeline/:timeline_id/detach",
|
||||
"/v1/timeline/:tenant_id/:timeline_id/detach",
|
||||
timeline_detach_handler,
|
||||
)
|
||||
.get("/v1/branch/:tenant_id", branch_list_handler)
|
||||
.get("/v1/branch/:tenant_id/:branch_name", branch_detail_handler)
|
||||
.post("/v1/branch", branch_create_handler)
|
||||
.get("/v1/tenant", tenant_list_handler)
|
||||
.post("/v1/tenant", tenant_create_handler)
|
||||
.any(handler_404)
|
||||
}
|
||||
|
||||
@@ -137,20 +137,19 @@ pub struct LayeredRepository {
|
||||
/// Public interface
|
||||
impl Repository for LayeredRepository {
|
||||
fn get_timeline(&self, timelineid: ZTimelineId) -> Result<RepositoryTimeline> {
|
||||
Ok(RepositoryTimeline::from(self.get_or_init_timeline(
|
||||
timelineid,
|
||||
&mut self.timelines.lock().unwrap(),
|
||||
)?))
|
||||
}
|
||||
|
||||
fn list_timelines(&self) -> Result<Vec<RepositoryTimeline>> {
|
||||
Ok(self
|
||||
.timelines
|
||||
.lock()
|
||||
.unwrap()
|
||||
.values()
|
||||
.map(|timeline_entry| RepositoryTimeline::from(timeline_entry.clone()))
|
||||
.collect())
|
||||
let mut timelines = self.timelines.lock().unwrap();
|
||||
Ok(
|
||||
match self.get_or_init_timeline(timelineid, &mut timelines)? {
|
||||
LayeredTimelineEntry::Local(local) => RepositoryTimeline::Local(local),
|
||||
LayeredTimelineEntry::Remote {
|
||||
id,
|
||||
disk_consistent_lsn,
|
||||
} => RepositoryTimeline::Remote {
|
||||
id,
|
||||
disk_consistent_lsn,
|
||||
},
|
||||
},
|
||||
)
|
||||
}
|
||||
|
||||
fn create_empty_timeline(
|
||||
@@ -429,24 +428,6 @@ impl LayeredTimelineEntry {
|
||||
}
|
||||
}
|
||||
|
||||
impl From<LayeredTimelineEntry> for RepositoryTimeline {
|
||||
fn from(layered_timeline: LayeredTimelineEntry) -> Self {
|
||||
match layered_timeline {
|
||||
LayeredTimelineEntry::Local(timeline) => RepositoryTimeline::Local {
|
||||
id: timeline.timelineid,
|
||||
timeline,
|
||||
},
|
||||
LayeredTimelineEntry::Remote {
|
||||
id,
|
||||
disk_consistent_lsn,
|
||||
} => RepositoryTimeline::Remote {
|
||||
id,
|
||||
disk_consistent_lsn,
|
||||
},
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Private functions
|
||||
impl LayeredRepository {
|
||||
// Implementation of the public `get_timeline` function. This differs from the public
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
pub mod basebackup;
|
||||
pub mod branches;
|
||||
pub mod config;
|
||||
pub mod http;
|
||||
pub mod import_datadir;
|
||||
@@ -11,7 +12,6 @@ pub mod repository;
|
||||
pub mod tenant_mgr;
|
||||
pub mod tenant_threads;
|
||||
pub mod thread_mgr;
|
||||
pub mod timelines;
|
||||
pub mod virtual_file;
|
||||
pub mod walingest;
|
||||
pub mod walreceiver;
|
||||
|
||||
@@ -62,3 +62,11 @@ Based on previous evaluation, even `rusoto-s3` could be a better choice over thi
|
||||
|
||||
So far, we don't adjust the remote storage based on GC thread loop results; only the checkpointer loop affects the remote storage.
The index module could be used as a base to implement a deferred GC mechanism, a "defragmentation" that repacks archives into new ones after GC is done removing the files from the archives.

* branches implementation could be improved

Currently, there's code to sync the branches along with the timeline files: on upload, every local branch file that is missing remotely is uploaded;
on the timeline download, missing remote branch files are downloaded.

A branch is a per-tenant entity, yet the current implementation requires synchronizing a timeline first to get the branch files locally.
Currently, there's no other way to learn about the remote branch files, nor are the file contents verified or updated.
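
A minimal sketch of that idea, reusing names from the sync code (`tenant_branch_files`, `RemoteTimelineIndex::branch_files`); it is an illustration of the intent, not the exact implementation:

```rust
// Branch files are treated as immutable, so syncing them is a plain set difference in both directions.
let local = tenant_branch_files(conf, tenant_id).await?;                  // HashSet<RelativePath> found on local disk
let remote = index.branch_files(tenant_id).cloned().unwrap_or_default();  // HashSet<RelativePath> known to the remote index
for local_only in local.difference(&remote) {
    // upload the local-only branch file to the remote storage
}
for remote_only in remote.difference(&local) {
    // download the remote-only branch file into conf.branches_path(&tenant_id)
}
```
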
@@ -14,6 +14,13 @@
|
||||
//! Only GC removes local timeline files; the GC support is not added to the sync currently,
//! yet downloading extra files is not critically bad at this stage: GC can remove those again.
//!
//! Along with the timeline files, branch files are uploaded and downloaded every time a corresponding sync task is processed.
//! For simplicity, branch files are also treated as immutable: only missing files are uploaded or downloaded; no removals, amendments or file content checks are done.
//! Also, the branches are copied as separate files, with no extra compression applied.
//! Although branch information currently belongs to tenants, a tenant's timeline sync is required to upload or download the branch files; also, there's no way to know
//! the branch sync state outside of the sync loop.
//! This implementation is currently considered temporary and is subject to change later.
//!
//! During the loop startup, an initial [`RemoteTimelineIndex`] state is constructed via listing the remote storage contents.
//! It's enough to poll the remote state once on startup only, due to agreement that the pageserver has
//! an exclusive write access to the remote storage: new files appear in the storage only after the same
@@ -59,6 +66,7 @@
//! NOTE: No real content or checksum check happens right now; this is a subject for later improvement.
//!
//! After the whole timeline is downloaded, the [`crate::tenant_mgr::set_timeline_states`] function is used to update the pageserver's in-memory state for the timeline processed.
//! No extra branch registration is done.
//!
//! When the pageserver signals shutdown, the current sync task gets finished and the loop exits.
|
||||
|
||||
@@ -69,7 +77,7 @@ pub mod index;
|
||||
mod upload;
|
||||
|
||||
use std::{
|
||||
collections::{BTreeSet, HashMap, VecDeque},
|
||||
collections::{BTreeSet, HashMap, HashSet, VecDeque},
|
||||
num::{NonZeroU32, NonZeroUsize},
|
||||
path::{Path, PathBuf},
|
||||
sync::Arc,
|
||||
@@ -79,6 +87,7 @@ use anyhow::{bail, Context};
|
||||
use futures::stream::{FuturesUnordered, StreamExt};
|
||||
use lazy_static::lazy_static;
|
||||
use tokio::{
|
||||
fs,
|
||||
runtime::Runtime,
|
||||
sync::{
|
||||
mpsc::{self, UnboundedReceiver},
|
||||
@@ -92,7 +101,8 @@ use self::{
|
||||
compression::ArchiveHeader,
|
||||
download::{download_timeline, DownloadedTimeline},
|
||||
index::{
|
||||
ArchiveDescription, ArchiveId, RemoteTimeline, RemoteTimelineIndex, TimelineIndexEntry,
|
||||
ArchiveDescription, ArchiveId, RelativePath, RemoteTimeline, RemoteTimelineIndex,
|
||||
TimelineIndexEntry,
|
||||
},
|
||||
upload::upload_timeline_checkpoint,
|
||||
};
|
||||
@@ -833,6 +843,28 @@ async fn download_archive_header<
|
||||
Ok(header)
|
||||
}
|
||||
|
||||
async fn tenant_branch_files(
|
||||
conf: &'static PageServerConf,
|
||||
tenant_id: ZTenantId,
|
||||
) -> anyhow::Result<HashSet<RelativePath>> {
|
||||
let branches_dir = conf.branches_path(&tenant_id);
|
||||
if !branches_dir.exists() {
|
||||
return Ok(HashSet::new());
|
||||
}
|
||||
|
||||
let mut branch_entries = fs::read_dir(&branches_dir)
|
||||
.await
|
||||
.context("Failed to list tenant branches dir contents")?;
|
||||
|
||||
let mut branch_files = HashSet::new();
|
||||
while let Some(branch_entry) = branch_entries.next_entry().await? {
|
||||
if branch_entry.file_type().await?.is_file() {
|
||||
branch_files.insert(RelativePath::new(&branches_dir, branch_entry.path())?);
|
||||
}
|
||||
}
|
||||
Ok(branch_files)
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod test_utils {
|
||||
use std::{
|
||||
@@ -939,9 +971,30 @@ mod test_utils {
|
||||
"Index contains unexpected sync ids"
|
||||
);
|
||||
|
||||
let mut actual_branches = BTreeMap::new();
|
||||
let mut expected_branches = BTreeMap::new();
|
||||
let mut actual_timeline_entries = BTreeMap::new();
|
||||
let mut expected_timeline_entries = BTreeMap::new();
|
||||
for sync_id in actual_sync_ids {
|
||||
actual_branches.insert(
|
||||
sync_id.tenant_id,
|
||||
index_read
|
||||
.branch_files(sync_id.tenant_id)
|
||||
.into_iter()
|
||||
.flat_map(|branch_paths| branch_paths.iter())
|
||||
.cloned()
|
||||
.collect::<BTreeSet<_>>(),
|
||||
);
|
||||
expected_branches.insert(
|
||||
sync_id.tenant_id,
|
||||
expected_index_with_descriptions
|
||||
.branch_files(sync_id.tenant_id)
|
||||
.into_iter()
|
||||
.flat_map(|branch_paths| branch_paths.iter())
|
||||
.cloned()
|
||||
.collect::<BTreeSet<_>>(),
|
||||
);
|
||||
|
||||
actual_timeline_entries.insert(
|
||||
sync_id,
|
||||
index_read.timeline_entry(&sync_id).unwrap().clone(),
|
||||
@@ -956,6 +1009,11 @@ mod test_utils {
|
||||
}
|
||||
drop(index_read);
|
||||
|
||||
assert_eq!(
|
||||
actual_branches, expected_branches,
|
||||
"Index contains unexpected branches"
|
||||
);
|
||||
|
||||
for (sync_id, actual_timeline_entry) in actual_timeline_entries {
|
||||
let expected_timeline_description = expected_timeline_entries
|
||||
.remove(&sync_id)
|
||||
|
||||
@@ -1,8 +1,10 @@
|
||||
//! Timeline synchronization logic to put files from archives on remote storage into pageserver's local directory.
|
||||
//! Currently, tenant branch files are also downloaded, but this does not appear final.
|
||||
|
||||
use std::{borrow::Cow, collections::BTreeSet, path::PathBuf, sync::Arc};
|
||||
|
||||
use anyhow::{ensure, Context};
|
||||
use futures::{stream::FuturesUnordered, StreamExt};
|
||||
use tokio::{fs, sync::RwLock};
|
||||
use tracing::{debug, error, trace, warn};
|
||||
use zenith_utils::{lsn::Lsn, zid::ZTenantId};
|
||||
@@ -12,8 +14,8 @@ use crate::{
|
||||
layered_repository::metadata::{metadata_path, TimelineMetadata},
|
||||
remote_storage::{
|
||||
storage_sync::{
|
||||
compression, index::TimelineIndexEntry, sync_queue, update_index_description, SyncKind,
|
||||
SyncTask,
|
||||
compression, index::TimelineIndexEntry, sync_queue, tenant_branch_files,
|
||||
update_index_description, SyncKind, SyncTask,
|
||||
},
|
||||
RemoteStorage, ZTenantTimelineId,
|
||||
},
|
||||
@@ -40,6 +42,8 @@ pub(super) enum DownloadedTimeline {
|
||||
/// Timeline files that already exist locally are skipped during the download, but the local metadata file is
|
||||
/// updated at the end of every checkpoint archive extraction.
|
||||
///
|
||||
/// Before any archives are considered, the branch files are checked locally and remotely; all remote-only files are downloaded.
|
||||
///
|
||||
/// On an error, bumps the retries count and reschedules the download, with updated archive skip list
|
||||
/// (for any new successful archive downloads and extractions).
|
||||
pub(super) async fn download_timeline<
|
||||
@@ -109,6 +113,22 @@ pub(super) async fn download_timeline<
|
||||
}
|
||||
};
|
||||
|
||||
if let Err(e) = download_missing_branches(conf, remote_assets.as_ref(), sync_id.tenant_id).await
|
||||
{
|
||||
error!(
|
||||
"Failed to download missing branches for sync id {}: {:?}",
|
||||
sync_id, e
|
||||
);
|
||||
sync_queue::push(SyncTask::new(
|
||||
sync_id,
|
||||
retries,
|
||||
SyncKind::Download(download),
|
||||
));
|
||||
return DownloadedTimeline::FailedAndRescheduled {
|
||||
disk_consistent_lsn,
|
||||
};
|
||||
}
|
||||
|
||||
debug!("Downloading timeline archives");
|
||||
let archives_to_download = remote_timeline
|
||||
.checkpoints()
|
||||
@@ -230,6 +250,82 @@ async fn read_local_metadata(
|
||||
.context("Failed to read local metadata files bytes")?)
|
||||
}
|
||||
|
||||
async fn download_missing_branches<
|
||||
P: std::fmt::Debug + Send + Sync + 'static,
|
||||
S: RemoteStorage<StoragePath = P> + Send + Sync + 'static,
|
||||
>(
|
||||
conf: &'static PageServerConf,
|
||||
(storage, index): &(S, RwLock<RemoteTimelineIndex>),
|
||||
tenant_id: ZTenantId,
|
||||
) -> anyhow::Result<()> {
|
||||
let local_branches = tenant_branch_files(conf, tenant_id)
|
||||
.await
|
||||
.context("Failed to list local branch files for the tenant")?;
|
||||
let local_branches_dir = conf.branches_path(&tenant_id);
|
||||
if !local_branches_dir.exists() {
|
||||
fs::create_dir_all(&local_branches_dir)
|
||||
.await
|
||||
.with_context(|| {
|
||||
format!(
|
||||
"Failed to create local branches directory at path '{}'",
|
||||
local_branches_dir.display()
|
||||
)
|
||||
})?;
|
||||
}
|
||||
|
||||
if let Some(remote_branches) = index.read().await.branch_files(tenant_id) {
|
||||
let mut remote_only_branches_downloads = remote_branches
|
||||
.difference(&local_branches)
|
||||
.map(|remote_only_branch| async move {
|
||||
let branches_dir = conf.branches_path(&tenant_id);
|
||||
let remote_branch_path = remote_only_branch.as_path(&branches_dir);
|
||||
let storage_path =
|
||||
storage.storage_path(&remote_branch_path).with_context(|| {
|
||||
format!(
|
||||
"Failed to derive a storage path for branch with local path '{}'",
|
||||
remote_branch_path.display()
|
||||
)
|
||||
})?;
|
||||
let mut target_file = fs::OpenOptions::new()
|
||||
.write(true)
|
||||
.create_new(true)
|
||||
.open(&remote_branch_path)
|
||||
.await
|
||||
.with_context(|| {
|
||||
format!(
|
||||
"Failed to create local branch file at '{}'",
|
||||
remote_branch_path.display()
|
||||
)
|
||||
})?;
|
||||
storage
|
||||
.download(&storage_path, &mut target_file)
|
||||
.await
|
||||
.with_context(|| {
|
||||
format!(
|
||||
"Failed to download branch file from the remote path {:?}",
|
||||
storage_path
|
||||
)
|
||||
})?;
|
||||
Ok::<_, anyhow::Error>(())
|
||||
})
|
||||
.collect::<FuturesUnordered<_>>();
|
||||
|
||||
let mut branch_downloads_failed = false;
|
||||
while let Some(download_result) = remote_only_branches_downloads.next().await {
|
||||
if let Err(e) = download_result {
|
||||
branch_downloads_failed = true;
|
||||
error!("Failed to download a branch file: {:?}", e);
|
||||
}
|
||||
}
|
||||
ensure!(
|
||||
!branch_downloads_failed,
|
||||
"Failed to download all branch files"
|
||||
);
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use std::collections::BTreeSet;
|
||||
|
||||
@@ -5,7 +5,7 @@
|
||||
//! This way in the future, the index could be restored fast from its serialized stored form.
|
||||
|
||||
use std::{
|
||||
collections::{BTreeMap, BTreeSet, HashMap},
|
||||
collections::{BTreeMap, BTreeSet, HashMap, HashSet},
|
||||
path::{Path, PathBuf},
|
||||
};
|
||||
|
||||
@@ -49,9 +49,10 @@ impl RelativePath {
|
||||
}
|
||||
|
||||
/// An index to track tenant files that exist on the remote storage.
|
||||
/// Currently, timeline archive files are tracked only.
|
||||
/// Currently, timeline archives and branch files are tracked.
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct RemoteTimelineIndex {
|
||||
branch_files: HashMap<ZTenantId, HashSet<RelativePath>>,
|
||||
timeline_files: HashMap<ZTenantTimelineId, TimelineIndexEntry>,
|
||||
}
|
||||
|
||||
@@ -64,6 +65,7 @@ impl RemoteTimelineIndex {
|
||||
paths: impl Iterator<Item = P>,
|
||||
) -> Self {
|
||||
let mut index = Self {
|
||||
branch_files: HashMap::new(),
|
||||
timeline_files: HashMap::new(),
|
||||
};
|
||||
for path in paths {
|
||||
@@ -96,6 +98,17 @@ impl RemoteTimelineIndex {
|
||||
pub fn all_sync_ids(&self) -> impl Iterator<Item = ZTenantTimelineId> + '_ {
|
||||
self.timeline_files.keys().copied()
|
||||
}
|
||||
|
||||
pub fn add_branch_file(&mut self, tenant_id: ZTenantId, path: RelativePath) {
|
||||
self.branch_files
|
||||
.entry(tenant_id)
|
||||
.or_insert_with(HashSet::new)
|
||||
.insert(path);
|
||||
}
|
||||
|
||||
pub fn branch_files(&self, tenant_id: ZTenantId) -> Option<&HashSet<RelativePath>> {
|
||||
self.branch_files.get(&tenant_id)
|
||||
}
|
||||
}
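// Illustrative only (not part of the change above): intended use of the new
// per-tenant branch-file tracking, assuming an existing mutable `index:
// RemoteTimelineIndex` and the tenant's `branches_dir` on local disk.
//
// let path = RelativePath::new(&branches_dir, branches_dir.join("main"))?;
// index.add_branch_file(tenant_id, path);
// let remote_branches = index.branch_files(tenant_id).cloned().unwrap_or_default();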
|
||||
|
||||
#[derive(Debug, Clone, PartialEq, Eq)]
|
||||
@@ -293,9 +306,20 @@ fn try_parse_index_entry(
|
||||
.parse::<ZTenantId>()
|
||||
.with_context(|| format!("Failed to parse tenant id from path '{}'", path.display()))?;
|
||||
|
||||
let branches_path = conf.branches_path(&tenant_id);
|
||||
let timelines_path = conf.timelines_path(&tenant_id);
|
||||
match path.strip_prefix(&timelines_path) {
|
||||
Ok(timelines_subpath) => {
|
||||
match (
|
||||
RelativePath::new(&branches_path, &path),
|
||||
path.strip_prefix(&timelines_path),
|
||||
) {
|
||||
(Ok(_), Ok(_)) => bail!(
|
||||
"Path '{}' cannot start with both branches '{}' and the timelines '{}' prefixes",
|
||||
path.display(),
|
||||
branches_path.display(),
|
||||
timelines_path.display()
|
||||
),
|
||||
(Ok(branches_entry), Err(_)) => index.add_branch_file(tenant_id, branches_entry),
|
||||
(Err(_), Ok(timelines_subpath)) => {
|
||||
let mut segments = timelines_subpath.iter();
|
||||
let timeline_id = segments
|
||||
.next()
|
||||
@@ -351,10 +375,11 @@ fn try_parse_index_entry(
|
||||
}
|
||||
}
|
||||
}
|
||||
Err(timelines_strip_error) => {
|
||||
(Err(branches_error), Err(timelines_strip_error)) => {
|
||||
bail!(
|
||||
"Path '{}' is not an archive entry '{}'",
|
||||
"Path '{}' is not an index entry: it's neither parsable as a branch entry '{:#}' nor as an archive entry '{}'",
|
||||
path.display(),
|
||||
branches_error,
|
||||
timelines_strip_error,
|
||||
)
|
||||
}
|
||||
|
||||
@@ -1,10 +1,13 @@
|
||||
//! Timeline synchronization logic to compress and upload to the remote storage all new timeline files from the checkpoints.
|
||||
//! Currently, tenant branch files are also uploaded, but this does not appear final.
|
||||
|
||||
use std::{borrow::Cow, collections::BTreeSet, path::PathBuf, sync::Arc};
|
||||
|
||||
use anyhow::ensure;
|
||||
use tokio::sync::RwLock;
|
||||
use anyhow::{ensure, Context};
|
||||
use futures::{stream::FuturesUnordered, StreamExt};
|
||||
use tokio::{fs, sync::RwLock};
|
||||
use tracing::{debug, error, warn};
|
||||
use zenith_utils::zid::ZTenantId;
|
||||
|
||||
use crate::{
|
||||
config::PageServerConf,
|
||||
@@ -12,7 +15,7 @@ use crate::{
|
||||
storage_sync::{
|
||||
compression,
|
||||
index::{RemoteTimeline, TimelineIndexEntry},
|
||||
sync_queue, update_index_description, SyncKind, SyncTask,
|
||||
sync_queue, tenant_branch_files, update_index_description, SyncKind, SyncTask,
|
||||
},
|
||||
RemoteStorage, ZTenantTimelineId,
|
||||
},
|
||||
@@ -23,6 +26,8 @@ use super::{compression::ArchiveHeader, index::RemoteTimelineIndex, NewCheckpoin
|
||||
/// Attempts to compress and upload given checkpoint files.
|
||||
/// No extra checks for overlapping files are made: download takes care of that, ensuring no non-metadata local timeline files are overwritten.
|
||||
///
|
||||
/// Before the checkpoint files are uploaded, branch files are uploaded, if any local ones are missing remotely.
|
||||
///
|
||||
/// On an error, bumps the retries count and reschedules the entire task.
|
||||
/// On success, populates index data with new downloads.
|
||||
pub(super) async fn upload_timeline_checkpoint<
|
||||
@@ -36,6 +41,19 @@ pub(super) async fn upload_timeline_checkpoint<
|
||||
retries: u32,
|
||||
) -> Option<bool> {
|
||||
debug!("Uploading checkpoint for sync id {}", sync_id);
|
||||
if let Err(e) = upload_missing_branches(config, remote_assets.as_ref(), sync_id.tenant_id).await
|
||||
{
|
||||
error!(
|
||||
"Failed to upload missing branches for sync id {}: {:?}",
|
||||
sync_id, e
|
||||
);
|
||||
sync_queue::push(SyncTask::new(
|
||||
sync_id,
|
||||
retries,
|
||||
SyncKind::Upload(new_checkpoint),
|
||||
));
|
||||
return Some(false);
|
||||
}
|
||||
let new_upload_lsn = new_checkpoint.metadata.disk_consistent_lsn();
|
||||
|
||||
let index = &remote_assets.1;
|
||||
@@ -183,6 +201,76 @@ async fn try_upload_checkpoint<
|
||||
.map(|(header, header_size, _)| (header, header_size))
|
||||
}
|
||||
|
||||
async fn upload_missing_branches<
|
||||
P: std::fmt::Debug + Send + Sync + 'static,
|
||||
S: RemoteStorage<StoragePath = P> + Send + Sync + 'static,
|
||||
>(
|
||||
config: &'static PageServerConf,
|
||||
(storage, index): &(S, RwLock<RemoteTimelineIndex>),
|
||||
tenant_id: ZTenantId,
|
||||
) -> anyhow::Result<()> {
|
||||
let local_branches = tenant_branch_files(config, tenant_id)
|
||||
.await
|
||||
.context("Failed to list local branch files for the tenant")?;
|
||||
let index_read = index.read().await;
|
||||
let remote_branches = index_read
|
||||
.branch_files(tenant_id)
|
||||
.cloned()
|
||||
.unwrap_or_default();
|
||||
drop(index_read);
|
||||
|
||||
let mut branch_uploads = local_branches
|
||||
.difference(&remote_branches)
|
||||
.map(|local_only_branch| async move {
|
||||
let local_branch_path = local_only_branch.as_path(&config.branches_path(&tenant_id));
|
||||
let storage_path = storage.storage_path(&local_branch_path).with_context(|| {
|
||||
format!(
|
||||
"Failed to derive a storage path for branch with local path '{}'",
|
||||
local_branch_path.display()
|
||||
)
|
||||
})?;
|
||||
let local_branch_file = fs::OpenOptions::new()
|
||||
.read(true)
|
||||
.open(&local_branch_path)
|
||||
.await
|
||||
.with_context(|| {
|
||||
format!(
|
||||
"Failed to open local branch file {} for reading",
|
||||
local_branch_path.display()
|
||||
)
|
||||
})?;
|
||||
storage
|
||||
.upload(local_branch_file, &storage_path)
|
||||
.await
|
||||
.with_context(|| {
|
||||
format!(
|
||||
"Failed to upload branch file to the remote path {:?}",
|
||||
storage_path
|
||||
)
|
||||
})?;
|
||||
Ok::<_, anyhow::Error>(local_only_branch)
|
||||
})
|
||||
.collect::<FuturesUnordered<_>>();
|
||||
|
||||
let mut branch_uploads_failed = false;
|
||||
while let Some(upload_result) = branch_uploads.next().await {
|
||||
match upload_result {
|
||||
Ok(local_only_branch) => index
|
||||
.write()
|
||||
.await
|
||||
.add_branch_file(tenant_id, local_only_branch.clone()),
|
||||
Err(e) => {
|
||||
error!("Failed to upload branch file: {:?}", e);
|
||||
branch_uploads_failed = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
ensure!(!branch_uploads_failed, "Failed to upload all branch files");
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use tempfile::tempdir;
|
||||
|
||||
@@ -36,10 +36,6 @@ pub trait Repository: Send + Sync {
|
||||
/// Get Timeline handle for given zenith timeline ID.
|
||||
fn get_timeline(&self, timelineid: ZTimelineId) -> Result<RepositoryTimeline>;
|
||||
|
||||
/// Lists timelines the repository contains.
|
||||
/// It is up to the repository's implementation to omit certain timelines that are not considered ready for use.
|
||||
fn list_timelines(&self) -> Result<Vec<RepositoryTimeline>>;
|
||||
|
||||
/// Create a new, empty timeline. The caller is responsible for loading data into it
|
||||
/// Initdb lsn is provided for timeline impl to be able to perform checks for some operations against it.
|
||||
fn create_empty_timeline(
|
||||
@@ -76,10 +72,7 @@ pub trait Repository: Send + Sync {
|
||||
pub enum RepositoryTimeline {
|
||||
/// Timeline, with its files present locally in pageserver's working directory.
|
||||
/// Loaded into pageserver's memory and ready to be used.
|
||||
Local {
|
||||
id: ZTimelineId,
|
||||
timeline: Arc<dyn Timeline>,
|
||||
},
|
||||
Local(Arc<dyn Timeline>),
|
||||
/// Timeline, found on the pageserver's remote storage, but not yet downloaded locally.
|
||||
Remote {
|
||||
id: ZTimelineId,
|
||||
@@ -90,24 +83,17 @@ pub enum RepositoryTimeline {
|
||||
|
||||
impl RepositoryTimeline {
|
||||
pub fn local_timeline(&self) -> Option<Arc<dyn Timeline>> {
|
||||
if let Self::Local { timeline, .. } = self {
|
||||
Some(Arc::clone(timeline))
|
||||
if let Self::Local(local_timeline) = self {
|
||||
Some(Arc::clone(local_timeline))
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
|
||||
pub fn id(&self) -> ZTimelineId {
|
||||
match self {
|
||||
Self::Local { id, .. } => *id,
|
||||
Self::Remote { id, .. } => *id,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// A state of the timeline synchronization with the remote storage.
|
||||
/// Contains `disk_consistent_lsn` of the corresponding remote timeline (latest checkpoint's disk_consistent_lsn).
|
||||
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
|
||||
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize)]
|
||||
pub enum TimelineSyncState {
|
||||
/// No further downloads from the remote storage are needed.
|
||||
/// The timeline state is up-to-date or ahead of the remote storage one,
|
||||
@@ -404,6 +390,7 @@ pub mod repo_harness {
|
||||
|
||||
let tenant_id = ZTenantId::generate();
|
||||
fs::create_dir_all(conf.tenant_path(&tenant_id))?;
|
||||
fs::create_dir_all(conf.branches_path(&tenant_id))?;
|
||||
|
||||
Ok(Self { conf, tenant_id })
|
||||
}
|
||||
|
||||
@@ -1,19 +1,19 @@
|
||||
//! This module acts as a switchboard to access different repositories managed by this
|
||||
//! page server.
|
||||
|
||||
use crate::branches;
|
||||
use crate::config::PageServerConf;
|
||||
use crate::layered_repository::LayeredRepository;
|
||||
use crate::repository::{Repository, Timeline, TimelineSyncState};
|
||||
use crate::thread_mgr;
|
||||
use crate::thread_mgr::ThreadKind;
|
||||
use crate::timelines;
|
||||
use crate::walredo::PostgresRedoManager;
|
||||
use crate::CheckpointConfig;
|
||||
use anyhow::{Context, Result};
|
||||
use anyhow::{bail, Context, Result};
|
||||
use lazy_static::lazy_static;
|
||||
use log::*;
|
||||
use serde::{Deserialize, Serialize};
|
||||
use std::collections::HashMap;
|
||||
use std::collections::{hash_map, HashMap};
|
||||
use std::fmt;
|
||||
use std::sync::{Arc, Mutex, MutexGuard};
|
||||
use zenith_utils::zid::{ZTenantId, ZTimelineId};
|
||||
@@ -177,27 +177,24 @@ pub fn shutdown_all_tenants() {
|
||||
}
|
||||
}
|
||||
|
||||
pub fn create_tenant_repository(
|
||||
pub fn create_repository_for_tenant(
|
||||
conf: &'static PageServerConf,
|
||||
new_tenant_id: Option<ZTenantId>,
|
||||
) -> Result<Option<ZTenantId>> {
|
||||
let new_tenant_id = new_tenant_id.unwrap_or_else(ZTenantId::generate);
|
||||
let wal_redo_manager = Arc::new(PostgresRedoManager::new(conf, new_tenant_id));
|
||||
match timelines::create_repo(conf, new_tenant_id, wal_redo_manager)? {
|
||||
Some(repo) => {
|
||||
access_tenants()
|
||||
.entry(new_tenant_id)
|
||||
.or_insert_with(|| Tenant {
|
||||
state: TenantState::Idle,
|
||||
repo,
|
||||
});
|
||||
Ok(Some(new_tenant_id))
|
||||
}
|
||||
None => {
|
||||
debug!("repository already exists for tenant {}", new_tenant_id);
|
||||
Ok(None)
|
||||
tenantid: ZTenantId,
|
||||
) -> Result<()> {
|
||||
let wal_redo_manager = Arc::new(PostgresRedoManager::new(conf, tenantid));
|
||||
let repo = branches::create_repo(conf, tenantid, wal_redo_manager)?;
|
||||
|
||||
match access_tenants().entry(tenantid) {
|
||||
hash_map::Entry::Occupied(_) => bail!("tenant {} already exists", tenantid),
|
||||
hash_map::Entry::Vacant(v) => {
|
||||
v.insert(Tenant {
|
||||
state: TenantState::Idle,
|
||||
repo,
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub fn get_tenant_state(tenantid: ZTenantId) -> Option<TenantState> {
|
||||
|
||||
@@ -1,408 +0,0 @@
|
||||
//!
|
||||
//! Timeline management code
|
||||
//
|
||||
|
||||
use anyhow::{anyhow, bail, Context, Result};
|
||||
use postgres_ffi::ControlFileData;
|
||||
use std::{
|
||||
fs,
|
||||
path::Path,
|
||||
process::{Command, Stdio},
|
||||
sync::Arc,
|
||||
};
|
||||
use tracing::*;
|
||||
|
||||
use zenith_utils::lsn::Lsn;
|
||||
use zenith_utils::zid::{ZTenantId, ZTimelineId};
|
||||
use zenith_utils::{crashsafe_dir, logging};
|
||||
|
||||
use crate::{config::PageServerConf, repository::Repository};
|
||||
use crate::{import_datadir, LOG_FILE_NAME};
|
||||
use crate::{layered_repository::LayeredRepository, walredo::WalRedoManager};
|
||||
use crate::{repository::RepositoryTimeline, tenant_mgr};
|
||||
use crate::{repository::Timeline, CheckpointConfig};
|
||||
|
||||
#[derive(Clone)]
|
||||
pub enum TimelineInfo {
|
||||
Local {
|
||||
timeline_id: ZTimelineId,
|
||||
tenant_id: ZTenantId,
|
||||
last_record_lsn: Lsn,
|
||||
prev_record_lsn: Lsn,
|
||||
ancestor_timeline_id: Option<ZTimelineId>,
|
||||
ancestor_lsn: Option<Lsn>,
|
||||
disk_consistent_lsn: Lsn,
|
||||
current_logical_size: usize,
|
||||
current_logical_size_non_incremental: Option<usize>,
|
||||
},
|
||||
Remote {
|
||||
timeline_id: ZTimelineId,
|
||||
tenant_id: ZTenantId,
|
||||
disk_consistent_lsn: Lsn,
|
||||
},
|
||||
}
|
||||
|
||||
impl TimelineInfo {
|
||||
pub fn from_repo_timeline(
|
||||
tenant_id: ZTenantId,
|
||||
repo_timeline: RepositoryTimeline,
|
||||
include_non_incremental_logical_size: bool,
|
||||
) -> Self {
|
||||
match repo_timeline {
|
||||
RepositoryTimeline::Local { id, timeline } => {
|
||||
let ancestor_timeline_id = timeline.get_ancestor_timeline_id();
|
||||
let ancestor_lsn = if ancestor_timeline_id.is_some() {
|
||||
Some(timeline.get_ancestor_lsn())
|
||||
} else {
|
||||
None
|
||||
};
|
||||
|
||||
Self::Local {
|
||||
timeline_id: id,
|
||||
tenant_id,
|
||||
last_record_lsn: timeline.get_last_record_lsn(),
|
||||
prev_record_lsn: timeline.get_prev_record_lsn(),
|
||||
ancestor_timeline_id,
|
||||
ancestor_lsn,
|
||||
disk_consistent_lsn: timeline.get_disk_consistent_lsn(),
|
||||
current_logical_size: timeline.get_current_logical_size(),
|
||||
current_logical_size_non_incremental: get_current_logical_size_non_incremental(
|
||||
include_non_incremental_logical_size,
|
||||
timeline.as_ref(),
|
||||
),
|
||||
}
|
||||
}
|
||||
RepositoryTimeline::Remote {
|
||||
id,
|
||||
disk_consistent_lsn,
|
||||
} => Self::Remote {
|
||||
timeline_id: id,
|
||||
tenant_id,
|
||||
disk_consistent_lsn,
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
pub fn from_dyn_timeline(
|
||||
tenant_id: ZTenantId,
|
||||
timeline_id: ZTimelineId,
|
||||
timeline: &dyn Timeline,
|
||||
include_non_incremental_logical_size: bool,
|
||||
) -> Self {
|
||||
let ancestor_timeline_id = timeline.get_ancestor_timeline_id();
|
||||
let ancestor_lsn = if ancestor_timeline_id.is_some() {
|
||||
Some(timeline.get_ancestor_lsn())
|
||||
} else {
|
||||
None
|
||||
};
|
||||
|
||||
Self::Local {
|
||||
timeline_id,
|
||||
tenant_id,
|
||||
last_record_lsn: timeline.get_last_record_lsn(),
|
||||
prev_record_lsn: timeline.get_prev_record_lsn(),
|
||||
ancestor_timeline_id,
|
||||
ancestor_lsn,
|
||||
disk_consistent_lsn: timeline.get_disk_consistent_lsn(),
|
||||
current_logical_size: timeline.get_current_logical_size(),
|
||||
current_logical_size_non_incremental: get_current_logical_size_non_incremental(
|
||||
include_non_incremental_logical_size,
|
||||
timeline,
|
||||
),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn timeline_id(&self) -> ZTimelineId {
|
||||
match *self {
|
||||
TimelineInfo::Local { timeline_id, .. } => timeline_id,
|
||||
TimelineInfo::Remote { timeline_id, .. } => timeline_id,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn tenant_id(&self) -> ZTenantId {
|
||||
match *self {
|
||||
TimelineInfo::Local { tenant_id, .. } => tenant_id,
|
||||
TimelineInfo::Remote { tenant_id, .. } => tenant_id,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn get_current_logical_size_non_incremental(
|
||||
include_non_incremental_logical_size: bool,
|
||||
timeline: &dyn Timeline,
|
||||
) -> Option<usize> {
|
||||
if !include_non_incremental_logical_size {
|
||||
return None;
|
||||
}
|
||||
match timeline.get_current_logical_size_non_incremental(timeline.get_last_record_lsn()) {
|
||||
Ok(size) => Some(size),
|
||||
Err(e) => {
|
||||
error!("Failed to get non-incremental logical size: {:?}", e);
|
||||
None
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Copy)]
|
||||
pub struct PointInTime {
|
||||
pub timeline_id: ZTimelineId,
|
||||
pub lsn: Lsn,
|
||||
}
|
||||
|
||||
pub fn init_pageserver(
|
||||
conf: &'static PageServerConf,
|
||||
create_tenant: Option<ZTenantId>,
|
||||
initial_timeline_id: Option<ZTimelineId>,
|
||||
) -> anyhow::Result<()> {
|
||||
// Initialize logger
|
||||
// use true as the daemonize parameter, otherwise we pollute the zenith CLI output with several pages of info messages
|
||||
let _log_file = logging::init(LOG_FILE_NAME, true)?;
|
||||
|
||||
// We don't use the real WAL redo manager, because we don't want to spawn the WAL redo
|
||||
// process during repository initialization.
|
||||
//
|
||||
// FIXME: That caused trouble, because the WAL redo manager spawned a thread that launched
|
||||
// initdb in the background, and it kept running even after the "zenith init" had exited.
|
||||
// In tests, we started the page server immediately after that, so that initdb was still
|
||||
// running in the background, and we failed to run initdb again in the same directory. This
|
||||
// has been solved for the rapid init+start case now, but the general race condition remains
|
||||
// if you restart the server quickly. The WAL redo manager doesn't use a separate thread
|
||||
// anymore, but I think that could still happen.
|
||||
let dummy_redo_mgr = Arc::new(crate::walredo::DummyRedoManager {});
|
||||
|
||||
crashsafe_dir::create_dir_all(conf.tenants_path())?;
|
||||
|
||||
if let Some(tenant_id) = create_tenant {
|
||||
println!("initializing tenantid {}", tenant_id);
|
||||
let repo = create_repo(conf, tenant_id, dummy_redo_mgr)
|
||||
.context("failed to create repo")?
|
||||
.ok_or_else(|| anyhow!("For newly created pageserver, found an already existing repository for tenant {}", tenant_id))?;
|
||||
let new_timeline_id = initial_timeline_id.unwrap_or_else(ZTimelineId::generate);
|
||||
bootstrap_timeline(conf, tenant_id, new_timeline_id, repo.as_ref())
|
||||
.context("failed to create initial timeline")?;
|
||||
println!("initial timeline {} created", new_timeline_id)
|
||||
} else if initial_timeline_id.is_some() {
|
||||
println!("Ignoring initial timeline parameter, due to no tenant id to create given");
|
||||
}
|
||||
|
||||
println!("pageserver init succeeded");
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub fn create_repo(
|
||||
conf: &'static PageServerConf,
|
||||
tenant_id: ZTenantId,
|
||||
wal_redo_manager: Arc<dyn WalRedoManager + Send + Sync>,
|
||||
) -> Result<Option<Arc<dyn Repository>>> {
|
||||
let repo_dir = conf.tenant_path(&tenant_id);
|
||||
if repo_dir.exists() {
|
||||
debug!("repo for {} already exists", tenant_id);
|
||||
return Ok(None);
|
||||
}
|
||||
|
||||
// the top-level dir may already exist if we are creating it through the CLI
|
||||
crashsafe_dir::create_dir_all(&repo_dir)
|
||||
.with_context(|| format!("could not create directory {}", repo_dir.display()))?;
|
||||
crashsafe_dir::create_dir(conf.timelines_path(&tenant_id))?;
|
||||
info!("created directory structure in {}", repo_dir.display());
|
||||
|
||||
Ok(Some(Arc::new(LayeredRepository::new(
|
||||
conf,
|
||||
wal_redo_manager,
|
||||
tenant_id,
|
||||
conf.remote_storage_config.is_some(),
|
||||
))))
|
||||
}
|
||||
|
||||
// Returns the checkpoint LSN from the control file
|
||||
fn get_lsn_from_controlfile(path: &Path) -> Result<Lsn> {
|
||||
// Read control file to extract the LSN
|
||||
let controlfile_path = path.join("global").join("pg_control");
|
||||
let controlfile = ControlFileData::decode(&fs::read(controlfile_path)?)?;
|
||||
let lsn = controlfile.checkPoint;
|
||||
|
||||
Ok(Lsn(lsn))
|
||||
}
|
||||
|
||||
// Create the cluster temporarily in 'initdbpath' directory inside the repository
|
||||
// to get bootstrap data for timeline initialization.
|
||||
//
|
||||
fn run_initdb(conf: &'static PageServerConf, initdbpath: &Path) -> Result<()> {
|
||||
info!("running initdb in {}... ", initdbpath.display());
|
||||
|
||||
let initdb_path = conf.pg_bin_dir().join("initdb");
|
||||
let initdb_output = Command::new(initdb_path)
|
||||
.args(&["-D", initdbpath.to_str().unwrap()])
|
||||
.args(&["-U", &conf.superuser])
|
||||
.args(&["-E", "utf8"])
|
||||
.arg("--no-instructions")
|
||||
// This is only used for a temporary installation that is deleted shortly after,
|
||||
// so no need to fsync it
|
||||
.arg("--no-sync")
|
||||
.env_clear()
|
||||
.env("LD_LIBRARY_PATH", conf.pg_lib_dir().to_str().unwrap())
|
||||
.env("DYLD_LIBRARY_PATH", conf.pg_lib_dir().to_str().unwrap())
|
||||
.stdout(Stdio::null())
|
||||
.output()
|
||||
.context("failed to execute initdb")?;
|
||||
if !initdb_output.status.success() {
|
||||
bail!(
|
||||
"initdb failed: '{}'",
|
||||
String::from_utf8_lossy(&initdb_output.stderr)
|
||||
);
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
//
|
||||
// - run initdb to initialize a temporary instance and get bootstrap data
// - after initialization completes, remove the temp dir.
|
||||
//
|
||||
fn bootstrap_timeline(
|
||||
conf: &'static PageServerConf,
|
||||
tenantid: ZTenantId,
|
||||
tli: ZTimelineId,
|
||||
repo: &dyn Repository,
|
||||
) -> Result<Arc<dyn Timeline>> {
|
||||
let _enter = info_span!("bootstrapping", timeline = %tli, tenant = %tenantid).entered();
|
||||
|
||||
let initdb_path = conf.tenant_path(&tenantid).join("tmp");
|
||||
|
||||
// Initialize a temporary repo to get bootstrap data
|
||||
run_initdb(conf, &initdb_path)?;
|
||||
let pgdata_path = initdb_path;
|
||||
|
||||
let lsn = get_lsn_from_controlfile(&pgdata_path)?.align();
|
||||
|
||||
// Import the contents of the data directory at the initial checkpoint
|
||||
// LSN, and any WAL after that.
|
||||
// The initdb LSN will be equal to last_record_lsn, which is set after the import.
// Since we know it up front, pass it to create_empty_timeline to avoid an Option or a dummy zero value.
|
||||
let timeline = repo.create_empty_timeline(tli, lsn)?;
|
||||
import_datadir::import_timeline_from_postgres_datadir(
|
||||
&pgdata_path,
|
||||
timeline.writer().as_ref(),
|
||||
lsn,
|
||||
)?;
|
||||
timeline.checkpoint(CheckpointConfig::Forced)?;
|
||||
|
||||
println!(
|
||||
"created initial timeline {} timeline.lsn {}",
|
||||
tli,
|
||||
timeline.get_last_record_lsn()
|
||||
);
|
||||
|
||||
// Remove temp dir. We don't need it anymore
|
||||
fs::remove_dir_all(pgdata_path)?;
|
||||
|
||||
Ok(timeline)
|
||||
}
|
||||
|
||||
pub(crate) fn get_timelines(
|
||||
tenant_id: ZTenantId,
|
||||
include_non_incremental_logical_size: bool,
|
||||
) -> Result<Vec<TimelineInfo>> {
|
||||
let repo = tenant_mgr::get_repository_for_tenant(tenant_id)
|
||||
.with_context(|| format!("Failed to get repo for tenant {}", tenant_id))?;
|
||||
|
||||
Ok(repo
|
||||
.list_timelines()
|
||||
.with_context(|| format!("Failed to list timelines for tenant {}", tenant_id))?
|
||||
.into_iter()
|
||||
.filter_map(|timeline| match timeline {
|
||||
RepositoryTimeline::Local { timeline, id } => Some((id, timeline)),
|
||||
RepositoryTimeline::Remote { .. } => None,
|
||||
})
|
||||
.map(|(timeline_id, timeline)| {
|
||||
TimelineInfo::from_dyn_timeline(
|
||||
tenant_id,
|
||||
timeline_id,
|
||||
timeline.as_ref(),
|
||||
include_non_incremental_logical_size,
|
||||
)
|
||||
})
|
||||
.collect())
|
||||
}
|
||||
|
||||
pub(crate) fn create_timeline(
|
||||
conf: &'static PageServerConf,
|
||||
tenant_id: ZTenantId,
|
||||
new_timeline_id: Option<ZTimelineId>,
|
||||
ancestor_timeline_id: Option<ZTimelineId>,
|
||||
ancestor_start_lsn: Option<Lsn>,
|
||||
) -> Result<Option<TimelineInfo>> {
|
||||
let new_timeline_id = new_timeline_id.unwrap_or_else(ZTimelineId::generate);
|
||||
let repo = tenant_mgr::get_repository_for_tenant(tenant_id)?;
|
||||
|
||||
if conf.timeline_path(&new_timeline_id, &tenant_id).exists() {
|
||||
match repo.get_timeline(new_timeline_id)? {
|
||||
RepositoryTimeline::Local { id, .. } => {
|
||||
debug!("timeline {} already exists", id);
|
||||
return Ok(None);
|
||||
}
|
||||
RepositoryTimeline::Remote { id, .. } => bail!(
|
||||
"timeline {} already exists in pageserver's remote storage",
|
||||
id
|
||||
),
|
||||
}
|
||||
}
|
||||
|
||||
let mut start_lsn = ancestor_start_lsn.unwrap_or(Lsn(0));
|
||||
|
||||
let new_timeline_info = match ancestor_timeline_id {
|
||||
Some(ancestor_timeline_id) => {
|
||||
let ancestor_timeline = repo
|
||||
.get_timeline(ancestor_timeline_id)
|
||||
.with_context(|| format!("Cannot get ancestor timeline {}", ancestor_timeline_id))?
|
||||
.local_timeline()
|
||||
.with_context(|| {
|
||||
format!(
|
||||
"Cannot branch off the timeline {} that's not present locally",
|
||||
ancestor_timeline_id
|
||||
)
|
||||
})?;
|
||||
|
||||
if start_lsn == Lsn(0) {
|
||||
// Find end of WAL on the old timeline
|
||||
let end_of_wal = ancestor_timeline.get_last_record_lsn();
|
||||
info!("branching at end of WAL: {}", end_of_wal);
|
||||
start_lsn = end_of_wal;
|
||||
} else {
|
||||
// Wait for the WAL to arrive and be processed on the parent branch up
|
||||
// to the requested branch point. The repository code itself doesn't
|
||||
// require it, but if we start to receive WAL on the new timeline,
|
||||
// decoding the new WAL might need to look up previous pages, relation
|
||||
// sizes etc. and that would get confused if the previous page versions
|
||||
// are not in the repository yet.
|
||||
ancestor_timeline.wait_lsn(start_lsn)?;
|
||||
}
|
||||
start_lsn = start_lsn.align();
|
||||
|
||||
let ancestor_ancestor_lsn = ancestor_timeline.get_ancestor_lsn();
|
||||
if ancestor_ancestor_lsn > start_lsn {
|
||||
// can we safely just branch from the ancestor instead?
|
||||
anyhow::bail!(
|
||||
"invalid start lsn {} for ancestor timeline {}: less than timeline ancestor lsn {}",
|
||||
start_lsn,
|
||||
ancestor_timeline_id,
|
||||
ancestor_ancestor_lsn,
|
||||
);
|
||||
}
|
||||
repo.branch_timeline(ancestor_timeline_id, new_timeline_id, start_lsn)?;
|
||||
// load the timeline into memory
|
||||
let loaded_timeline = repo.get_timeline(new_timeline_id)?;
|
||||
TimelineInfo::from_repo_timeline(tenant_id, loaded_timeline, false)
|
||||
}
|
||||
None => {
|
||||
let new_timeline = bootstrap_timeline(conf, tenant_id, new_timeline_id, repo.as_ref())?;
|
||||
TimelineInfo::from_dyn_timeline(
|
||||
tenant_id,
|
||||
new_timeline_id,
|
||||
new_timeline.as_ref(),
|
||||
false,
|
||||
)
|
||||
}
|
||||
};
|
||||
Ok(Some(new_timeline_info))
|
||||
}
|
||||
@@ -21,7 +21,6 @@
|
||||
//! redo Postgres process, but some records it can handle directly with
|
||||
//! bespoke Rust code.
|
||||
|
||||
use chrono::format::format;
|
||||
use postgres_ffi::nonrelfile_utils::clogpage_precedes;
|
||||
use postgres_ffi::nonrelfile_utils::slru_may_delete_clogsegment;
|
||||
use std::cmp::min;
|
||||
@@ -271,25 +270,6 @@ impl WalIngest {
|
||||
// Iterate through all the blocks that the record modifies, and
|
||||
// "put" a separate copy of the record for each block.
|
||||
for blk in decoded.blocks.iter() {
|
||||
|
||||
let lsn_hex = {
    use bytes::BufMut;
    let mut bytes = BytesMut::new();
    bytes.put_u64(lsn.0);
    hex::encode(bytes.freeze())
};
let page_hex = {
    use bytes::BufMut;
    let mut page = BytesMut::new();
    page.put_u32(blk.rnode_spcnode);
    page.put_u32(blk.rnode_dbnode);
    page.put_u32(blk.rnode_relnode);
    page.put_u8(blk.forknum);
    page.put_u32(blk.blkno);
    hex::encode(page.freeze())
};
println!("wal-at-lsn-modified-page {} {}", lsn_hex, page_hex);
|
||||
|
||||
self.ingest_decoded_block(timeline, lsn, &decoded, blk)?;
|
||||
}
|
||||
|
||||
|
||||
@@ -7,11 +7,13 @@ use std::collections::HashMap;
|
||||
use tokio::io::{AsyncRead, AsyncWrite};
|
||||
use zenith_utils::pq_proto::{BeMessage as Be, BeParameterStatusMessage, FeMessage as Fe};
|
||||
|
||||
// TODO rename the struct to ClientParams or something
|
||||
/// Various client credentials which we use for authentication.
|
||||
#[derive(Debug, PartialEq, Eq)]
|
||||
pub struct ClientCredentials {
|
||||
pub user: String,
|
||||
pub dbname: String,
|
||||
pub options: Option<String>,
|
||||
}
|
||||
|
||||
impl TryFrom<HashMap<String, String>> for ClientCredentials {
|
||||
@@ -25,9 +27,22 @@ impl TryFrom<HashMap<String, String>> for ClientCredentials {
|
||||
};
|
||||
|
||||
let user = get_param("user")?;
|
||||
let db = get_param("database")?;
|
||||
let dbname = get_param("database")?;
|
||||
|
||||
Ok(Self { user, dbname: db })
|
||||
// TODO see what other options should be recognized, possibly all.
|
||||
let options = match get_param("search_path") {
|
||||
Ok(path) => Some(format!("-c search_path={}", path)),
|
||||
Err(_) => None,
|
||||
};
|
||||
|
||||
// TODO investigate why "" is always a key
|
||||
// TODO warn on unrecognized options?
|
||||
|
||||
Ok(Self {
|
||||
user,
|
||||
dbname,
|
||||
options,
|
||||
})
|
||||
}
|
||||
}
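A unit-test-style sketch, not part of the change, of how the TryFrom impl above behaves; it assumes a Rust 2021 prelude for TryFrom and uses only the parameter names shown here, with made-up values:

#[cfg(test)]
mod client_credentials_sketch {
    use super::ClientCredentials;
    use std::collections::HashMap;

    #[test]
    fn search_path_is_mapped_to_options() {
        let mut params = HashMap::new();
        params.insert("user".to_owned(), "alice".to_owned());
        params.insert("database".to_owned(), "db1".to_owned());
        params.insert("search_path".to_owned(), "tmp_schema_1".to_owned());

        // The "search_path" startup parameter becomes a "-c" option for the compute node.
        let creds = ClientCredentials::try_from(params).expect("credentials should parse");
        assert_eq!(creds.user, "alice");
        assert_eq!(creds.dbname, "db1");
        assert_eq!(creds.options.as_deref(), Some("-c search_path=tmp_schema_1"));
    }
}
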
|
||||
|
||||
@@ -85,6 +100,7 @@ async fn handle_static(
|
||||
dbname: creds.dbname.clone(),
|
||||
user: creds.user.clone(),
|
||||
password: Some(cleartext_password.into()),
|
||||
options: creds.options,
|
||||
};
|
||||
|
||||
client
|
||||
@@ -117,15 +133,22 @@ async fn handle_existing_user(
|
||||
.ok_or_else(|| anyhow!("unexpected password message"))?;
|
||||
|
||||
let cplane = CPlaneApi::new(&config.auth_endpoint);
|
||||
let db_info = cplane
|
||||
.authenticate_proxy_request(creds, md5_response, &md5_salt, &psql_session_id)
|
||||
let db_info_response = cplane
|
||||
.authenticate_proxy_request(&creds, md5_response, &md5_salt, &psql_session_id)
|
||||
.await?;
|
||||
|
||||
client
|
||||
.write_message_noflush(&Be::AuthenticationOk)?
|
||||
.write_message_noflush(&BeParameterStatusMessage::encoding())?;
|
||||
|
||||
Ok(db_info)
|
||||
Ok(DatabaseInfo {
|
||||
host: db_info_response.host,
|
||||
port: db_info_response.port,
|
||||
dbname: db_info_response.dbname,
|
||||
user: db_info_response.user,
|
||||
password: db_info_response.password,
|
||||
options: creds.options,
|
||||
})
|
||||
}
|
||||
|
||||
async fn handle_new_user(
|
||||
@@ -135,7 +158,7 @@ async fn handle_new_user(
|
||||
let psql_session_id = new_psql_session_id();
|
||||
let greeting = hello_message(&config.redirect_uri, &psql_session_id);
|
||||
|
||||
let db_info = cplane_api::with_waiter(psql_session_id, |waiter| async {
|
||||
let db_info_response = cplane_api::with_waiter(psql_session_id, |waiter| async {
|
||||
// Give user a URL to spawn a new database
|
||||
client
|
||||
.write_message_noflush(&Be::AuthenticationOk)?
|
||||
@@ -150,7 +173,14 @@ async fn handle_new_user(
|
||||
|
||||
client.write_message_noflush(&Be::NoticeResponse("Connecting to database.".into()))?;
|
||||
|
||||
Ok(db_info)
|
||||
Ok(DatabaseInfo {
|
||||
host: db_info_response.host,
|
||||
port: db_info_response.port,
|
||||
dbname: db_info_response.dbname,
|
||||
user: db_info_response.user,
|
||||
password: db_info_response.password,
|
||||
options: None,
|
||||
})
|
||||
}
|
||||
|
||||
fn hello_message(redirect_uri: &str, session_id: &str) -> String {
|
||||
|
||||
@@ -10,6 +10,7 @@ pub struct DatabaseInfo {
|
||||
pub dbname: String,
|
||||
pub user: String,
|
||||
pub password: Option<String>,
|
||||
pub options: Option<String>,
|
||||
}
|
||||
|
||||
impl DatabaseInfo {
|
||||
@@ -33,6 +34,10 @@ impl From<DatabaseInfo> for tokio_postgres::Config {
|
||||
.dbname(&db_info.dbname)
|
||||
.user(&db_info.user);
|
||||
|
||||
if let Some(options) = db_info.options {
|
||||
config.options(&options);
|
||||
}
|
||||
|
||||
if let Some(password) = db_info.password {
|
||||
config.password(password);
|
||||
}
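For illustration only: how the conversion above could be used end to end. All field values below are invented, and the host/port field names are assumed from the surrounding code; `options` is the field that carries the proxy's extra "-c" settings (such as search_path) through to the compute node:

fn example_config() -> tokio_postgres::Config {
    let db_info = DatabaseInfo {
        host: "127.0.0.1".into(),
        port: 5432,
        dbname: "db1".into(),
        user: "alice".into(),
        password: Some("secret".into()),
        options: Some("-c search_path=tmp_schema_1".into()),
    };
    // The From<DatabaseInfo> impl shown above applies dbname, user, options and password.
    db_info.into()
}
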
|
||||
|
||||
@@ -1,25 +1,37 @@
|
||||
use crate::auth::ClientCredentials;
|
||||
use crate::compute::DatabaseInfo;
|
||||
use crate::waiters::{Waiter, Waiters};
|
||||
use anyhow::{anyhow, bail};
|
||||
use lazy_static::lazy_static;
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
/// Part of the legacy cplane responses
|
||||
#[derive(Serialize, Deserialize, Debug, Default)]
|
||||
pub struct DatabaseInfoResponse {
|
||||
pub host: String,
|
||||
pub port: u16,
|
||||
pub dbname: String,
|
||||
pub user: String,
|
||||
pub password: Option<String>,
|
||||
}
|
||||
|
||||
lazy_static! {
|
||||
static ref CPLANE_WAITERS: Waiters<Result<DatabaseInfo, String>> = Default::default();
|
||||
static ref CPLANE_WAITERS: Waiters<Result<DatabaseInfoResponse, String>> = Default::default();
|
||||
}
|
||||
|
||||
/// Give caller an opportunity to wait for cplane's reply.
|
||||
pub async fn with_waiter<F, R, T>(psql_session_id: impl Into<String>, f: F) -> anyhow::Result<T>
|
||||
where
|
||||
F: FnOnce(Waiter<'static, Result<DatabaseInfo, String>>) -> R,
|
||||
F: FnOnce(Waiter<'static, Result<DatabaseInfoResponse, String>>) -> R,
|
||||
R: std::future::Future<Output = anyhow::Result<T>>,
|
||||
{
|
||||
let waiter = CPLANE_WAITERS.register(psql_session_id.into())?;
|
||||
f(waiter).await
|
||||
}
|
||||
|
||||
pub fn notify(psql_session_id: &str, msg: Result<DatabaseInfo, String>) -> anyhow::Result<()> {
|
||||
pub fn notify(
|
||||
psql_session_id: &str,
|
||||
msg: Result<DatabaseInfoResponse, String>,
|
||||
) -> anyhow::Result<()> {
|
||||
CPLANE_WAITERS.notify(psql_session_id, msg)
|
||||
}
|
||||
|
||||
@@ -37,11 +49,11 @@ impl<'a> CPlaneApi<'a> {
|
||||
impl CPlaneApi<'_> {
|
||||
pub async fn authenticate_proxy_request(
|
||||
&self,
|
||||
creds: ClientCredentials,
|
||||
creds: &ClientCredentials,
|
||||
md5_response: &[u8],
|
||||
salt: &[u8; 4],
|
||||
psql_session_id: &str,
|
||||
) -> anyhow::Result<DatabaseInfo> {
|
||||
) -> anyhow::Result<DatabaseInfoResponse> {
|
||||
let mut url = reqwest::Url::parse(self.auth_endpoint)?;
|
||||
url.query_pairs_mut()
|
||||
.append_pair("login", &creds.user)
|
||||
@@ -77,7 +89,7 @@ impl CPlaneApi<'_> {
|
||||
#[derive(Serialize, Deserialize, Debug)]
|
||||
#[serde(untagged)]
|
||||
enum ProxyAuthResponse {
|
||||
Ready { conn_info: DatabaseInfo },
|
||||
Ready { conn_info: DatabaseInfoResponse },
|
||||
Error { error: String },
|
||||
NotReady { ready: bool }, // TODO: get rid of `ready`
|
||||
}
|
||||
@@ -92,13 +104,13 @@ mod tests {
|
||||
// Ready
|
||||
let auth: ProxyAuthResponse = serde_json::from_value(json!({
|
||||
"ready": true,
|
||||
"conn_info": DatabaseInfo::default(),
|
||||
"conn_info": DatabaseInfoResponse::default(),
|
||||
}))
|
||||
.unwrap();
|
||||
assert!(matches!(
|
||||
auth,
|
||||
ProxyAuthResponse::Ready {
|
||||
conn_info: DatabaseInfo { .. }
|
||||
conn_info: DatabaseInfoResponse { .. }
|
||||
}
|
||||
));
|
||||
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
use crate::{compute::DatabaseInfo, cplane_api};
|
||||
use crate::cplane_api;
|
||||
use anyhow::Context;
|
||||
use serde::Deserialize;
|
||||
use std::{
|
||||
@@ -75,7 +75,7 @@ struct PsqlSessionResponse {
|
||||
|
||||
#[derive(Deserialize)]
|
||||
enum PsqlSessionResult {
|
||||
Success(DatabaseInfo),
|
||||
Success(cplane_api::DatabaseInfoResponse),
|
||||
Failure(String),
|
||||
}
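An illustrative sketch, not part of the change, of how the mgmt handler could turn a PsqlSessionResult into the message that cplane_api::notify expects; the function name is made up and the Success variant is assumed to carry the DatabaseInfoResponse shown above:

fn deliver(psql_session_id: &str, result: PsqlSessionResult) -> anyhow::Result<()> {
    // Success carries the console's DatabaseInfoResponse, Failure carries an error string.
    let msg = match result {
        PsqlSessionResult::Success(db_info) => Ok(db_info),
        PsqlSessionResult::Failure(text) => Err(text),
    };
    cplane_api::notify(psql_session_id, msg)
}
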
|
||||
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
use crate::auth;
|
||||
use crate::auth::{self, ClientCredentials};
|
||||
use crate::cancellation::{self, CancelClosure, CancelMap};
|
||||
use crate::compute::DatabaseInfo;
|
||||
use crate::config::{ProxyConfig, TlsConfig};
|
||||
@@ -138,7 +138,6 @@ async fn handshake<S: AsyncRead + AsyncWrite + Unpin>(
|
||||
stream.write_message(&Be::ErrorResponse(msg)).await?;
|
||||
bail!(msg);
|
||||
}
|
||||
|
||||
break Ok(Some((stream, params.try_into()?)));
|
||||
}
|
||||
CancelRequest(cancel_key_data) => {
|
||||
|
||||
@@ -1,8 +1,8 @@
|
||||
from contextlib import closing
|
||||
from typing import Iterator
|
||||
from uuid import UUID, uuid4
|
||||
import psycopg2
|
||||
from fixtures.zenith_fixtures import ZenithEnvBuilder, ZenithPageserverApiException
|
||||
from requests.exceptions import HTTPError
|
||||
import pytest
|
||||
|
||||
|
||||
@@ -25,31 +25,25 @@ def test_pageserver_auth(zenith_env_builder: ZenithEnvBuilder):
|
||||
ps.safe_psql("set FOO", password=tenant_token)
|
||||
ps.safe_psql("set FOO", password=management_token)
|
||||
|
||||
new_timeline_id = env.zenith_cli.create_branch('test_pageserver_auth',
|
||||
tenant_id=env.initial_tenant)
|
||||
|
||||
# tenant can create branches
|
||||
tenant_http_client.timeline_create(tenant_id=env.initial_tenant,
|
||||
ancestor_timeline_id=new_timeline_id)
|
||||
tenant_http_client.branch_create(env.initial_tenant, 'new1', 'main')
|
||||
# console can create branches for tenant
|
||||
management_http_client.timeline_create(tenant_id=env.initial_tenant,
|
||||
ancestor_timeline_id=new_timeline_id)
|
||||
management_http_client.branch_create(env.initial_tenant, 'new2', 'main')
|
||||
|
||||
# fail to create branch using token with different tenant_id
|
||||
with pytest.raises(ZenithPageserverApiException,
|
||||
match='Forbidden: Tenant id mismatch. Permission denied'):
|
||||
invalid_tenant_http_client.timeline_create(tenant_id=env.initial_tenant,
|
||||
ancestor_timeline_id=new_timeline_id)
|
||||
invalid_tenant_http_client.branch_create(env.initial_tenant, "new3", "main")
|
||||
|
||||
# create tenant using management token
|
||||
management_http_client.tenant_create()
|
||||
management_http_client.tenant_create(uuid4())
|
||||
|
||||
# fail to create tenant using tenant token
|
||||
with pytest.raises(
|
||||
ZenithPageserverApiException,
|
||||
match='Forbidden: Attempt to access management api with tenant scope. Permission denied'
|
||||
):
|
||||
tenant_http_client.tenant_create()
|
||||
tenant_http_client.tenant_create(uuid4())
|
||||
|
||||
|
||||
@pytest.mark.parametrize('with_wal_acceptors', [False, True])
|
||||
@@ -59,8 +53,9 @@ def test_compute_auth_to_pageserver(zenith_env_builder: ZenithEnvBuilder, with_w
|
||||
zenith_env_builder.num_safekeepers = 3
|
||||
env = zenith_env_builder.init_start()
|
||||
|
||||
branch = f'test_compute_auth_to_pageserver{with_wal_acceptors}'
|
||||
env.zenith_cli.create_branch(branch)
|
||||
branch = f"test_compute_auth_to_pageserver{with_wal_acceptors}"
|
||||
env.zenith_cli.create_branch(branch, "main")
|
||||
|
||||
pg = env.postgres.create_start(branch)
|
||||
|
||||
with closing(pg.connect()) as conn:
|
||||
|
||||
@@ -95,7 +95,7 @@ def test_backpressure_received_lsn_lag(zenith_env_builder: ZenithEnvBuilder):
|
||||
zenith_env_builder.num_safekeepers = 1
|
||||
env = zenith_env_builder.init_start()
|
||||
# Create a branch for us
|
||||
env.zenith_cli.create_branch('test_backpressure')
|
||||
env.zenith_cli.create_branch("test_backpressure", "main")
|
||||
|
||||
pg = env.postgres.create_start('test_backpressure',
|
||||
config_lines=['max_replication_write_lag=30MB'])
|
||||
|
||||
@@ -22,7 +22,8 @@ def test_branch_behind(zenith_env_builder: ZenithEnvBuilder):
|
||||
env = zenith_env_builder.init_start()
|
||||
|
||||
# Branch at the point where only 100 rows were inserted
|
||||
env.zenith_cli.create_branch('test_branch_behind')
|
||||
env.zenith_cli.create_branch("test_branch_behind", "main")
|
||||
|
||||
pgmain = env.postgres.create_start('test_branch_behind')
|
||||
log.info("postgres is running on 'test_branch_behind' branch")
|
||||
|
||||
@@ -59,9 +60,7 @@ def test_branch_behind(zenith_env_builder: ZenithEnvBuilder):
|
||||
log.info(f'LSN after 200100 rows: {lsn_b}')
|
||||
|
||||
# Branch at the point where only 100 rows were inserted
|
||||
env.zenith_cli.create_branch('test_branch_behind_hundred',
|
||||
'test_branch_behind',
|
||||
ancestor_start_lsn=lsn_a)
|
||||
env.zenith_cli.create_branch("test_branch_behind_hundred", "test_branch_behind@" + lsn_a)
|
||||
|
||||
# Insert many more rows. This generates enough WAL to fill a few segments.
|
||||
main_cur.execute('''
|
||||
@@ -76,12 +75,10 @@ def test_branch_behind(zenith_env_builder: ZenithEnvBuilder):
|
||||
log.info(f'LSN after 400100 rows: {lsn_c}')
|
||||
|
||||
# Branch at the point where only 200100 rows were inserted
|
||||
env.zenith_cli.create_branch('test_branch_behind_more',
|
||||
'test_branch_behind',
|
||||
ancestor_start_lsn=lsn_b)
|
||||
env.zenith_cli.create_branch("test_branch_behind_more", "test_branch_behind@" + lsn_b)
|
||||
|
||||
pg_hundred = env.postgres.create_start('test_branch_behind_hundred')
|
||||
pg_more = env.postgres.create_start('test_branch_behind_more')
|
||||
pg_hundred = env.postgres.create_start("test_branch_behind_hundred")
|
||||
pg_more = env.postgres.create_start("test_branch_behind_more")
|
||||
|
||||
# On the 'hundred' branch, we should see only 100 rows
|
||||
hundred_pg_conn = pg_hundred.connect()
|
||||
@@ -102,23 +99,19 @@ def test_branch_behind(zenith_env_builder: ZenithEnvBuilder):
|
||||
# Check bad lsn's for branching
|
||||
|
||||
# branch at segment boundary
|
||||
env.zenith_cli.create_branch('test_branch_segment_boundary',
|
||||
'test_branch_behind',
|
||||
ancestor_start_lsn="0/3000000")
|
||||
pg = env.postgres.create_start('test_branch_segment_boundary')
|
||||
env.zenith_cli.create_branch("test_branch_segment_boundary", "test_branch_behind@0/3000000")
|
||||
pg = env.postgres.create_start("test_branch_segment_boundary")
|
||||
cur = pg.connect().cursor()
|
||||
cur.execute('SELECT 1')
|
||||
assert cur.fetchone() == (1, )
|
||||
|
||||
# branch at pre-initdb lsn
|
||||
with pytest.raises(Exception, match="invalid branch start lsn"):
|
||||
env.zenith_cli.create_branch('test_branch_preinitdb', ancestor_start_lsn="0/42")
|
||||
env.zenith_cli.create_branch("test_branch_preinitdb", "main@0/42")
|
||||
|
||||
# branch at pre-ancestor lsn
|
||||
with pytest.raises(Exception, match="less than timeline ancestor lsn"):
|
||||
env.zenith_cli.create_branch('test_branch_preinitdb',
|
||||
'test_branch_behind',
|
||||
ancestor_start_lsn="0/42")
|
||||
env.zenith_cli.create_branch("test_branch_preinitdb", "test_branch_behind@0/42")
|
||||
|
||||
# check that we cannot create branch based on garbage collected data
|
||||
with closing(env.pageserver.connect()) as psconn:
|
||||
@@ -130,9 +123,7 @@ def test_branch_behind(zenith_env_builder: ZenithEnvBuilder):
|
||||
|
||||
with pytest.raises(Exception, match="invalid branch start lsn"):
|
||||
# this gced_lsn is pretty arbitrary, so if gc is disabled this wouldn't fail
|
||||
env.zenith_cli.create_branch('test_branch_create_fail',
|
||||
'test_branch_behind',
|
||||
ancestor_start_lsn=gced_lsn)
|
||||
env.zenith_cli.create_branch("test_branch_create_fail", f"test_branch_behind@{gced_lsn}")
|
||||
|
||||
# check that after gc everything is still there
|
||||
hundred_cur.execute('SELECT count(*) FROM foo')
|
||||
|
||||
@@ -12,7 +12,7 @@ from fixtures.log_helper import log
|
||||
#
|
||||
def test_clog_truncate(zenith_simple_env: ZenithEnv):
|
||||
env = zenith_simple_env
|
||||
env.zenith_cli.create_branch('test_clog_truncate', 'empty')
|
||||
env.zenith_cli.create_branch("test_clog_truncate", "empty")
|
||||
|
||||
# set aggressive autovacuum to make sure that truncation will happen
|
||||
config = [
|
||||
@@ -62,9 +62,9 @@ def test_clog_truncate(zenith_simple_env: ZenithEnv):
|
||||
|
||||
# create new branch after clog truncation and start a compute node on it
|
||||
log.info(f'create branch at lsn_after_truncation {lsn_after_truncation}')
|
||||
env.zenith_cli.create_branch('test_clog_truncate_new',
|
||||
'test_clog_truncate',
|
||||
ancestor_start_lsn=lsn_after_truncation)
|
||||
env.zenith_cli.create_branch("test_clog_truncate_new",
|
||||
"test_clog_truncate@" + lsn_after_truncation)
|
||||
|
||||
pg2 = env.postgres.create_start('test_clog_truncate_new')
|
||||
log.info('postgres is running on test_clog_truncate_new branch')
|
||||
|
||||
|
||||
@@ -11,7 +11,7 @@ from fixtures.log_helper import log
|
||||
#
|
||||
def test_createdb(zenith_simple_env: ZenithEnv):
|
||||
env = zenith_simple_env
|
||||
env.zenith_cli.create_branch('test_createdb', 'empty')
|
||||
env.zenith_cli.create_branch("test_createdb", "empty")
|
||||
|
||||
pg = env.postgres.create_start('test_createdb')
|
||||
log.info("postgres is running on 'test_createdb' branch")
|
||||
@@ -27,7 +27,8 @@ def test_createdb(zenith_simple_env: ZenithEnv):
|
||||
lsn = cur.fetchone()[0]
|
||||
|
||||
# Create a branch
|
||||
env.zenith_cli.create_branch('test_createdb2', 'test_createdb', ancestor_start_lsn=lsn)
|
||||
env.zenith_cli.create_branch("test_createdb2", "test_createdb@" + lsn)
|
||||
|
||||
pg2 = env.postgres.create_start('test_createdb2')
|
||||
|
||||
# Test that you can connect to the new database on both branches
|
||||
@@ -40,7 +41,8 @@ def test_createdb(zenith_simple_env: ZenithEnv):
|
||||
#
|
||||
def test_dropdb(zenith_simple_env: ZenithEnv, test_output_dir):
|
||||
env = zenith_simple_env
|
||||
env.zenith_cli.create_branch('test_dropdb', 'empty')
|
||||
env.zenith_cli.create_branch("test_dropdb", "empty")
|
||||
|
||||
pg = env.postgres.create_start('test_dropdb')
|
||||
log.info("postgres is running on 'test_dropdb' branch")
|
||||
|
||||
@@ -64,14 +66,10 @@ def test_dropdb(zenith_simple_env: ZenithEnv, test_output_dir):
|
||||
lsn_after_drop = cur.fetchone()[0]
|
||||
|
||||
# Create two branches before and after database drop.
|
||||
env.zenith_cli.create_branch('test_before_dropdb',
|
||||
'test_dropdb',
|
||||
ancestor_start_lsn=lsn_before_drop)
|
||||
env.zenith_cli.create_branch("test_before_dropdb", "test_dropdb@" + lsn_before_drop)
|
||||
pg_before = env.postgres.create_start('test_before_dropdb')
|
||||
|
||||
env.zenith_cli.create_branch('test_after_dropdb',
|
||||
'test_dropdb',
|
||||
ancestor_start_lsn=lsn_after_drop)
|
||||
env.zenith_cli.create_branch("test_after_dropdb", "test_dropdb@" + lsn_after_drop)
|
||||
pg_after = env.postgres.create_start('test_after_dropdb')
|
||||
|
||||
# Test that database exists on the branch before drop
|
||||
|
||||
@@ -9,7 +9,8 @@ from fixtures.log_helper import log
|
||||
#
|
||||
def test_createuser(zenith_simple_env: ZenithEnv):
|
||||
env = zenith_simple_env
|
||||
env.zenith_cli.create_branch('test_createuser', 'empty')
|
||||
env.zenith_cli.create_branch("test_createuser", "empty")
|
||||
|
||||
pg = env.postgres.create_start('test_createuser')
|
||||
log.info("postgres is running on 'test_createuser' branch")
|
||||
|
||||
@@ -24,7 +25,8 @@ def test_createuser(zenith_simple_env: ZenithEnv):
|
||||
lsn = cur.fetchone()[0]
|
||||
|
||||
# Create a branch
|
||||
env.zenith_cli.create_branch('test_createuser2', 'test_createuser', ancestor_start_lsn=lsn)
|
||||
env.zenith_cli.create_branch("test_createuser2", "test_createuser@" + lsn)
|
||||
|
||||
pg2 = env.postgres.create_start('test_createuser2')
|
||||
|
||||
# Test that you can connect to new branch as a new user
|
||||
|
||||
@@ -10,7 +10,7 @@ from fixtures.log_helper import log
|
||||
#
|
||||
def test_multixact(zenith_simple_env: ZenithEnv, test_output_dir):
|
||||
env = zenith_simple_env
|
||||
env.zenith_cli.create_branch('test_multixact', 'empty')
|
||||
env.zenith_cli.create_branch("test_multixact", "empty")
|
||||
pg = env.postgres.create_start('test_multixact')
|
||||
|
||||
log.info("postgres is running on 'test_multixact' branch")
|
||||
@@ -60,7 +60,7 @@ def test_multixact(zenith_simple_env: ZenithEnv, test_output_dir):
|
||||
assert int(next_multixact_id) > int(next_multixact_id_old)
|
||||
|
||||
# Branch at this point
|
||||
env.zenith_cli.create_branch('test_multixact_new', 'test_multixact', ancestor_start_lsn=lsn)
|
||||
env.zenith_cli.create_branch("test_multixact_new", "test_multixact@" + lsn)
|
||||
pg_new = env.postgres.create_start('test_multixact_new')
|
||||
|
||||
log.info("postgres is running on 'test_multixact_new' branch")
|
||||
|
||||
@@ -23,26 +23,22 @@ def check_client(client: ZenithPageserverHttpClient, initial_tenant: UUID):
|
||||
client.tenant_create(tenant_id)
|
||||
assert tenant_id.hex in {t['id'] for t in client.tenant_list()}
|
||||
|
||||
timelines = client.timeline_list(tenant_id)
|
||||
assert len(timelines) == 0, "initial tenant should not have any timelines"
|
||||
|
||||
# create timeline
|
||||
timeline_id = uuid4()
|
||||
client.timeline_create(tenant_id=tenant_id, new_timeline_id=timeline_id)
|
||||
|
||||
# check its timelines
|
||||
timelines = client.timeline_list(tenant_id)
|
||||
assert len(timelines) > 0
|
||||
|
||||
# check it is there
|
||||
assert timeline_id.hex in {b['timeline_id'] for b in client.timeline_list(tenant_id)}
|
||||
for timeline in timelines:
|
||||
timeline_id_str = str(timeline['timeline_id'])
|
||||
timeline_details = client.timeline_detail(tenant_id=tenant_id,
|
||||
timeline_id=UUID(timeline_id_str))
|
||||
assert timeline_details['kind'] == 'Local'
|
||||
for timeline_id_str in timelines:
|
||||
timeline_details = client.timeline_detail(tenant_id, UUID(timeline_id_str))
|
||||
assert timeline_details['type'] == 'Local'
|
||||
assert timeline_details['tenant_id'] == tenant_id.hex
|
||||
assert timeline_details['timeline_id'] == timeline_id_str
|
||||
|
||||
# create branch
|
||||
branch_name = uuid4().hex
|
||||
client.branch_create(tenant_id, branch_name, "main")
|
||||
|
||||
# check it is there
|
||||
assert branch_name in {b['name'] for b in client.branch_list(tenant_id)}
|
||||
|
||||
|
||||
def test_pageserver_http_api_client(zenith_simple_env: ZenithEnv):
|
||||
env = zenith_simple_env
|
||||
|
||||
@@ -16,7 +16,7 @@ def test_pageserver_catchup_while_compute_down(zenith_env_builder: ZenithEnvBuil
|
||||
zenith_env_builder.num_safekeepers = 3
|
||||
env = zenith_env_builder.init_start()
|
||||
|
||||
env.zenith_cli.create_branch('test_pageserver_catchup_while_compute_down')
|
||||
env.zenith_cli.create_branch("test_pageserver_catchup_while_compute_down", "main")
|
||||
pg = env.postgres.create_start('test_pageserver_catchup_while_compute_down')
|
||||
|
||||
pg_conn = pg.connect()
|
||||
|
||||
@@ -15,7 +15,7 @@ def test_pageserver_restart(zenith_env_builder: ZenithEnvBuilder):
|
||||
zenith_env_builder.num_safekeepers = 1
|
||||
env = zenith_env_builder.init_start()
|
||||
|
||||
env.zenith_cli.create_branch('test_pageserver_restart')
|
||||
env.zenith_cli.create_branch("test_pageserver_restart", "main")
|
||||
pg = env.postgres.create_start('test_pageserver_restart')
|
||||
|
||||
pg_conn = pg.connect()
|
||||
|
||||
@@ -1,5 +1,7 @@
|
||||
from io import BytesIO
|
||||
import asyncio
|
||||
import asyncpg
|
||||
import subprocess
|
||||
from fixtures.zenith_fixtures import ZenithEnv, Postgres
|
||||
from fixtures.log_helper import log
|
||||
|
||||
|
||||
@@ -5,7 +5,6 @@ def test_proxy_select_1(static_proxy):
|
||||
static_proxy.safe_psql("select 1;")
|
||||
|
||||
|
||||
@pytest.mark.xfail # Proxy eats the extra connection options
|
||||
def test_proxy_options(static_proxy):
|
||||
schema_name = "tmp_schema_1"
|
||||
with static_proxy.connect(schema=schema_name) as conn:
|
||||
|
||||
@@ -11,7 +11,8 @@ from fixtures.zenith_fixtures import ZenithEnv
|
||||
#
|
||||
def test_readonly_node(zenith_simple_env: ZenithEnv):
|
||||
env = zenith_simple_env
|
||||
env.zenith_cli.create_branch('test_readonly_node', 'empty')
|
||||
env.zenith_cli.create_branch("test_readonly_node", "empty")
|
||||
|
||||
pgmain = env.postgres.create_start('test_readonly_node')
|
||||
log.info("postgres is running on 'test_readonly_node' branch")
|
||||
|
||||
@@ -52,14 +53,12 @@ def test_readonly_node(zenith_simple_env: ZenithEnv):
|
||||
log.info('LSN after 400100 rows: ' + lsn_c)
|
||||
|
||||
# Create first read-only node at the point where only 100 rows were inserted
|
||||
pg_hundred = env.postgres.create_start(branch_name='test_readonly_node',
|
||||
node_name='test_readonly_node_hundred',
|
||||
lsn=lsn_a)
|
||||
pg_hundred = env.postgres.create_start("test_readonly_node_hundred",
|
||||
branch=f'test_readonly_node@{lsn_a}')
|
||||
|
||||
# And another at the point where 200100 rows were inserted
|
||||
pg_more = env.postgres.create_start(branch_name='test_readonly_node',
|
||||
node_name='test_readonly_node_more',
|
||||
lsn=lsn_b)
|
||||
pg_more = env.postgres.create_start("test_readonly_node_more",
|
||||
branch=f'test_readonly_node@{lsn_b}')
|
||||
|
||||
# On the 'hundred' node, we should see only 100 rows
|
||||
hundred_pg_conn = pg_hundred.connect()
|
||||
@@ -78,9 +77,8 @@ def test_readonly_node(zenith_simple_env: ZenithEnv):
|
||||
assert main_cur.fetchone() == (400100, )
|
||||
|
||||
# Check creating a node at segment boundary
|
||||
pg = env.postgres.create_start(branch_name='test_readonly_node',
|
||||
node_name='test_branch_segment_boundary',
|
||||
lsn='0/3000000')
|
||||
pg = env.postgres.create_start("test_branch_segment_boundary",
|
||||
branch="test_readonly_node@0/3000000")
|
||||
cur = pg.connect().cursor()
|
||||
cur.execute('SELECT 1')
|
||||
assert cur.fetchone() == (1, )
|
||||
@@ -88,6 +86,5 @@ def test_readonly_node(zenith_simple_env: ZenithEnv):
|
||||
# Create node at pre-initdb lsn
|
||||
with pytest.raises(Exception, match="invalid basebackup lsn"):
|
||||
# compute node startup with invalid LSN should fail
|
||||
env.postgres.create_start(branch_name='test_readonly_node',
|
||||
node_name='test_readonly_node_preinitdb',
|
||||
lsn='0/42')
|
||||
env.zenith_cli.pg_start("test_readonly_node_preinitdb",
|
||||
timeline_spec="test_readonly_node@0/42")
|
||||
|
||||
@@ -43,7 +43,7 @@ def test_remote_storage_backup_and_restore(zenith_env_builder: ZenithEnvBuilder,
|
||||
|
||||
##### First start, insert secret data and upload it to the remote storage
|
||||
env = zenith_env_builder.init_start()
|
||||
pg = env.postgres.create_start('main')
|
||||
pg = env.postgres.create_start()
|
||||
|
||||
tenant_id = pg.safe_psql("show zenith.zenith_tenant")[0][0]
|
||||
timeline_id = pg.safe_psql("show zenith.zenith_timeline")[0][0]
|
||||
@@ -85,7 +85,7 @@ def test_remote_storage_backup_and_restore(zenith_env_builder: ZenithEnvBuilder,
|
||||
timeline_details = client.timeline_detail(UUID(tenant_id), UUID(timeline_id))
|
||||
assert timeline_details['timeline_id'] == timeline_id
|
||||
assert timeline_details['tenant_id'] == tenant_id
|
||||
if timeline_details['kind'] == 'Local':
|
||||
if timeline_details['type'] == 'Local':
|
||||
log.info("timeline downloaded, checking its data")
|
||||
break
|
||||
attempts += 1
|
||||
@@ -94,7 +94,7 @@ def test_remote_storage_backup_and_restore(zenith_env_builder: ZenithEnvBuilder,
|
||||
log.debug("still waiting")
|
||||
time.sleep(1)
|
||||
|
||||
pg = env.postgres.create_start('main')
|
||||
pg = env.postgres.create_start()
|
||||
with closing(pg.connect()) as conn:
|
||||
with conn.cursor() as cur:
|
||||
cur.execute(f'SELECT secret FROM t1 WHERE id = {data_id};')
|
||||
|
||||
@@ -15,7 +15,8 @@ def test_restart_compute(zenith_env_builder: ZenithEnvBuilder, with_wal_acceptor
|
||||
zenith_env_builder.num_safekeepers = 3
|
||||
env = zenith_env_builder.init_start()
|
||||
|
||||
env.zenith_cli.create_branch('test_restart_compute')
|
||||
env.zenith_cli.create_branch("test_restart_compute", "main")
|
||||
|
||||
pg = env.postgres.create_start('test_restart_compute')
|
||||
log.info("postgres is running on 'test_restart_compute' branch")
|
||||
|
||||
|
||||
@@ -127,14 +127,16 @@ def test_tenant_relocation(zenith_env_builder: ZenithEnvBuilder,
|
||||
# create folder for remote storage mock
|
||||
remote_storage_mock_path = env.repo_dir / 'local_fs_remote_storage'
|
||||
|
||||
tenant = env.zenith_cli.create_tenant(UUID("74ee8b079a0e437eb0afea7d26a07209"))
|
||||
tenant = env.create_tenant(UUID("74ee8b079a0e437eb0afea7d26a07209"))
|
||||
log.info("tenant to relocate %s", tenant)
|
||||
|
||||
env.zenith_cli.create_branch('test_tenant_relocation', tenant_id=tenant)
|
||||
env.zenith_cli.create_branch("test_tenant_relocation", "main", tenant_id=tenant)
|
||||
|
||||
tenant_pg = env.postgres.create_start(branch_name='main',
|
||||
node_name='test_tenant_relocation',
|
||||
tenant_id=tenant)
|
||||
tenant_pg = env.postgres.create_start(
|
||||
"test_tenant_relocation",
|
||||
"main", # branch name, None means same as node name
|
||||
tenant_id=tenant,
|
||||
)
|
||||
|
||||
# insert some data
|
||||
with closing(tenant_pg.connect()) as conn:
|
||||
|
||||
@@ -12,21 +12,25 @@ def test_tenants_normal_work(zenith_env_builder: ZenithEnvBuilder, with_wal_acce
|
||||
|
||||
env = zenith_env_builder.init_start()
|
||||
"""Tests tenants with and without wal acceptors"""
|
||||
tenant_1 = env.zenith_cli.create_tenant()
|
||||
tenant_2 = env.zenith_cli.create_tenant()
|
||||
tenant_1 = env.create_tenant()
|
||||
tenant_2 = env.create_tenant()
|
||||
|
||||
env.zenith_cli.create_timeline(
|
||||
f'test_tenants_normal_work_with_wal_acceptors{with_wal_acceptors}', tenant_id=tenant_1)
|
||||
env.zenith_cli.create_timeline(
|
||||
f'test_tenants_normal_work_with_wal_acceptors{with_wal_acceptors}', tenant_id=tenant_2)
|
||||
env.zenith_cli.create_branch(f"test_tenants_normal_work_with_wal_acceptors{with_wal_acceptors}",
|
||||
"main",
|
||||
tenant_id=tenant_1)
|
||||
env.zenith_cli.create_branch(f"test_tenants_normal_work_with_wal_acceptors{with_wal_acceptors}",
|
||||
"main",
|
||||
tenant_id=tenant_2)
|
||||
|
||||
pg_tenant1 = env.postgres.create_start(
|
||||
f'test_tenants_normal_work_with_wal_acceptors{with_wal_acceptors}',
|
||||
tenant_id=tenant_1,
|
||||
f"test_tenants_normal_work_with_wal_acceptors{with_wal_acceptors}",
|
||||
None, # branch name, None means same as node name
|
||||
tenant_1,
|
||||
)
|
||||
pg_tenant2 = env.postgres.create_start(
|
||||
f'test_tenants_normal_work_with_wal_acceptors{with_wal_acceptors}',
|
||||
tenant_id=tenant_2,
|
||||
f"test_tenants_normal_work_with_wal_acceptors{with_wal_acceptors}",
|
||||
None, # branch name, None means same as node name
|
||||
tenant_2,
|
||||
)
|
||||
|
||||
for pg in [pg_tenant1, pg_tenant2]:
|
||||
|
||||
@@ -10,10 +10,10 @@ import time
|
||||
def test_timeline_size(zenith_simple_env: ZenithEnv):
|
||||
env = zenith_simple_env
|
||||
# Branch at the point where only 100 rows were inserted
|
||||
new_timeline_id = env.zenith_cli.create_branch('test_timeline_size', 'empty')
|
||||
env.zenith_cli.create_branch("test_timeline_size", "empty")
|
||||
|
||||
client = env.pageserver.http_client()
|
||||
res = client.timeline_detail(tenant_id=env.initial_tenant, timeline_id=new_timeline_id)
|
||||
res = client.branch_detail(env.initial_tenant, "test_timeline_size")
|
||||
assert res["current_logical_size"] == res["current_logical_size_non_incremental"]
|
||||
|
||||
pgmain = env.postgres.create_start("test_timeline_size")
|
||||
@@ -31,11 +31,11 @@ def test_timeline_size(zenith_simple_env: ZenithEnv):
|
||||
FROM generate_series(1, 10) g
|
||||
""")
|
||||
|
||||
res = client.timeline_detail(tenant_id=env.initial_tenant, timeline_id=new_timeline_id)
|
||||
res = client.branch_detail(env.initial_tenant, "test_timeline_size")
|
||||
assert res["current_logical_size"] == res["current_logical_size_non_incremental"]
|
||||
cur.execute("TRUNCATE foo")
|
||||
|
||||
res = client.timeline_detail(tenant_id=env.initial_tenant, timeline_id=new_timeline_id)
|
||||
res = client.branch_detail(env.initial_tenant, "test_timeline_size")
|
||||
assert res["current_logical_size"] == res["current_logical_size_non_incremental"]
|
||||
|
||||
|
||||
@@ -68,16 +68,17 @@ def wait_for_pageserver_catchup(pgmain: Postgres, polling_interval=1, timeout=60
|
||||
def test_timeline_size_quota(zenith_env_builder: ZenithEnvBuilder):
|
||||
zenith_env_builder.num_safekeepers = 1
|
||||
env = zenith_env_builder.init_start()
|
||||
new_timeline_id = env.zenith_cli.create_branch('test_timeline_size_quota')
|
||||
env.zenith_cli.create_branch("test_timeline_size_quota", "main")
|
||||
|
||||
client = env.pageserver.http_client()
|
||||
res = client.timeline_detail(tenant_id=env.initial_tenant, timeline_id=new_timeline_id)
|
||||
res = client.branch_detail(env.initial_tenant, "test_timeline_size_quota")
|
||||
assert res["current_logical_size"] == res["current_logical_size_non_incremental"]
|
||||
|
||||
pgmain = env.postgres.create_start(
|
||||
"test_timeline_size_quota",
|
||||
# Set small limit for the test
|
||||
config_lines=['zenith.max_cluster_size=30MB'])
|
||||
config_lines=['zenith.max_cluster_size=30MB'],
|
||||
)
|
||||
log.info("postgres is running on 'test_timeline_size_quota' branch")
|
||||
|
||||
with closing(pgmain.connect()) as conn:
|
||||
|
||||
@@ -10,6 +10,7 @@ from fixtures.log_helper import log
|
||||
def test_twophase(zenith_simple_env: ZenithEnv):
|
||||
env = zenith_simple_env
|
||||
env.zenith_cli.create_branch("test_twophase", "empty")
|
||||
|
||||
pg = env.postgres.create_start('test_twophase', config_lines=['max_prepared_transactions=5'])
|
||||
log.info("postgres is running on 'test_twophase' branch")
|
||||
|
||||
|
||||
@@ -13,7 +13,7 @@ from dataclasses import dataclass, field
|
||||
from multiprocessing import Process, Value
|
||||
from pathlib import Path
|
||||
from fixtures.zenith_fixtures import PgBin, Postgres, Safekeeper, ZenithEnv, ZenithEnvBuilder, PortDistributor, SafekeeperPort, zenith_binpath, PgProtocol
|
||||
from fixtures.utils import lsn_to_hex, mkdir_if_needed, lsn_from_hex
|
||||
from fixtures.utils import lsn_to_hex, mkdir_if_needed
|
||||
from fixtures.log_helper import log
|
||||
from typing import List, Optional, Any
|
||||
|
||||
@@ -24,7 +24,8 @@ def test_normal_work(zenith_env_builder: ZenithEnvBuilder):
|
||||
zenith_env_builder.num_safekeepers = 3
|
||||
env = zenith_env_builder.init_start()
|
||||
|
||||
env.zenith_cli.create_branch('test_wal_acceptors_normal_work')
|
||||
env.zenith_cli.create_branch("test_wal_acceptors_normal_work", "main")
|
||||
|
||||
pg = env.postgres.create_start('test_wal_acceptors_normal_work')
|
||||
|
||||
with closing(pg.connect()) as conn:
|
||||
@@ -38,9 +39,9 @@ def test_normal_work(zenith_env_builder: ZenithEnvBuilder):
|
||||
|
||||
|
||||
@dataclass
|
||||
class TimelineMetrics:
|
||||
timeline_id: str
|
||||
last_record_lsn: int
|
||||
class BranchMetrics:
|
||||
name: str
|
||||
latest_valid_lsn: int
|
||||
# One entry per each Safekeeper, order is the same
|
||||
flush_lsns: List[int] = field(default_factory=list)
|
||||
commit_lsns: List[int] = field(default_factory=list)
|
||||
@@ -54,32 +55,23 @@ def test_many_timelines(zenith_env_builder: ZenithEnvBuilder):
|
||||
|
||||
n_timelines = 3
|
||||
|
||||
branch_names = [
|
||||
"test_wal_acceptors_many_timelines_{}".format(tlin) for tlin in range(n_timelines)
|
||||
]
|
||||
# The pageserver and safekeepers operate on timelines via their ids (hex strings like 'ad50847381e248feaac9876cc71ae418').
# Those are not very human readable, so Zenith CLI introduces branch names on top.
# Zenith CLI keeps its branch <-> timeline mapping internally,
# but we need it here to collect timeline-related metrics from the other servers.
|
||||
branch_names_to_timeline_ids = {}
|
||||
branches = ["test_wal_acceptors_many_timelines_{}".format(tlin) for tlin in range(n_timelines)]
|
||||
|
||||
# start postgres on each timeline
|
||||
pgs = []
|
||||
for branch_name in branch_names:
|
||||
new_timeline_id = env.zenith_cli.create_branch(branch_name)
|
||||
pgs.append(env.postgres.create_start(branch_name))
|
||||
branch_names_to_timeline_ids[branch_name] = new_timeline_id
|
||||
for branch in branches:
|
||||
env.zenith_cli.create_branch(branch, "main")
|
||||
pgs.append(env.postgres.create_start(branch))
|
||||
|
||||
tenant_id = env.initial_tenant
|
||||
|
||||
def collect_metrics(message: str) -> List[TimelineMetrics]:
|
||||
def collect_metrics(message: str) -> List[BranchMetrics]:
|
||||
with env.pageserver.http_client() as pageserver_http:
|
||||
timeline_details = [
|
||||
pageserver_http.timeline_detail(
|
||||
tenant_id=tenant_id, timeline_id=branch_names_to_timeline_ids[branch_name])
|
||||
for branch_name in branch_names
|
||||
branch_details = [
|
||||
pageserver_http.branch_detail(tenant_id=tenant_id, name=branch)
|
||||
for branch in branches
|
||||
]
|
||||
# All changes visible to pageserver (last_record_lsn) should be
|
||||
# All changes visible to pageserver (latest_valid_lsn) should be
|
||||
# confirmed by safekeepers first. As we cannot atomically get
|
||||
# state of both pageserver and safekeepers, we should start with
|
||||
# pageserver. Looking at outdated data from pageserver is ok.
|
||||
@@ -88,14 +80,14 @@ def test_many_timelines(zenith_env_builder: ZenithEnvBuilder):
|
||||
# safekeepers' state, it will look contradictory.
|
||||
sk_metrics = [sk.http_client().get_metrics() for sk in env.safekeepers]
|
||||
|
||||
timeline_metrics = []
|
||||
branch_metrics = []
|
||||
with env.pageserver.http_client() as pageserver_http:
|
||||
for timeline_detail in timeline_details:
|
||||
timeline_id: str = timeline_detail["timeline_id"]
|
||||
for branch_detail in branch_details:
|
||||
timeline_id: str = branch_detail["timeline_id"]
|
||||
|
||||
m = TimelineMetrics(
|
||||
timeline_id=timeline_id,
|
||||
last_record_lsn=lsn_from_hex(timeline_detail["last_record_lsn"]),
|
||||
m = BranchMetrics(
|
||||
name=branch_detail["name"],
|
||||
latest_valid_lsn=branch_detail["latest_valid_lsn"],
|
||||
)
|
||||
for sk_m in sk_metrics:
|
||||
m.flush_lsns.append(sk_m.flush_lsn_inexact[(tenant_id.hex, timeline_id)])
|
||||
@@ -107,13 +99,13 @@ def test_many_timelines(zenith_env_builder: ZenithEnvBuilder):
|
||||
# We only call collect_metrics() after a transaction is confirmed by
|
||||
# the compute node, which only happens after a consensus of safekeepers
|
||||
# has confirmed the transaction. We assume majority consensus here.
|
||||
assert (2 * sum(m.last_record_lsn <= lsn
|
||||
assert (2 * sum(m.latest_valid_lsn <= lsn
|
||||
for lsn in m.flush_lsns) > zenith_env_builder.num_safekeepers)
|
||||
assert (2 * sum(m.last_record_lsn <= lsn
|
||||
assert (2 * sum(m.latest_valid_lsn <= lsn
|
||||
for lsn in m.commit_lsns) > zenith_env_builder.num_safekeepers)
|
||||
timeline_metrics.append(m)
|
||||
log.info(f"{message}: {timeline_metrics}")
|
||||
return timeline_metrics
|
||||
branch_metrics.append(m)
|
||||
log.info(f"{message}: {branch_metrics}")
|
||||
return branch_metrics
|
||||
|
||||
# TODO: https://github.com/zenithdb/zenith/issues/809
|
||||
# collect_metrics("before CREATE TABLE")
|
||||
@@ -125,7 +117,7 @@ def test_many_timelines(zenith_env_builder: ZenithEnvBuilder):
|
||||
pg.safe_psql("CREATE TABLE t(key int primary key, value text)")
|
||||
init_m = collect_metrics("after CREATE TABLE")
|
||||
|
||||
# Populate data for 2/3 timelines
|
||||
# Populate data for 2/3 branches
|
||||
class MetricsChecker(threading.Thread):
|
||||
def __init__(self) -> None:
|
||||
super().__init__(daemon=True)
|
||||
@@ -163,15 +155,15 @@ def test_many_timelines(zenith_env_builder: ZenithEnvBuilder):
|
||||
|
||||
collect_metrics("after INSERT INTO")
|
||||
|
||||
# Check data for 2/3 timelines
|
||||
# Check data for 2/3 branches
|
||||
for pg in pgs[:-1]:
|
||||
res = pg.safe_psql("SELECT sum(key) FROM t")
|
||||
assert res[0] == (5000050000, )
|
||||
|
||||
final_m = collect_metrics("after SELECT")
|
||||
# Assume that LSNs (a) behave similarly in all timelines; and (b) INSERT INTO alters LSN significantly.
|
||||
# Assume that LSNs (a) behave similarly in all branches; and (b) INSERT INTO alters LSN significantly.
|
||||
# Also assume that safekeepers will not be significantly out of sync in this test.
|
||||
middle_lsn = (init_m[0].last_record_lsn + final_m[0].last_record_lsn) // 2
|
||||
middle_lsn = (init_m[0].latest_valid_lsn + final_m[0].latest_valid_lsn) // 2
|
||||
assert max(init_m[0].flush_lsns) < middle_lsn < min(final_m[0].flush_lsns)
|
||||
assert max(init_m[0].commit_lsns) < middle_lsn < min(final_m[0].commit_lsns)
|
||||
assert max(init_m[1].flush_lsns) < middle_lsn < min(final_m[1].flush_lsns)
|
||||
@@ -191,7 +183,7 @@ def test_restarts(zenith_env_builder: ZenithEnvBuilder):
|
||||
zenith_env_builder.num_safekeepers = n_acceptors
|
||||
env = zenith_env_builder.init_start()
|
||||
|
||||
env.zenith_cli.create_branch('test_wal_acceptors_restarts')
|
||||
env.zenith_cli.create_branch("test_wal_acceptors_restarts", "main")
|
||||
pg = env.postgres.create_start('test_wal_acceptors_restarts')
|
||||
|
||||
# we rely upon autocommit after each statement
|
||||
@@ -228,7 +220,7 @@ def test_unavailability(zenith_env_builder: ZenithEnvBuilder):
|
||||
zenith_env_builder.num_safekeepers = 2
|
||||
env = zenith_env_builder.init_start()
|
||||
|
||||
env.zenith_cli.create_branch('test_wal_acceptors_unavailability')
|
||||
env.zenith_cli.create_branch("test_wal_acceptors_unavailability", "main")
|
||||
pg = env.postgres.create_start('test_wal_acceptors_unavailability')
|
||||
|
||||
# we rely upon autocommit after each statement
|
||||
@@ -299,7 +291,7 @@ def test_race_conditions(zenith_env_builder: ZenithEnvBuilder, stop_value):
|
||||
zenith_env_builder.num_safekeepers = 3
|
||||
env = zenith_env_builder.init_start()
|
||||
|
||||
env.zenith_cli.create_branch('test_wal_acceptors_race_conditions')
|
||||
env.zenith_cli.create_branch("test_wal_acceptors_race_conditions", "main")
|
||||
pg = env.postgres.create_start('test_wal_acceptors_race_conditions')
|
||||
|
||||
# we rely upon autocommit after each statement
|
||||
@@ -464,7 +456,7 @@ def test_timeline_status(zenith_env_builder: ZenithEnvBuilder):
|
||||
zenith_env_builder.num_safekeepers = 1
|
||||
env = zenith_env_builder.init_start()
|
||||
|
||||
env.zenith_cli.create_branch('test_timeline_status')
|
||||
env.zenith_cli.create_branch("test_timeline_status", "main")
|
||||
pg = env.postgres.create_start('test_timeline_status')
|
||||
|
||||
wa = env.safekeepers[0]
|
||||
@@ -638,7 +630,7 @@ def test_replace_safekeeper(zenith_env_builder: ZenithEnvBuilder):
|
||||
|
||||
zenith_env_builder.num_safekeepers = 4
|
||||
env = zenith_env_builder.init_start()
|
||||
env.zenith_cli.create_branch('test_replace_safekeeper')
|
||||
env.zenith_cli.create_branch("test_replace_safekeeper", "main")
|
||||
|
||||
log.info("Use only first 3 safekeepers")
|
||||
env.safekeepers[3].stop()
|
||||
|
||||
@@ -202,7 +202,7 @@ def test_restarts_under_load(zenith_env_builder: ZenithEnvBuilder):
|
||||
zenith_env_builder.num_safekeepers = 3
|
||||
env = zenith_env_builder.init_start()
|
||||
|
||||
env.zenith_cli.create_branch('test_wal_acceptors_restarts_under_load')
|
||||
env.zenith_cli.create_branch("test_wal_acceptors_restarts_under_load", "main")
|
||||
pg = env.postgres.create_start('test_wal_acceptors_restarts_under_load')
|
||||
|
||||
asyncio.run(run_restarts_under_load(pg, env.safekeepers))
|
||||
|
||||
@@ -7,46 +7,52 @@ from fixtures.zenith_fixtures import ZenithEnv, ZenithEnvBuilder, ZenithPageserv
|
||||
from typing import cast
|
||||
|
||||
|
||||
def helper_compare_timeline_list(pageserver_http_client: ZenithPageserverHttpClient,
|
||||
env: ZenithEnv,
|
||||
initial_tenant: uuid.UUID):
|
||||
def helper_compare_branch_list(pageserver_http_client: ZenithPageserverHttpClient,
|
||||
env: ZenithEnv,
|
||||
initial_tenant: uuid.UUID):
|
||||
"""
|
||||
Compare timelines list returned by CLI and directly via API.
|
||||
Filters out timelines created by other tests.
|
||||
Compare branches list returned by CLI and directly via API.
|
||||
Filters out branches created by other tests.
|
||||
"""
|
||||
branches = pageserver_http_client.branch_list(initial_tenant)
|
||||
branches_api = sorted(map(lambda b: cast(str, b['name']), branches))
|
||||
branches_api = [b for b in branches_api if b.startswith('test_cli_') or b in ('empty', 'main')]
|
||||
|
||||
timelines_api = sorted(
|
||||
map(lambda t: cast(str, t['timeline_id']),
|
||||
pageserver_http_client.timeline_list(initial_tenant)))
|
||||
res = env.zenith_cli.list_branches()
|
||||
branches_cli = sorted(map(lambda b: b.split(':')[-1].strip(), res.stdout.strip().split("\n")))
|
||||
branches_cli = [b for b in branches_cli if b.startswith('test_cli_') or b in ('empty', 'main')]
|
||||
|
||||
timelines_cli = env.zenith_cli.list_timelines()
|
||||
assert timelines_cli == env.zenith_cli.list_timelines(initial_tenant)
|
||||
res = env.zenith_cli.list_branches(tenant_id=initial_tenant)
|
||||
branches_cli_with_tenant_arg = sorted(
|
||||
map(lambda b: b.split(':')[-1].strip(), res.stdout.strip().split("\n")))
|
||||
branches_cli_with_tenant_arg = [
|
||||
b for b in branches_cli if b.startswith('test_cli_') or b in ('empty', 'main')
|
||||
]
|
||||
|
||||
cli_timeline_ids = sorted([timeline_id for (_, timeline_id) in timelines_cli])
|
||||
assert timelines_api == cli_timeline_ids
|
||||
assert branches_api == branches_cli == branches_cli_with_tenant_arg
|
||||
|
||||
|
||||
def test_cli_timeline_list(zenith_simple_env: ZenithEnv):
|
||||
def test_cli_branch_list(zenith_simple_env: ZenithEnv):
|
||||
env = zenith_simple_env
|
||||
pageserver_http_client = env.pageserver.http_client()
|
||||
|
||||
# Initial sanity check
|
||||
helper_compare_timeline_list(pageserver_http_client, env, env.initial_tenant)
|
||||
|
||||
# Create a branch for us
|
||||
main_timeline_id = env.zenith_cli.create_branch('test_cli_branch_list_main')
|
||||
helper_compare_timeline_list(pageserver_http_client, env, env.initial_tenant)
|
||||
helper_compare_branch_list(pageserver_http_client, env, env.initial_tenant)
|
||||
env.zenith_cli.create_branch("test_cli_branch_list_main", "empty")
|
||||
helper_compare_branch_list(pageserver_http_client, env, env.initial_tenant)
|
||||
|
||||
# Create a nested branch
|
||||
nested_timeline_id = env.zenith_cli.create_branch('test_cli_branch_list_nested',
|
||||
'test_cli_branch_list_main')
|
||||
helper_compare_timeline_list(pageserver_http_client, env, env.initial_tenant)
|
||||
res = env.zenith_cli.create_branch("test_cli_branch_list_nested", "test_cli_branch_list_main")
|
||||
assert res.stderr == ''
|
||||
helper_compare_branch_list(pageserver_http_client, env, env.initial_tenant)
|
||||
|
||||
# Check that all new branches are visible via CLI
|
||||
timelines_cli = [timeline_id for (_, timeline_id) in env.zenith_cli.list_timelines()]
|
||||
res = env.zenith_cli.list_branches()
|
||||
assert res.stderr == ''
|
||||
branches_cli = sorted(map(lambda b: b.split(':')[-1].strip(), res.stdout.strip().split("\n")))
|
||||
|
||||
assert main_timeline_id.hex in timelines_cli
|
||||
assert nested_timeline_id.hex in timelines_cli
|
||||
assert 'test_cli_branch_list_main' in branches_cli
|
||||
assert 'test_cli_branch_list_nested' in branches_cli
|
||||
|
||||
|
||||
def helper_compare_tenant_list(pageserver_http_client: ZenithPageserverHttpClient, env: ZenithEnv):
|
||||
@@ -54,6 +60,7 @@ def helper_compare_tenant_list(pageserver_http_client: ZenithPageserverHttpClien
|
||||
tenants_api = sorted(map(lambda t: cast(str, t['id']), tenants))
|
||||
|
||||
res = env.zenith_cli.list_tenants()
|
||||
assert res.stderr == ''
|
||||
tenants_cli = sorted(map(lambda t: t.split()[0], res.stdout.splitlines()))
|
||||
|
||||
assert tenants_api == tenants_cli
|
||||
@@ -66,13 +73,15 @@ def test_cli_tenant_list(zenith_simple_env: ZenithEnv):
|
||||
helper_compare_tenant_list(pageserver_http_client, env)
|
||||
|
||||
# Create new tenant
|
||||
tenant1 = env.zenith_cli.create_tenant()
|
||||
tenant1 = uuid.uuid4()
|
||||
env.zenith_cli.create_tenant(tenant1)
|
||||
|
||||
# check tenant1 appeared
|
||||
helper_compare_tenant_list(pageserver_http_client, env)
|
||||
|
||||
# Create new tenant
|
||||
tenant2 = env.zenith_cli.create_tenant()
|
||||
tenant2 = uuid.uuid4()
|
||||
env.zenith_cli.create_tenant(tenant2)
|
||||
|
||||
# check tenant2 appeared
|
||||
helper_compare_tenant_list(pageserver_http_client, env)
|
||||
|
||||
@@ -64,8 +64,9 @@ class ZenithCompare(PgCompare):
|
||||
self._pg_bin = pg_bin
|
||||
|
||||
# We only use one branch and one timeline
|
||||
self.env.zenith_cli.create_branch(branch_name, 'empty')
|
||||
self._pg = self.env.postgres.create_start(branch_name)
|
||||
self.branch = branch_name
|
||||
self.env.zenith_cli.create_branch(self.branch, "empty")
|
||||
self._pg = self.env.postgres.create_start(self.branch)
|
||||
self.timeline = self.pg.safe_psql("SHOW zenith.zenith_timeline")[0][0]
|
||||
|
||||
# Long-lived cursor, useful for flushing
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from dataclasses import field
|
||||
from dataclasses import dataclass, field
|
||||
import textwrap
|
||||
from cached_property import cached_property
|
||||
import asyncpg
|
||||
@@ -29,6 +29,7 @@ from dataclasses import dataclass
|
||||
from psycopg2.extensions import connection as PgConnection
|
||||
from typing import Any, Callable, Dict, Iterable, Iterator, List, Optional, TypeVar, cast, Union, Tuple
|
||||
from typing_extensions import Literal
|
||||
import pytest
|
||||
|
||||
import requests
|
||||
import backoff # type: ignore
|
||||
@@ -57,7 +58,6 @@ Fn = TypeVar('Fn', bound=Callable[..., Any])
|
||||
|
||||
DEFAULT_OUTPUT_DIR = 'test_output'
|
||||
DEFAULT_POSTGRES_DIR = 'tmp_install'
|
||||
DEFAULT_BRANCH_NAME = 'main'
|
||||
|
||||
BASE_PORT = 15000
|
||||
WORKER_PORT_NUM = 100
|
||||
@@ -219,7 +219,7 @@ def can_bind(host: str, port: int) -> bool:
|
||||
|
||||
|
||||
class PortDistributor:
|
||||
def __init__(self, base_port: int, port_number: int):
|
||||
def __init__(self, base_port: int, port_number: int) -> None:
|
||||
self.iterator = iter(range(base_port, base_port + port_number))
|
||||
|
||||
def get_port(self) -> int:
|
||||
@@ -424,8 +424,7 @@ class ZenithEnvBuilder:
|
||||
pageserver_config_override: Optional[str] = None,
|
||||
num_safekeepers: int = 0,
|
||||
pageserver_auth_enabled: bool = False,
|
||||
rust_log_override: Optional[str] = None,
|
||||
default_branch_name=DEFAULT_BRANCH_NAME):
|
||||
rust_log_override: Optional[str] = None):
|
||||
self.repo_dir = repo_dir
|
||||
self.rust_log_override = rust_log_override
|
||||
self.port_distributor = port_distributor
|
||||
@@ -433,7 +432,6 @@ class ZenithEnvBuilder:
|
||||
self.pageserver_config_override = pageserver_config_override
|
||||
self.num_safekeepers = num_safekeepers
|
||||
self.pageserver_auth_enabled = pageserver_auth_enabled
|
||||
self.default_branch_name = default_branch_name
|
||||
self.env: Optional[ZenithEnv] = None
|
||||
|
||||
self.s3_mock_server: Optional[MockS3Server] = None
|
||||
@@ -538,7 +536,7 @@ class ZenithEnv:
|
||||
|
||||
initial_tenant - tenant ID of the initial tenant created in the repository
|
||||
|
||||
zenith_cli - can be used to run the 'zenith' CLI tool
|
||||
zenith_cli() - zenith_cli() can be used to run the 'zenith' CLI tool
|
||||
|
||||
create_tenant() - initializes a new tenant in the page server, returns
|
||||
the tenant id
|
||||
@@ -549,7 +547,9 @@ class ZenithEnv:
|
||||
self.port_distributor = config.port_distributor
|
||||
self.s3_mock_server = config.s3_mock_server
|
||||
self.zenith_cli = ZenithCli(env=self)
|
||||
|
||||
self.postgres = PostgresFactory(self)
|
||||
|
||||
self.safekeepers: List[Safekeeper] = []
|
||||
|
||||
# generate initial tenant ID here instead of letting 'zenith init' generate it,
|
||||
@@ -558,7 +558,7 @@ class ZenithEnv:
|
||||
|
||||
# Create a config file corresponding to the options
|
||||
toml = textwrap.dedent(f"""
|
||||
default_tenant_id = '{self.initial_tenant.hex}'
|
||||
default_tenantid = '{self.initial_tenant.hex}'
|
||||
""")
|
||||
|
||||
# Create config for pageserver
|
||||
@@ -600,6 +600,7 @@ class ZenithEnv:
|
||||
self.safekeepers.append(safekeeper)
|
||||
|
||||
log.info(f"Config: {toml}")
|
||||
|
||||
self.zenith_cli.init(toml)
|
||||
|
||||
def start(self):
|
||||
@@ -613,14 +614,11 @@ class ZenithEnv:
|
||||
""" Get list of safekeeper endpoints suitable for wal_acceptors GUC """
|
||||
return ','.join([f'localhost:{wa.port.pg}' for wa in self.safekeepers])
|
||||
|
||||
def run_psbench(self, timeline):
|
||||
ps_log_filename = os.path.join(self.repo_dir, "pageserver.log")
|
||||
ps_connstr = self.pageserver.connstr()
|
||||
psbench_binpath = os.path.join(str(zenith_binpath), 'psbench')
|
||||
tenant_hex = self.initial_tenant.hex
|
||||
print("AAAAAAAA", ps_connstr)
|
||||
args = [psbench_binpath, ps_log_filename, ps_connstr, tenant_hex, timeline]
|
||||
subprocess.run(args)
|
||||
def create_tenant(self, tenant_id: Optional[uuid.UUID] = None) -> uuid.UUID:
|
||||
if tenant_id is None:
|
||||
tenant_id = uuid.uuid4()
|
||||
self.zenith_cli.create_tenant(tenant_id)
|
||||
return tenant_id
|
||||
|
||||
@cached_property
|
||||
def auth_keys(self) -> AuthKeys:
|
||||
@@ -645,11 +643,13 @@ def _shared_simple_env(request: Any, port_distributor) -> Iterator[ZenithEnv]:
|
||||
shutil.rmtree(repo_dir, ignore_errors=True)
|
||||
|
||||
with ZenithEnvBuilder(Path(repo_dir), port_distributor) as builder:
|
||||
|
||||
env = builder.init_start()
|
||||
|
||||
# For convenience in tests, create a branch from the freshly-initialized cluster.
|
||||
env.zenith_cli.create_branch('empty', ancestor_branch_name=DEFAULT_BRANCH_NAME)
|
||||
env.zenith_cli.create_branch("empty", "main")
|
||||
|
||||
# Return the builder to the caller
|
||||
yield env
|
||||
|
||||
|
||||
@@ -698,7 +698,7 @@ class ZenithPageserverApiException(Exception):
|
||||
|
||||
|
||||
class ZenithPageserverHttpClient(requests.Session):
|
||||
def __init__(self, port: int, auth_token: Optional[str] = None):
|
||||
def __init__(self, port: int, auth_token: Optional[str] = None) -> None:
|
||||
super().__init__()
|
||||
self.port = port
|
||||
self.auth_token = auth_token
|
||||
@@ -721,36 +721,38 @@ class ZenithPageserverHttpClient(requests.Session):
|
||||
|
||||
def timeline_attach(self, tenant_id: uuid.UUID, timeline_id: uuid.UUID):
|
||||
res = self.post(
|
||||
f"http://localhost:{self.port}/v1/tenant/{tenant_id.hex}/timeline/{timeline_id.hex}/attach",
|
||||
)
|
||||
f"http://localhost:{self.port}/v1/timeline/{tenant_id.hex}/{timeline_id.hex}/attach", )
|
||||
self.verbose_error(res)
|
||||
|
||||
def timeline_detach(self, tenant_id: uuid.UUID, timeline_id: uuid.UUID):
|
||||
res = self.post(
|
||||
f"http://localhost:{self.port}/v1/tenant/{tenant_id.hex}/timeline/{timeline_id.hex}/detach",
|
||||
)
|
||||
f"http://localhost:{self.port}/v1/timeline/{tenant_id.hex}/{timeline_id.hex}/detach", )
|
||||
self.verbose_error(res)
|
||||
|
||||
def timeline_create(
|
||||
self,
|
||||
tenant_id: uuid.UUID,
|
||||
new_timeline_id: Optional[uuid.UUID] = None,
|
||||
ancestor_timeline_id: Optional[uuid.UUID] = None,
|
||||
ancestor_start_lsn: Optional[str] = None,
|
||||
) -> Dict[Any, Any]:
|
||||
res = self.post(f"http://localhost:{self.port}/v1/tenant/{tenant_id.hex}/timeline",
|
||||
def branch_list(self, tenant_id: uuid.UUID) -> List[Dict[Any, Any]]:
|
||||
res = self.get(f"http://localhost:{self.port}/v1/branch/{tenant_id.hex}")
|
||||
self.verbose_error(res)
|
||||
res_json = res.json()
|
||||
assert isinstance(res_json, list)
|
||||
return res_json
|
||||
|
||||
def branch_create(self, tenant_id: uuid.UUID, name: str, start_point: str) -> Dict[Any, Any]:
|
||||
res = self.post(f"http://localhost:{self.port}/v1/branch",
|
||||
json={
|
||||
'new_timeline_id':
|
||||
new_timeline_id.hex if new_timeline_id else None,
|
||||
'ancestor_start_lsn':
|
||||
ancestor_start_lsn,
|
||||
'ancestor_timeline_id':
|
||||
ancestor_timeline_id.hex if ancestor_timeline_id else None,
|
||||
'tenant_id': tenant_id.hex,
|
||||
'name': name,
|
||||
'start_point': start_point,
|
||||
})
|
||||
self.verbose_error(res)
|
||||
if res.status_code == 409:
|
||||
raise Exception(f'could not create timeline: already exists for id {new_timeline_id}')
|
||||
res_json = res.json()
|
||||
assert isinstance(res_json, dict)
|
||||
return res_json
|
||||
|
||||
def branch_detail(self, tenant_id: uuid.UUID, name: str) -> Dict[Any, Any]:
|
||||
res = self.get(
|
||||
f"http://localhost:{self.port}/v1/branch/{tenant_id.hex}/{name}?include-non-incremental-logical-size=1",
|
||||
)
|
||||
self.verbose_error(res)
|
||||
res_json = res.json()
|
||||
assert isinstance(res_json, dict)
|
||||
return res_json
|
||||
@@ -762,22 +764,18 @@ class ZenithPageserverHttpClient(requests.Session):
|
||||
assert isinstance(res_json, list)
|
||||
return res_json
|
||||
|
||||
def tenant_create(self, new_tenant_id: Optional[uuid.UUID] = None) -> uuid.UUID:
|
||||
def tenant_create(self, tenant_id: uuid.UUID):
|
||||
res = self.post(
|
||||
f"http://localhost:{self.port}/v1/tenant",
|
||||
json={
|
||||
'new_tenant_id': new_tenant_id.hex if new_tenant_id else None,
|
||||
'tenant_id': tenant_id.hex,
|
||||
},
|
||||
)
|
||||
self.verbose_error(res)
|
||||
if res.status_code == 409:
|
||||
raise Exception(f'could not create tenant: already exists for id {new_tenant_id}')
|
||||
new_tenant_id = res.json()
|
||||
assert isinstance(new_tenant_id, str)
|
||||
return uuid.UUID(new_tenant_id)
|
||||
return res.json()
|
||||
|
||||
def timeline_list(self, tenant_id: uuid.UUID) -> List[Dict[Any, Any]]:
|
||||
res = self.get(f"http://localhost:{self.port}/v1/tenant/{tenant_id.hex}/timeline")
|
||||
def timeline_list(self, tenant_id: uuid.UUID) -> List[str]:
|
||||
res = self.get(f"http://localhost:{self.port}/v1/timeline/{tenant_id.hex}")
|
||||
self.verbose_error(res)
|
||||
res_json = res.json()
|
||||
assert isinstance(res_json, list)
|
||||
@@ -785,8 +783,7 @@ class ZenithPageserverHttpClient(requests.Session):
|
||||
|
||||
def timeline_detail(self, tenant_id: uuid.UUID, timeline_id: uuid.UUID) -> Dict[Any, Any]:
|
||||
res = self.get(
|
||||
f"http://localhost:{self.port}/v1/tenant/{tenant_id.hex}/timeline/{timeline_id.hex}?include-non-incremental-logical-size=1"
|
||||
)
|
||||
f"http://localhost:{self.port}/v1/timeline/{tenant_id.hex}/{timeline_id.hex}")
|
||||
self.verbose_error(res)
|
||||
res_json = res.json()
|
||||
assert isinstance(res_json, dict)
|
||||
@@ -820,124 +817,54 @@ class S3Storage:
|
||||
|
||||
RemoteStorage = Union[LocalFsStorage, S3Storage]
|
||||
|
||||
CREATE_TIMELINE_ID_EXTRACTOR = re.compile(r"^Created timeline '(?P<timeline_id>[^']+)'",
|
||||
re.MULTILINE)
|
||||
CREATE_TIMELINE_ID_EXTRACTOR = re.compile(r"^Created timeline '(?P<timeline_id>[^']+)'",
|
||||
re.MULTILINE)
|
||||
TIMELINE_DATA_EXTRACTOR = re.compile(r"\s(?P<branch_name>[^\s]+)\s\[(?P<timeline_id>[^\]]+)\]",
|
||||
re.MULTILINE)
|
||||
|
||||
|
||||
class ZenithCli:
|
||||
"""
|
||||
A typed wrapper around the `zenith` CLI tool.
|
||||
Supports main commands via typed methods and a way to run arbitrary command directly via CLI.
|
||||
"""
|
||||
def __init__(self, env: ZenithEnv):
|
||||
def __init__(self, env: ZenithEnv) -> None:
|
||||
self.env = env
|
||||
pass
|
||||
|
||||
def create_tenant(self, tenant_id: Optional[uuid.UUID] = None) -> uuid.UUID:
|
||||
"""
|
||||
Creates a new tenant, returns its id and its initial timeline's id.
|
||||
"""
|
||||
if tenant_id is None:
|
||||
tenant_id = uuid.uuid4()
|
||||
res = self.raw_cli(['tenant', 'create', '--tenant-id', tenant_id.hex])
|
||||
res.check_returncode()
|
||||
self.raw_cli(['tenant', 'create', tenant_id.hex])
|
||||
return tenant_id
|
||||
|
||||
def list_tenants(self) -> 'subprocess.CompletedProcess[str]':
|
||||
res = self.raw_cli(['tenant', 'list'])
|
||||
res.check_returncode()
|
||||
return res
|
||||
|
||||
def create_timeline(self,
|
||||
new_branch_name: str,
|
||||
tenant_id: Optional[uuid.UUID] = None) -> uuid.UUID:
|
||||
cmd = [
|
||||
'timeline',
|
||||
'create',
|
||||
'--branch-name',
|
||||
new_branch_name,
|
||||
'--tenant-id',
|
||||
(tenant_id or self.env.initial_tenant).hex,
|
||||
]
|
||||
|
||||
res = self.raw_cli(cmd)
|
||||
res.check_returncode()
|
||||
|
||||
matches = CREATE_TIMELINE_ID_EXTRACTOR.search(res.stdout)
|
||||
|
||||
created_timeline_id = None
|
||||
if matches is not None:
|
||||
created_timeline_id = matches.group('timeline_id')
|
||||
|
||||
return uuid.UUID(created_timeline_id)
|
||||
return self.raw_cli(['tenant', 'list'])
|
||||
|
||||
def create_branch(self,
|
||||
new_branch_name: str = DEFAULT_BRANCH_NAME,
|
||||
ancestor_branch_name: Optional[str] = None,
|
||||
tenant_id: Optional[uuid.UUID] = None,
|
||||
ancestor_start_lsn: Optional[str] = None) -> uuid.UUID:
|
||||
cmd = [
|
||||
'timeline',
|
||||
'branch',
|
||||
'--branch-name',
|
||||
new_branch_name,
|
||||
'--tenant-id',
|
||||
(tenant_id or self.env.initial_tenant).hex,
|
||||
]
|
||||
if ancestor_branch_name is not None:
|
||||
cmd.extend(['--ancestor-branch-name', ancestor_branch_name])
|
||||
if ancestor_start_lsn is not None:
|
||||
cmd.extend(['--ancestor-start-lsn', ancestor_start_lsn])
|
||||
branch_name: str,
|
||||
starting_point: str,
|
||||
tenant_id: Optional[uuid.UUID] = None) -> 'subprocess.CompletedProcess[str]':
|
||||
args = ['branch']
|
||||
if tenant_id is not None:
|
||||
args.extend(['--tenantid', tenant_id.hex])
|
||||
args.extend([branch_name, starting_point])
|
||||
|
||||
res = self.raw_cli(cmd)
|
||||
res.check_returncode()
|
||||
return self.raw_cli(args)
|
||||
|
||||
matches = CREATE_TIMELINE_ID_EXTRACTOR.search(res.stdout)
|
||||
def list_branches(self,
|
||||
tenant_id: Optional[uuid.UUID] = None) -> 'subprocess.CompletedProcess[str]':
|
||||
args = ['branch']
|
||||
if tenant_id is not None:
|
||||
args.extend(['--tenantid', tenant_id.hex])
|
||||
return self.raw_cli(args)
|
||||
|
||||
created_timeline_id = None
|
||||
if matches is not None:
|
||||
created_timeline_id = matches.group('timeline_id')
|
||||
|
||||
if created_timeline_id is None:
|
||||
raise Exception('could not find timeline id after `zenith timeline create` invocation')
|
||||
else:
|
||||
return uuid.UUID(created_timeline_id)
|
||||
|
||||
def list_timelines(self, tenant_id: Optional[uuid.UUID] = None) -> List[Tuple[str, str]]:
|
||||
"""
|
||||
Returns a list of (branch_name, timeline_id) tuples out of parsed `zenith timeline list` CLI output.
|
||||
"""
|
||||
|
||||
# (L) main [b49f7954224a0ad25cc0013ea107b54b]
|
||||
# (L) ┣━ @0/16B5A50: test_cli_branch_list_main [20f98c79111b9015d84452258b7d5540]
|
||||
res = self.raw_cli(
|
||||
['timeline', 'list', '--tenant-id', (tenant_id or self.env.initial_tenant).hex])
|
||||
timelines_cli = sorted(
|
||||
map(lambda branch_and_id: (branch_and_id[0], branch_and_id[1]),
|
||||
TIMELINE_DATA_EXTRACTOR.findall(res.stdout)))
|
||||
return timelines_cli
|
||||
|
||||
def init(self,
|
||||
config_toml: str,
|
||||
initial_timeline_id: Optional[uuid.UUID] = None) -> 'subprocess.CompletedProcess[str]':
|
||||
def init(self, config_toml: str) -> 'subprocess.CompletedProcess[str]':
|
||||
with tempfile.NamedTemporaryFile(mode='w+') as tmp:
|
||||
tmp.write(config_toml)
|
||||
tmp.flush()
|
||||
|
||||
cmd = ['init', f'--config={tmp.name}']
|
||||
if initial_timeline_id:
|
||||
cmd.extend(['--timeline-id', initial_timeline_id.hex])
|
||||
append_pageserver_param_overrides(cmd,
|
||||
self.env.pageserver.remote_storage,
|
||||
self.env.pageserver.config_override)
|
||||
|
||||
res = self.raw_cli(cmd)
|
||||
res.check_returncode()
|
||||
return res
|
||||
return self.raw_cli(cmd)
|
||||
|
||||
def pageserver_start(self, overrides=()) -> 'subprocess.CompletedProcess[str]':
|
||||
start_args = ['pageserver', 'start', *overrides]
|
||||
@@ -969,54 +896,38 @@ class ZenithCli:
|
||||
|
||||
def pg_create(
|
||||
self,
|
||||
branch_name: str,
|
||||
node_name: Optional[str] = None,
|
||||
node_name: str,
|
||||
tenant_id: Optional[uuid.UUID] = None,
|
||||
lsn: Optional[str] = None,
|
||||
timeline_spec: Optional[str] = None,
|
||||
port: Optional[int] = None,
|
||||
) -> 'subprocess.CompletedProcess[str]':
|
||||
args = [
|
||||
'pg',
|
||||
'create',
|
||||
'--tenant-id',
|
||||
(tenant_id or self.env.initial_tenant).hex,
|
||||
'--branch-name',
|
||||
branch_name,
|
||||
]
|
||||
if lsn is not None:
|
||||
args.extend(['--lsn', lsn])
|
||||
args = ['pg', 'create']
|
||||
if tenant_id is not None:
|
||||
args.extend(['--tenantid', tenant_id.hex])
|
||||
if port is not None:
|
||||
args.extend(['--port', str(port)])
|
||||
if node_name is not None:
|
||||
args.append(node_name)
|
||||
|
||||
res = self.raw_cli(args)
|
||||
res.check_returncode()
|
||||
return res
|
||||
args.append(f'--port={port}')
|
||||
args.append(node_name)
|
||||
if timeline_spec is not None:
|
||||
args.append(timeline_spec)
|
||||
return self.raw_cli(args)
|
||||
|
||||
def pg_start(
|
||||
self,
|
||||
node_name: str,
|
||||
tenant_id: Optional[uuid.UUID] = None,
|
||||
lsn: Optional[str] = None,
|
||||
timeline_spec: Optional[str] = None,
|
||||
port: Optional[int] = None,
|
||||
) -> 'subprocess.CompletedProcess[str]':
|
||||
args = [
|
||||
'pg',
|
||||
'start',
|
||||
'--tenant-id',
|
||||
(tenant_id or self.env.initial_tenant).hex,
|
||||
]
|
||||
if lsn is not None:
|
||||
args.append(f'--lsn={lsn}')
|
||||
args = ['pg', 'start']
|
||||
if tenant_id is not None:
|
||||
args.extend(['--tenantid', tenant_id.hex])
|
||||
if port is not None:
|
||||
args.append(f'--port={port}')
|
||||
if node_name is not None:
|
||||
args.append(node_name)
|
||||
args.append(node_name)
|
||||
if timeline_spec is not None:
|
||||
args.append(timeline_spec)
|
||||
|
||||
res = self.raw_cli(args)
|
||||
res.check_returncode()
|
||||
return res
|
||||
return self.raw_cli(args)
|
||||
|
||||
def pg_stop(
|
||||
self,
|
||||
@@ -1024,16 +935,12 @@ class ZenithCli:
|
||||
tenant_id: Optional[uuid.UUID] = None,
|
||||
destroy=False,
|
||||
) -> 'subprocess.CompletedProcess[str]':
|
||||
args = [
|
||||
'pg',
|
||||
'stop',
|
||||
'--tenant-id',
|
||||
(tenant_id or self.env.initial_tenant).hex,
|
||||
]
|
||||
args = ['pg', 'stop']
|
||||
if tenant_id is not None:
|
||||
args.extend(['--tenantid', tenant_id.hex])
|
||||
if destroy:
|
||||
args.append('--destroy')
|
||||
if node_name is not None:
|
||||
args.append(node_name)
|
||||
args.append(node_name)
|
||||
|
||||
return self.raw_cli(args)
|
||||
|
||||
@@ -1108,7 +1015,8 @@ class ZenithPageserver(PgProtocol):
|
||||
env: ZenithEnv,
|
||||
port: PageserverPort,
|
||||
remote_storage: Optional[RemoteStorage] = None,
|
||||
config_override: Optional[str] = None):
|
||||
config_override: Optional[str] = None,
|
||||
enable_auth=False):
|
||||
super().__init__(host='localhost', port=port.pg, username='zenith_admin')
|
||||
self.env = env
|
||||
self.running = False
|
||||
@@ -1136,6 +1044,7 @@ class ZenithPageserver(PgProtocol):
|
||||
if self.running:
|
||||
self.env.zenith_cli.pageserver_stop(immediate)
|
||||
self.running = False
|
||||
|
||||
return self
|
||||
|
||||
def __enter__(self):
|
||||
@@ -1196,7 +1105,7 @@ class PgBin:
|
||||
self.env = os.environ.copy()
|
||||
self.env['LD_LIBRARY_PATH'] = os.path.join(str(pg_distrib_dir), 'lib')
|
||||
|
||||
def _fixpath(self, command: List[str]):
|
||||
def _fixpath(self, command: List[str]) -> None:
|
||||
if '/' not in command[0]:
|
||||
command[0] = os.path.join(self.pg_bin_path, command[0])
|
||||
|
||||
@@ -1207,7 +1116,7 @@ class PgBin:
|
||||
env.update(env_add)
|
||||
return env
|
||||
|
||||
def run(self, command: List[str], env: Optional[Env] = None, cwd: Optional[str] = None):
|
||||
def run(self, command: List[str], env: Optional[Env] = None, cwd: Optional[str] = None) -> None:
|
||||
"""
|
||||
Run one of the postgres binaries.
|
||||
|
||||
@@ -1257,18 +1166,18 @@ class VanillaPostgres(PgProtocol):
|
||||
self.running = False
|
||||
self.pg_bin.run_capture(['initdb', '-D', pgdatadir])
|
||||
|
||||
def configure(self, options: List[str]):
|
||||
def configure(self, options: List[str]) -> None:
|
||||
"""Append lines into postgresql.conf file."""
|
||||
assert not self.running
|
||||
with open(os.path.join(self.pgdatadir, 'postgresql.conf'), 'a') as conf_file:
|
||||
conf_file.writelines(options)
|
||||
|
||||
def start(self):
|
||||
def start(self) -> None:
|
||||
assert not self.running
|
||||
self.running = True
|
||||
self.pg_bin.run_capture(['pg_ctl', '-D', self.pgdatadir, 'start'])
|
||||
|
||||
def stop(self):
|
||||
def stop(self) -> None:
|
||||
assert self.running
|
||||
self.running = False
|
||||
self.pg_bin.run_capture(['pg_ctl', '-D', self.pgdatadir, 'stop'])
|
||||
@@ -1351,9 +1260,8 @@ class Postgres(PgProtocol):
|
||||
|
||||
def create(
|
||||
self,
|
||||
branch_name: str,
|
||||
node_name: Optional[str] = None,
|
||||
lsn: Optional[str] = None,
|
||||
node_name: str,
|
||||
branch: Optional[str] = None,
|
||||
config_lines: Optional[List[str]] = None,
|
||||
) -> 'Postgres':
|
||||
"""
|
||||
@@ -1364,21 +1272,19 @@ class Postgres(PgProtocol):
|
||||
if not config_lines:
|
||||
config_lines = []
|
||||
|
||||
self.node_name = node_name or f'{branch_name}_pg_node'
|
||||
self.env.zenith_cli.pg_create(branch_name,
|
||||
node_name=self.node_name,
|
||||
if branch is None:
|
||||
branch = node_name
|
||||
|
||||
self.env.zenith_cli.pg_create(node_name,
|
||||
tenant_id=self.tenant_id,
|
||||
lsn=lsn,
|
||||
port=self.port)
|
||||
port=self.port,
|
||||
timeline_spec=branch)
|
||||
self.node_name = node_name
|
||||
path = pathlib.Path('pgdatadirs') / 'tenants' / self.tenant_id.hex / self.node_name
|
||||
self.pgdata_dir = os.path.join(self.env.repo_dir, path)
|
||||
|
||||
if config_lines is None:
|
||||
config_lines = []
|
||||
|
||||
# set small 'max_replication_write_lag' to enable backpressure
|
||||
# and make tests more stable.
|
||||
config_lines = ['max_replication_write_lag=15MB'] + config_lines
|
||||
self.config(config_lines)
|
||||
|
||||
return self
|
||||
@@ -1465,7 +1371,7 @@ class Postgres(PgProtocol):
|
||||
|
||||
if self.running:
|
||||
assert self.node_name is not None
|
||||
self.env.zenith_cli.pg_stop(self.node_name, self.tenant_id)
|
||||
self.env.zenith_cli.pg_stop(self.node_name, tenant_id=self.tenant_id)
|
||||
self.running = False
|
||||
|
||||
return self
|
||||
@@ -1477,16 +1383,15 @@ class Postgres(PgProtocol):
|
||||
"""
|
||||
|
||||
assert self.node_name is not None
|
||||
self.env.zenith_cli.pg_stop(self.node_name, self.tenant_id, True)
|
||||
self.env.zenith_cli.pg_stop(self.node_name, self.tenant_id, destroy=True)
|
||||
self.node_name = None
|
||||
|
||||
return self
|
||||
|
||||
def create_start(
|
||||
self,
|
||||
branch_name: str,
|
||||
node_name: Optional[str] = None,
|
||||
lsn: Optional[str] = None,
|
||||
node_name: str,
|
||||
branch: Optional[str] = None,
|
||||
config_lines: Optional[List[str]] = None,
|
||||
) -> 'Postgres':
|
||||
"""
|
||||
@@ -1496,10 +1401,9 @@ class Postgres(PgProtocol):
|
||||
"""
|
||||
|
||||
self.create(
|
||||
branch_name=branch_name,
|
||||
node_name=node_name,
|
||||
branch=branch,
|
||||
config_lines=config_lines,
|
||||
lsn=lsn,
|
||||
).start()
|
||||
|
||||
return self
|
||||
@@ -1519,10 +1423,9 @@ class PostgresFactory:
|
||||
self.instances: List[Postgres] = []
|
||||
|
||||
def create_start(self,
|
||||
branch_name: str,
|
||||
node_name: Optional[str] = None,
|
||||
node_name: str = "main",
|
||||
branch: Optional[str] = None,
|
||||
tenant_id: Optional[uuid.UUID] = None,
|
||||
lsn: Optional[str] = None,
|
||||
config_lines: Optional[List[str]] = None) -> Postgres:
|
||||
|
||||
pg = Postgres(
|
||||
@@ -1534,17 +1437,15 @@ class PostgresFactory:
|
||||
self.instances.append(pg)
|
||||
|
||||
return pg.create_start(
|
||||
branch_name=branch_name,
|
||||
node_name=node_name,
|
||||
branch=branch,
|
||||
config_lines=config_lines,
|
||||
lsn=lsn,
|
||||
)
|
||||
|
||||
def create(self,
|
||||
branch_name: str,
|
||||
node_name: Optional[str] = None,
|
||||
node_name: str = "main",
|
||||
branch: Optional[str] = None,
|
||||
tenant_id: Optional[uuid.UUID] = None,
|
||||
lsn: Optional[str] = None,
|
||||
config_lines: Optional[List[str]] = None) -> Postgres:
|
||||
|
||||
pg = Postgres(
|
||||
@@ -1557,9 +1458,8 @@ class PostgresFactory:
|
||||
self.instances.append(pg)
|
||||
|
||||
return pg.create(
|
||||
branch_name=branch_name,
|
||||
node_name=node_name,
|
||||
lsn=lsn,
|
||||
branch=branch,
|
||||
config_lines=config_lines,
|
||||
)
|
||||
|
||||
@@ -1662,7 +1562,7 @@ class SafekeeperMetrics:
|
||||
|
||||
|
||||
class SafekeeperHttpClient(requests.Session):
|
||||
def __init__(self, port: int):
|
||||
def __init__(self, port: int) -> None:
|
||||
super().__init__()
|
||||
self.port = port
|
||||
|
||||
@@ -1780,7 +1680,7 @@ def list_files_to_compare(pgdata_dir: str):
|
||||
# pg is the existing and running compute node, that we want to compare with a basebackup
|
||||
def check_restored_datadir_content(test_output_dir: str, env: ZenithEnv, pg: Postgres):
|
||||
|
||||
# Get the timeline ID. We need it for the 'basebackup' command
|
||||
# Get the timeline ID of our branch. We need it for the 'basebackup' command
|
||||
with closing(pg.connect()) as conn:
|
||||
with conn.cursor() as cur:
|
||||
cur.execute("SHOW zenith.zenith_timeline")
|
||||
|
||||
@@ -30,16 +30,21 @@ def test_bulk_tenant_create(
|
||||
for i in range(tenants_count):
|
||||
start = timeit.default_timer()
|
||||
|
||||
tenant = env.zenith_cli.create_tenant()
|
||||
env.zenith_cli.create_timeline(
|
||||
f'test_bulk_tenant_create_{tenants_count}_{i}_{use_wal_acceptors}', tenant_id=tenant)
|
||||
tenant = env.create_tenant()
|
||||
env.zenith_cli.create_branch(
|
||||
f"test_bulk_tenant_create_{tenants_count}_{i}_{use_wal_acceptors}",
|
||||
"main",
|
||||
tenant_id=tenant)
|
||||
|
||||
# FIXME: We used to start new safekeepers here. Did that make sense? Should we do it now?
|
||||
#if use_wal_acceptors == 'with_wa':
|
||||
# wa_factory.start_n_new(3)
|
||||
|
||||
pg_tenant = env.postgres.create_start(
|
||||
f'test_bulk_tenant_create_{tenants_count}_{i}_{use_wal_acceptors}', tenant_id=tenant)
|
||||
f"test_bulk_tenant_create_{tenants_count}_{i}_{use_wal_acceptors}",
|
||||
None, # branch name, None means same as node name
|
||||
tenant,
|
||||
)
|
||||
|
||||
end = timeit.default_timer()
|
||||
time_slices.append(end - start)
|
||||
|
||||
@@ -1,32 +0,0 @@
|
||||
from contextlib import closing
|
||||
from fixtures.zenith_fixtures import ZenithEnv
|
||||
from fixtures.benchmark_fixture import MetricReport, ZenithBenchmarker
|
||||
|
||||
|
||||
def test_get_page(zenith_simple_env: ZenithEnv, zenbenchmark: ZenithBenchmarker):
|
||||
env = zenith_simple_env
|
||||
env.zenith_cli.create_branch("test_pageserver", "empty")
|
||||
pg = env.postgres.create_start('test_pageserver')
|
||||
tenant_hex = env.initial_tenant.hex
|
||||
timeline = pg.safe_psql("SHOW zenith.zenith_timeline")[0][0]
|
||||
|
||||
# Long-lived cursor, useful for flushing
|
||||
psconn = env.pageserver.connect()
|
||||
pscur = psconn.cursor()
|
||||
|
||||
with closing(pg.connect()) as conn:
|
||||
with conn.cursor() as cur:
|
||||
cur.execute('create table t (i integer);')
|
||||
cur.execute('insert into t values (0);')
|
||||
|
||||
for i in range(1000):
|
||||
cur.execute(f'update t set i = {i};')
|
||||
|
||||
pscur.execute(f"do_gc {env.initial_tenant.hex} {timeline} 0")
|
||||
|
||||
cur.execute("select * from t;")
|
||||
res = cur.fetchall()
|
||||
print("AAAA")
|
||||
print(res)
|
||||
|
||||
env.run_psbench(timeline)
|
||||
Submodule vendor/postgres updated: 093aa160e5...31dc24ab29
@@ -11,7 +11,7 @@ use std::io::{ErrorKind, Write};
|
||||
use std::path::{Path, PathBuf};
|
||||
use std::thread;
|
||||
use tracing::*;
|
||||
use walkeeper::control_file::{self};
|
||||
use walkeeper::control_file::{self, CreateControlFile};
|
||||
use zenith_utils::http::endpoint;
|
||||
use zenith_utils::zid::ZNodeId;
|
||||
use zenith_utils::{logging, tcp_listener, GIT_VERSION};
|
||||
@@ -108,7 +108,10 @@ fn main() -> Result<()> {
|
||||
.get_matches();
|
||||
|
||||
if let Some(addr) = arg_matches.value_of("dump-control-file") {
|
||||
let state = control_file::FileStorage::load_control_file(Path::new(addr))?;
|
||||
let state = control_file::FileStorage::load_control_file(
|
||||
Path::new(addr),
|
||||
CreateControlFile::False,
|
||||
)?;
|
||||
let json = serde_json::to_string(&state)?;
|
||||
print!("{}", json);
|
||||
return Ok(());
|
||||
|
||||
@@ -27,6 +27,13 @@ const CONTROL_FILE_NAME: &str = "safekeeper.control";
|
||||
const CONTROL_FILE_NAME_PARTIAL: &str = "safekeeper.control.partial";
|
||||
pub const CHECKSUM_SIZE: usize = std::mem::size_of::<u32>();
|
||||
|
||||
// A named boolean.
|
||||
#[derive(Debug)]
|
||||
pub enum CreateControlFile {
|
||||
True,
|
||||
False,
|
||||
}
|
||||
|
||||
lazy_static! {
|
||||
static ref PERSIST_CONTROL_FILE_SECONDS: HistogramVec = register_histogram_vec!(
|
||||
"safekeeper_persist_control_file_seconds",
|
||||
@@ -87,22 +94,28 @@ impl FileStorage {
|
||||
pub fn load_control_file_conf(
|
||||
conf: &SafeKeeperConf,
|
||||
zttid: &ZTenantTimelineId,
|
||||
create: CreateControlFile,
|
||||
) -> Result<SafeKeeperState> {
|
||||
let path = conf.timeline_dir(zttid).join(CONTROL_FILE_NAME);
|
||||
Self::load_control_file(path)
|
||||
Self::load_control_file(path, create)
|
||||
}
|
||||
|
||||
/// Read in the control file.
|
||||
/// If create=false and file doesn't exist, bails out.
|
||||
pub fn load_control_file<P: AsRef<Path>>(control_file_path: P) -> Result<SafeKeeperState> {
|
||||
pub fn load_control_file<P: AsRef<Path>>(
|
||||
control_file_path: P,
|
||||
create: CreateControlFile,
|
||||
) -> Result<SafeKeeperState> {
|
||||
info!(
|
||||
"loading control file {}",
|
||||
"loading control file {}, create={:?}",
|
||||
control_file_path.as_ref().display(),
|
||||
create,
|
||||
);
|
||||
|
||||
let mut control_file = OpenOptions::new()
|
||||
.read(true)
|
||||
.write(true)
|
||||
.create(matches!(create, CreateControlFile::True))
|
||||
.open(&control_file_path)
|
||||
.with_context(|| {
|
||||
format!(
|
||||
@@ -111,32 +124,41 @@ impl FileStorage {
|
||||
)
|
||||
})?;
|
||||
|
||||
let mut buf = Vec::new();
|
||||
control_file
|
||||
.read_to_end(&mut buf)
|
||||
.context("failed to read control file")?;
|
||||
// Empty file is legit on 'create', don't try to deser from it.
|
||||
let state = if control_file.metadata().unwrap().len() == 0 {
|
||||
if let CreateControlFile::False = create {
|
||||
bail!("control file is empty");
|
||||
}
|
||||
SafeKeeperState::new()
|
||||
} else {
|
||||
let mut buf = Vec::new();
|
||||
control_file
|
||||
.read_to_end(&mut buf)
|
||||
.context("failed to read control file")?;
|
||||
|
||||
let calculated_checksum = crc32c::crc32c(&buf[..buf.len() - CHECKSUM_SIZE]);
|
||||
let calculated_checksum = crc32c::crc32c(&buf[..buf.len() - CHECKSUM_SIZE]);
|
||||
|
||||
let expected_checksum_bytes: &[u8; CHECKSUM_SIZE] =
|
||||
buf[buf.len() - CHECKSUM_SIZE..].try_into()?;
|
||||
let expected_checksum = u32::from_le_bytes(*expected_checksum_bytes);
|
||||
let expected_checksum_bytes: &[u8; CHECKSUM_SIZE] =
|
||||
buf[buf.len() - CHECKSUM_SIZE..].try_into()?;
|
||||
let expected_checksum = u32::from_le_bytes(*expected_checksum_bytes);
|
||||
|
||||
ensure!(
|
||||
calculated_checksum == expected_checksum,
|
||||
format!(
|
||||
"safekeeper control file checksum mismatch: expected {} got {}",
|
||||
expected_checksum, calculated_checksum
|
||||
)
|
||||
);
|
||||
|
||||
let state = FileStorage::deser_sk_state(&mut &buf[..buf.len() - CHECKSUM_SIZE])
|
||||
.with_context(|| {
|
||||
ensure!(
|
||||
calculated_checksum == expected_checksum,
|
||||
format!(
|
||||
"while reading control file {}",
|
||||
control_file_path.as_ref().display(),
|
||||
"safekeeper control file checksum mismatch: expected {} got {}",
|
||||
expected_checksum, calculated_checksum
|
||||
)
|
||||
})?;
|
||||
);

FileStorage::deser_sk_state(&mut &buf[..buf.len() - CHECKSUM_SIZE]).with_context(
|| {
format!(
"while reading control file {}",
control_file_path.as_ref().display(),
)
},
)?
};
Ok(state)
}
}
|
||||
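For readers following the hunk above: the safekeeper control file is the serialized state followed by a 4-byte little-endian checksum of the preceding bytes, and an empty file is tolerated only when creation was requested. Below is a rough Python sketch of that validation, purely illustrative; it is not the project's code, and zlib.crc32 stands in for the CRC32C routine the safekeeper actually uses.

import struct
import zlib

CHECKSUM_SIZE = 4  # matches the u32 checksum trailer described above

def validate_control_file(buf: bytes, create: bool) -> bytes:
    # An empty file is acceptable only when creation was requested.
    if len(buf) == 0:
        if not create:
            raise ValueError("control file is empty")
        return b""
    payload, trailer = buf[:-CHECKSUM_SIZE], buf[-CHECKSUM_SIZE:]
    (expected,) = struct.unpack("<I", trailer)
    calculated = zlib.crc32(payload) & 0xFFFFFFFF
    if calculated != expected:
        raise ValueError(f"checksum mismatch: expected {expected} got {calculated}")
    return payload

payload = b"example-state"
blob = payload + struct.pack("<I", zlib.crc32(payload) & 0xFFFFFFFF)
assert validate_control_file(blob, create=False) == payload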
@@ -225,38 +247,31 @@ mod test {
|
||||
fn load_from_control_file(
|
||||
conf: &SafeKeeperConf,
|
||||
zttid: &ZTenantTimelineId,
|
||||
create: CreateControlFile,
|
||||
) -> Result<(FileStorage, SafeKeeperState)> {
|
||||
fs::create_dir_all(&conf.timeline_dir(zttid)).expect("failed to create timeline dir");
|
||||
Ok((
|
||||
FileStorage::new(zttid, conf),
|
||||
FileStorage::load_control_file_conf(conf, zttid)?,
|
||||
FileStorage::load_control_file_conf(conf, zttid, create)?,
|
||||
))
|
||||
}
|
||||
|
||||
fn create(
|
||||
conf: &SafeKeeperConf,
|
||||
zttid: &ZTenantTimelineId,
|
||||
) -> Result<(FileStorage, SafeKeeperState)> {
|
||||
fs::create_dir_all(&conf.timeline_dir(zttid)).expect("failed to create timeline dir");
|
||||
let state = SafeKeeperState::empty();
|
||||
let mut storage = FileStorage::new(zttid, conf);
|
||||
storage.persist(&state)?;
|
||||
Ok((storage, state))
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_read_write_safekeeper_state() {
|
||||
let conf = stub_conf();
|
||||
let zttid = ZTenantTimelineId::generate();
|
||||
{
|
||||
let (mut storage, mut state) = create(&conf, &zttid).expect("failed to create state");
|
||||
let (mut storage, mut state) =
|
||||
load_from_control_file(&conf, &zttid, CreateControlFile::True)
|
||||
.expect("failed to read state");
|
||||
// change something
|
||||
state.commit_lsn = Lsn(42);
|
||||
state.wal_start_lsn = Lsn(42);
|
||||
storage.persist(&state).expect("failed to persist state");
|
||||
}
|
||||
|
||||
let (_, state) = load_from_control_file(&conf, &zttid).expect("failed to read state");
|
||||
assert_eq!(state.commit_lsn, Lsn(42));
|
||||
let (_, state) = load_from_control_file(&conf, &zttid, CreateControlFile::False)
|
||||
.expect("failed to read state");
|
||||
assert_eq!(state.wal_start_lsn, Lsn(42));
|
||||
}
|
||||
|
||||
#[test]
|
||||
@@ -264,10 +279,11 @@ mod test {
|
||||
let conf = stub_conf();
|
||||
let zttid = ZTenantTimelineId::generate();
|
||||
{
|
||||
let (mut storage, mut state) = create(&conf, &zttid).expect("failed to read state");
|
||||
|
||||
let (mut storage, mut state) =
|
||||
load_from_control_file(&conf, &zttid, CreateControlFile::True)
|
||||
.expect("failed to read state");
|
||||
// change something
|
||||
state.commit_lsn = Lsn(42);
|
||||
state.wal_start_lsn = Lsn(42);
|
||||
storage.persist(&state).expect("failed to persist state");
|
||||
}
|
||||
let control_path = conf.timeline_dir(&zttid).join(CONTROL_FILE_NAME);
|
||||
@@ -275,7 +291,7 @@ mod test {
|
||||
data[0] += 1; // change the first byte of the file to fail checksum validation
|
||||
fs::write(&control_path, &data).expect("failed to write control file");
|
||||
|
||||
match load_from_control_file(&conf, &zttid) {
|
||||
match load_from_control_file(&conf, &zttid, CreateControlFile::False) {
|
||||
Err(err) => assert!(err
|
||||
.to_string()
|
||||
.contains("safekeeper control file checksum mismatch")),
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
//! Code to deal with safekeeper control file upgrades
|
||||
use crate::safekeeper::{
|
||||
AcceptorState, Peers, PgUuid, SafeKeeperState, ServerInfo, Term, TermHistory, TermSwitchEntry,
|
||||
AcceptorState, PgUuid, SafeKeeperState, ServerInfo, Term, TermHistory, TermSwitchEntry,
|
||||
};
|
||||
use anyhow::{bail, Result};
|
||||
use serde::{Deserialize, Serialize};
|
||||
@@ -26,7 +26,7 @@ struct SafeKeeperStateV1 {
|
||||
/// persistent acceptor state
|
||||
acceptor_state: AcceptorStateV1,
|
||||
/// information about server
|
||||
server: ServerInfoV2,
|
||||
server: ServerInfo,
|
||||
/// Unique id of the last *elected* proposer we dealed with. Not needed
|
||||
/// for correctness, exists for monitoring purposes.
|
||||
proposer_uuid: PgUuid,
|
||||
@@ -70,39 +70,6 @@ pub struct SafeKeeperStateV2 {
|
||||
pub wal_start_lsn: Lsn,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
|
||||
pub struct ServerInfoV3 {
|
||||
/// Postgres server version
|
||||
pub pg_version: u32,
|
||||
pub system_id: SystemId,
|
||||
#[serde(with = "hex")]
|
||||
pub tenant_id: ZTenantId,
|
||||
/// Zenith timelineid
|
||||
#[serde(with = "hex")]
|
||||
pub timeline_id: ZTimelineId,
|
||||
pub wal_seg_size: u32,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub struct SafeKeeperStateV3 {
|
||||
/// persistent acceptor state
|
||||
pub acceptor_state: AcceptorState,
|
||||
/// information about server
|
||||
pub server: ServerInfoV3,
|
||||
/// Unique id of the last *elected* proposer we dealed with. Not needed
|
||||
/// for correctness, exists for monitoring purposes.
|
||||
#[serde(with = "hex")]
|
||||
pub proposer_uuid: PgUuid,
|
||||
/// part of WAL acknowledged by quorum and available locally
|
||||
pub commit_lsn: Lsn,
|
||||
/// minimal LSN which may be needed for recovery of some safekeeper (end_lsn
|
||||
/// of last record streamed to everyone)
|
||||
pub truncate_lsn: Lsn,
|
||||
// Safekeeper starts receiving WAL from this LSN, zeros before it ought to
|
||||
// be skipped during decoding.
|
||||
pub wal_start_lsn: Lsn,
|
||||
}
|
||||
|
||||
pub fn upgrade_control_file(buf: &[u8], version: u32) -> Result<SafeKeeperState> {
|
||||
// migrate to storing full term history
|
||||
if version == 1 {
|
||||
@@ -116,20 +83,12 @@ pub fn upgrade_control_file(buf: &[u8], version: u32) -> Result<SafeKeeperState>
|
||||
}]),
|
||||
};
|
||||
return Ok(SafeKeeperState {
|
||||
tenant_id: oldstate.server.tenant_id,
|
||||
timeline_id: oldstate.server.ztli,
|
||||
acceptor_state: ac,
|
||||
server: ServerInfo {
|
||||
pg_version: oldstate.server.pg_version,
|
||||
system_id: oldstate.server.system_id,
|
||||
wal_seg_size: oldstate.server.wal_seg_size,
|
||||
},
|
||||
server: oldstate.server.clone(),
|
||||
proposer_uuid: oldstate.proposer_uuid,
|
||||
commit_lsn: oldstate.commit_lsn,
|
||||
s3_wal_lsn: Lsn(0),
|
||||
peer_horizon_lsn: oldstate.truncate_lsn,
|
||||
remote_consistent_lsn: Lsn(0),
|
||||
peers: Peers(vec![]),
|
||||
truncate_lsn: oldstate.truncate_lsn,
|
||||
wal_start_lsn: oldstate.wal_start_lsn,
|
||||
});
|
||||
// migrate to hexing some zids
|
||||
} else if version == 2 {
|
||||
@@ -138,40 +97,17 @@ pub fn upgrade_control_file(buf: &[u8], version: u32) -> Result<SafeKeeperState>
|
||||
let server = ServerInfo {
|
||||
pg_version: oldstate.server.pg_version,
|
||||
system_id: oldstate.server.system_id,
|
||||
wal_seg_size: oldstate.server.wal_seg_size,
|
||||
};
|
||||
return Ok(SafeKeeperState {
|
||||
tenant_id: oldstate.server.tenant_id,
|
||||
timeline_id: oldstate.server.ztli,
|
||||
acceptor_state: oldstate.acceptor_state,
|
||||
server,
|
||||
proposer_uuid: oldstate.proposer_uuid,
|
||||
commit_lsn: oldstate.commit_lsn,
|
||||
s3_wal_lsn: Lsn(0),
|
||||
peer_horizon_lsn: oldstate.truncate_lsn,
|
||||
remote_consistent_lsn: Lsn(0),
|
||||
peers: Peers(vec![]),
|
||||
});
|
||||
// migrate to moving ztenantid/ztli to the top and adding some lsns
|
||||
} else if version == 3 {
|
||||
info!("reading safekeeper control file version {}", version);
|
||||
let oldstate = SafeKeeperStateV3::des(&buf[..buf.len()])?;
|
||||
let server = ServerInfo {
|
||||
pg_version: oldstate.server.pg_version,
|
||||
system_id: oldstate.server.system_id,
|
||||
wal_seg_size: oldstate.server.wal_seg_size,
|
||||
};
|
||||
return Ok(SafeKeeperState {
|
||||
tenant_id: oldstate.server.tenant_id,
|
||||
timeline_id: oldstate.server.timeline_id,
|
||||
acceptor_state: oldstate.acceptor_state,
|
||||
server,
|
||||
proposer_uuid: oldstate.proposer_uuid,
|
||||
commit_lsn: oldstate.commit_lsn,
|
||||
s3_wal_lsn: Lsn(0),
|
||||
peer_horizon_lsn: oldstate.truncate_lsn,
|
||||
remote_consistent_lsn: Lsn(0),
|
||||
peers: Peers(vec![]),
|
||||
truncate_lsn: oldstate.truncate_lsn,
|
||||
wal_start_lsn: oldstate.wal_start_lsn,
|
||||
});
|
||||
}
|
||||
bail!("unsupported safekeeper control file version {}", version)
|
||||
|
||||
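The function above migrates older on-disk layouts by dispatching on the stored format version, lifting each historical struct into the current one and bailing out on anything unknown. A hypothetical Python sketch of that shape follows; the field names are invented and only the dispatch-on-version structure mirrors the Rust.

from dataclasses import dataclass
from typing import Any, Dict

@dataclass
class CurrentState:
    commit_lsn: int
    truncate_lsn: int
    wal_start_lsn: int

def upgrade_state(old: Dict[str, Any], version: int) -> CurrentState:
    # Each branch lifts one historical layout into the current shape.
    if version == 1:
        return CurrentState(commit_lsn=old["commit_lsn"], truncate_lsn=0, wal_start_lsn=0)
    if version == 2:
        return CurrentState(commit_lsn=old["commit_lsn"],
                            truncate_lsn=old["truncate_lsn"],
                            wal_start_lsn=0)
    raise ValueError(f"unsupported control file version {version}")

assert upgrade_state({"commit_lsn": 42}, 1).commit_lsn == 42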
@@ -13,7 +13,6 @@ use postgres_ffi::xlog_utils::PG_TLI;
|
||||
use regex::Regex;
|
||||
use std::str::FromStr;
|
||||
use std::sync::Arc;
|
||||
use tracing::info;
|
||||
use zenith_utils::lsn::Lsn;
|
||||
use zenith_utils::postgres_backend;
|
||||
use zenith_utils::postgres_backend::PostgresBackend;
|
||||
@@ -21,6 +20,7 @@ use zenith_utils::pq_proto::{BeMessage, FeStartupPacket, RowDescriptor, INT4_OID
|
||||
use zenith_utils::zid::{ZTenantId, ZTenantTimelineId, ZTimelineId};
|
||||
|
||||
use crate::callmemaybe::CallmeEvent;
|
||||
use crate::control_file::CreateControlFile;
|
||||
use tokio::sync::mpsc::UnboundedSender;
|
||||
|
||||
/// Safekeeper handler of postgres commands
|
||||
@@ -101,19 +101,29 @@ impl postgres_backend::Handler for SafekeeperPostgresHandler {
|
||||
fn process_query(&mut self, pgb: &mut PostgresBackend, query_string: &str) -> Result<()> {
|
||||
let cmd = parse_cmd(query_string)?;
|
||||
|
||||
info!("got query {:?}", query_string);
|
||||
|
||||
let create = !(matches!(cmd, SafekeeperPostgresCommand::StartReplication { .. })
|
||||
|| matches!(cmd, SafekeeperPostgresCommand::IdentifySystem));
|
||||
|
||||
let tenantid = self.ztenantid.context("tenantid is required")?;
|
||||
let timelineid = self.ztimelineid.context("timelineid is required")?;
|
||||
if self.timeline.is_none() {
|
||||
self.timeline.set(
|
||||
&self.conf,
|
||||
ZTenantTimelineId::new(tenantid, timelineid),
|
||||
create,
|
||||
)?;
|
||||
// Is this command is ztimeline scoped?
|
||||
match cmd {
|
||||
SafekeeperPostgresCommand::StartWalPush { .. }
|
||||
| SafekeeperPostgresCommand::StartReplication { .. }
|
||||
| SafekeeperPostgresCommand::IdentifySystem
|
||||
| SafekeeperPostgresCommand::JSONCtrl { .. } => {
|
||||
let tenantid = self.ztenantid.context("tenantid is required")?;
|
||||
let timelineid = self.ztimelineid.context("timelineid is required")?;
|
||||
if self.timeline.is_none() {
|
||||
// START_WAL_PUSH is the only command that initializes the timeline in production.
|
||||
// There is also JSON_CTRL command, which should initialize the timeline for testing.
|
||||
let create_control_file = match cmd {
|
||||
SafekeeperPostgresCommand::StartWalPush { .. }
|
||||
| SafekeeperPostgresCommand::JSONCtrl { .. } => CreateControlFile::True,
|
||||
_ => CreateControlFile::False,
|
||||
};
|
||||
self.timeline.set(
|
||||
&self.conf,
|
||||
ZTenantTimelineId::new(tenantid, timelineid),
|
||||
create_control_file,
|
||||
)?;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
match cmd {
|
||||
|
||||
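Restating the intent of the command handling above: only commands that may legitimately start a timeline (WAL push in production, JSON_CTRL in tests) are allowed to create the control file on first contact, while replication and status commands must find one already on disk. A minimal hypothetical Python sketch of that policy, using simplified string command names rather than the actual enum:

# Illustrative only: which commands may create the timeline's control file.
CREATES_TIMELINE = {"START_WAL_PUSH", "JSON_CTRL"}

def may_create_control_file(cmd: str) -> bool:
    return cmd in CREATES_TIMELINE

assert may_create_control_file("START_WAL_PUSH")
assert not may_create_control_file("START_REPLICATION")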
@@ -1,3 +1,2 @@
|
||||
pub mod models;
|
||||
pub mod routes;
|
||||
pub use routes::make_router;
|
||||
|
||||
@@ -1,9 +0,0 @@
|
||||
use serde::{Deserialize, Serialize};
|
||||
use zenith_utils::zid::{ZNodeId, ZTenantId, ZTimelineId};
|
||||
|
||||
#[derive(Serialize, Deserialize)]
|
||||
pub struct TimelineCreateRequest {
|
||||
pub tenant_id: ZTenantId,
|
||||
pub timeline_id: ZTimelineId,
|
||||
pub peer_ids: Vec<ZNodeId>,
|
||||
}
|
||||
@@ -1,15 +1,14 @@
|
||||
use hyper::{Body, Request, Response, StatusCode};
|
||||
|
||||
use serde::Serialize;
|
||||
use serde::Serializer;
|
||||
use std::fmt::Display;
|
||||
use std::sync::Arc;
|
||||
use zenith_utils::http::json::json_request;
|
||||
use zenith_utils::http::{RequestExt, RouterBuilder};
|
||||
use zenith_utils::lsn::Lsn;
|
||||
use zenith_utils::zid::ZNodeId;
|
||||
use zenith_utils::zid::ZTenantTimelineId;
|
||||
|
||||
use crate::control_file::CreateControlFile;
|
||||
use crate::safekeeper::Term;
|
||||
use crate::safekeeper::TermHistory;
|
||||
use crate::timeline::GlobalTimelines;
|
||||
@@ -20,8 +19,6 @@ use zenith_utils::http::json::json_response;
|
||||
use zenith_utils::http::request::parse_request_param;
|
||||
use zenith_utils::zid::{ZTenantId, ZTimelineId};
|
||||
|
||||
use super::models::TimelineCreateRequest;
|
||||
|
||||
#[derive(Debug, Serialize)]
|
||||
struct SafekeeperStatus {
|
||||
id: ZNodeId,
|
||||
@@ -69,11 +66,7 @@ struct TimelineStatus {
|
||||
#[serde(serialize_with = "display_serialize")]
|
||||
commit_lsn: Lsn,
|
||||
#[serde(serialize_with = "display_serialize")]
|
||||
s3_wal_lsn: Lsn,
|
||||
#[serde(serialize_with = "display_serialize")]
|
||||
peer_horizon_lsn: Lsn,
|
||||
#[serde(serialize_with = "display_serialize")]
|
||||
remote_consistent_lsn: Lsn,
|
||||
truncate_lsn: Lsn,
|
||||
#[serde(serialize_with = "display_serialize")]
|
||||
flush_lsn: Lsn,
|
||||
}
|
||||
@@ -85,7 +78,8 @@ async fn timeline_status_handler(request: Request<Body>) -> Result<Response<Body
|
||||
parse_request_param(&request, "timeline_id")?,
|
||||
);
|
||||
|
||||
let tli = GlobalTimelines::get(get_conf(&request), zttid, false).map_err(ApiError::from_err)?;
|
||||
let tli = GlobalTimelines::get(get_conf(&request), zttid, CreateControlFile::False)
|
||||
.map_err(ApiError::from_err)?;
|
||||
let sk_state = tli.get_info();
|
||||
let flush_lsn = tli.get_end_of_wal();
|
||||
|
||||
@@ -100,27 +94,12 @@ async fn timeline_status_handler(request: Request<Body>) -> Result<Response<Body
|
||||
timeline_id: zttid.timeline_id,
|
||||
acceptor_state: acc_state,
|
||||
commit_lsn: sk_state.commit_lsn,
|
||||
s3_wal_lsn: sk_state.s3_wal_lsn,
|
||||
peer_horizon_lsn: sk_state.peer_horizon_lsn,
|
||||
remote_consistent_lsn: sk_state.remote_consistent_lsn,
|
||||
truncate_lsn: sk_state.truncate_lsn,
|
||||
flush_lsn,
|
||||
};
|
||||
Ok(json_response(StatusCode::OK, status)?)
|
||||
}
|
||||
|
||||
async fn timeline_create_handler(mut request: Request<Body>) -> Result<Response<Body>, ApiError> {
|
||||
let request_data: TimelineCreateRequest = json_request(&mut request).await?;
|
||||
|
||||
let zttid = ZTenantTimelineId {
|
||||
tenant_id: request_data.tenant_id,
|
||||
timeline_id: request_data.timeline_id,
|
||||
};
|
||||
GlobalTimelines::create(get_conf(&request), zttid, request_data.peer_ids)
|
||||
.map_err(ApiError::from_err)?;
|
||||
|
||||
Ok(json_response(StatusCode::CREATED, ())?)
|
||||
}
|
||||
|
||||
/// Safekeeper http router.
|
||||
pub fn make_router(conf: SafeKeeperConf) -> RouterBuilder<hyper::Body, ApiError> {
|
||||
let router = endpoint::make_router();
|
||||
@@ -131,5 +110,4 @@ pub fn make_router(conf: SafeKeeperConf) -> RouterBuilder<hyper::Body, ApiError>
|
||||
"/v1/timeline/:tenant_id/:timeline_id",
|
||||
timeline_status_handler,
|
||||
)
|
||||
.post("/v1/timeline", timeline_create_handler)
|
||||
}
|
||||
|
||||
@@ -10,8 +10,6 @@ use std::cmp::min;
use std::fmt;
use std::io::Read;
use tracing::*;
use zenith_utils::zid::ZNodeId;
use zenith_utils::zid::ZTenantTimelineId;

use lazy_static::lazy_static;

@@ -27,13 +25,12 @@ use zenith_utils::pq_proto::ZenithFeedback;
use zenith_utils::zid::{ZTenantId, ZTimelineId};

pub const SK_MAGIC: u32 = 0xcafeceefu32;
pub const SK_FORMAT_VERSION: u32 = 4;
pub const SK_FORMAT_VERSION: u32 = 3;
const SK_PROTOCOL_VERSION: u32 = 1;
const UNKNOWN_SERVER_VERSION: u32 = 0;

/// Consensus logical timestamp.
pub type Term = u64;
const INVALID_TERM: Term = 0;

#[derive(Debug, Clone, Copy, Serialize, Deserialize)]
pub struct TermSwitchEntry {
@@ -131,47 +128,18 @@ pub struct ServerInfo {
    /// Postgres server version
    pub pg_version: u32,
    pub system_id: SystemId,
    pub wal_seg_size: u32,
}

/// Data published by safekeeper to the peers
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct PeerInfo {
    /// LSN up to which safekeeper offloaded WAL to s3.
    s3_wal_lsn: Lsn,
    /// Term of the last entry.
    term: Term,
    /// LSN of the last record.
    flush_lsn: Lsn,
    /// Up to which LSN safekeeper regards its WAL as committed.
    commit_lsn: Lsn,
}

impl PeerInfo {
    fn new() -> Self {
        Self {
            s3_wal_lsn: Lsn(0),
            term: INVALID_TERM,
            flush_lsn: Lsn(0),
            commit_lsn: Lsn(0),
        }
    }
}

// vector-based node id -> peer state map with very limited functionality we
// need/
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Peers(pub Vec<(ZNodeId, PeerInfo)>);

/// Persistent information stored on safekeeper node
/// On disk data is prefixed by magic and format version and followed by checksum.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SafeKeeperState {
    #[serde(with = "hex")]
    pub tenant_id: ZTenantId,
    /// Zenith timelineid
    #[serde(with = "hex")]
    pub timeline_id: ZTimelineId,
    pub wal_seg_size: u32,
}

/// Persistent information stored on safekeeper node
/// On disk data is prefixed by magic and format version and followed by checksum.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SafeKeeperState {
    /// persistent acceptor state
    pub acceptor_state: AcceptorState,
    /// information about server
@@ -180,33 +148,19 @@ pub struct SafeKeeperState {
    /// for correctness, exists for monitoring purposes.
    #[serde(with = "hex")]
    pub proposer_uuid: PgUuid,
    /// Part of WAL acknowledged by quorum and available locally. Always points
    /// to record boundary.
    /// part of WAL acknowledged by quorum and available locally
    pub commit_lsn: Lsn,
    /// First LSN not yet offloaded to s3. Useful to persist to avoid finding
    /// out offloading progress on boot.
    pub s3_wal_lsn: Lsn,
    /// Minimal LSN which may be needed for recovery of some safekeeper (end_lsn
    /// of last record streamed to everyone). Persisting it helps skipping
    /// recovery in walproposer, generally we compute it from peers. In
    /// walproposer proto called 'truncate_lsn'.
    pub peer_horizon_lsn: Lsn,
    /// LSN of the oldest known checkpoint made by pageserver and successfully
    /// pushed to s3. We don't remove WAL beyond it. Persisted only for
    /// informational purposes, we receive it from pageserver.
    pub remote_consistent_lsn: Lsn,
    // Peers and their state as we remember it. Knowing peers themselves is
    // fundamental; but state is saved here only for informational purposes and
    // obviously can be stale. (Currently not saved at all, but let's provision
    // place to have less file version upgrades).
    pub peers: Peers,
    /// minimal LSN which may be needed for recovery of some safekeeper (end_lsn
    /// of last record streamed to everyone)
    pub truncate_lsn: Lsn,
    // Safekeeper starts receiving WAL from this LSN, zeros before it ought to
    // be skipped during decoding.
    pub wal_start_lsn: Lsn,
}

impl SafeKeeperState {
    pub fn new(zttid: &ZTenantTimelineId, peers: Vec<ZNodeId>) -> SafeKeeperState {
    pub fn new() -> SafeKeeperState {
        SafeKeeperState {
            tenant_id: zttid.tenant_id,
            timeline_id: zttid.timeline_id,
            acceptor_state: AcceptorState {
                term: 0,
                term_history: TermHistory::empty(),
@@ -214,20 +168,21 @@ impl SafeKeeperState {
            server: ServerInfo {
                pg_version: UNKNOWN_SERVER_VERSION, /* Postgres server version */
                system_id: 0, /* Postgres system identifier */
                tenant_id: ZTenantId::from([0u8; 16]),
                timeline_id: ZTimelineId::from([0u8; 16]),
                wal_seg_size: 0,
            },
            proposer_uuid: [0; 16],
            commit_lsn: Lsn(0),
            s3_wal_lsn: Lsn(0),
            peer_horizon_lsn: Lsn(0),
            remote_consistent_lsn: Lsn(0),
            peers: Peers(peers.iter().map(|p| (*p, PeerInfo::new())).collect()),
            commit_lsn: Lsn(0), /* part of WAL acknowledged by quorum */
            truncate_lsn: Lsn(0), /* minimal LSN which may be needed for recovery of some safekeeper */
            wal_start_lsn: Lsn(0),
        }
    }
}

    #[cfg(test)]
    pub fn empty() -> Self {
        SafeKeeperState::new(&ZTenantTimelineId::empty(), vec![])
impl Default for SafeKeeperState {
    fn default() -> Self {
        Self::new()
    }
}
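As the comments above note, the on-disk control file is prefixed by a magic number and format version and followed by a checksum. A minimal sketch of writing such a record is given here; the little-endian layout and the additive checksum are assumptions for illustration, not the real control_file implementation.

// Illustrative only -- not part of the diff. Layout follows the comment above
// (magic, format version, serialized state, checksum); the serialization and
// checksum details here are assumptions, not the actual control_file format.
fn write_control_record(magic: u32, version: u32, state_bytes: &[u8]) -> Vec<u8> {
    let mut buf = Vec::new();
    buf.extend_from_slice(&magic.to_le_bytes());
    buf.extend_from_slice(&version.to_le_bytes());
    buf.extend_from_slice(state_bytes);
    // Simple additive checksum as a stand-in for whatever the real file uses.
    let checksum: u32 = buf.iter().fold(0u32, |acc, b| acc.wrapping_add(*b as u32));
    buf.extend_from_slice(&checksum.to_le_bytes());
    buf
}

fn main() {
    // SK_MAGIC and format version 4 are taken from the constants shown above.
    let record = write_control_record(0xcafe_ceef, 4, b"state-bytes-go-here");
    println!("{} bytes, last 4 are the checksum", record.len());
}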
@@ -466,7 +421,6 @@ lazy_static! {

struct SafeKeeperMetrics {
    commit_lsn: Gauge,
    // WAL-related metrics are in WalStorageMetrics
}

impl SafeKeeperMetrics {
@@ -489,7 +443,7 @@ pub struct SafeKeeper<CTRL: control_file::Storage, WAL: wal_storage::Storage> {

    /// not-yet-flushed pairs of same named fields in s.*
    pub commit_lsn: Lsn,
    pub peer_horizon_lsn: Lsn,
    pub truncate_lsn: Lsn,
    pub s: SafeKeeperState, // persistent part

    pub control_store: CTRL,
@@ -508,14 +462,16 @@ where
        wal_store: WAL,
        state: SafeKeeperState,
    ) -> SafeKeeper<CTRL, WAL> {
        if state.timeline_id != ZTimelineId::from([0u8; 16]) && ztli != state.timeline_id {
            panic!("Calling SafeKeeper::new with inconsistent ztli ({}) and SafeKeeperState.server.timeline_id ({})", ztli, state.timeline_id);
        if state.server.timeline_id != ZTimelineId::from([0u8; 16])
            && ztli != state.server.timeline_id
        {
            panic!("Calling SafeKeeper::new with inconsistent ztli ({}) and SafeKeeperState.server.timeline_id ({})", ztli, state.server.timeline_id);
        }

        SafeKeeper {
            metrics: SafeKeeperMetrics::new(state.tenant_id, ztli, state.commit_lsn),
            metrics: SafeKeeperMetrics::new(state.server.tenant_id, ztli, state.commit_lsn),
            commit_lsn: state.commit_lsn,
            peer_horizon_lsn: state.peer_horizon_lsn,
            truncate_lsn: state.truncate_lsn,
            s: state,
            control_store,
            wal_store,
@@ -576,24 +532,12 @@ where
                msg.pg_version, self.s.server.pg_version
            );
        }
        if msg.tenant_id != self.s.tenant_id {
            bail!(
                "invalid tenant ID, got {}, expected {}",
                msg.tenant_id,
                self.s.tenant_id
            );
        }
        if msg.ztli != self.s.timeline_id {
            bail!(
                "invalid timeline ID, got {}, expected {}",
                msg.ztli,
                self.s.timeline_id
            );
        }

        // set basic info about server, if not yet
        // TODO: verify that is doesn't change after
        self.s.server.system_id = msg.system_id;
        self.s.server.tenant_id = msg.tenant_id;
        self.s.server.timeline_id = msg.ztli;
        self.s.server.wal_seg_size = msg.wal_seg_size;
        self.control_store
            .persist(&self.s)
@@ -624,7 +568,7 @@ where
            term: self.s.acceptor_state.term,
            vote_given: false as u64,
            flush_lsn: self.wal_store.flush_lsn(),
            truncate_lsn: self.s.peer_horizon_lsn,
            truncate_lsn: self.s.truncate_lsn,
            term_history: self.get_term_history(),
        };
        if self.s.acceptor_state.term < msg.term {
@@ -649,16 +593,14 @@ where

    /// Form AppendResponse from current state.
    fn append_response(&self) -> AppendResponse {
        let ar = AppendResponse {
        AppendResponse {
            term: self.s.acceptor_state.term,
            flush_lsn: self.wal_store.flush_lsn(),
            commit_lsn: self.s.commit_lsn,
            // will be filled by the upper code to avoid bothering safekeeper
            hs_feedback: HotStandbyFeedback::empty(),
            zenith_feedback: ZenithFeedback::empty(),
        };
        trace!("formed AppendResponse {:?}", ar);
        ar
    }
    }

    fn handle_elected(&mut self, msg: &ProposerElected) -> Result<Option<AcceptorProposerMessage>> {
@@ -713,11 +655,10 @@ where
        if !msg.wal_data.is_empty() {
            self.wal_store.write_wal(msg.h.begin_lsn, &msg.wal_data)?;

            // If this was the first record we ever receieved, initialize
            // commit_lsn to help find_end_of_wal skip the hole in the
            // beginning.
            if self.s.commit_lsn == Lsn(0) {
                self.s.commit_lsn = msg.h.begin_lsn;
            // If this was the first record we ever receieved, remember LSN to help
            // find_end_of_wal skip the hole in the beginning.
            if self.s.wal_start_lsn == Lsn(0) {
                self.s.wal_start_lsn = msg.h.begin_lsn;
                sync_control_file = true;
                require_flush = true;
            }
@@ -744,36 +685,35 @@ where
                .set(u64::from(self.commit_lsn) as f64);
        }

        self.peer_horizon_lsn = msg.h.truncate_lsn;
        self.truncate_lsn = msg.h.truncate_lsn;
        // Update truncate and commit LSN in control file.
        // To avoid negative impact on performance of extra fsync, do it only
        // when truncate_lsn delta exceeds WAL segment size.
        sync_control_file |=
            self.s.peer_horizon_lsn + (self.s.server.wal_seg_size as u64) < self.peer_horizon_lsn;
            self.s.truncate_lsn + (self.s.server.wal_seg_size as u64) < self.truncate_lsn;
        if sync_control_file {
            self.s.commit_lsn = self.commit_lsn;
            self.s.peer_horizon_lsn = self.peer_horizon_lsn;
            self.s.truncate_lsn = self.truncate_lsn;
        }

        if sync_control_file {
            self.control_store.persist(&self.s)?;
        }

        trace!(
            "processed AppendRequest of len {}, end_lsn={:?}, commit_lsn={:?}, truncate_lsn={:?}, flushed={:?}",
            msg.wal_data.len(),
            msg.h.end_lsn,
            msg.h.commit_lsn,
            msg.h.truncate_lsn,
            require_flush,
        );

        // If flush_lsn hasn't updated, AppendResponse is not very useful.
        if !require_flush {
            return Ok(None);
        }

        let resp = self.append_response();
        trace!(
            "processed AppendRequest of len {}, end_lsn={:?}, commit_lsn={:?}, truncate_lsn={:?}, resp {:?}",
            msg.wal_data.len(),
            msg.h.end_lsn,
            msg.h.commit_lsn,
            msg.h.truncate_lsn,
            &resp,
        );
        Ok(Some(AcceptorProposerMessage::AppendResponse(resp)))
    }

@@ -834,11 +774,11 @@ mod tests {
    #[test]
    fn test_voting() {
        let storage = InMemoryState {
            persisted_state: SafeKeeperState::empty(),
            persisted_state: SafeKeeperState::new(),
        };
        let wal_store = DummyWalStore { lsn: Lsn(0) };
        let ztli = ZTimelineId::from([0u8; 16]);
        let mut sk = SafeKeeper::new(ztli, storage, wal_store, SafeKeeperState::empty());
        let mut sk = SafeKeeper::new(ztli, storage, wal_store, SafeKeeperState::new());

        // check voting for 1 is ok
        let vote_request = ProposerAcceptorMessage::VoteRequest(VoteRequest { term: 1 });
@@ -866,11 +806,11 @@ mod tests {
    #[test]
    fn test_epoch_switch() {
        let storage = InMemoryState {
            persisted_state: SafeKeeperState::empty(),
            persisted_state: SafeKeeperState::new(),
        };
        let wal_store = DummyWalStore { lsn: Lsn(0) };
        let ztli = ZTimelineId::from([0u8; 16]);
        let mut sk = SafeKeeper::new(ztli, storage, wal_store, SafeKeeperState::empty());
        let mut sk = SafeKeeper::new(ztli, storage, wal_store, SafeKeeperState::new());

        let mut ar_hdr = AppendRequestHeader {
            term: 1,
@@ -1,7 +1,7 @@
//! This module contains timeline id -> safekeeper state map with file-backed
//! persistence and support for interaction between sending and receiving wal.

use anyhow::{bail, Context, Result};
use anyhow::{Context, Result};

use lazy_static::lazy_static;

@@ -9,24 +9,22 @@ use std::cmp::{max, min};
use std::collections::HashMap;
use std::fs::{self};

use std::sync::{Arc, Condvar, Mutex, MutexGuard};
use std::sync::{Arc, Condvar, Mutex};
use std::time::Duration;
use tokio::sync::mpsc::UnboundedSender;
use tracing::*;

use zenith_utils::lsn::Lsn;
use zenith_utils::zid::{ZNodeId, ZTenantTimelineId};
use zenith_utils::zid::ZTenantTimelineId;

use crate::callmemaybe::{CallmeEvent, SubscriptionStateKey};
use crate::control_file::{self, CreateControlFile};

use crate::control_file;
use crate::control_file::Storage as cf_storage;
use crate::safekeeper::{
    AcceptorProposerMessage, ProposerAcceptorMessage, SafeKeeper, SafeKeeperState,
};
use crate::send_wal::HotStandbyFeedback;
use crate::wal_storage;
use crate::wal_storage::Storage as wal_storage_iface;
use crate::wal_storage::{self, Storage};
use crate::SafeKeeperConf;

use zenith_utils::pq_proto::ZenithFeedback;
@@ -89,39 +87,21 @@ struct SharedState {
}

impl SharedState {
    /// Initialize timeline state, creating control file
    fn create(
    /// Restore SharedState from control file.
    /// If create=false and file doesn't exist, bails out.
    fn create_restore(
        conf: &SafeKeeperConf,
        zttid: &ZTenantTimelineId,
        peer_ids: Vec<ZNodeId>,
        create: CreateControlFile,
    ) -> Result<Self> {
        let state = SafeKeeperState::new(zttid, peer_ids);
        let control_store = control_file::FileStorage::new(zttid, conf);
        let wal_store = wal_storage::PhysicalStorage::new(zttid, conf);
        let mut sk = SafeKeeper::new(zttid.timeline_id, control_store, wal_store, state);
        sk.control_store.persist(&sk.s)?;

        Ok(Self {
            notified_commit_lsn: Lsn(0),
            sk,
            replicas: Vec::new(),
            active: false,
            num_computes: 0,
            pageserver_connstr: None,
        })
    }

    /// Restore SharedState from control file.
    /// If file doesn't exist, bails out.
    fn restore(conf: &SafeKeeperConf, zttid: &ZTenantTimelineId) -> Result<Self> {
        let state = control_file::FileStorage::load_control_file_conf(conf, zttid)
        let state = control_file::FileStorage::load_control_file_conf(conf, zttid, create)
            .context("failed to load from control file")?;

        let control_store = control_file::FileStorage::new(zttid, conf);

        let wal_store = wal_storage::PhysicalStorage::new(zttid, conf);

        info!("timeline {} restored", zttid.timeline_id);
        info!("timeline {} created or restored", zttid.timeline_id);

        Ok(Self {
            notified_commit_lsn: Lsn(0),
@@ -438,13 +418,26 @@ impl Timeline {

// Utilities needed by various Connection-like objects
pub trait TimelineTools {
    fn set(&mut self, conf: &SafeKeeperConf, zttid: ZTenantTimelineId, create: bool) -> Result<()>;
    fn set(
        &mut self,
        conf: &SafeKeeperConf,
        zttid: ZTenantTimelineId,
        create: CreateControlFile,
    ) -> Result<()>;

    fn get(&self) -> &Arc<Timeline>;
}

impl TimelineTools for Option<Arc<Timeline>> {
    fn set(&mut self, conf: &SafeKeeperConf, zttid: ZTenantTimelineId, create: bool) -> Result<()> {
    fn set(
        &mut self,
        conf: &SafeKeeperConf,
        zttid: ZTenantTimelineId,
        create: CreateControlFile,
    ) -> Result<()> {
        // We will only set the timeline once. If it were to ever change,
        // anyone who cloned the Arc would be out of date.
        assert!(self.is_none());
        *self = Some(GlobalTimelines::get(conf, zttid, create)?);
        Ok(())
    }
@@ -463,73 +456,30 @@ lazy_static! {
pub struct GlobalTimelines;

impl GlobalTimelines {
    fn create_internal(
        mut timelines: MutexGuard<HashMap<ZTenantTimelineId, Arc<Timeline>>>,
        conf: &SafeKeeperConf,
        zttid: ZTenantTimelineId,
        peer_ids: Vec<ZNodeId>,
    ) -> Result<Arc<Timeline>> {
        match timelines.get(&zttid) {
            Some(_) => bail!("timeline {} already exists", zttid),
            None => {
                // TODO: check directory existence
                let dir = conf.timeline_dir(&zttid);
                fs::create_dir_all(dir)?;
                let shared_state = SharedState::create(conf, &zttid, peer_ids)
                    .context("failed to create shared state")?;

                let new_tli = Arc::new(Timeline::new(zttid, shared_state));
                timelines.insert(zttid, Arc::clone(&new_tli));
                Ok(new_tli)
            }
        }
    }

    pub fn create(
        conf: &SafeKeeperConf,
        zttid: ZTenantTimelineId,
        peer_ids: Vec<ZNodeId>,
    ) -> Result<Arc<Timeline>> {
        let timelines = TIMELINES.lock().unwrap();
        GlobalTimelines::create_internal(timelines, conf, zttid, peer_ids)
    }

    /// Get a timeline with control file loaded from the global TIMELINES map.
    /// If control file doesn't exist, bails out.
    /// If control file doesn't exist and create=false, bails out.
    pub fn get(
        conf: &SafeKeeperConf,
        zttid: ZTenantTimelineId,
        create: bool,
        create: CreateControlFile,
    ) -> Result<Arc<Timeline>> {
        let mut timelines = TIMELINES.lock().unwrap();

        match timelines.get(&zttid) {
            Some(result) => Ok(Arc::clone(result)),
            None => {
                let shared_state =
                    SharedState::restore(conf, &zttid).context("failed to restore shared state");
                if let CreateControlFile::True = create {
                    let dir = conf.timeline_dir(&zttid);
                    info!(
                        "creating timeline dir {}, create is {:?}",
                        dir.display(),
                        create
                    );
                    fs::create_dir_all(dir)?;
                }

                let shared_state = match shared_state {
                    Ok(shared_state) => shared_state,
                    Err(error) => {
                        // TODO: always create timeline explicitly
                        if error
                            .root_cause()
                            .to_string()
                            .contains("No such file or directory")
                            && create
                        {
                            return GlobalTimelines::create_internal(
                                timelines,
                                conf,
                                zttid,
                                vec![],
                            );
                        } else {
                            return Err(error);
                        }
                    }
                };
                let shared_state = SharedState::create_restore(conf, &zttid, create)
                    .context("failed to restore shared state")?;

                let new_tli = Arc::new(Timeline::new(zttid, shared_state));
                timelines.insert(zttid, Arc::clone(&new_tli));
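The hunk above replaces the `create: bool` flag with a `CreateControlFile` value. A minimal sketch of how such a two-variant enum could be defined and matched is given here; the real type lives in control_file and its exact shape is an assumption inferred from the `if let CreateControlFile::True = create` check.

// Illustrative only -- not part of the diff. The real enum is defined in
// control_file; this two-variant shape is inferred from the usage above.
#[derive(Debug, Clone, Copy)]
enum CreateControlFile {
    True,
    False,
}

fn ensure_timeline_dir(dir: &std::path::Path, create: CreateControlFile) -> std::io::Result<()> {
    // Only create the directory when the caller explicitly asked for it,
    // mirroring the GlobalTimelines::get logic above.
    if let CreateControlFile::True = create {
        std::fs::create_dir_all(dir)?;
    }
    Ok(())
}

fn main() -> std::io::Result<()> {
    // Hypothetical path, used only to exercise the sketch.
    ensure_timeline_dir(std::path::Path::new("/tmp/tline-example"), CreateControlFile::False)
}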
@@ -301,8 +301,7 @@ impl Storage for PhysicalStorage {
    /// allows to postpone its initialization.
    fn init_storage(&mut self, state: &SafeKeeperState) -> Result<()> {
        if state.server.wal_seg_size == 0 {
            // wal_seg_size is still unknown. This is dead path normally, should
            // be used only in tests.
            // wal_seg_size is still unknown
            return Ok(());
        }

@@ -316,13 +315,9 @@ impl Storage for PhysicalStorage {
        let wal_seg_size = state.server.wal_seg_size as usize;
        self.wal_seg_size = Some(wal_seg_size);

        // Find out where stored WAL ends, starting at commit_lsn which is a
        // known recent record boundary (unless we don't have WAL at all).
        self.write_lsn = if state.commit_lsn == Lsn(0) {
            Lsn(0)
        } else {
            Lsn(find_end_of_wal(&self.timeline_dir, wal_seg_size, true, state.commit_lsn)?.0)
        };
        // we need to read WAL from disk to know which LSNs are stored on disk
        self.write_lsn =
            Lsn(find_end_of_wal(&self.timeline_dir, wal_seg_size, true, state.wal_start_lsn)?.0);

        self.write_record_lsn = self.write_lsn;

@@ -331,13 +326,11 @@ impl Storage for PhysicalStorage {
        self.update_flush_lsn();

        info!(
            "initialized storage for timeline {}, flush_lsn={}, commit_lsn={}, peer_horizon_lsn={}",
            self.zttid.timeline_id, self.flush_record_lsn, state.commit_lsn, state.peer_horizon_lsn,
            "initialized storage for timeline {}, flush_lsn={}, commit_lsn={}, truncate_lsn={}",
            self.zttid.timeline_id, self.flush_record_lsn, state.commit_lsn, state.truncate_lsn,
        );
        if self.flush_record_lsn < state.commit_lsn
            || self.flush_record_lsn < state.peer_horizon_lsn
        {
            warn!("timeline {} potential data loss: flush_lsn by find_end_of_wal is less than either commit_lsn or peer_horizon_lsn from control file", self.zttid.timeline_id);
        if self.flush_record_lsn < state.commit_lsn || self.flush_record_lsn < state.truncate_lsn {
            warn!("timeline {} potential data loss: flush_lsn by find_end_of_wal is less than either commit_lsn or truncate_lsn from control file", self.zttid.timeline_id);
        }

        Ok(())
@@ -1,4 +1,4 @@
|
||||
use anyhow::{anyhow, bail, Context, Result};
|
||||
use anyhow::{bail, Context, Result};
|
||||
use clap::{App, AppSettings, Arg, ArgMatches};
|
||||
use control_plane::compute::ComputeControlPlane;
|
||||
use control_plane::local_env;
|
||||
@@ -9,7 +9,7 @@ use pageserver::config::defaults::{
|
||||
DEFAULT_HTTP_LISTEN_ADDR as DEFAULT_PAGESERVER_HTTP_ADDR,
|
||||
DEFAULT_PG_LISTEN_ADDR as DEFAULT_PAGESERVER_PG_ADDR,
|
||||
};
|
||||
use std::collections::{BTreeSet, HashMap};
|
||||
use std::collections::HashMap;
|
||||
use std::process::exit;
|
||||
use std::str::FromStr;
|
||||
use walkeeper::defaults::{
|
||||
@@ -17,17 +17,15 @@ use walkeeper::defaults::{
|
||||
DEFAULT_PG_LISTEN_PORT as DEFAULT_SAFEKEEPER_PG_PORT,
|
||||
};
|
||||
use zenith_utils::auth::{Claims, Scope};
|
||||
use zenith_utils::lsn::Lsn;
|
||||
use zenith_utils::postgres_backend::AuthType;
|
||||
use zenith_utils::zid::{ZNodeId, ZTenantId, ZTenantTimelineId, ZTimelineId};
|
||||
use zenith_utils::zid::{ZNodeId, ZTenantId, ZTimelineId};
|
||||
use zenith_utils::GIT_VERSION;
|
||||
|
||||
use pageserver::timelines::TimelineInfo;
|
||||
use pageserver::branches::BranchInfo;
|
||||
|
||||
// Default id of a safekeeper node, if not specified on the command line.
|
||||
const DEFAULT_SAFEKEEPER_ID: ZNodeId = ZNodeId(1);
|
||||
const DEFAULT_PAGESERVER_ID: ZNodeId = ZNodeId(1);
|
||||
const DEFAULT_BRANCH_NAME: &str = "main";
|
||||
|
||||
fn default_conf() -> String {
|
||||
format!(
|
||||
@@ -55,15 +53,13 @@ http_port = {safekeeper_http_port}
|
||||
}
|
||||
|
||||
///
|
||||
/// Timelines tree element used as a value in the HashMap.
|
||||
/// Branches tree element used as a value in the HashMap.
|
||||
///
|
||||
struct TimelineTreeEl {
|
||||
/// `TimelineInfo` received from the `pageserver` via the `timeline_list` http API call.
|
||||
pub info: TimelineInfo,
|
||||
/// Name, recovered from zenith config mappings
|
||||
pub name: Option<String>,
|
||||
/// Holds all direct children of this timeline referenced using `timeline_id`.
|
||||
pub children: BTreeSet<ZTimelineId>,
|
||||
struct BranchTreeEl {
|
||||
/// `BranchInfo` received from the `pageserver` via the `branch_list` libpq API call.
|
||||
pub info: BranchInfo,
|
||||
/// Holds all direct children of this branch referenced using `timeline_id`.
|
||||
pub children: Vec<String>,
|
||||
}
|
||||
|
||||
// Main entry point for the 'zenith' CLI utility
|
||||
@@ -74,28 +70,29 @@ struct TimelineTreeEl {
|
||||
// * Providing CLI api to the pageserver
|
||||
// * TODO: export/import to/from usual postgres
|
||||
fn main() -> Result<()> {
|
||||
let branch_name_arg = Arg::new("branch-name")
|
||||
.long("branch-name")
|
||||
.takes_value(true)
|
||||
.help("Name of the branch to be created or used as an alias for other services")
|
||||
#[rustfmt::skip] // rustfmt squashes these into a single line otherwise
|
||||
let pg_node_arg = Arg::new("node")
|
||||
.index(1)
|
||||
.help("Node name")
|
||||
.required(true);
|
||||
|
||||
#[rustfmt::skip]
|
||||
let safekeeper_id_arg = Arg::new("id")
|
||||
.index(1)
|
||||
.help("safekeeper id")
|
||||
.required(false);
|
||||
|
||||
let pg_node_arg = Arg::new("node").help("Postgres node name").required(false);
|
||||
let timeline_arg = Arg::new("timeline")
|
||||
.index(2)
|
||||
.help("Branch name or a point-in time specification")
|
||||
.required(false);
|
||||
|
||||
let safekeeper_id_arg = Arg::new("id").help("safekeeper id").required(false);
|
||||
|
||||
let tenant_id_arg = Arg::new("tenant-id")
|
||||
.long("tenant-id")
|
||||
let tenantid_arg = Arg::new("tenantid")
|
||||
.long("tenantid")
|
||||
.help("Tenant id. Represented as a hexadecimal string 32 symbols length")
|
||||
.takes_value(true)
|
||||
.required(false);
|
||||
|
||||
let timeline_id_arg = Arg::new("timeline-id")
|
||||
.long("timeline-id")
|
||||
.help("Timeline id. Represented as a hexadecimal string 32 symbols length")
|
||||
.takes_value(true)
|
||||
.required(false);
|
||||
|
||||
let port_arg = Arg::new("port")
|
||||
.long("port")
|
||||
.required(false)
|
||||
@@ -117,12 +114,6 @@ fn main() -> Result<()> {
|
||||
.help("Additional pageserver's configuration options or overrides, refer to pageserver's 'config-override' CLI parameter docs for more")
|
||||
.required(false);
|
||||
|
||||
let lsn_arg = Arg::new("lsn")
|
||||
.long("lsn")
|
||||
.help("Specify Lsn on the timeline to start from. By default, end of the timeline would be used.")
|
||||
.takes_value(true)
|
||||
.required(false);
|
||||
|
||||
let matches = App::new("Zenith CLI")
|
||||
.setting(AppSettings::ArgRequiredElseHelp)
|
||||
.version(GIT_VERSION)
|
||||
@@ -130,7 +121,6 @@ fn main() -> Result<()> {
|
||||
App::new("init")
|
||||
.about("Initialize a new Zenith repository")
|
||||
.arg(pageserver_config_args.clone())
|
||||
.arg(timeline_id_arg.clone().help("Use a specific timeline id when creating a tenant and its initial timeline"))
|
||||
.arg(
|
||||
Arg::new("config")
|
||||
.long("config")
|
||||
@@ -139,32 +129,17 @@ fn main() -> Result<()> {
|
||||
)
|
||||
)
|
||||
.subcommand(
|
||||
App::new("timeline")
|
||||
.about("Manage timelines")
|
||||
.subcommand(App::new("list")
|
||||
.about("List all timelines, available to this pageserver")
|
||||
.arg(tenant_id_arg.clone()))
|
||||
.subcommand(App::new("branch")
|
||||
.about("Create a new timeline, using another timeline as a base, copying its data")
|
||||
.arg(tenant_id_arg.clone())
|
||||
.arg(branch_name_arg.clone())
|
||||
.arg(Arg::new("ancestor-branch-name").long("ancestor-branch-name").takes_value(true)
|
||||
.help("Use last Lsn of another timeline (and its data) as base when creating the new timeline. The timeline gets resolved by its branch name.").required(false))
|
||||
.arg(Arg::new("ancestor-start-lsn").long("ancestor-start-lsn").takes_value(true)
|
||||
.help("When using another timeline as base, use a specific Lsn in it instead of the latest one").required(false)))
|
||||
.subcommand(App::new("create")
|
||||
.about("Create a new blank timeline")
|
||||
.arg(tenant_id_arg.clone())
|
||||
.arg(branch_name_arg.clone()))
|
||||
App::new("branch")
|
||||
.about("Create a new branch")
|
||||
.arg(Arg::new("branchname").required(false).index(1))
|
||||
.arg(Arg::new("start-point").required(false).index(2))
|
||||
.arg(tenantid_arg.clone()),
|
||||
).subcommand(
|
||||
App::new("tenant")
|
||||
.setting(AppSettings::ArgRequiredElseHelp)
|
||||
.about("Manage tenants")
|
||||
.subcommand(App::new("list"))
|
||||
.subcommand(App::new("create")
|
||||
.arg(tenant_id_arg.clone())
|
||||
.arg(timeline_id_arg.clone().help("Use a specific timeline id when creating a tenant and its initial timeline"))
|
||||
)
|
||||
.subcommand(App::new("create").arg(Arg::new("tenantid").required(false).index(1)))
|
||||
)
|
||||
.subcommand(
|
||||
App::new("pageserver")
|
||||
@@ -199,13 +174,12 @@ fn main() -> Result<()> {
|
||||
App::new("pg")
|
||||
.setting(AppSettings::ArgRequiredElseHelp)
|
||||
.about("Manage postgres instances")
|
||||
.subcommand(App::new("list").arg(tenant_id_arg.clone()))
|
||||
.subcommand(App::new("list").arg(tenantid_arg.clone()))
|
||||
.subcommand(App::new("create")
|
||||
.about("Create a postgres compute node")
|
||||
.arg(pg_node_arg.clone())
|
||||
.arg(branch_name_arg.clone())
|
||||
.arg(tenant_id_arg.clone())
|
||||
.arg(lsn_arg.clone())
|
||||
.arg(timeline_arg.clone())
|
||||
.arg(tenantid_arg.clone())
|
||||
.arg(port_arg.clone())
|
||||
.arg(
|
||||
Arg::new("config-only")
|
||||
@@ -216,21 +190,20 @@ fn main() -> Result<()> {
|
||||
.subcommand(App::new("start")
|
||||
.about("Start a postgres compute node.\n This command actually creates new node from scratch, but preserves existing config files")
|
||||
.arg(pg_node_arg.clone())
|
||||
.arg(tenant_id_arg.clone())
|
||||
.arg(branch_name_arg.clone())
|
||||
.arg(timeline_id_arg.clone())
|
||||
.arg(lsn_arg.clone())
|
||||
.arg(timeline_arg.clone())
|
||||
.arg(tenantid_arg.clone())
|
||||
.arg(port_arg.clone()))
|
||||
.subcommand(
|
||||
App::new("stop")
|
||||
.arg(pg_node_arg.clone())
|
||||
.arg(tenant_id_arg.clone())
|
||||
.arg(
|
||||
Arg::new("destroy")
|
||||
.help("Also delete data directory (now optional, should be default in future)")
|
||||
.long("destroy")
|
||||
.required(false)
|
||||
)
|
||||
.arg(pg_node_arg.clone())
|
||||
.arg(timeline_arg.clone())
|
||||
.arg(tenantid_arg.clone())
|
||||
.arg(
|
||||
Arg::new("destroy")
|
||||
.help("Also delete data directory (now optional, should be default in future)")
|
||||
.long("destroy")
|
||||
.required(false)
|
||||
)
|
||||
)
|
||||
|
||||
)
|
||||
@@ -252,89 +225,75 @@ fn main() -> Result<()> {
|
||||
};
|
||||
|
||||
// Check for 'zenith init' command first.
|
||||
let subcommand_result = if sub_name == "init" {
|
||||
handle_init(sub_args).map(Some)
|
||||
let subcmd_result = if sub_name == "init" {
|
||||
handle_init(sub_args)
|
||||
} else {
|
||||
// all other commands need an existing config
|
||||
let mut env = LocalEnv::load_config().context("Error loading config")?;
|
||||
let original_env = env.clone();
|
||||
let env = match LocalEnv::load_config() {
|
||||
Ok(conf) => conf,
|
||||
Err(e) => {
|
||||
eprintln!("Error loading config: {}", e);
|
||||
exit(1);
|
||||
}
|
||||
};
|
||||
|
||||
let subcommand_result = match sub_name {
|
||||
"tenant" => handle_tenant(sub_args, &mut env),
|
||||
"timeline" => handle_timeline(sub_args, &mut env),
|
||||
match sub_name {
|
||||
"tenant" => handle_tenant(sub_args, &env),
|
||||
"branch" => handle_branch(sub_args, &env),
|
||||
"start" => handle_start_all(sub_args, &env),
|
||||
"stop" => handle_stop_all(sub_args, &env),
|
||||
"pageserver" => handle_pageserver(sub_args, &env),
|
||||
"pg" => handle_pg(sub_args, &env),
|
||||
"safekeeper" => handle_safekeeper(sub_args, &env),
|
||||
_ => bail!("unexpected subcommand {}", sub_name),
|
||||
};
|
||||
|
||||
if original_env != env {
|
||||
subcommand_result.map(|()| Some(env))
|
||||
} else {
|
||||
subcommand_result.map(|()| None)
|
||||
}
|
||||
};
|
||||
|
||||
match subcommand_result {
|
||||
Ok(Some(updated_env)) => updated_env.persist_config(&updated_env.base_data_dir)?,
|
||||
Ok(None) => (),
|
||||
Err(e) => {
|
||||
eprintln!("command failed: {:?}", e);
|
||||
exit(1);
|
||||
}
|
||||
if let Err(e) = subcmd_result {
|
||||
eprintln!("command failed: {:#}", e);
|
||||
exit(1);
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
///
|
||||
/// Prints timelines list as a tree-like structure.
|
||||
/// Prints branches list as a tree-like structure.
|
||||
///
|
||||
fn print_timelines_tree(
|
||||
timelines: Vec<TimelineInfo>,
|
||||
mut timeline_name_mappings: HashMap<ZTenantTimelineId, String>,
|
||||
) -> Result<()> {
|
||||
let mut timelines_hash = timelines
|
||||
.iter()
|
||||
.map(|t| {
|
||||
(
|
||||
t.timeline_id(),
|
||||
TimelineTreeEl {
|
||||
info: t.clone(),
|
||||
children: BTreeSet::new(),
|
||||
name: timeline_name_mappings
|
||||
.remove(&ZTenantTimelineId::new(t.tenant_id(), t.timeline_id())),
|
||||
},
|
||||
)
|
||||
})
|
||||
.collect::<HashMap<_, _>>();
|
||||
fn print_branches_tree(branches: Vec<BranchInfo>) -> Result<()> {
|
||||
let mut branches_hash: HashMap<String, BranchTreeEl> = HashMap::new();
|
||||
|
||||
// Memorize all direct children of each timeline.
|
||||
for timeline in &timelines {
|
||||
if let TimelineInfo::Local {
|
||||
ancestor_timeline_id: Some(tid),
|
||||
..
|
||||
} = timeline
|
||||
{
|
||||
timelines_hash
|
||||
// Form a hash table of branch timeline_id -> BranchTreeEl.
|
||||
for branch in &branches {
|
||||
branches_hash.insert(
|
||||
branch.timeline_id.to_string(),
|
||||
BranchTreeEl {
|
||||
info: branch.clone(),
|
||||
children: Vec::new(),
|
||||
},
|
||||
);
|
||||
}
|
||||
|
||||
// Memorize all direct children of each branch.
|
||||
for branch in &branches {
|
||||
if let Some(tid) = &branch.ancestor_id {
|
||||
branches_hash
|
||||
.get_mut(tid)
|
||||
.context("missing timeline info in the HashMap")?
|
||||
.context("missing branch info in the HashMap")?
|
||||
.children
|
||||
.insert(timeline.timeline_id());
|
||||
.push(branch.timeline_id.to_string());
|
||||
}
|
||||
}
|
||||
|
||||
for timeline in timelines_hash.values() {
|
||||
// Start with root local timelines (no ancestors) first.
|
||||
if let TimelineInfo::Local {
|
||||
ancestor_timeline_id,
|
||||
..
|
||||
} = &timeline.info
|
||||
{
|
||||
if ancestor_timeline_id.is_none() {
|
||||
print_timeline(0, &Vec::from([true]), timeline, &timelines_hash)?;
|
||||
}
|
||||
// Sort children by tid to bring some minimal order.
|
||||
for branch in &mut branches_hash.values_mut() {
|
||||
branch.children.sort();
|
||||
}
|
||||
|
||||
for branch in branches_hash.values() {
|
||||
// Start with root branches (no ancestors) first.
|
||||
// Now there is 'main' branch only, but things may change.
|
||||
if branch.info.ancestor_id.is_none() {
|
||||
print_branch(0, &Vec::from([true]), branch, &branches_hash)?;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -342,32 +301,27 @@ fn print_timelines_tree(
|
||||
}
|
||||
|
||||
///
|
||||
/// Recursively prints timeline info with all its children.
|
||||
/// Recursively prints branch info with all its children.
|
||||
///
|
||||
fn print_timeline(
|
||||
fn print_branch(
|
||||
nesting_level: usize,
|
||||
is_last: &[bool],
|
||||
timeline: &TimelineTreeEl,
|
||||
timelines: &HashMap<ZTimelineId, TimelineTreeEl>,
|
||||
branch: &BranchTreeEl,
|
||||
branches: &HashMap<String, BranchTreeEl>,
|
||||
) -> Result<()> {
|
||||
let local_or_remote = match timeline.info {
|
||||
TimelineInfo::Local { .. } => "(L)",
|
||||
TimelineInfo::Remote { .. } => "(R)",
|
||||
};
|
||||
// Draw main padding
|
||||
print!("{} ", local_or_remote);
|
||||
print!(" ");
|
||||
|
||||
if nesting_level > 0 {
|
||||
let lsn_string = match &timeline.info {
|
||||
TimelineInfo::Local { ancestor_lsn, .. } => ancestor_lsn
|
||||
.map(|lsn| lsn.to_string())
|
||||
.unwrap_or_else(|| "Unknown local Lsn".to_string()),
|
||||
TimelineInfo::Remote { .. } => "unknown Lsn (remote)".to_string(),
|
||||
};
|
||||
let lsn = branch
|
||||
.info
|
||||
.ancestor_lsn
|
||||
.as_ref()
|
||||
.context("missing branch info in the HashMap")?;
|
||||
let mut br_sym = "┣━";
|
||||
|
||||
// Draw each nesting padding with proper style
|
||||
// depending on whether its timeline ended or not.
|
||||
// depending on whether its branch ended or not.
|
||||
if nesting_level > 1 {
|
||||
for l in &is_last[1..is_last.len() - 1] {
|
||||
if *l {
|
||||
@@ -378,92 +332,73 @@ fn print_timeline(
|
||||
}
|
||||
}
|
||||
|
||||
// We are the last in this sub-timeline
|
||||
// We are the last in this sub-branch
|
||||
if *is_last.last().unwrap() {
|
||||
br_sym = "┗━";
|
||||
}
|
||||
|
||||
print!("{} @{}: ", br_sym, lsn_string);
|
||||
print!("{} @{}: ", br_sym, lsn);
|
||||
}
|
||||
|
||||
// Finally print a timeline id and name with new line
|
||||
println!(
|
||||
"{} [{}]",
|
||||
timeline.name.as_deref().unwrap_or("_no_name_"),
|
||||
timeline.info.timeline_id()
|
||||
);
|
||||
// Finally print a branch name with new line
|
||||
println!("{}", branch.info.name);
|
||||
|
||||
let len = timeline.children.len();
|
||||
let len = branch.children.len();
|
||||
let mut i: usize = 0;
|
||||
let mut is_last_new = Vec::from(is_last);
|
||||
is_last_new.push(false);
|
||||
|
||||
for child in &timeline.children {
|
||||
for child in &branch.children {
|
||||
i += 1;
|
||||
|
||||
// Mark that the last padding is the end of the timeline
|
||||
// Mark that the last padding is the end of the branch
|
||||
if i == len {
|
||||
if let Some(last) = is_last_new.last_mut() {
|
||||
*last = true;
|
||||
}
|
||||
}
|
||||
|
||||
print_timeline(
|
||||
print_branch(
|
||||
nesting_level + 1,
|
||||
&is_last_new,
|
||||
timelines
|
||||
branches
|
||||
.get(child)
|
||||
.context("missing timeline info in the HashMap")?,
|
||||
timelines,
|
||||
.context("missing branch info in the HashMap")?,
|
||||
branches,
|
||||
)?;
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Returns a map of timeline IDs to timeline_id@lsn strings.
|
||||
/// Returns a map of timeline IDs to branch_name@lsn strings.
|
||||
/// Connects to the pageserver to query this information.
|
||||
fn get_timeline_infos(
|
||||
fn get_branch_infos(
|
||||
env: &local_env::LocalEnv,
|
||||
tenant_id: &ZTenantId,
|
||||
) -> Result<HashMap<ZTimelineId, TimelineInfo>> {
|
||||
Ok(PageServerNode::from_env(env)
|
||||
.timeline_list(tenant_id)?
|
||||
tenantid: &ZTenantId,
|
||||
) -> Result<HashMap<ZTimelineId, BranchInfo>> {
|
||||
let page_server = PageServerNode::from_env(env);
|
||||
let branch_infos: Vec<BranchInfo> = page_server.branch_list(tenantid)?;
|
||||
let branch_infos: HashMap<ZTimelineId, BranchInfo> = branch_infos
|
||||
.into_iter()
|
||||
.map(|timeline_info| (timeline_info.timeline_id(), timeline_info))
|
||||
.collect())
|
||||
.map(|branch_info| (branch_info.timeline_id, branch_info))
|
||||
.collect();
|
||||
|
||||
Ok(branch_infos)
|
||||
}
|
||||
|
||||
// Helper function to parse --tenant_id option, or get the default from config file
|
||||
fn get_tenant_id(sub_match: &ArgMatches, env: &local_env::LocalEnv) -> anyhow::Result<ZTenantId> {
|
||||
if let Some(tenant_id_from_arguments) = parse_tenant_id(sub_match).transpose() {
|
||||
tenant_id_from_arguments
|
||||
} else if let Some(tenantid_conf) = env.default_tenant_id {
|
||||
// Helper function to parse --tenantid option, or get the default from config file
|
||||
fn get_tenantid(sub_match: &ArgMatches, env: &local_env::LocalEnv) -> Result<ZTenantId> {
|
||||
if let Some(tenantid_cmd) = sub_match.value_of("tenantid") {
|
||||
Ok(ZTenantId::from_str(tenantid_cmd)?)
|
||||
} else if let Some(tenantid_conf) = env.default_tenantid {
|
||||
Ok(ZTenantId::from(tenantid_conf))
|
||||
} else {
|
||||
bail!("No tenant id. Use --tenant-id, or set 'default_tenant_id' in the config file");
|
||||
bail!("No tenantid. Use --tenantid, or set 'default_tenantid' in the config file");
|
||||
}
|
||||
}
|
||||
|
||||
fn parse_tenant_id(sub_match: &ArgMatches) -> anyhow::Result<Option<ZTenantId>> {
|
||||
sub_match
|
||||
.value_of("tenant-id")
|
||||
.map(ZTenantId::from_str)
|
||||
.transpose()
|
||||
.context("Failed to parse tenant id from the argument string")
|
||||
}
|
||||
|
||||
fn parse_timeline_id(sub_match: &ArgMatches) -> anyhow::Result<Option<ZTimelineId>> {
|
||||
sub_match
|
||||
.value_of("timeline-id")
|
||||
.map(ZTimelineId::from_str)
|
||||
.transpose()
|
||||
.context("Failed to parse timeline id from the argument string")
|
||||
}
|
||||
|
||||
fn handle_init(init_match: &ArgMatches) -> Result<LocalEnv> {
|
||||
let initial_timeline_id_arg = parse_timeline_id(init_match)?;
|
||||
|
||||
fn handle_init(init_match: &ArgMatches) -> Result<()> {
|
||||
// Create config file
|
||||
let toml_file: String = if let Some(config_path) = init_match.value_of("config") {
|
||||
// load and parse the file
|
||||
@@ -479,29 +414,18 @@ fn handle_init(init_match: &ArgMatches) -> Result<LocalEnv> {
|
||||
env.init()
|
||||
.context("Failed to initialize zenith repository")?;
|
||||
|
||||
// default_tenantid was generated by the `env.init()` call above
|
||||
let initial_tenant_id = ZTenantId::from(env.default_tenant_id.unwrap());
|
||||
|
||||
// Call 'pageserver init'.
|
||||
let pageserver = PageServerNode::from_env(&env);
|
||||
let initial_timeline_id = pageserver
|
||||
.init(
|
||||
Some(initial_tenant_id),
|
||||
initial_timeline_id_arg,
|
||||
&pageserver_config_overrides(init_match),
|
||||
)
|
||||
.unwrap_or_else(|e| {
|
||||
eprintln!("pageserver init failed: {}", e);
|
||||
exit(1);
|
||||
});
|
||||
if let Err(e) = pageserver.init(
|
||||
// default_tenantid was generated by the `env.init()` call above
|
||||
Some(&ZTenantId::from(env.default_tenantid.unwrap()).to_string()),
|
||||
&pageserver_config_overrides(init_match),
|
||||
) {
|
||||
eprintln!("pageserver init failed: {}", e);
|
||||
exit(1);
|
||||
}
|
||||
|
||||
env.register_branch_mapping(
|
||||
DEFAULT_BRANCH_NAME.to_owned(),
|
||||
initial_tenant_id,
|
||||
initial_timeline_id,
|
||||
)?;
|
||||
|
||||
Ok(env)
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn pageserver_config_overrides(init_match: &ArgMatches) -> Vec<&str> {
|
||||
@@ -512,7 +436,7 @@ fn pageserver_config_overrides(init_match: &ArgMatches) -> Vec<&str> {
|
||||
.collect()
|
||||
}
|
||||
|
||||
fn handle_tenant(tenant_match: &ArgMatches, env: &mut local_env::LocalEnv) -> Result<()> {
|
||||
fn handle_tenant(tenant_match: &ArgMatches, env: &local_env::LocalEnv) -> Result<()> {
|
||||
let pageserver = PageServerNode::from_env(env);
|
||||
match tenant_match.subcommand() {
|
||||
Some(("list", _)) => {
|
||||
@@ -521,16 +445,13 @@ fn handle_tenant(tenant_match: &ArgMatches, env: &mut local_env::LocalEnv) -> Re
|
||||
}
|
||||
}
|
||||
Some(("create", create_match)) => {
|
||||
let initial_tenant_id = parse_tenant_id(create_match)?;
|
||||
let new_tenant_id = pageserver
|
||||
.tenant_create(initial_tenant_id)?
|
||||
.ok_or_else(|| {
|
||||
anyhow!("Tenant with id {:?} was already created", initial_tenant_id)
|
||||
})?;
|
||||
println!(
|
||||
"tenant {} successfully created on the pageserver",
|
||||
new_tenant_id
|
||||
);
|
||||
let tenantid = match create_match.value_of("tenantid") {
|
||||
Some(tenantid) => ZTenantId::from_str(tenantid)?,
|
||||
None => ZTenantId::generate(),
|
||||
};
|
||||
println!("using tenant id {}", tenantid);
|
||||
pageserver.tenant_create(tenantid)?;
|
||||
println!("tenant successfully created on the pageserver");
|
||||
}
|
||||
Some((sub_name, _)) => bail!("Unexpected tenant subcommand '{}'", sub_name),
|
||||
None => bail!("no tenant subcommand provided"),
|
||||
@@ -538,94 +459,24 @@ fn handle_tenant(tenant_match: &ArgMatches, env: &mut local_env::LocalEnv) -> Re
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn handle_timeline(timeline_match: &ArgMatches, env: &mut local_env::LocalEnv) -> Result<()> {
|
||||
fn handle_branch(branch_match: &ArgMatches, env: &local_env::LocalEnv) -> Result<()> {
|
||||
let pageserver = PageServerNode::from_env(env);
|
||||
|
||||
match timeline_match.subcommand() {
|
||||
Some(("list", list_match)) => {
|
||||
let tenant_id = get_tenant_id(list_match, env)?;
|
||||
let timelines = pageserver.timeline_list(&tenant_id)?;
|
||||
print_timelines_tree(timelines, env.timeline_name_mappings())?;
|
||||
}
|
||||
Some(("create", create_match)) => {
|
||||
let tenant_id = get_tenant_id(create_match, env)?;
|
||||
let new_branch_name = create_match
|
||||
.value_of("branch-name")
|
||||
.ok_or(anyhow!("No branch name provided"))?;
|
||||
let timeline = pageserver
|
||||
.timeline_create(tenant_id, None, None, None)?
|
||||
.ok_or_else(|| anyhow!("Failed to create new timeline for tenant {}", tenant_id))?;
|
||||
let new_timeline_id = timeline.timeline_id();
|
||||
let tenantid = get_tenantid(branch_match, env)?;
|
||||
|
||||
let last_record_lsn = match timeline {
|
||||
TimelineInfo::Local {
|
||||
last_record_lsn, ..
|
||||
} => last_record_lsn,
|
||||
TimelineInfo::Remote { .. } => {
|
||||
bail!(
|
||||
"Timeline {} was created as remote, not local",
|
||||
new_timeline_id
|
||||
)
|
||||
}
|
||||
};
|
||||
env.register_branch_mapping(new_branch_name.to_string(), tenant_id, new_timeline_id)?;
|
||||
|
||||
println!(
|
||||
"Created timeline '{}' at Lsn {} for tenant: {}",
|
||||
timeline.timeline_id(),
|
||||
last_record_lsn,
|
||||
tenant_id,
|
||||
);
|
||||
}
|
||||
Some(("branch", branch_match)) => {
|
||||
let tenant_id = get_tenant_id(branch_match, env)?;
|
||||
let new_branch_name = branch_match
|
||||
.value_of("branch-name")
|
||||
.ok_or(anyhow!("No branch name provided"))?;
|
||||
let ancestor_branch_name = branch_match
|
||||
.value_of("ancestor-branch-name")
|
||||
.unwrap_or(DEFAULT_BRANCH_NAME);
|
||||
let ancestor_timeline_id = env
|
||||
.get_branch_timeline_id(ancestor_branch_name, tenant_id)
|
||||
.ok_or_else(|| {
|
||||
anyhow!(
|
||||
"Found no timeline id for branch name '{}'",
|
||||
ancestor_branch_name
|
||||
)
|
||||
})?;
|
||||
|
||||
let start_lsn = branch_match
|
||||
.value_of("ancestor-start-lsn")
|
||||
.map(Lsn::from_str)
|
||||
.transpose()
|
||||
.context("Failed to parse ancestor start Lsn from the request")?;
|
||||
let timeline = pageserver
|
||||
.timeline_create(tenant_id, None, start_lsn, Some(ancestor_timeline_id))?
|
||||
.ok_or_else(|| anyhow!("Failed to create new timeline for tenant {}", tenant_id))?;
|
||||
let new_timeline_id = timeline.timeline_id();
|
||||
|
||||
let last_record_lsn = match timeline {
|
||||
TimelineInfo::Local {
|
||||
last_record_lsn, ..
|
||||
} => last_record_lsn,
|
||||
TimelineInfo::Remote { .. } => bail!(
|
||||
"Timeline {} was created as remote, not local",
|
||||
new_timeline_id
|
||||
),
|
||||
};
|
||||
|
||||
env.register_branch_mapping(new_branch_name.to_string(), tenant_id, new_timeline_id)?;
|
||||
|
||||
println!(
|
||||
"Created timeline '{}' at Lsn {} for tenant: {}. Ancestor timeline: '{}'",
|
||||
timeline.timeline_id(),
|
||||
last_record_lsn,
|
||||
tenant_id,
|
||||
ancestor_branch_name,
|
||||
);
|
||||
}
|
||||
Some((sub_name, _)) => bail!("Unexpected tenant subcommand '{}'", sub_name),
|
||||
None => bail!("no tenant subcommand provided"),
|
||||
if let Some(branchname) = branch_match.value_of("branchname") {
|
||||
let startpoint_str = branch_match
|
||||
.value_of("start-point")
|
||||
.context("Missing start-point")?;
|
||||
let branch = pageserver.branch_create(branchname, startpoint_str, &tenantid)?;
|
||||
println!(
|
||||
"Created branch '{}' at {:?} for tenant: {}",
|
||||
branch.name, branch.latest_valid_lsn, tenantid,
|
||||
);
|
||||
} else {
|
||||
// No arguments, list branches for tenant
|
||||
let branches = pageserver.branch_list(&tenantid)?;
|
||||
print_branches_tree(branches)?;
|
||||
}
|
||||
|
||||
Ok(())
|
||||
@@ -639,90 +490,63 @@ fn handle_pg(pg_match: &ArgMatches, env: &local_env::LocalEnv) -> Result<()> {
|
||||
|
||||
let mut cplane = ComputeControlPlane::load(env.clone())?;
|
||||
|
||||
// All subcommands take an optional --tenant-id option
|
||||
let tenant_id = get_tenant_id(sub_args, env)?;
|
||||
// All subcommands take an optional --tenantid option
|
||||
let tenantid = get_tenantid(sub_args, env)?;
|
||||
|
||||
match sub_name {
|
||||
"list" => {
|
||||
let timeline_infos = get_timeline_infos(env, &tenant_id).unwrap_or_else(|e| {
|
||||
eprintln!("Failed to load timeline info: {}", e);
|
||||
let branch_infos = get_branch_infos(env, &tenantid).unwrap_or_else(|e| {
|
||||
eprintln!("Failed to load branch info: {}", e);
|
||||
HashMap::new()
|
||||
});
|
||||
|
||||
let timeline_name_mappings = env.timeline_name_mappings();
|
||||
|
||||
println!("NODE\tADDRESS\tTIMELINE\tBRANCH NAME\tLSN\t\tSTATUS");
|
||||
println!("NODE\tADDRESS\t\tBRANCH\tLSN\t\tSTATUS");
|
||||
for ((_, node_name), node) in cplane
|
||||
.nodes
|
||||
.iter()
|
||||
.filter(|((node_tenant_id, _), _)| node_tenant_id == &tenant_id)
|
||||
.filter(|((node_tenantid, _), _)| node_tenantid == &tenantid)
|
||||
{
|
||||
// FIXME: This shows the LSN at the end of the timeline. It's not the
|
||||
// right thing to do for read-only nodes that might be anchored at an
|
||||
// older point in time, or following but lagging behind the primary.
|
||||
let lsn_str = timeline_infos
|
||||
.get(&node.timeline_id)
|
||||
.map(|bi| match bi {
|
||||
TimelineInfo::Local {
|
||||
last_record_lsn, ..
|
||||
} => last_record_lsn.to_string(),
|
||||
TimelineInfo::Remote { .. } => "? (remote)".to_string(),
|
||||
})
|
||||
.unwrap_or_else(|| '?'.to_string());
|
||||
|
||||
let branch_name = timeline_name_mappings
|
||||
.get(&ZTenantTimelineId::new(tenant_id, node.timeline_id))
|
||||
.map(|name| name.as_str())
|
||||
.unwrap_or("?");
|
||||
let lsn_str = branch_infos
|
||||
.get(&node.timelineid)
|
||||
.map(|bi| bi.latest_valid_lsn.to_string())
|
||||
.unwrap_or_else(|| "?".to_string());
|
||||
|
||||
println!(
|
||||
"{}\t{}\t{}\t{}\t{}\t{}",
|
||||
"{}\t{}\t{}\t{}\t{}",
|
||||
node_name,
|
||||
node.address,
|
||||
node.timeline_id,
|
||||
branch_name,
|
||||
node.timelineid, // FIXME: resolve human-friendly branch name
|
||||
lsn_str,
|
||||
node.status(),
|
||||
);
|
||||
}
|
||||
}
|
||||
"create" => {
|
||||
let branch_name = sub_args
|
||||
.value_of("branch-name")
|
||||
.unwrap_or(DEFAULT_BRANCH_NAME);
|
||||
let node_name = sub_args
|
||||
.value_of("node")
|
||||
.map(ToString::to_string)
|
||||
.unwrap_or_else(|| format!("{}_node", branch_name));
|
||||
|
||||
let lsn = sub_args
|
||||
.value_of("lsn")
|
||||
.map(Lsn::from_str)
|
||||
.transpose()
|
||||
.context("Failed to parse Lsn from the request")?;
|
||||
let timeline_id = env
|
||||
.get_branch_timeline_id(branch_name, tenant_id)
|
||||
.ok_or_else(|| anyhow!("Found no timeline id for branch name '{}'", branch_name))?;
|
||||
let node_name = sub_args.value_of("node").unwrap_or("main");
|
||||
let timeline_name = sub_args.value_of("timeline").unwrap_or(node_name);
|
||||
|
||||
let port: Option<u16> = match sub_args.value_of("port") {
|
||||
Some(p) => Some(p.parse()?),
|
||||
None => None,
|
||||
};
|
||||
cplane.new_node(tenant_id, &node_name, timeline_id, lsn, port)?;
|
||||
cplane.new_node(tenantid, node_name, timeline_name, port)?;
|
||||
}
|
||||
"start" => {
|
||||
let node_name = sub_args.value_of("node").unwrap_or("main");
|
||||
let timeline_name = sub_args.value_of("timeline");
|
||||
|
||||
let port: Option<u16> = match sub_args.value_of("port") {
|
||||
Some(p) => Some(p.parse()?),
|
||||
None => None,
|
||||
};
|
||||
let node_name = sub_args
|
||||
.value_of("node")
|
||||
.ok_or_else(|| anyhow!("No node name was provided to start"))?;
|
||||
|
||||
let node = cplane.nodes.get(&(tenant_id, node_name.to_owned()));
|
||||
let node = cplane.nodes.get(&(tenantid, node_name.to_owned()));
|
||||
|
||||
let auth_token = if matches!(env.pageserver.auth_type, AuthType::ZenithJWT) {
|
||||
let claims = Claims::new(Some(tenant_id), Scope::Tenant);
|
||||
let claims = Claims::new(Some(tenantid), Scope::Tenant);
|
||||
|
||||
Some(env.generate_auth_token(&claims)?)
|
||||
} else {
|
||||
@@ -730,49 +554,40 @@ fn handle_pg(pg_match: &ArgMatches, env: &local_env::LocalEnv) -> Result<()> {
|
||||
};
|
||||
|
||||
if let Some(node) = node {
|
||||
if timeline_name.is_some() {
|
||||
println!("timeline name ignored because node exists already");
|
||||
}
|
||||
println!("Starting existing postgres {}...", node_name);
|
||||
node.start(&auth_token)?;
|
||||
} else {
|
||||
let branch_name = sub_args
|
||||
.value_of("branch-name")
|
||||
.unwrap_or(DEFAULT_BRANCH_NAME);
|
||||
let timeline_id = env
|
||||
.get_branch_timeline_id(branch_name, tenant_id)
|
||||
.ok_or_else(|| {
|
||||
anyhow!("Found no timeline id for branch name '{}'", branch_name)
|
||||
})?;
|
||||
let lsn = sub_args
|
||||
.value_of("lsn")
|
||||
.map(Lsn::from_str)
|
||||
.transpose()
|
||||
.context("Failed to parse Lsn from the request")?;
|
||||
// when used with custom port this results in non obvious behaviour
|
||||
// port is remembered from first start command, i e
|
||||
// start --port X
|
||||
// stop
|
||||
// start <-- will also use port X even without explicit port argument
|
||||
let timeline_name = timeline_name.unwrap_or(node_name);
|
||||
println!(
|
||||
"Starting new postgres {} on timeline {} ...",
|
||||
node_name, timeline_id
|
||||
"Starting new postgres {} on {}...",
|
||||
node_name, timeline_name
|
||||
);
|
||||
let node = cplane.new_node(tenant_id, node_name, timeline_id, lsn, port)?;
|
||||
let node = cplane.new_node(tenantid, node_name, timeline_name, port)?;
|
||||
node.start(&auth_token)?;
|
||||
}
|
||||
}
|
||||
"stop" => {
|
||||
let node_name = sub_args
|
||||
.value_of("node")
|
||||
.ok_or_else(|| anyhow!("No node name was provided to stop"))?;
|
||||
let node_name = sub_args.value_of("node").unwrap_or("main");
|
||||
let destroy = sub_args.is_present("destroy");
|
||||
|
||||
let node = cplane
|
||||
.nodes
|
||||
.get(&(tenant_id, node_name.to_owned()))
|
||||
.get(&(tenantid, node_name.to_owned()))
|
||||
.with_context(|| format!("postgres {} is not found", node_name))?;
|
||||
node.stop(destroy)?;
|
||||
}
|
||||
|
||||
_ => bail!("Unexpected pg subcommand '{}'", sub_name),
|
||||
_ => {
|
||||
bail!("Unexpected pg subcommand '{}'", sub_name)
|
||||
}
|
||||
}
|
||||
|
||||
Ok(())
|
||||
|
||||
@@ -334,10 +334,6 @@ impl ZTenantTimelineId {
    pub fn generate() -> Self {
        Self::new(ZTenantId::generate(), ZTimelineId::generate())
    }

    pub fn empty() -> Self {
        Self::new(ZTenantId::from([0u8; 16]), ZTimelineId::from([0u8; 16]))
    }
}

impl fmt::Display for ZTenantTimelineId {