Mirror of https://github.com/neondatabase/neon.git, synced 2026-02-16 09:00:38 +00:00
Compare commits: revert-176 ... bojan-ci-t
27 Commits
| Author | SHA1 | Date |
|---|---|---|
| | d4585bcb67 | |
| | ae7b7bcf7c | |
| | 894721c204 | |
| | d7bb3d14df | |
| | c584d90bb9 | |
| | 7997fc2932 | |
| | 24d2313d0b | |
| | 9ab52e2186 | |
| | 6f1f33ef42 | |
| | 703f691df8 | |
| | 2b265fd6dc | |
| | d32b491a53 | |
| | 541ec25875 | |
| | 8346aa3a29 | |
| | 2aceb6a309 | |
| | 3ff5caf786 | |
| | fbedd535c0 | |
| | 89e5659f3f | |
| | ef7cdb13e2 | |
| | 73187bfef1 | |
| | 967eb38e81 | |
| | a124e44866 | |
| | c4b77084af | |
| | c9efdec8db | |
| | 12b7c793b3 | |
| | 3c6890bf1d | |
| | d97617ed3a | |
@@ -1,5 +1,6 @@
[pageservers]
zenith-1-ps-1 console_region_id=1
#zenith-1-ps-1 console_region_id=1
zenith-1-ps-2 console_region_id=1

[safekeepers]
zenith-1-sk-1 console_region_id=1
@@ -15,4 +16,4 @@ console_mgmt_base_url = http://console-release.local
bucket_name = zenith-storage-oregon
bucket_region = us-west-2
etcd_endpoints = etcd-release.local:2379
safekeeper_enable_s3_offload = true
safekeeper_enable_s3_offload = false

@@ -6,6 +6,7 @@ zenith-us-stage-ps-2 console_region_id=27
zenith-us-stage-sk-1 console_region_id=27
zenith-us-stage-sk-4 console_region_id=27
zenith-us-stage-sk-5 console_region_id=27
zenith-us-stage-sk-6 console_region_id=27

[storage:children]
pageservers
@@ -293,7 +293,7 @@ jobs:
# `Too long with no output` error, if a test is running for a long time.
# In that case, tests should have internal timeouts that are less than
# no_output_timeout, specified here.
no_output_timeout: 10m
no_output_timeout: 1m
environment:
- ZENITH_BIN: /tmp/zenith/bin
- POSTGRES_DISTRIB_DIR: /tmp/zenith/pg_install
@@ -354,6 +354,7 @@ jobs:
fi
fi
- run:
# TODO wait for processes to die in case of timeout?
# CircleCI artifacts are preserved one file at a time, so skipping
# this step isn't a good idea. If you want to extract the
# pageserver state, perhaps a tarball would be a better idea.
@@ -361,7 +362,7 @@ jobs:
when: always
command: |
du -sh /tmp/test_output/*
find /tmp/test_output -type f ! -name "pg.log" ! -name "pageserver.log" ! -name "safekeeper.log" ! -name "etcd.log" ! -name "regression.diffs" ! -name "junit.xml" ! -name "*.filediff" ! -name "*.stdout" ! -name "*.stderr" ! -name "flamegraph.svg" ! -name "*.metrics" -delete
find /tmp/test_output -type f ! -name "*.log" ! -name "regression.diffs" ! -name "junit.xml" ! -name "*.filediff" ! -name "*.stdout" ! -name "*.stderr" ! -name "flamegraph.svg" ! -name "*.metrics" -delete
du -sh /tmp/test_output/*
- store_artifacts:
path: /tmp/test_output
@@ -640,7 +641,8 @@ jobs:
name: Re-deploy proxy
command: |
DOCKER_TAG=$(git log --oneline|wc -l)
helm upgrade neon-stress-proxy neondatabase/neon-proxy --install -f .circleci/helm-values/neon-stress.proxy.yaml --set image.tag=${DOCKER_TAG} --wait
helm upgrade neon-stress-proxy neondatabase/neon-proxy --install -f .circleci/helm-values/neon-stress.proxy.yaml --set image.tag=${DOCKER_TAG} --wait
helm upgrade neon-stress-proxy-scram neondatabase/neon-proxy --install -f .circleci/helm-values/neon-stress.proxy-scram.yaml --set image.tag=${DOCKER_TAG} --wait

deploy-release:
docker:
@@ -684,12 +686,13 @@ jobs:
name: Setup helm v3
command: |
curl -s https://raw.githubusercontent.com/helm/helm/main/scripts/get-helm-3 | bash
helm repo add zenithdb https://neondatabase.github.io/helm-charts
helm repo add neondatabase https://neondatabase.github.io/helm-charts
- run:
name: Re-deploy proxy
command: |
DOCKER_TAG="release-$(git log --oneline|wc -l)"
helm upgrade zenith-proxy zenithdb/zenith-proxy --install -f .circleci/helm-values/production.proxy.yaml --set image.tag=${DOCKER_TAG} --wait
helm upgrade neon-proxy neondatabase/neon-proxy --install -f .circleci/helm-values/production.proxy.yaml --set image.tag=${DOCKER_TAG} --wait
helm upgrade neon-proxy-scram neondatabase/neon-proxy --install -f .circleci/helm-values/production.proxy-scram.yaml --set image.tag=${DOCKER_TAG} --wait

# Trigger a new remote CI job
remote-ci-trigger:
26 .circleci/helm-values/neon-stress.proxy-scram.yaml Normal file
@@ -0,0 +1,26 @@
fullnameOverride: "neon-stress-proxy-scram"

settings:
  authBackend: "console"
  authEndpoint: "http://neon-stress-console.local/management/api/v2"
  domain: "*.stress.neon.tech"

podLabels:
  zenith_service: proxy-scram
  zenith_env: staging
  zenith_region: eu-west-1
  zenith_region_slug: ireland

exposedService:
  annotations:
    service.beta.kubernetes.io/aws-load-balancer-type: external
    service.beta.kubernetes.io/aws-load-balancer-nlb-target-type: ip
    service.beta.kubernetes.io/aws-load-balancer-scheme: internet-facing
    external-dns.alpha.kubernetes.io/hostname: '*.stress.neon.tech'

metrics:
  enabled: true
  serviceMonitor:
    enabled: true
    selector:
      release: kube-prometheus-stack
24 .circleci/helm-values/production.proxy-scram.yaml Normal file
@@ -0,0 +1,24 @@
settings:
  authBackend: "console"
  authEndpoint: "http://console-release.local/management/api/v2"
  domain: "*.cloud.neon.tech"

podLabels:
  zenith_service: proxy-scram
  zenith_env: production
  zenith_region: us-west-2
  zenith_region_slug: oregon

exposedService:
  annotations:
    service.beta.kubernetes.io/aws-load-balancer-type: external
    service.beta.kubernetes.io/aws-load-balancer-nlb-target-type: ip
    service.beta.kubernetes.io/aws-load-balancer-scheme: internet-facing
    external-dns.alpha.kubernetes.io/hostname: '*.cloud.neon.tech'

metrics:
  enabled: true
  serviceMonitor:
    enabled: true
    selector:
      release: kube-prometheus-stack
@@ -1,9 +1,3 @@
# Helm chart values for zenith-proxy.
# This is a YAML-formatted file.

image:
repository: neondatabase/neon

settings:
authEndpoint: "https://console.neon.tech/authenticate_proxy_request/"
uri: "https://console.neon.tech/psql_session/"
@@ -28,7 +22,7 @@ exposedService:
service.beta.kubernetes.io/aws-load-balancer-type: external
service.beta.kubernetes.io/aws-load-balancer-nlb-target-type: ip
service.beta.kubernetes.io/aws-load-balancer-scheme: internet-facing
external-dns.alpha.kubernetes.io/hostname: start.zenith.tech,connect.neon.tech,pg.neon.tech
external-dns.alpha.kubernetes.io/hostname: connect.neon.tech,pg.neon.tech

metrics:
enabled: true
20 COPYRIGHT
@@ -1,20 +0,0 @@
This software is licensed under the Apache 2.0 License:

----------------------------------------------------------------------------
Copyright 2021 Zenith Labs, Inc

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
----------------------------------------------------------------------------

The PostgreSQL submodule in vendor/postgres is licensed under the
PostgreSQL license. See vendor/postgres/COPYRIGHT.
15 Cargo.lock generated
@@ -2047,15 +2047,18 @@ dependencies = [
"bytes",
"chrono",
"crc32c",
"env_logger",
"hex",
"lazy_static",
"log",
"memoffset",
"postgres",
"rand",
"regex",
"serde",
"thiserror",
"utils",
"wal_generate",
"workspace_hack",
]

@@ -3627,6 +3630,18 @@ version = "0.9.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "49874b5167b65d7193b8aba1567f5c7d93d001cafc34600cee003eda787e483f"

[[package]]
name = "wal_generate"
version = "0.1.0"
dependencies = [
"anyhow",
"clap 3.0.14",
"env_logger",
"log",
"postgres",
"tempfile",
]

[[package]]
name = "walkdir"
version = "2.3.2"
8 Makefile
@@ -20,7 +20,13 @@ else ifeq ($(BUILD_TYPE),debug)
PG_CONFIGURE_OPTS = --enable-debug --with-openssl --enable-cassert --enable-depend
PG_CFLAGS = -O0 -g3 $(CFLAGS)
else
$(error Bad build type `$(BUILD_TYPE)', see Makefile for options)
$(error Bad build type '$(BUILD_TYPE)', see Makefile for options)
endif

# macOS with brew-installed openssl requires explicit paths
UNAME_S := $(shell uname -s)
ifeq ($(UNAME_S),Darwin)
PG_CONFIGURE_OPTS += --with-includes=/usr/local/opt/openssl/include --with-libraries=/usr/local/opt/openssl/lib
endif

# Choose whether we should be silent or verbose
5 NOTICE Normal file
@@ -0,0 +1,5 @@
Neon
Copyright 2022 Neon Inc.

The PostgreSQL submodule in vendor/postgres is licensed under the
PostgreSQL license. See vendor/postgres/COPYRIGHT.
@@ -30,7 +30,7 @@ Pageserver consists of:
On Ubuntu or Debian this set of packages should be sufficient to build the code:
```text
apt install build-essential libtool libreadline-dev zlib1g-dev flex bison libseccomp-dev \
libssl-dev clang pkg-config libpq-dev
libssl-dev clang pkg-config libpq-dev libprotobuf-dev etcd
```

2. [Install Rust](https://www.rust-lang.org/tools/install)
@@ -52,9 +52,10 @@ make -j5
```

#### building on OSX (12.3.1)
1. Install XCode
1. Install XCode and dependencies
```
xcode-select --install
brew install protobuf etcd
```

2. [Install Rust](https://www.rust-lang.org/tools/install)
@@ -146,8 +146,14 @@ impl ComputeNode {
_ => format!("basebackup {} {} {}", &self.tenant, &self.timeline, lsn),
};
let copyreader = client.copy_out(basebackup_cmd.as_str())?;
let mut ar = tar::Archive::new(copyreader);

// Read the archive directly from the `CopyOutReader`
//
// Set `ignore_zeros` so that unpack() reads all the Copy data and
// doesn't stop at the end-of-archive marker. Otherwise, if the server
// sends an Error after finishing the tarball, we will not notice it.
let mut ar = tar::Archive::new(copyreader);
ar.set_ignore_zeros(true);
ar.unpack(&self.pgdata)?;

self.metrics.basebackup_ms.store(
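For reference, a minimal sketch (not part of the diff) of the unpack pattern the comment above describes: reading the basebackup tarball straight from the copy-out stream with `ignore_zeros` enabled so that a trailing error from the server is still surfaced. The helper name and paths are hypothetical; only the `tar` crate API shown is assumed.

```rust
use std::io::Read;
use std::path::Path;
use tar::Archive;

// Hypothetical helper, for illustration only.
fn unpack_basebackup<R: Read>(copyreader: R, pgdata: &Path) -> std::io::Result<()> {
    let mut ar = Archive::new(copyreader);
    // Without this, unpack() stops at the two zero blocks that end a tar
    // archive and silently ignores anything the server sends afterwards.
    ar.set_ignore_zeros(true);
    ar.unpack(pgdata)
}
```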
@@ -4,7 +4,7 @@ version = "0.1.0"
edition = "2021"

[dependencies]
tar = "0.4.33"
tar = "0.4.38"
postgres = { git = "https://github.com/zenithdb/rust-postgres.git", rev="d052ee8b86fff9897c77b0fe89ea9daba0e1fa38" }
serde = { version = "1.0", features = ["derive"] }
serde_with = "1.12.0"
@@ -231,8 +231,13 @@ impl PostgresNode {
.context("page server 'basebackup' command failed")?;

// Read the archive directly from the `CopyOutReader`
tar::Archive::new(copyreader)
.unpack(&self.pgdata())
//
// Set `ignore_zeros` so that unpack() reads all the Copy data and
// doesn't stop at the end-of-archive marker. Otherwise, if the server
// sends an Error after finishing the tarball, we will not notice it.
let mut ar = tar::Archive::new(copyreader);
ar.set_ignore_zeros(true);
ar.unpack(&self.pgdata())
.context("extracting base backup failed")?;

Ok(())
@@ -274,6 +279,8 @@ impl PostgresNode {
conf.append("listen_addresses", &self.address.ip().to_string());
conf.append("port", &self.address.port().to_string());
conf.append("wal_keep_size", "0");
// walproposer panics when basebackup is invalid, it is pointless to restart in this case.
conf.append("restart_after_crash", "off");

// Configure the node to fetch pages from pageserver
let pageserver_connstr = {
@@ -48,6 +48,10 @@ pub fn start_etcd_process(env: &local_env::LocalEnv) -> anyhow::Result<()> {
format!("--data-dir={}", etcd_data_dir.display()),
format!("--listen-client-urls={client_urls}"),
format!("--advertise-client-urls={client_urls}"),
// Set --quota-backend-bytes to keep the etcd virtual memory
// size smaller. Our test etcd clusters are very small.
// See https://github.com/etcd-io/etcd/issues/7910
"--quota-backend-bytes=100000000".to_string(),
])
.stdout(Stdio::from(etcd_stdout_file))
.stderr(Stdio::from(etcd_stderr_file))
@@ -15,7 +15,7 @@ use std::process::{Command, Stdio};
use utils::{
auth::{encode_from_key_file, Claims, Scope},
postgres_backend::AuthType,
zid::{ZNodeId, ZTenantId, ZTenantTimelineId, ZTimelineId},
zid::{NodeId, ZTenantId, ZTenantTimelineId, ZTimelineId},
};

use crate::safekeeper::SafekeeperNode;
@@ -136,7 +136,7 @@ impl EtcdBroker {
#[serde(default)]
pub struct PageServerConf {
// node id
pub id: ZNodeId,
pub id: NodeId,
// Pageserver connection settings
pub listen_pg_addr: String,
pub listen_http_addr: String,
@@ -151,7 +151,7 @@ pub struct PageServerConf {
impl Default for PageServerConf {
fn default() -> Self {
Self {
id: ZNodeId(0),
id: NodeId(0),
listen_pg_addr: String::new(),
listen_http_addr: String::new(),
auth_type: AuthType::Trust,
@@ -163,7 +163,7 @@ impl Default for PageServerConf {
#[derive(Serialize, Deserialize, PartialEq, Eq, Clone, Debug)]
#[serde(default)]
pub struct SafekeeperConf {
pub id: ZNodeId,
pub id: NodeId,
pub pg_port: u16,
pub http_port: u16,
pub sync: bool,
@@ -172,7 +172,7 @@ pub struct SafekeeperConf {
impl Default for SafekeeperConf {
fn default() -> Self {
Self {
id: ZNodeId(0),
id: NodeId(0),
pg_port: 0,
http_port: 0,
sync: true,

@@ -18,7 +18,7 @@ use thiserror::Error;
use utils::{
connstring::connection_address,
http::error::HttpErrorBody,
zid::{ZNodeId, ZTenantId, ZTimelineId},
zid::{NodeId, ZTenantId, ZTimelineId},
};

use crate::local_env::{LocalEnv, SafekeeperConf};
@@ -65,7 +65,7 @@ impl ResponseErrorMessageExt for Response {
//
#[derive(Debug)]
pub struct SafekeeperNode {
pub id: ZNodeId,
pub id: NodeId,

pub conf: SafekeeperConf,

@@ -100,7 +100,7 @@ impl SafekeeperNode {
.unwrap()
}

pub fn datadir_path_by_id(env: &LocalEnv, sk_id: ZNodeId) -> PathBuf {
pub fn datadir_path_by_id(env: &LocalEnv, sk_id: NodeId) -> PathBuf {
env.safekeeper_data_dir(format!("sk{}", sk_id).as_ref())
}

@@ -286,7 +286,7 @@ impl SafekeeperNode {
&self,
tenant_id: ZTenantId,
timeline_id: ZTimelineId,
peer_ids: Vec<ZNodeId>,
peer_ids: Vec<NodeId>,
) -> Result<()> {
Ok(self
.http_request(
@@ -21,7 +21,7 @@ NOTE:It has nothing to do with PostgreSQL pg_basebackup.

### Branch

We can create branch at certain LSN using `zenith timeline branch` command.
We can create branch at certain LSN using `neon_local timeline branch` command.
Each Branch lives in a corresponding timeline[] and has an ancestor[].

@@ -91,7 +91,7 @@ The layer map tracks what layers exist in a timeline.

### Layered repository

Zenith repository implementation that keeps data in layers.
Neon repository implementation that keeps data in layers.
### LSN

The Log Sequence Number (LSN) is a unique identifier of the WAL record[] in the WAL log.
@@ -101,7 +101,7 @@ It is printed as two hexadecimal numbers of up to 8 digits each, separated by a
Check also [PostgreSQL doc about pg_lsn type](https://www.postgresql.org/docs/devel/datatype-pg-lsn.html)
Values can be compared to calculate the volume of WAL data that separates them, so they are used to measure the progress of replication and recovery.

In postgres and Zenith lsns are used to describe certain points in WAL handling.
In Postgres and Neon LSNs are used to describe certain points in WAL handling.

PostgreSQL LSNs and functions to monitor them:
* `pg_current_wal_insert_lsn()` - Returns the current write-ahead log insert location.
@@ -111,13 +111,13 @@ PostgreSQL LSNs and functions to monitor them:
* `pg_last_wal_replay_lsn ()` - Returns the last write-ahead log location that has been replayed during recovery. If recovery is still in progress this will increase monotonically.
[source PostgreSQL documentation](https://www.postgresql.org/docs/devel/functions-admin.html):

Zenith safekeeper LSNs. For more check [safekeeper/README_PROTO.md](/safekeeper/README_PROTO.md)
Neon safekeeper LSNs. For more check [safekeeper/README_PROTO.md](/safekeeper/README_PROTO.md)
* `CommitLSN`: position in WAL confirmed by quorum safekeepers.
* `RestartLSN`: position in WAL confirmed by all safekeepers.
* `FlushLSN`: part of WAL persisted to the disk by safekeeper.
* `VCL`: the largerst LSN for which we can guarantee availablity of all prior records.

Zenith pageserver LSNs:
Neon pageserver LSNs:
* `last_record_lsn` - the end of last processed WAL record.
* `disk_consistent_lsn` - data is known to be fully flushed and fsync'd to local disk on pageserver up to this LSN.
* `remote_consistent_lsn` - The last LSN that is synced to remote storage and is guaranteed to survive pageserver crash.
@@ -132,7 +132,7 @@ This is the unit of data exchange between compute node and pageserver.

### Pageserver

Zenith storage engine: repositories + wal receiver + page service + wal redo.
Neon storage engine: repositories + wal receiver + page service + wal redo.

### Page service

@@ -184,10 +184,10 @@ relation exceeds that size, it is split into multiple segments.
SLRUs include pg_clog, pg_multixact/members, and
pg_multixact/offsets. There are other SLRUs in PostgreSQL, but
they don't need to be stored permanently (e.g. pg_subtrans),
or we do not support them in zenith yet (pg_commit_ts).
or we do not support them in neon yet (pg_commit_ts).

### Tenant (Multitenancy)
Tenant represents a single customer, interacting with Zenith.
Tenant represents a single customer, interacting with Neon.
Wal redo[] activity, timelines[], layers[] are managed for each tenant independently.
One pageserver[] can serve multiple tenants at once.
One safekeeper
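As an aside on the LSN entries above, a small illustrative calculation (not from the repository) of how an LSN such as `0/2000000` maps onto WAL segments, assuming the default 16 MiB segment size used by the tests elsewhere in this diff:

```rust
fn main() {
    // "0/2000000" is two hex halves of a 64-bit byte position: 0x0200_0000.
    let lsn: u64 = 0x0200_0000;
    let wal_seg_size: u64 = 16 * 1024 * 1024;
    // Dividing by the segment size gives the segment number; the remainder
    // is the offset inside that segment. 0x0200_0000 sits exactly on the
    // boundary right after segment 000000010000000000000001.
    assert_eq!(lsn / wal_seg_size, 2);
    assert_eq!(lsn % wal_seg_size, 0);
}
```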
@@ -16,7 +16,7 @@ use tokio::{sync::mpsc, task::JoinHandle};
use tracing::*;
use utils::{
lsn::Lsn,
zid::{ZNodeId, ZTenantId, ZTenantTimelineId},
zid::{NodeId, ZTenantId, ZTenantTimelineId},
};

/// Default value to use for prefixing to all etcd keys with.
@@ -25,7 +25,7 @@ pub const DEFAULT_NEON_BROKER_ETCD_PREFIX: &str = "neon";

#[derive(Debug, Deserialize, Serialize)]
struct SafekeeperTimeline {
safekeeper_id: ZNodeId,
safekeeper_id: NodeId,
info: SkTimelineInfo,
}

@@ -71,7 +71,7 @@ pub enum BrokerError {
/// A way to control the data retrieval from a certain subscription.
pub struct SkTimelineSubscription {
safekeeper_timeline_updates:
mpsc::UnboundedReceiver<HashMap<ZTenantTimelineId, HashMap<ZNodeId, SkTimelineInfo>>>,
mpsc::UnboundedReceiver<HashMap<ZTenantTimelineId, HashMap<NodeId, SkTimelineInfo>>>,
kind: SkTimelineSubscriptionKind,
watcher_handle: JoinHandle<Result<(), BrokerError>>,
watcher: Watcher,
@@ -81,7 +81,7 @@ impl SkTimelineSubscription {
/// Asynchronously polls for more data from the subscription, suspending the current future if there's no data sent yet.
pub async fn fetch_data(
&mut self,
) -> Option<HashMap<ZTenantTimelineId, HashMap<ZNodeId, SkTimelineInfo>>> {
) -> Option<HashMap<ZTenantTimelineId, HashMap<NodeId, SkTimelineInfo>>> {
self.safekeeper_timeline_updates.recv().await
}

@@ -221,7 +221,7 @@ pub async fn subscribe_to_safekeeper_timeline_updates(
break;
}

let mut timeline_updates: HashMap<ZTenantTimelineId, HashMap<ZNodeId, SkTimelineInfo>> = HashMap::new();
let mut timeline_updates: HashMap<ZTenantTimelineId, HashMap<NodeId, SkTimelineInfo>> = HashMap::new();
// Keep track that the timeline data updates from etcd arrive in the right order.
// https://etcd.io/docs/v3.5/learning/api_guarantees/#isolation-level-and-consistency-of-replicas
// > etcd does not ensure linearizability for watch operations. Users are expected to verify the revision of watch responses to ensure correct ordering.
@@ -299,18 +299,18 @@ fn parse_etcd_key_value(
parse_capture(&caps, 1).map_err(BrokerError::ParsingError)?,
parse_capture(&caps, 2).map_err(BrokerError::ParsingError)?,
),
ZNodeId(parse_capture(&caps, 3).map_err(BrokerError::ParsingError)?),
NodeId(parse_capture(&caps, 3).map_err(BrokerError::ParsingError)?),
),
SubscriptionKind::Tenant(tenant_id) => (
ZTenantTimelineId::new(
tenant_id,
parse_capture(&caps, 1).map_err(BrokerError::ParsingError)?,
),
ZNodeId(parse_capture(&caps, 2).map_err(BrokerError::ParsingError)?),
NodeId(parse_capture(&caps, 2).map_err(BrokerError::ParsingError)?),
),
SubscriptionKind::Timeline(zttid) => (
zttid,
ZNodeId(parse_capture(&caps, 1).map_err(BrokerError::ParsingError)?),
NodeId(parse_capture(&caps, 1).map_err(BrokerError::ParsingError)?),
),
};
@@ -20,5 +20,10 @@ serde = { version = "1.0", features = ["derive"] }
utils = { path = "../utils" }
workspace_hack = { version = "0.1", path = "../../workspace_hack" }

[dev-dependencies]
env_logger = "0.9"
postgres = { git = "https://github.com/zenithdb/rust-postgres.git", rev="d052ee8b86fff9897c77b0fe89ea9daba0e1fa38" }
wal_generate = { path = "wal_generate" }

[build-dependencies]
bindgen = "0.59.1"
@@ -15,7 +15,7 @@ use crate::XLogPageHeaderData;
|
||||
use crate::XLogRecord;
|
||||
use crate::XLOG_PAGE_MAGIC;
|
||||
|
||||
use anyhow::bail;
|
||||
use anyhow::{bail, ensure};
|
||||
use byteorder::{ByteOrder, LittleEndian};
|
||||
use bytes::BytesMut;
|
||||
use bytes::{Buf, Bytes};
|
||||
@@ -30,6 +30,7 @@ use std::path::{Path, PathBuf};
|
||||
use std::time::SystemTime;
|
||||
use utils::bin_ser::DeserializeError;
|
||||
use utils::bin_ser::SerializeError;
|
||||
use utils::const_assert;
|
||||
use utils::lsn::Lsn;
|
||||
|
||||
pub const XLOG_FNAME_LEN: usize = 24;
|
||||
@@ -149,8 +150,9 @@ fn find_end_of_wal_segment(
|
||||
) -> anyhow::Result<u32> {
|
||||
// step back to the beginning of the page to read it in...
|
||||
let mut offs: usize = start_offset - start_offset % XLOG_BLCKSZ;
|
||||
let mut skipping_first_contrecord: bool = false;
|
||||
let mut contlen: usize = 0;
|
||||
let mut wal_crc: u32 = 0;
|
||||
let mut xl_crc: u32 = 0;
|
||||
let mut crc: u32 = 0;
|
||||
let mut rec_offs: usize = 0;
|
||||
let mut buf = [0u8; XLOG_BLCKSZ];
|
||||
@@ -158,11 +160,15 @@ fn find_end_of_wal_segment(
|
||||
let mut last_valid_rec_pos: usize = start_offset; // assume at given start_offset begins new record
|
||||
let mut file = File::open(data_dir.join(file_name.clone() + ".partial")).unwrap();
|
||||
file.seek(SeekFrom::Start(offs as u64))?;
|
||||
// xl_crc is the last field in XLogRecord, will not be read into rec_hdr
|
||||
const_assert!(XLOG_RECORD_CRC_OFFS + 4 == XLOG_SIZE_OF_XLOG_RECORD);
|
||||
let mut rec_hdr = [0u8; XLOG_RECORD_CRC_OFFS];
|
||||
|
||||
trace!("find_end_of_wal_segment(data_dir={}, segno={}, tli={}, wal_seg_size={}, start_offset=0x{:x})", data_dir.display(), segno, tli, wal_seg_size, start_offset);
|
||||
while offs < wal_seg_size {
|
||||
// we are at the beginning of the page; read it in
|
||||
if offs % XLOG_BLCKSZ == 0 {
|
||||
trace!("offs=0x{:x}: new page", offs);
|
||||
let bytes_read = file.read(&mut buf)?;
|
||||
if bytes_read != buf.len() {
|
||||
bail!(
|
||||
@@ -176,30 +182,49 @@ fn find_end_of_wal_segment(
|
||||
let xlp_magic = LittleEndian::read_u16(&buf[0..2]);
|
||||
let xlp_info = LittleEndian::read_u16(&buf[2..4]);
|
||||
let xlp_rem_len = LittleEndian::read_u32(&buf[XLP_REM_LEN_OFFS..XLP_REM_LEN_OFFS + 4]);
|
||||
trace!(
|
||||
" xlp_magic=0x{:x}, xlp_info=0x{:x}, xlp_rem_len={}",
|
||||
xlp_magic,
|
||||
xlp_info,
|
||||
xlp_rem_len
|
||||
);
|
||||
// this is expected in current usage when valid WAL starts after page header
|
||||
if xlp_magic != XLOG_PAGE_MAGIC as u16 {
|
||||
trace!(
|
||||
"invalid WAL file {}.partial magic {} at {:?}",
|
||||
" invalid WAL file {}.partial magic {} at {:?}",
|
||||
file_name,
|
||||
xlp_magic,
|
||||
Lsn(XLogSegNoOffsetToRecPtr(segno, offs as u32, wal_seg_size)),
|
||||
);
|
||||
}
|
||||
if offs == 0 {
|
||||
offs = XLOG_SIZE_OF_XLOG_LONG_PHD;
|
||||
offs += XLOG_SIZE_OF_XLOG_LONG_PHD;
|
||||
if (xlp_info & XLP_FIRST_IS_CONTRECORD) != 0 {
|
||||
offs += ((xlp_rem_len + 7) & !7) as usize;
|
||||
trace!(" first record is contrecord");
|
||||
skipping_first_contrecord = true;
|
||||
contlen = xlp_rem_len as usize;
|
||||
if offs < start_offset {
|
||||
// Pre-condition failed: the beginning of the segment is unexpectedly corrupted.
|
||||
ensure!(start_offset - offs >= contlen,
|
||||
"start_offset is in the middle of the first record (which happens to be a contrecord), \
|
||||
expected to be on a record boundary. Is beginning of the segment corrupted?");
|
||||
contlen = 0;
|
||||
// keep skipping_first_contrecord to avoid counting the contrecord as valid, we did not check it.
|
||||
}
|
||||
} else {
|
||||
trace!(" first record is not contrecord");
|
||||
}
|
||||
} else {
|
||||
offs += XLOG_SIZE_OF_XLOG_SHORT_PHD;
|
||||
}
|
||||
// ... and step forward again if asked
|
||||
trace!(" skipped header to 0x{:x}", offs);
|
||||
offs = max(offs, start_offset);
|
||||
|
||||
// beginning of the next record
|
||||
} else if contlen == 0 {
|
||||
let page_offs = offs % XLOG_BLCKSZ;
|
||||
let xl_tot_len = LittleEndian::read_u32(&buf[page_offs..page_offs + 4]) as usize;
|
||||
trace!("offs=0x{:x}: new record, xl_tot_len={}", offs, xl_tot_len);
|
||||
if xl_tot_len == 0 {
|
||||
info!(
|
||||
"find_end_of_wal_segment reached zeros at {:?}, last records ends at {:?}",
|
||||
@@ -212,10 +237,25 @@ fn find_end_of_wal_segment(
|
||||
);
|
||||
break; // zeros, reached the end
|
||||
}
|
||||
last_valid_rec_pos = offs;
|
||||
if skipping_first_contrecord {
|
||||
skipping_first_contrecord = false;
|
||||
trace!(" first contrecord has been just completed");
|
||||
} else {
|
||||
trace!(
|
||||
" updating last_valid_rec_pos: 0x{:x} --> 0x{:x}",
|
||||
last_valid_rec_pos,
|
||||
offs
|
||||
);
|
||||
last_valid_rec_pos = offs;
|
||||
}
|
||||
offs += 4;
|
||||
rec_offs = 4;
|
||||
contlen = xl_tot_len - 4;
|
||||
trace!(
|
||||
" reading rec_hdr[0..4] <-- [0x{:x}; 0x{:x})",
|
||||
page_offs,
|
||||
page_offs + 4
|
||||
);
|
||||
rec_hdr[0..4].copy_from_slice(&buf[page_offs..page_offs + 4]);
|
||||
} else {
|
||||
// we're continuing a record, possibly from previous page.
|
||||
@@ -224,42 +264,118 @@ fn find_end_of_wal_segment(
|
||||
|
||||
// read the rest of the record, or as much as fits on this page.
|
||||
let n = min(contlen, pageleft);
|
||||
// fill rec_hdr (header up to (but not including) xl_crc field)
|
||||
trace!(
|
||||
"offs=0x{:x}, record continuation, pageleft={}, contlen={}",
|
||||
offs,
|
||||
pageleft,
|
||||
contlen
|
||||
);
|
||||
// fill rec_hdr header up to (but not including) xl_crc field
|
||||
trace!(
|
||||
" rec_offs={}, XLOG_RECORD_CRC_OFFS={}, XLOG_SIZE_OF_XLOG_RECORD={}",
|
||||
rec_offs,
|
||||
XLOG_RECORD_CRC_OFFS,
|
||||
XLOG_SIZE_OF_XLOG_RECORD
|
||||
);
|
||||
if rec_offs < XLOG_RECORD_CRC_OFFS {
|
||||
let len = min(XLOG_RECORD_CRC_OFFS - rec_offs, n);
|
||||
trace!(
|
||||
" reading rec_hdr[{}..{}] <-- [0x{:x}; 0x{:x})",
|
||||
rec_offs,
|
||||
rec_offs + len,
|
||||
page_offs,
|
||||
page_offs + len
|
||||
);
|
||||
rec_hdr[rec_offs..rec_offs + len].copy_from_slice(&buf[page_offs..page_offs + len]);
|
||||
}
|
||||
if rec_offs <= XLOG_RECORD_CRC_OFFS && rec_offs + n >= XLOG_SIZE_OF_XLOG_RECORD {
|
||||
let crc_offs = page_offs - rec_offs + XLOG_RECORD_CRC_OFFS;
|
||||
wal_crc = LittleEndian::read_u32(&buf[crc_offs..crc_offs + 4]);
|
||||
// All records are aligned on 8-byte boundary, so their 8-byte frames
|
||||
// cannot be split between pages. As xl_crc is the last field,
|
||||
// its content is always on the same page.
|
||||
const_assert!(XLOG_RECORD_CRC_OFFS % 8 == 4);
|
||||
// We should always start reading aligned records even in incorrect WALs so if
|
||||
// the condition is false it is likely a bug. However, it is localized somewhere
|
||||
// in this function, hence we do not crash and just report failure instead.
|
||||
ensure!(crc_offs % 8 == 4, "Record is not aligned properly (bug?)");
|
||||
xl_crc = LittleEndian::read_u32(&buf[crc_offs..crc_offs + 4]);
|
||||
trace!(
|
||||
" reading xl_crc: [0x{:x}; 0x{:x}) = 0x{:x}",
|
||||
crc_offs,
|
||||
crc_offs + 4,
|
||||
xl_crc
|
||||
);
|
||||
crc = crc32c_append(0, &buf[crc_offs + 4..page_offs + n]);
|
||||
} else {
|
||||
crc ^= 0xFFFFFFFFu32;
|
||||
trace!(
|
||||
" initializing crc: [0x{:x}; 0x{:x}); crc = 0x{:x}",
|
||||
crc_offs + 4,
|
||||
page_offs + n,
|
||||
crc
|
||||
);
|
||||
} else if rec_offs > XLOG_RECORD_CRC_OFFS {
|
||||
// As all records are 8-byte aligned, the header is already fully read and `crc` is initialized in the branch above.
|
||||
ensure!(rec_offs >= XLOG_SIZE_OF_XLOG_RECORD);
|
||||
let old_crc = crc;
|
||||
crc = crc32c_append(crc, &buf[page_offs..page_offs + n]);
|
||||
trace!(
|
||||
" appending to crc: [0x{:x}; 0x{:x}); 0x{:x} --> 0x{:x}",
|
||||
page_offs,
|
||||
page_offs + n,
|
||||
old_crc,
|
||||
crc
|
||||
);
|
||||
} else {
|
||||
// Correct because of the way conditions are written above.
|
||||
assert!(rec_offs + n < XLOG_SIZE_OF_XLOG_RECORD);
|
||||
// If `skipping_first_contrecord == true`, we may be reading from a middle of a record
|
||||
// which started in the previous segment. Hence there is no point in validating the header.
|
||||
if !skipping_first_contrecord && rec_offs + n > XLOG_RECORD_CRC_OFFS {
|
||||
info!(
|
||||
"Curiously corrupted WAL: a record stops inside the header; \
|
||||
offs=0x{:x}, record continuation, pageleft={}, contlen={}",
|
||||
offs, pageleft, contlen
|
||||
);
|
||||
break;
|
||||
}
|
||||
// Do nothing: we are still reading the header. It's accounted in CRC in the end of the record.
|
||||
}
|
||||
crc = !crc;
|
||||
rec_offs += n;
|
||||
offs += n;
|
||||
contlen -= n;
|
||||
|
||||
if contlen == 0 {
|
||||
crc = !crc;
|
||||
trace!(" record completed at 0x{:x}", offs);
|
||||
crc = crc32c_append(crc, &rec_hdr);
|
||||
offs = (offs + 7) & !7; // pad on 8 bytes boundary */
|
||||
if crc == wal_crc {
|
||||
trace!(
|
||||
" padded offs to 0x{:x}, crc is {:x}, expected crc is {:x}",
|
||||
offs,
|
||||
crc,
|
||||
xl_crc
|
||||
);
|
||||
if skipping_first_contrecord {
|
||||
// do nothing, the flag will go down on next iteration when we're reading new record
|
||||
trace!(" first conrecord has been just completed");
|
||||
} else if crc == xl_crc {
|
||||
// record is valid, advance the result to its end (with
|
||||
// alignment to the next record taken into account)
|
||||
trace!(
|
||||
" updating last_valid_rec_pos: 0x{:x} --> 0x{:x}",
|
||||
last_valid_rec_pos,
|
||||
offs
|
||||
);
|
||||
last_valid_rec_pos = offs;
|
||||
} else {
|
||||
info!(
|
||||
"CRC mismatch {} vs {} at {}",
|
||||
crc, wal_crc, last_valid_rec_pos
|
||||
crc, xl_crc, last_valid_rec_pos
|
||||
);
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
trace!("last_valid_rec_pos=0x{:x}", last_valid_rec_pos);
|
||||
Ok(last_valid_rec_pos as u32)
|
||||
}
|
||||
|
||||
@@ -476,78 +592,126 @@ pub fn generate_wal_segment(segno: u64, system_id: u64) -> Result<Bytes, Seriali
|
||||
mod tests {
|
||||
use super::*;
|
||||
use regex::Regex;
|
||||
use std::{env, process::Command, str::FromStr};
|
||||
use std::{env, str::FromStr};
|
||||
|
||||
// Run find_end_of_wal against file in test_wal dir
|
||||
// Ensure that it finds last record correctly
|
||||
#[test]
|
||||
pub fn test_find_end_of_wal() {
|
||||
// 1. Run initdb to generate some WAL
|
||||
fn init_logging() {
|
||||
let _ = env_logger::Builder::from_env(
|
||||
env_logger::Env::default()
|
||||
.default_filter_or("wal_generate=info,postgres_ffi::xlog_utils=trace"),
|
||||
)
|
||||
.is_test(true)
|
||||
.try_init();
|
||||
}
|
||||
|
||||
fn test_end_of_wal(
|
||||
test_name: &str,
|
||||
generate_wal: impl Fn(&mut postgres::Client) -> anyhow::Result<postgres::types::PgLsn>,
|
||||
expected_end_of_wal_non_partial: Lsn,
|
||||
last_segment: &str,
|
||||
) {
|
||||
use wal_generate::*;
|
||||
// 1. Generate some WAL
|
||||
let top_path = PathBuf::from(env!("CARGO_MANIFEST_DIR"))
|
||||
.join("..")
|
||||
.join("..");
|
||||
let data_dir = top_path.join("test_output/test_find_end_of_wal");
|
||||
let initdb_path = top_path.join("tmp_install/bin/initdb");
|
||||
let lib_path = top_path.join("tmp_install/lib");
|
||||
if data_dir.exists() {
|
||||
fs::remove_dir_all(&data_dir).unwrap();
|
||||
let cfg = Conf {
|
||||
pg_distrib_dir: top_path.join("tmp_install"),
|
||||
datadir: top_path.join(format!("test_output/{}", test_name)),
|
||||
};
|
||||
if cfg.datadir.exists() {
|
||||
fs::remove_dir_all(&cfg.datadir).unwrap();
|
||||
}
|
||||
println!("Using initdb from '{}'", initdb_path.display());
|
||||
println!("Data directory '{}'", data_dir.display());
|
||||
let initdb_output = Command::new(initdb_path)
|
||||
.args(&["-D", data_dir.to_str().unwrap()])
|
||||
.arg("--no-instructions")
|
||||
.arg("--no-sync")
|
||||
.env_clear()
|
||||
.env("LD_LIBRARY_PATH", &lib_path)
|
||||
.env("DYLD_LIBRARY_PATH", &lib_path)
|
||||
.output()
|
||||
.unwrap();
|
||||
assert!(
|
||||
initdb_output.status.success(),
|
||||
"initdb failed. Status: '{}', stdout: '{}', stderr: '{}'",
|
||||
initdb_output.status,
|
||||
String::from_utf8_lossy(&initdb_output.stdout),
|
||||
String::from_utf8_lossy(&initdb_output.stderr),
|
||||
);
|
||||
cfg.initdb().unwrap();
|
||||
let mut srv = cfg.start_server().unwrap();
|
||||
let expected_wal_end: Lsn =
|
||||
u64::from(generate_wal(&mut srv.connect_with_timeout().unwrap()).unwrap()).into();
|
||||
srv.kill();
|
||||
|
||||
// 2. Pick WAL generated by initdb
|
||||
let wal_dir = data_dir.join("pg_wal");
|
||||
let wal_dir = cfg.datadir.join("pg_wal");
|
||||
let wal_seg_size = 16 * 1024 * 1024;
|
||||
|
||||
// 3. Check end_of_wal on non-partial WAL segment (we treat it as fully populated)
|
||||
let (wal_end, tli) = find_end_of_wal(&wal_dir, wal_seg_size, true, Lsn(0)).unwrap();
|
||||
let wal_end = Lsn(wal_end);
|
||||
println!("wal_end={}, tli={}", wal_end, tli);
|
||||
assert_eq!(wal_end, "0/2000000".parse::<Lsn>().unwrap());
|
||||
info!(
|
||||
"find_end_of_wal returned (wal_end={}, tli={})",
|
||||
wal_end, tli
|
||||
);
|
||||
assert_eq!(wal_end, expected_end_of_wal_non_partial);
|
||||
|
||||
// 4. Get the actual end of WAL by pg_waldump
|
||||
let waldump_path = top_path.join("tmp_install/bin/pg_waldump");
|
||||
let waldump_output = Command::new(waldump_path)
|
||||
.arg(wal_dir.join("000000010000000000000001"))
|
||||
.env_clear()
|
||||
.env("LD_LIBRARY_PATH", &lib_path)
|
||||
.env("DYLD_LIBRARY_PATH", &lib_path)
|
||||
.output()
|
||||
.unwrap();
|
||||
let waldump_output = std::str::from_utf8(&waldump_output.stderr).unwrap();
|
||||
println!("waldump_output = '{}'", &waldump_output);
|
||||
let re = Regex::new(r"invalid record length at (.+):").unwrap();
|
||||
let caps = re.captures(waldump_output).unwrap();
|
||||
let waldump_output = cfg
|
||||
.pg_waldump("000000010000000000000001", last_segment)
|
||||
.unwrap()
|
||||
.stderr;
|
||||
let waldump_output = std::str::from_utf8(&waldump_output).unwrap();
|
||||
let caps = match Regex::new(r"invalid record length at (.+):")
|
||||
.unwrap()
|
||||
.captures(waldump_output)
|
||||
{
|
||||
Some(caps) => caps,
|
||||
None => {
|
||||
error!("Unable to parse pg_waldump's stderr:\n{}", waldump_output);
|
||||
panic!();
|
||||
}
|
||||
};
|
||||
let waldump_wal_end = Lsn::from_str(caps.get(1).unwrap().as_str()).unwrap();
|
||||
info!(
|
||||
"waldump erred on {}, expected wal end at {}",
|
||||
waldump_wal_end, expected_wal_end
|
||||
);
|
||||
assert_eq!(waldump_wal_end, expected_wal_end);
|
||||
|
||||
// 5. Rename file to partial to actually find last valid lsn
|
||||
fs::rename(
|
||||
wal_dir.join("000000010000000000000001"),
|
||||
wal_dir.join("000000010000000000000001.partial"),
|
||||
wal_dir.join(last_segment),
|
||||
wal_dir.join(format!("{}.partial", last_segment)),
|
||||
)
|
||||
.unwrap();
|
||||
let (wal_end, tli) = find_end_of_wal(&wal_dir, wal_seg_size, true, Lsn(0)).unwrap();
|
||||
let wal_end = Lsn(wal_end);
|
||||
println!("wal_end={}, tli={}", wal_end, tli);
|
||||
info!(
|
||||
"find_end_of_wal returned (wal_end={}, tli={})",
|
||||
wal_end, tli
|
||||
);
|
||||
assert_eq!(wal_end, waldump_wal_end);
|
||||
}
|
||||
|
||||
#[test]
|
||||
pub fn test_find_end_of_wal_simple() {
|
||||
init_logging();
|
||||
test_end_of_wal(
|
||||
"test_find_end_of_wal_simple",
|
||||
wal_generate::generate_simple,
|
||||
"0/2000000".parse::<Lsn>().unwrap(),
|
||||
"000000010000000000000001",
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
pub fn test_find_end_of_wal_crossing_segment_followed_by_small_one() {
|
||||
init_logging();
|
||||
test_end_of_wal(
|
||||
"test_find_end_of_wal_crossing_segment_followed_by_small_one",
|
||||
wal_generate::generate_wal_record_crossing_segment_followed_by_small_one,
|
||||
"0/3000000".parse::<Lsn>().unwrap(),
|
||||
"000000010000000000000002",
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
#[ignore = "not yet fixed, needs correct parsing of pre-last segments"] // TODO
|
||||
pub fn test_find_end_of_wal_last_crossing_segment() {
|
||||
init_logging();
|
||||
test_end_of_wal(
|
||||
"test_find_end_of_wal_last_crossing_segment",
|
||||
wal_generate::generate_last_wal_record_crossing_segment,
|
||||
"0/3000000".parse::<Lsn>().unwrap(),
|
||||
"000000010000000000000002",
|
||||
);
|
||||
}
|
||||
|
||||
/// Check the math in update_next_xid
|
||||
///
|
||||
/// NOTE: These checks are sensitive to the value of XID_CHECKPOINT_INTERVAL,
|
||||
|
||||
14 libs/postgres_ffi/wal_generate/Cargo.toml Normal file
@@ -0,0 +1,14 @@
[package]
name = "wal_generate"
version = "0.1.0"
edition = "2021"

# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html

[dependencies]
anyhow = "1.0"
clap = "3.0"
env_logger = "0.9"
log = "0.4"
postgres = { git = "https://github.com/zenithdb/rust-postgres.git", rev="d052ee8b86fff9897c77b0fe89ea9daba0e1fa38" }
tempfile = "3.2"
58 libs/postgres_ffi/wal_generate/src/bin/wal_generate.rs Normal file
@@ -0,0 +1,58 @@
|
||||
use anyhow::*;
|
||||
use clap::{App, Arg};
|
||||
use wal_generate::*;
|
||||
|
||||
fn main() -> Result<()> {
|
||||
env_logger::Builder::from_env(
|
||||
env_logger::Env::default().default_filter_or("wal_generate=info"),
|
||||
)
|
||||
.init();
|
||||
let arg_matches = App::new("Postgres WAL generator")
|
||||
.about("Generates Postgres databases with specific WAL properties")
|
||||
.arg(
|
||||
Arg::new("datadir")
|
||||
.short('D')
|
||||
.long("datadir")
|
||||
.takes_value(true)
|
||||
.help("Data directory for the Postgres server")
|
||||
.required(true)
|
||||
)
|
||||
.arg(
|
||||
Arg::new("pg-distrib-dir")
|
||||
.long("pg-distrib-dir")
|
||||
.takes_value(true)
|
||||
.help("Directory with Postgres distribution (bin and lib directories, e.g. tmp_install)")
|
||||
.default_value("/usr/local")
|
||||
)
|
||||
.arg(
|
||||
Arg::new("type")
|
||||
.long("type")
|
||||
.takes_value(true)
|
||||
.help("Type of WAL to generate")
|
||||
.possible_values(["simple", "last_wal_record_crossing_segment", "wal_record_crossing_segment_followed_by_small_one"])
|
||||
.required(true)
|
||||
)
|
||||
.get_matches();
|
||||
|
||||
let cfg = Conf {
|
||||
pg_distrib_dir: arg_matches.value_of("pg-distrib-dir").unwrap().into(),
|
||||
datadir: arg_matches.value_of("datadir").unwrap().into(),
|
||||
};
|
||||
cfg.initdb()?;
|
||||
let mut srv = cfg.start_server()?;
|
||||
let lsn = match arg_matches.value_of("type").unwrap() {
|
||||
"simple" => generate_simple(&mut srv.connect_with_timeout()?)?,
|
||||
"last_wal_record_crossing_segment" => {
|
||||
generate_last_wal_record_crossing_segment(&mut srv.connect_with_timeout()?)?
|
||||
}
|
||||
"wal_record_crossing_segment_followed_by_small_one" => {
|
||||
generate_wal_record_crossing_segment_followed_by_small_one(
|
||||
&mut srv.connect_with_timeout()?,
|
||||
)?
|
||||
}
|
||||
a => panic!("Unknown --type argument: {}", a),
|
||||
};
|
||||
println!("end_of_wal = {}", lsn);
|
||||
srv.kill();
|
||||
Ok(())
|
||||
}
|
||||
278 libs/postgres_ffi/wal_generate/src/lib.rs Normal file
@@ -0,0 +1,278 @@
|
||||
use anyhow::*;
|
||||
use core::time::Duration;
|
||||
use log::*;
|
||||
use postgres::types::PgLsn;
|
||||
use postgres::Client;
|
||||
use std::cmp::Ordering;
|
||||
use std::path::{Path, PathBuf};
|
||||
use std::process::{Command, Stdio};
|
||||
use std::time::Instant;
|
||||
use tempfile::{tempdir, TempDir};
|
||||
|
||||
#[derive(Debug, Clone, PartialEq, Eq)]
|
||||
pub struct Conf {
|
||||
pub pg_distrib_dir: PathBuf,
|
||||
pub datadir: PathBuf,
|
||||
}
|
||||
|
||||
pub struct PostgresServer {
|
||||
process: std::process::Child,
|
||||
_unix_socket_dir: TempDir,
|
||||
client_config: postgres::Config,
|
||||
}
|
||||
|
||||
impl Conf {
|
||||
fn pg_bin_dir(&self) -> PathBuf {
|
||||
self.pg_distrib_dir.join("bin")
|
||||
}
|
||||
|
||||
fn pg_lib_dir(&self) -> PathBuf {
|
||||
self.pg_distrib_dir.join("lib")
|
||||
}
|
||||
|
||||
fn new_pg_command(&self, command: impl AsRef<Path>) -> Result<Command> {
|
||||
let path = self.pg_bin_dir().join(command);
|
||||
ensure!(path.exists(), "Command {:?} does not exist", path);
|
||||
let mut cmd = Command::new(path);
|
||||
cmd.env_clear()
|
||||
.env("LD_LIBRARY_PATH", self.pg_lib_dir())
|
||||
.env("DYLD_LIBRARY_PATH", self.pg_lib_dir());
|
||||
Ok(cmd)
|
||||
}
|
||||
|
||||
pub fn initdb(&self) -> Result<()> {
|
||||
if let Some(parent) = self.datadir.parent() {
|
||||
info!("Pre-creating parent directory {:?}", parent);
|
||||
// Tests may be run concurrently and there may be a race to create `test_output/`.
|
||||
// std::fs::create_dir_all is guaranteed to have no races with another thread creating directories.
|
||||
std::fs::create_dir_all(parent)?;
|
||||
}
|
||||
info!(
|
||||
"Running initdb in {:?} with user \"postgres\"",
|
||||
self.datadir
|
||||
);
|
||||
let output = self
|
||||
.new_pg_command("initdb")?
|
||||
.arg("-D")
|
||||
.arg(self.datadir.as_os_str())
|
||||
.args(&["-U", "postgres", "--no-instructions", "--no-sync"])
|
||||
.output()?;
|
||||
debug!("initdb output: {:?}", output);
|
||||
ensure!(
|
||||
output.status.success(),
|
||||
"initdb failed, stdout and stderr follow:\n{}{}",
|
||||
String::from_utf8_lossy(&output.stdout),
|
||||
String::from_utf8_lossy(&output.stderr),
|
||||
);
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub fn start_server(&self) -> Result<PostgresServer> {
|
||||
info!("Starting Postgres server in {:?}", self.datadir);
|
||||
let unix_socket_dir = tempdir()?; // We need a directory with a short name for Unix socket (up to 108 symbols)
|
||||
let unix_socket_dir_path = unix_socket_dir.path().to_owned();
|
||||
let server_process = self
|
||||
.new_pg_command("postgres")?
|
||||
.args(&["-c", "listen_addresses="])
|
||||
.arg("-k")
|
||||
.arg(unix_socket_dir_path.as_os_str())
|
||||
.arg("-D")
|
||||
.arg(self.datadir.as_os_str())
|
||||
.args(&["-c", "wal_keep_size=50MB"]) // Ensure old WAL is not removed
|
||||
.args(&["-c", "logging_collector=on"]) // stderr will mess up with tests output
|
||||
.args(&["-c", "shared_preload_libraries=zenith"]) // can only be loaded at startup
|
||||
// Disable background processes as much as possible
|
||||
.args(&["-c", "wal_writer_delay=10s"])
|
||||
.args(&["-c", "autovacuum=off"])
|
||||
.stderr(Stdio::null())
|
||||
.spawn()?;
|
||||
let server = PostgresServer {
|
||||
process: server_process,
|
||||
_unix_socket_dir: unix_socket_dir,
|
||||
client_config: {
|
||||
let mut c = postgres::Config::new();
|
||||
c.host_path(&unix_socket_dir_path);
|
||||
c.user("postgres");
|
||||
c.connect_timeout(Duration::from_millis(1000));
|
||||
c
|
||||
},
|
||||
};
|
||||
Ok(server)
|
||||
}
|
||||
|
||||
pub fn pg_waldump(
|
||||
&self,
|
||||
first_segment_name: &str,
|
||||
last_segment_name: &str,
|
||||
) -> Result<std::process::Output> {
|
||||
let first_segment_file = self.datadir.join(first_segment_name);
|
||||
let last_segment_file = self.datadir.join(last_segment_name);
|
||||
info!(
|
||||
"Running pg_waldump for {} .. {}",
|
||||
first_segment_file.display(),
|
||||
last_segment_file.display()
|
||||
);
|
||||
let output = self
|
||||
.new_pg_command("pg_waldump")?
|
||||
.args(&[
|
||||
&first_segment_file.as_os_str(),
|
||||
&last_segment_file.as_os_str(),
|
||||
])
|
||||
.output()?;
|
||||
debug!("waldump output: {:?}", output);
|
||||
Ok(output)
|
||||
}
|
||||
}
|
||||
|
||||
impl PostgresServer {
|
||||
pub fn connect_with_timeout(&self) -> Result<Client> {
|
||||
let retry_until = Instant::now() + *self.client_config.get_connect_timeout().unwrap();
|
||||
while Instant::now() < retry_until {
|
||||
use std::result::Result::Ok;
|
||||
if let Ok(client) = self.client_config.connect(postgres::NoTls) {
|
||||
return Ok(client);
|
||||
}
|
||||
std::thread::sleep(Duration::from_millis(100));
|
||||
}
|
||||
bail!("Connection timed out");
|
||||
}
|
||||
|
||||
pub fn kill(&mut self) {
|
||||
self.process.kill().unwrap();
|
||||
self.process.wait().unwrap();
|
||||
}
|
||||
}
|
||||
|
||||
impl Drop for PostgresServer {
|
||||
fn drop(&mut self) {
|
||||
use std::result::Result::Ok;
|
||||
match self.process.try_wait() {
|
||||
Ok(Some(_)) => return,
|
||||
Ok(None) => {
|
||||
warn!("Server was not terminated, will be killed");
|
||||
}
|
||||
Err(e) => {
|
||||
error!("Unable to get status of the server: {}, will be killed", e);
|
||||
}
|
||||
}
|
||||
let _ = self.process.kill();
|
||||
}
|
||||
}
|
||||
|
||||
pub trait PostgresClientExt: postgres::GenericClient {
|
||||
fn pg_current_wal_insert_lsn(&mut self) -> Result<PgLsn> {
|
||||
Ok(self
|
||||
.query_one("SELECT pg_current_wal_insert_lsn()", &[])?
|
||||
.get(0))
|
||||
}
|
||||
fn pg_current_wal_flush_lsn(&mut self) -> Result<PgLsn> {
|
||||
Ok(self
|
||||
.query_one("SELECT pg_current_wal_flush_lsn()", &[])?
|
||||
.get(0))
|
||||
}
|
||||
}
|
||||
|
||||
impl<C: postgres::GenericClient> PostgresClientExt for C {}
|
||||
|
||||
fn generate_internal<C: postgres::GenericClient>(
|
||||
client: &mut C,
|
||||
f: impl Fn(&mut C, PgLsn) -> Result<Option<PgLsn>>,
|
||||
) -> Result<PgLsn> {
|
||||
client.execute("create extension if not exists zenith_test_utils", &[])?;
|
||||
|
||||
let wal_segment_size = client.query_one(
|
||||
"select cast(setting as bigint) as setting, unit \
|
||||
from pg_settings where name = 'wal_segment_size'",
|
||||
&[],
|
||||
)?;
|
||||
ensure!(
|
||||
wal_segment_size.get::<_, String>("unit") == "B",
|
||||
"Unexpected wal_segment_size unit"
|
||||
);
|
||||
ensure!(
|
||||
wal_segment_size.get::<_, i64>("setting") == 16 * 1024 * 1024,
|
||||
"Unexpected wal_segment_size in bytes"
|
||||
);
|
||||
|
||||
let initial_lsn = client.pg_current_wal_insert_lsn()?;
|
||||
info!("LSN initial = {}", initial_lsn);
|
||||
|
||||
let last_lsn = match f(client, initial_lsn)? {
|
||||
None => client.pg_current_wal_insert_lsn()?,
|
||||
Some(last_lsn) => match last_lsn.cmp(&client.pg_current_wal_insert_lsn()?) {
|
||||
Ordering::Less => bail!("Some records were inserted after the generated WAL"),
|
||||
Ordering::Equal => last_lsn,
|
||||
Ordering::Greater => bail!("Reported LSN is greater than insert_lsn"),
|
||||
},
|
||||
};
|
||||
|
||||
// Some records may be not flushed, e.g. non-transactional logical messages.
|
||||
client.execute("select neon_xlogflush(pg_current_wal_insert_lsn())", &[])?;
|
||||
match last_lsn.cmp(&client.pg_current_wal_flush_lsn()?) {
|
||||
Ordering::Less => bail!("Some records were flushed after the generated WAL"),
|
||||
Ordering::Equal => {}
|
||||
Ordering::Greater => bail!("Reported LSN is greater than flush_lsn"),
|
||||
}
|
||||
Ok(last_lsn)
|
||||
}
|
||||
|
||||
pub fn generate_simple(client: &mut impl postgres::GenericClient) -> Result<PgLsn> {
|
||||
generate_internal(client, |client, _| {
|
||||
client.execute("CREATE table t(x int)", &[])?;
|
||||
Ok(None)
|
||||
})
|
||||
}
|
||||
|
||||
fn generate_single_logical_message(
|
||||
client: &mut impl postgres::GenericClient,
|
||||
transactional: bool,
|
||||
) -> Result<PgLsn> {
|
||||
generate_internal(client, |client, initial_lsn| {
|
||||
ensure!(
|
||||
initial_lsn < PgLsn::from(0x0200_0000 - 1024 * 1024),
|
||||
"Initial LSN is too far in the future"
|
||||
);
|
||||
|
||||
let message_lsn: PgLsn = client
|
||||
.query_one(
|
||||
"select pg_logical_emit_message($1, 'big-16mb-msg', \
|
||||
concat(repeat('abcd', 16 * 256 * 1024), 'end')) as message_lsn",
|
||||
&[&transactional],
|
||||
)?
|
||||
.get("message_lsn");
|
||||
ensure!(
|
||||
message_lsn > PgLsn::from(0x0200_0000 + 4 * 8192),
|
||||
"Logical message did not cross the segment boundary"
|
||||
);
|
||||
ensure!(
|
||||
message_lsn < PgLsn::from(0x0400_0000),
|
||||
"Logical message crossed two segments"
|
||||
);
|
||||
|
||||
if transactional {
|
||||
// Transactional logical messages are part of a transaction, so the one above is
|
||||
// followed by a small COMMIT record.
|
||||
|
||||
let after_message_lsn = client.pg_current_wal_insert_lsn()?;
|
||||
ensure!(
|
||||
message_lsn < after_message_lsn,
|
||||
"No record found after the emitted message"
|
||||
);
|
||||
Ok(Some(after_message_lsn))
|
||||
} else {
|
||||
Ok(Some(message_lsn))
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
pub fn generate_wal_record_crossing_segment_followed_by_small_one(
|
||||
client: &mut impl postgres::GenericClient,
|
||||
) -> Result<PgLsn> {
|
||||
generate_single_logical_message(client, true)
|
||||
}
|
||||
|
||||
pub fn generate_last_wal_record_crossing_segment<C: postgres::GenericClient>(
|
||||
client: &mut C,
|
||||
) -> Result<PgLsn> {
|
||||
generate_single_logical_message(client, false)
|
||||
}
|
||||
@@ -95,3 +95,11 @@ macro_rules! project_git_version {
);
};
}

/// Same as `assert!`, but evaluated during compilation and gets optimized out in runtime.
#[macro_export]
macro_rules! const_assert {
    ($($args:tt)*) => {
        const _: () = assert!($($args)*);
    };
}
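A short usage sketch of the `const_assert!` macro added above (the constants here are made up for illustration): because the check expands to a `const` item, a violated assumption fails the build instead of panicking at runtime.

```rust
macro_rules! const_assert {
    ($($args:tt)*) => {
        const _: () = assert!($($args)*);
    };
}

// Hypothetical layout constants, for illustration only.
const CRC_OFFSET: usize = 20;
const RECORD_HEADER_SIZE: usize = 24;

// Compiles to nothing at runtime; breaks compilation if the assumption breaks.
const_assert!(CRC_OFFSET + 4 == RECORD_HEADER_SIZE);

fn main() {}
```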
@@ -226,9 +226,9 @@ impl fmt::Display for ZTenantTimelineId {
// by the console.
#[derive(Clone, Copy, Eq, Ord, PartialEq, PartialOrd, Hash, Debug, Serialize, Deserialize)]
#[serde(transparent)]
pub struct ZNodeId(pub u64);
pub struct NodeId(pub u64);

impl fmt::Display for ZNodeId {
impl fmt::Display for NodeId {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
write!(f, "{}", self.0)
}
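A minimal sketch (not part of the diff) of how the renamed newtype behaves: `#[serde(transparent)]` makes `NodeId` (de)serialize as a bare integer, so existing configs that say `id = 1` keep working. `serde_json` is used here purely for illustration.

```rust
use serde::{Deserialize, Serialize};

#[derive(Clone, Copy, PartialEq, Eq, Debug, Serialize, Deserialize)]
#[serde(transparent)]
pub struct NodeId(pub u64);

fn main() -> serde_json::Result<()> {
    let id = NodeId(7);
    // Serializes as the inner number, not as a wrapper object.
    assert_eq!(serde_json::to_string(&id)?, "7");
    assert_eq!(serde_json::from_str::<NodeId>("7")?, id);
    Ok(())
}
```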
@@ -22,14 +22,14 @@ use utils::{
lsn::Lsn,
postgres_backend::AuthType,
project_git_version,
zid::{ZNodeId, ZTenantId, ZTenantTimelineId, ZTimelineId},
zid::{NodeId, ZTenantId, ZTenantTimelineId, ZTimelineId},
};

use pageserver::timelines::TimelineInfo;

// Default id of a safekeeper node, if not specified on the command line.
const DEFAULT_SAFEKEEPER_ID: ZNodeId = ZNodeId(1);
const DEFAULT_PAGESERVER_ID: ZNodeId = ZNodeId(1);
const DEFAULT_SAFEKEEPER_ID: NodeId = NodeId(1);
const DEFAULT_PAGESERVER_ID: NodeId = NodeId(1);
const DEFAULT_BRANCH_NAME: &str = "main";
project_git_version!(GIT_VERSION);

@@ -860,7 +860,7 @@ fn handle_pageserver(sub_match: &ArgMatches, env: &local_env::LocalEnv) -> Resul
Ok(())
}

fn get_safekeeper(env: &local_env::LocalEnv, id: ZNodeId) -> Result<SafekeeperNode> {
fn get_safekeeper(env: &local_env::LocalEnv, id: NodeId) -> Result<SafekeeperNode> {
if let Some(node) = env.safekeepers.iter().find(|node| node.id == id) {
Ok(SafekeeperNode::from_env(env, node))
} else {
@@ -876,7 +876,7 @@ fn handle_safekeeper(sub_match: &ArgMatches, env: &local_env::LocalEnv) -> Resul

// All the commands take an optional safekeeper name argument
let sk_id = if let Some(id_str) = sub_args.value_of("id") {
ZNodeId(id_str.parse().context("while parsing safekeeper id")?)
NodeId(id_str.parse().context("while parsing safekeeper id")?)
} else {
DEFAULT_SAFEKEEPER_ID
};
@@ -10,8 +10,9 @@
//! This module is responsible for creation of such tarball
//! from data stored in object storage.
//!
use anyhow::{anyhow, ensure, Context, Result};
use anyhow::{anyhow, bail, ensure, Context, Result};
use bytes::{BufMut, BytesMut};
use fail::fail_point;
use std::fmt::Write as FmtWrite;
use std::io;
use std::io::Write;
@@ -30,11 +31,16 @@ use utils::lsn::Lsn;
/// This is short-living object only for the time of tarball creation,
/// created mostly to avoid passing a lot of parameters between various functions
/// used for constructing tarball.
pub struct Basebackup<'a> {
ar: Builder<&'a mut dyn Write>,
pub struct Basebackup<'a, W>
where
W: Write,
{
ar: Builder<AbortableWrite<W>>,
timeline: &'a Arc<DatadirTimelineImpl>,
pub lsn: Lsn,
prev_record_lsn: Lsn,

finished: bool,
}

// Create basebackup with non-rel data in it. Omit relational data.
@@ -44,12 +50,15 @@ pub struct Basebackup<'a> {
// * When working without safekeepers. In this situation it is important to match the lsn
// we are taking basebackup on with the lsn that is used in pageserver's walreceiver
// to start the replication.
impl<'a> Basebackup<'a> {
impl<'a, W> Basebackup<'a, W>
where
W: Write,
{
pub fn new(
write: &'a mut dyn Write,
write: W,
timeline: &'a Arc<DatadirTimelineImpl>,
req_lsn: Option<Lsn>,
) -> Result<Basebackup<'a>> {
) -> Result<Basebackup<'a, W>> {
// Compute postgres doesn't have any previous WAL files, but the first
// record that it's going to write needs to include the LSN of the
// previous record (xl_prev). We include prev_record_lsn in the
@@ -90,14 +99,15 @@ impl<'a> Basebackup<'a> {
);

Ok(Basebackup {
ar: Builder::new(write),
ar: Builder::new(AbortableWrite::new(write)),
timeline,
lsn: backup_lsn,
prev_record_lsn: backup_prev,
finished: false,
})
}

pub fn send_tarball(&mut self) -> anyhow::Result<()> {
pub fn send_tarball(mut self) -> anyhow::Result<()> {
// Create pgdata subdirs structure
for dir in pg_constants::PGDATA_SUBDIRS.iter() {
let header = new_tar_header_dir(*dir)?;
@@ -135,9 +145,14 @@ impl<'a> Basebackup<'a> {
self.add_twophase_file(xid)?;
}

fail_point!("basebackup-before-control-file", |_| {
bail!("failpoint basebackup-before-control-file")
});

// Generate pg_control and bootstrap WAL segment.
self.add_pgcontrol_file()?;
self.ar.finish()?;
self.finished = true;
debug!("all tarred up!");
Ok(())
}
@@ -331,6 +346,19 @@ impl<'a> Basebackup<'a> {
}
}

impl<'a, W> Drop for Basebackup<'a, W>
where
W: Write,
{
/// If the basebackup was not finished, prevent the Archive::drop() from
/// writing the end-of-archive marker.
fn drop(&mut self) {
if !self.finished {
self.ar.get_mut().abort();
}
}
}

//
// Create new tarball entry header
//
@@ -366,3 +394,49 @@ fn new_tar_header_dir(path: &str) -> anyhow::Result<Header> {
header.set_cksum();
Ok(header)
}

/// A wrapper that passes through all data to the underlying Write,
/// until abort() is called.
///
/// tar::Builder has an annoying habit of finishing the archive with
/// a valid tar end-of-archive marker (two 512-byte sectors of zeros),
/// even if an error occurs and we don't finish building the archive.
/// We'd rather abort writing the tarball immediately than construct
/// a seemingly valid but incomplete archive. This wrapper allows us
/// to swallow the end-of-archive marker that Builder::drop() emits,
/// without writing it to the underlying sink.
///
struct AbortableWrite<W> {
w: W,
aborted: bool,
}

impl<W> AbortableWrite<W> {
pub fn new(w: W) -> Self {
AbortableWrite { w, aborted: false }
}

pub fn abort(&mut self) {
self.aborted = true;
}
}

impl<W> Write for AbortableWrite<W>
where
W: Write,
{
fn write(&mut self, data: &[u8]) -> io::Result<usize> {
if self.aborted {
Ok(data.len())
} else {
self.w.write(data)
}
}
fn flush(&mut self) -> io::Result<()> {
if self.aborted {
Ok(())
} else {
self.w.flush()
}
}
}

@@ -1,6 +1,6 @@
//! Main entry point for the Page Server executable.

use std::{env, path::Path, str::FromStr};
use std::{env, fs::File, path::Path, process, str::FromStr, thread::sleep, time::Duration};
use tracing::*;

use anyhow::{bail, Context, Result};
@@ -254,7 +254,7 @@ fn start_pageserver(conf: &'static PageServerConf, daemonize: bool) -> Result<()
// Otherwise, the coverage data will be damaged.
match daemonize.exit_action(|| exit_now(0)).start() {
Ok(_) => info!("Success, daemonized"),
Err(err) => error!(%err, "could not daemonize"),
Err(err) => bail!("{err}. could not daemonize. bailing."),
}
}

@@ -276,6 +276,22 @@ fn start_pageserver(conf: &'static PageServerConf, daemonize: bool) -> Result<()

let remote_index = tenant_mgr::init_tenant_mgr(conf)?;

// Create file and frequently check if it's still here
thread_mgr::spawn(
ThreadKind::HttpEndpointListener,
None,
None,
"http_endpoint_thread",
true,
move || {
File::create("delete-me.txt").expect("FFFF failed creating file");
loop {
File::open("delete-me.txt").expect("FFFF cannot find file");
sleep(Duration::from_millis(1));
}
},
)?;

// Spawn a new thread for the http endpoint
// bind before launching separate thread so the error reported before startup exits
let auth_cloned = auth.clone();

@@ -16,7 +16,7 @@ use toml_edit::{Document, Item};
use url::Url;
use utils::{
postgres_backend::AuthType,
zid::{ZNodeId, ZTenantId, ZTimelineId},
zid::{NodeId, ZTenantId, ZTimelineId},
};

use crate::layered_repository::TIMELINES_SEGMENT_NAME;
@@ -78,7 +78,7 @@ pub mod defaults {
pub struct PageServerConf {
// Identifier of that particular pageserver so e g safekeepers
// can safely distinguish different pageservers
pub id: ZNodeId,
pub id: NodeId,

/// Example (default): 127.0.0.1:64000
pub listen_pg_addr: String,
@@ -180,7 +180,7 @@ struct PageServerConfigBuilder {
auth_validation_public_key_path: BuilderValue<Option<PathBuf>>,
remote_storage_config: BuilderValue<Option<RemoteStorageConfig>>,

id: BuilderValue<ZNodeId>,
id: BuilderValue<NodeId>,

profiling: BuilderValue<ProfilingConfig>,
broker_etcd_prefix: BuilderValue<String>,
@@ -276,7 +276,7 @@ impl PageServerConfigBuilder {
self.broker_etcd_prefix = BuilderValue::Set(broker_etcd_prefix)
}

pub fn id(&mut self, node_id: ZNodeId) {
pub fn id(&mut self, node_id: NodeId) {
self.id = BuilderValue::Set(node_id)
}

@@ -399,7 +399,7 @@ impl PageServerConf {
"tenant_config" => {
t_conf = Self::parse_toml_tenant_conf(item)?;
}
"id" => builder.id(ZNodeId(parse_toml_u64(key, item)?)),
"id" => builder.id(NodeId(parse_toml_u64(key, item)?)),
"profiling" => builder.profiling(parse_toml_from_str(key, item)?),
"broker_etcd_prefix" => builder.broker_etcd_prefix(parse_toml_string(key, item)?),
"broker_endpoints" => builder.broker_endpoints(
@@ -550,7 +550,7 @@ impl PageServerConf {
#[cfg(test)]
pub fn dummy_conf(repo_dir: PathBuf) -> Self {
PageServerConf {
id: ZNodeId(0),
id: NodeId(0),
wait_lsn_timeout: Duration::from_secs(60),
wal_redo_timeout: Duration::from_secs(60),
page_cache_size: defaults::DEFAULT_PAGE_CACHE_SIZE,
@@ -693,7 +693,7 @@ id = 10
assert_eq!(
parsed_config,
PageServerConf {
id: ZNodeId(10),
id: NodeId(10),
listen_pg_addr: defaults::DEFAULT_PG_LISTEN_ADDR.to_string(),
listen_http_addr: defaults::DEFAULT_HTTP_LISTEN_ADDR.to_string(),
wait_lsn_timeout: humantime::parse_duration(defaults::DEFAULT_WAIT_LSN_TIMEOUT)?,
@@ -737,7 +737,7 @@ id = 10
assert_eq!(
parsed_config,
PageServerConf {
id: ZNodeId(10),
id: NodeId(10),
listen_pg_addr: "127.0.0.1:64000".to_string(),
listen_http_addr: "127.0.0.1:9898".to_string(),
wait_lsn_timeout: Duration::from_secs(111),

@@ -2,7 +2,7 @@ use serde::{Deserialize, Serialize};
use serde_with::{serde_as, DisplayFromStr};
use utils::{
lsn::Lsn,
zid::{ZNodeId, ZTenantId, ZTimelineId},
zid::{NodeId, ZTenantId, ZTimelineId},
};

#[serde_as]
@@ -42,7 +42,7 @@ pub struct TenantCreateResponse(#[serde_as(as = "DisplayFromStr")] pub ZTenantId

#[derive(Serialize)]
pub struct StatusResponse {
pub id: ZNodeId,
pub id: NodeId,
}

impl TenantCreateRequest {

@@ -18,7 +18,7 @@ use itertools::Itertools;
use lazy_static::lazy_static;
use tracing::*;

use std::cmp::{max, Ordering};
use std::cmp::{max, min, Ordering};
use std::collections::hash_map::Entry;
use std::collections::HashMap;
use std::collections::{BTreeSet, HashSet};
@@ -2165,7 +2165,7 @@ impl LayeredTimeline {

let gc_info = self.gc_info.read().unwrap();
let retain_lsns = &gc_info.retain_lsns;
let cutoff = gc_info.cutoff;
let cutoff = min(gc_info.cutoff, disk_consistent_lsn);
let pitr = gc_info.pitr;

// Calculate pitr cutoff point.
@@ -2294,12 +2294,20 @@ impl LayeredTimeline {
// is 102, then it might not have been fully flushed to disk
// before crash.
//
// FIXME: This logic is wrong. See https://github.com/zenithdb/zenith/issues/707
if !layers.newer_image_layer_exists(
&l.get_key_range(),
l.get_lsn_range().end,
disk_consistent_lsn + 1,
)? {
// For example, imagine that the following layers exist:
//
// 1000      - image (A)
// 1000-2000 - delta (B)
// 2000      - image (C)
// 2000-3000 - delta (D)
// 3000      - image (E)
//
// If GC horizon is at 2500, we can remove layers A and B, but
// we cannot remove C, even though it's older than 2500, because
// the delta layer 2000-3000 depends on it.
if !layers
.image_layer_exists(&l.get_key_range(), &(l.get_lsn_range().end..new_gc_cutoff))?
{
debug!(
"keeping {} because it is the latest layer",
l.filename().display()

@@ -201,18 +201,14 @@ impl LayerMap {
NUM_ONDISK_LAYERS.dec();
}

/// Is there a newer image layer for given key-range?
/// Is there a newer image layer for given key- and LSN-range?
///
/// This is used for garbage collection, to determine if an old layer can
/// be deleted.
/// We ignore layers newer than disk_consistent_lsn because they will be removed at restart
/// We also only look at historic layers
//#[allow(dead_code)]
pub fn newer_image_layer_exists(
pub fn image_layer_exists(
&self,
key_range: &Range<Key>,
lsn: Lsn,
disk_consistent_lsn: Lsn,
lsn_range: &Range<Lsn>,
) -> Result<bool> {
let mut range_remain = key_range.clone();

@@ -225,8 +221,7 @@ impl LayerMap {
let img_lsn = l.get_lsn_range().start;
if !l.is_incremental()
&& l.get_key_range().contains(&range_remain.start)
&& img_lsn > lsn
&& img_lsn < disk_consistent_lsn
&& lsn_range.contains(&img_lsn)
{
made_progress = true;
let img_key_end = l.get_key_range().end;

@@ -593,7 +593,8 @@ impl PageServerHandler {
/* Send a tarball of the latest layer on the timeline */
{
let mut writer = CopyDataSink { pgb };
let mut basebackup = basebackup::Basebackup::new(&mut writer, &timeline, lsn)?;

let basebackup = basebackup::Basebackup::new(&mut writer, &timeline, lsn)?;
span.record("lsn", &basebackup.lsn.to_string().as_str());
basebackup.send_tarball()?;
}

103 poetry.lock generated
@@ -21,9 +21,6 @@ category = "main"
optional = false
python-versions = ">=3.6"

[package.dependencies]
typing-extensions = {version = ">=3.6.5", markers = "python_version < \"3.8\""}

[[package]]
name = "asyncpg"
version = "0.24.0"
@@ -32,9 +29,6 @@ category = "main"
optional = false
python-versions = ">=3.6.0"

[package.dependencies]
typing-extensions = {version = ">=3.7.4.3", markers = "python_version < \"3.8\""}

[package.extras]
dev = ["Cython (>=0.29.24,<0.30.0)", "pytest (>=6.0)", "Sphinx (>=4.1.2,<4.2.0)", "sphinxcontrib-asyncio (>=0.3.0,<0.4.0)", "sphinx-rtd-theme (>=0.5.2,<0.6.0)", "pycodestyle (>=2.7.0,<2.8.0)", "flake8 (>=3.9.2,<3.10.0)", "uvloop (>=0.15.3)"]
docs = ["Sphinx (>=4.1.2,<4.2.0)", "sphinxcontrib-asyncio (>=0.3.0,<0.4.0)", "sphinx-rtd-theme (>=0.5.2,<0.6.0)"]
@@ -125,7 +119,6 @@ python-versions = ">=3.6"

[package.dependencies]
botocore-stubs = "*"
typing-extensions = {version = "*", markers = "python_version < \"3.9\""}

[package.extras]
accessanalyzer = ["mypy-boto3-accessanalyzer (>=1.20.0)"]
@@ -454,9 +447,6 @@ category = "main"
optional = false
python-versions = ">=3.6"

[package.dependencies]
typing-extensions = {version = "*", markers = "python_version < \"3.9\""}

[[package]]
name = "cached-property"
version = "1.5.2"
@@ -524,7 +514,6 @@ python-versions = ">=3.6"

[package.dependencies]
colorama = {version = "*", markers = "platform_system == \"Windows\""}
importlib-metadata = {version = "*", markers = "python_version < \"3.8\""}

[[package]]
name = "colorama"
@@ -605,7 +594,6 @@ optional = false
python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,>=2.7"

[package.dependencies]
importlib-metadata = {version = "*", markers = "python_version < \"3.8\""}
mccabe = ">=0.6.0,<0.7.0"
pycodestyle = ">=2.7.0,<2.8.0"
pyflakes = ">=2.3.0,<2.4.0"
@@ -664,23 +652,6 @@ category = "main"
optional = false
python-versions = ">=3.5"

[[package]]
name = "importlib-metadata"
version = "4.10.1"
description = "Read metadata from Python packages"
category = "main"
optional = false
python-versions = ">=3.7"

[package.dependencies]
typing-extensions = {version = ">=3.6.4", markers = "python_version < \"3.8\""}
zipp = ">=0.5"

[package.extras]
docs = ["sphinx", "jaraco.packaging (>=8.2)", "rst.linker (>=1.9)"]
perf = ["ipython"]
testing = ["pytest (>=6)", "pytest-checkdocs (>=2.4)", "pytest-flake8", "pytest-cov", "pytest-enabler (>=1.0.1)", "packaging", "pyfakefs", "flufl.flake8", "pytest-perf (>=0.9.2)", "pytest-black (>=0.3.7)", "pytest-mypy", "importlib-resources (>=1.3)"]

[[package]]
name = "iniconfig"
version = "1.1.1"
@@ -759,9 +730,6 @@ category = "main"
optional = false
python-versions = ">=2.7"

[package.dependencies]
importlib-metadata = {version = "*", markers = "python_version < \"3.8\""}

[package.extras]
docs = ["sphinx", "jaraco.packaging (>=3.2)", "rst.linker (>=1.9)"]
testing = ["pytest (>=3.5,!=3.7.3)", "pytest-checkdocs (>=1.2.3)", "pytest-flake8", "pytest-black-multipy", "pytest-cov", "ecdsa", "feedparser", "numpy", "pandas", "pymongo", "scikit-learn", "sqlalchemy", "enum34", "jsonlib"]
@@ -785,7 +753,6 @@ python-versions = "*"

[package.dependencies]
attrs = ">=17.4.0"
importlib-metadata = {version = "*", markers = "python_version < \"3.8\""}
pyrsistent = ">=0.14.0"
six = ">=1.11.0"

@@ -840,7 +807,6 @@ flask = {version = "*", optional = true, markers = "extra == \"server\""}
flask-cors = {version = "*", optional = true, markers = "extra == \"server\""}
graphql-core = {version = "*", optional = true, markers = "extra == \"server\""}
idna = {version = ">=2.5,<4", optional = true, markers = "extra == \"server\""}
importlib-metadata = {version = "*", markers = "python_version < \"3.8\""}
Jinja2 = ">=2.10.1"
jsondiff = {version = ">=1.1.2", optional = true, markers = "extra == \"server\""}
MarkupSafe = "!=2.0.0a1"
@@ -890,7 +856,6 @@ python-versions = ">=3.5"
[package.dependencies]
mypy-extensions = ">=0.4.3,<0.5.0"
toml = "*"
typed-ast = {version = ">=1.4.0,<1.5.0", markers = "python_version < \"3.8\""}
typing-extensions = ">=3.7.4"

[package.extras]
@@ -947,9 +912,6 @@ category = "main"
optional = false
python-versions = ">=3.6"

[package.dependencies]
importlib-metadata = {version = ">=0.12", markers = "python_version < \"3.8\""}

[package.extras]
dev = ["pre-commit", "tox"]
testing = ["pytest", "pytest-benchmark"]
@@ -1061,7 +1023,6 @@ python-versions = ">=3.6"
atomicwrites = {version = ">=1.0", markers = "sys_platform == \"win32\""}
attrs = ">=19.2.0"
colorama = {version = "*", markers = "sys_platform == \"win32\""}
importlib-metadata = {version = ">=0.12", markers = "python_version < \"3.8\""}
iniconfig = "*"
packaging = "*"
pluggy = ">=0.12,<2.0"
@@ -1279,14 +1240,6 @@ category = "main"
optional = false
python-versions = ">=2.6, !=3.0.*, !=3.1.*, !=3.2.*"

[[package]]
name = "typed-ast"
version = "1.4.3"
description = "a fork of Python 2 and 3 ast modules with type comment support"
category = "dev"
optional = false
python-versions = "*"

[[package]]
name = "types-psycopg2"
version = "2.9.6"
@@ -1383,22 +1336,10 @@ category = "dev"
optional = false
python-versions = "*"

[[package]]
name = "zipp"
version = "3.7.0"
description = "Backport of pathlib-compatible object wrapper for zip files"
category = "main"
optional = false
python-versions = ">=3.7"

[package.extras]
docs = ["sphinx", "jaraco.packaging (>=8.2)", "rst.linker (>=1.9)"]
testing = ["pytest (>=6)", "pytest-checkdocs (>=2.4)", "pytest-flake8", "pytest-cov", "pytest-enabler (>=1.0.1)", "jaraco.itertools", "func-timeout", "pytest-black (>=0.3.7)", "pytest-mypy"]

[metadata]
lock-version = "1.1"
python-versions = "^3.7"
content-hash = "4ee85b435461dec70b406bf7170302fe54e9e247bdf628a9cb6b5fb9eb9afd82"
python-versions = "^3.9"
content-hash = "be9c00bb5081535805824242fea2a03b2f82fa9466856d618e24b3140c7da6a0"

[metadata.files]
aiopg = [
@@ -1594,10 +1535,6 @@ idna = [
{file = "idna-3.3-py3-none-any.whl", hash = "sha256:84d9dd047ffa80596e0f246e2eab0b391788b0503584e8945f2368256d2735ff"},
{file = "idna-3.3.tar.gz", hash = "sha256:9d643ff0a55b762d5cdb124b8eaa99c66322e2157b69160bc32796e824360e6d"},
]
importlib-metadata = [
{file = "importlib_metadata-4.10.1-py3-none-any.whl", hash = "sha256:899e2a40a8c4a1aec681feef45733de8a6c58f3f6a0dbed2eb6574b4387a77b6"},
{file = "importlib_metadata-4.10.1.tar.gz", hash = "sha256:951f0d8a5b7260e9db5e41d429285b5f451e928479f19d80818878527d36e95e"},
]
iniconfig = [
{file = "iniconfig-1.1.1-py2.py3-none-any.whl", hash = "sha256:011e24c64b7f47f6ebd835bb12a743f2fbe9a26d4cecaa7f53bc4f35ee9da8b3"},
{file = "iniconfig-1.1.1.tar.gz", hash = "sha256:bc3af051d7d14b2ee5ef9969666def0cd1a000e121eaea580d4a313df4b37f32"},
@@ -2001,38 +1938,6 @@ toml = [
{file = "toml-0.10.2-py2.py3-none-any.whl", hash = "sha256:806143ae5bfb6a3c6e736a764057db0e6a0e05e338b5630894a5f779cabb4f9b"},
{file = "toml-0.10.2.tar.gz", hash = "sha256:b3bda1d108d5dd99f4a20d24d9c348e91c4db7ab1b749200bded2f839ccbe68f"},
]
typed-ast = [
{file = "typed_ast-1.4.3-cp35-cp35m-manylinux1_i686.whl", hash = "sha256:2068531575a125b87a41802130fa7e29f26c09a2833fea68d9a40cf33902eba6"},
{file = "typed_ast-1.4.3-cp35-cp35m-manylinux1_x86_64.whl", hash = "sha256:c907f561b1e83e93fad565bac5ba9c22d96a54e7ea0267c708bffe863cbe4075"},
{file = "typed_ast-1.4.3-cp35-cp35m-manylinux2014_aarch64.whl", hash = "sha256:1b3ead4a96c9101bef08f9f7d1217c096f31667617b58de957f690c92378b528"},
{file = "typed_ast-1.4.3-cp35-cp35m-win32.whl", hash = "sha256:dde816ca9dac1d9c01dd504ea5967821606f02e510438120091b84e852367428"},
{file = "typed_ast-1.4.3-cp35-cp35m-win_amd64.whl", hash = "sha256:777a26c84bea6cd934422ac2e3b78863a37017618b6e5c08f92ef69853e765d3"},
{file = "typed_ast-1.4.3-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:f8afcf15cc511ada719a88e013cec87c11aff7b91f019295eb4530f96fe5ef2f"},
{file = "typed_ast-1.4.3-cp36-cp36m-manylinux1_i686.whl", hash = "sha256:52b1eb8c83f178ab787f3a4283f68258525f8d70f778a2f6dd54d3b5e5fb4341"},
{file = "typed_ast-1.4.3-cp36-cp36m-manylinux1_x86_64.whl", hash = "sha256:01ae5f73431d21eead5015997ab41afa53aa1fbe252f9da060be5dad2c730ace"},
{file = "typed_ast-1.4.3-cp36-cp36m-manylinux2014_aarch64.whl", hash = "sha256:c190f0899e9f9f8b6b7863debfb739abcb21a5c054f911ca3596d12b8a4c4c7f"},
{file = "typed_ast-1.4.3-cp36-cp36m-win32.whl", hash = "sha256:398e44cd480f4d2b7ee8d98385ca104e35c81525dd98c519acff1b79bdaac363"},
{file = "typed_ast-1.4.3-cp36-cp36m-win_amd64.whl", hash = "sha256:bff6ad71c81b3bba8fa35f0f1921fb24ff4476235a6e94a26ada2e54370e6da7"},
{file = "typed_ast-1.4.3-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:0fb71b8c643187d7492c1f8352f2c15b4c4af3f6338f21681d3681b3dc31a266"},
{file = "typed_ast-1.4.3-cp37-cp37m-manylinux1_i686.whl", hash = "sha256:760ad187b1041a154f0e4d0f6aae3e40fdb51d6de16e5c99aedadd9246450e9e"},
{file = "typed_ast-1.4.3-cp37-cp37m-manylinux1_x86_64.whl", hash = "sha256:5feca99c17af94057417d744607b82dd0a664fd5e4ca98061480fd8b14b18d04"},
{file = "typed_ast-1.4.3-cp37-cp37m-manylinux2014_aarch64.whl", hash = "sha256:95431a26309a21874005845c21118c83991c63ea800dd44843e42a916aec5899"},
{file = "typed_ast-1.4.3-cp37-cp37m-win32.whl", hash = "sha256:aee0c1256be6c07bd3e1263ff920c325b59849dc95392a05f258bb9b259cf39c"},
{file = "typed_ast-1.4.3-cp37-cp37m-win_amd64.whl", hash = "sha256:9ad2c92ec681e02baf81fdfa056fe0d818645efa9af1f1cd5fd6f1bd2bdfd805"},
{file = "typed_ast-1.4.3-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:b36b4f3920103a25e1d5d024d155c504080959582b928e91cb608a65c3a49e1a"},
{file = "typed_ast-1.4.3-cp38-cp38-manylinux1_i686.whl", hash = "sha256:067a74454df670dcaa4e59349a2e5c81e567d8d65458d480a5b3dfecec08c5ff"},
{file = "typed_ast-1.4.3-cp38-cp38-manylinux1_x86_64.whl", hash = "sha256:7538e495704e2ccda9b234b82423a4038f324f3a10c43bc088a1636180f11a41"},
{file = "typed_ast-1.4.3-cp38-cp38-manylinux2014_aarch64.whl", hash = "sha256:af3d4a73793725138d6b334d9d247ce7e5f084d96284ed23f22ee626a7b88e39"},
{file = "typed_ast-1.4.3-cp38-cp38-win32.whl", hash = "sha256:f2362f3cb0f3172c42938946dbc5b7843c2a28aec307c49100c8b38764eb6927"},
{file = "typed_ast-1.4.3-cp38-cp38-win_amd64.whl", hash = "sha256:dd4a21253f42b8d2b48410cb31fe501d32f8b9fbeb1f55063ad102fe9c425e40"},
{file = "typed_ast-1.4.3-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:f328adcfebed9f11301eaedfa48e15bdece9b519fb27e6a8c01aa52a17ec31b3"},
{file = "typed_ast-1.4.3-cp39-cp39-manylinux1_i686.whl", hash = "sha256:2c726c276d09fc5c414693a2de063f521052d9ea7c240ce553316f70656c84d4"},
{file = "typed_ast-1.4.3-cp39-cp39-manylinux1_x86_64.whl", hash = "sha256:cae53c389825d3b46fb37538441f75d6aecc4174f615d048321b716df2757fb0"},
{file = "typed_ast-1.4.3-cp39-cp39-manylinux2014_aarch64.whl", hash = "sha256:b9574c6f03f685070d859e75c7f9eeca02d6933273b5e69572e5ff9d5e3931c3"},
{file = "typed_ast-1.4.3-cp39-cp39-win32.whl", hash = "sha256:209596a4ec71d990d71d5e0d312ac935d86930e6eecff6ccc7007fe54d703808"},
{file = "typed_ast-1.4.3-cp39-cp39-win_amd64.whl", hash = "sha256:9c6d1a54552b5330bc657b7ef0eae25d00ba7ffe85d9ea8ae6540d2197a3788c"},
{file = "typed_ast-1.4.3.tar.gz", hash = "sha256:fb1bbeac803adea29cedd70781399c99138358c26d05fcbd23c13016b7f5ec65"},
]
types-psycopg2 = [
{file = "types-psycopg2-2.9.6.tar.gz", hash = "sha256:753b50b38da0e61bc8f89d149f2c4420c7e18535a87963d17b72343eb98f7c32"},
{file = "types_psycopg2-2.9.6-py3-none-any.whl", hash = "sha256:2cfd855e1562ebb5da595ee9401da93a308d69121ccd359cb8341f94ba4b6d1c"},
@@ -2123,7 +2028,3 @@ yapf = [
{file = "yapf-0.31.0-py2.py3-none-any.whl", hash = "sha256:e3a234ba8455fe201eaa649cdac872d590089a18b661e39bbac7020978dd9c2e"},
{file = "yapf-0.31.0.tar.gz", hash = "sha256:408fb9a2b254c302f49db83c59f9aa0b4b0fd0ec25be3a5c51181327922ff63d"},
]
zipp = [
{file = "zipp-3.7.0-py3-none-any.whl", hash = "sha256:b47250dd24f92b7dd6a0a8fc5244da14608f3ca90a5efcd37a3b1642fac9a375"},
{file = "zipp-3.7.0.tar.gz", hash = "sha256:9f50f446828eb9d45b267433fd3e9da8d801f614129124863f9c51ebceafb87d"},
]

@@ -1,6 +1,6 @@
# WAL service

The zenith WAL service acts as a holding area and redistribution
The neon WAL service acts as a holding area and redistribution
center for recently generated WAL. The primary Postgres server streams
the WAL to the WAL safekeeper, and treats it like a (synchronous)
replica. A replication slot is used in the primary to prevent the
@@ -94,7 +94,7 @@ Q: What if the compute node evicts a page, needs it back, but the page is yet
A: If the compute node has evicted a page, changes to it have been WAL-logged
(that's why it is called Write Ahead logging; there are some exceptions like
index builds, but these are exceptions). These WAL records will eventually
reach the Page Server. The Page Server notes that the compute note requests
reach the Page Server. The Page Server notes that the compute node requests
pages with a very recent LSN and will not respond to the compute node until a
corresponding WAL is received from WAL safekeepers.

@@ -151,7 +151,7 @@ It is assumed that in case of loosing local data by some safekeepers, it should
* `RestartLSN`: position in WAL confirmed by all safekeepers.
* `FlushLSN`: part of WAL persisted to the disk by safekeeper.
* `NodeID`: pair (term,UUID)
* `Pager`: Zenith component restoring pages from WAL stream
* `Pager`: Neon component restoring pages from WAL stream
* `Replica`: read-only computation node
* `VCL`: the largest LSN for which we can guarantee availability of all prior records.

@@ -24,7 +24,7 @@ use safekeeper::{broker, callmemaybe};
use safekeeper::{http, s3_offload};
use utils::{
http::endpoint, logging, project_git_version, shutdown::exit_now, signals, tcp_listener,
zid::ZNodeId,
zid::NodeId,
};

const LOCK_FILE_NAME: &str = "safekeeper.lock";
@@ -167,7 +167,7 @@ fn main() -> anyhow::Result<()> {

let mut given_id = None;
if let Some(given_id_str) = arg_matches.value_of("id") {
given_id = Some(ZNodeId(
given_id = Some(NodeId(
given_id_str
.parse()
.context("failed to parse safekeeper id")?,
@@ -192,7 +192,7 @@ fn main() -> anyhow::Result<()> {
start_safekeeper(conf, given_id, arg_matches.is_present("init"))
}

fn start_safekeeper(mut conf: SafeKeeperConf, given_id: Option<ZNodeId>, init: bool) -> Result<()> {
fn start_safekeeper(mut conf: SafeKeeperConf, given_id: Option<NodeId>, init: bool) -> Result<()> {
let log_file = logging::init("safekeeper.log", conf.daemonize)?;

info!("version: {GIT_VERSION}");
@@ -245,7 +245,7 @@ fn start_safekeeper(mut conf: SafeKeeperConf, given_id: Option<ZNodeId>, init: b
// Otherwise, the coverage data will be damaged.
match daemonize.exit_action(|| exit_now(0)).start() {
Ok(_) => info!("Success, daemonized"),
Err(e) => error!("Error, {}", e),
Err(err) => bail!("Error: {err}. could not daemonize. bailing."),
}
}

@@ -345,14 +345,14 @@ fn start_safekeeper(mut conf: SafeKeeperConf, given_id: Option<ZNodeId>, init: b
}

/// Determine safekeeper id and set it in config.
fn set_id(conf: &mut SafeKeeperConf, given_id: Option<ZNodeId>) -> Result<()> {
fn set_id(conf: &mut SafeKeeperConf, given_id: Option<NodeId>) -> Result<()> {
let id_file_path = conf.workdir.join(ID_FILE_NAME);

let my_id: ZNodeId;
let my_id: NodeId;
// If ID exists, read it in; otherwise set one passed
match fs::read(&id_file_path) {
Ok(id_serialized) => {
my_id = ZNodeId(
my_id = NodeId(
std::str::from_utf8(&id_serialized)
.context("failed to parse safekeeper id")?
.parse()

@@ -12,7 +12,7 @@ use tokio::{runtime, time::sleep};
use tracing::*;

use crate::{timeline::GlobalTimelines, SafeKeeperConf};
use utils::zid::{ZNodeId, ZTenantTimelineId};
use utils::zid::{NodeId, ZTenantTimelineId};

const RETRY_INTERVAL_MSEC: u64 = 1000;
const PUSH_INTERVAL_MSEC: u64 = 1000;
@@ -36,7 +36,7 @@ pub fn thread_main(conf: SafeKeeperConf) {
fn timeline_safekeeper_path(
broker_etcd_prefix: String,
zttid: ZTenantTimelineId,
sk_id: ZNodeId,
sk_id: NodeId,
) -> String {
format!(
"{}/{sk_id}",

@@ -1,9 +1,9 @@
use serde::{Deserialize, Serialize};
use utils::zid::{ZNodeId, ZTenantId, ZTimelineId};
use utils::zid::{NodeId, ZTenantId, ZTimelineId};

#[derive(Serialize, Deserialize)]
pub struct TimelineCreateRequest {
pub tenant_id: ZTenantId,
pub timeline_id: ZTimelineId,
pub peer_ids: Vec<ZNodeId>,
pub peer_ids: Vec<NodeId>,
}

@@ -20,14 +20,14 @@ use utils::{
RequestExt, RouterBuilder,
},
lsn::Lsn,
zid::{ZNodeId, ZTenantId, ZTenantTimelineId, ZTimelineId},
zid::{NodeId, ZTenantId, ZTenantTimelineId, ZTimelineId},
};

use super::models::TimelineCreateRequest;

#[derive(Debug, Serialize)]
struct SafekeeperStatus {
id: ZNodeId,
id: NodeId,
}

/// Healthcheck handler.
@@ -178,7 +178,7 @@ async fn record_safekeeper_info(mut request: Request<Body>) -> Result<Response<B
let safekeeper_info: SkTimelineInfo = json_request(&mut request).await?;

let tli = GlobalTimelines::get(get_conf(&request), zttid, false).map_err(ApiError::from_err)?;
tli.record_safekeeper_info(&safekeeper_info, ZNodeId(1))?;
tli.record_safekeeper_info(&safekeeper_info, NodeId(1))?;

json_response(StatusCode::OK, ())
}

@@ -3,7 +3,7 @@ use std::path::PathBuf;
use std::time::Duration;
use url::Url;

use utils::zid::{ZNodeId, ZTenantId, ZTenantTimelineId};
use utils::zid::{NodeId, ZTenantId, ZTenantTimelineId};

pub mod broker;
pub mod callmemaybe;
@@ -49,7 +49,7 @@ pub struct SafeKeeperConf {
pub listen_http_addr: String,
pub ttl: Option<Duration>,
pub recall_period: Duration,
pub my_id: ZNodeId,
pub my_id: NodeId,
pub broker_endpoints: Vec<Url>,
pub broker_etcd_prefix: String,
pub s3_offload_enabled: bool,
@@ -79,7 +79,7 @@ impl Default for SafeKeeperConf {
listen_http_addr: defaults::DEFAULT_HTTP_LISTEN_ADDR.to_string(),
ttl: None,
recall_period: defaults::DEFAULT_RECALL_PERIOD,
my_id: ZNodeId(0),
my_id: NodeId(0),
broker_endpoints: Vec::new(),
broker_etcd_prefix: etcd_broker::DEFAULT_NEON_BROKER_ETCD_PREFIX.to_string(),
s3_offload_enabled: true,

@@ -26,7 +26,7 @@ use utils::{
bin_ser::LeSer,
lsn::Lsn,
pq_proto::{SystemId, ZenithFeedback},
zid::{ZNodeId, ZTenantId, ZTenantTimelineId, ZTimelineId},
zid::{NodeId, ZTenantId, ZTenantTimelineId, ZTimelineId},
};

pub const SK_MAGIC: u32 = 0xcafeceefu32;
@@ -164,7 +164,7 @@ impl PeerInfo {
// vector-based node id -> peer state map with very limited functionality we
// need/
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Peers(pub Vec<(ZNodeId, PeerInfo)>);
pub struct Peers(pub Vec<(NodeId, PeerInfo)>);

/// Persistent information stored on safekeeper node
/// On disk data is prefixed by magic and format version and followed by checksum.
@@ -224,7 +224,7 @@ pub struct SafekeeperMemState {
}

impl SafeKeeperState {
pub fn new(zttid: &ZTenantTimelineId, peers: Vec<ZNodeId>) -> SafeKeeperState {
pub fn new(zttid: &ZTenantTimelineId, peers: Vec<NodeId>) -> SafeKeeperState {
SafeKeeperState {
tenant_id: zttid.tenant_id,
timeline_id: zttid.timeline_id,
@@ -277,7 +277,7 @@ pub struct ProposerGreeting {
#[derive(Debug, Serialize)]
pub struct AcceptorGreeting {
term: u64,
node_id: ZNodeId,
node_id: NodeId,
}

/// Vote request sent from proposer to safekeepers
@@ -531,7 +531,7 @@ pub struct SafeKeeper<CTRL: control_file::Storage, WAL: wal_storage::Storage> {

pub wal_store: WAL,

node_id: ZNodeId, // safekeeper's node id
node_id: NodeId, // safekeeper's node id
}

impl<CTRL, WAL> SafeKeeper<CTRL, WAL>
@@ -544,7 +544,7 @@ where
ztli: ZTimelineId,
state: CTRL,
mut wal_store: WAL,
node_id: ZNodeId,
node_id: NodeId,
) -> Result<SafeKeeper<CTRL, WAL>> {
if state.timeline_id != ZTimelineId::from([0u8; 16]) && ztli != state.timeline_id {
bail!("Calling SafeKeeper::new with inconsistent ztli ({}) and SafeKeeperState.server.timeline_id ({})", ztli, state.timeline_id);
@@ -1013,7 +1013,7 @@ mod tests {
};
let wal_store = DummyWalStore { lsn: Lsn(0) };
let ztli = ZTimelineId::from([0u8; 16]);
let mut sk = SafeKeeper::new(ztli, storage, wal_store, ZNodeId(0)).unwrap();
let mut sk = SafeKeeper::new(ztli, storage, wal_store, NodeId(0)).unwrap();

// check voting for 1 is ok
let vote_request = ProposerAcceptorMessage::VoteRequest(VoteRequest { term: 1 });
@@ -1028,7 +1028,7 @@ mod tests {
let storage = InMemoryState {
persisted_state: state,
};
sk = SafeKeeper::new(ztli, storage, sk.wal_store, ZNodeId(0)).unwrap();
sk = SafeKeeper::new(ztli, storage, sk.wal_store, NodeId(0)).unwrap();

// and ensure voting second time for 1 is not ok
vote_resp = sk.process_msg(&vote_request);
@@ -1045,7 +1045,7 @@ mod tests {
};
let wal_store = DummyWalStore { lsn: Lsn(0) };
let ztli = ZTimelineId::from([0u8; 16]);
let mut sk = SafeKeeper::new(ztli, storage, wal_store, ZNodeId(0)).unwrap();
let mut sk = SafeKeeper::new(ztli, storage, wal_store, NodeId(0)).unwrap();

let mut ar_hdr = AppendRequestHeader {
term: 1,

@@ -21,7 +21,7 @@ use tracing::*;
use utils::{
lsn::Lsn,
pq_proto::ZenithFeedback,
zid::{ZNodeId, ZTenantId, ZTenantTimelineId},
zid::{NodeId, ZTenantId, ZTenantTimelineId},
};

use crate::callmemaybe::{CallmeEvent, SubscriptionStateKey};
@@ -99,7 +99,7 @@ impl SharedState {
fn create(
conf: &SafeKeeperConf,
zttid: &ZTenantTimelineId,
peer_ids: Vec<ZNodeId>,
peer_ids: Vec<NodeId>,
) -> Result<Self> {
let state = SafeKeeperState::new(zttid, peer_ids);
let control_store = control_file::FileStorage::create_new(zttid, conf, state)?;
@@ -448,7 +448,7 @@ impl Timeline {
}

/// Update timeline state with peer safekeeper data.
pub fn record_safekeeper_info(&self, sk_info: &SkTimelineInfo, _sk_id: ZNodeId) -> Result<()> {
pub fn record_safekeeper_info(&self, sk_info: &SkTimelineInfo, _sk_id: NodeId) -> Result<()> {
let mut shared_state = self.mutex.lock().unwrap();
shared_state.sk.record_safekeeper_info(sk_info)?;
self.notify_wal_senders(&mut shared_state);
@@ -551,7 +551,7 @@ impl GlobalTimelines {
mut state: MutexGuard<GlobalTimelinesState>,
conf: &SafeKeeperConf,
zttid: ZTenantTimelineId,
peer_ids: Vec<ZNodeId>,
peer_ids: Vec<NodeId>,
) -> Result<Arc<Timeline>> {
match state.timelines.get(&zttid) {
Some(_) => bail!("timeline {} already exists", zttid),
@@ -576,7 +576,7 @@ impl GlobalTimelines {
pub fn create(
conf: &SafeKeeperConf,
zttid: ZTenantTimelineId,
peer_ids: Vec<ZNodeId>,
peer_ids: Vec<NodeId>,
) -> Result<Arc<Timeline>> {
let state = TIMELINES_STATE.lock().unwrap();
GlobalTimelines::create_internal(state, conf, zttid, peer_ids)

20 test_runner/batch_others/test_basebackup_error.py Normal file
@@ -0,0 +1,20 @@
import pytest
from contextlib import closing

from fixtures.zenith_fixtures import ZenithEnv
from fixtures.log_helper import log


#
# Test error handling, if the 'basebackup' command fails in the middle
# of building the tar archive.
#
def test_basebackup_error(zenith_simple_env: ZenithEnv):
env = zenith_simple_env
env.zenith_cli.create_branch("test_basebackup_error", "empty")

# Introduce failpoint
env.pageserver.safe_psql(f"failpoints basebackup-before-control-file=return")

with pytest.raises(Exception, match="basebackup-before-control-file"):
pg = env.postgres.create_start('test_basebackup_error')
13 test_runner/batch_others/test_ci.py Normal file
@@ -0,0 +1,13 @@
import pytest
import os
import time

from fixtures.zenith_fixtures import ZenithEnvBuilder


@pytest.mark.parametrize('iteration', list(range(100)))
def test_timeout(zenith_env_builder: ZenithEnvBuilder, test_output_dir: str, iteration: int):
env = zenith_env_builder.init_start()

# Sleep long enough with no output so CI step times out
time.sleep(1000)
@@ -393,7 +393,10 @@ class MockS3Server:
):
self.port = port

self.subprocess = subprocess.Popen([f'poetry run moto_server s3 -p{port}'], shell=True)
# XXX: do not use `shell=True` or add `exec ` to the command here otherwise.
# We use `self.subprocess.kill()` to shut down the server, which would not "just" work in Linux
# if a process is started from the shell process.
self.subprocess = subprocess.Popen(['poetry', 'run', 'moto_server', 's3', f'-p{port}'])
error = None
try:
return_code = self.subprocess.poll()
@@ -403,7 +406,7 @@ class MockS3Server:
error = f"expected mock s3 server to start but it failed with exception: {e}. stdout: '{self.subprocess.stdout}', stderr: '{self.subprocess.stderr}'"
if error is not None:
log.error(error)
self.subprocess.kill()
self.kill()
raise RuntimeError("failed to start s3 mock server")

def endpoint(self) -> str:
@@ -1890,7 +1893,11 @@ class Etcd:
f"--data-dir={self.datadir}",
f"--listen-client-urls={client_url}",
f"--advertise-client-urls={client_url}",
f"--listen-peer-urls=http://127.0.0.1:{self.peer_port}"
f"--listen-peer-urls=http://127.0.0.1:{self.peer_port}",
# Set --quota-backend-bytes to keep the etcd virtual memory
# size smaller. Our test etcd clusters are very small.
# See https://github.com/etcd-io/etcd/issues/7910
f"--quota-backend-bytes=100000000"
]
self.handle = subprocess.Popen(args, stdout=log_file, stderr=log_file)

2 vendor/postgres vendored
Submodule vendor/postgres updated: 79af2faf08...038b2b98e5