Mirror of https://github.com/neondatabase/neon.git, synced 2026-02-05 03:30:36 +00:00

Compare commits: prewarm_ne...jk/cleanup

17 Commits
| Author | SHA1 | Date |
|---|---|---|
| | ca1ed3dc3b | |
| | dc2554dff6 | |
| | 5112142997 | |
| | a0a74868a4 | |
| | b154992510 | |
| | a86a38c96e | |
| | 590f894db8 | |
| | 0a0595b98d | |
| | e56d11c8e1 | |
| | ccdc3188ed | |
| | 67401cbdb8 | |
| | d42700280f | |
| | 6df4d5c911 | |
| | 32d14403bd | |
| | 0df3467146 | |
| | c64a121aa8 | |
| | 22cc8760b9 | |
6 .github/workflows/build_and_test.yml vendored
@@ -127,8 +127,8 @@ jobs:
            target/
          # Fall back to older versions of the key, if no cache for current Cargo.lock was found
          key: |
-           v9-${{ runner.os }}-${{ matrix.build_type }}-cargo-${{ hashFiles('Cargo.lock') }}
-           v9-${{ runner.os }}-${{ matrix.build_type }}-cargo-
+           v10-${{ runner.os }}-${{ matrix.build_type }}-cargo-${{ hashFiles('Cargo.lock') }}
+           v10-${{ runner.os }}-${{ matrix.build_type }}-cargo-

      - name: Cache postgres v14 build
        id: cache_pg_14
@@ -389,7 +389,7 @@ jobs:
            !~/.cargo/registry/src
            ~/.cargo/git/
            target/
-         key: v9-${{ runner.os }}-${{ matrix.build_type }}-cargo-${{ hashFiles('Cargo.lock') }}
+         key: v10-${{ runner.os }}-${{ matrix.build_type }}-cargo-${{ hashFiles('Cargo.lock') }}

      - name: Get Neon artifact
        uses: ./.github/actions/download
2 .github/workflows/codestyle.yml vendored
@@ -106,7 +106,7 @@ jobs:
            !~/.cargo/registry/src
            ~/.cargo/git
            target
-         key: v5-${{ runner.os }}-cargo-${{ hashFiles('./Cargo.lock') }}-rust
+         key: v6-${{ runner.os }}-cargo-${{ hashFiles('./Cargo.lock') }}-rust

      - name: Run cargo clippy
        run: ./run_clippy.sh
20 Cargo.lock generated
@@ -317,12 +317,6 @@ dependencies = [
 "generic-array",
]

-[[package]]
-name = "boxfnonce"
-version = "0.1.1"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "5988cb1d626264ac94100be357308f29ff7cbdd3b36bda27f450a4ee3f713426"
-
[[package]]
name = "bstr"
version = "1.0.1"
@@ -600,6 +594,7 @@ dependencies = [
 "tar",
 "thiserror",
 "toml",
+"url",
 "utils",
 "workspace_hack",
]
@@ -849,16 +844,6 @@ dependencies = [
 "syn",
]

-[[package]]
-name = "daemonize"
-version = "0.4.1"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "70c24513e34f53b640819f0ac9f705b673fcf4006d7aab8778bee72ebfc89815"
-dependencies = [
- "boxfnonce",
- "libc",
-]
-
[[package]]
name = "darling"
version = "0.14.1"
@@ -2140,7 +2125,6 @@ dependencies = [
 "crc32c",
 "criterion",
 "crossbeam-utils",
-"daemonize",
 "etcd_broker",
 "fail",
 "futures",
@@ -3087,7 +3071,6 @@ dependencies = [
 "clap 4.0.15",
 "const_format",
 "crc32c",
-"daemonize",
 "etcd_broker",
 "fs2",
 "git-version",
@@ -3095,6 +3078,7 @@ dependencies = [
 "humantime",
 "hyper",
 "metrics",
 "nix 0.25.0",
 "once_cell",
 "parking_lot 0.12.1",
 "postgres",
14 Makefile
@@ -111,8 +111,6 @@ postgres-v14: postgres-v14-configure \
 	$(MAKE) -C $(POSTGRES_INSTALL_DIR)/build/v14 MAKELEVEL=0 install
 	+@echo "Compiling libpq v14"
 	$(MAKE) -C $(POSTGRES_INSTALL_DIR)/build/v14/src/interfaces/libpq install
-	+@echo "Compiling pg_prewarm v14"
-	$(MAKE) -C $(POSTGRES_INSTALL_DIR)/build/v14/contrib/pg_prewarm install
 	+@echo "Compiling pg_buffercache v14"
 	$(MAKE) -C $(POSTGRES_INSTALL_DIR)/build/v14/contrib/pg_buffercache install
 	+@echo "Compiling pageinspect v14"
@@ -125,8 +123,6 @@ postgres-v15: postgres-v15-configure \
 	$(MAKE) -C $(POSTGRES_INSTALL_DIR)/build/v15 MAKELEVEL=0 install
 	+@echo "Compiling libpq v15"
 	$(MAKE) -C $(POSTGRES_INSTALL_DIR)/build/v15/src/interfaces/libpq install
-	+@echo "Compiling pg_prewarm v15"
-	$(MAKE) -C $(POSTGRES_INSTALL_DIR)/build/v15/contrib/pg_prewarm install
 	+@echo "Compiling pg_buffercache v15"
 	$(MAKE) -C $(POSTGRES_INSTALL_DIR)/build/v15/contrib/pg_buffercache install
 	+@echo "Compiling pageinspect v15"
@@ -155,6 +151,11 @@ neon-pg-ext-v14: postgres-v14
 	(cd $(POSTGRES_INSTALL_DIR)/build/neon-v14 && \
 	$(MAKE) PG_CONFIG=$(POSTGRES_INSTALL_DIR)/v14/bin/pg_config CFLAGS='$(PG_CFLAGS) $(COPT)' \
 		-f $(ROOT_PROJECT_DIR)/pgxn/neon/Makefile install)
+	+@echo "Compiling neon_walredo v14"
+	mkdir -p $(POSTGRES_INSTALL_DIR)/build/neon-walredo-v14
+	(cd $(POSTGRES_INSTALL_DIR)/build/neon-walredo-v14 && \
+	$(MAKE) PG_CONFIG=$(POSTGRES_INSTALL_DIR)/v14/bin/pg_config CFLAGS='$(PG_CFLAGS) $(COPT)' \
+		-f $(ROOT_PROJECT_DIR)/pgxn/neon_walredo/Makefile install)
 	+@echo "Compiling neon_test_utils" v14
 	mkdir -p $(POSTGRES_INSTALL_DIR)/build/neon-test-utils-v14
 	(cd $(POSTGRES_INSTALL_DIR)/build/neon-test-utils-v14 && \
@@ -167,6 +168,11 @@ neon-pg-ext-v15: postgres-v15
 	(cd $(POSTGRES_INSTALL_DIR)/build/neon-v15 && \
 	$(MAKE) PG_CONFIG=$(POSTGRES_INSTALL_DIR)/v15/bin/pg_config CFLAGS='$(PG_CFLAGS) $(COPT)' \
 		-f $(ROOT_PROJECT_DIR)/pgxn/neon/Makefile install)
+	+@echo "Compiling neon_walredo v15"
+	mkdir -p $(POSTGRES_INSTALL_DIR)/build/neon-walredo-v15
+	(cd $(POSTGRES_INSTALL_DIR)/build/neon-walredo-v15 && \
+	$(MAKE) PG_CONFIG=$(POSTGRES_INSTALL_DIR)/v15/bin/pg_config CFLAGS='$(PG_CFLAGS) $(COPT)' \
+		-f $(ROOT_PROJECT_DIR)/pgxn/neon_walredo/Makefile install)
 	+@echo "Compiling neon_test_utils" v15
 	mkdir -p $(POSTGRES_INSTALL_DIR)/build/neon-test-utils-v15
 	(cd $(POSTGRES_INSTALL_DIR)/build/neon-test-utils-v15 && \
@@ -223,10 +223,7 @@ Ensure your dependencies are installed as described [here](https://github.com/ne
```sh
git clone --recursive https://github.com/neondatabase/neon.git

# either:
CARGO_BUILD_FLAGS="--features=testing" make
# or:
make debug

./scripts/pytest
```
@@ -4,20 +4,21 @@ version = "0.1.0"
edition = "2021"

[dependencies]
anyhow = "1.0"
clap = "4.0"
comfy-table = "6.1"
git-version = "0.3.5"
tar = "0.4.38"
nix = "0.25"
once_cell = "1.13.0"
postgres = { git = "https://github.com/neondatabase/rust-postgres.git", rev = "d052ee8b86fff9897c77b0fe89ea9daba0e1fa38" }
regex = "1"
reqwest = { version = "0.11", default-features = false, features = ["blocking", "json", "rustls-tls"] }
serde = { version = "1.0", features = ["derive"] }
serde_with = "2.0"
toml = "0.5"
once_cell = "1.13.0"
regex = "1"
anyhow = "1.0"
tar = "0.4.38"
thiserror = "1"
nix = "0.25"
reqwest = { version = "0.11", default-features = false, features = ["blocking", "json", "rustls-tls"] }
toml = "0.5"
url = "2.2.2"

# Note: Do not directly depend on pageserver or safekeeper; use pageserver_api or safekeeper_api
# instead, so that recompile times are better.
264 control_plane/src/background_process.rs Normal file
@@ -0,0 +1,264 @@
//! Spawns and kills background processes that are needed by Neon CLI.
//! Applies common set-up such as log and pid files (if needed) to every process.
//!
//! Neon CLI does not run in the background, so it needs to store the information about
//! spawned processes, which it does in this module.
//! We do that by storing the pid of the process in the "${process_name}.pid" file.
//! The pid file can be created by the process itself
//! (Neon storage binaries do that and also ensure that a lock is taken onto that file)
//! or we create such a file after starting the process
//! (non-Neon binaries don't necessarily follow our pidfile conventions).
//! The pid stored in the file is later used to stop the service.
//!
//! See [`lock_file`] module for more info.

use std::ffi::OsStr;
use std::io::Write;
use std::path::Path;
use std::process::{Child, Command};
use std::time::Duration;
use std::{fs, io, thread};

use anyhow::{anyhow, bail, Context, Result};
use nix::errno::Errno;
use nix::sys::signal::{kill, Signal};
use nix::unistd::Pid;

use utils::lock_file;

const RETRIES: u32 = 15;
const RETRY_TIMEOUT_MILLIS: u64 = 500;

/// Argument to `start_process`, to indicate whether it should create a pidfile or if the process creates
/// it itself.
pub enum InitialPidFile<'t> {
    /// Create a pidfile, to allow future CLI invocations to manipulate the process.
    Create(&'t Path),
    /// The process will create the pidfile itself, need to wait for that event.
    Expect(&'t Path),
}

/// Start a background child process using the parameters given.
pub fn start_process<F, S: AsRef<OsStr>>(
    process_name: &str,
    datadir: &Path,
    command: &Path,
    args: &[S],
    initial_pid_file: InitialPidFile,
    process_status_check: F,
) -> anyhow::Result<Child>
where
    F: Fn() -> anyhow::Result<bool>,
{
    let log_path = datadir.join(format!("{process_name}.log"));
    let process_log_file = fs::OpenOptions::new()
        .create(true)
        .write(true)
        .append(true)
        .open(&log_path)
        .with_context(|| {
            format!("Could not open {process_name} log file {log_path:?} for writing")
        })?;
    let same_file_for_stderr = process_log_file.try_clone().with_context(|| {
        format!("Could not reuse {process_name} log file {log_path:?} for writing stderr")
    })?;

    let mut command = Command::new(command);
    let background_command = command
        .stdout(process_log_file)
        .stderr(same_file_for_stderr)
        .args(args);
    let filled_cmd = fill_aws_secrets_vars(fill_rust_env_vars(background_command));

    let mut spawned_process = filled_cmd.spawn().with_context(|| {
        format!("Could not spawn {process_name}, see console output and log files for details.")
    })?;
    let pid = spawned_process.id();
    let pid = Pid::from_raw(
        i32::try_from(pid)
            .with_context(|| format!("Subprocess {process_name} has invalid pid {pid}"))?,
    );

    let pid_file_to_check = match initial_pid_file {
        InitialPidFile::Create(target_pid_file_path) => {
            match lock_file::create_lock_file(target_pid_file_path, pid.to_string()) {
                lock_file::LockCreationResult::Created { .. } => {
                    // We use the "lock" file here only to create the pid file. The lock on the pidfile will be dropped as soon
                    // as this CLI invocation exits, so it's a bit useless, but doesn't do any harm either.
                }
                lock_file::LockCreationResult::AlreadyLocked { .. } => {
                    anyhow::bail!("Cannot write pid file for {process_name} at path {target_pid_file_path:?}: file is already locked by another process")
                }
                lock_file::LockCreationResult::CreationFailed(e) => {
                    return Err(e.context(format!(
                        "Failed to create pid file for {process_name} at path {target_pid_file_path:?}"
                    )))
                }
            }
            None
        }
        InitialPidFile::Expect(pid_file_path) => Some(pid_file_path),
    };

    for retries in 0..RETRIES {
        match process_started(pid, pid_file_to_check, &process_status_check) {
            Ok(true) => {
                println!("\n{process_name} started, pid: {pid}");
                return Ok(spawned_process);
            }
            Ok(false) => {
                if retries < 5 {
                    print!(".");
                    io::stdout().flush().unwrap();
                } else {
                    if retries == 5 {
                        println!() // put a line break after dots for second message
                    }
                    println!("{process_name} has not started yet, retrying ({retries})...");
                }
                thread::sleep(Duration::from_millis(RETRY_TIMEOUT_MILLIS));
            }
            Err(e) => {
                println!("{process_name} failed to start: {e:#}");
                if let Err(e) = spawned_process.kill() {
                    println!("Could not stop {process_name} subprocess: {e:#}")
                };
                return Err(e);
            }
        }
    }
    anyhow::bail!("{process_name} could not start in {RETRIES} attempts");
}

/// Stops the process, using the pid file given. Returns Ok also if the process is already not running.
pub fn stop_process(immediate: bool, process_name: &str, pid_file: &Path) -> anyhow::Result<()> {
    if !pid_file.exists() {
        println!("{process_name} is already stopped: no pid file {pid_file:?} is present");
        return Ok(());
    }
    let pid = read_pidfile(pid_file)?;

    let sig = if immediate {
        print!("Stopping {process_name} with pid {pid} immediately..");
        Signal::SIGQUIT
    } else {
        print!("Stopping {process_name} with pid {pid} gracefully..");
        Signal::SIGTERM
    };
    io::stdout().flush().unwrap();
    match kill(pid, sig) {
        Ok(()) => (),
        Err(Errno::ESRCH) => {
            println!(
                "{process_name} with pid {pid} does not exist, but a pid file {pid_file:?} was found"
            );
            return Ok(());
        }
        Err(e) => anyhow::bail!("Failed to send signal to {process_name} with pid {pid}: {e}"),
    }

    // Wait until process is gone
    for _ in 0..RETRIES {
        match process_has_stopped(pid) {
            Ok(true) => {
                println!("\n{process_name} stopped");
                if let Err(e) = fs::remove_file(pid_file) {
                    if e.kind() != io::ErrorKind::NotFound {
                        eprintln!("Failed to remove pid file {pid_file:?} after stopping the process: {e:#}");
                    }
                }
                return Ok(());
            }
            Ok(false) => {
                print!(".");
                io::stdout().flush().unwrap();
                thread::sleep(Duration::from_secs(1))
            }
            Err(e) => {
                println!("{process_name} with pid {pid} failed to stop: {e:#}");
                return Err(e);
            }
        }
    }

    anyhow::bail!("{process_name} with pid {pid} failed to stop in {RETRIES} attempts");
}

fn fill_rust_env_vars(cmd: &mut Command) -> &mut Command {
    let mut filled_cmd = cmd.env_clear().env("RUST_BACKTRACE", "1");

    let var = "LLVM_PROFILE_FILE";
    if let Some(val) = std::env::var_os(var) {
        filled_cmd = filled_cmd.env(var, val);
    }

    const RUST_LOG_KEY: &str = "RUST_LOG";
    if let Ok(rust_log_value) = std::env::var(RUST_LOG_KEY) {
        filled_cmd.env(RUST_LOG_KEY, rust_log_value)
    } else {
        filled_cmd
    }
}

fn fill_aws_secrets_vars(mut cmd: &mut Command) -> &mut Command {
    for env_key in [
        "AWS_ACCESS_KEY_ID",
        "AWS_SECRET_ACCESS_KEY",
        "AWS_SESSION_TOKEN",
    ] {
        if let Ok(value) = std::env::var(env_key) {
            cmd = cmd.env(env_key, value);
        }
    }
    cmd
}

fn process_started<F>(
    pid: Pid,
    pid_file_to_check: Option<&Path>,
    status_check: &F,
) -> anyhow::Result<bool>
where
    F: Fn() -> anyhow::Result<bool>,
{
    match status_check() {
        Ok(true) => match pid_file_to_check {
            Some(pid_file_path) => {
                if pid_file_path.exists() {
                    let pid_in_file = read_pidfile(pid_file_path)?;
                    Ok(pid_in_file == pid)
                } else {
                    Ok(false)
                }
            }
            None => Ok(true),
        },
        Ok(false) => Ok(false),
        Err(e) => anyhow::bail!("process failed to start: {e}"),
    }
}

/// Read a PID file
///
/// We expect a file that contains a single integer.
fn read_pidfile(pidfile: &Path) -> Result<Pid> {
    let pid_str = fs::read_to_string(pidfile)
        .with_context(|| format!("failed to read pidfile {pidfile:?}"))?;
    let pid: i32 = pid_str
        .parse()
        .map_err(|_| anyhow!("failed to parse pidfile {pidfile:?}"))?;
    if pid < 1 {
        bail!("pidfile {pidfile:?} contained bad value '{pid}'");
    }
    Ok(Pid::from_raw(pid))
}

fn process_has_stopped(pid: Pid) -> anyhow::Result<bool> {
    match kill(pid, None) {
        // Process exists, keep waiting
        Ok(_) => Ok(false),
        // Process not found, we're done
        Err(Errno::ESRCH) => Ok(true),
        Err(err) => anyhow::bail!("Failed to send signal to process with pid {pid}: {err}"),
    }
}
@@ -9,8 +9,8 @@ use anyhow::{anyhow, bail, Context, Result};
 use clap::{value_parser, Arg, ArgAction, ArgMatches, Command};
 use control_plane::compute::ComputeControlPlane;
 use control_plane::local_env::{EtcdBroker, LocalEnv};
+use control_plane::pageserver::PageServerNode;
 use control_plane::safekeeper::SafekeeperNode;
-use control_plane::storage::PageServerNode;
 use control_plane::{etcd, local_env};
 use pageserver_api::models::TimelineInfo;
 use pageserver_api::{
@@ -12,15 +12,14 @@ use std::time::Duration;
 use anyhow::{Context, Result};
 use utils::{
-    connstring::connection_host_port,
     id::{TenantId, TimelineId},
     lsn::Lsn,
     postgres_backend::AuthType,
 };

 use crate::local_env::{LocalEnv, DEFAULT_PG_VERSION};
+use crate::pageserver::PageServerNode;
 use crate::postgresql_conf::PostgresConf;
-use crate::storage::PageServerNode;

 //
 // ComputeControlPlane
@@ -300,7 +299,8 @@ impl PostgresNode {

         // Configure the node to fetch pages from pageserver
         let pageserver_connstr = {
-            let (host, port) = connection_host_port(&self.pageserver.pg_connection_config);
+            let config = &self.pageserver.pg_connection_config;
+            let (host, port) = (config.host(), config.port());

             // Set up authentication
             //
@@ -319,7 +319,7 @@ impl PostgresNode {
             // uses only needed variables namely host, port, user, password.
             format!("postgresql://no_user:{password}@{host}:{port}")
         };
-        conf.append("shared_preload_libraries", "neon,pg_prewarm");
+        conf.append("shared_preload_libraries", "neon");
         conf.append_line("");
         conf.append("neon.pageserver_connstring", &pageserver_connstr);
         conf.append("neon.tenant_id", &self.tenant_id.to_string());
57 control_plane/src/connection.rs Normal file
@@ -0,0 +1,57 @@
use url::Url;

#[derive(Debug)]
pub struct PgConnectionConfig {
    url: Url,
}

impl PgConnectionConfig {
    pub fn host(&self) -> &str {
        self.url.host_str().expect("BUG: no host")
    }

    pub fn port(&self) -> u16 {
        self.url.port().expect("BUG: no port")
    }

    /// Return a `<host>:<port>` string.
    pub fn raw_address(&self) -> String {
        format!("{}:{}", self.host(), self.port())
    }

    /// Connect using postgres protocol with TLS disabled.
    pub fn connect_no_tls(&self) -> Result<postgres::Client, postgres::Error> {
        postgres::Client::connect(self.url.as_str(), postgres::NoTls)
    }
}

impl std::str::FromStr for PgConnectionConfig {
    type Err = anyhow::Error;

    fn from_str(s: &str) -> Result<Self, Self::Err> {
        let mut url: Url = s.parse()?;

        match url.scheme() {
            "postgres" | "postgresql" => {}
            other => anyhow::bail!("invalid scheme: {other}"),
        }

        // It's not a valid connection url if host is unavailable.
        if url.host().is_none() {
            anyhow::bail!(url::ParseError::EmptyHost);
        }

        // E.g. `postgres:bar`.
        if url.cannot_be_a_base() {
            anyhow::bail!("URL cannot be a base");
        }

        // Set the default PG port if it's missing.
        if url.port().is_none() {
            url.set_port(Some(5432))
                .expect("BUG: couldn't set the default port");
        }

        Ok(Self { url })
    }
}
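For illustration, a minimal usage sketch of the `PgConnectionConfig` type added above (assumes the type is in scope; not part of the diff):

```rust
use std::str::FromStr;

// The default Postgres port (5432) is filled in by `from_str` when the URL omits it.
let config = PgConnectionConfig::from_str("postgresql://no_user@127.0.0.1/no_db").unwrap();
assert_eq!(config.host(), "127.0.0.1");
assert_eq!(config.port(), 5432);
assert_eq!(config.raw_address(), "127.0.0.1:5432");
```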
@@ -1,99 +1,75 @@
-use std::{
-    fs,
-    path::PathBuf,
-    process::{Command, Stdio},
-};
+use std::{fs, path::PathBuf};

 use anyhow::Context;
-use nix::{
-    sys::signal::{kill, Signal},
-    unistd::Pid,
-};

-use crate::{local_env, read_pidfile};
+use crate::{background_process, local_env};

 pub fn start_etcd_process(env: &local_env::LocalEnv) -> anyhow::Result<()> {
     let etcd_broker = &env.etcd_broker;
     println!(
-        "Starting etcd broker using {}",
-        etcd_broker.etcd_binary_path.display()
+        "Starting etcd broker using {:?}",
+        etcd_broker.etcd_binary_path
     );

     let etcd_data_dir = env.base_data_dir.join("etcd");
-    fs::create_dir_all(&etcd_data_dir).with_context(|| {
-        format!(
-            "Failed to create etcd data dir: {}",
-            etcd_data_dir.display()
-        )
-    })?;
+    fs::create_dir_all(&etcd_data_dir)
+        .with_context(|| format!("Failed to create etcd data dir {etcd_data_dir:?}"))?;

-    let etcd_stdout_file =
-        fs::File::create(etcd_data_dir.join("etcd.stdout.log")).with_context(|| {
-            format!(
-                "Failed to create etcd stdout file in directory {}",
-                etcd_data_dir.display()
-            )
-        })?;
-    let etcd_stderr_file =
-        fs::File::create(etcd_data_dir.join("etcd.stderr.log")).with_context(|| {
-            format!(
-                "Failed to create etcd stderr file in directory {}",
-                etcd_data_dir.display()
-            )
-        })?;
     let client_urls = etcd_broker.comma_separated_endpoints();
+    let args = [
+        format!("--data-dir={}", etcd_data_dir.display()),
+        format!("--listen-client-urls={client_urls}"),
+        format!("--advertise-client-urls={client_urls}"),
+        // Set --quota-backend-bytes to keep the etcd virtual memory
+        // size smaller. Our test etcd clusters are very small.
+        // See https://github.com/etcd-io/etcd/issues/7910
+        "--quota-backend-bytes=100000000".to_string(),
+        // etcd doesn't compact (vacuum) with default settings,
+        // enable it to prevent space exhaustion.
+        "--auto-compaction-mode=revision".to_string(),
+        "--auto-compaction-retention=1".to_string(),
+    ];

-    let etcd_process = Command::new(&etcd_broker.etcd_binary_path)
-        .args(&[
-            format!("--data-dir={}", etcd_data_dir.display()),
-            format!("--listen-client-urls={client_urls}"),
-            format!("--advertise-client-urls={client_urls}"),
-            // Set --quota-backend-bytes to keep the etcd virtual memory
-            // size smaller. Our test etcd clusters are very small.
-            // See https://github.com/etcd-io/etcd/issues/7910
-            "--quota-backend-bytes=100000000".to_string(),
-            // etcd doesn't compact (vacuum) with default settings,
-            // enable it to prevent space exhaustion.
-            "--auto-compaction-mode=revision".to_string(),
-            "--auto-compaction-retention=1".to_string(),
-        ])
-        .stdout(Stdio::from(etcd_stdout_file))
-        .stderr(Stdio::from(etcd_stderr_file))
-        .spawn()
-        .context("Failed to spawn etcd subprocess")?;
-    let pid = etcd_process.id();
-
-    let etcd_pid_file_path = etcd_pid_file_path(env);
-    fs::write(&etcd_pid_file_path, pid.to_string()).with_context(|| {
-        format!(
-            "Failed to create etcd pid file at {}",
-            etcd_pid_file_path.display()
-        )
-    })?;
+    let pid_file_path = etcd_pid_file_path(env);
+    let client = reqwest::blocking::Client::new();

+    background_process::start_process(
+        "etcd",
+        &etcd_data_dir,
+        &etcd_broker.etcd_binary_path,
+        &args,
+        background_process::InitialPidFile::Create(&pid_file_path),
+        || {
+            for broker_endpoint in &etcd_broker.broker_endpoints {
+                let request = broker_endpoint
+                    .join("health")
+                    .with_context(|| {
+                        format!(
+                            "Failed to append /health path to broker endpoint {}",
+                            broker_endpoint
+                        )
+                    })
+                    .and_then(|url| {
+                        client.get(&url.to_string()).build().with_context(|| {
+                            format!("Failed to construct request to etcd endpoint {url}")
+                        })
+                    })?;
+                if client.execute(request).is_ok() {
+                    return Ok(true);
+                }
+            }
+
+            Ok(false)
+        },
+    )
+    .context("Failed to spawn etcd subprocess")?;

     Ok(())
 }

 pub fn stop_etcd_process(env: &local_env::LocalEnv) -> anyhow::Result<()> {
-    let etcd_path = &env.etcd_broker.etcd_binary_path;
-    println!("Stopping etcd broker at {}", etcd_path.display());
-
-    let etcd_pid_file_path = etcd_pid_file_path(env);
-    let pid = Pid::from_raw(read_pidfile(&etcd_pid_file_path).with_context(|| {
-        format!(
-            "Failed to read etcd pid file at {}",
-            etcd_pid_file_path.display()
-        )
-    })?);
-
-    kill(pid, Signal::SIGTERM).with_context(|| {
-        format!(
-            "Failed to stop etcd with pid {pid} at {}",
-            etcd_pid_file_path.display()
-        )
-    })?;
-
-    Ok(())
+    background_process::stop_process(true, "etcd", &etcd_pid_file_path(env))
 }

 fn etcd_pid_file_path(env: &local_env::LocalEnv) -> PathBuf {
@@ -6,59 +6,12 @@
 // Intended to be used in integration tests and in CLI tools for
 // local installations.
 //
-use anyhow::{anyhow, bail, Context, Result};
-use std::fs;
-use std::path::Path;
-use std::process::Command;

+mod background_process;
 pub mod compute;
+pub mod connection;
 pub mod etcd;
 pub mod local_env;
+pub mod pageserver;
 pub mod postgresql_conf;
 pub mod safekeeper;
-pub mod storage;

-/// Read a PID file
-///
-/// We expect a file that contains a single integer.
-/// We return an i32 for compatibility with libc and nix.
-pub fn read_pidfile(pidfile: &Path) -> Result<i32> {
-    let pid_str = fs::read_to_string(pidfile)
-        .with_context(|| format!("failed to read pidfile {:?}", pidfile))?;
-    let pid: i32 = pid_str
-        .parse()
-        .map_err(|_| anyhow!("failed to parse pidfile {:?}", pidfile))?;
-    if pid < 1 {
-        bail!("pidfile {:?} contained bad value '{}'", pidfile, pid);
-    }
-    Ok(pid)
-}
-
-fn fill_rust_env_vars(cmd: &mut Command) -> &mut Command {
-    let cmd = cmd.env_clear().env("RUST_BACKTRACE", "1");
-
-    let var = "LLVM_PROFILE_FILE";
-    if let Some(val) = std::env::var_os(var) {
-        cmd.env(var, val);
-    }
-
-    const RUST_LOG_KEY: &str = "RUST_LOG";
-    if let Ok(rust_log_value) = std::env::var(RUST_LOG_KEY) {
-        cmd.env(RUST_LOG_KEY, rust_log_value)
-    } else {
-        cmd
-    }
-}
-
-fn fill_aws_secrets_vars(mut cmd: &mut Command) -> &mut Command {
-    for env_key in [
-        "AWS_ACCESS_KEY_ID",
-        "AWS_SECRET_ACCESS_KEY",
-        "AWS_SESSION_TOKEN",
-    ] {
-        if let Ok(value) = std::env::var(env_key) {
-            cmd = cmd.env(env_key, value);
-        }
-    }
-    cmd
-}
@@ -226,12 +226,12 @@ impl LocalEnv {
         }
     }

-    pub fn pageserver_bin(&self) -> anyhow::Result<PathBuf> {
-        Ok(self.neon_distrib_dir.join("pageserver"))
+    pub fn pageserver_bin(&self) -> PathBuf {
+        self.neon_distrib_dir.join("pageserver")
     }

-    pub fn safekeeper_bin(&self) -> anyhow::Result<PathBuf> {
-        Ok(self.neon_distrib_dir.join("safekeeper"))
+    pub fn safekeeper_bin(&self) -> PathBuf {
+        self.neon_distrib_dir.join("safekeeper")
     }

     pub fn pg_data_dirs_path(&self) -> PathBuf {
@@ -1,33 +1,27 @@
 use std::collections::HashMap;
-use std::fs::File;
+use std::fs::{self, File};
 use std::io::{BufReader, Write};
 use std::num::NonZeroU64;
 use std::path::{Path, PathBuf};
-use std::process::Command;
-use std::time::Duration;
-use std::{io, result, thread};
+use std::process::Child;
+use std::{io, result};

+use crate::connection::PgConnectionConfig;
 use anyhow::{bail, Context};
-use nix::errno::Errno;
-use nix::sys::signal::{kill, Signal};
-use nix::unistd::Pid;
 use pageserver_api::models::{
     TenantConfigRequest, TenantCreateRequest, TenantInfo, TimelineCreateRequest, TimelineInfo,
 };
-use postgres::{Config, NoTls};
 use reqwest::blocking::{Client, RequestBuilder, Response};
 use reqwest::{IntoUrl, Method};
 use thiserror::Error;
 use utils::{
-    connstring::connection_address,
     http::error::HttpErrorBody,
     id::{TenantId, TimelineId},
     lsn::Lsn,
     postgres_backend::AuthType,
 };

-use crate::local_env::LocalEnv;
-use crate::{fill_aws_secrets_vars, fill_rust_env_vars, read_pidfile};
+use crate::{background_process, local_env::LocalEnv};

 #[derive(Error, Debug)]
 pub enum PageserverHttpError {
@@ -75,7 +69,7 @@ impl ResponseErrorMessageExt for Response {
 //
 #[derive(Debug)]
 pub struct PageServerNode {
-    pub pg_connection_config: Config,
+    pub pg_connection_config: PgConnectionConfig,
     pub env: LocalEnv,
     pub http_client: Client,
     pub http_base_url: String,
@@ -101,7 +95,7 @@ impl PageServerNode {
     }

     /// Construct libpq connection string for connecting to the pageserver.
-    fn pageserver_connection_config(password: &str, listen_addr: &str) -> Config {
+    fn pageserver_connection_config(password: &str, listen_addr: &str) -> PgConnectionConfig {
         format!("postgresql://no_user:{password}@{listen_addr}/no_db")
             .parse()
             .unwrap()
@@ -161,7 +155,15 @@ impl PageServerNode {
             init_config_overrides.push("auth_validation_public_key_path='auth_public_key.pem'");
         }

-        self.start_node(&init_config_overrides, &self.env.base_data_dir, true)?;
+        let mut pageserver_process = self
+            .start_node(&init_config_overrides, &self.env.base_data_dir, true)
+            .with_context(|| {
+                format!(
+                    "Failed to start a process for pageserver {}",
+                    self.env.pageserver.id,
+                )
+            })?;

         let init_result = self
             .try_init_timeline(create_tenant, initial_timeline_id, pg_version)
             .context("Failed to create initial tenant and timeline for pageserver");
@@ -171,7 +173,29 @@ impl PageServerNode {
             }
             Err(e) => eprintln!("{e:#}"),
         }
-        self.stop(false)?;
+        match pageserver_process.kill() {
+            Err(e) => {
+                eprintln!(
+                    "Failed to stop pageserver {} process with pid {}: {e:#}",
+                    self.env.pageserver.id,
+                    pageserver_process.id(),
+                )
+            }
+            Ok(()) => {
+                println!(
+                    "Stopped pageserver {} process with pid {}",
+                    self.env.pageserver.id,
+                    pageserver_process.id(),
+                );
+                // cleanup after pageserver startup, since we do not call regular `stop_process` during init
+                let pid_file = self.pid_file();
+                if let Err(e) = fs::remove_file(&pid_file) {
+                    if e.kind() != io::ErrorKind::NotFound {
+                        eprintln!("Failed to remove pid file {pid_file:?} after stopping the process: {e:#}");
+                    }
+                }
+            }
+        }
         init_result
     }

@@ -196,11 +220,14 @@ impl PageServerNode {
         self.env.pageserver_data_dir()
     }

-    pub fn pid_file(&self) -> PathBuf {
+    /// The pid file is created by the pageserver process, with its pid stored inside.
+    /// Other pageservers cannot lock the same file and overwrite it for as long as the current
+    /// pageserver runs. (Unless someone removes the file manually; never do that!)
+    fn pid_file(&self) -> PathBuf {
         self.repo_path().join("pageserver.pid")
     }

-    pub fn start(&self, config_overrides: &[&str]) -> anyhow::Result<()> {
+    pub fn start(&self, config_overrides: &[&str]) -> anyhow::Result<Child> {
         self.start_node(config_overrides, &self.repo_path(), false)
     }

@@ -209,10 +236,10 @@
         config_overrides: &[&str],
         datadir: &Path,
         update_config: bool,
-    ) -> anyhow::Result<()> {
+    ) -> anyhow::Result<Child> {
         println!(
             "Starting pageserver at '{}' in '{}'",
-            connection_address(&self.pg_connection_config),
+            self.pg_connection_config.raw_address(),
             datadir.display()
         );
         io::stdout().flush()?;
@@ -220,10 +247,7 @@
         let mut args = vec![
             "-D",
             datadir.to_str().with_context(|| {
-                format!(
-                    "Datadir path '{}' cannot be represented as a unicode string",
-                    datadir.display()
-                )
+                format!("Datadir path {datadir:?} cannot be represented as a unicode string")
             })?,
         ];

@@ -235,48 +259,18 @@
             args.extend(["-c", config_override]);
         }

-        let mut cmd = Command::new(self.env.pageserver_bin()?);
-        let mut filled_cmd = fill_rust_env_vars(cmd.args(&args).arg("--daemonize"));
-        filled_cmd = fill_aws_secrets_vars(filled_cmd);
-
-        if !filled_cmd.status()?.success() {
-            bail!(
-                "Pageserver failed to start. See console output and '{}' for details.",
-                datadir.join("pageserver.log").display()
-            );
-        }
-
-        // It takes a while for the page server to start up. Wait until it is
-        // open for business.
-        const RETRIES: i8 = 15;
-        for retries in 1..RETRIES {
-            match self.check_status() {
-                Ok(()) => {
-                    println!("\nPageserver started");
-                    return Ok(());
-                }
-                Err(err) => {
-                    match err {
-                        PageserverHttpError::Transport(err) => {
-                            if err.is_connect() && retries < 5 {
-                                print!(".");
-                                io::stdout().flush().unwrap();
-                            } else {
-                                if retries == 5 {
-                                    println!() // put a line break after dots for second message
-                                }
-                                println!("Pageserver not responding yet, err {err} retrying ({retries})...");
-                            }
-                        }
-                        PageserverHttpError::Response(msg) => {
-                            bail!("pageserver failed to start: {msg} ")
-                        }
-                    }
-                    thread::sleep(Duration::from_secs(1));
-                }
-            }
-        }
-        bail!("pageserver failed to start in {RETRIES} seconds");
+        background_process::start_process(
+            "pageserver",
+            datadir,
+            &self.env.pageserver_bin(),
+            &args,
+            background_process::InitialPidFile::Expect(&self.pid_file()),
+            || match self.check_status() {
+                Ok(()) => Ok(true),
+                Err(PageserverHttpError::Transport(_)) => Ok(false),
+                Err(e) => Err(anyhow::anyhow!("Failed to check node status: {e}")),
+            },
+        )
     }

     ///
@@ -288,69 +282,18 @@
     /// If the server is not running, returns success
     ///
     pub fn stop(&self, immediate: bool) -> anyhow::Result<()> {
-        let pid_file = self.pid_file();
-        if !pid_file.exists() {
-            println!("Pageserver is already stopped");
-            return Ok(());
-        }
-        let pid = Pid::from_raw(read_pidfile(&pid_file)?);
-
-        let sig = if immediate {
-            print!("Stopping pageserver immediately..");
-            Signal::SIGQUIT
-        } else {
-            print!("Stopping pageserver gracefully..");
-            Signal::SIGTERM
-        };
-        io::stdout().flush().unwrap();
-        match kill(pid, sig) {
-            Ok(_) => (),
-            Err(Errno::ESRCH) => {
-                println!("Pageserver with pid {pid} does not exist, but a PID file was found");
-                return Ok(());
-            }
-            Err(err) => bail!(
-                "Failed to send signal to pageserver with pid {pid}: {}",
-                err.desc()
-            ),
-        }
-
-        // Wait until process is gone
-        for i in 0..600 {
-            let signal = None; // Send no signal, just get the error code
-            match kill(pid, signal) {
-                Ok(_) => (), // Process exists, keep waiting
-                Err(Errno::ESRCH) => {
-                    // Process not found, we're done
-                    println!("done!");
-                    return Ok(());
-                }
-                Err(err) => bail!(
-                    "Failed to send signal to pageserver with pid {}: {}",
-                    pid,
-                    err.desc()
-                ),
-            };
-
-            if i % 10 == 0 {
-                print!(".");
-                io::stdout().flush().unwrap();
-            }
-            thread::sleep(Duration::from_millis(100));
-        }
-
-        bail!("Failed to stop pageserver with pid {pid}");
+        background_process::stop_process(immediate, "pageserver", &self.pid_file())
     }

     pub fn page_server_psql(&self, sql: &str) -> Vec<postgres::SimpleQueryMessage> {
-        let mut client = self.pg_connection_config.connect(NoTls).unwrap();
+        let mut client = self.pg_connection_config.connect_no_tls().unwrap();

         println!("Pageserver query: '{sql}'");
         client.simple_query(sql).unwrap()
     }

     pub fn page_server_psql_client(&self) -> result::Result<postgres::Client, postgres::Error> {
-        self.pg_connection_config.connect(NoTls)
+        self.pg_connection_config.connect_no_tls()
     }

     fn http_request<U: IntoUrl>(&self, method: Method, url: U) -> RequestBuilder {
@@ -549,7 +492,7 @@ impl PageServerNode {
         pg_wal: Option<(Lsn, PathBuf)>,
         pg_version: u32,
     ) -> anyhow::Result<()> {
-        let mut client = self.pg_connection_config.connect(NoTls).unwrap();
+        let mut client = self.pg_connection_config.connect_no_tls().unwrap();

         // Init base reader
         let (start_lsn, base_tarfile_path) = base;
@@ -1,23 +1,21 @@
 use std::io::Write;
 use std::path::PathBuf;
-use std::process::Command;
+use std::process::Child;
 use std::sync::Arc;
-use std::time::Duration;
-use std::{io, result, thread};
+use std::{io, result};

-use anyhow::bail;
-use nix::errno::Errno;
-use nix::sys::signal::{kill, Signal};
-use nix::unistd::Pid;
-use postgres::Config;
+use anyhow::Context;
 use reqwest::blocking::{Client, RequestBuilder, Response};
 use reqwest::{IntoUrl, Method};
 use thiserror::Error;
-use utils::{connstring::connection_address, http::error::HttpErrorBody, id::NodeId};
+use utils::{http::error::HttpErrorBody, id::NodeId};

-use crate::local_env::{LocalEnv, SafekeeperConf};
-use crate::storage::PageServerNode;
-use crate::{fill_aws_secrets_vars, fill_rust_env_vars, read_pidfile};
+use crate::connection::PgConnectionConfig;
+use crate::pageserver::PageServerNode;
+use crate::{
+    background_process,
+    local_env::{LocalEnv, SafekeeperConf},
+};

 #[derive(Error, Debug)]
 pub enum SafekeeperHttpError {
@@ -63,7 +61,7 @@ pub struct SafekeeperNode {

     pub conf: SafekeeperConf,

-    pub pg_connection_config: Config,
+    pub pg_connection_config: PgConnectionConfig,
     pub env: LocalEnv,
     pub http_client: Client,
     pub http_base_url: String,
@@ -87,15 +85,15 @@ impl SafekeeperNode {
     }

     /// Construct libpq connection string for connecting to this safekeeper.
-    fn safekeeper_connection_config(port: u16) -> Config {
+    fn safekeeper_connection_config(port: u16) -> PgConnectionConfig {
         // TODO safekeeper authentication not implemented yet
-        format!("postgresql://no_user@127.0.0.1:{}/no_db", port)
+        format!("postgresql://no_user@127.0.0.1:{port}/no_db")
             .parse()
             .unwrap()
     }

     pub fn datadir_path_by_id(env: &LocalEnv, sk_id: NodeId) -> PathBuf {
-        env.safekeeper_data_dir(format!("sk{}", sk_id).as_ref())
+        env.safekeeper_data_dir(&format!("sk{sk_id}"))
     }

     pub fn datadir_path(&self) -> PathBuf {
@@ -106,91 +104,78 @@
         self.datadir_path().join("safekeeper.pid")
     }

-    pub fn start(&self) -> anyhow::Result<()> {
+    pub fn start(&self) -> anyhow::Result<Child> {
         print!(
             "Starting safekeeper at '{}' in '{}'",
-            connection_address(&self.pg_connection_config),
+            self.pg_connection_config.raw_address(),
             self.datadir_path().display()
         );
         io::stdout().flush().unwrap();

         let listen_pg = format!("127.0.0.1:{}", self.conf.pg_port);
         let listen_http = format!("127.0.0.1:{}", self.conf.http_port);
+        let id = self.id;
+        let datadir = self.datadir_path();

-        let mut cmd = Command::new(self.env.safekeeper_bin()?);
-        fill_rust_env_vars(
-            cmd.args(&["-D", self.datadir_path().to_str().unwrap()])
-                .args(&["--id", self.id.to_string().as_ref()])
-                .args(&["--listen-pg", &listen_pg])
-                .args(&["--listen-http", &listen_http])
-                .arg("--daemonize"),
-        );
+        let id_string = id.to_string();
+        let mut args = vec![
+            "-D",
+            datadir.to_str().with_context(|| {
+                format!("Datadir path {datadir:?} cannot be represented as a unicode string")
+            })?,
+            "--id",
+            &id_string,
+            "--listen-pg",
+            &listen_pg,
+            "--listen-http",
+            &listen_http,
+        ];
         if !self.conf.sync {
-            cmd.arg("--no-sync");
+            args.push("--no-sync");
         }

         let comma_separated_endpoints = self.env.etcd_broker.comma_separated_endpoints();
         if !comma_separated_endpoints.is_empty() {
-            cmd.args(&["--broker-endpoints", &comma_separated_endpoints]);
+            args.extend(["--broker-endpoints", &comma_separated_endpoints]);
         }
         if let Some(prefix) = self.env.etcd_broker.broker_etcd_prefix.as_deref() {
-            cmd.args(&["--broker-etcd-prefix", prefix]);
+            args.extend(["--broker-etcd-prefix", prefix]);
         }

+        let mut backup_threads = String::new();
         if let Some(threads) = self.conf.backup_threads {
-            cmd.args(&["--backup-threads", threads.to_string().as_ref()]);
+            backup_threads = threads.to_string();
+            args.extend(["--backup-threads", &backup_threads]);
+        } else {
+            drop(backup_threads);
         }

         if let Some(ref remote_storage) = self.conf.remote_storage {
-            cmd.args(&["--remote-storage", remote_storage]);
+            args.extend(["--remote-storage", remote_storage]);
         }

+        let key_path = self.env.base_data_dir.join("auth_public_key.pem");
         if self.conf.auth_enabled {
-            cmd.arg("--auth-validation-public-key-path");
-            // PathBuf is better be passed as is, not via `String`.
-            cmd.arg(self.env.base_data_dir.join("auth_public_key.pem"));
+            args.extend([
+                "--auth-validation-public-key-path",
+                key_path.to_str().with_context(|| {
+                    format!("Key path {key_path:?} cannot be represented as a unicode string")
+                })?,
+            ]);
         }

-        fill_aws_secrets_vars(&mut cmd);
-
-        if !cmd.status()?.success() {
-            bail!(
-                "Safekeeper failed to start. See '{}' for details.",
-                self.datadir_path().join("safekeeper.log").display()
-            );
-        }
-
-        // It takes a while for the safekeeper to start up. Wait until it is
-        // open for business.
-        const RETRIES: i8 = 15;
-        for retries in 1..RETRIES {
-            match self.check_status() {
-                Ok(_) => {
-                    println!("\nSafekeeper started");
-                    return Ok(());
-                }
-                Err(err) => {
-                    match err {
-                        SafekeeperHttpError::Transport(err) => {
-                            if err.is_connect() && retries < 5 {
-                                print!(".");
-                                io::stdout().flush().unwrap();
-                            } else {
-                                if retries == 5 {
-                                    println!() // put a line break after dots for second message
-                                }
-                                println!(
-                                    "Safekeeper not responding yet, err {} retrying ({})...",
-                                    err, retries
-                                );
-                            }
-                        }
-                        SafekeeperHttpError::Response(msg) => {
-                            bail!("safekeeper failed to start: {} ", msg)
-                        }
-                    }
-                    thread::sleep(Duration::from_secs(1));
-                }
-            }
-        }
-        bail!("safekeeper failed to start in {} seconds", RETRIES);
+        background_process::start_process(
+            &format!("safekeeper {id}"),
+            &datadir,
+            &self.env.safekeeper_bin(),
+            &args,
+            background_process::InitialPidFile::Expect(&self.pid_file()),
+            || match self.check_status() {
+                Ok(()) => Ok(true),
+                Err(SafekeeperHttpError::Transport(_)) => Ok(false),
+                Err(e) => Err(anyhow::anyhow!("Failed to check node status: {e}")),
+            },
+        )
     }

     ///
@@ -202,63 +187,11 @@
     /// If the server is not running, returns success
     ///
     pub fn stop(&self, immediate: bool) -> anyhow::Result<()> {
-        let pid_file = self.pid_file();
-        if !pid_file.exists() {
-            println!("Safekeeper {} is already stopped", self.id);
-            return Ok(());
-        }
-        let pid = read_pidfile(&pid_file)?;
-        let pid = Pid::from_raw(pid);
-
-        let sig = if immediate {
-            print!("Stopping safekeeper {} immediately..", self.id);
-            Signal::SIGQUIT
-        } else {
-            print!("Stopping safekeeper {} gracefully..", self.id);
-            Signal::SIGTERM
-        };
-        io::stdout().flush().unwrap();
-        match kill(pid, sig) {
-            Ok(_) => (),
-            Err(Errno::ESRCH) => {
-                println!(
-                    "Safekeeper with pid {} does not exist, but a PID file was found",
-                    pid
-                );
-                return Ok(());
-            }
-            Err(err) => bail!(
-                "Failed to send signal to safekeeper with pid {}: {}",
-                pid,
-                err.desc()
-            ),
-        }
-
-        // Wait until process is gone
-        for i in 0..600 {
-            let signal = None; // Send no signal, just get the error code
-            match kill(pid, signal) {
-                Ok(_) => (), // Process exists, keep waiting
-                Err(Errno::ESRCH) => {
-                    // Process not found, we're done
-                    println!("done!");
-                    return Ok(());
-                }
-                Err(err) => bail!(
-                    "Failed to send signal to pageserver with pid {}: {}",
-                    pid,
-                    err.desc()
-                ),
-            };
-
-            if i % 10 == 0 {
-                print!(".");
-                io::stdout().flush().unwrap();
-            }
-            thread::sleep(Duration::from_millis(100));
-        }
-
-        bail!("Failed to stop safekeeper with pid {}", pid);
+        background_process::stop_process(
+            immediate,
+            &format!("safekeeper {}", self.id),
+            &self.pid_file(),
+        )
     }

     fn http_request<U: IntoUrl>(&self, method: Method, url: U) -> RequestBuilder {
246 docs/rfcs/020-pageserver-s3-coordination.md Normal file
@@ -0,0 +1,246 @@
# Coordinating access of multiple pageservers to the same s3 data

## Motivation

There are some blind spots around coordinating access of multiple pageservers
to the same s3 data. Currently this is applicable only to the tenant relocation
case, but in the future we'll need to solve similar problems for
replica/standby pageservers.

## Impacted components (e.g. pageserver, safekeeper, console, etc)

Pageserver

## The problem

### Relocation

During relocation both pageservers can write to s3. This should be ok for all
data except the `index_part.json`. For the index part it causes problems during
compaction/gc, because they remove files from the index/s3.

Imagine this case:

```mermaid
sequenceDiagram
    autonumber
    participant PS1
    participant S3
    participant PS2

    PS1->>S3: Uploads L1, L2 <br/> Index contains L1 L2
    PS2->>S3: Attach called, sees L1, L2
    PS1->>S3: Compaction comes <br/> Removes L1, adds L3
    note over S3: Index now L2, L3
    PS2->>S3: Uploads new layer L4 <br/> (added to previous view of the index)
    note over S3: Index now L1, L2, L4
```

At this point it is not possible to restore from the index: it contains L1, which
is no longer available in s3, and doesn't contain L3, added by compaction by the
first pageserver. So if either of the pageservers restarts, initial sync will fail
(or in the on-demand world it will fail a bit later, during a page request for a
missing layer).
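To make the failure concrete, here is a toy model of the sequence in the diagram (illustrative Rust only, not pageserver code):

```rust
use std::collections::BTreeSet;

fn main() {
    // The index in S3, modeled as a set of layer names.
    let mut s3_index: BTreeSet<&str> = BTreeSet::from(["L1", "L2"]);

    // (2) PS2 attaches and snapshots its view of the index.
    let ps2_view = s3_index.clone();

    // (3) PS1 compaction removes L1, adds L3, and uploads the new index.
    s3_index.remove("L1");
    s3_index.insert("L3");
    assert_eq!(s3_index, BTreeSet::from(["L2", "L3"]));

    // (5) PS2 uploads L4 based on its stale view, overwriting PS1's index.
    let mut ps2_index = ps2_view;
    ps2_index.insert("L4");
    s3_index = ps2_index;

    // The index now names L1 (already deleted from S3) and has lost L3.
    assert_eq!(s3_index, BTreeSet::from(["L1", "L2", "L4"]));
}
```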
### Standby pageserver

Another related case is a standby pageserver. In this case the second pageserver can
be used as a replica to scale reads, and serve as a failover target in case the
first one fails.

In this mode the second pageserver needs to have the same picture of s3 files to
be able to load layers on-demand. To accomplish that, the second pageserver
cannot run gc/compaction jobs. Instead it needs to receive updates for index
contents. (There is no need to run a walreceiver on the second pageserver then.)

## Observations

- If both pageservers ingest wal then their layer sets diverge, because layer
  file generation is not deterministic
- If one of the pageservers does not ingest wal (and just picks up layer
  updates) then it lags behind and cannot really answer queries at the same
  pace as the primary one
- Can compaction help make layers deterministic? E.g. we do not upload level
  zero layers, and construction of higher levels should be deterministic.
  This way we can guarantee that layer creation by timeout won't mess things up.
  This way one pageserver uploads data and the second one can just ingest it.
  But we still need some form of election

## Solutions

### Manual orchestration

One possible solution for the relocation case is to orchestrate background jobs
from outside. The oracle who runs migration can turn off background jobs on
PS1 before migration, run the migration, and then enable them on PS2. The problem
comes if the migration fails. In this case, in order to resume background jobs, the
oracle needs to guarantee that PS2 doesn't run background jobs, and if PS2 doesn't
respond then PS1 is stuck, unable to run compaction/gc. This cannot be solved
without a human ensuring that no upload from PS2 can happen. In order to be able
to resolve this automatically, CAS is required on the S3 side, so a pageserver can
avoid overwriting the index part if it is no longer the leading one.

Note that the flag that disables background jobs needs to be persistent, because
otherwise a pageserver restart will clean it.

### Avoid index_part.json

The index part consists of two parts: a list of layers and metadata. The list of
layers can be easily obtained by the `ListObjects` S3 API method. But what to do
with the metadata? Create a metadata instance for each checkpoint and add some
counter to the file name?

Back to a potentially long `s3 ls`.

### Coordination based approach

Do it like safekeepers choose the leader for WAL upload. Ping each other and decide
based on some heuristics, e.g. smallest node id. During relocation PS1 sends a
"resign" ping message so others can start an election without waiting for a timeout.

This still leaves the metadata question open, and non-deterministic layers are a
problem as well.

### Avoid metadata file

One way to eliminate the metadata file is to store it in layer files under some
special key. This may resonate with the intention to keep all relation sizes in
some special segment, to avoid the initial download during size calculation.
Maybe with that we can even store a pre-calculated value.

As a downside, each checkpoint gets 512 bytes larger.

If we entirely avoid the metadata file, this opens up many approaches.

* * *

During discussion it seems that we converged on the approach consisting of:

- Index files stored per pageserver in the same timeline directory. With that,
  the index file name starts to look like: `<pageserver_node_id>_index_part.json`.
  In such a setup there are no concurrent overwrites of an index file by different
  pageservers.
- For replica pageservers the solution would be for the primary to broadcast index
  changes to any followers, with an ability to check index files in s3 and
  restore the full state. To properly merge changes with index files we can use
  a counter that is persisted in an index file, is incremented on every change
  to it, and is passed along with each broadcasted change. This way we can
  determine whether we need to apply a change to the index state or not (see the
  sketch after this list).
- Responsibility for running background jobs is assigned externally. The pageserver
  keeps a locally persistent flag for each tenant that indicates whether this
  pageserver is considered the primary one or not. TODO: what happens if we
  crash and cannot start for some extended period of time? The control plane can
  assign ownership to some other pageserver. The pageserver needs some way to check
  if it's still the blessed one. Maybe by an explicit request to the control plane on
  start.
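A minimal sketch of the counter rule from the second bullet, with hypothetical types (`IndexChange` and `IndexState` are illustrations, not existing pageserver structs):

```rust
use std::collections::BTreeSet;

/// Hypothetical broadcast message: one index change plus the leader's counter.
pub struct IndexChange {
    pub counter: u64,
    pub layer_file: String,
    pub added: bool,
}

pub struct IndexState {
    /// Persisted in the index file; incremented on every change.
    pub counter: u64,
    pub layers: BTreeSet<String>,
}

impl IndexState {
    /// Apply a broadcast change only if it is newer than the persisted state;
    /// stale or duplicate messages (already reflected in the snapshot we
    /// restored from) are ignored.
    pub fn apply(&mut self, change: IndexChange) {
        if change.counter <= self.counter {
            return;
        }
        if change.added {
            self.layers.insert(change.layer_file);
        } else {
            self.layers.remove(&change.layer_file);
        }
        self.counter = change.counter;
    }
}
```

A real implementation would presumably also detect gaps in the counter sequence and refetch the full index file from s3 in that case.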
The requirement for deterministic layer generation was considered overly strict,
for two reasons:

- It can limit possible optimizations, e.g. when a pageserver wants to reshuffle
  some data locally and doesn't want to coordinate this
- The deterministic algorithm itself can change, so during deployments for some
  time there will be two different versions running at the same time, which can
  cause non-determinism

### External elections

The above case with lost state, in this schema with externally managed
leadership, is represented like this:

Note that here we keep the objects list in the index file.

```mermaid
sequenceDiagram
    autonumber
    participant PS1
    participant CP as Control Plane
    participant S3
    participant PS2

    note over PS1,PS2: PS1 starts up and is still a leader
    PS1->>CP: Am I still the leader for Tenant X?
    activate CP
    CP->>PS1: Yes
    deactivate CP
    PS1->>S3: Fetch PS1 index.
    note over PS1: Continue operations, start background jobs
    note over PS1,PS2: PS1 starts up and is not a leader anymore
    PS1->>CP: Am I still the leader for Tenant X?
    CP->>PS1: No
    PS1->>PS2: Subscribe to index changes
    PS1->>S3: Fetch PS1 and PS2 indexes
    note over PS1: Combine index file to include layers <br> from both indexes to be able <br> to see newer files from leader (PS2)
    note over PS1: Continue operations, do not start background jobs
```

### Internal elections

To manage leadership internally we can use the broker to exchange pings, so nodes
can decide on the leader roles. In case multiple pageservers are active, the leader
is the one with the lowest node id.

Operations with internally managed elections:

```mermaid
sequenceDiagram
    autonumber
    participant PS1
    participant S3

    note over PS1: Starts up
    note over PS1: Subscribes to changes, waits for two ping <br> timeouts to see if there is a leader
    PS1->>S3: Fetch indexes from s3
    alt there is a leader
        note over PS1: do not start background jobs, <br> continue applying index updates
    else there is no leader
        note over PS1: start background jobs, <br> broadcast index changes
    end

    note over PS1,S3: Then the picture is similar to external elections <br> the difference is that a follower can become a leader: <br> if there are no pings after some timeout, a new leader gets elected
```
|
||||
|
||||
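
A minimal sketch of the election rule (the liveness bookkeeping here is
illustrative; the real implementation would sit on top of the broker pings):

```rust
use std::collections::BTreeMap;
use std::time::{Duration, Instant};

/// Nodes we have recently heard a ping from, keyed by node id.
type LastPings = BTreeMap<u64, Instant>;

/// A node counts as alive if it pinged within two ping timeouts; among the
/// alive nodes, the leader is simply the lowest node id.
fn current_leader(pings: &LastPings, now: Instant, ping_timeout: Duration) -> Option<u64> {
    pings
        .iter()
        .filter(|(_, last)| now.duration_since(**last) < 2 * ping_timeout)
        .map(|(id, _)| *id)
        .min()
}
```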
### Eviction

When two pageservers operate on a tenant for an extended period of time, the
follower doesn't perform write operations in S3. When a layer is evicted, the
follower relies on updates from the primary to learn about the layers it needs
to cover the range of the evicted layer.

Note that they won't match the evicted layer exactly, so layers will overlap
and the lookup code needs to handle that correctly, as in the sketch below.
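
A minimal sketch of an overlap-tolerant lookup (the layer representation is
illustrative, not the pageserver's actual layer map):

```rust
/// Illustrative stand-in for a layer: a key range plus the highest LSN it covers.
struct Layer {
    key_start: u64,
    key_end: u64,
    lsn_end: u64,
}

/// With overlapping layers, every layer covering the key is a candidate;
/// prefer the one that extends furthest in LSN.
fn find_layer(layers: &[Layer], key: u64) -> Option<&Layer> {
    layers
        .iter()
        .filter(|l| l.key_start <= key && key < l.key_end)
        .max_by_key(|l| l.lsn_end)
}
```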
### Relocation flow

The actions become:

- Attach the tenant to the new pageserver
- The new pageserver becomes a follower, since the previous one is still leading
- The new pageserver starts replicating from safekeepers but does not upload layers
- Detach is called on the old pageserver
- The new pageserver becomes the leader after it realizes that the old one disappeared

### Index File

Using `s3 ls` on startup simplifies things, but we still need metadata, so we
need to fetch index files anyway. If they contain the list of files, we can
combine them and avoid a costly `s3 ls` (see the sketch below).
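
A minimal sketch of combining per-pageserver index files into one startup view
(file names stand in for full layer metadata):

```rust
use std::collections::BTreeSet;

/// Merge the layer file lists of all fetched `<node_id>_index_part.json`
/// files, so no `s3 ls` is needed to discover layers.
fn combine_file_lists(indexes: &[BTreeSet<String>]) -> BTreeSet<String> {
    indexes
        .iter()
        .flat_map(|files| files.iter().cloned())
        .collect()
}
```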
### Remaining issues

- More than one remote consistent LSN for safekeepers to know about

Anything else?

### Proposed solution

To recap: at the meeting we converged on the approach with external elections,
but I think it will be harder to manage overall and will introduce a
dependency on the control plane for the pageserver. Using separate index files
for each pageserver, consisting of a log of operations and a metadata
snapshot, should be enough.

### What do we need to get there?

- Change the index file structure to contain a log of changes instead of just
  the file list (sketched below)
- Implement pinging/elections for pageservers
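
A minimal sketch of such an index file layout (field names are illustrative,
not the current `index_part.json` schema):

```rust
/// One logged change to the timeline's layer set.
struct IndexChange {
    /// Monotonic counter persisted with the index; used to order
    /// and deduplicate broadcasted changes.
    counter: u64,
    added_layers: Vec<String>,
    removed_layers: Vec<String>,
}

/// Per-pageserver index file: a metadata snapshot plus the log of
/// changes applied since that snapshot was taken.
struct IndexPart {
    snapshot_counter: u64,
    metadata: Vec<u8>,
    change_log: Vec<IndexChange>,
}
```
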
@@ -52,6 +52,10 @@ PostgreSQL extension that implements storage manager API and network communicati

PostgreSQL extension that contains functions needed for testing and debugging.

`/pgxn/neon_walredo`:

Library to run Postgres as a "WAL redo process" in the pageserver.

`/safekeeper`:

The neon WAL service that receives WAL from primary compute nodes and streams it to the pageserver.

@@ -230,7 +230,6 @@ pub enum PagestreamFeMessage {
    Nblocks(PagestreamNblocksRequest),
    GetPage(PagestreamGetPageRequest),
    DbSize(PagestreamDbSizeRequest),
    Fcntl(PagestreamFcntlRequest),
}

// Wrapped in libpq CopyData
@@ -271,12 +270,6 @@ pub struct PagestreamDbSizeRequest {
    pub dbnode: u32,
}

#[derive(Debug)]
pub struct PagestreamFcntlRequest {
    pub cmd: u32,
    pub data: Bytes,
}

#[derive(Debug)]
pub struct PagestreamExistsResponse {
    pub exists: bool,
@@ -348,14 +341,6 @@ impl PagestreamFeMessage {
                lsn: Lsn::from(body.get_u64()),
                dbnode: body.get_u32(),
            })),
            4 => {
                let cmd = body.get_u32();
                let size = body.get_u32() as usize;
                Ok(PagestreamFeMessage::Fcntl(PagestreamFcntlRequest {
                    cmd,
                    data: body.copy_to_bytes(size),
                }))
            }
            _ => bail!("unknown smgr message tag: {},'{:?}'", msg_tag, body),
        }
    }

@@ -232,6 +232,3 @@ pub const PGDATA_SPECIAL_FILES: [&str; 3] =
    ["pg_hba.conf", "pg_ident.conf", "postgresql.auto.conf"];

pub static PG_HBA: &str = include_str!("../samples/pg_hba.conf");

pub static AUTOPREWARM_FILE_NAME: &str = "autoprewarm.blocks";
pub const SMGR_FCNTL_CACHE_SNAPSHOT: u32 = 1;

@@ -1,52 +0,0 @@
use postgres::Config;

pub fn connection_host_port(config: &Config) -> (String, u16) {
    assert_eq!(
        config.get_hosts().len(),
        1,
        "only one pair of host and port is supported in connection string"
    );
    assert_eq!(
        config.get_ports().len(),
        1,
        "only one pair of host and port is supported in connection string"
    );
    let host = match &config.get_hosts()[0] {
        postgres::config::Host::Tcp(host) => host.as_ref(),
        postgres::config::Host::Unix(host) => host.to_str().unwrap(),
    };
    (host.to_owned(), config.get_ports()[0])
}

pub fn connection_address(config: &Config) -> String {
    let (host, port) = connection_host_port(config);
    format!("{}:{}", host, port)
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_connection_host_port() {
        let config: Config = "postgresql://no_user@localhost:64000/no_db"
            .parse()
            .unwrap();
        assert_eq!(
            connection_host_port(&config),
            ("localhost".to_owned(), 64000)
        );
    }

    #[test]
    #[should_panic(expected = "only one pair of host and port is supported in connection string")]
    fn test_connection_host_port_multiple_ports() {
        let config: Config = "postgresql://no_user@localhost:64000,localhost:64001/no_db"
            .parse()
            .unwrap();
        assert_eq!(
            connection_host_port(&config),
            ("localhost".to_owned(), 64000)
        );
    }
}
@@ -19,9 +19,6 @@ pub mod postgres_backend;
pub mod postgres_backend_async;
pub mod pq_proto;

// dealing with connstring parsing and handy access to its parts
pub mod connstring;

// helper functions for creating and fsyncing
pub mod crashsafe;

@@ -39,6 +36,8 @@ pub mod sock_split;
// common log initialisation routine
pub mod logging;

pub mod lock_file;

// Misc
pub mod accum;
pub mod shutdown;

81  libs/utils/src/lock_file.rs  Normal file
@@ -0,0 +1,81 @@
//! A module to create and read lock files. A lock file ensures that only one
//! process is running at a time, in a particular directory.
//!
//! File locking is done using [`fcntl::flock`], which means that holding the
//! lock on a file only prevents acquiring another lock on it; all other
//! operations are still possible on the file. Other processes can still open,
//! read, write, or remove the file, for example.
//! If the file is removed while a process is holding a lock on it,
//! the process that holds the lock does not get any error or notification.
//! Furthermore, you can create a new file with the same name and lock the new file,
//! while the old process is still running.
//! Deleting the lock file while the locking process is still running is a bad idea!

use std::{fs, os::unix::prelude::AsRawFd, path::Path};

use anyhow::Context;
use nix::fcntl;

use crate::crashsafe;

pub enum LockCreationResult {
    Created {
        new_lock_contents: String,
        file: fs::File,
    },
    AlreadyLocked {
        existing_lock_contents: String,
    },
    CreationFailed(anyhow::Error),
}

/// Creates a lock file in the path given and writes the given contents into the file.
/// Note: the lock is automatically released when the file is closed. You might want to use Box::leak to make sure it lives until the end of the program.
pub fn create_lock_file(lock_file_path: &Path, contents: String) -> LockCreationResult {
    let lock_file = match fs::OpenOptions::new()
        .create(true) // O_CREAT
        .write(true)
        .open(lock_file_path)
        .context("Failed to open lock file")
    {
        Ok(file) => file,
        Err(e) => return LockCreationResult::CreationFailed(e),
    };

    match fcntl::flock(
        lock_file.as_raw_fd(),
        fcntl::FlockArg::LockExclusiveNonblock,
    ) {
        Ok(()) => {
            match lock_file
                .set_len(0)
                .context("Failed to truncate lockfile")
                .and_then(|()| {
                    fs::write(lock_file_path, &contents).with_context(|| {
                        format!("Failed to write '{contents}' contents into lockfile")
                    })
                })
                .and_then(|()| {
                    crashsafe::fsync_file_and_parent(lock_file_path)
                        .context("Failed to fsync lockfile")
                }) {
                Ok(()) => LockCreationResult::Created {
                    new_lock_contents: contents,
                    file: lock_file,
                },
                Err(e) => LockCreationResult::CreationFailed(e),
            }
        }
        Err(nix::errno::Errno::EAGAIN) => {
            match fs::read_to_string(lock_file_path).context("Failed to read lockfile contents") {
                Ok(existing_lock_contents) => LockCreationResult::AlreadyLocked {
                    existing_lock_contents,
                },
                Err(e) => LockCreationResult::CreationFailed(e),
            }
        }
        Err(e) => {
            LockCreationResult::CreationFailed(anyhow::anyhow!("Failed to lock lockfile: {e}"))
        }
    }
}
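
A usage sketch for the module above (the pageserver caller later in this diff
does the same; the file name here is illustrative):

```rust
use std::path::Path;

use utils::lock_file::{self, LockCreationResult};

fn main() -> anyhow::Result<()> {
    // Illustrative path; the pageserver uses "<workdir>/pageserver.pid".
    match lock_file::create_lock_file(Path::new("service.pid"), std::process::id().to_string()) {
        LockCreationResult::Created { new_lock_contents, file } => {
            println!("took the lock, wrote {new_lock_contents}");
            // Leak the handle so flock() is held for the process lifetime.
            Box::leak(Box::new(file));
        }
        LockCreationResult::AlreadyLocked { existing_lock_contents } => {
            anyhow::bail!("already running with PID {existing_lock_contents}")
        }
        LockCreationResult::CreationFailed(e) => return Err(e),
    }
    Ok(())
}
```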
@@ -1,10 +1,6 @@
use std::{
    fs::{File, OpenOptions},
    path::Path,
    str::FromStr,
};
use std::str::FromStr;

use anyhow::{Context, Result};
use anyhow::Context;
use strum_macros::{EnumString, EnumVariantNames};

#[derive(EnumString, EnumVariantNames, Eq, PartialEq, Debug, Clone, Copy)]
@@ -25,19 +21,8 @@ impl LogFormat {
        })
    }
}
pub fn init(
    log_filename: impl AsRef<Path>,
    daemonize: bool,
    log_format: LogFormat,
) -> Result<File> {
    // Don't open the same file for output multiple times;
    // the different fds could overwrite each other's output.
    let log_file = OpenOptions::new()
        .create(true)
        .append(true)
        .open(&log_filename)
        .with_context(|| format!("failed to open {:?}", log_filename.as_ref()))?;

pub fn init(log_format: LogFormat) -> anyhow::Result<()> {
    let default_filter_str = "info";

    // We fall back to printing all spans at info-level or above if
@@ -45,50 +30,16 @@ pub fn init(
    let env_filter = tracing_subscriber::EnvFilter::try_from_default_env()
        .unwrap_or_else(|_| tracing_subscriber::EnvFilter::new(default_filter_str));

    let x: File = log_file.try_clone().unwrap();
    let base_logger = tracing_subscriber::fmt()
        .with_env_filter(env_filter)
        .with_target(false)
        .with_ansi(false)
        .with_writer(move || -> Box<dyn std::io::Write> {
            // we are cloning and returning the log file in order to allow redirecting daemonized stdout and stderr to it
            // if we do not use daemonization (e.g. in docker) it is better to log to stdout directly,
            // for example to be in line with the docker log command, which expects logs coming from stdout
            if daemonize {
                Box::new(x.try_clone().unwrap())
            } else {
                Box::new(std::io::stdout())
            }
        });
        .with_writer(std::io::stdout);

    match log_format {
        LogFormat::Json => base_logger.json().init(),
        LogFormat::Plain => base_logger.init(),
    }

    Ok(log_file)
}

// #[cfg(test)]
// Due to global logger, can't run tests in same process.
// So until there's a non-global one, the tests are in ../tests/ as separate files.
#[macro_export(local_inner_macros)]
macro_rules! test_init_file_logger {
    ($log_level:expr, $log_format:expr) => {{
        use std::str::FromStr;
        std::env::set_var("RUST_LOG", $log_level);

        let tmp_dir = tempfile::TempDir::new().unwrap();
        let log_file_path = tmp_dir.path().join("logfile");

        let log_format = $crate::logging::LogFormat::from_str($log_format).unwrap();
        let _log_file = $crate::logging::init(&log_file_path, true, log_format).unwrap();

        let log_file = std::fs::OpenOptions::new()
            .read(true)
            .open(&log_file_path)
            .unwrap();

        log_file
    }};
    Ok(())
}
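
After this change, callers initialize logging once and it writes to stdout; a
minimal usage sketch, assuming the `utils` crate layout shown above:

```rust
fn main() -> anyhow::Result<()> {
    // The new init() writes to stdout, so `docker logs` picks everything up.
    utils::logging::init(utils::logging::LogFormat::Plain)?;
    tracing::info!("pageserver-style logging initialized");
    Ok(())
}
```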
@@ -1,36 +0,0 @@
// This could be in ../src/logging.rs but since the logger is global, these
// can't be run in threads of the same process
use std::fs::File;
use std::io::{BufRead, BufReader, Lines};
use tracing::*;
use utils::test_init_file_logger;

fn read_lines(file: File) -> Lines<BufReader<File>> {
    BufReader::new(file).lines()
}

#[test]
fn test_json_format_has_message_and_custom_field() {
    std::env::set_var("RUST_LOG", "info");

    let log_file = test_init_file_logger!("info", "json");

    let custom_field: &str = "hi";
    trace!(custom = %custom_field, "test log message");
    debug!(custom = %custom_field, "test log message");
    info!(custom = %custom_field, "test log message");
    warn!(custom = %custom_field, "test log message");
    error!(custom = %custom_field, "test log message");

    let lines = read_lines(log_file);
    for line in lines {
        let content = line.unwrap();
        let json_object = serde_json::from_str::<serde_json::Value>(&content).unwrap();

        assert_eq!(json_object["fields"]["custom"], "hi");
        assert_eq!(json_object["fields"]["message"], "test log message");

        assert_ne!(json_object["level"], "TRACE");
        assert_ne!(json_object["level"], "DEBUG");
    }
}
@@ -1,36 +0,0 @@
// This could be in ../src/logging.rs but since the logger is global, these
// can't be run in threads of the same process
use std::fs::File;
use std::io::{BufRead, BufReader, Lines};
use tracing::*;
use utils::test_init_file_logger;

fn read_lines(file: File) -> Lines<BufReader<File>> {
    BufReader::new(file).lines()
}

#[test]
fn test_plain_format_has_message_and_custom_field() {
    std::env::set_var("RUST_LOG", "warn");

    let log_file = test_init_file_logger!("warn", "plain");

    let custom_field: &str = "hi";
    trace!(custom = %custom_field, "test log message");
    debug!(custom = %custom_field, "test log message");
    info!(custom = %custom_field, "test log message");
    warn!(custom = %custom_field, "test log message");
    error!(custom = %custom_field, "test log message");

    let lines = read_lines(log_file);
    for line in lines {
        let content = line.unwrap();
        serde_json::from_str::<serde_json::Value>(&content).unwrap_err();
        assert!(content.contains("custom=hi"));
        assert!(content.contains("test log message"));

        assert!(!content.contains("TRACE"));
        assert!(!content.contains("DEBUG"));
        assert!(!content.contains("INFO"));
    }
}
@@ -24,7 +24,6 @@ hex = "0.4.3"
hyper = "0.14"
itertools = "0.10.3"
clap = { version = "4.0", features = ["string"] }
daemonize = "0.4.1"
tokio = { version = "1.17", features = ["process", "sync", "macros", "fs", "rt", "io-util", "time"] }
tokio-util = { version = "0.7.3", features = ["io", "io-util"] }
postgres-types = { git = "https://github.com/neondatabase/rust-postgres.git", rev="d052ee8b86fff9897c77b0fe89ea9daba0e1fa38" }

@@ -17,7 +17,6 @@ use itertools::Itertools;
use std::fmt::Write as FmtWrite;
use std::io;
use std::io::Write;
use std::path::PathBuf;
use std::sync::Arc;
use std::time::SystemTime;
use tar::{Builder, EntryType, Header};
@@ -26,10 +25,8 @@ use tracing::*;
use crate::tenant::Timeline;
use pageserver_api::reltag::{RelTag, SlruKind};

use postgres_ffi::pg_constants::{
    AUTOPREWARM_FILE_NAME, PGDATA_SPECIAL_FILES, PGDATA_SUBDIRS, PG_HBA,
};
use postgres_ffi::pg_constants::{DEFAULTTABLESPACE_OID, GLOBALTABLESPACE_OID};
use postgres_ffi::pg_constants::{PGDATA_SPECIAL_FILES, PGDATA_SUBDIRS, PG_HBA};
use postgres_ffi::TransactionId;
use postgres_ffi::XLogFileName;
use postgres_ffi::PG_TLI;
@@ -148,7 +145,6 @@ where
                self.ar.append(&header, &mut io::empty())?;
            }
        }
        self.add_prewarm_file()?;

        // Gather non-relational files from object storage pages.
        for kind in [
@@ -222,21 +218,6 @@ where
        Ok(())
    }

    //
    // Include "autoprewarm-bin.blocks" in archive (if exists)
    //
    fn add_prewarm_file(&mut self) -> anyhow::Result<()> {
        let path = self
            .timeline
            .conf
            .timeline_path(&self.timeline.timeline_id, &self.timeline.tenant_id)
            .join(AUTOPREWARM_FILE_NAME);
        if PathBuf::from(&path).exists() {
            self.ar.append_path_with_name(path, AUTOPREWARM_FILE_NAME)?;
        }
        Ok(())
    }

    //
    // Generate SLRU segment files from repository.
    //

@@ -1,17 +1,14 @@
//! Main entry point for the Page Server executable.

use remote_storage::GenericRemoteStorage;
use std::{env, ops::ControlFlow, path::Path, str::FromStr};

use anyhow::{anyhow, Context};
use clap::{Arg, ArgAction, Command};
use fail::FailScenario;
use nix::unistd::Pid;
use tracing::*;

use anyhow::{anyhow, bail, Context, Result};

use clap::{Arg, ArgAction, Command};
use daemonize::Daemonize;

use fail::FailScenario;
use metrics::set_build_info_metric;

use pageserver::{
    config::{defaults::*, PageServerConf},
    http, page_cache, page_service, profiling, task_mgr,
@@ -19,20 +16,22 @@ use pageserver::{
    task_mgr::{
        BACKGROUND_RUNTIME, COMPUTE_REQUEST_RUNTIME, MGMT_REQUEST_RUNTIME, WALRECEIVER_RUNTIME,
    },
    tenant_mgr, virtual_file, LOG_FILE_NAME,
    tenant_mgr, virtual_file,
};
use remote_storage::GenericRemoteStorage;
use utils::{
    auth::JwtAuth,
    logging,
    lock_file, logging,
    postgres_backend::AuthType,
    project_git_version,
    shutdown::exit_now,
    signals::{self, Signal},
    tcp_listener,
};

project_git_version!(GIT_VERSION);

const PID_FILE_NAME: &str = "pageserver.pid";

const FEATURES: &[&str] = &[
    #[cfg(feature = "testing")]
    "testing",
@@ -65,6 +64,7 @@ fn main() -> anyhow::Result<()> {
    let workdir = workdir
        .canonicalize()
        .with_context(|| format!("Error opening workdir '{}'", workdir.display()))?;

    let cfg_file_path = workdir.join("pageserver.toml");

    // Set CWD to workdir for non-daemon modes
@@ -75,8 +75,6 @@ fn main() -> anyhow::Result<()> {
        )
    })?;

    let daemonize = arg_matches.get_flag("daemonize");

    let conf = match initialize_config(&cfg_file_path, arg_matches, &workdir)? {
        ControlFlow::Continue(conf) => conf,
        ControlFlow::Break(()) => {
@@ -102,7 +100,7 @@ fn main() -> anyhow::Result<()> {
    virtual_file::init(conf.max_file_descriptors);
    page_cache::init(conf.page_cache_size);

    start_pageserver(conf, daemonize).context("Failed to start pageserver")?;
    start_pageserver(conf).context("Failed to start pageserver")?;

    scenario.teardown();
    Ok(())
@@ -197,12 +195,34 @@ fn initialize_config(
    })
}

fn start_pageserver(conf: &'static PageServerConf, daemonize: bool) -> Result<()> {
    // Initialize logger
    let log_file = logging::init(LOG_FILE_NAME, daemonize, conf.log_format)?;

fn start_pageserver(conf: &'static PageServerConf) -> anyhow::Result<()> {
    logging::init(conf.log_format)?;
    info!("version: {}", version());

    let lock_file_path = conf.workdir.join(PID_FILE_NAME);
    let lock_file = match lock_file::create_lock_file(&lock_file_path, Pid::this().to_string()) {
        lock_file::LockCreationResult::Created {
            new_lock_contents,
            file,
        } => {
            info!("Created lock file at {lock_file_path:?} with contents {new_lock_contents}");
            file
        }
        lock_file::LockCreationResult::AlreadyLocked {
            existing_lock_contents,
        } => anyhow::bail!(
            "Could not lock pid file; pageserver is already running in {:?} with PID {}",
            conf.workdir,
            existing_lock_contents
        ),
        lock_file::LockCreationResult::CreationFailed(e) => {
            return Err(e.context(format!("Failed to create lock file at {lock_file_path:?}")))
        }
    };
    // Ensure that the lock file is held even if the main thread of the process panics:
    // we need to release the lock file only when the current process is gone.
    let _ = Box::leak(Box::new(lock_file));

    // TODO: Check that it looks like a valid repository before going further

    // bind sockets before daemonizing so we report errors early and do not return until we are listening
@@ -218,33 +238,6 @@ fn start_pageserver(conf: &'static PageServerConf, daemonize: bool) -> Result<()
    );
    let pageserver_listener = tcp_listener::bind(conf.listen_pg_addr.clone())?;

    // NB: Don't spawn any threads before daemonizing!
    if daemonize {
        info!("daemonizing...");

        // There shouldn't be any logging to stdin/stdout. Redirect it to the main log so
        // that we will see any accidental manual fprintf's or backtraces.
        let stdout = log_file
            .try_clone()
            .with_context(|| format!("Failed to clone log file '{:?}'", log_file))?;
        let stderr = log_file;

        let daemonize = Daemonize::new()
            .pid_file("pageserver.pid")
            .working_directory(".")
            .stdout(stdout)
            .stderr(stderr);

        // XXX: The parent process should exit abruptly right after
        // it has spawned a child to prevent coverage machinery from
        // dumping stats into a `profraw` file now owned by the child.
        // Otherwise, the coverage data will be damaged.
        match daemonize.exit_action(|| exit_now(0)).start() {
            Ok(_) => info!("Success, daemonized"),
            Err(err) => bail!("{err}. could not daemonize. bailing."),
        }
    }

    let signals = signals::install_shutdown_handlers()?;

    // start profiler (if enabled)
@@ -347,14 +340,6 @@ fn cli() -> Command {
    Command::new("Neon page server")
        .about("Materializes WAL stream to pages and serves them to the postgres")
        .version(version())
        .arg(
            Arg::new("daemonize")
                .short('d')
                .long("daemonize")
                .action(ArgAction::SetTrue)
                .help("Run in the background"),
        )
        .arg(
            Arg::new("init")
                .long("init")

@@ -227,13 +227,10 @@ async fn timeline_list_handler(request: Request<Body>) -> Result<Response<Body>,

    let state = get_state(&request);

    let timelines = tokio::task::spawn_blocking(move || {
        let _enter = info_span!("timeline_list", tenant = %tenant_id).entered();
    let timelines = info_span!("timeline_list", tenant = %tenant_id).in_scope(|| {
        let tenant = tenant_mgr::get_tenant(tenant_id, true).map_err(ApiError::NotFound)?;
        Ok(tenant.list_timelines())
    })
    .await
    .map_err(|e: JoinError| ApiError::InternalServerError(e.into()))??;
    })?;

    let mut response_data = Vec::with_capacity(timelines.len());
    for timeline in timelines {
@@ -523,9 +520,7 @@ async fn tenant_status(request: Request<Body>) -> Result<Response<Body>, ApiErro
    check_permission(&request, Some(tenant_id))?;

    // if tenant is in progress of downloading it can be absent in global tenant map
    let tenant = tokio::task::spawn_blocking(move || tenant_mgr::get_tenant(tenant_id, false))
        .await
        .map_err(|e: JoinError| ApiError::InternalServerError(e.into()))?;
    let tenant = tenant_mgr::get_tenant(tenant_id, false);

    let state = get_state(&request);
    let remote_index = &state.remote_index;

@@ -43,8 +43,6 @@ pub const DEFAULT_PG_VERSION: u32 = 14;
pub const IMAGE_FILE_MAGIC: u16 = 0x5A60;
pub const DELTA_FILE_MAGIC: u16 = 0x5A61;

pub const LOG_FILE_NAME: &str = "pageserver.log";

static ZERO_PAGE: bytes::Bytes = bytes::Bytes::from_static(&[0u8; 8192]);

/// Config for the Repository checkpointer
@@ -81,7 +79,6 @@ pub async fn shutdown_pageserver(exit_code: i32) {

    // There should be nothing left, but let's be sure
    task_mgr::shutdown_tasks(None, None, None).await;

    info!("Shut down successfully completed");
    std::process::exit(exit_code);
}

@@ -15,11 +15,10 @@ use futures::{Stream, StreamExt};
use pageserver_api::models::{
    PagestreamBeMessage, PagestreamDbSizeRequest, PagestreamDbSizeResponse,
    PagestreamErrorResponse, PagestreamExistsRequest, PagestreamExistsResponse,
    PagestreamFcntlRequest, PagestreamFeMessage, PagestreamGetPageRequest,
    PagestreamGetPageResponse, PagestreamNblocksRequest, PagestreamNblocksResponse,
    PagestreamFeMessage, PagestreamGetPageRequest, PagestreamGetPageResponse,
    PagestreamNblocksRequest, PagestreamNblocksResponse,
};
use std::io;
use std::io::Write;
use std::net::TcpListener;
use std::str;
use std::str::FromStr;
@@ -46,12 +45,9 @@ use crate::task_mgr;
use crate::task_mgr::TaskKind;
use crate::tenant::Timeline;
use crate::tenant_mgr;
use crate::virtual_file::VirtualFile;
use crate::CheckpointConfig;
use crate::TEMP_FILE_SUFFIX;
use postgres_ffi::pg_constants::{
    AUTOPREWARM_FILE_NAME, DEFAULTTABLESPACE_OID, SMGR_FCNTL_CACHE_SNAPSHOT,
};

use postgres_ffi::pg_constants::DEFAULTTABLESPACE_OID;
use postgres_ffi::BLCKSZ;

fn copyin_stream(pgb: &mut PostgresBackend) -> impl Stream<Item = io::Result<Bytes>> + '_ {
@@ -304,6 +300,7 @@ impl PageServerHandler {
            trace!("query: {copy_data_bytes:?}");

            let neon_fe_msg = PagestreamFeMessage::parse(copy_data_bytes)?;

            let response = match neon_fe_msg {
                PagestreamFeMessage::Exists(req) => {
                    let _timer = metrics.get_rel_exists.start_timer();
@@ -321,10 +318,6 @@ impl PageServerHandler {
                    let _timer = metrics.get_db_size.start_timer();
                    self.handle_db_size_request(&timeline, &req).await
                }
                PagestreamFeMessage::Fcntl(req) => {
                    self.handle_fcntl_request(&timeline, &req).await?;
                    continue;
                }
            };

            let response = response.unwrap_or_else(|e| {
@@ -594,33 +587,6 @@ impl PageServerHandler {
        }))
    }

    async fn handle_fcntl_request(
        &self,
        timeline: &Timeline,
        req: &PagestreamFcntlRequest,
    ) -> Result<()> {
        if req.cmd == SMGR_FCNTL_CACHE_SNAPSHOT {
            let temp_path = self
                .conf
                .timeline_path(&timeline.timeline_id, &timeline.tenant_id)
                .join(format!("{AUTOPREWARM_FILE_NAME}.{TEMP_FILE_SUFFIX}"));
            let mut file = VirtualFile::open_with_options(
                &temp_path,
                std::fs::OpenOptions::new().write(true).create_new(true),
            )?;
            file.write_all(&req.data)?;
            drop(file);
            let final_path = self
                .conf
                .timeline_path(&timeline.timeline_id, &timeline.tenant_id)
                .join(AUTOPREWARM_FILE_NAME);
            std::fs::rename(temp_path, &final_path)?;
        } else {
            warn!("Fcntl request {} is not supported", req.cmd);
        }
        Ok(())
    }

    #[instrument(skip(self, pgb))]
    async fn handle_basebackup_request(
        &self,

@@ -523,7 +523,6 @@ impl Tenant {
        let timelines = self.timelines.lock().unwrap();
        let timelines_to_compact = timelines
            .iter()
            .filter(|(_, timeline)| timeline.is_active())
            .map(|(timeline_id, timeline)| (*timeline_id, timeline.clone()))
            .collect::<Vec<_>>();
        drop(timelines);
@@ -995,7 +994,6 @@ impl Tenant {

        timelines
            .iter()
            .filter(|(_, timeline)| timeline.is_active())
            .map(|(timeline_id, timeline_entry)| {
                // This is unresolved question for now, how to do gc in presence of remote timelines
                // especially when this is combined with branching.

@@ -62,8 +62,8 @@ use crate::{
};

pub struct Timeline {
    pub conf: &'static PageServerConf,
    pub tenant_conf: Arc<RwLock<TenantConfOpt>>,
    conf: &'static PageServerConf,
    tenant_conf: Arc<RwLock<TenantConfOpt>>,

    pub tenant_id: TenantId,
    pub timeline_id: TimelineId,

@@ -155,22 +155,19 @@ impl<E: Clone> TaskHandle<E> {

    /// Aborts current task, waiting for it to finish.
    pub async fn shutdown(self) {
        match self.join_handle {
            Some(jh) => {
                self.cancellation.send(()).ok();
                match jh.await {
                    Ok(Ok(())) => debug!("Shutdown success"),
                    Ok(Err(e)) => error!("Shutdown task error: {e:?}"),
                    Err(join_error) => {
                        if join_error.is_cancelled() {
                            error!("Shutdown task was cancelled");
                        } else {
                            error!("Shutdown task join error: {join_error}")
                        }
        if let Some(jh) = self.join_handle {
            self.cancellation.send(()).ok();
            match jh.await {
                Ok(Ok(())) => debug!("Shutdown success"),
                Ok(Err(e)) => error!("Shutdown task error: {e:?}"),
                Err(join_error) => {
                    if join_error.is_cancelled() {
                        error!("Shutdown task was cancelled");
                    } else {
                        error!("Shutdown task join error: {join_error}")
                    }
                }
            }
            None => {}
        }
    }
}

@@ -93,7 +93,7 @@ pub fn spawn_connection_manager_task(
        }
    }
    .instrument(
        info_span!("wal_connection_manager", tenant = %tenant_id, timeline = %timeline_id),
        info_span!(parent: None, "wal_connection_manager", tenant = %tenant_id, timeline = %timeline_id),
    ),
);
}
@@ -836,15 +836,20 @@ fn wal_stream_connection_string(
    listen_pg_addr_str: &str,
) -> anyhow::Result<String> {
    let sk_connstr = format!("postgresql://no_user@{listen_pg_addr_str}/no_db");
    let me_conf = sk_connstr
        .parse::<postgres::config::Config>()
        .with_context(|| {
            format!("Failed to parse pageserver connection string '{sk_connstr}' as a postgres one")
        })?;
    let (host, port) = utils::connstring::connection_host_port(&me_conf);
    Ok(format!(
        "host={host} port={port} options='-c timeline_id={timeline_id} tenant_id={tenant_id}'"
    ))
    sk_connstr
        .parse()
        .context("bad url")
        .and_then(|url: url::Url| {
            let host = url.host_str().context("host is missing")?;
            let port = url.port().unwrap_or(5432); // default PG port

            Ok(format!(
                "host={host} \
                 port={port} \
                 options='-c timeline_id={timeline_id} tenant_id={tenant_id}'"
            ))
        })
        .with_context(|| format!("Failed to parse pageserver connection URL '{sk_connstr}'"))
}

#[cfg(test)]
@@ -892,7 +897,7 @@ mod tests {
            peer_horizon_lsn: None,
            local_start_lsn: None,

            safekeeper_connstr: Some("no commit_lsn".to_string()),
            safekeeper_connstr: Some("no_commit_lsn".to_string()),
        },
        etcd_version: 0,
        latest_update: now,
@@ -909,7 +914,7 @@ mod tests {
            remote_consistent_lsn: None,
            peer_horizon_lsn: None,
            local_start_lsn: None,
            safekeeper_connstr: Some("no commit_lsn".to_string()),
            safekeeper_connstr: Some("no_commit_lsn".to_string()),
        },
        etcd_version: 0,
        latest_update: now,
@@ -1005,7 +1010,7 @@ mod tests {
            peer_horizon_lsn: None,
            local_start_lsn: None,

            safekeeper_connstr: Some("not advanced Lsn".to_string()),
            safekeeper_connstr: Some("not_advanced_lsn".to_string()),
        },
        etcd_version: 0,
        latest_update: now,
@@ -1023,7 +1028,7 @@ mod tests {
            peer_horizon_lsn: None,
            local_start_lsn: None,

            safekeeper_connstr: Some("not enough advanced Lsn".to_string()),
            safekeeper_connstr: Some("not_enough_advanced_lsn".to_string()),
        },
        etcd_version: 0,
        latest_update: now,
@@ -1093,7 +1098,7 @@ mod tests {
            peer_horizon_lsn: None,
            local_start_lsn: None,

            safekeeper_connstr: Some("smaller commit_lsn".to_string()),
            safekeeper_connstr: Some("smaller_commit_lsn".to_string()),
        },
        etcd_version: 0,
        latest_update: now,
@@ -1283,7 +1288,7 @@ mod tests {
            peer_horizon_lsn: None,
            local_start_lsn: None,

            safekeeper_connstr: Some("advanced by Lsn safekeeper".to_string()),
            safekeeper_connstr: Some("advanced_by_lsn_safekeeper".to_string()),
        },
        etcd_version: 0,
        latest_update: now,
@@ -1307,7 +1312,7 @@ mod tests {
    );
    assert!(over_threshcurrent_candidate
        .wal_source_connstr
        .contains("advanced by Lsn safekeeper"));
        .contains("advanced_by_lsn_safekeeper"));

    Ok(())
}

@@ -10,7 +10,7 @@
//! process. Then we get the page image back. Communication with the
//! postgres process happens via stdin/stdout
//!
//! See src/backend/tcop/zenith_wal_redo.c for the other side of
//! See pgxn/neon_walredo/walredoproc.c for the other side of
//! this communication.
//!
//! The Postgres process is assumed to be secure against malicious WAL
@@ -644,14 +644,12 @@ impl PostgresRedoProcess {
                ),
            ));
        } else {
            // Limit shared cache for wal-redo-postres
            // Limit shared cache for wal-redo-postgres
            let mut config = OpenOptions::new()
                .append(true)
                .open(PathBuf::from(&datadir).join("postgresql.conf"))?;
            config.write_all(b"shared_buffers=128kB\n")?;
            config.write_all(b"fsync=off\n")?;
            config.write_all(b"shared_preload_libraries=neon\n")?;
            config.write_all(b"neon.wal_redo=on\n")?;
        }

        // Start postgres itself
@@ -664,18 +662,15 @@ impl PostgresRedoProcess {
            .env("LD_LIBRARY_PATH", &pg_lib_dir_path)
            .env("DYLD_LIBRARY_PATH", &pg_lib_dir_path)
            .env("PGDATA", &datadir)
            // The redo process is not trusted, so it runs in seccomp mode
            // (see seccomp in zenith_wal_redo.c). We have to make sure it doesn't
            // inherit any file descriptors from the pageserver that would allow
            // an attacker to do bad things.
            // The redo process is not trusted, and runs in seccomp mode that
            // doesn't allow it to open any files. We have to also make sure it
            // doesn't inherit any file descriptors from the pageserver, that
            // would allow an attacker to read any files that happen to be open
            // in the pageserver.
            //
            // The Rust standard library makes sure to mark any file descriptors with
            // as close-on-exec by default, but that's not enough, since we use
            // libraries that directly call libc open without setting that flag.
            //
            // One example is the pidfile of the daemonize library, which doesn't
            // currently mark file descriptors as close-on-exec. Either way, we
            // want to be on the safe side and prevent accidental regression.
            .close_fds()
            .spawn()
            .map_err(|e| {
@@ -844,7 +839,7 @@ impl PostgresRedoProcess {
    }

    // Functions for constructing messages to send to the postgres WAL redo
    // process. See vendor/postgres/src/backend/tcop/zenith_wal_redo.c for
    // process. See pgxn/neon_walredo/walredoproc.c for
    // explanation of the protocol.

    fn build_begin_redo_for_block_msg(tag: BufferTag, buf: &mut Vec<u8>) {

@@ -4,7 +4,6 @@
MODULE_big = neon
OBJS = \
	$(WIN32RES) \
	inmem_smgr.o \
	libpagestore.o \
	libpqwalproposer.o \
	pagestore_smgr.o \

@@ -419,15 +419,6 @@ pg_init_libpagestore(void)
							0,	/* no flags required */
							check_neon_id, NULL, NULL);

	DefineCustomBoolVariable("neon.wal_redo",
							 "start in wal-redo mode",
							 NULL,
							 &wal_redo,
							 false,
							 PGC_POSTMASTER,
							 0,
							 NULL, NULL, NULL);

	DefineCustomIntVariable("neon.max_cluster_size",
							"cluster size limit",
							NULL,
@@ -452,13 +443,7 @@ pg_init_libpagestore(void)
	neon_timeline_walproposer = neon_timeline;
	neon_tenant_walproposer = neon_tenant;

	if (wal_redo)
	{
		neon_log(PageStoreTrace, "set inmem_smgr hook");
		smgr_hook = smgr_inmem;
		smgr_init_hook = smgr_init_inmem;
	}
	else if (page_server_connstring && page_server_connstring[0])
	if (page_server_connstring && page_server_connstring[0])
	{
		neon_log(PageStoreTrace, "set neon_smgr hook");
		smgr_hook = smgr_neon;

@@ -32,7 +32,6 @@ typedef enum
	T_NeonNblocksRequest,
	T_NeonGetPageRequest,
	T_NeonDbSizeRequest,
	T_NeonFcntlRequest,

	/* pagestore -> pagestore_client */
	T_NeonExistsResponse = 100,
@@ -92,14 +91,6 @@ typedef struct
	BlockNumber blkno;
} NeonGetPageRequest;

typedef struct
{
	NeonRequest req;
	int			cmd;
	int			size;
	char		data[1];
} NeonFcntlRequest;

/* supertype of all the Neon*Response structs below */
typedef struct
{
@@ -164,10 +155,6 @@ extern int32 max_cluster_size;
extern const f_smgr *smgr_neon(BackendId backend, RelFileNode rnode);
extern void smgr_init_neon(void);

extern const f_smgr *smgr_inmem(BackendId backend, RelFileNode rnode);
extern void smgr_init_inmem(void);
extern void smgr_shutdown_inmem(void);

/* Neon storage manager functionality */

extern void neon_init(void);
@@ -180,7 +167,7 @@ extern void neon_extend(SMgrRelation reln, ForkNumber forknum,
						BlockNumber blocknum, char *buffer, bool skipFsync);
extern bool neon_prefetch(SMgrRelation reln, ForkNumber forknum,
						  BlockNumber blocknum);
extern bool neon_prefetch_in_progress(SMgrRelation reln);
extern void neon_reset_prefetch(SMgrRelation reln);
extern void neon_read(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum,
					  char *buffer);

@@ -197,29 +184,6 @@ extern void neon_truncate(SMgrRelation reln, ForkNumber forknum,
						  BlockNumber nblocks);
extern void neon_immedsync(SMgrRelation reln, ForkNumber forknum);

/* neon wal-redo storage manager functionality */

extern void inmem_init(void);
extern void inmem_open(SMgrRelation reln);
extern void inmem_close(SMgrRelation reln, ForkNumber forknum);
extern void inmem_create(SMgrRelation reln, ForkNumber forknum, bool isRedo);
extern bool inmem_exists(SMgrRelation reln, ForkNumber forknum);
extern void inmem_unlink(RelFileNodeBackend rnode, ForkNumber forknum, bool isRedo);
extern void inmem_extend(SMgrRelation reln, ForkNumber forknum,
						 BlockNumber blocknum, char *buffer, bool skipFsync);
extern bool inmem_prefetch(SMgrRelation reln, ForkNumber forknum,
						   BlockNumber blocknum);
extern void inmem_read(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum,
					   char *buffer);
extern void inmem_write(SMgrRelation reln, ForkNumber forknum,
						BlockNumber blocknum, char *buffer, bool skipFsync);
extern void inmem_writeback(SMgrRelation reln, ForkNumber forknum,
							BlockNumber blocknum, BlockNumber nblocks);
extern BlockNumber inmem_nblocks(SMgrRelation reln, ForkNumber forknum);
extern void inmem_truncate(SMgrRelation reln, ForkNumber forknum,
						   BlockNumber nblocks);
extern void inmem_immedsync(SMgrRelation reln, ForkNumber forknum);

/* utils for neon relsize cache */
extern void relsize_hash_init(void);
extern bool get_cached_relsize(RelFileNode rnode, ForkNumber forknum, BlockNumber *size);

@@ -99,7 +99,6 @@ char *page_server_connstring;
/* with substituted password */
char	   *neon_timeline;
char	   *neon_tenant;
bool		wal_redo = false;
int32		max_cluster_size;

/* unlogged relation build states */
@@ -127,7 +126,7 @@ static UnloggedBuildPhase unlogged_build_phase = UNLOGGED_BUILD_NOT_IN_PROGRESS;
 * all prefetch responses have to be consumed.
 */

#define MAX_PREFETCH_REQUESTS 1024
#define MAX_PREFETCH_REQUESTS 128

BufferTag	prefetch_requests[MAX_PREFETCH_REQUESTS];
BufferTag	prefetch_responses[MAX_PREFETCH_REQUESTS];
@@ -220,15 +219,6 @@ nm_pack_request(NeonRequest * msg)

				break;
			}
		case T_NeonFcntlRequest:
			{
				NeonFcntlRequest *msg_req = (NeonFcntlRequest *) msg;
				pq_sendint32(&s, msg_req->cmd);
				pq_sendint32(&s, msg_req->size);
				pq_sendbytes(&s, msg_req->data, msg_req->size);

				break;
			}

			/* pagestore -> pagestore_client. We never need to create these. */
		case T_NeonExistsResponse:
@@ -1015,25 +1005,12 @@ neon_close(SMgrRelation reln, ForkNumber forknum)


/*
 * neon_prefetch_in_progress() -- Check if there are active prefetch requests.
 * neon_reset_prefetch() -- remove all previously registered prefetch requests
 */
bool
neon_prefetch_in_progress(SMgrRelation reln)
{
	return n_prefetch_requests + n_prefetch_responses != 0;
}


void
neon_fcntl(SMgrRelation reln, int cmd, void const* data, size_t size)
neon_reset_prefetch(SMgrRelation reln)
{
	NeonFcntlRequest* req = (NeonFcntlRequest *) palloc(sizeof(NeonFcntlRequest) + size);
	req->req.tag = T_NeonFcntlRequest;
	req->cmd = cmd;
	req->size = (int) size;
	memcpy(req->data, data, size);
	page_server->send((NeonRequest *) req);
	page_server->flush();
	n_prefetch_requests = 0;
}

/*
@@ -1838,7 +1815,7 @@ static const struct f_smgr neon_smgr =
	.smgr_unlink = neon_unlink,
	.smgr_extend = neon_extend,
	.smgr_prefetch = neon_prefetch,
	.smgr_prefetch_in_progress = neon_prefetch_in_progress,
	.smgr_reset_prefetch = neon_reset_prefetch,
	.smgr_read = neon_read,
	.smgr_write = neon_write,
	.smgr_writeback = neon_writeback,
@@ -1849,7 +1826,6 @@ static const struct f_smgr neon_smgr =
	.smgr_start_unlogged_build = neon_start_unlogged_build,
	.smgr_finish_unlogged_build_phase_1 = neon_finish_unlogged_build_phase_1,
	.smgr_end_unlogged_build = neon_end_unlogged_build,
	.smgr_fcntl = neon_fcntl
};

const f_smgr *

@@ -43,6 +43,7 @@
#if PG_VERSION_NUM >= 150000
#include "access/xlogrecovery.h"
#endif
#include "storage/fd.h"
#include "storage/latch.h"
#include "miscadmin.h"
#include "pgstat.h"
@@ -69,7 +70,8 @@
#include "neon.h"
#include "walproposer.h"
#include "walproposer_utils.h"
#include "replication/walpropshim.h"

static bool syncSafekeepers = false;

char	   *wal_acceptors_list;
int			wal_acceptor_reconnect_timeout;
@@ -117,8 +119,8 @@ static TimestampTz last_reconnect_attempt;
static WalproposerShmemState * walprop_shared;

/* Prototypes for private functions */
static void WalProposerInitImpl(XLogRecPtr flushRecPtr, uint64 systemId);
static void WalProposerStartImpl(void);
static void WalProposerInit(XLogRecPtr flushRecPtr, uint64 systemId);
static void WalProposerStart(void);
static void WalProposerLoop(void);
static void InitEventSet(void);
static void UpdateEventSet(Safekeeper *sk, uint32 events);
@@ -186,9 +188,56 @@ pg_init_walproposer(void)
	ProcessInterruptsCallback = backpressure_throttling_impl;

	WalProposerRegister();
}

	WalProposerInit = &WalProposerInitImpl;
	WalProposerStart = &WalProposerStartImpl;
/*
 * Entry point for `postgres --sync-safekeepers`.
 */
void
WalProposerSync(int argc, char *argv[])
{
	struct stat stat_buf;

	syncSafekeepers = true;
#if PG_VERSION_NUM < 150000
	ThisTimeLineID = 1;
#endif

	/*
	 * Initialize postmaster_alive_fds as WaitEventSet checks them.
	 *
	 * Copied from InitPostmasterDeathWatchHandle()
	 */
	if (pipe(postmaster_alive_fds) < 0)
		ereport(FATAL,
				(errcode_for_file_access(),
				 errmsg_internal("could not create pipe to monitor postmaster death: %m")));
	if (fcntl(postmaster_alive_fds[POSTMASTER_FD_WATCH], F_SETFL, O_NONBLOCK) == -1)
		ereport(FATAL,
				(errcode_for_socket_access(),
				 errmsg_internal("could not set postmaster death monitoring pipe to nonblocking mode: %m")));

	ChangeToDataDir();

	/* Create pg_wal directory, if it doesn't exist */
	if (stat(XLOGDIR, &stat_buf) != 0)
	{
		ereport(LOG, (errmsg("creating missing WAL directory \"%s\"", XLOGDIR)));
		if (MakePGDirectory(XLOGDIR) < 0)
		{
			ereport(ERROR,
					(errcode_for_file_access(),
					 errmsg("could not create directory \"%s\": %m",
							XLOGDIR)));
			exit(1);
		}
	}

	WalProposerInit(0, 0);

	BackgroundWorkerUnblockSignals();

	WalProposerStart();
}

static void
@@ -429,7 +478,7 @@ WalProposerRegister(void)
}

static void
WalProposerInitImpl(XLogRecPtr flushRecPtr, uint64 systemId)
WalProposerInit(XLogRecPtr flushRecPtr, uint64 systemId)
{
	char	   *host;
	char	   *sep;
@@ -508,7 +557,7 @@ WalProposerInitImpl(XLogRecPtr flushRecPtr, uint64 systemId)
}

static void
WalProposerStartImpl(void)
WalProposerStart(void)
{

	/* Initiate connections to all safekeeper nodes */

22  pgxn/neon_walredo/Makefile  Normal file
@@ -0,0 +1,22 @@
# pgxs/neon_walredo/Makefile

MODULE_big = neon_walredo
OBJS = \
	$(WIN32RES) \
	inmem_smgr.o \
	walredoproc.o \

# This really should be guarded by $(with_libseccomp), but I couldn't
# make that work with pgxs. So we always compile it, but its contents
# are wrapped in #ifdef HAVE_LIBSECCOMP instead.
OBJS += seccomp.o

PGFILEDESC = "neon_walredo - helper process that runs in Neon pageserver"

PG_CONFIG = pg_config
PGXS := $(shell $(PG_CONFIG) --pgxs)
include $(PGXS)

ifeq ($(with_libseccomp),yes)
SHLIB_LINK += -lseccomp
endif
@@ -3,9 +3,8 @@
 * inmem_smgr.c
 *
 * This is an implementation of the SMGR interface, used in the WAL redo
 * process (see src/backend/tcop/zenith_wal_redo.c). It has no persistent
 * storage, the pages that are written out are kept in a small number of
 * in-memory buffers.
 * process. It has no persistent storage, the pages that are written out
 * are kept in a small number of in-memory buffers.
 *
 * Normally, replaying a WAL record only needs to access a handful of
 * buffers, which fit in the normal buffer cache, so this is just for
@@ -15,15 +14,11 @@
 * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 * IDENTIFICATION
 *	contrib/neon/inmem_smgr.c
 *
 *-------------------------------------------------------------------------
 */
#include "postgres.h"

#include "access/xlog.h"
#include "pagestore_client.h"
#include "storage/block.h"
#include "storage/buf_internals.h"
#include "storage/relfilenode.h"
@@ -33,6 +28,8 @@
#include "access/xlogutils.h"
#endif

#include "inmem_smgr.h"

/* Size of the in-memory smgr */
#define MAX_PAGES 64

@@ -59,10 +56,34 @@ locate_page(SMgrRelation reln, ForkNumber forknum, BlockNumber blkno)
	return -1;
}

/* neon wal-redo storage manager functionality */
static void inmem_init(void);
static void inmem_open(SMgrRelation reln);
static void inmem_close(SMgrRelation reln, ForkNumber forknum);
static void inmem_create(SMgrRelation reln, ForkNumber forknum, bool isRedo);
static bool inmem_exists(SMgrRelation reln, ForkNumber forknum);
static void inmem_unlink(RelFileNodeBackend rnode, ForkNumber forknum, bool isRedo);
static void inmem_extend(SMgrRelation reln, ForkNumber forknum,
						 BlockNumber blocknum, char *buffer, bool skipFsync);
static bool inmem_prefetch(SMgrRelation reln, ForkNumber forknum,
						   BlockNumber blocknum);
static void inmem_read(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum,
					   char *buffer);
static void inmem_write(SMgrRelation reln, ForkNumber forknum,
						BlockNumber blocknum, char *buffer, bool skipFsync);
static void inmem_writeback(SMgrRelation reln, ForkNumber forknum,
							BlockNumber blocknum, BlockNumber nblocks);
static BlockNumber inmem_nblocks(SMgrRelation reln, ForkNumber forknum);
static void inmem_truncate(SMgrRelation reln, ForkNumber forknum,
						   BlockNumber nblocks);
static void inmem_immedsync(SMgrRelation reln, ForkNumber forknum);


/*
 * inmem_init() -- Initialize private state
 */
void
static void
inmem_init(void)
{
	used_pages = 0;
@@ -71,7 +92,7 @@ inmem_init(void)
/*
 * inmem_exists() -- Does the physical file exist?
 */
bool
static bool
inmem_exists(SMgrRelation reln, ForkNumber forknum)
{
	for (int i = 0; i < used_pages; i++)
@@ -90,7 +111,7 @@ inmem_exists(SMgrRelation reln, ForkNumber forknum)
 *
 * If isRedo is true, it's okay for the relation to exist already.
 */
void
static void
inmem_create(SMgrRelation reln, ForkNumber forknum, bool isRedo)
{
}
@@ -98,7 +119,7 @@ inmem_create(SMgrRelation reln, ForkNumber forknum, bool isRedo)
/*
 * inmem_unlink() -- Unlink a relation.
 */
void
static void
inmem_unlink(RelFileNodeBackend rnode, ForkNumber forknum, bool isRedo)
{
}
@@ -112,7 +133,7 @@ inmem_unlink(RelFileNodeBackend rnode, ForkNumber forknum, bool isRedo)
 *		EOF).  Note that we assume writing a block beyond current EOF
 *		causes intervening file space to become filled with zeroes.
 */
void
static void
inmem_extend(SMgrRelation reln, ForkNumber forknum, BlockNumber blkno,
			 char *buffer, bool skipFsync)
{
@@ -123,7 +144,7 @@ inmem_extend(SMgrRelation reln, ForkNumber forknum, BlockNumber blkno,
/*
 * inmem_open() -- Initialize newly-opened relation.
 */
void
static void
inmem_open(SMgrRelation reln)
{
}
@@ -131,7 +152,7 @@ inmem_open(SMgrRelation reln)
/*
 * inmem_close() -- Close the specified relation, if it isn't closed already.
 */
void
static void
inmem_close(SMgrRelation reln, ForkNumber forknum)
{
}
@@ -139,7 +160,7 @@ inmem_close(SMgrRelation reln, ForkNumber forknum)
/*
 * inmem_prefetch() -- Initiate asynchronous read of the specified block of a relation
 */
bool
static bool
inmem_prefetch(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum)
{
	return true;
@@ -148,7 +169,7 @@ inmem_prefetch(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum)
/*
 * inmem_writeback() -- Tell the kernel to write pages back to storage.
 */
void
static void
inmem_writeback(SMgrRelation reln, ForkNumber forknum,
				BlockNumber blocknum, BlockNumber nblocks)
{
@@ -157,7 +178,7 @@ inmem_writeback(SMgrRelation reln, ForkNumber forknum,
/*
 * inmem_read() -- Read the specified block from a relation.
 */
void
static void
inmem_read(SMgrRelation reln, ForkNumber forknum, BlockNumber blkno,
		   char *buffer)
{
@@ -177,7 +198,7 @@ inmem_read(SMgrRelation reln, ForkNumber forknum, BlockNumber blkno,
 *		relation (ie, those before the current EOF).  To extend a relation,
 *		use mdextend().
 */
void
static void
inmem_write(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum,
			char *buffer, bool skipFsync)
{
@@ -224,7 +245,7 @@ inmem_write(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum,
/*
 * inmem_nblocks() -- Get the number of blocks stored in a relation.
 */
BlockNumber
static BlockNumber
inmem_nblocks(SMgrRelation reln, ForkNumber forknum)
{
	/*
@@ -243,7 +264,7 @@ inmem_nblocks(SMgrRelation reln, ForkNumber forknum)
/*
 * inmem_truncate() -- Truncate relation to specified number of blocks.
 */
void
static void
inmem_truncate(SMgrRelation reln, ForkNumber forknum, BlockNumber nblocks)
{
}
@@ -251,7 +272,7 @@ inmem_truncate(SMgrRelation reln, ForkNumber forknum, BlockNumber nblocks)
/*
 * inmem_immedsync() -- Immediately sync a relation to stable storage.
 */
void
static void
inmem_immedsync(SMgrRelation reln, ForkNumber forknum)
{
}
17  pgxn/neon_walredo/inmem_smgr.h  Normal file
@@ -0,0 +1,17 @@
/*-------------------------------------------------------------------------
 *
 * inmem_smgr.h
 *
 *
 * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 *-------------------------------------------------------------------------
 */
#ifndef INMEM_SMGR_H
#define INMEM_SMGR_H

extern const f_smgr *smgr_inmem(BackendId backend, RelFileNode rnode);
extern void smgr_init_inmem(void);

#endif							/* INMEM_SMGR_H */
22
pgxn/neon_walredo/neon_seccomp.h
Normal file
22
pgxn/neon_walredo/neon_seccomp.h
Normal file
@@ -0,0 +1,22 @@
#ifndef NEON_SECCOMP_H
#define NEON_SECCOMP_H

#include <seccomp.h>

typedef struct {
	int		psr_syscall;	/* syscall number */
	uint32	psr_action;		/* libseccomp action, e.g. SCMP_ACT_ALLOW */
} PgSeccompRule;

#define PG_SCMP(syscall, action) \
	(PgSeccompRule) { \
		.psr_syscall = SCMP_SYS(syscall), \
		.psr_action = (action), \
	}

#define PG_SCMP_ALLOW(syscall) \
	PG_SCMP(syscall, SCMP_ACT_ALLOW)

extern void seccomp_load_rules(PgSeccompRule *syscalls, int count);

#endif /* NEON_SECCOMP_H */
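Note that PG_SCMP builds each rule as a C99 compound literal, so rule tables can be written inline as array initializers (as enter_seccomp_mode() in walredoproc.c below does). For example, PG_SCMP_ALLOW(read) expands to roughly:

(PgSeccompRule) {
	.psr_syscall = SCMP_SYS(read),	/* resolved to the arch-specific number */
	.psr_action = (SCMP_ACT_ALLOW),
}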
257
pgxn/neon_walredo/seccomp.c
Normal file
@@ -0,0 +1,257 @@
/*-------------------------------------------------------------------------
 *
 * seccomp.c
 *	  Secure Computing BPF API wrapper.
 *
 * Pageserver delegates complex WAL decoding duties to postgres,
 * which means that the latter might fall victim to carefully designed
 * malicious WAL records and start doing harmful things to the system.
 * To prevent this, it has been decided to limit possible interactions
 * with the outside world using the Secure Computing BPF mode.
 *
 * We use this mode to disable all syscalls not in the allowlist. This
 * approach has its pros & cons:
 *
 *  - We have to carefully handpick and maintain the set of syscalls
 *    required for the WAL redo process. Core dumps help with that.
 *    The method of trial and error seems to work reasonably well,
 *    but it would be nice to find a proper way to "prove" that
 *    the set in question is both necessary and sufficient.
 *
 *  - Once we enter the seccomp bpf mode, it's impossible to lift those
 *    restrictions (otherwise, what kind of "protection" would that be?).
 *    Thus, we have to either enable extra syscalls for the clean shutdown,
 *    or exit the process immediately via _exit() instead of proc_exit().
 *
 *  - Should we simply use SCMP_ACT_KILL_PROCESS, or implement a custom
 *    facility to deal with the forbidden syscalls? If we'd like to embed
 *    a startup security test, we should go with the latter; In that
 *    case, which one of the following options is preferable?
 *
 *    * Catch the denied syscalls with a signal handler using SCMP_ACT_TRAP.
 *      Provide a common signal handler with a static switch to override
 *      its behavior for the test case. This would undermine the whole
 *      purpose of such protection, so we'd have to go further and remap
 *      the memory backing the switch as readonly, then ban mprotect().
 *      Ugly and fragile, to say the least.
 *
 *    * Yet again, catch the denied syscalls using SCMP_ACT_TRAP.
 *      Provide 2 different signal handlers: one for a test case,
 *      another for the main processing loop. Install the first one,
 *      enable seccomp, perform the test, switch to the second one,
 *      finally ban sigaction(), presto!
 *
 *    * Spoof the result of a syscall using SECCOMP_RET_ERRNO for the
 *      test, then ban it altogether with another filter. The downside
 *      of this solution is that we don't actually check that
 *      SCMP_ACT_KILL_PROCESS/SCMP_ACT_TRAP works.
 *
 *    Either approach seems to require two eBPF filter programs,
 *    which is unfortunate: the man page tells this is uncommon.
 *    Maybe I (@funbringer) am missing something, though; I encourage
 *    any reader to get familiar with it and scrutinize my conclusions.
 *
 * TODOs and ideas in no particular order:
 *
 *  - Do something about mmap() in musl's malloc().
 *    Definitely not a priority if we don't care about musl.
 *
 *  - See if we can untangle PG's shutdown sequence (involving unlink()):
 *
 *    * Simplify (or rather get rid of) shmem setup in PG's WAL redo mode.
 *    * Investigate chroot() or mount namespaces for better FS isolation.
 *    * (Per Heikki) Simply call _exit(), no big deal.
 *    * Come up with a better idea?
 *
 *  - Make use of seccomp's argument inspection (for what?).
 *    Unfortunately, it views all syscall arguments as scalars,
 *    so it won't work for e.g. string comparison in unlink().
 *
 *  - Benchmark with bpf jit on/off, try seccomp_syscall_priority().
 *
 *  - Test against various linux distros & glibc versions.
 *    I suspect that certain libc functions might involve slightly
 *    different syscalls, e.g. select/pselect6/pselect6_time64/whatever.
 *
 *  - Test on any arch other than amd64 to see if it works there.
 *
 *-------------------------------------------------------------------------
 */

#include "postgres.h"

/*
 * I couldn't find a good way to do a conditional OBJS += seccomp.o in
 * the Makefile, so this file is compiled even when seccomp is disabled,
 * it's just empty in that case.
 */
#ifdef HAVE_LIBSECCOMP

#include <fcntl.h>
#include <unistd.h>

#include "miscadmin.h"

#include "neon_seccomp.h"

static void die(int code, const char *str);

static bool seccomp_test_sighandler_done = false;
static void seccomp_test_sighandler(int signum, siginfo_t *info, void *cxt);
static void seccomp_deny_sighandler(int signum, siginfo_t *info, void *cxt);

static int do_seccomp_load_rules(PgSeccompRule *rules, int count, uint32 def_action);

void
seccomp_load_rules(PgSeccompRule *rules, int count)
{
	struct sigaction action = { .sa_flags = SA_SIGINFO };
	PgSeccompRule rule;
	long	fd;

	/*
	 * Install a test signal handler.
	 * XXX: pqsignal() is too restrictive for our purposes,
	 * since we'd like to examine the contents of siginfo_t.
	 */
	action.sa_sigaction = seccomp_test_sighandler;
	if (sigaction(SIGSYS, &action, NULL) != 0)
		ereport(FATAL,
				(errcode(ERRCODE_SYSTEM_ERROR),
				 errmsg("seccomp: could not install test SIGSYS handler")));

	/*
	 * First, check that open of a well-known file works.
	 * XXX: We use raw syscall() to call the very open().
	 */
	fd = syscall(SCMP_SYS(open), "/dev/null", O_RDONLY, 0);
	if (seccomp_test_sighandler_done)
		ereport(FATAL,
				(errcode(ERRCODE_SYSTEM_ERROR),
				 errmsg("seccomp: signal handler test flag was set unexpectedly")));
	if (fd < 0)
		ereport(FATAL,
				(errcode(ERRCODE_SYSTEM_ERROR),
				 errmsg("seccomp: could not open /dev/null for seccomp testing: %m")));
	close((int) fd);

	/* Set a trap on open() to test seccomp bpf */
	rule = PG_SCMP(open, SCMP_ACT_TRAP);
	if (do_seccomp_load_rules(&rule, 1, SCMP_ACT_ALLOW) != 0)
		ereport(FATAL,
				(errcode(ERRCODE_SYSTEM_ERROR),
				 errmsg("seccomp: could not load test trap")));

	/* Finally, check that open() now raises SIGSYS */
	(void) syscall(SCMP_SYS(open), "/dev/null", O_RDONLY, 0);
	if (!seccomp_test_sighandler_done)
		ereport(FATAL,
				(errcode(ERRCODE_SYSTEM_ERROR),
				 errmsg("seccomp: SIGSYS handler doesn't seem to work")));

	/* Now that everything seems to work, install a proper handler */
	action.sa_sigaction = seccomp_deny_sighandler;
	if (sigaction(SIGSYS, &action, NULL) != 0)
		ereport(FATAL,
				(errcode(ERRCODE_SYSTEM_ERROR),
				 errmsg("seccomp: could not install SIGSYS handler")));

	/* If this succeeds, any syscall not in the list will crash the process */
	if (do_seccomp_load_rules(rules, count, SCMP_ACT_TRAP) != 0)
		ereport(FATAL,
				(errcode(ERRCODE_SYSTEM_ERROR),
				 errmsg("seccomp: could not enter seccomp mode")));
}

/*
 * Enter seccomp mode with a BPF filter that will only allow
 * certain syscalls to proceed.
 */
static int
do_seccomp_load_rules(PgSeccompRule *rules, int count, uint32 def_action)
{
	scmp_filter_ctx ctx;
	int		rc = -1;

	/* Create a context with a default action for syscalls not in the list */
	if ((ctx = seccomp_init(def_action)) == NULL)
		goto cleanup;

	for (int i = 0; i < count; i++)
	{
		PgSeccompRule *rule = &rules[i];
		if ((rc = seccomp_rule_add(ctx, rule->psr_action, rule->psr_syscall, 0)) != 0)
			goto cleanup;
	}

	/* Try building & loading the program into the kernel */
	if ((rc = seccomp_load(ctx)) != 0)
		goto cleanup;

cleanup:
	/*
	 * We don't need the context anymore regardless of the result,
	 * since either we failed or the eBPF program has already been
	 * loaded into the linux kernel.
	 */
	seccomp_release(ctx);
	return rc;
}

static void
die(int code, const char *str)
{
	/* work around gcc ignoring that it shouldn't warn on (void) result being unused */
	ssize_t	_unused pg_attribute_unused();

	/* Best effort write to stderr */
	_unused = write(fileno(stderr), str, strlen(str));

	/* XXX: we don't want to run any atexit callbacks */
	_exit(code);
}

static void
seccomp_test_sighandler(int signum, siginfo_t *info, void *cxt pg_attribute_unused())
{
#define DIE_PREFIX "seccomp test signal handler: "

	/* Check that this signal handler is used only for a single test case */
	if (seccomp_test_sighandler_done)
		die(1, DIE_PREFIX "test handler should only be used for 1 test\n");
	seccomp_test_sighandler_done = true;

	if (signum != SIGSYS)
		die(1, DIE_PREFIX "bad signal number\n");

	/* TODO: maybe somehow extract the hardcoded syscall number */
	if (info->si_syscall != SCMP_SYS(open))
		die(1, DIE_PREFIX "bad syscall number\n");

#undef DIE_PREFIX
}

static void
seccomp_deny_sighandler(int signum, siginfo_t *info, void *cxt pg_attribute_unused())
{
	/*
	 * Unfortunately, we can't use seccomp_syscall_resolve_num_arch()
	 * to resolve the syscall's name, since it calls strdup()
	 * under the hood (wtf!).
	 */
	char	buffer[128];
	(void) snprintf(buffer, lengthof(buffer),
					"---------------------------------------\n"
					"seccomp: bad syscall %d\n"
					"---------------------------------------\n",
					info->si_syscall);

	/*
	 * Instead of silently crashing the process with
	 * a fake SIGSYS caused by SCMP_ACT_KILL_PROCESS,
	 * we'd like to receive a real SIGSYS to print the
	 * message and *then* immediately exit.
	 */
	die(1, buffer);
}

#endif /* HAVE_LIBSECCOMP */
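For readers new to libseccomp, the pattern wrapped by seccomp_load_rules()/do_seccomp_load_rules() above reduces to a handful of library calls. A minimal, self-contained sketch (an illustration under stated assumptions, not part of this patch; compile with -lseccomp; memory-allocation syscalls are allowed because even libc helpers called after seccomp_load() may allocate):

#include <string.h>
#include <unistd.h>
#include <seccomp.h>

int
main(void)
{
	/* Any syscall not explicitly allowed below raises SIGSYS. */
	scmp_filter_ctx ctx = seccomp_init(SCMP_ACT_TRAP);

	if (ctx == NULL)
		return 1;

	/* Bare minimum to write to stdout, allocate, and exit cleanly. */
	seccomp_rule_add(ctx, SCMP_ACT_ALLOW, SCMP_SYS(write), 0);
	seccomp_rule_add(ctx, SCMP_ACT_ALLOW, SCMP_SYS(exit_group), 0);
	seccomp_rule_add(ctx, SCMP_ACT_ALLOW, SCMP_SYS(brk), 0);
	seccomp_rule_add(ctx, SCMP_ACT_ALLOW, SCMP_SYS(mmap), 0);
	seccomp_rule_add(ctx, SCMP_ACT_ALLOW, SCMP_SYS(munmap), 0);

	if (seccomp_load(ctx) != 0)	/* compile & install the BPF program */
		return 1;
	seccomp_release(ctx);		/* the kernel owns the filter from here on */

	write(STDOUT_FILENO, "restricted\n", 11);

	/* An openat("/etc/passwd", ...) here would kill the process with SIGSYS. */
	_exit(0);
}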
847
pgxn/neon_walredo/walredoproc.c
Normal file
@@ -0,0 +1,847 @@
/*-------------------------------------------------------------------------
 *
 * walredoproc.c
 *	  Entry point for WAL redo helper
 *
 *
 * This file contains an alternative main() function for the 'postgres'
 * binary. In this mode, the process works similarly to single user mode.
 * We don't launch postmaster or any auxiliary processes. Instead, we wait
 * for commands from 'stdin', and respond to 'stdout'.
 *
 * The protocol through stdin/stdout is loosely based on the libpq protocol.
 * The process accepts messages through stdin, and each message has the format:
 *
 *	char msgtype;
 *	int32 length;	// length of message including 'length' but excluding
 *					// 'msgtype', in network byte order
 *	<payload>
 *
 * There are four message types:
 *
 * BeginRedoForBlock ('B'): Prepare for WAL replay for given block
 * PushPage ('P'): Copy a page image (in the payload) to buffer cache
 * ApplyRecord ('A'): Apply a WAL record (in the payload)
 * GetPage ('G'): Return a page image from buffer cache.
 *
 * Currently, you only get a response to GetPage requests; the response is
 * simply an 8k page, without any headers. Errors are logged to stderr.
 *
 * FIXME:
 * - this currently requires a valid PGDATA, and creates a lock file there
 *   like a normal postmaster. There's no fundamental reason for that, though.
 * - should have EndRedoForBlock, and flush page cache, to allow using this
 *   mechanism for more than one block without restarting the process.
 *
 *
 * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 *-------------------------------------------------------------------------
 */

#include "postgres.h"

#include <fcntl.h>
#include <limits.h>
#include <signal.h>
#include <unistd.h>
#include <sys/socket.h>
#ifdef HAVE_SYS_SELECT_H
#include <sys/select.h>
#endif
#ifdef HAVE_SYS_RESOURCE_H
#include <sys/time.h>
#include <sys/resource.h>
#endif

#if defined(HAVE_LIBSECCOMP) && defined(__GLIBC__)
#define MALLOC_NO_MMAP
#include <malloc.h>
#endif

#ifndef HAVE_GETRUSAGE
#include "rusagestub.h"
#endif

#include "access/xlog.h"
#include "access/xlog_internal.h"
#if PG_VERSION_NUM >= 150000
#include "access/xlogrecovery.h"
#endif
#include "access/xlogutils.h"
#include "catalog/pg_class.h"
#include "libpq/libpq.h"
#include "libpq/pqformat.h"
#include "miscadmin.h"
#include "postmaster/postmaster.h"
#include "storage/buf_internals.h"
#include "storage/bufmgr.h"
#include "storage/ipc.h"
#include "storage/proc.h"
#include "storage/smgr.h"
#include "tcop/tcopprot.h"
#include "utils/memutils.h"
#include "utils/ps_status.h"

#include "inmem_smgr.h"

#ifdef HAVE_LIBSECCOMP
#include "neon_seccomp.h"
#endif

PG_MODULE_MAGIC;

static int	ReadRedoCommand(StringInfo inBuf);
static void BeginRedoForBlock(StringInfo input_message);
static void PushPage(StringInfo input_message);
static void ApplyRecord(StringInfo input_message);
static void apply_error_callback(void *arg);
static bool redo_block_filter(XLogReaderState *record, uint8 block_id);
static void GetPage(StringInfo input_message);
static ssize_t buffered_read(void *buf, size_t count);

static BufferTag target_redo_tag;

static XLogReaderState *reader_state;

#define TRACE DEBUG5

#ifdef HAVE_LIBSECCOMP
static void
enter_seccomp_mode(void)
{
	PgSeccompRule syscalls[] =
	{
		/* Hard requirements */
		PG_SCMP_ALLOW(exit_group),
		PG_SCMP_ALLOW(pselect6),
		PG_SCMP_ALLOW(read),
		PG_SCMP_ALLOW(select),
		PG_SCMP_ALLOW(write),

		/* Memory allocation */
		PG_SCMP_ALLOW(brk),
#ifndef MALLOC_NO_MMAP
		/* TODO: musl doesn't have mallopt */
		PG_SCMP_ALLOW(mmap),
		PG_SCMP_ALLOW(munmap),
#endif
		/*
		 * getpid() is called on assertion failure, in ExceptionalCondition.
		 * It's not really needed, but seems pointless to hide it either. The
		 * system call is unlikely to expose a kernel vulnerability, and the
		 * PID is stored in MyProcPid anyway.
		 */
		PG_SCMP_ALLOW(getpid),

		/* Enable those for a proper shutdown.
		PG_SCMP_ALLOW(munmap),
		PG_SCMP_ALLOW(shmctl),
		PG_SCMP_ALLOW(shmdt),
		PG_SCMP_ALLOW(unlink), // shm_unlink
		*/
	};

#ifdef MALLOC_NO_MMAP
	/* Ask glibc not to use mmap() */
	mallopt(M_MMAP_MAX, 0);
#endif

	seccomp_load_rules(syscalls, lengthof(syscalls));
}
#endif /* HAVE_LIBSECCOMP */

/*
 * Entry point for the WAL redo process.
 *
 * Performs similar initialization as PostgresMain does for normal
 * backend processes. Some initialization was done in CallExtMain
 * already.
 */
void
WalRedoMain(int argc, char *argv[])
{
	int		firstchar;
	StringInfoData input_message;
#ifdef HAVE_LIBSECCOMP
	bool	enable_seccomp;
#endif

	am_wal_redo_postgres = true;

	/*
	 * WAL redo does not need a large number of buffers. And speed of
	 * DropRelFileNodeAllLocalBuffers() is proportional to the number of
	 * buffers. So let's keep it small (default value is 1024)
	 */
	num_temp_buffers = 4;

	/*
	 * install the simple in-memory smgr
	 */
	smgr_hook = smgr_inmem;
	smgr_init_hook = smgr_init_inmem;

	/*
	 * Validate we have been given a reasonable-looking DataDir and change into it.
	 */
	checkDataDir();
	ChangeToDataDir();

	/*
	 * Create lockfile for data directory.
	 */
	CreateDataDirLockFile(false);

	/* read control file (error checking and contains config) */
	LocalProcessControlFile(false);

	/*
	 * process any libraries that should be preloaded at postmaster start
	 */
	process_shared_preload_libraries();

	/* Initialize MaxBackends (if under postmaster, was done already) */
	InitializeMaxBackends();

#if PG_VERSION_NUM >= 150000
	/*
	 * Give preloaded libraries a chance to request additional shared memory.
	 */
	process_shmem_requests();

	/*
	 * Now that loadable modules have had their chance to request additional
	 * shared memory, determine the value of any runtime-computed GUCs that
	 * depend on the amount of shared memory required.
	 */
	InitializeShmemGUCs();

	/*
	 * Now that modules have been loaded, we can process any custom resource
	 * managers specified in the wal_consistency_checking GUC.
	 */
	InitializeWalConsistencyChecking();
#endif

	CreateSharedMemoryAndSemaphores();

	/*
	 * Remember stand-alone backend startup time, roughly at the same point
	 * during startup that postmaster does so.
	 */
	PgStartTime = GetCurrentTimestamp();

	/*
	 * Create a per-backend PGPROC struct in shared memory. We must do
	 * this before we can use LWLocks.
	 */
	InitAuxiliaryProcess();

	SetProcessingMode(NormalProcessing);

	/* Redo routines won't work if we're not "in recovery" */
	InRecovery = true;

	/*
	 * Create the memory context we will use in the main loop.
	 *
	 * MessageContext is reset once per iteration of the main loop, ie, upon
	 * completion of processing of each command message from the client.
	 */
	MessageContext = AllocSetContextCreate(TopMemoryContext,
										   "MessageContext",
										   ALLOCSET_DEFAULT_SIZES);

	/* we need a ResourceOwner to hold buffer pins */
	Assert(CurrentResourceOwner == NULL);
	CurrentResourceOwner = ResourceOwnerCreate(NULL, "wal redo");

	/* Initialize resource managers */
	for (int rmid = 0; rmid <= RM_MAX_ID; rmid++)
	{
		if (RmgrTable[rmid].rm_startup != NULL)
			RmgrTable[rmid].rm_startup();
	}
	reader_state = XLogReaderAllocate(wal_segment_size, NULL, XL_ROUTINE(), NULL);

#ifdef HAVE_LIBSECCOMP
	/* We prefer opt-out to opt-in for greater security */
	enable_seccomp = true;
	for (int i = 1; i < argc; i++)
		if (strcmp(argv[i], "--disable-seccomp") == 0)
			enable_seccomp = false;

	/*
	 * We deliberately delay the transition to the seccomp mode
	 * until it's time to enter the main processing loop;
	 * else we'd have to add a lot more syscalls to the allowlist.
	 */
	if (enable_seccomp)
		enter_seccomp_mode();
#endif /* HAVE_LIBSECCOMP */

	/*
	 * Main processing loop
	 */
	MemoryContextSwitchTo(MessageContext);
	initStringInfo(&input_message);

	for (;;)
	{
		/* Release memory left over from prior query cycle. */
		resetStringInfo(&input_message);

		set_ps_display("idle");

		/*
		 * (3) read a command (loop blocks here)
		 */
		firstchar = ReadRedoCommand(&input_message);
		switch (firstchar)
		{
			case 'B':			/* BeginRedoForBlock */
				BeginRedoForBlock(&input_message);
				break;

			case 'P':			/* PushPage */
				PushPage(&input_message);
				break;

			case 'A':			/* ApplyRecord */
				ApplyRecord(&input_message);
				break;

			case 'G':			/* GetPage */
				GetPage(&input_message);
				break;

				/*
				 * EOF means we're done. Perform normal shutdown.
				 */
			case EOF:
				ereport(LOG,
						(errmsg("received EOF on stdin, shutting down")));

#ifdef HAVE_LIBSECCOMP
				/*
				 * Skip the shutdown sequence, leaving some garbage behind.
				 * Hopefully, postgres will clean it up in the next run.
				 * This way we don't have to enable extra syscalls, which is nice.
				 * See enter_seccomp_mode() above.
				 */
				if (enable_seccomp)
					_exit(0);
#endif /* HAVE_LIBSECCOMP */
				/*
				 * NOTE: if you are tempted to add more code here, DON'T!
				 * Whatever you had in mind to do should be set up as an
				 * on_proc_exit or on_shmem_exit callback, instead. Otherwise
				 * it will fail to be called during other backend-shutdown
				 * scenarios.
				 */
				proc_exit(0);

			default:
				ereport(FATAL,
						(errcode(ERRCODE_PROTOCOL_VIOLATION),
						 errmsg("invalid frontend message type %d",
								firstchar)));
		}
	}							/* end of input-reading loop */
}


/* Version compatibility wrapper for ReadBufferWithoutRelcache */
static inline Buffer
NeonRedoReadBuffer(RelFileNode rnode,
				   ForkNumber forkNum, BlockNumber blockNum,
				   ReadBufferMode mode)
{
#if PG_VERSION_NUM >= 150000
	return ReadBufferWithoutRelcache(rnode, forkNum, blockNum, mode,
									 NULL,	/* no strategy */
									 true);	/* WAL redo is only performed on permanent rels */
#else
	return ReadBufferWithoutRelcache(rnode, forkNum, blockNum, mode,
									 NULL);	/* no strategy */
#endif
}


/*
 * Some debug function that may be handy for now.
 */
pg_attribute_unused()
static char *
pprint_buffer(char *data, int len)
{
	StringInfoData s;

	initStringInfo(&s);
	appendStringInfo(&s, "\n");
	for (int i = 0; i < len; i++) {

		appendStringInfo(&s, "%02x ", (*(((char *) data) + i) & 0xff));
		if (i % 32 == 31) {
			appendStringInfo(&s, "\n");
		}
	}
	appendStringInfo(&s, "\n");

	return s.data;
}

/* ----------------------------------------------------------------
 *		routines to obtain user input
 * ----------------------------------------------------------------
 */

/*
 * Read next command from the client.
 *
 * The string entered by the user is placed in its parameter inBuf,
 * and we act like a Q message was received.
 *
 * EOF is returned if end-of-file input is seen; time to shut down.
 * ----------------
 */
static int
ReadRedoCommand(StringInfo inBuf)
{
	ssize_t	ret;
	char	hdr[1 + sizeof(int32)];
	int		qtype;
	int32	len;

	/* Read message type and message length */
	ret = buffered_read(hdr, sizeof(hdr));
	if (ret != sizeof(hdr))
	{
		if (ret == 0)
			return EOF;
		else if (ret < 0)
			ereport(ERROR,
					(errcode(ERRCODE_CONNECTION_FAILURE),
					 errmsg("could not read message header: %m")));
		else
			ereport(ERROR,
					(errcode(ERRCODE_PROTOCOL_VIOLATION),
					 errmsg("unexpected EOF")));
	}

	qtype = hdr[0];
	memcpy(&len, &hdr[1], sizeof(int32));
	len = pg_ntoh32(len);

	if (len < 4)
		ereport(ERROR,
				(errcode(ERRCODE_PROTOCOL_VIOLATION),
				 errmsg("invalid message length")));

	len -= 4;					/* discount length itself */

	/* Read the message payload */
	enlargeStringInfo(inBuf, len);
	ret = buffered_read(inBuf->data, len);
	if (ret != len)
	{
		if (ret < 0)
			ereport(ERROR,
					(errcode(ERRCODE_CONNECTION_FAILURE),
					 errmsg("could not read message: %m")));
		else
			ereport(ERROR,
					(errcode(ERRCODE_PROTOCOL_VIOLATION),
					 errmsg("unexpected EOF")));
	}
	inBuf->len = len;
	inBuf->data[len] = '\0';

	return qtype;
}

/*
 * Prepare for WAL replay on given block
 */
static void
BeginRedoForBlock(StringInfo input_message)
{
	RelFileNode rnode;
	ForkNumber	forknum;
	BlockNumber blknum;
	SMgrRelation reln;

	/*
	 * message format:
	 *
	 * spcNode
	 * dbNode
	 * relNode
	 * ForkNumber
	 * BlockNumber
	 */
	forknum = pq_getmsgbyte(input_message);
	rnode.spcNode = pq_getmsgint(input_message, 4);
	rnode.dbNode = pq_getmsgint(input_message, 4);
	rnode.relNode = pq_getmsgint(input_message, 4);
	blknum = pq_getmsgint(input_message, 4);
	wal_redo_buffer = InvalidBuffer;

	INIT_BUFFERTAG(target_redo_tag, rnode, forknum, blknum);

	elog(TRACE, "BeginRedoForBlock %u/%u/%u.%d blk %u",
		 target_redo_tag.rnode.spcNode,
		 target_redo_tag.rnode.dbNode,
		 target_redo_tag.rnode.relNode,
		 target_redo_tag.forkNum,
		 target_redo_tag.blockNum);

	reln = smgropen(rnode, InvalidBackendId, RELPERSISTENCE_PERMANENT);
	if (reln->smgr_cached_nblocks[forknum] == InvalidBlockNumber ||
		reln->smgr_cached_nblocks[forknum] < blknum + 1)
	{
		reln->smgr_cached_nblocks[forknum] = blknum + 1;
	}
}

/*
 * Receive a page given by the client, and put it into buffer cache.
 */
static void
PushPage(StringInfo input_message)
{
	RelFileNode rnode;
	ForkNumber	forknum;
	BlockNumber blknum;
	const char *content;
	Buffer		buf;
	Page		page;

	/*
	 * message format:
	 *
	 * spcNode
	 * dbNode
	 * relNode
	 * ForkNumber
	 * BlockNumber
	 * 8k page content
	 */
	forknum = pq_getmsgbyte(input_message);
	rnode.spcNode = pq_getmsgint(input_message, 4);
	rnode.dbNode = pq_getmsgint(input_message, 4);
	rnode.relNode = pq_getmsgint(input_message, 4);
	blknum = pq_getmsgint(input_message, 4);
	content = pq_getmsgbytes(input_message, BLCKSZ);

	buf = NeonRedoReadBuffer(rnode, forknum, blknum, RBM_ZERO_AND_LOCK);
	wal_redo_buffer = buf;
	page = BufferGetPage(buf);
	memcpy(page, content, BLCKSZ);
	MarkBufferDirty(buf);		/* pro forma */
	UnlockReleaseBuffer(buf);
}

/*
 * Receive a WAL record, and apply it.
 *
 * All the pages should be loaded into the buffer cache by PushPage calls already.
 */
static void
ApplyRecord(StringInfo input_message)
{
	char	   *errormsg;
	XLogRecPtr	lsn;
	XLogRecord *record;
	int			nleft;
	ErrorContextCallback errcallback;
#if PG_VERSION_NUM >= 150000
	DecodedXLogRecord *decoded;
#endif

	/*
	 * message format:
	 *
	 * LSN (the *end* of the record)
	 * record
	 */
	lsn = pq_getmsgint64(input_message);

	smgrinit();					/* reset inmem smgr state */

	/* note: the input must be aligned here */
	record = (XLogRecord *) pq_getmsgbytes(input_message, sizeof(XLogRecord));

	nleft = input_message->len - input_message->cursor;
	if (record->xl_tot_len != sizeof(XLogRecord) + nleft)
		elog(ERROR, "mismatch between record (%d) and message size (%d)",
			 record->xl_tot_len, (int) sizeof(XLogRecord) + nleft);

	/* Setup error traceback support for ereport() */
	errcallback.callback = apply_error_callback;
	errcallback.arg = (void *) reader_state;
	errcallback.previous = error_context_stack;
	error_context_stack = &errcallback;

	XLogBeginRead(reader_state, lsn);

#if PG_VERSION_NUM >= 150000
	decoded = (DecodedXLogRecord *) XLogReadRecordAlloc(reader_state, record->xl_tot_len, true);

	if (!DecodeXLogRecord(reader_state, decoded, record, lsn, &errormsg))
		elog(ERROR, "failed to decode WAL record: %s", errormsg);
	else
	{
		/* Record the location of the next record. */
		decoded->next_lsn = reader_state->NextRecPtr;

		/*
		 * If it's in the decode buffer, mark the decode buffer space as
		 * occupied.
		 */
		if (!decoded->oversized)
		{
			/* The new decode buffer head must be MAXALIGNed. */
			Assert(decoded->size == MAXALIGN(decoded->size));
			if ((char *) decoded == reader_state->decode_buffer)
				reader_state->decode_buffer_tail = reader_state->decode_buffer + decoded->size;
			else
				reader_state->decode_buffer_tail += decoded->size;
		}

		/* Insert it into the queue of decoded records. */
		Assert(reader_state->decode_queue_tail != decoded);
		if (reader_state->decode_queue_tail)
			reader_state->decode_queue_tail->next = decoded;
		reader_state->decode_queue_tail = decoded;
		if (!reader_state->decode_queue_head)
			reader_state->decode_queue_head = decoded;

		/*
		 * Update the pointers to the beginning and one-past-the-end of this
		 * record, again for the benefit of historical code that expected the
		 * decoder to track this rather than accessing these fields of the record
		 * itself.
		 */
		reader_state->record = reader_state->decode_queue_head;
		reader_state->ReadRecPtr = reader_state->record->lsn;
		reader_state->EndRecPtr = reader_state->record->next_lsn;
	}
#else
	/*
	 * In lieu of calling XLogReadRecord, store the record in the
	 * 'decoded_record' buffer directly.
	 */
	reader_state->ReadRecPtr = lsn;
	reader_state->decoded_record = record;
	if (!DecodeXLogRecord(reader_state, record, &errormsg))
		elog(ERROR, "failed to decode WAL record: %s", errormsg);
#endif

	/* Ignore any other blocks than the ones the caller is interested in */
	redo_read_buffer_filter = redo_block_filter;

	RmgrTable[record->xl_rmid].rm_redo(reader_state);

	/*
	 * If no base image of the page was provided by PushPage, initialize
	 * wal_redo_buffer here. The first WAL record must initialize the page
	 * in that case.
	 */
	if (BufferIsInvalid(wal_redo_buffer))
	{
		wal_redo_buffer = NeonRedoReadBuffer(target_redo_tag.rnode,
											 target_redo_tag.forkNum,
											 target_redo_tag.blockNum,
											 RBM_NORMAL);
		Assert(!BufferIsInvalid(wal_redo_buffer));
		ReleaseBuffer(wal_redo_buffer);
	}

	redo_read_buffer_filter = NULL;

	/* Pop the error context stack */
	error_context_stack = errcallback.previous;

	elog(TRACE, "applied WAL record with LSN %X/%X",
		 (uint32) (lsn >> 32), (uint32) lsn);
#if PG_VERSION_NUM >= 150000
	if (decoded && decoded->oversized)
		pfree(decoded);
#endif
}

/*
 * Error context callback for errors occurring during ApplyRecord
 */
static void
apply_error_callback(void *arg)
{
	XLogReaderState *record = (XLogReaderState *) arg;
	StringInfoData buf;

	initStringInfo(&buf);
	xlog_outdesc(&buf, record);

	/* translator: %s is a WAL record description */
	errcontext("WAL redo at %X/%X for %s",
			   LSN_FORMAT_ARGS(record->ReadRecPtr),
			   buf.data);

	pfree(buf.data);
}



static bool
redo_block_filter(XLogReaderState *record, uint8 block_id)
{
	BufferTag	target_tag;

#if PG_VERSION_NUM >= 150000
	XLogRecGetBlockTag(record, block_id,
					   &target_tag.rnode, &target_tag.forkNum, &target_tag.blockNum);
#else
	if (!XLogRecGetBlockTag(record, block_id,
							&target_tag.rnode, &target_tag.forkNum, &target_tag.blockNum))
	{
		/* Caller specified a bogus block_id */
		elog(PANIC, "failed to locate backup block with ID %d", block_id);
	}
#endif

	/*
	 * Can a WAL redo function ever access a relation other than the one that
	 * it modifies? I don't see why it would.
	 */
	if (!RelFileNodeEquals(target_tag.rnode, target_redo_tag.rnode))
		elog(WARNING, "REDO accessing unexpected page: %u/%u/%u.%u blk %u",
			 target_tag.rnode.spcNode, target_tag.rnode.dbNode, target_tag.rnode.relNode, target_tag.forkNum, target_tag.blockNum);

	/*
	 * If this block isn't one we are currently restoring, then return 'true'
	 * so that this gets ignored
	 */
	return !BUFFERTAGS_EQUAL(target_tag, target_redo_tag);
}

/*
 * Get a page image back from buffer cache.
 *
 * After applying some records.
 */
static void
GetPage(StringInfo input_message)
{
	RelFileNode rnode;
	ForkNumber	forknum;
	BlockNumber blknum;
	Buffer		buf;
	Page		page;
	int			tot_written;

	/*
	 * message format:
	 *
	 * spcNode
	 * dbNode
	 * relNode
	 * ForkNumber
	 * BlockNumber
	 */
	forknum = pq_getmsgbyte(input_message);
	rnode.spcNode = pq_getmsgint(input_message, 4);
	rnode.dbNode = pq_getmsgint(input_message, 4);
	rnode.relNode = pq_getmsgint(input_message, 4);
	blknum = pq_getmsgint(input_message, 4);

	/* FIXME: check that we got a BeginRedoForBlock message or this earlier */

	buf = NeonRedoReadBuffer(rnode, forknum, blknum, RBM_NORMAL);
	Assert(buf == wal_redo_buffer);
	page = BufferGetPage(buf);
	/* single thread, so don't bother locking the page */

	/* Response: Page content */
	tot_written = 0;
	do {
		ssize_t		rc;

		rc = write(STDOUT_FILENO, &page[tot_written], BLCKSZ - tot_written);
		if (rc < 0) {
			/* If interrupted by signal, just retry */
			if (errno == EINTR)
				continue;
			ereport(ERROR,
					(errcode_for_file_access(),
					 errmsg("could not write to stdout: %m")));
		}
		tot_written += rc;
	} while (tot_written < BLCKSZ);

	ReleaseBuffer(buf);
	DropRelFileNodeAllLocalBuffers(rnode);
	wal_redo_buffer = InvalidBuffer;

	elog(TRACE, "Page sent back for block %u", blknum);
}


/* Buffer used by buffered_read() */
static char stdin_buf[16 * 1024];
static size_t stdin_len = 0;	/* # of bytes in buffer */
static size_t stdin_ptr = 0;	/* # of bytes already consumed */

/*
 * Like read() on stdin, but buffered.
 *
 * We cannot use libc's buffered fread(), because it uses syscalls that we
 * have disabled with seccomp(). Depending on the platform, it can call
 * 'fstat' or 'newfstatat'. 'fstat' is probably harmless, but 'newfstatat'
 * seems problematic because it allows interrogating files by path name.
 *
 * The return value is the number of bytes read. On error, -1 is returned, and
 * errno is set appropriately. Unlike read(), this fills the buffer completely
 * unless an error happens or EOF is reached.
 */
static ssize_t
buffered_read(void *buf, size_t count)
{
	char	   *dst = buf;

	while (count > 0)
	{
		size_t		nthis;

		if (stdin_ptr == stdin_len)
		{
			ssize_t		ret;

			ret = read(STDIN_FILENO, stdin_buf, sizeof(stdin_buf));
			if (ret < 0)
			{
				/* don't do anything here that could set 'errno' */
				return ret;
			}
			if (ret == 0)
			{
				/* EOF */
				break;
			}
			stdin_len = (size_t) ret;
			stdin_ptr = 0;
		}
		nthis = Min(stdin_len - stdin_ptr, count);

		memcpy(dst, &stdin_buf[stdin_ptr], nthis);

		stdin_ptr += nthis;
		count -= nthis;
		dst += nthis;
	}

	return (dst - (char *) buf);
}
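To make the stdin/stdout protocol described at the top of walredoproc.c concrete: each request is a one-byte message type, a big-endian int32 length (counting itself but not the type byte), and a payload; only GetPage ('G') produces a response, a bare 8192-byte page. Below is a hedged sketch of a client framing a GetPage request; the function request_page and the pipe fds to_child/from_child are invented for illustration and are not part of this patch.

#include <arpa/inet.h>	/* htonl */
#include <stdint.h>
#include <string.h>
#include <unistd.h>

/* Sketch: send GetPage('G') on the walredo pipe and read the 8k page back. */
static int
request_page(int to_child, int from_child,
			 uint32_t spc, uint32_t db, uint32_t rel,
			 uint8_t forknum, uint32_t blkno, char *page_out)
{
	char		msg[1 + 4 + 1 + 4 * 4];
	char	   *p = msg;
	uint32_t	len = htonl(4 + 1 + 4 * 4);	/* includes 'length', not 'msgtype' */
	uint32_t	f;

	*p++ = 'G';
	memcpy(p, &len, 4); p += 4;
	*p++ = forknum;						/* read by pq_getmsgbyte() on the other side */
	f = htonl(spc);   memcpy(p, &f, 4); p += 4;
	f = htonl(db);    memcpy(p, &f, 4); p += 4;
	f = htonl(rel);   memcpy(p, &f, 4); p += 4;
	f = htonl(blkno); memcpy(p, &f, 4); p += 4;

	if (write(to_child, msg, sizeof(msg)) != (ssize_t) sizeof(msg))
		return -1;

	/* The response is a bare 8192-byte page, no header. */
	for (size_t got = 0; got < 8192; )
	{
		ssize_t		rc = read(from_child, page_out + got, 8192 - got);

		if (rc <= 0)
			return -1;
		got += rc;
	}
	return 0;
}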
23
poetry.lock
generated
@@ -1568,7 +1568,7 @@ testing = ["func-timeout", "jaraco.itertools", "pytest (>=6)", "pytest-black (>=
[metadata]
lock-version = "1.1"
python-versions = "^3.9"
content-hash = "17cdbfe90f1b06dffaf24c3e076384ec08dd4a2dce5a05e50565f7364932eb2d"
content-hash = "9352a89d49d34807f6a58f6c3f898acbd8cf3570e0f45ede973673644bde4d0e"

[metadata.files]
aiopg = [
@@ -1978,6 +1978,7 @@ prometheus-client = [
psycopg2-binary = [
    {file = "psycopg2-binary-2.9.3.tar.gz", hash = "sha256:761df5313dc15da1502b21453642d7599d26be88bff659382f8f9747c7ebea4e"},
    {file = "psycopg2_binary-2.9.3-cp310-cp310-macosx_10_14_x86_64.macosx_10_9_intel.macosx_10_9_x86_64.macosx_10_10_intel.macosx_10_10_x86_64.whl", hash = "sha256:539b28661b71da7c0e428692438efbcd048ca21ea81af618d845e06ebfd29478"},
    {file = "psycopg2_binary-2.9.3-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:2f2534ab7dc7e776a263b463a16e189eb30e85ec9bbe1bff9e78dae802608932"},
    {file = "psycopg2_binary-2.9.3-cp310-cp310-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:6e82d38390a03da28c7985b394ec3f56873174e2c88130e6966cb1c946508e65"},
    {file = "psycopg2_binary-2.9.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:57804fc02ca3ce0dbfbef35c4b3a4a774da66d66ea20f4bda601294ad2ea6092"},
    {file = "psycopg2_binary-2.9.3-cp310-cp310-manylinux_2_24_aarch64.whl", hash = "sha256:083a55275f09a62b8ca4902dd11f4b33075b743cf0d360419e2051a8a5d5ff76"},
@@ -2011,6 +2012,7 @@ psycopg2-binary = [
    {file = "psycopg2_binary-2.9.3-cp37-cp37m-win32.whl", hash = "sha256:adf20d9a67e0b6393eac162eb81fb10bc9130a80540f4df7e7355c2dd4af9fba"},
    {file = "psycopg2_binary-2.9.3-cp37-cp37m-win_amd64.whl", hash = "sha256:2f9ffd643bc7349eeb664eba8864d9e01f057880f510e4681ba40a6532f93c71"},
    {file = "psycopg2_binary-2.9.3-cp38-cp38-macosx_10_14_x86_64.macosx_10_9_intel.macosx_10_9_x86_64.macosx_10_10_intel.macosx_10_10_x86_64.whl", hash = "sha256:def68d7c21984b0f8218e8a15d514f714d96904265164f75f8d3a70f9c295667"},
    {file = "psycopg2_binary-2.9.3-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:e6aa71ae45f952a2205377773e76f4e3f27951df38e69a4c95440c779e013560"},
    {file = "psycopg2_binary-2.9.3-cp38-cp38-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:dffc08ca91c9ac09008870c9eb77b00a46b3378719584059c034b8945e26b272"},
    {file = "psycopg2_binary-2.9.3-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:280b0bb5cbfe8039205c7981cceb006156a675362a00fe29b16fbc264e242834"},
    {file = "psycopg2_binary-2.9.3-cp38-cp38-manylinux_2_24_aarch64.whl", hash = "sha256:af9813db73395fb1fc211bac696faea4ca9ef53f32dc0cfa27e4e7cf766dcf24"},
@@ -2022,6 +2024,7 @@ psycopg2-binary = [
    {file = "psycopg2_binary-2.9.3-cp38-cp38-win32.whl", hash = "sha256:6472a178e291b59e7f16ab49ec8b4f3bdada0a879c68d3817ff0963e722a82ce"},
    {file = "psycopg2_binary-2.9.3-cp38-cp38-win_amd64.whl", hash = "sha256:35168209c9d51b145e459e05c31a9eaeffa9a6b0fd61689b48e07464ffd1a83e"},
    {file = "psycopg2_binary-2.9.3-cp39-cp39-macosx_10_14_x86_64.macosx_10_9_intel.macosx_10_9_x86_64.macosx_10_10_intel.macosx_10_10_x86_64.whl", hash = "sha256:47133f3f872faf28c1e87d4357220e809dfd3fa7c64295a4a148bcd1e6e34ec9"},
    {file = "psycopg2_binary-2.9.3-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:b3a24a1982ae56461cc24f6680604fffa2c1b818e9dc55680da038792e004d18"},
    {file = "psycopg2_binary-2.9.3-cp39-cp39-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:91920527dea30175cc02a1099f331aa8c1ba39bf8b7762b7b56cbf54bc5cce42"},
    {file = "psycopg2_binary-2.9.3-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:887dd9aac71765ac0d0bac1d0d4b4f2c99d5f5c1382d8b770404f0f3d0ce8a39"},
    {file = "psycopg2_binary-2.9.3-cp39-cp39-manylinux_2_24_aarch64.whl", hash = "sha256:1f14c8b0942714eb3c74e1e71700cbbcb415acbc311c730370e70c578a44a25c"},
@@ -2038,18 +2041,7 @@ py = [
    {file = "py-1.11.0.tar.gz", hash = "sha256:51c75c4126074b472f746a24399ad32f6053d1b34b68d2fa41e558e6f4a98719"},
]
pyasn1 = [
    {file = "pyasn1-0.4.8-py2.4.egg", hash = "sha256:fec3e9d8e36808a28efb59b489e4528c10ad0f480e57dcc32b4de5c9d8c9fdf3"},
    {file = "pyasn1-0.4.8-py2.5.egg", hash = "sha256:0458773cfe65b153891ac249bcf1b5f8f320b7c2ce462151f8fa74de8934becf"},
    {file = "pyasn1-0.4.8-py2.6.egg", hash = "sha256:5c9414dcfede6e441f7e8f81b43b34e834731003427e5b09e4e00e3172a10f00"},
    {file = "pyasn1-0.4.8-py2.7.egg", hash = "sha256:6e7545f1a61025a4e58bb336952c5061697da694db1cae97b116e9c46abcf7c8"},
    {file = "pyasn1-0.4.8-py2.py3-none-any.whl", hash = "sha256:39c7e2ec30515947ff4e87fb6f456dfc6e84857d34be479c9d4a4ba4bf46aa5d"},
    {file = "pyasn1-0.4.8-py3.1.egg", hash = "sha256:78fa6da68ed2727915c4767bb386ab32cdba863caa7dbe473eaae45f9959da86"},
    {file = "pyasn1-0.4.8-py3.2.egg", hash = "sha256:08c3c53b75eaa48d71cf8c710312316392ed40899cb34710d092e96745a358b7"},
    {file = "pyasn1-0.4.8-py3.3.egg", hash = "sha256:03840c999ba71680a131cfaee6fab142e1ed9bbd9c693e285cc6aca0d555e576"},
    {file = "pyasn1-0.4.8-py3.4.egg", hash = "sha256:7ab8a544af125fb704feadb008c99a88805126fb525280b2270bb25cc1d78a12"},
    {file = "pyasn1-0.4.8-py3.5.egg", hash = "sha256:e89bf84b5437b532b0803ba5c9a5e054d21fec423a89952a74f87fa2c9b7bce2"},
    {file = "pyasn1-0.4.8-py3.6.egg", hash = "sha256:014c0e9976956a08139dc0712ae195324a75e142284d5f87f1a87ee1b068a359"},
    {file = "pyasn1-0.4.8-py3.7.egg", hash = "sha256:99fcc3c8d804d1bc6d9a099921e39d827026409a58f2a720dcdb89374ea0c776"},
    {file = "pyasn1-0.4.8.tar.gz", hash = "sha256:aef77c9fb94a3ac588e87841208bdec464471d9871bd5050a287cc9a475cd0ba"},
]
pycodestyle = [
@@ -2159,6 +2151,13 @@ pyyaml = [
    {file = "PyYAML-6.0-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:f84fbc98b019fef2ee9a1cb3ce93e3187a6df0b2538a651bfb890254ba9f90b5"},
    {file = "PyYAML-6.0-cp310-cp310-win32.whl", hash = "sha256:2cd5df3de48857ed0544b34e2d40e9fac445930039f3cfe4bcc592a1f836d513"},
    {file = "PyYAML-6.0-cp310-cp310-win_amd64.whl", hash = "sha256:daf496c58a8c52083df09b80c860005194014c3698698d1a57cbcfa182142a3a"},
    {file = "PyYAML-6.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:d4b0ba9512519522b118090257be113b9468d804b19d63c71dbcf4a48fa32358"},
    {file = "PyYAML-6.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:81957921f441d50af23654aa6c5e5eaf9b06aba7f0a19c18a538dc7ef291c5a1"},
    {file = "PyYAML-6.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:afa17f5bc4d1b10afd4466fd3a44dc0e245382deca5b3c353d8b757f9e3ecb8d"},
    {file = "PyYAML-6.0-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:dbad0e9d368bb989f4515da330b88a057617d16b6a8245084f1b05400f24609f"},
    {file = "PyYAML-6.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:432557aa2c09802be39460360ddffd48156e30721f5e8d917f01d31694216782"},
    {file = "PyYAML-6.0-cp311-cp311-win32.whl", hash = "sha256:bfaef573a63ba8923503d27530362590ff4f576c626d86a9fed95822a8255fd7"},
    {file = "PyYAML-6.0-cp311-cp311-win_amd64.whl", hash = "sha256:01b45c0191e6d66c470b6cf1b9531a771a83c1c4208272ead47a3ae4f2f603bf"},
    {file = "PyYAML-6.0-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:897b80890765f037df3403d22bab41627ca8811ae55e9a722fd0392850ec4d86"},
    {file = "PyYAML-6.0-cp36-cp36m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:50602afada6d6cbfad699b0c7bb50d5ccffa7e46a3d738092afddc1f9758427f"},
    {file = "PyYAML-6.0-cp36-cp36m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:48c346915c114f5fdb3ead70312bd042a953a8ce5c7106d5bfb1a5254e47da92"},
@@ -4,7 +4,7 @@
# version, we can consider updating.
# See https://tracker.debian.org/pkg/rustc for more details on Debian rustc package,
# we use "unstable" version number as the highest version used in the project by default.
channel = "1.61" # do update GitHub CI cache values for rust builds, when changing this value
channel = "1.62.1" # do update GitHub CI cache values for rust builds, when changing this value
profile = "default"
# The default profile includes rustc, rust-std, cargo, rust-docs, rustfmt and clippy.
# https://rust-lang.github.io/rustup/concepts/profiles.html
@@ -12,7 +12,7 @@ fs2 = "0.4.3"
serde_json = "1"
tracing = "0.1.27"
clap = "4.0"
daemonize = "0.4.1"
nix = "0.25"
tokio = { version = "1.17", features = ["macros", "fs"] }
postgres-protocol = { git = "https://github.com/neondatabase/rust-postgres.git", rev="d052ee8b86fff9897c77b0fe89ea9daba0e1fa38" }
postgres = { git = "https://github.com/neondatabase/rust-postgres.git", rev="d052ee8b86fff9897c77b0fe89ea9daba0e1fa38" }
@@ -4,8 +4,7 @@
use anyhow::{bail, Context, Result};
use clap::{value_parser, Arg, ArgAction, Command};
use const_format::formatcp;
use daemonize::Daemonize;
use fs2::FileExt;
use nix::unistd::Pid;
use remote_storage::RemoteStorageConfig;
use std::fs::{self, File};
use std::io::{ErrorKind, Write};
@@ -16,6 +15,7 @@ use tokio::sync::mpsc;
use toml_edit::Document;
use tracing::*;
use url::{ParseError, Url};
use utils::lock_file;

use metrics::set_build_info_metric;
use safekeeper::broker;
@@ -35,12 +35,10 @@ use utils::{
    http::endpoint,
    id::NodeId,
    logging::{self, LogFormat},
    project_git_version,
    shutdown::exit_now,
    signals, tcp_listener,
    project_git_version, signals, tcp_listener,
};

const LOCK_FILE_NAME: &str = "safekeeper.lock";
const PID_FILE_NAME: &str = "safekeeper.pid";
const ID_FILE_NAME: &str = "safekeeper.id";
project_git_version!(GIT_VERSION);

@@ -65,10 +63,6 @@ fn main() -> anyhow::Result<()> {
        conf.no_sync = true;
    }

    if arg_matches.get_flag("daemonize") {
        conf.daemonize = true;
    }

    if let Some(addr) = arg_matches.get_one::<String>("listen-pg") {
        conf.listen_pg_addr = addr.to_string();
    }
@@ -143,19 +137,33 @@ fn main() -> anyhow::Result<()> {
}

fn start_safekeeper(mut conf: SafeKeeperConf, given_id: Option<NodeId>, init: bool) -> Result<()> {
    let log_file = logging::init("safekeeper.log", conf.daemonize, conf.log_format)?;

    logging::init(conf.log_format)?;
    info!("version: {GIT_VERSION}");

    // Prevent running multiple safekeepers on the same directory
    let lock_file_path = conf.workdir.join(LOCK_FILE_NAME);
    let lock_file = File::create(&lock_file_path).context("failed to open lockfile")?;
    lock_file.try_lock_exclusive().with_context(|| {
        format!(
            "control file {} is locked by some other process",
            lock_file_path.display()
        )
    })?;
    let lock_file_path = conf.workdir.join(PID_FILE_NAME);
    let lock_file = match lock_file::create_lock_file(&lock_file_path, Pid::this().to_string()) {
        lock_file::LockCreationResult::Created {
            new_lock_contents,
            file,
        } => {
            info!("Created lock file at {lock_file_path:?} with contents {new_lock_contents}");
            file
        }
        lock_file::LockCreationResult::AlreadyLocked {
            existing_lock_contents,
        } => anyhow::bail!(
            "Could not lock pid file; safekeeper is already running in {:?} with PID {}",
            conf.workdir,
            existing_lock_contents
        ),
        lock_file::LockCreationResult::CreationFailed(e) => {
            return Err(e.context(format!("Failed to create lock file at {lock_file_path:?}")))
        }
    };
    // ensure that the lock file is held even if the main thread of the process panics
    // we need to release the lock file only when the current process is gone
    let _ = Box::leak(Box::new(lock_file));

    // Set or read our ID.
    set_id(&mut conf, given_id)?;
@@ -187,31 +195,6 @@ fn start_safekeeper(mut conf: SafeKeeperConf, given_id: Option<NodeId>, init: bo
        }
    };

    // XXX: Don't spawn any threads before daemonizing!
    if conf.daemonize {
        info!("daemonizing...");

        // There shouldn't be any logging to stdin/stdout. Redirect it to the main log so
        // that we will see any accidental manual fprintf's or backtraces.
        let stdout = log_file.try_clone().unwrap();
        let stderr = log_file;

        let daemonize = Daemonize::new()
            .pid_file("safekeeper.pid")
            .working_directory(Path::new("."))
            .stdout(stdout)
            .stderr(stderr);

        // XXX: The parent process should exit abruptly right after
        // it has spawned a child to prevent coverage machinery from
        // dumping stats into a `profraw` file now owned by the child.
        // Otherwise, the coverage data will be damaged.
        match daemonize.exit_action(|| exit_now(0)).start() {
            Ok(_) => info!("Success, daemonized"),
            Err(err) => bail!("Error: {err}. could not daemonize. bailing."),
        }
    }

    // Register metrics collector for active timelines. It's important to do this
    // after daemonizing, otherwise process collector will be upset.
    let timeline_collector = safekeeper::metrics::TimelineCollector::new();
@@ -384,13 +367,6 @@ fn cli() -> Command {
            .short('p')
            .long("pageserver"),
    )
    .arg(
        Arg::new("daemonize")
            .short('d')
            .long("daemonize")
            .action(ArgAction::SetTrue)
            .help("Run in the background"),
    )
    .arg(
        Arg::new("no-sync")
            .short('n')
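The new lock file is a classic pid file: create it, take a non-blocking exclusive lock, write the pid, and keep the descriptor open for the life of the process (which is what the Box::leak above achieves). Since the exact semantics of utils::lock_file aren't shown in this diff, here is only a minimal sketch of the same pattern in portable C, for illustration:

#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <sys/file.h>
#include <unistd.h>

/* Sketch: acquire a pid file, or fail if another process holds it. */
static int
acquire_pid_file(const char *path)
{
	char	buf[32];
	int		fd = open(path, O_CREAT | O_RDWR, 0644);

	if (fd < 0)
		return -1;
	if (flock(fd, LOCK_EX | LOCK_NB) != 0)
	{
		close(fd);				/* somebody else is running */
		return -1;
	}
	/* We own the lock: record our pid for humans and tooling. */
	(void) ftruncate(fd, 0);
	snprintf(buf, sizeof(buf), "%d\n", (int) getpid());
	(void) write(fd, buf, strlen(buf));
	return fd;					/* keep open; the lock dies with the process */
}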
@@ -54,7 +54,6 @@ pub struct SafeKeeperConf
    // data directories to avoid clashing with each other.
    pub workdir: PathBuf,

    pub daemonize: bool,
    pub no_sync: bool,
    pub listen_pg_addr: String,
    pub listen_http_addr: String,
@@ -88,7 +87,6 @@ impl Default for SafeKeeperConf
    // command line, so that when the server is running, all paths are relative
    // to that.
    workdir: PathBuf::from("./"),
    daemonize: false,
    no_sync: false,
    listen_pg_addr: defaults::DEFAULT_PG_LISTEN_ADDR.to_string(),
    listen_http_addr: defaults::DEFAULT_HTTP_LISTEN_ADDR.to_string(),
@@ -4,18 +4,12 @@
|
||||
# Outline of steps:
|
||||
# 1. Get `(last_lsn, prev_lsn)` from old pageserver
|
||||
# 2. Get `fullbackup` from old pageserver, which creates a basebackup tar file
|
||||
# 3. This tar file might be missing relation files for empty relations, if the pageserver
|
||||
# is old enough (we didn't always store those). So to recreate them, we start a local
|
||||
# vanilla postgres on this basebackup and ask it what relations should exist, then touch
|
||||
# any missing files and re-pack the tar.
|
||||
# TODO This functionality is no longer needed, so we can delete it later if we don't
|
||||
# end up using the same utils for the pg 15 upgrade. Not sure.
|
||||
# 4. We import the patched basebackup into a new pageserver
|
||||
# 5. We export again via fullbackup, now from the new pageserver and compare the returned
|
||||
# 3. We import the basebackup into a new pageserver
|
||||
# 4. We export again via fullbackup, now from the new pageserver and compare the returned
|
||||
# tar file with the one we imported. This confirms that we imported everything that was
|
||||
# exported, but doesn't guarantee correctness (what if we didn't **export** everything
|
||||
# initially?)
|
||||
# 6. We wait for the new pageserver's remote_consistent_lsn to catch up
|
||||
# 5. We wait for the new pageserver's remote_consistent_lsn to catch up
|
||||
#
|
||||
# For more context on how to use this, see:
|
||||
# https://github.com/neondatabase/cloud/wiki/Storage-format-migration
|
||||
@@ -24,17 +18,13 @@ import argparse
|
||||
import os
|
||||
import shutil
|
||||
import subprocess
|
||||
import tempfile
|
||||
import time
|
||||
import uuid
|
||||
from contextlib import closing
|
||||
from pathlib import Path
|
||||
from typing import Any, Dict, List, Optional, Tuple, cast
|
||||
from typing import Any, Dict, List, Optional
|
||||
|
||||
import psycopg2
|
||||
import requests
|
||||
from psycopg2.extensions import connection as PgConnection
|
||||
from psycopg2.extensions import parse_dsn
|
||||
|
||||
###############################################
|
||||
### client-side utils copied from test fixtures
|
||||
@@ -135,105 +125,6 @@ class PgBin:
|
||||
)
|
||||
|
||||
|
||||
class PgProtocol:
|
||||
"""Reusable connection logic"""
|
||||
|
||||
def __init__(self, **kwargs):
|
||||
self.default_options = kwargs
|
||||
|
||||
def conn_options(self, **kwargs):
|
||||
conn_options = self.default_options.copy()
|
||||
if "dsn" in kwargs:
|
||||
conn_options.update(parse_dsn(kwargs["dsn"]))
|
||||
conn_options.update(kwargs)
|
||||
|
||||
# Individual statement timeout in seconds. 2 minutes should be
|
||||
# enough for our tests, but if you need a longer, you can
|
||||
# change it by calling "SET statement_timeout" after
|
||||
# connecting.
|
||||
conn_options["options"] = f"-cstatement_timeout=120s {conn_options.get('options', '')}"
|
||||
|
||||
return conn_options
|
||||
|
||||
# autocommit=True here by default because that's what we need most of the time
|
||||
def connect(self, autocommit=True, **kwargs) -> PgConnection:
|
||||
"""
|
||||
Connect to the node.
|
||||
Returns psycopg2's connection object.
|
||||
This method passes all extra params to connstr.
|
||||
"""
|
||||
conn = psycopg2.connect(**self.conn_options(**kwargs))
|
||||
|
||||
# WARNING: this setting affects *all* tests!
|
||||
conn.autocommit = autocommit
|
||||
return conn
|
||||
|
||||
def safe_psql(self, query: str, **kwargs: Any) -> List[Tuple[Any, ...]]:
|
||||
"""
|
||||
Execute query against the node and return all rows.
|
||||
This method passes all extra params to connstr.
|
||||
"""
|
||||
return self.safe_psql_many([query], **kwargs)[0]
|
||||
|
||||
def safe_psql_many(self, queries: List[str], **kwargs: Any) -> List[List[Tuple[Any, ...]]]:
|
||||
"""
|
||||
Execute queries against the node and return all rows.
|
||||
This method passes all extra params to connstr.
|
||||
"""
|
||||
result: List[List[Any]] = []
|
||||
with closing(self.connect(**kwargs)) as conn:
|
||||
with conn.cursor() as cur:
|
||||
for query in queries:
|
||||
print(f"Executing query: {query}")
|
||||
cur.execute(query)
|
||||
|
||||
if cur.description is None:
|
||||
result.append([]) # query didn't return data
|
||||
else:
|
||||
result.append(cast(List[Any], cur.fetchall()))
|
||||
return result


class VanillaPostgres(PgProtocol):
    def __init__(self, pgdatadir: Path, pg_bin: PgBin, port: int, init=True):
        super().__init__(host="localhost", port=port, dbname="postgres")
        self.pgdatadir = pgdatadir
        self.pg_bin = pg_bin
        self.running = False
        if init:
            self.pg_bin.run_capture(["initdb", "-D", str(pgdatadir)])
            self.configure([f"port = {port}\n"])

    def configure(self, options: List[str]):
        """Append lines into postgresql.conf file."""
        assert not self.running
        with open(os.path.join(self.pgdatadir, "postgresql.conf"), "a") as conf_file:
            conf_file.write("\n".join(options))

    def start(self, log_path: Optional[str] = None):
        assert not self.running
        self.running = True

        if log_path is None:
            log_path = os.path.join(self.pgdatadir, "pg.log")

        self.pg_bin.run_capture(
            ["pg_ctl", "-w", "-D", str(self.pgdatadir), "-l", log_path, "start"]
        )

    def stop(self):
        assert self.running
        self.running = False
        self.pg_bin.run_capture(["pg_ctl", "-w", "-D", str(self.pgdatadir), "stop"])

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc, tb):
        if self.running:
            self.stop()
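
An illustrative aside, not part of the diff: driving VanillaPostgres as a context manager. The datadir, pg_bin, and port are assumptions; __exit__ stops the server even if the body raises.

# Hypothetical usage; pg_bin and the paths are placeholders, not from the diff.
from pathlib import Path

with VanillaPostgres(Path("/tmp/pgdata"), pg_bin, 55432) as vanilla_pg:
    vanilla_pg.start()
    print(vanilla_pg.safe_psql("select version()"))
# stop() runs automatically here if the server is still running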


class NeonPageserverApiException(Exception):
    pass

@@ -370,84 +261,6 @@ def pack_base(log_dir, restored_dir, output_tar):
    shutil.move(tmp_tar_path, output_tar)


def reconstruct_paths(log_dir, pg_bin, base_tar):
    """Reconstruct what relation files should exist in the datadir by querying postgres."""
    with tempfile.TemporaryDirectory() as restored_dir:
        # Unpack the base tar
        subprocess_capture(log_dir, ["tar", "-xf", base_tar, "-C", restored_dir])

        # Start a vanilla postgres from the given datadir and query it to find
        # what relfiles should exist, but possibly don't.
        port = "55439"  # Probably free
        with VanillaPostgres(restored_dir, pg_bin, port, init=False) as vanilla_pg:
            vanilla_pg.configure([f"port={port}"])
            vanilla_pg.start(log_path=os.path.join(log_dir, "tmp_pg.log"))

            # Create database based on template0 because we can't connect to template0
            query = "create database template0copy template template0"
            vanilla_pg.safe_psql(query, user="cloud_admin")
            vanilla_pg.safe_psql("CHECKPOINT", user="cloud_admin")

            # Get all databases
            query = "select oid, datname from pg_database"
            oid_dbname_pairs = vanilla_pg.safe_psql(query, user="cloud_admin")
            template0_oid = [
                oid for (oid, database) in oid_dbname_pairs if database == "template0"
            ][0]

            # Get rel paths for each database
            for oid, database in oid_dbname_pairs:
                if database == "template0":
                    # We can't connect to template0
                    continue

                query = "select relname, pg_relation_filepath(oid) from pg_class"
                result = vanilla_pg.safe_psql(query, user="cloud_admin", dbname=database)
                for relname, filepath in result:
                    if filepath is not None:

                        if database == "template0copy":
                            # Add all template0copy paths to template0
                            prefix = f"base/{oid}/"
                            if filepath.startswith(prefix):
                                suffix = filepath[len(prefix) :]
                                yield f"base/{template0_oid}/{suffix}"
                            elif filepath.startswith("global"):
                                print(f"skipping {database} global file {filepath}")
                            else:
                                raise AssertionError
                        else:
                            yield filepath
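
An illustrative aside, not part of the diff: the paths yielded above follow postgres's base/<database oid>/<relfilenode> on-disk layout, so remapping template0copy's files onto template0 is a plain prefix swap. A toy run with invented OIDs:

# Toy illustration of the prefix swap; the OIDs are made up.
template0_oid = 13425
oid = 16384  # template0copy's OID in this example
filepath = "base/16384/16390"
prefix = f"base/{oid}/"
assert filepath.startswith(prefix)
suffix = filepath[len(prefix) :]
print(f"base/{template0_oid}/{suffix}")  # -> base/13425/16390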


def touch_missing_rels(log_dir, corrupt_tar, output_tar, paths):
    """Add the appropriate empty files to a basebackup tar."""
    with tempfile.TemporaryDirectory() as restored_dir:
        # Unpack the base tar
        subprocess_capture(log_dir, ["tar", "-xf", corrupt_tar, "-C", restored_dir])

        # Touch files that don't exist
        for path in paths:
            absolute_path = os.path.join(restored_dir, path)
            exists = os.path.exists(absolute_path)
            if not exists:
                print(f"File {absolute_path} didn't exist. Creating..")
                Path(absolute_path).touch()

        # Repackage
        pack_base(log_dir, restored_dir, output_tar)


# HACK This is a workaround for exporting from old pageservers that
# can't export empty relations. In this case we need to start
# a vanilla postgres from the exported datadir, and query it
# to see what empty relations are missing, and then create
# those empty files before importing.
def add_missing_rels(base_tar, output_tar, log_dir, pg_bin):
    reconstructed_paths = set(reconstruct_paths(log_dir, pg_bin, base_tar))
    touch_missing_rels(log_dir, base_tar, output_tar, reconstructed_paths)
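
An illustrative aside, not part of the diff: end to end, the workaround is a tar-to-tar transformation. A hypothetical invocation, with every path and the PgBin arguments as placeholders:

# Hypothetical invocation; all paths here are placeholders.
pg_bin = PgBin("/tmp/work", "/usr/local/pg_distrib", "v14")
add_missing_rels(
    base_tar="/tmp/work/base.tar",  # export from the old pageserver
    output_tar="/tmp/work/base.fixed.tar",  # same data plus touched empty rels
    log_dir="/tmp/work",
    pg_bin=pg_bin,
)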


def get_rlsn(pageserver_connstr, tenant_id, timeline_id):
    conn = psycopg2.connect(pageserver_connstr)
    conn.autocommit = True
@@ -516,7 +329,6 @@ def export_timeline(
    pg_version,
):
    # Choose filenames
    incomplete_filename = tar_filename + ".incomplete"
    stderr_filename = os.path.join(args.work_dir, f"{tenant_id}_{timeline_id}.stderr")

    # Construct export command
@@ -525,18 +337,14 @@ def export_timeline(

    # Run export command
    print(f"Running: {cmd}")
    with open(incomplete_filename, "w") as stdout_f:
    with open(tar_filename, "w") as stdout_f:
        with open(stderr_filename, "w") as stderr_f:
            print(f"(capturing output to {incomplete_filename})")
            print(f"(capturing output to {tar_filename})")
            pg_bin = PgBin(args.work_dir, args.pg_distrib_dir, pg_version)
            subprocess.run(
                cmd, stdout=stdout_f, stderr=stderr_f, env=pg_bin._build_env(None), check=True
            )

    # Add missing rels
    pg_bin = PgBin(args.work_dir, args.pg_distrib_dir, pg_version)
    add_missing_rels(incomplete_filename, tar_filename, args.work_dir, pg_bin)

    # Log more info
    file_size = os.path.getsize(tar_filename)
    print(f"Done export: {tar_filename}, size {file_size}")
@@ -633,6 +441,13 @@ def main(args: argparse.Namespace):
        raise AssertionError(f"Sizes don't match old: {old_size} new: {new_size}")


def non_zero_tcp_port(arg: Any):
    port = int(arg)
    if port < 1 or port > 65535:
        raise argparse.ArgumentTypeError(f"invalid tcp port: {arg}")
    return port
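
An illustrative aside, not part of the diff: because the validator raises argparse.ArgumentTypeError, argparse reports a bad value as a clean usage error instead of a traceback.

# Minimal demonstration of wiring the validator into argparse.
import argparse

parser = argparse.ArgumentParser()
parser.add_argument("--tmp-pg-port", type=non_zero_tcp_port, default=55439)
parser.parse_args(["--tmp-pg-port", "55440"])  # accepted
# parser.parse_args(["--tmp-pg-port", "0"])    # exits: invalid tcp port: 0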


if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument(
@@ -728,5 +543,13 @@ if __name__ == "__main__":
        default=False,
        help="directory where temporary tar files are stored",
    )
    parser.add_argument(
        "--tmp-pg-port",
        dest="tmp_pg_port",
        required=False,
        default=55439,
        type=non_zero_tcp_port,
        help="localhost port to use for temporary postgres instance",
    )
    args = parser.parse_args()
    main(args)

@@ -6,6 +6,6 @@ set -euox pipefail
echo 'Reformatting Rust code'
cargo fmt
echo 'Reformatting Python code'
poetry run isort test_runner
poetry run flake8 test_runner
poetry run black test_runner
poetry run isort test_runner scripts
poetry run flake8 test_runner scripts
poetry run black test_runner scripts

@@ -19,7 +19,7 @@ from dataclasses import dataclass, field
from enum import Flag, auto
from functools import cached_property
from pathlib import Path
from typing import Any, Callable, Dict, Iterator, List, Optional, Tuple, TypeVar, Union, cast
from typing import Any, Dict, Iterator, List, Optional, Tuple, Union, cast

import asyncpg
import backoff  # type: ignore
@@ -36,7 +36,7 @@ from psycopg2.extensions import connection as PgConnection
from psycopg2.extensions import make_dsn, parse_dsn
from typing_extensions import Literal

from .utils import allure_attach_from_dir, etcd_path, get_self_dir, subprocess_capture
from .utils import Fn, allure_attach_from_dir, etcd_path, get_self_dir, subprocess_capture

"""
This file contains pytest fixtures. A fixture is a test resource that can be
@@ -56,7 +56,6 @@ put directly-importable functions into utils.py or another separate file.
"""

Env = Dict[str, str]
Fn = TypeVar("Fn", bound=Callable[..., Any])

DEFAULT_OUTPUT_DIR = "test_output"
DEFAULT_BRANCH_NAME = "main"
@@ -965,11 +964,11 @@ def neon_env_builder(
    yield builder


class NeonPageserverApiException(Exception):
class PageserverApiException(Exception):
    pass


class NeonPageserverHttpClient(requests.Session):
class PageserverHttpClient(requests.Session):
    def __init__(self, port: int, is_testing_enabled_or_skip: Fn, auth_token: Optional[str] = None):
        super().__init__()
        self.port = port
@@ -987,7 +986,7 @@ class NeonPageserverHttpClient(requests.Session):
                msg = res.json()["msg"]
            except:  # noqa: E722
                msg = ""
            raise NeonPageserverApiException(msg) from e
            raise PageserverApiException(msg) from e

    def check_status(self):
        self.get(f"http://localhost:{self.port}/v1/status").raise_for_status()
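
An illustrative aside, not part of the diff: the hunk above shows the client's error-translation idiom. Responses go through raise_for_status, and HTTP errors are re-raised as the renamed PageserverApiException, carrying the server-supplied message when the body is JSON. A sketch of that pattern only; the helper name is made up:

# Sketch of the error-translation idiom; `translate_errors` is an invented name.
import requests

def translate_errors(res: requests.Response):
    try:
        res.raise_for_status()
    except requests.RequestException as e:
        try:
            msg = res.json()["msg"]
        except Exception:  # the body may not be JSON at all
            msg = ""
        raise PageserverApiException(msg) from e
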
@@ -1624,8 +1623,6 @@ class ComputeCtl(AbstractNeonCli):
class NeonPageserver(PgProtocol):
    """
    An object representing a running pageserver.

    Initializes the repository via `neon init`.
    """

    TEMP_FILE_SUFFIX = "___temp"
@@ -1674,8 +1671,8 @@ class NeonPageserver(PgProtocol):
        if '"profiling"' not in self.version:
            pytest.skip("pageserver was built without 'profiling' feature")

    def http_client(self, auth_token: Optional[str] = None) -> NeonPageserverHttpClient:
        return NeonPageserverHttpClient(
    def http_client(self, auth_token: Optional[str] = None) -> PageserverHttpClient:
        return PageserverHttpClient(
            port=self.service_port.http,
            auth_token=auth_token,
            is_testing_enabled_or_skip=self.is_testing_enabled_or_skip,
@@ -2260,11 +2257,6 @@ class PostgresFactory:
        return self


def read_pid(path: Path) -> int:
    """Read content of file into number"""
    return int(path.read_text())


@dataclass
class SafekeeperPort:
    pg: int
@@ -2688,26 +2680,8 @@ def check_restored_datadir_content(
    assert (mismatch, error) == ([], [])


def wait_until(number_of_iterations: int, interval: float, func):
    """
    Wait until 'func' returns successfully, without exception. Returns the
    last return value from the function.
    """
    last_exception = None
    for i in range(number_of_iterations):
        try:
            res = func()
        except Exception as e:
            log.info("waiting for %s iteration %s failed", func, i + 1)
            last_exception = e
            time.sleep(interval)
            continue
        return res
    raise Exception("timed out while waiting for %s" % func) from last_exception


def assert_no_in_progress_downloads_for_tenant(
    pageserver_http_client: NeonPageserverHttpClient,
    pageserver_http_client: PageserverHttpClient,
    tenant: TenantId,
):
    tenant_status = pageserver_http_client.tenant_status(tenant)
@@ -2715,7 +2689,7 @@ def assert_no_in_progress_downloads_for_tenant(


def remote_consistent_lsn(
    pageserver_http_client: NeonPageserverHttpClient, tenant: TenantId, timeline: TimelineId
    pageserver_http_client: PageserverHttpClient, tenant: TenantId, timeline: TimelineId
) -> Lsn:
    detail = pageserver_http_client.timeline_detail(tenant, timeline)

@@ -2730,7 +2704,7 @@ def remote_consistent_lsn(


def wait_for_upload(
    pageserver_http_client: NeonPageserverHttpClient,
    pageserver_http_client: PageserverHttpClient,
    tenant: TenantId,
    timeline: TimelineId,
    lsn: Lsn,
@@ -2754,7 +2728,7 @@ def wait_for_upload(


def last_record_lsn(
    pageserver_http_client: NeonPageserverHttpClient, tenant: TenantId, timeline: TimelineId
    pageserver_http_client: PageserverHttpClient, tenant: TenantId, timeline: TimelineId
) -> Lsn:
    detail = pageserver_http_client.timeline_detail(tenant, timeline)

@@ -2764,7 +2738,7 @@ def last_record_lsn(


def wait_for_last_record_lsn(
    pageserver_http_client: NeonPageserverHttpClient,
    pageserver_http_client: PageserverHttpClient,
    tenant: TenantId,
    timeline: TimelineId,
    lsn: Lsn,

@@ -4,13 +4,16 @@ import re
import shutil
import subprocess
import tarfile
import time
from pathlib import Path
from typing import Any, List, Tuple
from typing import Any, Callable, List, Tuple, TypeVar

import allure  # type: ignore
from fixtures.log_helper import log
from psycopg2.extensions import cursor

Fn = TypeVar("Fn", bound=Callable[..., Any])


def get_self_dir() -> str:
    """Get the path to the directory where this script lives."""
@@ -188,3 +191,57 @@ def allure_attach_from_dir(dir: Path):
        extension = attachment.suffix.removeprefix(".")

        allure.attach.file(source, name, attachment_type, extension)


def start_in_background(
    command: list[str], cwd: Path, log_file_name: str, is_started: Fn
) -> subprocess.Popen[bytes]:
    """Starts a process, creates the logfile, and redirects stderr and stdout there. Runs the `is_started` check until the process reports as started, or errors."""

    log.info(f'Running command "{" ".join(command)}"')

    with open(cwd / log_file_name, "wb") as log_file:
        spawned_process = subprocess.Popen(command, stdout=log_file, stderr=log_file, cwd=cwd)
        error = None
        try:
            return_code = spawned_process.poll()
            if return_code is not None:
                error = f"expected subprocess to run but it exited with code {return_code}"
            else:
                attempts = 10
                try:
                    wait_until(
                        number_of_iterations=attempts,
                        interval=1,
                        func=is_started,
                    )
                except Exception:
                    error = f"Failed to get correct status from subprocess in {attempts} attempts"
        except Exception as e:
            error = f"expected subprocess to start but it failed with exception: {e}"

        if error is not None:
            log.error(error)
            spawned_process.kill()
            raise Exception(f"Failed to run subprocess as {command}, reason: {error}")

        log.info("subprocess spawned")
        return spawned_process
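
An illustrative aside, not part of the diff: start_in_background takes any zero-argument readiness probe that raises until the service is up. A hypothetical caller; the command, port, and URL are placeholders:

# Hypothetical usage; the command, directory, and URL are placeholders.
import requests
from pathlib import Path

def check_ready():
    requests.get("http://localhost:9898/v1/status").raise_for_status()

proc = start_in_background(
    command=["my-service", "--listen", "localhost:9898"],
    cwd=Path("/tmp/service-dir"),
    log_file_name="service.log",
    is_started=check_ready,
)
proc.kill()  # the caller owns the Popen handle and must clean it up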


def wait_until(number_of_iterations: int, interval: float, func: Fn):
    """
    Wait until 'func' returns successfully, without exception. Returns the
    last return value from the function.
    """
    last_exception = None
    for i in range(number_of_iterations):
        try:
            res = func()
        except Exception as e:
            log.info("waiting for %s iteration %s failed", func, i + 1)
            last_exception = e
            time.sleep(interval)
            continue
        return res
    raise Exception("timed out while waiting for %s" % func) from last_exception
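
An illustrative aside, not part of the diff: wait_until returns the probe's last value, so a caller can wait for a condition and capture its result in one call. A self-contained sketch:

# Poll up to 10 times, once per second; raising inside func means "retry".
import time

start = time.time()

def elapsed_over_two_seconds() -> float:
    elapsed = time.time() - start
    assert elapsed > 2.0, "not ready yet"
    return elapsed

value = wait_until(number_of_iterations=10, interval=1.0, func=elapsed_over_two_seconds)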

@@ -1,7 +1,7 @@
from contextlib import closing

import pytest
from fixtures.neon_fixtures import NeonEnvBuilder, NeonPageserverApiException
from fixtures.neon_fixtures import NeonEnvBuilder, PageserverApiException
from fixtures.types import TenantId


@@ -39,7 +39,7 @@ def test_pageserver_auth(neon_env_builder: NeonEnvBuilder):

    # fail to create branch using token with different tenant_id
    with pytest.raises(
        NeonPageserverApiException, match="Forbidden: Tenant id mismatch. Permission denied"
        PageserverApiException, match="Forbidden: Tenant id mismatch. Permission denied"
    ):
        invalid_tenant_http_client.timeline_create(
            tenant_id=env.initial_tenant, ancestor_timeline_id=new_timeline_id
@@ -50,7 +50,7 @@ def test_pageserver_auth(neon_env_builder: NeonEnvBuilder):

    # fail to create tenant using tenant token
    with pytest.raises(
        NeonPageserverApiException,
        PageserverApiException,
        match="Forbidden: Attempt to access management api with tenant scope. Permission denied",
    ):
        tenant_http_client.tenant_create()

@@ -10,7 +10,7 @@ import toml
from fixtures.neon_fixtures import (
    NeonCli,
    NeonEnvBuilder,
    NeonPageserverHttpClient,
    PageserverHttpClient,
    PgBin,
    PortDistributor,
    wait_for_last_record_lsn,
@@ -177,7 +177,7 @@ def test_backward_compatibility(
        cli.raw_cli(["start"])
        request.addfinalizer(lambda: cli.raw_cli(["stop"]))

        result = cli.pg_start("main")
        result = cli.pg_start("main", port=port_distributor.get_port())
        request.addfinalizer(lambda: cli.pg_stop("main"))
    except Exception:
        breaking_changes_allowed = (
@@ -208,7 +208,7 @@ def test_backward_compatibility(
    timeline_id = dict(snapshot_config["branch_name_mappings"]["main"])[tenant_id]
    pageserver_port = snapshot_config["pageserver"]["listen_http_addr"].split(":")[-1]
    auth_token = snapshot_config["pageserver"]["auth_token"]
    pageserver_http = NeonPageserverHttpClient(
    pageserver_http = PageserverHttpClient(
        port=pageserver_port,
        is_testing_enabled_or_skip=lambda: True,  # TODO: check if testing really enabled
        auth_token=auth_token,

@@ -5,13 +5,13 @@ from fixtures.neon_fixtures import (
    DEFAULT_BRANCH_NAME,
    NeonEnv,
    NeonEnvBuilder,
    NeonPageserverHttpClient,
    PageserverHttpClient,
)
from fixtures.types import TenantId, TimelineId


def helper_compare_timeline_list(
    pageserver_http_client: NeonPageserverHttpClient, env: NeonEnv, initial_tenant: TenantId
    pageserver_http_client: PageserverHttpClient, env: NeonEnv, initial_tenant: TenantId
):
    """
    Compare timelines list returned by CLI and directly via API.
@@ -56,7 +56,7 @@ def test_cli_timeline_list(neon_simple_env: NeonEnv):
    assert nested_timeline_id in timelines_cli


def helper_compare_tenant_list(pageserver_http_client: NeonPageserverHttpClient, env: NeonEnv):
def helper_compare_tenant_list(pageserver_http_client: PageserverHttpClient, env: NeonEnv):
    tenants = pageserver_http_client.tenant_list()
    tenants_api = sorted(map(lambda t: cast(str, t["id"]), tenants))


@@ -1,9 +1,9 @@
import pytest
from fixtures.log_helper import log
from fixtures.neon_fixtures import NeonEnv, NeonEnvBuilder, NeonPageserverHttpClient
from fixtures.neon_fixtures import NeonEnv, NeonEnvBuilder, PageserverHttpClient


def check_tenant(env: NeonEnv, pageserver_http: NeonPageserverHttpClient):
def check_tenant(env: NeonEnv, pageserver_http: PageserverHttpClient):
    tenant_id, timeline_id = env.neon_cli.create_tenant()
    pg = env.postgres.create_start("main", tenant_id=tenant_id)
    # we rely upon autocommit after each statement

@@ -6,12 +6,12 @@ from fixtures.neon_fixtures import (
    DEFAULT_BRANCH_NAME,
    NeonEnv,
    NeonEnvBuilder,
    NeonPageserverHttpClient,
    PageserverHttpClient,
    neon_binpath,
    pg_distrib_dir,
    wait_until,
)
from fixtures.types import Lsn, TenantId, TimelineId
from fixtures.utils import wait_until


# test that we cannot override node id after init
@@ -29,8 +29,9 @@ def test_pageserver_init_node_id(neon_simple_env: NeonEnv):
        stderr=subprocess.PIPE,
    )

    # remove initial config
    # remove initial config and stop existing pageserver
    pageserver_config.unlink()
    neon_simple_env.pageserver.stop()

    bad_init = run_pageserver(["--init", "-c", f'pg_distrib_dir="{pg_distrib_dir}"'])
    assert (
@@ -60,7 +61,7 @@ def test_pageserver_init_node_id(neon_simple_env: NeonEnv):
    assert "has node id already, it cannot be overridden" in bad_update.stderr


def check_client(client: NeonPageserverHttpClient, initial_tenant: TenantId):
def check_client(client: PageserverHttpClient, initial_tenant: TenantId):
    client.check_status()

    # check initial tenant is there
@@ -116,7 +117,7 @@ def test_pageserver_http_get_wal_receiver_not_found(neon_simple_env: NeonEnv):


def expect_updated_msg_lsn(
    client: NeonPageserverHttpClient,
    client: PageserverHttpClient,
    tenant_id: TenantId,
    timeline_id: TimelineId,
    prev_msg_lsn: Optional[Lsn],

@@ -15,10 +15,9 @@ from fixtures.neon_fixtures import (
    available_remote_storages,
    wait_for_last_record_lsn,
    wait_for_upload,
    wait_until,
)
from fixtures.types import Lsn, TenantId, TimelineId
from fixtures.utils import query_scalar
from fixtures.utils import query_scalar, wait_until


#

@@ -2,16 +2,12 @@ from threading import Thread

import pytest
from fixtures.log_helper import log
from fixtures.neon_fixtures import (
    NeonEnvBuilder,
    NeonPageserverApiException,
    NeonPageserverHttpClient,
)
from fixtures.neon_fixtures import NeonEnvBuilder, PageserverApiException, PageserverHttpClient
from fixtures.types import TenantId, TimelineId


def do_gc_target(
    pageserver_http: NeonPageserverHttpClient, tenant_id: TenantId, timeline_id: TimelineId
    pageserver_http: PageserverHttpClient, tenant_id: TenantId, timeline_id: TimelineId
):
    """Hack to unblock main, see https://github.com/neondatabase/neon/issues/2211"""
    try:
@@ -27,7 +23,7 @@ def test_tenant_detach_smoke(neon_env_builder: NeonEnvBuilder):
    # first check for non existing tenant
    tenant_id = TenantId.generate()
    with pytest.raises(
        expected_exception=NeonPageserverApiException,
        expected_exception=PageserverApiException,
        match=f"Tenant not found for id {tenant_id}",
    ):
        pageserver_http.tenant_detach(tenant_id)
@@ -49,7 +45,7 @@ def test_tenant_detach_smoke(neon_env_builder: NeonEnvBuilder):

    # gc should not try to even start
    with pytest.raises(
        expected_exception=NeonPageserverApiException, match="gc target timeline does not exist"
        expected_exception=PageserverApiException, match="gc target timeline does not exist"
    ):
        bogus_timeline_id = TimelineId.generate()
        pageserver_http.timeline_gc(tenant_id, bogus_timeline_id, 0)
@@ -78,6 +74,6 @@ def test_tenant_detach_smoke(neon_env_builder: NeonEnvBuilder):
    assert not (env.repo_dir / "tenants" / str(tenant_id)).exists()

    with pytest.raises(
        expected_exception=NeonPageserverApiException, match=f"Tenant {tenant_id} not found"
        expected_exception=PageserverApiException, match=f"Tenant {tenant_id} not found"
    ):
        pageserver_http.timeline_gc(tenant_id, timeline_id, 0)

@@ -1,7 +1,5 @@
import os
import pathlib
import signal
import subprocess
import threading
from contextlib import closing, contextmanager
from typing import Any, Dict, Optional, Tuple
@@ -12,7 +10,7 @@ from fixtures.neon_fixtures import (
    Etcd,
    NeonEnv,
    NeonEnvBuilder,
    NeonPageserverHttpClient,
    PageserverHttpClient,
    PortDistributor,
    Postgres,
    assert_no_in_progress_downloads_for_tenant,
@@ -21,10 +19,9 @@ from fixtures.neon_fixtures import (
    pg_distrib_dir,
    wait_for_last_record_lsn,
    wait_for_upload,
    wait_until,
)
from fixtures.types import Lsn, TenantId, TimelineId
from fixtures.utils import query_scalar, subprocess_capture
from fixtures.utils import query_scalar, start_in_background, subprocess_capture, wait_until


def assert_abs_margin_ratio(a: float, b: float, margin_ratio: float):
@@ -32,7 +29,7 @@ def assert_abs_margin_ratio(a: float, b: float, margin_ratio: float):


@contextmanager
def new_pageserver_helper(
def new_pageserver_service(
    new_pageserver_dir: pathlib.Path,
    pageserver_bin: pathlib.Path,
    remote_storage_mock_path: pathlib.Path,
@@ -49,7 +46,6 @@
        str(pageserver_bin),
        "--workdir",
        str(new_pageserver_dir),
        "--daemonize",
        "--update-config",
        f"-c listen_pg_addr='localhost:{pg_port}'",
        f"-c listen_http_addr='localhost:{http_port}'",
@@ -61,16 +57,26 @@
        cmd.append(
            f"-c broker_endpoints=['{broker.client_url()}']",
        )

    log.info("starting new pageserver %s", cmd)
    out = subprocess.check_output(cmd, text=True)
    log.info("started new pageserver %s", out)
    pageserver_client = PageserverHttpClient(
        port=http_port,
        auth_token=None,
        is_testing_enabled_or_skip=lambda: True,  # TODO: check if testing really enabled
    )
    try:
        yield
        pageserver_process = start_in_background(
            cmd, new_pageserver_dir, "pageserver.log", pageserver_client.check_status
        )
    except Exception as e:
        log.error(e)
        pageserver_process.kill()
        raise Exception(f"Failed to start pageserver as {cmd}, reason: {e}")

    log.info("new pageserver started")
    try:
        yield pageserver_process
    finally:
        log.info("stopping new pageserver")
        pid = int((new_pageserver_dir / "pageserver.pid").read_text())
        os.kill(pid, signal.SIGQUIT)
        pageserver_process.kill()
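
An illustrative aside, not part of the diff: the reshaped helper follows a start / yield / cleanup shape that the new start_in_background makes concise. A stripped-down sketch of the same pattern, with invented names:

# Stripped-down sketch of the contextmanager pattern used above.
from contextlib import contextmanager
from pathlib import Path

@contextmanager
def service(cmd, workdir: Path, check_status):
    proc = start_in_background(cmd, workdir, "service.log", check_status)
    try:
        yield proc  # the test body runs while the service is live
    finally:
        proc.kill()  # always reclaim the process, pass or fail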


@contextmanager
@@ -113,7 +119,7 @@ def load(pg: Postgres, stop_event: threading.Event, load_ok_event: threading.Eve
def populate_branch(
    pg: Postgres,
    tenant_id: TenantId,
    ps_http: NeonPageserverHttpClient,
    ps_http: PageserverHttpClient,
    create_table: bool,
    expected_sum: Optional[int],
) -> Tuple[TimelineId, Lsn]:
@@ -146,7 +152,7 @@ def populate_branch(


def ensure_checkpoint(
    pageserver_http: NeonPageserverHttpClient,
    pageserver_http: PageserverHttpClient,
    tenant_id: TenantId,
    timeline_id: TimelineId,
    current_lsn: Lsn,
@@ -159,7 +165,7 @@ def ensure_checkpoint(


def check_timeline_attached(
    new_pageserver_http_client: NeonPageserverHttpClient,
    new_pageserver_http_client: PageserverHttpClient,
    tenant_id: TenantId,
    timeline_id: TimelineId,
    old_timeline_detail: Dict[str, Any],
@@ -346,13 +352,13 @@ def test_tenant_relocation(
    log.info("new pageserver ports pg %s http %s", new_pageserver_pg_port, new_pageserver_http_port)
    pageserver_bin = pathlib.Path(neon_binpath) / "pageserver"

    new_pageserver_http = NeonPageserverHttpClient(
    new_pageserver_http = PageserverHttpClient(
        port=new_pageserver_http_port,
        auth_token=None,
        is_testing_enabled_or_skip=env.pageserver.is_testing_enabled_or_skip,
    )

    with new_pageserver_helper(
    with new_pageserver_service(
        new_pageserver_dir,
        pageserver_bin,
        remote_storage_mock_path,
@@ -386,6 +392,8 @@ def test_tenant_relocation(
        pg_distrib_dir,
        "--work-dir",
        os.path.join(test_output_dir),
        "--tmp-pg-port",
        str(port_distributor.get_port()),
    ]
    subprocess_capture(test_output_dir, cmd, check=True)
elif method == "minor":

@@ -1,6 +1,7 @@
from fixtures.log_helper import log
from fixtures.neon_fixtures import NeonEnvBuilder, wait_until
from fixtures.neon_fixtures import NeonEnvBuilder
from fixtures.types import TenantId, TimelineId
from fixtures.utils import wait_until


def get_only_element(l):  # noqa: E741

@@ -25,10 +25,9 @@ from fixtures.neon_fixtures import (
    available_remote_storages,
    wait_for_last_record_lsn,
    wait_for_upload,
    wait_until,
)
from fixtures.types import Lsn, TenantId, TimelineId
from fixtures.utils import query_scalar
from fixtures.utils import query_scalar, wait_until


async def tenant_workload(env: NeonEnv, pg: Postgres):

@@ -1,6 +1,7 @@
import pytest
from fixtures.neon_fixtures import NeonEnv, NeonPageserverApiException, wait_until
from fixtures.neon_fixtures import NeonEnv, PageserverApiException
from fixtures.types import TenantId, TimelineId
from fixtures.utils import wait_until


def test_timeline_delete(neon_simple_env: NeonEnv):
@@ -11,13 +12,13 @@ def test_timeline_delete(neon_simple_env: NeonEnv):
    # first try to delete non existing timeline
    # for existing tenant:
    invalid_timeline_id = TimelineId.generate()
    with pytest.raises(NeonPageserverApiException, match="timeline not found"):
    with pytest.raises(PageserverApiException, match="timeline not found"):
        ps_http.timeline_delete(tenant_id=env.initial_tenant, timeline_id=invalid_timeline_id)

    # for non existing tenant:
    invalid_tenant_id = TenantId.generate()
    with pytest.raises(
        NeonPageserverApiException,
        PageserverApiException,
        match=f"Tenant {invalid_tenant_id} not found in the local state",
    ):
        ps_http.timeline_delete(tenant_id=invalid_tenant_id, timeline_id=invalid_timeline_id)
@@ -32,7 +33,7 @@ def test_timeline_delete(neon_simple_env: NeonEnv):

    ps_http = env.pageserver.http_client()
    with pytest.raises(
        NeonPageserverApiException, match="Cannot delete timeline which has child timelines"
        PageserverApiException, match="Cannot delete timeline which has child timelines"
    ):

    timeline_path = (
@@ -64,7 +65,7 @@ def test_timeline_delete(neon_simple_env: NeonEnv):

    # check 404
    with pytest.raises(
        NeonPageserverApiException,
        PageserverApiException,
        match=f"Timeline {env.initial_tenant}/{leaf_timeline_id} was not found",
    ):
        ps_http.timeline_detail(env.initial_tenant, leaf_timeline_id)

@@ -11,7 +11,7 @@ from fixtures.log_helper import log
from fixtures.neon_fixtures import (
    NeonEnv,
    NeonEnvBuilder,
    NeonPageserverHttpClient,
    PageserverHttpClient,
    PgBin,
    PortDistributor,
    Postgres,
@@ -462,7 +462,7 @@ def assert_physical_size(env: NeonEnv, tenant_id: TenantId, timeline_id: Timelin
# Timeline logical size initialization is an asynchronous background task that runs once,
# try a few times to ensure it's activated properly
def wait_for_timeline_size_init(
    client: NeonPageserverHttpClient, tenant: TenantId, timeline: TimelineId
    client: PageserverHttpClient, tenant: TenantId, timeline: TimelineId
):
    for i in range(10):
        timeline_details = client.timeline_detail(

@@ -27,6 +27,7 @@ from fixtures.neon_fixtures import (
    RemoteStorageKind,
    RemoteStorageUsers,
    Safekeeper,
    SafekeeperHttpClient,
    SafekeeperPort,
    available_remote_storages,
    neon_binpath,
@@ -34,7 +35,7 @@ from fixtures.neon_fixtures import (
    wait_for_upload,
)
from fixtures.types import Lsn, TenantId, TimelineId
from fixtures.utils import get_dir_size, query_scalar
from fixtures.utils import get_dir_size, query_scalar, start_in_background


def wait_lsn_force_checkpoint(
@@ -841,7 +842,7 @@ class SafekeeperEnv:
        safekeeper_dir = self.repo_dir / f"sk{i}"
        safekeeper_dir.mkdir(exist_ok=True)

        args = [
        cmd = [
            self.bin_safekeeper,
            "-l",
            f"127.0.0.1:{port.pg}",
@@ -853,11 +854,22 @@
            str(i),
            "--broker-endpoints",
            self.broker.client_url(),
            "--daemonize",
        ]
        log.info(f'Running command "{" ".join(cmd)}"')

        log.info(f'Running command "{" ".join(args)}"')
        return subprocess.run(args, check=True)
        safekeeper_client = SafekeeperHttpClient(
            port=port.http,
            auth_token=None,
        )
        try:
            safekeeper_process = start_in_background(
                cmd, safekeeper_dir, "safekeeper.log", safekeeper_client.check_status
            )
            return safekeeper_process
        except Exception as e:
            log.error(e)
            safekeeper_process.kill()
            raise Exception(f"Failed to start safekeeper as {cmd}, reason: {e}")

    def get_safekeeper_connstrs(self):
        return ",".join([sk_proc.args[2] for sk_proc in self.safekeepers])
2
vendor/postgres-v14
vendored
Submodule vendor/postgres-v14 updated: 1b42ba86fb...e9b0010b45
2
vendor/postgres-v15
vendored
Submodule vendor/postgres-v15 updated: 74fd94d4e8...5cd7e44799