Add WAL offloading to s3 on safekeepers.

Separate task is launched for each timeline and stopped when timeline doesn't
need offloading. Decision who offloads is done through etcd leader election;
currently there is no pre condition for participating, that's a TODO.

neon_local and tests infrastructure for remote storage in safekeepers added,
along with the test itself.

ref #1009

Co-authored-by: Anton Shyrabokau <ahtoxa@Antons-MacBook-Pro.local>
This commit is contained in:
Arseny Sher
2022-04-27 00:24:59 -07:00
parent 1d71949c51
commit 0e1bd57c53
28 changed files with 1146 additions and 429 deletions

View File

@@ -43,10 +43,10 @@ pub struct SkTimelineInfo {
#[serde_as(as = "Option<DisplayFromStr>")]
#[serde(default)]
pub commit_lsn: Option<Lsn>,
/// LSN up to which safekeeper offloaded WAL to s3.
/// LSN up to which safekeeper has backed WAL.
#[serde_as(as = "Option<DisplayFromStr>")]
#[serde(default)]
pub s3_wal_lsn: Option<Lsn>,
pub backup_lsn: Option<Lsn>,
/// LSN of last checkpoint uploaded by pageserver.
#[serde_as(as = "Option<DisplayFromStr>")]
#[serde(default)]

View File

@@ -6,7 +6,6 @@ edition = "2021"
[dependencies]
anyhow = { version = "1.0", features = ["backtrace"] }
async-trait = "0.1"
metrics = { version = "0.1", path = "../metrics" }
once_cell = "1.8.0"
rusoto_core = "0.48"
@@ -15,6 +14,7 @@ serde = { version = "1.0", features = ["derive"] }
serde_json = "1"
tokio = { version = "1.17", features = ["sync", "macros", "fs", "io-util"] }
tokio-util = { version = "0.7", features = ["io"] }
toml_edit = { version = "0.13", features = ["easy"] }
tracing = "0.1.27"
workspace_hack = { version = "0.1", path = "../../workspace_hack" }

View File

@@ -16,8 +16,10 @@ use std::{
path::{Path, PathBuf},
};
use anyhow::Context;
use anyhow::{bail, Context};
use tokio::io;
use toml_edit::Item;
use tracing::info;
pub use self::{
@@ -203,6 +205,90 @@ pub fn path_with_suffix_extension(original_path: impl AsRef<Path>, suffix: &str)
.with_extension(new_extension.as_ref())
}
impl RemoteStorageConfig {
pub fn from_toml(toml: &toml_edit::Item) -> anyhow::Result<RemoteStorageConfig> {
let local_path = toml.get("local_path");
let bucket_name = toml.get("bucket_name");
let bucket_region = toml.get("bucket_region");
let max_concurrent_syncs = NonZeroUsize::new(
parse_optional_integer("max_concurrent_syncs", toml)?
.unwrap_or(DEFAULT_REMOTE_STORAGE_MAX_CONCURRENT_SYNCS),
)
.context("Failed to parse 'max_concurrent_syncs' as a positive integer")?;
let max_sync_errors = NonZeroU32::new(
parse_optional_integer("max_sync_errors", toml)?
.unwrap_or(DEFAULT_REMOTE_STORAGE_MAX_SYNC_ERRORS),
)
.context("Failed to parse 'max_sync_errors' as a positive integer")?;
let concurrency_limit = NonZeroUsize::new(
parse_optional_integer("concurrency_limit", toml)?
.unwrap_or(DEFAULT_REMOTE_STORAGE_S3_CONCURRENCY_LIMIT),
)
.context("Failed to parse 'concurrency_limit' as a positive integer")?;
let storage = match (local_path, bucket_name, bucket_region) {
(None, None, None) => bail!("no 'local_path' nor 'bucket_name' option"),
(_, Some(_), None) => {
bail!("'bucket_region' option is mandatory if 'bucket_name' is given ")
}
(_, None, Some(_)) => {
bail!("'bucket_name' option is mandatory if 'bucket_region' is given ")
}
(None, Some(bucket_name), Some(bucket_region)) => RemoteStorageKind::AwsS3(S3Config {
bucket_name: parse_toml_string("bucket_name", bucket_name)?,
bucket_region: parse_toml_string("bucket_region", bucket_region)?,
prefix_in_bucket: toml
.get("prefix_in_bucket")
.map(|prefix_in_bucket| parse_toml_string("prefix_in_bucket", prefix_in_bucket))
.transpose()?,
endpoint: toml
.get("endpoint")
.map(|endpoint| parse_toml_string("endpoint", endpoint))
.transpose()?,
concurrency_limit,
}),
(Some(local_path), None, None) => RemoteStorageKind::LocalFs(PathBuf::from(
parse_toml_string("local_path", local_path)?,
)),
(Some(_), Some(_), _) => bail!("local_path and bucket_name are mutually exclusive"),
};
Ok(RemoteStorageConfig {
max_concurrent_syncs,
max_sync_errors,
storage,
})
}
}
// Helper functions to parse a toml Item
fn parse_optional_integer<I, E>(name: &str, item: &toml_edit::Item) -> anyhow::Result<Option<I>>
where
I: TryFrom<i64, Error = E>,
E: std::error::Error + Send + Sync + 'static,
{
let toml_integer = match item.get(name) {
Some(item) => item
.as_integer()
.with_context(|| format!("configure option {name} is not an integer"))?,
None => return Ok(None),
};
I::try_from(toml_integer)
.map(Some)
.with_context(|| format!("configure option {name} is too large"))
}
fn parse_toml_string(name: &str, item: &Item) -> anyhow::Result<String> {
let s = item
.as_str()
.with_context(|| format!("configure option {name} is not a string"))?;
Ok(s.to_string())
}
#[cfg(test)]
mod tests {
use super::*;

View File

@@ -26,6 +26,9 @@ impl Lsn {
/// Maximum possible value for an LSN
pub const MAX: Lsn = Lsn(u64::MAX);
/// Invalid value for InvalidXLogRecPtr, as defined in xlogdefs.h
pub const INVALID: Lsn = Lsn(0);
/// Subtract a number, returning None on overflow.
pub fn checked_sub<T: Into<u64>>(self, other: T) -> Option<Lsn> {
let other: u64 = other.into();
@@ -103,6 +106,12 @@ impl Lsn {
pub fn is_aligned(&self) -> bool {
*self == self.align()
}
/// Return if the LSN is valid
/// mimics postgres XLogRecPtrIsInvalid macro
pub fn is_valid(self) -> bool {
self != Lsn::INVALID
}
}
impl From<u64> for Lsn {