From fd88d4608c3e8a8cb8579786a7b507a436033efc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Arpad=20M=C3=BCller?= Date: Wed, 10 Apr 2024 09:12:07 +0200 Subject: [PATCH] Add command to time travel recover prefixes (#7322) Adds another tool to the DR toolbox: ability in pagectl to recover arbitrary prefixes in remote storage. Requires remote storage config, the prefix, and the travel-to timestamp parameter to be specified as cli args. The done-if-after parameter is also supported. Example invocation (after `aws login --profile dev`): ``` RUST_LOG=remote_storage=debug AWS_PROFILE=dev cargo run -p pagectl time-travel-remote-prefix 'remote_storage = { bucket_name = "neon-test-bucket-name", bucket_region = "us-east-2" }' wal/3aa8fcc61f6d357410b7de754b1d9001/641e5342083b2235ee3deb8066819683/ 2024-04-05T17:00:00Z ``` This has been written to resolve a customer recovery case: https://neondb.slack.com/archives/C033RQ5SPDH/p1712256888468009 There is validation of the prefix to prevent accidentially specifying too generic prefixes, which can cause corruption and data loss if used wrongly. Still, the validation is not perfect and it is important that the command is used with caution. If possible, `time_travel_remote_storage` should be used instead which has additional checks in place. --- Cargo.lock | 5 ++ pageserver/ctl/Cargo.toml | 5 ++ pageserver/ctl/src/main.rs | 166 ++++++++++++++++++++++++++++++++++++- 3 files changed, 175 insertions(+), 1 deletion(-) diff --git a/Cargo.lock b/Cargo.lock index a7e29b1de3..4c2bcf250e 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3477,12 +3477,17 @@ dependencies = [ "camino", "clap", "git-version", + "humantime", "pageserver", + "pageserver_api", "postgres_ffi", + "remote_storage", "serde", "serde_json", "svg_fmt", "tokio", + "tokio-util", + "toml_edit", "utils", "workspace_hack", ] diff --git a/pageserver/ctl/Cargo.toml b/pageserver/ctl/Cargo.toml index c5cd451e8d..843f5dd862 100644 --- a/pageserver/ctl/Cargo.toml +++ b/pageserver/ctl/Cargo.toml @@ -12,9 +12,14 @@ bytes.workspace = true camino.workspace = true clap = { workspace = true, features = ["string"] } git-version.workspace = true +humantime.workspace = true pageserver = { path = ".." } +pageserver_api.workspace = true +remote_storage = { path = "../../libs/remote_storage" } postgres_ffi.workspace = true tokio.workspace = true +tokio-util.workspace = true +toml_edit.workspace = true utils.workspace = true svg_fmt.workspace = true workspace_hack.workspace = true diff --git a/pageserver/ctl/src/main.rs b/pageserver/ctl/src/main.rs index e73d961e36..1fb75584fc 100644 --- a/pageserver/ctl/src/main.rs +++ b/pageserver/ctl/src/main.rs @@ -9,6 +9,11 @@ mod index_part; mod layer_map_analyzer; mod layers; +use std::{ + str::FromStr, + time::{Duration, SystemTime}, +}; + use camino::{Utf8Path, Utf8PathBuf}; use clap::{Parser, Subcommand}; use index_part::IndexPartCmd; @@ -20,8 +25,16 @@ use pageserver::{ tenant::{dump_layerfile_from_path, metadata::TimelineMetadata}, virtual_file, }; +use pageserver_api::shard::TenantShardId; use postgres_ffi::ControlFileData; -use utils::{lsn::Lsn, project_git_version}; +use remote_storage::{RemotePath, RemoteStorageConfig}; +use tokio_util::sync::CancellationToken; +use utils::{ + id::TimelineId, + logging::{self, LogFormat, TracingErrorLayerEnablement}, + lsn::Lsn, + project_git_version, +}; project_git_version!(GIT_VERSION); @@ -43,6 +56,7 @@ enum Commands { #[command(subcommand)] IndexPart(IndexPartCmd), PrintLayerFile(PrintLayerFileCmd), + TimeTravelRemotePrefix(TimeTravelRemotePrefixCmd), DrawTimeline {}, AnalyzeLayerMap(AnalyzeLayerMapCmd), #[command(subcommand)] @@ -68,6 +82,26 @@ struct PrintLayerFileCmd { path: Utf8PathBuf, } +/// Roll back the time for the specified prefix using S3 history. +/// +/// The command is fairly low level and powerful. Validation is only very light, +/// so it is more powerful, and thus potentially more dangerous. +#[derive(Parser)] +struct TimeTravelRemotePrefixCmd { + /// A configuration string for the remote_storage configuration. + /// + /// Example: `remote_storage = { bucket_name = "aws-storage-bucket-name", bucket_region = "us-east-2" }` + config_toml_str: String, + /// remote prefix to time travel recover. For safety reasons, we require it to contain + /// a timeline or tenant ID in the prefix. + prefix: String, + /// Timestamp to travel to. Given in format like `2024-01-20T10:45:45Z`. Assumes UTC and second accuracy. + travel_to: String, + /// Timestamp of the start of the operation, must be after any changes we want to roll back and after. + /// You can use a few seconds before invoking the command. Same format as `travel_to`. + done_if_after: Option, +} + #[derive(Parser)] struct AnalyzeLayerMapCmd { /// Pageserver data path @@ -78,6 +112,14 @@ struct AnalyzeLayerMapCmd { #[tokio::main] async fn main() -> anyhow::Result<()> { + logging::init( + LogFormat::Plain, + TracingErrorLayerEnablement::EnableWithRustLogFilter, + logging::Output::Stdout, + )?; + + logging::replace_panic_hook_with_tracing_panic_hook().forget(); + let cli = CliOpts::parse(); match cli.command { @@ -105,6 +147,42 @@ async fn main() -> anyhow::Result<()> { print_layerfile(&cmd.path).await?; } } + Commands::TimeTravelRemotePrefix(cmd) => { + let timestamp = humantime::parse_rfc3339(&cmd.travel_to) + .map_err(|_e| anyhow::anyhow!("Invalid time for travel_to: '{}'", cmd.travel_to))?; + + let done_if_after = if let Some(done_if_after) = &cmd.done_if_after { + humantime::parse_rfc3339(done_if_after).map_err(|_e| { + anyhow::anyhow!("Invalid time for done_if_after: '{}'", done_if_after) + })? + } else { + const SAFETY_MARGIN: Duration = Duration::from_secs(3); + tokio::time::sleep(SAFETY_MARGIN).await; + // Convert to string representation and back to get rid of sub-second values + let done_if_after = SystemTime::now(); + tokio::time::sleep(SAFETY_MARGIN).await; + done_if_after + }; + + let timestamp = strip_subsecond(timestamp); + let done_if_after = strip_subsecond(done_if_after); + + let Some(prefix) = validate_prefix(&cmd.prefix) else { + println!("specified prefix '{}' failed validation", cmd.prefix); + return Ok(()); + }; + let toml_document = toml_edit::Document::from_str(&cmd.config_toml_str)?; + let toml_item = toml_document + .get("remote_storage") + .expect("need remote_storage"); + let config = RemoteStorageConfig::from_toml(toml_item)?.expect("incomplete config"); + let storage = remote_storage::GenericRemoteStorage::from_config(&config); + let cancel = CancellationToken::new(); + storage + .unwrap() + .time_travel_recover(Some(&prefix), timestamp, done_if_after, &cancel) + .await?; + } }; Ok(()) } @@ -185,3 +263,89 @@ fn handle_metadata( Ok(()) } + +/// Ensures that the given S3 prefix is sufficiently constrained. +/// The command is very risky already and we don't want to expose something +/// that allows usually unintentional and quite catastrophic time travel of +/// an entire bucket, which would be a major catastrophy and away +/// by only one character change (similar to "rm -r /home /username/foobar"). +fn validate_prefix(prefix: &str) -> Option { + if prefix.is_empty() { + // Empty prefix means we want to specify the *whole* bucket + return None; + } + let components = prefix.split('/').collect::>(); + let (last, components) = { + let last = components.last()?; + if last.is_empty() { + ( + components.iter().nth_back(1)?, + &components[..(components.len() - 1)], + ) + } else { + (last, &components[..]) + } + }; + 'valid: { + if let Ok(_timeline_id) = TimelineId::from_str(last) { + // Ends in either a tenant or timeline ID + break 'valid; + } + if *last == "timelines" { + if let Some(before_last) = components.iter().nth_back(1) { + if let Ok(_tenant_id) = TenantShardId::from_str(before_last) { + // Has a valid tenant id + break 'valid; + } + } + } + + return None; + } + RemotePath::from_string(prefix).ok() +} + +fn strip_subsecond(timestamp: SystemTime) -> SystemTime { + let ts_str = humantime::format_rfc3339_seconds(timestamp).to_string(); + humantime::parse_rfc3339(&ts_str).expect("can't parse just created timestamp") +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_validate_prefix() { + assert_eq!(validate_prefix(""), None); + assert_eq!(validate_prefix("/"), None); + #[track_caller] + fn assert_valid(prefix: &str) { + let remote_path = RemotePath::from_string(prefix).unwrap(); + assert_eq!(validate_prefix(prefix), Some(remote_path)); + } + assert_valid("wal/3aa8fcc61f6d357410b7de754b1d9001/641e5342083b2235ee3deb8066819683/"); + // Path is not relative but absolute + assert_eq!( + validate_prefix( + "/wal/3aa8fcc61f6d357410b7de754b1d9001/641e5342083b2235ee3deb8066819683/" + ), + None + ); + assert_valid("wal/3aa8fcc61f6d357410b7de754b1d9001/"); + // Partial tenant IDs should be invalid, S3 will match all tenants with the specific ID prefix + assert_eq!(validate_prefix("wal/3aa8fcc61f6d357410b7d"), None); + assert_eq!(validate_prefix("wal"), None); + assert_eq!(validate_prefix("/wal/"), None); + assert_valid("pageserver/v1/tenants/3aa8fcc61f6d357410b7de754b1d9001"); + // Partial tenant ID + assert_eq!( + validate_prefix("pageserver/v1/tenants/3aa8fcc61f6d357410b"), + None + ); + assert_valid("pageserver/v1/tenants/3aa8fcc61f6d357410b7de754b1d9001/timelines"); + assert_valid("pageserver/v1/tenants/3aa8fcc61f6d357410b7de754b1d9001-0004/timelines"); + assert_valid("pageserver/v1/tenants/3aa8fcc61f6d357410b7de754b1d9001/timelines/"); + assert_valid("pageserver/v1/tenants/3aa8fcc61f6d357410b7de754b1d9001/timelines/641e5342083b2235ee3deb8066819683"); + assert_eq!(validate_prefix("pageserver/v1/tenants/"), None); + } +}