mirror of
https://github.com/neondatabase/neon.git
synced 2026-06-04 22:10:39 +00:00
feat(pageserver): support detaching behavior v2 (#11158)
## Problem close https://github.com/neondatabase/neon/issues/10310 ## Summary of changes This patch adds a new behavior for the detach_ancestor API: detach with multi-level ancestor and no reparenting. Though we can potentially support multi-level + do reparenting / single-level + no-reparenting in the future, as it's not required for the recovery/snapshot epic, I'd prefer keeping things simple now that we only handle the old one and the new one instead of supporting the full feature matrix. I only added a test case of successful detaching instead of testing failures. I'd like to make this into staging and add more tests in the future. --------- Signed-off-by: Alex Chi Z <chi@neon.tech>
This commit is contained in:
@@ -72,6 +72,7 @@ use crate::tenant::remote_timeline_client::{
|
||||
use crate::tenant::secondary::SecondaryController;
|
||||
use crate::tenant::size::ModelInputs;
|
||||
use crate::tenant::storage_layer::{IoConcurrency, LayerAccessStatsReset, LayerName};
|
||||
use crate::tenant::timeline::detach_ancestor::DetachBehavior;
|
||||
use crate::tenant::timeline::offload::{OffloadError, offload_timeline};
|
||||
use crate::tenant::timeline::{
|
||||
CompactFlags, CompactOptions, CompactRequest, CompactionError, Timeline, WaitLsnTimeout,
|
||||
@@ -2505,6 +2506,8 @@ async fn timeline_detach_ancestor_handler(
|
||||
let tenant_shard_id: TenantShardId = parse_request_param(&request, "tenant_shard_id")?;
|
||||
check_permission(&request, Some(tenant_shard_id.tenant_id))?;
|
||||
let timeline_id: TimelineId = parse_request_param(&request, "timeline_id")?;
|
||||
let behavior: Option<DetachBehavior> = parse_query_param(&request, "detach_behavior")?;
|
||||
let behavior = behavior.unwrap_or_default();
|
||||
|
||||
let span = tracing::info_span!("detach_ancestor", tenant_id=%tenant_shard_id.tenant_id, shard_id=%tenant_shard_id.shard_slug(), %timeline_id);
|
||||
|
||||
@@ -2554,7 +2557,7 @@ async fn timeline_detach_ancestor_handler(
|
||||
let ctx = &ctx.with_scope_timeline(&timeline);
|
||||
|
||||
let progress = timeline
|
||||
.prepare_to_detach_from_ancestor(&tenant, options, ctx)
|
||||
.prepare_to_detach_from_ancestor(&tenant, options, behavior, ctx)
|
||||
.await?;
|
||||
|
||||
// uncomment to allow early as possible Tenant::drop
|
||||
@@ -2569,6 +2572,7 @@ async fn timeline_detach_ancestor_handler(
|
||||
tenant_shard_id,
|
||||
timeline_id,
|
||||
prepared,
|
||||
behavior,
|
||||
attempt,
|
||||
ctx,
|
||||
)
|
||||
|
||||
@@ -300,9 +300,8 @@ impl TimelineMetadata {
|
||||
|
||||
/// Returns true if anything was changed
|
||||
pub fn detach_from_ancestor(&mut self, branchpoint: &(TimelineId, Lsn)) {
|
||||
if let Some(ancestor) = self.body.ancestor_timeline {
|
||||
assert_eq!(ancestor, branchpoint.0);
|
||||
}
|
||||
// Detaching from ancestor now doesn't always detach directly to the direct ancestor, but we
|
||||
// ensure the LSN is the same. So we don't check the timeline ID.
|
||||
if self.body.ancestor_lsn != Lsn(0) {
|
||||
assert_eq!(self.body.ancestor_lsn, branchpoint.1);
|
||||
}
|
||||
|
||||
@@ -1914,6 +1914,7 @@ impl TenantManager {
|
||||
tenant_shard_id: TenantShardId,
|
||||
timeline_id: TimelineId,
|
||||
prepared: PreparedTimelineDetach,
|
||||
behavior: detach_ancestor::DetachBehavior,
|
||||
mut attempt: detach_ancestor::Attempt,
|
||||
ctx: &RequestContext,
|
||||
) -> Result<HashSet<TimelineId>, detach_ancestor::Error> {
|
||||
@@ -1957,7 +1958,14 @@ impl TenantManager {
|
||||
.map_err(Error::NotFound)?;
|
||||
|
||||
let resp = timeline
|
||||
.detach_from_ancestor_and_reparent(&tenant, prepared, ctx)
|
||||
.detach_from_ancestor_and_reparent(
|
||||
&tenant,
|
||||
prepared,
|
||||
attempt.ancestor_timeline_id,
|
||||
attempt.ancestor_lsn,
|
||||
behavior,
|
||||
ctx,
|
||||
)
|
||||
.await?;
|
||||
|
||||
let mut slot_guard = slot_guard;
|
||||
|
||||
@@ -5388,9 +5388,10 @@ impl Timeline {
|
||||
self: &Arc<Timeline>,
|
||||
tenant: &crate::tenant::Tenant,
|
||||
options: detach_ancestor::Options,
|
||||
behavior: detach_ancestor::DetachBehavior,
|
||||
ctx: &RequestContext,
|
||||
) -> Result<detach_ancestor::Progress, detach_ancestor::Error> {
|
||||
detach_ancestor::prepare(self, tenant, options, ctx).await
|
||||
detach_ancestor::prepare(self, tenant, behavior, options, ctx).await
|
||||
}
|
||||
|
||||
/// Second step of detach from ancestor; detaches the `self` from it's current ancestor and
|
||||
@@ -5406,9 +5407,21 @@ impl Timeline {
|
||||
self: &Arc<Timeline>,
|
||||
tenant: &crate::tenant::Tenant,
|
||||
prepared: detach_ancestor::PreparedTimelineDetach,
|
||||
ancestor_timeline_id: TimelineId,
|
||||
ancestor_lsn: Lsn,
|
||||
behavior: detach_ancestor::DetachBehavior,
|
||||
ctx: &RequestContext,
|
||||
) -> Result<detach_ancestor::DetachingAndReparenting, detach_ancestor::Error> {
|
||||
detach_ancestor::detach_and_reparent(self, tenant, prepared, ctx).await
|
||||
detach_ancestor::detach_and_reparent(
|
||||
self,
|
||||
tenant,
|
||||
prepared,
|
||||
ancestor_timeline_id,
|
||||
ancestor_lsn,
|
||||
behavior,
|
||||
ctx,
|
||||
)
|
||||
.await
|
||||
}
|
||||
|
||||
/// Final step which unblocks the GC.
|
||||
|
||||
@@ -32,6 +32,9 @@ pub(crate) enum Error {
|
||||
#[error("too many ancestors")]
|
||||
TooManyAncestors,
|
||||
|
||||
#[error("ancestor is not empty")]
|
||||
AncestorNotEmpty,
|
||||
|
||||
#[error("shutting down, please retry later")]
|
||||
ShuttingDown,
|
||||
|
||||
@@ -89,7 +92,9 @@ impl From<Error> for ApiError {
|
||||
fn from(value: Error) -> Self {
|
||||
match value {
|
||||
Error::NoAncestor => ApiError::Conflict(value.to_string()),
|
||||
Error::TooManyAncestors => ApiError::BadRequest(anyhow::anyhow!("{value}")),
|
||||
Error::TooManyAncestors | Error::AncestorNotEmpty => {
|
||||
ApiError::BadRequest(anyhow::anyhow!("{value}"))
|
||||
}
|
||||
Error::ShuttingDown => ApiError::ShuttingDown,
|
||||
Error::Archived(_) => ApiError::BadRequest(anyhow::anyhow!("{value}")),
|
||||
Error::OtherTimelineDetachOngoing(_) | Error::FailedToReparentAll => {
|
||||
@@ -127,13 +132,37 @@ pub(crate) struct PreparedTimelineDetach {
|
||||
layers: Vec<Layer>,
|
||||
}
|
||||
|
||||
/// TODO: this should be part of PageserverConf because we cannot easily modify cplane arguments.
|
||||
// TODO: this should be part of PageserverConf because we cannot easily modify cplane arguments.
|
||||
#[derive(Debug)]
|
||||
pub(crate) struct Options {
|
||||
pub(crate) rewrite_concurrency: std::num::NonZeroUsize,
|
||||
pub(crate) copy_concurrency: std::num::NonZeroUsize,
|
||||
}
|
||||
|
||||
/// Controls the detach ancestor behavior.
|
||||
/// - When set to `NoAncestorAndReparent`, we will only detach a branch if its ancestor is a root branch. It will automatically reparent any children of the ancestor before and at the branch point.
|
||||
/// - When set to `MultiLevelAndNoReparent`, we will detach a branch from multiple levels of ancestors, and no reparenting will happen at all.
|
||||
#[derive(Debug, Clone, Copy, Default)]
|
||||
pub enum DetachBehavior {
|
||||
#[default]
|
||||
NoAncestorAndReparent,
|
||||
MultiLevelAndNoReparent,
|
||||
}
|
||||
|
||||
impl std::str::FromStr for DetachBehavior {
|
||||
type Err = &'static str;
|
||||
|
||||
fn from_str(s: &str) -> Result<Self, Self::Err> {
|
||||
match s {
|
||||
"no_ancestor_and_reparent" => Ok(DetachBehavior::NoAncestorAndReparent),
|
||||
"multi_level_and_no_reparent" => Ok(DetachBehavior::MultiLevelAndNoReparent),
|
||||
"v1" => Ok(DetachBehavior::NoAncestorAndReparent),
|
||||
"v2" => Ok(DetachBehavior::MultiLevelAndNoReparent),
|
||||
_ => Err("cannot parse detach behavior"),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl Default for Options {
|
||||
fn default() -> Self {
|
||||
Self {
|
||||
@@ -147,7 +176,8 @@ impl Default for Options {
|
||||
#[derive(Debug)]
|
||||
pub(crate) struct Attempt {
|
||||
pub(crate) timeline_id: TimelineId,
|
||||
|
||||
pub(crate) ancestor_timeline_id: TimelineId,
|
||||
pub(crate) ancestor_lsn: Lsn,
|
||||
_guard: completion::Completion,
|
||||
gate_entered: Option<utils::sync::gate::GateGuard>,
|
||||
}
|
||||
@@ -167,25 +197,30 @@ impl Attempt {
|
||||
pub(super) async fn prepare(
|
||||
detached: &Arc<Timeline>,
|
||||
tenant: &Tenant,
|
||||
behavior: DetachBehavior,
|
||||
options: Options,
|
||||
ctx: &RequestContext,
|
||||
) -> Result<Progress, Error> {
|
||||
use Error::*;
|
||||
|
||||
let Some((ancestor, ancestor_lsn)) = detached
|
||||
let Some((mut ancestor, mut ancestor_lsn)) = detached
|
||||
.ancestor_timeline
|
||||
.as_ref()
|
||||
.map(|tl| (tl.clone(), detached.ancestor_lsn))
|
||||
else {
|
||||
let ancestor_id;
|
||||
let ancestor_lsn;
|
||||
let still_in_progress = {
|
||||
let accessor = detached.remote_client.initialized_upload_queue()?;
|
||||
|
||||
// we are safe to inspect the latest uploaded, because we can only witness this after
|
||||
// restart is complete and ancestor is no more.
|
||||
let latest = accessor.latest_uploaded_index_part();
|
||||
if latest.lineage.detached_previous_ancestor().is_none() {
|
||||
let Some((id, lsn)) = latest.lineage.detached_previous_ancestor() else {
|
||||
return Err(NoAncestor);
|
||||
};
|
||||
ancestor_id = id;
|
||||
ancestor_lsn = lsn;
|
||||
|
||||
latest
|
||||
.gc_blocking
|
||||
@@ -196,7 +231,8 @@ pub(super) async fn prepare(
|
||||
if still_in_progress {
|
||||
// gc is still blocked, we can still reparent and complete.
|
||||
// we are safe to reparent remaining, because they were locked in in the beginning.
|
||||
let attempt = continue_with_blocked_gc(detached, tenant).await?;
|
||||
let attempt =
|
||||
continue_with_blocked_gc(detached, tenant, ancestor_id, ancestor_lsn).await?;
|
||||
|
||||
// because the ancestor of detached is already set to none, we have published all
|
||||
// of the layers, so we are still "prepared."
|
||||
@@ -224,13 +260,34 @@ pub(super) async fn prepare(
|
||||
|
||||
check_no_archived_children_of_ancestor(tenant, detached, &ancestor, ancestor_lsn)?;
|
||||
|
||||
if ancestor.ancestor_timeline.is_some() {
|
||||
if let DetachBehavior::MultiLevelAndNoReparent = behavior {
|
||||
// If the ancestor has an ancestor, we might be able to fast-path detach it if the current ancestor does not have any data written/used by the detaching timeline.
|
||||
while let Some(ancestor_of_ancestor) = ancestor.ancestor_timeline.clone() {
|
||||
if ancestor_lsn != ancestor.ancestor_lsn {
|
||||
// non-technical requirement; we could flatten still if ancestor LSN does not match but that needs
|
||||
// us to copy and cut more layers.
|
||||
return Err(AncestorNotEmpty);
|
||||
}
|
||||
// Use the ancestor of the ancestor as the new ancestor (only when the ancestor LSNs are the same)
|
||||
ancestor_lsn = ancestor.ancestor_lsn; // Get the LSN first before resetting the `ancestor` variable
|
||||
ancestor = ancestor_of_ancestor;
|
||||
// TODO: do we still need to check if we don't want to reparent?
|
||||
check_no_archived_children_of_ancestor(tenant, detached, &ancestor, ancestor_lsn)?;
|
||||
}
|
||||
} else if ancestor.ancestor_timeline.is_some() {
|
||||
// non-technical requirement; we could flatten N ancestors just as easily but we chose
|
||||
// not to, at least initially
|
||||
return Err(TooManyAncestors);
|
||||
}
|
||||
|
||||
let attempt = start_new_attempt(detached, tenant).await?;
|
||||
tracing::info!(
|
||||
"attempt to detach the timeline from the ancestor: {}@{}, behavior={:?}",
|
||||
ancestor.timeline_id,
|
||||
ancestor_lsn,
|
||||
behavior
|
||||
);
|
||||
|
||||
let attempt = start_new_attempt(detached, tenant, ancestor.timeline_id, ancestor_lsn).await?;
|
||||
|
||||
utils::pausable_failpoint!("timeline-detach-ancestor::before_starting_after_locking-pausable");
|
||||
|
||||
@@ -450,8 +507,13 @@ pub(super) async fn prepare(
|
||||
Ok(Progress::Prepared(attempt, prepared))
|
||||
}
|
||||
|
||||
async fn start_new_attempt(detached: &Timeline, tenant: &Tenant) -> Result<Attempt, Error> {
|
||||
let attempt = obtain_exclusive_attempt(detached, tenant)?;
|
||||
async fn start_new_attempt(
|
||||
detached: &Timeline,
|
||||
tenant: &Tenant,
|
||||
ancestor_timeline_id: TimelineId,
|
||||
ancestor_lsn: Lsn,
|
||||
) -> Result<Attempt, Error> {
|
||||
let attempt = obtain_exclusive_attempt(detached, tenant, ancestor_timeline_id, ancestor_lsn)?;
|
||||
|
||||
// insert the block in the index_part.json, if not already there.
|
||||
let _dont_care = tenant
|
||||
@@ -466,13 +528,23 @@ async fn start_new_attempt(detached: &Timeline, tenant: &Tenant) -> Result<Attem
|
||||
Ok(attempt)
|
||||
}
|
||||
|
||||
async fn continue_with_blocked_gc(detached: &Timeline, tenant: &Tenant) -> Result<Attempt, Error> {
|
||||
async fn continue_with_blocked_gc(
|
||||
detached: &Timeline,
|
||||
tenant: &Tenant,
|
||||
ancestor_timeline_id: TimelineId,
|
||||
ancestor_lsn: Lsn,
|
||||
) -> Result<Attempt, Error> {
|
||||
// FIXME: it would be nice to confirm that there is an in-memory version, since we've just
|
||||
// verified there is a persistent one?
|
||||
obtain_exclusive_attempt(detached, tenant)
|
||||
obtain_exclusive_attempt(detached, tenant, ancestor_timeline_id, ancestor_lsn)
|
||||
}
|
||||
|
||||
fn obtain_exclusive_attempt(detached: &Timeline, tenant: &Tenant) -> Result<Attempt, Error> {
|
||||
fn obtain_exclusive_attempt(
|
||||
detached: &Timeline,
|
||||
tenant: &Tenant,
|
||||
ancestor_timeline_id: TimelineId,
|
||||
ancestor_lsn: Lsn,
|
||||
) -> Result<Attempt, Error> {
|
||||
use Error::{OtherTimelineDetachOngoing, ShuttingDown};
|
||||
|
||||
// ensure we are the only active attempt for this tenant
|
||||
@@ -493,6 +565,8 @@ fn obtain_exclusive_attempt(detached: &Timeline, tenant: &Tenant) -> Result<Atte
|
||||
|
||||
Ok(Attempt {
|
||||
timeline_id: detached.timeline_id,
|
||||
ancestor_timeline_id,
|
||||
ancestor_lsn,
|
||||
_guard: guard,
|
||||
gate_entered: Some(_gate_entered),
|
||||
})
|
||||
@@ -795,6 +869,9 @@ pub(super) async fn detach_and_reparent(
|
||||
detached: &Arc<Timeline>,
|
||||
tenant: &Tenant,
|
||||
prepared: PreparedTimelineDetach,
|
||||
ancestor_timeline_id: TimelineId,
|
||||
ancestor_lsn: Lsn,
|
||||
behavior: DetachBehavior,
|
||||
_ctx: &RequestContext,
|
||||
) -> Result<DetachingAndReparenting, Error> {
|
||||
let PreparedTimelineDetach { layers } = prepared;
|
||||
@@ -822,7 +899,30 @@ pub(super) async fn detach_and_reparent(
|
||||
"cannot (detach? reparent)? complete if the operation is not still ongoing"
|
||||
);
|
||||
|
||||
let ancestor = match (detached.ancestor_timeline.as_ref(), recorded_branchpoint) {
|
||||
let ancestor_to_detach = match detached.ancestor_timeline.as_ref() {
|
||||
Some(mut ancestor) => {
|
||||
while ancestor.timeline_id != ancestor_timeline_id {
|
||||
match ancestor.ancestor_timeline.as_ref() {
|
||||
Some(found) => {
|
||||
if ancestor_lsn != ancestor.ancestor_lsn {
|
||||
return Err(Error::DetachReparent(anyhow::anyhow!(
|
||||
"cannot find the ancestor timeline to detach from: wrong ancestor lsn"
|
||||
)));
|
||||
}
|
||||
ancestor = found;
|
||||
}
|
||||
None => {
|
||||
return Err(Error::DetachReparent(anyhow::anyhow!(
|
||||
"cannot find the ancestor timeline to detach from"
|
||||
)));
|
||||
}
|
||||
}
|
||||
}
|
||||
Some(ancestor)
|
||||
}
|
||||
None => None,
|
||||
};
|
||||
let ancestor = match (ancestor_to_detach, recorded_branchpoint) {
|
||||
(Some(ancestor), None) => {
|
||||
assert!(
|
||||
!layers.is_empty(),
|
||||
@@ -895,6 +995,11 @@ pub(super) async fn detach_and_reparent(
|
||||
Ancestor::Detached(ancestor, ancestor_lsn) => (ancestor, ancestor_lsn, false),
|
||||
};
|
||||
|
||||
if let DetachBehavior::MultiLevelAndNoReparent = behavior {
|
||||
// Do not reparent if the user requests to behave so.
|
||||
return Ok(DetachingAndReparenting::Reparented(HashSet::new()));
|
||||
}
|
||||
|
||||
let mut tasks = tokio::task::JoinSet::new();
|
||||
|
||||
// Returns a single permit semaphore which will be used to make one reparenting succeed,
|
||||
@@ -1032,6 +1137,11 @@ pub(super) async fn complete(
|
||||
}
|
||||
|
||||
/// Query against a locked `Tenant::timelines`.
|
||||
///
|
||||
/// A timeline is reparentable if:
|
||||
///
|
||||
/// - It is not the timeline being detached.
|
||||
/// - It has the same ancestor as the timeline being detached. Note that the ancestor might not be the direct ancestor.
|
||||
fn reparentable_timelines<'a, I>(
|
||||
timelines: I,
|
||||
detached: &'a Arc<Timeline>,
|
||||
|
||||
@@ -1070,11 +1070,14 @@ class PageserverHttpClient(requests.Session, MetricsGetter):
|
||||
tenant_id: TenantId | TenantShardId,
|
||||
timeline_id: TimelineId,
|
||||
batch_size: int | None = None,
|
||||
behavior_v2: bool = False,
|
||||
**kwargs,
|
||||
) -> set[TimelineId]:
|
||||
params = {}
|
||||
params: dict[str, Any] = {}
|
||||
if batch_size is not None:
|
||||
params["batch_size"] = batch_size
|
||||
if behavior_v2:
|
||||
params["detach_behavior"] = "v2"
|
||||
res = self.put(
|
||||
f"http://localhost:{self.port}/v1/tenant/{tenant_id}/timeline/{timeline_id}/detach_ancestor",
|
||||
params=params,
|
||||
|
||||
@@ -319,8 +319,9 @@ def test_ancestor_detach_reparents_earlier(neon_env_builder: NeonEnvBuilder):
|
||||
# this does not contain Z in the end, so fromisoformat accepts it
|
||||
# it is to be in line with the deletion timestamp.. well, almost.
|
||||
when = original_ancestor[2][:26]
|
||||
when_ts = datetime.datetime.fromisoformat(when)
|
||||
assert when_ts < datetime.datetime.now()
|
||||
when_ts = datetime.datetime.fromisoformat(when).replace(tzinfo=datetime.UTC)
|
||||
now = datetime.datetime.utcnow().replace(tzinfo=datetime.UTC)
|
||||
assert when_ts < now
|
||||
assert len(lineage.get("reparenting_history", [])) == 0
|
||||
elif expected_ancestor == timeline_id:
|
||||
assert len(lineage.get("original_ancestor", [])) == 0
|
||||
@@ -342,6 +343,138 @@ def test_ancestor_detach_reparents_earlier(neon_env_builder: NeonEnvBuilder):
|
||||
wait_timeline_detail_404(client, env.initial_tenant, env.initial_timeline)
|
||||
|
||||
|
||||
def test_ancestor_detach_behavior_v2(neon_env_builder: NeonEnvBuilder):
|
||||
"""
|
||||
Test the v2 behavior of ancestor detach.
|
||||
|
||||
old main -------|---------X--------->
|
||||
| | |
|
||||
| | +-> after
|
||||
| +--X empty snapshot branch
|
||||
| |
|
||||
| +-> branch-to-detach
|
||||
|
|
||||
+-> earlier
|
||||
|
||||
Ends up as:
|
||||
|
||||
old main -------|---------X--------->
|
||||
| | |
|
||||
| | +-> after
|
||||
| +--X empty snapshot branch
|
||||
|
|
||||
+-> earlier
|
||||
|
||||
|
||||
new main -------|---------|----> branch-to-detach
|
||||
"""
|
||||
|
||||
env = neon_env_builder.init_start()
|
||||
|
||||
env.pageserver.allowed_errors.extend(SHUTDOWN_ALLOWED_ERRORS)
|
||||
|
||||
client = env.pageserver.http_client()
|
||||
|
||||
with env.endpoints.create_start("main", tenant_id=env.initial_tenant) as ep:
|
||||
ep.safe_psql("CREATE TABLE foo (i BIGINT);")
|
||||
ep.safe_psql("CREATE TABLE audit AS SELECT 1 as starts;")
|
||||
|
||||
branchpoint_pipe = wait_for_last_flush_lsn(
|
||||
env, ep, env.initial_tenant, env.initial_timeline
|
||||
)
|
||||
|
||||
ep.safe_psql("INSERT INTO foo SELECT i::bigint FROM generate_series(0, 8191) g(i);")
|
||||
|
||||
branchpoint_x = wait_for_last_flush_lsn(env, ep, env.initial_tenant, env.initial_timeline)
|
||||
client.timeline_checkpoint(env.initial_tenant, env.initial_timeline)
|
||||
|
||||
ep.safe_psql("INSERT INTO foo SELECT i::bigint FROM generate_series(8192, 16383) g(i);")
|
||||
wait_for_last_flush_lsn(env, ep, env.initial_tenant, env.initial_timeline)
|
||||
|
||||
earlier = env.create_branch(
|
||||
"earlier", ancestor_branch_name="main", ancestor_start_lsn=branchpoint_pipe
|
||||
)
|
||||
|
||||
snapshot_branchpoint = env.create_branch(
|
||||
"snapshot_branchpoint", ancestor_branch_name="main", ancestor_start_lsn=branchpoint_x
|
||||
)
|
||||
|
||||
branch_to_detach = env.create_branch(
|
||||
"branch_to_detach",
|
||||
ancestor_branch_name="snapshot_branchpoint",
|
||||
ancestor_start_lsn=branchpoint_x,
|
||||
)
|
||||
|
||||
after = env.create_branch("after", ancestor_branch_name="main", ancestor_start_lsn=None)
|
||||
|
||||
all_reparented = client.detach_ancestor(env.initial_tenant, branch_to_detach, behavior_v2=True)
|
||||
assert set(all_reparented) == set()
|
||||
|
||||
env.pageserver.quiesce_tenants()
|
||||
|
||||
# checking the ancestor after is much faster than waiting for the endpoint not start
|
||||
expected_result = [
|
||||
("main", env.initial_timeline, None, 16384, 1),
|
||||
("after", after, env.initial_timeline, 16384, 1),
|
||||
("snapshot_branchpoint", snapshot_branchpoint, env.initial_timeline, 8192, 1),
|
||||
("branch_to_detach", branch_to_detach, None, 8192, 1),
|
||||
("earlier", earlier, env.initial_timeline, 0, 1),
|
||||
]
|
||||
|
||||
assert isinstance(env.pageserver_remote_storage, LocalFsStorage)
|
||||
|
||||
for branch_name, queried_timeline, expected_ancestor, _, _ in expected_result:
|
||||
details = client.timeline_detail(env.initial_tenant, queried_timeline)
|
||||
ancestor_timeline_id = details["ancestor_timeline_id"]
|
||||
if expected_ancestor is None:
|
||||
assert ancestor_timeline_id is None
|
||||
else:
|
||||
assert (
|
||||
TimelineId(ancestor_timeline_id) == expected_ancestor
|
||||
), f"when checking branch {branch_name}, mapping={expected_result}"
|
||||
|
||||
index_part = env.pageserver_remote_storage.index_content(
|
||||
env.initial_tenant, queried_timeline
|
||||
)
|
||||
lineage = index_part["lineage"]
|
||||
assert lineage is not None
|
||||
|
||||
assert lineage.get("reparenting_history_overflown", "false") == "false"
|
||||
|
||||
if queried_timeline == branch_to_detach:
|
||||
original_ancestor = lineage["original_ancestor"]
|
||||
assert original_ancestor is not None
|
||||
assert original_ancestor[0] == str(env.initial_timeline)
|
||||
assert original_ancestor[1] == str(branchpoint_x)
|
||||
|
||||
# this does not contain Z in the end, so fromisoformat accepts it
|
||||
# it is to be in line with the deletion timestamp.. well, almost.
|
||||
when = original_ancestor[2][:26]
|
||||
when_ts = datetime.datetime.fromisoformat(when).replace(tzinfo=datetime.UTC)
|
||||
now = datetime.datetime.utcnow().replace(tzinfo=datetime.UTC)
|
||||
assert when_ts < now
|
||||
assert len(lineage.get("reparenting_history", [])) == 0
|
||||
elif expected_ancestor == branch_to_detach:
|
||||
assert len(lineage.get("original_ancestor", [])) == 0
|
||||
assert lineage["reparenting_history"] == [str(env.initial_timeline)]
|
||||
else:
|
||||
assert len(lineage.get("original_ancestor", [])) == 0
|
||||
assert len(lineage.get("reparenting_history", [])) == 0
|
||||
|
||||
for name, _, _, rows, starts in expected_result:
|
||||
with env.endpoints.create_start(name, tenant_id=env.initial_tenant) as ep:
|
||||
assert ep.safe_psql("SELECT count(*) FROM foo;")[0][0] == rows
|
||||
assert ep.safe_psql(f"SELECT count(*) FROM audit WHERE starts = {starts}")[0][0] == 1
|
||||
|
||||
# delete the new timeline to confirm it doesn't carry over the anything from the old timeline
|
||||
client.timeline_delete(env.initial_tenant, branch_to_detach)
|
||||
wait_timeline_detail_404(client, env.initial_tenant, branch_to_detach)
|
||||
|
||||
# delete the after timeline
|
||||
client.timeline_delete(env.initial_tenant, after)
|
||||
wait_timeline_detail_404(client, env.initial_tenant, after)
|
||||
|
||||
|
||||
def test_detached_receives_flushes_while_being_detached(neon_env_builder: NeonEnvBuilder):
|
||||
"""
|
||||
Makes sure that the timeline is able to receive writes through-out the detach process.
|
||||
|
||||
Reference in New Issue
Block a user