mirror of
https://github.com/neondatabase/neon.git
synced 2026-05-27 01:50:38 +00:00
extend detach/delete tests
This commit is contained in:
committed by
Dmitry Rodionov
parent
520ffb341b
commit
1a5af6d7a5
@@ -13,7 +13,7 @@ use crate::timelines::CreateRepo;
|
||||
use crate::walredo::PostgresRedoManager;
|
||||
use crate::{thread_mgr, timelines, walreceiver};
|
||||
use crate::{DatadirTimelineImpl, RepositoryImpl};
|
||||
use anyhow::{bail, Context};
|
||||
use anyhow::Context;
|
||||
use serde::{Deserialize, Serialize};
|
||||
use serde_with::{serde_as, DisplayFromStr};
|
||||
use std::collections::hash_map::Entry;
|
||||
@@ -401,7 +401,14 @@ pub fn get_local_timeline_with_load(
|
||||
}
|
||||
|
||||
pub fn delete_timeline(tenant_id: ZTenantId, timeline_id: ZTimelineId) -> anyhow::Result<()> {
|
||||
// shutdown the timeline tasks (this shuts down the walreceiver)
|
||||
// Start with the shutdown of timeline tasks (this shuts down the walreceiver)
|
||||
// It is important that we do not take locks here, and do not check whether the timeline exists
|
||||
// because if we hold tenants_state::write_tenants() while waiting for the threads to join
|
||||
// we cannot create new timelines and tenants, and that can take quite some time,
|
||||
// it can even become stuck due to a bug, making the whole pageserver unavailable for some operations
|
||||
// so this is how we deal with concurrent delete requests: shut down everything, wait for confirmation
|
||||
// and then try to actually remove the timeline from in-memory state; this is the point where concurrent requests
|
||||
// will synchronize and either fail with the not found error or succeed
|
||||
|
||||
let (sender, receiver) = std::sync::mpsc::channel::<()>();
|
||||
tenants_state::try_send_timeline_update(LocalTimelineUpdate::Detach {
|
||||
@@ -417,13 +424,10 @@ pub fn delete_timeline(tenant_id: ZTenantId, timeline_id: ZTimelineId) -> anyhow
|
||||
debug!("thread shutdown completed");
|
||||
match tenants_state::write_tenants().get_mut(&tenant_id) {
|
||||
Some(tenant) => {
|
||||
tenant
|
||||
.repo
|
||||
.delete_timeline(timeline_id)
|
||||
.context("Failed to delete tenant timeline from repo")?;
|
||||
tenant.repo.delete_timeline(timeline_id)?;
|
||||
tenant.local_timelines.remove(&timeline_id);
|
||||
}
|
||||
None => warn!("Tenant {tenant_id} not found in local tenant state"),
|
||||
None => anyhow::bail!("Tenant {tenant_id} not found in local tenant state"),
|
||||
}
|
||||
|
||||
Ok(())
|
||||
@@ -552,7 +556,7 @@ fn check_broken_timeline(
|
||||
// A timeline with zero disk consistent LSN can happen when the page server
|
||||
// failed to checkpoint the timeline import data when creating that timeline.
|
||||
if metadata.disk_consistent_lsn() == Lsn::INVALID {
|
||||
bail!("Timeline {timeline_id} has a zero disk consistent LSN.");
|
||||
anyhow::bail!("Timeline {timeline_id} has a zero disk consistent LSN.");
|
||||
}
|
||||
|
||||
Ok(())
|
||||
@@ -615,7 +619,7 @@ fn attach_downloaded_tenant(
|
||||
match tenants_state::write_tenants().get_mut(&tenant_id) {
|
||||
Some(tenant) => match tenant.local_timelines.entry(timeline_id) {
|
||||
Entry::Occupied(_) => {
|
||||
bail!("Local timeline {timeline_id} already registered")
|
||||
anyhow::bail!("Local timeline {timeline_id} already registered")
|
||||
}
|
||||
Entry::Vacant(v) => {
|
||||
v.insert(load_local_timeline(repo, timeline_id).with_context(|| {
|
||||
@@ -623,7 +627,7 @@ fn attach_downloaded_tenant(
|
||||
})?);
|
||||
}
|
||||
},
|
||||
None => bail!(
|
||||
None => anyhow::bail!(
|
||||
"Tenant {} not found in local tenant state",
|
||||
repo.tenant_id()
|
||||
),
|
||||
|
||||
@@ -105,26 +105,3 @@ def test_ancestor_branch(neon_env_builder: NeonEnvBuilder):
|
||||
|
||||
branch2_cur.execute('SELECT count(*) FROM foo')
|
||||
assert branch2_cur.fetchone() == (300000, )
|
||||
|
||||
|
||||
def test_ancestor_branch_delete(neon_simple_env: NeonEnv):
|
||||
env = neon_simple_env
|
||||
|
||||
parent_timeline_id = env.neon_cli.create_branch("test_ancestor_branch_delete_parent", "empty")
|
||||
|
||||
leaf_timeline_id = env.neon_cli.create_branch("test_ancestor_branch_delete_branch1",
|
||||
"test_ancestor_branch_delete_parent")
|
||||
|
||||
ps_http = env.pageserver.http_client()
|
||||
with pytest.raises(NeonPageserverApiException,
|
||||
match="Failed to delete tenant timeline from repo"):
|
||||
ps_http.timeline_delete(env.initial_tenant, parent_timeline_id)
|
||||
|
||||
ps_http.timeline_delete(env.initial_tenant, leaf_timeline_id)
|
||||
# check 404
|
||||
with pytest.raises(NeonPageserverApiException,
|
||||
match="is not found neither locally nor remotely"):
|
||||
ps_http.timeline_detail(env.initial_tenant, leaf_timeline_id)
|
||||
|
||||
# FIXME leaves tenant without timelines, should we prevent deletion of root timeline?
|
||||
ps_http.timeline_delete(env.initial_tenant, parent_timeline_id)
|
||||
|
||||
@@ -4,14 +4,25 @@ import psycopg2
|
||||
import pytest
|
||||
|
||||
from fixtures.log_helper import log
|
||||
from fixtures.neon_fixtures import NeonEnvBuilder
|
||||
from fixtures.neon_fixtures import NeonEnvBuilder, NeonPageserverApiException
|
||||
|
||||
|
||||
def test_detach_smoke(neon_env_builder: NeonEnvBuilder):
|
||||
def test_tenant_detach_smoke(neon_env_builder: NeonEnvBuilder):
|
||||
env = neon_env_builder.init_start()
|
||||
pageserver_http = env.pageserver.http_client()
|
||||
|
||||
# first check for non existing tenant
|
||||
tenant_id = uuid4()
|
||||
with pytest.raises(expected_exception=NeonPageserverApiException,
|
||||
match=f'Tenant not found for id {tenant_id.hex}'):
|
||||
pageserver_http.tenant_detach(tenant_id)
|
||||
|
||||
# create new tenant
|
||||
tenant_id, timeline_id = env.neon_cli.create_tenant()
|
||||
|
||||
# assert tenant exists on disk
|
||||
assert (env.repo_dir / "tenants" / tenant_id.hex).exists()
|
||||
|
||||
pg = env.postgres.create_start('main', tenant_id=tenant_id)
|
||||
# we rely upon autocommit after each statement
|
||||
pg.safe_psql_many(queries=[
|
||||
@@ -19,11 +30,12 @@ def test_detach_smoke(neon_env_builder: NeonEnvBuilder):
|
||||
'INSERT INTO t SELECT generate_series(1,100000), \'payload\'',
|
||||
])
|
||||
|
||||
# gc should try to even start
|
||||
# gc should not even try to start
|
||||
with pytest.raises(expected_exception=psycopg2.DatabaseError,
|
||||
match='gc target timeline does not exist'):
|
||||
env.pageserver.safe_psql(f'do_gc {tenant_id.hex} {uuid4().hex} 0')
|
||||
|
||||
# try to concurrently run gc and detach
|
||||
gc_thread = Thread(
|
||||
target=lambda: env.pageserver.safe_psql(f'do_gc {tenant_id.hex} {timeline_id.hex} 0'), )
|
||||
gc_thread.start()
|
||||
@@ -44,6 +56,9 @@ def test_detach_smoke(neon_env_builder: NeonEnvBuilder):
|
||||
|
||||
gc_thread.join(timeout=10)
|
||||
|
||||
# check that nothing is left on disk for deleted tenant
|
||||
assert not (env.repo_dir / "tenants" / tenant_id.hex).exists()
|
||||
|
||||
with pytest.raises(expected_exception=psycopg2.DatabaseError,
|
||||
match=f'Tenant {tenant_id.hex} not found'):
|
||||
env.pageserver.safe_psql(f'do_gc {tenant_id.hex} {timeline_id.hex} 0')
|
||||
@@ -35,10 +35,10 @@ def test_tenant_tasks(neon_env_builder: NeonEnvBuilder):
|
||||
value = line.lstrip(name).strip()
|
||||
return int(value)
|
||||
|
||||
def detach_all_timelines(tenant):
|
||||
def delete_all_timelines(tenant):
|
||||
timelines = [UUID(t["timeline_id"]) for t in client.timeline_list(tenant)]
|
||||
for t in timelines:
|
||||
client.timeline_detach(tenant, t)
|
||||
client.timeline_delete(tenant, t)
|
||||
|
||||
def assert_idle(tenant):
|
||||
assert get_state(tenant) == "Idle"
|
||||
@@ -56,7 +56,7 @@ def test_tenant_tasks(neon_env_builder: NeonEnvBuilder):
|
||||
# TODO they should be already idle since there are no active computes
|
||||
for tenant_info in client.tenant_list():
|
||||
tenant_id = UUID(tenant_info["id"])
|
||||
detach_all_timelines(tenant_id)
|
||||
delete_all_timelines(tenant_id)
|
||||
wait_until(10, 0.2, lambda: assert_idle(tenant_id))
|
||||
|
||||
# Assert that all tasks finish quickly after tenants go idle
|
||||
|
||||
55
test_runner/batch_others/test_timeline_delete.py
Normal file
55
test_runner/batch_others/test_timeline_delete.py
Normal file
@@ -0,0 +1,55 @@
|
||||
from uuid import uuid4
|
||||
import pytest
|
||||
|
||||
from fixtures.neon_fixtures import NeonEnv, NeonPageserverApiException
|
||||
|
||||
|
||||
def test_timeline_delete(neon_simple_env: NeonEnv):
|
||||
env = neon_simple_env
|
||||
|
||||
ps_http = env.pageserver.http_client()
|
||||
|
||||
# first try to delete non existing timeline
|
||||
# for existing tenant:
|
||||
invalid_timeline_id = uuid4()
|
||||
with pytest.raises(NeonPageserverApiException, match="timeline not found"):
|
||||
ps_http.timeline_delete(tenant_id=env.initial_tenant, timeline_id=invalid_timeline_id)
|
||||
|
||||
# for non existing tenant:
|
||||
invalid_tenant_id = uuid4()
|
||||
with pytest.raises(NeonPageserverApiException,
|
||||
match=f"Tenant {invalid_tenant_id.hex} not found in local tenant state"):
|
||||
ps_http.timeline_delete(tenant_id=invalid_tenant_id, timeline_id=invalid_timeline_id)
|
||||
|
||||
# construct pair of branches to validate that pageserver prohibits
|
||||
# deletion of ancestor timelines when they have child branches
|
||||
parent_timeline_id = env.neon_cli.create_branch("test_ancestor_branch_delete_parent", "empty")
|
||||
|
||||
leaf_timeline_id = env.neon_cli.create_branch("test_ancestor_branch_delete_branch1",
|
||||
"test_ancestor_branch_delete_parent")
|
||||
|
||||
ps_http = env.pageserver.http_client()
|
||||
with pytest.raises(NeonPageserverApiException,
|
||||
match="Cannot detach timeline which has child timelines"):
|
||||
|
||||
timeline_path = env.repo_dir / "tenants" / env.initial_tenant.hex / "timelines" / parent_timeline_id.hex
|
||||
assert timeline_path.exists()
|
||||
|
||||
ps_http.timeline_delete(env.initial_tenant, parent_timeline_id)
|
||||
|
||||
assert not timeline_path.exists()
|
||||
|
||||
timeline_path = env.repo_dir / "tenants" / env.initial_tenant.hex / "timelines" / leaf_timeline_id.hex
|
||||
assert timeline_path.exists()
|
||||
|
||||
ps_http.timeline_delete(env.initial_tenant, leaf_timeline_id)
|
||||
|
||||
assert not timeline_path.exists()
|
||||
|
||||
# check 404
|
||||
with pytest.raises(NeonPageserverApiException,
|
||||
match="is not found neither locally nor remotely"):
|
||||
ps_http.timeline_detail(env.initial_tenant, leaf_timeline_id)
|
||||
|
||||
# FIXME leaves tenant without timelines, should we prevent deletion of root timeline?
|
||||
ps_http.timeline_delete(env.initial_tenant, parent_timeline_id)
|
||||
Reference in New Issue
Block a user