Compare commits

..

8 Commits

Author SHA1 Message Date
Arpad Müller
7bf395df76 time.sleep 2024-01-23 02:21:49 +01:00
Arpad Müller
24bc6ddec4 Just add this wait to prevent a race 2024-01-23 01:58:57 +01:00
Arpad Müller
f49fe734d1 Allow this for a test 2024-01-23 00:28:22 +01:00
Arpad Müller
872e645f7d Disable cancellation support in initdb 2024-01-22 23:47:08 +01:00
Arpad Müller
648fe7c92d Add it to the allowed errors 2024-01-22 18:26:47 +01:00
Arpad Müller
21045477a3 Allow this msg 2024-01-22 15:20:18 +01:00
Arpad Müller
125f24ca49 exit initdb via kill and then await it 2024-01-22 15:19:13 +01:00
Arpad Müller
443d4ce868 Duplicate the test to try to reproduce the issue 2024-01-22 15:19:13 +01:00
28 changed files with 695 additions and 916 deletions

View File

@@ -700,14 +700,13 @@ impl ComputeNode {
// In this case we need to connect with old `zenith_admin` name
// and create new user. We cannot simply rename connected user,
// but we can create a new one and grant it all privileges.
let connstr = self.connstr.clone();
let mut client = match Client::connect(connstr.as_str(), NoTls) {
let mut client = match Client::connect(self.connstr.as_str(), NoTls) {
Err(e) => {
info!(
"cannot connect to postgres: {}, retrying with `zenith_admin` username",
e
);
let mut zenith_admin_connstr = connstr.clone();
let mut zenith_admin_connstr = self.connstr.clone();
zenith_admin_connstr
.set_username("zenith_admin")
@@ -720,8 +719,8 @@ impl ComputeNode {
client.simple_query("GRANT zenith_admin TO cloud_admin")?;
drop(client);
// reconnect with connstring with expected name
Client::connect(connstr.as_str(), NoTls)?
// reconnect with connsting with expected name
Client::connect(self.connstr.as_str(), NoTls)?
}
Ok(client) => client,
};
@@ -735,8 +734,8 @@ impl ComputeNode {
cleanup_instance(&mut client)?;
handle_roles(spec, &mut client)?;
handle_databases(spec, &mut client)?;
handle_role_deletions(spec, connstr.as_str(), &mut client)?;
handle_grants(spec, &mut client, connstr.as_str())?;
handle_role_deletions(spec, self.connstr.as_str(), &mut client)?;
handle_grants(spec, &mut client, self.connstr.as_str())?;
handle_extensions(spec, &mut client)?;
handle_extension_neon(&mut client)?;
create_availability_check_data(&mut client)?;
@@ -744,12 +743,6 @@ impl ComputeNode {
// 'Close' connection
drop(client);
if self.has_feature(ComputeFeature::Migrations) {
thread::spawn(move || {
let mut client = Client::connect(connstr.as_str(), NoTls)?;
handle_migrations(&mut client)
});
}
Ok(())
}
@@ -814,10 +807,6 @@ impl ComputeNode {
handle_grants(&spec, &mut client, self.connstr.as_str())?;
handle_extensions(&spec, &mut client)?;
handle_extension_neon(&mut client)?;
// We can skip handle_migrations here because a new migration can only appear
// if we have a new version of the compute_ctl binary, which can only happen
// if compute got restarted, in which case we'll end up inside of apply_config
// instead of reconfigure.
}
// 'Close' connection

View File

@@ -727,79 +727,3 @@ pub fn handle_extension_neon(client: &mut Client) -> Result<()> {
Ok(())
}
#[instrument(skip_all)]
pub fn handle_migrations(client: &mut Client) -> Result<()> {
info!("handle migrations");
// !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
// !BE SURE TO ONLY ADD MIGRATIONS TO THE END OF THIS ARRAY. IF YOU DO NOT, VERY VERY BAD THINGS MAY HAPPEN!
// !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
let migrations = [
"ALTER ROLE neon_superuser BYPASSRLS",
r#"
DO $$
DECLARE
role_name text;
BEGIN
FOR role_name IN SELECT rolname FROM pg_roles WHERE pg_has_role(rolname, 'neon_superuser', 'member')
LOOP
RAISE NOTICE 'EXECUTING ALTER ROLE % INHERIT', quote_ident(role_name);
EXECUTE 'ALTER ROLE ' || quote_ident(role_name) || ' INHERIT';
END LOOP;
FOR role_name IN SELECT rolname FROM pg_roles
WHERE
NOT pg_has_role(rolname, 'neon_superuser', 'member') AND NOT starts_with(rolname, 'pg_')
LOOP
RAISE NOTICE 'EXECUTING ALTER ROLE % NOBYPASSRLS', quote_ident(role_name);
EXECUTE 'ALTER ROLE ' || quote_ident(role_name) || ' NOBYPASSRLS';
END LOOP;
END $$;
"#,
];
let mut query = "CREATE SCHEMA IF NOT EXISTS neon_migration";
client.simple_query(query)?;
query = "CREATE TABLE IF NOT EXISTS neon_migration.migration_id (key INT NOT NULL PRIMARY KEY, id bigint NOT NULL DEFAULT 0)";
client.simple_query(query)?;
query = "INSERT INTO neon_migration.migration_id VALUES (0, 0) ON CONFLICT DO NOTHING";
client.simple_query(query)?;
query = "ALTER SCHEMA neon_migration OWNER TO cloud_admin";
client.simple_query(query)?;
query = "REVOKE ALL ON SCHEMA neon_migration FROM PUBLIC";
client.simple_query(query)?;
query = "SELECT id FROM neon_migration.migration_id";
let row = client.query_one(query, &[])?;
let mut current_migration: usize = row.get::<&str, i64>("id") as usize;
let starting_migration_id = current_migration;
query = "BEGIN";
client.simple_query(query)?;
while current_migration < migrations.len() {
info!("Running migration:\n{}\n", migrations[current_migration]);
client.simple_query(migrations[current_migration])?;
current_migration += 1;
}
let setval = format!(
"UPDATE neon_migration.migration_id SET id={}",
migrations.len()
);
client.simple_query(&setval)?;
query = "COMMIT";
client.simple_query(query)?;
info!(
"Ran {} migrations",
(migrations.len() - starting_migration_id)
);
Ok(())
}

View File

@@ -184,7 +184,7 @@ impl Persistence {
pub(crate) async fn increment_generation(
&self,
tenant_shard_id: TenantShardId,
node_id: NodeId,
node_id: Option<NodeId>,
) -> anyhow::Result<Generation> {
let (write, gen) = {
let mut locked = self.state.lock().unwrap();
@@ -192,9 +192,14 @@ impl Persistence {
anyhow::bail!("Tried to increment generation of unknown shard");
};
shard.generation += 1;
shard.generation_pageserver = Some(node_id);
// If we're called with a None pageserver, we need only update the generation
// record to disassociate it with this pageserver, not actually increment the number, as
// the increment is guaranteed to happen the next time this tenant is attached.
if node_id.is_some() {
shard.generation += 1;
}
shard.generation_pageserver = node_id;
let gen = Generation::new(shard.generation);
(locked.save(), gen)
};
@@ -203,19 +208,6 @@ impl Persistence {
Ok(gen)
}
pub(crate) async fn detach(&self, tenant_shard_id: TenantShardId) -> anyhow::Result<()> {
let write = {
let mut locked = self.state.lock().unwrap();
let Some(shard) = locked.tenants.get_mut(&tenant_shard_id) else {
anyhow::bail!("Tried to increment generation of unknown shard");
};
shard.generation_pageserver = None;
locked.save()
};
write.commit().await?;
Ok(())
}
pub(crate) async fn re_attach(
&self,
node_id: NodeId,

View File

@@ -296,7 +296,7 @@ impl Reconciler {
// Increment generation before attaching to new pageserver
self.generation = self
.persistence
.increment_generation(self.tenant_shard_id, dest_ps_id)
.increment_generation(self.tenant_shard_id, Some(dest_ps_id))
.await?;
let dest_conf = build_location_config(
@@ -395,7 +395,7 @@ impl Reconciler {
// as locations with unknown (None) observed state.
self.generation = self
.persistence
.increment_generation(self.tenant_shard_id, node_id)
.increment_generation(self.tenant_shard_id, Some(node_id))
.await?;
wanted_conf.generation = self.generation.into();
tracing::info!("Observed configuration requires update.");

View File

@@ -362,14 +362,13 @@ impl Service {
);
}
let new_generation = if let Some(req_node_id) = attach_req.node_id {
let new_generation = if attach_req.node_id.is_some() {
Some(
self.persistence
.increment_generation(attach_req.tenant_shard_id, req_node_id)
.increment_generation(attach_req.tenant_shard_id, attach_req.node_id)
.await?,
)
} else {
self.persistence.detach(attach_req.tenant_shard_id).await?;
None
};
@@ -408,7 +407,6 @@ impl Service {
"attach_hook: tenant {} set generation {:?}, pageserver {}",
attach_req.tenant_shard_id,
tenant_state.generation,
// TODO: this is an odd number of 0xf's
attach_req.node_id.unwrap_or(utils::id::NodeId(0xfffffff))
);

View File

@@ -57,7 +57,7 @@ use crate::local_env::LocalEnv;
use crate::postgresql_conf::PostgresConf;
use compute_api::responses::{ComputeState, ComputeStatus};
use compute_api::spec::{Cluster, ComputeFeature, ComputeMode, ComputeSpec};
use compute_api::spec::{Cluster, ComputeMode, ComputeSpec};
// contents of a endpoint.json file
#[derive(Serialize, Deserialize, PartialEq, Eq, Clone, Debug)]
@@ -70,7 +70,6 @@ pub struct EndpointConf {
http_port: u16,
pg_version: u32,
skip_pg_catalog_updates: bool,
features: Vec<ComputeFeature>,
}
//
@@ -141,7 +140,6 @@ impl ComputeControlPlane {
// with this we basically test a case of waking up an idle compute, where
// we also skip catalog updates in the cloud.
skip_pg_catalog_updates: true,
features: vec![],
});
ep.create_endpoint_dir()?;
@@ -156,7 +154,6 @@ impl ComputeControlPlane {
pg_port,
pg_version,
skip_pg_catalog_updates: true,
features: vec![],
})?,
)?;
std::fs::write(
@@ -218,9 +215,6 @@ pub struct Endpoint {
// Optimizations
skip_pg_catalog_updates: bool,
// Feature flags
features: Vec<ComputeFeature>,
}
impl Endpoint {
@@ -250,7 +244,6 @@ impl Endpoint {
tenant_id: conf.tenant_id,
pg_version: conf.pg_version,
skip_pg_catalog_updates: conf.skip_pg_catalog_updates,
features: conf.features,
})
}
@@ -526,7 +519,7 @@ impl Endpoint {
skip_pg_catalog_updates: self.skip_pg_catalog_updates,
format_version: 1.0,
operation_uuid: None,
features: self.features.clone(),
features: vec![],
cluster: Cluster {
cluster_id: None, // project ID: not used
name: None, // project name: not used

View File

@@ -90,9 +90,6 @@ pub enum ComputeFeature {
/// track short-lived connections as user activity.
ActivityMonitorExperimental,
/// Enable running migrations
Migrations,
/// This is a special feature flag that is used to represent unknown feature flags.
/// Basically all unknown to enum flags are represented as this one. See unit test
/// `parse_unknown_features()` for more details.

View File

@@ -1,11 +1,9 @@
use anyhow::{bail, Result};
use byteorder::{ByteOrder, BE};
use postgres_ffi::relfile_utils::{FSM_FORKNUM, VISIBILITYMAP_FORKNUM};
use postgres_ffi::{Oid, TransactionId};
use serde::{Deserialize, Serialize};
use std::{fmt, ops::Range};
use std::fmt;
use crate::reltag::{BlockNumber, RelTag, SlruKind};
use crate::reltag::{BlockNumber, RelTag};
/// Key used in the Repository kv-store.
///
@@ -145,390 +143,12 @@ impl Key {
}
}
// Layout of the Key address space
//
// The Key struct, used to address the underlying key-value store, consists of
// 18 bytes, split into six fields. See 'Key' in repository.rs. We need to map
// all the data and metadata keys into those 18 bytes.
//
// Principles for the mapping:
//
// - Things that are often accessed or modified together, should be close to
// each other in the key space. For example, if a relation is extended by one
// block, we create a new key-value pair for the block data, and update the
// relation size entry. Because of that, the RelSize key comes after all the
// RelBlocks of a relation: the RelSize and the last RelBlock are always next
// to each other.
//
// The key space is divided into four major sections, identified by the first
// byte, and the form a hierarchy:
//
// 00 Relation data and metadata
//
// DbDir () -> (dbnode, spcnode)
// Filenodemap
// RelDir -> relnode forknum
// RelBlocks
// RelSize
//
// 01 SLRUs
//
// SlruDir kind
// SlruSegBlocks segno
// SlruSegSize
//
// 02 pg_twophase
//
// 03 misc
// Controlfile
// checkpoint
// pg_version
//
// 04 aux files
//
// Below is a full list of the keyspace allocation:
//
// DbDir:
// 00 00000000 00000000 00000000 00 00000000
//
// Filenodemap:
// 00 SPCNODE DBNODE 00000000 00 00000000
//
// RelDir:
// 00 SPCNODE DBNODE 00000000 00 00000001 (Postgres never uses relfilenode 0)
//
// RelBlock:
// 00 SPCNODE DBNODE RELNODE FORK BLKNUM
//
// RelSize:
// 00 SPCNODE DBNODE RELNODE FORK FFFFFFFF
//
// SlruDir:
// 01 kind 00000000 00000000 00 00000000
//
// SlruSegBlock:
// 01 kind 00000001 SEGNO 00 BLKNUM
//
// SlruSegSize:
// 01 kind 00000001 SEGNO 00 FFFFFFFF
//
// TwoPhaseDir:
// 02 00000000 00000000 00000000 00 00000000
//
// TwoPhaseFile:
// 02 00000000 00000000 00000000 00 XID
//
// ControlFile:
// 03 00000000 00000000 00000000 00 00000000
//
// Checkpoint:
// 03 00000000 00000000 00000000 00 00000001
//
// AuxFiles:
// 03 00000000 00000000 00000000 00 00000002
//
//-- Section 01: relation data and metadata
pub const DBDIR_KEY: Key = Key {
field1: 0x00,
field2: 0,
field3: 0,
field4: 0,
field5: 0,
field6: 0,
};
#[inline(always)]
pub fn dbdir_key_range(spcnode: Oid, dbnode: Oid) -> Range<Key> {
Key {
field1: 0x00,
field2: spcnode,
field3: dbnode,
field4: 0,
field5: 0,
field6: 0,
}..Key {
field1: 0x00,
field2: spcnode,
field3: dbnode,
field4: 0xffffffff,
field5: 0xff,
field6: 0xffffffff,
}
}
#[inline(always)]
pub fn relmap_file_key(spcnode: Oid, dbnode: Oid) -> Key {
Key {
field1: 0x00,
field2: spcnode,
field3: dbnode,
field4: 0,
field5: 0,
field6: 0,
}
}
#[inline(always)]
pub fn rel_dir_to_key(spcnode: Oid, dbnode: Oid) -> Key {
Key {
field1: 0x00,
field2: spcnode,
field3: dbnode,
field4: 0,
field5: 0,
field6: 1,
}
}
#[inline(always)]
pub fn rel_block_to_key(rel: RelTag, blknum: BlockNumber) -> Key {
Key {
field1: 0x00,
field2: rel.spcnode,
field3: rel.dbnode,
field4: rel.relnode,
field5: rel.forknum,
field6: blknum,
}
}
#[inline(always)]
pub fn rel_size_to_key(rel: RelTag) -> Key {
Key {
field1: 0x00,
field2: rel.spcnode,
field3: rel.dbnode,
field4: rel.relnode,
field5: rel.forknum,
field6: 0xffffffff,
}
}
#[inline(always)]
pub fn rel_key_range(rel: RelTag) -> Range<Key> {
Key {
field1: 0x00,
field2: rel.spcnode,
field3: rel.dbnode,
field4: rel.relnode,
field5: rel.forknum,
field6: 0,
}..Key {
field1: 0x00,
field2: rel.spcnode,
field3: rel.dbnode,
field4: rel.relnode,
field5: rel.forknum + 1,
field6: 0,
}
}
//-- Section 02: SLRUs
#[inline(always)]
pub fn slru_dir_to_key(kind: SlruKind) -> Key {
Key {
field1: 0x01,
field2: match kind {
SlruKind::Clog => 0x00,
SlruKind::MultiXactMembers => 0x01,
SlruKind::MultiXactOffsets => 0x02,
},
field3: 0,
field4: 0,
field5: 0,
field6: 0,
}
}
#[inline(always)]
pub fn slru_block_to_key(kind: SlruKind, segno: u32, blknum: BlockNumber) -> Key {
Key {
field1: 0x01,
field2: match kind {
SlruKind::Clog => 0x00,
SlruKind::MultiXactMembers => 0x01,
SlruKind::MultiXactOffsets => 0x02,
},
field3: 1,
field4: segno,
field5: 0,
field6: blknum,
}
}
#[inline(always)]
pub fn slru_segment_size_to_key(kind: SlruKind, segno: u32) -> Key {
Key {
field1: 0x01,
field2: match kind {
SlruKind::Clog => 0x00,
SlruKind::MultiXactMembers => 0x01,
SlruKind::MultiXactOffsets => 0x02,
},
field3: 1,
field4: segno,
field5: 0,
field6: 0xffffffff,
}
}
#[inline(always)]
pub fn slru_segment_key_range(kind: SlruKind, segno: u32) -> Range<Key> {
let field2 = match kind {
SlruKind::Clog => 0x00,
SlruKind::MultiXactMembers => 0x01,
SlruKind::MultiXactOffsets => 0x02,
};
Key {
field1: 0x01,
field2,
field3: 1,
field4: segno,
field5: 0,
field6: 0,
}..Key {
field1: 0x01,
field2,
field3: 1,
field4: segno,
field5: 1,
field6: 0,
}
}
//-- Section 03: pg_twophase
pub const TWOPHASEDIR_KEY: Key = Key {
field1: 0x02,
field2: 0,
field3: 0,
field4: 0,
field5: 0,
field6: 0,
};
#[inline(always)]
pub fn twophase_file_key(xid: TransactionId) -> Key {
Key {
field1: 0x02,
field2: 0,
field3: 0,
field4: 0,
field5: 0,
field6: xid,
}
}
#[inline(always)]
pub fn twophase_key_range(xid: TransactionId) -> Range<Key> {
let (next_xid, overflowed) = xid.overflowing_add(1);
Key {
field1: 0x02,
field2: 0,
field3: 0,
field4: 0,
field5: 0,
field6: xid,
}..Key {
field1: 0x02,
field2: 0,
field3: 0,
field4: 0,
field5: u8::from(overflowed),
field6: next_xid,
}
}
//-- Section 03: Control file
pub const CONTROLFILE_KEY: Key = Key {
field1: 0x03,
field2: 0,
field3: 0,
field4: 0,
field5: 0,
field6: 0,
};
pub const CHECKPOINT_KEY: Key = Key {
field1: 0x03,
field2: 0,
field3: 0,
field4: 0,
field5: 0,
field6: 1,
};
pub const AUX_FILES_KEY: Key = Key {
field1: 0x03,
field2: 0,
field3: 0,
field4: 0,
field5: 0,
field6: 2,
};
// Reverse mappings for a few Keys.
// These are needed by WAL redo manager.
// AUX_FILES currently stores only data for logical replication (slots etc), and
// we don't preserve these on a branch because safekeepers can't follow timeline
// switch (and generally it likely should be optional), so ignore these.
#[inline(always)]
pub fn is_inherited_key(key: Key) -> bool {
key != AUX_FILES_KEY
}
#[inline(always)]
pub fn is_rel_fsm_block_key(key: Key) -> bool {
key.field1 == 0x00 && key.field4 != 0 && key.field5 == FSM_FORKNUM && key.field6 != 0xffffffff
}
#[inline(always)]
pub fn is_rel_vm_block_key(key: Key) -> bool {
key.field1 == 0x00
&& key.field4 != 0
&& key.field5 == VISIBILITYMAP_FORKNUM
&& key.field6 != 0xffffffff
}
#[inline(always)]
pub fn key_to_slru_block(key: Key) -> anyhow::Result<(SlruKind, u32, BlockNumber)> {
Ok(match key.field1 {
0x01 => {
let kind = match key.field2 {
0x00 => SlruKind::Clog,
0x01 => SlruKind::MultiXactMembers,
0x02 => SlruKind::MultiXactOffsets,
_ => anyhow::bail!("unrecognized slru kind 0x{:02x}", key.field2),
};
let segno = key.field4;
let blknum = key.field6;
(kind, segno, blknum)
}
_ => anyhow::bail!("unexpected value kind 0x{:02x}", key.field1),
})
}
#[inline(always)]
pub fn is_slru_block_key(key: Key) -> bool {
key.field1 == 0x01 // SLRU-related
&& key.field3 == 0x00000001 // but not SlruDir
&& key.field6 != 0xffffffff // and not SlruSegSize
}
#[inline(always)]
pub fn is_rel_block_key(key: &Key) -> bool {
key.field1 == 0x00 && key.field4 != 0 && key.field6 != 0xffffffff
}
/// Guaranteed to return `Ok()` if [[is_rel_block_key]] returns `true` for `key`.
#[inline(always)]
pub fn key_to_rel_block(key: Key) -> anyhow::Result<(RelTag, BlockNumber)> {
Ok(match key.field1 {
0x00 => (

View File

@@ -61,7 +61,7 @@ use crate::context::{DownloadBehavior, RequestContext};
use crate::import_datadir::import_wal_from_tar;
use crate::metrics;
use crate::metrics::LIVE_CONNECTIONS_COUNT;
use crate::pgdatadir_mapping::Version;
use crate::pgdatadir_mapping::{rel_block_to_key, Version};
use crate::task_mgr;
use crate::task_mgr::TaskKind;
use crate::tenant::debug_assert_current_span_has_tenant_and_timeline_id;
@@ -75,7 +75,6 @@ use crate::tenant::PageReconstructError;
use crate::tenant::Timeline;
use crate::trace::Tracer;
use pageserver_api::key::rel_block_to_key;
use postgres_ffi::pg_constants::DEFAULTTABLESPACE_OID;
use postgres_ffi::BLCKSZ;
@@ -387,18 +386,12 @@ impl PageServerHandler {
/// Future that completes when we need to shut down the connection.
///
/// We currently need to shut down when any of the following happens:
/// 1. any of the timelines we hold GateGuards for in `shard_timelines` is cancelled
/// 2. task_mgr requests shutdown of the connection
/// Reasons for need to shut down are:
/// - any of the timelines we hold GateGuards for in `shard_timelines` is cancelled
/// - task_mgr requests shutdown of the connection
///
/// NB on (1): the connection's lifecycle is not actually tied to any of the
/// `shard_timelines`s' lifecycles. But it's _necessary_ in the current
/// implementation to be responsive to timeline cancellation because
/// the connection holds their `GateGuards` open (sored in `shard_timelines`).
/// We currently do the easy thing and terminate the connection if any of the
/// shard_timelines gets cancelled. But really, we cuold spend more effort
/// and simply remove the cancelled timeline from the `shard_timelines`, thereby
/// dropping the guard.
/// The need to check for `task_mgr` cancellation arises mainly from `handle_pagerequests`
/// where, at first, `shard_timelines` is empty, see <https://github.com/neondatabase/neon/pull/6388>
///
/// NB: keep in sync with [`Self::is_connection_cancelled`]
async fn await_connection_cancelled(&self) {
@@ -411,17 +404,16 @@ impl PageServerHandler {
// immutable &self). So it's fine to evaluate shard_timelines after the sleep, we don't risk
// missing any inserts to the map.
let mut cancellation_sources = Vec::with_capacity(1 + self.shard_timelines.len());
use futures::future::Either;
cancellation_sources.push(Either::Left(task_mgr::shutdown_watcher()));
cancellation_sources.extend(
self.shard_timelines
.values()
.map(|ht| Either::Right(ht.timeline.cancel.cancelled())),
);
FuturesUnordered::from_iter(cancellation_sources)
.next()
.await;
let mut futs = self
.shard_timelines
.values()
.map(|ht| ht.timeline.cancel.cancelled())
.collect::<FuturesUnordered<_>>();
tokio::select! {
_ = task_mgr::shutdown_watcher() => { }
_ = futs.next() => {}
}
}
/// Checking variant of [`Self::await_connection_cancelled`].

View File

@@ -13,12 +13,7 @@ use crate::repository::*;
use crate::walrecord::NeonWalRecord;
use anyhow::{ensure, Context};
use bytes::{Buf, Bytes};
use pageserver_api::key::{
dbdir_key_range, is_rel_block_key, is_slru_block_key, rel_block_to_key, rel_dir_to_key,
rel_key_range, rel_size_to_key, relmap_file_key, slru_block_to_key, slru_dir_to_key,
slru_segment_key_range, slru_segment_size_to_key, twophase_file_key, twophase_key_range,
AUX_FILES_KEY, CHECKPOINT_KEY, CONTROLFILE_KEY, DBDIR_KEY, TWOPHASEDIR_KEY,
};
use pageserver_api::key::is_rel_block_key;
use pageserver_api::reltag::{BlockNumber, RelTag, SlruKind};
use postgres_ffi::relfile_utils::{FSM_FORKNUM, VISIBILITYMAP_FORKNUM};
use postgres_ffi::BLCKSZ;
@@ -1540,6 +1535,366 @@ struct SlruSegmentDirectory {
static ZERO_PAGE: Bytes = Bytes::from_static(&[0u8; BLCKSZ as usize]);
// Layout of the Key address space
//
// The Key struct, used to address the underlying key-value store, consists of
// 18 bytes, split into six fields. See 'Key' in repository.rs. We need to map
// all the data and metadata keys into those 18 bytes.
//
// Principles for the mapping:
//
// - Things that are often accessed or modified together, should be close to
// each other in the key space. For example, if a relation is extended by one
// block, we create a new key-value pair for the block data, and update the
// relation size entry. Because of that, the RelSize key comes after all the
// RelBlocks of a relation: the RelSize and the last RelBlock are always next
// to each other.
//
// The key space is divided into four major sections, identified by the first
// byte, and the form a hierarchy:
//
// 00 Relation data and metadata
//
// DbDir () -> (dbnode, spcnode)
// Filenodemap
// RelDir -> relnode forknum
// RelBlocks
// RelSize
//
// 01 SLRUs
//
// SlruDir kind
// SlruSegBlocks segno
// SlruSegSize
//
// 02 pg_twophase
//
// 03 misc
// Controlfile
// checkpoint
// pg_version
//
// 04 aux files
//
// Below is a full list of the keyspace allocation:
//
// DbDir:
// 00 00000000 00000000 00000000 00 00000000
//
// Filenodemap:
// 00 SPCNODE DBNODE 00000000 00 00000000
//
// RelDir:
// 00 SPCNODE DBNODE 00000000 00 00000001 (Postgres never uses relfilenode 0)
//
// RelBlock:
// 00 SPCNODE DBNODE RELNODE FORK BLKNUM
//
// RelSize:
// 00 SPCNODE DBNODE RELNODE FORK FFFFFFFF
//
// SlruDir:
// 01 kind 00000000 00000000 00 00000000
//
// SlruSegBlock:
// 01 kind 00000001 SEGNO 00 BLKNUM
//
// SlruSegSize:
// 01 kind 00000001 SEGNO 00 FFFFFFFF
//
// TwoPhaseDir:
// 02 00000000 00000000 00000000 00 00000000
//
// TwoPhaseFile:
// 02 00000000 00000000 00000000 00 XID
//
// ControlFile:
// 03 00000000 00000000 00000000 00 00000000
//
// Checkpoint:
// 03 00000000 00000000 00000000 00 00000001
//
// AuxFiles:
// 03 00000000 00000000 00000000 00 00000002
//
//-- Section 01: relation data and metadata
const DBDIR_KEY: Key = Key {
field1: 0x00,
field2: 0,
field3: 0,
field4: 0,
field5: 0,
field6: 0,
};
fn dbdir_key_range(spcnode: Oid, dbnode: Oid) -> Range<Key> {
Key {
field1: 0x00,
field2: spcnode,
field3: dbnode,
field4: 0,
field5: 0,
field6: 0,
}..Key {
field1: 0x00,
field2: spcnode,
field3: dbnode,
field4: 0xffffffff,
field5: 0xff,
field6: 0xffffffff,
}
}
fn relmap_file_key(spcnode: Oid, dbnode: Oid) -> Key {
Key {
field1: 0x00,
field2: spcnode,
field3: dbnode,
field4: 0,
field5: 0,
field6: 0,
}
}
fn rel_dir_to_key(spcnode: Oid, dbnode: Oid) -> Key {
Key {
field1: 0x00,
field2: spcnode,
field3: dbnode,
field4: 0,
field5: 0,
field6: 1,
}
}
pub(crate) fn rel_block_to_key(rel: RelTag, blknum: BlockNumber) -> Key {
Key {
field1: 0x00,
field2: rel.spcnode,
field3: rel.dbnode,
field4: rel.relnode,
field5: rel.forknum,
field6: blknum,
}
}
fn rel_size_to_key(rel: RelTag) -> Key {
Key {
field1: 0x00,
field2: rel.spcnode,
field3: rel.dbnode,
field4: rel.relnode,
field5: rel.forknum,
field6: 0xffffffff,
}
}
fn rel_key_range(rel: RelTag) -> Range<Key> {
Key {
field1: 0x00,
field2: rel.spcnode,
field3: rel.dbnode,
field4: rel.relnode,
field5: rel.forknum,
field6: 0,
}..Key {
field1: 0x00,
field2: rel.spcnode,
field3: rel.dbnode,
field4: rel.relnode,
field5: rel.forknum + 1,
field6: 0,
}
}
//-- Section 02: SLRUs
fn slru_dir_to_key(kind: SlruKind) -> Key {
Key {
field1: 0x01,
field2: match kind {
SlruKind::Clog => 0x00,
SlruKind::MultiXactMembers => 0x01,
SlruKind::MultiXactOffsets => 0x02,
},
field3: 0,
field4: 0,
field5: 0,
field6: 0,
}
}
fn slru_block_to_key(kind: SlruKind, segno: u32, blknum: BlockNumber) -> Key {
Key {
field1: 0x01,
field2: match kind {
SlruKind::Clog => 0x00,
SlruKind::MultiXactMembers => 0x01,
SlruKind::MultiXactOffsets => 0x02,
},
field3: 1,
field4: segno,
field5: 0,
field6: blknum,
}
}
fn slru_segment_size_to_key(kind: SlruKind, segno: u32) -> Key {
Key {
field1: 0x01,
field2: match kind {
SlruKind::Clog => 0x00,
SlruKind::MultiXactMembers => 0x01,
SlruKind::MultiXactOffsets => 0x02,
},
field3: 1,
field4: segno,
field5: 0,
field6: 0xffffffff,
}
}
fn slru_segment_key_range(kind: SlruKind, segno: u32) -> Range<Key> {
let field2 = match kind {
SlruKind::Clog => 0x00,
SlruKind::MultiXactMembers => 0x01,
SlruKind::MultiXactOffsets => 0x02,
};
Key {
field1: 0x01,
field2,
field3: 1,
field4: segno,
field5: 0,
field6: 0,
}..Key {
field1: 0x01,
field2,
field3: 1,
field4: segno,
field5: 1,
field6: 0,
}
}
//-- Section 03: pg_twophase
const TWOPHASEDIR_KEY: Key = Key {
field1: 0x02,
field2: 0,
field3: 0,
field4: 0,
field5: 0,
field6: 0,
};
fn twophase_file_key(xid: TransactionId) -> Key {
Key {
field1: 0x02,
field2: 0,
field3: 0,
field4: 0,
field5: 0,
field6: xid,
}
}
fn twophase_key_range(xid: TransactionId) -> Range<Key> {
let (next_xid, overflowed) = xid.overflowing_add(1);
Key {
field1: 0x02,
field2: 0,
field3: 0,
field4: 0,
field5: 0,
field6: xid,
}..Key {
field1: 0x02,
field2: 0,
field3: 0,
field4: 0,
field5: u8::from(overflowed),
field6: next_xid,
}
}
//-- Section 03: Control file
const CONTROLFILE_KEY: Key = Key {
field1: 0x03,
field2: 0,
field3: 0,
field4: 0,
field5: 0,
field6: 0,
};
const CHECKPOINT_KEY: Key = Key {
field1: 0x03,
field2: 0,
field3: 0,
field4: 0,
field5: 0,
field6: 1,
};
const AUX_FILES_KEY: Key = Key {
field1: 0x03,
field2: 0,
field3: 0,
field4: 0,
field5: 0,
field6: 2,
};
// Reverse mappings for a few Keys.
// These are needed by WAL redo manager.
// AUX_FILES currently stores only data for logical replication (slots etc), and
// we don't preserve these on a branch because safekeepers can't follow timeline
// switch (and generally it likely should be optional), so ignore these.
pub fn is_inherited_key(key: Key) -> bool {
key != AUX_FILES_KEY
}
pub fn is_rel_fsm_block_key(key: Key) -> bool {
key.field1 == 0x00 && key.field4 != 0 && key.field5 == FSM_FORKNUM && key.field6 != 0xffffffff
}
pub fn is_rel_vm_block_key(key: Key) -> bool {
key.field1 == 0x00
&& key.field4 != 0
&& key.field5 == VISIBILITYMAP_FORKNUM
&& key.field6 != 0xffffffff
}
pub fn key_to_slru_block(key: Key) -> anyhow::Result<(SlruKind, u32, BlockNumber)> {
Ok(match key.field1 {
0x01 => {
let kind = match key.field2 {
0x00 => SlruKind::Clog,
0x01 => SlruKind::MultiXactMembers,
0x02 => SlruKind::MultiXactOffsets,
_ => anyhow::bail!("unrecognized slru kind 0x{:02x}", key.field2),
};
let segno = key.field4;
let blknum = key.field6;
(kind, segno, blknum)
}
_ => anyhow::bail!("unexpected value kind 0x{:02x}", key.field1),
})
}
fn is_slru_block_key(key: Key) -> bool {
key.field1 == 0x01 // SLRU-related
&& key.field3 == 0x00000001 // but not SlruDir
&& key.field6 != 0xffffffff // and not SlruSegSize
}
#[allow(clippy::bool_assert_comparison)]
#[cfg(test)]
mod tests {

View File

@@ -449,6 +449,7 @@ pub enum CreateTimelineError {
#[derive(thiserror::Error, Debug)]
enum InitdbError {
Other(anyhow::Error),
#[allow(unused)]
Cancelled,
Spawn(std::io::Result<()>),
Failed(std::process::ExitStatus, Vec<u8>),
@@ -716,10 +717,6 @@ impl Tenant {
// stayed in Activating for such a long time that shutdown found it in
// that state.
tracing::info!(state=%tenant_clone.current_state(), "Tenant shut down before activation");
// Make the tenant broken so that set_stopping will not hang waiting for it to leave
// the Attaching state. This is an over-reaction (nothing really broke, the tenant is
// just shutting down), but ensures progress.
make_broken(&tenant_clone, anyhow::anyhow!("Shut down while Attaching"));
return Ok(());
},
)
@@ -1881,7 +1878,7 @@ impl Tenant {
&self,
cancel: &CancellationToken,
ctx: &RequestContext,
) -> Result<(), timeline::CompactionError> {
) -> anyhow::Result<(), timeline::CompactionError> {
// Don't start doing work during shutdown, or when broken, we do not need those in the logs
if !self.is_active() {
return Ok(());
@@ -3736,7 +3733,7 @@ async fn run_initdb(
conf: &'static PageServerConf,
initdb_target_dir: &Utf8Path,
pg_version: u32,
cancel: &CancellationToken,
_cancel: &CancellationToken,
) -> Result<(), InitdbError> {
let initdb_bin_path = conf
.pg_bin_dir(pg_version)
@@ -3750,7 +3747,7 @@ async fn run_initdb(
let _permit = INIT_DB_SEMAPHORE.acquire().await;
let initdb_command = tokio::process::Command::new(&initdb_bin_path)
let mut initdb_command = tokio::process::Command::new(&initdb_bin_path)
.args(["-D", initdb_target_dir.as_ref()])
.args(["-U", &conf.superuser])
.args(["-E", "utf8"])
@@ -3771,15 +3768,19 @@ async fn run_initdb(
.spawn()?;
tokio::select! {
initdb_output = initdb_command.wait_with_output() => {
let initdb_output = initdb_output?;
if !initdb_output.status.success() {
return Err(InitdbError::Failed(initdb_output.status, initdb_output.stderr));
exit_status = initdb_command.wait() => {
let exit_status = exit_status?;
if !exit_status.success() {
let mut stderr = initdb_command.stderr.take().unwrap();
let mut stderr_vec = Vec::new();
tokio::io::copy(&mut stderr, &mut stderr_vec).await?;
return Err(InitdbError::Failed(exit_status, stderr_vec));
}
}
_ = cancel.cancelled() => {
/*_ = cancel.cancelled() => {
initdb_command.kill().await?;
return Err(InitdbError::Cancelled);
}
}*/
}
Ok(())

View File

@@ -619,8 +619,8 @@ impl LayerMap {
}
/// Return all L0 delta layers
pub fn get_level0_deltas(&self) -> Vec<Arc<PersistentLayerDesc>> {
self.l0_delta_layers.to_vec()
pub fn get_level0_deltas(&self) -> Result<Vec<Arc<PersistentLayerDesc>>> {
Ok(self.l0_delta_layers.to_vec())
}
/// debugging function to print out the contents of the layer map

View File

@@ -237,7 +237,7 @@ use utils::id::{TenantId, TimelineId};
use self::index::IndexPart;
use super::storage_layer::{Layer, LayerFileName, ResidentLayer};
use super::upload_queue::{self, SetDeletedFlagProgress};
use super::upload_queue::SetDeletedFlagProgress;
use super::Generation;
pub(crate) use download::{is_temp_download_file, list_remote_timelines};
@@ -621,9 +621,7 @@ impl RemoteTimelineClient {
///
/// Like schedule_index_upload_for_metadata_update(), this merely adds
/// the upload to the upload queue and returns quickly.
pub(crate) fn schedule_index_upload_for_file_changes(
self: &Arc<Self>,
) -> Result<(), upload_queue::NotInitialized> {
pub fn schedule_index_upload_for_file_changes(self: &Arc<Self>) -> anyhow::Result<()> {
let mut guard = self.upload_queue.lock().unwrap();
let upload_queue = guard.initialized_mut()?;
@@ -668,7 +666,7 @@ impl RemoteTimelineClient {
pub(crate) fn schedule_layer_file_upload(
self: &Arc<Self>,
layer: ResidentLayer,
) -> Result<(), upload_queue::NotInitialized> {
) -> anyhow::Result<()> {
let mut guard = self.upload_queue.lock().unwrap();
let upload_queue = guard.initialized_mut()?;
@@ -877,7 +875,7 @@ impl RemoteTimelineClient {
self: &Arc<Self>,
compacted_from: &[Layer],
compacted_to: &[ResidentLayer],
) -> Result<(), upload_queue::NotInitialized> {
) -> anyhow::Result<()> {
let mut guard = self.upload_queue.lock().unwrap();
let upload_queue = guard.initialized_mut()?;

View File

@@ -290,7 +290,7 @@ impl Layer {
}
/// Downloads if necessary and creates a guard, which will keep this layer from being evicted.
pub(crate) async fn download_and_keep_resident(&self) -> Result<ResidentLayer, DownloadError> {
pub(crate) async fn download_and_keep_resident(&self) -> anyhow::Result<ResidentLayer> {
let downloaded = self.0.get_or_maybe_download(true, None).await?;
Ok(ResidentLayer {
@@ -1174,7 +1174,7 @@ pub(crate) enum EvictionError {
/// Error internal to the [`LayerInner::get_or_maybe_download`]
#[derive(Debug, thiserror::Error)]
pub(crate) enum DownloadError {
enum DownloadError {
#[error("timeline has already shutdown")]
TimelineShutdown,
#[error("no remote storage configured")]
@@ -1197,15 +1197,6 @@ pub(crate) enum DownloadError {
PostStatFailed(#[source] std::io::Error),
}
impl DownloadError {
pub fn is_cancelled(&self) -> bool {
match self {
Self::TimelineShutdown | Self::DownloadCancelled => true,
_ => false,
}
}
}
#[derive(Debug, PartialEq)]
pub(crate) enum NeedsDownload {
NotFound,

View File

@@ -9,7 +9,6 @@ use crate::context::{DownloadBehavior, RequestContext};
use crate::metrics::TENANT_TASK_EVENTS;
use crate::task_mgr;
use crate::task_mgr::{TaskKind, BACKGROUND_RUNTIME};
use crate::tenant::timeline::CompactionError;
use crate::tenant::{Tenant, TenantState};
use tokio_util::sync::CancellationToken;
use tracing::*;
@@ -182,11 +181,8 @@ async fn compaction_loop(tenant: Arc<Tenant>, cancel: CancellationToken) {
);
error_run_count += 1;
let wait_duration = Duration::from_secs_f64(wait_duration);
log_compaction_error(
&e,
error_run_count,
&wait_duration,
cancel.is_cancelled(),
error!(
"Compaction failed {error_run_count} times, retrying in {wait_duration:?}: {e:?}",
);
wait_duration
} else {
@@ -214,38 +210,6 @@ async fn compaction_loop(tenant: Arc<Tenant>, cancel: CancellationToken) {
TENANT_TASK_EVENTS.with_label_values(&["stop"]).inc();
}
fn log_compaction_error(
e: &CompactionError,
error_run_count: u32,
sleep_duration: &std::time::Duration,
task_cancelled: bool,
) {
use crate::tenant::upload_queue::NotInitialized;
use crate::tenant::PageReconstructError;
use CompactionError::*;
enum LooksLike {
Info,
Error,
}
let decision = match e {
ShuttingDown => None,
_ if task_cancelled => Some(LooksLike::Info),
Other(e) => Some(LooksLike::Error),
};
match decision {
Some(LooksLike::Info) => info!(
"Compaction failed {error_run_count} times, retrying in {sleep_duration:?}: {e:#}",
),
Some(LooksLike::Error) => error!(
"Compaction failed {error_run_count} times, retrying in {sleep_duration:?}: {e:?}",
),
None => {}
}
}
///
/// GC task's main loop
///

View File

@@ -73,8 +73,8 @@ use crate::metrics::{
TimelineMetrics, MATERIALIZED_PAGE_CACHE_HIT, MATERIALIZED_PAGE_CACHE_HIT_DIRECT,
};
use crate::pgdatadir_mapping::CalculateLogicalSizeError;
use crate::pgdatadir_mapping::{is_inherited_key, is_rel_fsm_block_key, is_rel_vm_block_key};
use crate::tenant::config::TenantConfOpt;
use pageserver_api::key::{is_inherited_key, is_rel_fsm_block_key, is_rel_vm_block_key};
use pageserver_api::reltag::RelTag;
use pageserver_api::shard::ShardIndex;
@@ -103,14 +103,11 @@ use self::layer_manager::LayerManager;
use self::logical_size::LogicalSize;
use self::walreceiver::{WalReceiver, WalReceiverConf};
use super::config::TenantConf;
use super::remote_timeline_client::index::{IndexLayerMetadata, IndexPart};
use super::remote_timeline_client::RemoteTimelineClient;
use super::secondary::heatmap::{HeatMapLayer, HeatMapTimeline};
use super::{config::TenantConf, upload_queue::NotInitialized};
use super::{debug_assert_current_span_has_tenant_and_timeline_id, AttachedTenantConf};
use super::{
remote_timeline_client::index::{IndexLayerMetadata, IndexPart},
storage_layer::layer,
};
#[derive(Debug, PartialEq, Eq, Clone, Copy)]
pub(super) enum FlushLoopState {
@@ -394,7 +391,8 @@ pub(crate) enum PageReconstructError {
#[error("Ancestor LSN wait error: {0}")]
AncestorLsnTimeout(#[from] WaitLsnError),
#[error("timeline shutting down")]
/// The operation was cancelled
#[error("Cancelled")]
Cancelled,
/// The ancestor of this is being stopped
@@ -406,19 +404,6 @@ pub(crate) enum PageReconstructError {
WalRedo(anyhow::Error),
}
impl PageReconstructError {
/// Returns true if this error indicates a tenant/timeline shutdown alike situation
pub(crate) fn is_stopping(&self) -> bool {
use PageReconstructError::*;
match self {
Other(_) => false,
AncestorLsnTimeout(_) => false,
Cancelled | AncestorStopping(_) => true,
WalRedo(_) => false,
}
}
}
#[derive(thiserror::Error, Debug)]
enum FlushLayerError {
/// Timeline cancellation token was cancelled
@@ -847,7 +832,8 @@ impl Timeline {
// "enough".
let layers = self
.create_image_layers(&partitioning, lsn, false, &image_ctx)
.await?;
.await
.map_err(anyhow::Error::from)?;
if let Some(remote_client) = &self.remote_client {
for layer in layers {
remote_client.schedule_layer_file_upload(layer)?;
@@ -3214,46 +3200,7 @@ pub(crate) enum CompactionError {
ShuttingDown,
/// Compaction cannot be done right now; page reconstruction and so on.
#[error(transparent)]
Other(anyhow::Error),
}
impl CompactionError {
fn other<E>(err: E) -> Self
where
E: std::error::Error + Send + Sync + 'static,
{
CompactionError::Other(anyhow::Error::new(err))
}
}
impl From<PageReconstructError> for CompactionError {
fn from(value: PageReconstructError) -> Self {
if value.is_stopping() {
CompactionError::ShuttingDown
} else {
CompactionError::other(value)
}
}
}
impl From<NotInitialized> for CompactionError {
fn from(value: NotInitialized) -> Self {
if value.is_stopping() {
CompactionError::ShuttingDown
} else {
CompactionError::other(value)
}
}
}
impl From<layer::DownloadError> for CompactionError {
fn from(value: layer::DownloadError) -> Self {
if value.is_cancelled() {
CompactionError::ShuttingDown
} else {
CompactionError::other(value)
}
}
Other(#[from] anyhow::Error),
}
#[serde_as]
@@ -3386,7 +3333,7 @@ impl Timeline {
stats.read_lock_held_spawn_blocking_startup_micros =
stats.read_lock_acquisition_micros.till_now(); // set by caller
let layers = guard.layer_map();
let level0_deltas = layers.get_level0_deltas();
let level0_deltas = layers.get_level0_deltas()?;
let mut level0_deltas = level0_deltas
.into_iter()
.map(|x| guard.get_from_desc(&x))
@@ -3433,8 +3380,7 @@ impl Timeline {
delta
.download_and_keep_resident()
.await
.context("download layer for failpoint")
.map_err(CompactionError::Other)?,
.context("download layer for failpoint")?,
);
}
tracing::info!("compact-level0-phase1-return-same"); // so that we can check if we hit the failpoint
@@ -3518,7 +3464,7 @@ impl Timeline {
let mut all_keys = Vec::new();
for l in deltas_to_compact.iter() {
all_keys.extend(l.load_keys(ctx).await.map_err(CompactionError::Other)?);
all_keys.extend(l.load_keys(ctx).await?);
}
// FIXME: should spawn_blocking the rest of this function
@@ -3538,10 +3484,7 @@ impl Timeline {
// has not so much sense, because largest holes will corresponds field1/field2 changes.
// But we are mostly interested to eliminate holes which cause generation of excessive image layers.
// That is why it is better to measure size of hole as number of covering image layers.
let coverage_size = layers
.image_coverage(&key_range, last_record_lsn)
.map_err(CompactionError::Other)?
.len();
let coverage_size = layers.image_coverage(&key_range, last_record_lsn)?.len();
if coverage_size >= min_hole_coverage_size {
heap.push(Hole {
key_range,
@@ -3640,7 +3583,7 @@ impl Timeline {
key, lsn, ref val, ..
} in all_values_iter
{
let value = val.load(ctx).await.map_err(CompactionError::Other)?;
let value = val.load(ctx).await?;
let same_key = prev_key.map_or(false, |prev_key| prev_key == key);
// We need to check key boundaries once we reach next key or end of layer with the same key
if !same_key || lsn == dup_end_lsn {
@@ -3697,8 +3640,7 @@ impl Timeline {
.take()
.unwrap()
.finish(prev_key.unwrap().next(), self)
.await
.map_err(CompactionError::Other)?,
.await?,
);
writer = None;
@@ -3728,8 +3670,7 @@ impl Timeline {
lsn_range.clone()
},
)
.await
.map_err(CompactionError::Other)?,
.await?,
);
}
@@ -3740,12 +3681,7 @@ impl Timeline {
});
if !self.shard_identity.is_key_disposable(&key) {
writer
.as_mut()
.unwrap()
.put_value(key, lsn, value)
.await
.map_err(CompactionError::Other)?;
writer.as_mut().unwrap().put_value(key, lsn, value).await?;
} else {
debug!(
"Dropping key {} during compaction (it belongs on shard {:?})",
@@ -3761,12 +3697,7 @@ impl Timeline {
prev_key = Some(key);
}
if let Some(writer) = writer {
new_layers.push(
writer
.finish(prev_key.unwrap().next(), self)
.await
.map_err(CompactionError::Other)?,
);
new_layers.push(writer.finish(prev_key.unwrap().next(), self).await?);
}
// Sync layers
@@ -3795,8 +3726,7 @@ impl Timeline {
// minimize latency.
par_fsync::par_fsync_async(&layer_paths)
.await
.context("fsync all new layers")
.map_err(CompactionError::Other)?;
.context("fsync all new layers")?;
let timeline_dir = self
.conf
@@ -3804,8 +3734,7 @@ impl Timeline {
par_fsync::par_fsync_async(&[timeline_dir])
.await
.context("fsync of timeline dir")
.map_err(CompactionError::Other)?;
.context("fsync of timeline dir")?;
}
stats.write_layer_files_micros = stats.read_lock_drop_micros.till_now();

View File

@@ -126,27 +126,6 @@ pub(super) struct UploadQueueStopped {
pub(super) deleted_at: SetDeletedFlagProgress,
}
#[derive(thiserror::Error, Debug)]
pub(crate) enum NotInitialized {
#[error("queue is in state Uninitialized")]
Uninitialized,
#[error("queue is in state Stopping")]
Stopped,
#[error("queue is shutting down")]
ShuttingDown,
}
impl NotInitialized {
pub(crate) fn is_stopping(&self) -> bool {
use NotInitialized::*;
match self {
Uninitialized => false,
Stopped => true,
ShuttingDown => true,
}
}
}
impl UploadQueue {
pub(crate) fn initialize_empty_remote(
&mut self,
@@ -234,20 +213,18 @@ impl UploadQueue {
Ok(self.initialized_mut().expect("we just set it"))
}
pub(crate) fn initialized_mut(
&mut self,
) -> Result<&mut UploadQueueInitialized, NotInitialized> {
use UploadQueue::*;
pub(crate) fn initialized_mut(&mut self) -> anyhow::Result<&mut UploadQueueInitialized> {
match self {
Uninitialized => Err(NotInitialized::Uninitialized.into()),
Initialized(x) => {
if x.shutting_down {
Err(NotInitialized::ShuttingDown.into())
} else {
UploadQueue::Uninitialized | UploadQueue::Stopped(_) => {
anyhow::bail!("queue is in state {}", self.as_str())
}
UploadQueue::Initialized(x) => {
if !x.shutting_down {
Ok(x)
} else {
anyhow::bail!("queue is shutting down")
}
}
Stopped(_) => Err(NotInitialized::Stopped.into()),
}
}

View File

@@ -33,12 +33,11 @@ use utils::failpoint_support;
use crate::context::RequestContext;
use crate::metrics::WAL_INGEST;
use crate::pgdatadir_mapping::{DatadirModification, Version};
use crate::pgdatadir_mapping::*;
use crate::tenant::PageReconstructError;
use crate::tenant::Timeline;
use crate::walrecord::*;
use crate::ZERO_PAGE;
use pageserver_api::key::rel_block_to_key;
use pageserver_api::reltag::{BlockNumber, RelTag, SlruKind};
use postgres_ffi::pg_constants;
use postgres_ffi::relfile_utils::{FSM_FORKNUM, INIT_FORKNUM, MAIN_FORKNUM, VISIBILITYMAP_FORKNUM};

View File

@@ -47,10 +47,11 @@ use crate::metrics::{
WAL_REDO_PROCESS_LAUNCH_DURATION_HISTOGRAM, WAL_REDO_RECORDS_HISTOGRAM,
WAL_REDO_RECORD_COUNTER, WAL_REDO_TIME,
};
use crate::pgdatadir_mapping::key_to_slru_block;
use crate::repository::Key;
use crate::walrecord::NeonWalRecord;
use pageserver_api::key::{key_to_rel_block, key_to_slru_block};
use pageserver_api::key::key_to_rel_block;
use pageserver_api::reltag::{RelTag, SlruKind};
use postgres_ffi::pg_constants;
use postgres_ffi::relfile_utils::VISIBILITYMAP_FORKNUM;

View File

@@ -637,7 +637,7 @@ HandleAlterRole(AlterRoleStmt *stmt)
ListCell *option;
const char *role_name = stmt->role->rolename;
if (RoleIsNeonSuperuser(role_name) && !superuser())
if (RoleIsNeonSuperuser(role_name))
elog(ERROR, "can't ALTER neon_superuser");
foreach(option, stmt->options)

View File

@@ -64,26 +64,10 @@ static int max_reconnect_attempts = 60;
#define MAX_PAGESERVER_CONNSTRING_SIZE 256
/*
* The "neon.pageserver_connstring" GUC is marked with the PGC_SIGHUP option,
* allowing it to be changed using pg_reload_conf(). The control plane can
* update the connection string if the pageserver crashes, is relocated, or
* new shards are added. A copy of the current value of the GUC is kept in
* shared memory, updated by the postmaster, because regular backends don't
* reload the config during query execution, but we might need to re-establish
* the pageserver connection with the new connection string even in the middle
* of a query.
*
* The shared memory copy is protected by a lockless algorithm using two
* atomic counters. The counters allow a backend to quickly check if the value
* has changed since last access, and to detect and retry copying the value if
* the postmaster changes the value concurrently. (Postmaster doesn't have a
* PGPROC entry and therefore cannot use LWLocks.)
*/
typedef struct
{
pg_atomic_uint64 begin_update_counter;
pg_atomic_uint64 end_update_counter;
LWLockId lock;
pg_atomic_uint64 update_counter;
char pageserver_connstring[MAX_PAGESERVER_CONNSTRING_SIZE];
} PagestoreShmemState;
@@ -100,7 +84,7 @@ static bool pageserver_flush(void);
static void pageserver_disconnect(void);
static bool
PagestoreShmemIsValid(void)
PagestoreShmemIsValid()
{
return pagestore_shared && UsedShmemSegAddr;
}
@@ -114,58 +98,31 @@ CheckPageserverConnstring(char **newval, void **extra, GucSource source)
static void
AssignPageserverConnstring(const char *newval, void *extra)
{
/*
* Only postmaster updates the copy in shared memory.
*/
if (!PagestoreShmemIsValid() || IsUnderPostmaster)
if (!PagestoreShmemIsValid())
return;
pg_atomic_add_fetch_u64(&pagestore_shared->begin_update_counter, 1);
pg_write_barrier();
LWLockAcquire(pagestore_shared->lock, LW_EXCLUSIVE);
strlcpy(pagestore_shared->pageserver_connstring, newval, MAX_PAGESERVER_CONNSTRING_SIZE);
pg_write_barrier();
pg_atomic_add_fetch_u64(&pagestore_shared->end_update_counter, 1);
pg_atomic_fetch_add_u64(&pagestore_shared->update_counter, 1);
LWLockRelease(pagestore_shared->lock);
}
static bool
CheckConnstringUpdated(void)
CheckConnstringUpdated()
{
if (!PagestoreShmemIsValid())
return false;
return pagestore_local_counter < pg_atomic_read_u64(&pagestore_shared->begin_update_counter);
return pagestore_local_counter < pg_atomic_read_u64(&pagestore_shared->update_counter);
}
static void
ReloadConnstring(void)
ReloadConnstring()
{
uint64 begin_update_counter;
uint64 end_update_counter;
if (!PagestoreShmemIsValid())
return;
/*
* Copy the current settnig from shared to local memory. Postmaster can
* update the value concurrently, in which case we would copy a garbled
* mix of the old and new values. We will detect it because the counter's
* won't match, and retry. But it's important that we don't do anything
* within the retry-loop that would depend on the string having valid
* contents.
*/
do
{
begin_update_counter = pg_atomic_read_u64(&pagestore_shared->begin_update_counter);
end_update_counter = pg_atomic_read_u64(&pagestore_shared->end_update_counter);
pg_read_barrier();
strlcpy(local_pageserver_connstring, pagestore_shared->pageserver_connstring, sizeof(local_pageserver_connstring));
pg_read_barrier();
}
while (begin_update_counter != end_update_counter
|| begin_update_counter != pg_atomic_read_u64(&pagestore_shared->begin_update_counter)
|| end_update_counter != pg_atomic_read_u64(&pagestore_shared->end_update_counter));
pagestore_local_counter = end_update_counter;
LWLockAcquire(pagestore_shared->lock, LW_SHARED);
strlcpy(local_pageserver_connstring, pagestore_shared->pageserver_connstring, sizeof(local_pageserver_connstring));
pagestore_local_counter = pg_atomic_read_u64(&pagestore_shared->update_counter);
LWLockRelease(pagestore_shared->lock);
}
static bool
@@ -180,7 +137,7 @@ pageserver_connect(int elevel)
static TimestampTz last_connect_time = 0;
static uint64_t delay_us = MIN_RECONNECT_INTERVAL_USEC;
TimestampTz now;
uint64_t us_since_last_connect;
uint64_t us_since_last_connect;
Assert(!connected);
@@ -190,7 +147,7 @@ pageserver_connect(int elevel)
}
now = GetCurrentTimestamp();
us_since_last_connect = now - last_connect_time;
us_since_last_connect = now - last_connect_time;
if (us_since_last_connect < delay_us)
{
pg_usleep(delay_us - us_since_last_connect);
@@ -548,8 +505,8 @@ PagestoreShmemInit(void)
&found);
if (!found)
{
pg_atomic_init_u64(&pagestore_shared->begin_update_counter, 0);
pg_atomic_init_u64(&pagestore_shared->end_update_counter, 0);
pagestore_shared->lock = &(GetNamedLWLockTranche("neon_libpagestore")->lock);
pg_atomic_init_u64(&pagestore_shared->update_counter, 0);
AssignPageserverConnstring(page_server_connstring, NULL);
}
LWLockRelease(AddinShmemInitLock);
@@ -574,6 +531,7 @@ pagestore_shmem_request(void)
#endif
RequestAddinShmemSpace(PagestoreShmemSize());
RequestNamedLWLockTranche("neon_libpagestore", 1);
}
static void

View File

@@ -110,7 +110,7 @@ pub static REMOVED_WAL_SEGMENTS: Lazy<IntCounter> = Lazy::new(|| {
pub static BACKED_UP_SEGMENTS: Lazy<IntCounter> = Lazy::new(|| {
register_int_counter!(
"safekeeper_backed_up_segments_total",
"Number of WAL segments backed up to the S3"
"Number of WAL segments backed up to the broker"
)
.expect("Failed to register safekeeper_backed_up_segments_total counter")
});
@@ -337,7 +337,6 @@ pub struct TimelineCollector {
flushed_wal_seconds: GaugeVec,
collect_timeline_metrics: Gauge,
timelines_count: IntGauge,
active_timelines_count: IntGauge,
}
impl Default for TimelineCollector {
@@ -521,13 +520,6 @@ impl TimelineCollector {
.unwrap();
descs.extend(timelines_count.desc().into_iter().cloned());
let active_timelines_count = IntGauge::new(
"safekeeper_active_timelines",
"Total number of active timelines",
)
.unwrap();
descs.extend(active_timelines_count.desc().into_iter().cloned());
TimelineCollector {
descs,
commit_lsn,
@@ -548,7 +540,6 @@ impl TimelineCollector {
flushed_wal_seconds,
collect_timeline_metrics,
timelines_count,
active_timelines_count,
}
}
}
@@ -581,7 +572,6 @@ impl Collector for TimelineCollector {
let timelines = GlobalTimelines::get_all();
let timelines_count = timelines.len();
let mut active_timelines_count = 0;
// Prometheus Collector is sync, and data is stored under async lock. To
// bridge the gap with a crutch, collect data in spawned thread with
@@ -600,10 +590,6 @@ impl Collector for TimelineCollector {
let timeline_id = tli.ttid.timeline_id.to_string();
let labels = &[tenant_id.as_str(), timeline_id.as_str()];
if tli.timeline_is_active {
active_timelines_count += 1;
}
self.commit_lsn
.with_label_values(labels)
.set(tli.mem_state.commit_lsn.into());
@@ -695,8 +681,6 @@ impl Collector for TimelineCollector {
// report total number of timelines
self.timelines_count.set(timelines_count as i64);
self.active_timelines_count
.set(active_timelines_count as i64);
mfs.extend(self.timelines_count.collect());
mfs

View File

@@ -10,15 +10,11 @@ use crate::{GlobalTimelines, SafeKeeperConf};
pub async fn task_main(conf: SafeKeeperConf) -> anyhow::Result<()> {
let wal_removal_interval = Duration::from_millis(5000);
loop {
let now = tokio::time::Instant::now();
let mut active_timelines = 0;
let tlis = GlobalTimelines::get_all();
for tli in &tlis {
if !tli.is_active().await {
continue;
}
active_timelines += 1;
let ttid = tli.ttid;
async {
if let Err(e) = tli.maybe_persist_control_file().await {
@@ -31,17 +27,6 @@ pub async fn task_main(conf: SafeKeeperConf) -> anyhow::Result<()> {
.instrument(info_span!("WAL removal", ttid = %ttid))
.await;
}
let elapsed = now.elapsed();
let total_timelines = tlis.len();
if elapsed > wal_removal_interval {
info!(
"WAL removal is too long, processed {} active timelines ({} total) in {:?}",
active_timelines, total_timelines, elapsed
);
}
sleep(wal_removal_interval).await;
}
}

View File

@@ -2914,7 +2914,6 @@ class Endpoint(PgProtocol):
# Write it back updated
with open(config_path, "w") as file:
log.info(json.dumps(dict(data_dict, **kwargs)))
json.dump(dict(data_dict, **kwargs), file, indent=4)
# Mock the extension part of spec passed from control plane for local testing

View File

@@ -248,15 +248,8 @@ def test_ddl_forwarding(ddl: DdlForwardingContext):
# We don't have compute_ctl, so here, so create neon_superuser here manually
cur.execute("CREATE ROLE neon_superuser NOLOGIN CREATEDB CREATEROLE")
# Contrary to popular belief, being superman does not make you superuser
cur.execute("CREATE ROLE superman LOGIN NOSUPERUSER PASSWORD 'jungle_man'")
with ddl.pg.cursor(user="superman", password="jungle_man") as superman_cur:
# We allow real SUPERUSERs to ALTER neon_superuser
with pytest.raises(psycopg2.InternalError):
superman_cur.execute("ALTER ROLE neon_superuser LOGIN")
cur.execute("ALTER ROLE neon_superuser LOGIN")
with pytest.raises(psycopg2.InternalError):
cur.execute("ALTER ROLE neon_superuser LOGIN")
with pytest.raises(psycopg2.InternalError):
cur.execute("CREATE DATABASE trololobus WITH OWNER neon_superuser")

View File

@@ -1,37 +0,0 @@
import time
from fixtures.neon_fixtures import NeonEnv
def test_migrations(neon_simple_env: NeonEnv):
env = neon_simple_env
env.neon_cli.create_branch("test_migrations", "empty")
endpoint = env.endpoints.create("test_migrations")
log_path = endpoint.endpoint_path() / "compute.log"
endpoint.respec(skip_pg_catalog_updates=False, features=["migrations"])
endpoint.start()
time.sleep(1) # Sleep to let migrations run
with endpoint.cursor() as cur:
cur.execute("SELECT id FROM neon_migration.migration_id")
migration_id = cur.fetchall()
assert migration_id[0][0] == 2
with open(log_path, "r") as log_file:
logs = log_file.read()
assert "INFO handle_migrations: Ran 2 migrations" in logs
endpoint.stop()
endpoint.start()
time.sleep(1) # Sleep to let migrations run
with endpoint.cursor() as cur:
cur.execute("SELECT id FROM neon_migration.migration_id")
migration_id = cur.fetchall()
assert migration_id[0][0] == 2
with open(log_path, "r") as log_file:
logs = log_file.read()
assert "INFO handle_migrations: Ran 0 migrations" in logs

View File

@@ -1,42 +0,0 @@
import threading
import time
from contextlib import closing
from fixtures.log_helper import log
from fixtures.neon_fixtures import NeonEnv, PgBin
# Test updating neon.pageserver_connstring setting on the fly.
#
# This merely changes some whitespace in the connection string, so
# this doesn't prove that the new string actually takes effect. But at
# least the code gets exercised.
def test_pageserver_reconnect(neon_simple_env: NeonEnv, pg_bin: PgBin):
env = neon_simple_env
env.neon_cli.create_branch("test_pageserver_restarts")
endpoint = env.endpoints.create_start("test_pageserver_restarts")
n_reconnects = 1000
timeout = 0.01
scale = 10
def run_pgbench(connstr: str):
log.info(f"Start a pgbench workload on pg {connstr}")
pg_bin.run_capture(["pgbench", "-i", f"-s{scale}", connstr])
pg_bin.run_capture(["pgbench", f"-T{int(n_reconnects*timeout)}", connstr])
thread = threading.Thread(target=run_pgbench, args=(endpoint.connstr(),), daemon=True)
thread.start()
with closing(endpoint.connect()) as con:
with con.cursor() as c:
c.execute("SELECT setting FROM pg_settings WHERE name='neon.pageserver_connstring'")
connstring = c.fetchall()[0][0]
for i in range(n_reconnects):
time.sleep(timeout)
c.execute(
"alter system set neon.pageserver_connstring=%s",
(connstring + (" " * (i % 2)),),
)
c.execute("select pg_reload_conf()")
thread.join()

View File

@@ -2,6 +2,7 @@ import concurrent.futures
import enum
import os
import shutil
import time
from threading import Thread
import pytest
@@ -556,6 +557,216 @@ def test_tenant_delete_concurrent(
assert ps_http.get_metric_value("pageserver_tenant_manager_slots") == 0
def test_tenant_delete_races_timeline_creation_01(
neon_env_builder: NeonEnvBuilder,
pg_bin: PgBin,
):
test_tenant_delete_races_timeline_creation(neon_env_builder, pg_bin)
def test_tenant_delete_races_timeline_creation_02(
neon_env_builder: NeonEnvBuilder,
pg_bin: PgBin,
):
test_tenant_delete_races_timeline_creation(neon_env_builder, pg_bin)
def test_tenant_delete_races_timeline_creation_03(
neon_env_builder: NeonEnvBuilder,
pg_bin: PgBin,
):
test_tenant_delete_races_timeline_creation(neon_env_builder, pg_bin)
def test_tenant_delete_races_timeline_creation_04(
neon_env_builder: NeonEnvBuilder,
pg_bin: PgBin,
):
test_tenant_delete_races_timeline_creation(neon_env_builder, pg_bin)
def test_tenant_delete_races_timeline_creation_05(
neon_env_builder: NeonEnvBuilder,
pg_bin: PgBin,
):
test_tenant_delete_races_timeline_creation(neon_env_builder, pg_bin)
def test_tenant_delete_races_timeline_creation_06(
neon_env_builder: NeonEnvBuilder,
pg_bin: PgBin,
):
test_tenant_delete_races_timeline_creation(neon_env_builder, pg_bin)
def test_tenant_delete_races_timeline_creation_07(
neon_env_builder: NeonEnvBuilder,
pg_bin: PgBin,
):
test_tenant_delete_races_timeline_creation(neon_env_builder, pg_bin)
def test_tenant_delete_races_timeline_creation_08(
neon_env_builder: NeonEnvBuilder,
pg_bin: PgBin,
):
test_tenant_delete_races_timeline_creation(neon_env_builder, pg_bin)
def test_tenant_delete_races_timeline_creation_09(
neon_env_builder: NeonEnvBuilder,
pg_bin: PgBin,
):
test_tenant_delete_races_timeline_creation(neon_env_builder, pg_bin)
def test_tenant_delete_races_timeline_creation_10(
neon_env_builder: NeonEnvBuilder,
pg_bin: PgBin,
):
test_tenant_delete_races_timeline_creation(neon_env_builder, pg_bin)
def test_tenant_delete_races_timeline_creation_11(
neon_env_builder: NeonEnvBuilder,
pg_bin: PgBin,
):
test_tenant_delete_races_timeline_creation(neon_env_builder, pg_bin)
def test_tenant_delete_races_timeline_creation_12(
neon_env_builder: NeonEnvBuilder,
pg_bin: PgBin,
):
test_tenant_delete_races_timeline_creation(neon_env_builder, pg_bin)
def test_tenant_delete_races_timeline_creation_13(
neon_env_builder: NeonEnvBuilder,
pg_bin: PgBin,
):
test_tenant_delete_races_timeline_creation(neon_env_builder, pg_bin)
def test_tenant_delete_races_timeline_creation_14(
neon_env_builder: NeonEnvBuilder,
pg_bin: PgBin,
):
test_tenant_delete_races_timeline_creation(neon_env_builder, pg_bin)
def test_tenant_delete_races_timeline_creation_15(
neon_env_builder: NeonEnvBuilder,
pg_bin: PgBin,
):
test_tenant_delete_races_timeline_creation(neon_env_builder, pg_bin)
def test_tenant_delete_races_timeline_creation_16(
neon_env_builder: NeonEnvBuilder,
pg_bin: PgBin,
):
test_tenant_delete_races_timeline_creation(neon_env_builder, pg_bin)
def test_tenant_delete_races_timeline_creation_17(
neon_env_builder: NeonEnvBuilder,
pg_bin: PgBin,
):
test_tenant_delete_races_timeline_creation(neon_env_builder, pg_bin)
def test_tenant_delete_races_timeline_creation_18(
neon_env_builder: NeonEnvBuilder,
pg_bin: PgBin,
):
test_tenant_delete_races_timeline_creation(neon_env_builder, pg_bin)
def test_tenant_delete_races_timeline_creation_19(
neon_env_builder: NeonEnvBuilder,
pg_bin: PgBin,
):
test_tenant_delete_races_timeline_creation(neon_env_builder, pg_bin)
def test_tenant_delete_races_timeline_creation_20(
neon_env_builder: NeonEnvBuilder,
pg_bin: PgBin,
):
test_tenant_delete_races_timeline_creation(neon_env_builder, pg_bin)
def test_tenant_delete_races_timeline_creation_21(
neon_env_builder: NeonEnvBuilder,
pg_bin: PgBin,
):
test_tenant_delete_races_timeline_creation(neon_env_builder, pg_bin)
def test_tenant_delete_races_timeline_creation_22(
neon_env_builder: NeonEnvBuilder,
pg_bin: PgBin,
):
test_tenant_delete_races_timeline_creation(neon_env_builder, pg_bin)
def test_tenant_delete_races_timeline_creation_23(
neon_env_builder: NeonEnvBuilder,
pg_bin: PgBin,
):
test_tenant_delete_races_timeline_creation(neon_env_builder, pg_bin)
def test_tenant_delete_races_timeline_creation_24(
neon_env_builder: NeonEnvBuilder,
pg_bin: PgBin,
):
test_tenant_delete_races_timeline_creation(neon_env_builder, pg_bin)
def test_tenant_delete_races_timeline_creation_25(
neon_env_builder: NeonEnvBuilder,
pg_bin: PgBin,
):
test_tenant_delete_races_timeline_creation(neon_env_builder, pg_bin)
def test_tenant_delete_races_timeline_creation_26(
neon_env_builder: NeonEnvBuilder,
pg_bin: PgBin,
):
test_tenant_delete_races_timeline_creation(neon_env_builder, pg_bin)
def test_tenant_delete_races_timeline_creation_27(
neon_env_builder: NeonEnvBuilder,
pg_bin: PgBin,
):
test_tenant_delete_races_timeline_creation(neon_env_builder, pg_bin)
def test_tenant_delete_races_timeline_creation_28(
neon_env_builder: NeonEnvBuilder,
pg_bin: PgBin,
):
test_tenant_delete_races_timeline_creation(neon_env_builder, pg_bin)
def test_tenant_delete_races_timeline_creation_29(
neon_env_builder: NeonEnvBuilder,
pg_bin: PgBin,
):
test_tenant_delete_races_timeline_creation(neon_env_builder, pg_bin)
def test_tenant_delete_races_timeline_creation_30(
neon_env_builder: NeonEnvBuilder,
pg_bin: PgBin,
):
test_tenant_delete_races_timeline_creation(neon_env_builder, pg_bin)
def test_tenant_delete_races_timeline_creation(
neon_env_builder: NeonEnvBuilder,
pg_bin: PgBin,
@@ -578,6 +789,9 @@ def test_tenant_delete_races_timeline_creation(
".*POST.*Cancelled request finished with an error: InternalServerError\\(.*ancelled"
)
# This can occur sometimes.
CONFLICT_MESSAGE = ".*Precondition failed: Invalid state Stopping. Expected Active or Broken.*"
env.pageserver.allowed_errors.extend(
[
# lucky race with stopping from flushing a layer we fail to schedule any uploads
@@ -586,6 +800,9 @@ def test_tenant_delete_races_timeline_creation(
".*POST.*/timeline.* request was dropped before completing",
# Timeline creation runs into this error
CANCELLED_ERROR,
# Timeline deletion can run into this error during deletion
CONFLICT_MESSAGE,
".*tenant_delete_handler.*still waiting, taking longer than expected.*",
]
)
@@ -643,6 +860,8 @@ def test_tenant_delete_races_timeline_creation(
except PageserverApiException:
pass
time.sleep(4)
# Physical deletion should have happened
assert_prefix_empty(
neon_env_builder.pageserver_remote_storage,