mirror of
https://github.com/neondatabase/neon.git
synced 2026-01-07 13:32:57 +00:00
pageserver: refactor TenantId to TenantShardId in Tenant & Timeline (#5957)
(includes two preparatory commits from https://github.com/neondatabase/neon/pull/5960) ## Problem To accommodate multiple shards in the same tenant on the same pageserver, we must include the full TenantShardId in local paths. That means that all code touching local storage needs to see the TenantShardId. ## Summary of changes - Replace `tenant_id: TenantId` with `tenant_shard_id: TenantShardId` on Tenant, Timeline and RemoteTimelineClient. - Use TenantShardId in helpers for building local paths. - Update all the relevant call sites. This doesn't update absolutely everything: things like PageCache, TaskMgr, WalRedo are still shard-naive. The purpose of this PR is to update the core types so that other code can be added/updated incrementally without churning the most central shared types.
This commit is contained in:
@@ -9,6 +9,7 @@ use clap::Parser;
|
||||
use hex::FromHex;
|
||||
use hyper::StatusCode;
|
||||
use hyper::{Body, Request, Response};
|
||||
use pageserver_api::shard::TenantShardId;
|
||||
use serde::{Deserialize, Serialize};
|
||||
use std::path::{Path, PathBuf};
|
||||
use std::{collections::HashMap, sync::Arc};
|
||||
@@ -173,7 +174,8 @@ async fn handle_re_attach(mut req: Request<Body>) -> Result<Response<Body>, ApiE
|
||||
if state.pageserver == Some(reattach_req.node_id) {
|
||||
state.generation += 1;
|
||||
response.tenants.push(ReAttachResponseTenant {
|
||||
id: *t,
|
||||
// TODO(sharding): make this shard-aware
|
||||
id: TenantShardId::unsharded(*t),
|
||||
gen: state.generation,
|
||||
});
|
||||
}
|
||||
@@ -196,7 +198,8 @@ async fn handle_validate(mut req: Request<Body>) -> Result<Response<Body>, ApiEr
|
||||
};
|
||||
|
||||
for req_tenant in validate_req.tenants {
|
||||
if let Some(tenant_state) = locked.tenants.get(&req_tenant.id) {
|
||||
// TODO(sharding): make this shard-aware
|
||||
if let Some(tenant_state) = locked.tenants.get(&req_tenant.id.tenant_id) {
|
||||
let valid = tenant_state.generation == req_tenant.gen;
|
||||
response.tenants.push(ValidateResponseTenant {
|
||||
id: req_tenant.id,
|
||||
|
||||
@@ -4,7 +4,9 @@
|
||||
//! See docs/rfcs/025-generation-numbers.md
|
||||
|
||||
use serde::{Deserialize, Serialize};
|
||||
use utils::id::{NodeId, TenantId};
|
||||
use utils::id::NodeId;
|
||||
|
||||
use crate::shard::TenantShardId;
|
||||
|
||||
#[derive(Serialize, Deserialize)]
|
||||
pub struct ReAttachRequest {
|
||||
@@ -13,7 +15,7 @@ pub struct ReAttachRequest {
|
||||
|
||||
#[derive(Serialize, Deserialize)]
|
||||
pub struct ReAttachResponseTenant {
|
||||
pub id: TenantId,
|
||||
pub id: TenantShardId,
|
||||
pub gen: u32,
|
||||
}
|
||||
|
||||
@@ -24,7 +26,7 @@ pub struct ReAttachResponse {
|
||||
|
||||
#[derive(Serialize, Deserialize)]
|
||||
pub struct ValidateRequestTenant {
|
||||
pub id: TenantId,
|
||||
pub id: TenantShardId,
|
||||
pub gen: u32,
|
||||
}
|
||||
|
||||
@@ -40,6 +42,6 @@ pub struct ValidateResponse {
|
||||
|
||||
#[derive(Serialize, Deserialize)]
|
||||
pub struct ValidateResponseTenant {
|
||||
pub id: TenantId,
|
||||
pub id: TenantShardId,
|
||||
pub valid: bool,
|
||||
}
|
||||
|
||||
@@ -5,10 +5,10 @@ use serde::{Deserialize, Serialize};
|
||||
use thiserror;
|
||||
use utils::id::TenantId;
|
||||
|
||||
#[derive(Ord, PartialOrd, Eq, PartialEq, Clone, Copy, Serialize, Deserialize, Debug)]
|
||||
#[derive(Ord, PartialOrd, Eq, PartialEq, Clone, Copy, Serialize, Deserialize, Debug, Hash)]
|
||||
pub struct ShardNumber(pub u8);
|
||||
|
||||
#[derive(Ord, PartialOrd, Eq, PartialEq, Clone, Copy, Serialize, Deserialize, Debug)]
|
||||
#[derive(Ord, PartialOrd, Eq, PartialEq, Clone, Copy, Serialize, Deserialize, Debug, Hash)]
|
||||
pub struct ShardCount(pub u8);
|
||||
|
||||
impl ShardCount {
|
||||
@@ -39,7 +39,7 @@ impl ShardNumber {
|
||||
/// Note that the binary encoding is _not_ backward compatible, because
|
||||
/// at the time sharding is introduced, there are no existing binary structures
|
||||
/// containing TenantId that we need to handle.
|
||||
#[derive(Eq, PartialEq, PartialOrd, Ord, Clone, Copy)]
|
||||
#[derive(Eq, PartialEq, PartialOrd, Ord, Clone, Copy, Hash)]
|
||||
pub struct TenantShardId {
|
||||
pub tenant_id: TenantId,
|
||||
pub shard_number: ShardNumber,
|
||||
|
||||
@@ -3,6 +3,7 @@ use pageserver::repository::Key;
|
||||
use pageserver::tenant::layer_map::LayerMap;
|
||||
use pageserver::tenant::storage_layer::LayerFileName;
|
||||
use pageserver::tenant::storage_layer::PersistentLayerDesc;
|
||||
use pageserver_api::shard::TenantShardId;
|
||||
use rand::prelude::{SeedableRng, SliceRandom, StdRng};
|
||||
use std::cmp::{max, min};
|
||||
use std::fs::File;
|
||||
@@ -211,7 +212,7 @@ fn bench_sequential(c: &mut Criterion) {
|
||||
let i32 = (i as u32) % 100;
|
||||
let zero = Key::from_hex("000000000000000000000000000000000000").unwrap();
|
||||
let layer = PersistentLayerDesc::new_img(
|
||||
TenantId::generate(),
|
||||
TenantShardId::unsharded(TenantId::generate()),
|
||||
TimelineId::generate(),
|
||||
zero.add(10 * i32)..zero.add(10 * i32 + 1),
|
||||
Lsn(i),
|
||||
|
||||
@@ -5,6 +5,7 @@
|
||||
//! See also `settings.md` for better description on every parameter.
|
||||
|
||||
use anyhow::{anyhow, bail, ensure, Context, Result};
|
||||
use pageserver_api::shard::TenantShardId;
|
||||
use remote_storage::{RemotePath, RemoteStorageConfig};
|
||||
use serde::de::IntoDeserializer;
|
||||
use std::env;
|
||||
@@ -25,7 +26,7 @@ use toml_edit::{Document, Item};
|
||||
use camino::{Utf8Path, Utf8PathBuf};
|
||||
use postgres_backend::AuthType;
|
||||
use utils::{
|
||||
id::{NodeId, TenantId, TimelineId},
|
||||
id::{NodeId, TimelineId},
|
||||
logging::LogFormat,
|
||||
};
|
||||
|
||||
@@ -628,12 +629,13 @@ impl PageServerConf {
|
||||
self.deletion_prefix().join(format!("header-{VERSION:02x}"))
|
||||
}
|
||||
|
||||
pub fn tenant_path(&self, tenant_id: &TenantId) -> Utf8PathBuf {
|
||||
self.tenants_path().join(tenant_id.to_string())
|
||||
pub fn tenant_path(&self, tenant_shard_id: &TenantShardId) -> Utf8PathBuf {
|
||||
self.tenants_path().join(tenant_shard_id.to_string())
|
||||
}
|
||||
|
||||
pub fn tenant_ignore_mark_file_path(&self, tenant_id: &TenantId) -> Utf8PathBuf {
|
||||
self.tenant_path(tenant_id).join(IGNORED_TENANT_FILE_NAME)
|
||||
pub fn tenant_ignore_mark_file_path(&self, tenant_shard_id: &TenantShardId) -> Utf8PathBuf {
|
||||
self.tenant_path(tenant_shard_id)
|
||||
.join(IGNORED_TENANT_FILE_NAME)
|
||||
}
|
||||
|
||||
/// Points to a place in pageserver's local directory,
|
||||
@@ -641,47 +643,53 @@ impl PageServerConf {
|
||||
///
|
||||
/// Legacy: superseded by tenant_location_config_path. Eventually
|
||||
/// remove this function.
|
||||
pub fn tenant_config_path(&self, tenant_id: &TenantId) -> Utf8PathBuf {
|
||||
self.tenant_path(tenant_id).join(TENANT_CONFIG_NAME)
|
||||
pub fn tenant_config_path(&self, tenant_shard_id: &TenantShardId) -> Utf8PathBuf {
|
||||
self.tenant_path(tenant_shard_id).join(TENANT_CONFIG_NAME)
|
||||
}
|
||||
|
||||
pub fn tenant_location_config_path(&self, tenant_id: &TenantId) -> Utf8PathBuf {
|
||||
self.tenant_path(tenant_id)
|
||||
pub fn tenant_location_config_path(&self, tenant_shard_id: &TenantShardId) -> Utf8PathBuf {
|
||||
self.tenant_path(tenant_shard_id)
|
||||
.join(TENANT_LOCATION_CONFIG_NAME)
|
||||
}
|
||||
|
||||
pub fn timelines_path(&self, tenant_id: &TenantId) -> Utf8PathBuf {
|
||||
self.tenant_path(tenant_id).join(TIMELINES_SEGMENT_NAME)
|
||||
pub fn timelines_path(&self, tenant_shard_id: &TenantShardId) -> Utf8PathBuf {
|
||||
self.tenant_path(tenant_shard_id)
|
||||
.join(TIMELINES_SEGMENT_NAME)
|
||||
}
|
||||
|
||||
pub fn timeline_path(&self, tenant_id: &TenantId, timeline_id: &TimelineId) -> Utf8PathBuf {
|
||||
self.timelines_path(tenant_id).join(timeline_id.to_string())
|
||||
pub fn timeline_path(
|
||||
&self,
|
||||
tenant_shard_id: &TenantShardId,
|
||||
timeline_id: &TimelineId,
|
||||
) -> Utf8PathBuf {
|
||||
self.timelines_path(tenant_shard_id)
|
||||
.join(timeline_id.to_string())
|
||||
}
|
||||
|
||||
pub fn timeline_uninit_mark_file_path(
|
||||
&self,
|
||||
tenant_id: TenantId,
|
||||
tenant_shard_id: TenantShardId,
|
||||
timeline_id: TimelineId,
|
||||
) -> Utf8PathBuf {
|
||||
path_with_suffix_extension(
|
||||
self.timeline_path(&tenant_id, &timeline_id),
|
||||
self.timeline_path(&tenant_shard_id, &timeline_id),
|
||||
TIMELINE_UNINIT_MARK_SUFFIX,
|
||||
)
|
||||
}
|
||||
|
||||
pub fn timeline_delete_mark_file_path(
|
||||
&self,
|
||||
tenant_id: TenantId,
|
||||
tenant_shard_id: TenantShardId,
|
||||
timeline_id: TimelineId,
|
||||
) -> Utf8PathBuf {
|
||||
path_with_suffix_extension(
|
||||
self.timeline_path(&tenant_id, &timeline_id),
|
||||
self.timeline_path(&tenant_shard_id, &timeline_id),
|
||||
TIMELINE_DELETE_MARK_SUFFIX,
|
||||
)
|
||||
}
|
||||
|
||||
pub fn tenant_deleted_mark_file_path(&self, tenant_id: &TenantId) -> Utf8PathBuf {
|
||||
self.tenant_path(tenant_id)
|
||||
pub fn tenant_deleted_mark_file_path(&self, tenant_shard_id: &TenantShardId) -> Utf8PathBuf {
|
||||
self.tenant_path(tenant_shard_id)
|
||||
.join(TENANT_DELETED_MARKER_FILE_NAME)
|
||||
}
|
||||
|
||||
@@ -691,20 +699,24 @@ impl PageServerConf {
|
||||
|
||||
pub fn trace_path(
|
||||
&self,
|
||||
tenant_id: &TenantId,
|
||||
tenant_shard_id: &TenantShardId,
|
||||
timeline_id: &TimelineId,
|
||||
connection_id: &ConnectionId,
|
||||
) -> Utf8PathBuf {
|
||||
self.traces_path()
|
||||
.join(tenant_id.to_string())
|
||||
.join(tenant_shard_id.to_string())
|
||||
.join(timeline_id.to_string())
|
||||
.join(connection_id.to_string())
|
||||
}
|
||||
|
||||
/// Points to a place in pageserver's local directory,
|
||||
/// where certain timeline's metadata file should be located.
|
||||
pub fn metadata_path(&self, tenant_id: &TenantId, timeline_id: &TimelineId) -> Utf8PathBuf {
|
||||
self.timeline_path(tenant_id, timeline_id)
|
||||
pub fn metadata_path(
|
||||
&self,
|
||||
tenant_shard_id: &TenantShardId,
|
||||
timeline_id: &TimelineId,
|
||||
) -> Utf8PathBuf {
|
||||
self.timeline_path(tenant_shard_id, timeline_id)
|
||||
.join(METADATA_FILE_NAME)
|
||||
}
|
||||
|
||||
|
||||
@@ -351,7 +351,7 @@ impl TimelineSnapshot {
|
||||
let last_record_lsn = t.get_last_record_lsn();
|
||||
|
||||
let current_exact_logical_size = {
|
||||
let span = tracing::info_span!("collect_metrics_iteration", tenant_id = %t.tenant_id, timeline_id = %t.timeline_id);
|
||||
let span = tracing::info_span!("collect_metrics_iteration", tenant_id = %t.tenant_shard_id.tenant_id, timeline_id = %t.timeline_id);
|
||||
let res = span
|
||||
.in_scope(|| t.get_current_logical_size(ctx))
|
||||
.context("get_current_logical_size");
|
||||
|
||||
@@ -1,16 +1,15 @@
|
||||
use std::collections::HashMap;
|
||||
|
||||
use pageserver_api::control_api::{
|
||||
ReAttachRequest, ReAttachResponse, ValidateRequest, ValidateRequestTenant, ValidateResponse,
|
||||
use pageserver_api::{
|
||||
control_api::{
|
||||
ReAttachRequest, ReAttachResponse, ValidateRequest, ValidateRequestTenant, ValidateResponse,
|
||||
},
|
||||
shard::TenantShardId,
|
||||
};
|
||||
use serde::{de::DeserializeOwned, Serialize};
|
||||
use tokio_util::sync::CancellationToken;
|
||||
use url::Url;
|
||||
use utils::{
|
||||
backoff,
|
||||
generation::Generation,
|
||||
id::{NodeId, TenantId},
|
||||
};
|
||||
use utils::{backoff, generation::Generation, id::NodeId};
|
||||
|
||||
use crate::config::PageServerConf;
|
||||
|
||||
@@ -31,11 +30,11 @@ pub enum RetryForeverError {
|
||||
|
||||
#[async_trait::async_trait]
|
||||
pub trait ControlPlaneGenerationsApi {
|
||||
async fn re_attach(&self) -> Result<HashMap<TenantId, Generation>, RetryForeverError>;
|
||||
async fn re_attach(&self) -> Result<HashMap<TenantShardId, Generation>, RetryForeverError>;
|
||||
async fn validate(
|
||||
&self,
|
||||
tenants: Vec<(TenantId, Generation)>,
|
||||
) -> Result<HashMap<TenantId, bool>, RetryForeverError>;
|
||||
tenants: Vec<(TenantShardId, Generation)>,
|
||||
) -> Result<HashMap<TenantShardId, bool>, RetryForeverError>;
|
||||
}
|
||||
|
||||
impl ControlPlaneClient {
|
||||
@@ -127,7 +126,7 @@ impl ControlPlaneClient {
|
||||
#[async_trait::async_trait]
|
||||
impl ControlPlaneGenerationsApi for ControlPlaneClient {
|
||||
/// Block until we get a successful response, or error out if we are shut down
|
||||
async fn re_attach(&self) -> Result<HashMap<TenantId, Generation>, RetryForeverError> {
|
||||
async fn re_attach(&self) -> Result<HashMap<TenantShardId, Generation>, RetryForeverError> {
|
||||
let re_attach_path = self
|
||||
.base_url
|
||||
.join("re-attach")
|
||||
@@ -154,8 +153,8 @@ impl ControlPlaneGenerationsApi for ControlPlaneClient {
|
||||
/// Block until we get a successful response, or error out if we are shut down
|
||||
async fn validate(
|
||||
&self,
|
||||
tenants: Vec<(TenantId, Generation)>,
|
||||
) -> Result<HashMap<TenantId, bool>, RetryForeverError> {
|
||||
tenants: Vec<(TenantShardId, Generation)>,
|
||||
) -> Result<HashMap<TenantShardId, bool>, RetryForeverError> {
|
||||
let re_attach_path = self
|
||||
.base_url
|
||||
.join("validate")
|
||||
|
||||
@@ -15,6 +15,7 @@ use crate::virtual_file::MaybeFatalIo;
|
||||
use crate::virtual_file::VirtualFile;
|
||||
use anyhow::Context;
|
||||
use camino::Utf8PathBuf;
|
||||
use pageserver_api::shard::TenantShardId;
|
||||
use remote_storage::{GenericRemoteStorage, RemotePath};
|
||||
use serde::Deserialize;
|
||||
use serde::Serialize;
|
||||
@@ -25,7 +26,7 @@ use tracing::Instrument;
|
||||
use tracing::{self, debug, error};
|
||||
use utils::crashsafe::path_with_suffix_extension;
|
||||
use utils::generation::Generation;
|
||||
use utils::id::{TenantId, TimelineId};
|
||||
use utils::id::TimelineId;
|
||||
use utils::lsn::AtomicLsn;
|
||||
use utils::lsn::Lsn;
|
||||
|
||||
@@ -193,7 +194,7 @@ struct DeletionList {
|
||||
/// nested HashMaps by TenantTimelineID. Each Tenant only appears once
|
||||
/// with one unique generation ID: if someone tries to push a second generation
|
||||
/// ID for the same tenant, we will start a new DeletionList.
|
||||
tenants: HashMap<TenantId, TenantDeletionList>,
|
||||
tenants: HashMap<TenantShardId, TenantDeletionList>,
|
||||
|
||||
/// Avoid having to walk `tenants` to calculate the number of keys in
|
||||
/// the nested deletion lists
|
||||
@@ -265,7 +266,7 @@ impl DeletionList {
|
||||
/// deletion list.
|
||||
fn push(
|
||||
&mut self,
|
||||
tenant: &TenantId,
|
||||
tenant: &TenantShardId,
|
||||
timeline: &TimelineId,
|
||||
generation: Generation,
|
||||
objects: &mut Vec<RemotePath>,
|
||||
@@ -357,7 +358,7 @@ struct TenantLsnState {
|
||||
|
||||
#[derive(Default)]
|
||||
struct VisibleLsnUpdates {
|
||||
tenants: HashMap<TenantId, TenantLsnState>,
|
||||
tenants: HashMap<TenantShardId, TenantLsnState>,
|
||||
}
|
||||
|
||||
impl VisibleLsnUpdates {
|
||||
@@ -414,7 +415,7 @@ impl DeletionQueueClient {
|
||||
|
||||
pub(crate) fn recover(
|
||||
&self,
|
||||
attached_tenants: HashMap<TenantId, Generation>,
|
||||
attached_tenants: HashMap<TenantShardId, Generation>,
|
||||
) -> Result<(), DeletionQueueError> {
|
||||
self.do_push(
|
||||
&self.tx,
|
||||
@@ -431,7 +432,7 @@ impl DeletionQueueClient {
|
||||
/// backend will later wake up and notice that the tenant's generation requires validation.
|
||||
pub(crate) async fn update_remote_consistent_lsn(
|
||||
&self,
|
||||
tenant_id: TenantId,
|
||||
tenant_shard_id: TenantShardId,
|
||||
timeline_id: TimelineId,
|
||||
current_generation: Generation,
|
||||
lsn: Lsn,
|
||||
@@ -442,10 +443,13 @@ impl DeletionQueueClient {
|
||||
.write()
|
||||
.expect("Lock should never be poisoned");
|
||||
|
||||
let tenant_entry = locked.tenants.entry(tenant_id).or_insert(TenantLsnState {
|
||||
timelines: HashMap::new(),
|
||||
generation: current_generation,
|
||||
});
|
||||
let tenant_entry = locked
|
||||
.tenants
|
||||
.entry(tenant_shard_id)
|
||||
.or_insert(TenantLsnState {
|
||||
timelines: HashMap::new(),
|
||||
generation: current_generation,
|
||||
});
|
||||
|
||||
if tenant_entry.generation != current_generation {
|
||||
// Generation might have changed if we were detached and then re-attached: in this case,
|
||||
@@ -472,7 +476,7 @@ impl DeletionQueueClient {
|
||||
/// generations in `layers` are the generations in which those layers were written.
|
||||
pub(crate) async fn push_layers(
|
||||
&self,
|
||||
tenant_id: TenantId,
|
||||
tenant_shard_id: TenantShardId,
|
||||
timeline_id: TimelineId,
|
||||
current_generation: Generation,
|
||||
layers: Vec<(LayerFileName, LayerFileMetadata)>,
|
||||
@@ -483,7 +487,7 @@ impl DeletionQueueClient {
|
||||
let mut layer_paths = Vec::new();
|
||||
for (layer, meta) in layers {
|
||||
layer_paths.push(remote_layer_path(
|
||||
&tenant_id,
|
||||
&tenant_shard_id.tenant_id,
|
||||
&timeline_id,
|
||||
meta.shard,
|
||||
&layer,
|
||||
@@ -494,7 +498,7 @@ impl DeletionQueueClient {
|
||||
return self.flush_immediate().await;
|
||||
}
|
||||
|
||||
self.push_layers_sync(tenant_id, timeline_id, current_generation, layers)
|
||||
self.push_layers_sync(tenant_shard_id, timeline_id, current_generation, layers)
|
||||
}
|
||||
|
||||
/// When a Tenant has a generation, push_layers is always synchronous because
|
||||
@@ -504,7 +508,7 @@ impl DeletionQueueClient {
|
||||
/// support (`<https://github.com/neondatabase/neon/issues/5395>`)
|
||||
pub(crate) fn push_layers_sync(
|
||||
&self,
|
||||
tenant_id: TenantId,
|
||||
tenant_shard_id: TenantShardId,
|
||||
timeline_id: TimelineId,
|
||||
current_generation: Generation,
|
||||
layers: Vec<(LayerFileName, LayerFileMetadata)>,
|
||||
@@ -515,7 +519,7 @@ impl DeletionQueueClient {
|
||||
self.do_push(
|
||||
&self.tx,
|
||||
ListWriterQueueMessage::Delete(DeletionOp {
|
||||
tenant_id,
|
||||
tenant_shard_id,
|
||||
timeline_id,
|
||||
layers,
|
||||
generation: current_generation,
|
||||
@@ -783,12 +787,12 @@ mod test {
|
||||
}
|
||||
|
||||
fn set_latest_generation(&self, gen: Generation) {
|
||||
let tenant_id = self.harness.tenant_id;
|
||||
let tenant_shard_id = self.harness.tenant_shard_id;
|
||||
self.mock_control_plane
|
||||
.latest_generation
|
||||
.lock()
|
||||
.unwrap()
|
||||
.insert(tenant_id, gen);
|
||||
.insert(tenant_shard_id, gen);
|
||||
}
|
||||
|
||||
/// Returns remote layer file name, suitable for use in assert_remote_files
|
||||
@@ -797,8 +801,8 @@ mod test {
|
||||
file_name: LayerFileName,
|
||||
gen: Generation,
|
||||
) -> anyhow::Result<String> {
|
||||
let tenant_id = self.harness.tenant_id;
|
||||
let relative_remote_path = remote_timeline_path(&tenant_id, &TIMELINE_ID);
|
||||
let tenant_shard_id = self.harness.tenant_shard_id;
|
||||
let relative_remote_path = remote_timeline_path(&tenant_shard_id, &TIMELINE_ID);
|
||||
let remote_timeline_path = self.remote_fs_dir.join(relative_remote_path.get_path());
|
||||
std::fs::create_dir_all(&remote_timeline_path)?;
|
||||
let remote_layer_file_name = format!("{}{}", file_name, gen.get_suffix());
|
||||
@@ -816,7 +820,7 @@ mod test {
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
struct MockControlPlane {
|
||||
pub latest_generation: std::sync::Arc<std::sync::Mutex<HashMap<TenantId, Generation>>>,
|
||||
pub latest_generation: std::sync::Arc<std::sync::Mutex<HashMap<TenantShardId, Generation>>>,
|
||||
}
|
||||
|
||||
impl MockControlPlane {
|
||||
@@ -830,20 +834,20 @@ mod test {
|
||||
#[async_trait::async_trait]
|
||||
impl ControlPlaneGenerationsApi for MockControlPlane {
|
||||
#[allow(clippy::diverging_sub_expression)] // False positive via async_trait
|
||||
async fn re_attach(&self) -> Result<HashMap<TenantId, Generation>, RetryForeverError> {
|
||||
async fn re_attach(&self) -> Result<HashMap<TenantShardId, Generation>, RetryForeverError> {
|
||||
unimplemented!()
|
||||
}
|
||||
async fn validate(
|
||||
&self,
|
||||
tenants: Vec<(TenantId, Generation)>,
|
||||
) -> Result<HashMap<TenantId, bool>, RetryForeverError> {
|
||||
tenants: Vec<(TenantShardId, Generation)>,
|
||||
) -> Result<HashMap<TenantShardId, bool>, RetryForeverError> {
|
||||
let mut result = HashMap::new();
|
||||
|
||||
let latest_generation = self.latest_generation.lock().unwrap();
|
||||
|
||||
for (tenant_id, generation) in tenants {
|
||||
if let Some(latest) = latest_generation.get(&tenant_id) {
|
||||
result.insert(tenant_id, *latest == generation);
|
||||
for (tenant_shard_id, generation) in tenants {
|
||||
if let Some(latest) = latest_generation.get(&tenant_shard_id) {
|
||||
result.insert(tenant_shard_id, *latest == generation);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -947,10 +951,10 @@ mod test {
|
||||
client.recover(HashMap::new())?;
|
||||
|
||||
let layer_file_name_1: LayerFileName = "000000000000000000000000000000000000-FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF__00000000016B59D8-00000000016B5A51".parse().unwrap();
|
||||
let tenant_id = ctx.harness.tenant_id;
|
||||
let tenant_shard_id = ctx.harness.tenant_shard_id;
|
||||
|
||||
let content: Vec<u8> = "victim1 contents".into();
|
||||
let relative_remote_path = remote_timeline_path(&tenant_id, &TIMELINE_ID);
|
||||
let relative_remote_path = remote_timeline_path(&tenant_shard_id, &TIMELINE_ID);
|
||||
let remote_timeline_path = ctx.remote_fs_dir.join(relative_remote_path.get_path());
|
||||
let deletion_prefix = ctx.harness.conf.deletion_prefix();
|
||||
|
||||
@@ -980,7 +984,7 @@ mod test {
|
||||
info!("Pushing");
|
||||
client
|
||||
.push_layers(
|
||||
tenant_id,
|
||||
tenant_shard_id,
|
||||
TIMELINE_ID,
|
||||
now_generation,
|
||||
[(layer_file_name_1.clone(), layer_metadata)].to_vec(),
|
||||
@@ -1027,8 +1031,8 @@ mod test {
|
||||
|
||||
ctx.set_latest_generation(latest_generation);
|
||||
|
||||
let tenant_id = ctx.harness.tenant_id;
|
||||
let relative_remote_path = remote_timeline_path(&tenant_id, &TIMELINE_ID);
|
||||
let tenant_shard_id = ctx.harness.tenant_shard_id;
|
||||
let relative_remote_path = remote_timeline_path(&tenant_shard_id, &TIMELINE_ID);
|
||||
let remote_timeline_path = ctx.remote_fs_dir.join(relative_remote_path.get_path());
|
||||
|
||||
// Initial state: a remote layer exists
|
||||
@@ -1038,7 +1042,7 @@ mod test {
|
||||
tracing::debug!("Pushing...");
|
||||
client
|
||||
.push_layers(
|
||||
tenant_id,
|
||||
tenant_shard_id,
|
||||
TIMELINE_ID,
|
||||
stale_generation,
|
||||
[(EXAMPLE_LAYER_NAME.clone(), layer_metadata.clone())].to_vec(),
|
||||
@@ -1053,7 +1057,7 @@ mod test {
|
||||
tracing::debug!("Pushing...");
|
||||
client
|
||||
.push_layers(
|
||||
tenant_id,
|
||||
tenant_shard_id,
|
||||
TIMELINE_ID,
|
||||
latest_generation,
|
||||
[(EXAMPLE_LAYER_NAME.clone(), layer_metadata.clone())].to_vec(),
|
||||
@@ -1075,9 +1079,9 @@ mod test {
|
||||
let client = ctx.deletion_queue.new_client();
|
||||
client.recover(HashMap::new())?;
|
||||
|
||||
let tenant_id = ctx.harness.tenant_id;
|
||||
let tenant_shard_id = ctx.harness.tenant_shard_id;
|
||||
|
||||
let relative_remote_path = remote_timeline_path(&tenant_id, &TIMELINE_ID);
|
||||
let relative_remote_path = remote_timeline_path(&tenant_shard_id, &TIMELINE_ID);
|
||||
let remote_timeline_path = ctx.remote_fs_dir.join(relative_remote_path.get_path());
|
||||
let deletion_prefix = ctx.harness.conf.deletion_prefix();
|
||||
|
||||
@@ -1093,7 +1097,7 @@ mod test {
|
||||
ctx.write_remote_layer(EXAMPLE_LAYER_NAME, layer_generation)?;
|
||||
client
|
||||
.push_layers(
|
||||
tenant_id,
|
||||
tenant_shard_id,
|
||||
TIMELINE_ID,
|
||||
now_generation.previous(),
|
||||
[(EXAMPLE_LAYER_NAME.clone(), layer_metadata.clone())].to_vec(),
|
||||
@@ -1107,7 +1111,7 @@ mod test {
|
||||
ctx.write_remote_layer(EXAMPLE_LAYER_NAME_ALT, layer_generation)?;
|
||||
client
|
||||
.push_layers(
|
||||
tenant_id,
|
||||
tenant_shard_id,
|
||||
TIMELINE_ID,
|
||||
now_generation,
|
||||
[(EXAMPLE_LAYER_NAME_ALT.clone(), layer_metadata.clone())].to_vec(),
|
||||
@@ -1138,7 +1142,7 @@ mod test {
|
||||
drop(client);
|
||||
ctx.restart().await;
|
||||
let client = ctx.deletion_queue.new_client();
|
||||
client.recover(HashMap::from([(tenant_id, now_generation)]))?;
|
||||
client.recover(HashMap::from([(tenant_shard_id, now_generation)]))?;
|
||||
|
||||
info!("Flush-executing");
|
||||
client.flush_execute().await?;
|
||||
@@ -1202,7 +1206,7 @@ pub(crate) mod mock {
|
||||
let mut objects = op.objects;
|
||||
for (layer, meta) in op.layers {
|
||||
objects.push(remote_layer_path(
|
||||
&op.tenant_id,
|
||||
&op.tenant_shard_id.tenant_id,
|
||||
&op.timeline_id,
|
||||
meta.shard,
|
||||
&layer,
|
||||
@@ -1293,7 +1297,7 @@ pub(crate) mod mock {
|
||||
fn deletion_list_serialization() -> anyhow::Result<()> {
|
||||
let tenant_id = "ad6c1a56f5680419d3a16ff55d97ec3c"
|
||||
.to_string()
|
||||
.parse::<TenantId>()?;
|
||||
.parse::<TenantShardId>()?;
|
||||
let timeline_id = "be322c834ed9e709e63b5c9698691910"
|
||||
.to_string()
|
||||
.parse::<TimelineId>()?;
|
||||
|
||||
@@ -19,6 +19,7 @@ use std::collections::HashMap;
|
||||
use std::fs::create_dir_all;
|
||||
use std::time::Duration;
|
||||
|
||||
use pageserver_api::shard::TenantShardId;
|
||||
use regex::Regex;
|
||||
use remote_storage::RemotePath;
|
||||
use tokio_util::sync::CancellationToken;
|
||||
@@ -26,7 +27,6 @@ use tracing::debug;
|
||||
use tracing::info;
|
||||
use tracing::warn;
|
||||
use utils::generation::Generation;
|
||||
use utils::id::TenantId;
|
||||
use utils::id::TimelineId;
|
||||
|
||||
use crate::config::PageServerConf;
|
||||
@@ -54,7 +54,7 @@ const FRONTEND_FLUSHING_TIMEOUT: Duration = Duration::from_millis(100);
|
||||
|
||||
#[derive(Debug)]
|
||||
pub(super) struct DeletionOp {
|
||||
pub(super) tenant_id: TenantId,
|
||||
pub(super) tenant_shard_id: TenantShardId,
|
||||
pub(super) timeline_id: TimelineId,
|
||||
// `layers` and `objects` are both just lists of objects. `layers` is used if you do not
|
||||
// have a config object handy to project it to a remote key, and need the consuming worker
|
||||
@@ -62,14 +62,14 @@ pub(super) struct DeletionOp {
|
||||
pub(super) layers: Vec<(LayerFileName, LayerFileMetadata)>,
|
||||
pub(super) objects: Vec<RemotePath>,
|
||||
|
||||
/// The _current_ generation of the Tenant attachment in which we are enqueuing
|
||||
/// The _current_ generation of the Tenant shard attachment in which we are enqueuing
|
||||
/// this deletion.
|
||||
pub(super) generation: Generation,
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
pub(super) struct RecoverOp {
|
||||
pub(super) attached_tenants: HashMap<TenantId, Generation>,
|
||||
pub(super) attached_tenants: HashMap<TenantShardId, Generation>,
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
@@ -206,7 +206,7 @@ impl ListWriter {
|
||||
|
||||
async fn recover(
|
||||
&mut self,
|
||||
attached_tenants: HashMap<TenantId, Generation>,
|
||||
attached_tenants: HashMap<TenantShardId, Generation>,
|
||||
) -> Result<(), anyhow::Error> {
|
||||
debug!(
|
||||
"recovering with {} attached tenants",
|
||||
@@ -309,8 +309,8 @@ impl ListWriter {
|
||||
// generation was issued to another node in the interval while we restarted,
|
||||
// then we may treat deletion lists from the previous generation as if they
|
||||
// belong to our currently attached generation, and proceed to validate & execute.
|
||||
for (tenant_id, tenant_list) in &mut deletion_list.tenants {
|
||||
if let Some(attached_gen) = attached_tenants.get(tenant_id) {
|
||||
for (tenant_shard_id, tenant_list) in &mut deletion_list.tenants {
|
||||
if let Some(attached_gen) = attached_tenants.get(tenant_shard_id) {
|
||||
if attached_gen.previous() == tenant_list.generation {
|
||||
tenant_list.generation = *attached_gen;
|
||||
}
|
||||
@@ -390,7 +390,7 @@ impl ListWriter {
|
||||
let mut layer_paths = Vec::new();
|
||||
for (layer, meta) in op.layers {
|
||||
layer_paths.push(remote_layer_path(
|
||||
&op.tenant_id,
|
||||
&op.tenant_shard_id.tenant_id,
|
||||
&op.timeline_id,
|
||||
meta.shard,
|
||||
&layer,
|
||||
@@ -400,14 +400,14 @@ impl ListWriter {
|
||||
layer_paths.extend(op.objects);
|
||||
|
||||
if !self.pending.push(
|
||||
&op.tenant_id,
|
||||
&op.tenant_shard_id,
|
||||
&op.timeline_id,
|
||||
op.generation,
|
||||
&mut layer_paths,
|
||||
) {
|
||||
self.flush().await;
|
||||
let retry_succeeded = self.pending.push(
|
||||
&op.tenant_id,
|
||||
&op.tenant_shard_id,
|
||||
&op.timeline_id,
|
||||
op.generation,
|
||||
&mut layer_paths,
|
||||
|
||||
@@ -310,7 +310,7 @@ pub async fn disk_usage_eviction_task_iteration_impl<U: Usage>(
|
||||
.unwrap()
|
||||
.as_micros(),
|
||||
partition,
|
||||
desc.tenant_id,
|
||||
desc.tenant_shard_id,
|
||||
desc.timeline_id,
|
||||
candidate.layer,
|
||||
);
|
||||
@@ -380,7 +380,7 @@ pub async fn disk_usage_eviction_task_iteration_impl<U: Usage>(
|
||||
let limit = Arc::new(tokio::sync::Semaphore::new(1000.max(max_batch_size)));
|
||||
|
||||
for (timeline, batch) in batched {
|
||||
let tenant_id = timeline.tenant_id;
|
||||
let tenant_shard_id = timeline.tenant_shard_id;
|
||||
let timeline_id = timeline.timeline_id;
|
||||
let batch_size =
|
||||
u32::try_from(batch.len()).expect("batch size limited to u32::MAX during partitioning");
|
||||
@@ -431,7 +431,7 @@ pub async fn disk_usage_eviction_task_iteration_impl<U: Usage>(
|
||||
(evicted_bytes, evictions_failed)
|
||||
}
|
||||
}
|
||||
.instrument(tracing::info_span!("evict_batch", %tenant_id, %timeline_id, batch_size));
|
||||
.instrument(tracing::info_span!("evict_batch", tenant_id=%tenant_shard_id.tenant_id, shard_id=%tenant_shard_id.shard_slug(), %timeline_id, batch_size));
|
||||
|
||||
js.spawn(evict);
|
||||
|
||||
@@ -572,7 +572,7 @@ async fn collect_eviction_candidates(
|
||||
continue;
|
||||
}
|
||||
let info = tl.get_local_layers_for_disk_usage_eviction().await;
|
||||
debug!(tenant_id=%tl.tenant_id, timeline_id=%tl.timeline_id, "timeline resident layers count: {}", info.resident_layers.len());
|
||||
debug!(tenant_id=%tl.tenant_shard_id.tenant_id, shard_id=%tl.tenant_shard_id.shard_slug(), timeline_id=%tl.timeline_id, "timeline resident layers count: {}", info.resident_layers.len());
|
||||
tenant_candidates.extend(
|
||||
info.resident_layers
|
||||
.into_iter()
|
||||
|
||||
@@ -356,7 +356,8 @@ async fn build_timeline_info_common(
|
||||
let walreceiver_status = timeline.walreceiver_status();
|
||||
|
||||
let info = TimelineInfo {
|
||||
tenant_id: timeline.tenant_id,
|
||||
// TODO(sharding): add a shard_id field, or make tenant_id into a tenant_shard_id
|
||||
tenant_id: timeline.tenant_shard_id.tenant_id,
|
||||
timeline_id: timeline.timeline_id,
|
||||
ancestor_timeline_id,
|
||||
ancestor_lsn,
|
||||
|
||||
@@ -7,6 +7,7 @@ use metrics::{
|
||||
HistogramVec, IntCounter, IntCounterVec, IntGauge, IntGaugeVec, UIntGauge, UIntGaugeVec,
|
||||
};
|
||||
use once_cell::sync::Lazy;
|
||||
use pageserver_api::shard::TenantShardId;
|
||||
use strum::{EnumCount, IntoEnumIterator, VariantNames};
|
||||
use strum_macros::{EnumVariantNames, IntoStaticStr};
|
||||
use utils::id::{TenantId, TimelineId};
|
||||
@@ -1571,9 +1572,9 @@ pub struct RemoteTimelineClientMetrics {
|
||||
}
|
||||
|
||||
impl RemoteTimelineClientMetrics {
|
||||
pub fn new(tenant_id: &TenantId, timeline_id: &TimelineId) -> Self {
|
||||
pub fn new(tenant_shard_id: &TenantShardId, timeline_id: &TimelineId) -> Self {
|
||||
RemoteTimelineClientMetrics {
|
||||
tenant_id: tenant_id.to_string(),
|
||||
tenant_id: tenant_shard_id.tenant_id.to_string(),
|
||||
timeline_id: timeline_id.to_string(),
|
||||
calls_unfinished_gauge: Mutex::new(HashMap::default()),
|
||||
bytes_started_counter: Mutex::new(HashMap::default()),
|
||||
|
||||
@@ -399,6 +399,9 @@ impl PageServerHandler {
|
||||
{
|
||||
debug_assert_current_span_has_tenant_and_timeline_id();
|
||||
|
||||
// TODO(sharding): enumerate local tenant shards for this tenant, and select the one
|
||||
// that should serve this request.
|
||||
|
||||
// Make request tracer if needed
|
||||
let tenant = mgr::get_active_tenant_with_timeout(
|
||||
tenant_id,
|
||||
@@ -408,9 +411,10 @@ impl PageServerHandler {
|
||||
.await?;
|
||||
let mut tracer = if tenant.get_trace_read_requests() {
|
||||
let connection_id = ConnectionId::generate();
|
||||
let path = tenant
|
||||
.conf
|
||||
.trace_path(&tenant_id, &timeline_id, &connection_id);
|
||||
let path =
|
||||
tenant
|
||||
.conf
|
||||
.trace_path(&tenant.tenant_shard_id(), &timeline_id, &connection_id);
|
||||
Some(Tracer::new(path))
|
||||
} else {
|
||||
None
|
||||
|
||||
@@ -17,6 +17,7 @@ use camino::{Utf8Path, Utf8PathBuf};
|
||||
use enumset::EnumSet;
|
||||
use futures::FutureExt;
|
||||
use pageserver_api::models::TimelineState;
|
||||
use pageserver_api::shard::TenantShardId;
|
||||
use remote_storage::DownloadError;
|
||||
use remote_storage::GenericRemoteStorage;
|
||||
use std::fmt;
|
||||
@@ -228,7 +229,7 @@ pub struct Tenant {
|
||||
// This is necessary to allow global config updates.
|
||||
tenant_conf: Arc<RwLock<AttachedTenantConf>>,
|
||||
|
||||
tenant_id: TenantId,
|
||||
tenant_shard_id: TenantShardId,
|
||||
|
||||
/// The remote storage generation, used to protect S3 objects from split-brain.
|
||||
/// Does not change over the lifetime of the [`Tenant`] object.
|
||||
@@ -272,7 +273,7 @@ pub struct Tenant {
|
||||
|
||||
impl std::fmt::Debug for Tenant {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
write!(f, "{} ({})", self.tenant_id, self.current_state())
|
||||
write!(f, "{} ({})", self.tenant_shard_id, self.current_state())
|
||||
}
|
||||
}
|
||||
|
||||
@@ -466,7 +467,7 @@ impl Tenant {
|
||||
init_order: Option<&InitializationOrder>,
|
||||
_ctx: &RequestContext,
|
||||
) -> anyhow::Result<()> {
|
||||
let tenant_id = self.tenant_id;
|
||||
let tenant_id = self.tenant_shard_id;
|
||||
|
||||
let timeline = self.create_timeline_struct(
|
||||
timeline_id,
|
||||
@@ -558,7 +559,7 @@ impl Tenant {
|
||||
#[allow(clippy::too_many_arguments)]
|
||||
pub(crate) fn spawn(
|
||||
conf: &'static PageServerConf,
|
||||
tenant_id: TenantId,
|
||||
tenant_shard_id: TenantShardId,
|
||||
resources: TenantSharedResources,
|
||||
attached_conf: AttachedTenantConf,
|
||||
init_order: Option<InitializationOrder>,
|
||||
@@ -566,8 +567,10 @@ impl Tenant {
|
||||
mode: SpawnMode,
|
||||
ctx: &RequestContext,
|
||||
) -> anyhow::Result<Arc<Tenant>> {
|
||||
// TODO(sharding): make WalRedoManager shard-aware
|
||||
let wal_redo_manager = Arc::new(WalRedoManager::from(PostgresRedoManager::new(
|
||||
conf, tenant_id,
|
||||
conf,
|
||||
tenant_shard_id.tenant_id,
|
||||
)));
|
||||
|
||||
let TenantSharedResources {
|
||||
@@ -581,7 +584,7 @@ impl Tenant {
|
||||
conf,
|
||||
attached_conf,
|
||||
wal_redo_manager,
|
||||
tenant_id,
|
||||
tenant_shard_id,
|
||||
remote_storage.clone(),
|
||||
deletion_queue_client,
|
||||
));
|
||||
@@ -593,7 +596,7 @@ impl Tenant {
|
||||
task_mgr::spawn(
|
||||
&tokio::runtime::Handle::current(),
|
||||
TaskKind::Attach,
|
||||
Some(tenant_id),
|
||||
Some(tenant_shard_id.tenant_id),
|
||||
None,
|
||||
"attach tenant",
|
||||
false,
|
||||
@@ -632,7 +635,7 @@ impl Tenant {
|
||||
match tenant_clone
|
||||
.preload(remote_storage, task_mgr::shutdown_token())
|
||||
.instrument(
|
||||
tracing::info_span!(parent: None, "attach_preload", tenant_id=%tenant_id),
|
||||
tracing::info_span!(parent: None, "attach_preload", tenant_id=%tenant_shard_id.tenant_id, shard_id=%tenant_shard_id.shard_slug()),
|
||||
)
|
||||
.await {
|
||||
Ok(p) => p,
|
||||
@@ -714,7 +717,7 @@ impl Tenant {
|
||||
Ok(())
|
||||
}
|
||||
.instrument({
|
||||
let span = tracing::info_span!(parent: None, "attach", tenant_id=%tenant_id);
|
||||
let span = tracing::info_span!(parent: None, "attach", tenant_id=%tenant_shard_id.tenant_id, shard_id=%tenant_shard_id.shard_slug());
|
||||
span.follows_from(Span::current());
|
||||
span
|
||||
}),
|
||||
@@ -732,7 +735,7 @@ impl Tenant {
|
||||
info!("listing remote timelines");
|
||||
let (remote_timeline_ids, other_keys) = remote_timeline_client::list_remote_timelines(
|
||||
remote_storage,
|
||||
self.tenant_id,
|
||||
self.tenant_shard_id,
|
||||
cancel.clone(),
|
||||
)
|
||||
.await?;
|
||||
@@ -844,7 +847,7 @@ impl Tenant {
|
||||
.with_context(|| {
|
||||
format!(
|
||||
"failed to load remote timeline {} for tenant {}",
|
||||
timeline_id, self.tenant_id
|
||||
timeline_id, self.tenant_shard_id
|
||||
)
|
||||
})?;
|
||||
}
|
||||
@@ -884,7 +887,7 @@ impl Tenant {
|
||||
/// timeline that still exists: this can happen if we crashed during a deletion/creation, or
|
||||
/// if a timeline was deleted while the tenant was attached to a different pageserver.
|
||||
fn clean_up_timelines(&self, existent_timelines: &HashSet<TimelineId>) -> anyhow::Result<()> {
|
||||
let timelines_dir = self.conf.timelines_path(&self.tenant_id);
|
||||
let timelines_dir = self.conf.timelines_path(&self.tenant_shard_id);
|
||||
|
||||
let entries = match timelines_dir.read_dir_utf8() {
|
||||
Ok(d) => d,
|
||||
@@ -970,7 +973,7 @@ impl Tenant {
|
||||
span::debug_assert_current_span_has_tenant_id();
|
||||
|
||||
info!("downloading index file for timeline {}", timeline_id);
|
||||
tokio::fs::create_dir_all(self.conf.timeline_path(&self.tenant_id, &timeline_id))
|
||||
tokio::fs::create_dir_all(self.conf.timeline_path(&self.tenant_shard_id, &timeline_id))
|
||||
.await
|
||||
.context("Failed to create new timeline directory")?;
|
||||
|
||||
@@ -992,10 +995,15 @@ impl Tenant {
|
||||
let init_order = None;
|
||||
|
||||
// timeline loading after attach expects to find a metadata file for each timeline
|
||||
save_metadata(self.conf, &self.tenant_id, &timeline_id, &remote_metadata)
|
||||
.await
|
||||
.context("save_metadata")
|
||||
.map_err(LoadLocalTimelineError::Load)?;
|
||||
save_metadata(
|
||||
self.conf,
|
||||
&self.tenant_shard_id,
|
||||
&timeline_id,
|
||||
&remote_metadata,
|
||||
)
|
||||
.await
|
||||
.context("save_metadata")
|
||||
.map_err(LoadLocalTimelineError::Load)?;
|
||||
|
||||
self.timeline_init_and_sync(
|
||||
timeline_id,
|
||||
@@ -1012,11 +1020,13 @@ impl Tenant {
|
||||
/// Create a placeholder Tenant object for a broken tenant
|
||||
pub fn create_broken_tenant(
|
||||
conf: &'static PageServerConf,
|
||||
tenant_id: TenantId,
|
||||
tenant_shard_id: TenantShardId,
|
||||
reason: String,
|
||||
) -> Arc<Tenant> {
|
||||
// TODO(sharding): make WalRedoManager shard-aware
|
||||
let wal_redo_manager = Arc::new(WalRedoManager::from(PostgresRedoManager::new(
|
||||
conf, tenant_id,
|
||||
conf,
|
||||
tenant_shard_id.tenant_id,
|
||||
)));
|
||||
Arc::new(Tenant::new(
|
||||
TenantState::Broken {
|
||||
@@ -1026,7 +1036,7 @@ impl Tenant {
|
||||
conf,
|
||||
AttachedTenantConf::try_from(LocationConf::default()).unwrap(),
|
||||
wal_redo_manager,
|
||||
tenant_id,
|
||||
tenant_shard_id,
|
||||
None,
|
||||
DeletionQueueClient::broken(),
|
||||
))
|
||||
@@ -1039,7 +1049,7 @@ impl Tenant {
|
||||
// completed in non topological order (for example because parent has smaller number of layer files in it)
|
||||
let mut timelines_to_resume_deletion: Vec<(TimelineId, Option<TimelineMetadata>)> = vec![];
|
||||
|
||||
let timelines_dir = self.conf.timelines_path(&self.tenant_id);
|
||||
let timelines_dir = self.conf.timelines_path(&self.tenant_shard_id);
|
||||
|
||||
for entry in timelines_dir
|
||||
.read_dir_utf8()
|
||||
@@ -1070,7 +1080,7 @@ impl Tenant {
|
||||
"Could not parse timeline id out of the timeline uninit mark name {timeline_uninit_mark_file}",
|
||||
)
|
||||
})?;
|
||||
let timeline_dir = self.conf.timeline_path(&self.tenant_id, &timeline_id);
|
||||
let timeline_dir = self.conf.timeline_path(&self.tenant_shard_id, &timeline_id);
|
||||
if let Err(e) =
|
||||
remove_timeline_and_uninit_mark(&timeline_dir, timeline_uninit_mark_file)
|
||||
{
|
||||
@@ -1087,7 +1097,7 @@ impl Tenant {
|
||||
|
||||
info!("Found deletion mark for timeline {}", timeline_id);
|
||||
|
||||
match load_metadata(self.conf, &self.tenant_id, &timeline_id) {
|
||||
match load_metadata(self.conf, &self.tenant_shard_id, &timeline_id) {
|
||||
Ok(metadata) => {
|
||||
timelines_to_resume_deletion.push((timeline_id, Some(metadata)))
|
||||
}
|
||||
@@ -1131,7 +1141,7 @@ impl Tenant {
|
||||
})?;
|
||||
let timeline_uninit_mark_file = self
|
||||
.conf
|
||||
.timeline_uninit_mark_file_path(self.tenant_id, timeline_id);
|
||||
.timeline_uninit_mark_file_path(self.tenant_shard_id, timeline_id);
|
||||
if timeline_uninit_mark_file.exists() {
|
||||
info!(
|
||||
%timeline_id,
|
||||
@@ -1147,7 +1157,7 @@ impl Tenant {
|
||||
|
||||
let timeline_delete_mark_file = self
|
||||
.conf
|
||||
.timeline_delete_mark_file_path(self.tenant_id, timeline_id);
|
||||
.timeline_delete_mark_file_path(self.tenant_shard_id, timeline_id);
|
||||
if timeline_delete_mark_file.exists() {
|
||||
// Cleanup should be done in `is_delete_mark` branch above
|
||||
continue;
|
||||
@@ -1155,7 +1165,7 @@ impl Tenant {
|
||||
|
||||
let file_name = entry.file_name();
|
||||
if let Ok(timeline_id) = file_name.parse::<TimelineId>() {
|
||||
let metadata = load_metadata(self.conf, &self.tenant_id, &timeline_id)
|
||||
let metadata = load_metadata(self.conf, &self.tenant_shard_id, &timeline_id)
|
||||
.context("failed to load metadata")?;
|
||||
timelines_to_load.insert(timeline_id, metadata);
|
||||
} else {
|
||||
@@ -1187,7 +1197,7 @@ impl Tenant {
|
||||
remote_storage.clone(),
|
||||
self.deletion_queue_client.clone(),
|
||||
self.conf,
|
||||
self.tenant_id,
|
||||
self.tenant_shard_id,
|
||||
timeline_id,
|
||||
self.generation,
|
||||
);
|
||||
@@ -1393,8 +1403,12 @@ impl Tenant {
|
||||
.map_err(LoadLocalTimelineError::Load)
|
||||
}
|
||||
|
||||
pub fn tenant_id(&self) -> TenantId {
|
||||
self.tenant_id
|
||||
pub(crate) fn tenant_id(&self) -> TenantId {
|
||||
self.tenant_shard_id.tenant_id
|
||||
}
|
||||
|
||||
pub(crate) fn tenant_shard_id(&self) -> TenantShardId {
|
||||
self.tenant_shard_id
|
||||
}
|
||||
|
||||
/// Get Timeline handle for given Neon timeline ID.
|
||||
@@ -1408,13 +1422,13 @@ impl Tenant {
|
||||
let timeline = timelines_accessor
|
||||
.get(&timeline_id)
|
||||
.ok_or(GetTimelineError::NotFound {
|
||||
tenant_id: self.tenant_id,
|
||||
tenant_id: self.tenant_shard_id.tenant_id,
|
||||
timeline_id,
|
||||
})?;
|
||||
|
||||
if active_only && !timeline.is_active() {
|
||||
Err(GetTimelineError::NotActive {
|
||||
tenant_id: self.tenant_id,
|
||||
tenant_id: self.tenant_shard_id.tenant_id,
|
||||
timeline_id,
|
||||
state: timeline.current_state(),
|
||||
})
|
||||
@@ -1772,7 +1786,7 @@ impl Tenant {
|
||||
*current_state = TenantState::Activating(ActivatingFrom::Attaching);
|
||||
}
|
||||
}
|
||||
debug!(tenant_id = %self.tenant_id, "Activating tenant");
|
||||
debug!(tenant_id = %self.tenant_shard_id.tenant_id, shard_id = %self.tenant_shard_id.shard_slug(), "Activating tenant");
|
||||
activating = true;
|
||||
// Continue outside the closure. We need to grab timelines.lock()
|
||||
// and we plan to turn it into a tokio::sync::Mutex in a future patch.
|
||||
@@ -1809,7 +1823,8 @@ impl Tenant {
|
||||
// times to activate. see https://github.com/neondatabase/neon/issues/4025
|
||||
info!(
|
||||
since_creation_millis = elapsed.as_millis(),
|
||||
tenant_id = %self.tenant_id,
|
||||
tenant_id = %self.tenant_shard_id.tenant_id,
|
||||
shard_id = %self.tenant_shard_id.shard_slug(),
|
||||
activated_timelines,
|
||||
total_timelines,
|
||||
post_state = <&'static str>::from(&*current_state),
|
||||
@@ -1906,7 +1921,7 @@ impl Tenant {
|
||||
//
|
||||
// this will additionally shutdown and await all timeline tasks.
|
||||
tracing::debug!("Waiting for tasks...");
|
||||
task_mgr::shutdown_tasks(None, Some(self.tenant_id), None).await;
|
||||
task_mgr::shutdown_tasks(None, Some(self.tenant_shard_id.tenant_id), None).await;
|
||||
|
||||
// Wait for any in-flight operations to complete
|
||||
self.gate.close().await;
|
||||
@@ -2081,7 +2096,7 @@ impl Tenant {
|
||||
receiver.changed().await.map_err(
|
||||
|_e: tokio::sync::watch::error::RecvError|
|
||||
// Tenant existed but was dropped: report it as non-existent
|
||||
GetActiveTenantError::NotFound(GetTenantError::NotFound(self.tenant_id))
|
||||
GetActiveTenantError::NotFound(GetTenantError::NotFound(self.tenant_shard_id.tenant_id))
|
||||
)?;
|
||||
}
|
||||
TenantState::Active { .. } => {
|
||||
@@ -2155,9 +2170,6 @@ where
|
||||
}
|
||||
|
||||
impl Tenant {
|
||||
pub fn get_tenant_id(&self) -> TenantId {
|
||||
self.tenant_id
|
||||
}
|
||||
pub fn tenant_specific_overrides(&self) -> TenantConfOpt {
|
||||
self.tenant_conf.read().unwrap().tenant_conf
|
||||
}
|
||||
@@ -2307,7 +2319,7 @@ impl Tenant {
|
||||
new_metadata,
|
||||
ancestor,
|
||||
new_timeline_id,
|
||||
self.tenant_id,
|
||||
self.tenant_shard_id,
|
||||
self.generation,
|
||||
Arc::clone(&self.walredo_mgr),
|
||||
resources,
|
||||
@@ -2329,14 +2341,14 @@ impl Tenant {
|
||||
conf: &'static PageServerConf,
|
||||
attached_conf: AttachedTenantConf,
|
||||
walredo_mgr: Arc<WalRedoManager>,
|
||||
tenant_id: TenantId,
|
||||
tenant_shard_id: TenantShardId,
|
||||
remote_storage: Option<GenericRemoteStorage>,
|
||||
deletion_queue_client: DeletionQueueClient,
|
||||
) -> Tenant {
|
||||
let (state, mut rx) = watch::channel(state);
|
||||
|
||||
tokio::spawn(async move {
|
||||
let tid = tenant_id.to_string();
|
||||
let tid = tenant_shard_id.to_string();
|
||||
|
||||
fn inspect_state(state: &TenantState) -> ([&'static str; 1], bool) {
|
||||
([state.into()], matches!(state, TenantState::Broken { .. }))
|
||||
@@ -2388,7 +2400,7 @@ impl Tenant {
|
||||
});
|
||||
|
||||
Tenant {
|
||||
tenant_id,
|
||||
tenant_shard_id,
|
||||
generation: attached_conf.location.generation,
|
||||
conf,
|
||||
// using now here is good enough approximation to catch tenants with really long
|
||||
@@ -2406,17 +2418,17 @@ impl Tenant {
|
||||
eviction_task_tenant_state: tokio::sync::Mutex::new(EvictionTaskTenantState::default()),
|
||||
delete_progress: Arc::new(tokio::sync::Mutex::new(DeleteTenantFlow::default())),
|
||||
cancel: CancellationToken::default(),
|
||||
gate: Gate::new(format!("Tenant<{tenant_id}>")),
|
||||
gate: Gate::new(format!("Tenant<{tenant_shard_id}>")),
|
||||
}
|
||||
}
|
||||
|
||||
/// Locate and load config
|
||||
pub(super) fn load_tenant_config(
|
||||
conf: &'static PageServerConf,
|
||||
tenant_id: &TenantId,
|
||||
tenant_shard_id: &TenantShardId,
|
||||
) -> anyhow::Result<LocationConf> {
|
||||
let legacy_config_path = conf.tenant_config_path(tenant_id);
|
||||
let config_path = conf.tenant_location_config_path(tenant_id);
|
||||
let legacy_config_path = conf.tenant_config_path(tenant_shard_id);
|
||||
let config_path = conf.tenant_location_config_path(tenant_shard_id);
|
||||
|
||||
if config_path.exists() {
|
||||
// New-style config takes precedence
|
||||
@@ -2470,29 +2482,34 @@ impl Tenant {
|
||||
.with_context(|| format!("Failed to parse config from file '{path}' as toml file"))
|
||||
}
|
||||
|
||||
#[tracing::instrument(skip_all, fields(%tenant_id))]
|
||||
#[tracing::instrument(skip_all, fields(tenant_id=%tenant_shard_id.tenant_id, shard_id=%tenant_shard_id.shard_slug()))]
|
||||
pub(super) async fn persist_tenant_config(
|
||||
conf: &'static PageServerConf,
|
||||
tenant_id: &TenantId,
|
||||
tenant_shard_id: &TenantShardId,
|
||||
location_conf: &LocationConf,
|
||||
) -> anyhow::Result<()> {
|
||||
let legacy_config_path = conf.tenant_config_path(tenant_id);
|
||||
let config_path = conf.tenant_location_config_path(tenant_id);
|
||||
let legacy_config_path = conf.tenant_config_path(tenant_shard_id);
|
||||
let config_path = conf.tenant_location_config_path(tenant_shard_id);
|
||||
|
||||
Self::persist_tenant_config_at(tenant_id, &config_path, &legacy_config_path, location_conf)
|
||||
.await
|
||||
Self::persist_tenant_config_at(
|
||||
tenant_shard_id,
|
||||
&config_path,
|
||||
&legacy_config_path,
|
||||
location_conf,
|
||||
)
|
||||
.await
|
||||
}
|
||||
|
||||
#[tracing::instrument(skip_all, fields(%tenant_id))]
|
||||
#[tracing::instrument(skip_all, fields(tenant_id=%tenant_shard_id.tenant_id, shard_id=%tenant_shard_id.shard_slug()))]
|
||||
pub(super) async fn persist_tenant_config_at(
|
||||
tenant_id: &TenantId,
|
||||
tenant_shard_id: &TenantShardId,
|
||||
config_path: &Utf8Path,
|
||||
legacy_config_path: &Utf8Path,
|
||||
location_conf: &LocationConf,
|
||||
) -> anyhow::Result<()> {
|
||||
// Forward compat: write out an old-style configuration that old versions can read, in case we roll back
|
||||
Self::persist_tenant_config_legacy(
|
||||
tenant_id,
|
||||
tenant_shard_id,
|
||||
legacy_config_path,
|
||||
&location_conf.tenant_conf,
|
||||
)
|
||||
@@ -2519,14 +2536,16 @@ impl Tenant {
|
||||
|
||||
let temp_path = path_with_suffix_extension(config_path, TEMP_FILE_SUFFIX);
|
||||
|
||||
let tenant_id = *tenant_id;
|
||||
let tenant_shard_id = *tenant_shard_id;
|
||||
let config_path = config_path.to_owned();
|
||||
tokio::task::spawn_blocking(move || {
|
||||
Handle::current().block_on(async move {
|
||||
let conf_content = conf_content.as_bytes();
|
||||
VirtualFile::crashsafe_overwrite(&config_path, &temp_path, conf_content)
|
||||
.await
|
||||
.with_context(|| format!("write tenant {tenant_id} config to {config_path}"))
|
||||
.with_context(|| {
|
||||
format!("write tenant {tenant_shard_id} config to {config_path}")
|
||||
})
|
||||
})
|
||||
})
|
||||
.await??;
|
||||
@@ -2534,9 +2553,9 @@ impl Tenant {
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[tracing::instrument(skip_all, fields(%tenant_id))]
|
||||
#[tracing::instrument(skip_all, fields(tenant_id=%tenant_shard_id.tenant_id, shard_id=%tenant_shard_id.shard_slug()))]
|
||||
async fn persist_tenant_config_legacy(
|
||||
tenant_id: &TenantId,
|
||||
tenant_shard_id: &TenantShardId,
|
||||
target_config_path: &Utf8Path,
|
||||
tenant_conf: &TenantConfOpt,
|
||||
) -> anyhow::Result<()> {
|
||||
@@ -2554,7 +2573,7 @@ impl Tenant {
|
||||
|
||||
let temp_path = path_with_suffix_extension(target_config_path, TEMP_FILE_SUFFIX);
|
||||
|
||||
let tenant_id = *tenant_id;
|
||||
let tenant_shard_id = *tenant_shard_id;
|
||||
let target_config_path = target_config_path.to_owned();
|
||||
tokio::task::spawn_blocking(move || {
|
||||
Handle::current().block_on(async move {
|
||||
@@ -2562,7 +2581,7 @@ impl Tenant {
|
||||
VirtualFile::crashsafe_overwrite(&target_config_path, &temp_path, conf_content)
|
||||
.await
|
||||
.with_context(|| {
|
||||
format!("write tenant {tenant_id} config to {target_config_path}")
|
||||
format!("write tenant {tenant_shard_id} config to {target_config_path}")
|
||||
})
|
||||
})
|
||||
})
|
||||
@@ -2940,7 +2959,7 @@ impl Tenant {
|
||||
// temporary directory for basebackup files for the given timeline.
|
||||
let pgdata_path = path_with_suffix_extension(
|
||||
self.conf
|
||||
.timelines_path(&self.tenant_id)
|
||||
.timelines_path(&self.tenant_shard_id)
|
||||
.join(format!("basebackup-{timeline_id}")),
|
||||
TEMP_FILE_SUFFIX,
|
||||
);
|
||||
@@ -2971,7 +2990,7 @@ impl Tenant {
|
||||
|| async {
|
||||
self::remote_timeline_client::upload_initdb_dir(
|
||||
storage,
|
||||
&self.tenant_id,
|
||||
&self.tenant_shard_id.tenant_id,
|
||||
&timeline_id,
|
||||
pgdata_zstd.clone(),
|
||||
)
|
||||
@@ -3010,7 +3029,7 @@ impl Tenant {
|
||||
)
|
||||
.await?;
|
||||
|
||||
let tenant_id = raw_timeline.owning_tenant.tenant_id;
|
||||
let tenant_shard_id = raw_timeline.owning_tenant.tenant_shard_id;
|
||||
let unfinished_timeline = raw_timeline.raw_timeline()?;
|
||||
|
||||
import_datadir::import_timeline_from_postgres_datadir(
|
||||
@@ -3021,7 +3040,7 @@ impl Tenant {
|
||||
)
|
||||
.await
|
||||
.with_context(|| {
|
||||
format!("Failed to import pgdatadir for timeline {tenant_id}/{timeline_id}")
|
||||
format!("Failed to import pgdatadir for timeline {tenant_shard_id}/{timeline_id}")
|
||||
})?;
|
||||
|
||||
// Flush the new layer files to disk, before we make the timeline as available to
|
||||
@@ -3039,7 +3058,7 @@ impl Tenant {
|
||||
.await
|
||||
.with_context(|| {
|
||||
format!(
|
||||
"Failed to flush after pgdatadir import for timeline {tenant_id}/{timeline_id}"
|
||||
"Failed to flush after pgdatadir import for timeline {tenant_shard_id}/{timeline_id}"
|
||||
)
|
||||
})?;
|
||||
|
||||
@@ -3062,7 +3081,7 @@ impl Tenant {
|
||||
remote_storage.clone(),
|
||||
self.deletion_queue_client.clone(),
|
||||
self.conf,
|
||||
self.tenant_id,
|
||||
self.tenant_shard_id,
|
||||
timeline_id,
|
||||
self.generation,
|
||||
);
|
||||
@@ -3091,7 +3110,7 @@ impl Tenant {
|
||||
start_lsn: Lsn,
|
||||
ancestor: Option<Arc<Timeline>>,
|
||||
) -> anyhow::Result<UninitializedTimeline> {
|
||||
let tenant_id = self.tenant_id;
|
||||
let tenant_shard_id = self.tenant_shard_id;
|
||||
|
||||
let resources = self.build_timeline_resources(new_timeline_id);
|
||||
if let Some(remote_client) = &resources.remote_client {
|
||||
@@ -3115,12 +3134,14 @@ impl Tenant {
|
||||
.create_timeline_files(&uninit_mark.timeline_path, &new_timeline_id, new_metadata)
|
||||
.await
|
||||
{
|
||||
error!("Failed to create initial files for timeline {tenant_id}/{new_timeline_id}, cleaning up: {e:?}");
|
||||
error!("Failed to create initial files for timeline {tenant_shard_id}/{new_timeline_id}, cleaning up: {e:?}");
|
||||
cleanup_timeline_directory(uninit_mark);
|
||||
return Err(e);
|
||||
}
|
||||
|
||||
debug!("Successfully created initial files for timeline {tenant_id}/{new_timeline_id}");
|
||||
debug!(
|
||||
"Successfully created initial files for timeline {tenant_shard_id}/{new_timeline_id}"
|
||||
);
|
||||
|
||||
Ok(UninitializedTimeline::new(
|
||||
self,
|
||||
@@ -3141,9 +3162,14 @@ impl Tenant {
|
||||
anyhow::bail!("failpoint after-timeline-uninit-mark-creation");
|
||||
});
|
||||
|
||||
save_metadata(self.conf, &self.tenant_id, new_timeline_id, new_metadata)
|
||||
.await
|
||||
.context("Failed to create timeline metadata")?;
|
||||
save_metadata(
|
||||
self.conf,
|
||||
&self.tenant_shard_id,
|
||||
new_timeline_id,
|
||||
new_metadata,
|
||||
)
|
||||
.await
|
||||
.context("Failed to create timeline metadata")?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
@@ -3156,13 +3182,13 @@ impl Tenant {
|
||||
timeline_id: TimelineId,
|
||||
timelines: &MutexGuard<HashMap<TimelineId, Arc<Timeline>>>,
|
||||
) -> anyhow::Result<TimelineUninitMark> {
|
||||
let tenant_id = self.tenant_id;
|
||||
let tenant_shard_id = self.tenant_shard_id;
|
||||
|
||||
anyhow::ensure!(
|
||||
timelines.get(&timeline_id).is_none(),
|
||||
"Timeline {tenant_id}/{timeline_id} already exists in pageserver's memory"
|
||||
"Timeline {tenant_shard_id}/{timeline_id} already exists in pageserver's memory"
|
||||
);
|
||||
let timeline_path = self.conf.timeline_path(&tenant_id, &timeline_id);
|
||||
let timeline_path = self.conf.timeline_path(&tenant_shard_id, &timeline_id);
|
||||
anyhow::ensure!(
|
||||
!timeline_path.exists(),
|
||||
"Timeline {timeline_path} already exists, cannot create its uninit mark file",
|
||||
@@ -3170,7 +3196,7 @@ impl Tenant {
|
||||
|
||||
let uninit_mark_path = self
|
||||
.conf
|
||||
.timeline_uninit_mark_file_path(tenant_id, timeline_id);
|
||||
.timeline_uninit_mark_file_path(tenant_shard_id, timeline_id);
|
||||
fs::File::create(&uninit_mark_path)
|
||||
.context("Failed to create uninit mark file")
|
||||
.and_then(|_| {
|
||||
@@ -3178,7 +3204,7 @@ impl Tenant {
|
||||
.context("Failed to fsync uninit mark file")
|
||||
})
|
||||
.with_context(|| {
|
||||
format!("Failed to crate uninit mark for timeline {tenant_id}/{timeline_id}")
|
||||
format!("Failed to crate uninit mark for timeline {tenant_shard_id}/{timeline_id}")
|
||||
})?;
|
||||
|
||||
let uninit_mark = TimelineUninitMark::new(uninit_mark_path, timeline_path);
|
||||
@@ -3189,7 +3215,7 @@ impl Tenant {
|
||||
/// Gathers inputs from all of the timelines to produce a sizing model input.
|
||||
///
|
||||
/// Future is cancellation safe. Only one calculation can be running at once per tenant.
|
||||
#[instrument(skip_all, fields(tenant_id=%self.tenant_id))]
|
||||
#[instrument(skip_all, fields(tenant_id=%self.tenant_shard_id.tenant_id, shard_id=%self.tenant_shard_id.shard_slug()))]
|
||||
pub async fn gather_size_inputs(
|
||||
&self,
|
||||
// `max_retention_period` overrides the cutoff that is used to calculate the size
|
||||
@@ -3228,7 +3254,7 @@ impl Tenant {
|
||||
/// Calculate synthetic tenant size and cache the result.
|
||||
/// This is periodically called by background worker.
|
||||
/// result is cached in tenant struct
|
||||
#[instrument(skip_all, fields(tenant_id=%self.tenant_id))]
|
||||
#[instrument(skip_all, fields(tenant_id=%self.tenant_shard_id.tenant_id, shard_id=%self.tenant_shard_id.shard_slug()))]
|
||||
pub async fn calculate_synthetic_size(
|
||||
&self,
|
||||
cause: LogicalSizeCalculationCause,
|
||||
@@ -3250,7 +3276,7 @@ impl Tenant {
|
||||
.store(size, Ordering::Relaxed);
|
||||
|
||||
TENANT_SYNTHETIC_SIZE_METRIC
|
||||
.get_metric_with_label_values(&[&self.tenant_id.to_string()])
|
||||
.get_metric_with_label_values(&[&self.tenant_shard_id.tenant_id.to_string()])
|
||||
.unwrap()
|
||||
.set(size);
|
||||
}
|
||||
@@ -3286,9 +3312,9 @@ fn remove_timeline_and_uninit_mark(
|
||||
pub(crate) async fn create_tenant_files(
|
||||
conf: &'static PageServerConf,
|
||||
location_conf: &LocationConf,
|
||||
tenant_id: &TenantId,
|
||||
tenant_shard_id: &TenantShardId,
|
||||
) -> anyhow::Result<Utf8PathBuf> {
|
||||
let target_tenant_directory = conf.tenant_path(tenant_id);
|
||||
let target_tenant_directory = conf.tenant_path(tenant_shard_id);
|
||||
anyhow::ensure!(
|
||||
!target_tenant_directory
|
||||
.try_exists()
|
||||
@@ -3308,14 +3334,16 @@ pub(crate) async fn create_tenant_files(
|
||||
let creation_result = try_create_target_tenant_dir(
|
||||
conf,
|
||||
location_conf,
|
||||
tenant_id,
|
||||
tenant_shard_id,
|
||||
&temporary_tenant_dir,
|
||||
&target_tenant_directory,
|
||||
)
|
||||
.await;
|
||||
|
||||
if creation_result.is_err() {
|
||||
error!("Failed to create directory structure for tenant {tenant_id}, cleaning tmp data");
|
||||
error!(
|
||||
"Failed to create directory structure for tenant {tenant_shard_id}, cleaning tmp data"
|
||||
);
|
||||
if let Err(e) = fs::remove_dir_all(&temporary_tenant_dir) {
|
||||
error!("Failed to remove temporary tenant directory {temporary_tenant_dir:?}: {e}")
|
||||
} else if let Err(e) = crashsafe::fsync(&temporary_tenant_dir) {
|
||||
@@ -3333,31 +3361,31 @@ pub(crate) async fn create_tenant_files(
|
||||
async fn try_create_target_tenant_dir(
|
||||
conf: &'static PageServerConf,
|
||||
location_conf: &LocationConf,
|
||||
tenant_id: &TenantId,
|
||||
tenant_shard_id: &TenantShardId,
|
||||
temporary_tenant_dir: &Utf8Path,
|
||||
target_tenant_directory: &Utf8Path,
|
||||
) -> Result<(), anyhow::Error> {
|
||||
let temporary_tenant_timelines_dir = rebase_directory(
|
||||
&conf.timelines_path(tenant_id),
|
||||
&conf.timelines_path(tenant_shard_id),
|
||||
target_tenant_directory,
|
||||
temporary_tenant_dir,
|
||||
)
|
||||
.with_context(|| format!("resolve tenant {tenant_id} temporary timelines dir"))?;
|
||||
.with_context(|| format!("resolve tenant {tenant_shard_id} temporary timelines dir"))?;
|
||||
let temporary_legacy_tenant_config_path = rebase_directory(
|
||||
&conf.tenant_config_path(tenant_id),
|
||||
&conf.tenant_config_path(tenant_shard_id),
|
||||
target_tenant_directory,
|
||||
temporary_tenant_dir,
|
||||
)
|
||||
.with_context(|| format!("resolve tenant {tenant_id} temporary config path"))?;
|
||||
.with_context(|| format!("resolve tenant {tenant_shard_id} temporary config path"))?;
|
||||
let temporary_tenant_config_path = rebase_directory(
|
||||
&conf.tenant_location_config_path(tenant_id),
|
||||
&conf.tenant_location_config_path(tenant_shard_id),
|
||||
target_tenant_directory,
|
||||
temporary_tenant_dir,
|
||||
)
|
||||
.with_context(|| format!("resolve tenant {tenant_id} temporary config path"))?;
|
||||
.with_context(|| format!("resolve tenant {tenant_shard_id} temporary config path"))?;
|
||||
|
||||
Tenant::persist_tenant_config_at(
|
||||
tenant_id,
|
||||
tenant_shard_id,
|
||||
&temporary_tenant_config_path,
|
||||
&temporary_legacy_tenant_config_path,
|
||||
location_conf,
|
||||
@@ -3367,7 +3395,7 @@ async fn try_create_target_tenant_dir(
|
||||
crashsafe::create_dir(&temporary_tenant_timelines_dir).with_context(|| {
|
||||
format!(
|
||||
"create tenant {} temporary timelines directory {}",
|
||||
tenant_id, temporary_tenant_timelines_dir,
|
||||
tenant_shard_id, temporary_tenant_timelines_dir,
|
||||
)
|
||||
})?;
|
||||
fail::fail_point!("tenant-creation-before-tmp-rename", |_| {
|
||||
@@ -3382,19 +3410,19 @@ async fn try_create_target_tenant_dir(
|
||||
fs::rename(temporary_tenant_dir, target_tenant_directory).with_context(|| {
|
||||
format!(
|
||||
"move tenant {} temporary directory {} into the permanent one {}",
|
||||
tenant_id, temporary_tenant_dir, target_tenant_directory
|
||||
tenant_shard_id, temporary_tenant_dir, target_tenant_directory
|
||||
)
|
||||
})?;
|
||||
let target_dir_parent = target_tenant_directory.parent().with_context(|| {
|
||||
format!(
|
||||
"get tenant {} dir parent for {}",
|
||||
tenant_id, target_tenant_directory,
|
||||
tenant_shard_id, target_tenant_directory,
|
||||
)
|
||||
})?;
|
||||
crashsafe::fsync(target_dir_parent).with_context(|| {
|
||||
format!(
|
||||
"fsync renamed directory's parent {} for tenant {}",
|
||||
target_dir_parent, tenant_id,
|
||||
target_dir_parent, tenant_shard_id,
|
||||
)
|
||||
})?;
|
||||
|
||||
@@ -3472,7 +3500,7 @@ async fn run_initdb(
|
||||
|
||||
impl Drop for Tenant {
|
||||
fn drop(&mut self) {
|
||||
remove_tenant_metrics(&self.tenant_id);
|
||||
remove_tenant_metrics(&self.tenant_shard_id.tenant_id);
|
||||
}
|
||||
}
|
||||
/// Dump contents of a layer file to stdout.
|
||||
@@ -3575,7 +3603,9 @@ pub(crate) mod harness {
|
||||
pub struct TenantHarness {
|
||||
pub conf: &'static PageServerConf,
|
||||
pub tenant_conf: TenantConf,
|
||||
pub tenant_id: TenantId,
|
||||
// TODO(sharding): remove duplicative `tenant_id` in favor of access to tenant_shard_id
|
||||
pub(crate) tenant_id: TenantId,
|
||||
pub tenant_shard_id: TenantShardId,
|
||||
pub generation: Generation,
|
||||
pub shard: ShardIndex,
|
||||
pub remote_storage: GenericRemoteStorage,
|
||||
@@ -3620,8 +3650,9 @@ pub(crate) mod harness {
|
||||
};
|
||||
|
||||
let tenant_id = TenantId::generate();
|
||||
fs::create_dir_all(conf.tenant_path(&tenant_id))?;
|
||||
fs::create_dir_all(conf.timelines_path(&tenant_id))?;
|
||||
let tenant_shard_id = TenantShardId::unsharded(tenant_id);
|
||||
fs::create_dir_all(conf.tenant_path(&tenant_shard_id))?;
|
||||
fs::create_dir_all(conf.timelines_path(&tenant_shard_id))?;
|
||||
|
||||
use remote_storage::{RemoteStorageConfig, RemoteStorageKind};
|
||||
let remote_fs_dir = conf.workdir.join("localfs");
|
||||
@@ -3636,6 +3667,7 @@ pub(crate) mod harness {
|
||||
conf,
|
||||
tenant_conf,
|
||||
tenant_id,
|
||||
tenant_shard_id,
|
||||
generation: Generation::new(0xdeadbeef),
|
||||
shard: ShardIndex::unsharded(),
|
||||
remote_storage,
|
||||
@@ -3655,7 +3687,7 @@ pub(crate) mod harness {
|
||||
}
|
||||
|
||||
fn remote_empty(&self) -> bool {
|
||||
let tenant_path = self.conf.tenant_path(&self.tenant_id);
|
||||
let tenant_path = self.conf.tenant_path(&self.tenant_shard_id);
|
||||
let remote_tenant_dir = self
|
||||
.remote_fs_dir
|
||||
.join(tenant_path.strip_prefix(&self.conf.workdir).unwrap());
|
||||
@@ -3695,7 +3727,7 @@ pub(crate) mod harness {
|
||||
))
|
||||
.unwrap(),
|
||||
walredo_mgr,
|
||||
self.tenant_id,
|
||||
self.tenant_shard_id,
|
||||
Some(self.remote_storage.clone()),
|
||||
self.deletion_queue.new_client(),
|
||||
));
|
||||
@@ -3704,17 +3736,17 @@ pub(crate) mod harness {
|
||||
LoadMode::Local => {
|
||||
tenant
|
||||
.load_local(None, ctx)
|
||||
.instrument(info_span!("try_load", tenant_id=%self.tenant_id))
|
||||
.instrument(info_span!("try_load", tenant_id=%self.tenant_shard_id.tenant_id, shard_id=%self.tenant_shard_id.shard_slug()))
|
||||
.await?;
|
||||
}
|
||||
LoadMode::Remote => {
|
||||
let preload = tenant
|
||||
.preload(&self.remote_storage, CancellationToken::new())
|
||||
.instrument(info_span!("try_load_preload", tenant_id=%self.tenant_id))
|
||||
.instrument(info_span!("try_load_preload", tenant_id=%self.tenant_shard_id.tenant_id, shard_id=%self.tenant_shard_id.shard_slug()))
|
||||
.await?;
|
||||
tenant
|
||||
.attach(None, Some(preload), ctx)
|
||||
.instrument(info_span!("try_load", tenant_id=%self.tenant_id))
|
||||
.instrument(info_span!("try_load", tenant_id=%self.tenant_shard_id.tenant_id, shard_id=%self.tenant_shard_id.shard_slug()))
|
||||
.await?;
|
||||
}
|
||||
}
|
||||
@@ -3748,7 +3780,7 @@ pub(crate) mod harness {
|
||||
}
|
||||
|
||||
pub fn timeline_path(&self, timeline_id: &TimelineId) -> Utf8PathBuf {
|
||||
self.conf.timeline_path(&self.tenant_id, timeline_id)
|
||||
self.conf.timeline_path(&self.tenant_shard_id, timeline_id)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -3864,7 +3896,7 @@ mod tests {
|
||||
e.to_string(),
|
||||
format!(
|
||||
"Timeline {}/{} already exists in pageserver's memory",
|
||||
tenant.tenant_id, TIMELINE_ID
|
||||
tenant.tenant_shard_id, TIMELINE_ID
|
||||
)
|
||||
),
|
||||
}
|
||||
@@ -4248,7 +4280,7 @@ mod tests {
|
||||
// so that all uploads finish & we can call harness.load() below again
|
||||
tenant
|
||||
.shutdown(Default::default(), true)
|
||||
.instrument(info_span!("test_shutdown", tenant_id=%tenant.tenant_id))
|
||||
.instrument(info_span!("test_shutdown", tenant_id=%tenant.tenant_shard_id))
|
||||
.await
|
||||
.ok()
|
||||
.unwrap();
|
||||
@@ -4289,7 +4321,7 @@ mod tests {
|
||||
// so that all uploads finish & we can call harness.load() below again
|
||||
tenant
|
||||
.shutdown(Default::default(), true)
|
||||
.instrument(info_span!("test_shutdown", tenant_id=%tenant.tenant_id))
|
||||
.instrument(info_span!("test_shutdown", tenant_id=%tenant.tenant_shard_id))
|
||||
.await
|
||||
.ok()
|
||||
.unwrap();
|
||||
@@ -4351,7 +4383,7 @@ mod tests {
|
||||
// so that all uploads finish & we can call harness.try_load() below again
|
||||
tenant
|
||||
.shutdown(Default::default(), true)
|
||||
.instrument(info_span!("test_shutdown", tenant_id=%tenant.tenant_id))
|
||||
.instrument(info_span!("test_shutdown", tenant_id=%tenant.tenant_shard_id))
|
||||
.await
|
||||
.ok()
|
||||
.unwrap();
|
||||
@@ -4884,7 +4916,7 @@ mod tests {
|
||||
let raw_tline = tline.raw_timeline().unwrap();
|
||||
raw_tline
|
||||
.shutdown()
|
||||
.instrument(info_span!("test_shutdown", tenant_id=%raw_tline.tenant_id))
|
||||
.instrument(info_span!("test_shutdown", tenant_id=%raw_tline.tenant_shard_id))
|
||||
.await;
|
||||
std::mem::forget(tline);
|
||||
}
|
||||
@@ -4896,7 +4928,7 @@ mod tests {
|
||||
assert_eq!(
|
||||
e,
|
||||
GetTimelineError::NotFound {
|
||||
tenant_id: tenant.tenant_id,
|
||||
tenant_id: tenant.tenant_shard_id.tenant_id,
|
||||
timeline_id: TIMELINE_ID,
|
||||
}
|
||||
)
|
||||
@@ -4905,12 +4937,12 @@ mod tests {
|
||||
|
||||
assert!(!harness
|
||||
.conf
|
||||
.timeline_path(&tenant.tenant_id, &TIMELINE_ID)
|
||||
.timeline_path(&tenant.tenant_shard_id, &TIMELINE_ID)
|
||||
.exists());
|
||||
|
||||
assert!(!harness
|
||||
.conf
|
||||
.timeline_uninit_mark_file_path(tenant.tenant_id, TIMELINE_ID)
|
||||
.timeline_uninit_mark_file_path(tenant.tenant_shard_id, TIMELINE_ID)
|
||||
.exists());
|
||||
|
||||
Ok(())
|
||||
|
||||
@@ -2,16 +2,13 @@ use std::sync::Arc;
|
||||
|
||||
use anyhow::Context;
|
||||
use camino::{Utf8Path, Utf8PathBuf};
|
||||
use pageserver_api::models::TenantState;
|
||||
use pageserver_api::{models::TenantState, shard::TenantShardId};
|
||||
use remote_storage::{GenericRemoteStorage, RemotePath};
|
||||
use tokio::sync::OwnedMutexGuard;
|
||||
use tokio_util::sync::CancellationToken;
|
||||
use tracing::{error, instrument, Instrument, Span};
|
||||
|
||||
use utils::{
|
||||
backoff, completion, crashsafe, fs_ext,
|
||||
id::{TenantId, TimelineId},
|
||||
};
|
||||
use utils::{backoff, completion, crashsafe, fs_ext, id::TimelineId};
|
||||
|
||||
use crate::{
|
||||
config::PageServerConf,
|
||||
@@ -60,10 +57,10 @@ type DeletionGuard = tokio::sync::OwnedMutexGuard<DeleteTenantFlow>;
|
||||
|
||||
fn remote_tenant_delete_mark_path(
|
||||
conf: &PageServerConf,
|
||||
tenant_id: &TenantId,
|
||||
tenant_shard_id: &TenantShardId,
|
||||
) -> anyhow::Result<RemotePath> {
|
||||
let tenant_remote_path = conf
|
||||
.tenant_path(tenant_id)
|
||||
.tenant_path(tenant_shard_id)
|
||||
.strip_prefix(&conf.workdir)
|
||||
.context("Failed to strip workdir prefix")
|
||||
.and_then(RemotePath::new)
|
||||
@@ -74,9 +71,9 @@ fn remote_tenant_delete_mark_path(
|
||||
async fn create_remote_delete_mark(
|
||||
conf: &PageServerConf,
|
||||
remote_storage: &GenericRemoteStorage,
|
||||
tenant_id: &TenantId,
|
||||
tenant_shard_id: &TenantShardId,
|
||||
) -> Result<(), DeleteTenantError> {
|
||||
let remote_mark_path = remote_tenant_delete_mark_path(conf, tenant_id)?;
|
||||
let remote_mark_path = remote_tenant_delete_mark_path(conf, tenant_shard_id)?;
|
||||
|
||||
let data: &[u8] = &[];
|
||||
backoff::retry(
|
||||
@@ -100,9 +97,9 @@ async fn create_remote_delete_mark(
|
||||
|
||||
async fn create_local_delete_mark(
|
||||
conf: &PageServerConf,
|
||||
tenant_id: &TenantId,
|
||||
tenant_shard_id: &TenantShardId,
|
||||
) -> Result<(), DeleteTenantError> {
|
||||
let marker_path = conf.tenant_deleted_mark_file_path(tenant_id);
|
||||
let marker_path = conf.tenant_deleted_mark_file_path(tenant_shard_id);
|
||||
|
||||
// Note: we're ok to replace existing file.
|
||||
let _ = std::fs::OpenOptions::new()
|
||||
@@ -171,10 +168,10 @@ async fn ensure_timelines_dir_empty(timelines_path: &Utf8Path) -> Result<(), Del
|
||||
async fn remove_tenant_remote_delete_mark(
|
||||
conf: &PageServerConf,
|
||||
remote_storage: Option<&GenericRemoteStorage>,
|
||||
tenant_id: &TenantId,
|
||||
tenant_shard_id: &TenantShardId,
|
||||
) -> Result<(), DeleteTenantError> {
|
||||
if let Some(remote_storage) = remote_storage {
|
||||
let path = remote_tenant_delete_mark_path(conf, tenant_id)?;
|
||||
let path = remote_tenant_delete_mark_path(conf, tenant_shard_id)?;
|
||||
backoff::retry(
|
||||
|| async { remote_storage.delete(&path).await },
|
||||
|_e| false,
|
||||
@@ -193,7 +190,7 @@ async fn remove_tenant_remote_delete_mark(
|
||||
// Cleanup fs traces: tenant config, timelines dir local delete mark, tenant dir
|
||||
async fn cleanup_remaining_fs_traces(
|
||||
conf: &PageServerConf,
|
||||
tenant_id: &TenantId,
|
||||
tenant_shard_id: &TenantShardId,
|
||||
) -> Result<(), DeleteTenantError> {
|
||||
let rm = |p: Utf8PathBuf, is_dir: bool| async move {
|
||||
if is_dir {
|
||||
@@ -205,8 +202,8 @@ async fn cleanup_remaining_fs_traces(
|
||||
.with_context(|| format!("failed to delete {p}"))
|
||||
};
|
||||
|
||||
rm(conf.tenant_config_path(tenant_id), false).await?;
|
||||
rm(conf.tenant_location_config_path(tenant_id), false).await?;
|
||||
rm(conf.tenant_config_path(tenant_shard_id), false).await?;
|
||||
rm(conf.tenant_location_config_path(tenant_shard_id), false).await?;
|
||||
|
||||
fail::fail_point!("tenant-delete-before-remove-timelines-dir", |_| {
|
||||
Err(anyhow::anyhow!(
|
||||
@@ -214,7 +211,7 @@ async fn cleanup_remaining_fs_traces(
|
||||
))?
|
||||
});
|
||||
|
||||
rm(conf.timelines_path(tenant_id), true).await?;
|
||||
rm(conf.timelines_path(tenant_shard_id), true).await?;
|
||||
|
||||
fail::fail_point!("tenant-delete-before-remove-deleted-mark", |_| {
|
||||
Err(anyhow::anyhow!(
|
||||
@@ -228,14 +225,14 @@ async fn cleanup_remaining_fs_traces(
|
||||
// to be reordered later and thus missed if a crash occurs.
|
||||
// Note that we dont need to sync after mark file is removed
|
||||
// because we can tolerate the case when mark file reappears on startup.
|
||||
let tenant_path = &conf.tenant_path(tenant_id);
|
||||
let tenant_path = &conf.tenant_path(tenant_shard_id);
|
||||
if tenant_path.exists() {
|
||||
crashsafe::fsync_async(&conf.tenant_path(tenant_id))
|
||||
crashsafe::fsync_async(&conf.tenant_path(tenant_shard_id))
|
||||
.await
|
||||
.context("fsync_pre_mark_remove")?;
|
||||
}
|
||||
|
||||
rm(conf.tenant_deleted_mark_file_path(tenant_id), false).await?;
|
||||
rm(conf.tenant_deleted_mark_file_path(tenant_shard_id), false).await?;
|
||||
|
||||
fail::fail_point!("tenant-delete-before-remove-tenant-dir", |_| {
|
||||
Err(anyhow::anyhow!(
|
||||
@@ -243,7 +240,7 @@ async fn cleanup_remaining_fs_traces(
|
||||
))?
|
||||
});
|
||||
|
||||
rm(conf.tenant_path(tenant_id), true).await?;
|
||||
rm(conf.tenant_path(tenant_shard_id), true).await?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
@@ -324,7 +321,7 @@ impl DeleteTenantFlow {
|
||||
// Though sounds scary, different mark name?
|
||||
// Detach currently uses remove_dir_all so in case of a crash we can end up in a weird state.
|
||||
if let Some(remote_storage) = &remote_storage {
|
||||
create_remote_delete_mark(conf, remote_storage, &tenant.tenant_id)
|
||||
create_remote_delete_mark(conf, remote_storage, &tenant.tenant_shard_id)
|
||||
.await
|
||||
.context("remote_mark")?
|
||||
}
|
||||
@@ -335,7 +332,7 @@ impl DeleteTenantFlow {
|
||||
))?
|
||||
});
|
||||
|
||||
create_local_delete_mark(conf, &tenant.tenant_id)
|
||||
create_local_delete_mark(conf, &tenant.tenant_shard_id)
|
||||
.await
|
||||
.context("local delete mark")?;
|
||||
|
||||
@@ -377,9 +374,11 @@ impl DeleteTenantFlow {
|
||||
return Ok(acquire(tenant));
|
||||
}
|
||||
|
||||
let tenant_id = tenant.tenant_id;
|
||||
// Check local mark first, if its there there is no need to go to s3 to check whether remote one exists.
|
||||
if conf.tenant_deleted_mark_file_path(&tenant_id).exists() {
|
||||
if conf
|
||||
.tenant_deleted_mark_file_path(&tenant.tenant_shard_id)
|
||||
.exists()
|
||||
{
|
||||
Ok(acquire(tenant))
|
||||
} else {
|
||||
Ok(None)
|
||||
@@ -462,12 +461,12 @@ impl DeleteTenantFlow {
|
||||
tenants: &'static std::sync::RwLock<TenantsMap>,
|
||||
tenant: Arc<Tenant>,
|
||||
) {
|
||||
let tenant_id = tenant.tenant_id;
|
||||
let tenant_shard_id = tenant.tenant_shard_id;
|
||||
|
||||
task_mgr::spawn(
|
||||
task_mgr::BACKGROUND_RUNTIME.handle(),
|
||||
TaskKind::TimelineDeletionWorker,
|
||||
Some(tenant_id),
|
||||
Some(tenant_shard_id.tenant_id),
|
||||
None,
|
||||
"tenant_delete",
|
||||
false,
|
||||
@@ -481,7 +480,7 @@ impl DeleteTenantFlow {
|
||||
Ok(())
|
||||
}
|
||||
.instrument({
|
||||
let span = tracing::info_span!(parent: None, "delete_tenant", tenant_id=%tenant_id);
|
||||
let span = tracing::info_span!(parent: None, "delete_tenant", tenant_id=%tenant_shard_id.tenant_id, shard_id=%tenant_shard_id.shard_slug());
|
||||
span.follows_from(Span::current());
|
||||
span
|
||||
}),
|
||||
@@ -519,7 +518,7 @@ impl DeleteTenantFlow {
|
||||
}
|
||||
}
|
||||
|
||||
let timelines_path = conf.timelines_path(&tenant.tenant_id);
|
||||
let timelines_path = conf.timelines_path(&tenant.tenant_shard_id);
|
||||
// May not exist if we fail in cleanup_remaining_fs_traces after removing it
|
||||
if timelines_path.exists() {
|
||||
// sanity check to guard against layout changes
|
||||
@@ -528,7 +527,8 @@ impl DeleteTenantFlow {
|
||||
.context("timelines dir not empty")?;
|
||||
}
|
||||
|
||||
remove_tenant_remote_delete_mark(conf, remote_storage.as_ref(), &tenant.tenant_id).await?;
|
||||
remove_tenant_remote_delete_mark(conf, remote_storage.as_ref(), &tenant.tenant_shard_id)
|
||||
.await?;
|
||||
|
||||
fail::fail_point!("tenant-delete-before-cleanup-remaining-fs-traces", |_| {
|
||||
Err(anyhow::anyhow!(
|
||||
@@ -536,7 +536,7 @@ impl DeleteTenantFlow {
|
||||
))?
|
||||
});
|
||||
|
||||
cleanup_remaining_fs_traces(conf, &tenant.tenant_id)
|
||||
cleanup_remaining_fs_traces(conf, &tenant.tenant_shard_id)
|
||||
.await
|
||||
.context("cleanup_remaining_fs_traces")?;
|
||||
|
||||
@@ -553,7 +553,7 @@ impl DeleteTenantFlow {
|
||||
// we encounter an InProgress marker, yield the barrier it contains and wait on it.
|
||||
let barrier = {
|
||||
let mut locked = tenants.write().unwrap();
|
||||
let removed = locked.remove(&tenant.tenant_id);
|
||||
let removed = locked.remove(&tenant.tenant_shard_id.tenant_id);
|
||||
|
||||
// FIXME: we should not be modifying this from outside of mgr.rs.
|
||||
// This will go away when we simplify deletion (https://github.com/neondatabase/neon/issues/5080)
|
||||
|
||||
@@ -7,18 +7,19 @@ use crate::page_cache::{self, PAGE_SZ};
|
||||
use crate::tenant::block_io::{BlockCursor, BlockLease, BlockReader};
|
||||
use crate::virtual_file::VirtualFile;
|
||||
use camino::Utf8PathBuf;
|
||||
use pageserver_api::shard::TenantShardId;
|
||||
use std::cmp::min;
|
||||
use std::fs::OpenOptions;
|
||||
use std::io::{self, ErrorKind};
|
||||
use std::ops::DerefMut;
|
||||
use std::sync::atomic::AtomicU64;
|
||||
use tracing::*;
|
||||
use utils::id::{TenantId, TimelineId};
|
||||
use utils::id::TimelineId;
|
||||
|
||||
pub struct EphemeralFile {
|
||||
page_cache_file_id: page_cache::FileId,
|
||||
|
||||
_tenant_id: TenantId,
|
||||
_tenant_shard_id: TenantShardId,
|
||||
_timeline_id: TimelineId,
|
||||
file: VirtualFile,
|
||||
len: u64,
|
||||
@@ -31,7 +32,7 @@ pub struct EphemeralFile {
|
||||
impl EphemeralFile {
|
||||
pub async fn create(
|
||||
conf: &PageServerConf,
|
||||
tenant_id: TenantId,
|
||||
tenant_shard_id: TenantShardId,
|
||||
timeline_id: TimelineId,
|
||||
) -> Result<EphemeralFile, io::Error> {
|
||||
static NEXT_FILENAME: AtomicU64 = AtomicU64::new(1);
|
||||
@@ -39,7 +40,7 @@ impl EphemeralFile {
|
||||
NEXT_FILENAME.fetch_add(1, std::sync::atomic::Ordering::Relaxed);
|
||||
|
||||
let filename = conf
|
||||
.timeline_path(&tenant_id, &timeline_id)
|
||||
.timeline_path(&tenant_shard_id, &timeline_id)
|
||||
.join(Utf8PathBuf::from(format!(
|
||||
"ephemeral-{filename_disambiguator}"
|
||||
)));
|
||||
@@ -52,7 +53,7 @@ impl EphemeralFile {
|
||||
|
||||
Ok(EphemeralFile {
|
||||
page_cache_file_id: page_cache::next_file_id(),
|
||||
_tenant_id: tenant_id,
|
||||
_tenant_shard_id: tenant_shard_id,
|
||||
_timeline_id: timeline_id,
|
||||
file,
|
||||
len: 0,
|
||||
@@ -282,7 +283,7 @@ mod tests {
|
||||
) -> Result<
|
||||
(
|
||||
&'static PageServerConf,
|
||||
TenantId,
|
||||
TenantShardId,
|
||||
TimelineId,
|
||||
RequestContext,
|
||||
),
|
||||
@@ -295,13 +296,13 @@ mod tests {
|
||||
// OK in a test.
|
||||
let conf: &'static PageServerConf = Box::leak(Box::new(conf));
|
||||
|
||||
let tenant_id = TenantId::from_str("11000000000000000000000000000000").unwrap();
|
||||
let tenant_shard_id = TenantShardId::from_str("11000000000000000000000000000000").unwrap();
|
||||
let timeline_id = TimelineId::from_str("22000000000000000000000000000000").unwrap();
|
||||
fs::create_dir_all(conf.timeline_path(&tenant_id, &timeline_id))?;
|
||||
fs::create_dir_all(conf.timeline_path(&tenant_shard_id, &timeline_id))?;
|
||||
|
||||
let ctx = RequestContext::new(TaskKind::UnitTest, DownloadBehavior::Error);
|
||||
|
||||
Ok((conf, tenant_id, timeline_id, ctx))
|
||||
Ok((conf, tenant_shard_id, timeline_id, ctx))
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
|
||||
@@ -11,15 +11,12 @@
|
||||
use std::io::{self};
|
||||
|
||||
use anyhow::{ensure, Context};
|
||||
use pageserver_api::shard::TenantShardId;
|
||||
use serde::{de::Error, Deserialize, Serialize, Serializer};
|
||||
use thiserror::Error;
|
||||
use utils::bin_ser::SerializeError;
|
||||
use utils::crashsafe::path_with_suffix_extension;
|
||||
use utils::{
|
||||
bin_ser::BeSer,
|
||||
id::{TenantId, TimelineId},
|
||||
lsn::Lsn,
|
||||
};
|
||||
use utils::{bin_ser::BeSer, id::TimelineId, lsn::Lsn};
|
||||
|
||||
use crate::config::PageServerConf;
|
||||
use crate::virtual_file::VirtualFile;
|
||||
@@ -272,14 +269,14 @@ impl Serialize for TimelineMetadata {
|
||||
}
|
||||
|
||||
/// Save timeline metadata to file
|
||||
#[tracing::instrument(skip_all, fields(%tenant_id, %timeline_id))]
|
||||
#[tracing::instrument(skip_all, fields(%tenant_id=tenant_shard_id.tenant_id, %shard_id=tenant_shard_id.shard_slug(), %timeline_id))]
|
||||
pub async fn save_metadata(
|
||||
conf: &'static PageServerConf,
|
||||
tenant_id: &TenantId,
|
||||
tenant_shard_id: &TenantShardId,
|
||||
timeline_id: &TimelineId,
|
||||
data: &TimelineMetadata,
|
||||
) -> anyhow::Result<()> {
|
||||
let path = conf.metadata_path(tenant_id, timeline_id);
|
||||
let path = conf.metadata_path(tenant_shard_id, timeline_id);
|
||||
let temp_path = path_with_suffix_extension(&path, TEMP_FILE_SUFFIX);
|
||||
let metadata_bytes = data.to_bytes().context("serialize metadata")?;
|
||||
VirtualFile::crashsafe_overwrite(&path, &temp_path, &metadata_bytes)
|
||||
@@ -299,10 +296,10 @@ pub enum LoadMetadataError {
|
||||
|
||||
pub fn load_metadata(
|
||||
conf: &'static PageServerConf,
|
||||
tenant_id: &TenantId,
|
||||
tenant_shard_id: &TenantShardId,
|
||||
timeline_id: &TimelineId,
|
||||
) -> Result<TimelineMetadata, LoadMetadataError> {
|
||||
let metadata_path = conf.metadata_path(tenant_id, timeline_id);
|
||||
let metadata_path = conf.metadata_path(tenant_shard_id, timeline_id);
|
||||
let metadata_bytes = std::fs::read(metadata_path)?;
|
||||
|
||||
Ok(TimelineMetadata::from_bytes(&metadata_bytes)?)
|
||||
|
||||
@@ -272,8 +272,8 @@ pub struct TenantManager {
|
||||
}
|
||||
|
||||
fn emergency_generations(
|
||||
tenant_confs: &HashMap<TenantId, anyhow::Result<LocationConf>>,
|
||||
) -> HashMap<TenantId, Generation> {
|
||||
tenant_confs: &HashMap<TenantShardId, anyhow::Result<LocationConf>>,
|
||||
) -> HashMap<TenantShardId, Generation> {
|
||||
tenant_confs
|
||||
.iter()
|
||||
.filter_map(|(tid, lc)| {
|
||||
@@ -293,10 +293,10 @@ fn emergency_generations(
|
||||
|
||||
async fn init_load_generations(
|
||||
conf: &'static PageServerConf,
|
||||
tenant_confs: &HashMap<TenantId, anyhow::Result<LocationConf>>,
|
||||
tenant_confs: &HashMap<TenantShardId, anyhow::Result<LocationConf>>,
|
||||
resources: &TenantSharedResources,
|
||||
cancel: &CancellationToken,
|
||||
) -> anyhow::Result<Option<HashMap<TenantId, Generation>>> {
|
||||
) -> anyhow::Result<Option<HashMap<TenantShardId, Generation>>> {
|
||||
let generations = if conf.control_plane_emergency_mode {
|
||||
error!(
|
||||
"Emergency mode! Tenants will be attached unsafely using their last known generation"
|
||||
@@ -339,7 +339,7 @@ async fn init_load_generations(
|
||||
fn load_tenant_config(
|
||||
conf: &'static PageServerConf,
|
||||
dentry: Utf8DirEntry,
|
||||
) -> anyhow::Result<Option<(TenantId, anyhow::Result<LocationConf>)>> {
|
||||
) -> anyhow::Result<Option<(TenantShardId, anyhow::Result<LocationConf>)>> {
|
||||
let tenant_dir_path = dentry.path().to_path_buf();
|
||||
if crate::is_temporary(&tenant_dir_path) {
|
||||
info!("Found temporary tenant directory, removing: {tenant_dir_path}");
|
||||
@@ -375,10 +375,10 @@ fn load_tenant_config(
|
||||
return Ok(None);
|
||||
}
|
||||
|
||||
let tenant_id = match tenant_dir_path
|
||||
let tenant_shard_id = match tenant_dir_path
|
||||
.file_name()
|
||||
.unwrap_or_default()
|
||||
.parse::<TenantId>()
|
||||
.parse::<TenantShardId>()
|
||||
{
|
||||
Ok(id) => id,
|
||||
Err(_) => {
|
||||
@@ -388,8 +388,8 @@ fn load_tenant_config(
|
||||
};
|
||||
|
||||
Ok(Some((
|
||||
tenant_id,
|
||||
Tenant::load_tenant_config(conf, &tenant_id),
|
||||
tenant_shard_id,
|
||||
Tenant::load_tenant_config(conf, &tenant_shard_id),
|
||||
)))
|
||||
}
|
||||
|
||||
@@ -400,7 +400,7 @@ fn load_tenant_config(
|
||||
/// seconds even on reasonably fast drives.
|
||||
async fn init_load_tenant_configs(
|
||||
conf: &'static PageServerConf,
|
||||
) -> anyhow::Result<HashMap<TenantId, anyhow::Result<LocationConf>>> {
|
||||
) -> anyhow::Result<HashMap<TenantShardId, anyhow::Result<LocationConf>>> {
|
||||
let tenants_dir = conf.tenants_path();
|
||||
|
||||
let dentries = tokio::task::spawn_blocking(move || -> anyhow::Result<Vec<Utf8DirEntry>> {
|
||||
@@ -450,19 +450,19 @@ pub async fn init_tenant_mgr(
|
||||
init_load_generations(conf, &tenant_configs, &resources, &cancel).await?;
|
||||
|
||||
// Construct `Tenant` objects and start them running
|
||||
for (tenant_id, location_conf) in tenant_configs {
|
||||
let tenant_dir_path = conf.tenant_path(&tenant_id);
|
||||
for (tenant_shard_id, location_conf) in tenant_configs {
|
||||
let tenant_dir_path = conf.tenant_path(&tenant_shard_id);
|
||||
|
||||
let mut location_conf = match location_conf {
|
||||
Ok(l) => l,
|
||||
Err(e) => {
|
||||
warn!(%tenant_id, "Marking tenant broken, failed to {e:#}");
|
||||
warn!(tenant_id=%tenant_shard_id.tenant_id, shard_id=%tenant_shard_id.shard_slug(), "Marking tenant broken, failed to {e:#}");
|
||||
|
||||
tenants.insert(
|
||||
TenantShardId::unsharded(tenant_id),
|
||||
tenant_shard_id,
|
||||
TenantSlot::Attached(Tenant::create_broken_tenant(
|
||||
conf,
|
||||
tenant_id,
|
||||
tenant_shard_id,
|
||||
format!("{}", e),
|
||||
)),
|
||||
);
|
||||
@@ -473,7 +473,7 @@ pub async fn init_tenant_mgr(
|
||||
let generation = if let Some(generations) = &tenant_generations {
|
||||
// We have a generation map: treat it as the authority for whether
|
||||
// this tenant is really attached.
|
||||
if let Some(gen) = generations.get(&tenant_id) {
|
||||
if let Some(gen) = generations.get(&tenant_shard_id) {
|
||||
*gen
|
||||
} else {
|
||||
match &location_conf.mode {
|
||||
@@ -481,8 +481,8 @@ pub async fn init_tenant_mgr(
|
||||
// We do not require the control plane's permission for secondary mode
|
||||
// tenants, because they do no remote writes and hence require no
|
||||
// generation number
|
||||
info!(%tenant_id, "Loaded tenant in secondary mode");
|
||||
tenants.insert(TenantShardId::unsharded(tenant_id), TenantSlot::Secondary);
|
||||
info!(tenant_id=%tenant_shard_id.tenant_id, shard_id=%tenant_shard_id.shard_slug(), "Loaded tenant in secondary mode");
|
||||
tenants.insert(tenant_shard_id, TenantSlot::Secondary);
|
||||
}
|
||||
LocationMode::Attached(_) => {
|
||||
// TODO: augment re-attach API to enable the control plane to
|
||||
@@ -490,9 +490,9 @@ pub async fn init_tenant_mgr(
|
||||
// away local state, we can gracefully fall back to secondary here, if the control
|
||||
// plane tells us so.
|
||||
// (https://github.com/neondatabase/neon/issues/5377)
|
||||
info!(%tenant_id, "Detaching tenant, control plane omitted it in re-attach response");
|
||||
info!(tenant_id=%tenant_shard_id.tenant_id, shard_id=%tenant_shard_id.shard_slug(), "Detaching tenant, control plane omitted it in re-attach response");
|
||||
if let Err(e) = safe_remove_tenant_dir_all(&tenant_dir_path).await {
|
||||
error!(%tenant_id,
|
||||
error!(tenant_id=%tenant_shard_id.tenant_id, shard_id=%tenant_shard_id.shard_slug(),
|
||||
"Failed to remove detached tenant directory '{tenant_dir_path}': {e:?}",
|
||||
);
|
||||
}
|
||||
@@ -504,18 +504,18 @@ pub async fn init_tenant_mgr(
|
||||
} else {
|
||||
// Legacy mode: no generation information, any tenant present
|
||||
// on local disk may activate
|
||||
info!(%tenant_id, "Starting tenant in legacy mode, no generation",);
|
||||
info!(tenant_id=%tenant_shard_id.tenant_id, shard_id=%tenant_shard_id.shard_slug(), "Starting tenant in legacy mode, no generation",);
|
||||
Generation::none()
|
||||
};
|
||||
|
||||
// Presence of a generation number implies attachment: attach the tenant
|
||||
// if it wasn't already, and apply the generation number.
|
||||
location_conf.attach_in_generation(generation);
|
||||
Tenant::persist_tenant_config(conf, &tenant_id, &location_conf).await?;
|
||||
Tenant::persist_tenant_config(conf, &tenant_shard_id, &location_conf).await?;
|
||||
|
||||
match tenant_spawn(
|
||||
conf,
|
||||
tenant_id,
|
||||
tenant_shard_id,
|
||||
&tenant_dir_path,
|
||||
resources.clone(),
|
||||
AttachedTenantConf::try_from(location_conf)?,
|
||||
@@ -531,7 +531,7 @@ pub async fn init_tenant_mgr(
|
||||
);
|
||||
}
|
||||
Err(e) => {
|
||||
error!(%tenant_id, "Failed to start tenant: {e:#}");
|
||||
error!(tenant_id=%tenant_shard_id.tenant_id, shard_id=%tenant_shard_id.shard_slug(), "Failed to start tenant: {e:#}");
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -555,7 +555,7 @@ pub async fn init_tenant_mgr(
|
||||
#[allow(clippy::too_many_arguments)]
|
||||
pub(crate) fn tenant_spawn(
|
||||
conf: &'static PageServerConf,
|
||||
tenant_id: TenantId,
|
||||
tenant_shard_id: TenantShardId,
|
||||
tenant_path: &Utf8Path,
|
||||
resources: TenantSharedResources,
|
||||
location_conf: AttachedTenantConf,
|
||||
@@ -579,16 +579,16 @@ pub(crate) fn tenant_spawn(
|
||||
"Cannot load tenant from empty directory {tenant_path:?}"
|
||||
);
|
||||
|
||||
let tenant_ignore_mark = conf.tenant_ignore_mark_file_path(&tenant_id);
|
||||
let tenant_ignore_mark = conf.tenant_ignore_mark_file_path(&tenant_shard_id);
|
||||
anyhow::ensure!(
|
||||
!conf.tenant_ignore_mark_file_path(&tenant_id).exists(),
|
||||
!conf.tenant_ignore_mark_file_path(&tenant_shard_id).exists(),
|
||||
"Cannot load tenant, ignore mark found at {tenant_ignore_mark:?}"
|
||||
);
|
||||
|
||||
info!("Attaching tenant {tenant_id}");
|
||||
info!("Attaching tenant {tenant_shard_id}");
|
||||
let tenant = match Tenant::spawn(
|
||||
conf,
|
||||
tenant_id,
|
||||
tenant_shard_id,
|
||||
resources,
|
||||
location_conf,
|
||||
init_order,
|
||||
@@ -598,8 +598,8 @@ pub(crate) fn tenant_spawn(
|
||||
) {
|
||||
Ok(tenant) => tenant,
|
||||
Err(e) => {
|
||||
error!("Failed to spawn tenant {tenant_id}, reason: {e:#}");
|
||||
Tenant::create_broken_tenant(conf, tenant_id, format!("{e:#}"))
|
||||
error!("Failed to spawn tenant {tenant_shard_id}, reason: {e:#}");
|
||||
Tenant::create_broken_tenant(conf, tenant_shard_id, format!("{e:#}"))
|
||||
}
|
||||
};
|
||||
|
||||
@@ -757,13 +757,11 @@ pub(crate) async fn create_tenant(
|
||||
|
||||
let slot_guard =
|
||||
tenant_map_acquire_slot(&tenant_shard_id, TenantSlotAcquireMode::MustNotExist)?;
|
||||
// TODO(sharding): make local paths shard-aware
|
||||
let tenant_path =
|
||||
super::create_tenant_files(conf, &location_conf, &tenant_shard_id.tenant_id).await?;
|
||||
let tenant_path = super::create_tenant_files(conf, &location_conf, &tenant_shard_id).await?;
|
||||
|
||||
let created_tenant = tenant_spawn(
|
||||
conf,
|
||||
tenant_shard_id.tenant_id,
|
||||
tenant_shard_id,
|
||||
&tenant_path,
|
||||
resources,
|
||||
AttachedTenantConf::try_from(location_conf)?,
|
||||
@@ -803,8 +801,9 @@ pub(crate) async fn set_new_tenant_config(
|
||||
// API to use is the location_config/ endpoint, which lets the caller provide
|
||||
// the full LocationConf.
|
||||
let location_conf = LocationConf::attached_single(new_tenant_conf, tenant.generation);
|
||||
let tenant_shard_id = TenantShardId::unsharded(tenant_id);
|
||||
|
||||
Tenant::persist_tenant_config(conf, &tenant_id, &location_conf)
|
||||
Tenant::persist_tenant_config(conf, &tenant_shard_id, &location_conf)
|
||||
.await
|
||||
.map_err(SetNewTenantConfigError::Persist)?;
|
||||
tenant.set_new_tenant_config(new_tenant_conf);
|
||||
@@ -935,8 +934,7 @@ impl TenantManager {
|
||||
slot_guard.drop_old_value().expect("We just shut it down");
|
||||
}
|
||||
|
||||
// TODO(sharding): make local paths sharding-aware
|
||||
let tenant_path = self.conf.tenant_path(&tenant_shard_id.tenant_id);
|
||||
let tenant_path = self.conf.tenant_path(&tenant_shard_id);
|
||||
|
||||
let new_slot = match &new_location_config.mode {
|
||||
LocationMode::Secondary(_) => {
|
||||
@@ -946,20 +944,14 @@ impl TenantManager {
|
||||
.await
|
||||
.with_context(|| format!("Creating {tenant_path}"))?;
|
||||
|
||||
// TODO(sharding): make local paths sharding-aware
|
||||
Tenant::persist_tenant_config(
|
||||
self.conf,
|
||||
&tenant_shard_id.tenant_id,
|
||||
&new_location_config,
|
||||
)
|
||||
.await
|
||||
.map_err(SetNewTenantConfigError::Persist)?;
|
||||
Tenant::persist_tenant_config(self.conf, &tenant_shard_id, &new_location_config)
|
||||
.await
|
||||
.map_err(SetNewTenantConfigError::Persist)?;
|
||||
|
||||
TenantSlot::Secondary
|
||||
}
|
||||
LocationMode::Attached(_attach_config) => {
|
||||
// TODO(sharding): make local paths sharding-aware
|
||||
let timelines_path = self.conf.timelines_path(&tenant_shard_id.tenant_id);
|
||||
let timelines_path = self.conf.timelines_path(&tenant_shard_id);
|
||||
|
||||
// Directory doesn't need to be fsync'd because we do not depend on
|
||||
// it to exist after crashes: it may be recreated when tenant is
|
||||
@@ -968,19 +960,13 @@ impl TenantManager {
|
||||
.await
|
||||
.with_context(|| format!("Creating {timelines_path}"))?;
|
||||
|
||||
// TODO(sharding): make local paths sharding-aware
|
||||
Tenant::persist_tenant_config(
|
||||
self.conf,
|
||||
&tenant_shard_id.tenant_id,
|
||||
&new_location_config,
|
||||
)
|
||||
.await
|
||||
.map_err(SetNewTenantConfigError::Persist)?;
|
||||
Tenant::persist_tenant_config(self.conf, &tenant_shard_id, &new_location_config)
|
||||
.await
|
||||
.map_err(SetNewTenantConfigError::Persist)?;
|
||||
|
||||
// TODO(sharding): make spawn sharding-aware
|
||||
let tenant = tenant_spawn(
|
||||
self.conf,
|
||||
tenant_shard_id.tenant_id,
|
||||
tenant_shard_id,
|
||||
&tenant_path,
|
||||
self.resources.clone(),
|
||||
AttachedTenantConf::try_from(new_location_config)?,
|
||||
@@ -1282,8 +1268,7 @@ async fn detach_tenant0(
|
||||
deletion_queue_client: &DeletionQueueClient,
|
||||
) -> Result<Utf8PathBuf, TenantStateError> {
|
||||
let tenant_dir_rename_operation = |tenant_id_to_clean: TenantShardId| async move {
|
||||
// TODO(sharding): make local path helpers shard-aware
|
||||
let local_tenant_directory = conf.tenant_path(&tenant_id_to_clean.tenant_id);
|
||||
let local_tenant_directory = conf.tenant_path(&tenant_id_to_clean);
|
||||
safe_rename_tenant_dir(&local_tenant_directory)
|
||||
.await
|
||||
.with_context(|| format!("local tenant directory {local_tenant_directory:?} rename"))
|
||||
@@ -1308,8 +1293,7 @@ async fn detach_tenant0(
|
||||
Err(TenantStateError::SlotError(TenantSlotError::NotFound(_)))
|
||||
)
|
||||
{
|
||||
// TODO(sharding): make local paths sharding-aware
|
||||
let tenant_ignore_mark = conf.tenant_ignore_mark_file_path(&tenant_shard_id.tenant_id);
|
||||
let tenant_ignore_mark = conf.tenant_ignore_mark_file_path(&tenant_shard_id);
|
||||
if tenant_ignore_mark.exists() {
|
||||
info!("Detaching an ignored tenant");
|
||||
let tmp_path = tenant_dir_rename_operation(tenant_shard_id)
|
||||
@@ -1338,9 +1322,9 @@ pub(crate) async fn load_tenant(
|
||||
|
||||
let slot_guard =
|
||||
tenant_map_acquire_slot(&tenant_shard_id, TenantSlotAcquireMode::MustNotExist)?;
|
||||
let tenant_path = conf.tenant_path(&tenant_id);
|
||||
let tenant_path = conf.tenant_path(&tenant_shard_id);
|
||||
|
||||
let tenant_ignore_mark = conf.tenant_ignore_mark_file_path(&tenant_id);
|
||||
let tenant_ignore_mark = conf.tenant_ignore_mark_file_path(&tenant_shard_id);
|
||||
if tenant_ignore_mark.exists() {
|
||||
std::fs::remove_file(&tenant_ignore_mark).with_context(|| {
|
||||
format!(
|
||||
@@ -1356,14 +1340,14 @@ pub(crate) async fn load_tenant(
|
||||
};
|
||||
|
||||
let mut location_conf =
|
||||
Tenant::load_tenant_config(conf, &tenant_id).map_err(TenantMapInsertError::Other)?;
|
||||
Tenant::load_tenant_config(conf, &tenant_shard_id).map_err(TenantMapInsertError::Other)?;
|
||||
location_conf.attach_in_generation(generation);
|
||||
|
||||
Tenant::persist_tenant_config(conf, &tenant_id, &location_conf).await?;
|
||||
Tenant::persist_tenant_config(conf, &tenant_shard_id, &location_conf).await?;
|
||||
|
||||
let new_tenant = tenant_spawn(
|
||||
conf,
|
||||
tenant_id,
|
||||
tenant_shard_id,
|
||||
&tenant_path,
|
||||
resources,
|
||||
AttachedTenantConf::try_from(location_conf)?,
|
||||
@@ -1394,7 +1378,7 @@ async fn ignore_tenant0(
|
||||
let tenant_shard_id = TenantShardId::unsharded(tenant_id);
|
||||
|
||||
remove_tenant_from_memory(tenants, tenant_shard_id, async {
|
||||
let ignore_mark_file = conf.tenant_ignore_mark_file_path(&tenant_id);
|
||||
let ignore_mark_file = conf.tenant_ignore_mark_file_path(&tenant_shard_id);
|
||||
fs::File::create(&ignore_mark_file)
|
||||
.await
|
||||
.context("Failed to create ignore mark file")
|
||||
@@ -1452,13 +1436,13 @@ pub(crate) async fn attach_tenant(
|
||||
let slot_guard =
|
||||
tenant_map_acquire_slot(&tenant_shard_id, TenantSlotAcquireMode::MustNotExist)?;
|
||||
let location_conf = LocationConf::attached_single(tenant_conf, generation);
|
||||
let tenant_dir = create_tenant_files(conf, &location_conf, &tenant_id).await?;
|
||||
let tenant_dir = create_tenant_files(conf, &location_conf, &tenant_shard_id).await?;
|
||||
// TODO: tenant directory remains on disk if we bail out from here on.
|
||||
// See https://github.com/neondatabase/neon/issues/4233
|
||||
|
||||
let attached_tenant = tenant_spawn(
|
||||
conf,
|
||||
tenant_id,
|
||||
tenant_shard_id,
|
||||
&tenant_dir,
|
||||
resources,
|
||||
AttachedTenantConf::try_from(location_conf)?,
|
||||
@@ -1974,6 +1958,9 @@ pub(crate) async fn immediate_gc(
|
||||
.with_context(|| format!("tenant {tenant_id}"))
|
||||
.map_err(|e| ApiError::NotFound(e.into()))?;
|
||||
|
||||
// TODO(sharding): make callers of this function shard-aware
|
||||
let tenant_shard_id = TenantShardId::unsharded(tenant_id);
|
||||
|
||||
let gc_horizon = gc_req.gc_horizon.unwrap_or_else(|| tenant.get_gc_horizon());
|
||||
// Use tenant's pitr setting
|
||||
let pitr = tenant.get_pitr_interval();
|
||||
@@ -1995,7 +1982,7 @@ pub(crate) async fn immediate_gc(
|
||||
#[allow(unused_mut)]
|
||||
let mut result = tenant
|
||||
.gc_iteration(Some(timeline_id), gc_horizon, pitr, &cancel, &ctx)
|
||||
.instrument(info_span!("manual_gc", %tenant_id, %timeline_id))
|
||||
.instrument(info_span!("manual_gc", tenant_id=%tenant_shard_id.tenant_id, shard_id=%tenant_shard_id.shard_slug(), %timeline_id))
|
||||
.await;
|
||||
// FIXME: `gc_iteration` can return an error for multiple reasons; we should handle it
|
||||
// better once the types support it.
|
||||
|
||||
@@ -188,7 +188,7 @@ use anyhow::Context;
|
||||
use camino::Utf8Path;
|
||||
use chrono::{NaiveDateTime, Utc};
|
||||
|
||||
use pageserver_api::shard::ShardIndex;
|
||||
use pageserver_api::shard::{ShardIndex, TenantShardId};
|
||||
use scopeguard::ScopeGuard;
|
||||
use tokio_util::sync::CancellationToken;
|
||||
pub(crate) use upload::upload_initdb_dir;
|
||||
@@ -301,7 +301,7 @@ pub struct RemoteTimelineClient {
|
||||
|
||||
runtime: tokio::runtime::Handle,
|
||||
|
||||
tenant_id: TenantId,
|
||||
tenant_shard_id: TenantShardId,
|
||||
timeline_id: TimelineId,
|
||||
generation: Generation,
|
||||
|
||||
@@ -325,7 +325,7 @@ impl RemoteTimelineClient {
|
||||
remote_storage: GenericRemoteStorage,
|
||||
deletion_queue_client: DeletionQueueClient,
|
||||
conf: &'static PageServerConf,
|
||||
tenant_id: TenantId,
|
||||
tenant_shard_id: TenantShardId,
|
||||
timeline_id: TimelineId,
|
||||
generation: Generation,
|
||||
) -> RemoteTimelineClient {
|
||||
@@ -337,13 +337,16 @@ impl RemoteTimelineClient {
|
||||
} else {
|
||||
BACKGROUND_RUNTIME.handle().clone()
|
||||
},
|
||||
tenant_id,
|
||||
tenant_shard_id,
|
||||
timeline_id,
|
||||
generation,
|
||||
storage_impl: remote_storage,
|
||||
deletion_queue_client,
|
||||
upload_queue: Mutex::new(UploadQueue::Uninitialized),
|
||||
metrics: Arc::new(RemoteTimelineClientMetrics::new(&tenant_id, &timeline_id)),
|
||||
metrics: Arc::new(RemoteTimelineClientMetrics::new(
|
||||
&tenant_shard_id,
|
||||
&timeline_id,
|
||||
)),
|
||||
}
|
||||
}
|
||||
|
||||
@@ -403,11 +406,6 @@ impl RemoteTimelineClient {
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub(crate) fn get_shard_index(&self) -> ShardIndex {
|
||||
// TODO: carry this on the struct
|
||||
ShardIndex::unsharded()
|
||||
}
|
||||
|
||||
pub fn remote_consistent_lsn_projected(&self) -> Option<Lsn> {
|
||||
match &mut *self.upload_queue.lock().unwrap() {
|
||||
UploadQueue::Uninitialized => None,
|
||||
@@ -469,14 +467,13 @@ impl RemoteTimelineClient {
|
||||
|
||||
let index_part = download::download_index_part(
|
||||
&self.storage_impl,
|
||||
&self.tenant_id,
|
||||
&self.tenant_shard_id,
|
||||
&self.timeline_id,
|
||||
self.get_shard_index(),
|
||||
self.generation,
|
||||
cancel,
|
||||
)
|
||||
.measure_remote_op(
|
||||
self.tenant_id,
|
||||
self.tenant_shard_id.tenant_id,
|
||||
self.timeline_id,
|
||||
RemoteOpFileKind::Index,
|
||||
RemoteOpKind::Download,
|
||||
@@ -512,13 +509,13 @@ impl RemoteTimelineClient {
|
||||
download::download_layer_file(
|
||||
self.conf,
|
||||
&self.storage_impl,
|
||||
self.tenant_id,
|
||||
self.tenant_shard_id,
|
||||
self.timeline_id,
|
||||
layer_file_name,
|
||||
layer_metadata,
|
||||
)
|
||||
.measure_remote_op(
|
||||
self.tenant_id,
|
||||
self.tenant_shard_id.tenant_id,
|
||||
self.timeline_id,
|
||||
RemoteOpFileKind::Layer,
|
||||
RemoteOpKind::Download,
|
||||
@@ -966,9 +963,8 @@ impl RemoteTimelineClient {
|
||||
|| {
|
||||
upload::upload_index_part(
|
||||
&self.storage_impl,
|
||||
&self.tenant_id,
|
||||
&self.tenant_shard_id,
|
||||
&self.timeline_id,
|
||||
self.get_shard_index(),
|
||||
self.generation,
|
||||
&index_part_with_deleted_at,
|
||||
)
|
||||
@@ -1025,7 +1021,7 @@ impl RemoteTimelineClient {
|
||||
.drain()
|
||||
.map(|(file_name, meta)| {
|
||||
remote_layer_path(
|
||||
&self.tenant_id,
|
||||
&self.tenant_shard_id.tenant_id,
|
||||
&self.timeline_id,
|
||||
meta.shard,
|
||||
&file_name,
|
||||
@@ -1040,7 +1036,7 @@ impl RemoteTimelineClient {
|
||||
|
||||
// Do not delete index part yet, it is needed for possible retry. If we remove it first
|
||||
// and retry will arrive to different pageserver there wont be any traces of it on remote storage
|
||||
let timeline_storage_path = remote_timeline_path(&self.tenant_id, &self.timeline_id);
|
||||
let timeline_storage_path = remote_timeline_path(&self.tenant_shard_id, &self.timeline_id);
|
||||
|
||||
// Execute all pending deletions, so that when we proceed to do a list_prefixes below, we aren't
|
||||
// taking the burden of listing all the layers that we already know we should delete.
|
||||
@@ -1076,12 +1072,7 @@ impl RemoteTimelineClient {
|
||||
.unwrap_or(
|
||||
// No generation-suffixed indices, assume we are dealing with
|
||||
// a legacy index.
|
||||
remote_index_path(
|
||||
&self.tenant_id,
|
||||
&self.timeline_id,
|
||||
self.get_shard_index(),
|
||||
Generation::none(),
|
||||
),
|
||||
remote_index_path(&self.tenant_shard_id, &self.timeline_id, Generation::none()),
|
||||
);
|
||||
|
||||
let remaining_layers: Vec<RemotePath> = remaining
|
||||
@@ -1213,12 +1204,12 @@ impl RemoteTimelineClient {
|
||||
|
||||
// Spawn task to perform the task
|
||||
let self_rc = Arc::clone(self);
|
||||
let tenant_id = self.tenant_id;
|
||||
let tenant_shard_id = self.tenant_shard_id;
|
||||
let timeline_id = self.timeline_id;
|
||||
task_mgr::spawn(
|
||||
&self.runtime,
|
||||
TaskKind::RemoteUploadTask,
|
||||
Some(self.tenant_id),
|
||||
Some(self.tenant_shard_id.tenant_id),
|
||||
Some(self.timeline_id),
|
||||
"remote upload",
|
||||
false,
|
||||
@@ -1226,7 +1217,7 @@ impl RemoteTimelineClient {
|
||||
self_rc.perform_upload_task(task).await;
|
||||
Ok(())
|
||||
}
|
||||
.instrument(info_span!(parent: None, "remote_upload", %tenant_id, %timeline_id, %upload_task_id)),
|
||||
.instrument(info_span!(parent: None, "remote_upload", tenant_id=%tenant_shard_id.tenant_id, shard_id=%tenant_shard_id.shard_slug(), %timeline_id, %upload_task_id)),
|
||||
);
|
||||
|
||||
// Loop back to process next task
|
||||
@@ -1278,7 +1269,7 @@ impl RemoteTimelineClient {
|
||||
self.generation,
|
||||
)
|
||||
.measure_remote_op(
|
||||
self.tenant_id,
|
||||
self.tenant_shard_id.tenant_id,
|
||||
self.timeline_id,
|
||||
RemoteOpFileKind::Layer,
|
||||
RemoteOpKind::Upload,
|
||||
@@ -1298,14 +1289,13 @@ impl RemoteTimelineClient {
|
||||
|
||||
let res = upload::upload_index_part(
|
||||
&self.storage_impl,
|
||||
&self.tenant_id,
|
||||
&self.tenant_shard_id,
|
||||
&self.timeline_id,
|
||||
self.get_shard_index(),
|
||||
self.generation,
|
||||
index_part,
|
||||
)
|
||||
.measure_remote_op(
|
||||
self.tenant_id,
|
||||
self.tenant_shard_id.tenant_id,
|
||||
self.timeline_id,
|
||||
RemoteOpFileKind::Index,
|
||||
RemoteOpKind::Upload,
|
||||
@@ -1325,7 +1315,7 @@ impl RemoteTimelineClient {
|
||||
pausable_failpoint!("before-delete-layer-pausable");
|
||||
self.deletion_queue_client
|
||||
.push_layers(
|
||||
self.tenant_id,
|
||||
self.tenant_shard_id,
|
||||
self.timeline_id,
|
||||
self.generation,
|
||||
delete.layers.clone(),
|
||||
@@ -1444,7 +1434,7 @@ impl RemoteTimelineClient {
|
||||
// data safety guarantees (see docs/rfcs/025-generation-numbers.md)
|
||||
self.deletion_queue_client
|
||||
.update_remote_consistent_lsn(
|
||||
self.tenant_id,
|
||||
self.tenant_shard_id,
|
||||
self.timeline_id,
|
||||
self.generation,
|
||||
lsn,
|
||||
@@ -1602,15 +1592,21 @@ impl RemoteTimelineClient {
|
||||
}
|
||||
}
|
||||
|
||||
pub fn remote_timelines_path(tenant_id: &TenantId) -> RemotePath {
|
||||
let path = format!("tenants/{tenant_id}/{TIMELINES_SEGMENT_NAME}");
|
||||
pub fn remote_timelines_path(tenant_shard_id: &TenantShardId) -> RemotePath {
|
||||
let path = format!("tenants/{tenant_shard_id}/{TIMELINES_SEGMENT_NAME}");
|
||||
RemotePath::from_string(&path).expect("Failed to construct path")
|
||||
}
|
||||
|
||||
pub fn remote_timeline_path(tenant_id: &TenantId, timeline_id: &TimelineId) -> RemotePath {
|
||||
remote_timelines_path(tenant_id).join(Utf8Path::new(&timeline_id.to_string()))
|
||||
pub fn remote_timeline_path(
|
||||
tenant_shard_id: &TenantShardId,
|
||||
timeline_id: &TimelineId,
|
||||
) -> RemotePath {
|
||||
remote_timelines_path(tenant_shard_id).join(Utf8Path::new(&timeline_id.to_string()))
|
||||
}
|
||||
|
||||
/// Note that the shard component of a remote layer path is _not_ always the same
|
||||
/// as in the TenantShardId of the caller: tenants may reference layers from a different
|
||||
/// ShardIndex. Use the ShardIndex from the layer's metadata.
|
||||
pub fn remote_layer_path(
|
||||
tenant_id: &TenantId,
|
||||
timeline_id: &TimelineId,
|
||||
@@ -1637,14 +1633,12 @@ pub fn remote_initdb_archive_path(tenant_id: &TenantId, timeline_id: &TimelineId
|
||||
}
|
||||
|
||||
pub fn remote_index_path(
|
||||
tenant_id: &TenantId,
|
||||
tenant_shard_id: &TenantShardId,
|
||||
timeline_id: &TimelineId,
|
||||
shard: ShardIndex,
|
||||
generation: Generation,
|
||||
) -> RemotePath {
|
||||
RemotePath::from_string(&format!(
|
||||
"tenants/{tenant_id}{0}/{TIMELINES_SEGMENT_NAME}/{timeline_id}/{1}{2}",
|
||||
shard.get_suffix(),
|
||||
"tenants/{tenant_shard_id}/{TIMELINES_SEGMENT_NAME}/{timeline_id}/{0}{1}",
|
||||
IndexPart::FILE_NAME,
|
||||
generation.get_suffix()
|
||||
))
|
||||
@@ -1786,14 +1780,14 @@ mod tests {
|
||||
Arc::new(RemoteTimelineClient {
|
||||
conf: self.harness.conf,
|
||||
runtime: tokio::runtime::Handle::current(),
|
||||
tenant_id: self.harness.tenant_id,
|
||||
tenant_shard_id: self.harness.tenant_shard_id,
|
||||
timeline_id: TIMELINE_ID,
|
||||
generation,
|
||||
storage_impl: self.harness.remote_storage.clone(),
|
||||
deletion_queue_client: self.harness.deletion_queue.new_client(),
|
||||
upload_queue: Mutex::new(UploadQueue::Uninitialized),
|
||||
metrics: Arc::new(RemoteTimelineClientMetrics::new(
|
||||
&self.harness.tenant_id,
|
||||
&self.harness.tenant_shard_id,
|
||||
&TIMELINE_ID,
|
||||
)),
|
||||
})
|
||||
@@ -2100,11 +2094,7 @@ mod tests {
|
||||
assert_eq!(actual_c, expected_c);
|
||||
}
|
||||
|
||||
async fn inject_index_part(
|
||||
test_state: &TestSetup,
|
||||
generation: Generation,
|
||||
shard: ShardIndex,
|
||||
) -> IndexPart {
|
||||
async fn inject_index_part(test_state: &TestSetup, generation: Generation) -> IndexPart {
|
||||
// An empty IndexPart, just sufficient to ensure deserialization will succeed
|
||||
let example_metadata = TimelineMetadata::example();
|
||||
let example_index_part = IndexPart::new(
|
||||
@@ -2126,9 +2116,8 @@ mod tests {
|
||||
|
||||
let index_path = test_state.harness.remote_fs_dir.join(
|
||||
remote_index_path(
|
||||
&test_state.harness.tenant_id,
|
||||
&test_state.harness.tenant_shard_id,
|
||||
&TIMELINE_ID,
|
||||
shard,
|
||||
generation,
|
||||
)
|
||||
.get_path(),
|
||||
@@ -2168,12 +2157,7 @@ mod tests {
|
||||
|
||||
// Simple case: we are in generation N, load the index from generation N - 1
|
||||
let generation_n = 5;
|
||||
let injected = inject_index_part(
|
||||
&test_state,
|
||||
Generation::new(generation_n - 1),
|
||||
ShardIndex::unsharded(),
|
||||
)
|
||||
.await;
|
||||
let injected = inject_index_part(&test_state, Generation::new(generation_n - 1)).await;
|
||||
|
||||
assert_got_index_part(&test_state, Generation::new(generation_n), &injected).await;
|
||||
|
||||
@@ -2191,34 +2175,22 @@ mod tests {
|
||||
|
||||
// A generation-less IndexPart exists in the bucket, we should find it
|
||||
let generation_n = 5;
|
||||
let injected_none =
|
||||
inject_index_part(&test_state, Generation::none(), ShardIndex::unsharded()).await;
|
||||
let injected_none = inject_index_part(&test_state, Generation::none()).await;
|
||||
assert_got_index_part(&test_state, Generation::new(generation_n), &injected_none).await;
|
||||
|
||||
// If a more recent-than-none generation exists, we should prefer to load that
|
||||
let injected_1 =
|
||||
inject_index_part(&test_state, Generation::new(1), ShardIndex::unsharded()).await;
|
||||
let injected_1 = inject_index_part(&test_state, Generation::new(1)).await;
|
||||
assert_got_index_part(&test_state, Generation::new(generation_n), &injected_1).await;
|
||||
|
||||
// If a more-recent-than-me generation exists, we should ignore it.
|
||||
let _injected_10 =
|
||||
inject_index_part(&test_state, Generation::new(10), ShardIndex::unsharded()).await;
|
||||
let _injected_10 = inject_index_part(&test_state, Generation::new(10)).await;
|
||||
assert_got_index_part(&test_state, Generation::new(generation_n), &injected_1).await;
|
||||
|
||||
// If a directly previous generation exists, _and_ an index exists in my own
|
||||
// generation, I should prefer my own generation.
|
||||
let _injected_prev = inject_index_part(
|
||||
&test_state,
|
||||
Generation::new(generation_n - 1),
|
||||
ShardIndex::unsharded(),
|
||||
)
|
||||
.await;
|
||||
let injected_current = inject_index_part(
|
||||
&test_state,
|
||||
Generation::new(generation_n),
|
||||
ShardIndex::unsharded(),
|
||||
)
|
||||
.await;
|
||||
let _injected_prev =
|
||||
inject_index_part(&test_state, Generation::new(generation_n - 1)).await;
|
||||
let injected_current = inject_index_part(&test_state, Generation::new(generation_n)).await;
|
||||
assert_got_index_part(
|
||||
&test_state,
|
||||
Generation::new(generation_n),
|
||||
|
||||
@@ -9,7 +9,7 @@ use std::time::Duration;
|
||||
|
||||
use anyhow::{anyhow, Context};
|
||||
use camino::Utf8Path;
|
||||
use pageserver_api::shard::ShardIndex;
|
||||
use pageserver_api::shard::TenantShardId;
|
||||
use tokio::fs;
|
||||
use tokio::io::AsyncWriteExt;
|
||||
use tokio_util::sync::CancellationToken;
|
||||
@@ -22,7 +22,7 @@ use crate::tenant::timeline::span::debug_assert_current_span_has_tenant_and_time
|
||||
use crate::tenant::Generation;
|
||||
use remote_storage::{DownloadError, GenericRemoteStorage, ListingMode};
|
||||
use utils::crashsafe::path_with_suffix_extension;
|
||||
use utils::id::{TenantId, TimelineId};
|
||||
use utils::id::TimelineId;
|
||||
|
||||
use super::index::{IndexPart, LayerFileMetadata};
|
||||
use super::{
|
||||
@@ -40,7 +40,7 @@ static MAX_DOWNLOAD_DURATION: Duration = Duration::from_secs(120);
|
||||
pub async fn download_layer_file<'a>(
|
||||
conf: &'static PageServerConf,
|
||||
storage: &'a GenericRemoteStorage,
|
||||
tenant_id: TenantId,
|
||||
tenant_shard_id: TenantShardId,
|
||||
timeline_id: TimelineId,
|
||||
layer_file_name: &'a LayerFileName,
|
||||
layer_metadata: &'a LayerFileMetadata,
|
||||
@@ -48,11 +48,11 @@ pub async fn download_layer_file<'a>(
|
||||
debug_assert_current_span_has_tenant_and_timeline_id();
|
||||
|
||||
let local_path = conf
|
||||
.timeline_path(&tenant_id, &timeline_id)
|
||||
.timeline_path(&tenant_shard_id, &timeline_id)
|
||||
.join(layer_file_name.file_name());
|
||||
|
||||
let remote_path = remote_layer_path(
|
||||
&tenant_id,
|
||||
&tenant_shard_id.tenant_id,
|
||||
&timeline_id,
|
||||
layer_metadata.shard,
|
||||
layer_file_name,
|
||||
@@ -171,10 +171,10 @@ pub fn is_temp_download_file(path: &Utf8Path) -> bool {
|
||||
/// List timelines of given tenant in remote storage
|
||||
pub async fn list_remote_timelines(
|
||||
storage: &GenericRemoteStorage,
|
||||
tenant_id: TenantId,
|
||||
tenant_shard_id: TenantShardId,
|
||||
cancel: CancellationToken,
|
||||
) -> anyhow::Result<(HashSet<TimelineId>, HashSet<String>)> {
|
||||
let remote_path = remote_timelines_path(&tenant_id);
|
||||
let remote_path = remote_timelines_path(&tenant_shard_id);
|
||||
|
||||
fail::fail_point!("storage-sync-list-remote-timelines", |_| {
|
||||
anyhow::bail!("storage-sync-list-remote-timelines");
|
||||
@@ -182,7 +182,7 @@ pub async fn list_remote_timelines(
|
||||
|
||||
let listing = download_retry_forever(
|
||||
|| storage.list(Some(&remote_path), ListingMode::WithDelimiter),
|
||||
&format!("list timelines for {tenant_id}"),
|
||||
&format!("list timelines for {tenant_shard_id}"),
|
||||
cancel,
|
||||
)
|
||||
.await?;
|
||||
@@ -192,7 +192,7 @@ pub async fn list_remote_timelines(
|
||||
|
||||
for timeline_remote_storage_key in listing.prefixes {
|
||||
let object_name = timeline_remote_storage_key.object_name().ok_or_else(|| {
|
||||
anyhow::anyhow!("failed to get timeline id for remote tenant {tenant_id}")
|
||||
anyhow::anyhow!("failed to get timeline id for remote tenant {tenant_shard_id}")
|
||||
})?;
|
||||
|
||||
match object_name.parse::<TimelineId>() {
|
||||
@@ -213,13 +213,12 @@ pub async fn list_remote_timelines(
|
||||
|
||||
async fn do_download_index_part(
|
||||
storage: &GenericRemoteStorage,
|
||||
tenant_id: &TenantId,
|
||||
tenant_shard_id: &TenantShardId,
|
||||
timeline_id: &TimelineId,
|
||||
shard: ShardIndex,
|
||||
index_generation: Generation,
|
||||
cancel: CancellationToken,
|
||||
) -> Result<IndexPart, DownloadError> {
|
||||
let remote_path = remote_index_path(tenant_id, timeline_id, shard, index_generation);
|
||||
let remote_path = remote_index_path(tenant_shard_id, timeline_id, index_generation);
|
||||
|
||||
let index_part_bytes = download_retry_forever(
|
||||
|| async {
|
||||
@@ -255,9 +254,8 @@ async fn do_download_index_part(
|
||||
#[tracing::instrument(skip_all, fields(generation=?my_generation))]
|
||||
pub(super) async fn download_index_part(
|
||||
storage: &GenericRemoteStorage,
|
||||
tenant_id: &TenantId,
|
||||
tenant_shard_id: &TenantShardId,
|
||||
timeline_id: &TimelineId,
|
||||
shard: ShardIndex,
|
||||
my_generation: Generation,
|
||||
cancel: CancellationToken,
|
||||
) -> Result<IndexPart, DownloadError> {
|
||||
@@ -267,9 +265,8 @@ pub(super) async fn download_index_part(
|
||||
// Operating without generations: just fetch the generation-less path
|
||||
return do_download_index_part(
|
||||
storage,
|
||||
tenant_id,
|
||||
tenant_shard_id,
|
||||
timeline_id,
|
||||
shard,
|
||||
my_generation,
|
||||
cancel,
|
||||
)
|
||||
@@ -282,9 +279,8 @@ pub(super) async fn download_index_part(
|
||||
// This is an optimization to avoid doing the listing for the general case below.
|
||||
let res = do_download_index_part(
|
||||
storage,
|
||||
tenant_id,
|
||||
tenant_shard_id,
|
||||
timeline_id,
|
||||
shard,
|
||||
my_generation,
|
||||
cancel.clone(),
|
||||
)
|
||||
@@ -310,9 +306,8 @@ pub(super) async fn download_index_part(
|
||||
// This is an optimization to avoid doing the listing for the general case below.
|
||||
let res = do_download_index_part(
|
||||
storage,
|
||||
tenant_id,
|
||||
tenant_shard_id,
|
||||
timeline_id,
|
||||
shard,
|
||||
my_generation.previous(),
|
||||
cancel.clone(),
|
||||
)
|
||||
@@ -335,7 +330,7 @@ pub(super) async fn download_index_part(
|
||||
// General case/fallback: if there is no index at my_generation or prev_generation, then list all index_part.json
|
||||
// objects, and select the highest one with a generation <= my_generation. Constructing the prefix is equivalent
|
||||
// to constructing a full index path with no generation, because the generation is a suffix.
|
||||
let index_prefix = remote_index_path(tenant_id, timeline_id, shard, Generation::none());
|
||||
let index_prefix = remote_index_path(tenant_shard_id, timeline_id, Generation::none());
|
||||
let indices = backoff::retry(
|
||||
|| async { storage.list_files(Some(&index_prefix)).await },
|
||||
|_| false,
|
||||
@@ -361,7 +356,7 @@ pub(super) async fn download_index_part(
|
||||
match max_previous_generation {
|
||||
Some(g) => {
|
||||
tracing::debug!("Found index_part in generation {g:?}");
|
||||
do_download_index_part(storage, tenant_id, timeline_id, shard, g, cancel).await
|
||||
do_download_index_part(storage, tenant_shard_id, timeline_id, g, cancel).await
|
||||
}
|
||||
None => {
|
||||
// Migration from legacy pre-generation state: we have a generation but no prior
|
||||
@@ -369,9 +364,8 @@ pub(super) async fn download_index_part(
|
||||
tracing::info!("No index_part.json* found");
|
||||
do_download_index_part(
|
||||
storage,
|
||||
tenant_id,
|
||||
tenant_shard_id,
|
||||
timeline_id,
|
||||
shard,
|
||||
Generation::none(),
|
||||
cancel,
|
||||
)
|
||||
|
||||
@@ -4,7 +4,7 @@ use anyhow::{bail, Context};
|
||||
use bytes::Bytes;
|
||||
use camino::Utf8Path;
|
||||
use fail::fail_point;
|
||||
use pageserver_api::shard::ShardIndex;
|
||||
use pageserver_api::shard::TenantShardId;
|
||||
use std::io::ErrorKind;
|
||||
use tokio::fs;
|
||||
|
||||
@@ -25,9 +25,8 @@ use tracing::info;
|
||||
/// Serializes and uploads the given index part data to the remote storage.
|
||||
pub(super) async fn upload_index_part<'a>(
|
||||
storage: &'a GenericRemoteStorage,
|
||||
tenant_id: &TenantId,
|
||||
tenant_shard_id: &TenantShardId,
|
||||
timeline_id: &TimelineId,
|
||||
shard: ShardIndex,
|
||||
generation: Generation,
|
||||
index_part: &'a IndexPart,
|
||||
) -> anyhow::Result<()> {
|
||||
@@ -44,11 +43,11 @@ pub(super) async fn upload_index_part<'a>(
|
||||
let index_part_size = index_part_bytes.len();
|
||||
let index_part_bytes = tokio::io::BufReader::new(std::io::Cursor::new(index_part_bytes));
|
||||
|
||||
let remote_path = remote_index_path(tenant_id, timeline_id, shard, generation);
|
||||
let remote_path = remote_index_path(tenant_shard_id, timeline_id, generation);
|
||||
storage
|
||||
.upload_storage_object(Box::new(index_part_bytes), index_part_size, &remote_path)
|
||||
.await
|
||||
.with_context(|| format!("upload index part for '{tenant_id} / {timeline_id}'"))
|
||||
.with_context(|| format!("upload index part for '{tenant_shard_id} / {timeline_id}'"))
|
||||
}
|
||||
|
||||
/// Attempts to upload given layer files.
|
||||
|
||||
@@ -24,10 +24,7 @@ use tracing::warn;
|
||||
use utils::history_buffer::HistoryBufferWithDropCounter;
|
||||
use utils::rate_limit::RateLimit;
|
||||
|
||||
use utils::{
|
||||
id::{TenantId, TimelineId},
|
||||
lsn::Lsn,
|
||||
};
|
||||
use utils::{id::TimelineId, lsn::Lsn};
|
||||
|
||||
pub use delta_layer::{DeltaLayer, DeltaLayerWriter, ValueRef};
|
||||
pub use filename::{DeltaFileName, ImageFileName, LayerFileName};
|
||||
@@ -304,12 +301,14 @@ pub trait AsLayerDesc {
|
||||
}
|
||||
|
||||
pub mod tests {
|
||||
use pageserver_api::shard::TenantShardId;
|
||||
|
||||
use super::*;
|
||||
|
||||
impl From<DeltaFileName> for PersistentLayerDesc {
|
||||
fn from(value: DeltaFileName) -> Self {
|
||||
PersistentLayerDesc::new_delta(
|
||||
TenantId::from_array([0; 16]),
|
||||
TenantShardId::from([0; 18]),
|
||||
TimelineId::from_array([0; 16]),
|
||||
value.key_range,
|
||||
value.lsn_range,
|
||||
@@ -321,7 +320,7 @@ pub mod tests {
|
||||
impl From<ImageFileName> for PersistentLayerDesc {
|
||||
fn from(value: ImageFileName) -> Self {
|
||||
PersistentLayerDesc::new_img(
|
||||
TenantId::from_array([0; 16]),
|
||||
TenantShardId::from([0; 18]),
|
||||
TimelineId::from_array([0; 16]),
|
||||
value.key_range,
|
||||
value.lsn,
|
||||
|
||||
@@ -42,6 +42,7 @@ use crate::{DELTA_FILE_MAGIC, STORAGE_FORMAT_VERSION};
|
||||
use anyhow::{bail, ensure, Context, Result};
|
||||
use camino::{Utf8Path, Utf8PathBuf};
|
||||
use pageserver_api::models::LayerAccessKind;
|
||||
use pageserver_api::shard::TenantShardId;
|
||||
use rand::{distributions::Alphanumeric, Rng};
|
||||
use serde::{Deserialize, Serialize};
|
||||
use std::fs::File;
|
||||
@@ -86,7 +87,7 @@ pub struct Summary {
|
||||
impl From<&DeltaLayer> for Summary {
|
||||
fn from(layer: &DeltaLayer) -> Self {
|
||||
Self::expected(
|
||||
layer.desc.tenant_id,
|
||||
layer.desc.tenant_shard_id.tenant_id,
|
||||
layer.desc.timeline_id,
|
||||
layer.desc.key_range.clone(),
|
||||
layer.desc.lsn_range.clone(),
|
||||
@@ -248,7 +249,7 @@ impl DeltaLayer {
|
||||
|
||||
fn temp_path_for(
|
||||
conf: &PageServerConf,
|
||||
tenant_id: &TenantId,
|
||||
tenant_shard_id: &TenantShardId,
|
||||
timeline_id: &TimelineId,
|
||||
key_start: Key,
|
||||
lsn_range: &Range<Lsn>,
|
||||
@@ -259,14 +260,15 @@ impl DeltaLayer {
|
||||
.map(char::from)
|
||||
.collect();
|
||||
|
||||
conf.timeline_path(tenant_id, timeline_id).join(format!(
|
||||
"{}-XXX__{:016X}-{:016X}.{}.{}",
|
||||
key_start,
|
||||
u64::from(lsn_range.start),
|
||||
u64::from(lsn_range.end),
|
||||
rand_string,
|
||||
TEMP_FILE_SUFFIX,
|
||||
))
|
||||
conf.timeline_path(tenant_shard_id, timeline_id)
|
||||
.join(format!(
|
||||
"{}-XXX__{:016X}-{:016X}.{}.{}",
|
||||
key_start,
|
||||
u64::from(lsn_range.start),
|
||||
u64::from(lsn_range.end),
|
||||
rand_string,
|
||||
TEMP_FILE_SUFFIX,
|
||||
))
|
||||
}
|
||||
|
||||
///
|
||||
@@ -318,10 +320,14 @@ impl DeltaLayer {
|
||||
.metadata()
|
||||
.context("get file metadata to determine size")?;
|
||||
|
||||
// TODO(sharding): we must get the TenantShardId from the path instead of reading the Summary.
|
||||
// we should also validate the path against the Summary, as both should contain the same tenant, timeline, key, lsn.
|
||||
let tenant_shard_id = TenantShardId::unsharded(summary.tenant_id);
|
||||
|
||||
Ok(DeltaLayer {
|
||||
path: path.to_path_buf(),
|
||||
desc: PersistentLayerDesc::new_delta(
|
||||
summary.tenant_id,
|
||||
tenant_shard_id,
|
||||
summary.timeline_id,
|
||||
summary.key_range,
|
||||
summary.lsn_range,
|
||||
@@ -353,7 +359,7 @@ struct DeltaLayerWriterInner {
|
||||
conf: &'static PageServerConf,
|
||||
pub path: Utf8PathBuf,
|
||||
timeline_id: TimelineId,
|
||||
tenant_id: TenantId,
|
||||
tenant_shard_id: TenantShardId,
|
||||
|
||||
key_start: Key,
|
||||
lsn_range: Range<Lsn>,
|
||||
@@ -370,7 +376,7 @@ impl DeltaLayerWriterInner {
|
||||
async fn new(
|
||||
conf: &'static PageServerConf,
|
||||
timeline_id: TimelineId,
|
||||
tenant_id: TenantId,
|
||||
tenant_shard_id: TenantShardId,
|
||||
key_start: Key,
|
||||
lsn_range: Range<Lsn>,
|
||||
) -> anyhow::Result<Self> {
|
||||
@@ -380,7 +386,8 @@ impl DeltaLayerWriterInner {
|
||||
//
|
||||
// Note: This overwrites any existing file. There shouldn't be any.
|
||||
// FIXME: throw an error instead?
|
||||
let path = DeltaLayer::temp_path_for(conf, &tenant_id, &timeline_id, key_start, &lsn_range);
|
||||
let path =
|
||||
DeltaLayer::temp_path_for(conf, &tenant_shard_id, &timeline_id, key_start, &lsn_range);
|
||||
|
||||
let mut file = VirtualFile::create(&path).await?;
|
||||
// make room for the header block
|
||||
@@ -395,7 +402,7 @@ impl DeltaLayerWriterInner {
|
||||
conf,
|
||||
path,
|
||||
timeline_id,
|
||||
tenant_id,
|
||||
tenant_shard_id,
|
||||
key_start,
|
||||
lsn_range,
|
||||
tree: tree_builder,
|
||||
@@ -457,7 +464,7 @@ impl DeltaLayerWriterInner {
|
||||
let summary = Summary {
|
||||
magic: DELTA_FILE_MAGIC,
|
||||
format_version: STORAGE_FORMAT_VERSION,
|
||||
tenant_id: self.tenant_id,
|
||||
tenant_id: self.tenant_shard_id.tenant_id,
|
||||
timeline_id: self.timeline_id,
|
||||
key_range: self.key_start..key_end,
|
||||
lsn_range: self.lsn_range.clone(),
|
||||
@@ -498,7 +505,7 @@ impl DeltaLayerWriterInner {
|
||||
// set inner.file here. The first read will have to re-open it.
|
||||
|
||||
let desc = PersistentLayerDesc::new_delta(
|
||||
self.tenant_id,
|
||||
self.tenant_shard_id,
|
||||
self.timeline_id,
|
||||
self.key_start..key_end,
|
||||
self.lsn_range.clone(),
|
||||
@@ -549,14 +556,20 @@ impl DeltaLayerWriter {
|
||||
pub async fn new(
|
||||
conf: &'static PageServerConf,
|
||||
timeline_id: TimelineId,
|
||||
tenant_id: TenantId,
|
||||
tenant_shard_id: TenantShardId,
|
||||
key_start: Key,
|
||||
lsn_range: Range<Lsn>,
|
||||
) -> anyhow::Result<Self> {
|
||||
Ok(Self {
|
||||
inner: Some(
|
||||
DeltaLayerWriterInner::new(conf, timeline_id, tenant_id, key_start, lsn_range)
|
||||
.await?,
|
||||
DeltaLayerWriterInner::new(
|
||||
conf,
|
||||
timeline_id,
|
||||
tenant_shard_id,
|
||||
key_start,
|
||||
lsn_range,
|
||||
)
|
||||
.await?,
|
||||
),
|
||||
})
|
||||
}
|
||||
|
||||
@@ -41,6 +41,7 @@ use bytes::Bytes;
|
||||
use camino::{Utf8Path, Utf8PathBuf};
|
||||
use hex;
|
||||
use pageserver_api::models::LayerAccessKind;
|
||||
use pageserver_api::shard::TenantShardId;
|
||||
use rand::{distributions::Alphanumeric, Rng};
|
||||
use serde::{Deserialize, Serialize};
|
||||
use std::fs::File;
|
||||
@@ -87,7 +88,7 @@ pub(super) struct Summary {
|
||||
impl From<&ImageLayer> for Summary {
|
||||
fn from(layer: &ImageLayer) -> Self {
|
||||
Self::expected(
|
||||
layer.desc.tenant_id,
|
||||
layer.desc.tenant_shard_id.tenant_id,
|
||||
layer.desc.timeline_id,
|
||||
layer.desc.key_range.clone(),
|
||||
layer.lsn,
|
||||
@@ -217,7 +218,7 @@ impl ImageLayer {
|
||||
fn temp_path_for(
|
||||
conf: &PageServerConf,
|
||||
timeline_id: TimelineId,
|
||||
tenant_id: TenantId,
|
||||
tenant_shard_id: TenantShardId,
|
||||
fname: &ImageFileName,
|
||||
) -> Utf8PathBuf {
|
||||
let rand_string: String = rand::thread_rng()
|
||||
@@ -226,7 +227,7 @@ impl ImageLayer {
|
||||
.map(char::from)
|
||||
.collect();
|
||||
|
||||
conf.timeline_path(&tenant_id, &timeline_id)
|
||||
conf.timeline_path(&tenant_shard_id, &timeline_id)
|
||||
.join(format!("{fname}.{rand_string}.{TEMP_FILE_SUFFIX}"))
|
||||
}
|
||||
|
||||
@@ -276,10 +277,15 @@ impl ImageLayer {
|
||||
let metadata = file
|
||||
.metadata()
|
||||
.context("get file metadata to determine size")?;
|
||||
|
||||
// TODO(sharding): we should get TenantShardId from path.
|
||||
// OR, not at all: any layer we load from disk should also get reconciled with remote IndexPart.
|
||||
let tenant_shard_id = TenantShardId::unsharded(summary.tenant_id);
|
||||
|
||||
Ok(ImageLayer {
|
||||
path: path.to_path_buf(),
|
||||
desc: PersistentLayerDesc::new_img(
|
||||
summary.tenant_id,
|
||||
tenant_shard_id,
|
||||
summary.timeline_id,
|
||||
summary.key_range,
|
||||
summary.lsn,
|
||||
@@ -400,7 +406,7 @@ struct ImageLayerWriterInner {
|
||||
conf: &'static PageServerConf,
|
||||
path: Utf8PathBuf,
|
||||
timeline_id: TimelineId,
|
||||
tenant_id: TenantId,
|
||||
tenant_shard_id: TenantShardId,
|
||||
key_range: Range<Key>,
|
||||
lsn: Lsn,
|
||||
|
||||
@@ -415,7 +421,7 @@ impl ImageLayerWriterInner {
|
||||
async fn new(
|
||||
conf: &'static PageServerConf,
|
||||
timeline_id: TimelineId,
|
||||
tenant_id: TenantId,
|
||||
tenant_shard_id: TenantShardId,
|
||||
key_range: &Range<Key>,
|
||||
lsn: Lsn,
|
||||
) -> anyhow::Result<Self> {
|
||||
@@ -424,7 +430,7 @@ impl ImageLayerWriterInner {
|
||||
let path = ImageLayer::temp_path_for(
|
||||
conf,
|
||||
timeline_id,
|
||||
tenant_id,
|
||||
tenant_shard_id,
|
||||
&ImageFileName {
|
||||
key_range: key_range.clone(),
|
||||
lsn,
|
||||
@@ -448,7 +454,7 @@ impl ImageLayerWriterInner {
|
||||
conf,
|
||||
path,
|
||||
timeline_id,
|
||||
tenant_id,
|
||||
tenant_shard_id,
|
||||
key_range: key_range.clone(),
|
||||
lsn,
|
||||
tree: tree_builder,
|
||||
@@ -495,7 +501,7 @@ impl ImageLayerWriterInner {
|
||||
let summary = Summary {
|
||||
magic: IMAGE_FILE_MAGIC,
|
||||
format_version: STORAGE_FORMAT_VERSION,
|
||||
tenant_id: self.tenant_id,
|
||||
tenant_id: self.tenant_shard_id.tenant_id,
|
||||
timeline_id: self.timeline_id,
|
||||
key_range: self.key_range.clone(),
|
||||
lsn: self.lsn,
|
||||
@@ -521,7 +527,7 @@ impl ImageLayerWriterInner {
|
||||
.context("get metadata to determine file size")?;
|
||||
|
||||
let desc = PersistentLayerDesc::new_img(
|
||||
self.tenant_id,
|
||||
self.tenant_shard_id,
|
||||
self.timeline_id,
|
||||
self.key_range.clone(),
|
||||
self.lsn,
|
||||
@@ -577,13 +583,14 @@ impl ImageLayerWriter {
|
||||
pub async fn new(
|
||||
conf: &'static PageServerConf,
|
||||
timeline_id: TimelineId,
|
||||
tenant_id: TenantId,
|
||||
tenant_shard_id: TenantShardId,
|
||||
key_range: &Range<Key>,
|
||||
lsn: Lsn,
|
||||
) -> anyhow::Result<ImageLayerWriter> {
|
||||
Ok(Self {
|
||||
inner: Some(
|
||||
ImageLayerWriterInner::new(conf, timeline_id, tenant_id, key_range, lsn).await?,
|
||||
ImageLayerWriterInner::new(conf, timeline_id, tenant_shard_id, key_range, lsn)
|
||||
.await?,
|
||||
),
|
||||
})
|
||||
}
|
||||
|
||||
@@ -14,15 +14,11 @@ use crate::tenant::Timeline;
|
||||
use crate::walrecord;
|
||||
use anyhow::{ensure, Result};
|
||||
use pageserver_api::models::InMemoryLayerInfo;
|
||||
use pageserver_api::shard::TenantShardId;
|
||||
use std::collections::HashMap;
|
||||
use std::sync::{Arc, OnceLock};
|
||||
use tracing::*;
|
||||
use utils::{
|
||||
bin_ser::BeSer,
|
||||
id::{TenantId, TimelineId},
|
||||
lsn::Lsn,
|
||||
vec_map::VecMap,
|
||||
};
|
||||
use utils::{bin_ser::BeSer, id::TimelineId, lsn::Lsn, vec_map::VecMap};
|
||||
// avoid binding to Write (conflicts with std::io::Write)
|
||||
// while being able to use std::fmt::Write's methods
|
||||
use std::fmt::Write as _;
|
||||
@@ -33,7 +29,7 @@ use super::{DeltaLayerWriter, ResidentLayer};
|
||||
|
||||
pub struct InMemoryLayer {
|
||||
conf: &'static PageServerConf,
|
||||
tenant_id: TenantId,
|
||||
tenant_shard_id: TenantShardId,
|
||||
timeline_id: TimelineId,
|
||||
|
||||
/// This layer contains all the changes from 'start_lsn'. The
|
||||
@@ -226,17 +222,17 @@ impl InMemoryLayer {
|
||||
pub async fn create(
|
||||
conf: &'static PageServerConf,
|
||||
timeline_id: TimelineId,
|
||||
tenant_id: TenantId,
|
||||
tenant_shard_id: TenantShardId,
|
||||
start_lsn: Lsn,
|
||||
) -> Result<InMemoryLayer> {
|
||||
trace!("initializing new empty InMemoryLayer for writing on timeline {timeline_id} at {start_lsn}");
|
||||
|
||||
let file = EphemeralFile::create(conf, tenant_id, timeline_id).await?;
|
||||
let file = EphemeralFile::create(conf, tenant_shard_id, timeline_id).await?;
|
||||
|
||||
Ok(InMemoryLayer {
|
||||
conf,
|
||||
timeline_id,
|
||||
tenant_id,
|
||||
tenant_shard_id,
|
||||
start_lsn,
|
||||
end_lsn: OnceLock::new(),
|
||||
inner: RwLock::new(InMemoryLayerInner {
|
||||
@@ -335,7 +331,7 @@ impl InMemoryLayer {
|
||||
let mut delta_layer_writer = DeltaLayerWriter::new(
|
||||
self.conf,
|
||||
self.timeline_id,
|
||||
self.tenant_id,
|
||||
self.tenant_shard_id,
|
||||
Key::MIN,
|
||||
self.start_lsn..end_lsn,
|
||||
)
|
||||
|
||||
@@ -82,7 +82,7 @@ impl Layer {
|
||||
metadata: LayerFileMetadata,
|
||||
) -> Self {
|
||||
let desc = PersistentLayerDesc::from_filename(
|
||||
timeline.tenant_id,
|
||||
timeline.tenant_shard_id,
|
||||
timeline.timeline_id,
|
||||
file_name,
|
||||
metadata.file_size(),
|
||||
@@ -113,7 +113,7 @@ impl Layer {
|
||||
metadata: LayerFileMetadata,
|
||||
) -> ResidentLayer {
|
||||
let desc = PersistentLayerDesc::from_filename(
|
||||
timeline.tenant_id,
|
||||
timeline.tenant_shard_id,
|
||||
timeline.timeline_id,
|
||||
file_name,
|
||||
metadata.file_size(),
|
||||
@@ -486,7 +486,7 @@ impl Drop for LayerInner {
|
||||
return;
|
||||
}
|
||||
|
||||
let span = tracing::info_span!(parent: None, "layer_gc", tenant_id = %self.layer_desc().tenant_id, timeline_id = %self.layer_desc().timeline_id);
|
||||
let span = tracing::info_span!(parent: None, "layer_gc", tenant_id = %self.layer_desc().tenant_shard_id.tenant_id, shard_id=%self.layer_desc().tenant_shard_id.shard_slug(), timeline_id = %self.layer_desc().timeline_id);
|
||||
|
||||
let path = std::mem::take(&mut self.path);
|
||||
let file_name = self.layer_desc().filename();
|
||||
@@ -561,7 +561,7 @@ impl LayerInner {
|
||||
shard: ShardIndex,
|
||||
) -> Self {
|
||||
let path = conf
|
||||
.timeline_path(&timeline.tenant_id, &timeline.timeline_id)
|
||||
.timeline_path(&timeline.tenant_shard_id, &timeline.timeline_id)
|
||||
.join(desc.filename().to_string());
|
||||
|
||||
let (inner, version) = if let Some(inner) = downloaded {
|
||||
@@ -832,7 +832,7 @@ impl LayerInner {
|
||||
crate::task_mgr::spawn(
|
||||
&tokio::runtime::Handle::current(),
|
||||
crate::task_mgr::TaskKind::RemoteDownloadTask,
|
||||
Some(self.desc.tenant_id),
|
||||
Some(self.desc.tenant_shard_id.tenant_id),
|
||||
Some(self.desc.timeline_id),
|
||||
&task_name,
|
||||
false,
|
||||
@@ -997,7 +997,7 @@ impl LayerInner {
|
||||
if gc {
|
||||
// do nothing now, only in LayerInner::drop
|
||||
} else if can_evict && evict {
|
||||
let span = tracing::info_span!(parent: None, "layer_evict", tenant_id = %self.desc.tenant_id, timeline_id = %self.desc.timeline_id, layer=%self, %version);
|
||||
let span = tracing::info_span!(parent: None, "layer_evict", tenant_id = %self.desc.tenant_shard_id.tenant_id, shard_id = %self.desc.tenant_shard_id.shard_slug(), timeline_id = %self.desc.timeline_id, layer=%self, %version);
|
||||
|
||||
// downgrade for queueing, in case there's a tear down already ongoing we should not
|
||||
// hold it alive.
|
||||
@@ -1229,7 +1229,7 @@ impl DownloadedLayer {
|
||||
|
||||
let res = if owner.desc.is_delta {
|
||||
let summary = Some(delta_layer::Summary::expected(
|
||||
owner.desc.tenant_id,
|
||||
owner.desc.tenant_shard_id.tenant_id,
|
||||
owner.desc.timeline_id,
|
||||
owner.desc.key_range.clone(),
|
||||
owner.desc.lsn_range.clone(),
|
||||
@@ -1240,7 +1240,7 @@ impl DownloadedLayer {
|
||||
} else {
|
||||
let lsn = owner.desc.image_layer_lsn();
|
||||
let summary = Some(image_layer::Summary::expected(
|
||||
owner.desc.tenant_id,
|
||||
owner.desc.tenant_shard_id.tenant_id,
|
||||
owner.desc.timeline_id,
|
||||
owner.desc.key_range.clone(),
|
||||
lsn,
|
||||
|
||||
@@ -1,9 +1,7 @@
|
||||
use core::fmt::Display;
|
||||
use pageserver_api::shard::TenantShardId;
|
||||
use std::ops::Range;
|
||||
use utils::{
|
||||
id::{TenantId, TimelineId},
|
||||
lsn::Lsn,
|
||||
};
|
||||
use utils::{id::TimelineId, lsn::Lsn};
|
||||
|
||||
use crate::repository::Key;
|
||||
|
||||
@@ -11,12 +9,15 @@ use super::{DeltaFileName, ImageFileName, LayerFileName};
|
||||
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
#[cfg(test)]
|
||||
use utils::id::TenantId;
|
||||
|
||||
/// A unique identifier of a persistent layer. This is different from `LayerDescriptor`, which is only used in the
|
||||
/// benchmarks. This struct contains all necessary information to find the image / delta layer. It also provides
|
||||
/// a unified way to generate layer information like file name.
|
||||
#[derive(Debug, PartialEq, Eq, Clone, Serialize, Deserialize)]
|
||||
pub struct PersistentLayerDesc {
|
||||
pub tenant_id: TenantId,
|
||||
pub tenant_shard_id: TenantShardId,
|
||||
pub timeline_id: TimelineId,
|
||||
/// Range of keys that this layer covers
|
||||
pub key_range: Range<Key>,
|
||||
@@ -56,7 +57,7 @@ impl PersistentLayerDesc {
|
||||
#[cfg(test)]
|
||||
pub fn new_test(key_range: Range<Key>) -> Self {
|
||||
Self {
|
||||
tenant_id: TenantId::generate(),
|
||||
tenant_shard_id: TenantShardId::unsharded(TenantId::generate()),
|
||||
timeline_id: TimelineId::generate(),
|
||||
key_range,
|
||||
lsn_range: Lsn(0)..Lsn(1),
|
||||
@@ -66,14 +67,14 @@ impl PersistentLayerDesc {
|
||||
}
|
||||
|
||||
pub fn new_img(
|
||||
tenant_id: TenantId,
|
||||
tenant_shard_id: TenantShardId,
|
||||
timeline_id: TimelineId,
|
||||
key_range: Range<Key>,
|
||||
lsn: Lsn,
|
||||
file_size: u64,
|
||||
) -> Self {
|
||||
Self {
|
||||
tenant_id,
|
||||
tenant_shard_id,
|
||||
timeline_id,
|
||||
key_range,
|
||||
lsn_range: Self::image_layer_lsn_range(lsn),
|
||||
@@ -83,14 +84,14 @@ impl PersistentLayerDesc {
|
||||
}
|
||||
|
||||
pub fn new_delta(
|
||||
tenant_id: TenantId,
|
||||
tenant_shard_id: TenantShardId,
|
||||
timeline_id: TimelineId,
|
||||
key_range: Range<Key>,
|
||||
lsn_range: Range<Lsn>,
|
||||
file_size: u64,
|
||||
) -> Self {
|
||||
Self {
|
||||
tenant_id,
|
||||
tenant_shard_id,
|
||||
timeline_id,
|
||||
key_range,
|
||||
lsn_range,
|
||||
@@ -100,18 +101,22 @@ impl PersistentLayerDesc {
|
||||
}
|
||||
|
||||
pub fn from_filename(
|
||||
tenant_id: TenantId,
|
||||
tenant_shard_id: TenantShardId,
|
||||
timeline_id: TimelineId,
|
||||
filename: LayerFileName,
|
||||
file_size: u64,
|
||||
) -> Self {
|
||||
match filename {
|
||||
LayerFileName::Image(i) => {
|
||||
Self::new_img(tenant_id, timeline_id, i.key_range, i.lsn, file_size)
|
||||
}
|
||||
LayerFileName::Delta(d) => {
|
||||
Self::new_delta(tenant_id, timeline_id, d.key_range, d.lsn_range, file_size)
|
||||
Self::new_img(tenant_shard_id, timeline_id, i.key_range, i.lsn, file_size)
|
||||
}
|
||||
LayerFileName::Delta(d) => Self::new_delta(
|
||||
tenant_shard_id,
|
||||
timeline_id,
|
||||
d.key_range,
|
||||
d.lsn_range,
|
||||
file_size,
|
||||
),
|
||||
}
|
||||
}
|
||||
|
||||
@@ -172,10 +177,6 @@ impl PersistentLayerDesc {
|
||||
self.timeline_id
|
||||
}
|
||||
|
||||
pub fn get_tenant_id(&self) -> TenantId {
|
||||
self.tenant_id
|
||||
}
|
||||
|
||||
/// Does this layer only contain some data for the key-range (incremental),
|
||||
/// or does it contain a version of every page? This is important to know
|
||||
/// for garbage collecting old layers: an incremental layer depends on
|
||||
@@ -192,7 +193,7 @@ impl PersistentLayerDesc {
|
||||
if self.is_delta {
|
||||
println!(
|
||||
"----- delta layer for ten {} tli {} keys {}-{} lsn {}-{} is_incremental {} size {} ----",
|
||||
self.tenant_id,
|
||||
self.tenant_shard_id,
|
||||
self.timeline_id,
|
||||
self.key_range.start,
|
||||
self.key_range.end,
|
||||
@@ -204,7 +205,7 @@ impl PersistentLayerDesc {
|
||||
} else {
|
||||
println!(
|
||||
"----- image layer for ten {} tli {} key {}-{} at {} is_incremental {} size {} ----",
|
||||
self.tenant_id,
|
||||
self.tenant_shard_id,
|
||||
self.timeline_id,
|
||||
self.key_range.start,
|
||||
self.key_range.end,
|
||||
|
||||
@@ -86,7 +86,7 @@ pub fn start_background_loops(
|
||||
tenant: &Arc<Tenant>,
|
||||
background_jobs_can_start: Option<&completion::Barrier>,
|
||||
) {
|
||||
let tenant_id = tenant.tenant_id;
|
||||
let tenant_id = tenant.tenant_shard_id.tenant_id;
|
||||
task_mgr::spawn(
|
||||
BACKGROUND_RUNTIME.handle(),
|
||||
TaskKind::Compaction,
|
||||
|
||||
@@ -13,8 +13,12 @@ use camino::{Utf8Path, Utf8PathBuf};
|
||||
use enumset::EnumSet;
|
||||
use fail::fail_point;
|
||||
use itertools::Itertools;
|
||||
use pageserver_api::models::{
|
||||
DownloadRemoteLayersTaskInfo, DownloadRemoteLayersTaskSpawnRequest, LayerMapInfo, TimelineState,
|
||||
use pageserver_api::{
|
||||
models::{
|
||||
DownloadRemoteLayersTaskInfo, DownloadRemoteLayersTaskSpawnRequest, LayerMapInfo,
|
||||
TimelineState,
|
||||
},
|
||||
shard::TenantShardId,
|
||||
};
|
||||
use serde_with::serde_as;
|
||||
use storage_broker::BrokerClientChannel;
|
||||
@@ -149,7 +153,7 @@ pub struct Timeline {
|
||||
|
||||
myself: Weak<Self>,
|
||||
|
||||
pub tenant_id: TenantId,
|
||||
pub(crate) tenant_shard_id: TenantShardId,
|
||||
pub timeline_id: TimelineId,
|
||||
|
||||
/// The generation of the tenant that instantiated us: this is used for safety when writing remote objects.
|
||||
@@ -701,7 +705,7 @@ impl Timeline {
|
||||
}
|
||||
|
||||
/// Flush to disk all data that was written with the put_* functions
|
||||
#[instrument(skip(self), fields(tenant_id=%self.tenant_id, timeline_id=%self.timeline_id))]
|
||||
#[instrument(skip(self), fields(tenant_id=%self.tenant_shard_id.tenant_id, shard_id=%self.tenant_shard_id.shard_slug(), timeline_id=%self.timeline_id))]
|
||||
pub async fn freeze_and_flush(&self) -> anyhow::Result<()> {
|
||||
self.freeze_inmem_layer(false).await;
|
||||
self.flush_frozen_layers_and_wait().await
|
||||
@@ -937,7 +941,7 @@ impl Timeline {
|
||||
tracing::debug!("Waiting for WalReceiverManager...");
|
||||
task_mgr::shutdown_tasks(
|
||||
Some(TaskKind::WalReceiverManager),
|
||||
Some(self.tenant_id),
|
||||
Some(self.tenant_shard_id.tenant_id),
|
||||
Some(self.timeline_id),
|
||||
)
|
||||
.await;
|
||||
@@ -988,7 +992,7 @@ impl Timeline {
|
||||
// Shut down the layer flush task before the remote client, as one depends on the other
|
||||
task_mgr::shutdown_tasks(
|
||||
Some(TaskKind::LayerFlushTask),
|
||||
Some(self.tenant_id),
|
||||
Some(self.tenant_shard_id.tenant_id),
|
||||
Some(self.timeline_id),
|
||||
)
|
||||
.await;
|
||||
@@ -1006,7 +1010,12 @@ impl Timeline {
|
||||
|
||||
tracing::debug!("Waiting for tasks...");
|
||||
|
||||
task_mgr::shutdown_tasks(None, Some(self.tenant_id), Some(self.timeline_id)).await;
|
||||
task_mgr::shutdown_tasks(
|
||||
None,
|
||||
Some(self.tenant_shard_id.tenant_id),
|
||||
Some(self.timeline_id),
|
||||
)
|
||||
.await;
|
||||
|
||||
// Finally wait until any gate-holders are complete
|
||||
self.gate.close().await;
|
||||
@@ -1125,7 +1134,7 @@ impl Timeline {
|
||||
}
|
||||
}
|
||||
|
||||
#[instrument(skip_all, fields(tenant_id = %self.tenant_id, timeline_id = %self.timeline_id))]
|
||||
#[instrument(skip_all, fields(tenant_id = %self.tenant_shard_id.tenant_id, shard_id = %self.tenant_shard_id.shard_slug(), timeline_id = %self.timeline_id))]
|
||||
pub async fn download_layer(&self, layer_file_name: &str) -> anyhow::Result<Option<bool>> {
|
||||
let Some(layer) = self.find_layer(layer_file_name).await else {
|
||||
return Ok(None);
|
||||
@@ -1330,7 +1339,11 @@ impl Timeline {
|
||||
&self.tenant_conf.read().unwrap().tenant_conf,
|
||||
&self.conf.default_tenant_conf,
|
||||
);
|
||||
let tenant_id_str = self.tenant_id.to_string();
|
||||
|
||||
// TODO(sharding): make evictions state shard aware
|
||||
// (https://github.com/neondatabase/neon/issues/5953)
|
||||
let tenant_id_str = self.tenant_shard_id.tenant_id.to_string();
|
||||
|
||||
let timeline_id_str = self.timeline_id.to_string();
|
||||
self.metrics
|
||||
.evictions_with_low_residence_duration
|
||||
@@ -1350,7 +1363,7 @@ impl Timeline {
|
||||
metadata: &TimelineMetadata,
|
||||
ancestor: Option<Arc<Timeline>>,
|
||||
timeline_id: TimelineId,
|
||||
tenant_id: TenantId,
|
||||
tenant_shard_id: TenantShardId,
|
||||
generation: Generation,
|
||||
walredo_mgr: Arc<super::WalRedoManager>,
|
||||
resources: TimelineResources,
|
||||
@@ -1381,7 +1394,7 @@ impl Timeline {
|
||||
tenant_conf,
|
||||
myself: myself.clone(),
|
||||
timeline_id,
|
||||
tenant_id,
|
||||
tenant_shard_id,
|
||||
generation,
|
||||
pg_version,
|
||||
layers: Arc::new(tokio::sync::RwLock::new(LayerManager::create())),
|
||||
@@ -1408,7 +1421,7 @@ impl Timeline {
|
||||
ancestor_lsn: metadata.ancestor_lsn(),
|
||||
|
||||
metrics: TimelineMetrics::new(
|
||||
&tenant_id,
|
||||
&tenant_shard_id.tenant_id,
|
||||
&timeline_id,
|
||||
crate::metrics::EvictionsWithLowResidenceDurationBuilder::new(
|
||||
"mtime",
|
||||
@@ -1459,7 +1472,7 @@ impl Timeline {
|
||||
initial_logical_size_can_start,
|
||||
initial_logical_size_attempt: Mutex::new(initial_logical_size_attempt),
|
||||
cancel,
|
||||
gate: Gate::new(format!("Timeline<{tenant_id}/{timeline_id}>")),
|
||||
gate: Gate::new(format!("Timeline<{tenant_shard_id}/{timeline_id}>")),
|
||||
|
||||
compaction_lock: tokio::sync::Mutex::default(),
|
||||
gc_lock: tokio::sync::Mutex::default(),
|
||||
@@ -1481,14 +1494,14 @@ impl Timeline {
|
||||
FlushLoopState::Running { .. } => {
|
||||
info!(
|
||||
"skipping attempt to start flush_loop twice {}/{}",
|
||||
self.tenant_id, self.timeline_id
|
||||
self.tenant_shard_id, self.timeline_id
|
||||
);
|
||||
return;
|
||||
}
|
||||
FlushLoopState::Exited => {
|
||||
warn!(
|
||||
"ignoring attempt to restart exited flush_loop {}/{}",
|
||||
self.tenant_id, self.timeline_id
|
||||
self.tenant_shard_id, self.timeline_id
|
||||
);
|
||||
return;
|
||||
}
|
||||
@@ -1507,7 +1520,7 @@ impl Timeline {
|
||||
task_mgr::spawn(
|
||||
task_mgr::BACKGROUND_RUNTIME.handle(),
|
||||
task_mgr::TaskKind::LayerFlushTask,
|
||||
Some(self.tenant_id),
|
||||
Some(self.tenant_shard_id.tenant_id),
|
||||
Some(self.timeline_id),
|
||||
"layer flush task",
|
||||
false,
|
||||
@@ -1519,7 +1532,7 @@ impl Timeline {
|
||||
*flush_loop_state = FlushLoopState::Exited;
|
||||
Ok(())
|
||||
}
|
||||
.instrument(info_span!(parent: None, "layer flush task", tenant_id = %self.tenant_id, timeline_id = %self.timeline_id))
|
||||
.instrument(info_span!(parent: None, "layer flush task", tenant_id = %self.tenant_shard_id.tenant_id, shard_id = %self.tenant_shard_id.shard_slug(), timeline_id = %self.timeline_id))
|
||||
);
|
||||
}
|
||||
|
||||
@@ -1534,7 +1547,7 @@ impl Timeline {
|
||||
) {
|
||||
info!(
|
||||
"launching WAL receiver for timeline {} of tenant {}",
|
||||
self.timeline_id, self.tenant_id
|
||||
self.timeline_id, self.tenant_shard_id
|
||||
);
|
||||
|
||||
let tenant_conf_guard = self.tenant_conf.read().unwrap();
|
||||
@@ -1595,7 +1608,9 @@ impl Timeline {
|
||||
|
||||
// Scan timeline directory and create ImageFileName and DeltaFilename
|
||||
// structs representing all files on disk
|
||||
let timeline_path = self.conf.timeline_path(&self.tenant_id, &self.timeline_id);
|
||||
let timeline_path = self
|
||||
.conf
|
||||
.timeline_path(&self.tenant_shard_id, &self.timeline_id);
|
||||
let conf = self.conf;
|
||||
let span = tracing::Span::current();
|
||||
|
||||
@@ -1802,7 +1817,7 @@ impl Timeline {
|
||||
task_mgr::spawn(
|
||||
task_mgr::BACKGROUND_RUNTIME.handle(),
|
||||
task_mgr::TaskKind::InitialLogicalSizeCalculation,
|
||||
Some(self.tenant_id),
|
||||
Some(self.tenant_shard_id.tenant_id),
|
||||
Some(self.timeline_id),
|
||||
"initial size calculation",
|
||||
false,
|
||||
@@ -1912,7 +1927,7 @@ impl Timeline {
|
||||
task_mgr::spawn(
|
||||
task_mgr::BACKGROUND_RUNTIME.handle(),
|
||||
task_mgr::TaskKind::OndemandLogicalSizeCalculation,
|
||||
Some(self.tenant_id),
|
||||
Some(self.tenant_shard_id.tenant_id),
|
||||
Some(self.timeline_id),
|
||||
"ondemand logical size calculation",
|
||||
false,
|
||||
@@ -1988,7 +2003,7 @@ impl Timeline {
|
||||
fail::fail_point!("timeline-calculate-logical-size-check-dir-exists", |_| {
|
||||
if !self
|
||||
.conf
|
||||
.metadata_path(&self.tenant_id, &self.timeline_id)
|
||||
.metadata_path(&self.tenant_shard_id, &self.timeline_id)
|
||||
.exists()
|
||||
{
|
||||
error!("timeline-calculate-logical-size-pre metadata file does not exist")
|
||||
@@ -2341,7 +2356,13 @@ impl Timeline {
|
||||
// FIXME: It's pointless to check the cache for things that are not 8kB pages.
|
||||
// We should look at the key to determine if it's a cacheable object
|
||||
let (lsn, read_guard) = cache
|
||||
.lookup_materialized_page(self.tenant_id, self.timeline_id, key, lsn, ctx)
|
||||
.lookup_materialized_page(
|
||||
self.tenant_shard_id.tenant_id,
|
||||
self.timeline_id,
|
||||
key,
|
||||
lsn,
|
||||
ctx,
|
||||
)
|
||||
.await?;
|
||||
let img = Bytes::from(read_guard.to_vec());
|
||||
Some((lsn, img))
|
||||
@@ -2369,7 +2390,7 @@ impl Timeline {
|
||||
self.get_last_record_lsn(),
|
||||
self.conf,
|
||||
self.timeline_id,
|
||||
self.tenant_id,
|
||||
self.tenant_shard_id,
|
||||
)
|
||||
.await?;
|
||||
Ok(layer)
|
||||
@@ -2535,7 +2556,7 @@ impl Timeline {
|
||||
}
|
||||
|
||||
/// Flush one frozen in-memory layer to disk, as a new delta layer.
|
||||
#[instrument(skip_all, fields(tenant_id=%self.tenant_id, timeline_id=%self.timeline_id, layer=%frozen_layer))]
|
||||
#[instrument(skip_all, fields(tenant_id=%self.tenant_shard_id.tenant_id, shard_id = %self.tenant_shard_id.shard_slug(), timeline_id=%self.timeline_id, layer=%frozen_layer))]
|
||||
async fn flush_frozen_layer(
|
||||
self: &Arc<Self>,
|
||||
frozen_layer: Arc<InMemoryLayer>,
|
||||
@@ -2656,9 +2677,14 @@ impl Timeline {
|
||||
|
||||
// If we updated our disk_consistent_lsn, persist the updated metadata to local disk.
|
||||
if let Some(metadata) = metadata {
|
||||
save_metadata(self.conf, &self.tenant_id, &self.timeline_id, &metadata)
|
||||
.await
|
||||
.context("save_metadata")?;
|
||||
save_metadata(
|
||||
self.conf,
|
||||
&self.tenant_shard_id,
|
||||
&self.timeline_id,
|
||||
&metadata,
|
||||
)
|
||||
.await
|
||||
.context("save_metadata")?;
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
@@ -2722,9 +2748,14 @@ impl Timeline {
|
||||
) -> anyhow::Result<()> {
|
||||
let metadata = self.schedule_uploads(disk_consistent_lsn, layers_to_upload)?;
|
||||
|
||||
save_metadata(self.conf, &self.tenant_id, &self.timeline_id, &metadata)
|
||||
.await
|
||||
.context("save_metadata")?;
|
||||
save_metadata(
|
||||
self.conf,
|
||||
&self.tenant_shard_id,
|
||||
&self.timeline_id,
|
||||
&metadata,
|
||||
)
|
||||
.await
|
||||
.context("save_metadata")?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
@@ -2772,7 +2803,7 @@ impl Timeline {
|
||||
par_fsync::par_fsync(&[new_delta_path]).context("fsync of delta layer")?;
|
||||
par_fsync::par_fsync(&[self_clone
|
||||
.conf
|
||||
.timeline_path(&self_clone.tenant_id, &self_clone.timeline_id)])
|
||||
.timeline_path(&self_clone.tenant_shard_id, &self_clone.timeline_id)])
|
||||
.context("fsync of timeline dir")?;
|
||||
|
||||
anyhow::Ok(new_delta)
|
||||
@@ -2928,7 +2959,7 @@ impl Timeline {
|
||||
let mut image_layer_writer = ImageLayerWriter::new(
|
||||
self.conf,
|
||||
self.timeline_id,
|
||||
self.tenant_id,
|
||||
self.tenant_shard_id,
|
||||
&img_range,
|
||||
lsn,
|
||||
)
|
||||
@@ -3001,9 +3032,11 @@ impl Timeline {
|
||||
.await
|
||||
.context("fsync of newly created layer files")?;
|
||||
|
||||
par_fsync::par_fsync_async(&[self.conf.timeline_path(&self.tenant_id, &self.timeline_id)])
|
||||
.await
|
||||
.context("fsync of timeline dir")?;
|
||||
par_fsync::par_fsync_async(&[self
|
||||
.conf
|
||||
.timeline_path(&self.tenant_shard_id, &self.timeline_id)])
|
||||
.await
|
||||
.context("fsync of timeline dir")?;
|
||||
|
||||
let mut guard = self.layers.write().await;
|
||||
|
||||
@@ -3489,7 +3522,7 @@ impl Timeline {
|
||||
DeltaLayerWriter::new(
|
||||
self.conf,
|
||||
self.timeline_id,
|
||||
self.tenant_id,
|
||||
self.tenant_shard_id,
|
||||
key,
|
||||
if dup_end_lsn.is_valid() {
|
||||
// this is a layer containing slice of values of the same key
|
||||
@@ -3550,7 +3583,9 @@ impl Timeline {
|
||||
.await
|
||||
.context("fsync all new layers")?;
|
||||
|
||||
let timeline_dir = self.conf.timeline_path(&self.tenant_id, &self.timeline_id);
|
||||
let timeline_dir = self
|
||||
.conf
|
||||
.timeline_path(&self.tenant_shard_id, &self.timeline_id);
|
||||
|
||||
par_fsync::par_fsync_async(&[timeline_dir])
|
||||
.await
|
||||
@@ -3601,7 +3636,7 @@ impl Timeline {
|
||||
let ctx = ctx.attached_child();
|
||||
let mut stats = CompactLevel0Phase1StatsBuilder {
|
||||
version: Some(2),
|
||||
tenant_id: Some(self.tenant_id),
|
||||
tenant_id: Some(self.tenant_shard_id.tenant_id),
|
||||
timeline_id: Some(self.timeline_id),
|
||||
..Default::default()
|
||||
};
|
||||
@@ -4062,7 +4097,7 @@ impl Timeline {
|
||||
let cache = page_cache::get();
|
||||
if let Err(e) = cache
|
||||
.memorize_materialized_page(
|
||||
self.tenant_id,
|
||||
self.tenant_shard_id.tenant_id,
|
||||
self.timeline_id,
|
||||
key,
|
||||
last_rec_lsn,
|
||||
@@ -4106,7 +4141,7 @@ impl Timeline {
|
||||
let task_id = task_mgr::spawn(
|
||||
task_mgr::BACKGROUND_RUNTIME.handle(),
|
||||
task_mgr::TaskKind::DownloadAllRemoteLayers,
|
||||
Some(self.tenant_id),
|
||||
Some(self.tenant_shard_id.tenant_id),
|
||||
Some(self.timeline_id),
|
||||
"download all remote layers task",
|
||||
false,
|
||||
@@ -4128,7 +4163,7 @@ impl Timeline {
|
||||
};
|
||||
Ok(())
|
||||
}
|
||||
.instrument(info_span!(parent: None, "download_all_remote_layers", tenant_id = %self.tenant_id, timeline_id = %self.timeline_id))
|
||||
.instrument(info_span!(parent: None, "download_all_remote_layers", tenant_id = %self.tenant_shard_id.tenant_id, shard_id = %self.tenant_shard_id.shard_slug(), timeline_id = %self.timeline_id))
|
||||
);
|
||||
|
||||
let initial_info = DownloadRemoteLayersTaskInfo {
|
||||
@@ -4329,8 +4364,10 @@ impl Timeline {
|
||||
}
|
||||
|
||||
pub(crate) fn get_shard_index(&self) -> ShardIndex {
|
||||
// TODO: carry this on the struct
|
||||
ShardIndex::unsharded()
|
||||
ShardIndex {
|
||||
shard_number: self.tenant_shard_id.shard_number,
|
||||
shard_count: self.tenant_shard_id.shard_count,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -4,13 +4,10 @@ use std::{
|
||||
};
|
||||
|
||||
use anyhow::Context;
|
||||
use pageserver_api::models::TimelineState;
|
||||
use pageserver_api::{models::TimelineState, shard::TenantShardId};
|
||||
use tokio::sync::OwnedMutexGuard;
|
||||
use tracing::{debug, error, info, instrument, warn, Instrument, Span};
|
||||
use utils::{
|
||||
crashsafe, fs_ext,
|
||||
id::{TenantId, TimelineId},
|
||||
};
|
||||
use utils::{crashsafe, fs_ext, id::TimelineId};
|
||||
|
||||
use crate::{
|
||||
config::PageServerConf,
|
||||
@@ -47,7 +44,7 @@ async fn stop_tasks(timeline: &Timeline) -> Result<(), DeleteTimelineError> {
|
||||
// Shut down the layer flush task before the remote client, as one depends on the other
|
||||
task_mgr::shutdown_tasks(
|
||||
Some(TaskKind::LayerFlushTask),
|
||||
Some(timeline.tenant_id),
|
||||
Some(timeline.tenant_shard_id.tenant_id),
|
||||
Some(timeline.timeline_id),
|
||||
)
|
||||
.await;
|
||||
@@ -73,7 +70,12 @@ async fn stop_tasks(timeline: &Timeline) -> Result<(), DeleteTimelineError> {
|
||||
// NB: This and other delete_timeline calls do not run as a task_mgr task,
|
||||
// so, they are not affected by this shutdown_tasks() call.
|
||||
info!("waiting for timeline tasks to shutdown");
|
||||
task_mgr::shutdown_tasks(None, Some(timeline.tenant_id), Some(timeline.timeline_id)).await;
|
||||
task_mgr::shutdown_tasks(
|
||||
None,
|
||||
Some(timeline.tenant_shard_id.tenant_id),
|
||||
Some(timeline.timeline_id),
|
||||
)
|
||||
.await;
|
||||
|
||||
fail::fail_point!("timeline-delete-before-index-deleted-at", |_| {
|
||||
Err(anyhow::anyhow!(
|
||||
@@ -125,7 +127,7 @@ async fn set_deleted_in_remote_index(timeline: &Timeline) -> Result<(), DeleteTi
|
||||
// pub(super): documentation link
|
||||
pub(super) async fn delete_local_layer_files(
|
||||
conf: &PageServerConf,
|
||||
tenant_id: TenantId,
|
||||
tenant_shard_id: TenantShardId,
|
||||
timeline: &Timeline,
|
||||
) -> anyhow::Result<()> {
|
||||
let guards = async { tokio::join!(timeline.gc_lock.lock(), timeline.compaction_lock.lock()) };
|
||||
@@ -139,7 +141,7 @@ pub(super) async fn delete_local_layer_files(
|
||||
// NB: storage_sync upload tasks that reference these layers have been cancelled
|
||||
// by the caller.
|
||||
|
||||
let local_timeline_directory = conf.timeline_path(&tenant_id, &timeline.timeline_id);
|
||||
let local_timeline_directory = conf.timeline_path(&tenant_shard_id, &timeline.timeline_id);
|
||||
|
||||
fail::fail_point!("timeline-delete-before-rm", |_| {
|
||||
Err(anyhow::anyhow!("failpoint: timeline-delete-before-rm"))?
|
||||
@@ -175,7 +177,7 @@ pub(super) async fn delete_local_layer_files(
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
let metadata_path = conf.metadata_path(&tenant_id, &timeline.timeline_id);
|
||||
let metadata_path = conf.metadata_path(&tenant_shard_id, &timeline.timeline_id);
|
||||
|
||||
for entry in walkdir::WalkDir::new(&local_timeline_directory).contents_first(true) {
|
||||
#[cfg(feature = "testing")]
|
||||
@@ -250,11 +252,11 @@ async fn delete_remote_layers_and_index(timeline: &Timeline) -> anyhow::Result<(
|
||||
// (nothing can fail after its deletion)
|
||||
async fn cleanup_remaining_timeline_fs_traces(
|
||||
conf: &PageServerConf,
|
||||
tenant_id: TenantId,
|
||||
tenant_shard_id: TenantShardId,
|
||||
timeline_id: TimelineId,
|
||||
) -> anyhow::Result<()> {
|
||||
// Remove local metadata
|
||||
tokio::fs::remove_file(conf.metadata_path(&tenant_id, &timeline_id))
|
||||
tokio::fs::remove_file(conf.metadata_path(&tenant_shard_id, &timeline_id))
|
||||
.await
|
||||
.or_else(fs_ext::ignore_not_found)
|
||||
.context("remove metadata")?;
|
||||
@@ -266,7 +268,7 @@ async fn cleanup_remaining_timeline_fs_traces(
|
||||
});
|
||||
|
||||
// Remove timeline dir
|
||||
tokio::fs::remove_dir(conf.timeline_path(&tenant_id, &timeline_id))
|
||||
tokio::fs::remove_dir(conf.timeline_path(&tenant_shard_id, &timeline_id))
|
||||
.await
|
||||
.or_else(fs_ext::ignore_not_found)
|
||||
.context("timeline dir")?;
|
||||
@@ -281,7 +283,7 @@ async fn cleanup_remaining_timeline_fs_traces(
|
||||
// to be reordered later and thus missed if a crash occurs.
|
||||
// Note that we dont need to sync after mark file is removed
|
||||
// because we can tolerate the case when mark file reappears on startup.
|
||||
let timeline_path = conf.timelines_path(&tenant_id);
|
||||
let timeline_path = conf.timelines_path(&tenant_shard_id);
|
||||
crashsafe::fsync_async(timeline_path)
|
||||
.await
|
||||
.context("fsync_pre_mark_remove")?;
|
||||
@@ -289,7 +291,7 @@ async fn cleanup_remaining_timeline_fs_traces(
|
||||
// Remove delete mark
|
||||
// TODO: once we are confident that no more exist in the field, remove this
|
||||
// line. It cleans up a legacy marker file that might in rare cases be present.
|
||||
tokio::fs::remove_file(conf.timeline_delete_mark_file_path(tenant_id, timeline_id))
|
||||
tokio::fs::remove_file(conf.timeline_delete_mark_file_path(tenant_shard_id, timeline_id))
|
||||
.await
|
||||
.or_else(fs_ext::ignore_not_found)
|
||||
.context("remove delete mark")
|
||||
@@ -355,7 +357,7 @@ impl DeleteTimelineFlow {
|
||||
// NB: If this fails half-way through, and is retried, the retry will go through
|
||||
// all the same steps again. Make sure the code here is idempotent, and don't
|
||||
// error out if some of the shutdown tasks have already been completed!
|
||||
#[instrument(skip(tenant), fields(tenant_id=%tenant.tenant_id))]
|
||||
#[instrument(skip(tenant), fields(tenant_id=%tenant.tenant_shard_id.tenant_id, shard_id=%tenant.tenant_shard_id.shard_slug()))]
|
||||
pub async fn run(
|
||||
tenant: &Arc<Tenant>,
|
||||
timeline_id: TimelineId,
|
||||
@@ -451,7 +453,8 @@ impl DeleteTimelineFlow {
|
||||
timeline_id: TimelineId,
|
||||
) -> anyhow::Result<()> {
|
||||
let r =
|
||||
cleanup_remaining_timeline_fs_traces(tenant.conf, tenant.tenant_id, timeline_id).await;
|
||||
cleanup_remaining_timeline_fs_traces(tenant.conf, tenant.tenant_shard_id, timeline_id)
|
||||
.await;
|
||||
info!("Done");
|
||||
r
|
||||
}
|
||||
@@ -522,13 +525,13 @@ impl DeleteTimelineFlow {
|
||||
tenant: Arc<Tenant>,
|
||||
timeline: Arc<Timeline>,
|
||||
) {
|
||||
let tenant_id = timeline.tenant_id;
|
||||
let tenant_shard_id = timeline.tenant_shard_id;
|
||||
let timeline_id = timeline.timeline_id;
|
||||
|
||||
task_mgr::spawn(
|
||||
task_mgr::BACKGROUND_RUNTIME.handle(),
|
||||
TaskKind::TimelineDeletionWorker,
|
||||
Some(tenant_id),
|
||||
Some(tenant_shard_id.tenant_id),
|
||||
Some(timeline_id),
|
||||
"timeline_delete",
|
||||
false,
|
||||
@@ -541,7 +544,7 @@ impl DeleteTimelineFlow {
|
||||
}
|
||||
.instrument({
|
||||
let span =
|
||||
tracing::info_span!(parent: None, "delete_timeline", tenant_id=%tenant_id, timeline_id=%timeline_id);
|
||||
tracing::info_span!(parent: None, "delete_timeline", tenant_id=%tenant_shard_id.tenant_id, shard_id=%tenant_shard_id.shard_slug(),timeline_id=%timeline_id);
|
||||
span.follows_from(Span::current());
|
||||
span
|
||||
}),
|
||||
@@ -554,13 +557,14 @@ impl DeleteTimelineFlow {
|
||||
tenant: &Tenant,
|
||||
timeline: &Timeline,
|
||||
) -> Result<(), DeleteTimelineError> {
|
||||
delete_local_layer_files(conf, tenant.tenant_id, timeline).await?;
|
||||
delete_local_layer_files(conf, tenant.tenant_shard_id, timeline).await?;
|
||||
|
||||
delete_remote_layers_and_index(timeline).await?;
|
||||
|
||||
pausable_failpoint!("in_progress_delete");
|
||||
|
||||
cleanup_remaining_timeline_fs_traces(conf, tenant.tenant_id, timeline.timeline_id).await?;
|
||||
cleanup_remaining_timeline_fs_traces(conf, tenant.tenant_shard_id, timeline.timeline_id)
|
||||
.await?;
|
||||
|
||||
remove_timeline_from_tenant(tenant, timeline.timeline_id, &guard).await?;
|
||||
|
||||
|
||||
@@ -60,9 +60,12 @@ impl Timeline {
|
||||
task_mgr::spawn(
|
||||
BACKGROUND_RUNTIME.handle(),
|
||||
TaskKind::Eviction,
|
||||
Some(self.tenant_id),
|
||||
Some(self.tenant_shard_id.tenant_id),
|
||||
Some(self.timeline_id),
|
||||
&format!("layer eviction for {}/{}", self.tenant_id, self.timeline_id),
|
||||
&format!(
|
||||
"layer eviction for {}/{}",
|
||||
self.tenant_shard_id, self.timeline_id
|
||||
),
|
||||
false,
|
||||
async move {
|
||||
let cancel = task_mgr::shutdown_token();
|
||||
@@ -77,7 +80,7 @@ impl Timeline {
|
||||
);
|
||||
}
|
||||
|
||||
#[instrument(skip_all, fields(tenant_id = %self.tenant_id, timeline_id = %self.timeline_id))]
|
||||
#[instrument(skip_all, fields(tenant_id = %self.tenant_shard_id.tenant_id, shard_id = %self.tenant_shard_id.shard_slug(), timeline_id = %self.timeline_id))]
|
||||
async fn eviction_task(self: Arc<Self>, cancel: CancellationToken) {
|
||||
use crate::tenant::tasks::random_init_delay;
|
||||
{
|
||||
@@ -340,7 +343,7 @@ impl Timeline {
|
||||
// Make one of the tenant's timelines draw the short straw and run the calculation.
|
||||
// The others wait until the calculation is done so that they take into account the
|
||||
// imitated accesses that the winner made.
|
||||
let tenant = match crate::tenant::mgr::get_tenant(self.tenant_id, true) {
|
||||
let tenant = match crate::tenant::mgr::get_tenant(self.tenant_shard_id.tenant_id, true) {
|
||||
Ok(t) => t,
|
||||
Err(_) => {
|
||||
return ControlFlow::Break(());
|
||||
|
||||
@@ -1,8 +1,9 @@
|
||||
use anyhow::{bail, ensure, Context, Result};
|
||||
use pageserver_api::shard::TenantShardId;
|
||||
use std::{collections::HashMap, sync::Arc};
|
||||
use tracing::trace;
|
||||
use utils::{
|
||||
id::{TenantId, TimelineId},
|
||||
id::TimelineId,
|
||||
lsn::{AtomicLsn, Lsn},
|
||||
};
|
||||
|
||||
@@ -73,7 +74,7 @@ impl LayerManager {
|
||||
last_record_lsn: Lsn,
|
||||
conf: &'static PageServerConf,
|
||||
timeline_id: TimelineId,
|
||||
tenant_id: TenantId,
|
||||
tenant_shard_id: TenantShardId,
|
||||
) -> Result<Arc<InMemoryLayer>> {
|
||||
ensure!(lsn.is_aligned());
|
||||
|
||||
@@ -109,7 +110,8 @@ impl LayerManager {
|
||||
lsn
|
||||
);
|
||||
|
||||
let new_layer = InMemoryLayer::create(conf, timeline_id, tenant_id, start_lsn).await?;
|
||||
let new_layer =
|
||||
InMemoryLayer::create(conf, timeline_id, tenant_shard_id, start_lsn).await?;
|
||||
let layer = Arc::new(new_layer);
|
||||
|
||||
self.layer_map.open_layer = Some(layer.clone());
|
||||
|
||||
@@ -43,11 +43,11 @@ impl<'t> UninitializedTimeline<'t> {
|
||||
/// The caller is responsible for activating the timeline (function `.activate()`).
|
||||
pub(crate) fn finish_creation(mut self) -> anyhow::Result<Arc<Timeline>> {
|
||||
let timeline_id = self.timeline_id;
|
||||
let tenant_id = self.owning_tenant.tenant_id;
|
||||
let tenant_shard_id = self.owning_tenant.tenant_shard_id;
|
||||
|
||||
if self.raw_timeline.is_none() {
|
||||
return Err(anyhow::anyhow!(
|
||||
"No timeline for initialization found for {tenant_id}/{timeline_id}"
|
||||
"No timeline for initialization found for {tenant_shard_id}/{timeline_id}"
|
||||
));
|
||||
}
|
||||
|
||||
@@ -61,13 +61,13 @@ impl<'t> UninitializedTimeline<'t> {
|
||||
|
||||
anyhow::ensure!(
|
||||
new_disk_consistent_lsn.is_valid(),
|
||||
"new timeline {tenant_id}/{timeline_id} has invalid disk_consistent_lsn"
|
||||
"new timeline {tenant_shard_id}/{timeline_id} has invalid disk_consistent_lsn"
|
||||
);
|
||||
|
||||
let mut timelines = self.owning_tenant.timelines.lock().unwrap();
|
||||
match timelines.entry(timeline_id) {
|
||||
Entry::Occupied(_) => anyhow::bail!(
|
||||
"Found freshly initialized timeline {tenant_id}/{timeline_id} in the tenant map"
|
||||
"Found freshly initialized timeline {tenant_shard_id}/{timeline_id} in the tenant map"
|
||||
),
|
||||
Entry::Vacant(v) => {
|
||||
// after taking here should be no fallible operations, because the drop guard will not
|
||||
@@ -79,7 +79,7 @@ impl<'t> UninitializedTimeline<'t> {
|
||||
// this should be an assertion.
|
||||
uninit_mark.remove_uninit_mark().with_context(|| {
|
||||
format!(
|
||||
"Failed to remove uninit mark file for timeline {tenant_id}/{timeline_id}"
|
||||
"Failed to remove uninit mark file for timeline {tenant_shard_id}/{timeline_id}"
|
||||
)
|
||||
})?;
|
||||
v.insert(Arc::clone(&new_timeline));
|
||||
@@ -134,7 +134,7 @@ impl<'t> UninitializedTimeline<'t> {
|
||||
.with_context(|| {
|
||||
format!(
|
||||
"No raw timeline {}/{} found",
|
||||
self.owning_tenant.tenant_id, self.timeline_id
|
||||
self.owning_tenant.tenant_shard_id, self.timeline_id
|
||||
)
|
||||
})?
|
||||
.0)
|
||||
@@ -144,7 +144,7 @@ impl<'t> UninitializedTimeline<'t> {
|
||||
impl Drop for UninitializedTimeline<'_> {
|
||||
fn drop(&mut self) {
|
||||
if let Some((_, uninit_mark)) = self.raw_timeline.take() {
|
||||
let _entered = info_span!("drop_uninitialized_timeline", tenant_id = %self.owning_tenant.tenant_id, timeline_id = %self.timeline_id).entered();
|
||||
let _entered = info_span!("drop_uninitialized_timeline", tenant_id = %self.owning_tenant.tenant_shard_id.tenant_id, shard_id = %self.owning_tenant.tenant_shard_id.shard_slug(), timeline_id = %self.timeline_id).entered();
|
||||
error!("Timeline got dropped without initializing, cleaning its files");
|
||||
cleanup_timeline_directory(uninit_mark);
|
||||
}
|
||||
|
||||
@@ -71,7 +71,7 @@ impl WalReceiver {
|
||||
mut broker_client: BrokerClientChannel,
|
||||
ctx: &RequestContext,
|
||||
) -> Self {
|
||||
let tenant_id = timeline.tenant_id;
|
||||
let tenant_id = timeline.tenant_shard_id.tenant_id;
|
||||
let timeline_id = timeline.timeline_id;
|
||||
let walreceiver_ctx =
|
||||
ctx.detached_child(TaskKind::WalReceiverManager, DownloadBehavior::Error);
|
||||
|
||||
@@ -75,7 +75,7 @@ pub(super) async fn connection_manager_loop_step(
|
||||
}
|
||||
|
||||
let id = TenantTimelineId {
|
||||
tenant_id: connection_manager_state.timeline.tenant_id,
|
||||
tenant_id: connection_manager_state.timeline.tenant_shard_id.tenant_id,
|
||||
timeline_id: connection_manager_state.timeline.timeline_id,
|
||||
};
|
||||
|
||||
@@ -388,7 +388,7 @@ struct BrokerSkTimeline {
|
||||
impl ConnectionManagerState {
|
||||
pub(super) fn new(timeline: Arc<Timeline>, conf: WalReceiverConf) -> Self {
|
||||
let id = TenantTimelineId {
|
||||
tenant_id: timeline.tenant_id,
|
||||
tenant_id: timeline.tenant_shard_id.tenant_id,
|
||||
timeline_id: timeline.timeline_id,
|
||||
};
|
||||
Self {
|
||||
|
||||
@@ -163,7 +163,7 @@ pub(super) async fn handle_walreceiver_connection(
|
||||
task_mgr::spawn(
|
||||
WALRECEIVER_RUNTIME.handle(),
|
||||
TaskKind::WalReceiverConnectionPoller,
|
||||
Some(timeline.tenant_id),
|
||||
Some(timeline.tenant_shard_id.tenant_id),
|
||||
Some(timeline.timeline_id),
|
||||
"walreceiver connection",
|
||||
false,
|
||||
|
||||
@@ -41,6 +41,9 @@ use utils::{bin_ser::BeSer, id::TenantId, lsn::Lsn, nonblock::set_nonblock};
|
||||
#[cfg(feature = "testing")]
|
||||
use std::sync::atomic::{AtomicUsize, Ordering};
|
||||
|
||||
#[cfg(feature = "testing")]
|
||||
use pageserver_api::shard::TenantShardId;
|
||||
|
||||
use crate::config::PageServerConf;
|
||||
use crate::metrics::{
|
||||
WalRedoKillCause, WAL_REDO_BYTES_HISTOGRAM, WAL_REDO_PROCESS_COUNTERS,
|
||||
@@ -991,7 +994,11 @@ impl WalRedoProcess {
|
||||
// these files will be collected to an allure report
|
||||
let filename = format!("walredo-{millis}-{}-{seq}.walredo", writebuf.len());
|
||||
|
||||
let path = self.conf.tenant_path(&self.tenant_id).join(&filename);
|
||||
// TODO(sharding): update this call when WalRedoProcess gets a TenantShardId.
|
||||
let path = self
|
||||
.conf
|
||||
.tenant_path(&TenantShardId::unsharded(self.tenant_id))
|
||||
.join(&filename);
|
||||
|
||||
let res = std::fs::OpenOptions::new()
|
||||
.write(true)
|
||||
|
||||
Reference in New Issue
Block a user