mirror of
https://github.com/neondatabase/neon.git
synced 2026-01-15 17:32:56 +00:00
find a way to duplicate a tenant in local_fs
Use the script like so, against the tenant to duplicate:
poetry run python3 ./test_runner/duplicate_tenant.py 7ea51af32d42bfe7fb93bf5f28114d09 200 8
backup of pageserver.toml
d =1
pg_distrib_dir ='/home/admin/neon-main/pg_install'
http_auth_type ='Trust'
pg_auth_type ='Trust'
listen_http_addr ='127.0.0.1:9898'
listen_pg_addr ='127.0.0.1:64000'
broker_endpoint ='http://127.0.0.1:50051/'
#control_plane_api ='http://127.0.0.1:1234/'
# Initial configuration file created by 'pageserver --init'
#listen_pg_addr = '127.0.0.1:64000'
#listen_http_addr = '127.0.0.1:9898'
#wait_lsn_timeout = '60 s'
#wal_redo_timeout = '60 s'
#max_file_descriptors = 10000
#page_cache_size = 160000
# initial superuser role name to use when creating a new tenant
#initial_superuser_name = 'cloud_admin'
#broker_endpoint = 'http://127.0.0.1:50051'
#log_format = 'plain'
#concurrent_tenant_size_logical_size_queries = '1'
#metric_collection_interval = '10 min'
#cached_metric_collection_interval = '0s'
#synthetic_size_calculation_interval = '10 min'
#disk_usage_based_eviction = { max_usage_pct = .., min_avail_bytes = .., period = "10s"}
#background_task_maximum_delay = '10s'
[tenant_config]
#checkpoint_distance = 268435456 # in bytes
#checkpoint_timeout = 10 m
#compaction_target_size = 134217728 # in bytes
#compaction_period = '20 s'
#compaction_threshold = 10
#gc_period = '1 hr'
#gc_horizon = 67108864
#image_creation_threshold = 3
#pitr_interval = '7 days'
#min_resident_size_override = .. # in bytes
#evictions_low_residence_duration_metric_threshold = '24 hour'
#gc_feedback = false
# make it determinsitic
gc_period = '0s'
checkpoint_timeout = '3650 day'
compaction_period = '20 s'
compaction_threshold = 10
compaction_target_size = 134217728
checkpoint_distance = 268435456
image_creation_threshold = 3
[remote_storage]
local_path = '/home/admin/neon-main/bench_repo_dir/repo/remote_storage_local_fs'
remove http handler
switch to generalized rewrite_summary & impl page_ctl subcommand to use it
WIP: change duplicate_tenant.py script to use the pagectl command
The script works but at restart, we detach the created tenants because
they're not known to the attachment service:
Detaching tenant, control plane omitted it in re-attach response tenant_id=1e399d390e3aee6b11c701cbc716bb6c
=> figure out how to further integrate this
This commit is contained in:
@@ -367,6 +367,8 @@ pub struct TenantInfo {
|
||||
/// If a layer is present in both local FS and S3, it counts only once.
|
||||
pub current_physical_size: Option<u64>, // physical size is only included in `tenant_status` endpoint
|
||||
pub attachment_status: TenantAttachmentStatus,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub generation: Option<u32>,
|
||||
}
|
||||
|
||||
/// This represents the output of the "timeline_detail" and "timeline_list" API calls.
|
||||
@@ -516,6 +518,8 @@ pub enum HistoricLayerInfo {
|
||||
lsn_end: Lsn,
|
||||
remote: bool,
|
||||
access_stats: LayerAccessStats,
|
||||
|
||||
remote_path: Option<String>,
|
||||
},
|
||||
Image {
|
||||
layer_file_name: String,
|
||||
@@ -524,6 +528,8 @@ pub enum HistoricLayerInfo {
|
||||
lsn_start: Lsn,
|
||||
remote: bool,
|
||||
access_stats: LayerAccessStats,
|
||||
|
||||
remote_path: Option<String>,
|
||||
},
|
||||
}
|
||||
|
||||
@@ -876,6 +882,7 @@ mod tests {
|
||||
state: TenantState::Active,
|
||||
current_physical_size: Some(42),
|
||||
attachment_status: TenantAttachmentStatus::Attached,
|
||||
generation: None,
|
||||
};
|
||||
let expected_active = json!({
|
||||
"id": original_active.id.to_string(),
|
||||
@@ -896,6 +903,7 @@ mod tests {
|
||||
},
|
||||
current_physical_size: Some(42),
|
||||
attachment_status: TenantAttachmentStatus::Attached,
|
||||
generation: None,
|
||||
};
|
||||
let expected_broken = json!({
|
||||
"id": original_broken.id.to_string(),
|
||||
|
||||
@@ -83,6 +83,12 @@ impl std::fmt::Display for RemotePath {
|
||||
}
|
||||
}
|
||||
|
||||
impl From<RemotePath> for String {
|
||||
fn from(val: RemotePath) -> Self {
|
||||
val.0.into()
|
||||
}
|
||||
}
|
||||
|
||||
impl RemotePath {
|
||||
pub fn new(relative_path: &Utf8Path) -> anyhow::Result<Self> {
|
||||
anyhow::ensure!(
|
||||
@@ -104,7 +110,7 @@ impl RemotePath {
|
||||
self.0.file_name()
|
||||
}
|
||||
|
||||
pub fn join(&self, segment: &Utf8Path) -> Self {
|
||||
pub fn join<P: AsRef<Utf8Path>>(&self, segment: P) -> Self {
|
||||
Self(self.0.join(segment))
|
||||
}
|
||||
|
||||
|
||||
@@ -267,7 +267,7 @@ async fn calculate_synthetic_size_worker(
|
||||
}
|
||||
};
|
||||
|
||||
for (tenant_shard_id, tenant_state) in tenants {
|
||||
for (tenant_shard_id, tenant_state, _gen) in tenants {
|
||||
if tenant_state != TenantState::Active {
|
||||
continue;
|
||||
}
|
||||
|
||||
@@ -196,7 +196,7 @@ pub(super) async fn collect_all_metrics(
|
||||
}
|
||||
};
|
||||
|
||||
let tenants = futures::stream::iter(tenants).filter_map(|(id, state)| async move {
|
||||
let tenants = futures::stream::iter(tenants).filter_map(|(id, state, _)| async move {
|
||||
if state != TenantState::Active || !id.is_zero() {
|
||||
None
|
||||
} else {
|
||||
|
||||
@@ -312,7 +312,7 @@ impl DeletionList {
|
||||
result.extend(
|
||||
timeline_layers
|
||||
.into_iter()
|
||||
.map(|l| timeline_remote_path.join(&Utf8PathBuf::from(l))),
|
||||
.map(|l| timeline_remote_path.join(Utf8PathBuf::from(l))),
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -515,7 +515,7 @@ async fn collect_eviction_candidates(
|
||||
|
||||
let mut candidates = Vec::new();
|
||||
|
||||
for (tenant_id, _state) in &tenants {
|
||||
for (tenant_id, _state, _gen) in &tenants {
|
||||
if cancel.is_cancelled() {
|
||||
return Ok(EvictionCandidates::Cancelled);
|
||||
}
|
||||
|
||||
@@ -810,11 +810,12 @@ async fn tenant_list_handler(
|
||||
ApiError::ResourceUnavailable("Tenant map is initializing or shutting down".into())
|
||||
})?
|
||||
.iter()
|
||||
.map(|(id, state)| TenantInfo {
|
||||
.map(|(id, state, gen)| TenantInfo {
|
||||
id: *id,
|
||||
state: state.clone(),
|
||||
current_physical_size: None,
|
||||
attachment_status: state.attachment_status(),
|
||||
generation: (*gen).into(),
|
||||
})
|
||||
.collect::<Vec<TenantInfo>>();
|
||||
|
||||
@@ -843,6 +844,7 @@ async fn tenant_status(
|
||||
state: state.clone(),
|
||||
current_physical_size: Some(current_physical_size),
|
||||
attachment_status: state.attachment_status(),
|
||||
generation: tenant.generation().into(),
|
||||
})
|
||||
}
|
||||
.instrument(info_span!("tenant_status_handler",
|
||||
|
||||
@@ -1801,6 +1801,10 @@ impl Tenant {
|
||||
self.current_state() == TenantState::Active
|
||||
}
|
||||
|
||||
pub fn generation(&self) -> Generation {
|
||||
self.generation
|
||||
}
|
||||
|
||||
/// Changes tenant status to active, unless shutdown was already requested.
|
||||
///
|
||||
/// `background_jobs_can_start` is an optional barrier set to a value during pageserver startup
|
||||
|
||||
@@ -1531,7 +1531,7 @@ pub(crate) enum TenantMapListError {
|
||||
///
|
||||
/// Get list of tenants, for the mgmt API
|
||||
///
|
||||
pub(crate) async fn list_tenants() -> Result<Vec<(TenantShardId, TenantState)>, TenantMapListError>
|
||||
pub(crate) async fn list_tenants() -> Result<Vec<(TenantShardId, TenantState, Generation)>, TenantMapListError>
|
||||
{
|
||||
let tenants = TENANTS.read().unwrap();
|
||||
let m = match &*tenants {
|
||||
@@ -1540,7 +1540,7 @@ pub(crate) async fn list_tenants() -> Result<Vec<(TenantShardId, TenantState)>,
|
||||
};
|
||||
Ok(m.iter()
|
||||
.filter_map(|(id, tenant)| match tenant {
|
||||
TenantSlot::Attached(tenant) => Some((*id, tenant.current_state())),
|
||||
TenantSlot::Attached(tenant) => Some((*id, tenant.current_state(), tenant.generation())),
|
||||
TenantSlot::Secondary => None,
|
||||
TenantSlot::InProgress(_) => None,
|
||||
})
|
||||
|
||||
@@ -4,6 +4,7 @@ use pageserver_api::models::{
|
||||
HistoricLayerInfo, LayerAccessKind, LayerResidenceEventReason, LayerResidenceStatus,
|
||||
};
|
||||
use pageserver_api::shard::ShardIndex;
|
||||
use remote_storage::RemotePath;
|
||||
use std::ops::Range;
|
||||
use std::sync::atomic::{AtomicBool, AtomicUsize, Ordering};
|
||||
use std::sync::{Arc, Weak};
|
||||
@@ -313,6 +314,12 @@ impl Layer {
|
||||
&self.0.path
|
||||
}
|
||||
|
||||
/// This can return None even though it should return Some in some edge cases.
|
||||
#[allow(unused)]
|
||||
pub(crate) fn remote_path(&self) -> Option<RemotePath> {
|
||||
self.0.remote_path()
|
||||
}
|
||||
|
||||
pub(crate) fn metadata(&self) -> LayerFileMetadata {
|
||||
self.0.metadata()
|
||||
}
|
||||
@@ -970,6 +977,17 @@ impl LayerInner {
|
||||
}
|
||||
}
|
||||
|
||||
/// This can return None even though it should return Some in some edge cases.
|
||||
fn remote_path(&self) -> Option<RemotePath> {
|
||||
let tl = self.timeline.upgrade()?; // TODO: should distinguish this case, but, accuracy doesn't matter for this field.
|
||||
Some(crate::tenant::remote_timeline_client::remote_layer_path(
|
||||
&tl.tenant_id,
|
||||
&tl.timeline_id,
|
||||
&self.desc.filename(),
|
||||
self.generation,
|
||||
))
|
||||
}
|
||||
|
||||
fn info(&self, reset: LayerAccessStatsReset) -> HistoricLayerInfo {
|
||||
let layer_file_name = self.desc.filename().file_name();
|
||||
|
||||
@@ -989,6 +1007,7 @@ impl LayerInner {
|
||||
lsn_end: lsn_range.end,
|
||||
remote,
|
||||
access_stats,
|
||||
remote_path: self.remote_path().map(|p| p.into()),
|
||||
}
|
||||
} else {
|
||||
let lsn = self.desc.image_layer_lsn();
|
||||
@@ -999,6 +1018,7 @@ impl LayerInner {
|
||||
lsn_start: lsn,
|
||||
remote,
|
||||
access_stats,
|
||||
remote_path: self.remote_path().map(|p| p.into()),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
69
test_runner/duplicate_tenant.py
Normal file
69
test_runner/duplicate_tenant.py
Normal file
@@ -0,0 +1,69 @@
|
||||
# Usage from top of repo:
|
||||
# poetry run python3 ./test_runner/duplicate_tenant.py c66e2e233057f7f05563caff664ecb14 .neon/remote_storage_local_fs
|
||||
import argparse
|
||||
import shutil
|
||||
import subprocess
|
||||
import time
|
||||
from pathlib import Path
|
||||
|
||||
from fixtures.pageserver.http import PageserverHttpClient
|
||||
from fixtures.types import TenantId
|
||||
|
||||
parser = argparse.ArgumentParser(description="Duplicate tenant script.")
|
||||
parser.add_argument("initial_tenant", type=str, help="Initial tenant")
|
||||
parser.add_argument("remote_storage_local_fs_root", type=Path, help="Remote storage local fs root")
|
||||
parser.add_argument("--ncopies", type=int, help="Number of copies")
|
||||
parser.add_argument("--numthreads", type=int, default=1, help="Number of threads")
|
||||
parser.add_argument("--port", type=int, default=9898, help="Pageserver management api port")
|
||||
|
||||
args = parser.parse_args()
|
||||
|
||||
initial_tenant = args.initial_tenant
|
||||
remote_storage_local_fs_root: Path = args.remote_storage_local_fs_root
|
||||
ncopies = args.ncopies
|
||||
numthreads = args.numthreads
|
||||
|
||||
new_tenant = TenantId.generate()
|
||||
print(f"New tenant: {new_tenant}")
|
||||
|
||||
client = PageserverHttpClient(args.port, lambda: None)
|
||||
|
||||
src_tenant_gen = int(client.tenant_status(initial_tenant)["generation"])
|
||||
|
||||
assert remote_storage_local_fs_root.is_dir(), f"{remote_storage_local_fs_root} is not a directory"
|
||||
|
||||
src_timelines_dir: Path = remote_storage_local_fs_root / "tenants" / initial_tenant / "timelines"
|
||||
assert src_timelines_dir.is_dir(), f"{src_timelines_dir} is not a directory"
|
||||
|
||||
dst_timelines_dir: Path = remote_storage_local_fs_root / "tenants" / str(new_tenant) / "timelines"
|
||||
dst_timelines_dir.parent.mkdir(parents=False, exist_ok=False)
|
||||
dst_timelines_dir.mkdir(parents=False, exist_ok=False)
|
||||
|
||||
for tl in src_timelines_dir.iterdir():
|
||||
src_tl_dir = src_timelines_dir / tl.name
|
||||
assert src_tl_dir.is_dir(), f"{src_tl_dir} is not a directory"
|
||||
dst_tl_dir = dst_timelines_dir / tl.name
|
||||
dst_tl_dir.mkdir(parents=False, exist_ok=False)
|
||||
for file in tl.iterdir():
|
||||
shutil.copy2(file, dst_tl_dir)
|
||||
if "__" in file.name:
|
||||
cmd = [
|
||||
"./target/debug/pagectl", # TODO: abstract this like the other binaries
|
||||
"layer",
|
||||
"rewrite-summary",
|
||||
str(dst_tl_dir / file.name),
|
||||
"--new-tenant-id",
|
||||
str(new_tenant),
|
||||
]
|
||||
subprocess.run(cmd, check=True)
|
||||
|
||||
client.tenant_attach(new_tenant, generation=src_tenant_gen)
|
||||
|
||||
while True:
|
||||
status = client.tenant_status(new_tenant)
|
||||
if status["state"]["slug"] == "Active":
|
||||
break
|
||||
print("Waiting for tenant to be active..., is: " + status["state"]["slug"])
|
||||
time.sleep(1)
|
||||
|
||||
print("Tenant is active: " + str(new_tenant))
|
||||
@@ -58,6 +58,7 @@ class HistoricLayerInfo:
|
||||
lsn_start: str
|
||||
lsn_end: Optional[str]
|
||||
remote: bool
|
||||
remote_path: Optional[str] = None
|
||||
|
||||
@classmethod
|
||||
def from_json(cls, d: Dict[str, Any]) -> HistoricLayerInfo:
|
||||
@@ -68,6 +69,7 @@ class HistoricLayerInfo:
|
||||
lsn_start=d["lsn_start"],
|
||||
lsn_end=d.get("lsn_end"),
|
||||
remote=d["remote"],
|
||||
remote_path=d.get("remote_path"),
|
||||
)
|
||||
|
||||
|
||||
|
||||
Reference in New Issue
Block a user