Allow setting gc/compaction_period to 0, to disable automatic GC/compaction

Many python tests were setting the GC/compaction period to large
values, to effectively disable GC / compaction. Reserve value 0 to
mean "explicitly disabled". We also set them to 0 in unit tests now,
although currently, unit tests don't launch the background jobs at
all, so it won't have any effect.

Fixes https://github.com/neondatabase/neon/issues/2917
This commit is contained in:
Heikki Linnakangas
2022-11-25 19:16:11 +02:00
committed by Heikki Linnakangas
parent 1a316a264d
commit 15db566420
9 changed files with 46 additions and 35 deletions

View File

@@ -1717,7 +1717,11 @@ pub mod harness {
// OK in a test.
let conf: &'static PageServerConf = Box::leak(Box::new(conf));
let tenant_conf = TenantConf::dummy_conf();
// Disable automatic GC and compaction to make the unit tests more deterministic.
// The tests perform them manually if needed.
let mut tenant_conf = TenantConf::dummy_conf();
tenant_conf.gc_period = Duration::ZERO;
tenant_conf.compaction_period = Duration::ZERO;
let tenant_id = TenantId::generate();
fs::create_dir_all(conf.tenant_path(&tenant_id))?;

View File

@@ -51,6 +51,7 @@ pub struct TenantConf {
// This parameter determines L1 layer file size.
pub compaction_target_size: u64,
// How often to check if there's compaction work to be done.
// Duration::ZERO means automatic compaction is disabled.
#[serde(with = "humantime_serde")]
pub compaction_period: Duration,
// Level0 delta layer threshold for compaction.
@@ -61,6 +62,7 @@ pub struct TenantConf {
// Page versions older than this are garbage collected away.
pub gc_horizon: u64,
// Interval at which garbage collection is triggered.
// Duration::ZERO means automatic GC is disabled
#[serde(with = "humantime_serde")]
pub gc_period: Duration,
// Delta layer churn threshold to create L1 image layers.

View File

@@ -65,13 +65,17 @@ async fn compaction_loop(tenant_id: TenantId) {
},
};
// Run blocking part of the task
// Run compaction
let mut sleep_duration = tenant.get_compaction_period();
if let Err(e) = tenant.compaction_iteration() {
sleep_duration = wait_duration;
error!("Compaction failed, retrying in {:?}: {e:?}", sleep_duration);
if sleep_duration == Duration::ZERO {
info!("automatic compaction is disabled");
// check again in 10 seconds, in case it's been enabled again.
sleep_duration = Duration::from_secs(10);
} else {
// Run compaction
if let Err(e) = tenant.compaction_iteration() {
sleep_duration = wait_duration;
error!("Compaction failed, retrying in {:?}: {e:?}", sleep_duration);
}
}
// Sleep
@@ -112,15 +116,21 @@ async fn gc_loop(tenant_id: TenantId) {
},
};
// Run gc
let gc_period = tenant.get_gc_period();
let gc_horizon = tenant.get_gc_horizon();
let mut sleep_duration = gc_period;
if gc_horizon > 0 {
if let Err(e) = tenant.gc_iteration(None, gc_horizon, tenant.get_pitr_interval(), false).await
{
sleep_duration = wait_duration;
error!("Gc failed, retrying in {:?}: {e:?}", sleep_duration);
if sleep_duration == Duration::ZERO {
info!("automatic GC is disabled");
// check again in 10 seconds, in case it's been enabled again.
sleep_duration = Duration::from_secs(10);
} else {
// Run gc
if gc_horizon > 0 {
if let Err(e) = tenant.gc_iteration(None, gc_horizon, tenant.get_pitr_interval(), false).await
{
sleep_duration = wait_duration;
error!("Gc failed, retrying in {:?}: {e:?}", sleep_duration);
}
}
}

View File

@@ -12,12 +12,12 @@ def test_layer_map(neon_env_builder: NeonEnvBuilder, zenbenchmark):
n_iters = 10
n_records = 100000
# We want to have a lot of lot of layer files to exercise the layer map. Make
# gc_horizon and checkpoint_distance very small, so that we get a lot of small layer files.
# We want to have a lot of lot of layer files to exercise the layer map. Disable
# GC, and make checkpoint_distance very small, so that we get a lot of small layer
# files.
tenant, _ = env.neon_cli.create_tenant(
conf={
"gc_period": "100 m",
"gc_horizon": "1048576",
"gc_period": "0s",
"checkpoint_distance": "8192",
"compaction_period": "1 s",
"compaction_threshold": "1",

View File

@@ -11,16 +11,12 @@ def test_ancestor_branch(neon_env_builder: NeonEnvBuilder):
env = neon_env_builder.init_start()
pageserver_http = env.pageserver.http_client()
# Override defaults, 1M gc_horizon and 4M checkpoint_distance.
# Extend compaction_period and gc_period to disable background compaction and gc.
# Override defaults: 4M checkpoint_distance, disable background compaction and gc.
tenant, _ = env.neon_cli.create_tenant(
conf={
"gc_period": "10 m",
"gc_horizon": "1048576",
"checkpoint_distance": "4194304",
"compaction_period": "10 m",
"compaction_threshold": "2",
"compaction_target_size": "4194304",
"gc_period": "0s",
"compaction_period": "0s",
}
)

View File

@@ -52,8 +52,7 @@ def test_branch_and_gc(neon_simple_env: NeonEnv):
tenant, _ = env.neon_cli.create_tenant(
conf={
# disable background GC
"gc_period": "10 m",
"gc_horizon": f"{10 * 1024 ** 3}",
"gc_period": "0s",
# small checkpoint distance to create more delta layer files
"checkpoint_distance": f"{1024 ** 2}",
# set the target size to be large to allow the image layer to cover the whole key space
@@ -127,8 +126,7 @@ def test_branch_creation_before_gc(neon_simple_env: NeonEnv):
tenant, _ = env.neon_cli.create_tenant(
conf={
# disable background GC
"gc_period": "10 m",
"gc_horizon": f"{10 * 1024 ** 3}",
"gc_period": "0s",
# small checkpoint distance to create more delta layer files
"checkpoint_distance": f"{1024 ** 2}",
# set the target size to be large to allow the image layer to cover the whole key space

View File

@@ -44,8 +44,8 @@ def test_single_branch_get_tenant_size_grows(neon_env_builder: NeonEnvBuilder):
Operate on single branch reading the tenants size after each transaction.
"""
# gc and compaction is not wanted automatically
# the pitr_interval here is quite problematic, so we cannot really use it.
# Disable automatic gc and compaction.
# The pitr_interval here is quite problematic, so we cannot really use it.
# it'd have to be calibrated per test executing env.
# there was a bug which was hidden if the create table and first batch of
@@ -53,7 +53,7 @@ def test_single_branch_get_tenant_size_grows(neon_env_builder: NeonEnvBuilder):
# that there next_gc_cutoff could be smaller than initdb_lsn, which will
# obviously lead to issues when calculating the size.
gc_horizon = 0x30000
neon_env_builder.pageserver_config_override = f"tenant_config={{compaction_period='1h', gc_period='1h', pitr_interval='0sec', gc_horizon={gc_horizon}}}"
neon_env_builder.pageserver_config_override = f"tenant_config={{compaction_period='0s', gc_period='0s', pitr_interval='0sec', gc_horizon={gc_horizon}}}"
env = neon_env_builder.init_start()
@@ -162,7 +162,7 @@ def test_get_tenant_size_with_multiple_branches(neon_env_builder: NeonEnvBuilder
gc_horizon = 128 * 1024
neon_env_builder.pageserver_config_override = f"tenant_config={{compaction_period='1h', gc_period='1h', pitr_interval='0sec', gc_horizon={gc_horizon}}}"
neon_env_builder.pageserver_config_override = f"tenant_config={{compaction_period='0s', gc_period='0s', pitr_interval='0sec', gc_horizon={gc_horizon}}}"
env = neon_env_builder.init_start()

View File

@@ -295,7 +295,7 @@ def test_timeline_physical_size_post_compaction(neon_env_builder: NeonEnvBuilder
def test_timeline_physical_size_post_gc(neon_env_builder: NeonEnvBuilder):
# Disable background compaction and GC as we don't want it to happen after `get_physical_size` request
# and before checking the expected size on disk, which makes the assertion failed
neon_env_builder.pageserver_config_override = "tenant_config={checkpoint_distance=100000, compaction_period='10m', gc_period='10m', pitr_interval='1s'}"
neon_env_builder.pageserver_config_override = "tenant_config={checkpoint_distance=100000, compaction_period='0s', gc_period='0s', pitr_interval='1s'}"
env = neon_env_builder.init_start()
pageserver_http = env.pageserver.http_client()

View File

@@ -16,8 +16,9 @@ def test_truncate(neon_env_builder: NeonEnvBuilder, zenbenchmark):
# by image layer generation. So adjust default parameters to make it happen more frequently.
tenant, _ = env.neon_cli.create_tenant(
conf={
"gc_period": "100 m",
"gc_horizon": "1048576",
# disable automatic GC
"gc_period": "0s",
# Compact and create images aggressively
"checkpoint_distance": "1000000",
"compaction_period": "1 s",
"compaction_threshold": "3",