mirror of
https://github.com/neondatabase/neon.git
synced 2026-01-15 17:32:56 +00:00
test_vm_bit_clear_on_heap_lock_blackbox: dump layer map while flaky code is running
refs https://github.com/neondatabase/neon/issues/6967
This commit is contained in:
@@ -4,6 +4,10 @@ version = "0.1.0"
|
||||
edition.workspace = true
|
||||
license.workspace = true
|
||||
|
||||
[features]
|
||||
default = []
|
||||
testing = [ "pageserver_api/testing" ]
|
||||
|
||||
[dependencies]
|
||||
anyhow.workspace = true
|
||||
async-trait.workspace = true
|
||||
|
||||
@@ -383,6 +383,12 @@ impl PageServerNode {
|
||||
.map(|x| x.parse::<AuxFilePolicy>())
|
||||
.transpose()
|
||||
.context("Failed to parse 'switch_aux_file_policy'")?,
|
||||
#[cfg(feature = "testing")]
|
||||
test_vm_bit_debug_logging: settings
|
||||
.remove("test_vm_bit_debug_logging")
|
||||
.map(|x| x.parse::<bool>())
|
||||
.transpose()
|
||||
.context("Failed to parse 'test_vm_bit_debug_logging' as bool")?,
|
||||
};
|
||||
if !settings.is_empty() {
|
||||
bail!("Unrecognized tenant settings: {settings:?}")
|
||||
@@ -506,6 +512,12 @@ impl PageServerNode {
|
||||
.map(|x| x.parse::<AuxFilePolicy>())
|
||||
.transpose()
|
||||
.context("Failed to parse 'switch_aux_file_policy'")?,
|
||||
#[cfg(feature = "testing")]
|
||||
test_vm_bit_debug_logging: settings
|
||||
.remove("test_vm_bit_debug_logging")
|
||||
.map(|x| x.parse::<bool>())
|
||||
.transpose()
|
||||
.context("Failed to parse 'test_vm_bit_debug_logging' as bool")?,
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
@@ -4,6 +4,10 @@ version = "0.1.0"
|
||||
edition.workspace = true
|
||||
license.workspace = true
|
||||
|
||||
[features]
|
||||
default = []
|
||||
testing = []
|
||||
|
||||
[dependencies]
|
||||
serde.workspace = true
|
||||
serde_with.workspace = true
|
||||
|
||||
@@ -322,6 +322,8 @@ pub struct TenantConfig {
|
||||
pub timeline_get_throttle: Option<ThrottleConfig>,
|
||||
pub image_layer_creation_check_threshold: Option<u8>,
|
||||
pub switch_aux_file_policy: Option<AuxFilePolicy>,
|
||||
#[cfg(feature = "testing")]
|
||||
pub test_vm_bit_debug_logging: Option<bool>,
|
||||
}
|
||||
|
||||
/// The policy for the aux file storage. It can be switched through `switch_aux_file_policy`
|
||||
|
||||
@@ -8,7 +8,7 @@ license.workspace = true
|
||||
default = []
|
||||
# Enables test-only APIs, incuding failpoints. In particular, enables the `fail_point!` macro,
|
||||
# which adds some runtime cost to run tests on outage conditions
|
||||
testing = ["fail/failpoints"]
|
||||
testing = ["fail/failpoints", "pageserver_api/testing"]
|
||||
|
||||
[dependencies]
|
||||
anyhow.workspace = true
|
||||
|
||||
@@ -3831,6 +3831,8 @@ pub(crate) mod harness {
|
||||
tenant_conf.image_layer_creation_check_threshold,
|
||||
),
|
||||
switch_aux_file_policy: Some(tenant_conf.switch_aux_file_policy),
|
||||
#[cfg(feature = "testing")]
|
||||
test_vm_bit_debug_logging: Some(tenant_conf.test_vm_bit_debug_logging),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -377,6 +377,9 @@ pub struct TenantConf {
|
||||
/// There is a `last_aux_file_policy` flag which gets persisted in `index_part.json` once the first aux
|
||||
/// file is written.
|
||||
pub switch_aux_file_policy: AuxFilePolicy,
|
||||
|
||||
#[cfg(feature = "testing")]
|
||||
pub test_vm_bit_debug_logging: bool,
|
||||
}
|
||||
|
||||
/// Same as TenantConf, but this struct preserves the information about
|
||||
@@ -476,6 +479,11 @@ pub struct TenantConfOpt {
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
#[serde(default)]
|
||||
pub switch_aux_file_policy: Option<AuxFilePolicy>,
|
||||
|
||||
#[cfg(feature = "testing")]
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
#[serde(default)]
|
||||
pub test_vm_bit_debug_logging: Option<bool>,
|
||||
}
|
||||
|
||||
impl TenantConfOpt {
|
||||
@@ -538,6 +546,10 @@ impl TenantConfOpt {
|
||||
switch_aux_file_policy: self
|
||||
.switch_aux_file_policy
|
||||
.unwrap_or(global_conf.switch_aux_file_policy),
|
||||
#[cfg(feature = "testing")]
|
||||
test_vm_bit_debug_logging: self
|
||||
.test_vm_bit_debug_logging
|
||||
.unwrap_or(global_conf.test_vm_bit_debug_logging),
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -582,6 +594,8 @@ impl Default for TenantConf {
|
||||
timeline_get_throttle: crate::tenant::throttle::Config::disabled(),
|
||||
image_layer_creation_check_threshold: DEFAULT_IMAGE_LAYER_CREATION_CHECK_THRESHOLD,
|
||||
switch_aux_file_policy: AuxFilePolicy::default_tenant_config(),
|
||||
#[cfg(feature = "testing")]
|
||||
test_vm_bit_debug_logging: false,
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -657,6 +671,8 @@ impl From<TenantConfOpt> for models::TenantConfig {
|
||||
timeline_get_throttle: value.timeline_get_throttle.map(ThrottleConfig::from),
|
||||
image_layer_creation_check_threshold: value.image_layer_creation_check_threshold,
|
||||
switch_aux_file_policy: value.switch_aux_file_policy,
|
||||
#[cfg(feature = "testing")]
|
||||
test_vm_bit_debug_logging: value.test_vm_bit_debug_logging,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -924,16 +924,59 @@ impl Timeline {
|
||||
let mut reconstruct_state = ValuesReconstructState::new();
|
||||
|
||||
// Only add the cached image to the reconstruct state when it exists.
|
||||
let cached_page_img_lsn = cached_page_img.as_ref().map(|(lsn, _)| *lsn);
|
||||
if cached_page_img.is_some() {
|
||||
let mut key_state = VectoredValueReconstructState::default();
|
||||
key_state.img = cached_page_img;
|
||||
reconstruct_state.keys.insert(key, Ok(key_state));
|
||||
}
|
||||
|
||||
let debug_log = {
|
||||
#[cfg(feature = "testing")]
|
||||
{
|
||||
self.get_test_vm_bit_debug_logging()
|
||||
}
|
||||
#[cfg(not(feature = "testing"))]
|
||||
{
|
||||
false
|
||||
}
|
||||
};
|
||||
|
||||
if debug_log {
|
||||
tracing::info!(%key, %lsn, ?cached_page_img_lsn, "debug-logging page reconstruction");
|
||||
}
|
||||
|
||||
if debug_log {
|
||||
tracing::info!(
|
||||
location = "before vectored get",
|
||||
"debug-logging page reconstruction"
|
||||
);
|
||||
self.layers
|
||||
.read()
|
||||
.await
|
||||
.layer_map()
|
||||
.dump(false, ctx)
|
||||
.await
|
||||
.unwrap();
|
||||
}
|
||||
|
||||
let vectored_res = self
|
||||
.get_vectored_impl(keyspace.clone(), lsn, &mut reconstruct_state, ctx)
|
||||
.await;
|
||||
|
||||
if debug_log {
|
||||
tracing::info!(
|
||||
location = "before validation",
|
||||
"debug-logging page reconstruction"
|
||||
);
|
||||
self.layers
|
||||
.read()
|
||||
.await
|
||||
.layer_map()
|
||||
.dump(false, ctx)
|
||||
.await
|
||||
.unwrap();
|
||||
}
|
||||
if self.conf.validate_vectored_get {
|
||||
self.validate_get_vectored_impl(&vectored_res, keyspace, lsn, ctx)
|
||||
.await;
|
||||
@@ -2196,6 +2239,15 @@ impl Timeline {
|
||||
.unwrap_or(default_tenant_conf.evictions_low_residence_duration_metric_threshold)
|
||||
}
|
||||
|
||||
#[cfg(feature = "testing")]
|
||||
fn get_test_vm_bit_debug_logging(&self) -> bool {
|
||||
let tenant_conf = self.tenant_conf.load();
|
||||
tenant_conf
|
||||
.tenant_conf
|
||||
.test_vm_bit_debug_logging
|
||||
.unwrap_or(self.conf.default_tenant_conf.test_vm_bit_debug_logging)
|
||||
}
|
||||
|
||||
fn get_image_layer_creation_check_threshold(&self) -> u8 {
|
||||
let tenant_conf = self.tenant_conf.load();
|
||||
tenant_conf
|
||||
@@ -2211,6 +2263,10 @@ impl Timeline {
|
||||
pub(super) fn tenant_conf_updated(&self, new_conf: &TenantConfOpt) {
|
||||
// NB: Most tenant conf options are read by background loops, so,
|
||||
// changes will automatically be picked up.
|
||||
#[cfg(feature = "testing")]
|
||||
{
|
||||
info!(?new_conf.test_vm_bit_debug_logging, "updating tenant conf");
|
||||
}
|
||||
|
||||
// The threshold is embedded in the metric. So, we need to update it.
|
||||
{
|
||||
|
||||
@@ -287,6 +287,13 @@ def test_vm_bit_clear_on_heap_lock_blackbox(neon_env_builder: NeonEnvBuilder):
|
||||
# already truncated away.
|
||||
#
|
||||
# ERROR: could not access status of transaction 1027
|
||||
|
||||
# Debugging https://github.com/neondatabase/neon/issues/6967
|
||||
# the select() below fails occassionally at get_impl="vectored" validation
|
||||
env.pageserver.http_client().patch_tenant_config_client_side(
|
||||
tenant_id,
|
||||
{"test_vm_bit_debug_logging": True},
|
||||
)
|
||||
cur.execute("select xmin, xmax, * from vmtest_lock where id = 40000 for update")
|
||||
tup = cur.fetchall()
|
||||
log.info(f"tuple = {tup}")
|
||||
|
||||
Reference in New Issue
Block a user